@omote/three 0.3.1 → 0.3.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,7 +14,7 @@ npm install @omote/three @omote/core three
14
14
  import * as THREE from 'three';
15
15
  import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
16
16
  import { OmoteAvatar } from '@omote/three';
17
- import { VoicePipeline, KokoroTTSInference, DEFAULT_MODEL_URLS } from '@omote/core';
17
+ import { KokoroTTSInference } from '@omote/core';
18
18
 
19
19
  // Load avatar
20
20
  const loader = new GLTFLoader();
@@ -24,13 +24,15 @@ loader.load('/avatar.glb', async (gltf) => {
24
24
  // Create avatar with full composition (gaze, emotion, life layer)
25
25
  const avatar = new OmoteAvatar({ target: gltf.scene });
26
26
 
27
- // Wire a pipeline (mic lip sync, playback, or voice agent)
28
- const pipeline = new VoicePipeline({
27
+ // Wire conversational voice (speaker + listener via connectVoice)
28
+ await avatar.connectVoice({
29
29
  mode: 'local',
30
30
  tts: new KokoroTTSInference({ defaultVoice: 'af_heart' }),
31
- models: { senseVoice, lam, vad },
31
+ onTranscript: async (text) => {
32
+ const res = await fetch('/api/chat', { method: 'POST', body: text });
33
+ return await res.text();
34
+ },
32
35
  });
33
- avatar.connectFrameSource(pipeline);
34
36
 
35
37
  // In render loop:
36
38
  function animate() {
@@ -50,7 +52,7 @@ Full-featured avatar class with CharacterController (compositor + gaze + life la
50
52
  | Method | Description |
51
53
  |--------|-------------|
52
54
  | `update(delta, camera, avatarRotationY?)` | Call each frame in your render loop |
53
- | `connectFrameSource(source)` | Wire any pipeline (PlaybackPipeline, MicLipSync, VoicePipeline) |
55
+ | `connectFrameSource(source)` | Wire any pipeline (PlaybackPipeline, MicLipSync, VoiceOrchestrator) |
54
56
  | `disconnectFrameSource()` | Disconnect the current frame source |
55
57
  | `setFrame(blendshapes)` | Direct blendshape input |
56
58
  | `setEmotion(emotion)` | Set emotion (string preset or weights) |
package/dist/index.cjs CHANGED
@@ -1,7 +1,9 @@
1
1
  "use strict";
2
+ var __create = Object.create;
2
3
  var __defProp = Object.defineProperty;
3
4
  var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
5
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
6
8
  var __export = (target, all) => {
7
9
  for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
15
17
  }
16
18
  return to;
17
19
  };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
18
28
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
29
 
20
30
  // src/index.ts
@@ -22,6 +32,7 @@ var index_exports = {};
22
32
  __export(index_exports, {
23
33
  BlendshapeController: () => BlendshapeController,
24
34
  OmoteAvatar: () => OmoteAvatar,
35
+ createAvatar: () => createAvatar,
25
36
  discoverScene: () => discoverScene,
26
37
  writeBlendshapes: () => writeBlendshapes
27
38
  });
@@ -29,6 +40,7 @@ module.exports = __toCommonJS(index_exports);
29
40
 
30
41
  // src/OmoteAvatar.ts
31
42
  var import_core2 = require("@omote/core");
43
+ var import_avatar = require("@omote/avatar");
32
44
  var import_three = require("three");
33
45
 
34
46
  // src/SceneDiscovery.ts
@@ -124,25 +136,18 @@ function writeBlendshapes(blendshapes, morphEntries) {
124
136
  }
125
137
 
126
138
  // src/OmoteAvatar.ts
127
- var logger2 = (0, import_core2.createLogger)("OmoteAvatar");
139
+ var logger2 = (0, import_core2.createLogger)("OmoteAvatar.Three");
128
140
  var _headWorldPos = new import_three.Vector3();
129
141
  var _camWorldPos = new import_three.Vector3();
130
142
  var _headWorldQuat = new import_three.Quaternion();
131
143
  var OmoteAvatar = class {
132
144
  constructor(options) {
145
+ // External frame source (adapter-level, handles emotion extraction)
133
146
  this.frameSourceCallback = null;
134
147
  this.connectedSource = null;
135
- // TTS integration
136
- this.ttsSpeaker = null;
137
- // Speech listener
138
- this.speechListener = null;
139
- // Voice orchestrator
140
- this.voiceOrchestrator = null;
141
148
  // State
142
149
  this.currentBlendshapes = null;
143
150
  this._emotion = null;
144
- this._isSpeaking = false;
145
- this._state = "idle";
146
151
  this._audioEnergy = 0;
147
152
  this.discovery = discoverScene(options.target);
148
153
  const controllerConfig = {
@@ -150,6 +155,13 @@ var OmoteAvatar = class {
150
155
  gaze: options.gaze
151
156
  };
152
157
  this.controller = new import_core2.CharacterController(controllerConfig);
158
+ this.core = new import_avatar.OmoteAvatarCore();
159
+ this.core.onFrame = (frame) => {
160
+ this.currentBlendshapes = frame.blendshapes;
161
+ if (frame.emotion !== void 0) {
162
+ this._emotion = frame.emotion;
163
+ }
164
+ };
153
165
  if (this.discovery.morphEntries.length === 0) {
154
166
  logger2.warn("No morph targets found \u2014 blendshape animation will have no effect");
155
167
  }
@@ -192,8 +204,8 @@ var OmoteAvatar = class {
192
204
  deltaTime: delta,
193
205
  baseBlendshapes: this.currentBlendshapes,
194
206
  emotion: this._emotion,
195
- isSpeaking: this._isSpeaking,
196
- state: this._state,
207
+ isSpeaking: this.core.isSpeaking,
208
+ state: this.core.state,
197
209
  audioEnergy: this._audioEnergy,
198
210
  cameraWorldPos,
199
211
  headWorldPos,
@@ -207,7 +219,7 @@ var OmoteAvatar = class {
207
219
  }
208
220
  }
209
221
  // -------------------------------------------------------------------------
210
- // Frame source connection
222
+ // Frame source connection (adapter-level, with emotion extraction)
211
223
  // -------------------------------------------------------------------------
212
224
  /**
213
225
  * Connect to any frame source (PlaybackPipeline, MicLipSync, etc.).
@@ -217,9 +229,6 @@ var OmoteAvatar = class {
217
229
  * disconnects the previous one.
218
230
  */
219
231
  connectFrameSource(source) {
220
- if (this.ttsSpeaker && source !== this.ttsSpeaker.frameSource) {
221
- this.ttsSpeaker.stop();
222
- }
223
232
  this.disconnectFrameSource();
224
233
  this.frameSourceCallback = (frame) => {
225
234
  this.currentBlendshapes = frame.blendshapes;
@@ -243,88 +252,26 @@ var OmoteAvatar = class {
243
252
  this.frameSourceCallback = null;
244
253
  }
245
254
  // -------------------------------------------------------------------------
246
- // Speaker (TTS → lip sync)
255
+ // Speaker (TTS → lip sync) — delegated to OmoteAvatarCore
247
256
  // -------------------------------------------------------------------------
248
- /**
249
- * Connect a TTS backend for speak() / streamText() support.
250
- * Loads LAM model and creates internal PlaybackPipeline.
251
- *
252
- * @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
253
- * @param config - A2E, expression profile, and playback configuration
254
- */
257
+ /** Warm up AudioContext for iOS/Safari autoplay policy. Call from user gesture. */
258
+ async warmup() {
259
+ return this.core.warmup();
260
+ }
255
261
  async connectSpeaker(tts, config) {
256
- await this.disconnectSpeaker();
257
- this.ttsSpeaker = new import_core2.TTSSpeaker();
258
- await this.ttsSpeaker.connect(tts, config);
259
- this.connectFrameSource(this.ttsSpeaker.frameSource);
262
+ return this.core.connectSpeaker(tts, config);
260
263
  }
261
- /**
262
- * Synthesize text and play with lip sync.
263
- * Auto-aborts previous speak if still in progress.
264
- *
265
- * @param text - Text to synthesize
266
- * @param options - Optional voice override and abort signal
267
- */
268
264
  async speak(text, options) {
269
- if (this.voiceOrchestrator) {
270
- await this.voiceOrchestrator.speak(text, options);
271
- return;
272
- }
273
- if (!this.ttsSpeaker) {
274
- throw new Error("No speaker connected. Call connectSpeaker() first.");
275
- }
276
- this._isSpeaking = true;
277
- this._state = "speaking";
278
- try {
279
- await this.ttsSpeaker.speak(text, options);
280
- } finally {
281
- this._isSpeaking = false;
282
- if (this._state === "speaking") {
283
- this._state = "idle";
284
- }
285
- }
265
+ return this.core.speak(text, options);
286
266
  }
287
- /**
288
- * Stream LLM tokens with sentence-buffered TTS + lip sync.
289
- * Returns a sink: call push(token) for each token, end() when done.
290
- */
291
267
  async streamText(options) {
292
- if (this.voiceOrchestrator) {
293
- return this.voiceOrchestrator.streamText(options);
294
- }
295
- if (!this.ttsSpeaker) {
296
- throw new Error("No speaker connected. Call connectSpeaker() first.");
297
- }
298
- this._isSpeaking = true;
299
- this._state = "speaking";
300
- const stream = await this.ttsSpeaker.streamText(options ?? {});
301
- return {
302
- push: stream.push,
303
- end: async () => {
304
- try {
305
- await stream.end();
306
- } finally {
307
- this._isSpeaking = false;
308
- if (this._state === "speaking") this._state = "idle";
309
- }
310
- }
311
- };
268
+ return this.core.streamText(options);
312
269
  }
313
- /** Stop current TTS playback. */
314
270
  stopSpeaking() {
315
- if (this.voiceOrchestrator) {
316
- this.voiceOrchestrator.stopSpeaking();
317
- return;
318
- }
319
- this.ttsSpeaker?.stop();
271
+ this.core.stopSpeaking();
320
272
  }
321
- /** Disconnect speaker and dispose its resources. */
322
273
  async disconnectSpeaker() {
323
- if (this.ttsSpeaker) {
324
- this.disconnectFrameSource();
325
- await this.ttsSpeaker.dispose();
326
- this.ttsSpeaker = null;
327
- }
274
+ return this.core.disconnectSpeaker();
328
275
  }
329
276
  /** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
330
277
  async connectTTS(tts, config) {
@@ -335,88 +282,49 @@ var OmoteAvatar = class {
335
282
  return this.disconnectSpeaker();
336
283
  }
337
284
  // -------------------------------------------------------------------------
338
- // Listener (mic → VAD → ASR → transcript)
285
+ // Listener (mic → VAD → ASR → transcript) — delegated to OmoteAvatarCore
339
286
  // -------------------------------------------------------------------------
340
- /**
341
- * Connect a speech listener for startListening() / onTranscript() support.
342
- * Loads ASR + VAD models.
343
- */
344
287
  async connectListener(config) {
345
- await this.disconnectListener();
346
- this.speechListener = new import_core2.SpeechListener(config);
347
- await this.speechListener.loadModels();
288
+ return this.core.connectListener(config);
348
289
  }
349
- /** Start listening for user speech. Requires connectListener() or connectVoice() first. */
350
290
  async startListening() {
351
- if (this.voiceOrchestrator) {
352
- await this.voiceOrchestrator.startListening();
353
- return;
354
- }
355
- if (!this.speechListener) {
356
- throw new Error("No listener connected. Call connectListener() first.");
357
- }
358
- this._state = "listening";
359
- await this.speechListener.start();
291
+ return this.core.startListening();
360
292
  }
361
- /** Stop listening. */
362
293
  stopListening() {
363
- if (this.voiceOrchestrator) {
364
- this.voiceOrchestrator.stopListening();
365
- return;
366
- }
367
- this.speechListener?.stop();
368
- if (this._state === "listening") this._state = "idle";
294
+ this.core.stopListening();
369
295
  }
370
- /**
371
- * Subscribe to transcript events. Returns an unsubscribe function.
372
- * Requires connectListener() first.
373
- */
374
296
  onTranscript(callback) {
375
- const listener = this.speechListener ?? this.voiceOrchestrator?.listener;
376
- if (!listener) {
377
- throw new Error("No listener connected. Call connectListener() or connectVoice() first.");
378
- }
379
- listener.on("transcript", callback);
380
- return () => {
381
- listener.off?.("transcript", callback);
382
- };
297
+ return this.core.onTranscript(callback);
383
298
  }
384
- /** Disconnect listener and dispose its resources. */
385
299
  async disconnectListener() {
386
- if (this.speechListener) {
387
- await this.speechListener.dispose();
388
- this.speechListener = null;
389
- }
300
+ return this.core.disconnectListener();
390
301
  }
391
302
  // -------------------------------------------------------------------------
392
- // Voice (combined speaker + listener + interruption)
303
+ // Voice (combined speaker + listener + interruption) — delegated
393
304
  // -------------------------------------------------------------------------
394
- /**
395
- * Connect voice with automatic speaker + listener + interruption wiring.
396
- * Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
397
- * Does NOT auto-start listening — call startListening() when ready.
398
- *
399
- * Backward compatible: `mode` defaults to 'local' when not specified.
400
- */
401
305
  async connectVoice(config) {
402
- await this.disconnectVoice();
403
- this.voiceOrchestrator = new import_core2.VoiceOrchestrator();
404
- await this.voiceOrchestrator.connect(config);
405
- if (this.voiceOrchestrator.frameSource) {
406
- this.connectFrameSource(this.voiceOrchestrator.frameSource);
407
- }
408
- this.voiceOrchestrator.on("state", (state) => {
409
- this._state = state;
410
- this._isSpeaking = state === "speaking";
411
- });
306
+ return this.core.connectVoice(config);
412
307
  }
413
- /** Disconnect voice (speaker + listener + interruption). */
414
308
  async disconnectVoice() {
415
- if (this.voiceOrchestrator) {
416
- this.disconnectFrameSource();
417
- await this.voiceOrchestrator.disconnect();
418
- this.voiceOrchestrator = null;
419
- }
309
+ return this.core.disconnectVoice();
310
+ }
311
+ // -------------------------------------------------------------------------
312
+ // Event subscriptions — delegated to OmoteAvatarCore
313
+ // -------------------------------------------------------------------------
314
+ onTranscriptEvent(callback) {
315
+ return this.core.onTranscriptEvent(callback);
316
+ }
317
+ onVoiceStateChange(callback) {
318
+ return this.core.onVoiceStateChange(callback);
319
+ }
320
+ onLoadingProgress(callback) {
321
+ return this.core.onLoadingProgress(callback);
322
+ }
323
+ onError(callback) {
324
+ return this.core.onError(callback);
325
+ }
326
+ onAudioLevel(callback) {
327
+ return this.core.onAudioLevel(callback);
420
328
  }
421
329
  // -------------------------------------------------------------------------
422
330
  // State setters
@@ -431,11 +339,11 @@ var OmoteAvatar = class {
431
339
  }
432
340
  /** Set whether the avatar is currently speaking (drives mouth animation intensity). */
433
341
  setSpeaking(speaking) {
434
- this._isSpeaking = speaking;
342
+ this.core.setSpeaking(speaking);
435
343
  }
436
344
  /** Set the conversational state (idle, listening, thinking, speaking). */
437
345
  setState(state) {
438
- this._state = state;
346
+ this.core.setState(state);
439
347
  }
440
348
  /** Set audio energy level (0-1, drives emphasis/gesture intensity). */
441
349
  setAudioEnergy(energy) {
@@ -466,23 +374,23 @@ var OmoteAvatar = class {
466
374
  }
467
375
  /** Whether the avatar is currently speaking via TTS. */
468
376
  get isSpeaking() {
469
- return this._isSpeaking;
377
+ return this.core.isSpeaking;
470
378
  }
471
379
  /** Whether the avatar is currently listening for speech. */
472
380
  get isListening() {
473
- return this._state === "listening";
381
+ return this.core.state === "listening";
474
382
  }
475
383
  /** Current conversational state. */
476
384
  get conversationalState() {
477
- return this._state;
385
+ return this.core.state;
478
386
  }
479
387
  /** Access the internal TTSSpeaker (null if not connected). */
480
388
  get speaker() {
481
- return this.ttsSpeaker ?? this.voiceOrchestrator?.speaker ?? null;
389
+ return this.core.speaker;
482
390
  }
483
391
  /** Access the internal SpeechListener (null if not connected). */
484
392
  get listener() {
485
- return this.speechListener ?? this.voiceOrchestrator?.listener ?? null;
393
+ return this.core.listener;
486
394
  }
487
395
  // -------------------------------------------------------------------------
488
396
  // Lifecycle
@@ -491,22 +399,88 @@ var OmoteAvatar = class {
491
399
  reset() {
492
400
  this.currentBlendshapes = null;
493
401
  this._emotion = null;
494
- this._isSpeaking = false;
495
- this._state = "idle";
496
402
  this._audioEnergy = 0;
403
+ this.core.reset();
497
404
  this.controller.reset();
498
405
  }
499
406
  /** Disconnect all voice resources, frame sources, and dispose the controller. */
500
407
  async dispose() {
501
- await this.disconnectVoice();
502
- await this.disconnectSpeaker();
503
- await this.disconnectListener();
408
+ await this.core.dispose();
504
409
  this.disconnectFrameSource();
505
410
  this.controller.dispose();
506
411
  logger2.debug("Disposed");
507
412
  }
508
413
  };
509
414
 
415
+ // src/createAvatar.ts
416
+ var THREE = __toESM(require("three"), 1);
417
+ var import_GLTFLoader = require("three/addons/loaders/GLTFLoader.js");
418
+ var import_OrbitControls = require("three/addons/controls/OrbitControls.js");
419
+ async function createAvatar(config) {
420
+ const container = typeof config.container === "string" ? document.querySelector(config.container) : config.container;
421
+ if (!container) throw new Error(`Container not found: ${config.container}`);
422
+ const { clientWidth: w, clientHeight: h } = container;
423
+ const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
424
+ renderer.setSize(w, h);
425
+ renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
426
+ renderer.toneMapping = THREE.ACESFilmicToneMapping;
427
+ container.appendChild(renderer.domElement);
428
+ const scene = new THREE.Scene();
429
+ const camera = new THREE.PerspectiveCamera(config.fov ?? 35, w / h, 0.1, 100);
430
+ camera.position.set(0, 1.5, 0.8);
431
+ scene.add(new THREE.AmbientLight(16777215, 0.5));
432
+ const dirLight = new THREE.DirectionalLight(16777215, 1);
433
+ dirLight.position.set(2, 3, 2);
434
+ scene.add(dirLight);
435
+ let controls = null;
436
+ if (config.controls !== false) {
437
+ controls = new import_OrbitControls.OrbitControls(camera, renderer.domElement);
438
+ controls.target.set(0, 1.5, 0);
439
+ controls.enableDamping = true;
440
+ controls.update();
441
+ }
442
+ const gltf = await new import_GLTFLoader.GLTFLoader().loadAsync(config.src);
443
+ scene.add(gltf.scene);
444
+ const avatar = new OmoteAvatar({
445
+ target: gltf.scene,
446
+ gaze: { enabled: true, smoothing: 0.08 }
447
+ });
448
+ const clock = new THREE.Clock();
449
+ let animId = 0;
450
+ function animate() {
451
+ animId = requestAnimationFrame(animate);
452
+ avatar.update(clock.getDelta(), camera);
453
+ controls?.update();
454
+ renderer.render(scene, camera);
455
+ }
456
+ animate();
457
+ const ro = new ResizeObserver(() => {
458
+ const { clientWidth: rw, clientHeight: rh } = container;
459
+ camera.aspect = rw / rh;
460
+ camera.updateProjectionMatrix();
461
+ renderer.setSize(rw, rh);
462
+ });
463
+ ro.observe(container);
464
+ return {
465
+ avatar,
466
+ scene,
467
+ camera,
468
+ renderer,
469
+ controls,
470
+ animations: gltf.animations ?? [],
471
+ dispose() {
472
+ cancelAnimationFrame(animId);
473
+ ro.disconnect();
474
+ controls?.dispose();
475
+ avatar.dispose();
476
+ renderer.dispose();
477
+ if (renderer.domElement.parentElement) {
478
+ renderer.domElement.parentElement.removeChild(renderer.domElement);
479
+ }
480
+ }
481
+ };
482
+ }
483
+
510
484
  // src/BlendshapeController.ts
511
485
  var import_core3 = require("@omote/core");
512
486
  var BlendshapeController = class {