npm - @omote/three - Versions diffs - 0.3.1 → 0.3.2 - Mend

@omote/three 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/README.md CHANGED

@@ -14,7 +14,7 @@ npm install @omote/three @omote/core three
 import * as THREE from 'three';
 import { GLTFLoader } from 'three/addons/loaders/GLTFLoader.js';
 import { OmoteAvatar } from '@omote/three';
-import { VoicePipeline, KokoroTTSInference, DEFAULT_MODEL_URLS } from '@omote/core';
+import { KokoroTTSInference } from '@omote/core';
 // Load avatar
 const loader = new GLTFLoader();
@@ -24,13 +24,15 @@ loader.load('/avatar.glb', async (gltf) => {
   // Create avatar with full composition (gaze, emotion, life layer)
   const avatar = new OmoteAvatar({ target: gltf.scene });
-  // Wire a pipeline (mic lip sync, playback, or voice agent)
-  const pipeline = new VoicePipeline({
+  // Wire conversational voice (speaker + listener via connectVoice)
+  await avatar.connectVoice({
     mode: 'local',
     tts: new KokoroTTSInference({ defaultVoice: 'af_heart' }),
-    models: { senseVoice, lam, vad },
+    onTranscript: async (text) => {
+      const res = await fetch('/api/chat', { method: 'POST', body: text });
+      return await res.text();
+    },
   });
-  avatar.connectFrameSource(pipeline);
   // In render loop:
   function animate() {
@@ -50,7 +52,7 @@ Full-featured avatar class with CharacterController (compositor + gaze + life la
 | Method | Description |
 |--------|-------------|
 | `update(delta, camera, avatarRotationY?)` | Call each frame in your render loop |
-| `connectFrameSource(source)` | Wire any pipeline (PlaybackPipeline, MicLipSync, VoicePipeline) |
+| `connectFrameSource(source)` | Wire any pipeline (PlaybackPipeline, MicLipSync, VoiceOrchestrator) |
 | `disconnectFrameSource()` | Disconnect the current frame source |
 | `setFrame(blendshapes)` | Direct blendshape input |
 | `setEmotion(emotion)` | Set emotion (string preset or weights) |

package/dist/index.cjs CHANGED

@@ -1,7 +1,9 @@
 "use strict";
+var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
 var __export = (target, all) => {
   for (var name in all)
@@ -15,6 +17,14 @@ var __copyProps = (to, from, except, desc) => {
   }
   return to;
 };
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
 // src/index.ts
@@ -22,6 +32,7 @@ var index_exports = {};
 __export(index_exports, {
   BlendshapeController: () => BlendshapeController,
   OmoteAvatar: () => OmoteAvatar,
+  createAvatar: () => createAvatar,
   discoverScene: () => discoverScene,
   writeBlendshapes: () => writeBlendshapes
 });
@@ -29,6 +40,7 @@ module.exports = __toCommonJS(index_exports);
 // src/OmoteAvatar.ts
 var import_core2 = require("@omote/core");
+var import_avatar = require("@omote/avatar");
 var import_three = require("three");
 // src/SceneDiscovery.ts
@@ -124,25 +136,18 @@ function writeBlendshapes(blendshapes, morphEntries) {
 }
 // src/OmoteAvatar.ts
-var logger2 = (0, import_core2.createLogger)("OmoteAvatar");
+var logger2 = (0, import_core2.createLogger)("OmoteAvatar.Three");
 var _headWorldPos = new import_three.Vector3();
 var _camWorldPos = new import_three.Vector3();
 var _headWorldQuat = new import_three.Quaternion();
 var OmoteAvatar = class {
   constructor(options) {
+    // External frame source (adapter-level, handles emotion extraction)
     this.frameSourceCallback = null;
     this.connectedSource = null;
-    // TTS integration
-    this.ttsSpeaker = null;
-    // Speech listener
-    this.speechListener = null;
-    // Voice orchestrator
-    this.voiceOrchestrator = null;
     // State
     this.currentBlendshapes = null;
     this._emotion = null;
-    this._isSpeaking = false;
-    this._state = "idle";
     this._audioEnergy = 0;
     this.discovery = discoverScene(options.target);
     const controllerConfig = {
@@ -150,6 +155,13 @@ var OmoteAvatar = class {
       gaze: options.gaze
     };
     this.controller = new import_core2.CharacterController(controllerConfig);
+    this.core = new import_avatar.OmoteAvatarCore();
+    this.core.onFrame = (frame) => {
+      this.currentBlendshapes = frame.blendshapes;
+      if (frame.emotion !== void 0) {
+        this._emotion = frame.emotion;
+      }
+    };
     if (this.discovery.morphEntries.length === 0) {
       logger2.warn("No morph targets found \u2014 blendshape animation will have no effect");
     }
@@ -192,8 +204,8 @@ var OmoteAvatar = class {
       deltaTime: delta,
       baseBlendshapes: this.currentBlendshapes,
       emotion: this._emotion,
-      isSpeaking: this._isSpeaking,
-      state: this._state,
+      isSpeaking: this.core.isSpeaking,
+      state: this.core.state,
       audioEnergy: this._audioEnergy,
       cameraWorldPos,
       headWorldPos,
@@ -207,7 +219,7 @@ var OmoteAvatar = class {
     }
   }
   // -------------------------------------------------------------------------
-  // Frame source connection
+  // Frame source connection (adapter-level, with emotion extraction)
   // -------------------------------------------------------------------------
   /**
    * Connect to any frame source (PlaybackPipeline, MicLipSync, etc.).
@@ -217,9 +229,6 @@ var OmoteAvatar = class {
    * disconnects the previous one.
    */
   connectFrameSource(source) {
-    if (this.ttsSpeaker && source !== this.ttsSpeaker.frameSource) {
-      this.ttsSpeaker.stop();
-    }
     this.disconnectFrameSource();
     this.frameSourceCallback = (frame) => {
       this.currentBlendshapes = frame.blendshapes;
@@ -243,88 +252,26 @@ var OmoteAvatar = class {
     this.frameSourceCallback = null;
   }
   // -------------------------------------------------------------------------
-  // Speaker (TTS → lip sync)
+  // Speaker (TTS → lip sync) — delegated to OmoteAvatarCore
   // -------------------------------------------------------------------------
-  /**
-   * Connect a TTS backend for speak() / streamText() support.
-   * Loads LAM model and creates internal PlaybackPipeline.
-   *
-   * @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
-   * @param config - A2E, expression profile, and playback configuration
-   */
+  /** Warm up AudioContext for iOS/Safari autoplay policy. Call from user gesture. */
+  async warmup() {
+    return this.core.warmup();
+  }
   async connectSpeaker(tts, config) {
-    await this.disconnectSpeaker();
-    this.ttsSpeaker = new import_core2.TTSSpeaker();
-    await this.ttsSpeaker.connect(tts, config);
-    this.connectFrameSource(this.ttsSpeaker.frameSource);
+    return this.core.connectSpeaker(tts, config);
   }
-  /**
-   * Synthesize text and play with lip sync.
-   * Auto-aborts previous speak if still in progress.
-   *
-   * @param text - Text to synthesize
-   * @param options - Optional voice override and abort signal
-   */
   async speak(text, options) {
-    if (this.voiceOrchestrator) {
-      await this.voiceOrchestrator.speak(text, options);
-      return;
-    }
-    if (!this.ttsSpeaker) {
-      throw new Error("No speaker connected. Call connectSpeaker() first.");
-    }
-    this._isSpeaking = true;
-    this._state = "speaking";
-    try {
-      await this.ttsSpeaker.speak(text, options);
-    } finally {
-      this._isSpeaking = false;
-      if (this._state === "speaking") {
-        this._state = "idle";
-      }
-    }
+    return this.core.speak(text, options);
   }
-  /**
-   * Stream LLM tokens with sentence-buffered TTS + lip sync.
-   * Returns a sink: call push(token) for each token, end() when done.
-   */
   async streamText(options) {
-    if (this.voiceOrchestrator) {
-      return this.voiceOrchestrator.streamText(options);
-    }
-    if (!this.ttsSpeaker) {
-      throw new Error("No speaker connected. Call connectSpeaker() first.");
-    }
-    this._isSpeaking = true;
-    this._state = "speaking";
-    const stream = await this.ttsSpeaker.streamText(options ?? {});
-    return {
-      push: stream.push,
-      end: async () => {
-        try {
-          await stream.end();
-        } finally {
-          this._isSpeaking = false;
-          if (this._state === "speaking") this._state = "idle";
-        }
-      }
-    };
+    return this.core.streamText(options);
   }
-  /** Stop current TTS playback. */
   stopSpeaking() {
-    if (this.voiceOrchestrator) {
-      this.voiceOrchestrator.stopSpeaking();
-      return;
-    }
-    this.ttsSpeaker?.stop();
+    this.core.stopSpeaking();
   }
-  /** Disconnect speaker and dispose its resources. */
   async disconnectSpeaker() {
-    if (this.ttsSpeaker) {
-      this.disconnectFrameSource();
-      await this.ttsSpeaker.dispose();
-      this.ttsSpeaker = null;
-    }
+    return this.core.disconnectSpeaker();
   }
   /** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
   async connectTTS(tts, config) {
@@ -335,88 +282,49 @@ var OmoteAvatar = class {
     return this.disconnectSpeaker();
   }
   // -------------------------------------------------------------------------
-  // Listener (mic → VAD → ASR → transcript)
+  // Listener (mic → VAD → ASR → transcript) — delegated to OmoteAvatarCore
   // -------------------------------------------------------------------------
-  /**
-   * Connect a speech listener for startListening() / onTranscript() support.
-   * Loads ASR + VAD models.
-   */
   async connectListener(config) {
-    await this.disconnectListener();
-    this.speechListener = new import_core2.SpeechListener(config);
-    await this.speechListener.loadModels();
+    return this.core.connectListener(config);
   }
-  /** Start listening for user speech. Requires connectListener() or connectVoice() first. */
   async startListening() {
-    if (this.voiceOrchestrator) {
-      await this.voiceOrchestrator.startListening();
-      return;
-    }
-    if (!this.speechListener) {
-      throw new Error("No listener connected. Call connectListener() first.");
-    }
-    this._state = "listening";
-    await this.speechListener.start();
+    return this.core.startListening();
   }
-  /** Stop listening. */
   stopListening() {
-    if (this.voiceOrchestrator) {
-      this.voiceOrchestrator.stopListening();
-      return;
-    }
-    this.speechListener?.stop();
-    if (this._state === "listening") this._state = "idle";
+    this.core.stopListening();
   }
-  /**
-   * Subscribe to transcript events. Returns an unsubscribe function.
-   * Requires connectListener() first.
-   */
   onTranscript(callback) {
-    const listener = this.speechListener ?? this.voiceOrchestrator?.listener;
-    if (!listener) {
-      throw new Error("No listener connected. Call connectListener() or connectVoice() first.");
-    }
-    listener.on("transcript", callback);
-    return () => {
-      listener.off?.("transcript", callback);
-    };
+    return this.core.onTranscript(callback);
   }
-  /** Disconnect listener and dispose its resources. */
   async disconnectListener() {
-    if (this.speechListener) {
-      await this.speechListener.dispose();
-      this.speechListener = null;
-    }
+    return this.core.disconnectListener();
   }
   // -------------------------------------------------------------------------
-  // Voice (combined speaker + listener + interruption)
+  // Voice (combined speaker + listener + interruption) — delegated
   // -------------------------------------------------------------------------
-  /**
-   * Connect voice with automatic speaker + listener + interruption wiring.
-   * Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
-   * Does NOT auto-start listening — call startListening() when ready.
-   *
-   * Backward compatible: `mode` defaults to 'local' when not specified.
-   */
   async connectVoice(config) {
-    await this.disconnectVoice();
-    this.voiceOrchestrator = new import_core2.VoiceOrchestrator();
-    await this.voiceOrchestrator.connect(config);
-    if (this.voiceOrchestrator.frameSource) {
-      this.connectFrameSource(this.voiceOrchestrator.frameSource);
-    }
-    this.voiceOrchestrator.on("state", (state) => {
-      this._state = state;
-      this._isSpeaking = state === "speaking";
-    });
+    return this.core.connectVoice(config);
   }
-  /** Disconnect voice (speaker + listener + interruption). */
   async disconnectVoice() {
-    if (this.voiceOrchestrator) {
-      this.disconnectFrameSource();
-      await this.voiceOrchestrator.disconnect();
-      this.voiceOrchestrator = null;
-    }
+    return this.core.disconnectVoice();
+  }
+  // -------------------------------------------------------------------------
+  // Event subscriptions — delegated to OmoteAvatarCore
+  // -------------------------------------------------------------------------
+  onTranscriptEvent(callback) {
+    return this.core.onTranscriptEvent(callback);
+  }
+  onVoiceStateChange(callback) {
+    return this.core.onVoiceStateChange(callback);
+  }
+  onLoadingProgress(callback) {
+    return this.core.onLoadingProgress(callback);
+  }
+  onError(callback) {
+    return this.core.onError(callback);
+  }
+  onAudioLevel(callback) {
+    return this.core.onAudioLevel(callback);
   }
   // -------------------------------------------------------------------------
   // State setters
@@ -431,11 +339,11 @@ var OmoteAvatar = class {
   }
   /** Set whether the avatar is currently speaking (drives mouth animation intensity). */
   setSpeaking(speaking) {
-    this._isSpeaking = speaking;
+    this.core.setSpeaking(speaking);
   }
   /** Set the conversational state (idle, listening, thinking, speaking). */
   setState(state) {
-    this._state = state;
+    this.core.setState(state);
   }
   /** Set audio energy level (0-1, drives emphasis/gesture intensity). */
   setAudioEnergy(energy) {
@@ -466,23 +374,23 @@ var OmoteAvatar = class {
   }
   /** Whether the avatar is currently speaking via TTS. */
   get isSpeaking() {
-    return this._isSpeaking;
+    return this.core.isSpeaking;
   }
   /** Whether the avatar is currently listening for speech. */
   get isListening() {
-    return this._state === "listening";
+    return this.core.state === "listening";
   }
   /** Current conversational state. */
   get conversationalState() {
-    return this._state;
+    return this.core.state;
   }
   /** Access the internal TTSSpeaker (null if not connected). */
   get speaker() {
-    return this.ttsSpeaker ?? this.voiceOrchestrator?.speaker ?? null;
+    return this.core.speaker;
   }
   /** Access the internal SpeechListener (null if not connected). */
   get listener() {
-    return this.speechListener ?? this.voiceOrchestrator?.listener ?? null;
+    return this.core.listener;
   }
   // -------------------------------------------------------------------------
   // Lifecycle
@@ -491,22 +399,88 @@ var OmoteAvatar = class {
   reset() {
     this.currentBlendshapes = null;
     this._emotion = null;
-    this._isSpeaking = false;
-    this._state = "idle";
     this._audioEnergy = 0;
+    this.core.reset();
     this.controller.reset();
   }
   /** Disconnect all voice resources, frame sources, and dispose the controller. */
   async dispose() {
-    await this.disconnectVoice();
-    await this.disconnectSpeaker();
-    await this.disconnectListener();
+    await this.core.dispose();
     this.disconnectFrameSource();
     this.controller.dispose();
     logger2.debug("Disposed");
   }
 };
+// src/createAvatar.ts
+var THREE = __toESM(require("three"), 1);
+var import_GLTFLoader = require("three/addons/loaders/GLTFLoader.js");
+var import_OrbitControls = require("three/addons/controls/OrbitControls.js");
+async function createAvatar(config) {
+  const container = typeof config.container === "string" ? document.querySelector(config.container) : config.container;
+  if (!container) throw new Error(`Container not found: ${config.container}`);
+  const { clientWidth: w, clientHeight: h } = container;
+  const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true });
+  renderer.setSize(w, h);
+  renderer.setPixelRatio(Math.min(window.devicePixelRatio, 2));
+  renderer.toneMapping = THREE.ACESFilmicToneMapping;
+  container.appendChild(renderer.domElement);
+  const scene = new THREE.Scene();
+  const camera = new THREE.PerspectiveCamera(config.fov ?? 35, w / h, 0.1, 100);
+  camera.position.set(0, 1.5, 0.8);
+  scene.add(new THREE.AmbientLight(16777215, 0.5));
+  const dirLight = new THREE.DirectionalLight(16777215, 1);
+  dirLight.position.set(2, 3, 2);
+  scene.add(dirLight);
+  let controls = null;
+  if (config.controls !== false) {
+    controls = new import_OrbitControls.OrbitControls(camera, renderer.domElement);
+    controls.target.set(0, 1.5, 0);
+    controls.enableDamping = true;
+    controls.update();
+  }
+  const gltf = await new import_GLTFLoader.GLTFLoader().loadAsync(config.src);
+  scene.add(gltf.scene);
+  const avatar = new OmoteAvatar({
+    target: gltf.scene,
+    gaze: { enabled: true, smoothing: 0.08 }
+  });
+  const clock = new THREE.Clock();
+  let animId = 0;
+  function animate() {
+    animId = requestAnimationFrame(animate);
+    avatar.update(clock.getDelta(), camera);
+    controls?.update();
+    renderer.render(scene, camera);
+  }
+  animate();
+  const ro = new ResizeObserver(() => {
+    const { clientWidth: rw, clientHeight: rh } = container;
+    camera.aspect = rw / rh;
+    camera.updateProjectionMatrix();
+    renderer.setSize(rw, rh);
+  });
+  ro.observe(container);
+  return {
+    avatar,
+    scene,
+    camera,
+    renderer,
+    controls,
+    animations: gltf.animations ?? [],
+    dispose() {
+      cancelAnimationFrame(animId);
+      ro.disconnect();
+      controls?.dispose();
+      avatar.dispose();
+      renderer.dispose();
+      if (renderer.domElement.parentElement) {
+        renderer.domElement.parentElement.removeChild(renderer.domElement);
+      }
+    }
+  };
+}
 // src/BlendshapeController.ts
 var import_core3 = require("@omote/core");
 var BlendshapeController = class {