@omote/three 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -0
- package/dist/index.cjs +217 -39
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +85 -38
- package/dist/index.d.ts +85 -38
- package/dist/index.js +220 -39
- package/dist/index.js.map +1 -1
- package/package.json +8 -4
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as _omote_core from '@omote/core';
|
|
2
|
-
import { FaceCompositorConfig, EmotionWeights, ConversationalState,
|
|
2
|
+
import { FaceCompositorConfig, CharacterControllerConfig, FrameSource, TTSBackend, TTSSpeakerConfig, SpeechListenerConfig, TranscriptResult, VoiceOrchestratorConfig, EmotionWeights, ConversationalState, CharacterProfile, TTSSpeaker, SpeechListener } from '@omote/core';
|
|
3
|
+
export { FrameSource, TTSSpeakerConfig as TTSConfig } from '@omote/core';
|
|
3
4
|
import { Object3D, Camera, SkinnedMesh } from 'three';
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -67,31 +68,22 @@ interface SceneDiscoveryResult {
|
|
|
67
68
|
*/
|
|
68
69
|
declare function discoverScene(scene: SceneObject): SceneDiscoveryResult;
|
|
69
70
|
|
|
70
|
-
/** Generic frame source — any object that emits 'frame' events with blendshapes. */
|
|
71
|
-
interface FrameSource {
|
|
72
|
-
on(event: 'frame', callback: (frame: {
|
|
73
|
-
blendshapes: Float32Array;
|
|
74
|
-
}) => void): void;
|
|
75
|
-
off?(event: 'frame', callback: (...args: any[]) => void): void;
|
|
76
|
-
}
|
|
77
71
|
interface OmoteAvatarOptions {
|
|
78
72
|
/** Three.js Object3D (loaded GLB scene, Group, etc.) to traverse for meshes and bones. */
|
|
79
73
|
target: Object3D;
|
|
80
74
|
/** FaceCompositor configuration (profile, emotion, life layer). */
|
|
81
75
|
compositor?: FaceCompositorConfig;
|
|
82
76
|
/** Gaze tracking configuration. */
|
|
83
|
-
gaze?: {
|
|
84
|
-
enabled?: boolean;
|
|
85
|
-
yawInfluence?: number;
|
|
86
|
-
pitchInfluence?: number;
|
|
87
|
-
smoothing?: number;
|
|
88
|
-
};
|
|
77
|
+
gaze?: CharacterControllerConfig['gaze'];
|
|
89
78
|
}
|
|
90
79
|
declare class OmoteAvatar {
|
|
91
80
|
private readonly controller;
|
|
92
81
|
private readonly discovery;
|
|
93
82
|
private frameSourceCallback;
|
|
94
83
|
private connectedSource;
|
|
84
|
+
private ttsSpeaker;
|
|
85
|
+
private speechListener;
|
|
86
|
+
private voiceOrchestrator;
|
|
95
87
|
private currentBlendshapes;
|
|
96
88
|
private _emotion;
|
|
97
89
|
private _isSpeaking;
|
|
@@ -119,6 +111,70 @@ declare class OmoteAvatar {
|
|
|
119
111
|
connectFrameSource(source: FrameSource): void;
|
|
120
112
|
/** Disconnect the currently connected frame source. */
|
|
121
113
|
disconnectFrameSource(): void;
|
|
114
|
+
/**
|
|
115
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
116
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
117
|
+
*
|
|
118
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
119
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
120
|
+
*/
|
|
121
|
+
connectSpeaker(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
122
|
+
/**
|
|
123
|
+
* Synthesize text and play with lip sync.
|
|
124
|
+
* Auto-aborts previous speak if still in progress.
|
|
125
|
+
*
|
|
126
|
+
* @param text - Text to synthesize
|
|
127
|
+
* @param options - Optional voice override and abort signal
|
|
128
|
+
*/
|
|
129
|
+
speak(text: string, options?: {
|
|
130
|
+
signal?: AbortSignal;
|
|
131
|
+
voice?: string;
|
|
132
|
+
}): Promise<void>;
|
|
133
|
+
/**
|
|
134
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
135
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
136
|
+
*/
|
|
137
|
+
streamText(options?: {
|
|
138
|
+
signal?: AbortSignal;
|
|
139
|
+
voice?: string;
|
|
140
|
+
}): Promise<{
|
|
141
|
+
push: (token: string) => void;
|
|
142
|
+
end: () => Promise<void>;
|
|
143
|
+
}>;
|
|
144
|
+
/** Stop current TTS playback. */
|
|
145
|
+
stopSpeaking(): void;
|
|
146
|
+
/** Disconnect speaker and dispose its resources. */
|
|
147
|
+
disconnectSpeaker(): Promise<void>;
|
|
148
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
149
|
+
connectTTS(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
150
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
151
|
+
disconnectTTS(): Promise<void>;
|
|
152
|
+
/**
|
|
153
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
154
|
+
* Loads ASR + VAD models.
|
|
155
|
+
*/
|
|
156
|
+
connectListener(config?: SpeechListenerConfig): Promise<void>;
|
|
157
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
158
|
+
startListening(): Promise<void>;
|
|
159
|
+
/** Stop listening. */
|
|
160
|
+
stopListening(): void;
|
|
161
|
+
/**
|
|
162
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
163
|
+
* Requires connectListener() first.
|
|
164
|
+
*/
|
|
165
|
+
onTranscript(callback: (result: TranscriptResult) => void): () => void;
|
|
166
|
+
/** Disconnect listener and dispose its resources. */
|
|
167
|
+
disconnectListener(): Promise<void>;
|
|
168
|
+
/**
|
|
169
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
170
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
171
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
172
|
+
*
|
|
173
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
174
|
+
*/
|
|
175
|
+
connectVoice(config: VoiceOrchestratorConfig): Promise<void>;
|
|
176
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
177
|
+
disconnectVoice(): Promise<void>;
|
|
122
178
|
/** Set raw blendshapes directly (alternative to connectFrameSource). */
|
|
123
179
|
setFrame(blendshapes: Float32Array): void;
|
|
124
180
|
/** Set the current emotion (string preset name or EmotionWeights object). */
|
|
@@ -129,6 +185,8 @@ declare class OmoteAvatar {
|
|
|
129
185
|
setState(state: ConversationalState): void;
|
|
130
186
|
/** Set audio energy level (0-1, drives emphasis/gesture intensity). */
|
|
131
187
|
setAudioEnergy(energy: number): void;
|
|
188
|
+
/** Update character expression profile at runtime. */
|
|
189
|
+
setProfile(profile: CharacterProfile): void;
|
|
132
190
|
/** Access the underlying FaceCompositor for advanced configuration. */
|
|
133
191
|
get compositor(): _omote_core.FaceCompositor;
|
|
134
192
|
/** Access discovered scene parts (meshes, bones). */
|
|
@@ -137,10 +195,20 @@ declare class OmoteAvatar {
|
|
|
137
195
|
get hasMorphTargets(): boolean;
|
|
138
196
|
/** Number of successfully mapped ARKit blendshapes. */
|
|
139
197
|
get mappedBlendshapeCount(): number;
|
|
198
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
199
|
+
get isSpeaking(): boolean;
|
|
200
|
+
/** Whether the avatar is currently listening for speech. */
|
|
201
|
+
get isListening(): boolean;
|
|
202
|
+
/** Current conversational state. */
|
|
203
|
+
get conversationalState(): ConversationalState;
|
|
204
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
205
|
+
get speaker(): TTSSpeaker | null;
|
|
206
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
207
|
+
get listener(): SpeechListener | null;
|
|
140
208
|
/** Reset all state (smoothing, life layer, emotions). */
|
|
141
209
|
reset(): void;
|
|
142
|
-
/** Disconnect frame sources and dispose the controller. */
|
|
143
|
-
dispose(): void;
|
|
210
|
+
/** Disconnect all voice resources, frame sources, and dispose the controller. */
|
|
211
|
+
dispose(): Promise<void>;
|
|
144
212
|
}
|
|
145
213
|
|
|
146
214
|
/**
|
|
@@ -186,25 +254,4 @@ declare class BlendshapeController {
|
|
|
186
254
|
dispose(): void;
|
|
187
255
|
}
|
|
188
256
|
|
|
189
|
-
|
|
190
|
-
target: Object3D;
|
|
191
|
-
controllerOptions?: BlendshapeControllerOptions;
|
|
192
|
-
}
|
|
193
|
-
/**
|
|
194
|
-
* @deprecated Use {@link OmoteAvatar} instead. OmoteA2E will be removed in v0.8.0.
|
|
195
|
-
*/
|
|
196
|
-
declare class OmoteA2E {
|
|
197
|
-
private orchestrator;
|
|
198
|
-
private controller;
|
|
199
|
-
constructor(options: OmoteA2EOptions);
|
|
200
|
-
load(): Promise<void>;
|
|
201
|
-
start(): Promise<void>;
|
|
202
|
-
stop(): void;
|
|
203
|
-
update(): void;
|
|
204
|
-
dispose(): Promise<void>;
|
|
205
|
-
get isReady(): boolean;
|
|
206
|
-
get isStreaming(): boolean;
|
|
207
|
-
get backend(): string | null;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
export { BlendshapeController, type BlendshapeControllerOptions, type DiscoveredBone, type DiscoveredMesh, type FrameSource, type MorphIndexEntry, OmoteA2E, type OmoteA2EOptions, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, type SceneObject, discoverScene, writeBlendshapes };
|
|
257
|
+
export { BlendshapeController, type BlendshapeControllerOptions, type DiscoveredBone, type DiscoveredMesh, type MorphIndexEntry, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, type SceneObject, discoverScene, writeBlendshapes };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as _omote_core from '@omote/core';
|
|
2
|
-
import { FaceCompositorConfig, EmotionWeights, ConversationalState,
|
|
2
|
+
import { FaceCompositorConfig, CharacterControllerConfig, FrameSource, TTSBackend, TTSSpeakerConfig, SpeechListenerConfig, TranscriptResult, VoiceOrchestratorConfig, EmotionWeights, ConversationalState, CharacterProfile, TTSSpeaker, SpeechListener } from '@omote/core';
|
|
3
|
+
export { FrameSource, TTSSpeakerConfig as TTSConfig } from '@omote/core';
|
|
3
4
|
import { Object3D, Camera, SkinnedMesh } from 'three';
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -67,31 +68,22 @@ interface SceneDiscoveryResult {
|
|
|
67
68
|
*/
|
|
68
69
|
declare function discoverScene(scene: SceneObject): SceneDiscoveryResult;
|
|
69
70
|
|
|
70
|
-
/** Generic frame source — any object that emits 'frame' events with blendshapes. */
|
|
71
|
-
interface FrameSource {
|
|
72
|
-
on(event: 'frame', callback: (frame: {
|
|
73
|
-
blendshapes: Float32Array;
|
|
74
|
-
}) => void): void;
|
|
75
|
-
off?(event: 'frame', callback: (...args: any[]) => void): void;
|
|
76
|
-
}
|
|
77
71
|
interface OmoteAvatarOptions {
|
|
78
72
|
/** Three.js Object3D (loaded GLB scene, Group, etc.) to traverse for meshes and bones. */
|
|
79
73
|
target: Object3D;
|
|
80
74
|
/** FaceCompositor configuration (profile, emotion, life layer). */
|
|
81
75
|
compositor?: FaceCompositorConfig;
|
|
82
76
|
/** Gaze tracking configuration. */
|
|
83
|
-
gaze?: {
|
|
84
|
-
enabled?: boolean;
|
|
85
|
-
yawInfluence?: number;
|
|
86
|
-
pitchInfluence?: number;
|
|
87
|
-
smoothing?: number;
|
|
88
|
-
};
|
|
77
|
+
gaze?: CharacterControllerConfig['gaze'];
|
|
89
78
|
}
|
|
90
79
|
declare class OmoteAvatar {
|
|
91
80
|
private readonly controller;
|
|
92
81
|
private readonly discovery;
|
|
93
82
|
private frameSourceCallback;
|
|
94
83
|
private connectedSource;
|
|
84
|
+
private ttsSpeaker;
|
|
85
|
+
private speechListener;
|
|
86
|
+
private voiceOrchestrator;
|
|
95
87
|
private currentBlendshapes;
|
|
96
88
|
private _emotion;
|
|
97
89
|
private _isSpeaking;
|
|
@@ -119,6 +111,70 @@ declare class OmoteAvatar {
|
|
|
119
111
|
connectFrameSource(source: FrameSource): void;
|
|
120
112
|
/** Disconnect the currently connected frame source. */
|
|
121
113
|
disconnectFrameSource(): void;
|
|
114
|
+
/**
|
|
115
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
116
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
117
|
+
*
|
|
118
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
119
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
120
|
+
*/
|
|
121
|
+
connectSpeaker(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
122
|
+
/**
|
|
123
|
+
* Synthesize text and play with lip sync.
|
|
124
|
+
* Auto-aborts previous speak if still in progress.
|
|
125
|
+
*
|
|
126
|
+
* @param text - Text to synthesize
|
|
127
|
+
* @param options - Optional voice override and abort signal
|
|
128
|
+
*/
|
|
129
|
+
speak(text: string, options?: {
|
|
130
|
+
signal?: AbortSignal;
|
|
131
|
+
voice?: string;
|
|
132
|
+
}): Promise<void>;
|
|
133
|
+
/**
|
|
134
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
135
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
136
|
+
*/
|
|
137
|
+
streamText(options?: {
|
|
138
|
+
signal?: AbortSignal;
|
|
139
|
+
voice?: string;
|
|
140
|
+
}): Promise<{
|
|
141
|
+
push: (token: string) => void;
|
|
142
|
+
end: () => Promise<void>;
|
|
143
|
+
}>;
|
|
144
|
+
/** Stop current TTS playback. */
|
|
145
|
+
stopSpeaking(): void;
|
|
146
|
+
/** Disconnect speaker and dispose its resources. */
|
|
147
|
+
disconnectSpeaker(): Promise<void>;
|
|
148
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
149
|
+
connectTTS(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
150
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
151
|
+
disconnectTTS(): Promise<void>;
|
|
152
|
+
/**
|
|
153
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
154
|
+
* Loads ASR + VAD models.
|
|
155
|
+
*/
|
|
156
|
+
connectListener(config?: SpeechListenerConfig): Promise<void>;
|
|
157
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
158
|
+
startListening(): Promise<void>;
|
|
159
|
+
/** Stop listening. */
|
|
160
|
+
stopListening(): void;
|
|
161
|
+
/**
|
|
162
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
163
|
+
* Requires connectListener() first.
|
|
164
|
+
*/
|
|
165
|
+
onTranscript(callback: (result: TranscriptResult) => void): () => void;
|
|
166
|
+
/** Disconnect listener and dispose its resources. */
|
|
167
|
+
disconnectListener(): Promise<void>;
|
|
168
|
+
/**
|
|
169
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
170
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
171
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
172
|
+
*
|
|
173
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
174
|
+
*/
|
|
175
|
+
connectVoice(config: VoiceOrchestratorConfig): Promise<void>;
|
|
176
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
177
|
+
disconnectVoice(): Promise<void>;
|
|
122
178
|
/** Set raw blendshapes directly (alternative to connectFrameSource). */
|
|
123
179
|
setFrame(blendshapes: Float32Array): void;
|
|
124
180
|
/** Set the current emotion (string preset name or EmotionWeights object). */
|
|
@@ -129,6 +185,8 @@ declare class OmoteAvatar {
|
|
|
129
185
|
setState(state: ConversationalState): void;
|
|
130
186
|
/** Set audio energy level (0-1, drives emphasis/gesture intensity). */
|
|
131
187
|
setAudioEnergy(energy: number): void;
|
|
188
|
+
/** Update character expression profile at runtime. */
|
|
189
|
+
setProfile(profile: CharacterProfile): void;
|
|
132
190
|
/** Access the underlying FaceCompositor for advanced configuration. */
|
|
133
191
|
get compositor(): _omote_core.FaceCompositor;
|
|
134
192
|
/** Access discovered scene parts (meshes, bones). */
|
|
@@ -137,10 +195,20 @@ declare class OmoteAvatar {
|
|
|
137
195
|
get hasMorphTargets(): boolean;
|
|
138
196
|
/** Number of successfully mapped ARKit blendshapes. */
|
|
139
197
|
get mappedBlendshapeCount(): number;
|
|
198
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
199
|
+
get isSpeaking(): boolean;
|
|
200
|
+
/** Whether the avatar is currently listening for speech. */
|
|
201
|
+
get isListening(): boolean;
|
|
202
|
+
/** Current conversational state. */
|
|
203
|
+
get conversationalState(): ConversationalState;
|
|
204
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
205
|
+
get speaker(): TTSSpeaker | null;
|
|
206
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
207
|
+
get listener(): SpeechListener | null;
|
|
140
208
|
/** Reset all state (smoothing, life layer, emotions). */
|
|
141
209
|
reset(): void;
|
|
142
|
-
/** Disconnect frame sources and dispose the controller. */
|
|
143
|
-
dispose(): void;
|
|
210
|
+
/** Disconnect all voice resources, frame sources, and dispose the controller. */
|
|
211
|
+
dispose(): Promise<void>;
|
|
144
212
|
}
|
|
145
213
|
|
|
146
214
|
/**
|
|
@@ -186,25 +254,4 @@ declare class BlendshapeController {
|
|
|
186
254
|
dispose(): void;
|
|
187
255
|
}
|
|
188
256
|
|
|
189
|
-
|
|
190
|
-
target: Object3D;
|
|
191
|
-
controllerOptions?: BlendshapeControllerOptions;
|
|
192
|
-
}
|
|
193
|
-
/**
|
|
194
|
-
* @deprecated Use {@link OmoteAvatar} instead. OmoteA2E will be removed in v0.8.0.
|
|
195
|
-
*/
|
|
196
|
-
declare class OmoteA2E {
|
|
197
|
-
private orchestrator;
|
|
198
|
-
private controller;
|
|
199
|
-
constructor(options: OmoteA2EOptions);
|
|
200
|
-
load(): Promise<void>;
|
|
201
|
-
start(): Promise<void>;
|
|
202
|
-
stop(): void;
|
|
203
|
-
update(): void;
|
|
204
|
-
dispose(): Promise<void>;
|
|
205
|
-
get isReady(): boolean;
|
|
206
|
-
get isStreaming(): boolean;
|
|
207
|
-
get backend(): string | null;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
export { BlendshapeController, type BlendshapeControllerOptions, type DiscoveredBone, type DiscoveredMesh, type FrameSource, type MorphIndexEntry, OmoteA2E, type OmoteA2EOptions, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, type SceneObject, discoverScene, writeBlendshapes };
|
|
257
|
+
export { BlendshapeController, type BlendshapeControllerOptions, type DiscoveredBone, type DiscoveredMesh, type MorphIndexEntry, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, type SceneObject, discoverScene, writeBlendshapes };
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
// src/OmoteAvatar.ts
|
|
2
2
|
import {
|
|
3
3
|
CharacterController,
|
|
4
|
+
TTSSpeaker,
|
|
5
|
+
SpeechListener,
|
|
6
|
+
VoiceOrchestrator,
|
|
4
7
|
createLogger as createLogger2
|
|
5
8
|
} from "@omote/core";
|
|
6
9
|
import { Vector3, Quaternion } from "three";
|
|
@@ -106,6 +109,12 @@ var OmoteAvatar = class {
|
|
|
106
109
|
constructor(options) {
|
|
107
110
|
this.frameSourceCallback = null;
|
|
108
111
|
this.connectedSource = null;
|
|
112
|
+
// TTS integration
|
|
113
|
+
this.ttsSpeaker = null;
|
|
114
|
+
// Speech listener
|
|
115
|
+
this.speechListener = null;
|
|
116
|
+
// Voice orchestrator
|
|
117
|
+
this.voiceOrchestrator = null;
|
|
109
118
|
// State
|
|
110
119
|
this.currentBlendshapes = null;
|
|
111
120
|
this._emotion = null;
|
|
@@ -185,9 +194,15 @@ var OmoteAvatar = class {
|
|
|
185
194
|
* disconnects the previous one.
|
|
186
195
|
*/
|
|
187
196
|
connectFrameSource(source) {
|
|
197
|
+
if (this.ttsSpeaker && source !== this.ttsSpeaker.frameSource) {
|
|
198
|
+
this.ttsSpeaker.stop();
|
|
199
|
+
}
|
|
188
200
|
this.disconnectFrameSource();
|
|
189
201
|
this.frameSourceCallback = (frame) => {
|
|
190
202
|
this.currentBlendshapes = frame.blendshapes;
|
|
203
|
+
if (frame.emotion !== void 0) {
|
|
204
|
+
this._emotion = frame.emotion;
|
|
205
|
+
}
|
|
191
206
|
};
|
|
192
207
|
source.on("frame", this.frameSourceCallback);
|
|
193
208
|
this.connectedSource = source;
|
|
@@ -205,6 +220,182 @@ var OmoteAvatar = class {
|
|
|
205
220
|
this.frameSourceCallback = null;
|
|
206
221
|
}
|
|
207
222
|
// -------------------------------------------------------------------------
|
|
223
|
+
// Speaker (TTS → lip sync)
|
|
224
|
+
// -------------------------------------------------------------------------
|
|
225
|
+
/**
|
|
226
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
227
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
228
|
+
*
|
|
229
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
230
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
231
|
+
*/
|
|
232
|
+
async connectSpeaker(tts, config) {
|
|
233
|
+
await this.disconnectSpeaker();
|
|
234
|
+
this.ttsSpeaker = new TTSSpeaker();
|
|
235
|
+
await this.ttsSpeaker.connect(tts, config);
|
|
236
|
+
this.connectFrameSource(this.ttsSpeaker.frameSource);
|
|
237
|
+
}
|
|
238
|
+
/**
|
|
239
|
+
* Synthesize text and play with lip sync.
|
|
240
|
+
* Auto-aborts previous speak if still in progress.
|
|
241
|
+
*
|
|
242
|
+
* @param text - Text to synthesize
|
|
243
|
+
* @param options - Optional voice override and abort signal
|
|
244
|
+
*/
|
|
245
|
+
async speak(text, options) {
|
|
246
|
+
if (this.voiceOrchestrator) {
|
|
247
|
+
await this.voiceOrchestrator.speak(text, options);
|
|
248
|
+
return;
|
|
249
|
+
}
|
|
250
|
+
if (!this.ttsSpeaker) {
|
|
251
|
+
throw new Error("No speaker connected. Call connectSpeaker() first.");
|
|
252
|
+
}
|
|
253
|
+
this._isSpeaking = true;
|
|
254
|
+
this._state = "speaking";
|
|
255
|
+
try {
|
|
256
|
+
await this.ttsSpeaker.speak(text, options);
|
|
257
|
+
} finally {
|
|
258
|
+
this._isSpeaking = false;
|
|
259
|
+
if (this._state === "speaking") {
|
|
260
|
+
this._state = "idle";
|
|
261
|
+
}
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
/**
|
|
265
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
266
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
267
|
+
*/
|
|
268
|
+
async streamText(options) {
|
|
269
|
+
if (this.voiceOrchestrator) {
|
|
270
|
+
return this.voiceOrchestrator.streamText(options);
|
|
271
|
+
}
|
|
272
|
+
if (!this.ttsSpeaker) {
|
|
273
|
+
throw new Error("No speaker connected. Call connectSpeaker() first.");
|
|
274
|
+
}
|
|
275
|
+
this._isSpeaking = true;
|
|
276
|
+
this._state = "speaking";
|
|
277
|
+
const stream = await this.ttsSpeaker.streamText(options ?? {});
|
|
278
|
+
return {
|
|
279
|
+
push: stream.push,
|
|
280
|
+
end: async () => {
|
|
281
|
+
try {
|
|
282
|
+
await stream.end();
|
|
283
|
+
} finally {
|
|
284
|
+
this._isSpeaking = false;
|
|
285
|
+
if (this._state === "speaking") this._state = "idle";
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
};
|
|
289
|
+
}
|
|
290
|
+
/** Stop current TTS playback. */
|
|
291
|
+
stopSpeaking() {
|
|
292
|
+
if (this.voiceOrchestrator) {
|
|
293
|
+
this.voiceOrchestrator.stopSpeaking();
|
|
294
|
+
return;
|
|
295
|
+
}
|
|
296
|
+
this.ttsSpeaker?.stop();
|
|
297
|
+
}
|
|
298
|
+
/** Disconnect speaker and dispose its resources. */
|
|
299
|
+
async disconnectSpeaker() {
|
|
300
|
+
if (this.ttsSpeaker) {
|
|
301
|
+
this.disconnectFrameSource();
|
|
302
|
+
await this.ttsSpeaker.dispose();
|
|
303
|
+
this.ttsSpeaker = null;
|
|
304
|
+
}
|
|
305
|
+
}
|
|
306
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
307
|
+
async connectTTS(tts, config) {
|
|
308
|
+
return this.connectSpeaker(tts, config);
|
|
309
|
+
}
|
|
310
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
311
|
+
async disconnectTTS() {
|
|
312
|
+
return this.disconnectSpeaker();
|
|
313
|
+
}
|
|
314
|
+
// -------------------------------------------------------------------------
|
|
315
|
+
// Listener (mic → VAD → ASR → transcript)
|
|
316
|
+
// -------------------------------------------------------------------------
|
|
317
|
+
/**
|
|
318
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
319
|
+
* Loads ASR + VAD models.
|
|
320
|
+
*/
|
|
321
|
+
async connectListener(config) {
|
|
322
|
+
await this.disconnectListener();
|
|
323
|
+
this.speechListener = new SpeechListener(config);
|
|
324
|
+
await this.speechListener.loadModels();
|
|
325
|
+
}
|
|
326
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
327
|
+
async startListening() {
|
|
328
|
+
if (this.voiceOrchestrator) {
|
|
329
|
+
await this.voiceOrchestrator.startListening();
|
|
330
|
+
return;
|
|
331
|
+
}
|
|
332
|
+
if (!this.speechListener) {
|
|
333
|
+
throw new Error("No listener connected. Call connectListener() first.");
|
|
334
|
+
}
|
|
335
|
+
this._state = "listening";
|
|
336
|
+
await this.speechListener.start();
|
|
337
|
+
}
|
|
338
|
+
/** Stop listening. */
|
|
339
|
+
stopListening() {
|
|
340
|
+
if (this.voiceOrchestrator) {
|
|
341
|
+
this.voiceOrchestrator.stopListening();
|
|
342
|
+
return;
|
|
343
|
+
}
|
|
344
|
+
this.speechListener?.stop();
|
|
345
|
+
if (this._state === "listening") this._state = "idle";
|
|
346
|
+
}
|
|
347
|
+
/**
|
|
348
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
349
|
+
* Requires connectListener() first.
|
|
350
|
+
*/
|
|
351
|
+
onTranscript(callback) {
|
|
352
|
+
const listener = this.speechListener ?? this.voiceOrchestrator?.listener;
|
|
353
|
+
if (!listener) {
|
|
354
|
+
throw new Error("No listener connected. Call connectListener() or connectVoice() first.");
|
|
355
|
+
}
|
|
356
|
+
listener.on("transcript", callback);
|
|
357
|
+
return () => {
|
|
358
|
+
listener.off?.("transcript", callback);
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
/** Disconnect listener and dispose its resources. */
|
|
362
|
+
async disconnectListener() {
|
|
363
|
+
if (this.speechListener) {
|
|
364
|
+
await this.speechListener.dispose();
|
|
365
|
+
this.speechListener = null;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
// -------------------------------------------------------------------------
|
|
369
|
+
// Voice (combined speaker + listener + interruption)
|
|
370
|
+
// -------------------------------------------------------------------------
|
|
371
|
+
/**
|
|
372
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
373
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
374
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
375
|
+
*
|
|
376
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
377
|
+
*/
|
|
378
|
+
async connectVoice(config) {
|
|
379
|
+
await this.disconnectVoice();
|
|
380
|
+
this.voiceOrchestrator = new VoiceOrchestrator();
|
|
381
|
+
await this.voiceOrchestrator.connect(config);
|
|
382
|
+
if (this.voiceOrchestrator.frameSource) {
|
|
383
|
+
this.connectFrameSource(this.voiceOrchestrator.frameSource);
|
|
384
|
+
}
|
|
385
|
+
this.voiceOrchestrator.on("state", (state) => {
|
|
386
|
+
this._state = state;
|
|
387
|
+
this._isSpeaking = state === "speaking";
|
|
388
|
+
});
|
|
389
|
+
}
|
|
390
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
391
|
+
async disconnectVoice() {
|
|
392
|
+
if (this.voiceOrchestrator) {
|
|
393
|
+
this.disconnectFrameSource();
|
|
394
|
+
await this.voiceOrchestrator.disconnect();
|
|
395
|
+
this.voiceOrchestrator = null;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
// -------------------------------------------------------------------------
|
|
208
399
|
// State setters
|
|
209
400
|
// -------------------------------------------------------------------------
|
|
210
401
|
/** Set raw blendshapes directly (alternative to connectFrameSource). */
|
|
@@ -227,6 +418,10 @@ var OmoteAvatar = class {
|
|
|
227
418
|
setAudioEnergy(energy) {
|
|
228
419
|
this._audioEnergy = energy;
|
|
229
420
|
}
|
|
421
|
+
/** Update character expression profile at runtime. */
|
|
422
|
+
setProfile(profile) {
|
|
423
|
+
this.controller.setProfile(profile);
|
|
424
|
+
}
|
|
230
425
|
// -------------------------------------------------------------------------
|
|
231
426
|
// Accessors
|
|
232
427
|
// -------------------------------------------------------------------------
|
|
@@ -246,6 +441,26 @@ var OmoteAvatar = class {
|
|
|
246
441
|
get mappedBlendshapeCount() {
|
|
247
442
|
return this.discovery.mappedBlendshapeCount;
|
|
248
443
|
}
|
|
444
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
445
|
+
get isSpeaking() {
|
|
446
|
+
return this._isSpeaking;
|
|
447
|
+
}
|
|
448
|
+
/** Whether the avatar is currently listening for speech. */
|
|
449
|
+
get isListening() {
|
|
450
|
+
return this._state === "listening";
|
|
451
|
+
}
|
|
452
|
+
/** Current conversational state. */
|
|
453
|
+
get conversationalState() {
|
|
454
|
+
return this._state;
|
|
455
|
+
}
|
|
456
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
457
|
+
get speaker() {
|
|
458
|
+
return this.ttsSpeaker ?? this.voiceOrchestrator?.speaker ?? null;
|
|
459
|
+
}
|
|
460
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
461
|
+
get listener() {
|
|
462
|
+
return this.speechListener ?? this.voiceOrchestrator?.listener ?? null;
|
|
463
|
+
}
|
|
249
464
|
// -------------------------------------------------------------------------
|
|
250
465
|
// Lifecycle
|
|
251
466
|
// -------------------------------------------------------------------------
|
|
@@ -258,8 +473,11 @@ var OmoteAvatar = class {
|
|
|
258
473
|
this._audioEnergy = 0;
|
|
259
474
|
this.controller.reset();
|
|
260
475
|
}
|
|
261
|
-
/** Disconnect frame sources and dispose the controller. */
|
|
262
|
-
dispose() {
|
|
476
|
+
/** Disconnect all voice resources, frame sources, and dispose the controller. */
|
|
477
|
+
async dispose() {
|
|
478
|
+
await this.disconnectVoice();
|
|
479
|
+
await this.disconnectSpeaker();
|
|
480
|
+
await this.disconnectListener();
|
|
263
481
|
this.disconnectFrameSource();
|
|
264
482
|
this.controller.dispose();
|
|
265
483
|
logger2.debug("Disposed");
|
|
@@ -327,45 +545,8 @@ var BlendshapeController = class {
|
|
|
327
545
|
this.currentWeights = [];
|
|
328
546
|
}
|
|
329
547
|
};
|
|
330
|
-
|
|
331
|
-
// src/OmoteA2E.ts
|
|
332
|
-
import { A2EOrchestrator } from "@omote/core";
|
|
333
|
-
var OmoteA2E = class {
|
|
334
|
-
constructor(options) {
|
|
335
|
-
const { target, controllerOptions, ...orchestratorConfig } = options;
|
|
336
|
-
this.controller = new BlendshapeController(target, controllerOptions);
|
|
337
|
-
this.orchestrator = new A2EOrchestrator(orchestratorConfig);
|
|
338
|
-
}
|
|
339
|
-
async load() {
|
|
340
|
-
return this.orchestrator.load();
|
|
341
|
-
}
|
|
342
|
-
async start() {
|
|
343
|
-
return this.orchestrator.start();
|
|
344
|
-
}
|
|
345
|
-
stop() {
|
|
346
|
-
this.orchestrator.stop();
|
|
347
|
-
}
|
|
348
|
-
update() {
|
|
349
|
-
const w = this.orchestrator.latestWeights;
|
|
350
|
-
if (w) this.controller.update(w);
|
|
351
|
-
}
|
|
352
|
-
async dispose() {
|
|
353
|
-
await this.orchestrator.dispose();
|
|
354
|
-
this.controller.dispose();
|
|
355
|
-
}
|
|
356
|
-
get isReady() {
|
|
357
|
-
return this.orchestrator.isReady;
|
|
358
|
-
}
|
|
359
|
-
get isStreaming() {
|
|
360
|
-
return this.orchestrator.isStreaming;
|
|
361
|
-
}
|
|
362
|
-
get backend() {
|
|
363
|
-
return this.orchestrator.backend;
|
|
364
|
-
}
|
|
365
|
-
};
|
|
366
548
|
export {
|
|
367
549
|
BlendshapeController,
|
|
368
|
-
OmoteA2E,
|
|
369
550
|
OmoteAvatar,
|
|
370
551
|
discoverScene,
|
|
371
552
|
writeBlendshapes
|