@omote/babylon 0.2.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -0
- package/dist/index.cjs +224 -42
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +86 -31
- package/dist/index.d.ts +86 -31
- package/dist/index.js +232 -43
- package/dist/index.js.map +1 -1
- package/package.json +51 -45
package/dist/index.d.cts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as _omote_core from '@omote/core';
|
|
2
|
-
import { FaceCompositorConfig, CharacterControllerConfig, EmotionWeights, ConversationalState,
|
|
2
|
+
import { FaceCompositorConfig, CharacterControllerConfig, FrameSource, TTSBackend, TTSSpeakerConfig, SpeechListenerConfig, TranscriptResult, VoiceOrchestratorConfig, EmotionWeights, ConversationalState, CharacterProfile, TTSSpeaker, SpeechListener } from '@omote/core';
|
|
3
|
+
export { FrameSource, TTSSpeakerConfig as TTSConfig } from '@omote/core';
|
|
3
4
|
import { AbstractMesh, TransformNode, Scene, Camera } from '@babylonjs/core';
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -47,13 +48,6 @@ interface SceneDiscoveryResult {
|
|
|
47
48
|
*/
|
|
48
49
|
declare function discoverScene(root: AbstractMesh): SceneDiscoveryResult;
|
|
49
50
|
|
|
50
|
-
/** Generic frame source -- any object that emits 'frame' events */
|
|
51
|
-
interface FrameSource {
|
|
52
|
-
on(event: 'frame', callback: (frame: {
|
|
53
|
-
blendshapes: Float32Array;
|
|
54
|
-
}) => void): void;
|
|
55
|
-
off?(event: 'frame', callback: (...args: any[]) => void): void;
|
|
56
|
-
}
|
|
57
51
|
interface OmoteAvatarOptions {
|
|
58
52
|
/** Root mesh of the avatar (typically loaded via SceneLoader) */
|
|
59
53
|
target: AbstractMesh;
|
|
@@ -82,6 +76,9 @@ declare class OmoteAvatar {
|
|
|
82
76
|
private _camera;
|
|
83
77
|
private frameSourceCallback;
|
|
84
78
|
private connectedSource;
|
|
79
|
+
private ttsSpeaker;
|
|
80
|
+
private speechListener;
|
|
81
|
+
private voiceOrchestrator;
|
|
85
82
|
private renderCallback;
|
|
86
83
|
private lastTime;
|
|
87
84
|
constructor(options: OmoteAvatarOptions);
|
|
@@ -107,6 +104,72 @@ declare class OmoteAvatar {
|
|
|
107
104
|
connectFrameSource(source: FrameSource): void;
|
|
108
105
|
/** Disconnect the current frame source (if any). */
|
|
109
106
|
disconnectFrameSource(): void;
|
|
107
|
+
/**
|
|
108
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
109
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
110
|
+
*
|
|
111
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
112
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
113
|
+
*/
|
|
114
|
+
/** Warm up AudioContext for iOS/Safari autoplay policy. Call from user gesture. */
|
|
115
|
+
warmup(): Promise<void>;
|
|
116
|
+
connectSpeaker(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
117
|
+
/**
|
|
118
|
+
* Synthesize text and play with lip sync.
|
|
119
|
+
* Auto-aborts previous speak if still in progress.
|
|
120
|
+
*
|
|
121
|
+
* @param text - Text to synthesize
|
|
122
|
+
* @param options - Optional voice override and abort signal
|
|
123
|
+
*/
|
|
124
|
+
speak(text: string, options?: {
|
|
125
|
+
signal?: AbortSignal;
|
|
126
|
+
voice?: string;
|
|
127
|
+
}): Promise<void>;
|
|
128
|
+
/**
|
|
129
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
130
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
131
|
+
*/
|
|
132
|
+
streamText(options?: {
|
|
133
|
+
signal?: AbortSignal;
|
|
134
|
+
voice?: string;
|
|
135
|
+
}): Promise<{
|
|
136
|
+
push: (token: string) => void;
|
|
137
|
+
end: () => Promise<void>;
|
|
138
|
+
}>;
|
|
139
|
+
/** Stop current TTS playback. */
|
|
140
|
+
stopSpeaking(): void;
|
|
141
|
+
/** Disconnect speaker and dispose its resources. */
|
|
142
|
+
disconnectSpeaker(): Promise<void>;
|
|
143
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
144
|
+
connectTTS(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
145
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
146
|
+
disconnectTTS(): Promise<void>;
|
|
147
|
+
/**
|
|
148
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
149
|
+
* Loads ASR + VAD models.
|
|
150
|
+
*/
|
|
151
|
+
connectListener(config?: SpeechListenerConfig): Promise<void>;
|
|
152
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
153
|
+
startListening(): Promise<void>;
|
|
154
|
+
/** Stop listening. */
|
|
155
|
+
stopListening(): void;
|
|
156
|
+
/**
|
|
157
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
158
|
+
* Requires connectListener() first.
|
|
159
|
+
*/
|
|
160
|
+
onTranscript(callback: (result: TranscriptResult) => void): () => void;
|
|
161
|
+
/** Disconnect listener and dispose its resources. */
|
|
162
|
+
disconnectListener(): Promise<void>;
|
|
163
|
+
/**
|
|
164
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
165
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
166
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
167
|
+
*
|
|
168
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
169
|
+
*/
|
|
170
|
+
connectVoice(config: VoiceOrchestratorConfig): Promise<void>;
|
|
171
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
172
|
+
disconnectVoice(): Promise<void>;
|
|
110
173
|
/** Set blendshapes directly (alternative to connectFrameSource). */
|
|
111
174
|
setFrame(blendshapes: Float32Array): void;
|
|
112
175
|
/** Set emotion (string preset like 'happy' or EmotionWeights object). */
|
|
@@ -117,6 +180,8 @@ declare class OmoteAvatar {
|
|
|
117
180
|
setState(state: ConversationalState): void;
|
|
118
181
|
/** Set audio energy level (0-1, drives emphasis/gesture intensity). */
|
|
119
182
|
setAudioEnergy(energy: number): void;
|
|
183
|
+
/** Update character expression profile at runtime. */
|
|
184
|
+
setProfile(profile: CharacterProfile): void;
|
|
120
185
|
/**
|
|
121
186
|
* Set the active camera for gaze tracking.
|
|
122
187
|
* Required when using autoUpdate. Can also be passed directly to update().
|
|
@@ -130,10 +195,20 @@ declare class OmoteAvatar {
|
|
|
130
195
|
get hasMorphTargets(): boolean;
|
|
131
196
|
/** Number of successfully mapped ARKit blendshapes. */
|
|
132
197
|
get mappedBlendshapeCount(): number;
|
|
198
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
199
|
+
get isSpeaking(): boolean;
|
|
200
|
+
/** Whether the avatar is currently listening for speech. */
|
|
201
|
+
get isListening(): boolean;
|
|
202
|
+
/** Current conversational state. */
|
|
203
|
+
get conversationalState(): ConversationalState;
|
|
204
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
205
|
+
get speaker(): TTSSpeaker | null;
|
|
206
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
207
|
+
get listener(): SpeechListener | null;
|
|
133
208
|
/** Reset all state (smoothing, life layer, emotions). */
|
|
134
209
|
reset(): void;
|
|
135
|
-
/**
|
|
136
|
-
dispose(): void
|
|
210
|
+
/** Disconnect all voice resources, frame sources, unregister render loop, dispose controller. */
|
|
211
|
+
dispose(): Promise<void>;
|
|
137
212
|
private registerAutoUpdate;
|
|
138
213
|
}
|
|
139
214
|
|
|
@@ -191,24 +266,4 @@ declare class BlendshapeController {
|
|
|
191
266
|
dispose(): void;
|
|
192
267
|
}
|
|
193
268
|
|
|
194
|
-
|
|
195
|
-
target: AbstractMesh;
|
|
196
|
-
scene: Scene;
|
|
197
|
-
controllerOptions?: BlendshapeControllerOptions;
|
|
198
|
-
}
|
|
199
|
-
/** @deprecated Use {@link OmoteAvatar} instead. OmoteA2E will be removed in v0.8.0. */
|
|
200
|
-
declare class OmoteA2E {
|
|
201
|
-
private orchestrator;
|
|
202
|
-
private controller;
|
|
203
|
-
constructor(options: OmoteA2EOptions);
|
|
204
|
-
load(): Promise<void>;
|
|
205
|
-
start(): Promise<void>;
|
|
206
|
-
stop(): void;
|
|
207
|
-
update(): void;
|
|
208
|
-
dispose(): Promise<void>;
|
|
209
|
-
get isReady(): boolean;
|
|
210
|
-
get isStreaming(): boolean;
|
|
211
|
-
get backend(): string | null;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
export { BlendshapeController, type BlendshapeControllerOptions, type FrameSource, type MorphIndexEntry, OmoteA2E, type OmoteA2EOptions, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, discoverScene, writeBlendshapes };
|
|
269
|
+
export { BlendshapeController, type BlendshapeControllerOptions, type MorphIndexEntry, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, discoverScene, writeBlendshapes };
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import * as _omote_core from '@omote/core';
|
|
2
|
-
import { FaceCompositorConfig, CharacterControllerConfig, EmotionWeights, ConversationalState,
|
|
2
|
+
import { FaceCompositorConfig, CharacterControllerConfig, FrameSource, TTSBackend, TTSSpeakerConfig, SpeechListenerConfig, TranscriptResult, VoiceOrchestratorConfig, EmotionWeights, ConversationalState, CharacterProfile, TTSSpeaker, SpeechListener } from '@omote/core';
|
|
3
|
+
export { FrameSource, TTSSpeakerConfig as TTSConfig } from '@omote/core';
|
|
3
4
|
import { AbstractMesh, TransformNode, Scene, Camera } from '@babylonjs/core';
|
|
4
5
|
|
|
5
6
|
/**
|
|
@@ -47,13 +48,6 @@ interface SceneDiscoveryResult {
|
|
|
47
48
|
*/
|
|
48
49
|
declare function discoverScene(root: AbstractMesh): SceneDiscoveryResult;
|
|
49
50
|
|
|
50
|
-
/** Generic frame source -- any object that emits 'frame' events */
|
|
51
|
-
interface FrameSource {
|
|
52
|
-
on(event: 'frame', callback: (frame: {
|
|
53
|
-
blendshapes: Float32Array;
|
|
54
|
-
}) => void): void;
|
|
55
|
-
off?(event: 'frame', callback: (...args: any[]) => void): void;
|
|
56
|
-
}
|
|
57
51
|
interface OmoteAvatarOptions {
|
|
58
52
|
/** Root mesh of the avatar (typically loaded via SceneLoader) */
|
|
59
53
|
target: AbstractMesh;
|
|
@@ -82,6 +76,9 @@ declare class OmoteAvatar {
|
|
|
82
76
|
private _camera;
|
|
83
77
|
private frameSourceCallback;
|
|
84
78
|
private connectedSource;
|
|
79
|
+
private ttsSpeaker;
|
|
80
|
+
private speechListener;
|
|
81
|
+
private voiceOrchestrator;
|
|
85
82
|
private renderCallback;
|
|
86
83
|
private lastTime;
|
|
87
84
|
constructor(options: OmoteAvatarOptions);
|
|
@@ -107,6 +104,72 @@ declare class OmoteAvatar {
|
|
|
107
104
|
connectFrameSource(source: FrameSource): void;
|
|
108
105
|
/** Disconnect the current frame source (if any). */
|
|
109
106
|
disconnectFrameSource(): void;
|
|
107
|
+
/**
|
|
108
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
109
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
110
|
+
*
|
|
111
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
112
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
113
|
+
*/
|
|
114
|
+
/** Warm up AudioContext for iOS/Safari autoplay policy. Call from user gesture. */
|
|
115
|
+
warmup(): Promise<void>;
|
|
116
|
+
connectSpeaker(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
117
|
+
/**
|
|
118
|
+
* Synthesize text and play with lip sync.
|
|
119
|
+
* Auto-aborts previous speak if still in progress.
|
|
120
|
+
*
|
|
121
|
+
* @param text - Text to synthesize
|
|
122
|
+
* @param options - Optional voice override and abort signal
|
|
123
|
+
*/
|
|
124
|
+
speak(text: string, options?: {
|
|
125
|
+
signal?: AbortSignal;
|
|
126
|
+
voice?: string;
|
|
127
|
+
}): Promise<void>;
|
|
128
|
+
/**
|
|
129
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
130
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
131
|
+
*/
|
|
132
|
+
streamText(options?: {
|
|
133
|
+
signal?: AbortSignal;
|
|
134
|
+
voice?: string;
|
|
135
|
+
}): Promise<{
|
|
136
|
+
push: (token: string) => void;
|
|
137
|
+
end: () => Promise<void>;
|
|
138
|
+
}>;
|
|
139
|
+
/** Stop current TTS playback. */
|
|
140
|
+
stopSpeaking(): void;
|
|
141
|
+
/** Disconnect speaker and dispose its resources. */
|
|
142
|
+
disconnectSpeaker(): Promise<void>;
|
|
143
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
144
|
+
connectTTS(tts: TTSBackend, config?: TTSSpeakerConfig): Promise<void>;
|
|
145
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
146
|
+
disconnectTTS(): Promise<void>;
|
|
147
|
+
/**
|
|
148
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
149
|
+
* Loads ASR + VAD models.
|
|
150
|
+
*/
|
|
151
|
+
connectListener(config?: SpeechListenerConfig): Promise<void>;
|
|
152
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
153
|
+
startListening(): Promise<void>;
|
|
154
|
+
/** Stop listening. */
|
|
155
|
+
stopListening(): void;
|
|
156
|
+
/**
|
|
157
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
158
|
+
* Requires connectListener() first.
|
|
159
|
+
*/
|
|
160
|
+
onTranscript(callback: (result: TranscriptResult) => void): () => void;
|
|
161
|
+
/** Disconnect listener and dispose its resources. */
|
|
162
|
+
disconnectListener(): Promise<void>;
|
|
163
|
+
/**
|
|
164
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
165
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
166
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
167
|
+
*
|
|
168
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
169
|
+
*/
|
|
170
|
+
connectVoice(config: VoiceOrchestratorConfig): Promise<void>;
|
|
171
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
172
|
+
disconnectVoice(): Promise<void>;
|
|
110
173
|
/** Set blendshapes directly (alternative to connectFrameSource). */
|
|
111
174
|
setFrame(blendshapes: Float32Array): void;
|
|
112
175
|
/** Set emotion (string preset like 'happy' or EmotionWeights object). */
|
|
@@ -117,6 +180,8 @@ declare class OmoteAvatar {
|
|
|
117
180
|
setState(state: ConversationalState): void;
|
|
118
181
|
/** Set audio energy level (0-1, drives emphasis/gesture intensity). */
|
|
119
182
|
setAudioEnergy(energy: number): void;
|
|
183
|
+
/** Update character expression profile at runtime. */
|
|
184
|
+
setProfile(profile: CharacterProfile): void;
|
|
120
185
|
/**
|
|
121
186
|
* Set the active camera for gaze tracking.
|
|
122
187
|
* Required when using autoUpdate. Can also be passed directly to update().
|
|
@@ -130,10 +195,20 @@ declare class OmoteAvatar {
|
|
|
130
195
|
get hasMorphTargets(): boolean;
|
|
131
196
|
/** Number of successfully mapped ARKit blendshapes. */
|
|
132
197
|
get mappedBlendshapeCount(): number;
|
|
198
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
199
|
+
get isSpeaking(): boolean;
|
|
200
|
+
/** Whether the avatar is currently listening for speech. */
|
|
201
|
+
get isListening(): boolean;
|
|
202
|
+
/** Current conversational state. */
|
|
203
|
+
get conversationalState(): ConversationalState;
|
|
204
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
205
|
+
get speaker(): TTSSpeaker | null;
|
|
206
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
207
|
+
get listener(): SpeechListener | null;
|
|
133
208
|
/** Reset all state (smoothing, life layer, emotions). */
|
|
134
209
|
reset(): void;
|
|
135
|
-
/**
|
|
136
|
-
dispose(): void
|
|
210
|
+
/** Disconnect all voice resources, frame sources, unregister render loop, dispose controller. */
|
|
211
|
+
dispose(): Promise<void>;
|
|
137
212
|
private registerAutoUpdate;
|
|
138
213
|
}
|
|
139
214
|
|
|
@@ -191,24 +266,4 @@ declare class BlendshapeController {
|
|
|
191
266
|
dispose(): void;
|
|
192
267
|
}
|
|
193
268
|
|
|
194
|
-
|
|
195
|
-
target: AbstractMesh;
|
|
196
|
-
scene: Scene;
|
|
197
|
-
controllerOptions?: BlendshapeControllerOptions;
|
|
198
|
-
}
|
|
199
|
-
/** @deprecated Use {@link OmoteAvatar} instead. OmoteA2E will be removed in v0.8.0. */
|
|
200
|
-
declare class OmoteA2E {
|
|
201
|
-
private orchestrator;
|
|
202
|
-
private controller;
|
|
203
|
-
constructor(options: OmoteA2EOptions);
|
|
204
|
-
load(): Promise<void>;
|
|
205
|
-
start(): Promise<void>;
|
|
206
|
-
stop(): void;
|
|
207
|
-
update(): void;
|
|
208
|
-
dispose(): Promise<void>;
|
|
209
|
-
get isReady(): boolean;
|
|
210
|
-
get isStreaming(): boolean;
|
|
211
|
-
get backend(): string | null;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
export { BlendshapeController, type BlendshapeControllerOptions, type FrameSource, type MorphIndexEntry, OmoteA2E, type OmoteA2EOptions, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, discoverScene, writeBlendshapes };
|
|
269
|
+
export { BlendshapeController, type BlendshapeControllerOptions, type MorphIndexEntry, OmoteAvatar, type OmoteAvatarOptions, type SceneDiscoveryResult, discoverScene, writeBlendshapes };
|
package/dist/index.js
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
// src/OmoteAvatar.ts
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
CharacterController,
|
|
4
|
+
TTSSpeaker,
|
|
5
|
+
SpeechListener,
|
|
6
|
+
VoiceOrchestrator,
|
|
7
|
+
createLogger as createLogger2,
|
|
8
|
+
getClock
|
|
9
|
+
} from "@omote/core";
|
|
3
10
|
|
|
4
11
|
// src/SceneDiscovery.ts
|
|
5
12
|
import { LAM_BLENDSHAPES, createLogger } from "@omote/core";
|
|
@@ -116,7 +123,7 @@ function writeBlendshapes(blendshapes, morphEntries) {
|
|
|
116
123
|
}
|
|
117
124
|
|
|
118
125
|
// src/OmoteAvatar.ts
|
|
119
|
-
var logger2 = createLogger2("OmoteAvatar");
|
|
126
|
+
var logger2 = createLogger2("OmoteAvatar.Babylon");
|
|
120
127
|
var OmoteAvatar = class {
|
|
121
128
|
constructor(options) {
|
|
122
129
|
// State
|
|
@@ -129,6 +136,12 @@ var OmoteAvatar = class {
|
|
|
129
136
|
// Frame source connection
|
|
130
137
|
this.frameSourceCallback = null;
|
|
131
138
|
this.connectedSource = null;
|
|
139
|
+
// TTS integration
|
|
140
|
+
this.ttsSpeaker = null;
|
|
141
|
+
// Speech listener
|
|
142
|
+
this.speechListener = null;
|
|
143
|
+
// Voice orchestrator
|
|
144
|
+
this.voiceOrchestrator = null;
|
|
132
145
|
// Auto-update
|
|
133
146
|
this.renderCallback = null;
|
|
134
147
|
this.lastTime = 0;
|
|
@@ -207,9 +220,15 @@ var OmoteAvatar = class {
|
|
|
207
220
|
* Only one source can be connected at a time; calling again disconnects the previous.
|
|
208
221
|
*/
|
|
209
222
|
connectFrameSource(source) {
|
|
223
|
+
if (this.ttsSpeaker && source !== this.ttsSpeaker.frameSource) {
|
|
224
|
+
this.ttsSpeaker.stop();
|
|
225
|
+
}
|
|
210
226
|
this.disconnectFrameSource();
|
|
211
227
|
this.frameSourceCallback = (frame) => {
|
|
212
228
|
this.currentBlendshapes = frame.blendshapes;
|
|
229
|
+
if (frame.emotion !== void 0) {
|
|
230
|
+
this._emotion = frame.emotion;
|
|
231
|
+
}
|
|
213
232
|
};
|
|
214
233
|
source.on("frame", this.frameSourceCallback);
|
|
215
234
|
this.connectedSource = source;
|
|
@@ -225,6 +244,186 @@ var OmoteAvatar = class {
|
|
|
225
244
|
this.frameSourceCallback = null;
|
|
226
245
|
}
|
|
227
246
|
// ---------------------------------------------------------------------------
|
|
247
|
+
// Speaker (TTS → lip sync)
|
|
248
|
+
// ---------------------------------------------------------------------------
|
|
249
|
+
/**
|
|
250
|
+
* Connect a TTS backend for speak() / streamText() support.
|
|
251
|
+
* Loads LAM model and creates internal PlaybackPipeline.
|
|
252
|
+
*
|
|
253
|
+
* @param tts - TTS backend (e.g., KokoroTTSInference, ElevenLabs adapter)
|
|
254
|
+
* @param config - A2E, expression profile, and playback configuration
|
|
255
|
+
*/
|
|
256
|
+
/** Warm up AudioContext for iOS/Safari autoplay policy. Call from user gesture. */
|
|
257
|
+
async warmup() {
|
|
258
|
+
if (this.ttsSpeaker) await this.ttsSpeaker.warmup();
|
|
259
|
+
}
|
|
260
|
+
async connectSpeaker(tts, config) {
|
|
261
|
+
await this.disconnectSpeaker();
|
|
262
|
+
this.ttsSpeaker = new TTSSpeaker();
|
|
263
|
+
await this.ttsSpeaker.connect(tts, config);
|
|
264
|
+
this.connectFrameSource(this.ttsSpeaker.frameSource);
|
|
265
|
+
}
|
|
266
|
+
/**
|
|
267
|
+
* Synthesize text and play with lip sync.
|
|
268
|
+
* Auto-aborts previous speak if still in progress.
|
|
269
|
+
*
|
|
270
|
+
* @param text - Text to synthesize
|
|
271
|
+
* @param options - Optional voice override and abort signal
|
|
272
|
+
*/
|
|
273
|
+
async speak(text, options) {
|
|
274
|
+
if (this.voiceOrchestrator) {
|
|
275
|
+
await this.voiceOrchestrator.speak(text, options);
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
278
|
+
if (!this.ttsSpeaker) {
|
|
279
|
+
throw new Error("No speaker connected. Call connectSpeaker() first.");
|
|
280
|
+
}
|
|
281
|
+
this._isSpeaking = true;
|
|
282
|
+
this._state = "speaking";
|
|
283
|
+
try {
|
|
284
|
+
await this.ttsSpeaker.speak(text, options);
|
|
285
|
+
} finally {
|
|
286
|
+
this._isSpeaking = false;
|
|
287
|
+
if (this._state === "speaking") {
|
|
288
|
+
this._state = "idle";
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
/**
|
|
293
|
+
* Stream LLM tokens with sentence-buffered TTS + lip sync.
|
|
294
|
+
* Returns a sink: call push(token) for each token, end() when done.
|
|
295
|
+
*/
|
|
296
|
+
async streamText(options) {
|
|
297
|
+
if (this.voiceOrchestrator) {
|
|
298
|
+
return this.voiceOrchestrator.streamText(options);
|
|
299
|
+
}
|
|
300
|
+
if (!this.ttsSpeaker) {
|
|
301
|
+
throw new Error("No speaker connected. Call connectSpeaker() first.");
|
|
302
|
+
}
|
|
303
|
+
this._isSpeaking = true;
|
|
304
|
+
this._state = "speaking";
|
|
305
|
+
const stream = await this.ttsSpeaker.streamText(options ?? {});
|
|
306
|
+
return {
|
|
307
|
+
push: stream.push,
|
|
308
|
+
end: async () => {
|
|
309
|
+
try {
|
|
310
|
+
await stream.end();
|
|
311
|
+
} finally {
|
|
312
|
+
this._isSpeaking = false;
|
|
313
|
+
if (this._state === "speaking") this._state = "idle";
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
};
|
|
317
|
+
}
|
|
318
|
+
/** Stop current TTS playback. */
|
|
319
|
+
stopSpeaking() {
|
|
320
|
+
if (this.voiceOrchestrator) {
|
|
321
|
+
this.voiceOrchestrator.stopSpeaking();
|
|
322
|
+
return;
|
|
323
|
+
}
|
|
324
|
+
this.ttsSpeaker?.stop();
|
|
325
|
+
}
|
|
326
|
+
/** Disconnect speaker and dispose its resources. */
|
|
327
|
+
async disconnectSpeaker() {
|
|
328
|
+
if (this.ttsSpeaker) {
|
|
329
|
+
this.disconnectFrameSource();
|
|
330
|
+
await this.ttsSpeaker.dispose();
|
|
331
|
+
this.ttsSpeaker = null;
|
|
332
|
+
}
|
|
333
|
+
}
|
|
334
|
+
/** @deprecated Use connectSpeaker(). Will be removed in v1.0. */
|
|
335
|
+
async connectTTS(tts, config) {
|
|
336
|
+
return this.connectSpeaker(tts, config);
|
|
337
|
+
}
|
|
338
|
+
/** @deprecated Use disconnectSpeaker(). Will be removed in v1.0. */
|
|
339
|
+
async disconnectTTS() {
|
|
340
|
+
return this.disconnectSpeaker();
|
|
341
|
+
}
|
|
342
|
+
// ---------------------------------------------------------------------------
|
|
343
|
+
// Listener (mic → VAD → ASR → transcript)
|
|
344
|
+
// ---------------------------------------------------------------------------
|
|
345
|
+
/**
|
|
346
|
+
* Connect a speech listener for startListening() / onTranscript() support.
|
|
347
|
+
* Loads ASR + VAD models.
|
|
348
|
+
*/
|
|
349
|
+
async connectListener(config) {
|
|
350
|
+
await this.disconnectListener();
|
|
351
|
+
this.speechListener = new SpeechListener(config);
|
|
352
|
+
await this.speechListener.loadModels();
|
|
353
|
+
}
|
|
354
|
+
/** Start listening for user speech. Requires connectListener() or connectVoice() first. */
|
|
355
|
+
async startListening() {
|
|
356
|
+
if (this.voiceOrchestrator) {
|
|
357
|
+
await this.voiceOrchestrator.startListening();
|
|
358
|
+
return;
|
|
359
|
+
}
|
|
360
|
+
if (!this.speechListener) {
|
|
361
|
+
throw new Error("No listener connected. Call connectListener() first.");
|
|
362
|
+
}
|
|
363
|
+
this._state = "listening";
|
|
364
|
+
await this.speechListener.start();
|
|
365
|
+
}
|
|
366
|
+
/** Stop listening. */
|
|
367
|
+
stopListening() {
|
|
368
|
+
if (this.voiceOrchestrator) {
|
|
369
|
+
this.voiceOrchestrator.stopListening();
|
|
370
|
+
return;
|
|
371
|
+
}
|
|
372
|
+
this.speechListener?.stop();
|
|
373
|
+
if (this._state === "listening") this._state = "idle";
|
|
374
|
+
}
|
|
375
|
+
/**
|
|
376
|
+
* Subscribe to transcript events. Returns an unsubscribe function.
|
|
377
|
+
* Requires connectListener() first.
|
|
378
|
+
*/
|
|
379
|
+
onTranscript(callback) {
|
|
380
|
+
const listener = this.speechListener ?? this.voiceOrchestrator?.listener;
|
|
381
|
+
if (!listener) {
|
|
382
|
+
throw new Error("No listener connected. Call connectListener() or connectVoice() first.");
|
|
383
|
+
}
|
|
384
|
+
listener.on("transcript", callback);
|
|
385
|
+
return () => {
|
|
386
|
+
listener.off?.("transcript", callback);
|
|
387
|
+
};
|
|
388
|
+
}
|
|
389
|
+
/** Disconnect listener and dispose its resources. */
|
|
390
|
+
async disconnectListener() {
|
|
391
|
+
if (this.speechListener) {
|
|
392
|
+
await this.speechListener.dispose();
|
|
393
|
+
this.speechListener = null;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
// ---------------------------------------------------------------------------
|
|
397
|
+
// Voice (combined speaker + listener + interruption)
|
|
398
|
+
// ---------------------------------------------------------------------------
|
|
399
|
+
/**
|
|
400
|
+
* Connect voice with automatic speaker + listener + interruption wiring.
|
|
401
|
+
* Supports both local TTS (mode: 'local') and cloud TTS (mode: 'cloud').
|
|
402
|
+
* Does NOT auto-start listening — call startListening() when ready.
|
|
403
|
+
*
|
|
404
|
+
* Backward compatible: `mode` defaults to 'local' when not specified.
|
|
405
|
+
*/
|
|
406
|
+
async connectVoice(config) {
|
|
407
|
+
await this.disconnectVoice();
|
|
408
|
+
this.voiceOrchestrator = new VoiceOrchestrator();
|
|
409
|
+
await this.voiceOrchestrator.connect(config);
|
|
410
|
+
if (this.voiceOrchestrator.frameSource) {
|
|
411
|
+
this.connectFrameSource(this.voiceOrchestrator.frameSource);
|
|
412
|
+
}
|
|
413
|
+
this.voiceOrchestrator.on("state", (state) => {
|
|
414
|
+
this._state = state;
|
|
415
|
+
this._isSpeaking = state === "speaking";
|
|
416
|
+
});
|
|
417
|
+
}
|
|
418
|
+
/** Disconnect voice (speaker + listener + interruption). */
|
|
419
|
+
async disconnectVoice() {
|
|
420
|
+
if (this.voiceOrchestrator) {
|
|
421
|
+
this.disconnectFrameSource();
|
|
422
|
+
await this.voiceOrchestrator.disconnect();
|
|
423
|
+
this.voiceOrchestrator = null;
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
// ---------------------------------------------------------------------------
|
|
228
427
|
// State setters
|
|
229
428
|
// ---------------------------------------------------------------------------
|
|
230
429
|
/** Set blendshapes directly (alternative to connectFrameSource). */
|
|
@@ -247,6 +446,10 @@ var OmoteAvatar = class {
|
|
|
247
446
|
setAudioEnergy(energy) {
|
|
248
447
|
this._audioEnergy = energy;
|
|
249
448
|
}
|
|
449
|
+
/** Update character expression profile at runtime. */
|
|
450
|
+
setProfile(profile) {
|
|
451
|
+
this.controller.setProfile(profile);
|
|
452
|
+
}
|
|
250
453
|
/**
|
|
251
454
|
* Set the active camera for gaze tracking.
|
|
252
455
|
* Required when using autoUpdate. Can also be passed directly to update().
|
|
@@ -273,6 +476,26 @@ var OmoteAvatar = class {
|
|
|
273
476
|
get mappedBlendshapeCount() {
|
|
274
477
|
return this.discovery.mappedBlendshapeCount;
|
|
275
478
|
}
|
|
479
|
+
/** Whether the avatar is currently speaking via TTS. */
|
|
480
|
+
get isSpeaking() {
|
|
481
|
+
return this._isSpeaking;
|
|
482
|
+
}
|
|
483
|
+
/** Whether the avatar is currently listening for speech. */
|
|
484
|
+
get isListening() {
|
|
485
|
+
return this._state === "listening";
|
|
486
|
+
}
|
|
487
|
+
/** Current conversational state. */
|
|
488
|
+
get conversationalState() {
|
|
489
|
+
return this._state;
|
|
490
|
+
}
|
|
491
|
+
/** Access the internal TTSSpeaker (null if not connected). */
|
|
492
|
+
get speaker() {
|
|
493
|
+
return this.ttsSpeaker ?? this.voiceOrchestrator?.speaker ?? null;
|
|
494
|
+
}
|
|
495
|
+
/** Access the internal SpeechListener (null if not connected). */
|
|
496
|
+
get listener() {
|
|
497
|
+
return this.speechListener ?? this.voiceOrchestrator?.listener ?? null;
|
|
498
|
+
}
|
|
276
499
|
// ---------------------------------------------------------------------------
|
|
277
500
|
// Lifecycle
|
|
278
501
|
// ---------------------------------------------------------------------------
|
|
@@ -285,8 +508,11 @@ var OmoteAvatar = class {
|
|
|
285
508
|
this._audioEnergy = 0;
|
|
286
509
|
this.controller.reset();
|
|
287
510
|
}
|
|
288
|
-
/**
|
|
289
|
-
dispose() {
|
|
511
|
+
/** Disconnect all voice resources, frame sources, unregister render loop, dispose controller. */
|
|
512
|
+
async dispose() {
|
|
513
|
+
await this.disconnectVoice();
|
|
514
|
+
await this.disconnectSpeaker();
|
|
515
|
+
await this.disconnectListener();
|
|
290
516
|
this.disconnectFrameSource();
|
|
291
517
|
if (this.renderCallback) {
|
|
292
518
|
this.scene.unregisterBeforeRender(this.renderCallback);
|
|
@@ -299,9 +525,9 @@ var OmoteAvatar = class {
|
|
|
299
525
|
// Internal
|
|
300
526
|
// ---------------------------------------------------------------------------
|
|
301
527
|
registerAutoUpdate() {
|
|
302
|
-
this.lastTime =
|
|
528
|
+
this.lastTime = getClock().now();
|
|
303
529
|
this.renderCallback = () => {
|
|
304
|
-
const now =
|
|
530
|
+
const now = getClock().now();
|
|
305
531
|
const delta = (now - this.lastTime) / 1e3;
|
|
306
532
|
this.lastTime = now;
|
|
307
533
|
if (this._camera) {
|
|
@@ -419,45 +645,8 @@ var BlendshapeController = class {
|
|
|
419
645
|
this.scene = null;
|
|
420
646
|
}
|
|
421
647
|
};
|
|
422
|
-
|
|
423
|
-
// src/OmoteA2E.ts
|
|
424
|
-
import { A2EOrchestrator } from "@omote/core";
|
|
425
|
-
var OmoteA2E = class {
|
|
426
|
-
constructor(options) {
|
|
427
|
-
const { target, scene, controllerOptions, ...orchestratorConfig } = options;
|
|
428
|
-
this.controller = new BlendshapeController(target, scene, controllerOptions);
|
|
429
|
-
this.orchestrator = new A2EOrchestrator(orchestratorConfig);
|
|
430
|
-
}
|
|
431
|
-
async load() {
|
|
432
|
-
return this.orchestrator.load();
|
|
433
|
-
}
|
|
434
|
-
async start() {
|
|
435
|
-
return this.orchestrator.start();
|
|
436
|
-
}
|
|
437
|
-
stop() {
|
|
438
|
-
this.orchestrator.stop();
|
|
439
|
-
}
|
|
440
|
-
update() {
|
|
441
|
-
const w = this.orchestrator.latestWeights;
|
|
442
|
-
if (w) this.controller.update(w);
|
|
443
|
-
}
|
|
444
|
-
async dispose() {
|
|
445
|
-
await this.orchestrator.dispose();
|
|
446
|
-
this.controller.dispose();
|
|
447
|
-
}
|
|
448
|
-
get isReady() {
|
|
449
|
-
return this.orchestrator.isReady;
|
|
450
|
-
}
|
|
451
|
-
get isStreaming() {
|
|
452
|
-
return this.orchestrator.isStreaming;
|
|
453
|
-
}
|
|
454
|
-
get backend() {
|
|
455
|
-
return this.orchestrator.backend;
|
|
456
|
-
}
|
|
457
|
-
};
|
|
458
648
|
export {
|
|
459
649
|
BlendshapeController,
|
|
460
|
-
OmoteA2E,
|
|
461
650
|
OmoteAvatar,
|
|
462
651
|
discoverScene,
|
|
463
652
|
writeBlendshapes
|