avatarlayer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +2165 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +717 -0
- package/dist/index.d.ts +717 -0
- package/dist/index.js +2112 -0
- package/dist/index.js.map +1 -0
- package/dist/react/index.cjs +332 -0
- package/dist/react/index.cjs.map +1 -0
- package/dist/react/index.js +308 -0
- package/dist/react/index.js.map +1 -0
- package/package.json +69 -0
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,2165 @@
|
|
|
1
|
+
"use strict";
// esbuild-generated CommonJS interop preamble (auto-generated; do not edit by hand).
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Install a lazy, enumerable getter on `target` for every key in `all`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
// Copy own properties of `from` onto `to` as live getters, skipping `except`
// and any key `to` already owns; preserves the source's enumerability.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
// Wrap a CommonJS module so it can be consumed like an ES module.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
// Mark an exports object as an ES module and copy its bindings onto it.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
29
|
+
|
|
30
|
+
// src/index.ts
// Public API surface of the bundle: lazy getters are registered for each
// export, then exposed through module.exports for CommonJS consumers.
var src_exports = {};
__export(src_exports, {
  AnthropicAdapter: () => AnthropicAdapter,
  AtlasRenderer: () => AtlasRenderer,
  AvatarSession: () => AvatarSession,
  ElevenLabsAdapter: () => ElevenLabsAdapter,
  ElevenLabsSTTAdapter: () => ElevenLabsSTTAdapter,
  GeminiAdapter: () => GeminiAdapter,
  HeyGenRenderer: () => HeyGenRenderer,
  LemonSliceRenderer: () => LemonSliceRenderer,
  LipSyncEngine: () => LipSyncEngine,
  Live2DRenderer: () => Live2DRenderer,
  MicCapture: () => MicCapture,
  OpenAIAdapter: () => OpenAIAdapter,
  TypedEmitter: () => TypedEmitter,
  VRMLocalRenderer: () => VRMLocalRenderer,
  VideoRenderer: () => VideoRenderer,
  decodeToPcm: () => decodeToPcm,
  playAudioBlob: () => playAudioBlob
});
module.exports = __toCommonJS(src_exports);
|
|
52
|
+
|
|
53
|
+
// src/core/events.ts
/**
 * Minimal typed event emitter: per-event handler sets, chainable on/off,
 * and an emit that isolates the emitter from throwing listeners.
 */
var TypedEmitter = class {
  /** event name -> Set of subscribed handlers */
  listeners = /* @__PURE__ */ new Map();
  /** Subscribe `fn` to `event`. Returns `this` for chaining. */
  on(event, fn) {
    const existing = this.listeners.get(event);
    if (existing) {
      existing.add(fn);
    } else {
      this.listeners.set(event, /* @__PURE__ */ new Set([fn]));
    }
    return this;
  }
  /** Unsubscribe `fn` from `event`; unknown events are a no-op. Chainable. */
  off(event, fn) {
    this.listeners.get(event)?.delete(fn);
    return this;
  }
  /** Invoke every handler registered for `event` with `args`. */
  emit(event, ...args) {
    const handlers = this.listeners.get(event);
    if (!handlers) return;
    for (const handler of handlers) {
      try {
        handler(...args);
      } catch {
        // A throwing listener must never break the emitter or its siblings.
      }
    }
  }
  /** Drop every subscription for every event. */
  removeAllListeners() {
    this.listeners.clear();
  }
};
|
|
83
|
+
|
|
84
|
+
// src/core/session.ts
// Splits after sentence-ending punctuation (Latin or CJK) or a newline,
// consuming the trailing whitespace.
var SENTENCE_DELIMITERS = /(?<=[.!?。!?\n])\s+/;
/**
 * Split accumulated streaming text into complete sentences plus a remainder.
 * @returns [completeSentences, tail] where `tail` is the text after the last
 *   delimiter and may still be growing; empty fragments are dropped.
 */
function splitSentences(buffer) {
  const pieces = buffer.split(SENTENCE_DELIMITERS);
  if (pieces.length <= 1) return [[], buffer];
  const tail = pieces.pop();
  const complete = pieces.filter((piece) => piece.trim().length > 0);
  return [complete, tail];
}
|
|
92
|
+
/**
 * Orchestrates the chat pipeline: user text -> streaming LLM -> per-sentence
 * TTS -> renderer playback, with interruption via AbortController and a
 * transaction counter so a superseded run cannot mutate session state.
 */
var AvatarSession = class extends TypedEmitter {
  // Session configuration object; holds llm, renderer, and optionally
  // tts / systemPrompt / reasoningEffort (shape per callers in this file).
  config;
  // Lifecycle state; read via the `state` getter, mutated only by setState().
  _state = "idle";
  // Full conversation transcript (user + assistant messages).
  history = [];
  // AbortController for the pipeline currently in flight, if any.
  abortController = null;
  // Monotonic run counter; a stale pipeline sees a newer id and bails out.
  transactionId = 0;
  constructor(config) {
    super();
    this.config = config;
  }
  get state() {
    return this._state;
  }
  get messages() {
    return this.history;
  }
  /** Mount the renderer and transition to ready. */
  async start(container) {
    // Only start from a fresh or fully torn-down session.
    if (this._state !== "idle" && this._state !== "destroyed") return;
    this.setState("connecting");
    try {
      await this.config.renderer.mount(container);
      this.setState("ready");
    } catch (err) {
      this.setState("error");
      this.emit("error", err instanceof Error ? err : new Error(String(err)));
    }
  }
  /** Send a user message and run the LLM → TTS → speak pipeline. */
  async sendMessage(text) {
    if (this._state !== "ready" && this._state !== "speaking") return;
    // Cancel any reply still in flight before starting a new one.
    this.interrupt();
    const userMsg = {
      role: "user",
      content: text,
      id: crypto.randomUUID(),
      timestamp: Date.now()
    };
    this.history.push(userMsg);
    this.emit("message", userMsg);
    // txId + AbortController together guard this run against interruption
    // and against being superseded by a later sendMessage.
    const txId = ++this.transactionId;
    const ac = new AbortController();
    this.abortController = ac;
    this.setState("thinking");
    const messagesForLLM = [];
    if (this.config.systemPrompt) {
      messagesForLLM.push({ role: "system", content: this.config.systemPrompt });
    }
    messagesForLLM.push(...this.history);
    let fullText = "";
    let buffer = "";
    const assistantMsg = {
      role: "assistant",
      content: "",
      id: crypto.randomUUID(),
      timestamp: Date.now()
    };
    try {
      const stream = this.config.llm.chat(messagesForLLM, {
        signal: ac.signal,
        ...this.config.reasoningEffort ? { reasoningEffort: this.config.reasoningEffort } : {}
      });
      for await (const chunk of stream) {
        // Bail out silently if this run was interrupted or superseded.
        if (ac.signal.aborted || txId !== this.transactionId) return;
        fullText += chunk.text;
        buffer += chunk.text;
        this.emit("chunk", chunk.text, fullText);
        // Speak complete sentences as they arrive; keep the tail buffered.
        const [sentences, rest] = splitSentences(buffer);
        buffer = rest;
        for (const sentence of sentences) {
          if (ac.signal.aborted || txId !== this.transactionId) return;
          await this.synthesizeAndSpeak(sentence, ac.signal, txId);
        }
      }
      // Flush whatever partial sentence remains after the stream ends.
      if (buffer.trim() && !ac.signal.aborted && txId === this.transactionId) {
        await this.synthesizeAndSpeak(buffer.trim(), ac.signal, txId);
      }
      if (!ac.signal.aborted && txId === this.transactionId) {
        assistantMsg.content = fullText;
        this.history.push(assistantMsg);
        this.emit("message", assistantMsg);
        this.setState("ready");
      }
    } catch (err) {
      // An abort is an expected interruption, not an error.
      if (ac.signal.aborted) return;
      this.setState("error");
      this.emit("error", err instanceof Error ? err : new Error(String(err)));
    }
  }
  /** Cancel the current LLM/TTS/speak pipeline. */
  interrupt() {
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }
    this.config.renderer.interrupt();
    if (this._state === "thinking" || this._state === "speaking") {
      this.setState("ready");
    }
  }
  /** Update the renderer with new avatar control state. */
  updateControl(control) {
    this.config.renderer.update(control);
  }
  /** Replace the LLM provider at runtime. */
  setLLM(llm) {
    this.config = { ...this.config, llm };
  }
  /** Replace the TTS provider at runtime. */
  setTTS(tts) {
    this.config = { ...this.config, tts };
  }
  /** Replace the renderer at runtime. Unmounts the old one. */
  async setRenderer(renderer, container) {
    this.interrupt();
    this.config.renderer.unmount();
    this.config = { ...this.config, renderer };
    await renderer.mount(container);
  }
  /** Tear down everything. */
  destroy() {
    this.interrupt();
    this.config.renderer.unmount();
    this.removeAllListeners();
    this.setState("destroyed");
  }
  // -- internals --
  // Speak one sentence: prefer the renderer's own speakText (it handles TTS
  // itself); otherwise synthesize a Blob with the TTS provider and hand it
  // to renderer.speak. Emits speech-start/speech-end around playback.
  async synthesizeAndSpeak(text, signal, txId) {
    if (signal.aborted || txId !== this.transactionId) return;
    if (this.config.renderer.speakText) {
      this.setState("speaking");
      this.emit("speech-start");
      try {
        await this.config.renderer.speakText(text, signal);
      } finally {
        // Suppress speech-end for aborted or superseded runs.
        if (!signal.aborted && txId === this.transactionId) {
          this.emit("speech-end");
        }
      }
      return;
    }
    if (!this.config.tts) {
      throw new Error("TTSProvider is required when the renderer does not implement speakText");
    }
    const blob = await this.config.tts.synthesize(text, { signal });
    if (signal.aborted || txId !== this.transactionId) return;
    this.setState("speaking");
    this.emit("speech-start");
    try {
      await this.config.renderer.speak(blob);
    } finally {
      if (!signal.aborted && txId === this.transactionId) {
        this.emit("speech-end");
      }
    }
  }
  // Transition state and notify listeners; no-op when unchanged.
  setState(next) {
    if (this._state === next) return;
    this._state = next;
    this.emit("state-change", next);
  }
};
|
|
254
|
+
|
|
255
|
+
// src/core/audio-utils.ts
var PCM_SAMPLE_RATE = 16e3;
var PCM_BYTES_PER_SAMPLE = 2;
/**
 * Decode an encoded audio Blob to interleaved little-endian 16-bit PCM at
 * 16 kHz. Uses Web Audio (AudioContext / OfflineAudioContext), so this runs
 * only in a browser-like environment.
 * @returns {{ pcmBytes: Uint8Array, sampleRate: number, numChannels: number }}
 */
async function decodeToPcm(audioBlob) {
  const encoded = await audioBlob.arrayBuffer();
  const ctx = new AudioContext();
  let decoded;
  try {
    // decodeAudioData may detach its input buffer, so hand it a copy.
    decoded = await ctx.decodeAudioData(encoded.slice(0));
  } finally {
    await ctx.close();
  }
  let rendered = decoded;
  if (decoded.sampleRate !== PCM_SAMPLE_RATE) {
    // Resample by rendering through an offline context at the target rate.
    const frames = Math.ceil(decoded.duration * PCM_SAMPLE_RATE);
    const offline = new OfflineAudioContext(
      decoded.numberOfChannels,
      frames,
      PCM_SAMPLE_RATE
    );
    const node = offline.createBufferSource();
    node.buffer = decoded;
    node.connect(offline.destination);
    node.start(0);
    rendered = await offline.startRendering();
  }
  const numChannels = rendered.numberOfChannels;
  const frameCount = rendered.length;
  const out = new ArrayBuffer(frameCount * numChannels * PCM_BYTES_PER_SAMPLE);
  const view = new DataView(out);
  for (let frame = 0; frame < frameCount; frame++) {
    for (let ch = 0; ch < numChannels; ch++) {
      const sample = rendered.getChannelData(ch)[frame] ?? 0;
      const clamped = Math.min(1, Math.max(-1, sample));
      // Asymmetric scaling maps both -1 and +1 onto the int16 range.
      const int16 = clamped < 0 ? clamped * 32768 : clamped * 32767;
      view.setInt16((frame * numChannels + ch) * PCM_BYTES_PER_SAMPLE, int16, true);
    }
  }
  let pcmBytes = new Uint8Array(out);
  if (pcmBytes.byteLength % PCM_BYTES_PER_SAMPLE !== 0) {
    // Defensive: drop a trailing odd byte so consumers see whole samples.
    pcmBytes = pcmBytes.subarray(
      0,
      pcmBytes.byteLength - pcmBytes.byteLength % PCM_BYTES_PER_SAMPLE
    );
  }
  return { pcmBytes, sampleRate: PCM_SAMPLE_RATE, numChannels };
}
|
|
302
|
+
/**
 * Play an audio Blob through an HTMLAudioElement.
 * Resolves when playback ends or the optional `signal` aborts (interruption
 * is treated as success, matching the session's interrupt semantics);
 * rejects on playback failure.
 *
 * Fix: the "abort" listener is now removed once playback settles (and added
 * with `{ once: true }`), so a long-lived caller signal no longer retains
 * the Audio element/closure after playback finishes, and cleanup() can no
 * longer run a second time on a later abort.
 */
function playAudioBlob(blob, signal) {
  return new Promise((resolve, reject) => {
    const url = URL.createObjectURL(blob);
    const audio = new Audio(url);
    let settled = false;
    const cleanup = () => {
      audio.pause();
      audio.removeAttribute("src");
      URL.revokeObjectURL(url);
      // Drop our hook on the caller's signal so nothing keeps `audio` alive.
      if (signal) signal.removeEventListener("abort", onAbort);
    };
    // Settle exactly once: clean up, then resolve (or reject with `err`).
    const settle = (err) => {
      if (settled) return;
      settled = true;
      cleanup();
      if (err) {
        reject(err);
      } else {
        resolve();
      }
    };
    const onAbort = () => settle();
    audio.addEventListener("ended", () => settle());
    audio.addEventListener("error", () => settle(new Error("Audio playback error")));
    if (signal) {
      if (signal.aborted) {
        settle();
        return;
      }
      signal.addEventListener("abort", onAbort, { once: true });
    }
    audio.play().catch((err) => settle(err));
  });
}
|
|
336
|
+
|
|
337
|
+
// src/adapters/llm/openai.ts
var import_openai = __toESM(require("openai"), 1);
/** LLM provider backed by the OpenAI Chat Completions streaming API. */
var OpenAIAdapter = class {
  id = "openai";
  client;
  defaultModel;
  constructor(opts) {
    this.client = new import_openai.default({
      apiKey: opts.apiKey,
      baseURL: opts.baseURL,
      dangerouslyAllowBrowser: true
    });
    this.defaultModel = opts.model ?? "gpt-5.4-mini";
  }
  /** Stream chat deltas as { text, done } chunks, ending with a done marker. */
  async *chat(messages, opts) {
    const effort = opts?.reasoningEffort;
    const reasoningEffort = effort && effort !== "none" ? effort : void 0;
    const request = {
      model: opts?.model ?? this.defaultModel,
      messages: messages.map(({ role, content }) => ({ role, content })),
      stream: true
    };
    if (opts?.temperature != null) request.temperature = opts.temperature;
    if (opts?.maxTokens != null) request.max_tokens = opts.maxTokens;
    if (reasoningEffort) request.reasoning_effort = reasoningEffort;
    const stream = await this.client.chat.completions.create(
      request,
      { signal: opts?.signal }
    );
    for await (const chunk of stream) {
      const delta = chunk.choices[0]?.delta?.content;
      if (delta) {
        yield { text: delta, done: false };
      }
    }
    yield { text: "", done: true };
  }
};
|
|
373
|
+
|
|
374
|
+
// src/adapters/llm/anthropic.ts
var import_sdk = __toESM(require("@anthropic-ai/sdk"), 1);
/** LLM provider backed by the Anthropic Messages streaming API. */
var AnthropicAdapter = class {
  id = "anthropic";
  client;
  defaultModel;
  constructor(opts) {
    this.client = new import_sdk.default({
      apiKey: opts.apiKey,
      ...opts.baseURL ? { baseURL: opts.baseURL } : {}
    });
    this.defaultModel = opts.model ?? "claude-sonnet-4.6";
  }
  /** Stream assistant text deltas as { text, done } chunks. */
  async *chat(messages, opts) {
    // Anthropic takes the system prompt as a dedicated field, not a message.
    const systemPrompt = opts?.systemPrompt ?? messages.find((m) => m.role === "system")?.content;
    const conversation = [];
    for (const m of messages) {
      if (m.role !== "system") {
        conversation.push({ role: m.role, content: m.content });
      }
    }
    // Map reasoning effort onto an extended-thinking token budget.
    let thinkingParam = {};
    const effort = opts?.reasoningEffort;
    if (effort && effort !== "none") {
      const budgetMap = {
        low: 4096,
        medium: 1e4,
        high: 32e3
      };
      thinkingParam = {
        thinking: {
          type: "enabled",
          budget_tokens: budgetMap[effort] ?? 1e4
        }
      };
    }
    const stream = this.client.messages.stream(
      {
        model: opts?.model ?? this.defaultModel,
        // Thinking needs extra headroom on top of the visible output.
        max_tokens: opts?.maxTokens ?? (thinkingParam.thinking ? 16384 : 4096),
        messages: conversation,
        ...systemPrompt ? { system: systemPrompt } : {},
        // temperature is omitted when extended thinking is enabled.
        ...opts?.temperature != null && !thinkingParam.thinking ? { temperature: opts.temperature } : {},
        ...thinkingParam
      },
      { signal: opts?.signal ?? void 0 }
    );
    for await (const event of stream) {
      if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
        yield { text: event.delta.text, done: false };
      }
    }
    yield { text: "", done: true };
  }
};
|
|
427
|
+
|
|
428
|
+
// src/adapters/llm/gemini.ts
var import_generative_ai = require("@google/generative-ai");
/** LLM provider backed by the Google Generative AI (Gemini) chat API. */
var GeminiAdapter = class {
  id = "gemini";
  genAI;
  defaultModel;
  constructor(opts) {
    this.genAI = new import_generative_ai.GoogleGenerativeAI(opts.apiKey);
    this.defaultModel = opts.model ?? "gemini-3-flash-preview";
  }
  /** Stream text chunks as { text, done }, ending with a done marker. */
  async *chat(messages, opts) {
    const generationConfig = {};
    if (opts?.temperature != null) generationConfig.temperature = opts.temperature;
    if (opts?.maxTokens != null) generationConfig.maxOutputTokens = opts.maxTokens;
    const model = this.genAI.getGenerativeModel({
      model: opts?.model ?? this.defaultModel,
      generationConfig
    });
    const systemInstruction = opts?.systemPrompt ?? messages.find((m) => m.role === "system")?.content;
    const turns = messages.filter((m) => m.role !== "system");
    const lastMsg = turns.at(-1);
    if (!lastMsg) {
      yield { text: "", done: true };
      return;
    }
    // Gemini wants prior turns as `history` and the newest turn sent alone.
    const history = turns.slice(0, -1).map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: [{ text: m.content }]
    }));
    const chat = model.startChat({
      history,
      ...systemInstruction ? { systemInstruction: { role: "system", parts: [{ text: systemInstruction }] } } : {}
    });
    const result = await chat.sendMessageStream(lastMsg.content);
    for await (const chunk of result.stream) {
      const text = chunk.text();
      if (text) {
        yield { text, done: false };
      }
    }
    yield { text: "", done: true };
  }
};
|
|
470
|
+
|
|
471
|
+
// src/adapters/tts/elevenlabs.ts
var ELEVENLABS_BASE = "https://api.elevenlabs.io/v1";
/** TTS provider backed by the ElevenLabs streaming text-to-speech endpoint. */
var ElevenLabsAdapter = class {
  id = "elevenlabs";
  apiKey;
  defaultVoiceId;
  defaultModelId;
  baseURL;
  constructor(opts) {
    this.apiKey = opts.apiKey;
    this.defaultVoiceId = opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM";
    this.defaultModelId = opts.modelId ?? "eleven_multilingual_v2";
    this.baseURL = opts.baseURL ?? ELEVENLABS_BASE;
  }
  /**
   * Synthesize `text` to an audio Blob.
   * @throws Error when the HTTP response is not OK (includes response detail).
   */
  async synthesize(text, opts) {
    const voiceId = opts?.voiceId ?? this.defaultVoiceId;
    const endpoint = `${this.baseURL}/text-to-speech/${encodeURIComponent(voiceId)}/stream`;
    const payload = {
      text,
      model_id: opts?.modelId ?? this.defaultModelId,
      output_format: opts?.outputFormat ?? "mp3_44100_128"
    };
    // Only attach voice_settings when at least one knob was provided.
    const voiceSettings = {};
    if (opts?.stability != null) voiceSettings.stability = opts.stability;
    if (opts?.similarityBoost != null) voiceSettings.similarity_boost = opts.similarityBoost;
    if (opts?.speed != null) voiceSettings.speed = opts.speed;
    if (Object.keys(voiceSettings).length > 0) payload.voice_settings = voiceSettings;
    const resp = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "xi-api-key": this.apiKey
      },
      body: JSON.stringify(payload),
      signal: opts?.signal
    });
    if (!resp.ok) {
      const detail = await resp.text().catch(() => resp.statusText);
      throw new Error(`ElevenLabs TTS failed (${resp.status}): ${detail}`);
    }
    return resp.blob();
  }
};
|
|
516
|
+
|
|
517
|
+
// src/adapters/stt/elevenlabs.ts
// A transcript is usable only if it contains at least one letter or digit
// (filters out punctuation-only / noise-only commits).
var isTranscriptOk = (transcript) => /[\p{L}\p{N}]/u.test(transcript);
|
|
519
|
+
/**
 * Realtime speech-to-text over the ElevenLabs Scribe WebSocket API.
 * Accumulates committed text plus a live partial, and emits
 * "transcript" / "session-started" / "error" / "close" events.
 */
var ElevenLabsSTTAdapter = class extends TypedEmitter {
  constructor(opts) {
    super();
    this.opts = opts;
  }
  // Options: { tokenUrl?, apiKey?, modelId?, vadSilenceThresholdSecs? }
  // (at least one of tokenUrl / apiKey is required — see fetchToken).
  opts;
  // Active WebSocket connection, or null when disconnected.
  ws = null;
  // Controls the current connect() lifecycle; aborting closes the socket.
  abortController = null;
  // Space-joined finalized transcript segments.
  committedText = "";
  // Latest in-progress (uncommitted) transcript.
  pendingPartial = "";
  // Committed text plus the live partial, space-separated when both exist.
  getFullText() {
    if (!this.pendingPartial) return this.committedText;
    if (!this.committedText) return this.pendingPartial;
    return this.committedText + " " + this.pendingPartial;
  }
  /**
   * Connect to ElevenLabs Scribe v2 realtime WebSocket.
   * Resolves once the WebSocket is open and ready to receive audio.
   */
  async connect(signal) {
    // Tear down any previous connection first.
    this.disconnect();
    const ac = new AbortController();
    this.abortController = ac;
    signal?.addEventListener("abort", () => ac.abort());
    // Auth: exchange apiKey (directly or via tokenUrl) for a single-use token.
    const token = await this.fetchToken(ac.signal);
    if (ac.signal.aborted) return;
    const modelId = this.opts.modelId ?? "scribe_v2_realtime";
    const vadSilence = this.opts.vadSilenceThresholdSecs ?? 1;
    // pcm_16000 matches sendAudio's 16 kHz int16 conversion below.
    const wsParams = new URLSearchParams({
      model_id: modelId,
      token,
      commit_strategy: "vad",
      vad_silence_threshold_secs: String(vadSilence),
      audio_format: "pcm_16000"
    });
    const ws = new WebSocket(
      `wss://api.elevenlabs.io/v1/speech-to-text/realtime?${wsParams.toString()}`
    );
    this.ws = ws;
    ac.signal.addEventListener("abort", () => {
      ws.close();
    });
    let sessionStarted = false;
    ws.addEventListener("message", (e) => {
      try {
        const msg = JSON.parse(e.data);
        if (msg.message_type === "session_started") {
          // Emit session-started at most once per connection.
          if (!sessionStarted) {
            sessionStarted = true;
            this.emit("session-started");
          }
        } else if (msg.message_type === "partial_transcript") {
          const newPartial = msg.text ?? "";
          if (newPartial.length > this.pendingPartial.length) {
            // Partial grew: emit only the newly appended suffix as the delta.
            const delta = newPartial.slice(this.pendingPartial.length);
            this.pendingPartial = newPartial;
            this.emit("transcript", delta, { fullText: this.getFullText() });
          } else {
            // Partial shrank/was revised: no delta, just the new full text.
            this.pendingPartial = newPartial;
            this.emit("transcript", "", { fullText: this.getFullText() });
          }
        } else if (msg.message_type === "committed_transcript") {
          const transcript = msg.text ?? "";
          // Drop punctuation-only commits (isTranscriptOk).
          if (transcript && isTranscriptOk(transcript)) {
            this.committedText = this.committedText ? this.committedText + " " + transcript : transcript;
            this.pendingPartial = "";
            this.emit("transcript", transcript, {
              final: true,
              fullText: this.getFullText()
            });
          } else {
            this.pendingPartial = "";
          }
        }
      } catch {
        // Ignore malformed frames; best-effort parsing of server messages.
      }
    });
    ws.addEventListener("error", () => {
      this.emit("error", new Error("ElevenLabs STT WebSocket error"));
    });
    ws.addEventListener("close", () => {
      this.emit("close");
    });
    // Wait until the socket is open (or errors/aborts) before resolving.
    if (ws.readyState !== WebSocket.OPEN) {
      await new Promise((resolve, reject) => {
        const onOpen = () => {
          cleanup();
          resolve();
        };
        const onError = (ev) => {
          cleanup();
          reject(ev);
        };
        const onAbort = () => {
          cleanup();
          reject(new DOMException("Aborted", "AbortError"));
        };
        // Remove all three handshake listeners whichever outcome wins.
        const cleanup = () => {
          ws.removeEventListener("open", onOpen);
          ws.removeEventListener("error", onError);
          ac.signal.removeEventListener("abort", onAbort);
        };
        ws.addEventListener("open", onOpen);
        ws.addEventListener("error", onError);
        ac.signal.addEventListener("abort", onAbort);
      });
    }
  }
  /**
   * Send a PCM audio buffer (Float32Array, 16 kHz mono) to the WebSocket.
   * Converts to int16 PCM then base64 before sending.
   */
  sendAudio(pcm) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
    const int16 = new Int16Array(pcm.length);
    for (let i = 0; i < pcm.length; i++) {
      const s = Math.max(-1, Math.min(1, pcm[i]));
      // Asymmetric scaling maps both -1 and +1 onto the int16 range.
      int16[i] = s < 0 ? s * 32768 : s * 32767;
    }
    const bytes = new Uint8Array(int16.buffer);
    let binary = "";
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    const audioBase64 = btoa(binary);
    this.ws.send(
      JSON.stringify({
        message_type: "input_audio_chunk",
        audio_base_64: audioBase64
      })
    );
  }
  /** Disconnect and clean up. */
  disconnect() {
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
    // Reset transcript accumulation for the next connection.
    this.committedText = "";
    this.pendingPartial = "";
  }
  /** Whether a committed_transcript has been received since the last drain check. */
  get hasCommitted() {
    return this.committedText.length > 0;
  }
  // Obtain a connection token: prefer the caller's tokenUrl endpoint (keeps
  // the API key server-side), else mint a single-use token with the API key.
  async fetchToken(signal) {
    if (this.opts.tokenUrl) {
      const resp = await fetch(this.opts.tokenUrl, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ apiKey: this.opts.apiKey }),
        signal
      });
      if (!resp.ok) {
        const body = await resp.text().catch(() => resp.statusText);
        throw new Error(`STT token request failed: ${resp.status} ${body}`);
      }
      const json = await resp.json();
      return json.token;
    }
    if (this.opts.apiKey) {
      const resp = await fetch(
        "https://api.elevenlabs.io/v1/speech-to-text/single-use-token/realtime_scribe",
        {
          method: "POST",
          headers: { "xi-api-key": this.opts.apiKey },
          signal
        }
      );
      if (!resp.ok) {
        const body = await resp.text().catch(() => resp.statusText);
        throw new Error(`STT token request failed: ${resp.status} ${body}`);
      }
      const json = await resp.json();
      return json.token;
    }
    throw new Error(
      "ElevenLabsSTTAdapter requires either tokenUrl or apiKey"
    );
  }
};
|
|
704
|
+
|
|
705
|
+
// src/core/mic-capture.ts
/**
 * Captures microphone audio through an AudioWorklet, delivering raw buffers
 * and volume levels via callbacks. Browser-only (getUserMedia / Web Audio).
 */
var MicCapture = class {
  // Live AudioContext while capturing, else null.
  audioContext = null;
  // MediaStreamAudioSourceNode wrapping the mic stream.
  source = null;
  // AudioWorkletNode running the "volume-processor" worklet.
  workletNode = null;
  // The raw microphone MediaStream.
  stream = null;
  // Aborts an in-progress start() if stop() is called mid-setup.
  abortController = null;
  /**
   * Request mic access, load the AudioWorklet, and start streaming.
   * Resolves once the worklet is connected and producing buffers.
   */
  async start(opts = {}) {
    const {
      workletUrl = "/audio/microphone-worklet.js",
      sampleRate = 16e3,
      bufferSize = 4096,
      onBuffer,
      onVolume
    } = opts;
    // Ensure any previous capture is fully torn down first.
    this.stop();
    const ac = new AbortController();
    this.abortController = ac;
    const micStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        autoGainControl: true,
        noiseSuppression: true,
        echoCancellation: true
      }
    });
    // stop() may have run while we awaited the permission prompt.
    if (ac.signal.aborted) {
      micStream.getTracks().forEach((t) => t.stop());
      throw new DOMException("Aborted", "AbortError");
    }
    this.stream = micStream;
    const audioContext = new AudioContext();
    this.audioContext = audioContext;
    await audioContext.audioWorklet.addModule(workletUrl);
    // Re-check after the second await; release everything acquired so far.
    if (ac.signal.aborted) {
      audioContext.close().catch(() => {
      });
      micStream.getTracks().forEach((t) => t.stop());
      throw new DOMException("Aborted", "AbortError");
    }
    const source = audioContext.createMediaStreamSource(micStream);
    this.source = source;
    // "volume-processor" must be registered by the worklet module above.
    const workletNode = new AudioWorkletNode(audioContext, "volume-processor");
    this.workletNode = workletNode;
    // Configure the worklet; starts muted — presumably unmuted by a later
    // message from the consumer (confirm against the worklet source).
    workletNode.port.postMessage(
      JSON.stringify({
        method: "options",
        args: {
          sampleRate,
          bufferSize,
          muted: true,
          emitVolume: !!onVolume,
          emitBuffer: !!onBuffer
        }
      })
    );
    workletNode.port.onmessage = (e) => {
      if (ac.signal.aborted) return;
      if (e.data.method === "volume") {
        onVolume?.(e.data.data);
      } else if (e.data.method === "buffer") {
        onBuffer?.(e.data.data);
      }
    };
    source.connect(workletNode);
    workletNode.connect(audioContext.destination);
    return micStream;
  }
  /** Stop mic capture and release all resources. */
  stop() {
    if (this.abortController) {
      this.abortController.abort();
      this.abortController = null;
    }
    if (this.workletNode) {
      this.workletNode.port.onmessage = null;
      this.workletNode.disconnect();
      this.workletNode = null;
    }
    if (this.source) {
      this.source.disconnect();
      this.source = null;
    }
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    if (this.audioContext) {
      // close() can reject if already closed; ignore best-effort cleanup.
      this.audioContext.close().catch(() => {
      });
      this.audioContext = null;
    }
  }
  /** Stop just the mic tracks (audio input) while keeping the worklet alive for drain. */
  stopMicTracks() {
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
    }
  }
};
|
|
808
|
+
|
|
809
|
+
// src/adapters/avatar/lemonslice.ts
// LiveKit data-stream topic the avatar agent consumes PCM audio from.
var AUDIO_STREAM_TOPIC = "lk.audio_stream";
// RPC asking the avatar to drop any buffered, not-yet-played audio.
var RPC_CLEAR_BUFFER = "lk.clear_buffer";
// RPC method name this client registers so the avatar can signal playback completion.
var RPC_PLAYBACK_FINISHED = "lk.playback_finished";
// Each written PCM chunk covers roughly this many milliseconds of audio (used for pacing).
var PCM_FRAME_MS = 100;
// 16-bit PCM samples.
var PCM_BYTES_PER_SAMPLE2 = 2;
// How long to wait for the avatar's first video frame before reporting an error.
var CONNECT_TIMEOUT_MS = 45e3;
// Remote avatar renderer that joins a LiveKit room and drives a LemonSlice
// avatar by streaming decoded PCM audio to it over a byte stream.
var LemonSliceRenderer = class {
  id = "lemonslice";
  type = "remote";
  opts;
  // LiveKit Room once connect() succeeds; null when unmounted.
  room = null;
  // Identity of the avatar participant to target with streams/RPCs.
  avatarIdentity = null;
  // Open byte-stream writer for the in-flight speech segment, if any.
  audioStreamWriter = null;
  // Set false once the remote side reports RPC_CLEAR_BUFFER as unsupported.
  clearBufferSupported = true;
  playbackFinishedRegistered = false;
  connected = false;
  // True between mount() and the first subscribed video track.
  awaitingVideo = false;
  connectTimer = null;
  // Serializes speak() calls so segments are written one at a time.
  writeQueue = Promise.resolve();
  container = null;
  videoEl = null;
  constructor(opts) {
    this.opts = opts;
  }
  // Create the provider session, join the LiveKit room, and wire up
  // track/participant/disconnect handlers. Resolves after room.connect();
  // "connected" state is only reported when the first video track arrives.
  async mount(container) {
    this.container = container;
    const lk = await import("livekit-client");
    const session = await this.opts.createSession();
    this.avatarIdentity = session.avatarIdentity;
    this.awaitingVideo = true;
    const room = new lk.Room({ adaptiveStream: true, dynacast: true });
    room.on(lk.RoomEvent.TrackSubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video) {
        const mt = track.mediaStreamTrack;
        if (mt) {
          // First video frame marks the session as fully connected.
          if (this.awaitingVideo) {
            this.awaitingVideo = false;
            this.connected = true;
            this.clearConnectTimeout();
            this.opts.onStateChange?.("connected");
          }
          const stream = new MediaStream([mt]);
          this.opts.onVideoStream?.(stream);
          this.attachVideo(stream);
        }
      }
      if (track.kind === lk.Track.Kind.Audio) {
        const mt = track.mediaStreamTrack;
        if (mt) this.opts.onAudioStream?.(new MediaStream([mt]));
      }
    });
    room.on(lk.RoomEvent.TrackUnsubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video)
        this.opts.onVideoStream?.(null);
      if (track.kind === lk.Track.Kind.Audio)
        this.opts.onAudioStream?.(null);
      track.detach();
    });
    room.on(lk.RoomEvent.ParticipantConnected, (p) => {
      // The avatar may join after us; adopt its identity by prefix convention.
      if (p.identity.startsWith("avatar-") || p.identity.startsWith("lemonslice-")) {
        this.avatarIdentity = p.identity;
      }
    });
    room.on(lk.RoomEvent.Disconnected, () => {
      this.connected = false;
      this.opts.onStateChange?.("disconnected");
      this.opts.onVideoStream?.(null);
      this.opts.onAudioStream?.(null);
    });
    this.opts.onStateChange?.("connecting");
    await room.connect(session.livekitUrl, session.livekitToken);
    this.room = room;
    try {
      // Best-effort: older livekit-client builds may not support RPC registration.
      room.registerRpcMethod(RPC_PLAYBACK_FINISHED, async () => "ok");
      this.playbackFinishedRegistered = true;
    } catch {
    }
    this.startConnectTimeout();
  }
  // No per-frame control is applied for this remote renderer.
  update(_control) {
  }
  // Queue an audio Blob for streaming; segments play strictly in order.
  // Returns a promise for this segment (queue itself swallows rejections).
  async speak(audio) {
    const next = this.writeQueue.then(() => this.writeSpeechBlob(audio));
    this.writeQueue = next.catch(() => {
    });
    return next;
  }
  // Stop current speech: close the outgoing stream and ask the avatar to flush.
  interrupt() {
    void this.closeAudioStream();
    void this.clearAvatarBuffer();
  }
  // Full teardown: unregister RPC, leave the room, drop DOM/video, notify callbacks.
  unmount() {
    this.clearConnectTimeout();
    void this.closeAudioStream();
    if (this.room) {
      if (this.playbackFinishedRegistered) {
        this.room.unregisterRpcMethod(RPC_PLAYBACK_FINISHED);
        this.playbackFinishedRegistered = false;
      }
      this.room.disconnect();
      this.room = null;
    }
    this.connected = false;
    this.awaitingVideo = false;
    this.avatarIdentity = null;
    if (this.videoEl) {
      this.videoEl.remove();
      this.videoEl = null;
    }
    this.opts.onVideoStream?.(null);
    this.opts.onAudioStream?.(null);
    this.opts.onStateChange?.("disconnected");
  }
  // -- private ---
  // Lazily create a <video> inside the container and bind the stream to it.
  attachVideo(stream) {
    if (!this.container) return;
    if (!this.videoEl) {
      this.videoEl = document.createElement("video");
      this.videoEl.autoplay = true;
      this.videoEl.playsInline = true;
      this.videoEl.muted = false;
      this.videoEl.style.width = "100%";
      this.videoEl.style.height = "100%";
      this.videoEl.style.objectFit = "contain";
      this.container.appendChild(this.videoEl);
    }
    this.videoEl.srcObject = stream;
  }
  clearConnectTimeout() {
    if (this.connectTimer) {
      clearTimeout(this.connectTimer);
      this.connectTimer = null;
    }
  }
  // Arm the watchdog that reports an error if no video arrives in time.
  startConnectTimeout() {
    this.clearConnectTimeout();
    this.connectTimer = setTimeout(() => {
      if (this.awaitingVideo) {
        this.opts.onStateChange?.(
          "error",
          "Timed out waiting for avatar video"
        );
      }
    }, CONNECT_TIMEOUT_MS);
  }
  // Ask the avatar (via RPC) to drop buffered audio; remembers if unsupported
  // so we stop retrying for the rest of the session.
  async clearAvatarBuffer() {
    if (!this.room || !this.clearBufferSupported || !this.avatarIdentity) return;
    try {
      await this.room.localParticipant.performRpc({
        destinationIdentity: this.avatarIdentity,
        method: RPC_CLEAR_BUFFER,
        payload: ""
      });
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      if (msg.includes("Method not supported")) this.clearBufferSupported = false;
    }
  }
  // Close (and forget) the current byte-stream writer, ignoring close errors.
  async closeAudioStream() {
    const writer = this.audioStreamWriter;
    this.audioStreamWriter = null;
    if (writer) {
      try {
        await writer.close();
      } catch {
      }
    }
  }
  // Decode one speech Blob to PCM and stream it to the avatar as a segment.
  // Throws if the room is not connected or the avatar identity is unknown.
  async writeSpeechBlob(audioBlob) {
    if (!this.room || !this.connected) throw new Error("Room not connected");
    if (!this.avatarIdentity) throw new Error("Avatar participant not ready");
    await this.closeAudioStream();
    await this.clearAvatarBuffer();
    const { pcmBytes, sampleRate, numChannels } = await decodeToPcm(audioBlob);
    await this.streamPcm(pcmBytes, sampleRate, numChannels);
    await this.closeAudioStream();
  }
  // Write PCM bytes in ~PCM_FRAME_MS chunks, sleeping for each chunk's audio
  // duration so the stream is paced at real time.
  // NOTE(review): the pacing sleep also runs after the final chunk, delaying
  // the segment's resolution by up to one frame — confirm whether intentional.
  async streamPcm(pcmBytes, sampleRate, numChannels) {
    if (!this.room || !this.avatarIdentity) return;
    const segmentId = crypto.randomUUID().slice(0, 8);
    const writer = this.audioStreamWriter ?? await this.room.localParticipant.streamBytes({
      name: `AUDIO_${segmentId}`,
      topic: AUDIO_STREAM_TOPIC,
      destinationIdentities: [this.avatarIdentity],
      attributes: {
        sample_rate: String(sampleRate),
        num_channels: String(numChannels)
      }
    });
    this.audioStreamWriter = writer;
    const bytesPerSecond = sampleRate * numChannels * PCM_BYTES_PER_SAMPLE2;
    const chunkBytes = Math.max(1024, Math.floor(bytesPerSecond * PCM_FRAME_MS / 1e3));
    let offset = 0;
    while (offset < pcmBytes.byteLength) {
      const next = Math.min(offset + chunkBytes, pcmBytes.byteLength);
      const chunk = pcmBytes.subarray(offset, next);
      await writer.write(chunk);
      offset = next;
      const durationMs = Math.max(0, Math.round(chunk.byteLength / bytesPerSecond * 1e3));
      if (durationMs > 0) {
        await new Promise((r) => setTimeout(r, durationMs));
      }
    }
  }
};
|
|
1015
|
+
|
|
1016
|
+
// src/adapters/avatar/atlas.ts
// How long to wait for the avatar's first video frame before reporting an error.
var CONNECT_TIMEOUT_MS2 = 45e3;
// Remote avatar renderer for Atlas: joins a LiveKit room, publishes a local
// MediaStreamDestination as the "microphone", and drives speech by decoding
// Blobs into that destination via Web Audio.
var AtlasRenderer = class {
  id = "atlas";
  type = "remote";
  opts;
  room = null;
  sessionId = null;
  connected = false;
  // True between mount() and the first subscribed video track.
  awaitingVideo = false;
  connectTimer = null;
  container = null;
  videoEl = null;
  // AudioContext whose MediaStreamDestination is published into the room.
  audioCtx = null;
  audioDest = null;
  // The buffer source currently playing, if any (used by interrupt()).
  currentSource = null;
  // Serializes speak() calls so utterances play one at a time.
  speakQueue = Promise.resolve();
  constructor(opts) {
    this.opts = opts;
  }
  // Create the session, join the room, wire track handlers, and publish the
  // local audio destination track as our microphone source.
  async mount(container) {
    this.container = container;
    const lk = await import("livekit-client");
    const session = await this.opts.createSession();
    this.sessionId = session.sessionId;
    this.awaitingVideo = true;
    this.audioCtx = new AudioContext();
    this.audioDest = this.audioCtx.createMediaStreamDestination();
    const room = new lk.Room({ adaptiveStream: true, dynacast: true });
    room.on(lk.RoomEvent.TrackSubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video) {
        const mt = track.mediaStreamTrack;
        if (mt) {
          // First video frame marks the session as fully connected.
          if (this.awaitingVideo) {
            this.awaitingVideo = false;
            this.connected = true;
            this.clearConnectTimeout();
            this.opts.onStateChange?.("connected");
          }
          const stream = new MediaStream([mt]);
          this.opts.onVideoStream?.(stream);
          this.attachVideo(stream);
        }
      }
      if (track.kind === lk.Track.Kind.Audio) {
        const mt = track.mediaStreamTrack;
        if (mt) this.opts.onAudioStream?.(new MediaStream([mt]));
      }
    });
    room.on(lk.RoomEvent.TrackUnsubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video)
        this.opts.onVideoStream?.(null);
      if (track.kind === lk.Track.Kind.Audio)
        this.opts.onAudioStream?.(null);
      track.detach();
    });
    room.on(lk.RoomEvent.Disconnected, () => {
      this.connected = false;
      this.opts.onStateChange?.("disconnected");
      this.opts.onVideoStream?.(null);
      this.opts.onAudioStream?.(null);
    });
    this.opts.onStateChange?.("connecting");
    await room.connect(session.livekitUrl, session.token);
    this.room = room;
    const audioTrack = this.audioDest.stream.getAudioTracks()[0];
    if (audioTrack) {
      await room.localParticipant.publishTrack(audioTrack, {
        source: lk.Track.Source.Microphone
      });
    }
    this.startConnectTimeout();
  }
  // No per-frame control is applied for this remote renderer.
  update(_control) {
  }
  // Queue an audio Blob for playback through the published track.
  async speak(audio) {
    const next = this.speakQueue.then(() => this.playSpeechBlob(audio));
    this.speakQueue = next.catch(() => {
    });
    return next;
  }
  // Stop the currently-playing buffer source (stop() fires onended, which
  // resolves the pending speak()).
  // NOTE(review): unlike HeyGenRenderer.interrupt, speakQueue is not reset,
  // so already-queued utterances will still play — confirm this is intended.
  interrupt() {
    if (this.currentSource) {
      try {
        this.currentSource.stop();
      } catch {
      }
      this.currentSource = null;
    }
  }
  // Full teardown: stop playback, leave the room, close audio, drop DOM,
  // notify callbacks, and best-effort delete the remote session.
  unmount() {
    this.clearConnectTimeout();
    this.interrupt();
    if (this.room) {
      this.room.disconnect();
      this.room = null;
    }
    if (this.audioCtx) {
      void this.audioCtx.close().catch(() => {
      });
      this.audioCtx = null;
      this.audioDest = null;
    }
    this.connected = false;
    this.awaitingVideo = false;
    if (this.videoEl) {
      this.videoEl.remove();
      this.videoEl = null;
    }
    this.opts.onVideoStream?.(null);
    this.opts.onAudioStream?.(null);
    this.opts.onStateChange?.("disconnected");
    const id = this.sessionId;
    this.sessionId = null;
    if (id && this.opts.deleteSession) {
      void this.opts.deleteSession(id).catch(() => {
      });
    }
  }
  // -- private ---
  // Lazily create a <video> inside the container and bind the stream to it.
  attachVideo(stream) {
    if (!this.container) return;
    if (!this.videoEl) {
      this.videoEl = document.createElement("video");
      this.videoEl.autoplay = true;
      this.videoEl.playsInline = true;
      this.videoEl.muted = false;
      this.videoEl.style.width = "100%";
      this.videoEl.style.height = "100%";
      this.videoEl.style.objectFit = "contain";
      this.container.appendChild(this.videoEl);
    }
    this.videoEl.srcObject = stream;
  }
  clearConnectTimeout() {
    if (this.connectTimer) {
      clearTimeout(this.connectTimer);
      this.connectTimer = null;
    }
  }
  // Arm the watchdog that reports an error if no video arrives in time.
  startConnectTimeout() {
    this.clearConnectTimeout();
    this.connectTimer = setTimeout(() => {
      if (this.awaitingVideo) {
        this.opts.onStateChange?.(
          "error",
          "Timed out waiting for avatar video"
        );
      }
    }, CONNECT_TIMEOUT_MS2);
  }
  // Decode one Blob and play it into the published destination; resolves when
  // playback ends (or is stopped via interrupt()). Throws if not connected.
  async playSpeechBlob(audioBlob) {
    if (!this.audioCtx || !this.audioDest || !this.connected) {
      throw new Error("Atlas session not connected");
    }
    // Autoplay policies can leave the context suspended until a user gesture.
    if (this.audioCtx.state === "suspended") {
      await this.audioCtx.resume();
    }
    const arrayBuffer = await audioBlob.arrayBuffer();
    const audioBuffer = await this.audioCtx.decodeAudioData(arrayBuffer);
    const source = this.audioCtx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioDest);
    this.currentSource = source;
    return new Promise((resolve) => {
      source.onended = () => {
        if (this.currentSource === source) {
          this.currentSource = null;
        }
        resolve();
      };
      source.start();
    });
  }
};
|
|
1191
|
+
|
|
1192
|
+
// src/adapters/avatar/heygen.ts
// How long to wait for the avatar's first video frame before reporting an error.
var CONNECT_TIMEOUT_MS3 = 45e3;
// Speech-duration estimate: 150 words per minute expressed as words per ms.
var WORDS_PER_MS = 150 / 6e4;
// Lower bound on the estimated wait per utterance.
var MIN_SPEAK_MS = 800;
// Remote avatar renderer for HeyGen: joins a LiveKit room for A/V and drives
// speech by sending text tasks (HeyGen performs TTS server-side).
var HeyGenRenderer = class {
  id = "heygen";
  type = "remote";
  opts;
  room = null;
  sessionId = null;
  connected = false;
  // True between mount() and the first subscribed video track.
  awaitingVideo = false;
  connectTimer = null;
  container = null;
  videoEl = null;
  // Serializes speakText() calls so utterances are sent one at a time.
  speakQueue = Promise.resolve();
  constructor(opts) {
    this.opts = opts;
  }
  // Create the session, join the room, and wire track/disconnect handlers.
  async mount(container) {
    this.container = container;
    const lk = await import("livekit-client");
    const session = await this.opts.createSession();
    this.sessionId = session.sessionId;
    this.awaitingVideo = true;
    const room = new lk.Room({ adaptiveStream: true, dynacast: true });
    room.on(lk.RoomEvent.TrackSubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video) {
        const mt = track.mediaStreamTrack;
        if (mt) {
          // First video frame marks the session as fully connected.
          if (this.awaitingVideo) {
            this.awaitingVideo = false;
            this.connected = true;
            this.clearConnectTimeout();
            this.opts.onStateChange?.("connected");
          }
          const stream = new MediaStream([mt]);
          this.opts.onVideoStream?.(stream);
          this.attachVideo(stream);
        }
      }
      if (track.kind === lk.Track.Kind.Audio) {
        const mt = track.mediaStreamTrack;
        if (mt) {
          const stream = new MediaStream([mt]);
          this.opts.onAudioStream?.(stream);
          this.attachAudio(stream);
        }
      }
    });
    room.on(lk.RoomEvent.TrackUnsubscribed, (track) => {
      if (track.kind === lk.Track.Kind.Video)
        this.opts.onVideoStream?.(null);
      if (track.kind === lk.Track.Kind.Audio)
        this.opts.onAudioStream?.(null);
      track.detach();
    });
    room.on(lk.RoomEvent.Disconnected, () => {
      this.connected = false;
      this.opts.onStateChange?.("disconnected");
      this.opts.onVideoStream?.(null);
      this.opts.onAudioStream?.(null);
    });
    this.opts.onStateChange?.("connecting");
    await room.connect(session.livekitUrl, session.livekitToken);
    this.room = room;
    this.startConnectTimeout();
  }
  // No per-frame control is applied for this remote renderer.
  update(_control) {
  }
  /**
   * Text-driven speech: sends text to HeyGen's streaming.task endpoint.
   * Resolves when the estimated speech duration elapses.
   */
  async speakText(text, signal) {
    const next = this.speakQueue.then(
      () => this.doSpeakText(text, signal)
    );
    this.speakQueue = next.catch(() => {
    });
    return next;
  }
  /** Audio-based speak — no-op for HeyGen (TTS is handled internally). */
  async speak(_audio) {
  }
  // Drop queued utterances and best-effort interrupt the remote session.
  interrupt() {
    this.speakQueue = Promise.resolve();
    const id = this.sessionId;
    if (id && this.opts.interruptSession) {
      void this.opts.interruptSession(id).catch(() => {
      });
    }
  }
  // Full teardown: leave the room, drop DOM, notify callbacks, and
  // best-effort close the remote session.
  unmount() {
    this.clearConnectTimeout();
    this.interrupt();
    if (this.room) {
      this.room.disconnect();
      this.room = null;
    }
    this.connected = false;
    this.awaitingVideo = false;
    if (this.videoEl) {
      this.videoEl.remove();
      this.videoEl = null;
    }
    this.opts.onVideoStream?.(null);
    this.opts.onAudioStream?.(null);
    this.opts.onStateChange?.("disconnected");
    const id = this.sessionId;
    this.sessionId = null;
    if (id && this.opts.closeSession) {
      void this.opts.closeSession(id).catch(() => {
      });
    }
  }
  // -- private ---
  // Send one text task and wait either the provider-reported duration or a
  // word-count estimate (>= MIN_SPEAK_MS). An aborted signal skips/ends the wait.
  // NOTE(review): the "abort" listener is not removed when the timer fires
  // normally, so a long-lived signal accumulates one listener per utterance.
  async doSpeakText(text, signal) {
    if (!this.sessionId || !this.connected) {
      throw new Error("HeyGen session not connected");
    }
    if (signal?.aborted) return;
    const result = await this.opts.sendTask(this.sessionId, text);
    if (signal?.aborted) return;
    const waitMs = result.durationMs ?? Math.max(text.split(/\s+/).length / WORDS_PER_MS, MIN_SPEAK_MS);
    await new Promise((resolve) => {
      const timer = setTimeout(resolve, waitMs);
      if (signal) {
        const onAbort = () => {
          clearTimeout(timer);
          resolve();
        };
        signal.addEventListener("abort", onAbort, { once: true });
      }
    });
  }
  // Lazily create a <video> inside the container and bind the stream to it.
  attachVideo(stream) {
    if (!this.container) return;
    if (!this.videoEl) {
      this.videoEl = document.createElement("video");
      this.videoEl.autoplay = true;
      this.videoEl.playsInline = true;
      this.videoEl.muted = false;
      this.videoEl.style.width = "100%";
      this.videoEl.style.height = "100%";
      this.videoEl.style.objectFit = "contain";
      this.container.appendChild(this.videoEl);
    }
    this.videoEl.srcObject = stream;
  }
  // Bind the avatar's audio to a hidden, reused <audio> element so it plays.
  attachAudio(stream) {
    if (!this.container) return;
    let audioEl = this.container.querySelector(
      "audio[data-heygen-audio]"
    );
    if (!audioEl) {
      audioEl = document.createElement("audio");
      audioEl.setAttribute("data-heygen-audio", "");
      audioEl.autoplay = true;
      audioEl.style.display = "none";
      this.container.appendChild(audioEl);
    }
    audioEl.srcObject = stream;
  }
  clearConnectTimeout() {
    if (this.connectTimer) {
      clearTimeout(this.connectTimer);
      this.connectTimer = null;
    }
  }
  // Arm the watchdog that reports an error if no video arrives in time.
  startConnectTimeout() {
    this.clearConnectTimeout();
    this.connectTimer = setTimeout(() => {
      if (this.awaitingVideo) {
        this.opts.onStateChange?.(
          "error",
          "Timed out waiting for HeyGen avatar video"
        );
      }
    }, CONNECT_TIMEOUT_MS3);
  }
};
|
|
1374
|
+
|
|
1375
|
+
// src/renderers/lip-sync.ts
// Analyser smoothing constant applied on attach.
var SMOOTHING = 0.35;
// Web Audio-based lip-sync helper: taps an <audio> element through an
// AnalyserNode and exposes a smoothed RMS-derived mouth-open value.
var LipSyncEngine = class {
  analyser = null;
  ctx = null;
  source = null;
  dataArray = null;
  currentValue = 0;
  /** Wire an <audio> element into an analyser and begin sampling. */
  attach(audio, audioContext) {
    this.detach();
    const analyser = audioContext.createAnalyser();
    analyser.fftSize = 256;
    analyser.smoothingTimeConstant = SMOOTHING;
    const source = audioContext.createMediaElementSource(audio);
    source.connect(analyser);
    analyser.connect(audioContext.destination);
    this.ctx = audioContext;
    this.analyser = analyser;
    this.source = source;
    this.dataArray = new Uint8Array(analyser.frequencyBinCount);
  }
  /** Disconnect the audio graph and reset internal state. */
  detach() {
    const { source, analyser } = this;
    this.source = null;
    this.analyser = null;
    this.dataArray = null;
    this.currentValue = 0;
    // Nodes may already be disconnected; ignore those failures.
    try {
      source?.disconnect();
    } catch {
    }
    try {
      analyser?.disconnect();
    } catch {
    }
  }
  /** Sample once per animation frame; returns the smoothed mouth-open value in 0..1. */
  sample() {
    const { analyser, dataArray } = this;
    if (!analyser || !dataArray) return 0;
    analyser.getByteTimeDomainData(dataArray);
    // RMS of the centered time-domain signal, boosted and clamped to [0, 1].
    let energy = 0;
    for (const byte of dataArray) {
      const centered = (byte - 128) / 128;
      energy += centered * centered;
    }
    const rms = Math.sqrt(energy / dataArray.length);
    const target = Math.min(1, rms * 4);
    // Exponential smoothing toward the new target for a stable mouth motion.
    this.currentValue += (target - this.currentValue) * 0.4;
    return this.currentValue;
  }
};
|
|
1429
|
+
|
|
1430
|
+
// src/renderers/vrm-renderer.ts
// Local avatar renderer: loads a VRM model with three.js, runs a render loop
// with idle blinking, and lip-syncs spoken audio Blobs via LipSyncEngine.
var VRMLocalRenderer = class {
  id = "vrm-local";
  type = "local";
  opts;
  container = null;
  threeRenderer = null;
  scene = null;
  camera = null;
  clock = null;
  // Loaded VRM instance (gltf.userData.vrm) once loadVRM completes.
  vrm = null;
  // The dynamically imported @pixiv/three-vrm module (for preset enums).
  VRMModule = null;
  mixer = null;
  rafId = null;
  blinkTimer = null;
  lipSync = new LipSyncEngine();
  audioContext = null;
  // The <audio> element currently playing speech, if any.
  currentAudio = null;
  // Resolver for the in-flight speak() promise (used by interrupt()).
  speakResolve = null;
  resizeObserver = null;
  constructor(opts) {
    this.opts = opts;
  }
  // Build the three.js scene (camera, lights, WebGL canvas), load the VRM
  // model from opts.modelUrl, then start blinking, the render loop, and
  // container resize tracking. three / three-vrm are imported lazily.
  async mount(container) {
    this.container = container;
    const THREE = await import("three");
    const VRMModule = await import("@pixiv/three-vrm");
    this.VRMModule = VRMModule;
    this.scene = new THREE.Scene();
    this.camera = new THREE.PerspectiveCamera(
      30,
      container.clientWidth / container.clientHeight,
      0.1,
      20
    );
    // Frame the avatar roughly head-on at head height.
    this.camera.position.set(0, 1.35, 1.5);
    this.camera.lookAt(0, 1.2, 0);
    this.threeRenderer = new THREE.WebGLRenderer({
      alpha: true,
      antialias: true
    });
    this.threeRenderer.setPixelRatio(window.devicePixelRatio);
    this.threeRenderer.setSize(container.clientWidth, container.clientHeight);
    this.threeRenderer.outputColorSpace = THREE.SRGBColorSpace;
    container.appendChild(this.threeRenderer.domElement);
    // 16777215 === 0xffffff (white light).
    const ambient = new THREE.AmbientLight(16777215, 0.6);
    this.scene.add(ambient);
    const dir = new THREE.DirectionalLight(16777215, 1);
    dir.position.set(1, 2, 1);
    this.scene.add(dir);
    this.clock = new THREE.Clock();
    const { GLTFLoader } = await import("three/addons/loaders/GLTFLoader.js");
    const loader = new GLTFLoader();
    loader.register((parser) => new VRMModule.VRMLoaderPlugin(parser));
    await this.loadVRM(this.opts.modelUrl, loader, THREE, VRMModule);
    this.startBlink();
    this.startRenderLoop();
    this.resizeObserver = new ResizeObserver(() => this.handleResize());
    this.resizeObserver.observe(container);
  }
  // Apply external control data: mouth jaw-open, blink, and emotion preset.
  update(control) {
    if (!this.vrm || !this.VRMModule) return;
    const face = control.avatar?.face;
    if (face?.mouth) {
      this.vrm.expressionManager?.setValue(
        this.VRMModule.VRMExpressionPresetName.Aa,
        face.mouth.jawOpen
      );
    }
    if (face?.eyes) {
      // Blink expression is "closed-ness": 1 - the more-open eye.
      this.vrm.expressionManager?.setValue(
        this.VRMModule.VRMExpressionPresetName.Blink,
        1 - Math.min(face.eyes.blinkL, face.eyes.blinkR)
      );
    }
    const emotion = control.avatar?.emotion;
    if (emotion?.label) {
      this.applyExpression(emotion.label, emotion.intensity ?? 0.8);
    }
  }
  // Play an audio Blob through a temporary <audio> element with lip-sync.
  // Always resolves (on ended, on error, or if play() is blocked) — never rejects.
  async speak(audio) {
    return new Promise((resolve) => {
      this.speakResolve = resolve;
      const url = URL.createObjectURL(audio);
      const el = new Audio(url);
      this.currentAudio = el;
      if (!this.audioContext || this.audioContext.state === "closed") {
        this.audioContext = new AudioContext();
      }
      this.lipSync.attach(el, this.audioContext);
      el.addEventListener("ended", () => {
        this.lipSync.detach();
        URL.revokeObjectURL(url);
        this.currentAudio = null;
        this.speakResolve = null;
        resolve();
      });
      el.addEventListener("error", () => {
        this.lipSync.detach();
        URL.revokeObjectURL(url);
        this.currentAudio = null;
        this.speakResolve = null;
        resolve();
      });
      el.play().catch(() => {
        this.lipSync.detach();
        URL.revokeObjectURL(url);
        this.currentAudio = null;
        this.speakResolve = null;
        resolve();
      });
    });
  }
  // Stop current speech immediately and resolve the pending speak() promise.
  // NOTE(review): the object URL created in speak() is only revoked by the
  // ended/error/play-reject paths, so interrupting leaks it — confirm and fix.
  interrupt() {
    if (this.currentAudio) {
      this.currentAudio.pause();
      this.currentAudio.removeAttribute("src");
      this.currentAudio = null;
    }
    this.lipSync.detach();
    if (this.speakResolve) {
      this.speakResolve();
      this.speakResolve = null;
    }
  }
  // Full teardown: stop speech, cancel timers/observers, dispose GL resources,
  // close the audio context, and drop all references.
  unmount() {
    this.interrupt();
    if (this.rafId != null) cancelAnimationFrame(this.rafId);
    if (this.blinkTimer != null) clearInterval(this.blinkTimer);
    this.resizeObserver?.disconnect();
    this.threeRenderer?.dispose();
    if (this.threeRenderer?.domElement?.parentElement) {
      this.threeRenderer.domElement.parentElement.removeChild(
        this.threeRenderer.domElement
      );
    }
    if (this.audioContext && this.audioContext.state !== "closed") {
      this.audioContext.close().catch(() => {
      });
    }
    this.vrm = null;
    this.scene = null;
    this.camera = null;
    this.threeRenderer = null;
    this.container = null;
  }
  // -- internals --
  // Load a .vrm file via GLTFLoader, add it to the scene, and create a mixer.
  // Throws if the glTF carries no VRM extension data.
  async loadVRM(url, loader, THREE, VRM) {
    const gltf = await new Promise((resolve, reject) => {
      loader.load(url, resolve, void 0, reject);
    });
    const vrm = gltf.userData.vrm;
    if (!vrm) throw new Error("No VRM data found in model");
    VRM.VRMUtils.removeUnnecessaryJoints(gltf.scene);
    this.scene.add(gltf.scene);
    this.vrm = vrm;
    this.mixer = new THREE.AnimationMixer(gltf.scene);
  }
  // Idle blinking: close the eyes for 120 ms every 3-6 s (randomized).
  startBlink() {
    this.blinkTimer = setInterval(() => {
      if (!this.vrm || !this.VRMModule) return;
      const blink = this.VRMModule.VRMExpressionPresetName.Blink;
      this.vrm.expressionManager?.setValue(blink, 1);
      setTimeout(() => {
        this.vrm?.expressionManager?.setValue(blink, 0);
      }, 120);
    }, 3e3 + Math.random() * 3e3);
  }
  // requestAnimationFrame loop: sample lip-sync into the "Aa" expression,
  // advance the VRM and mixer, then render the frame.
  startRenderLoop() {
    const animate = () => {
      this.rafId = requestAnimationFrame(animate);
      const delta = this.clock.getDelta();
      if (this.vrm) {
        const mouthOpen = this.lipSync.sample();
        if (mouthOpen > 0.01 && this.VRMModule) {
          this.vrm.expressionManager?.setValue(
            this.VRMModule.VRMExpressionPresetName.Aa,
            mouthOpen
          );
        }
        this.vrm.update(delta);
      }
      this.mixer?.update(delta);
      this.threeRenderer.render(this.scene, this.camera);
    };
    animate();
  }
  // Map an emotion label to a VRM expression preset; zero all other presets
  // first so emotions do not stack. Unknown labels are ignored.
  applyExpression(label, intensity) {
    if (!this.vrm || !this.VRMModule) return;
    const presetMap = {
      happy: this.VRMModule.VRMExpressionPresetName.Happy,
      sad: this.VRMModule.VRMExpressionPresetName.Sad,
      angry: this.VRMModule.VRMExpressionPresetName.Angry,
      surprised: this.VRMModule.VRMExpressionPresetName.Surprised,
      relaxed: this.VRMModule.VRMExpressionPresetName.Relaxed,
      neutral: this.VRMModule.VRMExpressionPresetName.Neutral
    };
    const preset = presetMap[label.toLowerCase()];
    if (!preset) return;
    for (const v of Object.values(presetMap)) {
      this.vrm.expressionManager?.setValue(v, 0);
    }
    this.vrm.expressionManager?.setValue(preset, intensity);
  }
  // Keep camera aspect and canvas size in sync with the container's box.
  handleResize() {
    if (!this.container || !this.camera || !this.threeRenderer) return;
    const w = this.container.clientWidth;
    const h = this.container.clientHeight;
    this.camera.aspect = w / h;
    this.camera.updateProjectionMatrix();
    this.threeRenderer.setSize(w, h);
  }
};
|
|
1643
|
+
|
|
1644
|
+
// src/renderers/live2d-renderer.ts
// CDN fallbacks used when the host app does not self-host the vendor scripts
// via the `vendorBase` option (see ensureLibs).
var CDN_LIVE2D = "https://cdn.jsdelivr.net/gh/nicx-next/live2d-cubism2@1.0.0/live2d.min.js";
var CDN_PIXI = "https://cdn.jsdelivr.net/npm/pixi.js@6.5.10/dist/browser/pixi.min.js";
var CDN_CUBISM2 = "https://cdn.jsdelivr.net/npm/pixi-live2d-display@0.4.0/dist/cubism2.min.js";
// Cubism 2 core parameter IDs written every ticker frame: head angles
// (driven in degrees, see startTicker's * 30 scaling), eye open/gaze and
// mouth open/form values.
var PARAM = {
  ANGLE_X: "PARAM_ANGLE_X",
  ANGLE_Y: "PARAM_ANGLE_Y",
  ANGLE_Z: "PARAM_ANGLE_Z",
  EYE_L_OPEN: "PARAM_EYE_L_OPEN",
  EYE_R_OPEN: "PARAM_EYE_R_OPEN",
  EYE_BALL_X: "PARAM_EYE_BALL_X",
  EYE_BALL_Y: "PARAM_EYE_BALL_Y",
  MOUTH_OPEN_Y: "PARAM_MOUTH_OPEN_Y",
  MOUTH_FORM: "PARAM_MOUTH_FORM"
};
// Normalized emotion label -> candidate Live2D expression names, tried in
// order against the model's available expressions (expression naming varies
// widely between models, hence the multiple spellings per label).
var EMOTION_CANDIDATES = {
  happy: ["happy", "joy", "f_happy", "exp_happy"],
  joy: ["happy", "joy", "f_happy", "exp_happy"],
  sad: ["sad", "sorrow", "f_sad", "exp_sad"],
  sorrow: ["sad", "sorrow", "f_sad"],
  angry: ["angry", "anger", "f_angry", "exp_angry"],
  anger: ["angry", "anger", "f_angry"],
  surprised: ["surprised", "surprise", "f_surprised"],
  surprise: ["surprised", "surprise", "f_surprised"],
  relaxed: ["relaxed", "calm", "f_relaxed", "f_calm"],
  calm: ["relaxed", "calm", "f_relaxed"]
};
// Module-level cache: the three vendor scripts are loaded at most once per
// page. Reset to null on failure so a later call can retry (see ensureLibs).
var libsPromise = null;
|
|
1672
|
+
// Injects a <script src=...> tag at most once per URL. A data attribute both
// deduplicates the tag and records its terminal state, so concurrent and
// repeat callers share the same load outcome. Resolves on load, rejects on
// network/parse error.
function loadScript(src) {
  const MARKER = "data-avatarlayer-src";
  return new Promise((resolve, reject) => {
    const fail = () => reject(new Error(`Script load failed: ${src}`));
    const prior = document.querySelector(
      `script[${MARKER}="${CSS.escape(src)}"]`
    );
    if (prior) {
      // Tag already present: settle from its recorded state, or wait on it.
      if (prior.dataset.alLoaded) {
        resolve();
      } else if (prior.dataset.alError) {
        fail();
      } else {
        prior.addEventListener("load", () => resolve());
        prior.addEventListener("error", fail);
      }
      return;
    }
    const tag = document.createElement("script");
    tag.src = src;
    tag.setAttribute(MARKER, src);
    tag.onload = () => {
      tag.dataset.alLoaded = "1";
      resolve();
    };
    tag.onerror = () => {
      tag.dataset.alError = "1";
      fail();
    };
    document.head.appendChild(tag);
  });
}
|
|
1703
|
+
// Loads the three vendor scripts (Live2D Cubism 2 core, PIXI, then the
// pixi-live2d-display cubism2 plugin) sequentially — each script depends on
// the previous one being present on `window`. Returns a promise for the
// global PIXI namespace. The promise is cached module-wide so the scripts
// load at most once; on failure the cache is cleared so callers can retry.
// NOTE(review): `vendorBase` is only honored by the first call — later calls
// return the cached promise regardless of the argument. Confirm callers
// never pass differing bases.
function ensureLibs(vendorBase) {
  if (libsPromise) return libsPromise;
  const live2dUrl = vendorBase ? `${vendorBase}/live2d.min.js` : CDN_LIVE2D;
  const pixiUrl = vendorBase ? `${vendorBase}/pixi.min.js` : CDN_PIXI;
  const cubism2Url = vendorBase ? `${vendorBase}/cubism2.min.js` : CDN_CUBISM2;
  libsPromise = loadScript(live2dUrl).then(() => loadScript(pixiUrl)).then(() => loadScript(cubism2Url)).then(() => {
    // Sanity-check that the scripts actually installed their globals.
    const P = window.PIXI;
    if (!P?.Application)
      throw new Error("PIXI runtime missing after script load");
    if (!P.live2d?.Live2DModel)
      throw new Error(
        "pixi-live2d-display cubism2 missing after script load"
      );
    return P;
  }).catch((err) => {
    // Clear the cache so a transient failure doesn't poison future loads.
    libsPromise = null;
    throw err;
  });
  return libsPromise;
}
|
|
1723
|
+
// Clamps `v` into [min, max]; nullish/NaN/falsy inputs are coerced to 0
// before clamping. (Applies the upper bound first, then the lower bound, so
// with inverted bounds the lower bound wins — same as the original.)
function clamp(v, min, max) {
  const upperBounded = Math.min(max, v || 0);
  return Math.max(min, upperBounded);
}
|
|
1726
|
+
// Linear interpolation between `a` and `b`: t=0 yields a, t=1 yields b.
function lerp(a, b, t) {
  const delta = b - a;
  return a + delta * t;
}
|
|
1729
|
+
// Suppresses jitter around zero: values with |v| <= zone collapse to 0, and
// larger values are shifted toward zero by `zone` so the output stays
// continuous at the deadzone boundary. Nullish/NaN inputs count as 0.
function deadzone(v, zone) {
  const value = v || 0;
  if (Math.abs(value) <= zone) {
    return 0;
  }
  return value > 0 ? value - zone : value + zone;
}
|
|
1734
|
+
// Builds a fresh neutral face state: centered head pose and gaze, eyes fully
// open (blink values are "openness" here, 1 = open), mouth closed. Returns a
// new object each call so current/target states never alias.
function neutralFace() {
  const face = {
    yaw: 0,
    pitch: 0,
    roll: 0,
    gazeX: 0,
    gazeY: 0,
    blinkL: 1,
    blinkR: 1,
    jawOpen: 0,
    smile: 0
  };
  return face;
}
|
|
1747
|
+
// Local Live2D (Cubism 2) avatar renderer. Lazily loads the vendor scripts,
// hosts the model in a PIXI application, and drives head/eye/mouth
// parameters each frame by smoothing `faceCurrent` toward `faceTarget`.
var Live2DRenderer = class {
  id = "live2d-local";
  type = "local";
  opts;
  // DOM / PIXI handles (set in mount, cleared in unmount).
  container = null;
  app = null;
  model = null;
  PIXI = null;
  destroyed = false;
  // Smoothed face state: `faceTarget` is written by update()/blink/pointer
  // handlers; `faceCurrent` chases it in the ticker via lerp.
  faceCurrent = neutralFace();
  faceTarget = neutralFace();
  lastEmotionLabel = null;
  // Audio playback + lip-sync plumbing for speak()/interrupt().
  lipSync = new LipSyncEngine();
  audioContext = null;
  currentAudio = null;
  speakResolve = null;
  // Timers/observers torn down in unmount().
  blinkTimer = null;
  footSnapTimer = null;
  resizeObserver = null;
  pointerHandler = null;
  constructor(opts) {
    // Normalize options with defaults; `opts` is copied, never aliased.
    this.opts = {
      modelUrl: opts.modelUrl,
      vendorBase: opts.vendorBase,
      frameMode: opts.frameMode ?? "upperBodyFocus",
      autoBlink: opts.autoBlink ?? true,
      autoTrackPointer: opts.autoTrackPointer ?? false
    };
  }
  // ── AvatarRenderer interface ──────────────────────────────────────────────
  // Loads vendor libs, creates the PIXI app + canvas inside `container`,
  // loads the Live2D model, and starts the ticker/blink/pointer/resize
  // machinery. Throws if unmount() ran while an await was in flight.
  async mount(container) {
    this.container = container;
    this.destroyed = false;
    const PIXI = await ensureLibs(this.opts.vendorBase);
    if (this.destroyed) throw new Error("Renderer destroyed before mount");
    this.PIXI = PIXI;
    // Canvas is sized in physical pixels; CSS scales it back to 100%, and
    // applyLayout() applies the same dpr to the stage.
    const dpr = Math.max(1, window.devicePixelRatio || 1);
    const viewW = Math.max(1, container.clientWidth);
    const viewH = Math.max(1, container.clientHeight);
    this.app = new PIXI.Application({
      width: Math.round(viewW * dpr),
      height: Math.round(viewH * dpr),
      backgroundAlpha: 0,
      antialias: true,
      resolution: 1,
      autoDensity: false
    });
    const canvas = this.app.view;
    canvas.style.width = "100%";
    canvas.style.height = "100%";
    canvas.style.display = "block";
    container.appendChild(canvas);
    // autoUpdate: false — the model is stepped manually in startTicker().
    this.model = await PIXI.live2d.Live2DModel.from(this.opts.modelUrl, {
      autoUpdate: false
    });
    if (this.destroyed) throw new Error("Renderer destroyed during model load");
    this.app.stage.addChild(this.model);
    this.applyLayout();
    this.scheduleFootSnap();
    try {
      // Kick an idle motion if the model provides one; best-effort.
      const priority = PIXI.live2d.MotionPriority?.IDLE ?? 1;
      this.model.motion("idle", 0, priority);
    } catch {
    }
    this.startTicker();
    if (this.opts.autoBlink) this.startBlink();
    if (this.opts.autoTrackPointer) this.startPointerTracking();
    this.resizeObserver = new ResizeObserver(() => this.handleResize());
    this.resizeObserver.observe(container);
  }
  // Maps an incoming control frame onto face targets. Pose/gaze values are
  // clamped to [-1, 1] with a small deadzone to suppress sensor jitter;
  // blink inputs (0 = open) are inverted because internal blinkL/R mean
  // "eye openness" (1 = open). Emotion labels are forwarded to applyEmotion.
  update(control) {
    const face = control.avatar?.face;
    if (face) {
      const pose = face.pose ?? {};
      const eyes = face.eyes ?? {};
      const mouth = face.mouth ?? {};
      this.faceTarget.yaw = deadzone(clamp(pose.yaw ?? 0, -1, 1), 0.02);
      this.faceTarget.pitch = deadzone(clamp(pose.pitch ?? 0, -1, 1), 0.02);
      this.faceTarget.roll = deadzone(clamp(pose.roll ?? 0, -1, 1), 0.02);
      this.faceTarget.gazeX = deadzone(clamp(eyes.gazeX ?? 0, -1, 1), 0.02);
      this.faceTarget.gazeY = deadzone(clamp(eyes.gazeY ?? 0, -1, 1), 0.02);
      this.faceTarget.blinkL = 1 - clamp(eyes.blinkL ?? 0, 0, 1);
      this.faceTarget.blinkR = 1 - clamp(eyes.blinkR ?? 0, 0, 1);
      this.faceTarget.jawOpen = clamp(mouth.jawOpen ?? 0, 0, 1);
      this.faceTarget.smile = clamp(mouth.smile ?? 0, -1, 1);
    }
    const emotion = control.avatar?.emotion;
    if (emotion) this.applyEmotion(emotion.label);
  }
  // Plays an audio blob through an <audio> element with lip-sync attached.
  // Resolves when playback ends, errors, fails to start, or interrupt()
  // is called. NOTE(review): a second speak() before the first finishes
  // overwrites currentAudio/speakResolve without stopping the first —
  // callers appear expected to interrupt() first; confirm.
  async speak(audio) {
    return new Promise((resolve) => {
      this.speakResolve = resolve;
      const url = URL.createObjectURL(audio);
      const el = new Audio(url);
      this.currentAudio = el;
      // Lazily (re)create the AudioContext; a closed context can't be reused.
      if (!this.audioContext || this.audioContext.state === "closed") {
        this.audioContext = new AudioContext();
      }
      this.lipSync.attach(el, this.audioContext);
      const cleanup = () => {
        this.lipSync.detach();
        URL.revokeObjectURL(url);
        this.currentAudio = null;
        this.speakResolve = null;
        resolve();
      };
      el.addEventListener("ended", cleanup);
      el.addEventListener("error", cleanup);
      // play() may reject (e.g. autoplay policy) — treat as finished.
      el.play().catch(cleanup);
    });
  }
  // Stops any current speech immediately and resolves the pending speak().
  interrupt() {
    if (this.currentAudio) {
      this.currentAudio.pause();
      this.currentAudio.removeAttribute("src");
      this.currentAudio = null;
    }
    this.lipSync.detach();
    if (this.speakResolve) {
      this.speakResolve();
      this.speakResolve = null;
    }
  }
  // Full teardown: stop audio, timers and observers, destroy the model and
  // PIXI app, empty the container, and close the AudioContext. Safe to call
  // while mount() is still awaiting (mount checks `destroyed` after awaits).
  unmount() {
    this.destroyed = true;
    this.interrupt();
    if (this.blinkTimer != null) {
      clearInterval(this.blinkTimer);
      this.blinkTimer = null;
    }
    if (this.footSnapTimer != null) {
      clearTimeout(this.footSnapTimer);
      this.footSnapTimer = null;
    }
    this.resizeObserver?.disconnect();
    this.resizeObserver = null;
    if (this.pointerHandler && this.container) {
      this.container.removeEventListener("pointermove", this.pointerHandler);
      this.pointerHandler = null;
    }
    try {
      if (this.model && this.app) {
        this.app.stage.removeChild(this.model);
        this.model.destroy();
      }
      // destroy(true) also removes the canvas from the DOM.
      this.app?.destroy(true, { children: true });
    } catch {
    }
    this.app = null;
    this.model = null;
    this.PIXI = null;
    if (this.container) {
      while (this.container.firstChild)
        this.container.removeChild(this.container.firstChild);
    }
    this.container = null;
    if (this.audioContext && this.audioContext.state !== "closed") {
      this.audioContext.close().catch(() => {
      });
    }
  }
  // ── Layout ──────────────────────────────────────────────────────────────
  // Resizes the renderer to the container (in physical pixels) and positions
  // the model. "upperBodyFocus" scales to ~72% of model height pinned to the
  // top; the full-body path scales the whole model pinned near the bottom.
  applyLayout() {
    if (!this.app || !this.model || !this.container) return;
    const dpr = Math.max(1, window.devicePixelRatio || 1);
    const viewW = Math.max(1, this.container.clientWidth);
    const viewH = Math.max(1, this.container.clientHeight);
    this.app.renderer.resize(Math.round(viewW * dpr), Math.round(viewH * dpr));
    this.app.stage.scale.set(dpr, dpr);
    const mW = this.model.internalModel?.originalWidth || this.model.width || viewW;
    const mH = this.model.internalModel?.originalHeight || this.model.height || viewH;
    let scale;
    if (this.opts.frameMode === "upperBodyFocus") {
      // Show roughly head-and-torso: fit to 72% of the model height and
      // nudge the model slightly above the top edge.
      const visibleH = mH * 0.72;
      scale = Math.min(viewW / mW, viewH / visibleH) * 0.97;
      this.model.anchor.set(0.5, 0);
      this.model.scale.set(scale);
      this.model.x = viewW / 2;
      this.model.y = -mH * scale * 0.03;
    } else {
      scale = Math.min(viewW / mW, viewH / mH) * 0.98;
      this.model.anchor.set(0.5, 1);
      this.model.scale.set(scale);
      this.model.x = viewW / 2;
      this.model.y = viewH - Math.round(viewH * 5e-3);
    }
  }
  // ── Per-frame ticker ────────────────────────────────────────────────────
  // Every PIXI tick: fold lip-sync into the jaw target, ease faceCurrent
  // toward faceTarget (blinks ease faster than pose), step the model, then
  // write the Cubism parameters. setParamFloat happens after model.update()
  // so motions don't overwrite our values; core.update() re-evaluates.
  startTicker() {
    this.app.ticker.add(() => {
      if (!this.model || this.destroyed) return;
      const lipSyncValue = this.lipSync.sample();
      if (lipSyncValue > 0.01) {
        this.faceTarget.jawOpen = lipSyncValue;
      }
      // k: general smoothing factor; kb: faster factor for blinks.
      const k = 0.18;
      const kb = 0.35;
      this.faceCurrent.yaw = lerp(this.faceCurrent.yaw, this.faceTarget.yaw, k);
      this.faceCurrent.pitch = lerp(this.faceCurrent.pitch, this.faceTarget.pitch, k);
      this.faceCurrent.roll = lerp(this.faceCurrent.roll, this.faceTarget.roll, k);
      this.faceCurrent.gazeX = lerp(this.faceCurrent.gazeX, this.faceTarget.gazeX, k);
      this.faceCurrent.gazeY = lerp(this.faceCurrent.gazeY, this.faceTarget.gazeY, k);
      this.faceCurrent.blinkL = lerp(this.faceCurrent.blinkL, this.faceTarget.blinkL, kb);
      this.faceCurrent.blinkR = lerp(this.faceCurrent.blinkR, this.faceTarget.blinkR, kb);
      this.faceCurrent.jawOpen = lerp(this.faceCurrent.jawOpen, this.faceTarget.jawOpen, k);
      this.faceCurrent.smile = lerp(this.faceCurrent.smile, this.faceTarget.smile, k);
      try {
        this.model.update(this.app.ticker.deltaMS);
      } catch {
      }
      // Head angles scale from [-1, 1] to roughly +/-30 degrees.
      this.setParam(PARAM.ANGLE_X, this.faceCurrent.yaw * 30);
      this.setParam(PARAM.ANGLE_Y, this.faceCurrent.pitch * 30);
      this.setParam(PARAM.ANGLE_Z, this.faceCurrent.roll * 30);
      this.setParam(PARAM.EYE_BALL_X, this.faceCurrent.gazeX);
      this.setParam(PARAM.EYE_BALL_Y, this.faceCurrent.gazeY);
      this.setParam(PARAM.EYE_L_OPEN, this.faceCurrent.blinkL);
      this.setParam(PARAM.EYE_R_OPEN, this.faceCurrent.blinkR);
      this.setParam(PARAM.MOUTH_OPEN_Y, this.faceCurrent.jawOpen);
      this.setParam(PARAM.MOUTH_FORM, this.faceCurrent.smile);
      try {
        const core = this.model.internalModel?.coreModel;
        if (core && typeof core.update === "function") core.update();
      } catch {
      }
    });
  }
  // Best-effort write of a single Cubism 2 parameter; silently skips models
  // whose core lacks setParamFloat.
  setParam(name, value) {
    try {
      const core = this.model?.internalModel?.coreModel;
      if (core && typeof core.setParamFloat === "function") {
        core.setParamFloat(name, value);
      }
    } catch {
    }
  }
  // ── Auto-blink ──────────────────────────────────────────────────────────
  // Close both eyes (openness 0) then reopen 100 ms later.
  // NOTE(review): nextBlink() is evaluated once, so the interval is fixed at
  // a single random value in 2.5-6 s per mount rather than re-randomized
  // each blink — confirm whether per-blink randomization was intended.
  startBlink() {
    const nextBlink = () => 2500 + Math.random() * 3500;
    this.blinkTimer = setInterval(() => {
      this.faceTarget.blinkL = 0;
      this.faceTarget.blinkR = 0;
      setTimeout(() => {
        this.faceTarget.blinkL = 1;
        this.faceTarget.blinkR = 1;
      }, 100);
    }, nextBlink());
  }
  // ── Pointer tracking ────────────────────────────────────────────────────
  // Maps the pointer position within the container to gaze targets in
  // [-1, 1]; Y is negated so "pointer up" looks up.
  startPointerTracking() {
    if (!this.container) return;
    this.pointerHandler = (e) => {
      if (!this.container) return;
      const rect = this.container.getBoundingClientRect();
      const nx = (e.clientX - rect.left) / rect.width * 2 - 1;
      const ny = (e.clientY - rect.top) / rect.height * 2 - 1;
      this.faceTarget.gazeX = clamp(nx, -1, 1);
      this.faceTarget.gazeY = clamp(-ny, -1, 1);
    };
    this.container.addEventListener("pointermove", this.pointerHandler);
  }
  // ── Expressions / emotions ──────────────────────────────────────────────
  // Switches the model's expression for a (case-insensitive) emotion label.
  // "neutral" resets the expression; other labels try the candidate names
  // from EMOTION_CANDIDATES against the model's advertised expression list
  // (if it advertises one). Deduplicates on the last applied label.
  applyEmotion(label) {
    if (!this.model) return;
    const normalized = (label || "neutral").toLowerCase();
    if (normalized === this.lastEmotionLabel) return;
    this.lastEmotionLabel = normalized;
    if (normalized === "neutral") {
      try {
        // No-arg expression() resets to the default expression.
        this.model.expression();
      } catch {
      }
      return;
    }
    const candidates = EMOTION_CANDIDATES[normalized] ?? [normalized];
    let available = [];
    try {
      if (Array.isArray(this.model.expressions)) {
        available = this.model.expressions.map(
          (e) => (typeof e === "string" ? e : e?.name ?? "").toLowerCase()
        );
      }
    } catch {
    }
    for (const c of candidates) {
      // If the model lists its expressions, skip names it doesn't have;
      // otherwise just try the first candidate blindly.
      if (available.length > 0 && !available.includes(c.toLowerCase()))
        continue;
      try {
        this.model.expression(c);
      } catch {
      }
      break;
    }
  }
  // ── Foot-snap (full-body mode) ──────────────────────────────────────────
  // Runs snapFeet twice (at 120 ms and then 200 ms later) so the position
  // settles after the model has rendered at its new layout.
  scheduleFootSnap() {
    if (this.opts.frameMode !== "fullBody") return;
    if (this.footSnapTimer) clearTimeout(this.footSnapTimer);
    this.footSnapTimer = setTimeout(() => {
      this.footSnapTimer = null;
      this.snapFeet();
      this.footSnapTimer = setTimeout(() => {
        this.footSnapTimer = null;
        this.snapFeet();
      }, 200);
    }, 120);
  }
  // Shifts the model vertically so its lowest visible pixel sits just above
  // the container's bottom edge (full-body mode only).
  snapFeet() {
    if (this.destroyed || !this.model || !this.app || !this.container) return;
    if (this.opts.frameMode !== "fullBody") return;
    const bottomCSS = this.detectBottomCSS();
    if (bottomCSS === null) return;
    const viewH = Math.max(1, this.container.clientHeight);
    const targetBottom = viewH - Math.round(viewH * 8e-3);
    const delta = targetBottom - bottomCSS;
    if (Math.abs(delta) > 1) this.model.y += delta;
  }
  // Scans the framebuffer's alpha channel from the bottom up (WebGL's y=0 is
  // the bottom row) over a centered strip up to 512 px wide, and returns the
  // CSS-pixel y (from the top) of the lowest non-transparent pixel, or null
  // if nothing opaque was found or WebGL readback is unavailable.
  detectBottomCSS() {
    if (!this.app?.renderer) return null;
    try {
      const gl = this.app.renderer.gl;
      if (!gl) return null;
      const canvas = this.app.view;
      const cW = canvas.width;
      const cH = canvas.height;
      const dpr = Math.max(1, window.devicePixelRatio || 1);
      const strip = Math.min(cW, 512);
      const offX = Math.floor((cW - strip) / 2);
      const buf = new Uint8Array(strip * 4);
      for (let gy = 0; gy < cH; gy++) {
        gl.readPixels(offX, gy, strip, 1, gl.RGBA, gl.UNSIGNED_BYTE, buf);
        for (let xi = 3; xi < buf.length; xi += 4) {
          // Alpha > 12 counts as "visible" (tolerates antialiased fringes).
          if (buf[xi] > 12) return (cH - 1 - gy) / dpr;
        }
      }
      return null;
    } catch {
      return null;
    }
  }
  // ── Resize ──────────────────────────────────────────────────────────────
  // Container resized: re-run layout and re-snap the feet.
  handleResize() {
    this.applyLayout();
    this.scheduleFootSnap();
  }
};
|
|
2092
|
+
|
|
2093
|
+
// src/renderers/video-renderer.ts
|
|
2094
|
+
// Remote-video avatar renderer: displays a provider-supplied MediaStream in
// a <video> element and plays TTS audio blobs via playAudioBlob, cancellable
// through an AbortController.
var VideoRenderer = class {
  id = "video-remote";
  type = "remote";
  container = null;
  videoEl = null;
  stream = null;
  abortController = null;
  /** Optional: supply an existing MediaStream to display. */
  setStream(stream) {
    this.stream = stream;
    if (this.videoEl) {
      this.videoEl.srcObject = stream;
    }
  }
  /** Creates the <video> element inside `container` and attaches any stream
   * supplied earlier via setStream(). */
  async mount(container) {
    this.container = container;
    this.videoEl = document.createElement("video");
    this.videoEl.autoplay = true;
    this.videoEl.playsInline = true;
    this.videoEl.muted = false;
    this.videoEl.style.width = "100%";
    this.videoEl.style.height = "100%";
    this.videoEl.style.objectFit = "contain";
    this.videoEl.style.background = "#000";
    this.videoEl.style.borderRadius = "12px";
    container.appendChild(this.videoEl);
    if (this.stream) {
      this.videoEl.srcObject = this.stream;
    }
  }
  /** No-op: the remote stream carries its own animation. */
  update(_control) {
  }
  /** Plays an audio blob; resolves when playback finishes or is aborted. */
  async speak(audio) {
    // Fix: abort any in-flight playback before starting a new one.
    // Previously a second speak() silently overwrote `abortController`,
    // orphaning the earlier playback so interrupt() could no longer stop it.
    this.abortController?.abort();
    const controller = new AbortController();
    this.abortController = controller;
    try {
      await playAudioBlob(audio, controller.signal);
    } finally {
      // Clear only if no newer speak() has replaced the controller.
      if (this.abortController === controller) this.abortController = null;
    }
  }
  /** Cancels the current audio playback, if any. */
  interrupt() {
    this.abortController?.abort();
    this.abortController = null;
  }
  /** Stops playback and removes the <video> element from the DOM. */
  unmount() {
    this.interrupt();
    if (this.videoEl) {
      this.videoEl.srcObject = null;
      this.videoEl.remove();
      this.videoEl = null;
    }
    this.container = null;
    this.stream = null;
  }
};
|
|
2145
|
+
// Annotate the CommonJS export names for ESM import in node:
|
|
2146
|
+
// The `0 &&` guard makes this dead code at runtime; the literal object is
// kept only so Node's static analysis (cjs-module-lexer) can discover the
// bundle's named exports for `import { X } from` interop.
0 && (module.exports = {
  AnthropicAdapter,
  AtlasRenderer,
  AvatarSession,
  ElevenLabsAdapter,
  ElevenLabsSTTAdapter,
  GeminiAdapter,
  HeyGenRenderer,
  LemonSliceRenderer,
  LipSyncEngine,
  Live2DRenderer,
  MicCapture,
  OpenAIAdapter,
  TypedEmitter,
  VRMLocalRenderer,
  VideoRenderer,
  decodeToPcm,
  playAudioBlob
});
|
|
2165
|
+
//# sourceMappingURL=index.cjs.map
|