@estuary-ai/sdk 0.1.23 → 0.1.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -0
- package/dist/index.d.mts +8 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +136 -1
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +46 -3
- package/dist/index.mjs.map +1 -1
- package/dist/{livekit-voice-A52TC6XZ.mjs → livekit-voice-PV3TGH2Q.mjs} +94 -2
- package/dist/livekit-voice-PV3TGH2Q.mjs.map +1 -0
- package/package.json +1 -1
- package/dist/livekit-voice-A52TC6XZ.mjs.map +0 -1
package/README.md
CHANGED
|
@@ -120,6 +120,15 @@ import { parseActions } from '@estuary-ai/sdk';
|
|
|
120
120
|
const { actions, cleanText } = parseActions(rawBotText);
|
|
121
121
|
```
|
|
122
122
|
|
|
123
|
+
### Character Info
|
|
124
|
+
|
|
125
|
+
Fetch character details (name, avatar, 3D model URLs):
|
|
126
|
+
|
|
127
|
+
```typescript
|
|
128
|
+
const character = await client.getCharacter();
|
|
129
|
+
console.log(character.name, character.avatar);
|
|
130
|
+
```
|
|
131
|
+
|
|
123
132
|
### Memory & Knowledge Graph
|
|
124
133
|
|
|
125
134
|
```typescript
|
|
@@ -237,9 +246,21 @@ interface EstuaryConfig {
|
|
|
237
246
|
voiceTransport?: 'websocket' | 'livekit' | 'auto'; // Default: 'auto'
|
|
238
247
|
realtimeMemory?: boolean; // Enable real-time memory extraction events. Default: false
|
|
239
248
|
suppressMicDuringPlayback?: boolean; // Mute mic while bot audio plays (software AEC). Default: false
|
|
249
|
+
autoInterruptOnSpeech?: boolean; // Interrupt bot audio when user speaks. Default: true
|
|
240
250
|
}
|
|
241
251
|
```
|
|
242
252
|
|
|
253
|
+
## Runtime Properties
|
|
254
|
+
|
|
255
|
+
```typescript
|
|
256
|
+
client.connectionState // ConnectionState enum (Disconnected, Connecting, Connected, ...)
|
|
257
|
+
client.isConnected // boolean shorthand
|
|
258
|
+
client.isVoiceActive // true while voice session is running
|
|
259
|
+
client.isMuted // current mute state
|
|
260
|
+
client.suppressMicDuringPlayback // get/set at runtime without reconnecting
|
|
261
|
+
client.session // SessionInfo | null after connect
|
|
262
|
+
```
|
|
263
|
+
|
|
243
264
|
## Exports
|
|
244
265
|
|
|
245
266
|
Key exports:
|
|
@@ -261,6 +282,7 @@ import { parseActions } from '@estuary-ai/sdk';
|
|
|
261
282
|
import type {
|
|
262
283
|
EstuaryConfig,
|
|
263
284
|
SessionInfo,
|
|
285
|
+
CharacterInfo,
|
|
264
286
|
BotResponse,
|
|
265
287
|
BotVoice,
|
|
266
288
|
SttResponse,
|
package/dist/index.d.mts
CHANGED
|
@@ -147,6 +147,8 @@ type EstuaryEventMap = {
|
|
|
147
147
|
livekitDisconnected: () => void;
|
|
148
148
|
audioPlaybackStarted: (messageId: string) => void;
|
|
149
149
|
audioPlaybackComplete: (messageId: string) => void;
|
|
150
|
+
/** Bot audio level 0.0–1.0, emitted during playback for both transports. */
|
|
151
|
+
botAudioLevel: (level: number) => void;
|
|
150
152
|
memoryUpdated: (event: MemoryUpdatedEvent) => void;
|
|
151
153
|
};
|
|
152
154
|
interface VoiceManager {
|
|
@@ -155,6 +157,10 @@ interface VoiceManager {
|
|
|
155
157
|
toggleMute(): void;
|
|
156
158
|
/** Suppress audio sending (software AEC). No-op if not supported. */
|
|
157
159
|
setSuppressed?(suppressed: boolean): void;
|
|
160
|
+
/** Set callback for speaking state from participant attributes (LiveKit only). */
|
|
161
|
+
setSpeakingStateCallback?(cb: (speaking: boolean) => void): void;
|
|
162
|
+
/** Set callback for audio level updates (0-1) during bot speech. */
|
|
163
|
+
setAudioLevelCallback?(cb: (level: number) => void): void;
|
|
158
164
|
readonly isMuted: boolean;
|
|
159
165
|
readonly isActive: boolean;
|
|
160
166
|
dispose(): void;
|
|
@@ -368,6 +374,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
|
|
|
368
374
|
private forwardSocketEvents;
|
|
369
375
|
private handleBotResponse;
|
|
370
376
|
private handleBotVoice;
|
|
377
|
+
/** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
|
|
378
|
+
private computeAudioLevel;
|
|
371
379
|
private maybeAutoInterrupt;
|
|
372
380
|
}
|
|
373
381
|
|
package/dist/index.d.ts
CHANGED
|
@@ -147,6 +147,8 @@ type EstuaryEventMap = {
|
|
|
147
147
|
livekitDisconnected: () => void;
|
|
148
148
|
audioPlaybackStarted: (messageId: string) => void;
|
|
149
149
|
audioPlaybackComplete: (messageId: string) => void;
|
|
150
|
+
/** Bot audio level 0.0–1.0, emitted during playback for both transports. */
|
|
151
|
+
botAudioLevel: (level: number) => void;
|
|
150
152
|
memoryUpdated: (event: MemoryUpdatedEvent) => void;
|
|
151
153
|
};
|
|
152
154
|
interface VoiceManager {
|
|
@@ -155,6 +157,10 @@ interface VoiceManager {
|
|
|
155
157
|
toggleMute(): void;
|
|
156
158
|
/** Suppress audio sending (software AEC). No-op if not supported. */
|
|
157
159
|
setSuppressed?(suppressed: boolean): void;
|
|
160
|
+
/** Set callback for speaking state from participant attributes (LiveKit only). */
|
|
161
|
+
setSpeakingStateCallback?(cb: (speaking: boolean) => void): void;
|
|
162
|
+
/** Set callback for audio level updates (0-1) during bot speech. */
|
|
163
|
+
setAudioLevelCallback?(cb: (level: number) => void): void;
|
|
158
164
|
readonly isMuted: boolean;
|
|
159
165
|
readonly isActive: boolean;
|
|
160
166
|
dispose(): void;
|
|
@@ -368,6 +374,8 @@ declare class EstuaryClient extends TypedEventEmitter<EstuaryEventMap> {
|
|
|
368
374
|
private forwardSocketEvents;
|
|
369
375
|
private handleBotResponse;
|
|
370
376
|
private handleBotVoice;
|
|
377
|
+
/** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
|
|
378
|
+
private computeAudioLevel;
|
|
371
379
|
private maybeAutoInterrupt;
|
|
372
380
|
}
|
|
373
381
|
|
package/dist/index.js
CHANGED
|
@@ -247,6 +247,13 @@ var init_livekit_voice = __esm({
|
|
|
247
247
|
// livekit-client Room (dynamically imported)
|
|
248
248
|
_isMuted = false;
|
|
249
249
|
_isActive = false;
|
|
250
|
+
speakingStateCallback = null;
|
|
251
|
+
audioLevelCallback = null;
|
|
252
|
+
// Audio analyser (via livekit-client's createAudioAnalyser)
|
|
253
|
+
calculateVolume = null;
|
|
254
|
+
analyserCleanup = null;
|
|
255
|
+
audioLevelPollTimer = null;
|
|
256
|
+
_isBotSpeaking = false;
|
|
250
257
|
constructor(socketManager, logger) {
|
|
251
258
|
this.socketManager = socketManager;
|
|
252
259
|
this.logger = logger;
|
|
@@ -257,6 +264,12 @@ var init_livekit_voice = __esm({
|
|
|
257
264
|
get isActive() {
|
|
258
265
|
return this._isActive;
|
|
259
266
|
}
|
|
267
|
+
setSpeakingStateCallback(cb) {
|
|
268
|
+
this.speakingStateCallback = cb;
|
|
269
|
+
}
|
|
270
|
+
setAudioLevelCallback(cb) {
|
|
271
|
+
this.audioLevelCallback = cb;
|
|
272
|
+
}
|
|
260
273
|
async start() {
|
|
261
274
|
if (this._isActive) {
|
|
262
275
|
throw new exports.EstuaryError("VOICE_ALREADY_ACTIVE" /* VOICE_ALREADY_ACTIVE */, "Voice is already active");
|
|
@@ -296,16 +309,24 @@ var init_livekit_voice = __esm({
|
|
|
296
309
|
}
|
|
297
310
|
audioElement.play().catch(() => {
|
|
298
311
|
});
|
|
312
|
+
this.setupAnalyser(track);
|
|
313
|
+
if (this._isBotSpeaking) {
|
|
314
|
+
setTimeout(() => this.startAudioLevelPolling(), 50);
|
|
315
|
+
}
|
|
299
316
|
}
|
|
300
317
|
});
|
|
301
318
|
this.room.on(RoomEvent.TrackUnsubscribed, (track) => {
|
|
302
319
|
if (track.kind === Track.Kind.Audio) {
|
|
320
|
+
this.teardownAnalyser();
|
|
303
321
|
track.detach().forEach((el) => el.remove());
|
|
304
322
|
}
|
|
305
323
|
});
|
|
306
324
|
this.room.on(RoomEvent.Disconnected, () => {
|
|
307
325
|
this.logger.debug("LiveKit room disconnected");
|
|
308
326
|
this._isActive = false;
|
|
327
|
+
this._isBotSpeaking = false;
|
|
328
|
+
this.teardownAnalyser();
|
|
329
|
+
this.speakingStateCallback?.(false);
|
|
309
330
|
});
|
|
310
331
|
try {
|
|
311
332
|
await this.room.connect(tokenData.url, tokenData.token);
|
|
@@ -319,6 +340,23 @@ var init_livekit_voice = __esm({
|
|
|
319
340
|
err
|
|
320
341
|
);
|
|
321
342
|
}
|
|
343
|
+
this.room.on(
|
|
344
|
+
RoomEvent.ParticipantAttributesChanged,
|
|
345
|
+
(changedAttributes, participant) => {
|
|
346
|
+
if (participant === this.room?.localParticipant) return;
|
|
347
|
+
const state = changedAttributes["estuary.state"];
|
|
348
|
+
if (state === "speaking") {
|
|
349
|
+
this._isBotSpeaking = true;
|
|
350
|
+
this.speakingStateCallback?.(true);
|
|
351
|
+
this.startAudioLevelPolling();
|
|
352
|
+
} else if (state === "idle") {
|
|
353
|
+
this._isBotSpeaking = false;
|
|
354
|
+
this.stopAudioLevelPolling();
|
|
355
|
+
this.speakingStateCallback?.(false);
|
|
356
|
+
this.audioLevelCallback?.(0);
|
|
357
|
+
}
|
|
358
|
+
}
|
|
359
|
+
);
|
|
322
360
|
try {
|
|
323
361
|
await this.room.localParticipant.setMicrophoneEnabled(true);
|
|
324
362
|
this.logger.debug("Microphone enabled");
|
|
@@ -342,6 +380,9 @@ var init_livekit_voice = __esm({
|
|
|
342
380
|
this.socketManager.emitEvent("livekit_leave");
|
|
343
381
|
} catch {
|
|
344
382
|
}
|
|
383
|
+
this._isBotSpeaking = false;
|
|
384
|
+
this.teardownAnalyser();
|
|
385
|
+
this.speakingStateCallback?.(false);
|
|
345
386
|
if (this.room) {
|
|
346
387
|
for (const [, publication] of this.room.localParticipant.trackPublications) {
|
|
347
388
|
if (publication.track) {
|
|
@@ -362,6 +403,10 @@ var init_livekit_voice = __esm({
|
|
|
362
403
|
this.logger.debug("Mute toggled:", this._isMuted);
|
|
363
404
|
}
|
|
364
405
|
dispose() {
|
|
406
|
+
this.speakingStateCallback = null;
|
|
407
|
+
this.audioLevelCallback = null;
|
|
408
|
+
this._isBotSpeaking = false;
|
|
409
|
+
this.teardownAnalyser();
|
|
365
410
|
if (this.room) {
|
|
366
411
|
this.room.disconnect();
|
|
367
412
|
this.room = null;
|
|
@@ -369,6 +414,53 @@ var init_livekit_voice = __esm({
|
|
|
369
414
|
this._isActive = false;
|
|
370
415
|
this._isMuted = false;
|
|
371
416
|
}
|
|
417
|
+
// ─── Audio Analyser (livekit-client built-in) ───────────────────
|
|
418
|
+
async setupAnalyser(track) {
|
|
419
|
+
this.teardownAnalyser();
|
|
420
|
+
try {
|
|
421
|
+
const { createAudioAnalyser } = await import('livekit-client');
|
|
422
|
+
const { analyser, calculateVolume, cleanup } = createAudioAnalyser(track, {
|
|
423
|
+
fftSize: 256,
|
|
424
|
+
smoothingTimeConstant: 0.3
|
|
425
|
+
});
|
|
426
|
+
if (analyser.context.state === "suspended") {
|
|
427
|
+
await analyser.context.resume();
|
|
428
|
+
}
|
|
429
|
+
this.calculateVolume = calculateVolume;
|
|
430
|
+
this.analyserCleanup = cleanup;
|
|
431
|
+
this.logger.debug("Audio analyser created for bot track");
|
|
432
|
+
} catch (err) {
|
|
433
|
+
this.logger.debug("Failed to create audio analyser:", err);
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
teardownAnalyser() {
|
|
437
|
+
this.stopAudioLevelPolling();
|
|
438
|
+
if (this.analyserCleanup) {
|
|
439
|
+
this.analyserCleanup().catch(() => {
|
|
440
|
+
});
|
|
441
|
+
this.analyserCleanup = null;
|
|
442
|
+
}
|
|
443
|
+
this.calculateVolume = null;
|
|
444
|
+
}
|
|
445
|
+
startAudioLevelPolling() {
|
|
446
|
+
if (this.audioLevelPollTimer !== null) return;
|
|
447
|
+
if (!this.calculateVolume) return;
|
|
448
|
+
this.audioLevelPollTimer = setInterval(() => {
|
|
449
|
+
if (!this.calculateVolume) {
|
|
450
|
+
this.stopAudioLevelPolling();
|
|
451
|
+
return;
|
|
452
|
+
}
|
|
453
|
+
const vol = this.calculateVolume();
|
|
454
|
+
this.audioLevelCallback?.(vol);
|
|
455
|
+
}, 33);
|
|
456
|
+
}
|
|
457
|
+
stopAudioLevelPolling() {
|
|
458
|
+
if (this.audioLevelPollTimer !== null) {
|
|
459
|
+
clearInterval(this.audioLevelPollTimer);
|
|
460
|
+
this.audioLevelPollTimer = null;
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
// ─── Private ────────────────────────────────────────────────────
|
|
372
464
|
requestToken() {
|
|
373
465
|
return new Promise((resolve, reject) => {
|
|
374
466
|
const timeout = setTimeout(() => {
|
|
@@ -1277,6 +1369,7 @@ var EstuaryClient = class extends TypedEventEmitter {
|
|
|
1277
1369
|
}
|
|
1278
1370
|
} else if (event.type === "complete") {
|
|
1279
1371
|
this.emit("audioPlaybackComplete", event.messageId);
|
|
1372
|
+
this.emit("botAudioLevel", 0);
|
|
1280
1373
|
this.notifyAudioPlaybackComplete(event.messageId);
|
|
1281
1374
|
if (this.config.suppressMicDuringPlayback) {
|
|
1282
1375
|
this.voiceManager?.setSuppressed?.(false);
|
|
@@ -1285,6 +1378,24 @@ var EstuaryClient = class extends TypedEventEmitter {
|
|
|
1285
1378
|
});
|
|
1286
1379
|
}
|
|
1287
1380
|
await this.voiceManager.start();
|
|
1381
|
+
this.voiceManager.setSpeakingStateCallback?.((speaking) => {
|
|
1382
|
+
if (speaking) {
|
|
1383
|
+
this.emit("audioPlaybackStarted", "livekit-audio");
|
|
1384
|
+
if (this.config.suppressMicDuringPlayback) {
|
|
1385
|
+
this.voiceManager?.setSuppressed?.(true);
|
|
1386
|
+
}
|
|
1387
|
+
} else {
|
|
1388
|
+
this.emit("audioPlaybackComplete", "livekit-audio");
|
|
1389
|
+
this.emit("botAudioLevel", 0);
|
|
1390
|
+
this.notifyAudioPlaybackComplete("livekit-audio");
|
|
1391
|
+
if (this.config.suppressMicDuringPlayback) {
|
|
1392
|
+
this.voiceManager?.setSuppressed?.(false);
|
|
1393
|
+
}
|
|
1394
|
+
}
|
|
1395
|
+
});
|
|
1396
|
+
this.voiceManager.setAudioLevelCallback?.((level) => {
|
|
1397
|
+
this.emit("botAudioLevel", level);
|
|
1398
|
+
});
|
|
1288
1399
|
this.emit("voiceStarted");
|
|
1289
1400
|
}
|
|
1290
1401
|
/** Stop voice input */
|
|
@@ -1383,7 +1494,31 @@ var EstuaryClient = class extends TypedEventEmitter {
|
|
|
1383
1494
|
}
|
|
1384
1495
|
handleBotVoice(voice) {
|
|
1385
1496
|
this.emit("botVoice", voice);
|
|
1386
|
-
|
|
1497
|
+
if (voice.audio) {
|
|
1498
|
+
this.audioPlayer?.enqueue(voice);
|
|
1499
|
+
this.emit("botAudioLevel", this.computeAudioLevel(voice.audio));
|
|
1500
|
+
}
|
|
1501
|
+
}
|
|
1502
|
+
/** Compute RMS audio level (0-1) from base64-encoded Int16 PCM. */
|
|
1503
|
+
computeAudioLevel(base64Audio) {
|
|
1504
|
+
try {
|
|
1505
|
+
const binaryStr = atob(base64Audio);
|
|
1506
|
+
const len = binaryStr.length;
|
|
1507
|
+
const step = 16;
|
|
1508
|
+
let sum = 0;
|
|
1509
|
+
let count = 0;
|
|
1510
|
+
for (let i = 0; i + 1 < len; i += step) {
|
|
1511
|
+
const sample = binaryStr.charCodeAt(i) | binaryStr.charCodeAt(i + 1) << 8;
|
|
1512
|
+
const signed = sample > 32767 ? sample - 65536 : sample;
|
|
1513
|
+
const normalized = signed / 32768;
|
|
1514
|
+
sum += normalized * normalized;
|
|
1515
|
+
count++;
|
|
1516
|
+
}
|
|
1517
|
+
if (count === 0) return 0;
|
|
1518
|
+
return Math.min(1, Math.sqrt(sum / count) * 5);
|
|
1519
|
+
} catch {
|
|
1520
|
+
return 0;
|
|
1521
|
+
}
|
|
1387
1522
|
}
|
|
1388
1523
|
maybeAutoInterrupt(stt) {
|
|
1389
1524
|
if ((this.config.autoInterruptOnSpeech ?? true) === false) return;
|