@absolutejs/voice 0.0.21 → 0.0.22-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1046 -2
- package/dist/agent.d.ts +113 -0
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +37 -0
- package/dist/index.d.ts +32 -1
- package/dist/index.js +8379 -1245
- package/dist/ops.d.ts +327 -0
- package/dist/opsPresets.d.ts +19 -0
- package/dist/opsRuntime.d.ts +66 -0
- package/dist/opsSinks.d.ts +149 -0
- package/dist/outcomeRecipes.d.ts +18 -0
- package/dist/postgresStore.d.ts +31 -0
- package/dist/queue.d.ts +276 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/s3Store.d.ts +14 -0
- package/dist/sqliteStore.d.ts +26 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +5094 -284
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/trace.d.ts +236 -0
- package/dist/types.d.ts +320 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/dist/client/index.js
CHANGED
|
@@ -107,6 +107,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
107
107
|
return false;
|
|
108
108
|
}
|
|
109
109
|
switch (value.type) {
|
|
110
|
+
case "audio":
|
|
110
111
|
case "assistant":
|
|
111
112
|
case "complete":
|
|
112
113
|
case "error":
|
|
@@ -276,6 +277,352 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
276
277
|
subscribe
|
|
277
278
|
};
|
|
278
279
|
};
|
|
280
|
+
// src/client/audioPlayer.ts
|
|
281
|
+
var DEFAULT_LOOKAHEAD_MS = 15;
|
|
282
|
+
var createInitialState = () => ({
|
|
283
|
+
activeSourceCount: 0,
|
|
284
|
+
error: null,
|
|
285
|
+
isActive: false,
|
|
286
|
+
isPlaying: false,
|
|
287
|
+
lastInterruptLatencyMs: undefined,
|
|
288
|
+
lastPlaybackStopLatencyMs: undefined,
|
|
289
|
+
processedChunkCount: 0,
|
|
290
|
+
queuedChunkCount: 0
|
|
291
|
+
});
|
|
292
|
+
var getAudioContextCtor = () => {
|
|
293
|
+
if (typeof window === "undefined") {
|
|
294
|
+
return typeof AudioContext === "undefined" ? undefined : AudioContext;
|
|
295
|
+
}
|
|
296
|
+
return window.AudioContext ?? window.webkitAudioContext;
|
|
297
|
+
};
|
|
298
|
+
var decodePCM16LEChunk = (audioContext, chunk) => {
|
|
299
|
+
const format = chunk.format;
|
|
300
|
+
if (format.container !== "raw" || format.encoding !== "pcm_s16le") {
|
|
301
|
+
throw new Error(`Unsupported assistant audio format: ${format.container}/${format.encoding}`);
|
|
302
|
+
}
|
|
303
|
+
const bytes = chunk.chunk;
|
|
304
|
+
const channels = Math.max(1, format.channels);
|
|
305
|
+
const sampleCount = Math.floor(bytes.byteLength / 2);
|
|
306
|
+
const frameCount = Math.max(1, Math.floor(sampleCount / channels));
|
|
307
|
+
const audioBuffer = audioContext.createBuffer(channels, frameCount, format.sampleRateHz);
|
|
308
|
+
const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
309
|
+
for (let channelIndex = 0;channelIndex < channels; channelIndex += 1) {
|
|
310
|
+
const channelData = audioBuffer.getChannelData(channelIndex);
|
|
311
|
+
for (let frameIndex = 0;frameIndex < frameCount; frameIndex += 1) {
|
|
312
|
+
const sampleIndex = frameIndex * channels + channelIndex;
|
|
313
|
+
const sampleOffset = sampleIndex * 2;
|
|
314
|
+
if (sampleOffset + 1 >= bytes.byteLength) {
|
|
315
|
+
channelData[frameIndex] = 0;
|
|
316
|
+
continue;
|
|
317
|
+
}
|
|
318
|
+
channelData[frameIndex] = view.getInt16(sampleOffset, true) / 32768;
|
|
319
|
+
}
|
|
320
|
+
}
|
|
321
|
+
return audioBuffer;
|
|
322
|
+
};
|
|
323
|
+
var createVoiceAudioPlayer = (source, options = {}) => {
|
|
324
|
+
const subscribers = new Set;
|
|
325
|
+
const sourceNodes = new Set;
|
|
326
|
+
const lookaheadSeconds = (options.lookaheadMs ?? DEFAULT_LOOKAHEAD_MS) / 1000;
|
|
327
|
+
let state = createInitialState();
|
|
328
|
+
let audioContext = null;
|
|
329
|
+
let outputNode = null;
|
|
330
|
+
let queueEndTime = 0;
|
|
331
|
+
let syncPromise = Promise.resolve();
|
|
332
|
+
let interruptStartedAt = null;
|
|
333
|
+
let interruptPromise = null;
|
|
334
|
+
let resolveInterruptPromise = null;
|
|
335
|
+
let interruptFallbackTimer = null;
|
|
336
|
+
const notify = () => {
|
|
337
|
+
for (const subscriber of subscribers) {
|
|
338
|
+
subscriber();
|
|
339
|
+
}
|
|
340
|
+
};
|
|
341
|
+
const setState = (next) => {
|
|
342
|
+
state = {
|
|
343
|
+
...state,
|
|
344
|
+
...next
|
|
345
|
+
};
|
|
346
|
+
notify();
|
|
347
|
+
};
|
|
348
|
+
const clearError = () => {
|
|
349
|
+
if (state.error !== null) {
|
|
350
|
+
setState({ error: null });
|
|
351
|
+
}
|
|
352
|
+
};
|
|
353
|
+
const clearInterruptTimer = () => {
|
|
354
|
+
if (interruptFallbackTimer !== null) {
|
|
355
|
+
clearTimeout(interruptFallbackTimer);
|
|
356
|
+
interruptFallbackTimer = null;
|
|
357
|
+
}
|
|
358
|
+
};
|
|
359
|
+
const resolveInterrupt = (latencyMs) => {
|
|
360
|
+
clearInterruptTimer();
|
|
361
|
+
interruptStartedAt = null;
|
|
362
|
+
setState({
|
|
363
|
+
activeSourceCount: sourceNodes.size,
|
|
364
|
+
isPlaying: false,
|
|
365
|
+
lastInterruptLatencyMs: latencyMs,
|
|
366
|
+
lastPlaybackStopLatencyMs: state.lastPlaybackStopLatencyMs ?? latencyMs
|
|
367
|
+
});
|
|
368
|
+
resolveInterruptPromise?.();
|
|
369
|
+
resolveInterruptPromise = null;
|
|
370
|
+
interruptPromise = null;
|
|
371
|
+
};
|
|
372
|
+
const estimateOutputStopLatencyMs = (context) => {
|
|
373
|
+
if (!context) {
|
|
374
|
+
return 0;
|
|
375
|
+
}
|
|
376
|
+
return Math.max(0, ((context.baseLatency ?? 0) + (context.outputLatency ?? 0)) * 1000);
|
|
377
|
+
};
|
|
378
|
+
const restoreOutputGain = (context) => {
|
|
379
|
+
if (!outputNode) {
|
|
380
|
+
return;
|
|
381
|
+
}
|
|
382
|
+
const gainValue = 1;
|
|
383
|
+
if (outputNode.gain.setValueAtTime) {
|
|
384
|
+
outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
|
|
385
|
+
return;
|
|
386
|
+
}
|
|
387
|
+
outputNode.gain.value = gainValue;
|
|
388
|
+
};
|
|
389
|
+
const muteOutputGain = (context) => {
|
|
390
|
+
if (!outputNode) {
|
|
391
|
+
return;
|
|
392
|
+
}
|
|
393
|
+
const gainValue = 0;
|
|
394
|
+
if (outputNode.gain.setValueAtTime) {
|
|
395
|
+
outputNode.gain.setValueAtTime(gainValue, context?.currentTime ?? 0);
|
|
396
|
+
return;
|
|
397
|
+
}
|
|
398
|
+
outputNode.gain.value = gainValue;
|
|
399
|
+
};
|
|
400
|
+
const maybeResolveInterrupt = () => {
|
|
401
|
+
if (interruptStartedAt === null || sourceNodes.size > 0) {
|
|
402
|
+
return;
|
|
403
|
+
}
|
|
404
|
+
resolveInterrupt(Date.now() - interruptStartedAt);
|
|
405
|
+
};
|
|
406
|
+
const ensureAudioContext = async () => {
|
|
407
|
+
if (audioContext) {
|
|
408
|
+
return audioContext;
|
|
409
|
+
}
|
|
410
|
+
if (options.createAudioContext) {
|
|
411
|
+
audioContext = options.createAudioContext();
|
|
412
|
+
} else {
|
|
413
|
+
const AudioContextCtor = getAudioContextCtor();
|
|
414
|
+
if (!AudioContextCtor) {
|
|
415
|
+
throw new Error("Assistant audio playback requires AudioContext support.");
|
|
416
|
+
}
|
|
417
|
+
audioContext = new AudioContextCtor;
|
|
418
|
+
}
|
|
419
|
+
if (audioContext.createGain) {
|
|
420
|
+
outputNode = audioContext.createGain();
|
|
421
|
+
outputNode.connect?.(audioContext.destination);
|
|
422
|
+
}
|
|
423
|
+
queueEndTime = audioContext.currentTime;
|
|
424
|
+
return audioContext;
|
|
425
|
+
};
|
|
426
|
+
const scheduleChunk = async (chunk) => {
|
|
427
|
+
const context = await ensureAudioContext();
|
|
428
|
+
const buffer = decodePCM16LEChunk(context, chunk);
|
|
429
|
+
const node = context.createBufferSource();
|
|
430
|
+
node.buffer = buffer;
|
|
431
|
+
node.connect(outputNode ?? context.destination);
|
|
432
|
+
node.onended = () => {
|
|
433
|
+
sourceNodes.delete(node);
|
|
434
|
+
node.disconnect?.();
|
|
435
|
+
setState({
|
|
436
|
+
activeSourceCount: sourceNodes.size,
|
|
437
|
+
isPlaying: sourceNodes.size > 0 && state.isActive
|
|
438
|
+
});
|
|
439
|
+
maybeResolveInterrupt();
|
|
440
|
+
};
|
|
441
|
+
const startAt = Math.max(context.currentTime + lookaheadSeconds, queueEndTime);
|
|
442
|
+
queueEndTime = startAt + buffer.duration;
|
|
443
|
+
sourceNodes.add(node);
|
|
444
|
+
setState({
|
|
445
|
+
activeSourceCount: sourceNodes.size,
|
|
446
|
+
isPlaying: true
|
|
447
|
+
});
|
|
448
|
+
node.start(startAt);
|
|
449
|
+
};
|
|
450
|
+
const stopQueuedPlayback = (options2) => {
|
|
451
|
+
for (const node of [...sourceNodes]) {
|
|
452
|
+
node.stop?.();
|
|
453
|
+
}
|
|
454
|
+
queueEndTime = audioContext ? audioContext.currentTime : 0;
|
|
455
|
+
if (options2?.forceClear) {
|
|
456
|
+
for (const node of sourceNodes) {
|
|
457
|
+
node.disconnect?.();
|
|
458
|
+
}
|
|
459
|
+
sourceNodes.clear();
|
|
460
|
+
maybeResolveInterrupt();
|
|
461
|
+
}
|
|
462
|
+
};
|
|
463
|
+
const sync = async () => {
|
|
464
|
+
if (!state.isActive) {
|
|
465
|
+
return;
|
|
466
|
+
}
|
|
467
|
+
const nextChunks = source.assistantAudio.slice(state.processedChunkCount);
|
|
468
|
+
if (nextChunks.length === 0) {
|
|
469
|
+
return;
|
|
470
|
+
}
|
|
471
|
+
try {
|
|
472
|
+
clearError();
|
|
473
|
+
for (const chunk of nextChunks) {
|
|
474
|
+
await scheduleChunk(chunk);
|
|
475
|
+
}
|
|
476
|
+
setState({
|
|
477
|
+
processedChunkCount: source.assistantAudio.length,
|
|
478
|
+
queuedChunkCount: state.queuedChunkCount + nextChunks.length
|
|
479
|
+
});
|
|
480
|
+
} catch (error) {
|
|
481
|
+
setState({
|
|
482
|
+
error: error instanceof Error ? error.message : String(error)
|
|
483
|
+
});
|
|
484
|
+
}
|
|
485
|
+
};
|
|
486
|
+
const queueSync = () => {
|
|
487
|
+
syncPromise = syncPromise.then(() => sync(), () => sync());
|
|
488
|
+
return syncPromise;
|
|
489
|
+
};
|
|
490
|
+
const unsubscribeSource = source.subscribe(() => {
|
|
491
|
+
if (options.autoStart && !state.isActive && source.assistantAudio.length > 0) {
|
|
492
|
+
player.start();
|
|
493
|
+
return;
|
|
494
|
+
}
|
|
495
|
+
if (state.isActive) {
|
|
496
|
+
queueSync();
|
|
497
|
+
}
|
|
498
|
+
});
|
|
499
|
+
const player = {
|
|
500
|
+
close: async () => {
|
|
501
|
+
unsubscribeSource();
|
|
502
|
+
stopQueuedPlayback({ forceClear: true });
|
|
503
|
+
clearInterruptTimer();
|
|
504
|
+
resolveInterruptPromise?.();
|
|
505
|
+
resolveInterruptPromise = null;
|
|
506
|
+
interruptPromise = null;
|
|
507
|
+
interruptStartedAt = null;
|
|
508
|
+
if (audioContext && audioContext.state !== "closed") {
|
|
509
|
+
await audioContext.close();
|
|
510
|
+
}
|
|
511
|
+
audioContext = null;
|
|
512
|
+
outputNode?.disconnect?.();
|
|
513
|
+
outputNode = null;
|
|
514
|
+
queueEndTime = 0;
|
|
515
|
+
setState({
|
|
516
|
+
activeSourceCount: 0,
|
|
517
|
+
isActive: false,
|
|
518
|
+
isPlaying: false
|
|
519
|
+
});
|
|
520
|
+
},
|
|
521
|
+
get activeSourceCount() {
|
|
522
|
+
return state.activeSourceCount;
|
|
523
|
+
},
|
|
524
|
+
get error() {
|
|
525
|
+
return state.error;
|
|
526
|
+
},
|
|
527
|
+
getSnapshot: () => state,
|
|
528
|
+
get isActive() {
|
|
529
|
+
return state.isActive;
|
|
530
|
+
},
|
|
531
|
+
get isPlaying() {
|
|
532
|
+
return state.isPlaying;
|
|
533
|
+
},
|
|
534
|
+
interrupt: async () => {
|
|
535
|
+
const startedAt = Date.now();
|
|
536
|
+
const context = await ensureAudioContext();
|
|
537
|
+
interruptStartedAt = startedAt;
|
|
538
|
+
muteOutputGain(context);
|
|
539
|
+
const playbackStopLatencyMs = Date.now() - startedAt + estimateOutputStopLatencyMs(context);
|
|
540
|
+
setState({
|
|
541
|
+
isActive: false,
|
|
542
|
+
isPlaying: sourceNodes.size > 0,
|
|
543
|
+
lastPlaybackStopLatencyMs: playbackStopLatencyMs
|
|
544
|
+
});
|
|
545
|
+
if (sourceNodes.size === 0) {
|
|
546
|
+
resolveInterrupt(playbackStopLatencyMs);
|
|
547
|
+
return;
|
|
548
|
+
}
|
|
549
|
+
if (!interruptPromise) {
|
|
550
|
+
interruptPromise = new Promise((resolve) => {
|
|
551
|
+
resolveInterruptPromise = resolve;
|
|
552
|
+
});
|
|
553
|
+
}
|
|
554
|
+
clearInterruptTimer();
|
|
555
|
+
interruptFallbackTimer = setTimeout(() => {
|
|
556
|
+
for (const node of sourceNodes) {
|
|
557
|
+
node.disconnect?.();
|
|
558
|
+
}
|
|
559
|
+
sourceNodes.clear();
|
|
560
|
+
resolveInterrupt(Date.now() - startedAt);
|
|
561
|
+
}, 250);
|
|
562
|
+
stopQueuedPlayback();
|
|
563
|
+
await interruptPromise;
|
|
564
|
+
},
|
|
565
|
+
get lastInterruptLatencyMs() {
|
|
566
|
+
return state.lastInterruptLatencyMs;
|
|
567
|
+
},
|
|
568
|
+
get lastPlaybackStopLatencyMs() {
|
|
569
|
+
return state.lastPlaybackStopLatencyMs;
|
|
570
|
+
},
|
|
571
|
+
pause: async () => {
|
|
572
|
+
if (!audioContext) {
|
|
573
|
+
setState({
|
|
574
|
+
activeSourceCount: 0,
|
|
575
|
+
isActive: false,
|
|
576
|
+
isPlaying: false
|
|
577
|
+
});
|
|
578
|
+
return;
|
|
579
|
+
}
|
|
580
|
+
await audioContext.suspend();
|
|
581
|
+
setState({
|
|
582
|
+
activeSourceCount: sourceNodes.size,
|
|
583
|
+
isActive: false,
|
|
584
|
+
isPlaying: false
|
|
585
|
+
});
|
|
586
|
+
},
|
|
587
|
+
get processedChunkCount() {
|
|
588
|
+
return state.processedChunkCount;
|
|
589
|
+
},
|
|
590
|
+
get queuedChunkCount() {
|
|
591
|
+
return state.queuedChunkCount;
|
|
592
|
+
},
|
|
593
|
+
start: async () => {
|
|
594
|
+
try {
|
|
595
|
+
clearError();
|
|
596
|
+
const context = await ensureAudioContext();
|
|
597
|
+
restoreOutputGain(context);
|
|
598
|
+
if (context.state === "suspended") {
|
|
599
|
+
await context.resume();
|
|
600
|
+
}
|
|
601
|
+
setState({
|
|
602
|
+
activeSourceCount: sourceNodes.size,
|
|
603
|
+
isActive: true,
|
|
604
|
+
isPlaying: context.state === "running"
|
|
605
|
+
});
|
|
606
|
+
await queueSync();
|
|
607
|
+
} catch (error) {
|
|
608
|
+
setState({
|
|
609
|
+
error: error instanceof Error ? error.message : String(error),
|
|
610
|
+
isActive: false,
|
|
611
|
+
isPlaying: false
|
|
612
|
+
});
|
|
613
|
+
throw error;
|
|
614
|
+
}
|
|
615
|
+
},
|
|
616
|
+
subscribe: (subscriber) => {
|
|
617
|
+
subscribers.add(subscriber);
|
|
618
|
+
return () => {
|
|
619
|
+
subscribers.delete(subscriber);
|
|
620
|
+
};
|
|
621
|
+
}
|
|
622
|
+
};
|
|
623
|
+
return player;
|
|
624
|
+
};
|
|
625
|
+
var decodeVoiceAudioChunk = (audioContext, chunk) => decodePCM16LEChunk(audioContext, chunk);
|
|
279
626
|
// src/client/actions.ts
|
|
280
627
|
var normalizeErrorMessage = (value) => {
|
|
281
628
|
if (typeof value === "string" && value.trim()) {
|
|
@@ -306,6 +653,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
306
653
|
};
|
|
307
654
|
var serverMessageToAction = (message) => {
|
|
308
655
|
switch (message.type) {
|
|
656
|
+
case "audio":
|
|
657
|
+
return {
|
|
658
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
659
|
+
format: message.format,
|
|
660
|
+
receivedAt: message.receivedAt,
|
|
661
|
+
turnId: message.turnId,
|
|
662
|
+
type: "audio"
|
|
663
|
+
};
|
|
309
664
|
case "assistant":
|
|
310
665
|
return {
|
|
311
666
|
text: message.text,
|
|
@@ -349,7 +704,8 @@ var serverMessageToAction = (message) => {
|
|
|
349
704
|
};
|
|
350
705
|
|
|
351
706
|
// src/client/store.ts
|
|
352
|
-
var
|
|
707
|
+
var createInitialState2 = () => ({
|
|
708
|
+
assistantAudio: [],
|
|
353
709
|
assistantTexts: [],
|
|
354
710
|
error: null,
|
|
355
711
|
isConnected: false,
|
|
@@ -360,13 +716,27 @@ var createInitialState = () => ({
|
|
|
360
716
|
turns: []
|
|
361
717
|
});
|
|
362
718
|
var createVoiceStreamStore = () => {
|
|
363
|
-
let state =
|
|
719
|
+
let state = createInitialState2();
|
|
364
720
|
const subscribers = new Set;
|
|
365
721
|
const notify = () => {
|
|
366
722
|
subscribers.forEach((subscriber) => subscriber());
|
|
367
723
|
};
|
|
368
724
|
const dispatch = (action) => {
|
|
369
725
|
switch (action.type) {
|
|
726
|
+
case "audio":
|
|
727
|
+
state = {
|
|
728
|
+
...state,
|
|
729
|
+
assistantAudio: [
|
|
730
|
+
...state.assistantAudio,
|
|
731
|
+
{
|
|
732
|
+
chunk: action.chunk,
|
|
733
|
+
format: action.format,
|
|
734
|
+
receivedAt: action.receivedAt,
|
|
735
|
+
turnId: action.turnId
|
|
736
|
+
}
|
|
737
|
+
]
|
|
738
|
+
};
|
|
739
|
+
break;
|
|
370
740
|
case "assistant":
|
|
371
741
|
state = {
|
|
372
742
|
...state,
|
|
@@ -506,6 +876,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
506
876
|
get assistantTexts() {
|
|
507
877
|
return store.getSnapshot().assistantTexts;
|
|
508
878
|
},
|
|
879
|
+
get assistantAudio() {
|
|
880
|
+
return store.getSnapshot().assistantAudio;
|
|
881
|
+
},
|
|
509
882
|
sendAudio(audio) {
|
|
510
883
|
connection.sendAudio(audio);
|
|
511
884
|
},
|
|
@@ -882,6 +1255,58 @@ var PRESET_INPUTS = {
|
|
|
882
1255
|
transcriptStabilityMs: 1650
|
|
883
1256
|
}
|
|
884
1257
|
},
|
|
1258
|
+
"pstn-balanced": {
|
|
1259
|
+
audioConditioning: {
|
|
1260
|
+
enabled: true,
|
|
1261
|
+
maxGain: 2.8,
|
|
1262
|
+
noiseGateAttenuation: 0.07,
|
|
1263
|
+
noiseGateThreshold: 0.005,
|
|
1264
|
+
targetLevel: 0.08
|
|
1265
|
+
},
|
|
1266
|
+
capture: {
|
|
1267
|
+
channelCount: 1,
|
|
1268
|
+
sampleRateHz: 16000
|
|
1269
|
+
},
|
|
1270
|
+
connection: {
|
|
1271
|
+
maxReconnectAttempts: 14,
|
|
1272
|
+
pingInterval: 45000,
|
|
1273
|
+
reconnect: true
|
|
1274
|
+
},
|
|
1275
|
+
sttLifecycle: "continuous",
|
|
1276
|
+
turnDetection: {
|
|
1277
|
+
qualityProfile: "noisy-room",
|
|
1278
|
+
profile: "long-form",
|
|
1279
|
+
silenceMs: 660,
|
|
1280
|
+
speechThreshold: 0.012,
|
|
1281
|
+
transcriptStabilityMs: 300
|
|
1282
|
+
}
|
|
1283
|
+
},
|
|
1284
|
+
"pstn-fast": {
|
|
1285
|
+
audioConditioning: {
|
|
1286
|
+
enabled: true,
|
|
1287
|
+
maxGain: 2.75,
|
|
1288
|
+
noiseGateAttenuation: 0.06,
|
|
1289
|
+
noiseGateThreshold: 0.005,
|
|
1290
|
+
targetLevel: 0.08
|
|
1291
|
+
},
|
|
1292
|
+
capture: {
|
|
1293
|
+
channelCount: 1,
|
|
1294
|
+
sampleRateHz: 16000
|
|
1295
|
+
},
|
|
1296
|
+
connection: {
|
|
1297
|
+
maxReconnectAttempts: 14,
|
|
1298
|
+
pingInterval: 45000,
|
|
1299
|
+
reconnect: true
|
|
1300
|
+
},
|
|
1301
|
+
sttLifecycle: "continuous",
|
|
1302
|
+
turnDetection: {
|
|
1303
|
+
qualityProfile: "noisy-room",
|
|
1304
|
+
profile: "long-form",
|
|
1305
|
+
silenceMs: 620,
|
|
1306
|
+
speechThreshold: 0.012,
|
|
1307
|
+
transcriptStabilityMs: 280
|
|
1308
|
+
}
|
|
1309
|
+
},
|
|
885
1310
|
reliability: {
|
|
886
1311
|
audioConditioning: {
|
|
887
1312
|
enabled: true,
|
|
@@ -924,7 +1349,8 @@ var resolveVoiceRuntimePreset = (name = "default") => {
|
|
|
924
1349
|
};
|
|
925
1350
|
|
|
926
1351
|
// src/client/controller.ts
|
|
927
|
-
var
|
|
1352
|
+
var createInitialState3 = (stream) => ({
|
|
1353
|
+
assistantAudio: [...stream.assistantAudio],
|
|
928
1354
|
assistantTexts: [...stream.assistantTexts],
|
|
929
1355
|
error: stream.error,
|
|
930
1356
|
isConnected: stream.isConnected,
|
|
@@ -943,7 +1369,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
943
1369
|
...options.connection
|
|
944
1370
|
});
|
|
945
1371
|
let capture = null;
|
|
946
|
-
let state =
|
|
1372
|
+
let state = createInitialState3(stream);
|
|
947
1373
|
const subscribers = new Set;
|
|
948
1374
|
const notify = () => {
|
|
949
1375
|
for (const subscriber of subscribers) {
|
|
@@ -953,6 +1379,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
953
1379
|
const sync = () => {
|
|
954
1380
|
state = {
|
|
955
1381
|
...state,
|
|
1382
|
+
assistantAudio: [...stream.assistantAudio],
|
|
956
1383
|
assistantTexts: [...stream.assistantTexts],
|
|
957
1384
|
error: stream.error,
|
|
958
1385
|
isConnected: stream.isConnected,
|
|
@@ -1080,13 +1507,88 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1080
1507
|
},
|
|
1081
1508
|
get assistantTexts() {
|
|
1082
1509
|
return state.assistantTexts;
|
|
1510
|
+
},
|
|
1511
|
+
get assistantAudio() {
|
|
1512
|
+
return state.assistantAudio;
|
|
1513
|
+
}
|
|
1514
|
+
};
|
|
1515
|
+
};
|
|
1516
|
+
// src/client/duplex.ts
|
|
1517
|
+
var DEFAULT_INTERRUPT_THRESHOLD = 0.08;
|
|
1518
|
+
var shouldInterruptForLevel = (level, options = {}) => (options.enabled ?? true) && level >= (options.interruptThreshold ?? DEFAULT_INTERRUPT_THRESHOLD);
|
|
1519
|
+
var bindVoiceBargeIn = (controller, player, options = {}) => {
|
|
1520
|
+
let lastPartial = controller.partial;
|
|
1521
|
+
const interruptIfPlaying = () => {
|
|
1522
|
+
if (!player.isPlaying || options.enabled === false) {
|
|
1523
|
+
return;
|
|
1524
|
+
}
|
|
1525
|
+
player.interrupt();
|
|
1526
|
+
};
|
|
1527
|
+
const unsubscribe = controller.subscribe(() => {
|
|
1528
|
+
if (options.interruptOnPartial === false) {
|
|
1529
|
+
lastPartial = controller.partial;
|
|
1530
|
+
return;
|
|
1531
|
+
}
|
|
1532
|
+
if (!lastPartial && controller.partial) {
|
|
1533
|
+
interruptIfPlaying();
|
|
1534
|
+
}
|
|
1535
|
+
lastPartial = controller.partial;
|
|
1536
|
+
});
|
|
1537
|
+
return {
|
|
1538
|
+
close: () => {
|
|
1539
|
+
unsubscribe();
|
|
1540
|
+
},
|
|
1541
|
+
handleLevel: (level) => {
|
|
1542
|
+
if (shouldInterruptForLevel(level, options)) {
|
|
1543
|
+
interruptIfPlaying();
|
|
1544
|
+
}
|
|
1545
|
+
},
|
|
1546
|
+
sendAudio: (audio) => {
|
|
1547
|
+
interruptIfPlaying();
|
|
1548
|
+
controller.sendAudio(audio);
|
|
1549
|
+
}
|
|
1550
|
+
};
|
|
1551
|
+
};
|
|
1552
|
+
var createVoiceDuplexController = (path, options = {}) => {
|
|
1553
|
+
let bargeInBinding = null;
|
|
1554
|
+
const controller = createVoiceController(path, {
|
|
1555
|
+
...options,
|
|
1556
|
+
capture: {
|
|
1557
|
+
...options.capture,
|
|
1558
|
+
onLevel: (level) => {
|
|
1559
|
+
bargeInBinding?.handleLevel(level);
|
|
1560
|
+
options.capture?.onLevel?.(level);
|
|
1561
|
+
}
|
|
1562
|
+
}
|
|
1563
|
+
});
|
|
1564
|
+
const audioPlayer = createVoiceAudioPlayer(controller, options.audioPlayer);
|
|
1565
|
+
bargeInBinding = bindVoiceBargeIn(controller, audioPlayer, options.bargeIn);
|
|
1566
|
+
const close = () => {
|
|
1567
|
+
bargeInBinding?.close();
|
|
1568
|
+
bargeInBinding = null;
|
|
1569
|
+
audioPlayer.close();
|
|
1570
|
+
controller.close();
|
|
1571
|
+
};
|
|
1572
|
+
return {
|
|
1573
|
+
...controller,
|
|
1574
|
+
audioPlayer,
|
|
1575
|
+
close,
|
|
1576
|
+
interruptAssistant: async () => {
|
|
1577
|
+
await audioPlayer.interrupt();
|
|
1578
|
+
},
|
|
1579
|
+
sendAudio: (audio) => {
|
|
1580
|
+
bargeInBinding?.sendAudio(audio);
|
|
1083
1581
|
}
|
|
1084
1582
|
};
|
|
1085
1583
|
};
|
|
1086
1584
|
export {
|
|
1585
|
+
decodeVoiceAudioChunk,
|
|
1087
1586
|
createVoiceStream,
|
|
1587
|
+
createVoiceDuplexController,
|
|
1088
1588
|
createVoiceController,
|
|
1089
1589
|
createVoiceConnection,
|
|
1590
|
+
createVoiceAudioPlayer,
|
|
1090
1591
|
createMicrophoneCapture,
|
|
1091
|
-
bindVoiceHTMX
|
|
1592
|
+
bindVoiceHTMX,
|
|
1593
|
+
bindVoiceBargeIn
|
|
1092
1594
|
};
|
package/dist/correction.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { VoicePhraseHint, VoiceTurnCorrectionHandler } from './types';
|
|
1
|
+
import type { VoiceCorrectionRiskTier, VoiceDomainTerm, VoiceLexiconEntry, VoicePhraseHint, VoiceTurnCorrectionHandler } from './types';
|
|
2
2
|
export type VoicePhraseHintCorrectionMatch = {
|
|
3
3
|
alias: string;
|
|
4
4
|
hint: VoicePhraseHint;
|
|
@@ -12,5 +12,22 @@ export type VoicePhraseHintCorrectionOptions = {
|
|
|
12
12
|
provider?: string;
|
|
13
13
|
reason?: string;
|
|
14
14
|
};
|
|
15
|
+
/** Options type taken by `createLexiconCorrectionHandler`; declared as a plain alias of `VoicePhraseHintCorrectionOptions`. */
export type VoiceLexiconCorrectionOptions = VoicePhraseHintCorrectionOptions;
|
|
16
|
+
/** Options object accepted as the second argument of `createDomainPhraseHints`. */
export type VoiceDomainHintGenerationOptions = {
    /** Optional risk tier. NOTE(review): how it affects the generated hints is not visible in these declarations. */
    riskTier?: VoiceCorrectionRiskTier;
};
|
|
19
|
+
/**
 * Options for `createRiskyTurnCorrectionHandler`: every field of
 * `VoicePhraseHintCorrectionOptions` plus the two optional fields below.
 */
export type VoiceRiskyTurnCorrectionHandlerOptions = VoicePhraseHintCorrectionOptions & {
    /** Optional numeric confidence bound. NOTE(review): exact comparison semantics live in the implementation. */
    maxAverageConfidence?: number;
    /** Optional risk tier; any `VoiceCorrectionRiskTier` value except `'safe'`. */
    riskTier?: Exclude<VoiceCorrectionRiskTier, 'safe'>;
};
|
|
23
|
+
/** Options object accepted as the third argument of `applyRiskTieredPhraseHintCorrections`. */
export type VoicePhraseHintCorrectionRunOptions = {
    /** Optional risk tier for the run. NOTE(review): default when omitted is not visible here. */
    riskTier?: VoiceCorrectionRiskTier;
};
|
|
15
26
|
/** Takes a text and a list of phrase hints and returns a `VoicePhraseHintCorrectionResult`. */
export declare const applyPhraseHintCorrections: (text: string, phraseHints: VoicePhraseHint[]) => VoicePhraseHintCorrectionResult;
|
|
27
|
+
/**
 * Same inputs and result type as `applyPhraseHintCorrections`, with an
 * optional third argument carrying a `riskTier`.
 * NOTE(review): presumably limits which hints are applied — confirm against the implementation.
 */
export declare const applyRiskTieredPhraseHintCorrections: (text: string, phraseHints: VoicePhraseHint[], options?: VoicePhraseHintCorrectionRunOptions) => VoicePhraseHintCorrectionResult;
|
|
28
|
+
/** Converts a list of `VoiceDomainTerm` values into `VoicePhraseHint` entries; accepts optional generation options. */
export declare const createDomainPhraseHints: (terms: VoiceDomainTerm[], options?: VoiceDomainHintGenerationOptions) => VoicePhraseHint[];
|
|
29
|
+
/** Converts a list of `VoiceDomainTerm` values into `VoiceLexiconEntry` entries. */
export declare const createDomainLexicon: (terms: VoiceDomainTerm[]) => VoiceLexiconEntry[];
|
|
16
30
|
/** Builds a `VoiceTurnCorrectionHandler` from optional `VoicePhraseHintCorrectionOptions`. */
export declare const createPhraseHintCorrectionHandler: (options?: VoicePhraseHintCorrectionOptions) => VoiceTurnCorrectionHandler;
|
|
31
|
+
/** Takes a text and a list of lexicon entries and returns the same result type as `applyPhraseHintCorrections`. */
export declare const applyLexiconCorrections: (text: string, lexicon: VoiceLexiconEntry[]) => VoicePhraseHintCorrectionResult;
|
|
32
|
+
/** Builds a `VoiceTurnCorrectionHandler` from optional `VoiceLexiconCorrectionOptions`. */
export declare const createLexiconCorrectionHandler: (options?: VoiceLexiconCorrectionOptions) => VoiceTurnCorrectionHandler;
|
|
33
|
+
/**
 * Builds a `VoiceTurnCorrectionHandler` from optional `VoiceRiskyTurnCorrectionHandlerOptions`.
 * NOTE(review): which turns count as "risky" is decided by the implementation, not visible here.
 */
export declare const createRiskyTurnCorrectionHandler: (options?: VoiceRiskyTurnCorrectionHandlerOptions) => VoiceTurnCorrectionHandler;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { type StoredVoiceTraceEvent, type VoiceTraceSinkDeliveryRecord, type VoiceTraceSinkDeliveryStore, type VoiceTraceEventStore } from './trace';
|
|
2
|
+
import type { StoredVoiceIntegrationEvent, StoredVoiceExternalObjectMap, StoredVoiceOpsTask, VoiceExternalObjectMap, VoiceExternalObjectMapStore, VoiceIntegrationEvent, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
|
|
3
|
+
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
|
|
4
|
+
import type { VoiceSessionRecord, VoiceSessionStore } from './types';
|
|
5
|
+
/** Options shared by every `createVoiceFile*` factory declared in this file. */
export type VoiceFileStoreOptions = {
    /** Directory path handed to the store. NOTE(review): on-disk layout is defined by the implementation. */
    directory: string;
    /** Optional flag. NOTE(review): presumably toggles pretty-printed output — confirm. */
    pretty?: boolean;
};
|
|
9
|
+
/**
 * Bundle of the seven file-backed stores returned by
 * `createVoiceFileRuntimeStorage`. Each type parameter selects the record
 * type of one store and defaults to that store's base record type.
 */
export type VoiceFileRuntimeStorage<
    TSession extends VoiceSessionRecord = VoiceSessionRecord,
    TReview extends StoredVoiceCallReviewArtifact = StoredVoiceCallReviewArtifact,
    TTask extends StoredVoiceOpsTask = StoredVoiceOpsTask,
    TEvent extends StoredVoiceIntegrationEvent = StoredVoiceIntegrationEvent,
    TMapping extends StoredVoiceExternalObjectMap = StoredVoiceExternalObjectMap,
    TTrace extends StoredVoiceTraceEvent = StoredVoiceTraceEvent,
    TTraceDelivery extends VoiceTraceSinkDeliveryRecord = VoiceTraceSinkDeliveryRecord
> = {
    /** Integration event store. */
    events: VoiceIntegrationEventStore<TEvent>;
    /** External object mapping store. */
    externalObjects: VoiceExternalObjectMapStore<TMapping>;
    /** Call review artifact store. */
    reviews: VoiceCallReviewStore<TReview>;
    /** Session store. */
    session: VoiceSessionStore<TSession>;
    /** Ops task store. */
    tasks: VoiceOpsTaskStore<TTask>;
    /** Trace sink delivery record store. */
    traceDeliveries: VoiceTraceSinkDeliveryStore<TTraceDelivery>;
    /** Trace event store. */
    traces: VoiceTraceEventStore<TTrace>;
};
|
|
18
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceSessionStore` for the chosen session record type. */
export declare const createVoiceFileSessionStore: <TSession extends VoiceSessionRecord = VoiceSessionRecord>(options: VoiceFileStoreOptions) => VoiceSessionStore<TSession>;
|
|
19
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceCallReviewStore` for the chosen artifact type. */
export declare const createVoiceFileReviewStore: <TArtifact extends StoredVoiceCallReviewArtifact = StoredVoiceCallReviewArtifact>(options: VoiceFileStoreOptions) => VoiceCallReviewStore<TArtifact>;
|
|
20
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceOpsTaskStore` for the chosen task type. */
export declare const createVoiceFileTaskStore: <TTask extends StoredVoiceOpsTask = StoredVoiceOpsTask>(options: VoiceFileStoreOptions) => VoiceOpsTaskStore<TTask>;
|
|
21
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceIntegrationEventStore` for the chosen event type. */
export declare const createVoiceFileIntegrationEventStore: <TEvent extends StoredVoiceIntegrationEvent = StoredVoiceIntegrationEvent>(options: VoiceFileStoreOptions) => VoiceIntegrationEventStore<TEvent>;
|
|
22
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceExternalObjectMapStore` for the chosen mapping type. */
export declare const createVoiceFileExternalObjectMapStore: <TMapping extends StoredVoiceExternalObjectMap = StoredVoiceExternalObjectMap>(options: VoiceFileStoreOptions) => VoiceExternalObjectMapStore<TMapping>;
|
|
23
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceTraceEventStore` for the chosen trace event type. */
export declare const createVoiceFileTraceEventStore: <TEvent extends StoredVoiceTraceEvent = StoredVoiceTraceEvent>(options: VoiceFileStoreOptions) => VoiceTraceEventStore<TEvent>;
|
|
24
|
+
/** Factory taking `VoiceFileStoreOptions` and returning a `VoiceTraceSinkDeliveryStore` for the chosen delivery record type. */
export declare const createVoiceFileTraceSinkDeliveryStore: <TDelivery extends VoiceTraceSinkDeliveryRecord = VoiceTraceSinkDeliveryRecord>(options: VoiceFileStoreOptions) => VoiceTraceSinkDeliveryStore<TDelivery>;
|
|
25
|
+
/**
 * Factory taking a single `VoiceFileStoreOptions` and returning the full
 * `VoiceFileRuntimeStorage` bundle (events, externalObjects, reviews, session,
 * tasks, traceDeliveries, traces) with the given record types.
 */
export declare const createVoiceFileRuntimeStorage: <TSession extends VoiceSessionRecord = VoiceSessionRecord, TReview extends StoredVoiceCallReviewArtifact = StoredVoiceCallReviewArtifact, TTask extends StoredVoiceOpsTask = StoredVoiceOpsTask, TEvent extends StoredVoiceIntegrationEvent = StoredVoiceIntegrationEvent, TMapping extends StoredVoiceExternalObjectMap = StoredVoiceExternalObjectMap, TTrace extends StoredVoiceTraceEvent = StoredVoiceTraceEvent, TTraceDelivery extends VoiceTraceSinkDeliveryRecord = VoiceTraceSinkDeliveryRecord>(options: VoiceFileStoreOptions) => VoiceFileRuntimeStorage<TSession, TReview, TTask, TEvent, TMapping, TTrace, TTraceDelivery>;
|
|
26
|
+
/**
 * Pairs a review artifact with an id.
 *
 * @param id Identifier for the stored artifact.
 * @param artifact The review artifact.
 * @returns A value typed as the artifact intersected with `{ id: string }`.
 */
export declare const createStoredVoiceCallReviewArtifact: <
    TArtifact extends VoiceCallReviewArtifact = VoiceCallReviewArtifact
>(
    id: string,
    artifact: TArtifact
) => TArtifact & { id: string; };
|
|
29
|
+
/**
 * Pairs an ops task that has no id yet with an id.
 *
 * @param id Identifier for the stored task.
 * @param task Task data; its type must not already declare `id`.
 * @returns A value typed as the task intersected with `{ id: string }`.
 */
export declare const createStoredVoiceOpsTask: <
    TTask extends Omit<VoiceOpsTask, "id"> = Omit<VoiceOpsTask, "id">
>(
    id: string,
    task: TTask
) => TTask & { id: string; };
|
|
32
|
+
/**
 * Pairs an integration event that has no id yet with an id.
 *
 * @param id Identifier for the stored event.
 * @param event Event data; its type must not already declare `id`.
 * @returns A value typed as the event intersected with `{ id: string }`.
 */
export declare const createStoredVoiceIntegrationEvent: <
    TEvent extends Omit<VoiceIntegrationEvent, "id"> = Omit<VoiceIntegrationEvent, "id">
>(
    id: string,
    event: TEvent
) => TEvent & { id: string; };
|
|
35
|
+
/**
 * Builds a complete `VoiceExternalObjectMap` from mapping data that lacks
 * `id`, `createdAt` and `updatedAt`.
 *
 * @param mapping Mapping data plus an optional numeric `at`.
 *   NOTE(review): `at` is presumably the timestamp used for the generated
 *   fields — confirm against the implementation.
 * @returns The full `VoiceExternalObjectMap`.
 */
export declare const createStoredVoiceExternalObjectMap: <
    TMapping extends Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt"> = Omit<VoiceExternalObjectMap, "id" | "createdAt" | "updatedAt">
>(
    mapping: TMapping & { at?: number; }
) => VoiceExternalObjectMap;
|