@absolutejs/voice 0.0.21 → 0.0.22-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/README.md +499 -2
  2. package/dist/angular/index.js +90 -0
  3. package/dist/angular/voice-controller.service.d.ts +6 -0
  4. package/dist/angular/voice-stream.service.d.ts +6 -0
  5. package/dist/client/actions.d.ts +41 -0
  6. package/dist/client/audioPlayer.d.ts +40 -0
  7. package/dist/client/duplex.d.ts +3 -0
  8. package/dist/client/htmxBootstrap.js +84 -0
  9. package/dist/client/index.d.ts +2 -0
  10. package/dist/client/index.js +507 -5
  11. package/dist/correction.d.ts +18 -1
  12. package/dist/fileStore.d.ts +27 -0
  13. package/dist/index.d.ts +12 -1
  14. package/dist/index.js +2425 -33
  15. package/dist/ops.d.ts +100 -0
  16. package/dist/react/index.js +86 -0
  17. package/dist/react/useVoiceController.d.ts +6 -0
  18. package/dist/react/useVoiceStream.d.ts +6 -0
  19. package/dist/routing.d.ts +3 -0
  20. package/dist/runtimeOps.d.ts +23 -0
  21. package/dist/svelte/index.js +84 -0
  22. package/dist/telephony/response.d.ts +7 -0
  23. package/dist/telephony/twilio.d.ts +116 -0
  24. package/dist/testing/benchmark.d.ts +59 -4
  25. package/dist/testing/corrected.d.ts +41 -0
  26. package/dist/testing/duplex.d.ts +59 -0
  27. package/dist/testing/fixtures.d.ts +18 -2
  28. package/dist/testing/index.d.ts +5 -0
  29. package/dist/testing/index.js +4940 -307
  30. package/dist/testing/review.d.ts +143 -0
  31. package/dist/testing/sessionBenchmark.d.ts +25 -0
  32. package/dist/testing/stt.d.ts +2 -1
  33. package/dist/testing/telephony.d.ts +70 -0
  34. package/dist/testing/tts.d.ts +73 -0
  35. package/dist/types.d.ts +290 -3
  36. package/dist/vue/index.js +90 -0
  37. package/dist/vue/useVoiceController.d.ts +11 -0
  38. package/dist/vue/useVoiceStream.d.ts +11 -0
  39. package/package.json +115 -1
package/dist/types.d.ts CHANGED
@@ -1,23 +1,54 @@
1
1
  import type { SessionStore } from '@absolutejs/absolute';
2
+ import type { StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
3
+ import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
2
4
  export type AudioFormat = {
3
5
  container: 'raw';
4
- encoding: 'pcm_s16le';
6
+ encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
5
7
  sampleRateHz: number;
6
8
  channels: 1 | 2;
7
9
  };
8
10
  export type AudioChunk = ArrayBuffer | ArrayBufferView;
11
+ export type VoiceLanguageStrategy = {
12
+ mode: 'auto-detect';
13
+ allowedLanguages?: string[];
14
+ } | {
15
+ mode: 'fixed';
16
+ primaryLanguage: string;
17
+ secondaryLanguages?: string[];
18
+ } | {
19
+ mode: 'allow-switching';
20
+ primaryLanguage?: string;
21
+ secondaryLanguages: string[];
22
+ };
9
23
  export type VoicePhraseHint = {
10
24
  text: string;
11
25
  aliases?: string[];
12
26
  boost?: number;
13
27
  metadata?: Record<string, unknown>;
14
28
  };
29
+ export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
30
+ export type VoiceDomainTerm = {
31
+ text: string;
32
+ aliases?: string[];
33
+ boost?: number;
34
+ language?: string;
35
+ metadata?: Record<string, unknown>;
36
+ pronunciation?: string;
37
+ };
38
+ export type VoiceLexiconEntry = {
39
+ text: string;
40
+ aliases?: string[];
41
+ language?: string;
42
+ metadata?: Record<string, unknown>;
43
+ pronunciation?: string;
44
+ };
15
45
  export type Transcript = {
16
46
  id: string;
17
47
  text: string;
18
48
  isFinal: boolean;
19
49
  confidence?: number;
20
50
  language?: string;
51
+ speaker?: string | number;
21
52
  startedAtMs?: number;
22
53
  endedAtMs?: number;
23
54
  vendor?: string;
@@ -26,6 +57,7 @@ export type VoiceTranscriptQuality = {
26
57
  averageConfidence?: number;
27
58
  confidenceSampleCount: number;
28
59
  correction?: VoiceTurnCorrectionDiagnostics;
60
+ cost?: VoiceTurnCostEstimate;
29
61
  fallbackUsed: boolean;
30
62
  finalTranscriptCount: number;
31
63
  fallback?: VoiceFallbackDiagnostics;
@@ -42,6 +74,13 @@ export type VoiceTurnCorrectionDiagnostics = {
42
74
  provider?: string;
43
75
  reason?: string;
44
76
  };
77
+ export type VoiceTurnCostEstimate = {
78
+ estimatedRelativeCostUnits: number;
79
+ fallbackAttemptCount: number;
80
+ fallbackReplayAudioMs: number;
81
+ primaryAudioMs: number;
82
+ totalBillableAudioMs: number;
83
+ };
45
84
  export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
46
85
  export type VoiceFallbackDiagnostics = {
47
86
  attempted: boolean;
@@ -97,6 +136,8 @@ export type STTAdapterSession = {
97
136
  export type STTAdapterOpenOptions = {
98
137
  sessionId: string;
99
138
  format: AudioFormat;
139
+ languageStrategy?: VoiceLanguageStrategy;
140
+ lexicon?: VoiceLexiconEntry[];
100
141
  phraseHints?: VoicePhraseHint[];
101
142
  signal?: AbortSignal;
102
143
  };
@@ -122,6 +163,7 @@ export type TTSAdapterSession = {
122
163
  };
123
164
  export type TTSAdapterOpenOptions = {
124
165
  sessionId: string;
166
+ lexicon?: VoiceLexiconEntry[];
125
167
  signal?: AbortSignal;
126
168
  };
127
169
  export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
@@ -139,6 +181,8 @@ export type RealtimeAdapterSession = {
139
181
  export type RealtimeAdapterOpenOptions = {
140
182
  sessionId: string;
141
183
  format: AudioFormat;
184
+ languageStrategy?: VoiceLanguageStrategy;
185
+ lexicon?: VoiceLexiconEntry[];
142
186
  phraseHints?: VoicePhraseHint[];
143
187
  signal?: AbortSignal;
144
188
  };
@@ -156,6 +200,17 @@ export type VoiceTurnRecord<TResult = unknown> = {
156
200
  committedAt: number;
157
201
  result?: TResult;
158
202
  };
203
+ export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
204
+ fallbackPassCostUnit?: number;
205
+ onTurnCost?: (input: {
206
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
207
+ context: TContext;
208
+ estimate: VoiceTurnCostEstimate;
209
+ session: TSession;
210
+ turn: VoiceTurnRecord<TResult>;
211
+ }) => Promise<void> | void;
212
+ primaryPassCostUnit?: number;
213
+ };
159
214
  export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
160
215
  id: string;
161
216
  createdAt: number;
@@ -185,6 +240,7 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
185
240
  transcriptIds: string[];
186
241
  committedAt: number;
187
242
  };
243
+ call?: VoiceCallLifecycleState;
188
244
  metadata?: TMeta;
189
245
  scenarioId?: string;
190
246
  };
@@ -195,6 +251,22 @@ export type VoiceSessionSummary = {
195
251
  status: VoiceSessionStatus;
196
252
  turnCount: number;
197
253
  };
254
+ export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
255
+ export type VoiceCallLifecycleEvent = {
256
+ at: number;
257
+ type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
258
+ disposition?: VoiceCallDisposition;
259
+ metadata?: Record<string, unknown>;
260
+ reason?: string;
261
+ target?: string;
262
+ };
263
+ export type VoiceCallLifecycleState = {
264
+ disposition?: VoiceCallDisposition;
265
+ endedAt?: number;
266
+ events: VoiceCallLifecycleEvent[];
267
+ lastEventAt: number;
268
+ startedAt: number;
269
+ };
198
270
  export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
199
271
  export type VoiceLogger = {
200
272
  debug?: (message: string, meta?: Record<string, unknown>) => void;
@@ -207,7 +279,7 @@ export type VoiceReconnectConfig = {
207
279
  timeout?: number;
208
280
  maxAttempts?: number;
209
281
  };
210
- export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
282
+ export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
211
283
  export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
212
284
  export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
213
285
  export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
@@ -271,7 +343,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
271
343
  commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
272
344
  disconnect: (event?: VoiceCloseEvent) => Promise<void>;
273
345
  complete: (result?: TResult) => Promise<void>;
346
+ escalate: (input: {
347
+ metadata?: Record<string, unknown>;
348
+ reason: string;
349
+ result?: TResult;
350
+ }) => Promise<void>;
274
351
  fail: (error: unknown) => Promise<void>;
352
+ markNoAnswer: (input?: {
353
+ metadata?: Record<string, unknown>;
354
+ result?: TResult;
355
+ }) => Promise<void>;
356
+ markVoicemail: (input?: {
357
+ metadata?: Record<string, unknown>;
358
+ result?: TResult;
359
+ }) => Promise<void>;
360
+ transfer: (input: {
361
+ metadata?: Record<string, unknown>;
362
+ reason?: string;
363
+ result?: TResult;
364
+ target: string;
365
+ }) => Promise<void>;
275
366
  close: (reason?: string) => Promise<void>;
276
367
  snapshot: () => Promise<TSession>;
277
368
  };
@@ -279,6 +370,21 @@ export type VoiceRouteResult<TResult = unknown> = {
279
370
  complete?: boolean;
280
371
  result?: TResult;
281
372
  assistantText?: string;
373
+ transfer?: {
374
+ metadata?: Record<string, unknown>;
375
+ reason?: string;
376
+ target: string;
377
+ };
378
+ escalate?: {
379
+ metadata?: Record<string, unknown>;
380
+ reason: string;
381
+ };
382
+ voicemail?: {
383
+ metadata?: Record<string, unknown>;
384
+ };
385
+ noAnswer?: {
386
+ metadata?: Record<string, unknown>;
387
+ };
282
388
  };
283
389
  export type VoiceTurnCorrectionResult = string | {
284
390
  text: string;
@@ -290,6 +396,7 @@ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends Voic
290
396
  api: VoiceSessionHandle<TContext, TSession, TResult>;
291
397
  context: TContext;
292
398
  fallback?: VoiceFallbackDiagnostics;
399
+ lexicon: VoiceLexiconEntry[];
293
400
  phraseHints: VoicePhraseHint[];
294
401
  session: TSession;
295
402
  text: string;
@@ -300,6 +407,11 @@ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
300
407
  scenarioId?: string;
301
408
  sessionId: string;
302
409
  }) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
410
+ export type VoiceLexiconResolver<TContext = unknown> = (input: {
411
+ context: TContext;
412
+ scenarioId?: string;
413
+ sessionId: string;
414
+ }) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
303
415
  export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
304
416
  context: TContext;
305
417
  session: TSession;
@@ -308,6 +420,20 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
308
420
  }) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
309
421
  export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
310
422
  export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
423
+ onCallStart?: (input: {
424
+ context: TContext;
425
+ session: TSession;
426
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
427
+ }) => Promise<void> | void;
428
+ onCallEnd?: (input: {
429
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
430
+ context: TContext;
431
+ disposition: VoiceCallDisposition;
432
+ metadata?: Record<string, unknown>;
433
+ reason?: string;
434
+ session: TSession;
435
+ target?: string;
436
+ }) => Promise<void> | void;
311
437
  onSession?: (input: {
312
438
  context: TContext;
313
439
  session: TSession;
@@ -327,6 +453,61 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
327
453
  error: unknown;
328
454
  api?: VoiceSessionHandle<TContext, TSession, TResult>;
329
455
  }) => Promise<void> | void;
456
+ onEscalation?: (input: {
457
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
458
+ context: TContext;
459
+ metadata?: Record<string, unknown>;
460
+ reason: string;
461
+ session: TSession;
462
+ }) => Promise<void> | void;
463
+ onNoAnswer?: (input: {
464
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
465
+ context: TContext;
466
+ metadata?: Record<string, unknown>;
467
+ session: TSession;
468
+ }) => Promise<void> | void;
469
+ onTransfer?: (input: {
470
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
471
+ context: TContext;
472
+ metadata?: Record<string, unknown>;
473
+ reason?: string;
474
+ session: TSession;
475
+ target: string;
476
+ }) => Promise<void> | void;
477
+ onVoicemail?: (input: {
478
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
479
+ context: TContext;
480
+ metadata?: Record<string, unknown>;
481
+ session: TSession;
482
+ }) => Promise<void> | void;
483
+ };
484
+ export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
485
+ buildReview?: (input: {
486
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
487
+ context: TContext;
488
+ disposition: VoiceCallDisposition;
489
+ metadata?: Record<string, unknown>;
490
+ reason?: string;
491
+ result?: TResult;
492
+ session: TSession;
493
+ target?: string;
494
+ }) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
495
+ createTaskFromReview?: (input: {
496
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
497
+ context: TContext;
498
+ disposition: VoiceCallDisposition;
499
+ review: StoredVoiceCallReviewArtifact;
500
+ session: TSession;
501
+ }) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
502
+ events?: VoiceIntegrationEventStore;
503
+ onEvent?: (input: {
504
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
505
+ context: TContext;
506
+ event: StoredVoiceIntegrationEvent;
507
+ session: TSession;
508
+ }) => Promise<void> | void;
509
+ reviews?: VoiceCallReviewStore;
510
+ tasks?: VoiceOpsTaskStore;
330
511
  };
331
512
  export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
332
513
  onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
@@ -337,8 +518,15 @@ export type VoiceScenario = {
337
518
  description?: string;
338
519
  metadata?: Record<string, unknown>;
339
520
  };
521
+ export type VoiceExpectedSpeakerTurn = {
522
+ speaker: string;
523
+ text: string;
524
+ };
340
525
  export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
526
+ costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
341
527
  path: string;
528
+ languageStrategy?: VoiceLanguageStrategy;
529
+ lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
342
530
  phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
343
531
  preset?: VoiceRuntimePreset;
344
532
  stt: STTAdapter;
@@ -351,12 +539,17 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
351
539
  audioConditioning?: VoiceAudioConditioningConfig;
352
540
  logger?: VoiceLogger;
353
541
  htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
542
+ ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
354
543
  } & VoiceRouteConfig<TContext, TSession, TResult>;
355
544
  export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
545
+ costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
356
546
  id: string;
357
547
  context: TContext;
358
548
  socket: VoiceSocket;
359
549
  stt: STTAdapter;
550
+ tts?: TTSAdapter;
551
+ languageStrategy?: VoiceLanguageStrategy;
552
+ lexicon?: VoiceLexiconEntry[];
360
553
  sttFallback?: VoiceResolvedSTTFallbackConfig;
361
554
  store: VoiceSessionStore<TSession>;
362
555
  reconnect: Required<VoiceReconnectConfig>;
@@ -408,6 +601,13 @@ export type VoiceServerAssistantMessage = {
408
601
  text: string;
409
602
  turnId?: string;
410
603
  };
604
+ export type VoiceServerAudioMessage = {
605
+ type: 'audio';
606
+ chunkBase64: string;
607
+ format: AudioFormat;
608
+ receivedAt: number;
609
+ turnId?: string;
610
+ };
411
611
  export type VoiceServerCompleteMessage = {
412
612
  type: 'complete';
413
613
  sessionId: string;
@@ -420,7 +620,7 @@ export type VoiceServerErrorMessage = {
420
620
  export type VoiceServerPongMessage = {
421
621
  type: 'pong';
422
622
  };
423
- export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
623
+ export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
424
624
  export type VoiceConnectionOptions = {
425
625
  protocols?: string[];
426
626
  scenarioId?: string;
@@ -440,6 +640,30 @@ export type VoiceControllerOptions = {
440
640
  capture?: VoiceCaptureOptions;
441
641
  autoStopOnComplete?: boolean;
442
642
  };
643
+ export type VoiceBargeInOptions = {
644
+ enabled?: boolean;
645
+ interruptOnPartial?: boolean;
646
+ interruptThreshold?: number;
647
+ };
648
+ export type VoiceAudioPlayerOptions = {
649
+ autoStart?: boolean;
650
+ createAudioContext?: () => AudioContext;
651
+ lookaheadMs?: number;
652
+ };
653
+ export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
654
+ audioPlayer?: VoiceAudioPlayerOptions;
655
+ bargeIn?: VoiceBargeInOptions;
656
+ };
657
+ export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
658
+ export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
659
+ export type VoiceSTTRoutingStrategy = {
660
+ benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
661
+ correctionMode: VoiceSTTRoutingCorrectionMode;
662
+ goal: VoiceSTTRoutingGoal;
663
+ notes: string[];
664
+ preset: VoiceRuntimePreset;
665
+ sttLifecycle: VoiceSTTLifecycle;
666
+ };
443
667
  export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
444
668
  assistantTexts: string[];
445
669
  partial: string;
@@ -480,6 +704,12 @@ export type VoiceStreamState<TResult = unknown> = {
480
704
  partial: string;
481
705
  turns: VoiceTurnRecord<TResult>[];
482
706
  assistantTexts: string[];
707
+ assistantAudio: Array<{
708
+ chunk: Uint8Array;
709
+ format: AudioFormat;
710
+ receivedAt: number;
711
+ turnId?: string;
712
+ }>;
483
713
  error: string | null;
484
714
  isConnected: boolean;
485
715
  };
@@ -502,11 +732,52 @@ export type VoiceStream<TResult = unknown> = {
502
732
  subscribe: (subscriber: () => void) => () => void;
503
733
  turns: VoiceTurnRecord<TResult>[];
504
734
  assistantTexts: string[];
735
+ assistantAudio: Array<{
736
+ chunk: Uint8Array;
737
+ format: AudioFormat;
738
+ receivedAt: number;
739
+ turnId?: string;
740
+ }>;
505
741
  };
506
742
  export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
507
743
  isRecording: boolean;
508
744
  recordingError: string | null;
509
745
  };
746
+ export type VoiceAudioPlayerState = {
747
+ activeSourceCount: number;
748
+ error: string | null;
749
+ isActive: boolean;
750
+ isPlaying: boolean;
751
+ lastInterruptLatencyMs?: number;
752
+ lastPlaybackStopLatencyMs?: number;
753
+ processedChunkCount: number;
754
+ queuedChunkCount: number;
755
+ };
756
+ export type VoiceAudioPlayerSource = {
757
+ assistantAudio: VoiceStreamState['assistantAudio'];
758
+ subscribe: (subscriber: () => void) => () => void;
759
+ };
760
+ export type VoiceAudioPlayer = {
761
+ close: () => Promise<void>;
762
+ error: string | null;
763
+ getSnapshot: () => VoiceAudioPlayerState;
764
+ activeSourceCount: number;
765
+ isActive: boolean;
766
+ isPlaying: boolean;
767
+ interrupt: () => Promise<void>;
768
+ lastInterruptLatencyMs?: number;
769
+ lastPlaybackStopLatencyMs?: number;
770
+ pause: () => Promise<void>;
771
+ processedChunkCount: number;
772
+ queuedChunkCount: number;
773
+ start: () => Promise<void>;
774
+ subscribe: (subscriber: () => void) => () => void;
775
+ };
776
+ export type VoiceBargeInBinding = {
777
+ close: () => void;
778
+ handleLevel: (level: number) => void;
779
+ sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
780
+ };
510
781
  export type VoiceController<TResult = unknown> = {
511
782
  bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
512
783
  close: () => void;
@@ -532,6 +803,16 @@ export type VoiceController<TResult = unknown> = {
532
803
  toggleRecording: () => Promise<void>;
533
804
  turns: VoiceTurnRecord<TResult>[];
534
805
  assistantTexts: string[];
806
+ assistantAudio: Array<{
807
+ chunk: Uint8Array;
808
+ format: AudioFormat;
809
+ receivedAt: number;
810
+ turnId?: string;
811
+ }>;
812
+ };
813
+ export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
814
+ audioPlayer: VoiceAudioPlayer;
815
+ interruptAssistant: () => Promise<void>;
535
816
  };
536
817
  export type VoiceHTMXBindingOptions = {
537
818
  element: Element | string;
@@ -556,6 +837,12 @@ export type VoiceStoreAction<TResult = unknown> = {
556
837
  } | {
557
838
  type: 'assistant';
558
839
  text: string;
840
+ } | {
841
+ type: 'audio';
842
+ chunk: Uint8Array;
843
+ format: AudioFormat;
844
+ receivedAt: number;
845
+ turnId?: string;
559
846
  } | {
560
847
  type: 'complete';
561
848
  sessionId: string;
package/dist/vue/index.js CHANGED
@@ -102,6 +102,14 @@ var normalizeErrorMessage = (value) => {
102
102
  };
103
103
  var serverMessageToAction = (message) => {
104
104
  switch (message.type) {
105
+ case "audio":
106
+ return {
107
+ chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
108
+ format: message.format,
109
+ receivedAt: message.receivedAt,
110
+ turnId: message.turnId,
111
+ type: "audio"
112
+ };
105
113
  case "assistant":
106
114
  return {
107
115
  text: message.text,
@@ -182,6 +190,7 @@ var isVoiceServerMessage = (value) => {
182
190
  return false;
183
191
  }
184
192
  switch (value.type) {
193
+ case "audio":
185
194
  case "assistant":
186
195
  case "complete":
187
196
  case "error":
@@ -354,6 +363,7 @@ var createVoiceConnection = (path, options = {}) => {
354
363
 
355
364
  // src/client/store.ts
356
365
  var createInitialState = () => ({
366
+ assistantAudio: [],
357
367
  assistantTexts: [],
358
368
  error: null,
359
369
  isConnected: false,
@@ -371,6 +381,20 @@ var createVoiceStreamStore = () => {
371
381
  };
372
382
  const dispatch = (action) => {
373
383
  switch (action.type) {
384
+ case "audio":
385
+ state = {
386
+ ...state,
387
+ assistantAudio: [
388
+ ...state.assistantAudio,
389
+ {
390
+ chunk: action.chunk,
391
+ format: action.format,
392
+ receivedAt: action.receivedAt,
393
+ turnId: action.turnId
394
+ }
395
+ ]
396
+ };
397
+ break;
374
398
  case "assistant":
375
399
  state = {
376
400
  ...state,
@@ -510,6 +534,9 @@ var createVoiceStream = (path, options = {}) => {
510
534
  get assistantTexts() {
511
535
  return store.getSnapshot().assistantTexts;
512
536
  },
537
+ get assistantAudio() {
538
+ return store.getSnapshot().assistantAudio;
539
+ },
513
540
  sendAudio(audio) {
514
541
  connection.sendAudio(audio);
515
542
  },
@@ -525,6 +552,7 @@ var createVoiceStream = (path, options = {}) => {
525
552
  // src/vue/useVoiceStream.ts
526
553
  var useVoiceStream = (path, options = {}) => {
527
554
  const stream = createVoiceStream(path, options);
555
+ const assistantAudio = shallowRef([]);
528
556
  const assistantTexts = shallowRef([]);
529
557
  const error = ref(null);
530
558
  const isConnected = ref(false);
@@ -533,6 +561,7 @@ var useVoiceStream = (path, options = {}) => {
533
561
  const status = ref(stream.status);
534
562
  const turns = shallowRef([]);
535
563
  const sync = () => {
564
+ assistantAudio.value = [...stream.assistantAudio];
536
565
  assistantTexts.value = [...stream.assistantTexts];
537
566
  error.value = stream.error;
538
567
  isConnected.value = stream.isConnected;
@@ -549,6 +578,7 @@ var useVoiceStream = (path, options = {}) => {
549
578
  };
550
579
  onUnmounted(destroy);
551
580
  return {
581
+ assistantAudio,
552
582
  assistantTexts,
553
583
  close: () => destroy(),
554
584
  endTurn: () => stream.endTurn(),
@@ -929,6 +959,58 @@ var PRESET_INPUTS = {
929
959
  transcriptStabilityMs: 1650
930
960
  }
931
961
  },
962
+ "pstn-balanced": {
963
+ audioConditioning: {
964
+ enabled: true,
965
+ maxGain: 2.8,
966
+ noiseGateAttenuation: 0.07,
967
+ noiseGateThreshold: 0.005,
968
+ targetLevel: 0.08
969
+ },
970
+ capture: {
971
+ channelCount: 1,
972
+ sampleRateHz: 16000
973
+ },
974
+ connection: {
975
+ maxReconnectAttempts: 14,
976
+ pingInterval: 45000,
977
+ reconnect: true
978
+ },
979
+ sttLifecycle: "continuous",
980
+ turnDetection: {
981
+ qualityProfile: "noisy-room",
982
+ profile: "long-form",
983
+ silenceMs: 660,
984
+ speechThreshold: 0.012,
985
+ transcriptStabilityMs: 300
986
+ }
987
+ },
988
+ "pstn-fast": {
989
+ audioConditioning: {
990
+ enabled: true,
991
+ maxGain: 2.75,
992
+ noiseGateAttenuation: 0.06,
993
+ noiseGateThreshold: 0.005,
994
+ targetLevel: 0.08
995
+ },
996
+ capture: {
997
+ channelCount: 1,
998
+ sampleRateHz: 16000
999
+ },
1000
+ connection: {
1001
+ maxReconnectAttempts: 14,
1002
+ pingInterval: 45000,
1003
+ reconnect: true
1004
+ },
1005
+ sttLifecycle: "continuous",
1006
+ turnDetection: {
1007
+ qualityProfile: "noisy-room",
1008
+ profile: "long-form",
1009
+ silenceMs: 620,
1010
+ speechThreshold: 0.012,
1011
+ transcriptStabilityMs: 280
1012
+ }
1013
+ },
932
1014
  reliability: {
933
1015
  audioConditioning: {
934
1016
  enabled: true,
@@ -972,6 +1054,7 @@ var resolveVoiceRuntimePreset = (name = "default") => {
972
1054
 
973
1055
  // src/client/controller.ts
974
1056
  var createInitialState2 = (stream) => ({
1057
+ assistantAudio: [...stream.assistantAudio],
975
1058
  assistantTexts: [...stream.assistantTexts],
976
1059
  error: stream.error,
977
1060
  isConnected: stream.isConnected,
@@ -1000,6 +1083,7 @@ var createVoiceController = (path, options = {}) => {
1000
1083
  const sync = () => {
1001
1084
  state = {
1002
1085
  ...state,
1086
+ assistantAudio: [...stream.assistantAudio],
1003
1087
  assistantTexts: [...stream.assistantTexts],
1004
1088
  error: stream.error,
1005
1089
  isConnected: stream.isConnected,
@@ -1127,6 +1211,9 @@ var createVoiceController = (path, options = {}) => {
1127
1211
  },
1128
1212
  get assistantTexts() {
1129
1213
  return state.assistantTexts;
1214
+ },
1215
+ get assistantAudio() {
1216
+ return state.assistantAudio;
1130
1217
  }
1131
1218
  };
1132
1219
  };
@@ -1134,6 +1221,7 @@ var createVoiceController = (path, options = {}) => {
1134
1221
  // src/vue/useVoiceController.ts
1135
1222
  var useVoiceController = (path, options = {}) => {
1136
1223
  const controller = createVoiceController(path, options);
1224
+ const assistantAudio = shallowRef2([]);
1137
1225
  const assistantTexts = shallowRef2([]);
1138
1226
  const error = ref2(null);
1139
1227
  const isConnected = ref2(false);
@@ -1144,6 +1232,7 @@ var useVoiceController = (path, options = {}) => {
1144
1232
  const status = ref2(controller.status);
1145
1233
  const turns = shallowRef2([]);
1146
1234
  const sync = () => {
1235
+ assistantAudio.value = [...controller.assistantAudio];
1147
1236
  assistantTexts.value = [...controller.assistantTexts];
1148
1237
  error.value = controller.error;
1149
1238
  isConnected.value = controller.isConnected;
@@ -1162,6 +1251,7 @@ var useVoiceController = (path, options = {}) => {
1162
1251
  };
1163
1252
  onUnmounted2(destroy);
1164
1253
  return {
1254
+ assistantAudio,
1165
1255
  assistantTexts,
1166
1256
  bindHTMX: controller.bindHTMX,
1167
1257
  close: () => destroy(),
package/dist/vue/useVoiceController.d.ts CHANGED
@@ -1,5 +1,16 @@
1
1
  import type { VoiceControllerOptions, VoiceTurnRecord } from '../types';
2
2
  export declare const useVoiceController: <TResult = unknown>(path: string, options?: VoiceControllerOptions) => {
3
+ assistantAudio: import("vue").ShallowRef<{
4
+ chunk: Uint8Array;
5
+ format: import("..").AudioFormat;
6
+ receivedAt: number;
7
+ turnId?: string;
8
+ }[], {
9
+ chunk: Uint8Array;
10
+ format: import("..").AudioFormat;
11
+ receivedAt: number;
12
+ turnId?: string;
13
+ }[]>;
3
14
  assistantTexts: import("vue").ShallowRef<string[], string[]>;
4
15
  bindHTMX: (options: import("..").VoiceHTMXBindingOptions) => () => void;
5
16
  close: () => void;