@absolutejs/voice 0.0.21 → 0.0.22-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. package/README.md +1046 -2
  2. package/dist/agent.d.ts +113 -0
  3. package/dist/angular/index.js +90 -0
  4. package/dist/angular/voice-controller.service.d.ts +6 -0
  5. package/dist/angular/voice-stream.service.d.ts +6 -0
  6. package/dist/client/actions.d.ts +41 -0
  7. package/dist/client/audioPlayer.d.ts +40 -0
  8. package/dist/client/duplex.d.ts +3 -0
  9. package/dist/client/htmxBootstrap.js +84 -0
  10. package/dist/client/index.d.ts +2 -0
  11. package/dist/client/index.js +507 -5
  12. package/dist/correction.d.ts +18 -1
  13. package/dist/fileStore.d.ts +37 -0
  14. package/dist/index.d.ts +32 -1
  15. package/dist/index.js +8379 -1245
  16. package/dist/ops.d.ts +327 -0
  17. package/dist/opsPresets.d.ts +19 -0
  18. package/dist/opsRuntime.d.ts +66 -0
  19. package/dist/opsSinks.d.ts +149 -0
  20. package/dist/outcomeRecipes.d.ts +18 -0
  21. package/dist/postgresStore.d.ts +31 -0
  22. package/dist/queue.d.ts +276 -0
  23. package/dist/react/index.js +86 -0
  24. package/dist/react/useVoiceController.d.ts +6 -0
  25. package/dist/react/useVoiceStream.d.ts +6 -0
  26. package/dist/routing.d.ts +3 -0
  27. package/dist/runtimeOps.d.ts +23 -0
  28. package/dist/s3Store.d.ts +14 -0
  29. package/dist/sqliteStore.d.ts +26 -0
  30. package/dist/svelte/index.js +84 -0
  31. package/dist/telephony/response.d.ts +7 -0
  32. package/dist/telephony/twilio.d.ts +116 -0
  33. package/dist/testing/benchmark.d.ts +59 -4
  34. package/dist/testing/corrected.d.ts +41 -0
  35. package/dist/testing/duplex.d.ts +59 -0
  36. package/dist/testing/fixtures.d.ts +18 -2
  37. package/dist/testing/index.d.ts +5 -0
  38. package/dist/testing/index.js +5094 -284
  39. package/dist/testing/review.d.ts +143 -0
  40. package/dist/testing/sessionBenchmark.d.ts +25 -0
  41. package/dist/testing/stt.d.ts +2 -1
  42. package/dist/testing/telephony.d.ts +70 -0
  43. package/dist/testing/tts.d.ts +73 -0
  44. package/dist/trace.d.ts +236 -0
  45. package/dist/types.d.ts +320 -3
  46. package/dist/vue/index.js +90 -0
  47. package/dist/vue/useVoiceController.d.ts +11 -0
  48. package/dist/vue/useVoiceStream.d.ts +11 -0
  49. package/package.json +115 -1
package/dist/types.d.ts CHANGED
@@ -1,23 +1,56 @@
1
1
  import type { SessionStore } from '@absolutejs/absolute';
2
+ import type { VoiceOpsDispositionTaskPolicies, VoiceOpsTaskAssignmentRule, VoiceOpsTaskAssignmentRules, VoiceIntegrationWebhookConfig, StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTaskPolicy, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
3
+ import type { VoiceIntegrationSink } from './opsSinks';
4
+ import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
5
+ import type { VoiceTraceEventStore } from './trace';
2
6
  export type AudioFormat = {
3
7
  container: 'raw';
4
- encoding: 'pcm_s16le';
8
+ encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
5
9
  sampleRateHz: number;
6
10
  channels: 1 | 2;
7
11
  };
8
12
  export type AudioChunk = ArrayBuffer | ArrayBufferView;
13
+ export type VoiceLanguageStrategy = {
14
+ mode: 'auto-detect';
15
+ allowedLanguages?: string[];
16
+ } | {
17
+ mode: 'fixed';
18
+ primaryLanguage: string;
19
+ secondaryLanguages?: string[];
20
+ } | {
21
+ mode: 'allow-switching';
22
+ primaryLanguage?: string;
23
+ secondaryLanguages: string[];
24
+ };
9
25
  export type VoicePhraseHint = {
10
26
  text: string;
11
27
  aliases?: string[];
12
28
  boost?: number;
13
29
  metadata?: Record<string, unknown>;
14
30
  };
31
+ export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
32
+ export type VoiceDomainTerm = {
33
+ text: string;
34
+ aliases?: string[];
35
+ boost?: number;
36
+ language?: string;
37
+ metadata?: Record<string, unknown>;
38
+ pronunciation?: string;
39
+ };
40
+ export type VoiceLexiconEntry = {
41
+ text: string;
42
+ aliases?: string[];
43
+ language?: string;
44
+ metadata?: Record<string, unknown>;
45
+ pronunciation?: string;
46
+ };
15
47
  export type Transcript = {
16
48
  id: string;
17
49
  text: string;
18
50
  isFinal: boolean;
19
51
  confidence?: number;
20
52
  language?: string;
53
+ speaker?: string | number;
21
54
  startedAtMs?: number;
22
55
  endedAtMs?: number;
23
56
  vendor?: string;
@@ -26,6 +59,7 @@ export type VoiceTranscriptQuality = {
26
59
  averageConfidence?: number;
27
60
  confidenceSampleCount: number;
28
61
  correction?: VoiceTurnCorrectionDiagnostics;
62
+ cost?: VoiceTurnCostEstimate;
29
63
  fallbackUsed: boolean;
30
64
  finalTranscriptCount: number;
31
65
  fallback?: VoiceFallbackDiagnostics;
@@ -42,6 +76,13 @@ export type VoiceTurnCorrectionDiagnostics = {
42
76
  provider?: string;
43
77
  reason?: string;
44
78
  };
79
+ export type VoiceTurnCostEstimate = {
80
+ estimatedRelativeCostUnits: number;
81
+ fallbackAttemptCount: number;
82
+ fallbackReplayAudioMs: number;
83
+ primaryAudioMs: number;
84
+ totalBillableAudioMs: number;
85
+ };
45
86
  export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
46
87
  export type VoiceFallbackDiagnostics = {
47
88
  attempted: boolean;
@@ -97,6 +138,8 @@ export type STTAdapterSession = {
97
138
  export type STTAdapterOpenOptions = {
98
139
  sessionId: string;
99
140
  format: AudioFormat;
141
+ languageStrategy?: VoiceLanguageStrategy;
142
+ lexicon?: VoiceLexiconEntry[];
100
143
  phraseHints?: VoicePhraseHint[];
101
144
  signal?: AbortSignal;
102
145
  };
@@ -122,6 +165,7 @@ export type TTSAdapterSession = {
122
165
  };
123
166
  export type TTSAdapterOpenOptions = {
124
167
  sessionId: string;
168
+ lexicon?: VoiceLexiconEntry[];
125
169
  signal?: AbortSignal;
126
170
  };
127
171
  export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
@@ -139,6 +183,8 @@ export type RealtimeAdapterSession = {
139
183
  export type RealtimeAdapterOpenOptions = {
140
184
  sessionId: string;
141
185
  format: AudioFormat;
186
+ languageStrategy?: VoiceLanguageStrategy;
187
+ lexicon?: VoiceLexiconEntry[];
142
188
  phraseHints?: VoicePhraseHint[];
143
189
  signal?: AbortSignal;
144
190
  };
@@ -156,6 +202,17 @@ export type VoiceTurnRecord<TResult = unknown> = {
156
202
  committedAt: number;
157
203
  result?: TResult;
158
204
  };
205
+ export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
206
+ fallbackPassCostUnit?: number;
207
+ onTurnCost?: (input: {
208
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
209
+ context: TContext;
210
+ estimate: VoiceTurnCostEstimate;
211
+ session: TSession;
212
+ turn: VoiceTurnRecord<TResult>;
213
+ }) => Promise<void> | void;
214
+ primaryPassCostUnit?: number;
215
+ };
159
216
  export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
160
217
  id: string;
161
218
  createdAt: number;
@@ -185,6 +242,7 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
185
242
  transcriptIds: string[];
186
243
  committedAt: number;
187
244
  };
245
+ call?: VoiceCallLifecycleState;
188
246
  metadata?: TMeta;
189
247
  scenarioId?: string;
190
248
  };
@@ -195,6 +253,22 @@ export type VoiceSessionSummary = {
195
253
  status: VoiceSessionStatus;
196
254
  turnCount: number;
197
255
  };
256
+ export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
257
+ export type VoiceCallLifecycleEvent = {
258
+ at: number;
259
+ type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
260
+ disposition?: VoiceCallDisposition;
261
+ metadata?: Record<string, unknown>;
262
+ reason?: string;
263
+ target?: string;
264
+ };
265
+ export type VoiceCallLifecycleState = {
266
+ disposition?: VoiceCallDisposition;
267
+ endedAt?: number;
268
+ events: VoiceCallLifecycleEvent[];
269
+ lastEventAt: number;
270
+ startedAt: number;
271
+ };
198
272
  export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
199
273
  export type VoiceLogger = {
200
274
  debug?: (message: string, meta?: Record<string, unknown>) => void;
@@ -207,7 +281,7 @@ export type VoiceReconnectConfig = {
207
281
  timeout?: number;
208
282
  maxAttempts?: number;
209
283
  };
210
- export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
284
+ export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
211
285
  export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
212
286
  export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
213
287
  export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
@@ -271,7 +345,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
271
345
  commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
272
346
  disconnect: (event?: VoiceCloseEvent) => Promise<void>;
273
347
  complete: (result?: TResult) => Promise<void>;
348
+ escalate: (input: {
349
+ metadata?: Record<string, unknown>;
350
+ reason: string;
351
+ result?: TResult;
352
+ }) => Promise<void>;
274
353
  fail: (error: unknown) => Promise<void>;
354
+ markNoAnswer: (input?: {
355
+ metadata?: Record<string, unknown>;
356
+ result?: TResult;
357
+ }) => Promise<void>;
358
+ markVoicemail: (input?: {
359
+ metadata?: Record<string, unknown>;
360
+ result?: TResult;
361
+ }) => Promise<void>;
362
+ transfer: (input: {
363
+ metadata?: Record<string, unknown>;
364
+ reason?: string;
365
+ result?: TResult;
366
+ target: string;
367
+ }) => Promise<void>;
275
368
  close: (reason?: string) => Promise<void>;
276
369
  snapshot: () => Promise<TSession>;
277
370
  };
@@ -279,6 +372,21 @@ export type VoiceRouteResult<TResult = unknown> = {
279
372
  complete?: boolean;
280
373
  result?: TResult;
281
374
  assistantText?: string;
375
+ transfer?: {
376
+ metadata?: Record<string, unknown>;
377
+ reason?: string;
378
+ target: string;
379
+ };
380
+ escalate?: {
381
+ metadata?: Record<string, unknown>;
382
+ reason: string;
383
+ };
384
+ voicemail?: {
385
+ metadata?: Record<string, unknown>;
386
+ };
387
+ noAnswer?: {
388
+ metadata?: Record<string, unknown>;
389
+ };
282
390
  };
283
391
  export type VoiceTurnCorrectionResult = string | {
284
392
  text: string;
@@ -290,6 +398,7 @@ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends Voic
290
398
  api: VoiceSessionHandle<TContext, TSession, TResult>;
291
399
  context: TContext;
292
400
  fallback?: VoiceFallbackDiagnostics;
401
+ lexicon: VoiceLexiconEntry[];
293
402
  phraseHints: VoicePhraseHint[];
294
403
  session: TSession;
295
404
  text: string;
@@ -300,6 +409,11 @@ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
300
409
  scenarioId?: string;
301
410
  sessionId: string;
302
411
  }) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
412
+ export type VoiceLexiconResolver<TContext = unknown> = (input: {
413
+ context: TContext;
414
+ scenarioId?: string;
415
+ sessionId: string;
416
+ }) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
303
417
  export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
304
418
  context: TContext;
305
419
  session: TSession;
@@ -308,6 +422,20 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
308
422
  }) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
309
423
  export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
310
424
  export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
425
+ onCallStart?: (input: {
426
+ context: TContext;
427
+ session: TSession;
428
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
429
+ }) => Promise<void> | void;
430
+ onCallEnd?: (input: {
431
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
432
+ context: TContext;
433
+ disposition: VoiceCallDisposition;
434
+ metadata?: Record<string, unknown>;
435
+ reason?: string;
436
+ session: TSession;
437
+ target?: string;
438
+ }) => Promise<void> | void;
311
439
  onSession?: (input: {
312
440
  context: TContext;
313
441
  session: TSession;
@@ -327,6 +455,87 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
327
455
  error: unknown;
328
456
  api?: VoiceSessionHandle<TContext, TSession, TResult>;
329
457
  }) => Promise<void> | void;
458
+ onEscalation?: (input: {
459
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
460
+ context: TContext;
461
+ metadata?: Record<string, unknown>;
462
+ reason: string;
463
+ session: TSession;
464
+ }) => Promise<void> | void;
465
+ onNoAnswer?: (input: {
466
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
467
+ context: TContext;
468
+ metadata?: Record<string, unknown>;
469
+ session: TSession;
470
+ }) => Promise<void> | void;
471
+ onTransfer?: (input: {
472
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
473
+ context: TContext;
474
+ metadata?: Record<string, unknown>;
475
+ reason?: string;
476
+ session: TSession;
477
+ target: string;
478
+ }) => Promise<void> | void;
479
+ onVoicemail?: (input: {
480
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
481
+ context: TContext;
482
+ metadata?: Record<string, unknown>;
483
+ session: TSession;
484
+ }) => Promise<void> | void;
485
+ };
486
+ export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
487
+ buildReview?: (input: {
488
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
489
+ context: TContext;
490
+ disposition: VoiceCallDisposition;
491
+ metadata?: Record<string, unknown>;
492
+ reason?: string;
493
+ result?: TResult;
494
+ session: TSession;
495
+ target?: string;
496
+ }) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
497
+ createTaskFromReview?: (input: {
498
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
499
+ context: TContext;
500
+ disposition: VoiceCallDisposition;
501
+ review: StoredVoiceCallReviewArtifact;
502
+ session: TSession;
503
+ }) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
504
+ resolveTaskPolicy?: (input: {
505
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
506
+ context: TContext;
507
+ disposition: VoiceCallDisposition;
508
+ metadata?: Record<string, unknown>;
509
+ reason?: string;
510
+ review?: StoredVoiceCallReviewArtifact;
511
+ session: TSession;
512
+ target?: string;
513
+ task: StoredVoiceOpsTask;
514
+ }) => Promise<VoiceOpsTaskPolicy | void> | VoiceOpsTaskPolicy | void;
515
+ resolveTaskAssignment?: (input: {
516
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
517
+ context: TContext;
518
+ disposition: VoiceCallDisposition;
519
+ metadata?: Record<string, unknown>;
520
+ reason?: string;
521
+ review?: StoredVoiceCallReviewArtifact;
522
+ session: TSession;
523
+ target?: string;
524
+ task: StoredVoiceOpsTask;
525
+ }) => Promise<VoiceOpsTaskAssignmentRule | void> | VoiceOpsTaskAssignmentRule | void;
526
+ taskAssignmentRules?: VoiceOpsTaskAssignmentRules;
527
+ taskPolicies?: VoiceOpsDispositionTaskPolicies;
528
+ events?: VoiceIntegrationEventStore;
529
+ onEvent?: (input: {
530
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
531
+ context: TContext;
532
+ event: StoredVoiceIntegrationEvent;
533
+ session: TSession;
534
+ }) => Promise<void> | void;
535
+ reviews?: VoiceCallReviewStore;
536
+ sinks?: VoiceIntegrationSink[];
537
+ tasks?: VoiceOpsTaskStore;
538
+ webhook?: VoiceIntegrationWebhookConfig;
330
539
  };
331
540
  export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
332
541
  onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
@@ -337,8 +546,15 @@ export type VoiceScenario = {
337
546
  description?: string;
338
547
  metadata?: Record<string, unknown>;
339
548
  };
549
+ export type VoiceExpectedSpeakerTurn = {
550
+ speaker: string;
551
+ text: string;
552
+ };
340
553
  export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
554
+ costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
341
555
  path: string;
556
+ languageStrategy?: VoiceLanguageStrategy;
557
+ lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
342
558
  phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
343
559
  preset?: VoiceRuntimePreset;
344
560
  stt: STTAdapter;
@@ -351,14 +567,21 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
351
567
  audioConditioning?: VoiceAudioConditioningConfig;
352
568
  logger?: VoiceLogger;
353
569
  htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
570
+ ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
571
+ trace?: VoiceTraceEventStore;
354
572
  } & VoiceRouteConfig<TContext, TSession, TResult>;
355
573
  export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
574
+ costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
356
575
  id: string;
357
576
  context: TContext;
358
577
  socket: VoiceSocket;
359
578
  stt: STTAdapter;
579
+ tts?: TTSAdapter;
580
+ languageStrategy?: VoiceLanguageStrategy;
581
+ lexicon?: VoiceLexiconEntry[];
360
582
  sttFallback?: VoiceResolvedSTTFallbackConfig;
361
583
  store: VoiceSessionStore<TSession>;
584
+ trace?: VoiceTraceEventStore;
362
585
  reconnect: Required<VoiceReconnectConfig>;
363
586
  phraseHints?: VoicePhraseHint[];
364
587
  scenarioId?: string;
@@ -408,6 +631,13 @@ export type VoiceServerAssistantMessage = {
408
631
  text: string;
409
632
  turnId?: string;
410
633
  };
634
+ export type VoiceServerAudioMessage = {
635
+ type: 'audio';
636
+ chunkBase64: string;
637
+ format: AudioFormat;
638
+ receivedAt: number;
639
+ turnId?: string;
640
+ };
411
641
  export type VoiceServerCompleteMessage = {
412
642
  type: 'complete';
413
643
  sessionId: string;
@@ -420,7 +650,7 @@ export type VoiceServerErrorMessage = {
420
650
  export type VoiceServerPongMessage = {
421
651
  type: 'pong';
422
652
  };
423
- export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
653
+ export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
424
654
  export type VoiceConnectionOptions = {
425
655
  protocols?: string[];
426
656
  scenarioId?: string;
@@ -440,6 +670,30 @@ export type VoiceControllerOptions = {
440
670
  capture?: VoiceCaptureOptions;
441
671
  autoStopOnComplete?: boolean;
442
672
  };
673
+ export type VoiceBargeInOptions = {
674
+ enabled?: boolean;
675
+ interruptOnPartial?: boolean;
676
+ interruptThreshold?: number;
677
+ };
678
+ export type VoiceAudioPlayerOptions = {
679
+ autoStart?: boolean;
680
+ createAudioContext?: () => AudioContext;
681
+ lookaheadMs?: number;
682
+ };
683
+ export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
684
+ audioPlayer?: VoiceAudioPlayerOptions;
685
+ bargeIn?: VoiceBargeInOptions;
686
+ };
687
+ export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
688
+ export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
689
+ export type VoiceSTTRoutingStrategy = {
690
+ benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
691
+ correctionMode: VoiceSTTRoutingCorrectionMode;
692
+ goal: VoiceSTTRoutingGoal;
693
+ notes: string[];
694
+ preset: VoiceRuntimePreset;
695
+ sttLifecycle: VoiceSTTLifecycle;
696
+ };
443
697
  export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
444
698
  assistantTexts: string[];
445
699
  partial: string;
@@ -480,6 +734,12 @@ export type VoiceStreamState<TResult = unknown> = {
480
734
  partial: string;
481
735
  turns: VoiceTurnRecord<TResult>[];
482
736
  assistantTexts: string[];
737
+ assistantAudio: Array<{
738
+ chunk: Uint8Array;
739
+ format: AudioFormat;
740
+ receivedAt: number;
741
+ turnId?: string;
742
+ }>;
483
743
  error: string | null;
484
744
  isConnected: boolean;
485
745
  };
@@ -502,11 +762,52 @@ export type VoiceStream<TResult = unknown> = {
502
762
  subscribe: (subscriber: () => void) => () => void;
503
763
  turns: VoiceTurnRecord<TResult>[];
504
764
  assistantTexts: string[];
765
+ assistantAudio: Array<{
766
+ chunk: Uint8Array;
767
+ format: AudioFormat;
768
+ receivedAt: number;
769
+ turnId?: string;
770
+ }>;
505
771
  };
506
772
  export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
507
773
  isRecording: boolean;
508
774
  recordingError: string | null;
509
775
  };
776
+ export type VoiceAudioPlayerState = {
777
+ activeSourceCount: number;
778
+ error: string | null;
779
+ isActive: boolean;
780
+ isPlaying: boolean;
781
+ lastInterruptLatencyMs?: number;
782
+ lastPlaybackStopLatencyMs?: number;
783
+ processedChunkCount: number;
784
+ queuedChunkCount: number;
785
+ };
786
+ export type VoiceAudioPlayerSource = {
787
+ assistantAudio: VoiceStreamState['assistantAudio'];
788
+ subscribe: (subscriber: () => void) => () => void;
789
+ };
790
+ export type VoiceAudioPlayer = {
791
+ close: () => Promise<void>;
792
+ error: string | null;
793
+ getSnapshot: () => VoiceAudioPlayerState;
794
+ activeSourceCount: number;
795
+ isActive: boolean;
796
+ isPlaying: boolean;
797
+ interrupt: () => Promise<void>;
798
+ lastInterruptLatencyMs?: number;
799
+ lastPlaybackStopLatencyMs?: number;
800
+ pause: () => Promise<void>;
801
+ processedChunkCount: number;
802
+ queuedChunkCount: number;
803
+ start: () => Promise<void>;
804
+ subscribe: (subscriber: () => void) => () => void;
805
+ };
806
+ export type VoiceBargeInBinding = {
807
+ close: () => void;
808
+ handleLevel: (level: number) => void;
809
+ sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
810
+ };
510
811
  export type VoiceController<TResult = unknown> = {
511
812
  bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
512
813
  close: () => void;
@@ -532,6 +833,16 @@ export type VoiceController<TResult = unknown> = {
532
833
  toggleRecording: () => Promise<void>;
533
834
  turns: VoiceTurnRecord<TResult>[];
534
835
  assistantTexts: string[];
836
+ assistantAudio: Array<{
837
+ chunk: Uint8Array;
838
+ format: AudioFormat;
839
+ receivedAt: number;
840
+ turnId?: string;
841
+ }>;
842
+ };
843
+ export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
844
+ audioPlayer: VoiceAudioPlayer;
845
+ interruptAssistant: () => Promise<void>;
535
846
  };
536
847
  export type VoiceHTMXBindingOptions = {
537
848
  element: Element | string;
@@ -556,6 +867,12 @@ export type VoiceStoreAction<TResult = unknown> = {
556
867
  } | {
557
868
  type: 'assistant';
558
869
  text: string;
870
+ } | {
871
+ type: 'audio';
872
+ chunk: Uint8Array;
873
+ format: AudioFormat;
874
+ receivedAt: number;
875
+ turnId?: string;
559
876
  } | {
560
877
  type: 'complete';
561
878
  sessionId: string;