@absolutejs/voice 0.0.20 → 0.0.21

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (40)
  1. package/README.md +387 -4
  2. package/dist/angular/index.d.ts +1 -0
  3. package/dist/angular/index.js +669 -3
  4. package/dist/angular/voice-controller.service.d.ts +21 -0
  5. package/dist/audioConditioning.d.ts +3 -0
  6. package/dist/client/actions.d.ts +7 -0
  7. package/dist/client/connection.d.ts +5 -0
  8. package/dist/client/controller.d.ts +2 -0
  9. package/dist/client/htmxBootstrap.js +576 -167
  10. package/dist/client/index.d.ts +1 -0
  11. package/dist/client/index.js +486 -3
  12. package/dist/client/microphone.d.ts +4 -2
  13. package/dist/correction.d.ts +16 -0
  14. package/dist/index.d.ts +4 -0
  15. package/dist/index.js +1314 -283
  16. package/dist/presets.d.ts +13 -0
  17. package/dist/react/index.d.ts +1 -0
  18. package/dist/react/index.js +642 -3
  19. package/dist/react/useVoiceController.d.ts +20 -0
  20. package/dist/react/useVoiceStream.d.ts +1 -0
  21. package/dist/store.d.ts +2 -2
  22. package/dist/svelte/index.d.ts +1 -0
  23. package/dist/svelte/index.js +607 -3
  24. package/dist/testing/benchmark.d.ts +36 -0
  25. package/dist/testing/index.js +1453 -241
  26. package/dist/testing/sessionBenchmark.d.ts +67 -2
  27. package/dist/testing/stt.d.ts +1 -0
  28. package/dist/turnDetection.d.ts +5 -1
  29. package/dist/turnProfiles.d.ts +6 -0
  30. package/dist/types.d.ts +198 -8
  31. package/dist/vue/index.d.ts +1 -0
  32. package/dist/vue/index.js +660 -3
  33. package/dist/vue/useVoiceController.d.ts +19 -0
  34. package/fixtures/README.md +9 -0
  35. package/fixtures/manifest.json +59 -1
  36. package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
  37. package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
  38. package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
  39. package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
  40. package/package.json +21 -1
@@ -1,13 +1,23 @@
1
- import type { STTAdapter } from '../types';
1
+ import type { STTAdapter, VoiceAudioConditioningConfig, VoicePhraseHint, VoiceSTTFallbackConfig, VoiceSTTLifecycle, VoiceTurnCorrectionHandler, VoiceTurnProfile, VoiceTranscriptQuality } from '../types';
2
2
  import { type VoiceTranscriptAccuracy } from './accuracy';
3
3
  import type { VoiceTestFixture } from './fixtures';
4
4
  export type VoiceSessionBenchmarkScenario = VoiceTestFixture & {
5
5
  expectedTurnTexts: string[];
6
+ phraseHints?: VoicePhraseHint[];
6
7
  reconnectAtChunkIndex?: number;
7
8
  reconnectPauseMs?: number;
8
9
  silenceMs?: number;
9
10
  speechThreshold?: number;
11
+ transcriptStabilityMs?: number;
10
12
  transcriptThreshold?: number;
13
+ turnProfile?: VoiceTurnProfile;
14
+ audioConditioning?: VoiceAudioConditioningConfig;
15
+ sttLifecycle?: VoiceSTTLifecycle;
16
+ };
17
+ export type VoiceSessionBenchmarkTraceEntry = {
18
+ atMs: number;
19
+ data?: unknown;
20
+ phase: string;
11
21
  };
12
22
  export type VoiceSessionBenchmarkTurnResult = {
13
23
  actualText: string;
@@ -15,6 +25,7 @@ export type VoiceSessionBenchmarkTurnResult = {
15
25
  expectedText?: string;
16
26
  index: number;
17
27
  passes: boolean;
28
+ quality?: VoiceTranscriptQuality;
18
29
  };
19
30
  export type VoiceSessionBenchmarkScenarioResult = {
20
31
  actualTurns: string[];
@@ -28,6 +39,7 @@ export type VoiceSessionBenchmarkScenarioResult = {
28
39
  title: string;
29
40
  turnCountDelta: number;
30
41
  turnResults: VoiceSessionBenchmarkTurnResult[];
42
+ trace?: VoiceSessionBenchmarkTraceEntry[];
31
43
  };
32
44
  export type VoiceSessionBenchmarkSummary = {
33
45
  adapterId: string;
@@ -47,15 +59,68 @@ export type VoiceSessionBenchmarkReport = {
47
59
  scenarios: VoiceSessionBenchmarkScenarioResult[];
48
60
  summary: VoiceSessionBenchmarkSummary;
49
61
  };
50
- export declare const runVoiceSessionBenchmarkScenario: (adapter: STTAdapter, fixture: VoiceSessionBenchmarkScenario) => Promise<VoiceSessionBenchmarkScenarioResult>;
62
+ export type VoiceSessionBenchmarkScenarioAggregate = {
63
+ averageElapsedMs: number;
64
+ averageWordErrorRate: number;
65
+ bestWordErrorRate: number;
66
+ fixtureId: string;
67
+ passCount: number;
68
+ passRate: number;
69
+ reconnectSuccessRate: number;
70
+ runCount: number;
71
+ tags: string[];
72
+ title: string;
73
+ worstWordErrorRate: number;
74
+ };
75
+ export type VoiceSessionBenchmarkSeriesSummary = {
76
+ adapterId: string;
77
+ averageElapsedMs: number;
78
+ averagePassRate: number;
79
+ averageWordErrorRate: number;
80
+ flakyScenarioCount: number;
81
+ generatedRunCount: number;
82
+ reconnectSuccessRate: number;
83
+ scenarioCount: number;
84
+ stableScenarioCount: number;
85
+ totalPassCount: number;
86
+ totalRunCount: number;
87
+ };
88
+ export type VoiceSessionBenchmarkSeriesReport = {
89
+ adapterId: string;
90
+ generatedAt: number;
91
+ runCount: number;
92
+ scenarios: VoiceSessionBenchmarkScenarioAggregate[];
93
+ summary: VoiceSessionBenchmarkSeriesSummary;
94
+ };
95
+ export declare const runVoiceSessionBenchmarkScenario: (adapter: STTAdapter, fixture: VoiceSessionBenchmarkScenario, options?: {
96
+ correctTurn?: VoiceTurnCorrectionHandler;
97
+ sttFallback?: VoiceSTTFallbackConfig;
98
+ trace?: boolean;
99
+ }) => Promise<VoiceSessionBenchmarkScenarioResult>;
51
100
  export declare const summarizeVoiceSessionBenchmark: (adapterId: string, scenarios: VoiceSessionBenchmarkScenarioResult[]) => VoiceSessionBenchmarkSummary;
101
+ export declare const summarizeVoiceSessionBenchmarkSeries: (input: {
102
+ adapterId: string;
103
+ reports: VoiceSessionBenchmarkReport[];
104
+ }) => VoiceSessionBenchmarkSeriesReport;
52
105
  export declare const runVoiceSessionBenchmark: (input: {
53
106
  adapter: STTAdapter;
54
107
  adapterId: string;
108
+ correctTurn?: VoiceTurnCorrectionHandler;
55
109
  scenarios: VoiceSessionBenchmarkScenario[];
110
+ sttFallback?: VoiceSTTFallbackConfig;
111
+ trace?: boolean;
56
112
  }) => Promise<{
57
113
  adapterId: string;
58
114
  generatedAt: number;
59
115
  scenarios: VoiceSessionBenchmarkScenarioResult[];
60
116
  summary: VoiceSessionBenchmarkSummary;
61
117
  }>;
118
+ export declare const runVoiceSessionBenchmarkSeries: (input: {
119
+ adapter: STTAdapter;
120
+ adapterId: string;
121
+ correctTurn?: VoiceTurnCorrectionHandler;
122
+ runs: number;
123
+ scenarios: VoiceSessionBenchmarkScenario[];
124
+ sttFallback?: VoiceSTTFallbackConfig;
125
+ trace?: boolean;
126
+ }) => Promise<VoiceSessionBenchmarkSeriesReport>;
@@ -17,6 +17,7 @@ export type VoiceSTTAdapterHarnessResult = {
17
17
  finalEvents: VoiceFinalEvent[];
18
18
  finalText: string;
19
19
  partialEvents: VoicePartialEvent[];
20
+ speechEndedAt: number;
20
21
  startedAt: number;
21
22
  };
22
23
  export declare const runSTTAdapterFixture: (adapter: STTAdapter, fixture: VoiceTestFixture, options?: VoiceSTTAdapterHarnessOptions) => Promise<VoiceSTTAdapterHarnessResult>;
@@ -2,4 +2,8 @@ import type { AudioChunk, Transcript } from './types';
2
2
  export declare const DEFAULT_SILENCE_MS = 700;
3
3
  export declare const DEFAULT_SPEECH_THRESHOLD = 0.015;
4
4
  export declare const measureAudioLevel: (audio: AudioChunk) => number;
5
- export declare const buildTurnText: (transcripts: Transcript[], partialText: string) => string;
5
+ export declare const selectPreferredTranscriptText: (currentText: string, nextText: string) => string;
6
+ export declare const buildTurnText: (transcripts: Transcript[], partialText: string, options?: {
7
+ partialEndedAtMs?: number;
8
+ partialStartedAtMs?: number;
9
+ }) => string;
@@ -0,0 +1,6 @@
1
+ import type { VoiceResolvedTurnDetectionConfig, VoiceTurnDetectionConfig, VoiceTurnQualityProfile, VoiceTurnProfile } from './types';
2
+ export declare const TURN_PROFILE_DEFAULTS: Record<VoiceTurnProfile, Omit<VoiceResolvedTurnDetectionConfig, 'profile'>>;
3
+ export declare const QUALITY_PROFILE_DEFAULTS: Record<VoiceTurnQualityProfile, Partial<VoiceResolvedTurnDetectionConfig>>;
4
+ export declare const DEFAULT_TURN_PROFILE: VoiceTurnProfile;
5
+ export declare const DEFAULT_QUALITY_PROFILE: VoiceTurnQualityProfile;
6
+ export declare const resolveTurnDetectionConfig: (config?: VoiceTurnDetectionConfig) => VoiceResolvedTurnDetectionConfig;
package/dist/types.d.ts CHANGED
@@ -6,6 +6,12 @@ export type AudioFormat = {
6
6
  channels: 1 | 2;
7
7
  };
8
8
  export type AudioChunk = ArrayBuffer | ArrayBufferView;
9
+ export type VoicePhraseHint = {
10
+ text: string;
11
+ aliases?: string[];
12
+ boost?: number;
13
+ metadata?: Record<string, unknown>;
14
+ };
9
15
  export type Transcript = {
10
16
  id: string;
11
17
  text: string;
@@ -16,6 +22,39 @@ export type Transcript = {
16
22
  endedAtMs?: number;
17
23
  vendor?: string;
18
24
  };
25
+ export type VoiceTranscriptQuality = {
26
+ averageConfidence?: number;
27
+ confidenceSampleCount: number;
28
+ correction?: VoiceTurnCorrectionDiagnostics;
29
+ fallbackUsed: boolean;
30
+ finalTranscriptCount: number;
31
+ fallback?: VoiceFallbackDiagnostics;
32
+ partialTranscriptCount: number;
33
+ selectedTranscriptCount: number;
34
+ source: 'fallback' | 'primary';
35
+ };
36
+ export type VoiceTurnCorrectionDiagnostics = {
37
+ attempted: boolean;
38
+ changed: boolean;
39
+ correctedText: string;
40
+ metadata?: Record<string, unknown>;
41
+ originalText: string;
42
+ provider?: string;
43
+ reason?: string;
44
+ };
45
+ export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
46
+ export type VoiceFallbackDiagnostics = {
47
+ attempted: boolean;
48
+ fallbackConfidence?: number;
49
+ fallbackText?: string;
50
+ fallbackWordCount?: number;
51
+ primaryConfidence: number;
52
+ primaryText: string;
53
+ primaryWordCount: number;
54
+ selected: boolean;
55
+ selectionReason: VoiceFallbackSelectionReason;
56
+ trigger: 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
57
+ };
19
58
  export type VoicePartialEvent = {
20
59
  type: 'partial';
21
60
  transcript: Transcript;
@@ -58,6 +97,7 @@ export type STTAdapterSession = {
58
97
  export type STTAdapterOpenOptions = {
59
98
  sessionId: string;
60
99
  format: AudioFormat;
100
+ phraseHints?: VoicePhraseHint[];
61
101
  signal?: AbortSignal;
62
102
  };
63
103
  export type STTAdapter<TOptions extends STTAdapterOpenOptions = STTAdapterOpenOptions> = {
@@ -99,6 +139,7 @@ export type RealtimeAdapterSession = {
99
139
  export type RealtimeAdapterOpenOptions = {
100
140
  sessionId: string;
101
141
  format: AudioFormat;
142
+ phraseHints?: VoicePhraseHint[];
102
143
  signal?: AbortSignal;
103
144
  };
104
145
  export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
@@ -109,6 +150,7 @@ export type VoiceSessionStatus = 'active' | 'reconnecting' | 'completed' | 'fail
109
150
  export type VoiceTurnRecord<TResult = unknown> = {
110
151
  id: string;
111
152
  text: string;
153
+ quality?: VoiceTranscriptQuality;
112
154
  transcripts: Transcript[];
113
155
  assistantText?: string;
114
156
  committedAt: number;
@@ -123,8 +165,13 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
123
165
  currentTurn: {
124
166
  transcripts: Transcript[];
125
167
  partialText: string;
168
+ partialStartedAt?: number;
169
+ partialEndedAt?: number;
126
170
  finalText: string;
127
171
  lastAudioAt?: number;
172
+ lastSpeechAt?: number;
173
+ lastTranscriptAt?: number;
174
+ silenceStartedAt?: number;
128
175
  };
129
176
  turns: VoiceTurnRecord<TResult>[];
130
177
  committedTurnIds: string[];
@@ -132,7 +179,14 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
132
179
  attempts: number;
133
180
  lastDisconnectAt?: number;
134
181
  };
182
+ lastCommittedTurn?: {
183
+ signature: string;
184
+ text: string;
185
+ transcriptIds: string[];
186
+ committedAt: number;
187
+ };
135
188
  metadata?: TMeta;
189
+ scenarioId?: string;
136
190
  };
137
191
  export type VoiceSessionSummary = {
138
192
  id: string;
@@ -153,6 +207,59 @@ export type VoiceReconnectConfig = {
153
207
  timeout?: number;
154
208
  maxAttempts?: number;
155
209
  };
210
+ export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
211
+ export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
212
+ export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
213
+ export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
214
+ export type VoiceTurnFallbackTrigger = 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
215
+ export type VoiceSTTFallbackConfig = {
216
+ adapter: STTAdapter;
217
+ trigger?: VoiceTurnFallbackTrigger;
218
+ confidenceThreshold?: number;
219
+ minTextLength?: number;
220
+ replayWindowMs?: number;
221
+ settleMs?: number;
222
+ completionTimeoutMs?: number;
223
+ maxAttemptsPerTurn?: number;
224
+ };
225
+ export type VoiceResolvedSTTFallbackConfig = {
226
+ adapter: STTAdapter;
227
+ trigger: VoiceTurnFallbackTrigger;
228
+ confidenceThreshold: number;
229
+ minTextLength: number;
230
+ replayWindowMs: number;
231
+ settleMs: number;
232
+ completionTimeoutMs: number;
233
+ maxAttemptsPerTurn: number;
234
+ };
235
+ export type VoiceTurnDetectionConfig = {
236
+ profile?: VoiceTurnProfile;
237
+ qualityProfile?: VoiceTurnQualityProfile;
238
+ silenceMs?: number;
239
+ speechThreshold?: number;
240
+ transcriptStabilityMs?: number;
241
+ };
242
+ export type VoiceResolvedTurnDetectionConfig = {
243
+ qualityProfile: VoiceTurnQualityProfile;
244
+ profile: VoiceTurnProfile;
245
+ silenceMs: number;
246
+ speechThreshold: number;
247
+ transcriptStabilityMs: number;
248
+ };
249
+ export type VoiceAudioConditioningConfig = {
250
+ enabled?: boolean;
251
+ targetLevel?: number;
252
+ maxGain?: number;
253
+ noiseGateThreshold?: number;
254
+ noiseGateAttenuation?: number;
255
+ };
256
+ export type VoiceResolvedAudioConditioningConfig = {
257
+ enabled: true;
258
+ targetLevel: number;
259
+ maxGain: number;
260
+ noiseGateThreshold: number;
261
+ noiseGateAttenuation: number;
262
+ };
156
263
  export type VoiceSocket = {
157
264
  send: (data: string | Uint8Array | ArrayBuffer) => void | Promise<void>;
158
265
  close: (code?: number, reason?: string) => void | Promise<void>;
@@ -173,6 +280,26 @@ export type VoiceRouteResult<TResult = unknown> = {
173
280
  result?: TResult;
174
281
  assistantText?: string;
175
282
  };
283
+ export type VoiceTurnCorrectionResult = string | {
284
+ text: string;
285
+ reason?: string;
286
+ provider?: string;
287
+ metadata?: Record<string, unknown>;
288
+ };
289
+ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
290
+ api: VoiceSessionHandle<TContext, TSession, TResult>;
291
+ context: TContext;
292
+ fallback?: VoiceFallbackDiagnostics;
293
+ phraseHints: VoicePhraseHint[];
294
+ session: TSession;
295
+ text: string;
296
+ transcripts: Transcript[];
297
+ }) => Promise<VoiceTurnCorrectionResult | void> | VoiceTurnCorrectionResult | void;
298
+ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
299
+ context: TContext;
300
+ scenarioId?: string;
301
+ sessionId: string;
302
+ }) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
176
303
  export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
177
304
  context: TContext;
178
305
  session: TSession;
@@ -186,6 +313,7 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
186
313
  session: TSession;
187
314
  api: VoiceSessionHandle<TContext, TSession, TResult>;
188
315
  }) => Promise<void> | void;
316
+ correctTurn?: VoiceTurnCorrectionHandler<TContext, TSession, TResult>;
189
317
  onTurn: VoiceOnTurnHandler<TContext, TSession, TResult>;
190
318
  onComplete: (input: {
191
319
  context: TContext;
@@ -203,16 +331,24 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
203
331
  export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
204
332
  onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
205
333
  };
334
+ export type VoiceScenario = {
335
+ id: string;
336
+ name?: string;
337
+ description?: string;
338
+ metadata?: Record<string, unknown>;
339
+ };
206
340
  export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
207
341
  path: string;
342
+ phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
343
+ preset?: VoiceRuntimePreset;
208
344
  stt: STTAdapter;
345
+ sttFallback?: VoiceSTTFallbackConfig;
346
+ sttLifecycle?: VoiceSTTLifecycle;
209
347
  tts?: TTSAdapter;
210
348
  session: VoiceSessionStore<NoInfer<TSession>>;
211
349
  reconnect?: VoiceReconnectConfig;
212
- turnDetection?: {
213
- silenceMs?: number;
214
- speechThreshold?: number;
215
- };
350
+ turnDetection?: VoiceTurnDetectionConfig;
351
+ audioConditioning?: VoiceAudioConditioningConfig;
216
352
  logger?: VoiceLogger;
217
353
  htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
218
354
  } & VoiceRouteConfig<TContext, TSession, TResult>;
@@ -221,12 +357,14 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
221
357
  context: TContext;
222
358
  socket: VoiceSocket;
223
359
  stt: STTAdapter;
360
+ sttFallback?: VoiceResolvedSTTFallbackConfig;
224
361
  store: VoiceSessionStore<TSession>;
225
362
  reconnect: Required<VoiceReconnectConfig>;
226
- turnDetection: {
227
- silenceMs: number;
228
- speechThreshold: number;
229
- };
363
+ phraseHints?: VoicePhraseHint[];
364
+ scenarioId?: string;
365
+ sttLifecycle: VoiceSTTLifecycle;
366
+ turnDetection: VoiceResolvedTurnDetectionConfig;
367
+ audioConditioning?: VoiceResolvedAudioConditioningConfig;
230
368
  route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
231
369
  logger?: VoiceLogger;
232
370
  };
@@ -234,6 +372,7 @@ export type CreateVoiceSession = <TContext = unknown, TSession extends VoiceSess
234
372
  export type VoiceClientStartMessage = {
235
373
  type: 'start';
236
374
  sessionId?: string;
375
+ scenarioId?: string;
237
376
  };
238
377
  export type VoiceClientEndTurnMessage = {
239
378
  type: 'end_turn';
@@ -250,6 +389,7 @@ export type VoiceServerSessionMessage = {
250
389
  type: 'session';
251
390
  sessionId: string;
252
391
  status: VoiceSessionStatus;
392
+ scenarioId?: string;
253
393
  };
254
394
  export type VoiceServerPartialMessage = {
255
395
  type: 'partial';
@@ -283,14 +423,27 @@ export type VoiceServerPongMessage = {
283
423
  export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
284
424
  export type VoiceConnectionOptions = {
285
425
  protocols?: string[];
426
+ scenarioId?: string;
286
427
  reconnect?: boolean;
287
428
  maxReconnectAttempts?: number;
288
429
  pingInterval?: number;
289
430
  sessionId?: string;
290
431
  };
432
+ export type VoiceCaptureOptions = {
433
+ channelCount?: 1 | 2;
434
+ onLevel?: (level: number) => void;
435
+ sampleRateHz?: number;
436
+ };
437
+ export type VoiceControllerOptions = {
438
+ preset?: VoiceRuntimePreset;
439
+ connection?: VoiceConnectionOptions;
440
+ capture?: VoiceCaptureOptions;
441
+ autoStopOnComplete?: boolean;
442
+ };
291
443
  export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
292
444
  assistantTexts: string[];
293
445
  partial: string;
446
+ scenarioId?: string;
294
447
  result?: TResult;
295
448
  session?: TSession;
296
449
  sessionId?: string;
@@ -322,6 +475,7 @@ export type VoiceHTMXOptions<TSession extends VoiceSessionRecord = VoiceSessionR
322
475
  export type VoiceHTMXConfig<TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceHTMXRenderer<TSession, TResult> | VoiceHTMXOptions<TSession, TResult>;
323
476
  export type VoiceStreamState<TResult = unknown> = {
324
477
  sessionId: string | null;
478
+ scenarioId: string | null;
325
479
  status: VoiceSessionStatus | 'idle';
326
480
  partial: string;
327
481
  turns: VoiceTurnRecord<TResult>[];
@@ -331,6 +485,10 @@ export type VoiceStreamState<TResult = unknown> = {
331
485
  };
332
486
  export type VoiceStream<TResult = unknown> = {
333
487
  close: () => void;
488
+ start: (input?: {
489
+ scenarioId?: string;
490
+ sessionId?: string;
491
+ }) => Promise<void>;
334
492
  endTurn: () => void;
335
493
  error: string | null;
336
494
  getServerSnapshot: () => VoiceStreamState<TResult>;
@@ -339,8 +497,39 @@ export type VoiceStream<TResult = unknown> = {
339
497
  partial: string;
340
498
  sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
341
499
  sessionId: string | null;
500
+ scenarioId: string | null;
501
+ status: VoiceSessionStatus | 'idle';
502
+ subscribe: (subscriber: () => void) => () => void;
503
+ turns: VoiceTurnRecord<TResult>[];
504
+ assistantTexts: string[];
505
+ };
506
+ export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
507
+ isRecording: boolean;
508
+ recordingError: string | null;
509
+ };
510
+ export type VoiceController<TResult = unknown> = {
511
+ bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
512
+ close: () => void;
513
+ endTurn: () => void;
514
+ start: (input?: {
515
+ scenarioId?: string;
516
+ sessionId?: string;
517
+ }) => Promise<void>;
518
+ error: string | null;
519
+ getServerSnapshot: () => VoiceControllerState<TResult>;
520
+ getSnapshot: () => VoiceControllerState<TResult>;
521
+ isConnected: boolean;
522
+ isRecording: boolean;
523
+ partial: string;
524
+ recordingError: string | null;
525
+ sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
526
+ sessionId: string | null;
527
+ scenarioId: string | null;
528
+ startRecording: () => Promise<void>;
342
529
  status: VoiceSessionStatus | 'idle';
530
+ stopRecording: () => void;
343
531
  subscribe: (subscriber: () => void) => () => void;
532
+ toggleRecording: () => Promise<void>;
344
533
  turns: VoiceTurnRecord<TResult>[];
345
534
  assistantTexts: string[];
346
535
  };
@@ -353,6 +542,7 @@ export type VoiceHTMXBindingOptions = {
353
542
  export type VoiceStoreAction<TResult = unknown> = {
354
543
  type: 'session';
355
544
  sessionId: string;
545
+ scenarioId?: string;
356
546
  status: VoiceSessionStatus;
357
547
  } | {
358
548
  type: 'partial';
@@ -1 +1,2 @@
1
1
  export { useVoiceStream } from './useVoiceStream';
2
+ export { useVoiceController } from './useVoiceController';