@absolutejs/voice 0.0.20 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +884 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +759 -3
- package/dist/angular/voice-controller.service.d.ts +27 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +48 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +660 -167
- package/dist/client/index.d.ts +3 -0
- package/dist/client/index.js +991 -6
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +33 -0
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +3721 -298
- package/dist/ops.d.ts +100 -0
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +728 -3
- package/dist/react/useVoiceController.d.ts +26 -0
- package/dist/react/useVoiceStream.d.ts +7 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +691 -3
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +93 -2
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +6247 -402
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +92 -2
- package/dist/testing/stt.d.ts +3 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +487 -10
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +750 -3
- package/dist/vue/useVoiceController.d.ts +30 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +135 -1
package/dist/types.d.ts
CHANGED
|
@@ -1,21 +1,99 @@
|
|
|
1
1
|
import type { SessionStore } from '@absolutejs/absolute';
|
|
2
|
+
import type { StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
|
|
3
|
+
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
|
|
2
4
|
export type AudioFormat = {
|
|
3
5
|
container: 'raw';
|
|
4
|
-
encoding: 'pcm_s16le';
|
|
6
|
+
encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
|
|
5
7
|
sampleRateHz: number;
|
|
6
8
|
channels: 1 | 2;
|
|
7
9
|
};
|
|
8
10
|
export type AudioChunk = ArrayBuffer | ArrayBufferView;
|
|
11
|
+
export type VoiceLanguageStrategy = {
|
|
12
|
+
mode: 'auto-detect';
|
|
13
|
+
allowedLanguages?: string[];
|
|
14
|
+
} | {
|
|
15
|
+
mode: 'fixed';
|
|
16
|
+
primaryLanguage: string;
|
|
17
|
+
secondaryLanguages?: string[];
|
|
18
|
+
} | {
|
|
19
|
+
mode: 'allow-switching';
|
|
20
|
+
primaryLanguage?: string;
|
|
21
|
+
secondaryLanguages: string[];
|
|
22
|
+
};
|
|
23
|
+
export type VoicePhraseHint = {
|
|
24
|
+
text: string;
|
|
25
|
+
aliases?: string[];
|
|
26
|
+
boost?: number;
|
|
27
|
+
metadata?: Record<string, unknown>;
|
|
28
|
+
};
|
|
29
|
+
export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
|
|
30
|
+
export type VoiceDomainTerm = {
|
|
31
|
+
text: string;
|
|
32
|
+
aliases?: string[];
|
|
33
|
+
boost?: number;
|
|
34
|
+
language?: string;
|
|
35
|
+
metadata?: Record<string, unknown>;
|
|
36
|
+
pronunciation?: string;
|
|
37
|
+
};
|
|
38
|
+
export type VoiceLexiconEntry = {
|
|
39
|
+
text: string;
|
|
40
|
+
aliases?: string[];
|
|
41
|
+
language?: string;
|
|
42
|
+
metadata?: Record<string, unknown>;
|
|
43
|
+
pronunciation?: string;
|
|
44
|
+
};
|
|
9
45
|
export type Transcript = {
|
|
10
46
|
id: string;
|
|
11
47
|
text: string;
|
|
12
48
|
isFinal: boolean;
|
|
13
49
|
confidence?: number;
|
|
14
50
|
language?: string;
|
|
51
|
+
speaker?: string | number;
|
|
15
52
|
startedAtMs?: number;
|
|
16
53
|
endedAtMs?: number;
|
|
17
54
|
vendor?: string;
|
|
18
55
|
};
|
|
56
|
+
export type VoiceTranscriptQuality = {
|
|
57
|
+
averageConfidence?: number;
|
|
58
|
+
confidenceSampleCount: number;
|
|
59
|
+
correction?: VoiceTurnCorrectionDiagnostics;
|
|
60
|
+
cost?: VoiceTurnCostEstimate;
|
|
61
|
+
fallbackUsed: boolean;
|
|
62
|
+
finalTranscriptCount: number;
|
|
63
|
+
fallback?: VoiceFallbackDiagnostics;
|
|
64
|
+
partialTranscriptCount: number;
|
|
65
|
+
selectedTranscriptCount: number;
|
|
66
|
+
source: 'fallback' | 'primary';
|
|
67
|
+
};
|
|
68
|
+
export type VoiceTurnCorrectionDiagnostics = {
|
|
69
|
+
attempted: boolean;
|
|
70
|
+
changed: boolean;
|
|
71
|
+
correctedText: string;
|
|
72
|
+
metadata?: Record<string, unknown>;
|
|
73
|
+
originalText: string;
|
|
74
|
+
provider?: string;
|
|
75
|
+
reason?: string;
|
|
76
|
+
};
|
|
77
|
+
export type VoiceTurnCostEstimate = {
|
|
78
|
+
estimatedRelativeCostUnits: number;
|
|
79
|
+
fallbackAttemptCount: number;
|
|
80
|
+
fallbackReplayAudioMs: number;
|
|
81
|
+
primaryAudioMs: number;
|
|
82
|
+
totalBillableAudioMs: number;
|
|
83
|
+
};
|
|
84
|
+
export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
|
|
85
|
+
export type VoiceFallbackDiagnostics = {
|
|
86
|
+
attempted: boolean;
|
|
87
|
+
fallbackConfidence?: number;
|
|
88
|
+
fallbackText?: string;
|
|
89
|
+
fallbackWordCount?: number;
|
|
90
|
+
primaryConfidence: number;
|
|
91
|
+
primaryText: string;
|
|
92
|
+
primaryWordCount: number;
|
|
93
|
+
selected: boolean;
|
|
94
|
+
selectionReason: VoiceFallbackSelectionReason;
|
|
95
|
+
trigger: 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
|
|
96
|
+
};
|
|
19
97
|
export type VoicePartialEvent = {
|
|
20
98
|
type: 'partial';
|
|
21
99
|
transcript: Transcript;
|
|
@@ -58,6 +136,9 @@ export type STTAdapterSession = {
|
|
|
58
136
|
export type STTAdapterOpenOptions = {
|
|
59
137
|
sessionId: string;
|
|
60
138
|
format: AudioFormat;
|
|
139
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
140
|
+
lexicon?: VoiceLexiconEntry[];
|
|
141
|
+
phraseHints?: VoicePhraseHint[];
|
|
61
142
|
signal?: AbortSignal;
|
|
62
143
|
};
|
|
63
144
|
export type STTAdapter<TOptions extends STTAdapterOpenOptions = STTAdapterOpenOptions> = {
|
|
@@ -82,6 +163,7 @@ export type TTSAdapterSession = {
|
|
|
82
163
|
};
|
|
83
164
|
export type TTSAdapterOpenOptions = {
|
|
84
165
|
sessionId: string;
|
|
166
|
+
lexicon?: VoiceLexiconEntry[];
|
|
85
167
|
signal?: AbortSignal;
|
|
86
168
|
};
|
|
87
169
|
export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
|
|
@@ -99,6 +181,9 @@ export type RealtimeAdapterSession = {
|
|
|
99
181
|
export type RealtimeAdapterOpenOptions = {
|
|
100
182
|
sessionId: string;
|
|
101
183
|
format: AudioFormat;
|
|
184
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
185
|
+
lexicon?: VoiceLexiconEntry[];
|
|
186
|
+
phraseHints?: VoicePhraseHint[];
|
|
102
187
|
signal?: AbortSignal;
|
|
103
188
|
};
|
|
104
189
|
export type RealtimeAdapter<TOptions extends RealtimeAdapterOpenOptions = RealtimeAdapterOpenOptions> = {
|
|
@@ -109,11 +194,23 @@ export type VoiceSessionStatus = 'active' | 'reconnecting' | 'completed' | 'fail
|
|
|
109
194
|
export type VoiceTurnRecord<TResult = unknown> = {
|
|
110
195
|
id: string;
|
|
111
196
|
text: string;
|
|
197
|
+
quality?: VoiceTranscriptQuality;
|
|
112
198
|
transcripts: Transcript[];
|
|
113
199
|
assistantText?: string;
|
|
114
200
|
committedAt: number;
|
|
115
201
|
result?: TResult;
|
|
116
202
|
};
|
|
203
|
+
export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
204
|
+
fallbackPassCostUnit?: number;
|
|
205
|
+
onTurnCost?: (input: {
|
|
206
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
207
|
+
context: TContext;
|
|
208
|
+
estimate: VoiceTurnCostEstimate;
|
|
209
|
+
session: TSession;
|
|
210
|
+
turn: VoiceTurnRecord<TResult>;
|
|
211
|
+
}) => Promise<void> | void;
|
|
212
|
+
primaryPassCostUnit?: number;
|
|
213
|
+
};
|
|
117
214
|
export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
|
|
118
215
|
id: string;
|
|
119
216
|
createdAt: number;
|
|
@@ -123,8 +220,13 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
123
220
|
currentTurn: {
|
|
124
221
|
transcripts: Transcript[];
|
|
125
222
|
partialText: string;
|
|
223
|
+
partialStartedAt?: number;
|
|
224
|
+
partialEndedAt?: number;
|
|
126
225
|
finalText: string;
|
|
127
226
|
lastAudioAt?: number;
|
|
227
|
+
lastSpeechAt?: number;
|
|
228
|
+
lastTranscriptAt?: number;
|
|
229
|
+
silenceStartedAt?: number;
|
|
128
230
|
};
|
|
129
231
|
turns: VoiceTurnRecord<TResult>[];
|
|
130
232
|
committedTurnIds: string[];
|
|
@@ -132,7 +234,15 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
132
234
|
attempts: number;
|
|
133
235
|
lastDisconnectAt?: number;
|
|
134
236
|
};
|
|
237
|
+
lastCommittedTurn?: {
|
|
238
|
+
signature: string;
|
|
239
|
+
text: string;
|
|
240
|
+
transcriptIds: string[];
|
|
241
|
+
committedAt: number;
|
|
242
|
+
};
|
|
243
|
+
call?: VoiceCallLifecycleState;
|
|
135
244
|
metadata?: TMeta;
|
|
245
|
+
scenarioId?: string;
|
|
136
246
|
};
|
|
137
247
|
export type VoiceSessionSummary = {
|
|
138
248
|
id: string;
|
|
@@ -141,6 +251,22 @@ export type VoiceSessionSummary = {
|
|
|
141
251
|
status: VoiceSessionStatus;
|
|
142
252
|
turnCount: number;
|
|
143
253
|
};
|
|
254
|
+
export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
|
|
255
|
+
export type VoiceCallLifecycleEvent = {
|
|
256
|
+
at: number;
|
|
257
|
+
type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
|
|
258
|
+
disposition?: VoiceCallDisposition;
|
|
259
|
+
metadata?: Record<string, unknown>;
|
|
260
|
+
reason?: string;
|
|
261
|
+
target?: string;
|
|
262
|
+
};
|
|
263
|
+
export type VoiceCallLifecycleState = {
|
|
264
|
+
disposition?: VoiceCallDisposition;
|
|
265
|
+
endedAt?: number;
|
|
266
|
+
events: VoiceCallLifecycleEvent[];
|
|
267
|
+
lastEventAt: number;
|
|
268
|
+
startedAt: number;
|
|
269
|
+
};
|
|
144
270
|
export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
|
|
145
271
|
export type VoiceLogger = {
|
|
146
272
|
debug?: (message: string, meta?: Record<string, unknown>) => void;
|
|
@@ -153,6 +279,59 @@ export type VoiceReconnectConfig = {
|
|
|
153
279
|
timeout?: number;
|
|
154
280
|
maxAttempts?: number;
|
|
155
281
|
};
|
|
282
|
+
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
|
|
283
|
+
export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
|
|
284
|
+
export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
|
|
285
|
+
export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
|
|
286
|
+
export type VoiceTurnFallbackTrigger = 'empty-turn' | 'low-confidence' | 'empty-or-low-confidence' | 'always';
|
|
287
|
+
export type VoiceSTTFallbackConfig = {
|
|
288
|
+
adapter: STTAdapter;
|
|
289
|
+
trigger?: VoiceTurnFallbackTrigger;
|
|
290
|
+
confidenceThreshold?: number;
|
|
291
|
+
minTextLength?: number;
|
|
292
|
+
replayWindowMs?: number;
|
|
293
|
+
settleMs?: number;
|
|
294
|
+
completionTimeoutMs?: number;
|
|
295
|
+
maxAttemptsPerTurn?: number;
|
|
296
|
+
};
|
|
297
|
+
export type VoiceResolvedSTTFallbackConfig = {
|
|
298
|
+
adapter: STTAdapter;
|
|
299
|
+
trigger: VoiceTurnFallbackTrigger;
|
|
300
|
+
confidenceThreshold: number;
|
|
301
|
+
minTextLength: number;
|
|
302
|
+
replayWindowMs: number;
|
|
303
|
+
settleMs: number;
|
|
304
|
+
completionTimeoutMs: number;
|
|
305
|
+
maxAttemptsPerTurn: number;
|
|
306
|
+
};
|
|
307
|
+
export type VoiceTurnDetectionConfig = {
|
|
308
|
+
profile?: VoiceTurnProfile;
|
|
309
|
+
qualityProfile?: VoiceTurnQualityProfile;
|
|
310
|
+
silenceMs?: number;
|
|
311
|
+
speechThreshold?: number;
|
|
312
|
+
transcriptStabilityMs?: number;
|
|
313
|
+
};
|
|
314
|
+
export type VoiceResolvedTurnDetectionConfig = {
|
|
315
|
+
qualityProfile: VoiceTurnQualityProfile;
|
|
316
|
+
profile: VoiceTurnProfile;
|
|
317
|
+
silenceMs: number;
|
|
318
|
+
speechThreshold: number;
|
|
319
|
+
transcriptStabilityMs: number;
|
|
320
|
+
};
|
|
321
|
+
export type VoiceAudioConditioningConfig = {
|
|
322
|
+
enabled?: boolean;
|
|
323
|
+
targetLevel?: number;
|
|
324
|
+
maxGain?: number;
|
|
325
|
+
noiseGateThreshold?: number;
|
|
326
|
+
noiseGateAttenuation?: number;
|
|
327
|
+
};
|
|
328
|
+
export type VoiceResolvedAudioConditioningConfig = {
|
|
329
|
+
enabled: true;
|
|
330
|
+
targetLevel: number;
|
|
331
|
+
maxGain: number;
|
|
332
|
+
noiseGateThreshold: number;
|
|
333
|
+
noiseGateAttenuation: number;
|
|
334
|
+
};
|
|
156
335
|
export type VoiceSocket = {
|
|
157
336
|
send: (data: string | Uint8Array | ArrayBuffer) => void | Promise<void>;
|
|
158
337
|
close: (code?: number, reason?: string) => void | Promise<void>;
|
|
@@ -164,7 +343,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
|
|
|
164
343
|
commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
|
|
165
344
|
disconnect: (event?: VoiceCloseEvent) => Promise<void>;
|
|
166
345
|
complete: (result?: TResult) => Promise<void>;
|
|
346
|
+
escalate: (input: {
|
|
347
|
+
metadata?: Record<string, unknown>;
|
|
348
|
+
reason: string;
|
|
349
|
+
result?: TResult;
|
|
350
|
+
}) => Promise<void>;
|
|
167
351
|
fail: (error: unknown) => Promise<void>;
|
|
352
|
+
markNoAnswer: (input?: {
|
|
353
|
+
metadata?: Record<string, unknown>;
|
|
354
|
+
result?: TResult;
|
|
355
|
+
}) => Promise<void>;
|
|
356
|
+
markVoicemail: (input?: {
|
|
357
|
+
metadata?: Record<string, unknown>;
|
|
358
|
+
result?: TResult;
|
|
359
|
+
}) => Promise<void>;
|
|
360
|
+
transfer: (input: {
|
|
361
|
+
metadata?: Record<string, unknown>;
|
|
362
|
+
reason?: string;
|
|
363
|
+
result?: TResult;
|
|
364
|
+
target: string;
|
|
365
|
+
}) => Promise<void>;
|
|
168
366
|
close: (reason?: string) => Promise<void>;
|
|
169
367
|
snapshot: () => Promise<TSession>;
|
|
170
368
|
};
|
|
@@ -172,7 +370,48 @@ export type VoiceRouteResult<TResult = unknown> = {
|
|
|
172
370
|
complete?: boolean;
|
|
173
371
|
result?: TResult;
|
|
174
372
|
assistantText?: string;
|
|
373
|
+
transfer?: {
|
|
374
|
+
metadata?: Record<string, unknown>;
|
|
375
|
+
reason?: string;
|
|
376
|
+
target: string;
|
|
377
|
+
};
|
|
378
|
+
escalate?: {
|
|
379
|
+
metadata?: Record<string, unknown>;
|
|
380
|
+
reason: string;
|
|
381
|
+
};
|
|
382
|
+
voicemail?: {
|
|
383
|
+
metadata?: Record<string, unknown>;
|
|
384
|
+
};
|
|
385
|
+
noAnswer?: {
|
|
386
|
+
metadata?: Record<string, unknown>;
|
|
387
|
+
};
|
|
175
388
|
};
|
|
389
|
+
export type VoiceTurnCorrectionResult = string | {
|
|
390
|
+
text: string;
|
|
391
|
+
reason?: string;
|
|
392
|
+
provider?: string;
|
|
393
|
+
metadata?: Record<string, unknown>;
|
|
394
|
+
};
|
|
395
|
+
export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
396
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
397
|
+
context: TContext;
|
|
398
|
+
fallback?: VoiceFallbackDiagnostics;
|
|
399
|
+
lexicon: VoiceLexiconEntry[];
|
|
400
|
+
phraseHints: VoicePhraseHint[];
|
|
401
|
+
session: TSession;
|
|
402
|
+
text: string;
|
|
403
|
+
transcripts: Transcript[];
|
|
404
|
+
}) => Promise<VoiceTurnCorrectionResult | void> | VoiceTurnCorrectionResult | void;
|
|
405
|
+
export type VoicePhraseHintResolver<TContext = unknown> = (input: {
|
|
406
|
+
context: TContext;
|
|
407
|
+
scenarioId?: string;
|
|
408
|
+
sessionId: string;
|
|
409
|
+
}) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
|
|
410
|
+
export type VoiceLexiconResolver<TContext = unknown> = (input: {
|
|
411
|
+
context: TContext;
|
|
412
|
+
scenarioId?: string;
|
|
413
|
+
sessionId: string;
|
|
414
|
+
}) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
|
|
176
415
|
export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
177
416
|
context: TContext;
|
|
178
417
|
session: TSession;
|
|
@@ -181,11 +420,26 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
|
|
|
181
420
|
}) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
|
|
182
421
|
export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
|
|
183
422
|
export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
423
|
+
onCallStart?: (input: {
|
|
424
|
+
context: TContext;
|
|
425
|
+
session: TSession;
|
|
426
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
427
|
+
}) => Promise<void> | void;
|
|
428
|
+
onCallEnd?: (input: {
|
|
429
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
430
|
+
context: TContext;
|
|
431
|
+
disposition: VoiceCallDisposition;
|
|
432
|
+
metadata?: Record<string, unknown>;
|
|
433
|
+
reason?: string;
|
|
434
|
+
session: TSession;
|
|
435
|
+
target?: string;
|
|
436
|
+
}) => Promise<void> | void;
|
|
184
437
|
onSession?: (input: {
|
|
185
438
|
context: TContext;
|
|
186
439
|
session: TSession;
|
|
187
440
|
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
188
441
|
}) => Promise<void> | void;
|
|
442
|
+
correctTurn?: VoiceTurnCorrectionHandler<TContext, TSession, TResult>;
|
|
189
443
|
onTurn: VoiceOnTurnHandler<TContext, TSession, TResult>;
|
|
190
444
|
onComplete: (input: {
|
|
191
445
|
context: TContext;
|
|
@@ -199,34 +453,111 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
|
|
|
199
453
|
error: unknown;
|
|
200
454
|
api?: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
201
455
|
}) => Promise<void> | void;
|
|
456
|
+
onEscalation?: (input: {
|
|
457
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
458
|
+
context: TContext;
|
|
459
|
+
metadata?: Record<string, unknown>;
|
|
460
|
+
reason: string;
|
|
461
|
+
session: TSession;
|
|
462
|
+
}) => Promise<void> | void;
|
|
463
|
+
onNoAnswer?: (input: {
|
|
464
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
465
|
+
context: TContext;
|
|
466
|
+
metadata?: Record<string, unknown>;
|
|
467
|
+
session: TSession;
|
|
468
|
+
}) => Promise<void> | void;
|
|
469
|
+
onTransfer?: (input: {
|
|
470
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
471
|
+
context: TContext;
|
|
472
|
+
metadata?: Record<string, unknown>;
|
|
473
|
+
reason?: string;
|
|
474
|
+
session: TSession;
|
|
475
|
+
target: string;
|
|
476
|
+
}) => Promise<void> | void;
|
|
477
|
+
onVoicemail?: (input: {
|
|
478
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
479
|
+
context: TContext;
|
|
480
|
+
metadata?: Record<string, unknown>;
|
|
481
|
+
session: TSession;
|
|
482
|
+
}) => Promise<void> | void;
|
|
483
|
+
};
|
|
484
|
+
export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
485
|
+
buildReview?: (input: {
|
|
486
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
487
|
+
context: TContext;
|
|
488
|
+
disposition: VoiceCallDisposition;
|
|
489
|
+
metadata?: Record<string, unknown>;
|
|
490
|
+
reason?: string;
|
|
491
|
+
result?: TResult;
|
|
492
|
+
session: TSession;
|
|
493
|
+
target?: string;
|
|
494
|
+
}) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
|
|
495
|
+
createTaskFromReview?: (input: {
|
|
496
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
497
|
+
context: TContext;
|
|
498
|
+
disposition: VoiceCallDisposition;
|
|
499
|
+
review: StoredVoiceCallReviewArtifact;
|
|
500
|
+
session: TSession;
|
|
501
|
+
}) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
|
|
502
|
+
events?: VoiceIntegrationEventStore;
|
|
503
|
+
onEvent?: (input: {
|
|
504
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
505
|
+
context: TContext;
|
|
506
|
+
event: StoredVoiceIntegrationEvent;
|
|
507
|
+
session: TSession;
|
|
508
|
+
}) => Promise<void> | void;
|
|
509
|
+
reviews?: VoiceCallReviewStore;
|
|
510
|
+
tasks?: VoiceOpsTaskStore;
|
|
202
511
|
};
|
|
203
512
|
export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
|
|
204
513
|
onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
|
|
205
514
|
};
|
|
515
|
+
export type VoiceScenario = {
|
|
516
|
+
id: string;
|
|
517
|
+
name?: string;
|
|
518
|
+
description?: string;
|
|
519
|
+
metadata?: Record<string, unknown>;
|
|
520
|
+
};
|
|
521
|
+
export type VoiceExpectedSpeakerTurn = {
|
|
522
|
+
speaker: string;
|
|
523
|
+
text: string;
|
|
524
|
+
};
|
|
206
525
|
export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
526
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
207
527
|
path: string;
|
|
528
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
529
|
+
lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
|
|
530
|
+
phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
|
|
531
|
+
preset?: VoiceRuntimePreset;
|
|
208
532
|
stt: STTAdapter;
|
|
533
|
+
sttFallback?: VoiceSTTFallbackConfig;
|
|
534
|
+
sttLifecycle?: VoiceSTTLifecycle;
|
|
209
535
|
tts?: TTSAdapter;
|
|
210
536
|
session: VoiceSessionStore<NoInfer<TSession>>;
|
|
211
537
|
reconnect?: VoiceReconnectConfig;
|
|
212
|
-
turnDetection?:
|
|
213
|
-
|
|
214
|
-
speechThreshold?: number;
|
|
215
|
-
};
|
|
538
|
+
turnDetection?: VoiceTurnDetectionConfig;
|
|
539
|
+
audioConditioning?: VoiceAudioConditioningConfig;
|
|
216
540
|
logger?: VoiceLogger;
|
|
217
541
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
542
|
+
ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
|
|
218
543
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
219
544
|
export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
545
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
220
546
|
id: string;
|
|
221
547
|
context: TContext;
|
|
222
548
|
socket: VoiceSocket;
|
|
223
549
|
stt: STTAdapter;
|
|
550
|
+
tts?: TTSAdapter;
|
|
551
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
552
|
+
lexicon?: VoiceLexiconEntry[];
|
|
553
|
+
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
224
554
|
store: VoiceSessionStore<TSession>;
|
|
225
555
|
reconnect: Required<VoiceReconnectConfig>;
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
556
|
+
phraseHints?: VoicePhraseHint[];
|
|
557
|
+
scenarioId?: string;
|
|
558
|
+
sttLifecycle: VoiceSTTLifecycle;
|
|
559
|
+
turnDetection: VoiceResolvedTurnDetectionConfig;
|
|
560
|
+
audioConditioning?: VoiceResolvedAudioConditioningConfig;
|
|
230
561
|
route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
|
|
231
562
|
logger?: VoiceLogger;
|
|
232
563
|
};
|
|
@@ -234,6 +565,7 @@ export type CreateVoiceSession = <TContext = unknown, TSession extends VoiceSess
|
|
|
234
565
|
export type VoiceClientStartMessage = {
|
|
235
566
|
type: 'start';
|
|
236
567
|
sessionId?: string;
|
|
568
|
+
scenarioId?: string;
|
|
237
569
|
};
|
|
238
570
|
export type VoiceClientEndTurnMessage = {
|
|
239
571
|
type: 'end_turn';
|
|
@@ -250,6 +582,7 @@ export type VoiceServerSessionMessage = {
|
|
|
250
582
|
type: 'session';
|
|
251
583
|
sessionId: string;
|
|
252
584
|
status: VoiceSessionStatus;
|
|
585
|
+
scenarioId?: string;
|
|
253
586
|
};
|
|
254
587
|
export type VoiceServerPartialMessage = {
|
|
255
588
|
type: 'partial';
|
|
@@ -268,6 +601,13 @@ export type VoiceServerAssistantMessage = {
|
|
|
268
601
|
text: string;
|
|
269
602
|
turnId?: string;
|
|
270
603
|
};
|
|
604
|
+
export type VoiceServerAudioMessage = {
|
|
605
|
+
type: 'audio';
|
|
606
|
+
chunkBase64: string;
|
|
607
|
+
format: AudioFormat;
|
|
608
|
+
receivedAt: number;
|
|
609
|
+
turnId?: string;
|
|
610
|
+
};
|
|
271
611
|
export type VoiceServerCompleteMessage = {
|
|
272
612
|
type: 'complete';
|
|
273
613
|
sessionId: string;
|
|
@@ -280,17 +620,54 @@ export type VoiceServerErrorMessage = {
|
|
|
280
620
|
export type VoiceServerPongMessage = {
|
|
281
621
|
type: 'pong';
|
|
282
622
|
};
|
|
283
|
-
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
623
|
+
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
284
624
|
export type VoiceConnectionOptions = {
|
|
285
625
|
protocols?: string[];
|
|
626
|
+
scenarioId?: string;
|
|
286
627
|
reconnect?: boolean;
|
|
287
628
|
maxReconnectAttempts?: number;
|
|
288
629
|
pingInterval?: number;
|
|
289
630
|
sessionId?: string;
|
|
290
631
|
};
|
|
632
|
+
export type VoiceCaptureOptions = {
|
|
633
|
+
channelCount?: 1 | 2;
|
|
634
|
+
onLevel?: (level: number) => void;
|
|
635
|
+
sampleRateHz?: number;
|
|
636
|
+
};
|
|
637
|
+
export type VoiceControllerOptions = {
|
|
638
|
+
preset?: VoiceRuntimePreset;
|
|
639
|
+
connection?: VoiceConnectionOptions;
|
|
640
|
+
capture?: VoiceCaptureOptions;
|
|
641
|
+
autoStopOnComplete?: boolean;
|
|
642
|
+
};
|
|
643
|
+
export type VoiceBargeInOptions = {
|
|
644
|
+
enabled?: boolean;
|
|
645
|
+
interruptOnPartial?: boolean;
|
|
646
|
+
interruptThreshold?: number;
|
|
647
|
+
};
|
|
648
|
+
export type VoiceAudioPlayerOptions = {
|
|
649
|
+
autoStart?: boolean;
|
|
650
|
+
createAudioContext?: () => AudioContext;
|
|
651
|
+
lookaheadMs?: number;
|
|
652
|
+
};
|
|
653
|
+
export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
|
|
654
|
+
audioPlayer?: VoiceAudioPlayerOptions;
|
|
655
|
+
bargeIn?: VoiceBargeInOptions;
|
|
656
|
+
};
|
|
657
|
+
export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
|
|
658
|
+
export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
|
|
659
|
+
export type VoiceSTTRoutingStrategy = {
|
|
660
|
+
benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
|
|
661
|
+
correctionMode: VoiceSTTRoutingCorrectionMode;
|
|
662
|
+
goal: VoiceSTTRoutingGoal;
|
|
663
|
+
notes: string[];
|
|
664
|
+
preset: VoiceRuntimePreset;
|
|
665
|
+
sttLifecycle: VoiceSTTLifecycle;
|
|
666
|
+
};
|
|
291
667
|
export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
|
|
292
668
|
assistantTexts: string[];
|
|
293
669
|
partial: string;
|
|
670
|
+
scenarioId?: string;
|
|
294
671
|
result?: TResult;
|
|
295
672
|
session?: TSession;
|
|
296
673
|
sessionId?: string;
|
|
@@ -322,15 +699,26 @@ export type VoiceHTMXOptions<TSession extends VoiceSessionRecord = VoiceSessionR
|
|
|
322
699
|
export type VoiceHTMXConfig<TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceHTMXRenderer<TSession, TResult> | VoiceHTMXOptions<TSession, TResult>;
|
|
323
700
|
export type VoiceStreamState<TResult = unknown> = {
|
|
324
701
|
sessionId: string | null;
|
|
702
|
+
scenarioId: string | null;
|
|
325
703
|
status: VoiceSessionStatus | 'idle';
|
|
326
704
|
partial: string;
|
|
327
705
|
turns: VoiceTurnRecord<TResult>[];
|
|
328
706
|
assistantTexts: string[];
|
|
707
|
+
assistantAudio: Array<{
|
|
708
|
+
chunk: Uint8Array;
|
|
709
|
+
format: AudioFormat;
|
|
710
|
+
receivedAt: number;
|
|
711
|
+
turnId?: string;
|
|
712
|
+
}>;
|
|
329
713
|
error: string | null;
|
|
330
714
|
isConnected: boolean;
|
|
331
715
|
};
|
|
332
716
|
export type VoiceStream<TResult = unknown> = {
|
|
333
717
|
close: () => void;
|
|
718
|
+
start: (input?: {
|
|
719
|
+
scenarioId?: string;
|
|
720
|
+
sessionId?: string;
|
|
721
|
+
}) => Promise<void>;
|
|
334
722
|
endTurn: () => void;
|
|
335
723
|
error: string | null;
|
|
336
724
|
getServerSnapshot: () => VoiceStreamState<TResult>;
|
|
@@ -339,10 +727,92 @@ export type VoiceStream<TResult = unknown> = {
|
|
|
339
727
|
partial: string;
|
|
340
728
|
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
341
729
|
sessionId: string | null;
|
|
730
|
+
scenarioId: string | null;
|
|
731
|
+
status: VoiceSessionStatus | 'idle';
|
|
732
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
733
|
+
turns: VoiceTurnRecord<TResult>[];
|
|
734
|
+
assistantTexts: string[];
|
|
735
|
+
assistantAudio: Array<{
|
|
736
|
+
chunk: Uint8Array;
|
|
737
|
+
format: AudioFormat;
|
|
738
|
+
receivedAt: number;
|
|
739
|
+
turnId?: string;
|
|
740
|
+
}>;
|
|
741
|
+
};
|
|
742
|
+
export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
|
|
743
|
+
isRecording: boolean;
|
|
744
|
+
recordingError: string | null;
|
|
745
|
+
};
|
|
746
|
+
export type VoiceAudioPlayerState = {
|
|
747
|
+
activeSourceCount: number;
|
|
748
|
+
error: string | null;
|
|
749
|
+
isActive: boolean;
|
|
750
|
+
isPlaying: boolean;
|
|
751
|
+
lastInterruptLatencyMs?: number;
|
|
752
|
+
lastPlaybackStopLatencyMs?: number;
|
|
753
|
+
processedChunkCount: number;
|
|
754
|
+
queuedChunkCount: number;
|
|
755
|
+
};
|
|
756
|
+
export type VoiceAudioPlayerSource = {
|
|
757
|
+
assistantAudio: VoiceStreamState['assistantAudio'];
|
|
758
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
759
|
+
};
|
|
760
|
+
export type VoiceAudioPlayer = {
|
|
761
|
+
close: () => Promise<void>;
|
|
762
|
+
error: string | null;
|
|
763
|
+
getSnapshot: () => VoiceAudioPlayerState;
|
|
764
|
+
activeSourceCount: number;
|
|
765
|
+
isActive: boolean;
|
|
766
|
+
isPlaying: boolean;
|
|
767
|
+
interrupt: () => Promise<void>;
|
|
768
|
+
lastInterruptLatencyMs?: number;
|
|
769
|
+
lastPlaybackStopLatencyMs?: number;
|
|
770
|
+
pause: () => Promise<void>;
|
|
771
|
+
processedChunkCount: number;
|
|
772
|
+
queuedChunkCount: number;
|
|
773
|
+
start: () => Promise<void>;
|
|
774
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
775
|
+
};
|
|
776
|
+
export type VoiceBargeInBinding = {
|
|
777
|
+
close: () => void;
|
|
778
|
+
handleLevel: (level: number) => void;
|
|
779
|
+
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
780
|
+
};
|
|
781
|
+
export type VoiceController<TResult = unknown> = {
|
|
782
|
+
bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
|
|
783
|
+
close: () => void;
|
|
784
|
+
endTurn: () => void;
|
|
785
|
+
start: (input?: {
|
|
786
|
+
scenarioId?: string;
|
|
787
|
+
sessionId?: string;
|
|
788
|
+
}) => Promise<void>;
|
|
789
|
+
error: string | null;
|
|
790
|
+
getServerSnapshot: () => VoiceControllerState<TResult>;
|
|
791
|
+
getSnapshot: () => VoiceControllerState<TResult>;
|
|
792
|
+
isConnected: boolean;
|
|
793
|
+
isRecording: boolean;
|
|
794
|
+
partial: string;
|
|
795
|
+
recordingError: string | null;
|
|
796
|
+
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
797
|
+
sessionId: string | null;
|
|
798
|
+
scenarioId: string | null;
|
|
799
|
+
startRecording: () => Promise<void>;
|
|
342
800
|
status: VoiceSessionStatus | 'idle';
|
|
801
|
+
stopRecording: () => void;
|
|
343
802
|
subscribe: (subscriber: () => void) => () => void;
|
|
803
|
+
toggleRecording: () => Promise<void>;
|
|
344
804
|
turns: VoiceTurnRecord<TResult>[];
|
|
345
805
|
assistantTexts: string[];
|
|
806
|
+
assistantAudio: Array<{
|
|
807
|
+
chunk: Uint8Array;
|
|
808
|
+
format: AudioFormat;
|
|
809
|
+
receivedAt: number;
|
|
810
|
+
turnId?: string;
|
|
811
|
+
}>;
|
|
812
|
+
};
|
|
813
|
+
export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
|
|
814
|
+
audioPlayer: VoiceAudioPlayer;
|
|
815
|
+
interruptAssistant: () => Promise<void>;
|
|
346
816
|
};
|
|
347
817
|
export type VoiceHTMXBindingOptions = {
|
|
348
818
|
element: Element | string;
|
|
@@ -353,6 +823,7 @@ export type VoiceHTMXBindingOptions = {
|
|
|
353
823
|
export type VoiceStoreAction<TResult = unknown> = {
|
|
354
824
|
type: 'session';
|
|
355
825
|
sessionId: string;
|
|
826
|
+
scenarioId?: string;
|
|
356
827
|
status: VoiceSessionStatus;
|
|
357
828
|
} | {
|
|
358
829
|
type: 'partial';
|
|
@@ -366,6 +837,12 @@ export type VoiceStoreAction<TResult = unknown> = {
|
|
|
366
837
|
} | {
|
|
367
838
|
type: 'assistant';
|
|
368
839
|
text: string;
|
|
840
|
+
} | {
|
|
841
|
+
type: 'audio';
|
|
842
|
+
chunk: Uint8Array;
|
|
843
|
+
format: AudioFormat;
|
|
844
|
+
receivedAt: number;
|
|
845
|
+
turnId?: string;
|
|
369
846
|
} | {
|
|
370
847
|
type: 'complete';
|
|
371
848
|
sessionId: string;
|