@absolutejs/voice 0.0.21 → 0.0.22-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1046 -2
- package/dist/agent.d.ts +113 -0
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +37 -0
- package/dist/index.d.ts +32 -1
- package/dist/index.js +8379 -1245
- package/dist/ops.d.ts +327 -0
- package/dist/opsPresets.d.ts +19 -0
- package/dist/opsRuntime.d.ts +66 -0
- package/dist/opsSinks.d.ts +149 -0
- package/dist/outcomeRecipes.d.ts +18 -0
- package/dist/postgresStore.d.ts +31 -0
- package/dist/queue.d.ts +276 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/s3Store.d.ts +14 -0
- package/dist/sqliteStore.d.ts +26 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +5094 -284
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/trace.d.ts +236 -0
- package/dist/types.d.ts +320 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/dist/types.d.ts
CHANGED
|
@@ -1,23 +1,56 @@
|
|
|
1
1
|
import type { SessionStore } from '@absolutejs/absolute';
|
|
2
|
+
import type { VoiceOpsDispositionTaskPolicies, VoiceOpsTaskAssignmentRule, VoiceOpsTaskAssignmentRules, VoiceIntegrationWebhookConfig, StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTaskPolicy, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
|
|
3
|
+
import type { VoiceIntegrationSink } from './opsSinks';
|
|
4
|
+
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
|
|
5
|
+
import type { VoiceTraceEventStore } from './trace';
|
|
2
6
|
export type AudioFormat = {
|
|
3
7
|
container: 'raw';
|
|
4
|
-
encoding: 'pcm_s16le';
|
|
8
|
+
encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
|
|
5
9
|
sampleRateHz: number;
|
|
6
10
|
channels: 1 | 2;
|
|
7
11
|
};
|
|
8
12
|
export type AudioChunk = ArrayBuffer | ArrayBufferView;
|
|
13
|
+
export type VoiceLanguageStrategy = {
|
|
14
|
+
mode: 'auto-detect';
|
|
15
|
+
allowedLanguages?: string[];
|
|
16
|
+
} | {
|
|
17
|
+
mode: 'fixed';
|
|
18
|
+
primaryLanguage: string;
|
|
19
|
+
secondaryLanguages?: string[];
|
|
20
|
+
} | {
|
|
21
|
+
mode: 'allow-switching';
|
|
22
|
+
primaryLanguage?: string;
|
|
23
|
+
secondaryLanguages: string[];
|
|
24
|
+
};
|
|
9
25
|
export type VoicePhraseHint = {
|
|
10
26
|
text: string;
|
|
11
27
|
aliases?: string[];
|
|
12
28
|
boost?: number;
|
|
13
29
|
metadata?: Record<string, unknown>;
|
|
14
30
|
};
|
|
31
|
+
export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
|
|
32
|
+
export type VoiceDomainTerm = {
|
|
33
|
+
text: string;
|
|
34
|
+
aliases?: string[];
|
|
35
|
+
boost?: number;
|
|
36
|
+
language?: string;
|
|
37
|
+
metadata?: Record<string, unknown>;
|
|
38
|
+
pronunciation?: string;
|
|
39
|
+
};
|
|
40
|
+
export type VoiceLexiconEntry = {
|
|
41
|
+
text: string;
|
|
42
|
+
aliases?: string[];
|
|
43
|
+
language?: string;
|
|
44
|
+
metadata?: Record<string, unknown>;
|
|
45
|
+
pronunciation?: string;
|
|
46
|
+
};
|
|
15
47
|
export type Transcript = {
|
|
16
48
|
id: string;
|
|
17
49
|
text: string;
|
|
18
50
|
isFinal: boolean;
|
|
19
51
|
confidence?: number;
|
|
20
52
|
language?: string;
|
|
53
|
+
speaker?: string | number;
|
|
21
54
|
startedAtMs?: number;
|
|
22
55
|
endedAtMs?: number;
|
|
23
56
|
vendor?: string;
|
|
@@ -26,6 +59,7 @@ export type VoiceTranscriptQuality = {
|
|
|
26
59
|
averageConfidence?: number;
|
|
27
60
|
confidenceSampleCount: number;
|
|
28
61
|
correction?: VoiceTurnCorrectionDiagnostics;
|
|
62
|
+
cost?: VoiceTurnCostEstimate;
|
|
29
63
|
fallbackUsed: boolean;
|
|
30
64
|
finalTranscriptCount: number;
|
|
31
65
|
fallback?: VoiceFallbackDiagnostics;
|
|
@@ -42,6 +76,13 @@ export type VoiceTurnCorrectionDiagnostics = {
|
|
|
42
76
|
provider?: string;
|
|
43
77
|
reason?: string;
|
|
44
78
|
};
|
|
79
|
+
export type VoiceTurnCostEstimate = {
|
|
80
|
+
estimatedRelativeCostUnits: number;
|
|
81
|
+
fallbackAttemptCount: number;
|
|
82
|
+
fallbackReplayAudioMs: number;
|
|
83
|
+
primaryAudioMs: number;
|
|
84
|
+
totalBillableAudioMs: number;
|
|
85
|
+
};
|
|
45
86
|
export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
|
|
46
87
|
export type VoiceFallbackDiagnostics = {
|
|
47
88
|
attempted: boolean;
|
|
@@ -97,6 +138,8 @@ export type STTAdapterSession = {
|
|
|
97
138
|
export type STTAdapterOpenOptions = {
|
|
98
139
|
sessionId: string;
|
|
99
140
|
format: AudioFormat;
|
|
141
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
142
|
+
lexicon?: VoiceLexiconEntry[];
|
|
100
143
|
phraseHints?: VoicePhraseHint[];
|
|
101
144
|
signal?: AbortSignal;
|
|
102
145
|
};
|
|
@@ -122,6 +165,7 @@ export type TTSAdapterSession = {
|
|
|
122
165
|
};
|
|
123
166
|
export type TTSAdapterOpenOptions = {
|
|
124
167
|
sessionId: string;
|
|
168
|
+
lexicon?: VoiceLexiconEntry[];
|
|
125
169
|
signal?: AbortSignal;
|
|
126
170
|
};
|
|
127
171
|
export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
|
|
@@ -139,6 +183,8 @@ export type RealtimeAdapterSession = {
|
|
|
139
183
|
export type RealtimeAdapterOpenOptions = {
|
|
140
184
|
sessionId: string;
|
|
141
185
|
format: AudioFormat;
|
|
186
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
187
|
+
lexicon?: VoiceLexiconEntry[];
|
|
142
188
|
phraseHints?: VoicePhraseHint[];
|
|
143
189
|
signal?: AbortSignal;
|
|
144
190
|
};
|
|
@@ -156,6 +202,17 @@ export type VoiceTurnRecord<TResult = unknown> = {
|
|
|
156
202
|
committedAt: number;
|
|
157
203
|
result?: TResult;
|
|
158
204
|
};
|
|
205
|
+
export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
206
|
+
fallbackPassCostUnit?: number;
|
|
207
|
+
onTurnCost?: (input: {
|
|
208
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
209
|
+
context: TContext;
|
|
210
|
+
estimate: VoiceTurnCostEstimate;
|
|
211
|
+
session: TSession;
|
|
212
|
+
turn: VoiceTurnRecord<TResult>;
|
|
213
|
+
}) => Promise<void> | void;
|
|
214
|
+
primaryPassCostUnit?: number;
|
|
215
|
+
};
|
|
159
216
|
export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
|
|
160
217
|
id: string;
|
|
161
218
|
createdAt: number;
|
|
@@ -185,6 +242,7 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
185
242
|
transcriptIds: string[];
|
|
186
243
|
committedAt: number;
|
|
187
244
|
};
|
|
245
|
+
call?: VoiceCallLifecycleState;
|
|
188
246
|
metadata?: TMeta;
|
|
189
247
|
scenarioId?: string;
|
|
190
248
|
};
|
|
@@ -195,6 +253,22 @@ export type VoiceSessionSummary = {
|
|
|
195
253
|
status: VoiceSessionStatus;
|
|
196
254
|
turnCount: number;
|
|
197
255
|
};
|
|
256
|
+
export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
|
|
257
|
+
export type VoiceCallLifecycleEvent = {
|
|
258
|
+
at: number;
|
|
259
|
+
type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
|
|
260
|
+
disposition?: VoiceCallDisposition;
|
|
261
|
+
metadata?: Record<string, unknown>;
|
|
262
|
+
reason?: string;
|
|
263
|
+
target?: string;
|
|
264
|
+
};
|
|
265
|
+
export type VoiceCallLifecycleState = {
|
|
266
|
+
disposition?: VoiceCallDisposition;
|
|
267
|
+
endedAt?: number;
|
|
268
|
+
events: VoiceCallLifecycleEvent[];
|
|
269
|
+
lastEventAt: number;
|
|
270
|
+
startedAt: number;
|
|
271
|
+
};
|
|
198
272
|
export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
|
|
199
273
|
export type VoiceLogger = {
|
|
200
274
|
debug?: (message: string, meta?: Record<string, unknown>) => void;
|
|
@@ -207,7 +281,7 @@ export type VoiceReconnectConfig = {
|
|
|
207
281
|
timeout?: number;
|
|
208
282
|
maxAttempts?: number;
|
|
209
283
|
};
|
|
210
|
-
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
|
|
284
|
+
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
|
|
211
285
|
export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
|
|
212
286
|
export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
|
|
213
287
|
export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
|
|
@@ -271,7 +345,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
|
|
|
271
345
|
commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
|
|
272
346
|
disconnect: (event?: VoiceCloseEvent) => Promise<void>;
|
|
273
347
|
complete: (result?: TResult) => Promise<void>;
|
|
348
|
+
escalate: (input: {
|
|
349
|
+
metadata?: Record<string, unknown>;
|
|
350
|
+
reason: string;
|
|
351
|
+
result?: TResult;
|
|
352
|
+
}) => Promise<void>;
|
|
274
353
|
fail: (error: unknown) => Promise<void>;
|
|
354
|
+
markNoAnswer: (input?: {
|
|
355
|
+
metadata?: Record<string, unknown>;
|
|
356
|
+
result?: TResult;
|
|
357
|
+
}) => Promise<void>;
|
|
358
|
+
markVoicemail: (input?: {
|
|
359
|
+
metadata?: Record<string, unknown>;
|
|
360
|
+
result?: TResult;
|
|
361
|
+
}) => Promise<void>;
|
|
362
|
+
transfer: (input: {
|
|
363
|
+
metadata?: Record<string, unknown>;
|
|
364
|
+
reason?: string;
|
|
365
|
+
result?: TResult;
|
|
366
|
+
target: string;
|
|
367
|
+
}) => Promise<void>;
|
|
275
368
|
close: (reason?: string) => Promise<void>;
|
|
276
369
|
snapshot: () => Promise<TSession>;
|
|
277
370
|
};
|
|
@@ -279,6 +372,21 @@ export type VoiceRouteResult<TResult = unknown> = {
|
|
|
279
372
|
complete?: boolean;
|
|
280
373
|
result?: TResult;
|
|
281
374
|
assistantText?: string;
|
|
375
|
+
transfer?: {
|
|
376
|
+
metadata?: Record<string, unknown>;
|
|
377
|
+
reason?: string;
|
|
378
|
+
target: string;
|
|
379
|
+
};
|
|
380
|
+
escalate?: {
|
|
381
|
+
metadata?: Record<string, unknown>;
|
|
382
|
+
reason: string;
|
|
383
|
+
};
|
|
384
|
+
voicemail?: {
|
|
385
|
+
metadata?: Record<string, unknown>;
|
|
386
|
+
};
|
|
387
|
+
noAnswer?: {
|
|
388
|
+
metadata?: Record<string, unknown>;
|
|
389
|
+
};
|
|
282
390
|
};
|
|
283
391
|
export type VoiceTurnCorrectionResult = string | {
|
|
284
392
|
text: string;
|
|
@@ -290,6 +398,7 @@ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends Voic
|
|
|
290
398
|
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
291
399
|
context: TContext;
|
|
292
400
|
fallback?: VoiceFallbackDiagnostics;
|
|
401
|
+
lexicon: VoiceLexiconEntry[];
|
|
293
402
|
phraseHints: VoicePhraseHint[];
|
|
294
403
|
session: TSession;
|
|
295
404
|
text: string;
|
|
@@ -300,6 +409,11 @@ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
|
|
|
300
409
|
scenarioId?: string;
|
|
301
410
|
sessionId: string;
|
|
302
411
|
}) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
|
|
412
|
+
export type VoiceLexiconResolver<TContext = unknown> = (input: {
|
|
413
|
+
context: TContext;
|
|
414
|
+
scenarioId?: string;
|
|
415
|
+
sessionId: string;
|
|
416
|
+
}) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
|
|
303
417
|
export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
304
418
|
context: TContext;
|
|
305
419
|
session: TSession;
|
|
@@ -308,6 +422,20 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
|
|
|
308
422
|
}) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
|
|
309
423
|
export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
|
|
310
424
|
export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
425
|
+
onCallStart?: (input: {
|
|
426
|
+
context: TContext;
|
|
427
|
+
session: TSession;
|
|
428
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
429
|
+
}) => Promise<void> | void;
|
|
430
|
+
onCallEnd?: (input: {
|
|
431
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
432
|
+
context: TContext;
|
|
433
|
+
disposition: VoiceCallDisposition;
|
|
434
|
+
metadata?: Record<string, unknown>;
|
|
435
|
+
reason?: string;
|
|
436
|
+
session: TSession;
|
|
437
|
+
target?: string;
|
|
438
|
+
}) => Promise<void> | void;
|
|
311
439
|
onSession?: (input: {
|
|
312
440
|
context: TContext;
|
|
313
441
|
session: TSession;
|
|
@@ -327,6 +455,87 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
|
|
|
327
455
|
error: unknown;
|
|
328
456
|
api?: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
329
457
|
}) => Promise<void> | void;
|
|
458
|
+
onEscalation?: (input: {
|
|
459
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
460
|
+
context: TContext;
|
|
461
|
+
metadata?: Record<string, unknown>;
|
|
462
|
+
reason: string;
|
|
463
|
+
session: TSession;
|
|
464
|
+
}) => Promise<void> | void;
|
|
465
|
+
onNoAnswer?: (input: {
|
|
466
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
467
|
+
context: TContext;
|
|
468
|
+
metadata?: Record<string, unknown>;
|
|
469
|
+
session: TSession;
|
|
470
|
+
}) => Promise<void> | void;
|
|
471
|
+
onTransfer?: (input: {
|
|
472
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
473
|
+
context: TContext;
|
|
474
|
+
metadata?: Record<string, unknown>;
|
|
475
|
+
reason?: string;
|
|
476
|
+
session: TSession;
|
|
477
|
+
target: string;
|
|
478
|
+
}) => Promise<void> | void;
|
|
479
|
+
onVoicemail?: (input: {
|
|
480
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
481
|
+
context: TContext;
|
|
482
|
+
metadata?: Record<string, unknown>;
|
|
483
|
+
session: TSession;
|
|
484
|
+
}) => Promise<void> | void;
|
|
485
|
+
};
|
|
486
|
+
export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
487
|
+
buildReview?: (input: {
|
|
488
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
489
|
+
context: TContext;
|
|
490
|
+
disposition: VoiceCallDisposition;
|
|
491
|
+
metadata?: Record<string, unknown>;
|
|
492
|
+
reason?: string;
|
|
493
|
+
result?: TResult;
|
|
494
|
+
session: TSession;
|
|
495
|
+
target?: string;
|
|
496
|
+
}) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
|
|
497
|
+
createTaskFromReview?: (input: {
|
|
498
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
499
|
+
context: TContext;
|
|
500
|
+
disposition: VoiceCallDisposition;
|
|
501
|
+
review: StoredVoiceCallReviewArtifact;
|
|
502
|
+
session: TSession;
|
|
503
|
+
}) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
|
|
504
|
+
resolveTaskPolicy?: (input: {
|
|
505
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
506
|
+
context: TContext;
|
|
507
|
+
disposition: VoiceCallDisposition;
|
|
508
|
+
metadata?: Record<string, unknown>;
|
|
509
|
+
reason?: string;
|
|
510
|
+
review?: StoredVoiceCallReviewArtifact;
|
|
511
|
+
session: TSession;
|
|
512
|
+
target?: string;
|
|
513
|
+
task: StoredVoiceOpsTask;
|
|
514
|
+
}) => Promise<VoiceOpsTaskPolicy | void> | VoiceOpsTaskPolicy | void;
|
|
515
|
+
resolveTaskAssignment?: (input: {
|
|
516
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
517
|
+
context: TContext;
|
|
518
|
+
disposition: VoiceCallDisposition;
|
|
519
|
+
metadata?: Record<string, unknown>;
|
|
520
|
+
reason?: string;
|
|
521
|
+
review?: StoredVoiceCallReviewArtifact;
|
|
522
|
+
session: TSession;
|
|
523
|
+
target?: string;
|
|
524
|
+
task: StoredVoiceOpsTask;
|
|
525
|
+
}) => Promise<VoiceOpsTaskAssignmentRule | void> | VoiceOpsTaskAssignmentRule | void;
|
|
526
|
+
taskAssignmentRules?: VoiceOpsTaskAssignmentRules;
|
|
527
|
+
taskPolicies?: VoiceOpsDispositionTaskPolicies;
|
|
528
|
+
events?: VoiceIntegrationEventStore;
|
|
529
|
+
onEvent?: (input: {
|
|
530
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
531
|
+
context: TContext;
|
|
532
|
+
event: StoredVoiceIntegrationEvent;
|
|
533
|
+
session: TSession;
|
|
534
|
+
}) => Promise<void> | void;
|
|
535
|
+
reviews?: VoiceCallReviewStore;
|
|
536
|
+
sinks?: VoiceIntegrationSink[];
|
|
537
|
+
tasks?: VoiceOpsTaskStore;
|
|
538
|
+
webhook?: VoiceIntegrationWebhookConfig;
|
|
330
539
|
};
|
|
331
540
|
export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
|
|
332
541
|
onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
|
|
@@ -337,8 +546,15 @@ export type VoiceScenario = {
|
|
|
337
546
|
description?: string;
|
|
338
547
|
metadata?: Record<string, unknown>;
|
|
339
548
|
};
|
|
549
|
+
export type VoiceExpectedSpeakerTurn = {
|
|
550
|
+
speaker: string;
|
|
551
|
+
text: string;
|
|
552
|
+
};
|
|
340
553
|
export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
554
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
341
555
|
path: string;
|
|
556
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
557
|
+
lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
|
|
342
558
|
phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
|
|
343
559
|
preset?: VoiceRuntimePreset;
|
|
344
560
|
stt: STTAdapter;
|
|
@@ -351,14 +567,21 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
351
567
|
audioConditioning?: VoiceAudioConditioningConfig;
|
|
352
568
|
logger?: VoiceLogger;
|
|
353
569
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
570
|
+
ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
|
|
571
|
+
trace?: VoiceTraceEventStore;
|
|
354
572
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
355
573
|
export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
574
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
356
575
|
id: string;
|
|
357
576
|
context: TContext;
|
|
358
577
|
socket: VoiceSocket;
|
|
359
578
|
stt: STTAdapter;
|
|
579
|
+
tts?: TTSAdapter;
|
|
580
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
581
|
+
lexicon?: VoiceLexiconEntry[];
|
|
360
582
|
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
361
583
|
store: VoiceSessionStore<TSession>;
|
|
584
|
+
trace?: VoiceTraceEventStore;
|
|
362
585
|
reconnect: Required<VoiceReconnectConfig>;
|
|
363
586
|
phraseHints?: VoicePhraseHint[];
|
|
364
587
|
scenarioId?: string;
|
|
@@ -408,6 +631,13 @@ export type VoiceServerAssistantMessage = {
|
|
|
408
631
|
text: string;
|
|
409
632
|
turnId?: string;
|
|
410
633
|
};
|
|
634
|
+
export type VoiceServerAudioMessage = {
|
|
635
|
+
type: 'audio';
|
|
636
|
+
chunkBase64: string;
|
|
637
|
+
format: AudioFormat;
|
|
638
|
+
receivedAt: number;
|
|
639
|
+
turnId?: string;
|
|
640
|
+
};
|
|
411
641
|
export type VoiceServerCompleteMessage = {
|
|
412
642
|
type: 'complete';
|
|
413
643
|
sessionId: string;
|
|
@@ -420,7 +650,7 @@ export type VoiceServerErrorMessage = {
|
|
|
420
650
|
export type VoiceServerPongMessage = {
|
|
421
651
|
type: 'pong';
|
|
422
652
|
};
|
|
423
|
-
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
653
|
+
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
424
654
|
export type VoiceConnectionOptions = {
|
|
425
655
|
protocols?: string[];
|
|
426
656
|
scenarioId?: string;
|
|
@@ -440,6 +670,30 @@ export type VoiceControllerOptions = {
|
|
|
440
670
|
capture?: VoiceCaptureOptions;
|
|
441
671
|
autoStopOnComplete?: boolean;
|
|
442
672
|
};
|
|
673
|
+
export type VoiceBargeInOptions = {
|
|
674
|
+
enabled?: boolean;
|
|
675
|
+
interruptOnPartial?: boolean;
|
|
676
|
+
interruptThreshold?: number;
|
|
677
|
+
};
|
|
678
|
+
export type VoiceAudioPlayerOptions = {
|
|
679
|
+
autoStart?: boolean;
|
|
680
|
+
createAudioContext?: () => AudioContext;
|
|
681
|
+
lookaheadMs?: number;
|
|
682
|
+
};
|
|
683
|
+
export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
|
|
684
|
+
audioPlayer?: VoiceAudioPlayerOptions;
|
|
685
|
+
bargeIn?: VoiceBargeInOptions;
|
|
686
|
+
};
|
|
687
|
+
export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
|
|
688
|
+
export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
|
|
689
|
+
export type VoiceSTTRoutingStrategy = {
|
|
690
|
+
benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
|
|
691
|
+
correctionMode: VoiceSTTRoutingCorrectionMode;
|
|
692
|
+
goal: VoiceSTTRoutingGoal;
|
|
693
|
+
notes: string[];
|
|
694
|
+
preset: VoiceRuntimePreset;
|
|
695
|
+
sttLifecycle: VoiceSTTLifecycle;
|
|
696
|
+
};
|
|
443
697
|
export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
|
|
444
698
|
assistantTexts: string[];
|
|
445
699
|
partial: string;
|
|
@@ -480,6 +734,12 @@ export type VoiceStreamState<TResult = unknown> = {
|
|
|
480
734
|
partial: string;
|
|
481
735
|
turns: VoiceTurnRecord<TResult>[];
|
|
482
736
|
assistantTexts: string[];
|
|
737
|
+
assistantAudio: Array<{
|
|
738
|
+
chunk: Uint8Array;
|
|
739
|
+
format: AudioFormat;
|
|
740
|
+
receivedAt: number;
|
|
741
|
+
turnId?: string;
|
|
742
|
+
}>;
|
|
483
743
|
error: string | null;
|
|
484
744
|
isConnected: boolean;
|
|
485
745
|
};
|
|
@@ -502,11 +762,52 @@ export type VoiceStream<TResult = unknown> = {
|
|
|
502
762
|
subscribe: (subscriber: () => void) => () => void;
|
|
503
763
|
turns: VoiceTurnRecord<TResult>[];
|
|
504
764
|
assistantTexts: string[];
|
|
765
|
+
assistantAudio: Array<{
|
|
766
|
+
chunk: Uint8Array;
|
|
767
|
+
format: AudioFormat;
|
|
768
|
+
receivedAt: number;
|
|
769
|
+
turnId?: string;
|
|
770
|
+
}>;
|
|
505
771
|
};
|
|
506
772
|
export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
|
|
507
773
|
isRecording: boolean;
|
|
508
774
|
recordingError: string | null;
|
|
509
775
|
};
|
|
776
|
+
export type VoiceAudioPlayerState = {
|
|
777
|
+
activeSourceCount: number;
|
|
778
|
+
error: string | null;
|
|
779
|
+
isActive: boolean;
|
|
780
|
+
isPlaying: boolean;
|
|
781
|
+
lastInterruptLatencyMs?: number;
|
|
782
|
+
lastPlaybackStopLatencyMs?: number;
|
|
783
|
+
processedChunkCount: number;
|
|
784
|
+
queuedChunkCount: number;
|
|
785
|
+
};
|
|
786
|
+
export type VoiceAudioPlayerSource = {
|
|
787
|
+
assistantAudio: VoiceStreamState['assistantAudio'];
|
|
788
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
789
|
+
};
|
|
790
|
+
export type VoiceAudioPlayer = {
|
|
791
|
+
close: () => Promise<void>;
|
|
792
|
+
error: string | null;
|
|
793
|
+
getSnapshot: () => VoiceAudioPlayerState;
|
|
794
|
+
activeSourceCount: number;
|
|
795
|
+
isActive: boolean;
|
|
796
|
+
isPlaying: boolean;
|
|
797
|
+
interrupt: () => Promise<void>;
|
|
798
|
+
lastInterruptLatencyMs?: number;
|
|
799
|
+
lastPlaybackStopLatencyMs?: number;
|
|
800
|
+
pause: () => Promise<void>;
|
|
801
|
+
processedChunkCount: number;
|
|
802
|
+
queuedChunkCount: number;
|
|
803
|
+
start: () => Promise<void>;
|
|
804
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
805
|
+
};
|
|
806
|
+
export type VoiceBargeInBinding = {
|
|
807
|
+
close: () => void;
|
|
808
|
+
handleLevel: (level: number) => void;
|
|
809
|
+
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
810
|
+
};
|
|
510
811
|
export type VoiceController<TResult = unknown> = {
|
|
511
812
|
bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
|
|
512
813
|
close: () => void;
|
|
@@ -532,6 +833,16 @@ export type VoiceController<TResult = unknown> = {
|
|
|
532
833
|
toggleRecording: () => Promise<void>;
|
|
533
834
|
turns: VoiceTurnRecord<TResult>[];
|
|
534
835
|
assistantTexts: string[];
|
|
836
|
+
assistantAudio: Array<{
|
|
837
|
+
chunk: Uint8Array;
|
|
838
|
+
format: AudioFormat;
|
|
839
|
+
receivedAt: number;
|
|
840
|
+
turnId?: string;
|
|
841
|
+
}>;
|
|
842
|
+
};
|
|
843
|
+
export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
|
|
844
|
+
audioPlayer: VoiceAudioPlayer;
|
|
845
|
+
interruptAssistant: () => Promise<void>;
|
|
535
846
|
};
|
|
536
847
|
export type VoiceHTMXBindingOptions = {
|
|
537
848
|
element: Element | string;
|
|
@@ -556,6 +867,12 @@ export type VoiceStoreAction<TResult = unknown> = {
|
|
|
556
867
|
} | {
|
|
557
868
|
type: 'assistant';
|
|
558
869
|
text: string;
|
|
870
|
+
} | {
|
|
871
|
+
type: 'audio';
|
|
872
|
+
chunk: Uint8Array;
|
|
873
|
+
format: AudioFormat;
|
|
874
|
+
receivedAt: number;
|
|
875
|
+
turnId?: string;
|
|
559
876
|
} | {
|
|
560
877
|
type: 'complete';
|
|
561
878
|
sessionId: string;
|