@absolutejs/voice 0.0.21 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +499 -2
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +12 -1
- package/dist/index.js +2425 -33
- package/dist/ops.d.ts +100 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +4940 -307
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/types.d.ts +290 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/dist/types.d.ts
CHANGED
|
@@ -1,23 +1,54 @@
|
|
|
1
1
|
import type { SessionStore } from '@absolutejs/absolute';
|
|
2
|
+
import type { StoredVoiceIntegrationEvent, StoredVoiceOpsTask, VoiceIntegrationEventStore, VoiceOpsTask, VoiceOpsTaskStore } from './ops';
|
|
3
|
+
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact, VoiceCallReviewStore } from './testing/review';
|
|
2
4
|
export type AudioFormat = {
|
|
3
5
|
container: 'raw';
|
|
4
|
-
encoding: 'pcm_s16le';
|
|
6
|
+
encoding: 'alaw' | 'mulaw' | 'pcm_s16le';
|
|
5
7
|
sampleRateHz: number;
|
|
6
8
|
channels: 1 | 2;
|
|
7
9
|
};
|
|
8
10
|
export type AudioChunk = ArrayBuffer | ArrayBufferView;
|
|
11
|
+
export type VoiceLanguageStrategy = {
|
|
12
|
+
mode: 'auto-detect';
|
|
13
|
+
allowedLanguages?: string[];
|
|
14
|
+
} | {
|
|
15
|
+
mode: 'fixed';
|
|
16
|
+
primaryLanguage: string;
|
|
17
|
+
secondaryLanguages?: string[];
|
|
18
|
+
} | {
|
|
19
|
+
mode: 'allow-switching';
|
|
20
|
+
primaryLanguage?: string;
|
|
21
|
+
secondaryLanguages: string[];
|
|
22
|
+
};
|
|
9
23
|
export type VoicePhraseHint = {
|
|
10
24
|
text: string;
|
|
11
25
|
aliases?: string[];
|
|
12
26
|
boost?: number;
|
|
13
27
|
metadata?: Record<string, unknown>;
|
|
14
28
|
};
|
|
29
|
+
export type VoiceCorrectionRiskTier = 'safe' | 'balanced' | 'risky';
|
|
30
|
+
export type VoiceDomainTerm = {
|
|
31
|
+
text: string;
|
|
32
|
+
aliases?: string[];
|
|
33
|
+
boost?: number;
|
|
34
|
+
language?: string;
|
|
35
|
+
metadata?: Record<string, unknown>;
|
|
36
|
+
pronunciation?: string;
|
|
37
|
+
};
|
|
38
|
+
export type VoiceLexiconEntry = {
|
|
39
|
+
text: string;
|
|
40
|
+
aliases?: string[];
|
|
41
|
+
language?: string;
|
|
42
|
+
metadata?: Record<string, unknown>;
|
|
43
|
+
pronunciation?: string;
|
|
44
|
+
};
|
|
15
45
|
export type Transcript = {
|
|
16
46
|
id: string;
|
|
17
47
|
text: string;
|
|
18
48
|
isFinal: boolean;
|
|
19
49
|
confidence?: number;
|
|
20
50
|
language?: string;
|
|
51
|
+
speaker?: string | number;
|
|
21
52
|
startedAtMs?: number;
|
|
22
53
|
endedAtMs?: number;
|
|
23
54
|
vendor?: string;
|
|
@@ -26,6 +57,7 @@ export type VoiceTranscriptQuality = {
|
|
|
26
57
|
averageConfidence?: number;
|
|
27
58
|
confidenceSampleCount: number;
|
|
28
59
|
correction?: VoiceTurnCorrectionDiagnostics;
|
|
60
|
+
cost?: VoiceTurnCostEstimate;
|
|
29
61
|
fallbackUsed: boolean;
|
|
30
62
|
finalTranscriptCount: number;
|
|
31
63
|
fallback?: VoiceFallbackDiagnostics;
|
|
@@ -42,6 +74,13 @@ export type VoiceTurnCorrectionDiagnostics = {
|
|
|
42
74
|
provider?: string;
|
|
43
75
|
reason?: string;
|
|
44
76
|
};
|
|
77
|
+
export type VoiceTurnCostEstimate = {
|
|
78
|
+
estimatedRelativeCostUnits: number;
|
|
79
|
+
fallbackAttemptCount: number;
|
|
80
|
+
fallbackReplayAudioMs: number;
|
|
81
|
+
primaryAudioMs: number;
|
|
82
|
+
totalBillableAudioMs: number;
|
|
83
|
+
};
|
|
45
84
|
export type VoiceFallbackSelectionReason = 'fallback-empty' | 'primary-empty' | 'word-count-margin' | 'confidence-margin' | 'word-count-tiebreak' | 'kept-primary';
|
|
46
85
|
export type VoiceFallbackDiagnostics = {
|
|
47
86
|
attempted: boolean;
|
|
@@ -97,6 +136,8 @@ export type STTAdapterSession = {
|
|
|
97
136
|
export type STTAdapterOpenOptions = {
|
|
98
137
|
sessionId: string;
|
|
99
138
|
format: AudioFormat;
|
|
139
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
140
|
+
lexicon?: VoiceLexiconEntry[];
|
|
100
141
|
phraseHints?: VoicePhraseHint[];
|
|
101
142
|
signal?: AbortSignal;
|
|
102
143
|
};
|
|
@@ -122,6 +163,7 @@ export type TTSAdapterSession = {
|
|
|
122
163
|
};
|
|
123
164
|
export type TTSAdapterOpenOptions = {
|
|
124
165
|
sessionId: string;
|
|
166
|
+
lexicon?: VoiceLexiconEntry[];
|
|
125
167
|
signal?: AbortSignal;
|
|
126
168
|
};
|
|
127
169
|
export type TTSAdapter<TOptions extends TTSAdapterOpenOptions = TTSAdapterOpenOptions> = {
|
|
@@ -139,6 +181,8 @@ export type RealtimeAdapterSession = {
|
|
|
139
181
|
export type RealtimeAdapterOpenOptions = {
|
|
140
182
|
sessionId: string;
|
|
141
183
|
format: AudioFormat;
|
|
184
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
185
|
+
lexicon?: VoiceLexiconEntry[];
|
|
142
186
|
phraseHints?: VoicePhraseHint[];
|
|
143
187
|
signal?: AbortSignal;
|
|
144
188
|
};
|
|
@@ -156,6 +200,17 @@ export type VoiceTurnRecord<TResult = unknown> = {
|
|
|
156
200
|
committedAt: number;
|
|
157
201
|
result?: TResult;
|
|
158
202
|
};
|
|
203
|
+
export type VoiceCostTelemetryConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
204
|
+
fallbackPassCostUnit?: number;
|
|
205
|
+
onTurnCost?: (input: {
|
|
206
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
207
|
+
context: TContext;
|
|
208
|
+
estimate: VoiceTurnCostEstimate;
|
|
209
|
+
session: TSession;
|
|
210
|
+
turn: VoiceTurnRecord<TResult>;
|
|
211
|
+
}) => Promise<void> | void;
|
|
212
|
+
primaryPassCostUnit?: number;
|
|
213
|
+
};
|
|
159
214
|
export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown> = {
|
|
160
215
|
id: string;
|
|
161
216
|
createdAt: number;
|
|
@@ -185,6 +240,7 @@ export type VoiceSessionRecord<TMeta = Record<string, never>, TResult = unknown>
|
|
|
185
240
|
transcriptIds: string[];
|
|
186
241
|
committedAt: number;
|
|
187
242
|
};
|
|
243
|
+
call?: VoiceCallLifecycleState;
|
|
188
244
|
metadata?: TMeta;
|
|
189
245
|
scenarioId?: string;
|
|
190
246
|
};
|
|
@@ -195,6 +251,22 @@ export type VoiceSessionSummary = {
|
|
|
195
251
|
status: VoiceSessionStatus;
|
|
196
252
|
turnCount: number;
|
|
197
253
|
};
|
|
254
|
+
export type VoiceCallDisposition = 'completed' | 'transferred' | 'escalated' | 'voicemail' | 'no-answer' | 'failed' | 'closed';
|
|
255
|
+
export type VoiceCallLifecycleEvent = {
|
|
256
|
+
at: number;
|
|
257
|
+
type: 'start' | 'end' | 'transfer' | 'escalation' | 'voicemail' | 'no-answer';
|
|
258
|
+
disposition?: VoiceCallDisposition;
|
|
259
|
+
metadata?: Record<string, unknown>;
|
|
260
|
+
reason?: string;
|
|
261
|
+
target?: string;
|
|
262
|
+
};
|
|
263
|
+
export type VoiceCallLifecycleState = {
|
|
264
|
+
disposition?: VoiceCallDisposition;
|
|
265
|
+
endedAt?: number;
|
|
266
|
+
events: VoiceCallLifecycleEvent[];
|
|
267
|
+
lastEventAt: number;
|
|
268
|
+
startedAt: number;
|
|
269
|
+
};
|
|
198
270
|
export type VoiceSessionStore<TSession extends VoiceSessionRecord = VoiceSessionRecord> = SessionStore<TSession, VoiceSessionSummary>;
|
|
199
271
|
export type VoiceLogger = {
|
|
200
272
|
debug?: (message: string, meta?: Record<string, unknown>) => void;
|
|
@@ -207,7 +279,7 @@ export type VoiceReconnectConfig = {
|
|
|
207
279
|
timeout?: number;
|
|
208
280
|
maxAttempts?: number;
|
|
209
281
|
};
|
|
210
|
-
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'reliability';
|
|
282
|
+
export type VoiceRuntimePreset = 'default' | 'chat' | 'guided-intake' | 'dictation' | 'noisy-room' | 'pstn-balanced' | 'pstn-fast' | 'reliability';
|
|
211
283
|
export type VoiceSTTLifecycle = 'continuous' | 'turn-scoped';
|
|
212
284
|
export type VoiceTurnProfile = 'fast' | 'balanced' | 'long-form';
|
|
213
285
|
export type VoiceTurnQualityProfile = 'general' | 'accent-heavy' | 'noisy-room' | 'short-command';
|
|
@@ -271,7 +343,26 @@ export type VoiceSessionHandle<TContext = unknown, TSession extends VoiceSession
|
|
|
271
343
|
commitTurn: (reason?: VoiceEndOfTurnEvent['reason']) => Promise<void>;
|
|
272
344
|
disconnect: (event?: VoiceCloseEvent) => Promise<void>;
|
|
273
345
|
complete: (result?: TResult) => Promise<void>;
|
|
346
|
+
escalate: (input: {
|
|
347
|
+
metadata?: Record<string, unknown>;
|
|
348
|
+
reason: string;
|
|
349
|
+
result?: TResult;
|
|
350
|
+
}) => Promise<void>;
|
|
274
351
|
fail: (error: unknown) => Promise<void>;
|
|
352
|
+
markNoAnswer: (input?: {
|
|
353
|
+
metadata?: Record<string, unknown>;
|
|
354
|
+
result?: TResult;
|
|
355
|
+
}) => Promise<void>;
|
|
356
|
+
markVoicemail: (input?: {
|
|
357
|
+
metadata?: Record<string, unknown>;
|
|
358
|
+
result?: TResult;
|
|
359
|
+
}) => Promise<void>;
|
|
360
|
+
transfer: (input: {
|
|
361
|
+
metadata?: Record<string, unknown>;
|
|
362
|
+
reason?: string;
|
|
363
|
+
result?: TResult;
|
|
364
|
+
target: string;
|
|
365
|
+
}) => Promise<void>;
|
|
275
366
|
close: (reason?: string) => Promise<void>;
|
|
276
367
|
snapshot: () => Promise<TSession>;
|
|
277
368
|
};
|
|
@@ -279,6 +370,21 @@ export type VoiceRouteResult<TResult = unknown> = {
|
|
|
279
370
|
complete?: boolean;
|
|
280
371
|
result?: TResult;
|
|
281
372
|
assistantText?: string;
|
|
373
|
+
transfer?: {
|
|
374
|
+
metadata?: Record<string, unknown>;
|
|
375
|
+
reason?: string;
|
|
376
|
+
target: string;
|
|
377
|
+
};
|
|
378
|
+
escalate?: {
|
|
379
|
+
metadata?: Record<string, unknown>;
|
|
380
|
+
reason: string;
|
|
381
|
+
};
|
|
382
|
+
voicemail?: {
|
|
383
|
+
metadata?: Record<string, unknown>;
|
|
384
|
+
};
|
|
385
|
+
noAnswer?: {
|
|
386
|
+
metadata?: Record<string, unknown>;
|
|
387
|
+
};
|
|
282
388
|
};
|
|
283
389
|
export type VoiceTurnCorrectionResult = string | {
|
|
284
390
|
text: string;
|
|
@@ -290,6 +396,7 @@ export type VoiceTurnCorrectionHandler<TContext = unknown, TSession extends Voic
|
|
|
290
396
|
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
291
397
|
context: TContext;
|
|
292
398
|
fallback?: VoiceFallbackDiagnostics;
|
|
399
|
+
lexicon: VoiceLexiconEntry[];
|
|
293
400
|
phraseHints: VoicePhraseHint[];
|
|
294
401
|
session: TSession;
|
|
295
402
|
text: string;
|
|
@@ -300,6 +407,11 @@ export type VoicePhraseHintResolver<TContext = unknown> = (input: {
|
|
|
300
407
|
scenarioId?: string;
|
|
301
408
|
sessionId: string;
|
|
302
409
|
}) => Promise<VoicePhraseHint[] | void> | VoicePhraseHint[] | void;
|
|
410
|
+
export type VoiceLexiconResolver<TContext = unknown> = (input: {
|
|
411
|
+
context: TContext;
|
|
412
|
+
scenarioId?: string;
|
|
413
|
+
sessionId: string;
|
|
414
|
+
}) => Promise<VoiceLexiconEntry[] | void> | VoiceLexiconEntry[] | void;
|
|
303
415
|
export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = (input: {
|
|
304
416
|
context: TContext;
|
|
305
417
|
session: TSession;
|
|
@@ -308,6 +420,20 @@ export type VoiceOnTurnObjectHandler<TContext = unknown, TSession extends VoiceS
|
|
|
308
420
|
}) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void;
|
|
309
421
|
export type VoiceOnTurnHandler<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceOnTurnObjectHandler<TContext, TSession, TResult> | ((session: TSession, turn: VoiceTurnRecord, api: VoiceSessionHandle<TContext, TSession, TResult>, context: TContext) => Promise<VoiceRouteResult<TResult> | void> | VoiceRouteResult<TResult> | void);
|
|
310
422
|
export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
423
|
+
onCallStart?: (input: {
|
|
424
|
+
context: TContext;
|
|
425
|
+
session: TSession;
|
|
426
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
427
|
+
}) => Promise<void> | void;
|
|
428
|
+
onCallEnd?: (input: {
|
|
429
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
430
|
+
context: TContext;
|
|
431
|
+
disposition: VoiceCallDisposition;
|
|
432
|
+
metadata?: Record<string, unknown>;
|
|
433
|
+
reason?: string;
|
|
434
|
+
session: TSession;
|
|
435
|
+
target?: string;
|
|
436
|
+
}) => Promise<void> | void;
|
|
311
437
|
onSession?: (input: {
|
|
312
438
|
context: TContext;
|
|
313
439
|
session: TSession;
|
|
@@ -327,6 +453,61 @@ export type VoiceRouteConfig<TContext = unknown, TSession extends VoiceSessionRe
|
|
|
327
453
|
error: unknown;
|
|
328
454
|
api?: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
329
455
|
}) => Promise<void> | void;
|
|
456
|
+
onEscalation?: (input: {
|
|
457
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
458
|
+
context: TContext;
|
|
459
|
+
metadata?: Record<string, unknown>;
|
|
460
|
+
reason: string;
|
|
461
|
+
session: TSession;
|
|
462
|
+
}) => Promise<void> | void;
|
|
463
|
+
onNoAnswer?: (input: {
|
|
464
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
465
|
+
context: TContext;
|
|
466
|
+
metadata?: Record<string, unknown>;
|
|
467
|
+
session: TSession;
|
|
468
|
+
}) => Promise<void> | void;
|
|
469
|
+
onTransfer?: (input: {
|
|
470
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
471
|
+
context: TContext;
|
|
472
|
+
metadata?: Record<string, unknown>;
|
|
473
|
+
reason?: string;
|
|
474
|
+
session: TSession;
|
|
475
|
+
target: string;
|
|
476
|
+
}) => Promise<void> | void;
|
|
477
|
+
onVoicemail?: (input: {
|
|
478
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
479
|
+
context: TContext;
|
|
480
|
+
metadata?: Record<string, unknown>;
|
|
481
|
+
session: TSession;
|
|
482
|
+
}) => Promise<void> | void;
|
|
483
|
+
};
|
|
484
|
+
export type VoiceRuntimeOpsConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
485
|
+
buildReview?: (input: {
|
|
486
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
487
|
+
context: TContext;
|
|
488
|
+
disposition: VoiceCallDisposition;
|
|
489
|
+
metadata?: Record<string, unknown>;
|
|
490
|
+
reason?: string;
|
|
491
|
+
result?: TResult;
|
|
492
|
+
session: TSession;
|
|
493
|
+
target?: string;
|
|
494
|
+
}) => Promise<VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void> | VoiceCallReviewArtifact | StoredVoiceCallReviewArtifact | void;
|
|
495
|
+
createTaskFromReview?: (input: {
|
|
496
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
497
|
+
context: TContext;
|
|
498
|
+
disposition: VoiceCallDisposition;
|
|
499
|
+
review: StoredVoiceCallReviewArtifact;
|
|
500
|
+
session: TSession;
|
|
501
|
+
}) => Promise<Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void> | Omit<VoiceOpsTask, 'id'> | VoiceOpsTask | StoredVoiceOpsTask | null | void;
|
|
502
|
+
events?: VoiceIntegrationEventStore;
|
|
503
|
+
onEvent?: (input: {
|
|
504
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
505
|
+
context: TContext;
|
|
506
|
+
event: StoredVoiceIntegrationEvent;
|
|
507
|
+
session: TSession;
|
|
508
|
+
}) => Promise<void> | void;
|
|
509
|
+
reviews?: VoiceCallReviewStore;
|
|
510
|
+
tasks?: VoiceOpsTaskStore;
|
|
330
511
|
};
|
|
331
512
|
export type VoiceNormalizedRouteConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoiceRouteConfig<TContext, TSession, TResult>, 'onTurn'> & {
|
|
332
513
|
onTurn: VoiceOnTurnObjectHandler<TContext, TSession, TResult>;
|
|
@@ -337,8 +518,15 @@ export type VoiceScenario = {
|
|
|
337
518
|
description?: string;
|
|
338
519
|
metadata?: Record<string, unknown>;
|
|
339
520
|
};
|
|
521
|
+
export type VoiceExpectedSpeakerTurn = {
|
|
522
|
+
speaker: string;
|
|
523
|
+
text: string;
|
|
524
|
+
};
|
|
340
525
|
export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
526
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
341
527
|
path: string;
|
|
528
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
529
|
+
lexicon?: VoiceLexiconEntry[] | VoiceLexiconResolver<TContext>;
|
|
342
530
|
phraseHints?: VoicePhraseHint[] | VoicePhraseHintResolver<TContext>;
|
|
343
531
|
preset?: VoiceRuntimePreset;
|
|
344
532
|
stt: STTAdapter;
|
|
@@ -351,12 +539,17 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
351
539
|
audioConditioning?: VoiceAudioConditioningConfig;
|
|
352
540
|
logger?: VoiceLogger;
|
|
353
541
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
542
|
+
ops?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
|
|
354
543
|
} & VoiceRouteConfig<TContext, TSession, TResult>;
|
|
355
544
|
export type CreateVoiceSessionOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = {
|
|
545
|
+
costTelemetry?: VoiceCostTelemetryConfig<TContext, TSession, TResult>;
|
|
356
546
|
id: string;
|
|
357
547
|
context: TContext;
|
|
358
548
|
socket: VoiceSocket;
|
|
359
549
|
stt: STTAdapter;
|
|
550
|
+
tts?: TTSAdapter;
|
|
551
|
+
languageStrategy?: VoiceLanguageStrategy;
|
|
552
|
+
lexicon?: VoiceLexiconEntry[];
|
|
360
553
|
sttFallback?: VoiceResolvedSTTFallbackConfig;
|
|
361
554
|
store: VoiceSessionStore<TSession>;
|
|
362
555
|
reconnect: Required<VoiceReconnectConfig>;
|
|
@@ -408,6 +601,13 @@ export type VoiceServerAssistantMessage = {
|
|
|
408
601
|
text: string;
|
|
409
602
|
turnId?: string;
|
|
410
603
|
};
|
|
604
|
+
export type VoiceServerAudioMessage = {
|
|
605
|
+
type: 'audio';
|
|
606
|
+
chunkBase64: string;
|
|
607
|
+
format: AudioFormat;
|
|
608
|
+
receivedAt: number;
|
|
609
|
+
turnId?: string;
|
|
610
|
+
};
|
|
411
611
|
export type VoiceServerCompleteMessage = {
|
|
412
612
|
type: 'complete';
|
|
413
613
|
sessionId: string;
|
|
@@ -420,7 +620,7 @@ export type VoiceServerErrorMessage = {
|
|
|
420
620
|
export type VoiceServerPongMessage = {
|
|
421
621
|
type: 'pong';
|
|
422
622
|
};
|
|
423
|
-
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
623
|
+
export type VoiceServerMessage<TResult = unknown> = VoiceServerSessionMessage | VoiceServerPartialMessage | VoiceServerFinalMessage | VoiceServerTurnMessage<TResult> | VoiceServerAssistantMessage | VoiceServerAudioMessage | VoiceServerCompleteMessage | VoiceServerErrorMessage | VoiceServerPongMessage;
|
|
424
624
|
export type VoiceConnectionOptions = {
|
|
425
625
|
protocols?: string[];
|
|
426
626
|
scenarioId?: string;
|
|
@@ -440,6 +640,30 @@ export type VoiceControllerOptions = {
|
|
|
440
640
|
capture?: VoiceCaptureOptions;
|
|
441
641
|
autoStopOnComplete?: boolean;
|
|
442
642
|
};
|
|
643
|
+
export type VoiceBargeInOptions = {
|
|
644
|
+
enabled?: boolean;
|
|
645
|
+
interruptOnPartial?: boolean;
|
|
646
|
+
interruptThreshold?: number;
|
|
647
|
+
};
|
|
648
|
+
export type VoiceAudioPlayerOptions = {
|
|
649
|
+
autoStart?: boolean;
|
|
650
|
+
createAudioContext?: () => AudioContext;
|
|
651
|
+
lookaheadMs?: number;
|
|
652
|
+
};
|
|
653
|
+
export type VoiceDuplexControllerOptions = VoiceControllerOptions & {
|
|
654
|
+
audioPlayer?: VoiceAudioPlayerOptions;
|
|
655
|
+
bargeIn?: VoiceBargeInOptions;
|
|
656
|
+
};
|
|
657
|
+
export type VoiceSTTRoutingGoal = 'best' | 'low-cost';
|
|
658
|
+
export type VoiceSTTRoutingCorrectionMode = 'generic' | 'none' | 'risky-turn';
|
|
659
|
+
export type VoiceSTTRoutingStrategy = {
|
|
660
|
+
benchmarkSessionTarget: 'deepgram-corrected' | 'deepgram-flux';
|
|
661
|
+
correctionMode: VoiceSTTRoutingCorrectionMode;
|
|
662
|
+
goal: VoiceSTTRoutingGoal;
|
|
663
|
+
notes: string[];
|
|
664
|
+
preset: VoiceRuntimePreset;
|
|
665
|
+
sttLifecycle: VoiceSTTLifecycle;
|
|
666
|
+
};
|
|
443
667
|
export type VoiceHTMXRenderInput<TResult = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord> = {
|
|
444
668
|
assistantTexts: string[];
|
|
445
669
|
partial: string;
|
|
@@ -480,6 +704,12 @@ export type VoiceStreamState<TResult = unknown> = {
|
|
|
480
704
|
partial: string;
|
|
481
705
|
turns: VoiceTurnRecord<TResult>[];
|
|
482
706
|
assistantTexts: string[];
|
|
707
|
+
assistantAudio: Array<{
|
|
708
|
+
chunk: Uint8Array;
|
|
709
|
+
format: AudioFormat;
|
|
710
|
+
receivedAt: number;
|
|
711
|
+
turnId?: string;
|
|
712
|
+
}>;
|
|
483
713
|
error: string | null;
|
|
484
714
|
isConnected: boolean;
|
|
485
715
|
};
|
|
@@ -502,11 +732,52 @@ export type VoiceStream<TResult = unknown> = {
|
|
|
502
732
|
subscribe: (subscriber: () => void) => () => void;
|
|
503
733
|
turns: VoiceTurnRecord<TResult>[];
|
|
504
734
|
assistantTexts: string[];
|
|
735
|
+
assistantAudio: Array<{
|
|
736
|
+
chunk: Uint8Array;
|
|
737
|
+
format: AudioFormat;
|
|
738
|
+
receivedAt: number;
|
|
739
|
+
turnId?: string;
|
|
740
|
+
}>;
|
|
505
741
|
};
|
|
506
742
|
export type VoiceControllerState<TResult = unknown> = VoiceStreamState<TResult> & {
|
|
507
743
|
isRecording: boolean;
|
|
508
744
|
recordingError: string | null;
|
|
509
745
|
};
|
|
746
|
+
export type VoiceAudioPlayerState = {
|
|
747
|
+
activeSourceCount: number;
|
|
748
|
+
error: string | null;
|
|
749
|
+
isActive: boolean;
|
|
750
|
+
isPlaying: boolean;
|
|
751
|
+
lastInterruptLatencyMs?: number;
|
|
752
|
+
lastPlaybackStopLatencyMs?: number;
|
|
753
|
+
processedChunkCount: number;
|
|
754
|
+
queuedChunkCount: number;
|
|
755
|
+
};
|
|
756
|
+
export type VoiceAudioPlayerSource = {
|
|
757
|
+
assistantAudio: VoiceStreamState['assistantAudio'];
|
|
758
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
759
|
+
};
|
|
760
|
+
export type VoiceAudioPlayer = {
|
|
761
|
+
close: () => Promise<void>;
|
|
762
|
+
error: string | null;
|
|
763
|
+
getSnapshot: () => VoiceAudioPlayerState;
|
|
764
|
+
activeSourceCount: number;
|
|
765
|
+
isActive: boolean;
|
|
766
|
+
isPlaying: boolean;
|
|
767
|
+
interrupt: () => Promise<void>;
|
|
768
|
+
lastInterruptLatencyMs?: number;
|
|
769
|
+
lastPlaybackStopLatencyMs?: number;
|
|
770
|
+
pause: () => Promise<void>;
|
|
771
|
+
processedChunkCount: number;
|
|
772
|
+
queuedChunkCount: number;
|
|
773
|
+
start: () => Promise<void>;
|
|
774
|
+
subscribe: (subscriber: () => void) => () => void;
|
|
775
|
+
};
|
|
776
|
+
export type VoiceBargeInBinding = {
|
|
777
|
+
close: () => void;
|
|
778
|
+
handleLevel: (level: number) => void;
|
|
779
|
+
sendAudio: (audio: Uint8Array | ArrayBuffer) => void;
|
|
780
|
+
};
|
|
510
781
|
export type VoiceController<TResult = unknown> = {
|
|
511
782
|
bindHTMX: (options: VoiceHTMXBindingOptions) => () => void;
|
|
512
783
|
close: () => void;
|
|
@@ -532,6 +803,16 @@ export type VoiceController<TResult = unknown> = {
|
|
|
532
803
|
toggleRecording: () => Promise<void>;
|
|
533
804
|
turns: VoiceTurnRecord<TResult>[];
|
|
534
805
|
assistantTexts: string[];
|
|
806
|
+
assistantAudio: Array<{
|
|
807
|
+
chunk: Uint8Array;
|
|
808
|
+
format: AudioFormat;
|
|
809
|
+
receivedAt: number;
|
|
810
|
+
turnId?: string;
|
|
811
|
+
}>;
|
|
812
|
+
};
|
|
813
|
+
export type VoiceDuplexController<TResult = unknown> = VoiceController<TResult> & {
|
|
814
|
+
audioPlayer: VoiceAudioPlayer;
|
|
815
|
+
interruptAssistant: () => Promise<void>;
|
|
535
816
|
};
|
|
536
817
|
export type VoiceHTMXBindingOptions = {
|
|
537
818
|
element: Element | string;
|
|
@@ -556,6 +837,12 @@ export type VoiceStoreAction<TResult = unknown> = {
|
|
|
556
837
|
} | {
|
|
557
838
|
type: 'assistant';
|
|
558
839
|
text: string;
|
|
840
|
+
} | {
|
|
841
|
+
type: 'audio';
|
|
842
|
+
chunk: Uint8Array;
|
|
843
|
+
format: AudioFormat;
|
|
844
|
+
receivedAt: number;
|
|
845
|
+
turnId?: string;
|
|
559
846
|
} | {
|
|
560
847
|
type: 'complete';
|
|
561
848
|
sessionId: string;
|
package/dist/vue/index.js
CHANGED
|
@@ -102,6 +102,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
102
102
|
};
|
|
103
103
|
var serverMessageToAction = (message) => {
|
|
104
104
|
switch (message.type) {
|
|
105
|
+
case "audio":
|
|
106
|
+
return {
|
|
107
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
108
|
+
format: message.format,
|
|
109
|
+
receivedAt: message.receivedAt,
|
|
110
|
+
turnId: message.turnId,
|
|
111
|
+
type: "audio"
|
|
112
|
+
};
|
|
105
113
|
case "assistant":
|
|
106
114
|
return {
|
|
107
115
|
text: message.text,
|
|
@@ -182,6 +190,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
182
190
|
return false;
|
|
183
191
|
}
|
|
184
192
|
switch (value.type) {
|
|
193
|
+
case "audio":
|
|
185
194
|
case "assistant":
|
|
186
195
|
case "complete":
|
|
187
196
|
case "error":
|
|
@@ -354,6 +363,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
354
363
|
|
|
355
364
|
// src/client/store.ts
|
|
356
365
|
var createInitialState = () => ({
|
|
366
|
+
assistantAudio: [],
|
|
357
367
|
assistantTexts: [],
|
|
358
368
|
error: null,
|
|
359
369
|
isConnected: false,
|
|
@@ -371,6 +381,20 @@ var createVoiceStreamStore = () => {
|
|
|
371
381
|
};
|
|
372
382
|
const dispatch = (action) => {
|
|
373
383
|
switch (action.type) {
|
|
384
|
+
case "audio":
|
|
385
|
+
state = {
|
|
386
|
+
...state,
|
|
387
|
+
assistantAudio: [
|
|
388
|
+
...state.assistantAudio,
|
|
389
|
+
{
|
|
390
|
+
chunk: action.chunk,
|
|
391
|
+
format: action.format,
|
|
392
|
+
receivedAt: action.receivedAt,
|
|
393
|
+
turnId: action.turnId
|
|
394
|
+
}
|
|
395
|
+
]
|
|
396
|
+
};
|
|
397
|
+
break;
|
|
374
398
|
case "assistant":
|
|
375
399
|
state = {
|
|
376
400
|
...state,
|
|
@@ -510,6 +534,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
510
534
|
get assistantTexts() {
|
|
511
535
|
return store.getSnapshot().assistantTexts;
|
|
512
536
|
},
|
|
537
|
+
get assistantAudio() {
|
|
538
|
+
return store.getSnapshot().assistantAudio;
|
|
539
|
+
},
|
|
513
540
|
sendAudio(audio) {
|
|
514
541
|
connection.sendAudio(audio);
|
|
515
542
|
},
|
|
@@ -525,6 +552,7 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
525
552
|
// src/vue/useVoiceStream.ts
|
|
526
553
|
var useVoiceStream = (path, options = {}) => {
|
|
527
554
|
const stream = createVoiceStream(path, options);
|
|
555
|
+
const assistantAudio = shallowRef([]);
|
|
528
556
|
const assistantTexts = shallowRef([]);
|
|
529
557
|
const error = ref(null);
|
|
530
558
|
const isConnected = ref(false);
|
|
@@ -533,6 +561,7 @@ var useVoiceStream = (path, options = {}) => {
|
|
|
533
561
|
const status = ref(stream.status);
|
|
534
562
|
const turns = shallowRef([]);
|
|
535
563
|
const sync = () => {
|
|
564
|
+
assistantAudio.value = [...stream.assistantAudio];
|
|
536
565
|
assistantTexts.value = [...stream.assistantTexts];
|
|
537
566
|
error.value = stream.error;
|
|
538
567
|
isConnected.value = stream.isConnected;
|
|
@@ -549,6 +578,7 @@ var useVoiceStream = (path, options = {}) => {
|
|
|
549
578
|
};
|
|
550
579
|
onUnmounted(destroy);
|
|
551
580
|
return {
|
|
581
|
+
assistantAudio,
|
|
552
582
|
assistantTexts,
|
|
553
583
|
close: () => destroy(),
|
|
554
584
|
endTurn: () => stream.endTurn(),
|
|
@@ -929,6 +959,58 @@ var PRESET_INPUTS = {
|
|
|
929
959
|
transcriptStabilityMs: 1650
|
|
930
960
|
}
|
|
931
961
|
},
|
|
962
|
+
"pstn-balanced": {
|
|
963
|
+
audioConditioning: {
|
|
964
|
+
enabled: true,
|
|
965
|
+
maxGain: 2.8,
|
|
966
|
+
noiseGateAttenuation: 0.07,
|
|
967
|
+
noiseGateThreshold: 0.005,
|
|
968
|
+
targetLevel: 0.08
|
|
969
|
+
},
|
|
970
|
+
capture: {
|
|
971
|
+
channelCount: 1,
|
|
972
|
+
sampleRateHz: 16000
|
|
973
|
+
},
|
|
974
|
+
connection: {
|
|
975
|
+
maxReconnectAttempts: 14,
|
|
976
|
+
pingInterval: 45000,
|
|
977
|
+
reconnect: true
|
|
978
|
+
},
|
|
979
|
+
sttLifecycle: "continuous",
|
|
980
|
+
turnDetection: {
|
|
981
|
+
qualityProfile: "noisy-room",
|
|
982
|
+
profile: "long-form",
|
|
983
|
+
silenceMs: 660,
|
|
984
|
+
speechThreshold: 0.012,
|
|
985
|
+
transcriptStabilityMs: 300
|
|
986
|
+
}
|
|
987
|
+
},
|
|
988
|
+
"pstn-fast": {
|
|
989
|
+
audioConditioning: {
|
|
990
|
+
enabled: true,
|
|
991
|
+
maxGain: 2.75,
|
|
992
|
+
noiseGateAttenuation: 0.06,
|
|
993
|
+
noiseGateThreshold: 0.005,
|
|
994
|
+
targetLevel: 0.08
|
|
995
|
+
},
|
|
996
|
+
capture: {
|
|
997
|
+
channelCount: 1,
|
|
998
|
+
sampleRateHz: 16000
|
|
999
|
+
},
|
|
1000
|
+
connection: {
|
|
1001
|
+
maxReconnectAttempts: 14,
|
|
1002
|
+
pingInterval: 45000,
|
|
1003
|
+
reconnect: true
|
|
1004
|
+
},
|
|
1005
|
+
sttLifecycle: "continuous",
|
|
1006
|
+
turnDetection: {
|
|
1007
|
+
qualityProfile: "noisy-room",
|
|
1008
|
+
profile: "long-form",
|
|
1009
|
+
silenceMs: 620,
|
|
1010
|
+
speechThreshold: 0.012,
|
|
1011
|
+
transcriptStabilityMs: 280
|
|
1012
|
+
}
|
|
1013
|
+
},
|
|
932
1014
|
reliability: {
|
|
933
1015
|
audioConditioning: {
|
|
934
1016
|
enabled: true,
|
|
@@ -972,6 +1054,7 @@ var resolveVoiceRuntimePreset = (name = "default") => {
|
|
|
972
1054
|
|
|
973
1055
|
// src/client/controller.ts
|
|
974
1056
|
var createInitialState2 = (stream) => ({
|
|
1057
|
+
assistantAudio: [...stream.assistantAudio],
|
|
975
1058
|
assistantTexts: [...stream.assistantTexts],
|
|
976
1059
|
error: stream.error,
|
|
977
1060
|
isConnected: stream.isConnected,
|
|
@@ -1000,6 +1083,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1000
1083
|
const sync = () => {
|
|
1001
1084
|
state = {
|
|
1002
1085
|
...state,
|
|
1086
|
+
assistantAudio: [...stream.assistantAudio],
|
|
1003
1087
|
assistantTexts: [...stream.assistantTexts],
|
|
1004
1088
|
error: stream.error,
|
|
1005
1089
|
isConnected: stream.isConnected,
|
|
@@ -1127,6 +1211,9 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1127
1211
|
},
|
|
1128
1212
|
get assistantTexts() {
|
|
1129
1213
|
return state.assistantTexts;
|
|
1214
|
+
},
|
|
1215
|
+
get assistantAudio() {
|
|
1216
|
+
return state.assistantAudio;
|
|
1130
1217
|
}
|
|
1131
1218
|
};
|
|
1132
1219
|
};
|
|
@@ -1134,6 +1221,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1134
1221
|
// src/vue/useVoiceController.ts
|
|
1135
1222
|
var useVoiceController = (path, options = {}) => {
|
|
1136
1223
|
const controller = createVoiceController(path, options);
|
|
1224
|
+
const assistantAudio = shallowRef2([]);
|
|
1137
1225
|
const assistantTexts = shallowRef2([]);
|
|
1138
1226
|
const error = ref2(null);
|
|
1139
1227
|
const isConnected = ref2(false);
|
|
@@ -1144,6 +1232,7 @@ var useVoiceController = (path, options = {}) => {
|
|
|
1144
1232
|
const status = ref2(controller.status);
|
|
1145
1233
|
const turns = shallowRef2([]);
|
|
1146
1234
|
const sync = () => {
|
|
1235
|
+
assistantAudio.value = [...controller.assistantAudio];
|
|
1147
1236
|
assistantTexts.value = [...controller.assistantTexts];
|
|
1148
1237
|
error.value = controller.error;
|
|
1149
1238
|
isConnected.value = controller.isConnected;
|
|
@@ -1162,6 +1251,7 @@ var useVoiceController = (path, options = {}) => {
|
|
|
1162
1251
|
};
|
|
1163
1252
|
onUnmounted2(destroy);
|
|
1164
1253
|
return {
|
|
1254
|
+
assistantAudio,
|
|
1165
1255
|
assistantTexts,
|
|
1166
1256
|
bindHTMX: controller.bindHTMX,
|
|
1167
1257
|
close: () => destroy(),
|
package/dist/vue/useVoiceController.d.ts
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
import type { VoiceControllerOptions, VoiceTurnRecord } from '../types';
|
|
2
2
|
export declare const useVoiceController: <TResult = unknown>(path: string, options?: VoiceControllerOptions) => {
|
|
3
|
+
assistantAudio: import("vue").ShallowRef<{
|
|
4
|
+
chunk: Uint8Array;
|
|
5
|
+
format: import("..").AudioFormat;
|
|
6
|
+
receivedAt: number;
|
|
7
|
+
turnId?: string;
|
|
8
|
+
}[], {
|
|
9
|
+
chunk: Uint8Array;
|
|
10
|
+
format: import("..").AudioFormat;
|
|
11
|
+
receivedAt: number;
|
|
12
|
+
turnId?: string;
|
|
13
|
+
}[]>;
|
|
3
14
|
assistantTexts: import("vue").ShallowRef<string[], string[]>;
|
|
4
15
|
bindHTMX: (options: import("..").VoiceHTMXBindingOptions) => () => void;
|
|
5
16
|
close: () => void;
|