@absolutejs/voice 0.0.21 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +499 -2
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +12 -1
- package/dist/index.js +2425 -33
- package/dist/ops.d.ts +100 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +4940 -307
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/types.d.ts +290 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/dist/ops.d.ts
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
import type { VoiceCallDisposition, VoiceSessionRecord, VoiceSessionSummary } from './types';
|
|
2
|
+
import type { StoredVoiceCallReviewArtifact } from './testing/review';
|
|
3
|
+
export type VoiceOpsTaskStatus = 'open' | 'in-progress' | 'done';
|
|
4
|
+
export type VoiceOpsTaskKind = 'callback' | 'escalation' | 'transfer-check' | 'retry-review';
|
|
5
|
+
export type VoiceOpsTaskHistoryEntry = {
|
|
6
|
+
actor: string;
|
|
7
|
+
at: number;
|
|
8
|
+
detail?: string;
|
|
9
|
+
type: 'created' | 'assigned' | 'started' | 'completed' | 'reopened';
|
|
10
|
+
};
|
|
11
|
+
export type VoiceOpsTask = {
|
|
12
|
+
assignee?: string;
|
|
13
|
+
createdAt: number;
|
|
14
|
+
description: string;
|
|
15
|
+
history: VoiceOpsTaskHistoryEntry[];
|
|
16
|
+
id: string;
|
|
17
|
+
intakeId?: string;
|
|
18
|
+
kind: VoiceOpsTaskKind;
|
|
19
|
+
outcome?: VoiceCallDisposition;
|
|
20
|
+
recommendedAction: string;
|
|
21
|
+
reviewId?: string;
|
|
22
|
+
status: VoiceOpsTaskStatus;
|
|
23
|
+
target?: string;
|
|
24
|
+
title: string;
|
|
25
|
+
updatedAt: number;
|
|
26
|
+
};
|
|
27
|
+
export type StoredVoiceOpsTask = VoiceOpsTask;
|
|
28
|
+
export type VoiceOpsTaskStore<TTask extends StoredVoiceOpsTask = StoredVoiceOpsTask> = {
|
|
29
|
+
get: (id: string) => Promise<TTask | undefined> | TTask | undefined;
|
|
30
|
+
list: () => Promise<TTask[]> | TTask[];
|
|
31
|
+
remove: (id: string) => Promise<void> | void;
|
|
32
|
+
set: (id: string, task: TTask) => Promise<void> | void;
|
|
33
|
+
};
|
|
34
|
+
export type VoiceOpsTaskSummary = {
|
|
35
|
+
byKind: Array<[VoiceOpsTaskKind, number]>;
|
|
36
|
+
byOutcome: Array<[string, number]>;
|
|
37
|
+
done: number;
|
|
38
|
+
inProgress: number;
|
|
39
|
+
open: number;
|
|
40
|
+
topAssignees: Array<[string, number]>;
|
|
41
|
+
topTargets: Array<[string, number]>;
|
|
42
|
+
total: number;
|
|
43
|
+
};
|
|
44
|
+
export type VoiceIntegrationEventType = 'call.completed' | 'review.saved' | 'task.created' | 'task.updated';
|
|
45
|
+
export type VoiceIntegrationEvent = {
|
|
46
|
+
createdAt: number;
|
|
47
|
+
deliveredAt?: number;
|
|
48
|
+
deliveredTo?: string;
|
|
49
|
+
deliveryError?: string;
|
|
50
|
+
id: string;
|
|
51
|
+
payload: Record<string, unknown>;
|
|
52
|
+
type: VoiceIntegrationEventType;
|
|
53
|
+
};
|
|
54
|
+
export type StoredVoiceIntegrationEvent = VoiceIntegrationEvent;
|
|
55
|
+
export type VoiceIntegrationEventStore<TEvent extends StoredVoiceIntegrationEvent = StoredVoiceIntegrationEvent> = {
|
|
56
|
+
get: (id: string) => Promise<TEvent | undefined> | TEvent | undefined;
|
|
57
|
+
list: () => Promise<TEvent[]> | TEvent[];
|
|
58
|
+
remove: (id: string) => Promise<void> | void;
|
|
59
|
+
set: (id: string, event: TEvent) => Promise<void> | void;
|
|
60
|
+
};
|
|
61
|
+
export declare const withVoiceOpsTaskId: <TTask extends Omit<VoiceOpsTask, "id"> = Omit<VoiceOpsTask, "id">>(id: string, task: TTask) => TTask & {
|
|
62
|
+
id: string;
|
|
63
|
+
};
|
|
64
|
+
export declare const withVoiceIntegrationEventId: <TEvent extends Omit<VoiceIntegrationEvent, "id"> = Omit<VoiceIntegrationEvent, "id">>(id: string, event: TEvent) => TEvent & {
|
|
65
|
+
id: string;
|
|
66
|
+
};
|
|
67
|
+
export declare const buildVoiceOpsTaskFromReview: (review: StoredVoiceCallReviewArtifact) => StoredVoiceOpsTask | null;
|
|
68
|
+
export declare const assignVoiceOpsTask: (task: StoredVoiceOpsTask, owner: string, input?: {
|
|
69
|
+
at?: number;
|
|
70
|
+
actor?: string;
|
|
71
|
+
}) => StoredVoiceOpsTask;
|
|
72
|
+
export declare const startVoiceOpsTask: (task: StoredVoiceOpsTask, input?: {
|
|
73
|
+
at?: number;
|
|
74
|
+
actor?: string;
|
|
75
|
+
detail?: string;
|
|
76
|
+
}) => StoredVoiceOpsTask;
|
|
77
|
+
export declare const completeVoiceOpsTask: (task: StoredVoiceOpsTask, input?: {
|
|
78
|
+
at?: number;
|
|
79
|
+
actor?: string;
|
|
80
|
+
detail?: string;
|
|
81
|
+
}) => StoredVoiceOpsTask;
|
|
82
|
+
export declare const reopenVoiceOpsTask: (task: StoredVoiceOpsTask, input?: {
|
|
83
|
+
at?: number;
|
|
84
|
+
actor?: string;
|
|
85
|
+
detail?: string;
|
|
86
|
+
}) => StoredVoiceOpsTask;
|
|
87
|
+
export declare const listVoiceOpsTasks: (tasks: StoredVoiceOpsTask[]) => VoiceOpsTask[];
|
|
88
|
+
export declare const summarizeVoiceOpsTasks: (tasks: StoredVoiceOpsTask[]) => VoiceOpsTaskSummary;
|
|
89
|
+
export declare const createVoiceIntegrationEvent: <TPayload extends Record<string, unknown> = Record<string, unknown>>(type: VoiceIntegrationEventType, payload: TPayload, input?: {
|
|
90
|
+
createdAt?: number;
|
|
91
|
+
id?: string;
|
|
92
|
+
}) => StoredVoiceIntegrationEvent;
|
|
93
|
+
export declare const createVoiceCallCompletedEvent: (input: {
|
|
94
|
+
disposition?: VoiceCallDisposition;
|
|
95
|
+
session: VoiceSessionRecord;
|
|
96
|
+
sessionSummary?: VoiceSessionSummary;
|
|
97
|
+
}) => StoredVoiceIntegrationEvent;
|
|
98
|
+
export declare const createVoiceReviewSavedEvent: (review: StoredVoiceCallReviewArtifact) => StoredVoiceIntegrationEvent;
|
|
99
|
+
export declare const createVoiceTaskCreatedEvent: (task: StoredVoiceOpsTask) => StoredVoiceIntegrationEvent;
|
|
100
|
+
export declare const createVoiceTaskUpdatedEvent: (task: StoredVoiceOpsTask) => StoredVoiceIntegrationEvent;
|
package/dist/react/index.js
CHANGED
|
@@ -102,6 +102,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
102
102
|
};
|
|
103
103
|
var serverMessageToAction = (message) => {
|
|
104
104
|
switch (message.type) {
|
|
105
|
+
case "audio":
|
|
106
|
+
return {
|
|
107
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
108
|
+
format: message.format,
|
|
109
|
+
receivedAt: message.receivedAt,
|
|
110
|
+
turnId: message.turnId,
|
|
111
|
+
type: "audio"
|
|
112
|
+
};
|
|
105
113
|
case "assistant":
|
|
106
114
|
return {
|
|
107
115
|
text: message.text,
|
|
@@ -182,6 +190,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
182
190
|
return false;
|
|
183
191
|
}
|
|
184
192
|
switch (value.type) {
|
|
193
|
+
case "audio":
|
|
185
194
|
case "assistant":
|
|
186
195
|
case "complete":
|
|
187
196
|
case "error":
|
|
@@ -354,6 +363,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
354
363
|
|
|
355
364
|
// src/client/store.ts
|
|
356
365
|
var createInitialState = () => ({
|
|
366
|
+
assistantAudio: [],
|
|
357
367
|
assistantTexts: [],
|
|
358
368
|
error: null,
|
|
359
369
|
isConnected: false,
|
|
@@ -371,6 +381,20 @@ var createVoiceStreamStore = () => {
|
|
|
371
381
|
};
|
|
372
382
|
const dispatch = (action) => {
|
|
373
383
|
switch (action.type) {
|
|
384
|
+
case "audio":
|
|
385
|
+
state = {
|
|
386
|
+
...state,
|
|
387
|
+
assistantAudio: [
|
|
388
|
+
...state.assistantAudio,
|
|
389
|
+
{
|
|
390
|
+
chunk: action.chunk,
|
|
391
|
+
format: action.format,
|
|
392
|
+
receivedAt: action.receivedAt,
|
|
393
|
+
turnId: action.turnId
|
|
394
|
+
}
|
|
395
|
+
]
|
|
396
|
+
};
|
|
397
|
+
break;
|
|
374
398
|
case "assistant":
|
|
375
399
|
state = {
|
|
376
400
|
...state,
|
|
@@ -510,6 +534,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
510
534
|
get assistantTexts() {
|
|
511
535
|
return store.getSnapshot().assistantTexts;
|
|
512
536
|
},
|
|
537
|
+
get assistantAudio() {
|
|
538
|
+
return store.getSnapshot().assistantAudio;
|
|
539
|
+
},
|
|
513
540
|
sendAudio(audio) {
|
|
514
541
|
connection.sendAudio(audio);
|
|
515
542
|
},
|
|
@@ -524,6 +551,7 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
524
551
|
|
|
525
552
|
// src/react/useVoiceStream.tsx
|
|
526
553
|
var EMPTY_SNAPSHOT = {
|
|
554
|
+
assistantAudio: [],
|
|
527
555
|
assistantTexts: [],
|
|
528
556
|
error: null,
|
|
529
557
|
isConnected: false,
|
|
@@ -915,6 +943,58 @@ var PRESET_INPUTS = {
|
|
|
915
943
|
transcriptStabilityMs: 1650
|
|
916
944
|
}
|
|
917
945
|
},
|
|
946
|
+
"pstn-balanced": {
|
|
947
|
+
audioConditioning: {
|
|
948
|
+
enabled: true,
|
|
949
|
+
maxGain: 2.8,
|
|
950
|
+
noiseGateAttenuation: 0.07,
|
|
951
|
+
noiseGateThreshold: 0.005,
|
|
952
|
+
targetLevel: 0.08
|
|
953
|
+
},
|
|
954
|
+
capture: {
|
|
955
|
+
channelCount: 1,
|
|
956
|
+
sampleRateHz: 16000
|
|
957
|
+
},
|
|
958
|
+
connection: {
|
|
959
|
+
maxReconnectAttempts: 14,
|
|
960
|
+
pingInterval: 45000,
|
|
961
|
+
reconnect: true
|
|
962
|
+
},
|
|
963
|
+
sttLifecycle: "continuous",
|
|
964
|
+
turnDetection: {
|
|
965
|
+
qualityProfile: "noisy-room",
|
|
966
|
+
profile: "long-form",
|
|
967
|
+
silenceMs: 660,
|
|
968
|
+
speechThreshold: 0.012,
|
|
969
|
+
transcriptStabilityMs: 300
|
|
970
|
+
}
|
|
971
|
+
},
|
|
972
|
+
"pstn-fast": {
|
|
973
|
+
audioConditioning: {
|
|
974
|
+
enabled: true,
|
|
975
|
+
maxGain: 2.75,
|
|
976
|
+
noiseGateAttenuation: 0.06,
|
|
977
|
+
noiseGateThreshold: 0.005,
|
|
978
|
+
targetLevel: 0.08
|
|
979
|
+
},
|
|
980
|
+
capture: {
|
|
981
|
+
channelCount: 1,
|
|
982
|
+
sampleRateHz: 16000
|
|
983
|
+
},
|
|
984
|
+
connection: {
|
|
985
|
+
maxReconnectAttempts: 14,
|
|
986
|
+
pingInterval: 45000,
|
|
987
|
+
reconnect: true
|
|
988
|
+
},
|
|
989
|
+
sttLifecycle: "continuous",
|
|
990
|
+
turnDetection: {
|
|
991
|
+
qualityProfile: "noisy-room",
|
|
992
|
+
profile: "long-form",
|
|
993
|
+
silenceMs: 620,
|
|
994
|
+
speechThreshold: 0.012,
|
|
995
|
+
transcriptStabilityMs: 280
|
|
996
|
+
}
|
|
997
|
+
},
|
|
918
998
|
reliability: {
|
|
919
999
|
audioConditioning: {
|
|
920
1000
|
enabled: true,
|
|
@@ -958,6 +1038,7 @@ var resolveVoiceRuntimePreset = (name = "default") => {
|
|
|
958
1038
|
|
|
959
1039
|
// src/client/controller.ts
|
|
960
1040
|
var createInitialState2 = (stream) => ({
|
|
1041
|
+
assistantAudio: [...stream.assistantAudio],
|
|
961
1042
|
assistantTexts: [...stream.assistantTexts],
|
|
962
1043
|
error: stream.error,
|
|
963
1044
|
isConnected: stream.isConnected,
|
|
@@ -986,6 +1067,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
986
1067
|
const sync = () => {
|
|
987
1068
|
state = {
|
|
988
1069
|
...state,
|
|
1070
|
+
assistantAudio: [...stream.assistantAudio],
|
|
989
1071
|
assistantTexts: [...stream.assistantTexts],
|
|
990
1072
|
error: stream.error,
|
|
991
1073
|
isConnected: stream.isConnected,
|
|
@@ -1113,12 +1195,16 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1113
1195
|
},
|
|
1114
1196
|
get assistantTexts() {
|
|
1115
1197
|
return state.assistantTexts;
|
|
1198
|
+
},
|
|
1199
|
+
get assistantAudio() {
|
|
1200
|
+
return state.assistantAudio;
|
|
1116
1201
|
}
|
|
1117
1202
|
};
|
|
1118
1203
|
};
|
|
1119
1204
|
|
|
1120
1205
|
// src/react/useVoiceController.tsx
|
|
1121
1206
|
var EMPTY_SNAPSHOT2 = {
|
|
1207
|
+
assistantAudio: [],
|
|
1122
1208
|
assistantTexts: [],
|
|
1123
1209
|
error: null,
|
|
1124
1210
|
isConnected: false,
|
|
@@ -13,6 +13,12 @@ export declare const useVoiceController: <TResult = unknown>(path: string, optio
|
|
|
13
13
|
partial: string;
|
|
14
14
|
turns: import("..").VoiceTurnRecord<TResult>[];
|
|
15
15
|
assistantTexts: string[];
|
|
16
|
+
assistantAudio: Array<{
|
|
17
|
+
chunk: Uint8Array;
|
|
18
|
+
format: import("..").AudioFormat;
|
|
19
|
+
receivedAt: number;
|
|
20
|
+
turnId?: string;
|
|
21
|
+
}>;
|
|
16
22
|
error: string | null;
|
|
17
23
|
isConnected: boolean;
|
|
18
24
|
isRecording: boolean;
|
|
@@ -9,6 +9,12 @@ export declare const useVoiceStream: <TResult = unknown>(path: string, options?:
|
|
|
9
9
|
partial: string;
|
|
10
10
|
turns: import("..").VoiceTurnRecord<TResult>[];
|
|
11
11
|
assistantTexts: string[];
|
|
12
|
+
assistantAudio: Array<{
|
|
13
|
+
chunk: Uint8Array;
|
|
14
|
+
format: import("..").AudioFormat;
|
|
15
|
+
receivedAt: number;
|
|
16
|
+
turnId?: string;
|
|
17
|
+
}>;
|
|
12
18
|
error: string | null;
|
|
13
19
|
isConnected: boolean;
|
|
14
20
|
};
|
|
@@ -0,0 +1,3 @@
|
|
|
1
|
+
import type { VoiceSTTRoutingCorrectionMode, VoiceSTTRoutingGoal, VoiceSTTRoutingStrategy, VoiceTurnCorrectionHandler } from './types';
|
|
2
|
+
export declare const resolveVoiceSTTRoutingStrategy: (goal?: VoiceSTTRoutingGoal) => VoiceSTTRoutingStrategy;
|
|
3
|
+
export declare const createVoiceSTTRoutingCorrectionHandler: (mode?: VoiceSTTRoutingCorrectionMode) => VoiceTurnCorrectionHandler | undefined;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { StoredVoiceCallReviewArtifact, VoiceCallReviewArtifact } from './testing/review';
|
|
2
|
+
import type { VoiceOpsTask } from './ops';
|
|
3
|
+
import type { VoiceCallDisposition, VoiceRuntimeOpsConfig, VoiceSessionHandle, VoiceSessionRecord } from './types';
|
|
4
|
+
export declare const createVoiceCallReviewFromSession: <TSession extends VoiceSessionRecord = VoiceSessionRecord>(input: {
|
|
5
|
+
disposition: VoiceCallDisposition;
|
|
6
|
+
generatedAt?: number;
|
|
7
|
+
reason?: string;
|
|
8
|
+
session: TSession;
|
|
9
|
+
target?: string;
|
|
10
|
+
}) => VoiceCallReviewArtifact;
|
|
11
|
+
export declare const recordVoiceRuntimeOps: <TContext, TSession extends VoiceSessionRecord, TResult>(input: {
|
|
12
|
+
api: VoiceSessionHandle<TContext, TSession, TResult>;
|
|
13
|
+
config?: VoiceRuntimeOpsConfig<TContext, TSession, TResult>;
|
|
14
|
+
context: TContext;
|
|
15
|
+
disposition: VoiceCallDisposition;
|
|
16
|
+
metadata?: Record<string, unknown>;
|
|
17
|
+
reason?: string;
|
|
18
|
+
session: TSession;
|
|
19
|
+
target?: string;
|
|
20
|
+
}) => Promise<{
|
|
21
|
+
review: StoredVoiceCallReviewArtifact | undefined;
|
|
22
|
+
task: VoiceOpsTask | undefined;
|
|
23
|
+
} | undefined>;
|
package/dist/svelte/index.js
CHANGED
|
@@ -99,6 +99,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
99
99
|
};
|
|
100
100
|
var serverMessageToAction = (message) => {
|
|
101
101
|
switch (message.type) {
|
|
102
|
+
case "audio":
|
|
103
|
+
return {
|
|
104
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
105
|
+
format: message.format,
|
|
106
|
+
receivedAt: message.receivedAt,
|
|
107
|
+
turnId: message.turnId,
|
|
108
|
+
type: "audio"
|
|
109
|
+
};
|
|
102
110
|
case "assistant":
|
|
103
111
|
return {
|
|
104
112
|
text: message.text,
|
|
@@ -179,6 +187,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
179
187
|
return false;
|
|
180
188
|
}
|
|
181
189
|
switch (value.type) {
|
|
190
|
+
case "audio":
|
|
182
191
|
case "assistant":
|
|
183
192
|
case "complete":
|
|
184
193
|
case "error":
|
|
@@ -351,6 +360,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
351
360
|
|
|
352
361
|
// src/client/store.ts
|
|
353
362
|
var createInitialState = () => ({
|
|
363
|
+
assistantAudio: [],
|
|
354
364
|
assistantTexts: [],
|
|
355
365
|
error: null,
|
|
356
366
|
isConnected: false,
|
|
@@ -368,6 +378,20 @@ var createVoiceStreamStore = () => {
|
|
|
368
378
|
};
|
|
369
379
|
const dispatch = (action) => {
|
|
370
380
|
switch (action.type) {
|
|
381
|
+
case "audio":
|
|
382
|
+
state = {
|
|
383
|
+
...state,
|
|
384
|
+
assistantAudio: [
|
|
385
|
+
...state.assistantAudio,
|
|
386
|
+
{
|
|
387
|
+
chunk: action.chunk,
|
|
388
|
+
format: action.format,
|
|
389
|
+
receivedAt: action.receivedAt,
|
|
390
|
+
turnId: action.turnId
|
|
391
|
+
}
|
|
392
|
+
]
|
|
393
|
+
};
|
|
394
|
+
break;
|
|
371
395
|
case "assistant":
|
|
372
396
|
state = {
|
|
373
397
|
...state,
|
|
@@ -507,6 +531,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
507
531
|
get assistantTexts() {
|
|
508
532
|
return store.getSnapshot().assistantTexts;
|
|
509
533
|
},
|
|
534
|
+
get assistantAudio() {
|
|
535
|
+
return store.getSnapshot().assistantAudio;
|
|
536
|
+
},
|
|
510
537
|
sendAudio(audio) {
|
|
511
538
|
connection.sendAudio(audio);
|
|
512
539
|
},
|
|
@@ -886,6 +913,58 @@ var PRESET_INPUTS = {
|
|
|
886
913
|
transcriptStabilityMs: 1650
|
|
887
914
|
}
|
|
888
915
|
},
|
|
916
|
+
"pstn-balanced": {
|
|
917
|
+
audioConditioning: {
|
|
918
|
+
enabled: true,
|
|
919
|
+
maxGain: 2.8,
|
|
920
|
+
noiseGateAttenuation: 0.07,
|
|
921
|
+
noiseGateThreshold: 0.005,
|
|
922
|
+
targetLevel: 0.08
|
|
923
|
+
},
|
|
924
|
+
capture: {
|
|
925
|
+
channelCount: 1,
|
|
926
|
+
sampleRateHz: 16000
|
|
927
|
+
},
|
|
928
|
+
connection: {
|
|
929
|
+
maxReconnectAttempts: 14,
|
|
930
|
+
pingInterval: 45000,
|
|
931
|
+
reconnect: true
|
|
932
|
+
},
|
|
933
|
+
sttLifecycle: "continuous",
|
|
934
|
+
turnDetection: {
|
|
935
|
+
qualityProfile: "noisy-room",
|
|
936
|
+
profile: "long-form",
|
|
937
|
+
silenceMs: 660,
|
|
938
|
+
speechThreshold: 0.012,
|
|
939
|
+
transcriptStabilityMs: 300
|
|
940
|
+
}
|
|
941
|
+
},
|
|
942
|
+
"pstn-fast": {
|
|
943
|
+
audioConditioning: {
|
|
944
|
+
enabled: true,
|
|
945
|
+
maxGain: 2.75,
|
|
946
|
+
noiseGateAttenuation: 0.06,
|
|
947
|
+
noiseGateThreshold: 0.005,
|
|
948
|
+
targetLevel: 0.08
|
|
949
|
+
},
|
|
950
|
+
capture: {
|
|
951
|
+
channelCount: 1,
|
|
952
|
+
sampleRateHz: 16000
|
|
953
|
+
},
|
|
954
|
+
connection: {
|
|
955
|
+
maxReconnectAttempts: 14,
|
|
956
|
+
pingInterval: 45000,
|
|
957
|
+
reconnect: true
|
|
958
|
+
},
|
|
959
|
+
sttLifecycle: "continuous",
|
|
960
|
+
turnDetection: {
|
|
961
|
+
qualityProfile: "noisy-room",
|
|
962
|
+
profile: "long-form",
|
|
963
|
+
silenceMs: 620,
|
|
964
|
+
speechThreshold: 0.012,
|
|
965
|
+
transcriptStabilityMs: 280
|
|
966
|
+
}
|
|
967
|
+
},
|
|
889
968
|
reliability: {
|
|
890
969
|
audioConditioning: {
|
|
891
970
|
enabled: true,
|
|
@@ -929,6 +1008,7 @@ var resolveVoiceRuntimePreset = (name = "default") => {
|
|
|
929
1008
|
|
|
930
1009
|
// src/client/controller.ts
|
|
931
1010
|
var createInitialState2 = (stream) => ({
|
|
1011
|
+
assistantAudio: [...stream.assistantAudio],
|
|
932
1012
|
assistantTexts: [...stream.assistantTexts],
|
|
933
1013
|
error: stream.error,
|
|
934
1014
|
isConnected: stream.isConnected,
|
|
@@ -957,6 +1037,7 @@ var createVoiceController = (path, options = {}) => {
|
|
|
957
1037
|
const sync = () => {
|
|
958
1038
|
state = {
|
|
959
1039
|
...state,
|
|
1040
|
+
assistantAudio: [...stream.assistantAudio],
|
|
960
1041
|
assistantTexts: [...stream.assistantTexts],
|
|
961
1042
|
error: stream.error,
|
|
962
1043
|
isConnected: stream.isConnected,
|
|
@@ -1084,6 +1165,9 @@ var createVoiceController = (path, options = {}) => {
|
|
|
1084
1165
|
},
|
|
1085
1166
|
get assistantTexts() {
|
|
1086
1167
|
return state.assistantTexts;
|
|
1168
|
+
},
|
|
1169
|
+
get assistantAudio() {
|
|
1170
|
+
return state.assistantAudio;
|
|
1087
1171
|
}
|
|
1088
1172
|
};
|
|
1089
1173
|
};
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export type TelephonyResponseShapeMode = 'full' | 'lead-clause';
|
|
2
|
+
export type TelephonyResponseShapeOptions = {
|
|
3
|
+
mode?: TelephonyResponseShapeMode;
|
|
4
|
+
maxChars?: number;
|
|
5
|
+
maxWords?: number;
|
|
6
|
+
};
|
|
7
|
+
export declare const shapeTelephonyAssistantText: (text: string, options?: TelephonyResponseShapeOptions) => string;
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { type VoiceCallReviewArtifact, type VoiceCallReviewConfig } from '../testing/review';
|
|
2
|
+
import type { AudioFormat, VoiceLogger, VoicePluginConfig, VoiceSessionRecord, VoiceServerMessage } from '../types';
|
|
3
|
+
type TwilioMediaPayload = {
|
|
4
|
+
chunk?: string;
|
|
5
|
+
payload: string;
|
|
6
|
+
timestamp?: string;
|
|
7
|
+
track?: 'inbound' | 'outbound';
|
|
8
|
+
};
|
|
9
|
+
type TwilioConnectedMessage = {
|
|
10
|
+
event: 'connected';
|
|
11
|
+
protocol?: string;
|
|
12
|
+
version?: string;
|
|
13
|
+
};
|
|
14
|
+
type TwilioStartMessage = {
|
|
15
|
+
event: 'start';
|
|
16
|
+
sequenceNumber?: string;
|
|
17
|
+
start: {
|
|
18
|
+
accountSid?: string;
|
|
19
|
+
callSid?: string;
|
|
20
|
+
customParameters?: Record<string, string>;
|
|
21
|
+
mediaFormat?: {
|
|
22
|
+
channels?: number;
|
|
23
|
+
encoding?: string;
|
|
24
|
+
sampleRate?: number;
|
|
25
|
+
};
|
|
26
|
+
streamSid: string;
|
|
27
|
+
track?: string;
|
|
28
|
+
};
|
|
29
|
+
streamSid?: string;
|
|
30
|
+
};
|
|
31
|
+
type TwilioMediaMessage = {
|
|
32
|
+
event: 'media';
|
|
33
|
+
media: TwilioMediaPayload;
|
|
34
|
+
sequenceNumber?: string;
|
|
35
|
+
streamSid: string;
|
|
36
|
+
};
|
|
37
|
+
type TwilioMarkMessage = {
|
|
38
|
+
event: 'mark';
|
|
39
|
+
mark?: {
|
|
40
|
+
name?: string;
|
|
41
|
+
};
|
|
42
|
+
sequenceNumber?: string;
|
|
43
|
+
streamSid: string;
|
|
44
|
+
};
|
|
45
|
+
type TwilioStopMessage = {
|
|
46
|
+
event: 'stop';
|
|
47
|
+
sequenceNumber?: string;
|
|
48
|
+
stop?: {
|
|
49
|
+
accountSid?: string;
|
|
50
|
+
callSid?: string;
|
|
51
|
+
};
|
|
52
|
+
streamSid: string;
|
|
53
|
+
};
|
|
54
|
+
export type TwilioInboundMessage = TwilioConnectedMessage | TwilioStartMessage | TwilioMediaMessage | TwilioMarkMessage | TwilioStopMessage;
|
|
55
|
+
export type TwilioOutboundMediaMessage = {
|
|
56
|
+
event: 'media';
|
|
57
|
+
media: {
|
|
58
|
+
payload: string;
|
|
59
|
+
};
|
|
60
|
+
streamSid: string;
|
|
61
|
+
};
|
|
62
|
+
export type TwilioOutboundClearMessage = {
|
|
63
|
+
event: 'clear';
|
|
64
|
+
streamSid: string;
|
|
65
|
+
};
|
|
66
|
+
export type TwilioOutboundMarkMessage = {
|
|
67
|
+
event: 'mark';
|
|
68
|
+
mark: {
|
|
69
|
+
name: string;
|
|
70
|
+
};
|
|
71
|
+
streamSid: string;
|
|
72
|
+
};
|
|
73
|
+
export type TwilioOutboundMessage = TwilioOutboundMediaMessage | TwilioOutboundClearMessage | TwilioOutboundMarkMessage;
|
|
74
|
+
export type TwilioMediaStreamSocket = {
|
|
75
|
+
close: (code?: number, reason?: string) => void | Promise<void>;
|
|
76
|
+
send: (data: string) => void | Promise<void>;
|
|
77
|
+
};
|
|
78
|
+
export type TwilioMediaStreamBridgeOptions<TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = Omit<VoicePluginConfig<TContext, TSession, TResult>, 'htmx' | 'path'> & {
|
|
79
|
+
clearOnInboundMedia?: boolean;
|
|
80
|
+
context: TContext;
|
|
81
|
+
logger?: VoiceLogger;
|
|
82
|
+
onVoiceMessage?: (input: {
|
|
83
|
+
callSid?: string;
|
|
84
|
+
message: VoiceServerMessage<TResult>;
|
|
85
|
+
sessionId: string;
|
|
86
|
+
streamSid?: string;
|
|
87
|
+
}) => Promise<void> | void;
|
|
88
|
+
review?: {
|
|
89
|
+
config?: VoiceCallReviewConfig;
|
|
90
|
+
fixtureId?: string;
|
|
91
|
+
onArtifact?: (artifact: VoiceCallReviewArtifact) => Promise<void> | void;
|
|
92
|
+
path?: string;
|
|
93
|
+
title?: string;
|
|
94
|
+
};
|
|
95
|
+
scenarioId?: string;
|
|
96
|
+
sessionId?: string;
|
|
97
|
+
};
|
|
98
|
+
export type TwilioMediaStreamBridge = {
|
|
99
|
+
close: (reason?: string) => Promise<void>;
|
|
100
|
+
getSessionId: () => string | null;
|
|
101
|
+
getStreamSid: () => string | null;
|
|
102
|
+
handleMessage: (raw: string | TwilioInboundMessage) => Promise<void>;
|
|
103
|
+
};
|
|
104
|
+
export type TwilioVoiceResponseOptions = {
|
|
105
|
+
parameters?: Record<string, string | number | boolean | undefined>;
|
|
106
|
+
streamName?: string;
|
|
107
|
+
streamUrl: string;
|
|
108
|
+
track?: 'both_tracks' | 'inbound_track' | 'outbound_track';
|
|
109
|
+
};
|
|
110
|
+
export declare const decodeTwilioMulawBase64: (payload: string) => Int16Array<ArrayBuffer>;
|
|
111
|
+
export declare const encodeTwilioMulawBase64: (samples: Int16Array) => string;
|
|
112
|
+
export declare const transcodeTwilioInboundPayloadToPCM16: (payload: string) => Uint8Array<ArrayBuffer>;
|
|
113
|
+
export declare const transcodePCMToTwilioOutboundPayload: (chunk: Uint8Array, format: AudioFormat) => string;
|
|
114
|
+
export declare const createTwilioVoiceResponse: (options: TwilioVoiceResponseOptions) => string;
|
|
115
|
+
export declare const createTwilioMediaStreamBridge: <TContext = unknown, TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown>(socket: TwilioMediaStreamSocket, options: TwilioMediaStreamBridgeOptions<TContext, TSession, TResult>) => TwilioMediaStreamBridge;
|
|
116
|
+
export {};
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { STTAdapter } from '../types';
|
|
1
|
+
import type { STTAdapter, STTAdapterOpenOptions } from '../types';
|
|
2
2
|
import { type VoiceSTTAdapterHarnessOptions, type VoiceSTTAdapterHarnessResult } from './stt';
|
|
3
3
|
import type { VoiceTestFixture } from './fixtures';
|
|
4
4
|
export type VoiceExpectedTermAccuracy = {
|
|
@@ -8,7 +8,15 @@ export type VoiceExpectedTermAccuracy = {
|
|
|
8
8
|
missingTerms: string[];
|
|
9
9
|
recall: number;
|
|
10
10
|
};
|
|
11
|
-
export type VoiceSTTFixtureEnvironment = 'accent' | 'accent-noisy' | 'clean' | 'noisy' | 'other';
|
|
11
|
+
export type VoiceSTTFixtureEnvironment = 'accent' | 'accent-noisy' | 'clean' | 'code-switch' | 'jargon' | 'multilingual' | 'multi-speaker' | 'noisy' | 'telephony' | 'other';
|
|
12
|
+
export type VoiceSpeakerTurnAccuracy = {
|
|
13
|
+
available: boolean;
|
|
14
|
+
actualTurnCount: number;
|
|
15
|
+
expectedTurnCount: number;
|
|
16
|
+
passes: boolean;
|
|
17
|
+
patternMatchRate: number;
|
|
18
|
+
postClustered?: boolean;
|
|
19
|
+
};
|
|
12
20
|
export type VoiceSTTBenchmarkFixtureResult = {
|
|
13
21
|
accuracy: VoiceSTTAdapterHarnessResult['accuracy'];
|
|
14
22
|
closeCount: number;
|
|
@@ -24,6 +32,7 @@ export type VoiceSTTBenchmarkFixtureResult = {
|
|
|
24
32
|
group: VoiceSTTFixtureEnvironment;
|
|
25
33
|
passes: boolean;
|
|
26
34
|
partialCount: number;
|
|
35
|
+
speakerTurns?: VoiceSpeakerTurnAccuracy;
|
|
27
36
|
postSpeechTimeToEndOfTurnMs?: number;
|
|
28
37
|
postSpeechTimeToFirstFinalMs?: number;
|
|
29
38
|
tags: string[];
|
|
@@ -38,6 +47,7 @@ export type VoiceSTTBenchmarkSummary = {
|
|
|
38
47
|
averageElapsedMs: number;
|
|
39
48
|
averageEndOfTurnCount: number;
|
|
40
49
|
averageFinalCount: number;
|
|
50
|
+
averageSpeakerTurnMatchRate?: number;
|
|
41
51
|
averageTermRecall: number;
|
|
42
52
|
averagePostSpeechTimeToEndOfTurnMs?: number;
|
|
43
53
|
averagePostSpeechTimeToFirstFinalMs?: number;
|
|
@@ -63,6 +73,7 @@ export type VoiceSTTBenchmarkFixtureSummary = {
|
|
|
63
73
|
passRate: number;
|
|
64
74
|
wordAccuracyRate: number;
|
|
65
75
|
averageTermRecall: number;
|
|
76
|
+
averageSpeakerTurnMatchRate?: number;
|
|
66
77
|
averageWordErrorRate: number;
|
|
67
78
|
averageElapsedMs: number;
|
|
68
79
|
};
|
|
@@ -72,6 +83,38 @@ export type VoiceSTTBenchmarkReport = {
|
|
|
72
83
|
generatedAt: number;
|
|
73
84
|
summary: VoiceSTTBenchmarkSummary;
|
|
74
85
|
};
|
|
86
|
+
export type VoiceSTTBenchmarkFixtureAggregate = {
|
|
87
|
+
averageElapsedMs: number;
|
|
88
|
+
averagePassRate: number;
|
|
89
|
+
averageWordErrorRate: number;
|
|
90
|
+
bestWordErrorRate: number;
|
|
91
|
+
fixtureId: string;
|
|
92
|
+
group: VoiceSTTFixtureEnvironment;
|
|
93
|
+
passCount: number;
|
|
94
|
+
runCount: number;
|
|
95
|
+
tags: string[];
|
|
96
|
+
title: string;
|
|
97
|
+
worstWordErrorRate: number;
|
|
98
|
+
};
|
|
99
|
+
export type VoiceSTTBenchmarkSeriesSummary = {
|
|
100
|
+
adapterId: string;
|
|
101
|
+
averageElapsedMs: number;
|
|
102
|
+
averagePassRate: number;
|
|
103
|
+
averageWordErrorRate: number;
|
|
104
|
+
fixtureCount: number;
|
|
105
|
+
flakyFixtureCount: number;
|
|
106
|
+
generatedRunCount: number;
|
|
107
|
+
stableFixtureCount: number;
|
|
108
|
+
totalPassCount: number;
|
|
109
|
+
totalRunCount: number;
|
|
110
|
+
};
|
|
111
|
+
export type VoiceSTTBenchmarkSeriesReport = {
|
|
112
|
+
adapterId: string;
|
|
113
|
+
fixtures: VoiceSTTBenchmarkFixtureAggregate[];
|
|
114
|
+
generatedAt: number;
|
|
115
|
+
runCount: number;
|
|
116
|
+
summary: VoiceSTTBenchmarkSeriesSummary;
|
|
117
|
+
};
|
|
75
118
|
export type VoiceSTTBenchmarkComparisonEntry = {
|
|
76
119
|
adapterId: string;
|
|
77
120
|
summary: VoiceSTTBenchmarkSummary;
|
|
@@ -98,9 +141,10 @@ export type VoiceSTTBenchmarkAcceptanceResult = {
|
|
|
98
141
|
score: number;
|
|
99
142
|
};
|
|
100
143
|
export type VoiceSTTBenchmarkOptions = VoiceSTTAdapterHarnessOptions & {
|
|
101
|
-
fixtureOptions?: Record<string, Omit<VoiceSTTAdapterHarnessOptions, 'fixtureOptions'>>;
|
|
144
|
+
fixtureOptions?: Record<string, Omit<VoiceSTTAdapterHarnessOptions, 'fixtureOptions' | 'openOptions'>>;
|
|
145
|
+
openOptions?: Partial<STTAdapterOpenOptions> | ((fixture: VoiceTestFixture) => Partial<STTAdapterOpenOptions> | undefined);
|
|
102
146
|
};
|
|
103
|
-
export declare const resolveFixtureEnvironment: (fixture: Pick<VoiceTestFixture, "tags">) => VoiceSTTFixtureEnvironment;
|
|
147
|
+
export declare const resolveFixtureEnvironment: (fixture: Pick<VoiceTestFixture, "language" | "tags">) => VoiceSTTFixtureEnvironment;
|
|
104
148
|
export declare const summarizeSTTBenchmark: (adapterId: string, fixtures: VoiceSTTBenchmarkFixtureResult[]) => VoiceSTTBenchmarkSummary;
|
|
105
149
|
export declare const evaluateSTTBenchmarkAcceptance: (report: VoiceSTTBenchmarkReport, thresholds?: VoiceSTTBenchmarkAcceptanceThresholds) => VoiceSTTBenchmarkAcceptanceResult;
|
|
106
150
|
export declare const compareSTTBenchmarks: (reports: VoiceSTTBenchmarkReport[]) => VoiceSTTBenchmarkComparison;
|
|
@@ -110,3 +154,14 @@ export declare const runSTTAdapterBenchmark: ({ adapter, adapterId, fixtures, op
|
|
|
110
154
|
fixtures: VoiceTestFixture[];
|
|
111
155
|
options?: VoiceSTTBenchmarkOptions;
|
|
112
156
|
}) => Promise<VoiceSTTBenchmarkReport>;
|
|
157
|
+
export declare const summarizeSTTBenchmarkSeries: (input: {
|
|
158
|
+
adapterId: string;
|
|
159
|
+
reports: VoiceSTTBenchmarkReport[];
|
|
160
|
+
}) => VoiceSTTBenchmarkSeriesReport;
|
|
161
|
+
export declare const runSTTAdapterBenchmarkSeries: ({ adapter, adapterId, fixtures, options, runs }: {
|
|
162
|
+
adapter: STTAdapter;
|
|
163
|
+
adapterId: string;
|
|
164
|
+
fixtures: VoiceTestFixture[];
|
|
165
|
+
options?: VoiceSTTBenchmarkOptions;
|
|
166
|
+
runs: number;
|
|
167
|
+
}) => Promise<VoiceSTTBenchmarkSeriesReport>;
|