@absolutejs/voice 0.0.21 → 0.0.22-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1046 -2
- package/dist/agent.d.ts +113 -0
- package/dist/angular/index.js +90 -0
- package/dist/angular/voice-controller.service.d.ts +6 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/client/actions.d.ts +41 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +84 -0
- package/dist/client/index.d.ts +2 -0
- package/dist/client/index.js +507 -5
- package/dist/correction.d.ts +18 -1
- package/dist/fileStore.d.ts +37 -0
- package/dist/index.d.ts +32 -1
- package/dist/index.js +8379 -1245
- package/dist/ops.d.ts +327 -0
- package/dist/opsPresets.d.ts +19 -0
- package/dist/opsRuntime.d.ts +66 -0
- package/dist/opsSinks.d.ts +149 -0
- package/dist/outcomeRecipes.d.ts +18 -0
- package/dist/postgresStore.d.ts +31 -0
- package/dist/queue.d.ts +276 -0
- package/dist/react/index.js +86 -0
- package/dist/react/useVoiceController.d.ts +6 -0
- package/dist/react/useVoiceStream.d.ts +6 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/s3Store.d.ts +14 -0
- package/dist/sqliteStore.d.ts +26 -0
- package/dist/svelte/index.js +84 -0
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +59 -4
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +5094 -284
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +25 -0
- package/dist/testing/stt.d.ts +2 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/trace.d.ts +236 -0
- package/dist/types.d.ts +320 -3
- package/dist/vue/index.js +90 -0
- package/dist/vue/useVoiceController.d.ts +11 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/package.json +115 -1
package/README.md
CHANGED
|
@@ -37,6 +37,13 @@ const app = new Elysia()
|
|
|
37
37
|
voice({
|
|
38
38
|
path: '/voice',
|
|
39
39
|
preset: 'guided-intake',
|
|
40
|
+
lexicon: [
|
|
41
|
+
{
|
|
42
|
+
text: 'AbsoluteJS',
|
|
43
|
+
aliases: ['absoloot js'],
|
|
44
|
+
pronunciation: 'ab-so-lute jay ess'
|
|
45
|
+
}
|
|
46
|
+
],
|
|
40
47
|
phraseHints: [
|
|
41
48
|
{ text: 'AbsoluteJS', aliases: ['absolute js'] },
|
|
42
49
|
{ text: 'Joe Johnston', aliases: ['joe johnson'] }
|
|
@@ -66,11 +73,830 @@ const app = new Elysia()
|
|
|
66
73
|
|
|
67
74
|
`createVoiceMemoryStore()` is dev-only. Real deployments should provide a shared store backed by Redis, Postgres, or equivalent.
|
|
68
75
|
|
|
76
|
+
## Agent Tools And Squads
|
|
77
|
+
|
|
78
|
+
For assistant-style products, use `createVoiceAgent(...)` as the `onTurn` handler. The agent layer is provider-neutral: plug in any model adapter, register server-side tools, and return normal voice route results like `assistantText`, `transfer`, `escalate`, or `complete`.
|
|
79
|
+
|
|
80
|
+
```ts
|
|
81
|
+
import {
|
|
82
|
+
createVoiceAgent,
|
|
83
|
+
createVoiceAgentSquad,
|
|
84
|
+
createVoiceAgentTool,
|
|
85
|
+
createVoiceMemoryStore,
|
|
86
|
+
voice
|
|
87
|
+
} from '@absolutejs/voice';
|
|
88
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
89
|
+
|
|
90
|
+
const lookupOrder = createVoiceAgentTool({
|
|
91
|
+
name: 'lookup_order',
|
|
92
|
+
description: 'Look up an order by id.',
|
|
93
|
+
parameters: {
|
|
94
|
+
type: 'object',
|
|
95
|
+
properties: {
|
|
96
|
+
orderId: { type: 'string' }
|
|
97
|
+
},
|
|
98
|
+
required: ['orderId']
|
|
99
|
+
},
|
|
100
|
+
execute: async ({ args }) => {
|
|
101
|
+
return { orderId: args.orderId, status: 'shipped' };
|
|
102
|
+
}
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
const supportAgent = createVoiceAgent({
|
|
106
|
+
id: 'support',
|
|
107
|
+
system: 'You are a concise support voice agent.',
|
|
108
|
+
tools: [lookupOrder],
|
|
109
|
+
model: {
|
|
110
|
+
async generate({ messages, tools }) {
|
|
111
|
+
// Call your LLM provider here. If it returns tool calls, AbsoluteJS
|
|
112
|
+
// executes them and calls the model again with tool results.
|
|
113
|
+
return {
|
|
114
|
+
assistantText: `I can help. Available tools: ${tools.map((tool) => tool.name).join(', ')}`
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
});
|
|
119
|
+
|
|
120
|
+
const billingAgent = createVoiceAgent({
|
|
121
|
+
id: 'billing',
|
|
122
|
+
system: 'You handle billing questions.',
|
|
123
|
+
model: {
|
|
124
|
+
async generate() {
|
|
125
|
+
return { assistantText: 'I can help with billing.' };
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
const frontDesk = createVoiceAgentSquad({
|
|
131
|
+
id: 'front-desk',
|
|
132
|
+
defaultAgentId: 'support',
|
|
133
|
+
agents: [supportAgent, billingAgent]
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
voice({
|
|
137
|
+
path: '/voice',
|
|
138
|
+
session: createVoiceMemoryStore(),
|
|
139
|
+
stt: deepgram({ apiKey: process.env.DEEPGRAM_API_KEY! }),
|
|
140
|
+
onTurn: frontDesk.onTurn,
|
|
141
|
+
onComplete: async () => {}
|
|
142
|
+
});
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
`createVoiceAgentSquad(...)` gives you squad-style specialization without locking your app into a hosted voice platform. An agent can return `handoff: { targetAgentId: 'billing' }`; the squad records the handoff, runs the target agent on the same turn, and still returns a standard `VoiceRouteResult`.
|
|
146
|
+
|
|
147
|
+
## Traces And Replay
|
|
148
|
+
|
|
149
|
+
Use trace stores when you want every call to be inspectable outside a hosted platform. Trace events are append-only records for model passes, tool calls, handoffs, agent results, call lifecycle, turn timing, errors, and cost telemetry.
|
|
150
|
+
|
|
151
|
+
```ts
|
|
152
|
+
import {
|
|
153
|
+
buildVoiceTraceReplay,
|
|
154
|
+
createVoiceAgent,
|
|
155
|
+
createVoiceFileRuntimeStorage,
|
|
156
|
+
createVoiceRedisTaskLeaseCoordinator,
|
|
157
|
+
createVoiceTraceHTTPSink,
|
|
158
|
+
createVoiceTraceSinkStore,
|
|
159
|
+
createVoiceTraceSinkDeliveryWorker,
|
|
160
|
+
exportVoiceTrace,
|
|
161
|
+
pruneVoiceTraceEvents,
|
|
162
|
+
voice
|
|
163
|
+
} from '@absolutejs/voice';
|
|
164
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
165
|
+
|
|
166
|
+
const runtimeStorage = createVoiceFileRuntimeStorage({
|
|
167
|
+
directory: '.voice-runtime/support'
|
|
168
|
+
});
|
|
169
|
+
const redisLeases = createVoiceRedisTaskLeaseCoordinator({
|
|
170
|
+
url: process.env.REDIS_URL
|
|
171
|
+
});
|
|
172
|
+
const trace = createVoiceTraceSinkStore({
|
|
173
|
+
store: runtimeStorage.traces,
|
|
174
|
+
deliveryQueue: runtimeStorage.traceDeliveries,
|
|
175
|
+
redact: true,
|
|
176
|
+
sinks: [
|
|
177
|
+
createVoiceTraceHTTPSink({
|
|
178
|
+
id: 'warehouse',
|
|
179
|
+
url: process.env.TRACE_WAREHOUSE_URL!
|
|
180
|
+
})
|
|
181
|
+
]
|
|
182
|
+
});
|
|
183
|
+
const traceSinkWorker = createVoiceTraceSinkDeliveryWorker({
|
|
184
|
+
deliveries: runtimeStorage.traceDeliveries,
|
|
185
|
+
leases: redisLeases,
|
|
186
|
+
redact: true,
|
|
187
|
+
sinks: [
|
|
188
|
+
createVoiceTraceHTTPSink({
|
|
189
|
+
id: 'warehouse',
|
|
190
|
+
url: process.env.TRACE_WAREHOUSE_URL!
|
|
191
|
+
})
|
|
192
|
+
],
|
|
193
|
+
workerId: 'trace-sink-worker'
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
const supportAgent = createVoiceAgent({
|
|
197
|
+
id: 'support',
|
|
198
|
+
trace,
|
|
199
|
+
model: {
|
|
200
|
+
async generate() {
|
|
201
|
+
return { assistantText: 'How can I help?' };
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
});
|
|
205
|
+
|
|
206
|
+
voice({
|
|
207
|
+
path: '/voice',
|
|
208
|
+
session: runtimeStorage.session,
|
|
209
|
+
stt: deepgram({ apiKey: process.env.DEEPGRAM_API_KEY! }),
|
|
210
|
+
trace,
|
|
211
|
+
onTurn: supportAgent.onTurn,
|
|
212
|
+
onComplete: async () => {}
|
|
213
|
+
});
|
|
214
|
+
|
|
215
|
+
const replay = await exportVoiceTrace({
|
|
216
|
+
store: runtimeStorage.traces,
|
|
217
|
+
filter: {
|
|
218
|
+
sessionId: 'session-123'
|
|
219
|
+
}
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
const report = buildVoiceTraceReplay(replay.events, {
|
|
223
|
+
redact: true,
|
|
224
|
+
title: 'Support call session-123'
|
|
225
|
+
});
|
|
226
|
+
|
|
227
|
+
console.log(report.summary);
|
|
228
|
+
console.log(report.evaluation.pass);
|
|
229
|
+
await Bun.write('trace.html', report.html);
|
|
230
|
+
|
|
231
|
+
await pruneVoiceTraceEvents({
|
|
232
|
+
store: runtimeStorage.traces,
|
|
233
|
+
before: Date.now() - 30 * 24 * 60 * 60 * 1000
|
|
234
|
+
});
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
`createVoiceMemoryTraceEventStore(...)`, `createVoiceFileTraceEventStore(...)`, `createVoiceSQLiteTraceEventStore(...)`, and `createVoicePostgresTraceEventStore(...)` all implement the same `VoiceTraceEventStore` contract. File, SQLite, and Postgres runtime storage expose `runtimeStorage.traces` and `runtimeStorage.traceDeliveries` alongside sessions, reviews, tasks, events, and external object mappings. Passing `trace` to `voice(...)` records session lifecycle, transcript, committed-turn, assistant, cost, and error events; passing it to agents records model passes, tools, results, and handoffs.
|
|
238
|
+
|
|
239
|
+
For self-hosted QA and support workflows, use `summarizeVoiceTrace(...)`, `evaluateVoiceTrace(...)`, `renderVoiceTraceMarkdown(...)`, `renderVoiceTraceHTML(...)`, or `buildVoiceTraceReplay(...)`. They turn raw trace events into portable artifacts you can attach to tickets, inspect locally, or fail in CI when a call has missing transcripts, missing turns, tool errors, session errors, or excessive handoffs.
|
|
240
|
+
|
|
241
|
+
For observability pipelines, wrap any trace store with `createVoiceTraceSinkStore(...)` and pass sinks such as `createVoiceTraceHTTPSink(...)`. The wrapper still writes to your normal file, SQLite, or Postgres store, then fans out appended events to your warehouse, logs, S3 bridge, or analytics endpoint. Use `awaitDelivery: true` only when you want trace delivery to block append completion. For durable delivery, pass `deliveryQueue` and run `createVoiceTraceSinkDeliveryWorker(...)` or `createVoiceTraceSinkDeliveryWorkerLoop(...)`; the worker uses the same Redis lease/idempotency primitives as ops workers and supports retries plus dead-letter stores.
|
|
242
|
+
|
|
243
|
+
When traces may leave your private runtime, pass `redact: true` or a redaction config to `exportVoiceTrace(...)`, `renderVoiceTraceMarkdown(...)`, `renderVoiceTraceHTML(...)`, or `buildVoiceTraceReplay(...)`. The built-in redactor scrubs common email addresses, phone numbers, and sensitive keys like `token`, `secret`, `password`, `apiKey`, `authorization`, `phone`, and `email`; you can pass custom keys or replacement text for stricter policies.
|
|
244
|
+
|
|
245
|
+
For retention jobs, `pruneVoiceTraceEvents(...)` works against any trace store. Use `dryRun: true` before deleting, filter by session, trace, scenario, turn, or event type, cap each run with `limit`, or keep only the newest N matching events with `keepNewest`.
|
|
246
|
+
|
|
247
|
+
## Production Voice Ops
|
|
248
|
+
|
|
249
|
+
The recommended production pattern is:
|
|
250
|
+
|
|
251
|
+
- persistent session storage
|
|
252
|
+
- built-in review recording
|
|
253
|
+
- built-in task creation from call outcomes
|
|
254
|
+
- built-in integration event recording
|
|
255
|
+
|
|
256
|
+
The simplest durable local setup uses `createVoiceFileRuntimeStorage(...)` plus `voice({ ops })`:
|
|
257
|
+
|
|
258
|
+
```ts
|
|
259
|
+
import { Elysia } from 'elysia';
|
|
260
|
+
import {
|
|
261
|
+
createVoiceCRMActivitySink,
|
|
262
|
+
createVoiceFileRuntimeStorage,
|
|
263
|
+
createVoiceHelpdeskTicketSink,
|
|
264
|
+
resolveVoiceOutcomeRecipe,
|
|
265
|
+
voice
|
|
266
|
+
} from '@absolutejs/voice';
|
|
267
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
268
|
+
|
|
269
|
+
const runtimeStorage = createVoiceFileRuntimeStorage({
|
|
270
|
+
directory: '.voice-runtime/support'
|
|
271
|
+
});
|
|
272
|
+
|
|
273
|
+
const app = new Elysia().use(
|
|
274
|
+
voice({
|
|
275
|
+
path: '/voice',
|
|
276
|
+
preset: 'reliability',
|
|
277
|
+
session: runtimeStorage.session,
|
|
278
|
+
stt: deepgram({
|
|
279
|
+
apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
280
|
+
model: 'flux-general-en'
|
|
281
|
+
}),
|
|
282
|
+
async onTurn({ turn }) {
|
|
283
|
+
if (turn.text.toLowerCase().includes('billing')) {
|
|
284
|
+
return {
|
|
285
|
+
assistantText: 'Transferring to billing.',
|
|
286
|
+
transfer: {
|
|
287
|
+
reason: 'caller-requested-transfer',
|
|
288
|
+
target: 'billing'
|
|
289
|
+
}
|
|
290
|
+
};
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
return {
|
|
294
|
+
assistantText: `You said: ${turn.text}`
|
|
295
|
+
};
|
|
296
|
+
},
|
|
297
|
+
onComplete: async () => {},
|
|
298
|
+
ops: {
|
|
299
|
+
...resolveVoiceOutcomeRecipe('support-triage', {
|
|
300
|
+
assignee: 'support-oncall',
|
|
301
|
+
queue: 'support-triage'
|
|
302
|
+
}),
|
|
303
|
+
reviews: runtimeStorage.reviews,
|
|
304
|
+
tasks: runtimeStorage.tasks,
|
|
305
|
+
events: runtimeStorage.events,
|
|
306
|
+
webhook: {
|
|
307
|
+
url: process.env.VOICE_OPS_WEBHOOK_URL!,
|
|
308
|
+
retries: 2,
|
|
309
|
+
backoffMs: 500,
|
|
310
|
+
signingSecret: process.env.VOICE_OPS_WEBHOOK_SECRET
|
|
311
|
+
},
|
|
312
|
+
sinks: [
|
|
313
|
+
createVoiceHelpdeskTicketSink({
|
|
314
|
+
id: 'helpdesk',
|
|
315
|
+
url: process.env.HELPDESK_SYNC_URL!
|
|
316
|
+
}),
|
|
317
|
+
createVoiceCRMActivitySink({
|
|
318
|
+
id: 'crm',
|
|
319
|
+
url: process.env.CRM_SYNC_URL!
|
|
320
|
+
})
|
|
321
|
+
]
|
|
322
|
+
}
|
|
323
|
+
})
|
|
324
|
+
);
|
|
325
|
+
```
|
|
326
|
+
|
|
327
|
+
That gives you:
|
|
328
|
+
|
|
329
|
+
- persisted sessions under `runtimeStorage.session`
|
|
330
|
+
- persisted review artifacts under `runtimeStorage.reviews`
|
|
331
|
+
- persisted follow-up tasks under `runtimeStorage.tasks`
|
|
332
|
+
- persisted integration events under `runtimeStorage.events`
|
|
333
|
+
- persisted vendor object mappings under `runtimeStorage.externalObjects`
|
|
334
|
+
- built-in webhook delivery with persisted delivery status on each event
|
|
335
|
+
- built-in sink fanout with per-sink delivery metadata on each event
|
|
336
|
+
|
|
337
|
+
If you need richer review artifacts, pass `ops.buildReview(...)`. If you need custom task routing, pass `ops.createTaskFromReview(...)`. If you need external sync side effects inside your app, use `ops.onEvent(...)`. If you want built-in outbound delivery, use `ops.webhook`. If you want core-managed CRM/helpdesk fanout, use `ops.sinks` with `createVoiceIntegrationHTTPSink(...)`, `createVoiceHelpdeskTicketSink(...)`, or `createVoiceCRMActivitySink(...)`.
|
|
338
|
+
|
|
339
|
+
For fast production defaults, spread `resolveVoiceOutcomeRecipe(...)` into `ops`. Built-in recipes cover `appointment-booking`, `lead-qualification`, `support-triage`, `voicemail-callback`, and `warm-transfer`; each returns task creation, SLA policies, and urgent routing rules while staying fully self-hosted.
|
|
340
|
+
|
|
341
|
+
For packaged external systems, core now also includes:
|
|
342
|
+
|
|
343
|
+
- `createVoiceZendeskTicketSink(...)`
|
|
344
|
+
- `createVoiceZendeskTicketUpdateSink(...)`
|
|
345
|
+
- `createVoiceZendeskTicketSyncSinks(...)`
|
|
346
|
+
- `createVoiceHubSpotTaskSink(...)`
|
|
347
|
+
- `createVoiceHubSpotTaskUpdateSink(...)`
|
|
348
|
+
- `createVoiceHubSpotTaskSyncSinks(...)`
|
|
349
|
+
- `createVoiceLinearIssueSink(...)`
|
|
350
|
+
- `createVoiceLinearIssueUpdateSink(...)`
|
|
351
|
+
- `createVoiceLinearIssueSyncSinks(...)`
|
|
352
|
+
|
|
353
|
+
Those adapters stick to the documented-safe request shapes:
|
|
354
|
+
|
|
355
|
+
- Zendesk: `POST /api/v2/tickets`
|
|
356
|
+
- Zendesk updates: `PUT /api/v2/tickets/{ticketId}`
|
|
357
|
+
- HubSpot: `POST /crm/v3/objects/tasks`
|
|
358
|
+
- HubSpot updates: `PATCH /crm/v3/objects/tasks/{taskId}`
|
|
359
|
+
- Linear: `issueCreate` over `https://api.linear.app/graphql`
|
|
360
|
+
- Linear updates: `issueUpdate` over `https://api.linear.app/graphql`
|
|
361
|
+
|
|
362
|
+
Create sinks can persist vendor object ids into `runtimeStorage.externalObjects` when you pass `externalObjects` to the adapter. Update sinks first check explicit event payload ids like `zendeskTicketId`, `hubspotTaskId`, or `linearIssueId`, then resolver callbacks like `ticketId`, `taskId`, or `issueId`, then the external object map. If no external id can be resolved, the sink records a skipped delivery instead of accidentally treating an internal AbsoluteJS task id as a vendor object id.
|
|
363
|
+
|
|
364
|
+
Use the `*SyncSinks(...)` helpers when you want create/update parity without hand-wiring two adapters. They return a pair of sinks: a create sink for creation events and an update sink for `task.updated` / `task.sla_breached`, sharing the same credentials, fetch options, and `externalObjects` mapping store.
|
|
365
|
+
|
|
366
|
+
If you want durable SQLite-backed runtime storage under Bun instead of the file store, use `createVoiceSQLiteRuntimeStorage(...)` with the same `ops` shape:
|
|
367
|
+
|
|
368
|
+
```ts
|
|
369
|
+
import { createVoiceSQLiteRuntimeStorage, voice } from '@absolutejs/voice';
|
|
370
|
+
|
|
371
|
+
const runtimeStorage = createVoiceSQLiteRuntimeStorage({
|
|
372
|
+
path: '.voice-runtime/support.sqlite'
|
|
373
|
+
});
|
|
374
|
+
```
|
|
375
|
+
|
|
376
|
+
This uses Bun's native `bun:sqlite` driver directly.
|
|
377
|
+
|
|
378
|
+
If you want production-friendly shared storage, use `createVoicePostgresRuntimeStorage(...)`:
|
|
379
|
+
|
|
380
|
+
```ts
|
|
381
|
+
import { createVoicePostgresRuntimeStorage, voice } from '@absolutejs/voice';
|
|
382
|
+
|
|
383
|
+
const runtimeStorage = createVoicePostgresRuntimeStorage({
|
|
384
|
+
connectionString: process.env.DATABASE_URL!,
|
|
385
|
+
schemaName: 'voice_ops',
|
|
386
|
+
tablePrefix: 'support'
|
|
387
|
+
});
|
|
388
|
+
```
|
|
389
|
+
|
|
390
|
+
This uses Bun's native `Bun.SQL` client for PostgreSQL.
|
|
391
|
+
|
|
392
|
+
File, SQLite, and Postgres runtime storage expose the same core surfaces: `session`, `reviews`, `tasks`, `events`, and `externalObjects`, plus `traces` and `traceDeliveries` for trace storage. Vendor create/update sink mapping works the same way across local demos and production deployments.
|
|
393
|
+
|
|
394
|
+
If you need worker coordination for follow-up tasks, use Bun's native Redis client through `createVoiceRedisTaskLeaseCoordinator(...)`:
|
|
395
|
+
|
|
396
|
+
```ts
|
|
397
|
+
import { createVoiceRedisTaskLeaseCoordinator } from '@absolutejs/voice';
|
|
398
|
+
|
|
399
|
+
const leases = createVoiceRedisTaskLeaseCoordinator({
|
|
400
|
+
url: process.env.REDIS_URL,
|
|
401
|
+
keyPrefix: 'voice:ops'
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
const claimed = await leases.claim({
|
|
405
|
+
taskId: 'task-123',
|
|
406
|
+
workerId: 'worker-a',
|
|
407
|
+
leaseMs: 30_000
|
|
408
|
+
});
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
For durable redelivery and idempotent event processing, combine that with `createVoiceRedisIdempotencyStore(...)` and `createVoiceWebhookDeliveryWorker(...)`.
|
|
412
|
+
|
|
413
|
+
If you want a long-running worker loop, use `createVoiceWebhookDeliveryWorkerLoop(...)` and attach a dead-letter store for repeatedly failing events.
|
|
414
|
+
|
|
415
|
+
If you need operator task workers in core, use `createVoiceOpsTaskWorker(...)` for lease-backed claim/heartbeat/complete/requeue flows, or `createVoiceOpsTaskProcessorWorker(...)` when you want a handler-driven queue that records failures, requeues retries, and dead-letters tasks after repeated errors.
|
|
416
|
+
|
|
417
|
+
For task queue observability, use `summarizeVoiceOpsTaskQueue(...)` to report claimed/unclaimed counts, retry-eligible tasks, overdue work, assignee/claim ownership, and dead-letter totals from the same persisted task stores.
|
|
418
|
+
|
|
419
|
+
If you want assignee and worker throughput metrics directly from stored task history, use `summarizeVoiceOpsTaskAnalytics(...)`. It derives:
|
|
420
|
+
|
|
421
|
+
- aging buckets (`fresh`, `aging`, `due-soon`, `overdue`, `stale`)
|
|
422
|
+
- assignee backlog and average completion time
|
|
423
|
+
- worker claim / heartbeat / failure / completion counts
|
|
424
|
+
- total overdue and completed workload
|
|
425
|
+
|
|
426
|
+
If you want outcome-driven SLAs in core, set `ops.taskPolicies` or `ops.resolveTaskPolicy(...)`. Tasks can now carry:
|
|
427
|
+
|
|
428
|
+
- `priority`
|
|
429
|
+
- `dueAt`
|
|
430
|
+
- `policyName`
|
|
431
|
+
- `processingAttempts`
|
|
432
|
+
- `processingError`
|
|
433
|
+
- `deadLetteredAt`
|
|
434
|
+
|
|
435
|
+
The built-in default policies already bias toward real ops behavior:
|
|
436
|
+
|
|
437
|
+
- `escalated` -> urgent, short SLA
|
|
438
|
+
- `failed` -> high priority review
|
|
439
|
+
- `voicemail` -> callback SLA
|
|
440
|
+
- `no-answer` -> retry SLA
|
|
441
|
+
- `transferred` -> verification SLA
|
|
442
|
+
|
|
443
|
+
Policies can also set:
|
|
444
|
+
|
|
445
|
+
- `assignee`
|
|
446
|
+
- `queue`
|
|
447
|
+
- `priority`
|
|
448
|
+
- `dueInMs`
|
|
449
|
+
- `recommendedAction`
|
|
450
|
+
|
|
451
|
+
If you need routing beyond static outcome policies, use `ops.taskAssignmentRules` or `ops.resolveTaskAssignment(...)`. Assignment rules run after task policy resolution, so you can do things like:
|
|
452
|
+
|
|
453
|
+
- route urgent tasks to an on-call queue
|
|
454
|
+
- move high-priority callbacks into a fast-lane pool
|
|
455
|
+
- escalate specific policy lanes to supervisor ownership
|
|
456
|
+
|
|
457
|
+
If you want SLA follow-up automation in core, use `createVoiceOpsRuntime(...).checkSLA()` or configure `sla.followUpTask` on the runtime. Overdue tasks can now:
|
|
458
|
+
|
|
459
|
+
- be marked once with `slaBreachedAt`
|
|
460
|
+
- emit a portable `task.sla_breached` integration event
|
|
461
|
+
- create a secondary follow-up task for supervisors or escalation queues
|
|
462
|
+
|
|
463
|
+
If you want one higher-level core surface instead of wiring review recording, webhook workers, task processors, and queue summaries by hand, use `createVoiceOpsRuntime(...)`:
|
|
464
|
+
|
|
465
|
+
```ts
|
|
466
|
+
import {
|
|
467
|
+
createVoiceCRMActivitySink,
|
|
468
|
+
createVoiceFileRuntimeStorage,
|
|
469
|
+
createVoiceHelpdeskTicketSink,
|
|
470
|
+
createVoiceOpsRuntime,
|
|
471
|
+
createVoiceRedisTaskLeaseCoordinator,
|
|
472
|
+
voice
|
|
473
|
+
} from '@absolutejs/voice';
|
|
474
|
+
|
|
475
|
+
const runtimeStorage = createVoiceFileRuntimeStorage({
|
|
476
|
+
directory: '.voice-runtime/support'
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
const ops = {
|
|
480
|
+
reviews: runtimeStorage.reviews,
|
|
481
|
+
tasks: runtimeStorage.tasks,
|
|
482
|
+
events: runtimeStorage.events,
|
|
483
|
+
sinks: [
|
|
484
|
+
createVoiceHelpdeskTicketSink({
|
|
485
|
+
id: 'helpdesk',
|
|
486
|
+
url: process.env.HELPDESK_SYNC_URL!
|
|
487
|
+
}),
|
|
488
|
+
createVoiceCRMActivitySink({
|
|
489
|
+
id: 'crm',
|
|
490
|
+
url: process.env.CRM_SYNC_URL!
|
|
491
|
+
})
|
|
492
|
+
]
|
|
493
|
+
} as const;
|
|
494
|
+
|
|
495
|
+
const opsRuntime = createVoiceOpsRuntime({
|
|
496
|
+
ops,
|
|
497
|
+
sinks: {
|
|
498
|
+
autoStart: true,
|
|
499
|
+
leases: createVoiceRedisTaskLeaseCoordinator({
|
|
500
|
+
url: process.env.REDIS_URL,
|
|
501
|
+
keyPrefix: 'voice:ops:sinks'
|
|
502
|
+
}),
|
|
503
|
+
maxFailures: 3,
|
|
504
|
+
workerId: 'ops-sink-worker'
|
|
505
|
+
},
|
|
506
|
+
tasks: {
|
|
507
|
+
autoStart: true,
|
|
508
|
+
leases: createVoiceRedisTaskLeaseCoordinator({
|
|
509
|
+
url: process.env.REDIS_URL,
|
|
510
|
+
keyPrefix: 'voice:ops:tasks'
|
|
511
|
+
}),
|
|
512
|
+
maxFailures: 3,
|
|
513
|
+
process: async (task) => {
|
|
514
|
+
if (task.kind === 'callback') {
|
|
515
|
+
// hand off to CRM / dialer / queue
|
|
516
|
+
return { action: 'complete' };
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
return { action: 'requeue', detail: 'Waiting for a downstream system.' };
|
|
520
|
+
},
|
|
521
|
+
workerId: 'ops-task-worker'
|
|
522
|
+
},
|
|
523
|
+
webhooks: {
|
|
524
|
+
autoStart: true,
|
|
525
|
+
leases: createVoiceRedisTaskLeaseCoordinator({
|
|
526
|
+
url: process.env.REDIS_URL,
|
|
527
|
+
keyPrefix: 'voice:ops:events'
|
|
528
|
+
}),
|
|
529
|
+
retries: 2,
|
|
530
|
+
signingSecret: process.env.VOICE_OPS_WEBHOOK_SECRET,
|
|
531
|
+
url: process.env.VOICE_OPS_WEBHOOK_URL!,
|
|
532
|
+
workerId: 'ops-webhook-worker'
|
|
533
|
+
}
|
|
534
|
+
});
|
|
535
|
+
|
|
536
|
+
opsRuntime.start();
|
|
537
|
+
|
|
538
|
+
app.use(
|
|
539
|
+
voice({
|
|
540
|
+
path: '/voice',
|
|
541
|
+
ops
|
|
542
|
+
})
|
|
543
|
+
);
|
|
544
|
+
```
|
|
545
|
+
|
|
546
|
+
That gives you:
|
|
547
|
+
|
|
548
|
+
- one portable `ops` config for review/task/event recording
|
|
549
|
+
- built-in sink fanout plus sink redelivery workers
|
|
550
|
+
- built-in webhook delivery workers
|
|
551
|
+
- built-in task processor workers
|
|
552
|
+
- unified `tick()`, `start()`, `stop()`, and `summarize()` controls
|
|
553
|
+
- one queue/runtime surface to test and operate
|
|
554
|
+
|
|
555
|
+
If you want opinionated queue routing without handcrafting every assignee/queue/SLA rule, start from `resolveVoiceOpsPreset(...)` and pass the resolved fields into your ops runtime:
|
|
556
|
+
|
|
557
|
+
```ts
|
|
558
|
+
import { resolveVoiceOpsPreset } from '@absolutejs/voice';
|
|
559
|
+
|
|
560
|
+
const opsPreset = resolveVoiceOpsPreset('support-default');
|
|
561
|
+
|
|
562
|
+
const opsRuntime = createVoiceOpsRuntime({
|
|
563
|
+
ops: {
|
|
564
|
+
reviews: runtimeStorage.reviews,
|
|
565
|
+
tasks: runtimeStorage.tasks,
|
|
566
|
+
events: runtimeStorage.events,
|
|
567
|
+
taskPolicies: opsPreset.taskPolicies
|
|
568
|
+
},
|
|
569
|
+
sla: opsPreset.sla
|
|
570
|
+
});
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
Built-in presets:
|
|
574
|
+
|
|
575
|
+
- `support-default`
|
|
576
|
+
- `sales-default`
|
|
577
|
+
- `collections-default`
|
|
578
|
+
|
|
579
|
+
Those presets include both:
|
|
580
|
+
|
|
581
|
+
- `taskPolicies`
|
|
582
|
+
- `assignmentRules`
|
|
583
|
+
|
|
584
|
+
If you want larger review artifacts in object storage instead of a local or SQL store, use Bun's native S3 client through `createVoiceS3ReviewStore(...)`.
|
|
585
|
+
|
|
586
|
+
## Production Checklist
|
|
587
|
+
|
|
588
|
+
Use this as the default deployment checklist for a real voice app:
|
|
589
|
+
|
|
590
|
+
- Storage:
|
|
591
|
+
use a shared session store for `session`
|
|
592
|
+
- Runtime ops:
|
|
593
|
+
enable `ops.reviews`, `ops.tasks`, and `ops.events`
|
|
594
|
+
- Review path:
|
|
595
|
+
make stored review artifacts visible somewhere operators can inspect quickly
|
|
596
|
+
- Task path:
|
|
597
|
+
turn non-happy outcomes like `transferred`, `escalated`, `voicemail`, `no-answer`, and `failed` into follow-up work
|
|
598
|
+
- Task policy:
|
|
599
|
+
set `ops.taskPolicies` or `ops.resolveTaskPolicy(...)` so follow-up work gets real priorities and deadlines instead of ad hoc app rules
|
|
600
|
+
- Worker path:
|
|
601
|
+
run Redis-leased task workers for follow-up ops and keep dead-letter queues for tasks that repeatedly fail downstream processing
|
|
602
|
+
- Event path:
|
|
603
|
+
persist `ops.events`, enable `ops.webhook` for outbound delivery, and reserve `ops.onEvent(...)` for app-local side effects
|
|
604
|
+
- STT:
|
|
605
|
+
use the adapter/model pair you have actually benchmarked for the channel you are shipping
|
|
606
|
+
- PSTN:
|
|
607
|
+
prefer the telephony path you have validated live, and keep channel-specific settings in presets instead of ad hoc script overrides
|
|
608
|
+
- Correction:
|
|
609
|
+
keep correction deterministic and domain-safe; do not ship benchmark-shaped seeded aliases as your default public path
|
|
610
|
+
- Observability:
|
|
611
|
+
capture first partial, first commit, first outbound audio, barge-in stop, disposition, and per-turn errors
|
|
612
|
+
- QA:
|
|
613
|
+
run repeated live benchmarks for the channel you care about, not just single-pass smoke checks
|
|
614
|
+
|
|
615
|
+
For the local file-backed starter path, the minimum production-shaped stack is:
|
|
616
|
+
|
|
617
|
+
- `createVoiceFileRuntimeStorage(...)`
|
|
618
|
+
- `voice({ session: runtimeStorage.session, ops: { reviews, tasks, events } })`
|
|
619
|
+
- one review UI
|
|
620
|
+
- one task queue UI
|
|
621
|
+
- one integration-event sink
|
|
622
|
+
|
|
623
|
+
## TTS
|
|
624
|
+
|
|
625
|
+
`@absolutejs/voice` now supports optional assistant audio streaming on the same session path. If you provide a `tts` adapter, `assistantText` responses are still sent as text, and the synthesized PCM chunks are streamed as `audio` messages alongside them.
|
|
626
|
+
|
|
627
|
+
```ts
|
|
628
|
+
import { voice, createVoiceMemoryStore } from '@absolutejs/voice';
|
|
629
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
630
|
+
import { elevenlabs } from '@absolutejs/voice-elevenlabs';
|
|
631
|
+
|
|
632
|
+
app.use(
|
|
633
|
+
voice({
|
|
634
|
+
path: '/voice',
|
|
635
|
+
session: createVoiceMemoryStore(),
|
|
636
|
+
stt: deepgram({
|
|
637
|
+
apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
638
|
+
model: 'flux-general-en'
|
|
639
|
+
}),
|
|
640
|
+
tts: elevenlabs({
|
|
641
|
+
apiKey: process.env.ELEVENLABS_API_KEY!,
|
|
642
|
+
voiceId: process.env.ELEVENLABS_VOICE_ID!
|
|
643
|
+
}),
|
|
644
|
+
onTurn: async ({ turn }) => ({
|
|
645
|
+
assistantText: `You said: ${turn.text}`
|
|
646
|
+
}),
|
|
647
|
+
onComplete: async () => {}
|
|
648
|
+
})
|
|
649
|
+
);
|
|
650
|
+
```
|
|
651
|
+
|
|
652
|
+
Client state now exposes `assistantAudio` on the stream/controller helpers, so apps can buffer or play synthesized chunks without inventing a second transport.
|
|
653
|
+
|
|
654
|
+
If you want a minimal browser playback path, use the client audio player:
|
|
655
|
+
|
|
656
|
+
```ts
|
|
657
|
+
import {
|
|
658
|
+
createVoiceAudioPlayer,
|
|
659
|
+
createVoiceController
|
|
660
|
+
} from '@absolutejs/voice/client';
|
|
661
|
+
|
|
662
|
+
const voice = createVoiceController('/voice', {
|
|
663
|
+
preset: 'chat'
|
|
664
|
+
});
|
|
665
|
+
const player = createVoiceAudioPlayer(voice);
|
|
666
|
+
|
|
667
|
+
await player.start(); // call from a user gesture
|
|
668
|
+
await player.interrupt(); // flush queued assistant playback for barge-in
|
|
669
|
+
```
|
|
670
|
+
|
|
671
|
+
`createVoiceAudioPlayer()` subscribes to `assistantAudio`, decodes raw `pcm_s16le` chunks, and queues them in WebAudio. It also exposes `interrupt()`, `lastInterruptLatencyMs`, and `lastPlaybackStopLatencyMs` so apps can flush assistant playback during barge-in and inspect how long it took for queued playback to fully stop.
|
|
672
|
+
|
|
673
|
+
For a higher-level client path, use the duplex helper:
|
|
674
|
+
|
|
675
|
+
```ts
|
|
676
|
+
import { createVoiceDuplexController } from '@absolutejs/voice/client';
|
|
677
|
+
|
|
678
|
+
const voice = createVoiceDuplexController('/voice', {
|
|
679
|
+
bargeIn: {
|
|
680
|
+
interruptThreshold: 0.08
|
|
681
|
+
},
|
|
682
|
+
preset: 'chat'
|
|
683
|
+
});
|
|
684
|
+
|
|
685
|
+
await voice.audioPlayer.start();
|
|
686
|
+
await voice.startRecording();
|
|
687
|
+
```
|
|
688
|
+
|
|
689
|
+
`createVoiceDuplexController()` composes the controller and audio player and automatically interrupts assistant playback when:
|
|
690
|
+
|
|
691
|
+
- microphone input crosses the configured barge-in threshold
|
|
692
|
+
- partial user speech starts arriving
|
|
693
|
+
- manual `sendAudio(...)` is called while assistant audio is playing
|
|
694
|
+
|
|
695
|
+
## Duplex Benchmarks
|
|
696
|
+
|
|
697
|
+
The first duplex benchmark lane measures package-level barge-in interruption on the client path. It records scenario pass/fail plus local interruption latency for:
|
|
698
|
+
|
|
699
|
+
- manual `sendAudio(...)`
|
|
700
|
+
- partial transcript start
|
|
701
|
+
- input-level threshold crossing
|
|
702
|
+
|
|
703
|
+
Run it with:
|
|
704
|
+
|
|
705
|
+
```bash
|
|
706
|
+
bun run bench:duplex
|
|
707
|
+
```
|
|
708
|
+
|
|
709
|
+
That writes:
|
|
710
|
+
|
|
711
|
+
- `benchmark-results/duplex-barge-in.json`
|
|
712
|
+
|
|
713
|
+
## Telephony
|
|
714
|
+
|
|
715
|
+
`@absolutejs/voice` now includes a first PSTN bridge layer for Twilio Media Streams. It converts inbound `audio/x-mulaw` 8 kHz frames into the PCM format the voice session expects, and converts assistant PCM audio back into outbound Twilio media events.
|
|
716
|
+
|
|
717
|
+
Minimal usage:
|
|
718
|
+
|
|
719
|
+
```ts
|
|
720
|
+
import { createTwilioMediaStreamBridge, createTwilioVoiceResponse, createVoiceMemoryStore } from '@absolutejs/voice';
|
|
721
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
722
|
+
import { elevenlabs } from '@absolutejs/voice-elevenlabs';
|
|
723
|
+
|
|
724
|
+
const twiml = createTwilioVoiceResponse({
|
|
725
|
+
streamUrl: 'wss://example.com/voice/twilio',
|
|
726
|
+
parameters: {
|
|
727
|
+
sessionId: 'call-123',
|
|
728
|
+
scenarioId: 'phone-intake'
|
|
729
|
+
},
|
|
730
|
+
track: 'both_tracks'
|
|
731
|
+
});
|
|
732
|
+
|
|
733
|
+
const bridge = createTwilioMediaStreamBridge(twilioSocket, {
|
|
734
|
+
context: {},
|
|
735
|
+
onComplete: async () => {},
|
|
736
|
+
onTurn: async ({ turn }) => ({
|
|
737
|
+
assistantText: `You said: ${turn.text}`
|
|
738
|
+
}),
|
|
739
|
+
session: createVoiceMemoryStore(),
|
|
740
|
+
stt: deepgram({
|
|
741
|
+
apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
742
|
+
model: 'flux-general-en'
|
|
743
|
+
}),
|
|
744
|
+
tts: elevenlabs({
|
|
745
|
+
apiKey: process.env.ELEVENLABS_API_KEY!,
|
|
746
|
+
voiceId: process.env.ELEVENLABS_VOICE_ID!
|
|
747
|
+
})
|
|
748
|
+
});
|
|
749
|
+
|
|
750
|
+
await bridge.handleMessage(startMessageFromTwilio);
|
|
751
|
+
await bridge.handleMessage(mediaMessageFromTwilio);
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
The bridge also sends Twilio `clear` events on new inbound media after assistant audio has started streaming, so telephony barge-in can stop queued outbound playback.
|
|
755
|
+
|
|
756
|
+
You can benchmark the package-level Twilio bridge path with:
|
|
757
|
+
|
|
758
|
+
```bash
|
|
759
|
+
bun run bench:telephony:run
|
|
760
|
+
```
|
|
761
|
+
|
|
762
|
+
That writes:
|
|
763
|
+
- `benchmark-results/telephony-twilio-bridge.json`
|
|
764
|
+
- `benchmark-results/telephony-run-manifest.json`
|
|
765
|
+
|
|
766
|
+
For a live vendor-backed duplex smoke benchmark on the real TTS adapters, run:
|
|
767
|
+
|
|
768
|
+
```bash
|
|
769
|
+
bun run bench:duplex:live:run
|
|
770
|
+
```
|
|
771
|
+
|
|
772
|
+
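That writes fresh results to the `benchmark-results/duplex-live-*.json` files (listed below, after the telephony shootout outputs).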
|
|
773
|
+
|
|
774
|
+
For a live vendor-backed telephony smoke benchmark through the Twilio bridge path, run:
|
|
775
|
+
|
|
776
|
+
```bash
|
|
777
|
+
bun run bench:telephony:live:run
|
|
778
|
+
```
|
|
779
|
+
|
|
780
|
+
That writes:
|
|
781
|
+
- `benchmark-results/telephony-live-deepgram-elevenlabs.json`
|
|
782
|
+
- `benchmark-results/telephony-live-run-manifest.json`
|
|
783
|
+
|
|
784
|
+
For a repeated live telephony stability read, run:
|
|
785
|
+
|
|
786
|
+
```bash
|
|
787
|
+
bun run bench:telephony:live:series
|
|
788
|
+
```
|
|
789
|
+
|
|
790
|
+
That writes:
|
|
791
|
+
- `benchmark-results/telephony-live-series-summary-runs-3.json`
|
|
792
|
+
|
|
793
|
+
For a live Deepgram telephony model shootout on the same PSTN path, run:
|
|
794
|
+
|
|
795
|
+
```bash
|
|
796
|
+
bun run bench:telephony:live:shootout
|
|
797
|
+
```
|
|
798
|
+
|
|
799
|
+
That writes:
|
|
800
|
+
- `benchmark-results/telephony-live-flux-general-en.json`
|
|
801
|
+
- `benchmark-results/telephony-live-nova-3-phone.json`
|
|
802
|
+
- `benchmark-results/telephony-live-shootout-manifest.json`
|
|
803
|
+
|
|
804
|
+
- `benchmark-results/duplex-live-elevenlabs.json`
|
|
805
|
+
- `benchmark-results/duplex-live-openai.json`
|
|
806
|
+
- `benchmark-results/duplex-live-all.json`
|
|
807
|
+
- `benchmark-results/duplex-live-run-manifest.json`
|
|
808
|
+
|
|
809
|
+
For a browser-run duplex benchmark that uses a real headless Chrome `AudioContext` instead of the fake Node-side playback context, run:
|
|
810
|
+
|
|
811
|
+
```bash
|
|
812
|
+
bun run bench:duplex:browser:run
|
|
813
|
+
```
|
|
814
|
+
|
|
815
|
+
That writes fresh results to:
|
|
816
|
+
|
|
817
|
+
- `benchmark-results/duplex-browser-elevenlabs.json`
|
|
818
|
+
- `benchmark-results/duplex-browser-openai.json`
|
|
819
|
+
- `benchmark-results/duplex-browser-all.json`
|
|
820
|
+
- `benchmark-results/duplex-browser-run-manifest.json`
|
|
821
|
+
|
|
822
|
+
To measure browser duplex stability across repeated runs, use:
|
|
823
|
+
|
|
824
|
+
```bash
|
|
825
|
+
bun run bench:duplex:browser:series
|
|
826
|
+
```
|
|
827
|
+
|
|
828
|
+
That writes:
|
|
829
|
+
|
|
830
|
+
- `benchmark-results/duplex-browser-series-summary-runs-3.json`
|
|
831
|
+
- per-run provider artifacts like `benchmark-results/duplex-browser-elevenlabs-series-run-1.json`
|
|
832
|
+
|
|
833
|
+
For repeated interrupt-and-resume across several consecutive assistant turns, run:
|
|
834
|
+
|
|
835
|
+
```bash
|
|
836
|
+
bun run bench:duplex:browser:overlap:run
|
|
837
|
+
```
|
|
838
|
+
|
|
839
|
+
That writes:
|
|
840
|
+
|
|
841
|
+
- `benchmark-results/duplex-browser-overlap-elevenlabs.json`
|
|
842
|
+
- `benchmark-results/duplex-browser-overlap-openai.json`
|
|
843
|
+
- `benchmark-results/duplex-browser-overlap-all.json`
|
|
844
|
+
- `benchmark-results/duplex-browser-overlap-run-manifest.json`
|
|
845
|
+
|
|
846
|
+
To measure overlap stability across repeated live browser runs, use:
|
|
847
|
+
|
|
848
|
+
```bash
|
|
849
|
+
bun run bench:duplex:browser:overlap:series
|
|
850
|
+
```
|
|
851
|
+
|
|
852
|
+
That writes:
|
|
853
|
+
|
|
854
|
+
- `benchmark-results/duplex-browser-overlap-series-summary-runs-3.json`
|
|
855
|
+
- per-run provider artifacts like `benchmark-results/duplex-browser-overlap-elevenlabs-series-run-1.json`
|
|
856
|
+
|
|
857
|
+
## TTS Benchmarks
|
|
858
|
+
|
|
859
|
+
`@absolutejs/voice` now includes a first TTS benchmark harness for streaming output adapters. The initial metrics are:
|
|
860
|
+
|
|
861
|
+
- `firstAudioLatencyMs`
|
|
862
|
+
- `elapsedMs`
|
|
863
|
+
- `audioChunkCount`
|
|
864
|
+
- `totalAudioBytes`
|
|
865
|
+
- estimated PCM `audioDurationMs`
|
|
866
|
+
- interruption responsiveness via `interruptionLatencyMs`
|
|
867
|
+
|
|
868
|
+
Run the full TTS suite with one command:
|
|
869
|
+
|
|
870
|
+
```bash
|
|
871
|
+
bun run bench:tts:run
|
|
872
|
+
```
|
|
873
|
+
|
|
874
|
+
That writes fresh results to:
|
|
875
|
+
|
|
876
|
+
- `benchmark-results/tts-all.json`
|
|
877
|
+
- `benchmark-results/tts-elevenlabs.json`
|
|
878
|
+
- `benchmark-results/tts-openai.json`
|
|
879
|
+
- `benchmark-results/tts-run-manifest.json`
|
|
880
|
+
|
|
881
|
+
To measure interruption/cancel responsiveness separately:
|
|
882
|
+
|
|
883
|
+
```bash
|
|
884
|
+
bun run bench:tts:interrupt:run
|
|
885
|
+
```
|
|
886
|
+
|
|
887
|
+
That writes fresh interruption results to:
|
|
888
|
+
|
|
889
|
+
- `benchmark-results/tts-all-interrupt.json`
|
|
890
|
+
- `benchmark-results/tts-elevenlabs-interrupt.json`
|
|
891
|
+
- `benchmark-results/tts-openai-interrupt.json`
|
|
892
|
+
- `benchmark-results/tts-interrupt-run-manifest.json`
|
|
893
|
+
|
|
69
894
|
## Recommended Production Path
|
|
70
895
|
|
|
71
896
|
The current best-performing path in the bundled benchmarks is:
|
|
72
897
|
|
|
73
898
|
- `deepgram-flux` as primary STT
|
|
899
|
+
- route-level `lexicon` for pronunciation/domain entries
|
|
74
900
|
- route-level `phraseHints`
|
|
75
901
|
- route-level `correctTurn` using `createPhraseHintCorrectionHandler()`
|
|
76
902
|
|
|
@@ -80,7 +906,9 @@ Minimal production-oriented example:
|
|
|
80
906
|
|
|
81
907
|
```ts
|
|
82
908
|
import {
|
|
909
|
+
createVoiceSTTRoutingCorrectionHandler,
|
|
83
910
|
createPhraseHintCorrectionHandler,
|
|
911
|
+
resolveVoiceSTTRoutingStrategy,
|
|
84
912
|
voice
|
|
85
913
|
} from '@absolutejs/voice';
|
|
86
914
|
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
@@ -89,6 +917,13 @@ app.use(
|
|
|
89
917
|
voice({
|
|
90
918
|
path: '/voice/intake',
|
|
91
919
|
preset: 'reliability',
|
|
920
|
+
lexicon: [
|
|
921
|
+
{
|
|
922
|
+
text: 'AbsoluteJS',
|
|
923
|
+
aliases: ['absoloot js'],
|
|
924
|
+
pronunciation: 'ab-so-lute jay ess'
|
|
925
|
+
}
|
|
926
|
+
],
|
|
92
927
|
phraseHints: [
|
|
93
928
|
{ text: 'AbsoluteJS', aliases: ['absolute js'] },
|
|
94
929
|
{ text: 'Joe Johnston', aliases: ['joe johnson'] },
|
|
@@ -113,6 +948,45 @@ app.use(
|
|
|
113
948
|
|
|
114
949
|
`phraseHints` are user-controlled route config, not hidden framework magic. They are there so the app can teach the voice route its domain vocabulary.
|
|
115
950
|
|
|
951
|
+
## Best Vs Cheap STT
|
|
952
|
+
|
|
953
|
+
`@absolutejs/voice` now exposes an explicit package-level routing split so apps can choose between the strongest benchmarked path and a cheaper/raw path without inventing their own policy layer.
|
|
954
|
+
|
|
955
|
+
```ts
|
|
956
|
+
import {
|
|
957
|
+
createVoiceMemoryStore,
|
|
958
|
+
createVoiceSTTRoutingCorrectionHandler,
|
|
959
|
+
resolveVoiceSTTRoutingStrategy,
|
|
960
|
+
voice
|
|
961
|
+
} from '@absolutejs/voice';
|
|
962
|
+
import { deepgram } from '@absolutejs/voice-deepgram';
|
|
963
|
+
|
|
964
|
+
const strategy = resolveVoiceSTTRoutingStrategy('best');
|
|
965
|
+
|
|
966
|
+
app.use(
|
|
967
|
+
voice({
|
|
968
|
+
path: '/voice/stt',
|
|
969
|
+
preset: strategy.preset,
|
|
970
|
+
phraseHints: [{ text: 'Joe Johnston', aliases: ['joe johnson'] }],
|
|
971
|
+
correctTurn: createVoiceSTTRoutingCorrectionHandler(strategy.correctionMode),
|
|
972
|
+
session: createVoiceMemoryStore(),
|
|
973
|
+
sttLifecycle: strategy.sttLifecycle,
|
|
974
|
+
stt: deepgram({
|
|
975
|
+
apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
976
|
+
model: 'flux-general-en'
|
|
977
|
+
})
|
|
978
|
+
})
|
|
979
|
+
);
|
|
980
|
+
```
|
|
981
|
+
|
|
982
|
+
- `best` maps to the current strongest in-package path: Deepgram Flux plus generic deterministic correction.
|
|
983
|
+
- `low-cost` maps to a cheaper/raw package path: one primary STT pass with no correction hook.
|
|
984
|
+
- session benchmarks now include per-turn cost telemetry fields like `averageRelativeCostUnits`, `averagePrimaryAudioMs`, and `averageFallbackReplayAudioMs`.
|
|
985
|
+
- use `bun run bench:stt:routing:run` to benchmark both in parallel and write fresh results to:
|
|
986
|
+
- `benchmark-results/sessions-best-stt-runs-3.json`
|
|
987
|
+
- `benchmark-results/sessions-cheap-stt-runs-3.json`
|
|
988
|
+
- `benchmark-results/stt-routing-run-manifest.json`
|
|
989
|
+
|
|
116
990
|
## Presets
|
|
117
991
|
|
|
118
992
|
Voice now ships named runtime presets so apps can start from a useful baseline instead of hand-tuning silence and capture settings every time.
|
|
@@ -161,11 +1035,13 @@ Presets are still overridable. If you need to tune for a specific route, layer `
|
|
|
161
1035
|
Presets are not the same thing as phrase hints:
|
|
162
1036
|
|
|
163
1037
|
- presets tune framework-owned behavior like silence windows, reconnect defaults, and audio conditioning
|
|
1038
|
+
- `lexicon` tunes pronunciation-aware domain entries that should reach STT/TTS adapters directly
|
|
164
1039
|
- `phraseHints` tune app/domain vocabulary like company names, product names, legal phrases, or subscriber-specific jargon
|
|
165
1040
|
|
|
166
1041
|
In practice:
|
|
167
1042
|
|
|
168
1043
|
- use a preset to choose the runtime shape (`guided-intake`, `reliability`, `noisy-room`)
|
|
1044
|
+
- use `lexicon` when pronunciation matters and you want adapter-consumable entries
|
|
169
1045
|
- use `phraseHints` to teach the route what words matter for your business
|
|
170
1046
|
- use `correctTurn` when you want deterministic post-STT repair before the turn is committed
|
|
171
1047
|
|
|
@@ -199,9 +1075,51 @@ The controller helpers abstract the common browser boilerplate:
|
|
|
199
1075
|
|
|
200
1076
|
They do not hide the underlying transport. You still choose the route path and preset explicitly.
|
|
201
1077
|
|
|
202
|
-
## Phrase Hints And Correction
|
|
1078
|
+
## Lexicon, Phrase Hints, And Correction
|
|
203
1079
|
|
|
204
|
-
`
|
|
1080
|
+
`lexicon` is a route-level input for pronunciation-aware domain entries.
|
|
1081
|
+
|
|
1082
|
+
It can be:
|
|
1083
|
+
|
|
1084
|
+
- a static array for known names, products, and jargon
|
|
1085
|
+
- a resolver function when entries depend on the tenant, subscriber, or scenario
|
|
1086
|
+
|
|
1087
|
+
```ts
|
|
1088
|
+
voice({
|
|
1089
|
+
path: '/voice/intake',
|
|
1090
|
+
lexicon: async ({ context }) => {
|
|
1091
|
+
return [
|
|
1092
|
+
{
|
|
1093
|
+
text: 'AbsoluteJS',
|
|
1094
|
+
aliases: ['absoloot js'],
|
|
1095
|
+
pronunciation: 'ab-so-lute jay ess'
|
|
1096
|
+
},
|
|
1097
|
+
{
|
|
1098
|
+
text: 'Eden Treaty',
|
|
1099
|
+
aliases: ['eden tree tea'],
|
|
1100
|
+
pronunciation: 'ee-den tree-tee'
|
|
1101
|
+
}
|
|
1102
|
+
];
|
|
1103
|
+
},
|
|
1104
|
+
session: createVoiceMemoryStore(),
|
|
1105
|
+
stt: deepgram({
|
|
1106
|
+
apiKey: process.env.DEEPGRAM_API_KEY!,
|
|
1107
|
+
model: 'flux-general-en'
|
|
1108
|
+
}),
|
|
1109
|
+
onTurn: async ({ turn }) => ({
|
|
1110
|
+
assistantText: turn.text
|
|
1111
|
+
}),
|
|
1112
|
+
onComplete: async () => {}
|
|
1113
|
+
});
|
|
1114
|
+
```
|
|
1115
|
+
|
|
1116
|
+
How the package uses it:
|
|
1117
|
+
|
|
1118
|
+
- adapters receive `lexicon` at open time and translate it into vendor-native hinting surfaces when possible
|
|
1119
|
+
- STT adapters can use the canonical text plus aliases to bias recognition
|
|
1120
|
+
- future TTS adapters can use the same entries for pronunciation-aware speech output
|
|
1121
|
+
|
|
1122
|
+
`phraseHints` are a separate route-level input that the application owns.
|
|
205
1123
|
|
|
206
1124
|
They can be:
|
|
207
1125
|
|
|
@@ -234,6 +1152,7 @@ voice({
|
|
|
234
1152
|
|
|
235
1153
|
How the package uses them:
|
|
236
1154
|
|
|
1155
|
+
- adapters receive `lexicon` and `phraseHints` at open time
|
|
237
1156
|
- adapters can translate `phraseHints` into vendor-native hinting surfaces
|
|
238
1157
|
- the correction layer can use the same hints after STT to repair domain terms before commit
|
|
239
1158
|
|
|
@@ -361,6 +1280,11 @@ Use profiles to focus where you want to win:
|
|
|
361
1280
|
|
|
362
1281
|
- `bun run bench:vs all` (default)
|
|
363
1282
|
- `bun run bench:vs all accents`
|
|
1283
|
+
- `bun run bench:vs all code-switch`
|
|
1284
|
+
- `bun run bench:vs all jargon`
|
|
1285
|
+
- `bun run bench:vs all multilingual`
|
|
1286
|
+
- `bun run bench:vs all multi-speaker`
|
|
1287
|
+
- `bun run bench:vs all telephony`
|
|
364
1288
|
- `bun run bench:vs all clean`
|
|
365
1289
|
- `bun run bench:vs all noisy`
|
|
366
1290
|
- `bun run bench:vs deepgram accents`
|
|
@@ -387,6 +1311,21 @@ DEEPGRAM_MODEL=flux-general-en bun run bench:deepgram:accents
|
|
|
387
1311
|
DEEPGRAM_MODEL=nova-3 bun run bench:deepgram:accents
|
|
388
1312
|
```
|
|
389
1313
|
|
|
1314
|
+
To stress the STT path with synthesized narrowband phone audio, jargon-heavy terms, or multi-speaker handoffs:
|
|
1315
|
+
|
|
1316
|
+
```bash
|
|
1317
|
+
bun run bench:telephony
|
|
1318
|
+
bun run bench:telephony:run
|
|
1319
|
+
bun run bench:deepgram:telephony
|
|
1320
|
+
bun run bench:deepgram:corrected:telephony
|
|
1321
|
+
bun run bench:jargon
|
|
1322
|
+
bun run bench:deepgram:jargon
|
|
1323
|
+
bun run bench:deepgram:corrected:audit:jargon
|
|
1324
|
+
bun run bench:multi-speaker:run
|
|
1325
|
+
bun run bench:multi-speaker:analyze
|
|
1326
|
+
bun run bench:deepgram:multi-speaker
|
|
1327
|
+
```
|
|
1328
|
+
|
|
390
1329
|
To compare against Vapi or other providers, provide a baseline JSON file:
|
|
391
1330
|
|
|
392
1331
|
```bash
|
|
@@ -427,20 +1366,31 @@ The harness prints:
|
|
|
427
1366
|
- pass rate and recall deltas per adapter
|
|
428
1367
|
- weighted scorecard (`passRate`, term recall, word accuracy)
|
|
429
1368
|
- optional competitor deltas (Vapi)
|
|
1369
|
+
- a markdown report beside the JSON output, for example:
|
|
1370
|
+
- `benchmark-results/vs-all-telephony.json`
|
|
1371
|
+
- `benchmark-results/vs-all-telephony.md`
|
|
430
1372
|
|
|
431
1373
|
For package-level multi-turn behavior, use the session benchmark harness instead of raw STT-only benchmarking:
|
|
432
1374
|
|
|
433
1375
|
```bash
|
|
434
1376
|
bun run bench:sessions
|
|
435
1377
|
bun run bench:deepgram:sessions
|
|
1378
|
+
bun run bench:deepgram:soak:sessions
|
|
436
1379
|
bun run bench:deepgram:hybrid:sessions
|
|
437
1380
|
bun run bench:deepgram:corrected:sessions
|
|
1381
|
+
bun run bench:deepgram:corrected:soak:sessions
|
|
1382
|
+
bun run bench:stt:routing:run
|
|
438
1383
|
bun run bench:assemblyai:sessions
|
|
439
1384
|
bun run bench:openai:sessions
|
|
1385
|
+
bun run bench:soak:run
|
|
440
1386
|
```
|
|
441
1387
|
|
|
442
1388
|
That harness runs the adapter through `VoiceSession` itself, so the output reflects reconnect handling, turn commit stability, and duplicate-turn protection rather than only raw transcript quality.
|
|
443
1389
|
|
|
1390
|
+
`bench:soak:run` is the STT-5 runner. It executes the long-session soak lane for raw Deepgram Flux, corrected Deepgram, and the reconnect resilience suite in parallel, then writes fresh JSON into `benchmark-results/` without the parallel runs deleting each other's output.
|
|
1391
|
+
|
|
1392
|
+
`bench:stt:routing:run` is the STT-7 runner. It benchmarks the package’s current `best` vs `low-cost` session strategies in parallel, clears stale outputs first, and writes a manifest so the cost-aware summaries are guaranteed fresh.
|
|
1393
|
+
|
|
444
1394
|
`bench:deepgram:corrected:sessions` exercises the current recommended package-level production path:
|
|
445
1395
|
|
|
446
1396
|
- Deepgram Flux as primary STT
|
|
@@ -568,6 +1518,100 @@ Fallback triggers are evaluated at commit time:
|
|
|
568
1518
|
|
|
569
1519
|
The fallback adapter receives the same window of turn audio as the primary (default `8s`, configurable with `replayWindowMs`) and can only run `maxAttemptsPerTurn` times per turn.
|
|
570
1520
|
|
|
1521
|
+
## Benchmark Fixture Sources
|
|
1522
|
+
|
|
1523
|
+
Bundled fixtures cover the current in-repo English benchmark suite. For multilingual and code-switch evaluation, add external fixture directories and let the benchmark scripts merge them automatically.
|
|
1524
|
+
|
|
1525
|
+
The public corpus builder currently assembles:
|
|
1526
|
+
|
|
1527
|
+
- FLEURS multilingual dev clips
|
|
1528
|
+
- BSC Catalan-Spanish code-switch evaluation clips
|
|
1529
|
+
- CoSHE Hindi-English code-switch evaluation clips
|
|
1530
|
+
|
|
1531
|
+
Set either:
|
|
1532
|
+
|
|
1533
|
+
- `VOICE_FIXTURE_DIR=/abs/path/to/fixtures`
|
|
1534
|
+
- `VOICE_FIXTURE_DIRS=/abs/path/one,/abs/path/two`
|
|
1535
|
+
|
|
1536
|
+
Each fixture directory must include:
|
|
1537
|
+
|
|
1538
|
+
- `manifest.json`
|
|
1539
|
+
- `pcm/*.pcm`
|
|
1540
|
+
|
|
1541
|
+
Each manifest entry can include:
|
|
1542
|
+
|
|
1543
|
+
- `language`
|
|
1544
|
+
- `tags`
|
|
1545
|
+
Use the `multilingual`, `bilingual`, or `code-switch` tag to route fixtures into the multilingual benchmark lane.
|
|
1546
|
+
|
|
1547
|
+
Benchmark commands:
|
|
1548
|
+
|
|
1549
|
+
```bash
|
|
1550
|
+
bun run bench:multilingual
|
|
1551
|
+
bun run bench:code-switch
|
|
1552
|
+
bun run bench:code-switch:series
|
|
1553
|
+
bun run bench:code-switch:ca-es
|
|
1554
|
+
bun run bench:code-switch:ca-es:series
|
|
1555
|
+
bun run bench:code-switch:ca-es:corts:series
|
|
1556
|
+
bun run bench:code-switch:ca-es:parlament:series
|
|
1557
|
+
bun run bench:code-switch:hi-en
|
|
1558
|
+
bun run bench:code-switch:hi-en:series
|
|
1559
|
+
bun run bench:deepgram:multilingual
|
|
1560
|
+
bun run bench:deepgram:code-switch
|
|
1561
|
+
bun run bench:deepgram:code-switch:series
|
|
1562
|
+
bun run bench:deepgram:code-switch:ca-es
|
|
1563
|
+
bun run bench:deepgram:code-switch:ca-es:series
|
|
1564
|
+
bun run bench:deepgram:code-switch:ca-es:corts:series
|
|
1565
|
+
bun run bench:deepgram:code-switch:ca-es:parlament:series
|
|
1566
|
+
bun run bench:deepgram:code-switch:ca-es:nova3-multi:series
|
|
1567
|
+
bun run bench:deepgram:code-switch:ca-es:nova3-ca:series
|
|
1568
|
+
bun run bench:deepgram:code-switch:ca-es:nova3-es:series
|
|
1569
|
+
bun run bench:deepgram:code-switch:ca-es:nova2-ca:series
|
|
1570
|
+
bun run bench:deepgram:code-switch:ca-es:nova2-es:series
|
|
1571
|
+
bun run bench:deepgram:code-switch:ca-es:best:corrected:series
|
|
1572
|
+
bun run bench:deepgram:code-switch:ca-es:parlament:debug
|
|
1573
|
+
bun run bench:deepgram:code-switch:corrected:ca-es
|
|
1574
|
+
bun run bench:deepgram:code-switch:corrected:ca-es:series
|
|
1575
|
+
bun run bench:deepgram:code-switch:corrected:ca-es:corts:series
|
|
1576
|
+
bun run bench:deepgram:code-switch:corrected:ca-es:parlament:series
|
|
1577
|
+
bun run bench:deepgram:code-switch:hi-en
|
|
1578
|
+
bun run bench:deepgram:code-switch:hi-en:series
|
|
1579
|
+
bun run bench:deepgram:code-switch:corrected:hi-en
|
|
1580
|
+
bun run bench:deepgram:code-switch:corrected:hi-en:series
|
|
1581
|
+
bun run bench:deepgram:code-switch:corrected
|
|
1582
|
+
bun run bench:deepgram:code-switch:corrected:series
|
|
1583
|
+
bun run bench:assemblyai:multilingual
|
|
1584
|
+
bun run bench:assemblyai:code-switch
|
|
1585
|
+
bun run bench:openai:multilingual
|
|
1586
|
+
bun run bench:openai:code-switch
|
|
1587
|
+
bun run bench:openai:code-switch:series
|
|
1588
|
+
bun run bench:openai:code-switch:ca-es
|
|
1589
|
+
bun run bench:openai:code-switch:ca-es:series
|
|
1590
|
+
bun run bench:openai:code-switch:corrected:ca-es
|
|
1591
|
+
bun run bench:openai:code-switch:corrected:ca-es:series
|
|
1592
|
+
bun run bench:openai:code-switch:hi-en
|
|
1593
|
+
bun run bench:openai:code-switch:hi-en:series
|
|
1594
|
+
bun run bench:openai:code-switch:corrected:hi-en
|
|
1595
|
+
bun run bench:openai:code-switch:corrected:hi-en:series
|
|
1596
|
+
bun run bench:openai:code-switch:corrected
|
|
1597
|
+
bun run bench:openai:code-switch:corrected:series
|
|
1598
|
+
```
|
|
1599
|
+
|
|
1600
|
+
Current benchmark direction:
|
|
1601
|
+
|
|
1602
|
+
- `openai` is the strongest adapter on the current public multilingual corpus
|
|
1603
|
+
- `deepgram` remains the strongest browser-English path
|
|
1604
|
+
- raw code-switch remains a weaker surface for every adapter and should be benchmarked separately with `bench:code-switch`
|
|
1605
|
+
- jargon-heavy/domain-heavy English terms now have their own profile; use `bench:jargon` for the cross-adapter read and `bench:deepgram:corrected:audit:jargon` to compare `raw` vs `generic` vs `experimental` vs `benchmarkSeeded`
|
|
1606
|
+
- code-switch should be treated as language-pair-specific, not one universal lane; `ca-es` and `hi-en` now have dedicated series commands
|
|
1607
|
+
- `ca-es` also has a dedicated Deepgram model/language shootout lane so you can compare `nova-3`/`nova-2` with `multi`, `ca`, and `es` routing without overwriting results
|
|
1608
|
+
- current best `ca-es` base path is `deepgram` `nova-3` with `language=ca`; the short runner script uses that path for corrected series
|
|
1609
|
+
- `ca-es` is also split by source now: `corts_valencianes` and `parlament_parla` can be benchmarked independently, and `parlament_parla` has a dedicated transcript dump script
|
|
1610
|
+
- corrected code-switch runs now have dedicated lexicon-driven series commands so raw and corrected stability can be compared directly
|
|
1611
|
+
- multi-speaker diarization is now its own benchmark surface; use `bench:multi-speaker:run` for the parallel cross-adapter plus Deepgram-specific read
|
|
1612
|
+
- when tuning diarization specifically, use `bench:multi-speaker:analyze` to split Deepgram into clean vs noisy handoff lanes, include a corrected noisy read, and emit a speaker-pattern debug dump
|
|
1613
|
+
- use the `:series` commands when you need stability rather than a single-pass snapshot
|
|
1614
|
+
|
|
571
1615
|
## Client Primitives
|
|
572
1616
|
|
|
573
1617
|
Browser and framework helpers sit on top of the same connection core:
|