getpatter 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +5 -2
- package/dist/aec-PJJMUM5E.mjs +228 -0
- package/dist/{banner-3GNZ6VQK.mjs → banner-UYW6UM3J.mjs} +4 -1
- package/dist/{carrier-config-33HQ2W4V.mjs → carrier-config-4ZKVYAWV.mjs} +5 -2
- package/dist/{chunk-AFUYSNDH.mjs → chunk-6GR5MHHQ.mjs} +9 -0
- package/dist/chunk-CYLJVT5G.mjs +7031 -0
- package/dist/{chunk-FIFIWBL7.mjs → chunk-JUQ5WQTQ.mjs} +2157 -883
- package/dist/{chunk-VJVDG4V5.mjs → chunk-MVOQFAEO.mjs} +5 -0
- package/dist/chunk-N565J3CF.mjs +69 -0
- package/dist/chunk-X3364LSI.mjs +363 -0
- package/dist/{chunk-SEMKNPCD.mjs → chunk-XS45BAQL.mjs} +5 -1
- package/dist/cli.js +32 -621
- package/dist/client-2GJVZT42.mjs +8935 -0
- package/dist/dashboard/ui.html +63 -0
- package/dist/{dist-YRCCJQ26.mjs → dist-RYMPCILF.mjs} +28 -2
- package/dist/index.d.mts +2199 -240
- package/dist/index.d.ts +2199 -240
- package/dist/index.js +28942 -7073
- package/dist/index.mjs +2337 -447
- package/dist/{node-cron-6PRPSBG5.mjs → node-cron-JFWQQRBU.mjs} +23 -2
- package/dist/persistence-LVIAHESK.mjs +7 -0
- package/dist/silero-vad-YLCXT5GQ.mjs +7 -0
- package/dist/streamableHttp-WKNGHDVO.mjs +1496 -0
- package/dist/test-mode-Y7YG5LFZ.mjs +8 -0
- package/dist/tunnel-43CHWPVQ.mjs +8 -0
- package/package.json +7 -7
- package/src/dashboard/ui.html +63 -0
- package/dist/chunk-QHHBUCMT.mjs +0 -25
- package/dist/persistence-LQBYQPQQ.mjs +0 -7
- package/dist/test-mode-MVJ3SKG4.mjs +0 -8
- package/dist/tunnel-UVR3PPAU.mjs +0 -8
package/dist/index.mjs
CHANGED
|
@@ -1,9 +1,6 @@
|
|
|
1
|
-
import {
|
|
2
|
-
notifyDashboard
|
|
3
|
-
} from "./chunk-AFUYSNDH.mjs";
|
|
4
1
|
import {
|
|
5
2
|
startTunnel
|
|
6
|
-
} from "./chunk-SEMKNPCD.mjs";
|
|
3
|
+
} from "./chunk-XS45BAQL.mjs";
|
|
7
4
|
import {
|
|
8
5
|
AuthenticationError,
|
|
9
6
|
CallMetricsAccumulator,
|
|
@@ -13,6 +10,7 @@ import {
|
|
|
13
10
|
DefaultToolExecutor,
|
|
14
11
|
ElevenLabsConvAIAdapter,
|
|
15
12
|
EmbeddedServer,
|
|
13
|
+
ErrorCode,
|
|
16
14
|
EventBus,
|
|
17
15
|
LLMLoop,
|
|
18
16
|
MetricsStore,
|
|
@@ -43,12 +41,14 @@ import {
|
|
|
43
41
|
callsToJson,
|
|
44
42
|
createResampler16kTo8k,
|
|
45
43
|
createResampler24kTo16k,
|
|
44
|
+
createResampler24kTo8k,
|
|
46
45
|
createResampler8kTo16k,
|
|
47
46
|
initTracing,
|
|
48
47
|
isRemoteUrl,
|
|
49
48
|
isTracingEnabled,
|
|
50
49
|
isWebSocketUrl,
|
|
51
50
|
makeAuthMiddleware,
|
|
51
|
+
mergeAbortSignals,
|
|
52
52
|
mergePricing,
|
|
53
53
|
mountApi,
|
|
54
54
|
mountDashboard,
|
|
@@ -57,20 +57,40 @@ import {
|
|
|
57
57
|
resample16kTo8k,
|
|
58
58
|
resample24kTo16k,
|
|
59
59
|
resample8kTo16k,
|
|
60
|
+
resolveLogRoot,
|
|
60
61
|
startSpan
|
|
61
|
-
} from "./chunk-FIFIWBL7.mjs";
|
|
62
|
+
} from "./chunk-JUQ5WQTQ.mjs";
|
|
62
63
|
import {
|
|
63
64
|
getLogger,
|
|
64
65
|
setLogger
|
|
65
|
-
} from "./chunk-VJVDG4V5.mjs";
|
|
66
|
-
import
|
|
66
|
+
} from "./chunk-MVOQFAEO.mjs";
|
|
67
|
+
import {
|
|
68
|
+
notifyDashboard
|
|
69
|
+
} from "./chunk-6GR5MHHQ.mjs";
|
|
70
|
+
import {
|
|
71
|
+
SileroVAD
|
|
72
|
+
} from "./chunk-X3364LSI.mjs";
|
|
73
|
+
import {
|
|
74
|
+
__dirname,
|
|
75
|
+
__require,
|
|
76
|
+
init_esm_shims
|
|
77
|
+
} from "./chunk-N565J3CF.mjs";
|
|
78
|
+
|
|
79
|
+
// src/index.ts
|
|
80
|
+
init_esm_shims();
|
|
81
|
+
|
|
82
|
+
// src/client.ts
|
|
83
|
+
init_esm_shims();
|
|
67
84
|
|
|
68
85
|
// src/engines/openai.ts
|
|
86
|
+
init_esm_shims();
|
|
69
87
|
var Realtime = class {
|
|
70
88
|
kind = "openai_realtime";
|
|
71
89
|
apiKey;
|
|
72
90
|
model;
|
|
73
91
|
voice;
|
|
92
|
+
reasoningEffort;
|
|
93
|
+
inputAudioTranscriptionModel;
|
|
74
94
|
constructor(opts = {}) {
|
|
75
95
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
76
96
|
if (!key) {
|
|
@@ -81,10 +101,13 @@ var Realtime = class {
|
|
|
81
101
|
this.apiKey = key;
|
|
82
102
|
this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
|
|
83
103
|
this.voice = opts.voice ?? "alloy";
|
|
104
|
+
this.reasoningEffort = opts.reasoningEffort;
|
|
105
|
+
this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
|
|
84
106
|
}
|
|
85
107
|
};
|
|
86
108
|
|
|
87
109
|
// src/engines/elevenlabs.ts
|
|
110
|
+
init_esm_shims();
|
|
88
111
|
var ConvAI = class {
|
|
89
112
|
kind = "elevenlabs_convai";
|
|
90
113
|
apiKey;
|
|
@@ -100,7 +123,7 @@ var ConvAI = class {
|
|
|
100
123
|
}
|
|
101
124
|
if (!agent) {
|
|
102
125
|
throw new Error(
|
|
103
|
-
"ElevenLabs ConvAI requires an agentId.
|
|
126
|
+
"ElevenLabs ConvAI requires an agentId. Create one in the ElevenLabs dashboard (https://elevenlabs.io/app/conversational-ai) \u2014 the agent ID is per-deployed-agent and cannot be derived from the API key alone. Then either pass { agentId: 'agent_...' } at construction or set ELEVENLABS_AGENT_ID in the environment."
|
|
104
127
|
);
|
|
105
128
|
}
|
|
106
129
|
this.apiKey = key;
|
|
@@ -110,6 +133,7 @@ var ConvAI = class {
|
|
|
110
133
|
};
|
|
111
134
|
|
|
112
135
|
// src/tunnels/index.ts
|
|
136
|
+
init_esm_shims();
|
|
113
137
|
var CloudflareTunnel = class {
|
|
114
138
|
kind = "cloudflare";
|
|
115
139
|
};
|
|
@@ -144,11 +168,448 @@ var Ngrok = class {
|
|
|
144
168
|
}
|
|
145
169
|
};
|
|
146
170
|
|
|
171
|
+
// src/tools/schema-validation.ts
|
|
172
|
+
init_esm_shims();
|
|
173
|
+
var ToolSchemaError = class extends Error {
|
|
174
|
+
constructor(message) {
|
|
175
|
+
super(message);
|
|
176
|
+
this.name = "ToolSchemaError";
|
|
177
|
+
}
|
|
178
|
+
};
|
|
179
|
+
function validateToolSchema(tool2) {
|
|
180
|
+
const params = tool2.parameters;
|
|
181
|
+
const tag = `tool '${tool2.name}'`;
|
|
182
|
+
if (!params || typeof params !== "object" || Array.isArray(params)) {
|
|
183
|
+
throw new ToolSchemaError(
|
|
184
|
+
`${tag}: \`parameters\` must be a JSON Schema object (got ${typeof params}).`
|
|
185
|
+
);
|
|
186
|
+
}
|
|
187
|
+
if (params.type !== "object") {
|
|
188
|
+
throw new ToolSchemaError(
|
|
189
|
+
`${tag}: \`parameters.type\` must be "object" (got ${JSON.stringify(params.type)}). OpenAI function tools require an object root.`
|
|
190
|
+
);
|
|
191
|
+
}
|
|
192
|
+
if (params.properties !== void 0 && (typeof params.properties !== "object" || params.properties === null || Array.isArray(params.properties))) {
|
|
193
|
+
throw new ToolSchemaError(
|
|
194
|
+
`${tag}: \`parameters.properties\` must be an object map of field \u2192 JSON Schema.`
|
|
195
|
+
);
|
|
196
|
+
}
|
|
197
|
+
if (params.required !== void 0 && !Array.isArray(params.required)) {
|
|
198
|
+
throw new ToolSchemaError(
|
|
199
|
+
`${tag}: \`parameters.required\` must be an array of field names.`
|
|
200
|
+
);
|
|
201
|
+
}
|
|
202
|
+
if (Array.isArray(params.required) && params.properties) {
|
|
203
|
+
const props = params.properties;
|
|
204
|
+
for (const fieldName of params.required) {
|
|
205
|
+
if (typeof fieldName !== "string") {
|
|
206
|
+
throw new ToolSchemaError(
|
|
207
|
+
`${tag}: \`parameters.required\` entries must be strings (got ${typeof fieldName}).`
|
|
208
|
+
);
|
|
209
|
+
}
|
|
210
|
+
if (!(fieldName in props)) {
|
|
211
|
+
throw new ToolSchemaError(
|
|
212
|
+
`${tag}: \`parameters.required\` lists "${fieldName}" but it is not declared in \`parameters.properties\`.`
|
|
213
|
+
);
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
if (tool2.strict === true) {
|
|
218
|
+
validateStrictModeSchema(tool2.name, params);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
function validateStrictModeSchema(toolName, schema, pathParts = []) {
|
|
222
|
+
const tag = `tool '${toolName}'`;
|
|
223
|
+
const here = pathParts.length === 0 ? "parameters" : `parameters.${pathParts.join(".")}`;
|
|
224
|
+
if (schema.type === "object") {
|
|
225
|
+
if (schema.additionalProperties !== false) {
|
|
226
|
+
throw new ToolSchemaError(
|
|
227
|
+
`${tag}: strict mode requires \`${here}.additionalProperties: false\` on every object \u2014 got ${JSON.stringify(schema.additionalProperties)}.`
|
|
228
|
+
);
|
|
229
|
+
}
|
|
230
|
+
const props = schema.properties ?? {};
|
|
231
|
+
const required = Array.isArray(schema.required) ? schema.required : [];
|
|
232
|
+
for (const propName of Object.keys(props)) {
|
|
233
|
+
if (!required.includes(propName)) {
|
|
234
|
+
throw new ToolSchemaError(
|
|
235
|
+
`${tag}: strict mode requires every property to be listed in \`required\` \u2014 "${here}.${propName}" is missing. Use a nullable type (e.g. ["string", "null"]) instead of an optional field.`
|
|
236
|
+
);
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
for (const [propName, propSchema] of Object.entries(props)) {
|
|
240
|
+
if (propSchema && typeof propSchema === "object") {
|
|
241
|
+
validateStrictModeSchema(toolName, propSchema, [...pathParts, "properties", propName]);
|
|
242
|
+
}
|
|
243
|
+
}
|
|
244
|
+
} else if (schema.type === "array" && schema.items && typeof schema.items === "object") {
|
|
245
|
+
validateStrictModeSchema(toolName, schema.items, [...pathParts, "items"]);
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
function validateAllToolSchemas(tools) {
|
|
249
|
+
if (!tools) return;
|
|
250
|
+
for (const tool2 of tools) {
|
|
251
|
+
validateToolSchema(tool2);
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
|
|
255
|
+
// src/_speech-events.ts
|
|
256
|
+
init_esm_shims();
|
|
257
|
+
var logger = getLogger();
|
|
258
|
+
var otelTrace = null;
|
|
259
|
+
var otelLoaded = false;
|
|
260
|
+
function loadOtel() {
|
|
261
|
+
if (otelLoaded) return otelTrace;
|
|
262
|
+
otelLoaded = true;
|
|
263
|
+
try {
|
|
264
|
+
const mod = __require("@opentelemetry/api");
|
|
265
|
+
otelTrace = mod.trace;
|
|
266
|
+
} catch {
|
|
267
|
+
otelTrace = null;
|
|
268
|
+
}
|
|
269
|
+
return otelTrace;
|
|
270
|
+
}
|
|
271
|
+
function recordSpanEvent(name, attrs) {
|
|
272
|
+
const trace = loadOtel();
|
|
273
|
+
if (trace === null) return;
|
|
274
|
+
try {
|
|
275
|
+
const span = trace.getActiveSpan?.();
|
|
276
|
+
if (!span || !span.isRecording()) return;
|
|
277
|
+
span.addEvent(name, attrs);
|
|
278
|
+
} catch (err) {
|
|
279
|
+
logger.debug?.(`Failed to record OTel span event ${name}: ${String(err)}`);
|
|
280
|
+
}
|
|
281
|
+
}
|
|
282
|
+
function nowMs() {
|
|
283
|
+
return Date.now();
|
|
284
|
+
}
|
|
285
|
+
var SpeechEvents = class {
|
|
286
|
+
// Public callback slots — any of them may be set by the user.
|
|
287
|
+
onUserSpeechStarted = null;
|
|
288
|
+
onUserSpeechEnded = null;
|
|
289
|
+
onUserSpeechEos = null;
|
|
290
|
+
onAgentSpeechStarted = null;
|
|
291
|
+
onAgentSpeechEnded = null;
|
|
292
|
+
onLlmToken = null;
|
|
293
|
+
onAudioOut = null;
|
|
294
|
+
// State machine — read via `conversationState`.
|
|
295
|
+
userState = "listening";
|
|
296
|
+
agentState = "initializing";
|
|
297
|
+
// Per-turn cursors. `turnIdxValue` increments on every committed EOU.
|
|
298
|
+
turnIdxValue = 0;
|
|
299
|
+
firstTokenForTurn = true;
|
|
300
|
+
firstAudioForTurn = true;
|
|
301
|
+
// Optional call start (ms since epoch) — used to compute `audioOffsetMs`
|
|
302
|
+
// payloads when the caller does not provide one.
|
|
303
|
+
callStartMs = null;
|
|
304
|
+
/** Snapshot of the current per-side state of the call. */
|
|
305
|
+
get conversationState() {
|
|
306
|
+
return { user: this.userState, agent: this.agentState };
|
|
307
|
+
}
|
|
308
|
+
/** Current 0-based turn index. Increments on every EOU commit. */
|
|
309
|
+
get turnIdx() {
|
|
310
|
+
return this.turnIdxValue;
|
|
311
|
+
}
|
|
312
|
+
/** Record the call-start wall-clock for ``audioOffsetMs`` math. */
|
|
313
|
+
markCallStarted(tsMs) {
|
|
314
|
+
this.callStartMs = tsMs ?? nowMs();
|
|
315
|
+
this.userState = "listening";
|
|
316
|
+
this.agentState = "idle";
|
|
317
|
+
}
|
|
318
|
+
/** Reset per-turn cursors. Called automatically on EOU commit. */
|
|
319
|
+
resetTurnState() {
|
|
320
|
+
this.firstTokenForTurn = true;
|
|
321
|
+
this.firstAudioForTurn = true;
|
|
322
|
+
}
|
|
323
|
+
// ---- User-side events -----------------------------------------------
|
|
324
|
+
/** Fire on the VAD positive edge of the inbound stream.
|
|
325
|
+
*
|
|
326
|
+
* Do not coalesce: the runner consumes positive→negative→positive
|
|
327
|
+
* transitions in order. For server-VAD engines (OpenAI Realtime, Telnyx
|
|
328
|
+
* Voice AI), forward the upstream signal directly — do not re-run a VAD
|
|
329
|
+
* layer on top.
|
|
330
|
+
*/
|
|
331
|
+
async fireUserSpeechStarted(opts = {}) {
|
|
332
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
333
|
+
const payload = { timestamp_ms: tsMs };
|
|
334
|
+
if (opts.vadConfidence !== void 0)
|
|
335
|
+
payload.vad_confidence = opts.vadConfidence;
|
|
336
|
+
const offset = this.resolveOffset(opts.audioOffsetMs, tsMs);
|
|
337
|
+
if (offset !== null) payload.audio_offset_ms = offset;
|
|
338
|
+
this.userState = "speaking";
|
|
339
|
+
await this.dispatch(this.onUserSpeechStarted, payload, {
|
|
340
|
+
spanEvent: "patter.event.user_speech_started",
|
|
341
|
+
spanAttrs: filterUndef({
|
|
342
|
+
"patter.audio.offset_ms": payload.audio_offset_ms,
|
|
343
|
+
"patter.vad.confidence": payload.vad_confidence
|
|
344
|
+
})
|
|
345
|
+
});
|
|
346
|
+
}
|
|
347
|
+
/** Fire on the VAD trailing edge (raw — *not* EOU).
|
|
348
|
+
*
|
|
349
|
+
* `speechDurationMs` is the length of the segment that just ended; the
|
|
350
|
+
* runner uses it to compute talk-ratio.
|
|
351
|
+
*/
|
|
352
|
+
async fireUserSpeechEnded(opts) {
|
|
353
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
354
|
+
const payload = {
|
|
355
|
+
timestamp_ms: tsMs,
|
|
356
|
+
speech_duration_ms: opts.speechDurationMs
|
|
357
|
+
};
|
|
358
|
+
if (opts.vadConfidence !== void 0)
|
|
359
|
+
payload.vad_confidence = opts.vadConfidence;
|
|
360
|
+
const offset = this.resolveOffset(opts.audioOffsetMs, tsMs);
|
|
361
|
+
if (offset !== null) payload.audio_offset_ms = offset;
|
|
362
|
+
this.userState = "listening";
|
|
363
|
+
await this.dispatch(this.onUserSpeechEnded, payload, {
|
|
364
|
+
spanEvent: "patter.event.user_speech_ended",
|
|
365
|
+
spanAttrs: { "patter.speech.duration_ms": opts.speechDurationMs }
|
|
366
|
+
});
|
|
367
|
+
}
|
|
368
|
+
/** Fire on the committed end-of-utterance.
|
|
369
|
+
*
|
|
370
|
+
* This is the canonical "user finished" signal — VAD edge + trailing
|
|
371
|
+
* silence + (optionally) a semantic turn-detector model agreement. The
|
|
372
|
+
* runner uses the timestamp of this event to compute
|
|
373
|
+
* `eos_to_first_token_ms` (Hamming AI threshold: <800 ms good, >1500 ms
|
|
374
|
+
* critical).
|
|
375
|
+
*/
|
|
376
|
+
async fireUserSpeechEos(opts) {
|
|
377
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
378
|
+
const payload = {
|
|
379
|
+
timestamp_ms: tsMs,
|
|
380
|
+
trigger: opts.trigger
|
|
381
|
+
};
|
|
382
|
+
if (opts.trailingSilenceMs !== void 0)
|
|
383
|
+
payload.trailing_silence_ms = opts.trailingSilenceMs;
|
|
384
|
+
if (opts.transcriptSoFar !== void 0)
|
|
385
|
+
payload.transcript_so_far = opts.transcriptSoFar;
|
|
386
|
+
this.turnIdxValue += 1;
|
|
387
|
+
this.resetTurnState();
|
|
388
|
+
this.userState = "listening";
|
|
389
|
+
this.agentState = "thinking";
|
|
390
|
+
await this.dispatch(this.onUserSpeechEos, payload, {
|
|
391
|
+
spanEvent: "patter.event.user_speech_eos",
|
|
392
|
+
spanAttrs: filterUndef({
|
|
393
|
+
"patter.eos.trigger": opts.trigger,
|
|
394
|
+
"patter.eos.trailing_silence_ms": opts.trailingSilenceMs
|
|
395
|
+
})
|
|
396
|
+
});
|
|
397
|
+
}
|
|
398
|
+
// ---- Agent-side events ----------------------------------------------
|
|
399
|
+
/** Fire on the FIRST audio chunk of the current agent turn that crosses
|
|
400
|
+
* to the wire (not the first chunk produced by TTS).
|
|
401
|
+
*
|
|
402
|
+
* The user hears the wire chunk, so this is the timestamp the runner
|
|
403
|
+
* anchors barge-in latency on.
|
|
404
|
+
*/
|
|
405
|
+
async fireAgentSpeechStarted(opts = {}) {
|
|
406
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
407
|
+
const payload = {
|
|
408
|
+
timestamp_ms: tsMs,
|
|
409
|
+
turn_idx: this.turnIdxValue
|
|
410
|
+
};
|
|
411
|
+
if (opts.ttsProvider !== void 0) payload.tts_provider = opts.ttsProvider;
|
|
412
|
+
if (opts.engine !== void 0) payload.engine = opts.engine;
|
|
413
|
+
this.agentState = "speaking";
|
|
414
|
+
await this.dispatch(this.onAgentSpeechStarted, payload, {
|
|
415
|
+
spanEvent: "patter.event.agent_speech_started",
|
|
416
|
+
spanAttrs: filterUndef({
|
|
417
|
+
"patter.turn.idx": this.turnIdxValue,
|
|
418
|
+
"patter.tts.provider": opts.ttsProvider,
|
|
419
|
+
"patter.engine": opts.engine
|
|
420
|
+
})
|
|
421
|
+
});
|
|
422
|
+
}
|
|
423
|
+
/** Fire on the LAST audio chunk of the current agent turn.
|
|
424
|
+
*
|
|
425
|
+
* `interrupted=true` marks the turn as cancelled by barge-in; the runner
|
|
426
|
+
* treats it as the `agent_speech_stopped` half of a barge-in pair.
|
|
427
|
+
*/
|
|
428
|
+
async fireAgentSpeechEnded(opts) {
|
|
429
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
430
|
+
const interrupted = opts.interrupted ?? false;
|
|
431
|
+
const payload = {
|
|
432
|
+
timestamp_ms: tsMs,
|
|
433
|
+
turn_idx: this.turnIdxValue,
|
|
434
|
+
speech_duration_ms: opts.speechDurationMs,
|
|
435
|
+
interrupted
|
|
436
|
+
};
|
|
437
|
+
this.agentState = "idle";
|
|
438
|
+
await this.dispatch(this.onAgentSpeechEnded, payload, {
|
|
439
|
+
spanEvent: "patter.event.agent_speech_ended",
|
|
440
|
+
spanAttrs: {
|
|
441
|
+
"patter.turn.idx": this.turnIdxValue,
|
|
442
|
+
"patter.speech.duration_ms": opts.speechDurationMs,
|
|
443
|
+
"patter.turn.interrupted": interrupted
|
|
444
|
+
}
|
|
445
|
+
});
|
|
446
|
+
}
|
|
447
|
+
// ---- LLM / TTS events -----------------------------------------------
|
|
448
|
+
/** Fire on the FIRST LLM token of the current turn (TTFT marker).
|
|
449
|
+
*
|
|
450
|
+
* Idempotent within a turn — guarded by `firstTokenForTurn`. Combined
|
|
451
|
+
* with `on_user_speech_eos.timestamp_ms` the runner computes
|
|
452
|
+
* `eos_to_first_token_ms`.
|
|
453
|
+
*/
|
|
454
|
+
async fireLlmFirstToken(opts) {
|
|
455
|
+
if (!this.firstTokenForTurn) return;
|
|
456
|
+
this.firstTokenForTurn = false;
|
|
457
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
458
|
+
const payload = {
|
|
459
|
+
timestamp_ms: tsMs,
|
|
460
|
+
turn_idx: this.turnIdxValue,
|
|
461
|
+
llm_provider: opts.llmProvider,
|
|
462
|
+
model: opts.model
|
|
463
|
+
};
|
|
464
|
+
await this.dispatch(this.onLlmToken, payload, {
|
|
465
|
+
spanEvent: "patter.event.llm_first_token",
|
|
466
|
+
spanAttrs: {
|
|
467
|
+
"gen_ai.request.model": opts.model,
|
|
468
|
+
"gen_ai.provider.name": opts.llmProvider,
|
|
469
|
+
"patter.turn.idx": this.turnIdxValue
|
|
470
|
+
}
|
|
471
|
+
});
|
|
472
|
+
}
|
|
473
|
+
/** Fire on the FIRST TTS audio chunk for the current turn.
|
|
474
|
+
*
|
|
475
|
+
* Distinct from `fireAgentSpeechStarted`: this is the agent-side buffer
|
|
476
|
+
* arrival (TTS warmup), not the wire-time chunk. Idempotent within a
|
|
477
|
+
* turn — guarded by `firstAudioForTurn`.
|
|
478
|
+
*/
|
|
479
|
+
async fireAudioOut(opts) {
|
|
480
|
+
if (!this.firstAudioForTurn) return;
|
|
481
|
+
this.firstAudioForTurn = false;
|
|
482
|
+
const tsMs = opts.timestampMs ?? nowMs();
|
|
483
|
+
const payload = {
|
|
484
|
+
timestamp_ms: tsMs,
|
|
485
|
+
turn_idx: this.turnIdxValue,
|
|
486
|
+
tts_provider: opts.ttsProvider
|
|
487
|
+
};
|
|
488
|
+
await this.dispatch(this.onAudioOut, payload, {
|
|
489
|
+
spanEvent: "patter.event.tts_first_audio",
|
|
490
|
+
spanAttrs: {
|
|
491
|
+
"patter.turn.idx": this.turnIdxValue,
|
|
492
|
+
"patter.tts.provider": opts.ttsProvider
|
|
493
|
+
}
|
|
494
|
+
});
|
|
495
|
+
}
|
|
496
|
+
// ---- Internal -------------------------------------------------------
|
|
497
|
+
resolveOffset(given, tsMs) {
|
|
498
|
+
if (given !== void 0) return given;
|
|
499
|
+
if (this.callStartMs !== null) return Math.max(0, tsMs - this.callStartMs);
|
|
500
|
+
return null;
|
|
501
|
+
}
|
|
502
|
+
async dispatch(cb, payload, opts) {
|
|
503
|
+
recordSpanEvent(opts.spanEvent, opts.spanAttrs);
|
|
504
|
+
if (cb === null) return;
|
|
505
|
+
try {
|
|
506
|
+
await cb(payload);
|
|
507
|
+
} catch (err) {
|
|
508
|
+
logger.warn?.(
|
|
509
|
+
`Speech-event callback ${opts.spanEvent} raised: ${String(err)}`
|
|
510
|
+
);
|
|
511
|
+
}
|
|
512
|
+
}
|
|
513
|
+
};
|
|
514
|
+
function filterUndef(obj) {
|
|
515
|
+
const out = {};
|
|
516
|
+
for (const [k, v] of Object.entries(obj)) {
|
|
517
|
+
if (v !== void 0) out[k] = v;
|
|
518
|
+
}
|
|
519
|
+
return out;
|
|
520
|
+
}
|
|
521
|
+
|
|
147
522
|
// src/client.ts
|
|
523
|
+
function resolvePersistRoot(persist) {
|
|
524
|
+
if (persist === false) return null;
|
|
525
|
+
if (persist === true) return resolveLogRoot("auto");
|
|
526
|
+
if (typeof persist === "string") return resolveLogRoot(persist);
|
|
527
|
+
return resolveLogRoot();
|
|
528
|
+
}
|
|
148
529
|
var Patter = class {
|
|
149
530
|
localConfig;
|
|
150
531
|
embeddedServer = null;
|
|
151
532
|
tunnelHandle = null;
|
|
533
|
+
_tunnelReadyResolve;
|
|
534
|
+
_tunnelReadyReject;
|
|
535
|
+
_tunnelReady;
|
|
536
|
+
_readyResolve;
|
|
537
|
+
_readyReject;
|
|
538
|
+
_ready;
|
|
539
|
+
/**
|
|
540
|
+
* True iff ``localConfig.webhookUrl`` was populated by ``serve()`` from a
|
|
541
|
+
* freshly-started cloudflared tunnel (rather than by the constructor from
|
|
542
|
+
* an explicit ``webhookUrl`` / ``StaticTunnel`` config). ``disconnect()``
|
|
543
|
+
* uses this flag to clear ONLY the auto-assigned hostname so a subsequent
|
|
544
|
+
* ``serve()`` call (e.g. from a plugin's ``ensureServing`` cycle that
|
|
545
|
+
* disposes + restarts on agent-identity changes) does not throw
|
|
546
|
+
* ``Cannot use both tunnel: true and webhookUrl``.
|
|
547
|
+
*/
|
|
548
|
+
tunnelOwnsWebhookUrl = false;
|
|
549
|
+
/**
|
|
550
|
+
* Speech-edge events for turn-taking instrumentation. Public surface: the
|
|
551
|
+
* seven `on*` proxy accessors below plus the `conversationState` snapshot.
|
|
552
|
+
* Defaults are no-ops — existing users who never set a callback see exactly
|
|
553
|
+
* the previous behaviour.
|
|
554
|
+
*
|
|
555
|
+
* See `src/_speech-events.ts` for the full event taxonomy and the
|
|
556
|
+
* industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
|
|
557
|
+
*/
|
|
558
|
+
speechEvents = new SpeechEvents();
|
|
559
|
+
// ---- Speech-edge event callback proxies ------------------------------
|
|
560
|
+
// The seven `on*` properties below mirror the public APIs of LiveKit
|
|
561
|
+
// Agents, Pipecat and OpenAI Realtime. They proxy to `speechEvents` so
|
|
562
|
+
// the dispatcher remains the single source of truth (state + OTel).
|
|
563
|
+
get onUserSpeechStarted() {
|
|
564
|
+
return this.speechEvents.onUserSpeechStarted;
|
|
565
|
+
}
|
|
566
|
+
set onUserSpeechStarted(cb) {
|
|
567
|
+
this.speechEvents.onUserSpeechStarted = cb;
|
|
568
|
+
}
|
|
569
|
+
get onUserSpeechEnded() {
|
|
570
|
+
return this.speechEvents.onUserSpeechEnded;
|
|
571
|
+
}
|
|
572
|
+
set onUserSpeechEnded(cb) {
|
|
573
|
+
this.speechEvents.onUserSpeechEnded = cb;
|
|
574
|
+
}
|
|
575
|
+
get onUserSpeechEos() {
|
|
576
|
+
return this.speechEvents.onUserSpeechEos;
|
|
577
|
+
}
|
|
578
|
+
set onUserSpeechEos(cb) {
|
|
579
|
+
this.speechEvents.onUserSpeechEos = cb;
|
|
580
|
+
}
|
|
581
|
+
get onAgentSpeechStarted() {
|
|
582
|
+
return this.speechEvents.onAgentSpeechStarted;
|
|
583
|
+
}
|
|
584
|
+
set onAgentSpeechStarted(cb) {
|
|
585
|
+
this.speechEvents.onAgentSpeechStarted = cb;
|
|
586
|
+
}
|
|
587
|
+
get onAgentSpeechEnded() {
|
|
588
|
+
return this.speechEvents.onAgentSpeechEnded;
|
|
589
|
+
}
|
|
590
|
+
set onAgentSpeechEnded(cb) {
|
|
591
|
+
this.speechEvents.onAgentSpeechEnded = cb;
|
|
592
|
+
}
|
|
593
|
+
get onLlmToken() {
|
|
594
|
+
return this.speechEvents.onLlmToken;
|
|
595
|
+
}
|
|
596
|
+
set onLlmToken(cb) {
|
|
597
|
+
this.speechEvents.onLlmToken = cb;
|
|
598
|
+
}
|
|
599
|
+
get onAudioOut() {
|
|
600
|
+
return this.speechEvents.onAudioOut;
|
|
601
|
+
}
|
|
602
|
+
set onAudioOut(cb) {
|
|
603
|
+
this.speechEvents.onAudioOut = cb;
|
|
604
|
+
}
|
|
605
|
+
/**
|
|
606
|
+
* Snapshot of the current per-side state of the call.
|
|
607
|
+
* Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
|
|
608
|
+
* payloads. Read-only and safe to call at any time.
|
|
609
|
+
*/
|
|
610
|
+
get conversationState() {
|
|
611
|
+
return this.speechEvents.conversationState;
|
|
612
|
+
}
|
|
152
613
|
/**
|
|
153
614
|
* Live `MetricsStore` for the embedded server. Returns `null` before
|
|
154
615
|
* `serve()` is called. Exposed so integrations like `PatterTool` can
|
|
@@ -158,6 +619,42 @@ var Patter = class {
|
|
|
158
619
|
get metricsStore() {
|
|
159
620
|
return this.embeddedServer?.metricsStore ?? null;
|
|
160
621
|
}
|
|
622
|
+
/**
|
|
623
|
+
* Resolves to the public webhook hostname as soon as it is known —
|
|
624
|
+
* either statically configured or freshly minted by the tunnel.
|
|
625
|
+
*
|
|
626
|
+
* **Prefer `phone.ready` for outbound calls.** This promise resolves
|
|
627
|
+
* before the embedded HTTP / WebSocket server is in `listen` state, so
|
|
628
|
+
* a `phone.call` placed immediately afterwards can still race the
|
|
629
|
+
* Twilio Media Streams upgrade and produce a "11100 Invalid URL
|
|
630
|
+
* format" call drop on answer.
|
|
631
|
+
*
|
|
632
|
+
* Kept as a separate signal because some integrations (e.g. webhook
|
|
633
|
+
* registration) only need the hostname, not the WS server.
|
|
634
|
+
*/
|
|
635
|
+
get tunnelReady() {
|
|
636
|
+
return this._tunnelReady;
|
|
637
|
+
}
|
|
638
|
+
/**
|
|
639
|
+
* Resolves to the public webhook hostname once the SDK is fully ready
|
|
640
|
+
* to handle carrier callbacks: tunnel resolved, carrier auto-config
|
|
641
|
+
* complete, and the embedded HTTP / WS server in `listen` state.
|
|
642
|
+
*
|
|
643
|
+
* Use this for outbound calls instead of guessing `setTimeout` after
|
|
644
|
+
* `void phone.serve(...)`:
|
|
645
|
+
*
|
|
646
|
+
* ```ts
|
|
647
|
+
* void phone.serve({ agent, tunnel: true });
|
|
648
|
+
* await phone.ready;
|
|
649
|
+
* await phone.call({ to: '+15550001234', agent });
|
|
650
|
+
* ```
|
|
651
|
+
*
|
|
652
|
+
* Rejects with the underlying exception if `serve()` fails before the
|
|
653
|
+
* server is listening.
|
|
654
|
+
*/
|
|
655
|
+
get ready() {
|
|
656
|
+
return this._ready;
|
|
657
|
+
}
|
|
161
658
|
constructor(options) {
|
|
162
659
|
if (options.apiKey !== void 0) {
|
|
163
660
|
throw new Error(
|
|
@@ -190,10 +687,27 @@ var Patter = class {
|
|
|
190
687
|
phoneNumber: options.phoneNumber,
|
|
191
688
|
webhookUrl: normalizedWebhook,
|
|
192
689
|
tunnel: options.tunnel,
|
|
193
|
-
openaiKey: options.openaiKey
|
|
690
|
+
openaiKey: options.openaiKey,
|
|
691
|
+
persistRoot: resolvePersistRoot(options.persist)
|
|
194
692
|
};
|
|
693
|
+
this._tunnelReady = new Promise((resolve, reject) => {
|
|
694
|
+
this._tunnelReadyResolve = resolve;
|
|
695
|
+
this._tunnelReadyReject = reject;
|
|
696
|
+
});
|
|
697
|
+
this._tunnelReady.catch(() => {
|
|
698
|
+
});
|
|
699
|
+
if (normalizedWebhook) {
|
|
700
|
+
this._tunnelReadyResolve(normalizedWebhook);
|
|
701
|
+
}
|
|
702
|
+
this._ready = new Promise((resolve, reject) => {
|
|
703
|
+
this._readyResolve = resolve;
|
|
704
|
+
this._readyReject = reject;
|
|
705
|
+
});
|
|
706
|
+
this._ready.catch(() => {
|
|
707
|
+
});
|
|
195
708
|
}
|
|
196
709
|
// === Agent definition ===
|
|
710
|
+
/** Resolve user-supplied agent options against engine defaults and return the merged config. */
|
|
197
711
|
agent(opts) {
|
|
198
712
|
let working = { ...opts };
|
|
199
713
|
if (opts.engine) {
|
|
@@ -258,10 +772,23 @@ var Patter = class {
|
|
|
258
772
|
if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
|
|
259
773
|
throw new TypeError("variables must be an object");
|
|
260
774
|
}
|
|
775
|
+
if (working.tools) {
|
|
776
|
+
validateAllToolSchemas(working.tools);
|
|
777
|
+
}
|
|
261
778
|
return working;
|
|
262
779
|
}
|
|
263
780
|
// === Serve / test / call ===
|
|
781
|
+
/** Boot the embedded HTTP/WebSocket server, configure the carrier webhook, and resolve `ready`. */
|
|
264
782
|
async serve(opts) {
|
|
783
|
+
try {
|
|
784
|
+
await this._serveImpl(opts);
|
|
785
|
+
} catch (err) {
|
|
786
|
+
const e = err instanceof Error ? err : new Error(String(err));
|
|
787
|
+
this._readyReject(e);
|
|
788
|
+
throw e;
|
|
789
|
+
}
|
|
790
|
+
}
|
|
791
|
+
async _serveImpl(opts) {
|
|
265
792
|
if (!opts.agent || typeof opts.agent !== "object") {
|
|
266
793
|
throw new TypeError("agent is required. Use phone.agent() to create one.");
|
|
267
794
|
}
|
|
@@ -286,31 +813,44 @@ var Patter = class {
|
|
|
286
813
|
if (wantsCloudflared && webhookUrl) {
|
|
287
814
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
288
815
|
}
|
|
289
|
-
const { showBanner } = await import("./banner-3GNZ6VQK.mjs");
|
|
816
|
+
const { showBanner } = await import("./banner-UYW6UM3J.mjs");
|
|
290
817
|
showBanner();
|
|
291
818
|
if (wantsCloudflared) {
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
819
|
+
try {
|
|
820
|
+
const { startTunnel: startTunnel2 } = await import("./tunnel-43CHWPVQ.mjs");
|
|
821
|
+
this.tunnelHandle = await startTunnel2(port);
|
|
822
|
+
webhookUrl = this.tunnelHandle.hostname;
|
|
823
|
+
this.localConfig = { ...this.localConfig, webhookUrl };
|
|
824
|
+
this.tunnelOwnsWebhookUrl = true;
|
|
825
|
+
this._tunnelReadyResolve(webhookUrl);
|
|
826
|
+
} catch (err) {
|
|
827
|
+
const e = err instanceof Error ? err : new Error(String(err));
|
|
828
|
+
this._tunnelReadyReject(e);
|
|
829
|
+
throw e;
|
|
830
|
+
}
|
|
296
831
|
}
|
|
297
832
|
if (!webhookUrl) {
|
|
298
|
-
|
|
833
|
+
const err = new Error(
|
|
299
834
|
"No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
|
|
300
835
|
);
|
|
836
|
+
this._tunnelReadyReject(err);
|
|
837
|
+
throw err;
|
|
301
838
|
}
|
|
302
839
|
const carrier = this.localConfig.carrier;
|
|
303
840
|
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
304
|
-
const
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
841
|
+
const wantsCarrierManagement = opts.manageWebhook !== false || wantsCloudflared;
|
|
842
|
+
if (wantsCarrierManagement) {
|
|
843
|
+
const { autoConfigureCarrier } = await import("./carrier-config-4ZKVYAWV.mjs");
|
|
844
|
+
await autoConfigureCarrier({
|
|
845
|
+
telephonyProvider,
|
|
846
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
847
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
848
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
849
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
850
|
+
phoneNumber: this.localConfig.phoneNumber,
|
|
851
|
+
webhookHost: webhookUrl
|
|
852
|
+
});
|
|
853
|
+
}
|
|
314
854
|
this.embeddedServer = new EmbeddedServer(
|
|
315
855
|
{
|
|
316
856
|
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
@@ -321,7 +861,8 @@ var Patter = class {
|
|
|
321
861
|
telephonyProvider,
|
|
322
862
|
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
323
863
|
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
324
|
-
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
|
|
864
|
+
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0,
|
|
865
|
+
persistRoot: this.localConfig.persistRoot
|
|
325
866
|
},
|
|
326
867
|
opts.agent,
|
|
327
868
|
opts.onCallStart,
|
|
@@ -335,10 +876,21 @@ var Patter = class {
|
|
|
335
876
|
opts.dashboard ?? true,
|
|
336
877
|
opts.dashboardToken ?? ""
|
|
337
878
|
);
|
|
338
|
-
|
|
879
|
+
try {
|
|
880
|
+
await this.embeddedServer.start(port);
|
|
881
|
+
if (this.tunnelHandle) {
|
|
882
|
+
await waitForTunnelPubliclyReachable(webhookUrl);
|
|
883
|
+
}
|
|
884
|
+
this._readyResolve(webhookUrl);
|
|
885
|
+
} catch (err) {
|
|
886
|
+
const e = err instanceof Error ? err : new Error(String(err));
|
|
887
|
+
this._readyReject(e);
|
|
888
|
+
throw e;
|
|
889
|
+
}
|
|
339
890
|
}
|
|
891
|
+
/** Run the agent in interactive terminal-test mode (no real telephony). */
|
|
340
892
|
async test(opts) {
|
|
341
|
-
const { TestSession: TestSession2 } = await import("./test-mode-
|
|
893
|
+
const { TestSession: TestSession2 } = await import("./test-mode-Y7YG5LFZ.mjs");
|
|
342
894
|
const session = new TestSession2();
|
|
343
895
|
await session.run({
|
|
344
896
|
agent: opts.agent,
|
|
@@ -348,6 +900,7 @@ var Patter = class {
|
|
|
348
900
|
onCallEnd: opts.onCallEnd
|
|
349
901
|
});
|
|
350
902
|
}
|
|
903
|
+
/** Place an outbound call via the configured carrier. */
|
|
351
904
|
async call(options) {
|
|
352
905
|
if (!options.to) {
|
|
353
906
|
throw new Error("'to' phone number is required");
|
|
@@ -357,6 +910,10 @@ var Patter = class {
|
|
|
357
910
|
}
|
|
358
911
|
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
359
912
|
const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
|
|
913
|
+
const wantsAmd = options.machineDetection !== false || Boolean(options.voicemailMessage);
|
|
914
|
+
if (this.embeddedServer) {
|
|
915
|
+
this.embeddedServer.onMachineDetection = options.onMachineDetection;
|
|
916
|
+
}
|
|
360
917
|
if (carrier.kind === "telnyx") {
|
|
361
918
|
const telnyxKey = carrier.apiKey;
|
|
362
919
|
const connectionId = carrier.connectionId;
|
|
@@ -365,6 +922,9 @@ var Patter = class {
|
|
|
365
922
|
from: phoneNumber,
|
|
366
923
|
to: options.to
|
|
367
924
|
};
|
|
925
|
+
if (wantsAmd) {
|
|
926
|
+
telnyxPayload.answering_machine_detection = "greeting_end";
|
|
927
|
+
}
|
|
368
928
|
if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
|
|
369
929
|
telnyxPayload.timeout_secs = Math.max(1, Math.floor(effectiveRingTimeout));
|
|
370
930
|
}
|
|
@@ -407,12 +967,12 @@ var Patter = class {
|
|
|
407
967
|
From: phoneNumber,
|
|
408
968
|
Twiml: inlineTwiml,
|
|
409
969
|
StatusCallback: statusCallbackUrl,
|
|
410
|
-
StatusCallbackMethod: "POST"
|
|
411
|
-
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
412
|
-
// transitions even when media never arrives.
|
|
413
|
-
StatusCallbackEvent: "initiated ringing answered completed"
|
|
970
|
+
StatusCallbackMethod: "POST"
|
|
414
971
|
});
|
|
415
|
-
|
|
972
|
+
for (const evt of ["initiated", "ringing", "answered", "completed"]) {
|
|
973
|
+
params.append("StatusCallbackEvent", evt);
|
|
974
|
+
}
|
|
975
|
+
if (wantsAmd) {
|
|
416
976
|
params.append("MachineDetection", "DetectMessageEnd");
|
|
417
977
|
params.append("AsyncAmd", "true");
|
|
418
978
|
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
@@ -445,11 +1005,22 @@ var Patter = class {
|
|
|
445
1005
|
callee: options.to,
|
|
446
1006
|
direction: "outbound"
|
|
447
1007
|
});
|
|
1008
|
+
const notificationsPath = body.subresource_uris?.notifications;
|
|
1009
|
+
if (notificationsPath) {
|
|
1010
|
+
getLogger().info(
|
|
1011
|
+
`Outbound call ${callSid} placed. Twilio notifications: https://api.twilio.com${notificationsPath} (check here if the call drops with no audio).`
|
|
1012
|
+
);
|
|
1013
|
+
}
|
|
448
1014
|
}
|
|
449
1015
|
} catch {
|
|
450
1016
|
}
|
|
451
1017
|
}
|
|
452
1018
|
}
|
|
1019
|
+
/**
|
|
1020
|
+
* Stop the embedded server and any running tunnel. Safe to call multiple
|
|
1021
|
+
* times. Leaves the instance reusable: a subsequent ``serve()`` works as
|
|
1022
|
+
* if the previous lifecycle never happened.
|
|
1023
|
+
*/
|
|
453
1024
|
async disconnect() {
|
|
454
1025
|
if (this.tunnelHandle) {
|
|
455
1026
|
this.tunnelHandle.stop();
|
|
@@ -459,10 +1030,116 @@ var Patter = class {
|
|
|
459
1030
|
await this.embeddedServer.stop();
|
|
460
1031
|
this.embeddedServer = null;
|
|
461
1032
|
}
|
|
1033
|
+
if (this.tunnelOwnsWebhookUrl) {
|
|
1034
|
+
this.localConfig = { ...this.localConfig, webhookUrl: void 0 };
|
|
1035
|
+
this.tunnelOwnsWebhookUrl = false;
|
|
1036
|
+
}
|
|
1037
|
+
this._tunnelReady = new Promise((resolve, reject) => {
|
|
1038
|
+
this._tunnelReadyResolve = resolve;
|
|
1039
|
+
this._tunnelReadyReject = reject;
|
|
1040
|
+
});
|
|
1041
|
+
this._tunnelReady.catch(() => {
|
|
1042
|
+
});
|
|
1043
|
+
if (this.localConfig.webhookUrl) {
|
|
1044
|
+
this._tunnelReadyResolve(this.localConfig.webhookUrl);
|
|
1045
|
+
}
|
|
1046
|
+
this._ready = new Promise((resolve, reject) => {
|
|
1047
|
+
this._readyResolve = resolve;
|
|
1048
|
+
this._readyReject = reject;
|
|
1049
|
+
});
|
|
1050
|
+
this._ready.catch(() => {
|
|
1051
|
+
});
|
|
1052
|
+
}
|
|
1053
|
+
/**
|
|
1054
|
+
* Terminate an active call on the configured carrier.
|
|
1055
|
+
*
|
|
1056
|
+
* Posts a hangup to the carrier (Twilio
|
|
1057
|
+
* ``Calls(callSid).update({status:'completed'})`` or Telnyx
|
|
1058
|
+
* ``/v2/calls/{callControlId}/actions/hangup``) so the bridge tears down
|
|
1059
|
+
* gracefully — the SDK's WebSocket handler then fires ``onCallEnd`` with
|
|
1060
|
+
* the final ``CallMetrics`` before the WS closes.
|
|
1061
|
+
*
|
|
1062
|
+
* Use this when the host application needs to end a call programmatically
|
|
1063
|
+
* without going through the LLM tool-call path (e.g. an admin override,
|
|
1064
|
+
* a watchdog, or an integration test runner).
|
|
1065
|
+
*
|
|
1066
|
+
* @param callSid - Carrier-issued call identifier (Twilio Call SID or
|
|
1067
|
+
* Telnyx call_control_id) returned from a previous ``call(...)`` or
|
|
1068
|
+
* captured in the ``onCallStart`` callback's payload.
|
|
1069
|
+
* @throws Error when ``callSid`` is empty or no carrier is configured.
|
|
1070
|
+
*/
|
|
1071
|
+
async endCall(callSid) {
|
|
1072
|
+
if (!callSid) {
|
|
1073
|
+
throw new Error("callSid must be a non-empty string");
|
|
1074
|
+
}
|
|
1075
|
+
const carrier = this.localConfig.carrier;
|
|
1076
|
+
if (carrier.kind === "twilio") {
|
|
1077
|
+
const auth = Buffer.from(`${carrier.accountSid}:${carrier.authToken}`).toString("base64");
|
|
1078
|
+
const url = `https://api.twilio.com/2010-04-01/Accounts/${carrier.accountSid}/Calls/${callSid}.json`;
|
|
1079
|
+
const body = new URLSearchParams({ Status: "completed" });
|
|
1080
|
+
const res = await fetch(url, {
|
|
1081
|
+
method: "POST",
|
|
1082
|
+
headers: {
|
|
1083
|
+
Authorization: `Basic ${auth}`,
|
|
1084
|
+
"Content-Type": "application/x-www-form-urlencoded"
|
|
1085
|
+
},
|
|
1086
|
+
body
|
|
1087
|
+
});
|
|
1088
|
+
if (!res.ok) {
|
|
1089
|
+
throw new Error(`Twilio hangup failed: ${res.status} ${await res.text()}`);
|
|
1090
|
+
}
|
|
1091
|
+
return;
|
|
1092
|
+
}
|
|
1093
|
+
if (carrier.kind === "telnyx") {
|
|
1094
|
+
const res = await fetch(`https://api.telnyx.com/v2/calls/${callSid}/actions/hangup`, {
|
|
1095
|
+
method: "POST",
|
|
1096
|
+
headers: {
|
|
1097
|
+
Authorization: `Bearer ${carrier.apiKey}`,
|
|
1098
|
+
"Content-Type": "application/json"
|
|
1099
|
+
}
|
|
1100
|
+
});
|
|
1101
|
+
if (!res.ok) {
|
|
1102
|
+
throw new Error(`Telnyx hangup failed: ${res.status} ${await res.text()}`);
|
|
1103
|
+
}
|
|
1104
|
+
return;
|
|
1105
|
+
}
|
|
1106
|
+
throw new Error(`endCall() requires a configured carrier; got kind=${carrier.kind}`);
|
|
462
1107
|
}
|
|
463
1108
|
};
|
|
1109
|
+
async function waitForTunnelPubliclyReachable(hostname, totalTimeoutMs = 6e4, graceMs = 5e3) {
|
|
1110
|
+
const log = getLogger();
|
|
1111
|
+
const { Resolver } = await import("dns/promises");
|
|
1112
|
+
const resolver = new Resolver({ timeout: 1500, tries: 1 });
|
|
1113
|
+
resolver.setServers(["1.1.1.1", "8.8.8.8"]);
|
|
1114
|
+
const deadline = Date.now() + totalTimeoutMs;
|
|
1115
|
+
let attempt = 0;
|
|
1116
|
+
let lastErr;
|
|
1117
|
+
while (Date.now() < deadline) {
|
|
1118
|
+
attempt += 1;
|
|
1119
|
+
try {
|
|
1120
|
+
const records = await resolver.resolve4(hostname);
|
|
1121
|
+
const first = records[0] ?? "<unknown>";
|
|
1122
|
+
log.info(
|
|
1123
|
+
"Tunnel DNS resolved \u2192 %s (attempt %d); waiting %d ms grace",
|
|
1124
|
+
first,
|
|
1125
|
+
attempt,
|
|
1126
|
+
graceMs
|
|
1127
|
+
);
|
|
1128
|
+
await new Promise((r) => setTimeout(r, graceMs));
|
|
1129
|
+
return;
|
|
1130
|
+
} catch (err) {
|
|
1131
|
+
lastErr = err;
|
|
1132
|
+
}
|
|
1133
|
+
const delay = Math.min(250 * Math.pow(1.6, attempt - 1), 2e3);
|
|
1134
|
+
await new Promise((r) => setTimeout(r, delay));
|
|
1135
|
+
}
|
|
1136
|
+
throw new Error(
|
|
1137
|
+
`Tunnel hostname ${hostname} did not resolve within ${totalTimeoutMs}ms. Last error: ${lastErr instanceof Error ? lastErr.message : String(lastErr)}`
|
|
1138
|
+
);
|
|
1139
|
+
}
|
|
464
1140
|
|
|
465
|
-
// src/tool-decorator.ts
|
|
1141
|
+
// src/tools/tool-decorator.ts
|
|
1142
|
+
init_esm_shims();
|
|
466
1143
|
function defineTool(input) {
|
|
467
1144
|
const properties = {};
|
|
468
1145
|
const required = [];
|
|
@@ -492,6 +1169,7 @@ function defineTool(input) {
|
|
|
492
1169
|
}
|
|
493
1170
|
|
|
494
1171
|
// src/text-transforms.ts
|
|
1172
|
+
init_esm_shims();
|
|
495
1173
|
function filterMarkdown(text) {
|
|
496
1174
|
let result = text;
|
|
497
1175
|
result = result.replace(/```[\s\S]*?```/g, (match) => {
|
|
@@ -524,6 +1202,7 @@ function filterForTTS(text) {
|
|
|
524
1202
|
}
|
|
525
1203
|
|
|
526
1204
|
// src/providers.ts
|
|
1205
|
+
init_esm_shims();
|
|
527
1206
|
var STTConfigImpl = class {
|
|
528
1207
|
provider;
|
|
529
1208
|
apiKey;
|
|
@@ -581,10 +1260,8 @@ function openaiTts(opts) {
|
|
|
581
1260
|
function soniox(opts) {
|
|
582
1261
|
return new STTConfigImpl("soniox", opts.apiKey, opts.language ?? "en");
|
|
583
1262
|
}
|
|
584
|
-
function speechmatics(
|
|
585
|
-
|
|
586
|
-
"speechmatics() is Python-only right now \u2014 the TS Speechmatics adapter has not shipped yet. Use the Python SDK (sdk-py) or pick another STT provider such as deepgram() / assemblyai() / soniox()."
|
|
587
|
-
);
|
|
1263
|
+
function speechmatics(opts) {
|
|
1264
|
+
return new STTConfigImpl("speechmatics", opts.apiKey, opts.language ?? "en");
|
|
588
1265
|
}
|
|
589
1266
|
function assemblyai(opts) {
|
|
590
1267
|
return new STTConfigImpl("assemblyai", opts.apiKey, opts.language ?? "en");
|
|
@@ -620,6 +1297,7 @@ function geminiLive(opts) {
|
|
|
620
1297
|
}
|
|
621
1298
|
|
|
622
1299
|
// src/fallback-provider.ts
|
|
1300
|
+
init_esm_shims();
|
|
623
1301
|
var AllProvidersFailedError = class extends Error {
|
|
624
1302
|
constructor(message) {
|
|
625
1303
|
super(message);
|
|
@@ -698,6 +1376,7 @@ var FallbackLLMProvider = class {
|
|
|
698
1376
|
// -----------------------------------------------------------------------
|
|
699
1377
|
// LLMProvider implementation
|
|
700
1378
|
// -----------------------------------------------------------------------
|
|
1379
|
+
/** Streaming entry point — yields chunks from the first provider that succeeds. */
|
|
701
1380
|
async *stream(messages, tools) {
|
|
702
1381
|
const errors = [];
|
|
703
1382
|
const result = yield* this.tryProviders(
|
|
@@ -816,7 +1495,11 @@ var FallbackLLMProvider = class {
|
|
|
816
1495
|
}
|
|
817
1496
|
};
|
|
818
1497
|
|
|
1498
|
+
// src/integrations/index.ts
|
|
1499
|
+
init_esm_shims();
|
|
1500
|
+
|
|
819
1501
|
// src/integrations/patter-tool.ts
|
|
1502
|
+
init_esm_shims();
|
|
820
1503
|
import { EventEmitter } from "events";
|
|
821
1504
|
var PARAMETERS_SCHEMA = {
|
|
822
1505
|
type: "object",
|
|
@@ -970,6 +1653,7 @@ var PatterTool = class _PatterTool {
|
|
|
970
1653
|
this.started = false;
|
|
971
1654
|
}
|
|
972
1655
|
// --- Execution ----------------------------------------------------------
|
|
1656
|
+
/** Place an outbound call and resolve once it ends with the transcript and metrics. */
|
|
973
1657
|
async execute(args) {
|
|
974
1658
|
if (!this.started) await this.start();
|
|
975
1659
|
if (!args || typeof args.to !== "string" || !args.to.startsWith("+")) {
|
|
@@ -1079,6 +1763,7 @@ var PatterTool = class _PatterTool {
|
|
|
1079
1763
|
};
|
|
1080
1764
|
|
|
1081
1765
|
// src/providers/gemini-live.ts
|
|
1766
|
+
init_esm_shims();
|
|
1082
1767
|
var GEMINI_DEFAULT_INPUT_SR = 16e3;
|
|
1083
1768
|
var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
|
|
1084
1769
|
var GeminiLiveAdapter = class {
|
|
@@ -1093,6 +1778,7 @@ var GeminiLiveAdapter = class {
|
|
|
1093
1778
|
this.outputSampleRate = options.outputSampleRate ?? GEMINI_DEFAULT_OUTPUT_SR;
|
|
1094
1779
|
this.temperature = options.temperature ?? 0.8;
|
|
1095
1780
|
}
|
|
1781
|
+
apiKey;
|
|
1096
1782
|
model;
|
|
1097
1783
|
voice;
|
|
1098
1784
|
instructions;
|
|
@@ -1113,6 +1799,7 @@ var GeminiLiveAdapter = class {
|
|
|
1113
1799
|
* not the call_id).
|
|
1114
1800
|
*/
|
|
1115
1801
|
pendingToolCalls = /* @__PURE__ */ new Map();
|
|
1802
|
+
/** Lazily import @google/genai, open a Live session, and start the receive loop. */
|
|
1116
1803
|
async connect() {
|
|
1117
1804
|
let genaiModule;
|
|
1118
1805
|
try {
|
|
@@ -1160,6 +1847,7 @@ var GeminiLiveAdapter = class {
|
|
|
1160
1847
|
getLogger().error(`Gemini Live receive loop error: ${String(err)}`);
|
|
1161
1848
|
});
|
|
1162
1849
|
}
|
|
1850
|
+
/** Send a PCM audio chunk to Gemini as base64 inline data. */
|
|
1163
1851
|
sendAudio(pcm) {
|
|
1164
1852
|
if (!this.session || !this.running) return;
|
|
1165
1853
|
const mime = `audio/pcm;rate=${this.inputSampleRate}`;
|
|
@@ -1173,6 +1861,7 @@ var GeminiLiveAdapter = class {
|
|
|
1173
1861
|
);
|
|
1174
1862
|
}
|
|
1175
1863
|
}
|
|
1864
|
+
/** Send a text turn to Gemini and mark the turn complete. */
|
|
1176
1865
|
async sendText(text) {
|
|
1177
1866
|
if (!this.session) return;
|
|
1178
1867
|
const sess = this.session;
|
|
@@ -1181,6 +1870,7 @@ var GeminiLiveAdapter = class {
|
|
|
1181
1870
|
turnComplete: true
|
|
1182
1871
|
});
|
|
1183
1872
|
}
|
|
1873
|
+
/** Send a tool/function-call result back to Gemini. */
|
|
1184
1874
|
async sendFunctionResult(callId, result) {
|
|
1185
1875
|
if (!this.session) return;
|
|
1186
1876
|
const sess = this.session;
|
|
@@ -1192,9 +1882,11 @@ var GeminiLiveAdapter = class {
|
|
|
1192
1882
|
]
|
|
1193
1883
|
});
|
|
1194
1884
|
}
|
|
1885
|
+
/** No-op — Gemini Live barge-in is VAD-driven, not client-cancelled. */
|
|
1195
1886
|
cancelResponse() {
|
|
1196
1887
|
getLogger().debug("Gemini Live: cancelResponse is implicit via VAD");
|
|
1197
1888
|
}
|
|
1889
|
+
/** Register an event handler that receives every Gemini Live event. */
|
|
1198
1890
|
onEvent(handler) {
|
|
1199
1891
|
this.handlers.push(handler);
|
|
1200
1892
|
}
|
|
@@ -1251,6 +1943,7 @@ var GeminiLiveAdapter = class {
|
|
|
1251
1943
|
this.running = false;
|
|
1252
1944
|
}
|
|
1253
1945
|
}
|
|
1946
|
+
/** Close the Gemini Live session and stop the receive loop. */
|
|
1254
1947
|
async close() {
|
|
1255
1948
|
this.running = false;
|
|
1256
1949
|
if (this.session) {
|
|
@@ -1271,6 +1964,7 @@ var GeminiLiveAdapter = class {
|
|
|
1271
1964
|
};
|
|
1272
1965
|
|
|
1273
1966
|
// src/providers/ultravox-realtime.ts
|
|
1967
|
+
init_esm_shims();
|
|
1274
1968
|
import WebSocket from "ws";
|
|
1275
1969
|
var ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
|
|
1276
1970
|
var ULTRAVOX_DEFAULT_SR = 16e3;
|
|
@@ -1286,6 +1980,7 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1286
1980
|
this.sampleRate = options.sampleRate ?? ULTRAVOX_DEFAULT_SR;
|
|
1287
1981
|
this.firstMessage = options.firstMessage ?? "";
|
|
1288
1982
|
}
|
|
1983
|
+
apiKey;
|
|
1289
1984
|
model;
|
|
1290
1985
|
voice;
|
|
1291
1986
|
instructions;
|
|
@@ -1298,6 +1993,7 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1298
1993
|
handlers = [];
|
|
1299
1994
|
/** Exposed for diagnostics — true while the underlying socket is open. */
|
|
1300
1995
|
running = false;
|
|
1996
|
+
/** Create the Ultravox call, fetch the joinUrl, and open the WebSocket. */
|
|
1301
1997
|
async connect() {
|
|
1302
1998
|
const body = {
|
|
1303
1999
|
model: this.model,
|
|
@@ -1367,14 +2063,17 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1367
2063
|
this.running = false;
|
|
1368
2064
|
});
|
|
1369
2065
|
}
|
|
2066
|
+
/** Send a binary PCM audio chunk to the Ultravox call. */
|
|
1370
2067
|
sendAudio(pcm) {
|
|
1371
2068
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1372
2069
|
this.ws.send(pcm, { binary: true });
|
|
1373
2070
|
}
|
|
2071
|
+
/** Inject a user text message into the Ultravox conversation. */
|
|
1374
2072
|
async sendText(text) {
|
|
1375
2073
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1376
2074
|
this.ws.send(JSON.stringify({ type: "input_text_message", text }));
|
|
1377
2075
|
}
|
|
2076
|
+
/** Send a tool/function-call result back to Ultravox. */
|
|
1378
2077
|
async sendFunctionResult(callId, result) {
|
|
1379
2078
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1380
2079
|
this.ws.send(
|
|
@@ -1386,10 +2085,12 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1386
2085
|
})
|
|
1387
2086
|
);
|
|
1388
2087
|
}
|
|
2088
|
+
/** Clear the playback buffer to interrupt the agent's current response. */
|
|
1389
2089
|
cancelResponse() {
|
|
1390
2090
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1391
2091
|
this.ws.send(JSON.stringify({ type: "playback_clear_buffer" }));
|
|
1392
2092
|
}
|
|
2093
|
+
/** Register an event handler that receives every Ultravox event. */
|
|
1393
2094
|
onEvent(handler) {
|
|
1394
2095
|
this.handlers.push(handler);
|
|
1395
2096
|
}
|
|
@@ -1436,6 +2137,7 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1436
2137
|
await this.emit("speech_started", null);
|
|
1437
2138
|
}
|
|
1438
2139
|
}
|
|
2140
|
+
/** Close the Ultravox WebSocket and mark the adapter idle. */
|
|
1439
2141
|
async close() {
|
|
1440
2142
|
this.running = false;
|
|
1441
2143
|
if (this.ws) {
|
|
@@ -1461,6 +2163,7 @@ function toolParamsToUltravox(parameters) {
|
|
|
1461
2163
|
}
|
|
1462
2164
|
|
|
1463
2165
|
// src/scheduler.ts
|
|
2166
|
+
init_esm_shims();
|
|
1464
2167
|
var cronModule = null;
|
|
1465
2168
|
var loadError = null;
|
|
1466
2169
|
async function loadCron() {
|
|
@@ -1469,7 +2172,7 @@ async function loadCron() {
|
|
|
1469
2172
|
try {
|
|
1470
2173
|
const imported = await import(
|
|
1471
2174
|
/* @vite-ignore */
|
|
1472
|
-
"./node-cron-
|
|
2175
|
+
"./node-cron-JFWQQRBU.mjs"
|
|
1473
2176
|
);
|
|
1474
2177
|
cronModule = imported && imported.default ? imported.default : imported;
|
|
1475
2178
|
return cronModule;
|
|
@@ -1576,6 +2279,7 @@ function scheduleInterval(intervalOrOpts, callback) {
|
|
|
1576
2279
|
}
|
|
1577
2280
|
|
|
1578
2281
|
// src/stt/deepgram.ts
|
|
2282
|
+
init_esm_shims();
|
|
1579
2283
|
var STT = class extends DeepgramSTT {
|
|
1580
2284
|
static providerKey = "deepgram";
|
|
1581
2285
|
constructor(opts = {}) {
|
|
@@ -1602,7 +2306,11 @@ var STT = class extends DeepgramSTT {
|
|
|
1602
2306
|
}
|
|
1603
2307
|
};
|
|
1604
2308
|
|
|
2309
|
+
// src/stt/whisper.ts
|
|
2310
|
+
init_esm_shims();
|
|
2311
|
+
|
|
1605
2312
|
// src/providers/whisper-stt.ts
|
|
2313
|
+
init_esm_shims();
|
|
1606
2314
|
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
1607
2315
|
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
1608
2316
|
var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
|
|
@@ -1666,11 +2374,13 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1666
2374
|
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
1667
2375
|
return new _WhisperSTT(apiKey, language, model);
|
|
1668
2376
|
}
|
|
2377
|
+
/** Reset the audio buffer and arm the adapter for incoming chunks. */
|
|
1669
2378
|
async connect() {
|
|
1670
2379
|
this.running = true;
|
|
1671
2380
|
this.chunks = [];
|
|
1672
2381
|
this.bufferedBytes = 0;
|
|
1673
2382
|
}
|
|
2383
|
+
/** Buffer a PCM16 chunk; flushes to Whisper once `bufferSize` bytes are reached. */
|
|
1674
2384
|
sendAudio(audio) {
|
|
1675
2385
|
if (!this.running) return;
|
|
1676
2386
|
this.chunks.push(audio);
|
|
@@ -1701,9 +2411,11 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1701
2411
|
onTranscript(callback) {
|
|
1702
2412
|
this.callbacks.add(callback);
|
|
1703
2413
|
}
|
|
2414
|
+
/** Remove a previously registered transcript listener. */
|
|
1704
2415
|
offTranscript(callback) {
|
|
1705
2416
|
this.callbacks.delete(callback);
|
|
1706
2417
|
}
|
|
2418
|
+
/** Flush any buffered audio, await pending transcriptions, and clear listeners. */
|
|
1707
2419
|
async close() {
|
|
1708
2420
|
this.running = false;
|
|
1709
2421
|
if (this.bufferedBytes > 0) {
|
|
@@ -1781,7 +2493,11 @@ var STT2 = class extends WhisperSTT {
|
|
|
1781
2493
|
}
|
|
1782
2494
|
};
|
|
1783
2495
|
|
|
2496
|
+
// src/stt/openai-transcribe.ts
|
|
2497
|
+
init_esm_shims();
|
|
2498
|
+
|
|
1784
2499
|
// src/providers/openai-transcribe-stt.ts
|
|
2500
|
+
init_esm_shims();
|
|
1785
2501
|
var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
|
|
1786
2502
|
var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
|
|
1787
2503
|
var OpenAITranscribeSTT = class extends WhisperSTT {
|
|
@@ -1817,11 +2533,37 @@ var STT3 = class extends OpenAITranscribeSTT {
|
|
|
1817
2533
|
}
|
|
1818
2534
|
};
|
|
1819
2535
|
|
|
2536
|
+
// src/stt/cartesia.ts
|
|
2537
|
+
init_esm_shims();
|
|
2538
|
+
|
|
1820
2539
|
// src/providers/cartesia-stt.ts
|
|
2540
|
+
init_esm_shims();
|
|
1821
2541
|
import WebSocket2 from "ws";
|
|
2542
|
+
var CartesiaSTTModel = {
|
|
2543
|
+
INK_WHISPER: "ink-whisper"
|
|
2544
|
+
};
|
|
2545
|
+
var CartesiaSTTEncoding = {
|
|
2546
|
+
PCM_S16LE: "pcm_s16le"
|
|
2547
|
+
};
|
|
2548
|
+
var CartesiaSTTSampleRate = {
|
|
2549
|
+
HZ_8000: 8e3,
|
|
2550
|
+
HZ_16000: 16e3,
|
|
2551
|
+
HZ_24000: 24e3,
|
|
2552
|
+
HZ_44100: 44100,
|
|
2553
|
+
HZ_48000: 48e3
|
|
2554
|
+
};
|
|
2555
|
+
var CartesiaSTTServerEvent = {
|
|
2556
|
+
TRANSCRIPT: "transcript",
|
|
2557
|
+
FLUSH_DONE: "flush_done",
|
|
2558
|
+
DONE: "done",
|
|
2559
|
+
ERROR: "error"
|
|
2560
|
+
};
|
|
2561
|
+
var CartesiaSTTClientFrame = {
|
|
2562
|
+
FINALIZE: "finalize"
|
|
2563
|
+
};
|
|
1822
2564
|
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
1823
2565
|
var API_VERSION = "2025-04-16";
|
|
1824
|
-
var USER_AGENT = "Patter/1.0
|
|
2566
|
+
var USER_AGENT = "Patter/1.0";
|
|
1825
2567
|
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
1826
2568
|
var CONNECT_TIMEOUT_MS = 1e4;
|
|
1827
2569
|
var CartesiaSTT = class {
|
|
@@ -1832,6 +2574,8 @@ var CartesiaSTT = class {
|
|
|
1832
2574
|
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
1833
2575
|
}
|
|
1834
2576
|
}
|
|
2577
|
+
apiKey;
|
|
2578
|
+
options;
|
|
1835
2579
|
ws = null;
|
|
1836
2580
|
callbacks = /* @__PURE__ */ new Set();
|
|
1837
2581
|
keepaliveTimer = null;
|
|
@@ -1855,15 +2599,16 @@ var CartesiaSTT = class {
|
|
|
1855
2599
|
}
|
|
1856
2600
|
const language = opts.language ?? "en";
|
|
1857
2601
|
const params = new URLSearchParams({
|
|
1858
|
-
model: opts.model ??
|
|
1859
|
-
sample_rate: String(opts.sampleRate ??
|
|
1860
|
-
encoding: opts.encoding ??
|
|
2602
|
+
model: opts.model ?? CartesiaSTTModel.INK_WHISPER,
|
|
2603
|
+
sample_rate: String(opts.sampleRate ?? CartesiaSTTSampleRate.HZ_16000),
|
|
2604
|
+
encoding: opts.encoding ?? CartesiaSTTEncoding.PCM_S16LE,
|
|
1861
2605
|
cartesia_version: API_VERSION,
|
|
1862
2606
|
api_key: this.apiKey,
|
|
1863
2607
|
language
|
|
1864
2608
|
});
|
|
1865
2609
|
return `${base}/stt/websocket?${params.toString()}`;
|
|
1866
2610
|
}
|
|
2611
|
+
/** Open the streaming WebSocket and arm message + keepalive handlers. */
|
|
1867
2612
|
async connect() {
|
|
1868
2613
|
const url = this.buildWsUrl();
|
|
1869
2614
|
this.ws = new WebSocket2(url, {
|
|
@@ -1903,7 +2648,7 @@ var CartesiaSTT = class {
|
|
|
1903
2648
|
}
|
|
1904
2649
|
handleEvent(event) {
|
|
1905
2650
|
const type = event.type;
|
|
1906
|
-
if (type ===
|
|
2651
|
+
if (type === CartesiaSTTServerEvent.TRANSCRIPT) {
|
|
1907
2652
|
const text = (event.text ?? "").trim();
|
|
1908
2653
|
const isFinal = Boolean(event.is_final);
|
|
1909
2654
|
if (!text && !isFinal) return;
|
|
@@ -1915,7 +2660,7 @@ var CartesiaSTT = class {
|
|
|
1915
2660
|
this.emit({ text, isFinal, confidence });
|
|
1916
2661
|
return;
|
|
1917
2662
|
}
|
|
1918
|
-
if (type ===
|
|
2663
|
+
if (type === CartesiaSTTServerEvent.ERROR) {
|
|
1919
2664
|
getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
|
|
1920
2665
|
return;
|
|
1921
2666
|
}
|
|
@@ -1925,10 +2670,12 @@ var CartesiaSTT = class {
|
|
|
1925
2670
|
cb(transcript);
|
|
1926
2671
|
}
|
|
1927
2672
|
}
|
|
2673
|
+
/** Send a binary PCM16-LE audio chunk to Cartesia for transcription. */
|
|
1928
2674
|
sendAudio(audio) {
|
|
1929
2675
|
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
1930
2676
|
this.ws.send(audio);
|
|
1931
2677
|
}
|
|
2678
|
+
/** Register a transcript listener. */
|
|
1932
2679
|
onTranscript(callback) {
|
|
1933
2680
|
this.callbacks.add(callback);
|
|
1934
2681
|
}
|
|
@@ -1951,7 +2698,7 @@ var CartesiaSTT = class {
|
|
|
1951
2698
|
}
|
|
1952
2699
|
if (this.ws) {
|
|
1953
2700
|
try {
|
|
1954
|
-
this.ws.send(
|
|
2701
|
+
this.ws.send(CartesiaSTTClientFrame.FINALIZE);
|
|
1955
2702
|
} catch {
|
|
1956
2703
|
}
|
|
1957
2704
|
this.ws.close();
|
|
@@ -1974,7 +2721,7 @@ var CartesiaSTT = class {
|
|
|
1974
2721
|
if (ws.readyState === WebSocket2.OPEN) {
|
|
1975
2722
|
try {
|
|
1976
2723
|
await new Promise((resolve) => {
|
|
1977
|
-
ws.send(
|
|
2724
|
+
ws.send(CartesiaSTTClientFrame.FINALIZE, (err) => {
|
|
1978
2725
|
if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
|
|
1979
2726
|
resolve();
|
|
1980
2727
|
});
|
|
@@ -2022,12 +2769,33 @@ var STT4 = class extends CartesiaSTT {
|
|
|
2022
2769
|
}
|
|
2023
2770
|
};
|
|
2024
2771
|
|
|
2772
|
+
// src/stt/soniox.ts
|
|
2773
|
+
init_esm_shims();
|
|
2774
|
+
|
|
2025
2775
|
// src/providers/soniox-stt.ts
|
|
2776
|
+
init_esm_shims();
|
|
2026
2777
|
import WebSocket3 from "ws";
|
|
2027
2778
|
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
2028
|
-
var
|
|
2029
|
-
|
|
2030
|
-
|
|
2779
|
+
var SonioxModel = {
|
|
2780
|
+
STT_RT_V4: "stt-rt-v4",
|
|
2781
|
+
STT_RT_V3: "stt-rt-v3",
|
|
2782
|
+
STT_RT_V2: "stt-rt-v2"
|
|
2783
|
+
};
|
|
2784
|
+
var SonioxSampleRate = {
|
|
2785
|
+
HZ_8000: 8e3,
|
|
2786
|
+
HZ_16000: 16e3,
|
|
2787
|
+
HZ_24000: 24e3
|
|
2788
|
+
};
|
|
2789
|
+
var SonioxClientFrame = {
|
|
2790
|
+
KEEPALIVE: "keepalive"
|
|
2791
|
+
};
|
|
2792
|
+
var SonioxEndpointToken = {
|
|
2793
|
+
END: "<end>",
|
|
2794
|
+
FIN: "<fin>"
|
|
2795
|
+
};
|
|
2796
|
+
var KEEPALIVE_MESSAGE = JSON.stringify({ type: SonioxClientFrame.KEEPALIVE });
|
|
2797
|
+
var END_TOKEN = SonioxEndpointToken.END;
|
|
2798
|
+
var FINALIZED_TOKEN = SonioxEndpointToken.FIN;
|
|
2031
2799
|
var KEEPALIVE_INTERVAL_MS2 = 5e3;
|
|
2032
2800
|
function isEndToken(token) {
|
|
2033
2801
|
return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
|
|
@@ -2082,10 +2850,10 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2082
2850
|
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
2083
2851
|
}
|
|
2084
2852
|
this.apiKey = apiKey;
|
|
2085
|
-
this.model = options.model ??
|
|
2853
|
+
this.model = options.model ?? SonioxModel.STT_RT_V4;
|
|
2086
2854
|
this.languageHints = options.languageHints;
|
|
2087
2855
|
this.languageHintsStrict = options.languageHintsStrict ?? false;
|
|
2088
|
-
this.sampleRate = options.sampleRate ??
|
|
2856
|
+
this.sampleRate = options.sampleRate ?? SonioxSampleRate.HZ_16000;
|
|
2089
2857
|
this.numChannels = options.numChannels ?? 1;
|
|
2090
2858
|
this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
|
|
2091
2859
|
this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
|
|
@@ -2095,7 +2863,10 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2095
2863
|
}
|
|
2096
2864
|
/** Factory for Twilio-style 8 kHz linear PCM. */
|
|
2097
2865
|
static forTwilio(apiKey, languageHints) {
|
|
2098
|
-
return new _SonioxSTT(apiKey, {
|
|
2866
|
+
return new _SonioxSTT(apiKey, {
|
|
2867
|
+
sampleRate: SonioxSampleRate.HZ_8000,
|
|
2868
|
+
languageHints
|
|
2869
|
+
});
|
|
2099
2870
|
}
|
|
2100
2871
|
buildConfig() {
|
|
2101
2872
|
const config = {
|
|
@@ -2118,6 +2889,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2118
2889
|
}
|
|
2119
2890
|
return config;
|
|
2120
2891
|
}
|
|
2892
|
+
/** Open the streaming WebSocket and send the initial config payload. */
|
|
2121
2893
|
async connect() {
|
|
2122
2894
|
this.final.reset();
|
|
2123
2895
|
this.ws = new WebSocket3(this.baseUrl);
|
|
@@ -2211,11 +2983,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2211
2983
|
cb(transcript);
|
|
2212
2984
|
}
|
|
2213
2985
|
}
|
|
2986
|
+
/** Send a binary PCM16-LE audio chunk to Soniox for transcription. */
|
|
2214
2987
|
sendAudio(audio) {
|
|
2215
2988
|
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
2216
2989
|
if (audio.length === 0) return;
|
|
2217
2990
|
this.ws.send(audio);
|
|
2218
2991
|
}
|
|
2992
|
+
/** Register a transcript listener (max 10 concurrent listeners). */
|
|
2219
2993
|
onTranscript(callback) {
|
|
2220
2994
|
if (this.callbacks.length >= 10) {
|
|
2221
2995
|
getLogger().warn(
|
|
@@ -2226,6 +3000,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2226
3000
|
}
|
|
2227
3001
|
this.callbacks.push(callback);
|
|
2228
3002
|
}
|
|
3003
|
+
/** Send the empty-frame stream terminator and close the WebSocket. */
|
|
2229
3004
|
close() {
|
|
2230
3005
|
this.clearKeepalive();
|
|
2231
3006
|
if (this.ws) {
|
|
@@ -2258,8 +3033,41 @@ var STT5 = class extends SonioxSTT {
|
|
|
2258
3033
|
}
|
|
2259
3034
|
};
|
|
2260
3035
|
|
|
3036
|
+
// src/stt/assemblyai.ts
|
|
3037
|
+
init_esm_shims();
|
|
3038
|
+
|
|
2261
3039
|
// src/providers/assemblyai-stt.ts
|
|
3040
|
+
init_esm_shims();
|
|
2262
3041
|
import WebSocket4 from "ws";
|
|
3042
|
+
// Enum-like lookup tables for the AssemblyAI streaming provider. They are
// frozen so that a consumer cannot accidentally mutate the shared wire
// values at runtime.

/** Wire values for the `encoding` connection parameter. */
var AssemblyAIEncoding = /* @__PURE__ */ Object.freeze({
  PCM_S16LE: "pcm_s16le",
  PCM_MULAW: "pcm_mulaw"
});

/** Streaming model identifiers. */
var AssemblyAIModel = /* @__PURE__ */ Object.freeze({
  UNIVERSAL_STREAMING_ENGLISH: "universal-streaming-english",
  UNIVERSAL_STREAMING_MULTILINGUAL: "universal-streaming-multilingual",
  U3_RT_PRO: "u3-rt-pro",
  WHISPER_RT: "whisper-rt"
});

/** Domain identifiers (the members of `VALID_DOMAINS`). */
var AssemblyAIDomain = /* @__PURE__ */ Object.freeze({
  GENERAL: "general",
  MEDICAL_V1: "medical-v1"
});

/** Sample rates, in Hz, used for the `sample_rate` connection parameter. */
var AssemblyAISampleRate = /* @__PURE__ */ Object.freeze({
  HZ_8000: 8e3,
  HZ_16000: 16e3
});

/** `type` values of the server events dispatched by `handleEvent`. */
var AssemblyAIEventType = /* @__PURE__ */ Object.freeze({
  BEGIN: "Begin",
  TURN: "Turn",
  SPEECH_STARTED: "SpeechStarted",
  TERMINATION: "Termination"
});

/** `type` values of the JSON control frames the client sends. */
var AssemblyAIClientFrame = /* @__PURE__ */ Object.freeze({
  UPDATE_CONFIGURATION: "UpdateConfiguration",
  FORCE_ENDPOINT: "ForceEndpoint",
  TERMINATE: "Terminate"
});
|
|
2263
3071
|
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
2264
3072
|
var DEFAULT_MIN_TURN_SILENCE_MS = 400;
|
|
2265
3073
|
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
@@ -2267,7 +3075,10 @@ var TERMINATION_WAIT_TIMEOUT_MS = 500;
|
|
|
2267
3075
|
var MIN_CHUNK_DURATION_MS = 50;
|
|
2268
3076
|
var MAX_CHUNK_DURATION_MS = 1e3;
|
|
2269
3077
|
var RECONNECT_ERROR_CODES = /* @__PURE__ */ new Set([3005, 3008]);
|
|
2270
|
-
var VALID_DOMAINS = /* @__PURE__ */ new Set([
|
|
3078
|
+
var VALID_DOMAINS = /* @__PURE__ */ new Set([
|
|
3079
|
+
AssemblyAIDomain.GENERAL,
|
|
3080
|
+
AssemblyAIDomain.MEDICAL_V1
|
|
3081
|
+
]);
|
|
2271
3082
|
var AssemblyAISTTNotConnectedError = class extends Error {
|
|
2272
3083
|
constructor(message = "AssemblyAISTT is not connected") {
|
|
2273
3084
|
super(message);
|
|
@@ -2290,38 +3101,56 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2290
3101
|
);
|
|
2291
3102
|
}
|
|
2292
3103
|
}
|
|
3104
|
+
apiKey;
|
|
3105
|
+
options;
|
|
2293
3106
|
ws = null;
|
|
2294
3107
|
callbacks = /* @__PURE__ */ new Set();
|
|
2295
3108
|
closing = false;
|
|
2296
3109
|
reconnectAttempts = 0;
|
|
2297
3110
|
terminationResolve = null;
|
|
2298
|
-
/**
|
|
3111
|
+
/**
|
|
3112
|
+
* Coalescing buffer for inbound audio frames. AssemblyAI's v3
|
|
3113
|
+
* streaming endpoint requires each ws frame to carry 50–1000 ms of
|
|
3114
|
+
* audio (server emits error 3007 below 50 ms — observed in the
|
|
3115
|
+
* field as a fully-billed call with zero transcripts). Twilio sends
|
|
3116
|
+
* 20 ms frames, so the SDK must batch ~3 frames before forwarding.
|
|
3117
|
+
*
|
|
3118
|
+
* We accumulate raw bytes here until the cumulative duration crosses
|
|
3119
|
+
* the configured target (default 60 ms — comfortably above the 50 ms
|
|
3120
|
+
* floor with one frame of headroom against jitter), then flush in a
|
|
3121
|
+
* single `ws.send()`.
|
|
3122
|
+
*/
|
|
3123
|
+
chunkBuffer = [];
|
|
3124
|
+
chunkBufferBytes = 0;
|
|
3125
|
+
/** Target send size in bytes — recomputed lazily once encoding/sample-rate is known. */
|
|
3126
|
+
chunkBufferTargetBytes = 0;
|
|
3127
|
+
/** AssemblyAI session id — set when the `Begin` message arrives. */
|
|
2299
3128
|
sessionId = null;
|
|
2300
3129
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
2301
3130
|
expiresAt = null;
|
|
2302
3131
|
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
2303
|
-
static forTwilio(apiKey, model =
|
|
3132
|
+
static forTwilio(apiKey, model = AssemblyAIModel.UNIVERSAL_STREAMING_ENGLISH) {
|
|
2304
3133
|
return new _AssemblyAISTT(apiKey, {
|
|
2305
3134
|
model,
|
|
2306
|
-
encoding:
|
|
2307
|
-
sampleRate:
|
|
3135
|
+
encoding: AssemblyAIEncoding.PCM_MULAW,
|
|
3136
|
+
sampleRate: AssemblyAISampleRate.HZ_8000
|
|
2308
3137
|
});
|
|
2309
3138
|
}
|
|
2310
3139
|
buildUrl() {
|
|
2311
3140
|
const opts = this.options;
|
|
2312
|
-
const model = opts.model ??
|
|
2313
|
-
const encoding = opts.encoding ??
|
|
2314
|
-
const sampleRate = opts.sampleRate ??
|
|
3141
|
+
const model = opts.model ?? AssemblyAIModel.UNIVERSAL_STREAMING_ENGLISH;
|
|
3142
|
+
const encoding = opts.encoding ?? AssemblyAIEncoding.PCM_S16LE;
|
|
3143
|
+
const sampleRate = opts.sampleRate ?? AssemblyAISampleRate.HZ_16000;
|
|
2315
3144
|
let minSilence;
|
|
2316
3145
|
let maxSilence;
|
|
2317
|
-
if (model ===
|
|
3146
|
+
if (model === AssemblyAIModel.U3_RT_PRO) {
|
|
2318
3147
|
minSilence = opts.minTurnSilence ?? 100;
|
|
2319
3148
|
maxSilence = opts.maxTurnSilence ?? minSilence;
|
|
2320
3149
|
} else {
|
|
2321
3150
|
minSilence = opts.minTurnSilence ?? DEFAULT_MIN_TURN_SILENCE_MS;
|
|
2322
3151
|
maxSilence = opts.maxTurnSilence;
|
|
2323
3152
|
}
|
|
2324
|
-
const languageDetection = opts.languageDetection ?? (model.includes("multilingual") || model ===
|
|
3153
|
+
const languageDetection = opts.languageDetection ?? (model.includes("multilingual") || model === AssemblyAIModel.U3_RT_PRO);
|
|
2325
3154
|
const raw = {
|
|
2326
3155
|
sample_rate: sampleRate,
|
|
2327
3156
|
encoding,
|
|
@@ -2363,6 +3192,7 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2363
3192
|
}
|
|
2364
3193
|
return headers;
|
|
2365
3194
|
}
|
|
3195
|
+
/** Open the streaming WebSocket and arm message handlers. */
|
|
2366
3196
|
async connect() {
|
|
2367
3197
|
this.closing = false;
|
|
2368
3198
|
const url = this.buildUrl();
|
|
@@ -2416,28 +3246,28 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2416
3246
|
}
|
|
2417
3247
|
handleEvent(event) {
|
|
2418
3248
|
const type = event.type;
|
|
2419
|
-
if (type ===
|
|
3249
|
+
if (type === AssemblyAIEventType.BEGIN) {
|
|
2420
3250
|
this.sessionId = event.id ?? null;
|
|
2421
3251
|
this.expiresAt = event.expires_at ?? null;
|
|
2422
3252
|
return;
|
|
2423
3253
|
}
|
|
2424
|
-
if (type ===
|
|
3254
|
+
if (type === AssemblyAIEventType.TERMINATION) {
|
|
2425
3255
|
if (this.terminationResolve) {
|
|
2426
3256
|
this.terminationResolve();
|
|
2427
3257
|
this.terminationResolve = null;
|
|
2428
3258
|
}
|
|
2429
3259
|
return;
|
|
2430
3260
|
}
|
|
2431
|
-
if (type ===
|
|
3261
|
+
if (type === AssemblyAIEventType.SPEECH_STARTED) {
|
|
2432
3262
|
this.emit({
|
|
2433
3263
|
text: "",
|
|
2434
3264
|
isFinal: false,
|
|
2435
3265
|
confidence: 0,
|
|
2436
|
-
eventType:
|
|
3266
|
+
eventType: AssemblyAIEventType.SPEECH_STARTED
|
|
2437
3267
|
});
|
|
2438
3268
|
return;
|
|
2439
3269
|
}
|
|
2440
|
-
if (type !==
|
|
3270
|
+
if (type !== AssemblyAIEventType.TURN) {
|
|
2441
3271
|
return;
|
|
2442
3272
|
}
|
|
2443
3273
|
const endOfTurn = Boolean(event.end_of_turn);
|
|
@@ -2468,25 +3298,49 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2468
3298
|
cb(transcript);
|
|
2469
3299
|
}
|
|
2470
3300
|
}
|
|
3301
|
+
/** Send a binary PCM/mu-law audio chunk to AssemblyAI for transcription. */
|
|
2471
3302
|
sendAudio(audio) {
|
|
2472
3303
|
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2473
|
-
|
|
2474
|
-
"AssemblyAISTT.sendAudio: WebSocket is not open"
|
|
2475
|
-
);
|
|
3304
|
+
return;
|
|
2476
3305
|
}
|
|
2477
|
-
|
|
3306
|
+
if (this.chunkBufferTargetBytes === 0) {
|
|
3307
|
+
this.chunkBufferTargetBytes = this.computeTargetChunkBytes();
|
|
3308
|
+
}
|
|
3309
|
+
this.chunkBuffer.push(audio);
|
|
3310
|
+
this.chunkBufferBytes += audio.length;
|
|
3311
|
+
if (this.chunkBufferBytes < this.chunkBufferTargetBytes) {
|
|
3312
|
+
return;
|
|
3313
|
+
}
|
|
3314
|
+
const merged = Buffer.concat(this.chunkBuffer, this.chunkBufferBytes);
|
|
3315
|
+
this.chunkBuffer = [];
|
|
3316
|
+
this.chunkBufferBytes = 0;
|
|
3317
|
+
const durationMs = this.estimateChunkDurationMs(merged.length);
|
|
2478
3318
|
if (durationMs !== null && (durationMs < MIN_CHUNK_DURATION_MS || durationMs > MAX_CHUNK_DURATION_MS)) {
|
|
2479
3319
|
getLogger().warn(
|
|
2480
3320
|
`AssemblyAISTT: audio chunk duration ${durationMs.toFixed(1)}ms outside 50-1000ms bounds (may trigger error 3007).`
|
|
2481
3321
|
);
|
|
2482
3322
|
}
|
|
2483
|
-
this.ws.send(
|
|
3323
|
+
this.ws.send(merged);
|
|
3324
|
+
}
|
|
3325
|
+
/**
|
|
3326
|
+
* Compute the byte count corresponding to ~60 ms of audio for the
|
|
3327
|
+
* configured encoding / sample rate. Sits one Twilio frame (20 ms)
|
|
3328
|
+
* above AssemblyAI's 50 ms floor so jitter never dips below.
|
|
3329
|
+
*/
|
|
3330
|
+
computeTargetChunkBytes() {
|
|
3331
|
+
const targetMs = 60;
|
|
3332
|
+
const encoding = this.options.encoding ?? AssemblyAIEncoding.PCM_S16LE;
|
|
3333
|
+
const sampleRate = this.options.sampleRate ?? AssemblyAISampleRate.HZ_16000;
|
|
3334
|
+
if (encoding === AssemblyAIEncoding.PCM_MULAW) {
|
|
3335
|
+
return Math.ceil(sampleRate * targetMs / 1e3);
|
|
3336
|
+
}
|
|
3337
|
+
return Math.ceil(sampleRate * targetMs / 1e3) * 2;
|
|
2484
3338
|
}
|
|
2485
3339
|
estimateChunkDurationMs(byteLength) {
|
|
2486
3340
|
if (byteLength <= 0) return null;
|
|
2487
|
-
const sampleRate = this.options.sampleRate ??
|
|
3341
|
+
const sampleRate = this.options.sampleRate ?? AssemblyAISampleRate.HZ_16000;
|
|
2488
3342
|
if (sampleRate <= 0) return null;
|
|
2489
|
-
const bytesPerSample = (this.options.encoding ??
|
|
3343
|
+
const bytesPerSample = (this.options.encoding ?? AssemblyAIEncoding.PCM_S16LE) === AssemblyAIEncoding.PCM_S16LE ? 2 : 1;
|
|
2490
3344
|
const samples = byteLength / bytesPerSample;
|
|
2491
3345
|
return samples / sampleRate * 1e3;
|
|
2492
3346
|
}
|
|
@@ -2500,7 +3354,9 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2500
3354
|
"AssemblyAISTT.updateConfiguration: WebSocket is not open"
|
|
2501
3355
|
);
|
|
2502
3356
|
}
|
|
2503
|
-
const payload = {
|
|
3357
|
+
const payload = {
|
|
3358
|
+
type: AssemblyAIClientFrame.UPDATE_CONFIGURATION
|
|
3359
|
+
};
|
|
2504
3360
|
if (params.keytermsPrompt !== void 0) {
|
|
2505
3361
|
payload.keyterms_prompt = JSON.stringify(params.keytermsPrompt);
|
|
2506
3362
|
}
|
|
@@ -2522,19 +3378,21 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2522
3378
|
"AssemblyAISTT.forceEndpoint: WebSocket is not open"
|
|
2523
3379
|
);
|
|
2524
3380
|
}
|
|
2525
|
-
this.ws.send(JSON.stringify({ type:
|
|
3381
|
+
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.FORCE_ENDPOINT }));
|
|
2526
3382
|
}
|
|
3383
|
+
/** Register a transcript listener. Returns an unsubscribe function. */
|
|
2527
3384
|
onTranscript(callback) {
|
|
2528
3385
|
this.callbacks.add(callback);
|
|
2529
3386
|
return () => {
|
|
2530
3387
|
this.callbacks.delete(callback);
|
|
2531
3388
|
};
|
|
2532
3389
|
}
|
|
3390
|
+
/** Send a Terminate frame, wait briefly for ack, and close the socket. */
|
|
2533
3391
|
async close() {
|
|
2534
3392
|
this.closing = true;
|
|
2535
3393
|
if (!this.ws) return;
|
|
2536
3394
|
try {
|
|
2537
|
-
this.ws.send(JSON.stringify({ type:
|
|
3395
|
+
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
|
|
2538
3396
|
} catch {
|
|
2539
3397
|
}
|
|
2540
3398
|
await new Promise((resolve) => {
|
|
@@ -2573,13 +3431,361 @@ var STT6 = class extends AssemblyAISTT {
|
|
|
2573
3431
|
"AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
|
|
2574
3432
|
);
|
|
2575
3433
|
}
|
|
2576
|
-
const { apiKey: _ignored, ...rest } = opts;
|
|
3434
|
+
const { apiKey: _ignored, language: _lang, ...rest } = opts;
|
|
2577
3435
|
void _ignored;
|
|
3436
|
+
void _lang;
|
|
2578
3437
|
super(key, rest);
|
|
2579
3438
|
}
|
|
2580
3439
|
};
|
|
2581
3440
|
|
|
3441
|
+
// src/stt/speechmatics.ts
|
|
3442
|
+
init_esm_shims();
|
|
3443
|
+
|
|
3444
|
+
// src/providers/speechmatics-stt.ts
|
|
3445
|
+
init_esm_shims();
|
|
3446
|
+
import WebSocket5 from "ws";
|
|
3447
|
+
/** Default realtime endpoint, used when no `baseUrl` option is given. */
var SPEECHMATICS_RT_URL = "wss://eu.rt.speechmatics.com/v2";
/** Upper bound, in milliseconds, on the WebSocket handshake. */
var CONNECT_TIMEOUT_MS3 = 1e4;

// Enum-like lookup tables. Frozen so the shared wire values cannot be
// mutated at runtime.

/** Values sent as `conversation_config.end_of_utterance_mode`. */
var TurnDetectionMode = /* @__PURE__ */ Object.freeze({
  EXTERNAL: "external",
  FIXED: "fixed",
  ADAPTIVE: "adaptive",
  SMART_TURN: "smart_turn"
});

/** Sample rates, in Hz, sent as `audio_format.sample_rate`. */
var SpeechmaticsSampleRate = /* @__PURE__ */ Object.freeze({
  HZ_8000: 8e3,
  HZ_16000: 16e3,
  HZ_44100: 44100
});

/** Values sent as `audio_format.encoding`. */
var SpeechmaticsAudioEncoding = /* @__PURE__ */ Object.freeze({
  PCM_S16LE: "pcm_s16le"
});

/** Values for `transcription_config.operating_point`. */
var SpeechmaticsOperatingPoint = /* @__PURE__ */ Object.freeze({
  ENHANCED: "enhanced",
  STANDARD: "standard"
});

/** `message` values of the server frames handled by `handleMessage`. */
var SpeechmaticsServerMessage = /* @__PURE__ */ Object.freeze({
  RECOGNITION_STARTED: "RecognitionStarted",
  ADD_PARTIAL_TRANSCRIPT: "AddPartialTranscript",
  ADD_TRANSCRIPT: "AddTranscript",
  END_OF_UTTERANCE: "EndOfUtterance",
  END_OF_TRANSCRIPT: "EndOfTranscript",
  AUDIO_ADDED: "AudioAdded",
  INFO: "Info",
  WARNING: "Warning",
  ERROR: "Error"
});
|
|
3478
|
+
/**
 * Streaming speech-to-text client for the Speechmatics realtime WebSocket
 * API. Audio is pushed with `sendAudio()`; transcripts and errors are
 * delivered to listeners registered via `onTranscript()` / `onError()`.
 */
var SpeechmaticsSTT = class {
  /** Active WebSocket, or `null` while disconnected. */
  ws = null;
  transcriptCallbacks = /* @__PURE__ */ new Set();
  errorCallbacks = /* @__PURE__ */ new Set();
  /** True from a successful `connect()` until the socket closes. */
  running = false;
  /**
   * Number of audio chunks successfully handed to the socket in the current
   * session. Reported as `last_seq_no` in the `EndOfStream` frame.
   */
  lastSeqNo = 0;
  apiKey;
  baseUrl;
  language;
  turnDetectionMode;
  sampleRate;
  enableDiarization;
  maxDelay;
  endOfUtteranceSilenceTrigger;
  endOfUtteranceMaxDelay;
  includePartials;
  additionalVocab;
  operatingPoint;
  domain;
  outputLocale;
  /**
   * @param apiKey  Speechmatics API key (required).
   * @param options Optional tuning. `endOfUtteranceSilenceTrigger` must lie
   *   strictly between 0 and 2, `endOfUtteranceMaxDelay` must exceed it when
   *   both are given, and `maxDelay` must lie in [0.7, 4].
   * @throws Error when the key is missing or an option is out of range.
   */
  constructor(apiKey, options = {}) {
    if (!apiKey) {
      throw new Error("Speechmatics apiKey is required");
    }
    const eouSilence = options.endOfUtteranceSilenceTrigger;
    const eouMax = options.endOfUtteranceMaxDelay;
    const maxDelay = options.maxDelay;
    if (eouSilence !== void 0 && !(eouSilence > 0 && eouSilence < 2)) {
      throw new Error("endOfUtteranceSilenceTrigger must be between 0 and 2");
    }
    if (eouMax !== void 0 && eouSilence !== void 0 && eouMax <= eouSilence) {
      throw new Error(
        "endOfUtteranceMaxDelay must be greater than endOfUtteranceSilenceTrigger"
      );
    }
    if (maxDelay !== void 0 && !(maxDelay >= 0.7 && maxDelay <= 4)) {
      throw new Error("maxDelay must be between 0.7 and 4.0");
    }
    this.apiKey = apiKey;
    this.baseUrl = options.baseUrl ?? SPEECHMATICS_RT_URL;
    this.language = options.language ?? "en";
    this.turnDetectionMode = options.turnDetectionMode ?? TurnDetectionMode.ADAPTIVE;
    this.sampleRate = options.sampleRate ?? SpeechmaticsSampleRate.HZ_16000;
    this.enableDiarization = options.enableDiarization ?? false;
    this.maxDelay = maxDelay;
    this.endOfUtteranceSilenceTrigger = eouSilence;
    this.endOfUtteranceMaxDelay = eouMax;
    this.includePartials = options.includePartials ?? true;
    this.additionalVocab = options.additionalVocab ?? [];
    this.operatingPoint = options.operatingPoint;
    this.domain = options.domain;
    this.outputLocale = options.outputLocale;
  }
  /**
   * Build the JSON `StartRecognition` payload sent on connect. Optional
   * settings are only included when they were configured.
   */
  buildStartRecognition() {
    const transcriptionConfig = {
      language: this.language,
      enable_partials: this.includePartials,
      diarization: this.enableDiarization ? "speaker" : "none"
    };
    if (this.maxDelay !== void 0) transcriptionConfig.max_delay = this.maxDelay;
    if (this.operatingPoint !== void 0) {
      transcriptionConfig.operating_point = this.operatingPoint;
    }
    if (this.domain !== void 0) transcriptionConfig.domain = this.domain;
    if (this.outputLocale !== void 0) {
      transcriptionConfig.output_locale = this.outputLocale;
    }
    if (this.additionalVocab.length > 0) {
      transcriptionConfig.additional_vocab = [...this.additionalVocab];
    }
    const conversationConfig = {
      end_of_utterance_mode: this.turnDetectionMode
    };
    if (this.endOfUtteranceSilenceTrigger !== void 0) {
      conversationConfig.end_of_utterance_silence_trigger = this.endOfUtteranceSilenceTrigger;
    }
    if (this.endOfUtteranceMaxDelay !== void 0) {
      conversationConfig.end_of_utterance_max_delay = this.endOfUtteranceMaxDelay;
    }
    transcriptionConfig.conversation_config = conversationConfig;
    return {
      message: "StartRecognition",
      audio_format: {
        type: "raw",
        encoding: SpeechmaticsAudioEncoding.PCM_S16LE,
        sample_rate: this.sampleRate
      },
      transcription_config: transcriptionConfig
    };
  }
  /**
   * Open the streaming WebSocket and send the `StartRecognition` frame.
   * A no-op while a socket is already held.
   *
   * On any failure the half-open socket is torn down and forgotten, so a
   * later `connect()` call can retry instead of returning early.
   *
   * @throws AuthenticationError on HTTP 401/403 during the upgrade.
   * @throws RateLimitError on HTTP 429 during the upgrade.
   * @throws PatterConnectionError on timeout, any other upgrade status, or
   *   when the `StartRecognition` frame cannot be sent.
   */
  async connect() {
    if (this.ws !== null) return;
    const ws = new WebSocket5(this.baseUrl, {
      headers: { Authorization: `Bearer ${this.apiKey}` }
    });
    this.ws = ws;
    try {
      await new Promise((resolve, reject) => {
        let settled = false;
        // First outcome wins; later events are ignored and the timer is cleared.
        const settle = (fn) => {
          if (settled) return;
          settled = true;
          clearTimeout(timer);
          fn();
        };
        const timer = setTimeout(
          () => settle(
            () => reject(new PatterConnectionError("Speechmatics connect timeout"))
          ),
          CONNECT_TIMEOUT_MS3
        );
        ws.once("open", () => settle(resolve));
        ws.once("error", (err) => settle(() => reject(err)));
        ws.once("unexpected-response", (_req, res) => {
          const status = res?.statusCode ?? 0;
          settle(() => {
            if (status === 401 || status === 403) {
              reject(
                new AuthenticationError(
                  `Speechmatics rejected the API key (HTTP ${status}).`
                )
              );
              return;
            }
            if (status === 429) {
              reject(
                new RateLimitError("Speechmatics rate limit exceeded (HTTP 429).")
              );
              return;
            }
            reject(
              new PatterConnectionError(
                `Speechmatics WebSocket upgrade failed (HTTP ${status}).`
              )
            );
          });
        });
      });
    } catch (err) {
      this.discardSocket(ws);
      throw err;
    }
    ws.on("message", (raw) => this.handleMessage(raw.toString()));
    ws.on("close", () => this.handleClose(ws));
    ws.on("error", (err) => this.handleError(err));
    // Chunk counting restarts with every new session.
    this.lastSeqNo = 0;
    try {
      ws.send(JSON.stringify(this.buildStartRecognition()));
    } catch (err) {
      this.discardSocket(ws);
      throw new PatterConnectionError(
        `Speechmatics StartRecognition send failed: ${String(err)}`
      );
    }
    this.running = true;
  }
  /**
   * Tear down a socket whose connection attempt failed and forget it if it
   * is still the current one.
   */
  discardSocket(ws) {
    if (this.ws === ws) this.ws = null;
    ws.removeAllListeners();
    // Aborting a pending handshake makes the socket emit a final "error";
    // keep a listener attached so it cannot surface as an uncaught exception.
    ws.on("error", () => {
    });
    try {
      ws.terminate();
    } catch {
      // Socket was already torn down; nothing left to release.
    }
  }
  /**
   * Send a binary PCM16-LE audio chunk to Speechmatics for transcription.
   * Dropped silently when the socket is not open or the chunk is empty.
   */
  sendAudio(audio) {
    if (!this.ws || this.ws.readyState !== WebSocket5.OPEN) {
      return;
    }
    if (audio.length === 0) {
      return;
    }
    try {
      this.ws.send(audio);
    } catch (err) {
      getLogger().error(`SpeechmaticsSTT sendAudio failed: ${String(err)}`);
      return;
    }
    // Count only chunks that were actually handed to the socket so that
    // `last_seq_no` in EndOfStream matches what was sent.
    this.lastSeqNo += 1;
  }
  /** Register a transcript listener. */
  onTranscript(callback) {
    this.transcriptCallbacks.add(callback);
  }
  /** Remove a previously registered transcript listener. */
  offTranscript(callback) {
    this.transcriptCallbacks.delete(callback);
  }
  /** Register an error listener for socket / API failures. */
  onError(callback) {
    this.errorCallbacks.add(callback);
  }
  /** Remove a previously registered error listener. */
  offError(callback) {
    this.errorCallbacks.delete(callback);
  }
  /**
   * Parse one server frame and dispatch on its `message` field. Frames that
   * are not valid JSON or carry no `message` are ignored.
   */
  handleMessage(raw) {
    let data;
    try {
      data = JSON.parse(raw);
    } catch {
      return;
    }
    const event = data.message;
    if (!event) return;
    switch (event) {
      case SpeechmaticsServerMessage.RECOGNITION_STARTED:
      case SpeechmaticsServerMessage.AUDIO_ADDED:
      case SpeechmaticsServerMessage.END_OF_UTTERANCE:
      case SpeechmaticsServerMessage.END_OF_TRANSCRIPT:
      case SpeechmaticsServerMessage.INFO:
        return;
      case SpeechmaticsServerMessage.WARNING:
        getLogger().warn(`SpeechmaticsSTT warning: ${JSON.stringify(data)}`);
        return;
      case SpeechmaticsServerMessage.ERROR: {
        const message = data.reason ?? data.type ?? "Speechmatics returned an Error frame";
        getLogger().error(`SpeechmaticsSTT error: ${message}`);
        this.emitError(new PatterConnectionError(`Speechmatics: ${message}`));
        return;
      }
      case SpeechmaticsServerMessage.ADD_PARTIAL_TRANSCRIPT:
      case SpeechmaticsServerMessage.ADD_TRANSCRIPT: {
        const isFinal = event === SpeechmaticsServerMessage.ADD_TRANSCRIPT;
        const transcript = this.toTranscript(data, isFinal);
        if (transcript !== null) this.emitTranscript(transcript);
        return;
      }
      default:
        return;
    }
  }
  /**
   * Translate a Speechmatics transcript message into a Patter `Transcript`.
   *
   * The text comes from `metadata.transcript`; when that is empty it is
   * rebuilt by joining the top alternative of every result with spaces.
   * Confidence is the mean of the top alternatives' numeric confidences,
   * or 1 when none is present.
   *
   * @returns `{ text, isFinal, confidence }`, or `null` when there is no text.
   */
  toTranscript(message, isFinal) {
    const rendered = (message.metadata?.transcript ?? "").trim();
    const results = message.results ?? [];
    let text = rendered;
    const confidences = [];
    for (const result of results) {
      const best = result.alternatives?.[0];
      if (!best) continue;
      const content = best.content;
      const confidence2 = best.confidence;
      if (!rendered && typeof content === "string" && content.length > 0) {
        text = text ? `${text} ${content}` : content;
      }
      if (typeof confidence2 === "number") {
        confidences.push(confidence2);
      }
    }
    text = text.trim();
    if (!text) return null;
    const confidence = confidences.length > 0 ? confidences.reduce((sum, c) => sum + c, 0) / confidences.length : 1;
    return { text, isFinal, confidence };
  }
  /** Deliver a transcript to every listener; a throwing listener is logged, not propagated. */
  emitTranscript(transcript) {
    for (const cb of this.transcriptCallbacks) {
      try {
        cb(transcript);
      } catch (err) {
        getLogger().error(`SpeechmaticsSTT transcript callback threw: ${String(err)}`);
      }
    }
  }
  /** Deliver an error to every listener; a throwing listener is logged, not propagated. */
  emitError(err) {
    for (const cb of this.errorCallbacks) {
      try {
        cb(err);
      } catch (cbErr) {
        getLogger().error(`SpeechmaticsSTT error callback threw: ${String(cbErr)}`);
      }
    }
  }
  /** Log a socket-level error and forward it to the error listeners. */
  handleError(err) {
    getLogger().error(`SpeechmaticsSTT WebSocket error: ${err.message}`);
    this.emitError(err);
  }
  /**
   * Mark the client as stopped after the socket closed.
   *
   * @param ws The socket that closed. When it is still the current socket
   *   it is forgotten, so that `connect()` can open a fresh one.
   */
  handleClose(ws) {
    if (ws !== void 0 && this.ws === ws) this.ws = null;
    this.running = false;
  }
  /** Send `EndOfStream` and close the WebSocket. Idempotent. */
  close() {
    this.running = false;
    const ws = this.ws;
    if (!ws) return;
    this.ws = null;
    const sendSafe = (payload) => {
      if (ws.readyState === WebSocket5.OPEN) {
        try {
          ws.send(payload);
        } catch {
          // Best effort: the socket is being closed anyway.
        }
      }
    };
    sendSafe(
      JSON.stringify({ message: "EndOfStream", last_seq_no: this.lastSeqNo })
    );
    try {
      ws.close();
    } catch {
      // Best effort: nothing more to do if close itself fails.
    }
  }
};
|
|
3769
|
+
|
|
3770
|
+
// src/stt/speechmatics.ts
|
|
3771
|
+
/**
 * Options-object wrapper around `SpeechmaticsSTT`. The API key is read
 * from `opts.apiKey`, falling back to the `SPEECHMATICS_API_KEY`
 * environment variable; construction fails when neither yields a key.
 */
var STT7 = class extends SpeechmaticsSTT {
  static providerKey = "speechmatics";
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.SPEECHMATICS_API_KEY;
    if (resolvedKey) {
      super(resolvedKey, opts);
      return;
    }
    throw new Error(
      "Speechmatics STT requires an apiKey. Pass { apiKey: 'sm_...' } or set SPEECHMATICS_API_KEY in the environment."
    );
  }
};
|
|
3783
|
+
|
|
3784
|
+
// src/tts/elevenlabs.ts
|
|
3785
|
+
init_esm_shims();
|
|
3786
|
+
|
|
2582
3787
|
// src/providers/elevenlabs-tts.ts
|
|
3788
|
+
init_esm_shims();
|
|
2583
3789
|
var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
|
|
2584
3790
|
var ELEVENLABS_VOICE_ID_BY_NAME = {
|
|
2585
3791
|
rachel: "21m00Tcm4TlvDq8ikWAM",
|
|
@@ -2636,6 +3842,27 @@ function resolveVoiceId(voice) {
|
|
|
2636
3842
|
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
2637
3843
|
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
2638
3844
|
}
|
|
3845
|
+
// Enum-like lookup tables for the ElevenLabs provider. Frozen so the
// shared wire values cannot be mutated at runtime.

/** ElevenLabs model identifiers accepted as `modelId`. */
var ElevenLabsModel = /* @__PURE__ */ Object.freeze({
  V3: "eleven_v3",
  FLASH_V2_5: "eleven_flash_v2_5",
  TURBO_V2_5: "eleven_turbo_v2_5",
  MULTILINGUAL_V2: "eleven_multilingual_v2",
  MONOLINGUAL_V1: "eleven_monolingual_v1"
});

/** ElevenLabs output format identifiers accepted as `outputFormat`. */
var ElevenLabsOutputFormat = /* @__PURE__ */ Object.freeze({
  MP3_22050_32: "mp3_22050_32",
  MP3_44100_32: "mp3_44100_32",
  MP3_44100_64: "mp3_44100_64",
  MP3_44100_96: "mp3_44100_96",
  MP3_44100_128: "mp3_44100_128",
  MP3_44100_192: "mp3_44100_192",
  PCM_8000: "pcm_8000",
  PCM_16000: "pcm_16000",
  PCM_22050: "pcm_22050",
  PCM_24000: "pcm_24000",
  PCM_44100: "pcm_44100",
  ULAW_8000: "ulaw_8000"
});
|
|
2639
3866
|
var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
2640
3867
|
apiKey;
|
|
2641
3868
|
voiceId;
|
|
@@ -2644,13 +3871,13 @@ var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
|
2644
3871
|
voiceSettings;
|
|
2645
3872
|
languageCode;
|
|
2646
3873
|
chunkSize;
|
|
2647
|
-
constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId =
|
|
3874
|
+
constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = ElevenLabsModel.FLASH_V2_5, outputFormat = ElevenLabsOutputFormat.PCM_16000) {
|
|
2648
3875
|
this.apiKey = apiKey;
|
|
2649
3876
|
if (typeof voiceIdOrOptions === "object") {
|
|
2650
3877
|
const o = voiceIdOrOptions;
|
|
2651
3878
|
this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2652
|
-
this.modelId = o.modelId ??
|
|
2653
|
-
this.outputFormat = o.outputFormat ??
|
|
3879
|
+
this.modelId = o.modelId ?? ElevenLabsModel.FLASH_V2_5;
|
|
3880
|
+
this.outputFormat = o.outputFormat ?? ElevenLabsOutputFormat.PCM_16000;
|
|
2654
3881
|
this.voiceSettings = o.voiceSettings;
|
|
2655
3882
|
this.languageCode = o.languageCode;
|
|
2656
3883
|
this.chunkSize = o.chunkSize ?? 4096;
|
|
@@ -2688,7 +3915,7 @@ var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
|
2688
3915
|
return new _ElevenLabsTTS(apiKey, {
|
|
2689
3916
|
...options,
|
|
2690
3917
|
voiceSettings,
|
|
2691
|
-
outputFormat:
|
|
3918
|
+
outputFormat: ElevenLabsOutputFormat.ULAW_8000
|
|
2692
3919
|
});
|
|
2693
3920
|
}
|
|
2694
3921
|
/**
|
|
@@ -2705,7 +3932,7 @@ var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
|
2705
3932
|
static forTelnyx(apiKey, options = {}) {
|
|
2706
3933
|
return new _ElevenLabsTTS(apiKey, {
|
|
2707
3934
|
...options,
|
|
2708
|
-
outputFormat:
|
|
3935
|
+
outputFormat: ElevenLabsOutputFormat.PCM_16000
|
|
2709
3936
|
});
|
|
2710
3937
|
}
|
|
2711
3938
|
/**
|
|
@@ -2783,12 +4010,13 @@ function resolveApiKey(apiKey) {
|
|
|
2783
4010
|
var TTS = class _TTS extends ElevenLabsTTS {
|
|
2784
4011
|
static providerKey = "elevenlabs";
|
|
2785
4012
|
constructor(opts = {}) {
|
|
2786
|
-
super(
|
|
2787
|
-
|
|
2788
|
-
opts.
|
|
2789
|
-
opts.
|
|
2790
|
-
opts.
|
|
2791
|
-
|
|
4013
|
+
super(resolveApiKey(opts.apiKey), {
|
|
4014
|
+
voiceId: opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
|
|
4015
|
+
modelId: opts.modelId ?? "eleven_flash_v2_5",
|
|
4016
|
+
outputFormat: opts.outputFormat ?? "pcm_16000",
|
|
4017
|
+
languageCode: opts.languageCode,
|
|
4018
|
+
voiceSettings: opts.voiceSettings
|
|
4019
|
+
});
|
|
2792
4020
|
}
|
|
2793
4021
|
static forTwilio(arg1, arg2) {
|
|
2794
4022
|
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
@@ -2800,22 +4028,364 @@ var TTS = class _TTS extends ElevenLabsTTS {
|
|
|
2800
4028
|
}
|
|
2801
4029
|
};
|
|
2802
4030
|
|
|
4031
|
+
// src/tts/elevenlabs-ws.ts
|
|
4032
|
+
init_esm_shims();
|
|
4033
|
+
|
|
4034
|
+
// src/providers/elevenlabs-ws-tts.ts
|
|
4035
|
+
init_esm_shims();
|
|
4036
|
+
import WebSocket6 from "ws";
|
|
4037
|
+
/** Base URL of the ElevenLabs text-to-speech WebSocket endpoint. */
var WS_BASE = "wss://api.elevenlabs.io/v1/text-to-speech";
/** Default for the `inactivity_timeout` query parameter (presumably seconds — confirm against the API docs). */
var DEFAULT_INACTIVITY_TIMEOUT = 60;
/** Default `chunkSize` option value (presumably bytes per yielded chunk — confirm against the consumer). */
var DEFAULT_CHUNK_SIZE = 4096;
/** Upper bound, in milliseconds, on opening the WebSocket. */
var CONNECT_TIMEOUT_MS4 = 5e3;
/** Upper bound, in milliseconds, on waiting for the next frame. */
var FRAME_TIMEOUT_MS = 3e4;
// NOTE(review): looks like a cap on the size of one base64 audio payload; its use is outside this view.
var MAX_AUDIO_B64_BYTES = 512 * 1024;

/**
 * Base error for the ElevenLabs WebSocket TTS provider.
 *
 * @param message Human-readable description.
 * @param options Optional standard `Error` options (e.g. `{ cause }`), so
 *   the underlying failure can be kept when wrapping it.
 */
var ElevenLabsTTSError = class extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "ElevenLabsTTSError";
  }
};

/**
 * Error subtype for plan-related rejections by the WebSocket endpoint.
 * Takes the same arguments as `ElevenLabsTTSError`.
 */
var ElevenLabsPlanError = class extends ElevenLabsTTSError {
  constructor(message, options) {
    super(message, options);
    this.name = "ElevenLabsPlanError";
  }
};
|
|
4055
|
+
/** User-facing explanation for a `payment_required` rejection by the WS endpoint. */
var PLAN_REQUIRED_MSG = "ElevenLabs WS streaming requires a Pro plan or higher (the WS endpoint returned `payment_required`). Either upgrade at https://elevenlabs.io/pricing, or use the HTTP `ElevenLabsTTS` class which works on all plans (drop-in API).";

/**
 * Make an arbitrary value safe to embed in a single log line.
 *
 * The value is stringified, every control character (C0, DEL, C1) and the
 * Unicode line/paragraph separators are replaced by a space, and the
 * result is truncated to `limit` characters. This stops externally
 * supplied text from forging extra log lines or injecting terminal
 * escape sequences.
 *
 * @param value Any value; converted with `String()`.
 * @param limit Maximum length of the returned string (default 200).
 * @returns The sanitised, truncated string.
 */
function sanitiseLogStr(value, limit = 200) {
  return String(value).replace(/[\x00-\x1f\x7f-\x9f\u2028\u2029]/g, " ").slice(0, limit);
}

/**
 * Native wire format per telephony carrier, looked up by carrier name.
 * Built without a prototype and frozen, so that names such as
 * "constructor" or "toString" resolve to `undefined` instead of an
 * inherited `Object.prototype` member.
 */
var CARRIER_NATIVE_FORMAT = /* @__PURE__ */ Object.freeze({
  __proto__: null,
  twilio: "ulaw_8000",
  telnyx: "pcm_16000"
});
|
|
4063
|
+
var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
|
|
4064
|
+
static providerKey = "elevenlabs_ws";
|
|
4065
|
+
apiKey;
|
|
4066
|
+
voiceId;
|
|
4067
|
+
modelId;
|
|
4068
|
+
voiceSettings;
|
|
4069
|
+
languageCode;
|
|
4070
|
+
autoMode;
|
|
4071
|
+
inactivityTimeout;
|
|
4072
|
+
chunkLengthSchedule;
|
|
4073
|
+
chunkSize;
|
|
4074
|
+
/**
|
|
4075
|
+
* The wire format requested over the ElevenLabs WS. Initially set from
|
|
4076
|
+
* the constructor; ``setTelephonyCarrier`` may auto-flip it to the
|
|
4077
|
+
* carrier's native codec when the caller did NOT pass ``outputFormat``
|
|
4078
|
+
* explicitly.
|
|
4079
|
+
*/
|
|
4080
|
+
_outputFormat;
|
|
4081
|
+
_outputFormatExplicit;
|
|
4082
|
+
/** Public read-only view of the (possibly auto-flipped) wire format. */
|
|
4083
|
+
get outputFormat() {
|
|
4084
|
+
return this._outputFormat;
|
|
4085
|
+
}
|
|
4086
|
+
constructor(opts) {
|
|
4087
|
+
if (opts.modelId === "eleven_v3") {
|
|
4088
|
+
throw new Error(
|
|
4089
|
+
"eleven_v3 is not supported by the WebSocket stream-input endpoint \u2014 use the HTTP ElevenLabsTTS class instead."
|
|
4090
|
+
);
|
|
4091
|
+
}
|
|
4092
|
+
this.apiKey = opts.apiKey;
|
|
4093
|
+
this.voiceId = resolveVoiceId(opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
|
|
4094
|
+
this.modelId = opts.modelId ?? "eleven_flash_v2_5";
|
|
4095
|
+
this._outputFormatExplicit = opts.outputFormat !== void 0;
|
|
4096
|
+
this._outputFormat = opts.outputFormat ?? "pcm_16000";
|
|
4097
|
+
this.voiceSettings = opts.voiceSettings;
|
|
4098
|
+
this.languageCode = opts.languageCode;
|
|
4099
|
+
this.autoMode = opts.autoMode ?? true;
|
|
4100
|
+
this.inactivityTimeout = opts.inactivityTimeout ?? DEFAULT_INACTIVITY_TIMEOUT;
|
|
4101
|
+
this.chunkLengthSchedule = opts.chunkLengthSchedule;
|
|
4102
|
+
this.chunkSize = opts.chunkSize ?? DEFAULT_CHUNK_SIZE;
|
|
4103
|
+
}
|
|
4104
|
+
/**
|
|
4105
|
+
* Hook called by ``StreamHandler`` to advise the carrier wire format.
|
|
4106
|
+
*
|
|
4107
|
+
* When the user did NOT pass an explicit ``outputFormat`` in the
|
|
4108
|
+
* constructor options, this flips the format to the carrier's native
|
|
4109
|
+
* wire codec — saving a client-side transcode step. Calling with an
|
|
4110
|
+
* unknown carrier (``""`` / ``"custom"``) is a no-op.
|
|
4111
|
+
*
|
|
4112
|
+
* When ``outputFormat`` was explicitly passed (incl. via the
|
|
4113
|
+
* ``forTwilio`` / ``forTelnyx`` factories), this method is a no-op —
|
|
4114
|
+
* the user's choice always wins.
|
|
4115
|
+
*/
|
|
4116
|
+
setTelephonyCarrier(carrier) {
|
|
4117
|
+
if (this._outputFormatExplicit) return;
|
|
4118
|
+
const native = CARRIER_NATIVE_FORMAT[carrier];
|
|
4119
|
+
if (!native) return;
|
|
4120
|
+
this._outputFormat = native;
|
|
4121
|
+
}
|
|
4122
|
+
/** Pre-configured for Twilio Media Streams (`ulaw_8000`). */
|
|
4123
|
+
static forTwilio(opts) {
|
|
4124
|
+
return new _ElevenLabsWebSocketTTS({
|
|
4125
|
+
...opts,
|
|
4126
|
+
outputFormat: "ulaw_8000",
|
|
4127
|
+
voiceSettings: opts.voiceSettings ?? {
|
|
4128
|
+
stability: 0.6,
|
|
4129
|
+
similarity_boost: 0.75,
|
|
4130
|
+
use_speaker_boost: false
|
|
4131
|
+
}
|
|
4132
|
+
});
|
|
4133
|
+
}
|
|
4134
|
+
/** Pre-configured for Telnyx (`pcm_16000`). */
|
|
4135
|
+
static forTelnyx(opts) {
|
|
4136
|
+
return new _ElevenLabsWebSocketTTS({
|
|
4137
|
+
...opts,
|
|
4138
|
+
outputFormat: "pcm_16000"
|
|
4139
|
+
});
|
|
4140
|
+
}
|
|
4141
|
+
buildUrl() {
|
|
4142
|
+
const params = new URLSearchParams({
|
|
4143
|
+
model_id: this.modelId,
|
|
4144
|
+
output_format: this.outputFormat,
|
|
4145
|
+
inactivity_timeout: String(this.inactivityTimeout)
|
|
4146
|
+
});
|
|
4147
|
+
if (this.autoMode) params.set("auto_mode", "true");
|
|
4148
|
+
if (this.languageCode) params.set("language_code", this.languageCode);
|
|
4149
|
+
return `${WS_BASE}/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
|
|
4150
|
+
}
|
|
4151
|
+
/**
|
|
4152
|
+
* Single-shot synthesis: open WS, send text, yield bytes, close.
|
|
4153
|
+
*
|
|
4154
|
+
* Resilience contract:
|
|
4155
|
+
* - Connection bounded by ``CONNECT_TIMEOUT_MS`` (5s, was 15s).
|
|
4156
|
+
* - Each idle wait bounded by ``FRAME_TIMEOUT_MS`` (30s) so a stalled
|
|
4157
|
+
* server cannot keep the generator alive indefinitely.
|
|
4158
|
+
* - Permanent error handler attached BEFORE the open await — prevents
|
|
4159
|
+
* ``uncaughtException`` if an error fires after the once-listener
|
|
4160
|
+
* resolves.
|
|
4161
|
+
* - All event listeners removed in ``finally`` (no closure leak past
|
|
4162
|
+
* socket close).
|
|
4163
|
+
* - Server-reported ``error`` raises ``ElevenLabsTTSError``.
|
|
4164
|
+
* - Per-frame audio payload capped at ``MAX_AUDIO_B64_BYTES``.
|
|
4165
|
+
* - Best-effort EOS ``{"text":""}`` sent in finally (not immediately
|
|
4166
|
+
* after flush — auto_mode could otherwise truncate the tail audio).
|
|
4167
|
+
*/
|
|
4168
|
+
async *synthesizeStream(text) {
|
|
4169
|
+
const ws = new WebSocket6(this.buildUrl(), {
|
|
4170
|
+
headers: { "xi-api-key": this.apiKey }
|
|
4171
|
+
});
|
|
4172
|
+
const queue = [];
|
|
4173
|
+
let done = false;
|
|
4174
|
+
let pendingError = null;
|
|
4175
|
+
let resolveWaiter = null;
|
|
4176
|
+
let connectTimer;
|
|
4177
|
+
const wakeWaiter = () => {
|
|
4178
|
+
const r = resolveWaiter;
|
|
4179
|
+
resolveWaiter = null;
|
|
4180
|
+
r?.();
|
|
4181
|
+
};
|
|
4182
|
+
const onMessage = (raw) => {
|
|
4183
|
+
if (Buffer.isBuffer(raw) && !looksLikeJson(raw)) {
|
|
4184
|
+
if (raw.length > MAX_AUDIO_B64_BYTES) {
|
|
4185
|
+
getLogger().warn(
|
|
4186
|
+
`ElevenLabs WS binary frame too large (${raw.length} bytes), skipping`
|
|
4187
|
+
);
|
|
4188
|
+
return;
|
|
4189
|
+
}
|
|
4190
|
+
queue.push(raw);
|
|
4191
|
+
wakeWaiter();
|
|
4192
|
+
return;
|
|
4193
|
+
}
|
|
4194
|
+
const txt = raw.toString("utf8");
|
|
4195
|
+
let msg;
|
|
4196
|
+
try {
|
|
4197
|
+
msg = JSON.parse(txt);
|
|
4198
|
+
} catch {
|
|
4199
|
+
getLogger().warn("ElevenLabs WS sent non-JSON text frame");
|
|
4200
|
+
return;
|
|
4201
|
+
}
|
|
4202
|
+
if (msg.error) {
|
|
4203
|
+
const sanitised = sanitiseLogStr(msg.error);
|
|
4204
|
+
getLogger().error("ElevenLabs WS reported error:", sanitised);
|
|
4205
|
+
if (sanitised === "payment_required" || /payment[_ ]required/i.test(sanitised)) {
|
|
4206
|
+
pendingError = new ElevenLabsPlanError(PLAN_REQUIRED_MSG);
|
|
4207
|
+
} else {
|
|
4208
|
+
pendingError = new ElevenLabsTTSError(`ElevenLabs WS error: ${sanitised}`);
|
|
4209
|
+
}
|
|
4210
|
+
done = true;
|
|
4211
|
+
wakeWaiter();
|
|
4212
|
+
return;
|
|
4213
|
+
}
|
|
4214
|
+
if (msg.audio) {
|
|
4215
|
+
if (typeof msg.audio !== "string" || msg.audio.length > MAX_AUDIO_B64_BYTES) {
|
|
4216
|
+
getLogger().warn("ElevenLabs WS audio frame too large or malformed, skipping");
|
|
4217
|
+
} else {
|
|
4218
|
+
try {
|
|
4219
|
+
queue.push(Buffer.from(msg.audio, "base64"));
|
|
4220
|
+
} catch {
|
|
4221
|
+
getLogger().warn("ElevenLabs WS sent malformed base64 audio");
|
|
4222
|
+
}
|
|
4223
|
+
}
|
|
4224
|
+
}
|
|
4225
|
+
if (msg.isFinal) {
|
|
4226
|
+
done = true;
|
|
4227
|
+
}
|
|
4228
|
+
wakeWaiter();
|
|
4229
|
+
};
|
|
4230
|
+
const onClose = () => {
|
|
4231
|
+
done = true;
|
|
4232
|
+
wakeWaiter();
|
|
4233
|
+
};
|
|
4234
|
+
const onError = (err) => {
|
|
4235
|
+
pendingError = err;
|
|
4236
|
+
done = true;
|
|
4237
|
+
wakeWaiter();
|
|
4238
|
+
};
|
|
4239
|
+
ws.on("error", onError);
|
|
4240
|
+
try {
|
|
4241
|
+
await new Promise((resolve, reject) => {
|
|
4242
|
+
connectTimer = setTimeout(
|
|
4243
|
+
() => reject(new Error("ElevenLabs WS connect timeout")),
|
|
4244
|
+
CONNECT_TIMEOUT_MS4
|
|
4245
|
+
);
|
|
4246
|
+
ws.once("open", () => {
|
|
4247
|
+
if (connectTimer) clearTimeout(connectTimer);
|
|
4248
|
+
connectTimer = void 0;
|
|
4249
|
+
resolve();
|
|
4250
|
+
});
|
|
4251
|
+
ws.once("error", (err) => {
|
|
4252
|
+
if (connectTimer) clearTimeout(connectTimer);
|
|
4253
|
+
connectTimer = void 0;
|
|
4254
|
+
reject(err);
|
|
4255
|
+
});
|
|
4256
|
+
});
|
|
4257
|
+
const init = { text: " " };
|
|
4258
|
+
if (this.voiceSettings) init["voice_settings"] = this.voiceSettings;
|
|
4259
|
+
if (!this.autoMode && this.chunkLengthSchedule) {
|
|
4260
|
+
init["generation_config"] = { chunk_length_schedule: this.chunkLengthSchedule };
|
|
4261
|
+
}
|
|
4262
|
+
ws.send(JSON.stringify(init));
|
|
4263
|
+
ws.send(JSON.stringify({ text: text + " ", flush: true }));
|
|
4264
|
+
ws.on("message", onMessage);
|
|
4265
|
+
ws.on("close", onClose);
|
|
4266
|
+
while (true) {
|
|
4267
|
+
if (queue.length > 0) {
|
|
4268
|
+
const buf = queue.shift();
|
|
4269
|
+
for (let off = 0; off < buf.length; off += this.chunkSize) {
|
|
4270
|
+
yield buf.subarray(off, Math.min(off + this.chunkSize, buf.length));
|
|
4271
|
+
}
|
|
4272
|
+
continue;
|
|
4273
|
+
}
|
|
4274
|
+
if (done) {
|
|
4275
|
+
if (pendingError) throw pendingError;
|
|
4276
|
+
return;
|
|
4277
|
+
}
|
|
4278
|
+
let frameTimer;
|
|
4279
|
+
try {
|
|
4280
|
+
await new Promise((res, rej) => {
|
|
4281
|
+
resolveWaiter = res;
|
|
4282
|
+
frameTimer = setTimeout(
|
|
4283
|
+
() => rej(new ElevenLabsTTSError(`ElevenLabs WS no frame for ${FRAME_TIMEOUT_MS}ms`)),
|
|
4284
|
+
FRAME_TIMEOUT_MS
|
|
4285
|
+
);
|
|
4286
|
+
});
|
|
4287
|
+
} finally {
|
|
4288
|
+
if (frameTimer) clearTimeout(frameTimer);
|
|
4289
|
+
}
|
|
4290
|
+
}
|
|
4291
|
+
} finally {
|
|
4292
|
+
if (connectTimer) clearTimeout(connectTimer);
|
|
4293
|
+
try {
|
|
4294
|
+
if (ws.readyState === WebSocket6.OPEN) {
|
|
4295
|
+
ws.send(JSON.stringify({ text: "" }));
|
|
4296
|
+
}
|
|
4297
|
+
} catch {
|
|
4298
|
+
}
|
|
4299
|
+
try {
|
|
4300
|
+
if (ws.readyState === WebSocket6.OPEN || ws.readyState === WebSocket6.CONNECTING) {
|
|
4301
|
+
ws.close();
|
|
4302
|
+
}
|
|
4303
|
+
} catch {
|
|
4304
|
+
}
|
|
4305
|
+
ws.removeAllListeners();
|
|
4306
|
+
}
|
|
4307
|
+
}
|
|
4308
|
+
/** No-op — connections are per-utterance and torn down inside synthesizeStream. */
|
|
4309
|
+
async close() {
|
|
4310
|
+
}
|
|
4311
|
+
};
|
|
4312
|
+
/**
 * Cheap first-byte sniff: does this buffer start like a JSON object or array?
 *
 * Only the first byte is inspected; the payload is never parsed.
 *
 * @param {Buffer} buf Frame payload.
 * @returns {boolean} `true` when the first byte is `{` (123) or `[` (91);
 *   `false` for an empty buffer or any other leading byte.
 */
function looksLikeJson(buf) {
  const OPEN_BRACE = 123;
  const OPEN_BRACKET = 91;
  return buf.length !== 0 && [OPEN_BRACE, OPEN_BRACKET].includes(buf[0]);
}
|
|
4317
|
+
|
|
4318
|
+
// src/tts/elevenlabs-ws.ts
|
|
4319
|
+
/**
 * Pick the ElevenLabs API key: the explicit argument when it is not nullish,
 * otherwise the `ELEVENLABS_API_KEY` environment variable.
 *
 * @param {string} [apiKey] Key passed by the caller.
 * @returns {string} The resolved key.
 * @throws {Error} When the resolved value is falsy (this includes an
 *   explicit empty string, which does not fall back to the environment).
 */
function resolveApiKey2(apiKey) {
  const resolved = apiKey ?? process.env.ELEVENLABS_API_KEY;
  if (resolved) {
    return resolved;
  }
  throw new Error(
    "ElevenLabs WebSocket TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
  );
}
|
|
4328
|
+
/**
 * Translate the wrapper's options into the options object handed to the
 * `ElevenLabsWebSocketTTS` constructor.
 *
 * `apiKey` is resolved through `resolveApiKey2`; `modelId` and `autoMode`
 * get their defaults here. Every other option, `outputFormat` included, is
 * forwarded only when the caller actually supplied it.
 *
 * `outputFormat` must NOT be defaulted here. The base constructor treats any
 * defined `outputFormat` as an explicit caller choice, which turns
 * `setTelephonyCarrier` into a no-op. Leaving it absent lets the base class
 * apply its own `pcm_16000` default while keeping the carrier auto-flip
 * available.
 *
 * @param {object} opts Wrapper options.
 * @returns {object} Options for the base-class constructor.
 * @throws {Error} When no API key can be resolved.
 */
function buildOpts(opts) {
  const out = {
    apiKey: resolveApiKey2(opts.apiKey),
    modelId: opts.modelId ?? "eleven_flash_v2_5",
    autoMode: opts.autoMode ?? true
  };
  const optionalKeys = [
    "outputFormat",
    "voiceId",
    "voiceSettings",
    "languageCode",
    "inactivityTimeout",
    "chunkLengthSchedule"
  ];
  for (const key of optionalKeys) {
    if (opts[key] !== void 0) out[key] = opts[key];
  }
  return out;
}
|
|
4342
|
+
/**
 * Wrapper around `ElevenLabsWebSocketTTS` registered under the
 * `elevenlabs_ws` provider key. Options are normalised by `buildOpts`
 * before they reach the base constructor.
 */
var TTS2 = class _TTS extends ElevenLabsWebSocketTTS {
  static providerKey = "elevenlabs_ws";

  /**
   * @param {object} [opts] Wrapper options; see `buildOpts`.
   */
  constructor(opts = {}) {
    const providerOpts = buildOpts(opts);
    super(providerOpts);
  }

  /** Factory for Twilio Media Streams: forces `outputFormat` to `ulaw_8000`. */
  static forTwilio(opts = {}) {
    const twilioOpts = { ...opts, outputFormat: "ulaw_8000" };
    return new _TTS(twilioOpts);
  }

  /** Factory for Telnyx: forces `outputFormat` to `pcm_16000`. */
  static forTelnyx(opts = {}) {
    const telnyxOpts = { ...opts, outputFormat: "pcm_16000" };
    return new _TTS(telnyxOpts);
  }
};
|
|
4356
|
+
|
|
4357
|
+
// src/tts/openai.ts
|
|
4358
|
+
init_esm_shims();
|
|
4359
|
+
|
|
2803
4360
|
// src/providers/openai-tts.ts
|
|
4361
|
+
init_esm_shims();
|
|
2804
4362
|
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
2805
4363
|
var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
|
|
2806
4364
|
var LPF_ALPHA = 0.78;
|
|
4365
|
+
var LPF_ALPHA_8K = 0.45;
|
|
2807
4366
|
var OpenAITTS = class _OpenAITTS {
|
|
2808
|
-
constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true) {
|
|
4367
|
+
constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true, targetSampleRate = 16e3) {
|
|
2809
4368
|
this.apiKey = apiKey;
|
|
2810
4369
|
this.voice = voice;
|
|
2811
4370
|
this.model = model;
|
|
2812
4371
|
this.instructions = instructions;
|
|
2813
4372
|
this.speed = speed;
|
|
2814
4373
|
this.antiAlias = antiAlias;
|
|
4374
|
+
this.targetSampleRate = targetSampleRate;
|
|
2815
4375
|
if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
|
|
2816
4376
|
throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
|
|
2817
4377
|
}
|
|
4378
|
+
if (targetSampleRate !== 8e3 && targetSampleRate !== 16e3) {
|
|
4379
|
+
throw new Error("OpenAITTS: targetSampleRate must be 8000 or 16000");
|
|
4380
|
+
}
|
|
2818
4381
|
}
|
|
4382
|
+
apiKey;
|
|
4383
|
+
voice;
|
|
4384
|
+
model;
|
|
4385
|
+
instructions;
|
|
4386
|
+
speed;
|
|
4387
|
+
antiAlias;
|
|
4388
|
+
targetSampleRate;
|
|
2819
4389
|
/**
|
|
2820
4390
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2821
4391
|
*
|
|
@@ -2871,7 +4441,8 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2871
4441
|
carryByte: null,
|
|
2872
4442
|
leftover: [],
|
|
2873
4443
|
lpfPrev: 0,
|
|
2874
|
-
lpfEnabled: this.antiAlias
|
|
4444
|
+
lpfEnabled: this.antiAlias,
|
|
4445
|
+
targetSampleRate: this.targetSampleRate
|
|
2875
4446
|
};
|
|
2876
4447
|
const reader = response.body.getReader();
|
|
2877
4448
|
try {
|
|
@@ -2897,14 +4468,17 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2897
4468
|
}
|
|
2898
4469
|
}
|
|
2899
4470
|
/**
|
|
2900
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
|
|
2901
|
-
* lowpass ahead of the
|
|
2902
|
-
*
|
|
4471
|
+
* Streaming 24 kHz → {16, 8} kHz resampler (PCM16-LE). Applies a single-pole
|
|
4472
|
+
* lowpass ahead of the decimation and carries filter + sample state across
|
|
4473
|
+
* chunks so the cadence doesn't reset at every network read.
|
|
4474
|
+
*
|
|
4475
|
+
* Output rate is selected by ``ctx.targetSampleRate``:
|
|
4476
|
+
* 16000 → 3:2 decimation (sample 0 + mid(1,2)) [default]
|
|
4477
|
+
* 8000 → 3:1 decimation (sample 0 only) [fix #46]
|
|
2903
4478
|
*
|
|
2904
|
-
* ``ctx.lpfEnabled``
|
|
2905
|
-
* legacy static helper
|
|
2906
|
-
*
|
|
2907
|
-
* streaming path gets anti-alias filtering.
|
|
4479
|
+
* ``ctx.lpfEnabled`` controls whether the LPF is engaged — kept disabled
|
|
4480
|
+
* for the legacy static helper so the bit-exact downsample-only tests
|
|
4481
|
+
* remain valid; the real streaming path always engages it.
|
|
2908
4482
|
*/
|
|
2909
4483
|
static resampleStreaming(audio, ctx) {
|
|
2910
4484
|
let buf;
|
|
@@ -2921,6 +4495,8 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2921
4495
|
if (buf.length === 0 && ctx.leftover.length === 0) {
|
|
2922
4496
|
return Buffer.alloc(0);
|
|
2923
4497
|
}
|
|
4498
|
+
const direct8k = ctx.targetSampleRate === 8e3;
|
|
4499
|
+
const lpfAlpha = direct8k ? LPF_ALPHA_8K : LPF_ALPHA;
|
|
2924
4500
|
const sampleCount = buf.length / 2;
|
|
2925
4501
|
const samples = ctx.leftover.slice();
|
|
2926
4502
|
const lpf = ctx.lpfEnabled !== false;
|
|
@@ -2928,7 +4504,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2928
4504
|
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2929
4505
|
const x = buf.readInt16LE(i2 * 2);
|
|
2930
4506
|
if (lpf) {
|
|
2931
|
-
y =
|
|
4507
|
+
y = lpfAlpha * x + (1 - lpfAlpha) * y;
|
|
2932
4508
|
let s = Math.round(y);
|
|
2933
4509
|
if (s > 32767) s = 32767;
|
|
2934
4510
|
else if (s < -32768) s = -32768;
|
|
@@ -2940,10 +4516,17 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2940
4516
|
if (lpf) ctx.lpfPrev = y;
|
|
2941
4517
|
const out = [];
|
|
2942
4518
|
let i = 0;
|
|
2943
|
-
|
|
2944
|
-
|
|
2945
|
-
|
|
2946
|
-
|
|
4519
|
+
if (direct8k) {
|
|
4520
|
+
while (i + 2 < samples.length) {
|
|
4521
|
+
out.push(samples[i]);
|
|
4522
|
+
i += 3;
|
|
4523
|
+
}
|
|
4524
|
+
} else {
|
|
4525
|
+
while (i + 2 < samples.length) {
|
|
4526
|
+
out.push(samples[i]);
|
|
4527
|
+
out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
|
|
4528
|
+
i += 3;
|
|
4529
|
+
}
|
|
2947
4530
|
}
|
|
2948
4531
|
ctx.leftover = samples.slice(i);
|
|
2949
4532
|
const buffer = Buffer.alloc(out.length * 2);
|
|
@@ -2954,7 +4537,13 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2954
4537
|
}
|
|
2955
4538
|
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
2956
4539
|
static resample24kTo16k(audio) {
|
|
2957
|
-
const ctx = {
|
|
4540
|
+
const ctx = {
|
|
4541
|
+
carryByte: null,
|
|
4542
|
+
leftover: [],
|
|
4543
|
+
lpfPrev: 0,
|
|
4544
|
+
lpfEnabled: false,
|
|
4545
|
+
targetSampleRate: 16e3
|
|
4546
|
+
};
|
|
2958
4547
|
const out = _OpenAITTS.resampleStreaming(audio, ctx);
|
|
2959
4548
|
if (ctx.leftover.length === 0) return out;
|
|
2960
4549
|
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
@@ -2966,7 +4555,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2966
4555
|
};
|
|
2967
4556
|
|
|
2968
4557
|
// src/tts/openai.ts
|
|
2969
|
-
var
|
|
4558
|
+
var TTS3 = class extends OpenAITTS {
|
|
2970
4559
|
static providerKey = "openai_tts";
|
|
2971
4560
|
constructor(opts = {}) {
|
|
2972
4561
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
@@ -2986,10 +4575,41 @@ var TTS2 = class extends OpenAITTS {
|
|
|
2986
4575
|
}
|
|
2987
4576
|
};
|
|
2988
4577
|
|
|
4578
|
+
// src/tts/cartesia.ts
|
|
4579
|
+
init_esm_shims();
|
|
4580
|
+
|
|
2989
4581
|
// src/providers/cartesia-tts.ts
|
|
4582
|
+
init_esm_shims();
|
|
2990
4583
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
2991
4584
|
var CARTESIA_API_VERSION = "2025-04-16";
|
|
2992
4585
|
var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
4586
|
+
/** Cartesia model ids; `SONIC_3` is the `CartesiaTTS` constructor default. */
var CartesiaTTSModel = {
  SONIC_3: "sonic-3",
  SONIC_2: "sonic-2",
  SONIC: "sonic"
};

/** Output container names; `buildPayload` requests `RAW`. */
var CartesiaTTSContainer = {
  RAW: "raw",
  WAV: "wav",
  MP3: "mp3"
};

/** Output encoding names; `buildPayload` requests `PCM_S16LE`. */
var CartesiaTTSEncoding = {
  PCM_S16LE: "pcm_s16le",
  PCM_F32LE: "pcm_f32le",
  PCM_MULAW: "pcm_mulaw",
  PCM_ALAW: "pcm_alaw"
};

/**
 * Sample rates in Hz. `HZ_16000` is the constructor default and the
 * `forTelnyx` value; `forTwilio` uses `HZ_8000`.
 */
var CartesiaTTSSampleRate = {
  HZ_8000: 8000,
  HZ_16000: 16000,
  HZ_22050: 22050,
  HZ_24000: 24000,
  HZ_44100: 44100
};

/** Voice selector modes; `buildPayload` sends `ID`. */
var CartesiaTTSVoiceMode = {
  ID: "id",
  EMBEDDING: "embedding"
};
|
|
2993
4613
|
var CartesiaTTS = class _CartesiaTTS {
|
|
2994
4614
|
apiKey;
|
|
2995
4615
|
model;
|
|
@@ -3003,10 +4623,10 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
3003
4623
|
apiVersion;
|
|
3004
4624
|
constructor(apiKey, opts = {}) {
|
|
3005
4625
|
this.apiKey = apiKey;
|
|
3006
|
-
this.model = opts.model ??
|
|
4626
|
+
this.model = opts.model ?? CartesiaTTSModel.SONIC_3;
|
|
3007
4627
|
this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
|
|
3008
4628
|
this.language = opts.language ?? "en";
|
|
3009
|
-
this.sampleRate = opts.sampleRate ??
|
|
4629
|
+
this.sampleRate = opts.sampleRate ?? CartesiaTTSSampleRate.HZ_16000;
|
|
3010
4630
|
this.speed = opts.speed;
|
|
3011
4631
|
this.emotion = typeof opts.emotion === "string" ? [opts.emotion] : opts.emotion;
|
|
3012
4632
|
this.volume = opts.volume;
|
|
@@ -3023,7 +4643,10 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
3023
4643
|
* removes a potential aliasing source.
|
|
3024
4644
|
*/
|
|
3025
4645
|
static forTwilio(apiKey, options = {}) {
|
|
3026
|
-
return new _CartesiaTTS(apiKey, {
|
|
4646
|
+
return new _CartesiaTTS(apiKey, {
|
|
4647
|
+
...options,
|
|
4648
|
+
sampleRate: CartesiaTTSSampleRate.HZ_8000
|
|
4649
|
+
});
|
|
3027
4650
|
}
|
|
3028
4651
|
/**
|
|
3029
4652
|
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
@@ -3034,17 +4657,20 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
3034
4657
|
* {@link CartesiaTTS.forTwilio}.
|
|
3035
4658
|
*/
|
|
3036
4659
|
static forTelnyx(apiKey, options = {}) {
|
|
3037
|
-
return new _CartesiaTTS(apiKey, {
|
|
4660
|
+
return new _CartesiaTTS(apiKey, {
|
|
4661
|
+
...options,
|
|
4662
|
+
sampleRate: CartesiaTTSSampleRate.HZ_16000
|
|
4663
|
+
});
|
|
3038
4664
|
}
|
|
3039
4665
|
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
3040
4666
|
buildPayload(text) {
|
|
3041
4667
|
const payload = {
|
|
3042
4668
|
model_id: this.model,
|
|
3043
|
-
voice: { mode:
|
|
4669
|
+
voice: { mode: CartesiaTTSVoiceMode.ID, id: this.voice },
|
|
3044
4670
|
transcript: text,
|
|
3045
4671
|
output_format: {
|
|
3046
|
-
container:
|
|
3047
|
-
encoding:
|
|
4672
|
+
container: CartesiaTTSContainer.RAW,
|
|
4673
|
+
encoding: CartesiaTTSEncoding.PCM_S16LE,
|
|
3048
4674
|
sample_rate: this.sampleRate
|
|
3049
4675
|
},
|
|
3050
4676
|
language: this.language
|
|
@@ -3108,7 +4734,7 @@ var CartesiaTTS = class _CartesiaTTS {
|
|
|
3108
4734
|
};
|
|
3109
4735
|
|
|
3110
4736
|
// src/tts/cartesia.ts
|
|
3111
|
-
function
|
|
4737
|
+
function resolveApiKey3(apiKey) {
|
|
3112
4738
|
const key = apiKey ?? process.env.CARTESIA_API_KEY;
|
|
3113
4739
|
if (!key) {
|
|
3114
4740
|
throw new Error(
|
|
@@ -3117,10 +4743,10 @@ function resolveApiKey2(apiKey) {
|
|
|
3117
4743
|
}
|
|
3118
4744
|
return key;
|
|
3119
4745
|
}
|
|
3120
|
-
var
|
|
4746
|
+
var TTS4 = class _TTS extends CartesiaTTS {
|
|
3121
4747
|
static providerKey = "cartesia_tts";
|
|
3122
4748
|
constructor(opts = {}) {
|
|
3123
|
-
const key =
|
|
4749
|
+
const key = resolveApiKey3(opts.apiKey);
|
|
3124
4750
|
const { apiKey: _ignored, ...rest } = opts;
|
|
3125
4751
|
void _ignored;
|
|
3126
4752
|
super(key, rest);
|
|
@@ -3135,15 +4761,30 @@ var TTS3 = class _TTS extends CartesiaTTS {
|
|
|
3135
4761
|
}
|
|
3136
4762
|
};
|
|
3137
4763
|
|
|
4764
|
+
// src/tts/rime.ts
|
|
4765
|
+
init_esm_shims();
|
|
4766
|
+
|
|
3138
4767
|
// src/providers/rime-tts.ts
|
|
4768
|
+
init_esm_shims();
|
|
3139
4769
|
var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
|
|
4770
|
+
/** Rime model ids; `ARCANA` is the `RimeTTS` constructor default. */
var RimeModel = {
  ARCANA: "arcana",
  MIST: "mist",
  MIST_V2: "mistv2"
};

/** MIME types for Rime audio; `PCM` is sent as the request `accept` header. */
var RimeAudioFormat = {
  PCM: "audio/pcm",
  MP3: "audio/mp3",
  WAV: "audio/wav",
  MULAW: "audio/mulaw"
};

/** Timeout in milliseconds used for the `arcana` model (four minutes). */
var ARCANA_MODEL_TIMEOUT_MS = 4 * 60 * 1000;

/** Timeout in milliseconds used for every other model (thirty seconds). */
var MIST_MODEL_TIMEOUT_MS = 30 * 1000;
|
|
3142
4783
|
/**
 * Report whether a model id belongs to the mist family, i.e. contains the
 * `RimeModel.MIST` substring (so `"mistv2"` also qualifies).
 *
 * @param {string} model Model id.
 * @returns {boolean}
 */
function isMistModel(model) {
  const { MIST: mistFamily } = RimeModel;
  return model.includes(mistFamily);
}
|
|
3145
4786
|
/**
 * Select the timeout for a model: the arcana timeout for an exact
 * `RimeModel.ARCANA` match, the mist timeout for everything else.
 *
 * @param {string} model Model id.
 * @returns {number} Timeout in milliseconds.
 */
function timeoutForModel(model) {
  return model === RimeModel.ARCANA ? ARCANA_MODEL_TIMEOUT_MS : MIST_MODEL_TIMEOUT_MS;
}
|
|
3149
4790
|
var RimeTTS = class {
|
|
@@ -3164,7 +4805,7 @@ var RimeTTS = class {
|
|
|
3164
4805
|
totalTimeoutMs;
|
|
3165
4806
|
constructor(apiKey, opts = {}) {
|
|
3166
4807
|
this.apiKey = apiKey;
|
|
3167
|
-
this.model = opts.model ??
|
|
4808
|
+
this.model = opts.model ?? RimeModel.ARCANA;
|
|
3168
4809
|
const defaultSpeaker = isMistModel(this.model) ? "cove" : "astra";
|
|
3169
4810
|
this.speaker = opts.speaker ?? defaultSpeaker;
|
|
3170
4811
|
this.lang = opts.lang ?? "eng";
|
|
@@ -3186,7 +4827,7 @@ var RimeTTS = class {
|
|
|
3186
4827
|
text,
|
|
3187
4828
|
modelId: this.model
|
|
3188
4829
|
};
|
|
3189
|
-
if (this.model ===
|
|
4830
|
+
if (this.model === RimeModel.ARCANA) {
|
|
3190
4831
|
if (this.repetitionPenalty !== void 0)
|
|
3191
4832
|
payload.repetition_penalty = this.repetitionPenalty;
|
|
3192
4833
|
if (this.temperature !== void 0) payload.temperature = this.temperature;
|
|
@@ -3198,7 +4839,7 @@ var RimeTTS = class {
|
|
|
3198
4839
|
payload.lang = this.lang;
|
|
3199
4840
|
payload.samplingRate = this.sampleRate;
|
|
3200
4841
|
if (this.speedAlpha !== void 0) payload.speedAlpha = this.speedAlpha;
|
|
3201
|
-
if (this.model ===
|
|
4842
|
+
if (this.model === RimeModel.MIST_V2 && this.reduceLatency !== void 0) {
|
|
3202
4843
|
payload.reduceLatency = this.reduceLatency;
|
|
3203
4844
|
}
|
|
3204
4845
|
if (this.pauseBetweenBrackets !== void 0) {
|
|
@@ -3210,6 +4851,7 @@ var RimeTTS = class {
|
|
|
3210
4851
|
}
|
|
3211
4852
|
return payload;
|
|
3212
4853
|
}
|
|
4854
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
3213
4855
|
async synthesize(text) {
|
|
3214
4856
|
const chunks = [];
|
|
3215
4857
|
for await (const chunk of this.synthesizeStream(text)) {
|
|
@@ -3225,7 +4867,7 @@ var RimeTTS = class {
|
|
|
3225
4867
|
const response = await fetch(this.baseUrl, {
|
|
3226
4868
|
method: "POST",
|
|
3227
4869
|
headers: {
|
|
3228
|
-
accept:
|
|
4870
|
+
accept: RimeAudioFormat.PCM,
|
|
3229
4871
|
Authorization: `Bearer ${this.apiKey}`,
|
|
3230
4872
|
"content-type": "application/json"
|
|
3231
4873
|
},
|
|
@@ -3263,7 +4905,7 @@ var RimeTTS = class {
|
|
|
3263
4905
|
};
|
|
3264
4906
|
|
|
3265
4907
|
// src/tts/rime.ts
|
|
3266
|
-
var
|
|
4908
|
+
var TTS5 = class extends RimeTTS {
|
|
3267
4909
|
static providerKey = "rime";
|
|
3268
4910
|
constructor(opts = {}) {
|
|
3269
4911
|
const key = opts.apiKey ?? process.env.RIME_API_KEY;
|
|
@@ -3278,8 +4920,28 @@ var TTS4 = class extends RimeTTS {
|
|
|
3278
4920
|
}
|
|
3279
4921
|
};
|
|
3280
4922
|
|
|
4923
|
+
// src/tts/lmnt.ts
|
|
4924
|
+
init_esm_shims();
|
|
4925
|
+
|
|
3281
4926
|
// src/providers/lmnt-tts.ts
|
|
4927
|
+
init_esm_shims();
|
|
3282
4928
|
var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
|
|
4929
|
+
/** LMNT output format names; `RAW` is the `LMNTTTS` constructor default. */
var LMNTAudioFormat = {
  AAC: "aac",
  MP3: "mp3",
  MULAW: "mulaw",
  RAW: "raw",
  WAV: "wav"
};

/**
 * LMNT model ids. `BLIZZARD` is the constructor default and is the model for
 * which the default language is `"auto"` rather than `"en"`.
 */
var LMNTModel = {
  BLIZZARD: "blizzard",
  AURORA: "aurora"
};

/** Sample rates in Hz; `HZ_16000` is the constructor default. */
var LMNTSampleRate = {
  HZ_8000: 8000,
  HZ_16000: 16000,
  HZ_24000: 24000
};
|
|
3283
4945
|
var LMNTTTS = class {
|
|
3284
4946
|
apiKey;
|
|
3285
4947
|
model;
|
|
@@ -3292,11 +4954,11 @@ var LMNTTTS = class {
|
|
|
3292
4954
|
baseUrl;
|
|
3293
4955
|
constructor(apiKey, opts = {}) {
|
|
3294
4956
|
this.apiKey = apiKey;
|
|
3295
|
-
this.model = opts.model ??
|
|
4957
|
+
this.model = opts.model ?? LMNTModel.BLIZZARD;
|
|
3296
4958
|
this.voice = opts.voice ?? "leah";
|
|
3297
|
-
this.language = opts.language ?? (this.model ===
|
|
3298
|
-
this.format = opts.format ??
|
|
3299
|
-
this.sampleRate = opts.sampleRate ??
|
|
4959
|
+
this.language = opts.language ?? (this.model === LMNTModel.BLIZZARD ? "auto" : "en");
|
|
4960
|
+
this.format = opts.format ?? LMNTAudioFormat.RAW;
|
|
4961
|
+
this.sampleRate = opts.sampleRate ?? LMNTSampleRate.HZ_16000;
|
|
3300
4962
|
this.temperature = opts.temperature ?? 1;
|
|
3301
4963
|
this.topP = opts.topP ?? 0.8;
|
|
3302
4964
|
this.baseUrl = opts.baseUrl ?? LMNT_BASE_URL;
|
|
@@ -3313,6 +4975,131 @@ var LMNTTTS = class {
|
|
|
3313
4975
|
top_p: this.topP
|
|
3314
4976
|
};
|
|
3315
4977
|
}
|
|
4978
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
4979
|
+
async synthesize(text) {
|
|
4980
|
+
const chunks = [];
|
|
4981
|
+
for await (const chunk of this.synthesizeStream(text)) {
|
|
4982
|
+
chunks.push(chunk);
|
|
4983
|
+
}
|
|
4984
|
+
return Buffer.concat(chunks);
|
|
4985
|
+
}
|
|
4986
|
+
/** Yield audio chunks as they arrive — raw PCM_S16LE by default. */
|
|
4987
|
+
async *synthesizeStream(text) {
|
|
4988
|
+
const response = await fetch(this.baseUrl, {
|
|
4989
|
+
method: "POST",
|
|
4990
|
+
headers: {
|
|
4991
|
+
"Content-Type": "application/json",
|
|
4992
|
+
"X-API-Key": this.apiKey
|
|
4993
|
+
},
|
|
4994
|
+
body: JSON.stringify(this.buildPayload(text)),
|
|
4995
|
+
signal: AbortSignal.timeout(3e4)
|
|
4996
|
+
});
|
|
4997
|
+
if (!response.ok) {
|
|
4998
|
+
const body = await response.text();
|
|
4999
|
+
throw new Error(`LMNT TTS error ${response.status}: ${body}`);
|
|
5000
|
+
}
|
|
5001
|
+
if (!response.body) {
|
|
5002
|
+
throw new Error("LMNT TTS: no response body");
|
|
5003
|
+
}
|
|
5004
|
+
const reader = response.body.getReader();
|
|
5005
|
+
try {
|
|
5006
|
+
while (true) {
|
|
5007
|
+
const { done, value } = await reader.read();
|
|
5008
|
+
if (done) break;
|
|
5009
|
+
if (value && value.length > 0) {
|
|
5010
|
+
yield Buffer.from(value);
|
|
5011
|
+
}
|
|
5012
|
+
}
|
|
5013
|
+
} finally {
|
|
5014
|
+
if (typeof reader.cancel === "function")
|
|
5015
|
+
await reader.cancel().catch(() => {
|
|
5016
|
+
});
|
|
5017
|
+
reader.releaseLock();
|
|
5018
|
+
}
|
|
5019
|
+
}
|
|
5020
|
+
};
|
|
5021
|
+
|
|
5022
|
+
// src/tts/lmnt.ts
|
|
5023
|
+
/**
 * Wrapper around `LMNTTTS` registered under the `lmnt` provider key. It
 * resolves the API key from the options or `LMNT_API_KEY` and passes the
 * remaining options through unchanged.
 */
var TTS6 = class extends LMNTTTS {
  static providerKey = "lmnt";

  /**
   * @param {object} [opts] `apiKey` plus any `LMNTTTS` options.
   * @throws {Error} When neither `opts.apiKey` nor `LMNT_API_KEY` yields a key.
   */
  constructor(opts = {}) {
    const { apiKey, ...providerOpts } = opts;
    const resolvedKey = apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    super(resolvedKey, providerOpts);
  }
};
|
|
5037
|
+
|
|
5038
|
+
// src/tts/inworld.ts
|
|
5039
|
+
init_esm_shims();
|
|
5040
|
+
|
|
5041
|
+
// src/providers/inworld-tts.ts
|
|
5042
|
+
init_esm_shims();
|
|
5043
|
+
var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
|
|
5044
|
+
/** Inworld model ids; `TTS_2` is the `InworldTTS` constructor default. */
var InworldModel = {
  TTS_2: "inworld-tts-2",
  TTS_1_5_MAX: "inworld-tts-1.5-max",
  TTS_1_5_MINI: "inworld-tts-1.5-mini",
  TTS_1_MAX: "inworld-tts-1-max",
  TTS_1: "inworld-tts-1"
};

/** Audio encoding names; `PCM` is the constructor default. */
var InworldAudioEncoding = {
  PCM: "PCM",
  LINEAR16: "LINEAR16",
  OGG_OPUS: "OGG_OPUS",
  MP3: "MP3"
};
|
|
5057
|
+
var InworldTTS = class {
|
|
5058
|
+
authToken;
|
|
5059
|
+
model;
|
|
5060
|
+
voice;
|
|
5061
|
+
language;
|
|
5062
|
+
audioEncoding;
|
|
5063
|
+
sampleRate;
|
|
5064
|
+
bitrate;
|
|
5065
|
+
temperature;
|
|
5066
|
+
speakingRate;
|
|
5067
|
+
deliveryMode;
|
|
5068
|
+
baseUrl;
|
|
5069
|
+
constructor(authToken, opts = {}) {
|
|
5070
|
+
if (!authToken) {
|
|
5071
|
+
throw new Error("Inworld TTS: authToken is required");
|
|
5072
|
+
}
|
|
5073
|
+
this.authToken = authToken;
|
|
5074
|
+
this.model = opts.model ?? InworldModel.TTS_2;
|
|
5075
|
+
this.voice = opts.voice ?? "Ashley";
|
|
5076
|
+
this.language = opts.language;
|
|
5077
|
+
this.audioEncoding = opts.audioEncoding ?? InworldAudioEncoding.PCM;
|
|
5078
|
+
this.sampleRate = opts.sampleRate ?? 16e3;
|
|
5079
|
+
this.bitrate = opts.bitrate ?? 64e3;
|
|
5080
|
+
this.temperature = opts.temperature;
|
|
5081
|
+
this.speakingRate = opts.speakingRate ?? 1;
|
|
5082
|
+
this.deliveryMode = opts.deliveryMode;
|
|
5083
|
+
this.baseUrl = opts.baseUrl ?? INWORLD_BASE_URL;
|
|
5084
|
+
}
|
|
5085
|
+
buildPayload(text) {
|
|
5086
|
+
const payload = {
|
|
5087
|
+
text,
|
|
5088
|
+
voiceId: this.voice,
|
|
5089
|
+
modelId: this.model,
|
|
5090
|
+
audioConfig: {
|
|
5091
|
+
audioEncoding: this.audioEncoding,
|
|
5092
|
+
bitrate: this.bitrate,
|
|
5093
|
+
sampleRateHertz: this.sampleRate
|
|
5094
|
+
},
|
|
5095
|
+
speakingRate: this.speakingRate
|
|
5096
|
+
};
|
|
5097
|
+
if (this.language !== void 0) payload.language = this.language;
|
|
5098
|
+
if (this.temperature !== void 0) payload.temperature = this.temperature;
|
|
5099
|
+
if (this.deliveryMode !== void 0) payload.deliveryMode = this.deliveryMode;
|
|
5100
|
+
return payload;
|
|
5101
|
+
}
|
|
5102
|
+
/** Synthesize text and return the concatenated audio buffer. */
|
|
3316
5103
|
async synthesize(text) {
|
|
3317
5104
|
const chunks = [];
|
|
3318
5105
|
for await (const chunk of this.synthesizeStream(text)) {
|
|
@@ -3320,32 +5107,51 @@ var LMNTTTS = class {
|
|
|
3320
5107
|
}
|
|
3321
5108
|
return Buffer.concat(chunks);
|
|
3322
5109
|
}
|
|
3323
|
-
/**
|
|
5110
|
+
/**
|
|
5111
|
+
* Yield audio chunks as they arrive. With the default `audioEncoding=PCM`
|
|
5112
|
+
* these are raw PCM_S16LE bytes at `sampleRate`.
|
|
5113
|
+
*/
|
|
3324
5114
|
async *synthesizeStream(text) {
|
|
3325
5115
|
const response = await fetch(this.baseUrl, {
|
|
3326
5116
|
method: "POST",
|
|
3327
5117
|
headers: {
|
|
3328
5118
|
"Content-Type": "application/json",
|
|
3329
|
-
|
|
5119
|
+
Authorization: `Basic ${this.authToken}`
|
|
3330
5120
|
},
|
|
3331
5121
|
body: JSON.stringify(this.buildPayload(text)),
|
|
3332
|
-
signal: AbortSignal.timeout(
|
|
5122
|
+
signal: AbortSignal.timeout(6e4)
|
|
3333
5123
|
});
|
|
3334
5124
|
if (!response.ok) {
|
|
3335
5125
|
const body = await response.text();
|
|
3336
|
-
throw new Error(`
|
|
5126
|
+
throw new Error(`Inworld TTS error ${response.status}: ${body}`);
|
|
3337
5127
|
}
|
|
3338
5128
|
if (!response.body) {
|
|
3339
|
-
throw new Error("
|
|
5129
|
+
throw new Error("Inworld TTS: no response body");
|
|
3340
5130
|
}
|
|
3341
5131
|
const reader = response.body.getReader();
|
|
5132
|
+
const decoder = new TextDecoder("utf-8");
|
|
5133
|
+
let buffered = "";
|
|
3342
5134
|
try {
|
|
3343
5135
|
while (true) {
|
|
3344
5136
|
const { done, value } = await reader.read();
|
|
3345
|
-
if (done)
|
|
3346
|
-
|
|
3347
|
-
|
|
5137
|
+
if (done) {
|
|
5138
|
+
buffered += decoder.decode();
|
|
5139
|
+
break;
|
|
3348
5140
|
}
|
|
5141
|
+
buffered += decoder.decode(value, { stream: true });
|
|
5142
|
+
let newlineIdx;
|
|
5143
|
+
while ((newlineIdx = buffered.indexOf("\n")) >= 0) {
|
|
5144
|
+
const line = buffered.slice(0, newlineIdx).trim();
|
|
5145
|
+
buffered = buffered.slice(newlineIdx + 1);
|
|
5146
|
+
if (!line) continue;
|
|
5147
|
+
const audio = decodeNdjsonLine(line);
|
|
5148
|
+
if (audio && audio.length > 0) yield audio;
|
|
5149
|
+
}
|
|
5150
|
+
}
|
|
5151
|
+
const tail = buffered.trim();
|
|
5152
|
+
if (tail) {
|
|
5153
|
+
const audio = decodeNdjsonLine(tail);
|
|
5154
|
+
if (audio && audio.length > 0) yield audio;
|
|
3349
5155
|
}
|
|
3350
5156
|
} finally {
|
|
3351
5157
|
if (typeof reader.cancel === "function")
|
|
@@ -3355,15 +5161,28 @@ var LMNTTTS = class {
|
|
|
3355
5161
|
}
|
|
3356
5162
|
}
|
|
3357
5163
|
};
|
|
5164
|
+
function decodeNdjsonLine(line) {
|
|
5165
|
+
let parsed;
|
|
5166
|
+
try {
|
|
5167
|
+
parsed = JSON.parse(line);
|
|
5168
|
+
} catch {
|
|
5169
|
+
return null;
|
|
5170
|
+
}
|
|
5171
|
+
if (typeof parsed !== "object" || parsed === null) return null;
|
|
5172
|
+
const result = parsed.result;
|
|
5173
|
+
const audioB64 = result?.audioContent;
|
|
5174
|
+
if (typeof audioB64 !== "string" || audioB64.length === 0) return null;
|
|
5175
|
+
return Buffer.from(audioB64, "base64");
|
|
5176
|
+
}
|
|
3358
5177
|
|
|
3359
|
-
// src/tts/
|
|
3360
|
-
var
|
|
3361
|
-
static providerKey = "
|
|
5178
|
+
// src/tts/inworld.ts
|
|
5179
|
+
var TTS7 = class extends InworldTTS {
|
|
5180
|
+
static providerKey = "inworld";
|
|
3362
5181
|
constructor(opts = {}) {
|
|
3363
|
-
const key = opts.apiKey ?? process.env.
|
|
5182
|
+
const key = opts.apiKey ?? process.env.INWORLD_API_KEY;
|
|
3364
5183
|
if (!key) {
|
|
3365
5184
|
throw new Error(
|
|
3366
|
-
"
|
|
5185
|
+
"Inworld TTS requires an apiKey. Pass { apiKey: '...' } or set INWORLD_API_KEY in the environment."
|
|
3367
5186
|
);
|
|
3368
5187
|
}
|
|
3369
5188
|
const { apiKey: _ignored, ...rest } = opts;
|
|
@@ -3373,6 +5192,7 @@ var TTS5 = class extends LMNTTTS {
|
|
|
3373
5192
|
};
|
|
3374
5193
|
|
|
3375
5194
|
// src/llm/openai.ts
|
|
5195
|
+
init_esm_shims();
|
|
3376
5196
|
var LLM = class extends OpenAILLMProvider {
|
|
3377
5197
|
static providerKey = "openai";
|
|
3378
5198
|
constructor(opts = {}) {
|
|
@@ -3397,10 +5217,24 @@ var LLM = class extends OpenAILLMProvider {
|
|
|
3397
5217
|
}
|
|
3398
5218
|
};
|
|
3399
5219
|
|
|
5220
|
+
// src/llm/anthropic.ts
|
|
5221
|
+
init_esm_shims();
|
|
5222
|
+
|
|
3400
5223
|
// src/providers/anthropic-llm.ts
|
|
5224
|
+
init_esm_shims();
|
|
3401
5225
|
var DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
3402
5226
|
var DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
|
|
3403
|
-
var
|
|
5227
|
+
var AnthropicModel = {
|
|
5228
|
+
CLAUDE_HAIKU_4_5_ALIAS: "claude-haiku-4-5",
|
|
5229
|
+
CLAUDE_SONNET_4_6_ALIAS: "claude-sonnet-4-6",
|
|
5230
|
+
CLAUDE_OPUS_4_7_ALIAS: "claude-opus-4-7",
|
|
5231
|
+
CLAUDE_3_5_SONNET_ALIAS: "claude-3-5-sonnet-latest",
|
|
5232
|
+
CLAUDE_3_5_HAIKU_ALIAS: "claude-3-5-haiku-latest",
|
|
5233
|
+
CLAUDE_HAIKU_4_5_20251001: "claude-haiku-4-5-20251001",
|
|
5234
|
+
CLAUDE_3_5_SONNET_20241022: "claude-3-5-sonnet-20241022",
|
|
5235
|
+
CLAUDE_3_5_HAIKU_20241022: "claude-3-5-haiku-20241022"
|
|
5236
|
+
};
|
|
5237
|
+
var DEFAULT_MODEL = AnthropicModel.CLAUDE_HAIKU_4_5_20251001;
|
|
3404
5238
|
var DEFAULT_MAX_TOKENS = 1024;
|
|
3405
5239
|
var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
|
|
3406
5240
|
var AnthropicLLMProvider = class {
|
|
@@ -3425,7 +5259,8 @@ var AnthropicLLMProvider = class {
|
|
|
3425
5259
|
this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
|
|
3426
5260
|
this.promptCaching = options.promptCaching ?? true;
|
|
3427
5261
|
}
|
|
3428
|
-
|
|
5262
|
+
/** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
|
|
5263
|
+
async *stream(messages, tools, opts) {
|
|
3429
5264
|
const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
|
|
3430
5265
|
const anthropicTools = tools ? toAnthropicTools(tools) : null;
|
|
3431
5266
|
const body = {
|
|
@@ -3473,7 +5308,7 @@ var AnthropicLLMProvider = class {
|
|
|
3473
5308
|
method: "POST",
|
|
3474
5309
|
headers,
|
|
3475
5310
|
body: JSON.stringify(body),
|
|
3476
|
-
signal: AbortSignal.timeout(3e4)
|
|
5311
|
+
signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
|
|
3477
5312
|
});
|
|
3478
5313
|
if (!response.ok) {
|
|
3479
5314
|
const errText = await response.text();
|
|
@@ -3636,12 +5471,28 @@ var LLM2 = class extends AnthropicLLMProvider {
|
|
|
3636
5471
|
}
|
|
3637
5472
|
};
|
|
3638
5473
|
|
|
5474
|
+
// src/llm/groq.ts
|
|
5475
|
+
init_esm_shims();
|
|
5476
|
+
|
|
5477
|
+
// src/providers/groq-llm.ts
|
|
5478
|
+
init_esm_shims();
|
|
5479
|
+
|
|
3639
5480
|
// src/version.ts
|
|
3640
|
-
|
|
5481
|
+
init_esm_shims();
|
|
5482
|
+
var VERSION = "0.5.5";
|
|
3641
5483
|
|
|
3642
5484
|
// src/providers/groq-llm.ts
|
|
3643
5485
|
var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
|
|
3644
|
-
var
|
|
5486
|
+
var GroqModel = {
|
|
5487
|
+
LLAMA_3_3_70B_VERSATILE: "llama-3.3-70b-versatile",
|
|
5488
|
+
LLAMA_3_1_8B_INSTANT: "llama-3.1-8b-instant",
|
|
5489
|
+
LLAMA_3_3_70B_SPECDEC: "llama-3.3-70b-specdec",
|
|
5490
|
+
LLAMA_3_70B: "llama3-70b-8192",
|
|
5491
|
+
LLAMA_3_8B: "llama3-8b-8192",
|
|
5492
|
+
MIXTRAL_8X7B: "mixtral-8x7b-32768",
|
|
5493
|
+
GEMMA2_9B: "gemma2-9b-it"
|
|
5494
|
+
};
|
|
5495
|
+
var DEFAULT_MODEL2 = GroqModel.LLAMA_3_3_70B_VERSATILE;
|
|
3645
5496
|
var GroqLLMProvider = class {
|
|
3646
5497
|
apiKey;
|
|
3647
5498
|
model;
|
|
@@ -3676,7 +5527,8 @@ var GroqLLMProvider = class {
|
|
|
3676
5527
|
this.presencePenalty = options.presencePenalty;
|
|
3677
5528
|
this.stop = options.stop;
|
|
3678
5529
|
}
|
|
3679
|
-
|
|
5530
|
+
/** Stream Patter-format LLM chunks from the Groq chat completions API. */
|
|
5531
|
+
async *stream(messages, tools, opts) {
|
|
3680
5532
|
const body = {
|
|
3681
5533
|
model: this.model,
|
|
3682
5534
|
messages,
|
|
@@ -3704,7 +5556,7 @@ var GroqLLMProvider = class {
|
|
|
3704
5556
|
"User-Agent": `getpatter/${VERSION}`
|
|
3705
5557
|
},
|
|
3706
5558
|
body: JSON.stringify(body),
|
|
3707
|
-
signal: AbortSignal.timeout(3e4)
|
|
5559
|
+
signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
|
|
3708
5560
|
});
|
|
3709
5561
|
if (!response.ok) {
|
|
3710
5562
|
const errText = await response.text();
|
|
@@ -3794,9 +5646,20 @@ var LLM3 = class extends GroqLLMProvider {
|
|
|
3794
5646
|
}
|
|
3795
5647
|
};
|
|
3796
5648
|
|
|
5649
|
+
// src/llm/cerebras.ts
|
|
5650
|
+
init_esm_shims();
|
|
5651
|
+
|
|
3797
5652
|
// src/providers/cerebras-llm.ts
|
|
5653
|
+
init_esm_shims();
|
|
3798
5654
|
var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
|
|
3799
|
-
var
|
|
5655
|
+
var CerebrasModel = {
|
|
5656
|
+
GPT_OSS_120B: "gpt-oss-120b",
|
|
5657
|
+
LLAMA_3_1_8B: "llama3.1-8b",
|
|
5658
|
+
LLAMA_3_3_70B: "llama-3.3-70b",
|
|
5659
|
+
QWEN_3_235B_INSTRUCT: "qwen-3-235b-a22b-instruct-2507",
|
|
5660
|
+
ZAI_GLM_4_7: "zai-glm-4.7"
|
|
5661
|
+
};
|
|
5662
|
+
var DEFAULT_MODEL3 = CerebrasModel.GPT_OSS_120B;
|
|
3800
5663
|
var RETRY_BACKOFF_BASE_MS = 500;
|
|
3801
5664
|
var CerebrasLLMProvider = class {
|
|
3802
5665
|
apiKey;
|
|
@@ -3834,7 +5697,8 @@ var CerebrasLLMProvider = class {
|
|
|
3834
5697
|
this.presencePenalty = options.presencePenalty;
|
|
3835
5698
|
this.stop = options.stop;
|
|
3836
5699
|
}
|
|
3837
|
-
|
|
5700
|
+
/** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
|
|
5701
|
+
async *stream(messages, tools, opts) {
|
|
3838
5702
|
const body = {
|
|
3839
5703
|
model: this.model,
|
|
3840
5704
|
messages,
|
|
@@ -3876,7 +5740,7 @@ var CerebrasLLMProvider = class {
|
|
|
3876
5740
|
method: "POST",
|
|
3877
5741
|
headers,
|
|
3878
5742
|
body: payload,
|
|
3879
|
-
signal: AbortSignal.timeout(3e4)
|
|
5743
|
+
signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
|
|
3880
5744
|
});
|
|
3881
5745
|
if (response.ok) {
|
|
3882
5746
|
yield* parseOpenAISseStream(response);
|
|
@@ -3979,8 +5843,20 @@ var LLM4 = class extends CerebrasLLMProvider {
|
|
|
3979
5843
|
}
|
|
3980
5844
|
};
|
|
3981
5845
|
|
|
5846
|
+
// src/llm/google.ts
|
|
5847
|
+
init_esm_shims();
|
|
5848
|
+
|
|
3982
5849
|
// src/providers/google-llm.ts
|
|
3983
|
-
|
|
5850
|
+
init_esm_shims();
|
|
5851
|
+
var GoogleModel = {
|
|
5852
|
+
GEMINI_2_5_FLASH: "gemini-2.5-flash",
|
|
5853
|
+
GEMINI_2_5_PRO: "gemini-2.5-pro",
|
|
5854
|
+
GEMINI_2_0_FLASH: "gemini-2.0-flash",
|
|
5855
|
+
GEMINI_2_0_FLASH_LITE: "gemini-2.0-flash-lite",
|
|
5856
|
+
GEMINI_1_5_FLASH: "gemini-1.5-flash",
|
|
5857
|
+
GEMINI_1_5_PRO: "gemini-1.5-pro"
|
|
5858
|
+
};
|
|
5859
|
+
var DEFAULT_MODEL4 = GoogleModel.GEMINI_2_5_FLASH;
|
|
3984
5860
|
var DEFAULT_BASE_URL3 = "https://generativelanguage.googleapis.com/v1beta";
|
|
3985
5861
|
var GoogleLLMProvider = class {
|
|
3986
5862
|
apiKey;
|
|
@@ -4000,7 +5876,8 @@ var GoogleLLMProvider = class {
|
|
|
4000
5876
|
this.temperature = options.temperature;
|
|
4001
5877
|
this.maxOutputTokens = options.maxOutputTokens;
|
|
4002
5878
|
}
|
|
4003
|
-
|
|
5879
|
+
/** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
|
|
5880
|
+
async *stream(messages, tools, opts) {
|
|
4004
5881
|
const { systemInstruction, contents } = toGeminiContents(messages);
|
|
4005
5882
|
const geminiTools = tools ? toGeminiTools(tools) : null;
|
|
4006
5883
|
const body = { contents };
|
|
@@ -4018,7 +5895,7 @@ var GoogleLLMProvider = class {
|
|
|
4018
5895
|
method: "POST",
|
|
4019
5896
|
headers: { "Content-Type": "application/json" },
|
|
4020
5897
|
body: JSON.stringify(body),
|
|
4021
|
-
signal: AbortSignal.timeout(3e4)
|
|
5898
|
+
signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4))
|
|
4022
5899
|
});
|
|
4023
5900
|
if (!response.ok) {
|
|
4024
5901
|
const errText = await response.text();
|
|
@@ -4188,280 +6065,8 @@ var LLM5 = class extends GoogleLLMProvider {
|
|
|
4188
6065
|
}
|
|
4189
6066
|
};
|
|
4190
6067
|
|
|
4191
|
-
// src/
|
|
4192
|
-
|
|
4193
|
-
import * as fs from "fs";
|
|
4194
|
-
import * as path from "path";
|
|
4195
|
-
import { fileURLToPath } from "url";
|
|
4196
|
-
var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
|
|
4197
|
-
function resolveModuleDir() {
|
|
4198
|
-
try {
|
|
4199
|
-
const cjsDir = new Function("return typeof __dirname !== 'undefined' ? __dirname : null")();
|
|
4200
|
-
if (typeof cjsDir === "string") return cjsDir;
|
|
4201
|
-
} catch {
|
|
4202
|
-
}
|
|
4203
|
-
try {
|
|
4204
|
-
const url = import.meta.url;
|
|
4205
|
-
if (url) return path.dirname(fileURLToPath(url));
|
|
4206
|
-
} catch {
|
|
4207
|
-
}
|
|
4208
|
-
return process.cwd();
|
|
4209
|
-
}
|
|
4210
|
-
var MODULE_DIR = resolveModuleDir();
|
|
4211
|
-
function resolveDefaultModelPath() {
|
|
4212
|
-
const candidates = [
|
|
4213
|
-
path.join(MODULE_DIR, "resources", "silero_vad.onnx"),
|
|
4214
|
-
path.join(MODULE_DIR, "..", "resources", "silero_vad.onnx")
|
|
4215
|
-
];
|
|
4216
|
-
for (const c of candidates) if (fs.existsSync(c)) return c;
|
|
4217
|
-
return candidates[0];
|
|
4218
|
-
}
|
|
4219
|
-
var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
|
|
4220
|
-
async function loadOnnxRuntime() {
|
|
4221
|
-
let firstErr;
|
|
4222
|
-
try {
|
|
4223
|
-
const mod = await import("./dist-YRCCJQ26.mjs");
|
|
4224
|
-
return mod;
|
|
4225
|
-
} catch (e) {
|
|
4226
|
-
firstErr = e;
|
|
4227
|
-
}
|
|
4228
|
-
try {
|
|
4229
|
-
const req = createRequire(path.join(process.cwd(), "package.json"));
|
|
4230
|
-
return req("onnxruntime-node");
|
|
4231
|
-
} catch (e) {
|
|
4232
|
-
const detail = e?.message ?? String(e);
|
|
4233
|
-
const original = firstErr?.message ?? String(firstErr);
|
|
4234
|
-
throw new Error(
|
|
4235
|
-
`
|
|
4236
|
-
SileroVAD requires the "onnxruntime-node" package, which could not be resolved.
|
|
4237
|
-
|
|
4238
|
-
Install: npm install onnxruntime-node
|
|
4239
|
-
|
|
4240
|
-
This is an optional peer dependency of getpatter (~210 MB) \u2014 it is only
|
|
4241
|
-
needed when you use SileroVAD in pipeline mode.
|
|
4242
|
-
|
|
4243
|
-
import() failed: ${original}
|
|
4244
|
-
cwd-require failed: ${detail}
|
|
4245
|
-
`
|
|
4246
|
-
);
|
|
4247
|
-
}
|
|
4248
|
-
}
|
|
4249
|
-
var ExpFilter = class {
|
|
4250
|
-
constructor(alpha) {
|
|
4251
|
-
this.alpha = alpha;
|
|
4252
|
-
if (!(alpha > 0 && alpha <= 1)) {
|
|
4253
|
-
throw new Error("alpha must be in (0, 1].");
|
|
4254
|
-
}
|
|
4255
|
-
}
|
|
4256
|
-
filtered = null;
|
|
4257
|
-
apply(exp, sample) {
|
|
4258
|
-
if (this.filtered === null) {
|
|
4259
|
-
this.filtered = sample;
|
|
4260
|
-
} else {
|
|
4261
|
-
const a = Math.pow(this.alpha, exp);
|
|
4262
|
-
this.filtered = a * this.filtered + (1 - a) * sample;
|
|
4263
|
-
}
|
|
4264
|
-
return this.filtered;
|
|
4265
|
-
}
|
|
4266
|
-
reset() {
|
|
4267
|
-
this.filtered = null;
|
|
4268
|
-
}
|
|
4269
|
-
};
|
|
4270
|
-
var OnnxModel = class {
|
|
4271
|
-
constructor(runtime, session, sampleRate) {
|
|
4272
|
-
this.runtime = runtime;
|
|
4273
|
-
this.session = session;
|
|
4274
|
-
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4275
|
-
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4276
|
-
}
|
|
4277
|
-
this.sampleRate = sampleRate;
|
|
4278
|
-
this.windowSizeSamples = sampleRate === 8e3 ? 256 : 512;
|
|
4279
|
-
this.contextSize = sampleRate === 8e3 ? 32 : 64;
|
|
4280
|
-
this.context = new Float32Array(this.contextSize);
|
|
4281
|
-
this.rnnState = new Float32Array(2 * 1 * 128);
|
|
4282
|
-
this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
|
|
4283
|
-
this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
|
|
4284
|
-
}
|
|
4285
|
-
sampleRate;
|
|
4286
|
-
windowSizeSamples;
|
|
4287
|
-
contextSize;
|
|
4288
|
-
context;
|
|
4289
|
-
rnnState;
|
|
4290
|
-
inputBuffer;
|
|
4291
|
-
sampleRateTensor;
|
|
4292
|
-
async run(window) {
|
|
4293
|
-
if (window.length !== this.windowSizeSamples) {
|
|
4294
|
-
throw new Error(
|
|
4295
|
-
`window must have exactly ${this.windowSizeSamples} samples, got ${window.length}`
|
|
4296
|
-
);
|
|
4297
|
-
}
|
|
4298
|
-
this.inputBuffer.set(this.context, 0);
|
|
4299
|
-
this.inputBuffer.set(window, this.contextSize);
|
|
4300
|
-
const { Tensor } = this.runtime;
|
|
4301
|
-
const feeds = {
|
|
4302
|
-
input: new Tensor("float32", this.inputBuffer, [1, this.inputBuffer.length]),
|
|
4303
|
-
state: new Tensor("float32", this.rnnState, [2, 1, 128]),
|
|
4304
|
-
sr: new Tensor("int64", this.sampleRateTensor, [])
|
|
4305
|
-
};
|
|
4306
|
-
const results = await this.session.run(feeds);
|
|
4307
|
-
const outputKey = Object.keys(results).find((k) => k !== "stateN") ?? "output";
|
|
4308
|
-
const stateKey = "stateN" in results ? "stateN" : Object.keys(results).find((k) => k !== outputKey);
|
|
4309
|
-
const out = results[outputKey];
|
|
4310
|
-
const newState = stateKey ? results[stateKey] : void 0;
|
|
4311
|
-
if (newState && newState.data instanceof Float32Array) {
|
|
4312
|
-
this.rnnState = Float32Array.from(newState.data);
|
|
4313
|
-
}
|
|
4314
|
-
this.context = this.inputBuffer.slice(-this.contextSize);
|
|
4315
|
-
const data = out.data;
|
|
4316
|
-
return data[0] ?? 0;
|
|
4317
|
-
}
|
|
4318
|
-
};
|
|
4319
|
-
var SileroVAD = class _SileroVAD {
|
|
4320
|
-
constructor(model, opts) {
|
|
4321
|
-
this.model = model;
|
|
4322
|
-
this.opts = opts;
|
|
4323
|
-
}
|
|
4324
|
-
pending = new Float32Array(0);
|
|
4325
|
-
expFilter = new ExpFilter(0.35);
|
|
4326
|
-
pubSpeaking = false;
|
|
4327
|
-
speechThresholdDuration = 0;
|
|
4328
|
-
silenceThresholdDuration = 0;
|
|
4329
|
-
closed = false;
|
|
4330
|
-
/**
|
|
4331
|
-
* Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
|
|
4332
|
-
* Throws if `onnxruntime-node` is not installed.
|
|
4333
|
-
*/
|
|
4334
|
-
static async load(options = {}) {
|
|
4335
|
-
const sampleRate = options.sampleRate ?? 16e3;
|
|
4336
|
-
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4337
|
-
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4338
|
-
}
|
|
4339
|
-
const activationThreshold = options.activationThreshold ?? 0.5;
|
|
4340
|
-
const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
|
|
4341
|
-
if (deactivationThreshold <= 0) {
|
|
4342
|
-
throw new Error("deactivationThreshold must be greater than 0");
|
|
4343
|
-
}
|
|
4344
|
-
const runtime = await loadOnnxRuntime();
|
|
4345
|
-
const modelPath = options.onnxFilePath ?? DEFAULT_MODEL_PATH;
|
|
4346
|
-
const session = await runtime.InferenceSession.create(modelPath, {
|
|
4347
|
-
interOpNumThreads: 1,
|
|
4348
|
-
intraOpNumThreads: 1,
|
|
4349
|
-
executionMode: "sequential",
|
|
4350
|
-
executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
|
|
4351
|
-
});
|
|
4352
|
-
const model = new OnnxModel(runtime, session, sampleRate);
|
|
4353
|
-
return new _SileroVAD(model, {
|
|
4354
|
-
minSpeechDuration: options.minSpeechDuration ?? 0.05,
|
|
4355
|
-
minSilenceDuration: options.minSilenceDuration ?? 0.55,
|
|
4356
|
-
prefixPaddingDuration: options.prefixPaddingDuration ?? 0.5,
|
|
4357
|
-
activationThreshold,
|
|
4358
|
-
deactivationThreshold,
|
|
4359
|
-
sampleRate
|
|
4360
|
-
});
|
|
4361
|
-
}
|
|
4362
|
-
/**
|
|
4363
|
-
* Internal factory used by tests — bypasses onnxruntime-node loading.
|
|
4364
|
-
* @internal
|
|
4365
|
-
*/
|
|
4366
|
-
static fromOnnxModel(runtime, session, options) {
|
|
4367
|
-
const model = new OnnxModel(runtime, session, options.sampleRate);
|
|
4368
|
-
return new _SileroVAD(model, options);
|
|
4369
|
-
}
|
|
4370
|
-
get sampleRate() {
|
|
4371
|
-
return this.opts.sampleRate;
|
|
4372
|
-
}
|
|
4373
|
-
/**
|
|
4374
|
-
* Number of int16 PCM samples that must be provided per call to
|
|
4375
|
-
* processFrame for the model to run one inference window.
|
|
4376
|
-
*
|
|
4377
|
-
* Constraint (ported from LiveKit Agents / Silero ONNX spec):
|
|
4378
|
-
* - 16 000 Hz → 512 samples (32 ms)
|
|
4379
|
-
* - 8 000 Hz → 256 samples (32 ms)
|
|
4380
|
-
*
|
|
4381
|
-
* Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
|
|
4382
|
-
* should buffer incoming audio until at least numFramesRequired() int16
|
|
4383
|
-
* samples are available before calling processFrame. The provider
|
|
4384
|
-
* internally buffers partial windows so smaller chunks are also safe, but
|
|
4385
|
-
* passing exactly one window per call minimises heap allocation.
|
|
4386
|
-
*/
|
|
4387
|
-
numFramesRequired() {
|
|
4388
|
-
return this.opts.sampleRate === 8e3 ? 256 : 512;
|
|
4389
|
-
}
|
|
4390
|
-
async processFrame(pcmChunk, sampleRate) {
|
|
4391
|
-
if (this.closed) {
|
|
4392
|
-
throw new Error("SileroVAD is closed");
|
|
4393
|
-
}
|
|
4394
|
-
if (sampleRate !== this.opts.sampleRate) {
|
|
4395
|
-
throw new Error(
|
|
4396
|
-
`input sampleRate ${sampleRate} does not match model sampleRate ${this.opts.sampleRate}; resampling is not implemented in the Patter port`
|
|
4397
|
-
);
|
|
4398
|
-
}
|
|
4399
|
-
if (pcmChunk.length === 0) {
|
|
4400
|
-
return null;
|
|
4401
|
-
}
|
|
4402
|
-
const numSamples = Math.floor(pcmChunk.length / 2);
|
|
4403
|
-
if (numSamples === 0) {
|
|
4404
|
-
return null;
|
|
4405
|
-
}
|
|
4406
|
-
const samples = new Float32Array(numSamples);
|
|
4407
|
-
for (let i = 0; i < numSamples; i++) {
|
|
4408
|
-
samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
|
|
4409
|
-
}
|
|
4410
|
-
const merged = new Float32Array(this.pending.length + samples.length);
|
|
4411
|
-
merged.set(this.pending, 0);
|
|
4412
|
-
merged.set(samples, this.pending.length);
|
|
4413
|
-
this.pending = merged;
|
|
4414
|
-
const windowSize = this.model.windowSizeSamples;
|
|
4415
|
-
let event = null;
|
|
4416
|
-
while (this.pending.length >= windowSize) {
|
|
4417
|
-
const window = this.pending.slice(0, windowSize);
|
|
4418
|
-
this.pending = this.pending.slice(windowSize);
|
|
4419
|
-
const rawP = await this.model.run(window);
|
|
4420
|
-
const p = this.expFilter.apply(1, rawP);
|
|
4421
|
-
const windowDuration = windowSize / this.opts.sampleRate;
|
|
4422
|
-
const transition = this.advanceState(p, windowDuration);
|
|
4423
|
-
if (transition !== null) {
|
|
4424
|
-
event = transition;
|
|
4425
|
-
}
|
|
4426
|
-
}
|
|
4427
|
-
return event;
|
|
4428
|
-
}
|
|
4429
|
-
advanceState(p, windowDuration) {
|
|
4430
|
-
const opts = this.opts;
|
|
4431
|
-
if (p >= opts.activationThreshold || this.pubSpeaking && p > opts.deactivationThreshold) {
|
|
4432
|
-
this.speechThresholdDuration += windowDuration;
|
|
4433
|
-
this.silenceThresholdDuration = 0;
|
|
4434
|
-
if (!this.pubSpeaking) {
|
|
4435
|
-
if (this.speechThresholdDuration >= opts.minSpeechDuration) {
|
|
4436
|
-
this.pubSpeaking = true;
|
|
4437
|
-
return {
|
|
4438
|
-
type: "speech_start",
|
|
4439
|
-
confidence: p,
|
|
4440
|
-
durationMs: this.speechThresholdDuration * 1e3
|
|
4441
|
-
};
|
|
4442
|
-
}
|
|
4443
|
-
}
|
|
4444
|
-
} else {
|
|
4445
|
-
this.silenceThresholdDuration += windowDuration;
|
|
4446
|
-
this.speechThresholdDuration = 0;
|
|
4447
|
-
if (this.pubSpeaking && this.silenceThresholdDuration >= opts.minSilenceDuration) {
|
|
4448
|
-
this.pubSpeaking = false;
|
|
4449
|
-
return {
|
|
4450
|
-
type: "speech_end",
|
|
4451
|
-
confidence: p,
|
|
4452
|
-
durationMs: this.silenceThresholdDuration * 1e3
|
|
4453
|
-
};
|
|
4454
|
-
}
|
|
4455
|
-
}
|
|
4456
|
-
return null;
|
|
4457
|
-
}
|
|
4458
|
-
async close() {
|
|
4459
|
-
if (this.closed) return;
|
|
4460
|
-
this.closed = true;
|
|
4461
|
-
}
|
|
4462
|
-
};
|
|
4463
|
-
|
|
4464
|
-
// src/carriers/twilio.ts
|
|
6068
|
+
// src/telephony/twilio.ts
|
|
6069
|
+
init_esm_shims();
|
|
4465
6070
|
var Carrier = class {
|
|
4466
6071
|
kind = "twilio";
|
|
4467
6072
|
accountSid;
|
|
@@ -4484,7 +6089,8 @@ var Carrier = class {
|
|
|
4484
6089
|
}
|
|
4485
6090
|
};
|
|
4486
6091
|
|
|
4487
|
-
// src/
|
|
6092
|
+
// src/telephony/telnyx.ts
|
|
6093
|
+
init_esm_shims();
|
|
4488
6094
|
var Carrier2 = class {
|
|
4489
6095
|
kind = "telnyx";
|
|
4490
6096
|
apiKey;
|
|
@@ -4511,6 +6117,7 @@ var Carrier2 = class {
|
|
|
4511
6117
|
};
|
|
4512
6118
|
|
|
4513
6119
|
// src/public-api.ts
|
|
6120
|
+
init_esm_shims();
|
|
4514
6121
|
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
|
|
4515
6122
|
var Guardrail = class {
|
|
4516
6123
|
name;
|
|
@@ -4560,6 +6167,7 @@ function tool(opts) {
|
|
|
4560
6167
|
}
|
|
4561
6168
|
|
|
4562
6169
|
// src/chat-context.ts
|
|
6170
|
+
init_esm_shims();
|
|
4563
6171
|
import { randomUUID } from "crypto";
|
|
4564
6172
|
function generateId() {
|
|
4565
6173
|
return randomUUID().replace(/-/g, "").slice(0, 12);
|
|
@@ -4585,21 +6193,25 @@ var ChatContext = class _ChatContext {
|
|
|
4585
6193
|
// -------------------------------------------------------------------------
|
|
4586
6194
|
// Add messages
|
|
4587
6195
|
// -------------------------------------------------------------------------
|
|
6196
|
+
/** Append a user message and return the created `ChatMessage`. */
|
|
4588
6197
|
addUser(content) {
|
|
4589
6198
|
const msg = createMessage("user", content);
|
|
4590
6199
|
this.items = [...this.items, msg];
|
|
4591
6200
|
return msg;
|
|
4592
6201
|
}
|
|
6202
|
+
/** Append an assistant message and return the created `ChatMessage`. */
|
|
4593
6203
|
addAssistant(content) {
|
|
4594
6204
|
const msg = createMessage("assistant", content);
|
|
4595
6205
|
this.items = [...this.items, msg];
|
|
4596
6206
|
return msg;
|
|
4597
6207
|
}
|
|
6208
|
+
/** Append a system message and return the created `ChatMessage`. */
|
|
4598
6209
|
addSystem(content) {
|
|
4599
6210
|
const msg = createMessage("system", content);
|
|
4600
6211
|
this.items = [...this.items, msg];
|
|
4601
6212
|
return msg;
|
|
4602
6213
|
}
|
|
6214
|
+
/** Append a tool-result message tied to a tool-call id. */
|
|
4603
6215
|
addToolResult(content, toolCallId) {
|
|
4604
6216
|
const msg = createMessage("tool", content, { toolCallId });
|
|
4605
6217
|
this.items = [...this.items, msg];
|
|
@@ -4608,13 +6220,16 @@ var ChatContext = class _ChatContext {
|
|
|
4608
6220
|
// -------------------------------------------------------------------------
|
|
4609
6221
|
// Access
|
|
4610
6222
|
// -------------------------------------------------------------------------
|
|
6223
|
+
/** Return a snapshot of all messages currently in the context. */
|
|
4611
6224
|
getMessages() {
|
|
4612
6225
|
return [...this.items];
|
|
4613
6226
|
}
|
|
6227
|
+
/** Return the last `n` messages (or `[]` when `n <= 0`). */
|
|
4614
6228
|
getLastN(n) {
|
|
4615
6229
|
if (n <= 0) return [];
|
|
4616
6230
|
return [...this.items.slice(-n)];
|
|
4617
6231
|
}
|
|
6232
|
+
/** Number of messages currently in the context. */
|
|
4618
6233
|
get length() {
|
|
4619
6234
|
return this.items.length;
|
|
4620
6235
|
}
|
|
@@ -4641,6 +6256,7 @@ var ChatContext = class _ChatContext {
|
|
|
4641
6256
|
// -------------------------------------------------------------------------
|
|
4642
6257
|
// Provider format conversion
|
|
4643
6258
|
// -------------------------------------------------------------------------
|
|
6259
|
+
/** Convert the conversation to the OpenAI Chat Completions message format. */
|
|
4644
6260
|
toOpenAI() {
|
|
4645
6261
|
return this.items.map((msg) => {
|
|
4646
6262
|
const result = {
|
|
@@ -4678,6 +6294,7 @@ var ChatContext = class _ChatContext {
|
|
|
4678
6294
|
// -------------------------------------------------------------------------
|
|
4679
6295
|
// Copy
|
|
4680
6296
|
// -------------------------------------------------------------------------
|
|
6297
|
+
/** Return a new `ChatContext` with the same messages (independent storage). */
|
|
4681
6298
|
copy() {
|
|
4682
6299
|
const ctx = new _ChatContext();
|
|
4683
6300
|
ctx.items = this.items.map((msg) => ({ ...msg }));
|
|
@@ -4686,9 +6303,11 @@ var ChatContext = class _ChatContext {
|
|
|
4686
6303
|
// -------------------------------------------------------------------------
|
|
4687
6304
|
// Serialization
|
|
4688
6305
|
// -------------------------------------------------------------------------
|
|
6306
|
+
/** Serialize the context to a JSON-safe object. */
|
|
4689
6307
|
toJSON() {
|
|
4690
6308
|
return { messages: [...this.items] };
|
|
4691
6309
|
}
|
|
6310
|
+
/** Reconstruct a `ChatContext` from the result of `toJSON()`. */
|
|
4692
6311
|
static fromJSON(data) {
|
|
4693
6312
|
const ctx = new _ChatContext();
|
|
4694
6313
|
ctx.items = (data.messages ?? []).map((msg) => Object.freeze({ ...msg }));
|
|
@@ -4697,6 +6316,7 @@ var ChatContext = class _ChatContext {
|
|
|
4697
6316
|
};
|
|
4698
6317
|
|
|
4699
6318
|
// src/services/ivr.ts
|
|
6319
|
+
init_esm_shims();
|
|
4700
6320
|
var DTMF_EVENTS = [
|
|
4701
6321
|
"0",
|
|
4702
6322
|
"1",
|
|
@@ -4772,16 +6392,19 @@ var TfidfLoopDetector = class {
|
|
|
4772
6392
|
this.similarityThreshold = similarityThreshold;
|
|
4773
6393
|
this.consecutiveThreshold = consecutiveThreshold;
|
|
4774
6394
|
}
|
|
6395
|
+
/** Forget all previously observed chunks and reset the consecutive-hit counter. */
|
|
4775
6396
|
reset() {
|
|
4776
6397
|
this.chunks = [];
|
|
4777
6398
|
this.consecutiveSimilar = 0;
|
|
4778
6399
|
}
|
|
6400
|
+
/** Record a new transcript chunk in the rolling window. */
|
|
4779
6401
|
addChunk(text) {
|
|
4780
6402
|
this.chunks.push({ text, vec: bagOfWords(text) });
|
|
4781
6403
|
if (this.chunks.length > this.windowSize) {
|
|
4782
6404
|
this.chunks = this.chunks.slice(-this.windowSize);
|
|
4783
6405
|
}
|
|
4784
6406
|
}
|
|
6407
|
+
/** Returns true once the most recent chunks look like a repeated IVR prompt. */
|
|
4785
6408
|
checkLoopDetection() {
|
|
4786
6409
|
if (this.chunks.length < 2) return false;
|
|
4787
6410
|
const last = this.chunks[this.chunks.length - 1];
|
|
@@ -4803,6 +6426,8 @@ var DebouncedCall = class {
|
|
|
4803
6426
|
this.callback = callback;
|
|
4804
6427
|
this.delayMs = delayMs;
|
|
4805
6428
|
}
|
|
6429
|
+
callback;
|
|
6430
|
+
delayMs;
|
|
4806
6431
|
timer = null;
|
|
4807
6432
|
schedule() {
|
|
4808
6433
|
this.cancel();
|
|
@@ -4848,13 +6473,16 @@ var IVRActivity = class {
|
|
|
4848
6473
|
this.maxSilenceDurationMs
|
|
4849
6474
|
);
|
|
4850
6475
|
}
|
|
6476
|
+
/** Begin tracking transcripts and silence; call once per call. */
|
|
4851
6477
|
async start() {
|
|
4852
6478
|
this.started = true;
|
|
4853
6479
|
}
|
|
6480
|
+
/** Stop tracking and cancel any pending silence timer. */
|
|
4854
6481
|
async stop() {
|
|
4855
6482
|
this.debouncedSilence.cancel();
|
|
4856
6483
|
this.started = false;
|
|
4857
6484
|
}
|
|
6485
|
+
/** Feed a final user-side transcript chunk into the loop detector. */
|
|
4858
6486
|
async onUserTranscribed(text) {
|
|
4859
6487
|
if (!this.started || !text) return;
|
|
4860
6488
|
if (this.loopDetector !== null) {
|
|
@@ -4871,14 +6499,17 @@ var IVRActivity = class {
|
|
|
4871
6499
|
}
|
|
4872
6500
|
}
|
|
4873
6501
|
}
|
|
6502
|
+
/** Record the current user-turn state (e.g. `"listening"`, `"away"`). */
|
|
4874
6503
|
noteUserState(state) {
|
|
4875
6504
|
this.currentUserState = state;
|
|
4876
6505
|
this.scheduleSilenceCheck();
|
|
4877
6506
|
}
|
|
6507
|
+
/** Record the current agent-turn state (e.g. `"idle"`, `"listening"`). */
|
|
4878
6508
|
noteAgentState(state) {
|
|
4879
6509
|
this.currentAgentState = state;
|
|
4880
6510
|
this.scheduleSilenceCheck();
|
|
4881
6511
|
}
|
|
6512
|
+
/** Tool definitions to expose to the LLM (currently only `send_dtmf_events`). */
|
|
4882
6513
|
get tools() {
|
|
4883
6514
|
return [this.buildSendDtmfTool()];
|
|
4884
6515
|
}
|
|
@@ -4952,10 +6583,11 @@ var IVRActivity = class {
|
|
|
4952
6583
|
}
|
|
4953
6584
|
};
|
|
4954
6585
|
|
|
4955
|
-
// src/
|
|
4956
|
-
|
|
4957
|
-
import
|
|
4958
|
-
import
|
|
6586
|
+
// src/audio/background-audio.ts
|
|
6587
|
+
init_esm_shims();
|
|
6588
|
+
import { promises as fs } from "fs";
|
|
6589
|
+
import path from "path";
|
|
6590
|
+
import { fileURLToPath } from "url";
|
|
4959
6591
|
var BuiltinAudioClip = {
|
|
4960
6592
|
CITY_AMBIENCE: "city-ambience.ogg",
|
|
4961
6593
|
FOREST_AMBIENCE: "forest-ambience.ogg",
|
|
@@ -4967,8 +6599,8 @@ var BuiltinAudioClip = {
|
|
|
4967
6599
|
};
|
|
4968
6600
|
function builtinClipPath(clip) {
|
|
4969
6601
|
const meta = typeof import.meta !== "undefined" ? import.meta : void 0;
|
|
4970
|
-
const here = meta?.url ?
|
|
4971
|
-
return
|
|
6602
|
+
const here = meta?.url ? path.dirname(fileURLToPath(meta.url)) : typeof __dirname !== "undefined" ? __dirname : process.cwd();
|
|
6603
|
+
return path.resolve(here, "..", "resources", "audio", clip);
|
|
4972
6604
|
}
|
|
4973
6605
|
var INT16_MIN = -32768;
|
|
4974
6606
|
var INT16_MAX = 32767;
|
|
@@ -5137,7 +6769,7 @@ var BackgroundAudioPlayer = class {
|
|
|
5137
6769
|
return source.decode(source.path);
|
|
5138
6770
|
case "builtin": {
|
|
5139
6771
|
const p = builtinClipPath(source.clip);
|
|
5140
|
-
const header = await
|
|
6772
|
+
const header = await fs.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
|
|
5141
6773
|
if (header.toString("ascii") !== "OggS") {
|
|
5142
6774
|
throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
|
|
5143
6775
|
}
|
|
@@ -5169,6 +6801,7 @@ function isAudioConfig(value) {
|
|
|
5169
6801
|
}
|
|
5170
6802
|
|
|
5171
6803
|
// src/providers/twilio-adapter.ts
|
|
6804
|
+
init_esm_shims();
|
|
5172
6805
|
var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
|
|
5173
6806
|
var TwilioAdapter = class _TwilioAdapter {
|
|
5174
6807
|
accountSid;
|
|
@@ -5183,8 +6816,8 @@ var TwilioAdapter = class _TwilioAdapter {
|
|
|
5183
6816
|
this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
|
|
5184
6817
|
this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
|
|
5185
6818
|
}
|
|
5186
|
-
async request(method,
|
|
5187
|
-
const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${
|
|
6819
|
+
async request(method, path2, body) {
|
|
6820
|
+
const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path2}`;
|
|
5188
6821
|
const headers = { Authorization: this.authHeader };
|
|
5189
6822
|
if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
|
|
5190
6823
|
const response = await fetch(url, {
|
|
@@ -5195,7 +6828,7 @@ var TwilioAdapter = class _TwilioAdapter {
|
|
|
5195
6828
|
});
|
|
5196
6829
|
const text = await response.text();
|
|
5197
6830
|
if (!response.ok) {
|
|
5198
|
-
throw new Error(`Twilio ${method} ${
|
|
6831
|
+
throw new Error(`Twilio ${method} ${path2} failed: ${response.status} ${text}`);
|
|
5199
6832
|
}
|
|
5200
6833
|
if (!text) return {};
|
|
5201
6834
|
try {
|
|
@@ -5213,8 +6846,8 @@ var TwilioAdapter = class _TwilioAdapter {
|
|
|
5213
6846
|
const country = encodeURIComponent(opts.countryCode);
|
|
5214
6847
|
const queryParts = ["PageSize=1"];
|
|
5215
6848
|
if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
|
|
5216
|
-
const
|
|
5217
|
-
const available = await this.request("GET",
|
|
6849
|
+
const path2 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
|
|
6850
|
+
const available = await this.request("GET", path2);
|
|
5218
6851
|
const first = available.available_phone_numbers?.[0]?.phone_number;
|
|
5219
6852
|
if (!first) {
|
|
5220
6853
|
throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
|
|
@@ -5297,6 +6930,7 @@ var TwilioAdapter = class _TwilioAdapter {
|
|
|
5297
6930
|
};
|
|
5298
6931
|
|
|
5299
6932
|
// src/providers/telnyx-adapter.ts
|
|
6933
|
+
init_esm_shims();
|
|
5300
6934
|
import { randomUUID as randomUUID2 } from "crypto";
|
|
5301
6935
|
var TELNYX_API_BASE = "https://api.telnyx.com/v2";
|
|
5302
6936
|
var TelnyxAdapter = class {
|
|
@@ -5308,8 +6942,8 @@ var TelnyxAdapter = class {
|
|
|
5308
6942
|
this.apiKey = apiKey;
|
|
5309
6943
|
this.connectionId = connectionId;
|
|
5310
6944
|
}
|
|
5311
|
-
async request(method,
|
|
5312
|
-
const url = `${this.baseUrl}${
|
|
6945
|
+
async request(method, path2, body) {
|
|
6946
|
+
const url = `${this.baseUrl}${path2}`;
|
|
5313
6947
|
const headers = {
|
|
5314
6948
|
Authorization: `Bearer ${this.apiKey}`
|
|
5315
6949
|
};
|
|
@@ -5322,7 +6956,7 @@ var TelnyxAdapter = class {
|
|
|
5322
6956
|
});
|
|
5323
6957
|
const text = await response.text();
|
|
5324
6958
|
if (!response.ok) {
|
|
5325
|
-
throw new Error(`Telnyx ${method} ${
|
|
6959
|
+
throw new Error(`Telnyx ${method} ${path2} failed: ${response.status} ${text}`);
|
|
5326
6960
|
}
|
|
5327
6961
|
if (!text) return {};
|
|
5328
6962
|
try {
|
|
@@ -5416,6 +7050,245 @@ var TelnyxAdapter = class {
|
|
|
5416
7050
|
}
|
|
5417
7051
|
}
|
|
5418
7052
|
};
|
|
7053
|
+
|
|
7054
|
+
// src/providers/telnyx-stt.ts
|
|
7055
|
+
init_esm_shims();
|
|
7056
|
+
import WebSocket7 from "ws";
|
|
7057
|
+
var TelnyxSTTSampleRate = {
|
|
7058
|
+
HZ_8000: 8e3,
|
|
7059
|
+
HZ_16000: 16e3,
|
|
7060
|
+
HZ_24000: 24e3
|
|
7061
|
+
};
|
|
7062
|
+
var TelnyxSTTInputFormat = {
|
|
7063
|
+
WAV: "wav"
|
|
7064
|
+
};
|
|
7065
|
+
var TELNYX_STT_WS_URL = "wss://api.telnyx.com/v2/speech-to-text/transcription";
|
|
7066
|
+
var DEFAULT_SAMPLE_RATE = TelnyxSTTSampleRate.HZ_16000;
|
|
7067
|
+
var NUM_CHANNELS = 1;
|
|
7068
|
+
/**
 * Build the 44-byte RIFF/WAVE header for a 16-bit PCM stream whose total
 * length is not known up front.
 *
 * The `data` chunk size is set to 0x7FFFFFFF and the RIFF size to
 * 36 + 0x7FFFFFFF, so the header describes a stream of maximal length.
 *
 * @param {number} sampleRate - Samples per second.
 * @param {number} numChannels - Number of interleaved channels.
 * @returns {Buffer} A 44-byte little-endian WAV header.
 */
function createStreamingWavHeader(sampleRate, numChannels) {
  const sampleWidth = 2;
  const bytesPerSecond = sampleRate * numChannels * sampleWidth;
  const frameSize = numChannels * sampleWidth;
  const payloadSize = 2147483647;
  const riffSize = 36 + payloadSize;

  // Header layout in file order; each entry is [kind, value].
  const layout = [
    ["tag", "RIFF"],
    ["u32", riffSize],
    ["tag", "WAVE"],
    ["tag", "fmt "],
    ["u32", 16], // size of the fmt chunk body
    ["u16", 1], // audio format 1 = PCM
    ["u16", numChannels],
    ["u32", sampleRate],
    ["u32", bytesPerSecond],
    ["u16", frameSize],
    ["u16", 16], // bits per sample
    ["tag", "data"],
    ["u32", payloadSize]
  ];

  const out = Buffer.alloc(44);
  let cursor = 0;
  for (const [kind, value] of layout) {
    if (kind === "tag") {
      cursor += out.write(value, cursor);
    } else if (kind === "u32") {
      cursor = out.writeUInt32LE(value, cursor);
    } else {
      cursor = out.writeUInt16LE(value, cursor);
    }
  }
  return out;
}
|
|
7090
|
+
/**
 * Streaming speech-to-text client for the Telnyx transcription WebSocket.
 *
 * Audio is sent as a WAV stream: the first `sendAudio` call on a connection
 * emits a streaming WAV header, and every call forwards the raw chunk.
 * Transcripts arrive as JSON frames and are fanned out to the listeners
 * registered through `onTranscript`.
 */
var TelnyxSTT = class {
  /**
   * @param {string} apiKey - Sent as a Bearer token on the WebSocket handshake.
   * @param {string} [language] - Value of the `language` query parameter.
   * @param {string} [transcriptionEngine] - Value of the `transcription_engine` query parameter.
   * @param {number} [sampleRate] - Sample rate written into the WAV header.
   * @param {string} [baseUrl] - WebSocket endpoint URL (without query string).
   */
  constructor(apiKey, language = "en", transcriptionEngine = "telnyx", sampleRate = DEFAULT_SAMPLE_RATE, baseUrl = TELNYX_STT_WS_URL) {
    this.apiKey = apiKey;
    this.language = language;
    this.transcriptionEngine = transcriptionEngine;
    this.sampleRate = sampleRate;
    this.baseUrl = baseUrl;
  }
  apiKey;
  language;
  transcriptionEngine;
  sampleRate;
  baseUrl;
  ws = null;
  callbacks = [];
  headerSent = false;
  /**
   * Open the streaming WebSocket and arm message handlers.
   *
   * Any socket left over from an earlier `connect()` is closed first and the
   * WAV-header state is reset, so each connection starts with a fresh header.
   *
   * @returns {Promise<void>} Resolves once the socket is open.
   * @throws {Error} On handshake error or after a 10 s connect timeout; the
   *   half-open socket is closed and `this.ws` is cleared before rethrowing.
   */
  async connect() {
    this.close();
    const params = new URLSearchParams({
      transcription_engine: this.transcriptionEngine,
      language: this.language,
      input_format: "wav"
    });
    const ws = new WebSocket7(`${this.baseUrl}?${params.toString()}`, {
      headers: { Authorization: `Bearer ${this.apiKey}` }
    });
    this.ws = ws;
    try {
      await new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error("Telnyx STT connect timeout")), 1e4);
        ws.once("open", () => {
          clearTimeout(timer);
          resolve();
        });
        ws.once("error", (err) => {
          clearTimeout(timer);
          reject(err);
        });
      });
    } catch (err) {
      // Closing a socket that never opened can emit a late "error" event;
      // keep a listener attached so it cannot surface as an uncaught exception.
      ws.on("error", () => {
      });
      try {
        ws.close();
      } catch {
      }
      if (this.ws === ws) this.ws = null;
      throw err;
    }
    ws.on("message", (raw) => {
      let data;
      try {
        data = JSON.parse(raw.toString());
      } catch {
        return;
      }
      // Valid JSON is not necessarily an object (e.g. `null`); ignore such frames.
      if (data === null || typeof data !== "object") return;
      const text = typeof data.transcript === "string" ? data.transcript.trim() : "";
      if (!text) return;
      const transcript = {
        text,
        isFinal: Boolean(data.is_final),
        confidence: data.confidence ?? 0
      };
      for (const cb of this.callbacks) {
        // One failing listener must not starve the others or crash the socket handler.
        try {
          cb(transcript);
        } catch (err) {
          getLogger().warn(`TelnyxSTT onTranscript callback threw: ${String(err)}`);
        }
      }
    });
    ws.on("error", (err) => {
      getLogger().warn(`TelnyxSTT WebSocket error: ${String(err)}`);
    });
  }
  /**
   * Send a binary PCM16 audio chunk; emits the WAV header on the first call
   * of each connection. Silently does nothing when the socket is not open.
   *
   * @param {Buffer} audio - Raw audio bytes to forward.
   */
  sendAudio(audio) {
    if (!this.ws || this.ws.readyState !== WebSocket7.OPEN) return;
    if (!this.headerSent) {
      const header = createStreamingWavHeader(this.sampleRate, NUM_CHANNELS);
      this.ws.send(header);
      this.headerSent = true;
    }
    this.ws.send(audio);
  }
  /**
   * Register a transcript listener (max 10 concurrent listeners). Once the
   * limit is reached, the most recently added listener is replaced.
   *
   * @param {(transcript: {text: string, isFinal: boolean, confidence: number}) => void} callback
   */
  onTranscript(callback) {
    if (this.callbacks.length >= 10) {
      getLogger().warn("TelnyxSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
      this.callbacks[this.callbacks.length - 1] = callback;
      return;
    }
    this.callbacks.push(callback);
  }
  /**
   * Close the streaming WebSocket (best effort) and reset the WAV-header
   * state so a later `connect()` starts a new, correctly-prefixed stream.
   */
  close() {
    if (this.ws) {
      try {
        this.ws.close();
      } catch {
      }
      this.ws = null;
    }
    this.headerSent = false;
  }
};
|
|
7180
|
+
|
|
7181
|
+
// src/providers/telnyx-tts.ts
|
|
7182
|
+
init_esm_shims();
|
|
7183
|
+
import WebSocket8 from "ws";
|
|
7184
|
+
var TELNYX_TTS_WS_URL = "wss://api.telnyx.com/v2/text-to-speech/speech";
|
|
7185
|
+
var TelnyxTTSVoice = {
|
|
7186
|
+
NATURAL_HD_ASTRA: "Telnyx.NaturalHD.astra",
|
|
7187
|
+
NATURAL_HD_LUNA: "Telnyx.NaturalHD.luna",
|
|
7188
|
+
NATURAL_HD_ATLAS: "Telnyx.NaturalHD.atlas",
|
|
7189
|
+
NATURAL_HD_HERA: "Telnyx.NaturalHD.hera",
|
|
7190
|
+
NATURAL_HD_ZEUS: "Telnyx.NaturalHD.zeus"
|
|
7191
|
+
};
|
|
7192
|
+
var TelnyxTTSSampleRate = {
|
|
7193
|
+
HZ_8000: 8e3,
|
|
7194
|
+
HZ_16000: 16e3,
|
|
7195
|
+
HZ_24000: 24e3
|
|
7196
|
+
};
|
|
7197
|
+
var DEFAULT_VOICE = TelnyxTTSVoice.NATURAL_HD_ASTRA;
|
|
7198
|
+
/**
 * Text-to-speech client for the Telnyx streaming WebSocket endpoint.
 *
 * Each synthesis opens its own WebSocket, sends the text, and yields the
 * audio frames the server returns until the server closes the connection.
 */
var TelnyxTTS = class {
  /**
   * @param {string} apiKey - Sent as a Bearer token on the WebSocket handshake.
   * @param {string} [voice] - Voice identifier placed in the `voice` query parameter.
   * @param {string} [baseUrl] - WebSocket endpoint URL (without query string).
   */
  constructor(apiKey, voice = DEFAULT_VOICE, baseUrl = TELNYX_TTS_WS_URL) {
    this.apiKey = apiKey;
    this.voice = voice;
    this.baseUrl = baseUrl;
  }
  apiKey;
  voice;
  baseUrl;
  /**
   * Collect every audio chunk into a single Buffer.
   *
   * @param {string} text - Text to synthesize.
   * @returns {Promise<Buffer>} Concatenation of all streamed chunks.
   */
  async synthesize(text) {
    const chunks = [];
    for await (const chunk of this.synthesizeStream(text)) {
      chunks.push(chunk);
    }
    return Buffer.concat(chunks);
  }
  /**
   * Stream MP3-encoded audio chunks as they arrive from Telnyx.
   *
   * The server sends JSON frames of the shape `{"audio": "<base64-mp3>"}`.
   * Callers that need PCM must decode the MP3 bytes (e.g. via `ffmpeg`).
   *
   * The socket is closed on every exit path: normal completion, early
   * consumer exit, a socket error, a failed send, or a failed/timed-out
   * connect.
   *
   * @param {string} text - Text to synthesize.
   * @yields {Buffer} Decoded audio bytes of one server frame.
   * @throws {Error} On handshake error, 10 s connect timeout, or socket error.
   */
  async *synthesizeStream(text) {
    const url = `${this.baseUrl}?voice=${encodeURIComponent(this.voice)}`;
    const ws = new WebSocket8(url, {
      headers: { Authorization: `Bearer ${this.apiKey}` }
    });
    try {
      await new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
        ws.once("open", () => {
          clearTimeout(timer);
          resolve();
        });
        ws.once("error", (err) => {
          clearTimeout(timer);
          reject(err);
        });
      });
    } catch (err) {
      // Closing a socket that never opened can emit a late "error" event;
      // keep a listener attached so it cannot surface as an uncaught exception.
      ws.on("error", () => {
      });
      try {
        ws.close();
      } catch {
      }
      throw err;
    }
    // Hand-off between socket events (producer) and the generator (consumer):
    // items wait in `queue` unless a consumer is already parked in `waiters`.
    const queue = [];
    const waiters = [];
    function push(item) {
      const w = waiters.shift();
      if (w) {
        w(item);
      } else {
        queue.push(item);
      }
    }
    ws.on("message", (raw) => {
      let data;
      try {
        data = JSON.parse(raw.toString());
      } catch {
        getLogger().warn("TelnyxTTS: received invalid JSON");
        return;
      }
      // Valid JSON is not necessarily an object (e.g. `null`); ignore such frames.
      if (data === null || typeof data !== "object") return;
      const audioB64 = data.audio;
      if (!audioB64) return;
      try {
        const audioBytes = Buffer.from(audioB64, "base64");
        if (audioBytes.length > 0) {
          push(audioBytes);
        }
      } catch {
        // Non-decodable payload: skip the frame rather than abort the stream.
      }
    });
    ws.on("close", () => {
      // `null` is the end-of-stream sentinel.
      push(null);
    });
    ws.on("error", (err) => {
      push({ error: err instanceof Error ? err : new Error(String(err)) });
    });
    try {
      // Frame sequence kept from the original: a single-space frame, the
      // text itself, then an empty-text frame.
      ws.send(JSON.stringify({ text: " " }));
      ws.send(JSON.stringify({ text }));
      ws.send(JSON.stringify({ text: "" }));
      while (true) {
        const item = queue.length > 0 ? queue.shift() : await new Promise((resolve) => waiters.push(resolve));
        if (item === null) return;
        if (typeof item === "object" && "error" in item) throw item.error;
        yield item;
      }
    } finally {
      try {
        ws.close();
      } catch {
      }
    }
  }
};
|
|
7289
|
+
|
|
7290
|
+
// src/observability/index.ts
|
|
7291
|
+
init_esm_shims();
|
|
5419
7292
|
export {
|
|
5420
7293
|
AllProvidersFailedError,
|
|
5421
7294
|
LLM2 as AnthropicLLM,
|
|
@@ -5425,7 +7298,7 @@ export {
|
|
|
5425
7298
|
BuiltinAudioClip,
|
|
5426
7299
|
CallMetricsAccumulator,
|
|
5427
7300
|
STT4 as CartesiaSTT,
|
|
5428
|
-
|
|
7301
|
+
TTS4 as CartesiaTTS,
|
|
5429
7302
|
LLM4 as CerebrasLLM,
|
|
5430
7303
|
ChatContext,
|
|
5431
7304
|
CloudflareTunnel,
|
|
@@ -5437,6 +7310,8 @@ export {
|
|
|
5437
7310
|
ConvAI as ElevenLabsConvAI,
|
|
5438
7311
|
ElevenLabsConvAIAdapter,
|
|
5439
7312
|
TTS as ElevenLabsTTS,
|
|
7313
|
+
TTS2 as ElevenLabsWebSocketTTS,
|
|
7314
|
+
ErrorCode,
|
|
5440
7315
|
EventBus,
|
|
5441
7316
|
FallbackLLMProvider,
|
|
5442
7317
|
GEMINI_DEFAULT_INPUT_SR,
|
|
@@ -5446,15 +7321,16 @@ export {
|
|
|
5446
7321
|
LLM3 as GroqLLM,
|
|
5447
7322
|
Guardrail,
|
|
5448
7323
|
IVRActivity,
|
|
7324
|
+
TTS7 as InworldTTS,
|
|
5449
7325
|
LLMLoop,
|
|
5450
|
-
|
|
7326
|
+
TTS6 as LMNTTTS,
|
|
5451
7327
|
MetricsStore,
|
|
5452
7328
|
Ngrok,
|
|
5453
7329
|
LLM as OpenAILLM,
|
|
5454
7330
|
OpenAILLMProvider,
|
|
5455
7331
|
Realtime as OpenAIRealtime,
|
|
5456
7332
|
OpenAIRealtimeAdapter,
|
|
5457
|
-
|
|
7333
|
+
TTS3 as OpenAITTS,
|
|
5458
7334
|
STT3 as OpenAITranscribeSTT,
|
|
5459
7335
|
PartialStreamError,
|
|
5460
7336
|
Patter,
|
|
@@ -5466,7 +7342,7 @@ export {
|
|
|
5466
7342
|
ProvisionError,
|
|
5467
7343
|
RateLimitError,
|
|
5468
7344
|
RemoteMessageHandler,
|
|
5469
|
-
|
|
7345
|
+
TTS5 as RimeTTS,
|
|
5470
7346
|
SPAN_BARGEIN,
|
|
5471
7347
|
SPAN_CALL,
|
|
5472
7348
|
SPAN_ENDPOINT,
|
|
@@ -5477,10 +7353,23 @@ export {
|
|
|
5477
7353
|
SentenceChunker,
|
|
5478
7354
|
SileroVAD,
|
|
5479
7355
|
STT5 as SonioxSTT,
|
|
7356
|
+
SpeechEvents,
|
|
7357
|
+
SpeechmaticsAudioEncoding,
|
|
7358
|
+
SpeechmaticsOperatingPoint,
|
|
7359
|
+
STT7 as SpeechmaticsSTT,
|
|
7360
|
+
SpeechmaticsSampleRate,
|
|
7361
|
+
SpeechmaticsServerMessage,
|
|
7362
|
+
TurnDetectionMode as SpeechmaticsTurnDetectionMode,
|
|
5480
7363
|
StatefulResampler,
|
|
5481
7364
|
Static as StaticTunnel,
|
|
5482
7365
|
Carrier2 as Telnyx,
|
|
5483
7366
|
TelnyxAdapter,
|
|
7367
|
+
TelnyxSTT,
|
|
7368
|
+
TelnyxSTTInputFormat,
|
|
7369
|
+
TelnyxSTTSampleRate,
|
|
7370
|
+
TelnyxTTS,
|
|
7371
|
+
TelnyxTTSSampleRate,
|
|
7372
|
+
TelnyxTTSVoice,
|
|
5484
7373
|
TestSession,
|
|
5485
7374
|
TfidfLoopDetector,
|
|
5486
7375
|
Tool,
|
|
@@ -5501,6 +7390,7 @@ export {
|
|
|
5501
7390
|
cartesia,
|
|
5502
7391
|
createResampler16kTo8k,
|
|
5503
7392
|
createResampler24kTo16k,
|
|
7393
|
+
createResampler24kTo8k,
|
|
5504
7394
|
createResampler8kTo16k,
|
|
5505
7395
|
deepgram,
|
|
5506
7396
|
defineTool,
|