getpatter 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +136 -162
- package/dist/carrier-config-CPG5CROM.mjs +84 -0
- package/dist/{chunk-35EVXMGB.mjs → chunk-757NVN4L.mjs} +396 -458
- package/dist/cli.js +92 -5
- package/dist/index.d.mts +901 -241
- package/dist/index.d.ts +901 -241
- package/dist/index.js +1763 -921
- package/dist/index.mjs +1240 -419
- package/dist/{test-mode-RH65MMSP.mjs → test-mode-YFOL2HYH.mjs} +1 -1
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -10,18 +10,15 @@ import {
|
|
|
10
10
|
DEFAULT_PRICING,
|
|
11
11
|
DeepgramSTT,
|
|
12
12
|
ElevenLabsConvAIAdapter,
|
|
13
|
-
ElevenLabsTTS,
|
|
14
13
|
EmbeddedServer,
|
|
15
14
|
LLMLoop,
|
|
16
15
|
MetricsStore,
|
|
17
16
|
OpenAILLMProvider,
|
|
18
17
|
OpenAIRealtimeAdapter,
|
|
19
|
-
OpenAITTS,
|
|
20
18
|
PipelineHookExecutor,
|
|
21
19
|
RemoteMessageHandler,
|
|
22
20
|
SentenceChunker,
|
|
23
21
|
TestSession,
|
|
24
|
-
WhisperSTT,
|
|
25
22
|
calculateRealtimeCost,
|
|
26
23
|
calculateSttCost,
|
|
27
24
|
calculateTelephonyCost,
|
|
@@ -39,7 +36,7 @@ import {
|
|
|
39
36
|
resample16kTo8k,
|
|
40
37
|
resample24kTo16k,
|
|
41
38
|
resample8kTo16k
|
|
42
|
-
} from "./chunk-
|
|
39
|
+
} from "./chunk-757NVN4L.mjs";
|
|
43
40
|
import {
|
|
44
41
|
getLogger,
|
|
45
42
|
setLogger
|
|
@@ -186,49 +183,86 @@ var PatterConnection = class {
|
|
|
186
183
|
}
|
|
187
184
|
};
|
|
188
185
|
|
|
189
|
-
// src/
|
|
190
|
-
var
|
|
191
|
-
|
|
186
|
+
// src/engines/openai.ts
|
|
187
|
+
var Realtime = class {
|
|
188
|
+
kind = "openai_realtime";
|
|
192
189
|
apiKey;
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
190
|
+
model;
|
|
191
|
+
voice;
|
|
192
|
+
constructor(opts = {}) {
|
|
193
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
194
|
+
if (!key) {
|
|
195
|
+
throw new Error(
|
|
196
|
+
"OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
197
|
+
);
|
|
198
|
+
}
|
|
199
|
+
this.apiKey = key;
|
|
200
|
+
this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
|
|
201
|
+
this.voice = opts.voice ?? "alloy";
|
|
201
202
|
}
|
|
202
203
|
};
|
|
203
|
-
|
|
204
|
-
|
|
204
|
+
|
|
205
|
+
// src/engines/elevenlabs.ts
|
|
206
|
+
var ConvAI = class {
|
|
207
|
+
kind = "elevenlabs_convai";
|
|
205
208
|
apiKey;
|
|
209
|
+
agentId;
|
|
206
210
|
voice;
|
|
207
|
-
constructor(
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
+
constructor(opts = {}) {
|
|
212
|
+
const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
213
|
+
const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
|
|
214
|
+
if (!key) {
|
|
215
|
+
throw new Error(
|
|
216
|
+
"ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
217
|
+
);
|
|
218
|
+
}
|
|
219
|
+
if (!agent) {
|
|
220
|
+
throw new Error(
|
|
221
|
+
"ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
|
|
222
|
+
);
|
|
223
|
+
}
|
|
224
|
+
this.apiKey = key;
|
|
225
|
+
this.agentId = agent;
|
|
226
|
+
this.voice = opts.voice;
|
|
211
227
|
}
|
|
212
|
-
|
|
213
|
-
|
|
228
|
+
};
|
|
229
|
+
|
|
230
|
+
// src/tunnels/index.ts
|
|
231
|
+
var CloudflareTunnel = class {
|
|
232
|
+
kind = "cloudflare";
|
|
233
|
+
};
|
|
234
|
+
var Static = class {
|
|
235
|
+
kind = "static";
|
|
236
|
+
hostname;
|
|
237
|
+
constructor(opts) {
|
|
238
|
+
if (!opts.hostname) {
|
|
239
|
+
throw new Error("Static tunnel requires a non-empty hostname.");
|
|
240
|
+
}
|
|
241
|
+
this.hostname = opts.hostname;
|
|
214
242
|
}
|
|
215
243
|
};
|
|
216
|
-
function deepgram(opts) {
|
|
217
|
-
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en");
|
|
218
|
-
}
|
|
219
|
-
function whisper(opts) {
|
|
220
|
-
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
221
|
-
}
|
|
222
|
-
function elevenlabs(opts) {
|
|
223
|
-
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
224
|
-
}
|
|
225
|
-
function openaiTts(opts) {
|
|
226
|
-
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
227
|
-
}
|
|
228
244
|
|
|
229
245
|
// src/client.ts
|
|
230
246
|
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
231
247
|
var DEFAULT_REST_URL = "https://api.getpatter.com";
|
|
248
|
+
function sttConfigToDict(cfg) {
|
|
249
|
+
const out = {
|
|
250
|
+
provider: cfg.provider,
|
|
251
|
+
api_key: cfg.apiKey,
|
|
252
|
+
language: cfg.language
|
|
253
|
+
};
|
|
254
|
+
if (cfg.options) out.options = { ...cfg.options };
|
|
255
|
+
return out;
|
|
256
|
+
}
|
|
257
|
+
function ttsConfigToDict(cfg) {
|
|
258
|
+
const out = {
|
|
259
|
+
provider: cfg.provider,
|
|
260
|
+
api_key: cfg.apiKey,
|
|
261
|
+
voice: cfg.voice
|
|
262
|
+
};
|
|
263
|
+
if (cfg.options) out.options = { ...cfg.options };
|
|
264
|
+
return out;
|
|
265
|
+
}
|
|
232
266
|
var Patter = class {
|
|
233
267
|
apiKey;
|
|
234
268
|
backendUrl;
|
|
@@ -239,20 +273,39 @@ var Patter = class {
|
|
|
239
273
|
embeddedServer = null;
|
|
240
274
|
tunnelHandle = null;
|
|
241
275
|
constructor(options) {
|
|
242
|
-
|
|
276
|
+
const hasCarrier = "carrier" in options && options.carrier !== void 0;
|
|
277
|
+
const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
|
|
278
|
+
if (isLocal) {
|
|
243
279
|
const local = options;
|
|
244
280
|
if (!local.phoneNumber) {
|
|
245
281
|
throw new Error("Local mode requires phoneNumber");
|
|
246
282
|
}
|
|
247
|
-
if (!local.
|
|
248
|
-
throw new Error(
|
|
283
|
+
if (!local.carrier) {
|
|
284
|
+
throw new Error(
|
|
285
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
286
|
+
);
|
|
249
287
|
}
|
|
250
|
-
|
|
251
|
-
|
|
288
|
+
const carrier = local.carrier;
|
|
289
|
+
const tunnel = local.tunnel;
|
|
290
|
+
let tunnelWebhookUrl;
|
|
291
|
+
if (tunnel instanceof Static) {
|
|
292
|
+
if (local.webhookUrl) {
|
|
293
|
+
throw new Error(
|
|
294
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
295
|
+
);
|
|
296
|
+
}
|
|
297
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
252
298
|
}
|
|
253
299
|
this.mode = "local";
|
|
254
|
-
const
|
|
255
|
-
|
|
300
|
+
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
301
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
302
|
+
this.localConfig = {
|
|
303
|
+
carrier,
|
|
304
|
+
phoneNumber: local.phoneNumber,
|
|
305
|
+
webhookUrl: normalizedWebhook,
|
|
306
|
+
tunnel: local.tunnel,
|
|
307
|
+
openaiKey: local.openaiKey
|
|
308
|
+
};
|
|
256
309
|
this.apiKey = "";
|
|
257
310
|
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
258
311
|
this.restUrl = DEFAULT_REST_URL;
|
|
@@ -269,25 +322,55 @@ var Patter = class {
|
|
|
269
322
|
}
|
|
270
323
|
// === Local mode ===
|
|
271
324
|
agent(opts) {
|
|
272
|
-
|
|
325
|
+
let working = { ...opts };
|
|
326
|
+
if (opts.engine) {
|
|
327
|
+
if (opts.provider) {
|
|
328
|
+
throw new Error(
|
|
329
|
+
"Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
|
|
330
|
+
);
|
|
331
|
+
}
|
|
332
|
+
const engine = opts.engine;
|
|
333
|
+
if (engine instanceof Realtime) {
|
|
334
|
+
working = {
|
|
335
|
+
...working,
|
|
336
|
+
provider: "openai_realtime",
|
|
337
|
+
model: working.model ?? engine.model,
|
|
338
|
+
voice: working.voice ?? engine.voice
|
|
339
|
+
};
|
|
340
|
+
if (this.localConfig && !this.localConfig.openaiKey) {
|
|
341
|
+
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
342
|
+
}
|
|
343
|
+
} else if (engine instanceof ConvAI) {
|
|
344
|
+
working = {
|
|
345
|
+
...working,
|
|
346
|
+
provider: "elevenlabs_convai",
|
|
347
|
+
voice: working.voice ?? engine.voice
|
|
348
|
+
};
|
|
349
|
+
} else {
|
|
350
|
+
throw new Error(
|
|
351
|
+
"Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
|
|
352
|
+
);
|
|
353
|
+
}
|
|
354
|
+
}
|
|
355
|
+
if (working.provider) {
|
|
273
356
|
const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
|
|
274
|
-
if (!valid.includes(
|
|
275
|
-
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${
|
|
357
|
+
if (!valid.includes(working.provider)) {
|
|
358
|
+
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
276
359
|
}
|
|
277
360
|
}
|
|
278
|
-
if (
|
|
279
|
-
if (!Array.isArray(
|
|
361
|
+
if (working.tools) {
|
|
362
|
+
if (!Array.isArray(working.tools)) {
|
|
280
363
|
throw new TypeError("tools must be an array");
|
|
281
364
|
}
|
|
282
|
-
|
|
283
|
-
if (!
|
|
284
|
-
if (!
|
|
365
|
+
working.tools.forEach((tool2, i) => {
|
|
366
|
+
if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
|
|
367
|
+
if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
|
|
285
368
|
});
|
|
286
369
|
}
|
|
287
|
-
if (
|
|
370
|
+
if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
|
|
288
371
|
throw new TypeError("variables must be an object");
|
|
289
372
|
}
|
|
290
|
-
return
|
|
373
|
+
return working;
|
|
291
374
|
}
|
|
292
375
|
async serve(opts) {
|
|
293
376
|
if (this.mode !== "local" || !this.localConfig) {
|
|
@@ -310,10 +393,14 @@ var Patter = class {
|
|
|
310
393
|
}
|
|
311
394
|
let webhookUrl = this.localConfig.webhookUrl ?? "";
|
|
312
395
|
const port = opts.port ?? 8e3;
|
|
313
|
-
|
|
396
|
+
const ctorTunnel = this.localConfig.tunnel;
|
|
397
|
+
const wantsCloudflaredFromServe = opts.tunnel === true;
|
|
398
|
+
const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
|
|
399
|
+
const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
|
|
400
|
+
if (wantsCloudflared && webhookUrl) {
|
|
314
401
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
315
402
|
}
|
|
316
|
-
if (
|
|
403
|
+
if (wantsCloudflared) {
|
|
317
404
|
const { startTunnel: startTunnel2 } = await import("./tunnel-BL7A7GXW.mjs");
|
|
318
405
|
this.tunnelHandle = await startTunnel2(port);
|
|
319
406
|
webhookUrl = this.tunnelHandle.hostname;
|
|
@@ -323,17 +410,29 @@ var Patter = class {
|
|
|
323
410
|
"No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
|
|
324
411
|
);
|
|
325
412
|
}
|
|
413
|
+
const carrier = this.localConfig.carrier;
|
|
414
|
+
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
415
|
+
const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
|
|
416
|
+
await autoConfigureCarrier({
|
|
417
|
+
telephonyProvider,
|
|
418
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
419
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
420
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
421
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
422
|
+
phoneNumber: this.localConfig.phoneNumber,
|
|
423
|
+
webhookHost: webhookUrl
|
|
424
|
+
});
|
|
326
425
|
this.embeddedServer = new EmbeddedServer(
|
|
327
426
|
{
|
|
328
|
-
twilioSid:
|
|
329
|
-
twilioToken:
|
|
427
|
+
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
428
|
+
twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
|
|
330
429
|
openaiKey: this.localConfig.openaiKey,
|
|
331
430
|
phoneNumber: this.localConfig.phoneNumber,
|
|
332
431
|
webhookUrl,
|
|
333
|
-
telephonyProvider
|
|
334
|
-
telnyxKey:
|
|
335
|
-
telnyxConnectionId:
|
|
336
|
-
telnyxPublicKey:
|
|
432
|
+
telephonyProvider,
|
|
433
|
+
telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
|
|
434
|
+
telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
|
|
435
|
+
telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
|
|
337
436
|
},
|
|
338
437
|
opts.agent,
|
|
339
438
|
opts.onCallStart,
|
|
@@ -353,7 +452,7 @@ var Patter = class {
|
|
|
353
452
|
if (this.mode !== "local") {
|
|
354
453
|
throw new Error("test() is only available in local mode");
|
|
355
454
|
}
|
|
356
|
-
const { TestSession: TestSession2 } = await import("./test-mode-
|
|
455
|
+
const { TestSession: TestSession2 } = await import("./test-mode-YFOL2HYH.mjs");
|
|
357
456
|
const session = new TestSession2();
|
|
358
457
|
await session.run({
|
|
359
458
|
agent: opts.agent,
|
|
@@ -394,32 +493,51 @@ var Patter = class {
|
|
|
394
493
|
if (!this.localConfig) {
|
|
395
494
|
throw new Error("local config missing");
|
|
396
495
|
}
|
|
397
|
-
const { phoneNumber, webhookUrl,
|
|
398
|
-
if (
|
|
399
|
-
const telnyxKey =
|
|
400
|
-
const connectionId =
|
|
496
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
497
|
+
if (carrier.kind === "telnyx") {
|
|
498
|
+
const telnyxKey = carrier.apiKey;
|
|
499
|
+
const connectionId = carrier.connectionId;
|
|
401
500
|
const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
|
|
501
|
+
const telnyxPayload = {
|
|
502
|
+
connection_id: connectionId,
|
|
503
|
+
from: phoneNumber,
|
|
504
|
+
to: localOpts.to,
|
|
505
|
+
stream_url: streamUrl,
|
|
506
|
+
stream_track: "both_tracks"
|
|
507
|
+
};
|
|
508
|
+
if (localOpts.ringTimeout !== void 0) {
|
|
509
|
+
telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
|
|
510
|
+
}
|
|
402
511
|
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
403
512
|
method: "POST",
|
|
404
513
|
headers: {
|
|
405
514
|
"Content-Type": "application/json",
|
|
406
515
|
Authorization: `Bearer ${telnyxKey}`
|
|
407
516
|
},
|
|
408
|
-
body: JSON.stringify(
|
|
409
|
-
connection_id: connectionId,
|
|
410
|
-
from: phoneNumber,
|
|
411
|
-
to: localOpts.to,
|
|
412
|
-
stream_url: streamUrl,
|
|
413
|
-
stream_track: "both_tracks"
|
|
414
|
-
})
|
|
517
|
+
body: JSON.stringify(telnyxPayload)
|
|
415
518
|
});
|
|
416
519
|
if (!response2.ok) {
|
|
417
520
|
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
418
521
|
}
|
|
522
|
+
if (this.embeddedServer) {
|
|
523
|
+
try {
|
|
524
|
+
const body = await response2.clone().json();
|
|
525
|
+
const callId = body.data?.call_control_id;
|
|
526
|
+
if (callId) {
|
|
527
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
528
|
+
call_id: callId,
|
|
529
|
+
caller: phoneNumber,
|
|
530
|
+
callee: localOpts.to,
|
|
531
|
+
direction: "outbound"
|
|
532
|
+
});
|
|
533
|
+
}
|
|
534
|
+
} catch {
|
|
535
|
+
}
|
|
536
|
+
}
|
|
419
537
|
return;
|
|
420
538
|
}
|
|
421
|
-
const twilioSid =
|
|
422
|
-
const twilioToken =
|
|
539
|
+
const twilioSid = carrier.accountSid;
|
|
540
|
+
const twilioToken = carrier.authToken;
|
|
423
541
|
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
424
542
|
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
425
543
|
const params = new URLSearchParams({
|
|
@@ -427,13 +545,19 @@ var Patter = class {
|
|
|
427
545
|
From: phoneNumber,
|
|
428
546
|
Url: `https://${webhookUrl}/webhooks/twilio/voice`,
|
|
429
547
|
StatusCallback: statusCallbackUrl,
|
|
430
|
-
StatusCallbackMethod: "POST"
|
|
548
|
+
StatusCallbackMethod: "POST",
|
|
549
|
+
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
550
|
+
// transitions even when media never arrives.
|
|
551
|
+
StatusCallbackEvent: "initiated ringing answered completed"
|
|
431
552
|
});
|
|
432
553
|
if (localOpts.machineDetection) {
|
|
433
554
|
params.append("MachineDetection", "DetectMessageEnd");
|
|
434
555
|
params.append("AsyncAmd", "true");
|
|
435
556
|
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
436
557
|
}
|
|
558
|
+
if (localOpts.ringTimeout !== void 0) {
|
|
559
|
+
params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
|
|
560
|
+
}
|
|
437
561
|
if (localOpts.voicemailMessage && this.embeddedServer) {
|
|
438
562
|
this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
|
|
439
563
|
}
|
|
@@ -448,6 +572,21 @@ var Patter = class {
|
|
|
448
572
|
if (!response.ok) {
|
|
449
573
|
throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
|
|
450
574
|
}
|
|
575
|
+
if (this.embeddedServer) {
|
|
576
|
+
try {
|
|
577
|
+
const body = await response.clone().json();
|
|
578
|
+
const callSid = body.sid;
|
|
579
|
+
if (callSid) {
|
|
580
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
581
|
+
call_id: callSid,
|
|
582
|
+
caller: phoneNumber,
|
|
583
|
+
callee: localOpts.to,
|
|
584
|
+
direction: "outbound"
|
|
585
|
+
});
|
|
586
|
+
}
|
|
587
|
+
} catch {
|
|
588
|
+
}
|
|
589
|
+
}
|
|
451
590
|
return;
|
|
452
591
|
}
|
|
453
592
|
const cloudOpts = options;
|
|
@@ -530,61 +669,6 @@ var Patter = class {
|
|
|
530
669
|
const data = await response.json();
|
|
531
670
|
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
532
671
|
}
|
|
533
|
-
// Provider helpers
|
|
534
|
-
static deepgram = deepgram;
|
|
535
|
-
static whisper = whisper;
|
|
536
|
-
static elevenlabs = elevenlabs;
|
|
537
|
-
static openaiTts = openaiTts;
|
|
538
|
-
static guardrail(opts) {
|
|
539
|
-
return {
|
|
540
|
-
name: opts.name,
|
|
541
|
-
blockedTerms: opts.blockedTerms,
|
|
542
|
-
check: opts.check,
|
|
543
|
-
replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
|
|
544
|
-
};
|
|
545
|
-
}
|
|
546
|
-
/**
|
|
547
|
-
* Create a tool definition for use with `agent({ tools: [...] })`.
|
|
548
|
-
*
|
|
549
|
-
* Either `handler` (a function) or `webhookUrl` must be provided.
|
|
550
|
-
*
|
|
551
|
-
* @param opts.name - Tool name (visible to the LLM).
|
|
552
|
-
* @param opts.description - What the tool does (visible to the LLM).
|
|
553
|
-
* @param opts.parameters - JSON Schema for tool arguments.
|
|
554
|
-
* @param opts.handler - Async function called in-process when the LLM invokes the tool.
|
|
555
|
-
* @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
|
|
556
|
-
*
|
|
557
|
-
* @example
|
|
558
|
-
* ```ts
|
|
559
|
-
* phone.agent({
|
|
560
|
-
* systemPrompt: 'You are a pizza bot.',
|
|
561
|
-
* tools: [
|
|
562
|
-
* Patter.tool({
|
|
563
|
-
* name: 'check_menu',
|
|
564
|
-
* description: 'Check available menu items',
|
|
565
|
-
* handler: async (args) => JSON.stringify({ items: ['margherita'] }),
|
|
566
|
-
* }),
|
|
567
|
-
* ],
|
|
568
|
-
* });
|
|
569
|
-
* ```
|
|
570
|
-
*/
|
|
571
|
-
static tool(opts) {
|
|
572
|
-
if (!opts.handler && !opts.webhookUrl) {
|
|
573
|
-
throw new Error("tool() requires either handler or webhookUrl");
|
|
574
|
-
}
|
|
575
|
-
const t = {
|
|
576
|
-
name: opts.name,
|
|
577
|
-
description: opts.description ?? "",
|
|
578
|
-
parameters: opts.parameters ?? { type: "object", properties: {} }
|
|
579
|
-
};
|
|
580
|
-
if (opts.handler) {
|
|
581
|
-
t.handler = opts.handler;
|
|
582
|
-
}
|
|
583
|
-
if (opts.webhookUrl) {
|
|
584
|
-
t.webhookUrl = opts.webhookUrl;
|
|
585
|
-
}
|
|
586
|
-
return t;
|
|
587
|
-
}
|
|
588
672
|
// Internal
|
|
589
673
|
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
590
674
|
const credentials = { api_key: providerKey };
|
|
@@ -600,8 +684,8 @@ var Patter = class {
|
|
|
600
684
|
provider,
|
|
601
685
|
provider_credentials: credentials,
|
|
602
686
|
country,
|
|
603
|
-
stt_config: stt?.
|
|
604
|
-
tts_config: tts?.
|
|
687
|
+
stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
|
|
688
|
+
tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
|
|
605
689
|
})
|
|
606
690
|
});
|
|
607
691
|
if (response.status === 409) return;
|
|
@@ -674,6 +758,62 @@ function filterForTTS(text) {
|
|
|
674
758
|
return filterEmoji(filterMarkdown(text));
|
|
675
759
|
}
|
|
676
760
|
|
|
761
|
+
// src/providers.ts
|
|
762
|
+
var STTConfigImpl = class {
|
|
763
|
+
provider;
|
|
764
|
+
apiKey;
|
|
765
|
+
language;
|
|
766
|
+
options;
|
|
767
|
+
constructor(provider, apiKey, language = "en", options) {
|
|
768
|
+
this.provider = provider;
|
|
769
|
+
this.apiKey = apiKey;
|
|
770
|
+
this.language = language;
|
|
771
|
+
if (options) this.options = options;
|
|
772
|
+
}
|
|
773
|
+
toDict() {
|
|
774
|
+
const out = {
|
|
775
|
+
provider: this.provider,
|
|
776
|
+
api_key: this.apiKey,
|
|
777
|
+
language: this.language
|
|
778
|
+
};
|
|
779
|
+
if (this.options) out.options = { ...this.options };
|
|
780
|
+
return out;
|
|
781
|
+
}
|
|
782
|
+
};
|
|
783
|
+
var TTSConfigImpl = class {
|
|
784
|
+
provider;
|
|
785
|
+
apiKey;
|
|
786
|
+
voice;
|
|
787
|
+
constructor(provider, apiKey, voice = "alloy") {
|
|
788
|
+
this.provider = provider;
|
|
789
|
+
this.apiKey = apiKey;
|
|
790
|
+
this.voice = voice;
|
|
791
|
+
}
|
|
792
|
+
toDict() {
|
|
793
|
+
return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
|
|
794
|
+
}
|
|
795
|
+
};
|
|
796
|
+
function deepgram(opts) {
|
|
797
|
+
const options = {
|
|
798
|
+
model: opts.model ?? "nova-3",
|
|
799
|
+
endpointing_ms: opts.endpointingMs ?? 150,
|
|
800
|
+
utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
801
|
+
smart_format: opts.smartFormat ?? true,
|
|
802
|
+
interim_results: opts.interimResults ?? true
|
|
803
|
+
};
|
|
804
|
+
if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
|
|
805
|
+
return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
|
|
806
|
+
}
|
|
807
|
+
function whisper(opts) {
|
|
808
|
+
return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
|
|
809
|
+
}
|
|
810
|
+
function elevenlabs(opts) {
|
|
811
|
+
return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
|
|
812
|
+
}
|
|
813
|
+
function openaiTts(opts) {
|
|
814
|
+
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
815
|
+
}
|
|
816
|
+
|
|
677
817
|
// src/fallback-provider.ts
|
|
678
818
|
var AllProvidersFailedError = class extends Error {
|
|
679
819
|
constructor(message) {
|
|
@@ -719,6 +859,37 @@ var FallbackLLMProvider = class {
|
|
|
719
859
|
}
|
|
720
860
|
}
|
|
721
861
|
}
|
|
862
|
+
/**
|
|
863
|
+
* Async-friendly disposer. Parity with Python's ``FallbackLLMProvider.aclose()``
|
|
864
|
+
* — safe to call multiple times, returns a resolved Promise once all probe
|
|
865
|
+
* timers are cleared. Prefer this in async contexts so awaiting the
|
|
866
|
+
* shutdown integrates naturally with the owning lifecycle.
|
|
867
|
+
*/
|
|
868
|
+
async aclose() {
|
|
869
|
+
this.destroy();
|
|
870
|
+
}
|
|
871
|
+
/**
|
|
872
|
+
* Explicit-resource-management hook so callers can write
|
|
873
|
+
* ``await using fallback = new FallbackLLMProvider([...])`` and have
|
|
874
|
+
* background probe timers cleared automatically when the block exits.
|
|
875
|
+
* Mirrors Python's ``async with FallbackLLMProvider(...)``.
|
|
876
|
+
*/
|
|
877
|
+
async [Symbol.asyncDispose]() {
|
|
878
|
+
await this.aclose();
|
|
879
|
+
}
|
|
880
|
+
/**
|
|
881
|
+
* Stream only the text deltas, flattening the chunk envelope. Parity with
|
|
882
|
+
* Python's ``FallbackLLMProvider.complete_stream``. Tool-call and done
|
|
883
|
+
* markers are filtered out so callers can concatenate the yielded strings
|
|
884
|
+
* directly.
|
|
885
|
+
*/
|
|
886
|
+
async *completeStream(messages, tools) {
|
|
887
|
+
for await (const chunk of this.stream(messages, tools)) {
|
|
888
|
+
if (chunk.type === "text") {
|
|
889
|
+
yield chunk.content ?? "";
|
|
890
|
+
}
|
|
891
|
+
}
|
|
892
|
+
}
|
|
722
893
|
// -----------------------------------------------------------------------
|
|
723
894
|
// LLMProvider implementation
|
|
724
895
|
// -----------------------------------------------------------------------
|
|
@@ -1235,13 +1406,37 @@ function wrapCallback(cb) {
|
|
|
1235
1406
|
}
|
|
1236
1407
|
};
|
|
1237
1408
|
}
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1409
|
+
function scheduleCron(cron, callback) {
|
|
1410
|
+
let cancelled = false;
|
|
1411
|
+
let task = null;
|
|
1412
|
+
const jobId = `cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
|
|
1413
|
+
loadCron().then((cm) => {
|
|
1414
|
+
if (cancelled) return;
|
|
1415
|
+
if (!cm.validate(cron)) {
|
|
1416
|
+
throw new Error(`Invalid cron expression: ${cron}`);
|
|
1417
|
+
}
|
|
1418
|
+
task = cm.schedule(cron, wrapCallback(callback));
|
|
1419
|
+
}).catch((err) => getLogger().error(`scheduleCron failed: ${String(err)}`));
|
|
1420
|
+
return {
|
|
1421
|
+
jobId,
|
|
1422
|
+
cancel() {
|
|
1423
|
+
if (cancelled) return;
|
|
1424
|
+
cancelled = true;
|
|
1425
|
+
if (task) {
|
|
1426
|
+
try {
|
|
1427
|
+
task.stop();
|
|
1428
|
+
} catch {
|
|
1429
|
+
}
|
|
1430
|
+
try {
|
|
1431
|
+
task.destroy?.();
|
|
1432
|
+
} catch {
|
|
1433
|
+
}
|
|
1434
|
+
}
|
|
1435
|
+
},
|
|
1436
|
+
get pending() {
|
|
1437
|
+
return !cancelled;
|
|
1438
|
+
}
|
|
1439
|
+
};
|
|
1245
1440
|
}
|
|
1246
1441
|
function scheduleOnce(at, callback) {
|
|
1247
1442
|
const delayMs = at.getTime() - Date.now();
|
|
@@ -1263,8 +1458,18 @@ function scheduleOnce(at, callback) {
|
|
|
1263
1458
|
}
|
|
1264
1459
|
};
|
|
1265
1460
|
}
|
|
1266
|
-
function scheduleInterval(
|
|
1267
|
-
|
|
1461
|
+
function scheduleInterval(intervalOrOpts, callback) {
|
|
1462
|
+
let intervalMs;
|
|
1463
|
+
if (typeof intervalOrOpts === "number") {
|
|
1464
|
+
intervalMs = intervalOrOpts;
|
|
1465
|
+
} else if (intervalOrOpts.intervalMs !== void 0) {
|
|
1466
|
+
intervalMs = intervalOrOpts.intervalMs;
|
|
1467
|
+
} else if (intervalOrOpts.seconds !== void 0) {
|
|
1468
|
+
intervalMs = intervalOrOpts.seconds * 1e3;
|
|
1469
|
+
} else {
|
|
1470
|
+
throw new Error("scheduleInterval requires seconds or intervalMs");
|
|
1471
|
+
}
|
|
1472
|
+
if (intervalMs <= 0) throw new Error("interval must be positive");
|
|
1268
1473
|
let cancelled = false;
|
|
1269
1474
|
const wrapped = wrapCallback(callback);
|
|
1270
1475
|
const timer = setInterval(() => {
|
|
@@ -1281,85 +1486,378 @@ function scheduleInterval(intervalMs, callback) {
|
|
|
1281
1486
|
}
|
|
1282
1487
|
};
|
|
1283
1488
|
}
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
} catch {
|
|
1294
|
-
}
|
|
1295
|
-
try {
|
|
1296
|
-
task.destroy?.();
|
|
1297
|
-
} catch {
|
|
1298
|
-
}
|
|
1299
|
-
},
|
|
1300
|
-
get pending() {
|
|
1301
|
-
return !cancelled;
|
|
1489
|
+
|
|
1490
|
+
// src/stt/deepgram.ts
|
|
1491
|
+
var STT = class extends DeepgramSTT {
|
|
1492
|
+
constructor(opts = {}) {
|
|
1493
|
+
const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
|
|
1494
|
+
if (!key) {
|
|
1495
|
+
throw new Error(
|
|
1496
|
+
"Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
|
|
1497
|
+
);
|
|
1302
1498
|
}
|
|
1303
|
-
|
|
1304
|
-
|
|
1499
|
+
super(
|
|
1500
|
+
key,
|
|
1501
|
+
opts.language ?? "en",
|
|
1502
|
+
opts.model ?? "nova-3",
|
|
1503
|
+
opts.encoding ?? "linear16",
|
|
1504
|
+
opts.sampleRate ?? 16e3,
|
|
1505
|
+
{
|
|
1506
|
+
endpointingMs: opts.endpointingMs ?? 150,
|
|
1507
|
+
utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
1508
|
+
smartFormat: opts.smartFormat ?? true,
|
|
1509
|
+
interimResults: opts.interimResults ?? true,
|
|
1510
|
+
...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
|
|
1511
|
+
}
|
|
1512
|
+
);
|
|
1513
|
+
}
|
|
1514
|
+
};
|
|
1305
1515
|
|
|
1306
|
-
// src/providers/
|
|
1307
|
-
|
|
1308
|
-
var
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1516
|
+
// src/providers/whisper-stt.ts
|
|
1517
|
+
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
1518
|
+
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
1519
|
+
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
1520
|
+
const dataSize = pcm.length;
|
|
1521
|
+
const header = Buffer.alloc(44);
|
|
1522
|
+
header.write("RIFF", 0);
|
|
1523
|
+
header.writeUInt32LE(36 + dataSize, 4);
|
|
1524
|
+
header.write("WAVE", 8);
|
|
1525
|
+
header.write("fmt ", 12);
|
|
1526
|
+
header.writeUInt32LE(16, 16);
|
|
1527
|
+
header.writeUInt16LE(1, 20);
|
|
1528
|
+
header.writeUInt16LE(channels, 22);
|
|
1529
|
+
header.writeUInt32LE(sampleRate, 24);
|
|
1530
|
+
header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
|
|
1531
|
+
header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
|
|
1532
|
+
header.writeUInt16LE(bitsPerSample, 34);
|
|
1533
|
+
header.write("data", 36);
|
|
1534
|
+
header.writeUInt32LE(dataSize, 40);
|
|
1535
|
+
return Buffer.concat([header, pcm]);
|
|
1315
1536
|
}
|
|
1316
|
-
var
|
|
1317
|
-
|
|
1318
|
-
|
|
1319
|
-
|
|
1320
|
-
|
|
1321
|
-
|
|
1322
|
-
|
|
1323
|
-
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1537
|
+
var WhisperSTT = class _WhisperSTT {
|
|
1538
|
+
apiKey;
|
|
1539
|
+
model;
|
|
1540
|
+
language;
|
|
1541
|
+
bufferSize;
|
|
1542
|
+
buffer = Buffer.alloc(0);
|
|
1543
|
+
callbacks = [];
|
|
1544
|
+
running = false;
|
|
1545
|
+
pendingTranscriptions = [];
|
|
1546
|
+
constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
|
|
1547
|
+
this.apiKey = apiKey;
|
|
1548
|
+
this.model = model;
|
|
1549
|
+
this.language = language;
|
|
1550
|
+
this.bufferSize = bufferSize;
|
|
1328
1551
|
}
|
|
1329
|
-
|
|
1330
|
-
|
|
1552
|
+
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1553
|
+
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
1554
|
+
return new _WhisperSTT(apiKey, model, language);
|
|
1331
1555
|
}
|
|
1332
|
-
|
|
1333
|
-
this.
|
|
1334
|
-
this.
|
|
1335
|
-
this.confCount = 0;
|
|
1556
|
+
async connect() {
|
|
1557
|
+
this.running = true;
|
|
1558
|
+
this.buffer = Buffer.alloc(0);
|
|
1336
1559
|
}
|
|
1337
|
-
|
|
1338
|
-
|
|
1560
|
+
sendAudio(audio) {
|
|
1561
|
+
if (!this.running) return;
|
|
1562
|
+
this.buffer = Buffer.concat([this.buffer, audio]);
|
|
1563
|
+
if (this.buffer.length >= this.bufferSize) {
|
|
1564
|
+
const pcm = this.buffer;
|
|
1565
|
+
this.buffer = Buffer.alloc(0);
|
|
1566
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1567
|
+
}
|
|
1568
|
+
}
|
|
1569
|
+
trackTranscription(promise) {
|
|
1570
|
+
const wrapped = promise.finally(() => {
|
|
1571
|
+
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
1572
|
+
if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
|
|
1573
|
+
});
|
|
1574
|
+
this.pendingTranscriptions.push(wrapped);
|
|
1339
1575
|
}
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
keepaliveTimer = null;
|
|
1346
|
-
apiKey;
|
|
1347
|
-
model;
|
|
1348
|
-
languageHints;
|
|
1349
|
-
languageHintsStrict;
|
|
1350
|
-
sampleRate;
|
|
1351
|
-
numChannels;
|
|
1352
|
-
enableSpeakerDiarization;
|
|
1353
|
-
enableLanguageIdentification;
|
|
1354
|
-
maxEndpointDelayMs;
|
|
1355
|
-
clientReferenceId;
|
|
1356
|
-
baseUrl;
|
|
1357
|
-
constructor(apiKey, options = {}) {
|
|
1358
|
-
if (!apiKey) {
|
|
1359
|
-
throw new Error("Soniox apiKey is required");
|
|
1576
|
+
onTranscript(callback) {
|
|
1577
|
+
if (this.callbacks.length >= 10) {
|
|
1578
|
+
getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
|
|
1579
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1580
|
+
return;
|
|
1360
1581
|
}
|
|
1361
|
-
|
|
1362
|
-
|
|
1582
|
+
this.callbacks.push(callback);
|
|
1583
|
+
}
|
|
1584
|
+
async close() {
|
|
1585
|
+
this.running = false;
|
|
1586
|
+
if (this.buffer.length >= this.bufferSize / 4) {
|
|
1587
|
+
const pcm = this.buffer;
|
|
1588
|
+
this.buffer = Buffer.alloc(0);
|
|
1589
|
+
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1590
|
+
} else {
|
|
1591
|
+
this.buffer = Buffer.alloc(0);
|
|
1592
|
+
}
|
|
1593
|
+
await Promise.allSettled(this.pendingTranscriptions);
|
|
1594
|
+
this.callbacks = [];
|
|
1595
|
+
}
|
|
1596
|
+
// ------------------------------------------------------------------
|
|
1597
|
+
// Private
|
|
1598
|
+
// ------------------------------------------------------------------
|
|
1599
|
+
async transcribeBuffer(pcm) {
|
|
1600
|
+
const wav = wrapPcmInWav(pcm);
|
|
1601
|
+
const formData = new FormData();
|
|
1602
|
+
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
1603
|
+
formData.append("model", this.model);
|
|
1604
|
+
if (this.language) {
|
|
1605
|
+
formData.append("language", this.language);
|
|
1606
|
+
}
|
|
1607
|
+
try {
|
|
1608
|
+
const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
|
|
1609
|
+
method: "POST",
|
|
1610
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
1611
|
+
body: formData,
|
|
1612
|
+
signal: AbortSignal.timeout(15e3)
|
|
1613
|
+
});
|
|
1614
|
+
if (!resp.ok) {
|
|
1615
|
+
const body = await resp.text();
|
|
1616
|
+
getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
|
|
1617
|
+
return;
|
|
1618
|
+
}
|
|
1619
|
+
const json = await resp.json();
|
|
1620
|
+
const text = (json.text ?? "").trim();
|
|
1621
|
+
if (!text) return;
|
|
1622
|
+
const transcript = {
|
|
1623
|
+
text,
|
|
1624
|
+
isFinal: true,
|
|
1625
|
+
confidence: 1
|
|
1626
|
+
};
|
|
1627
|
+
for (const cb of this.callbacks) {
|
|
1628
|
+
cb(transcript);
|
|
1629
|
+
}
|
|
1630
|
+
} catch (err) {
|
|
1631
|
+
getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
|
|
1632
|
+
}
|
|
1633
|
+
}
|
|
1634
|
+
};
|
|
1635
|
+
|
|
1636
|
+
// src/stt/whisper.ts
|
|
1637
|
+
var STT2 = class extends WhisperSTT {
|
|
1638
|
+
constructor(opts = {}) {
|
|
1639
|
+
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1640
|
+
if (!key) {
|
|
1641
|
+
throw new Error(
|
|
1642
|
+
"Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
1643
|
+
);
|
|
1644
|
+
}
|
|
1645
|
+
super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
|
|
1646
|
+
}
|
|
1647
|
+
};
|
|
1648
|
+
|
|
1649
|
+
// src/providers/cartesia-stt.ts
|
|
1650
|
+
import WebSocket3 from "ws";
|
|
1651
|
+
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
1652
|
+
var API_VERSION = "2025-04-16";
|
|
1653
|
+
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1654
|
+
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
1655
|
+
var CONNECT_TIMEOUT_MS = 1e4;
|
|
1656
|
+
var MAX_CALLBACKS = 10;
|
|
1657
|
+
var CartesiaSTT = class {
|
|
1658
|
+
constructor(apiKey, options = {}) {
|
|
1659
|
+
this.apiKey = apiKey;
|
|
1660
|
+
this.options = options;
|
|
1661
|
+
if (!apiKey) {
|
|
1662
|
+
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
1663
|
+
}
|
|
1664
|
+
}
|
|
1665
|
+
ws = null;
|
|
1666
|
+
callbacks = [];
|
|
1667
|
+
keepaliveTimer = null;
|
|
1668
|
+
/** Cartesia request id — set from the server transcript events. */
|
|
1669
|
+
requestId = "";
|
|
1670
|
+
buildWsUrl() {
|
|
1671
|
+
const opts = this.options;
|
|
1672
|
+
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
|
|
1673
|
+
let base;
|
|
1674
|
+
if (rawBase.startsWith("http://")) {
|
|
1675
|
+
base = `ws://${rawBase.slice("http://".length)}`;
|
|
1676
|
+
} else if (rawBase.startsWith("https://")) {
|
|
1677
|
+
base = `wss://${rawBase.slice("https://".length)}`;
|
|
1678
|
+
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
1679
|
+
base = rawBase;
|
|
1680
|
+
} else {
|
|
1681
|
+
base = `wss://${rawBase}`;
|
|
1682
|
+
}
|
|
1683
|
+
const language = opts.language ?? "en";
|
|
1684
|
+
const params = new URLSearchParams({
|
|
1685
|
+
model: opts.model ?? "ink-whisper",
|
|
1686
|
+
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
1687
|
+
encoding: opts.encoding ?? "pcm_s16le",
|
|
1688
|
+
cartesia_version: API_VERSION,
|
|
1689
|
+
api_key: this.apiKey,
|
|
1690
|
+
language
|
|
1691
|
+
});
|
|
1692
|
+
return `${base}/stt/websocket?${params.toString()}`;
|
|
1693
|
+
}
|
|
1694
|
+
async connect() {
|
|
1695
|
+
const url = this.buildWsUrl();
|
|
1696
|
+
this.ws = new WebSocket3(url, {
|
|
1697
|
+
headers: { "User-Agent": USER_AGENT }
|
|
1698
|
+
});
|
|
1699
|
+
await new Promise((resolve, reject) => {
|
|
1700
|
+
const timer = setTimeout(
|
|
1701
|
+
() => reject(new Error("Cartesia STT connect timeout")),
|
|
1702
|
+
CONNECT_TIMEOUT_MS
|
|
1703
|
+
);
|
|
1704
|
+
this.ws.once("open", () => {
|
|
1705
|
+
clearTimeout(timer);
|
|
1706
|
+
resolve();
|
|
1707
|
+
});
|
|
1708
|
+
this.ws.once("error", (err) => {
|
|
1709
|
+
clearTimeout(timer);
|
|
1710
|
+
reject(err);
|
|
1711
|
+
});
|
|
1712
|
+
});
|
|
1713
|
+
this.ws.on("message", (raw) => {
|
|
1714
|
+
let event;
|
|
1715
|
+
try {
|
|
1716
|
+
event = JSON.parse(raw.toString());
|
|
1717
|
+
} catch {
|
|
1718
|
+
return;
|
|
1719
|
+
}
|
|
1720
|
+
this.handleEvent(event);
|
|
1721
|
+
});
|
|
1722
|
+
this.keepaliveTimer = setInterval(() => {
|
|
1723
|
+
if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
|
|
1724
|
+
try {
|
|
1725
|
+
this.ws.ping();
|
|
1726
|
+
} catch {
|
|
1727
|
+
}
|
|
1728
|
+
}
|
|
1729
|
+
}, KEEPALIVE_INTERVAL_MS);
|
|
1730
|
+
}
|
|
1731
|
+
handleEvent(event) {
|
|
1732
|
+
const type = event.type;
|
|
1733
|
+
if (type === "transcript") {
|
|
1734
|
+
const text = (event.text ?? "").trim();
|
|
1735
|
+
const isFinal = Boolean(event.is_final);
|
|
1736
|
+
if (!text && !isFinal) return;
|
|
1737
|
+
if (event.request_id) {
|
|
1738
|
+
this.requestId = event.request_id;
|
|
1739
|
+
}
|
|
1740
|
+
if (!text) return;
|
|
1741
|
+
const confidence = Number(event.probability ?? 1);
|
|
1742
|
+
this.emit({ text, isFinal, confidence });
|
|
1743
|
+
return;
|
|
1744
|
+
}
|
|
1745
|
+
if (type === "error") {
|
|
1746
|
+
getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
|
|
1747
|
+
return;
|
|
1748
|
+
}
|
|
1749
|
+
}
|
|
1750
|
+
emit(transcript) {
|
|
1751
|
+
for (const cb of this.callbacks) {
|
|
1752
|
+
cb(transcript);
|
|
1753
|
+
}
|
|
1754
|
+
}
|
|
1755
|
+
sendAudio(audio) {
|
|
1756
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
1757
|
+
this.ws.send(audio);
|
|
1758
|
+
}
|
|
1759
|
+
onTranscript(callback) {
|
|
1760
|
+
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
1761
|
+
getLogger().warn(
|
|
1762
|
+
"CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1763
|
+
);
|
|
1764
|
+
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1765
|
+
return;
|
|
1766
|
+
}
|
|
1767
|
+
this.callbacks.push(callback);
|
|
1768
|
+
}
|
|
1769
|
+
close() {
|
|
1770
|
+
if (this.keepaliveTimer) {
|
|
1771
|
+
clearInterval(this.keepaliveTimer);
|
|
1772
|
+
this.keepaliveTimer = null;
|
|
1773
|
+
}
|
|
1774
|
+
if (this.ws) {
|
|
1775
|
+
try {
|
|
1776
|
+
this.ws.send("finalize");
|
|
1777
|
+
} catch {
|
|
1778
|
+
}
|
|
1779
|
+
this.ws.close();
|
|
1780
|
+
this.ws = null;
|
|
1781
|
+
}
|
|
1782
|
+
}
|
|
1783
|
+
};
|
|
1784
|
+
|
|
1785
|
+
// src/stt/cartesia.ts
|
|
1786
|
+
var STT3 = class extends CartesiaSTT {
|
|
1787
|
+
constructor(opts = {}) {
|
|
1788
|
+
const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
|
|
1789
|
+
if (!key) {
|
|
1790
|
+
throw new Error(
|
|
1791
|
+
"Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
1792
|
+
);
|
|
1793
|
+
}
|
|
1794
|
+
super(key, {
|
|
1795
|
+
model: opts.model,
|
|
1796
|
+
language: opts.language,
|
|
1797
|
+
encoding: opts.encoding,
|
|
1798
|
+
sampleRate: opts.sampleRate,
|
|
1799
|
+
baseUrl: opts.baseUrl
|
|
1800
|
+
});
|
|
1801
|
+
}
|
|
1802
|
+
};
|
|
1803
|
+
|
|
1804
|
+
// src/providers/soniox-stt.ts
|
|
1805
|
+
import WebSocket4 from "ws";
|
|
1806
|
+
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
1807
|
+
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
1808
|
+
var END_TOKEN = "<end>";
|
|
1809
|
+
var FINALIZED_TOKEN = "<fin>";
|
|
1810
|
+
var KEEPALIVE_INTERVAL_MS2 = 5e3;
|
|
1811
|
+
function isEndToken(token) {
|
|
1812
|
+
return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
|
|
1813
|
+
}
|
|
1814
|
+
var TokenAccumulator = class {
|
|
1815
|
+
text = "";
|
|
1816
|
+
confSum = 0;
|
|
1817
|
+
confCount = 0;
|
|
1818
|
+
update(token) {
|
|
1819
|
+
if (token.text) {
|
|
1820
|
+
this.text += token.text;
|
|
1821
|
+
}
|
|
1822
|
+
if (typeof token.confidence === "number") {
|
|
1823
|
+
this.confSum += token.confidence;
|
|
1824
|
+
this.confCount += 1;
|
|
1825
|
+
}
|
|
1826
|
+
}
|
|
1827
|
+
get confidence() {
|
|
1828
|
+
return this.confCount === 0 ? 0 : this.confSum / this.confCount;
|
|
1829
|
+
}
|
|
1830
|
+
reset() {
|
|
1831
|
+
this.text = "";
|
|
1832
|
+
this.confSum = 0;
|
|
1833
|
+
this.confCount = 0;
|
|
1834
|
+
}
|
|
1835
|
+
get raw() {
|
|
1836
|
+
return { sum: this.confSum, count: this.confCount };
|
|
1837
|
+
}
|
|
1838
|
+
};
|
|
1839
|
+
var SonioxSTT = class _SonioxSTT {
|
|
1840
|
+
ws = null;
|
|
1841
|
+
callbacks = [];
|
|
1842
|
+
final = new TokenAccumulator();
|
|
1843
|
+
keepaliveTimer = null;
|
|
1844
|
+
apiKey;
|
|
1845
|
+
model;
|
|
1846
|
+
languageHints;
|
|
1847
|
+
languageHintsStrict;
|
|
1848
|
+
sampleRate;
|
|
1849
|
+
numChannels;
|
|
1850
|
+
enableSpeakerDiarization;
|
|
1851
|
+
enableLanguageIdentification;
|
|
1852
|
+
maxEndpointDelayMs;
|
|
1853
|
+
clientReferenceId;
|
|
1854
|
+
baseUrl;
|
|
1855
|
+
constructor(apiKey, options = {}) {
|
|
1856
|
+
if (!apiKey) {
|
|
1857
|
+
throw new Error("Soniox apiKey is required");
|
|
1858
|
+
}
|
|
1859
|
+
const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
|
|
1860
|
+
if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
|
|
1363
1861
|
throw new Error("maxEndpointDelayMs must be between 500 and 3000");
|
|
1364
1862
|
}
|
|
1365
1863
|
this.apiKey = apiKey;
|
|
@@ -1400,7 +1898,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1400
1898
|
return config;
|
|
1401
1899
|
}
|
|
1402
1900
|
async connect() {
|
|
1403
|
-
this.ws = new
|
|
1901
|
+
this.ws = new WebSocket4(this.baseUrl);
|
|
1404
1902
|
await new Promise((resolve, reject) => {
|
|
1405
1903
|
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
1406
1904
|
this.ws.once("open", () => {
|
|
@@ -1419,13 +1917,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1419
1917
|
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
1420
1918
|
});
|
|
1421
1919
|
this.keepaliveTimer = setInterval(() => {
|
|
1422
|
-
if (this.ws && this.ws.readyState ===
|
|
1920
|
+
if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
|
|
1423
1921
|
try {
|
|
1424
1922
|
this.ws.send(KEEPALIVE_MESSAGE);
|
|
1425
1923
|
} catch {
|
|
1426
1924
|
}
|
|
1427
1925
|
}
|
|
1428
|
-
},
|
|
1926
|
+
}, KEEPALIVE_INTERVAL_MS2);
|
|
1429
1927
|
}
|
|
1430
1928
|
clearKeepalive() {
|
|
1431
1929
|
if (this.keepaliveTimer) {
|
|
@@ -1492,7 +1990,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1492
1990
|
}
|
|
1493
1991
|
}
|
|
1494
1992
|
sendAudio(audio) {
|
|
1495
|
-
if (!this.ws || this.ws.readyState !==
|
|
1993
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
|
|
1496
1994
|
if (audio.length === 0) return;
|
|
1497
1995
|
this.ws.send(audio);
|
|
1498
1996
|
}
|
|
@@ -1522,12 +2020,27 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1522
2020
|
}
|
|
1523
2021
|
};
|
|
1524
2022
|
|
|
2023
|
+
// src/stt/soniox.ts
|
|
2024
|
+
var STT4 = class extends SonioxSTT {
|
|
2025
|
+
constructor(opts = {}) {
|
|
2026
|
+
const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
|
|
2027
|
+
if (!key) {
|
|
2028
|
+
throw new Error(
|
|
2029
|
+
"Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
|
|
2030
|
+
);
|
|
2031
|
+
}
|
|
2032
|
+
const { apiKey: _ignored, ...rest } = opts;
|
|
2033
|
+
void _ignored;
|
|
2034
|
+
super(key, rest);
|
|
2035
|
+
}
|
|
2036
|
+
};
|
|
2037
|
+
|
|
1525
2038
|
// src/providers/assemblyai-stt.ts
|
|
1526
|
-
import
|
|
1527
|
-
var
|
|
2039
|
+
import WebSocket5 from "ws";
|
|
2040
|
+
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
1528
2041
|
var DEFAULT_MIN_TURN_SILENCE_MS = 100;
|
|
1529
|
-
var
|
|
1530
|
-
var
|
|
2042
|
+
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
2043
|
+
var MAX_CALLBACKS2 = 10;
|
|
1531
2044
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
1532
2045
|
constructor(apiKey, options = {}) {
|
|
1533
2046
|
this.apiKey = apiKey;
|
|
@@ -1582,175 +2095,29 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
1582
2095
|
domain: opts.domain
|
|
1583
2096
|
};
|
|
1584
2097
|
const params = new URLSearchParams();
|
|
1585
|
-
for (const [key, value] of Object.entries(raw)) {
|
|
1586
|
-
if (value === void 0 || value === null) continue;
|
|
1587
|
-
if (typeof value === "boolean") {
|
|
1588
|
-
params.set(key, value ? "true" : "false");
|
|
1589
|
-
} else {
|
|
1590
|
-
params.set(key, String(value));
|
|
1591
|
-
}
|
|
1592
|
-
}
|
|
1593
|
-
const base = opts.baseUrl ??
|
|
1594
|
-
return `${base}/v3/ws?${params.toString()}`;
|
|
1595
|
-
}
|
|
1596
|
-
async connect() {
|
|
1597
|
-
const url = this.buildUrl();
|
|
1598
|
-
this.ws = new WebSocket4(url, {
|
|
1599
|
-
headers: {
|
|
1600
|
-
Authorization: this.apiKey,
|
|
1601
|
-
"Content-Type": "application/json",
|
|
1602
|
-
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
1603
|
-
}
|
|
1604
|
-
});
|
|
1605
|
-
await new Promise((resolve, reject) => {
|
|
1606
|
-
const timer = setTimeout(
|
|
1607
|
-
() => reject(new Error("AssemblyAI connect timeout")),
|
|
1608
|
-
CONNECT_TIMEOUT_MS
|
|
1609
|
-
);
|
|
1610
|
-
this.ws.once("open", () => {
|
|
1611
|
-
clearTimeout(timer);
|
|
1612
|
-
resolve();
|
|
1613
|
-
});
|
|
1614
|
-
this.ws.once("error", (err) => {
|
|
1615
|
-
clearTimeout(timer);
|
|
1616
|
-
reject(err);
|
|
1617
|
-
});
|
|
1618
|
-
});
|
|
1619
|
-
this.ws.on("message", (raw) => {
|
|
1620
|
-
let event;
|
|
1621
|
-
try {
|
|
1622
|
-
event = JSON.parse(raw.toString());
|
|
1623
|
-
} catch {
|
|
1624
|
-
return;
|
|
1625
|
-
}
|
|
1626
|
-
this.handleEvent(event);
|
|
1627
|
-
});
|
|
1628
|
-
}
|
|
1629
|
-
handleEvent(event) {
|
|
1630
|
-
const type = event.type;
|
|
1631
|
-
if (type === "Begin") {
|
|
1632
|
-
this.sessionId = event.id ?? "";
|
|
1633
|
-
this.expiresAt = event.expires_at ?? 0;
|
|
1634
|
-
return;
|
|
1635
|
-
}
|
|
1636
|
-
if (type !== "Turn") {
|
|
1637
|
-
return;
|
|
1638
|
-
}
|
|
1639
|
-
const endOfTurn = Boolean(event.end_of_turn);
|
|
1640
|
-
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
1641
|
-
const words = event.words ?? [];
|
|
1642
|
-
const transcriptText = (event.transcript ?? "").trim();
|
|
1643
|
-
if (endOfTurn) {
|
|
1644
|
-
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
1645
|
-
if (!transcriptText) return;
|
|
1646
|
-
this.emit({
|
|
1647
|
-
text: transcriptText,
|
|
1648
|
-
isFinal: true,
|
|
1649
|
-
confidence: averageConfidence(words)
|
|
1650
|
-
});
|
|
1651
|
-
return;
|
|
1652
|
-
}
|
|
1653
|
-
if (!words.length) return;
|
|
1654
|
-
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
1655
|
-
if (!interim) return;
|
|
1656
|
-
this.emit({
|
|
1657
|
-
text: interim,
|
|
1658
|
-
isFinal: false,
|
|
1659
|
-
confidence: averageConfidence(words)
|
|
1660
|
-
});
|
|
1661
|
-
}
|
|
1662
|
-
emit(transcript) {
|
|
1663
|
-
for (const cb of this.callbacks) {
|
|
1664
|
-
cb(transcript);
|
|
1665
|
-
}
|
|
1666
|
-
}
|
|
1667
|
-
sendAudio(audio) {
|
|
1668
|
-
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
|
|
1669
|
-
this.ws.send(audio);
|
|
1670
|
-
}
|
|
1671
|
-
onTranscript(callback) {
|
|
1672
|
-
if (this.callbacks.length >= MAX_CALLBACKS) {
|
|
1673
|
-
getLogger().warn(
|
|
1674
|
-
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1675
|
-
);
|
|
1676
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1677
|
-
return;
|
|
1678
|
-
}
|
|
1679
|
-
this.callbacks.push(callback);
|
|
1680
|
-
}
|
|
1681
|
-
close() {
|
|
1682
|
-
if (this.ws) {
|
|
1683
|
-
try {
|
|
1684
|
-
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
1685
|
-
} catch {
|
|
1686
|
-
}
|
|
1687
|
-
this.ws.close();
|
|
1688
|
-
this.ws = null;
|
|
1689
|
-
}
|
|
1690
|
-
}
|
|
1691
|
-
};
|
|
1692
|
-
function averageConfidence(words) {
|
|
1693
|
-
if (!words.length) return 0;
|
|
1694
|
-
let total = 0;
|
|
1695
|
-
for (const w of words) {
|
|
1696
|
-
total += Number(w.confidence ?? 0);
|
|
1697
|
-
}
|
|
1698
|
-
return total / words.length;
|
|
1699
|
-
}
|
|
1700
|
-
|
|
1701
|
-
// src/providers/cartesia-stt.ts
|
|
1702
|
-
import WebSocket5 from "ws";
|
|
1703
|
-
var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
|
|
1704
|
-
var API_VERSION = "2025-04-16";
|
|
1705
|
-
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1706
|
-
var KEEPALIVE_INTERVAL_MS2 = 3e4;
|
|
1707
|
-
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
1708
|
-
var MAX_CALLBACKS2 = 10;
|
|
1709
|
-
var CartesiaSTT = class {
|
|
1710
|
-
constructor(apiKey, options = {}) {
|
|
1711
|
-
this.apiKey = apiKey;
|
|
1712
|
-
this.options = options;
|
|
1713
|
-
if (!apiKey) {
|
|
1714
|
-
throw new Error("CartesiaSTT requires a non-empty apiKey");
|
|
1715
|
-
}
|
|
1716
|
-
}
|
|
1717
|
-
ws = null;
|
|
1718
|
-
callbacks = [];
|
|
1719
|
-
keepaliveTimer = null;
|
|
1720
|
-
/** Cartesia request id — set from the server transcript events. */
|
|
1721
|
-
requestId = "";
|
|
1722
|
-
buildWsUrl() {
|
|
1723
|
-
const opts = this.options;
|
|
1724
|
-
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
1725
|
-
let base;
|
|
1726
|
-
if (rawBase.startsWith("http://")) {
|
|
1727
|
-
base = `ws://${rawBase.slice("http://".length)}`;
|
|
1728
|
-
} else if (rawBase.startsWith("https://")) {
|
|
1729
|
-
base = `wss://${rawBase.slice("https://".length)}`;
|
|
1730
|
-
} else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
|
|
1731
|
-
base = rawBase;
|
|
1732
|
-
} else {
|
|
1733
|
-
base = `wss://${rawBase}`;
|
|
1734
|
-
}
|
|
1735
|
-
const language = opts.language ?? "en";
|
|
1736
|
-
const params = new URLSearchParams({
|
|
1737
|
-
model: opts.model ?? "ink-whisper",
|
|
1738
|
-
sample_rate: String(opts.sampleRate ?? 16e3),
|
|
1739
|
-
encoding: opts.encoding ?? "pcm_s16le",
|
|
1740
|
-
cartesia_version: API_VERSION,
|
|
1741
|
-
api_key: this.apiKey,
|
|
1742
|
-
language
|
|
1743
|
-
});
|
|
1744
|
-
return `${base}/stt/websocket?${params.toString()}`;
|
|
2098
|
+
for (const [key, value] of Object.entries(raw)) {
|
|
2099
|
+
if (value === void 0 || value === null) continue;
|
|
2100
|
+
if (typeof value === "boolean") {
|
|
2101
|
+
params.set(key, value ? "true" : "false");
|
|
2102
|
+
} else {
|
|
2103
|
+
params.set(key, String(value));
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
2106
|
+
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
2107
|
+
return `${base}/v3/ws?${params.toString()}`;
|
|
1745
2108
|
}
|
|
1746
2109
|
async connect() {
|
|
1747
|
-
const url = this.
|
|
2110
|
+
const url = this.buildUrl();
|
|
1748
2111
|
this.ws = new WebSocket5(url, {
|
|
1749
|
-
headers: {
|
|
2112
|
+
headers: {
|
|
2113
|
+
Authorization: this.apiKey,
|
|
2114
|
+
"Content-Type": "application/json",
|
|
2115
|
+
"User-Agent": "Patter/1.0 (integration=LiveKit-port)"
|
|
2116
|
+
}
|
|
1750
2117
|
});
|
|
1751
2118
|
await new Promise((resolve, reject) => {
|
|
1752
2119
|
const timer = setTimeout(
|
|
1753
|
-
() => reject(new Error("
|
|
2120
|
+
() => reject(new Error("AssemblyAI connect timeout")),
|
|
1754
2121
|
CONNECT_TIMEOUT_MS2
|
|
1755
2122
|
);
|
|
1756
2123
|
this.ws.once("open", () => {
|
|
@@ -1771,33 +2138,39 @@ var CartesiaSTT = class {
|
|
|
1771
2138
|
}
|
|
1772
2139
|
this.handleEvent(event);
|
|
1773
2140
|
});
|
|
1774
|
-
this.keepaliveTimer = setInterval(() => {
|
|
1775
|
-
if (this.ws && this.ws.readyState === WebSocket5.OPEN) {
|
|
1776
|
-
try {
|
|
1777
|
-
this.ws.ping();
|
|
1778
|
-
} catch {
|
|
1779
|
-
}
|
|
1780
|
-
}
|
|
1781
|
-
}, KEEPALIVE_INTERVAL_MS2);
|
|
1782
2141
|
}
|
|
1783
2142
|
handleEvent(event) {
|
|
1784
2143
|
const type = event.type;
|
|
1785
|
-
if (type === "
|
|
1786
|
-
|
|
1787
|
-
|
|
1788
|
-
if (!text && !isFinal) return;
|
|
1789
|
-
if (event.request_id) {
|
|
1790
|
-
this.requestId = event.request_id;
|
|
1791
|
-
}
|
|
1792
|
-
if (!text) return;
|
|
1793
|
-
const confidence = Number(event.probability ?? 1);
|
|
1794
|
-
this.emit({ text, isFinal, confidence });
|
|
2144
|
+
if (type === "Begin") {
|
|
2145
|
+
this.sessionId = event.id ?? "";
|
|
2146
|
+
this.expiresAt = event.expires_at ?? 0;
|
|
1795
2147
|
return;
|
|
1796
2148
|
}
|
|
1797
|
-
if (type
|
|
1798
|
-
|
|
2149
|
+
if (type !== "Turn") {
|
|
2150
|
+
return;
|
|
2151
|
+
}
|
|
2152
|
+
const endOfTurn = Boolean(event.end_of_turn);
|
|
2153
|
+
const turnIsFormatted = Boolean(event.turn_is_formatted);
|
|
2154
|
+
const words = event.words ?? [];
|
|
2155
|
+
const transcriptText = (event.transcript ?? "").trim();
|
|
2156
|
+
if (endOfTurn) {
|
|
2157
|
+
if (this.options.formatTurns && !turnIsFormatted) return;
|
|
2158
|
+
if (!transcriptText) return;
|
|
2159
|
+
this.emit({
|
|
2160
|
+
text: transcriptText,
|
|
2161
|
+
isFinal: true,
|
|
2162
|
+
confidence: averageConfidence(words)
|
|
2163
|
+
});
|
|
1799
2164
|
return;
|
|
1800
2165
|
}
|
|
2166
|
+
if (!words.length) return;
|
|
2167
|
+
const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
|
|
2168
|
+
if (!interim) return;
|
|
2169
|
+
this.emit({
|
|
2170
|
+
text: interim,
|
|
2171
|
+
isFinal: false,
|
|
2172
|
+
confidence: averageConfidence(words)
|
|
2173
|
+
});
|
|
1801
2174
|
}
|
|
1802
2175
|
emit(transcript) {
|
|
1803
2176
|
for (const cb of this.callbacks) {
|
|
@@ -1811,7 +2184,7 @@ var CartesiaSTT = class {
|
|
|
1811
2184
|
onTranscript(callback) {
|
|
1812
2185
|
if (this.callbacks.length >= MAX_CALLBACKS2) {
|
|
1813
2186
|
getLogger().warn(
|
|
1814
|
-
"
|
|
2187
|
+
"AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1815
2188
|
);
|
|
1816
2189
|
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1817
2190
|
return;
|
|
@@ -1819,13 +2192,9 @@ var CartesiaSTT = class {
|
|
|
1819
2192
|
this.callbacks.push(callback);
|
|
1820
2193
|
}
|
|
1821
2194
|
close() {
|
|
1822
|
-
if (this.keepaliveTimer) {
|
|
1823
|
-
clearInterval(this.keepaliveTimer);
|
|
1824
|
-
this.keepaliveTimer = null;
|
|
1825
|
-
}
|
|
1826
2195
|
if (this.ws) {
|
|
1827
2196
|
try {
|
|
1828
|
-
this.ws.send("
|
|
2197
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
1829
2198
|
} catch {
|
|
1830
2199
|
}
|
|
1831
2200
|
this.ws.close();
|
|
@@ -1833,6 +2202,305 @@ var CartesiaSTT = class {
|
|
|
1833
2202
|
}
|
|
1834
2203
|
}
|
|
1835
2204
|
};
|
|
2205
|
+
function averageConfidence(words) {
|
|
2206
|
+
if (!words.length) return 0;
|
|
2207
|
+
let total = 0;
|
|
2208
|
+
for (const w of words) {
|
|
2209
|
+
total += Number(w.confidence ?? 0);
|
|
2210
|
+
}
|
|
2211
|
+
return total / words.length;
|
|
2212
|
+
}
|
|
2213
|
+
|
|
2214
|
+
// src/stt/assemblyai.ts
|
|
2215
|
+
var STT5 = class extends AssemblyAISTT {
|
|
2216
|
+
constructor(opts = {}) {
|
|
2217
|
+
const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
|
|
2218
|
+
if (!key) {
|
|
2219
|
+
throw new Error(
|
|
2220
|
+
"AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
|
|
2221
|
+
);
|
|
2222
|
+
}
|
|
2223
|
+
const { apiKey: _ignored, ...rest } = opts;
|
|
2224
|
+
void _ignored;
|
|
2225
|
+
super(key, rest);
|
|
2226
|
+
}
|
|
2227
|
+
};
|
|
2228
|
+
|
|
2229
|
+
// src/providers/elevenlabs-tts.ts
|
|
2230
|
+
var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
|
|
2231
|
+
var ELEVENLABS_VOICE_ID_BY_NAME = {
|
|
2232
|
+
rachel: "21m00Tcm4TlvDq8ikWAM",
|
|
2233
|
+
drew: "29vD33N1CtxCmqQRPOHJ",
|
|
2234
|
+
clyde: "2EiwWnXFnvU5JabPnv8n",
|
|
2235
|
+
paul: "5Q0t7uMcjvnagumLfvZi",
|
|
2236
|
+
domi: "AZnzlk1XvdvUeBnXmlld",
|
|
2237
|
+
dave: "CYw3kZ02Hs0563khs1Fj",
|
|
2238
|
+
fin: "D38z5RcWu1voky8WS1ja",
|
|
2239
|
+
bella: "EXAVITQu4vr4xnSDxMaL",
|
|
2240
|
+
antoni: "ErXwobaYiN019PkySvjV",
|
|
2241
|
+
thomas: "GBv7mTt0atIp3Br8iCZE",
|
|
2242
|
+
charlie: "IKne3meq5aSn9XLyUdCD",
|
|
2243
|
+
george: "JBFqnCBsd6RMkjVDRZzb",
|
|
2244
|
+
emily: "LcfcDJNUP1GQjkzn1xUU",
|
|
2245
|
+
elli: "MF3mGyEYCl7XYWbV9V6O",
|
|
2246
|
+
callum: "N2lVS1w4EtoT3dr4eOWO",
|
|
2247
|
+
patrick: "ODq5zmih8GrVes37Dizd",
|
|
2248
|
+
harry: "SOYHLrjzK2X1ezoPC6cr",
|
|
2249
|
+
liam: "TX3LPaxmHKxFdv7VOQHJ",
|
|
2250
|
+
dorothy: "ThT5KcBeYPX3keUQqHPh",
|
|
2251
|
+
josh: "TxGEqnHWrfWFTfGW9XjX",
|
|
2252
|
+
arnold: "VR6AewLTigWG4xSOukaG",
|
|
2253
|
+
charlotte: "XB0fDUnXU5powFXDhCwa",
|
|
2254
|
+
matilda: "XrExE9yKIg1WjnnlVkGX",
|
|
2255
|
+
matthew: "Yko7PKHZNXotIFUBG7I9",
|
|
2256
|
+
james: "ZQe5CZNOzWyzPSCn5a3c",
|
|
2257
|
+
joseph: "Zlb1dXrM653N07WRdFW3",
|
|
2258
|
+
jeremy: "bVMeCyTHy58xNoL34h3p",
|
|
2259
|
+
michael: "flq6f7yk4E4fJM5XTYuZ",
|
|
2260
|
+
ethan: "g5CIjZEefAph4nQFvHAz",
|
|
2261
|
+
gigi: "jBpfuIE2acCO8z3wKNLl",
|
|
2262
|
+
freya: "jsCqWAovK2LkecY7zXl4",
|
|
2263
|
+
brian: "nPczCjzI2devNBz1zQrb",
|
|
2264
|
+
grace: "oWAxZDx7w5VEj9dCyTzz",
|
|
2265
|
+
daniel: "onwK4e9ZLuTAKqWW03F9",
|
|
2266
|
+
lily: "pFZP5JQG7iQjIQuC4Bku",
|
|
2267
|
+
serena: "pMsXgVXv3BLzUgSXRplE",
|
|
2268
|
+
adam: "pNInz6obpgDQGcFmaJgB",
|
|
2269
|
+
nicole: "piTKgcLEGmPE4e6mEKli",
|
|
2270
|
+
bill: "pqHfZKP75CvOlQylNhV4",
|
|
2271
|
+
jessie: "t0jbNlBVZ17f02VDIeMI",
|
|
2272
|
+
ryan: "wViXBPUzp2ZZixB1xQuM",
|
|
2273
|
+
sam: "yoZ06aMxZJJ28mfd3POQ",
|
|
2274
|
+
glinda: "z9fAnlkpzviPz146aGWa",
|
|
2275
|
+
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
2276
|
+
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
2277
|
+
alloy: "21m00Tcm4TlvDq8ikWAM"
|
|
2278
|
+
};
|
|
2279
|
+
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
2280
|
+
function resolveVoiceId(voice) {
|
|
2281
|
+
if (!voice) return voice;
|
|
2282
|
+
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
2283
|
+
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
2284
|
+
}
|
|
2285
|
+
var ElevenLabsTTS = class {
|
|
2286
|
+
constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
|
|
2287
|
+
this.apiKey = apiKey;
|
|
2288
|
+
this.modelId = modelId;
|
|
2289
|
+
this.outputFormat = outputFormat;
|
|
2290
|
+
this.voiceId = resolveVoiceId(voiceId);
|
|
2291
|
+
}
|
|
2292
|
+
voiceId;
|
|
2293
|
+
/**
|
|
2294
|
+
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2295
|
+
*
|
|
2296
|
+
* For large chunks (or when latency matters) call `synthesizeStream` instead.
|
|
2297
|
+
*/
|
|
2298
|
+
async synthesize(text) {
|
|
2299
|
+
const chunks = [];
|
|
2300
|
+
for await (const chunk of this.synthesizeStream(text)) {
|
|
2301
|
+
chunks.push(chunk);
|
|
2302
|
+
}
|
|
2303
|
+
return Buffer.concat(chunks);
|
|
2304
|
+
}
|
|
2305
|
+
/**
|
|
2306
|
+
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2307
|
+
*
|
|
2308
|
+
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
2309
|
+
* configured to).
|
|
2310
|
+
*/
|
|
2311
|
+
async *synthesizeStream(text) {
|
|
2312
|
+
const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
|
|
2313
|
+
const response = await fetch(url, {
|
|
2314
|
+
method: "POST",
|
|
2315
|
+
headers: {
|
|
2316
|
+
"xi-api-key": this.apiKey,
|
|
2317
|
+
"Content-Type": "application/json"
|
|
2318
|
+
},
|
|
2319
|
+
body: JSON.stringify({ text, model_id: this.modelId }),
|
|
2320
|
+
signal: AbortSignal.timeout(3e4)
|
|
2321
|
+
});
|
|
2322
|
+
if (!response.ok) {
|
|
2323
|
+
const body = await response.text();
|
|
2324
|
+
throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
|
|
2325
|
+
}
|
|
2326
|
+
if (!response.body) {
|
|
2327
|
+
throw new Error("ElevenLabs TTS: no response body");
|
|
2328
|
+
}
|
|
2329
|
+
const reader = response.body.getReader();
|
|
2330
|
+
try {
|
|
2331
|
+
while (true) {
|
|
2332
|
+
const { done, value } = await reader.read();
|
|
2333
|
+
if (done) break;
|
|
2334
|
+
if (value && value.length > 0) {
|
|
2335
|
+
yield Buffer.from(value);
|
|
2336
|
+
}
|
|
2337
|
+
}
|
|
2338
|
+
} finally {
|
|
2339
|
+
if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
|
|
2340
|
+
});
|
|
2341
|
+
reader.releaseLock();
|
|
2342
|
+
}
|
|
2343
|
+
}
|
|
2344
|
+
};
|
|
2345
|
+
|
|
2346
|
+
// src/tts/elevenlabs.ts
|
|
2347
|
+
var TTS = class extends ElevenLabsTTS {
|
|
2348
|
+
constructor(opts = {}) {
|
|
2349
|
+
const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
2350
|
+
if (!key) {
|
|
2351
|
+
throw new Error(
|
|
2352
|
+
"ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
2353
|
+
);
|
|
2354
|
+
}
|
|
2355
|
+
super(
|
|
2356
|
+
key,
|
|
2357
|
+
opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM",
|
|
2358
|
+
opts.modelId ?? "eleven_turbo_v2_5",
|
|
2359
|
+
opts.outputFormat ?? "pcm_16000"
|
|
2360
|
+
);
|
|
2361
|
+
}
|
|
2362
|
+
};
|
|
2363
|
+
|
|
2364
|
+
// src/providers/openai-tts.ts
|
|
2365
|
+
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
/**
 * Client for the OpenAI speech endpoint. Audio is requested as raw PCM and
 * resampled from 24 kHz to 16 kHz before it is handed to the caller.
 */
var OpenAITTS = class _OpenAITTS {
  /**
   * @param {string} apiKey - Sent as the `Authorization: Bearer` credential.
   * @param {string} [voice="alloy"] - Forwarded as the request's `voice` field.
   * @param {string} [model="tts-1"] - Forwarded as the request's `model` field.
   */
  constructor(apiKey, voice = "alloy", model = "tts-1") {
    this.apiKey = apiKey;
    this.voice = voice;
    this.model = model;
  }
  /**
   * Synthesise text to speech and return the full audio as a single Buffer.
   *
   * For large chunks (or when latency matters) call `synthesizeStream` instead.
   *
   * @param {string} text - Text to speak.
   * @returns {Promise<Buffer>} Concatenation of every chunk yielded by `synthesizeStream`.
   */
  async synthesize(text) {
    const chunks = [];
    for await (const chunk of this.synthesizeStream(text)) {
      chunks.push(chunk);
    }
    return Buffer.concat(chunks);
  }
  /**
   * Synthesise text and yield audio chunks as they arrive (streaming).
   *
   * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
   * yielding so the output is ready for telephony pipelines.
   *
   * The resampler carries state (buffered samples + odd trailing byte)
   * between chunks — without that state cross-chunk sample alignment drifts
   * and the caller hears pops / dropped audio (BUG #23, mirror of the
   * Python `audioop.ratecv` fix).
   *
   * @param {string} text - Text to speak.
   * @yields {Buffer} Non-empty PCM16-LE chunks.
   * @throws {Error} On a non-2xx response or when the response has no body.
   */
  async *synthesizeStream(text) {
    const response = await fetch(OPENAI_TTS_URL, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json"
      },
      body: JSON.stringify({
        model: this.model,
        input: text,
        voice: this.voice,
        response_format: "pcm"
      }),
      signal: AbortSignal.timeout(3e4)
    });
    if (!response.ok) {
      const body = await response.text();
      throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
    }
    if (!response.body) {
      throw new Error("OpenAI TTS: no response body");
    }
    const ctx = { carryByte: null, leftover: [] };
    const reader = response.body.getReader();
    try {
      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        if (value && value.length > 0) {
          const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
          if (out.length > 0) yield out;
        }
      }
      // At most two samples can still be buffered here; emit them verbatim so
      // the end of the utterance is not clipped. A dangling carry byte (half a
      // sample) is intentionally discarded.
      if (ctx.leftover.length > 0) {
        yield _OpenAITTS.samplesToPcm16(ctx.leftover);
      }
    } finally {
      // Runs on normal completion, on errors, and when the consumer stops
      // iterating early: cancel the stream (best effort) and release the lock.
      if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
      });
      reader.releaseLock();
    }
  }
  /**
   * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
   * state so the 3:2 pattern doesn't reset at every network read.
   *
   * Every group of three input samples `[a, b, c]` becomes two output samples
   * `[a, trunc((b + c) / 2)]`.
   *
   * @param {Buffer} audio - Next slice of the input stream; may end mid-sample.
   * @param {{carryByte: (number|null), leftover: number[]}} ctx - Mutable
   *   state shared across calls: `carryByte` holds an unpaired trailing byte,
   *   `leftover` holds the 0-2 samples that did not fill a group of three.
   * @returns {Buffer} Resampled PCM16-LE audio (possibly empty).
   */
  static resampleStreaming(audio, ctx) {
    let buf;
    if (ctx.carryByte !== null) {
      // Re-attach the byte that was split off the previous chunk.
      buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
      ctx.carryByte = null;
    } else {
      buf = audio;
    }
    if (buf.length % 2 === 1) {
      // Odd length: hold the last byte back until its partner arrives.
      ctx.carryByte = buf[buf.length - 1];
      buf = buf.subarray(0, buf.length - 1);
    }
    if (buf.length === 0 && ctx.leftover.length === 0) {
      return Buffer.alloc(0);
    }
    const sampleCount = buf.length / 2;
    const samples = ctx.leftover.slice();
    for (let s = 0; s < sampleCount; s++) {
      samples.push(buf.readInt16LE(s * 2));
    }
    const out = [];
    let i = 0;
    while (i + 2 < samples.length) {
      out.push(samples[i]);
      out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
      i += 3;
    }
    ctx.leftover = samples.slice(i);
    return _OpenAITTS.samplesToPcm16(out);
  }
  /**
   * Serialise signed 16-bit samples into a little-endian PCM16 Buffer.
   *
   * Shared by the streaming resampler, the end-of-stream flush and the
   * deprecated one-shot resampler so the encoding lives in one place.
   *
   * @param {number[]} samples - Integer samples in the int16 range.
   * @returns {Buffer} Buffer of `samples.length * 2` bytes.
   */
  static samplesToPcm16(samples) {
    const pcm = Buffer.alloc(samples.length * 2);
    for (let k = 0; k < samples.length; k++) {
      pcm.writeInt16LE(samples[k], k * 2);
    }
    return pcm;
  }
  /** @deprecated use {@link resampleStreaming} with persistent state. */
  static resample24kTo16k(audio) {
    const ctx = { carryByte: null, leftover: [] };
    const out = _OpenAITTS.resampleStreaming(audio, ctx);
    if (ctx.leftover.length === 0) return out;
    // One-shot call: append the samples that did not fill a group of three.
    return Buffer.concat([out, _OpenAITTS.samplesToPcm16(ctx.leftover)]);
  }
};
|
|
2491
|
+
|
|
2492
|
+
// src/tts/openai.ts
|
|
2493
|
+
var TTS2 = class extends OpenAITTS {
  /**
   * Options-bag constructor for the OpenAI TTS provider.
   *
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - Falls back to `process.env.OPENAI_API_KEY`.
   * @param {string} [opts.voice] - Defaults to "alloy".
   * @param {string} [opts.model] - Defaults to "tts-1".
   * @throws {Error} When neither `opts.apiKey` nor the environment yields a key.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    const voice = opts.voice ?? "alloy";
    const model = opts.model ?? "tts-1";
    super(resolvedKey, voice, model);
  }
};
|
|
1836
2504
|
|
|
1837
2505
|
// src/providers/cartesia-tts.ts
|
|
1838
2506
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
@@ -1932,6 +2600,21 @@ var CartesiaTTS = class {
|
|
|
1932
2600
|
}
|
|
1933
2601
|
};
|
|
1934
2602
|
|
|
2603
|
+
// src/tts/cartesia.ts
|
|
2604
|
+
var TTS3 = class extends CartesiaTTS {
  /**
   * Options-bag constructor for the Cartesia TTS provider.
   *
   * @param {object} [opts] - `apiKey` plus any options understood by the base
   *   class; everything except `apiKey` is forwarded untouched.
   * @param {string} [opts.apiKey] - Falls back to `process.env.CARTESIA_API_KEY`.
   * @throws {Error} When neither `opts.apiKey` nor the environment yields a key.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    // The key travels as its own argument, so strip it from the options bag.
    const { apiKey: _dropped, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
2617
|
+
|
|
1935
2618
|
// src/providers/rime-tts.ts
|
|
1936
2619
|
var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
|
|
1937
2620
|
var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
|
|
@@ -2059,6 +2742,21 @@ var RimeTTS = class {
|
|
|
2059
2742
|
}
|
|
2060
2743
|
};
|
|
2061
2744
|
|
|
2745
|
+
// src/tts/rime.ts
|
|
2746
|
+
var TTS4 = class extends RimeTTS {
  /**
   * Options-bag constructor for the Rime TTS provider.
   *
   * @param {object} [opts] - `apiKey` plus any options understood by the base
   *   class; everything except `apiKey` is forwarded untouched.
   * @param {string} [opts.apiKey] - Falls back to `process.env.RIME_API_KEY`.
   * @throws {Error} When neither `opts.apiKey` nor the environment yields a key.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.RIME_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    // The key travels as its own argument, so strip it from the options bag.
    const { apiKey: _dropped, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
2759
|
+
|
|
2062
2760
|
// src/providers/lmnt-tts.ts
|
|
2063
2761
|
var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
|
|
2064
2762
|
var LMNTTTS = class {
|
|
@@ -2137,6 +2835,119 @@ var LMNTTTS = class {
|
|
|
2137
2835
|
}
|
|
2138
2836
|
};
|
|
2139
2837
|
|
|
2838
|
+
// src/tts/lmnt.ts
|
|
2839
|
+
var TTS5 = class extends LMNTTTS {
  /**
   * Options-bag constructor for the LMNT TTS provider.
   *
   * @param {object} [opts] - `apiKey` plus any options understood by the base
   *   class; everything except `apiKey` is forwarded untouched.
   * @param {string} [opts.apiKey] - Falls back to `process.env.LMNT_API_KEY`.
   * @throws {Error} When neither `opts.apiKey` nor the environment yields a key.
   */
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    // The key travels as its own argument, so strip it from the options bag.
    const { apiKey: _dropped, ...providerOptions } = opts;
    super(resolvedKey, providerOptions);
  }
};
|
|
2852
|
+
|
|
2853
|
+
// src/carriers/twilio.ts
|
|
2854
|
+
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;
  /**
   * Credential holder for the Twilio carrier.
   *
   * @param {object} [opts]
   * @param {string} [opts.accountSid] - Falls back to `process.env.TWILIO_ACCOUNT_SID`.
   * @param {string} [opts.authToken] - Falls back to `process.env.TWILIO_AUTH_TOKEN`.
   * @throws {Error} When either credential is missing; `accountSid` is checked first.
   */
  constructor(opts = {}) {
    const resolved = {
      accountSid: opts.accountSid ?? process.env.TWILIO_ACCOUNT_SID,
      authToken: opts.authToken ?? process.env.TWILIO_AUTH_TOKEN
    };
    if (!resolved.accountSid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!resolved.authToken) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    this.accountSid = resolved.accountSid;
    this.authToken = resolved.authToken;
  }
};
|
|
2875
|
+
|
|
2876
|
+
// src/carriers/telnyx.ts
|
|
2877
|
+
var Carrier2 = class {
  kind = "telnyx";
  apiKey;
  connectionId;
  publicKey;
  /**
   * Credential holder for the Telnyx carrier.
   *
   * @param {object} [opts]
   * @param {string} [opts.apiKey] - Falls back to `process.env.TELNYX_API_KEY`.
   * @param {string} [opts.connectionId] - Falls back to `process.env.TELNYX_CONNECTION_ID`.
   * @param {string} [opts.publicKey] - Falls back to `process.env.TELNYX_PUBLIC_KEY`;
   *   not validated, so it may end up `undefined`.
   * @throws {Error} When `apiKey` or `connectionId` is missing; `apiKey` is checked first.
   */
  constructor(opts = {}) {
    const resolved = {
      apiKey: opts.apiKey ?? process.env.TELNYX_API_KEY,
      connectionId: opts.connectionId ?? process.env.TELNYX_CONNECTION_ID,
      publicKey: opts.publicKey ?? process.env.TELNYX_PUBLIC_KEY
    };
    if (!resolved.apiKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!resolved.connectionId) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }
    this.apiKey = resolved.apiKey;
    this.connectionId = resolved.connectionId;
    this.publicKey = resolved.publicKey;
  }
};
|
|
2901
|
+
|
|
2902
|
+
// src/public-api.ts
|
|
2903
|
+
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
var Guardrail = class {
  name;
  blockedTerms;
  check;
  replacement;
  /**
   * Plain data holder describing one guardrail rule.
   *
   * @param {object} opts
   * @param {string} opts.name - Required, must be non-empty.
   * @param {*} [opts.blockedTerms] - Stored by reference when truthy.
   * @param {*} [opts.check] - Stored by reference when truthy.
   * @param {string} [opts.replacement] - Defaults to
   *   `DEFAULT_GUARDRAIL_REPLACEMENT` when `null`/`undefined`.
   * @throws {Error} When `opts.name` is falsy.
   */
  constructor(opts) {
    const ruleName = opts.name;
    if (!ruleName) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = ruleName;
    // Falsy values leave the corresponding field at `undefined`.
    this.blockedTerms = opts.blockedTerms || undefined;
    this.check = opts.check || undefined;
    this.replacement = opts.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
|
|
2919
|
+
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @param {object} opts - Passed straight to the `Guardrail` constructor.
 * @returns {Guardrail} The newly constructed instance.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
|
|
2922
|
+
var Tool = class {
  name;
  description;
  parameters;
  handler;
  webhookUrl;
  /**
   * Plain data holder describing one tool, backed by exactly one of a local
   * handler function or a webhook URL.
   *
   * @param {object} opts
   * @param {string} opts.name - Required, must be non-empty.
   * @param {string} [opts.description] - Defaults to "".
   * @param {object} [opts.parameters] - Defaults to `{ type: "object", properties: {} }`.
   * @param {Function} [opts.handler] - Counts only when it is a function.
   * @param {string} [opts.webhookUrl] - Counts only when it is a non-empty string.
   * @throws {Error} When the name is missing, or when neither or both of
   *   `handler` / `webhookUrl` are supplied.
   */
  constructor(opts) {
    if (!opts.name) {
      throw new Error("Tool requires a non-empty name.");
    }
    const usesHandler = typeof opts.handler === "function";
    const usesWebhook = typeof opts.webhookUrl === "string" && opts.webhookUrl.length > 0;
    // Exactly one backend is allowed; equal flags mean "none" or "both".
    if (usesHandler === usesWebhook) {
      throw new Error(
        usesHandler ? "Tool accepts handler OR webhookUrl, not both." : "Tool requires either handler or webhookUrl."
      );
    }
    this.name = opts.name;
    this.description = opts.description ?? "";
    this.parameters = opts.parameters ?? { type: "object", properties: {} };
    if (usesHandler) {
      this.handler = opts.handler;
    } else {
      this.webhookUrl = opts.webhookUrl;
    }
  }
};
|
|
2947
|
+
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @param {object} opts - Passed straight to the `Tool` constructor.
 * @returns {Tool} The newly constructed instance.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
|
|
2950
|
+
|
|
2140
2951
|
// src/chat-context.ts
|
|
2141
2952
|
import { randomUUID } from "crypto";
|
|
2142
2953
|
function generateId() {
|
|
@@ -2747,31 +3558,35 @@ function isAudioConfig(value) {
|
|
|
2747
3558
|
}
|
|
2748
3559
|
export {
|
|
2749
3560
|
AllProvidersFailedError,
|
|
2750
|
-
AssemblyAISTT,
|
|
3561
|
+
STT5 as AssemblyAISTT,
|
|
2751
3562
|
AuthenticationError,
|
|
2752
3563
|
BackgroundAudioPlayer,
|
|
2753
3564
|
BuiltinAudioClip,
|
|
2754
3565
|
CallMetricsAccumulator,
|
|
2755
|
-
CartesiaSTT,
|
|
2756
|
-
CartesiaTTS,
|
|
3566
|
+
STT3 as CartesiaSTT,
|
|
3567
|
+
TTS3 as CartesiaTTS,
|
|
2757
3568
|
ChatContext,
|
|
3569
|
+
CloudflareTunnel,
|
|
2758
3570
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
2759
3571
|
DEFAULT_PRICING,
|
|
2760
3572
|
DTMF_EVENTS,
|
|
2761
|
-
DeepgramSTT,
|
|
3573
|
+
STT as DeepgramSTT,
|
|
3574
|
+
ConvAI as ElevenLabsConvAI,
|
|
2762
3575
|
ElevenLabsConvAIAdapter,
|
|
2763
|
-
ElevenLabsTTS,
|
|
3576
|
+
TTS as ElevenLabsTTS,
|
|
2764
3577
|
FallbackLLMProvider,
|
|
2765
3578
|
GEMINI_DEFAULT_INPUT_SR,
|
|
2766
3579
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
2767
3580
|
GeminiLiveAdapter,
|
|
3581
|
+
Guardrail,
|
|
2768
3582
|
IVRActivity,
|
|
2769
3583
|
LLMLoop,
|
|
2770
|
-
LMNTTTS,
|
|
3584
|
+
TTS5 as LMNTTTS,
|
|
2771
3585
|
MetricsStore,
|
|
2772
3586
|
OpenAILLMProvider,
|
|
3587
|
+
Realtime as OpenAIRealtime,
|
|
2773
3588
|
OpenAIRealtimeAdapter,
|
|
2774
|
-
OpenAITTS,
|
|
3589
|
+
TTS2 as OpenAITTS,
|
|
2775
3590
|
PartialStreamError,
|
|
2776
3591
|
Patter,
|
|
2777
3592
|
PatterConnectionError,
|
|
@@ -2779,15 +3594,19 @@ export {
|
|
|
2779
3594
|
PipelineHookExecutor,
|
|
2780
3595
|
ProvisionError,
|
|
2781
3596
|
RemoteMessageHandler,
|
|
2782
|
-
RimeTTS,
|
|
3597
|
+
TTS4 as RimeTTS,
|
|
2783
3598
|
SentenceChunker,
|
|
2784
|
-
SonioxSTT,
|
|
3599
|
+
STT4 as SonioxSTT,
|
|
3600
|
+
Static as StaticTunnel,
|
|
3601
|
+
Carrier2 as Telnyx,
|
|
2785
3602
|
TestSession,
|
|
2786
3603
|
TfidfLoopDetector,
|
|
3604
|
+
Tool,
|
|
3605
|
+
Carrier as Twilio,
|
|
2787
3606
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
2788
3607
|
ULTRAVOX_DEFAULT_SR,
|
|
2789
3608
|
UltravoxRealtimeAdapter,
|
|
2790
|
-
WhisperSTT,
|
|
3609
|
+
STT2 as WhisperSTT,
|
|
2791
3610
|
builtinClipPath,
|
|
2792
3611
|
calculateRealtimeCost,
|
|
2793
3612
|
calculateSttCost,
|
|
@@ -2803,6 +3622,7 @@ export {
|
|
|
2803
3622
|
filterMarkdown,
|
|
2804
3623
|
formatDtmf,
|
|
2805
3624
|
getLogger,
|
|
3625
|
+
guardrail,
|
|
2806
3626
|
isRemoteUrl,
|
|
2807
3627
|
isWebSocketUrl,
|
|
2808
3628
|
makeAuthMiddleware,
|
|
@@ -2824,5 +3644,6 @@ export {
|
|
|
2824
3644
|
selectSoundFromList,
|
|
2825
3645
|
setLogger,
|
|
2826
3646
|
startTunnel,
|
|
3647
|
+
tool,
|
|
2827
3648
|
whisper
|
|
2828
3649
|
};
|