getpatter 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/banner-3GNZ6VQK.mjs +19 -0
- package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
- package/dist/{chunk-B6C3KIBG.mjs → chunk-FIFIWBL7.mjs} +3226 -569
- package/dist/chunk-QHHBUCMT.mjs +25 -0
- package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
- package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
- package/dist/cli.js +133 -15
- package/dist/dist-YRCCJQ26.mjs +1631 -0
- package/dist/index.d.mts +2000 -289
- package/dist/index.d.ts +2000 -289
- package/dist/index.js +8019 -1984
- package/dist/index.mjs +1885 -618
- package/dist/node-cron-6PRPSBG5.mjs +1348 -0
- package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
- package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
- package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
- package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
- package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
- package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
- package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
- package/dist/test-mode-MVJ3SKG4.mjs +8 -0
- package/dist/tunnel-UVR3PPAU.mjs +8 -0
- package/package.json +10 -3
- package/dist/chunk-OOIUSZB4.mjs +0 -37
- package/dist/node-cron-373UVDIO.mjs +0 -935
- package/dist/test-mode-JZMYE5HY.mjs +0 -8
- package/dist/tunnel-O7ICMSTP.mjs +0 -8
package/dist/index.mjs
CHANGED
|
@@ -3,21 +3,37 @@ import {
|
|
|
3
3
|
} from "./chunk-AFUYSNDH.mjs";
|
|
4
4
|
import {
|
|
5
5
|
startTunnel
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-SEMKNPCD.mjs";
|
|
7
7
|
import {
|
|
8
|
+
AuthenticationError,
|
|
8
9
|
CallMetricsAccumulator,
|
|
9
10
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
10
11
|
DEFAULT_PRICING,
|
|
11
12
|
DeepgramSTT,
|
|
13
|
+
DefaultToolExecutor,
|
|
12
14
|
ElevenLabsConvAIAdapter,
|
|
13
15
|
EmbeddedServer,
|
|
16
|
+
EventBus,
|
|
14
17
|
LLMLoop,
|
|
15
18
|
MetricsStore,
|
|
16
19
|
OpenAILLMProvider,
|
|
17
20
|
OpenAIRealtimeAdapter,
|
|
21
|
+
PatterConnectionError,
|
|
22
|
+
PatterError,
|
|
23
|
+
PcmCarry,
|
|
18
24
|
PipelineHookExecutor,
|
|
25
|
+
ProvisionError,
|
|
26
|
+
RateLimitError,
|
|
19
27
|
RemoteMessageHandler,
|
|
28
|
+
SPAN_BARGEIN,
|
|
29
|
+
SPAN_CALL,
|
|
30
|
+
SPAN_ENDPOINT,
|
|
31
|
+
SPAN_LLM,
|
|
32
|
+
SPAN_STT,
|
|
33
|
+
SPAN_TOOL,
|
|
34
|
+
SPAN_TTS,
|
|
20
35
|
SentenceChunker,
|
|
36
|
+
StatefulResampler,
|
|
21
37
|
TestSession,
|
|
22
38
|
calculateRealtimeCost,
|
|
23
39
|
calculateSttCost,
|
|
@@ -25,7 +41,12 @@ import {
|
|
|
25
41
|
calculateTtsCost,
|
|
26
42
|
callsToCsv,
|
|
27
43
|
callsToJson,
|
|
44
|
+
createResampler16kTo8k,
|
|
45
|
+
createResampler24kTo16k,
|
|
46
|
+
createResampler8kTo16k,
|
|
47
|
+
initTracing,
|
|
28
48
|
isRemoteUrl,
|
|
49
|
+
isTracingEnabled,
|
|
29
50
|
isWebSocketUrl,
|
|
30
51
|
makeAuthMiddleware,
|
|
31
52
|
mergePricing,
|
|
@@ -35,153 +56,14 @@ import {
|
|
|
35
56
|
pcm16ToMulaw,
|
|
36
57
|
resample16kTo8k,
|
|
37
58
|
resample24kTo16k,
|
|
38
|
-
resample8kTo16k
|
|
39
|
-
|
|
59
|
+
resample8kTo16k,
|
|
60
|
+
startSpan
|
|
61
|
+
} from "./chunk-FIFIWBL7.mjs";
|
|
40
62
|
import {
|
|
41
63
|
getLogger,
|
|
42
64
|
setLogger
|
|
43
|
-
} from "./chunk-
|
|
44
|
-
import "./chunk-
|
|
45
|
-
|
|
46
|
-
// src/connection.ts
|
|
47
|
-
import WebSocket from "ws";
|
|
48
|
-
|
|
49
|
-
// src/errors.ts
|
|
50
|
-
var PatterError = class extends Error {
|
|
51
|
-
constructor(message) {
|
|
52
|
-
super(message);
|
|
53
|
-
this.name = "PatterError";
|
|
54
|
-
}
|
|
55
|
-
};
|
|
56
|
-
var PatterConnectionError = class extends PatterError {
|
|
57
|
-
constructor(message) {
|
|
58
|
-
super(message);
|
|
59
|
-
this.name = "PatterConnectionError";
|
|
60
|
-
}
|
|
61
|
-
};
|
|
62
|
-
var AuthenticationError = class extends PatterError {
|
|
63
|
-
constructor(message) {
|
|
64
|
-
super(message);
|
|
65
|
-
this.name = "AuthenticationError";
|
|
66
|
-
}
|
|
67
|
-
};
|
|
68
|
-
var ProvisionError = class extends PatterError {
|
|
69
|
-
constructor(message) {
|
|
70
|
-
super(message);
|
|
71
|
-
this.name = "ProvisionError";
|
|
72
|
-
}
|
|
73
|
-
};
|
|
74
|
-
|
|
75
|
-
// src/connection.ts
|
|
76
|
-
var DEFAULT_BACKEND_URL = "wss://api.getpatter.com";
|
|
77
|
-
var PatterConnection = class {
|
|
78
|
-
apiKey;
|
|
79
|
-
backendUrl;
|
|
80
|
-
wsUrl;
|
|
81
|
-
ws = null;
|
|
82
|
-
onMessage = null;
|
|
83
|
-
onCallStart = null;
|
|
84
|
-
onCallEnd = null;
|
|
85
|
-
constructor(apiKey, backendUrl = DEFAULT_BACKEND_URL) {
|
|
86
|
-
this.apiKey = apiKey;
|
|
87
|
-
this.backendUrl = backendUrl.replace(/\/+$/, "");
|
|
88
|
-
this.wsUrl = `${this.backendUrl}/ws/sdk`;
|
|
89
|
-
}
|
|
90
|
-
get isConnected() {
|
|
91
|
-
return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
|
|
92
|
-
}
|
|
93
|
-
async connect(options) {
|
|
94
|
-
this.onMessage = options.onMessage;
|
|
95
|
-
this.onCallStart = options.onCallStart ?? null;
|
|
96
|
-
this.onCallEnd = options.onCallEnd ?? null;
|
|
97
|
-
return new Promise((resolve, reject) => {
|
|
98
|
-
this.ws = new WebSocket(this.wsUrl, {
|
|
99
|
-
headers: { "X-API-Key": this.apiKey }
|
|
100
|
-
});
|
|
101
|
-
const onError = (err) => {
|
|
102
|
-
this.ws?.off("error", onError);
|
|
103
|
-
reject(new PatterConnectionError(`Failed to connect: ${err.message}`));
|
|
104
|
-
};
|
|
105
|
-
this.ws.once("open", () => {
|
|
106
|
-
this.ws?.off("error", onError);
|
|
107
|
-
this.setupListeners();
|
|
108
|
-
resolve();
|
|
109
|
-
});
|
|
110
|
-
this.ws.on("error", onError);
|
|
111
|
-
});
|
|
112
|
-
}
|
|
113
|
-
setupListeners() {
|
|
114
|
-
if (!this.ws) return;
|
|
115
|
-
this.ws.on("error", (err) => {
|
|
116
|
-
getLogger().error(`WebSocket error: ${err.message}`);
|
|
117
|
-
});
|
|
118
|
-
this.ws.on("message", async (data) => {
|
|
119
|
-
const raw = data.toString();
|
|
120
|
-
let parsed;
|
|
121
|
-
try {
|
|
122
|
-
parsed = JSON.parse(raw);
|
|
123
|
-
} catch {
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
126
|
-
const msgType = parsed.type;
|
|
127
|
-
if (msgType === "message" && this.onMessage) {
|
|
128
|
-
const msg = {
|
|
129
|
-
text: parsed.text,
|
|
130
|
-
callId: parsed.call_id,
|
|
131
|
-
caller: parsed.caller ?? ""
|
|
132
|
-
};
|
|
133
|
-
try {
|
|
134
|
-
const response = await this.onMessage(msg);
|
|
135
|
-
if (response != null) {
|
|
136
|
-
await this.sendResponse(msg.callId, response);
|
|
137
|
-
}
|
|
138
|
-
} catch {
|
|
139
|
-
}
|
|
140
|
-
} else if (msgType === "call_start" && this.onCallStart) {
|
|
141
|
-
await this.onCallStart(parsed);
|
|
142
|
-
} else if (msgType === "call_end" && this.onCallEnd) {
|
|
143
|
-
await this.onCallEnd(parsed);
|
|
144
|
-
}
|
|
145
|
-
});
|
|
146
|
-
this.ws.on("close", () => {
|
|
147
|
-
this.ws = null;
|
|
148
|
-
});
|
|
149
|
-
}
|
|
150
|
-
async sendResponse(callId, text) {
|
|
151
|
-
if (!this.ws) throw new PatterConnectionError("Not connected");
|
|
152
|
-
this.ws.send(JSON.stringify({ type: "response", call_id: callId, text }));
|
|
153
|
-
}
|
|
154
|
-
async requestCall(fromNumber, toNumber, firstMessage = "") {
|
|
155
|
-
if (!this.ws) throw new PatterConnectionError("Not connected");
|
|
156
|
-
this.ws.send(
|
|
157
|
-
JSON.stringify({
|
|
158
|
-
type: "call",
|
|
159
|
-
from: fromNumber,
|
|
160
|
-
to: toNumber,
|
|
161
|
-
first_message: firstMessage
|
|
162
|
-
})
|
|
163
|
-
);
|
|
164
|
-
}
|
|
165
|
-
async disconnect() {
|
|
166
|
-
if (this.ws) {
|
|
167
|
-
this.ws.close();
|
|
168
|
-
this.ws = null;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
parseMessage(raw) {
|
|
172
|
-
try {
|
|
173
|
-
const data = JSON.parse(raw);
|
|
174
|
-
if (data.type !== "message") return null;
|
|
175
|
-
return {
|
|
176
|
-
text: data.text,
|
|
177
|
-
callId: data.call_id,
|
|
178
|
-
caller: data.caller ?? ""
|
|
179
|
-
};
|
|
180
|
-
} catch {
|
|
181
|
-
return null;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
};
|
|
65
|
+
} from "./chunk-VJVDG4V5.mjs";
|
|
66
|
+
import "./chunk-QHHBUCMT.mjs";
|
|
185
67
|
|
|
186
68
|
// src/engines/openai.ts
|
|
187
69
|
var Realtime = class {
|
|
@@ -241,86 +123,77 @@ var Static = class {
|
|
|
241
123
|
this.hostname = opts.hostname;
|
|
242
124
|
}
|
|
243
125
|
};
|
|
126
|
+
var Ngrok = class {
|
|
127
|
+
kind = "ngrok";
|
|
128
|
+
hostname;
|
|
129
|
+
constructor(opts = {}) {
|
|
130
|
+
this.hostname = opts.hostname ?? "";
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Returns the configured hostname or throws if the marker was constructed
|
|
134
|
+
* without one. Patter does not start ngrok itself — the user is expected
|
|
135
|
+
* to either supply a hostname or run ngrok out-of-band.
|
|
136
|
+
*/
|
|
137
|
+
start() {
|
|
138
|
+
if (!this.hostname) {
|
|
139
|
+
throw new Error(
|
|
140
|
+
'Ngrok requires a hostname; pass new Ngrok({ hostname: "abc.ngrok.io" })'
|
|
141
|
+
);
|
|
142
|
+
}
|
|
143
|
+
return this.hostname;
|
|
144
|
+
}
|
|
145
|
+
};
|
|
244
146
|
|
|
245
147
|
// src/client.ts
|
|
246
|
-
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
247
|
-
var DEFAULT_REST_URL = "https://api.getpatter.com";
|
|
248
|
-
function sttConfigToDict(cfg) {
|
|
249
|
-
const out = {
|
|
250
|
-
provider: cfg.provider,
|
|
251
|
-
api_key: cfg.apiKey,
|
|
252
|
-
language: cfg.language
|
|
253
|
-
};
|
|
254
|
-
if (cfg.options) out.options = { ...cfg.options };
|
|
255
|
-
return out;
|
|
256
|
-
}
|
|
257
|
-
function ttsConfigToDict(cfg) {
|
|
258
|
-
const out = {
|
|
259
|
-
provider: cfg.provider,
|
|
260
|
-
api_key: cfg.apiKey,
|
|
261
|
-
voice: cfg.voice
|
|
262
|
-
};
|
|
263
|
-
if (cfg.options) out.options = { ...cfg.options };
|
|
264
|
-
return out;
|
|
265
|
-
}
|
|
266
148
|
var Patter = class {
|
|
267
|
-
apiKey;
|
|
268
|
-
backendUrl;
|
|
269
|
-
restUrl;
|
|
270
|
-
connection;
|
|
271
|
-
mode;
|
|
272
149
|
localConfig;
|
|
273
150
|
embeddedServer = null;
|
|
274
151
|
tunnelHandle = null;
|
|
152
|
+
/**
|
|
153
|
+
* Live `MetricsStore` for the embedded server. Returns `null` before
|
|
154
|
+
* `serve()` is called. Exposed so integrations like `PatterTool` can
|
|
155
|
+
* subscribe to per-call lifecycle events (`call_initiated`,
|
|
156
|
+
* `call_start`, `call_end`).
|
|
157
|
+
*/
|
|
158
|
+
get metricsStore() {
|
|
159
|
+
return this.embeddedServer?.metricsStore ?? null;
|
|
160
|
+
}
|
|
275
161
|
constructor(options) {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
162
|
+
if (options.apiKey !== void 0) {
|
|
163
|
+
throw new Error(
|
|
164
|
+
"Patter Cloud is not yet available in this SDK release. Use local mode with `carrier:` and `phoneNumber:`. Cloud mode will return in a future release."
|
|
165
|
+
);
|
|
166
|
+
}
|
|
167
|
+
if (!options.phoneNumber) {
|
|
168
|
+
throw new Error("Local mode requires phoneNumber");
|
|
169
|
+
}
|
|
170
|
+
if (!options.carrier) {
|
|
171
|
+
throw new Error(
|
|
172
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
173
|
+
);
|
|
174
|
+
}
|
|
175
|
+
const carrier = options.carrier;
|
|
176
|
+
const tunnel = options.tunnel;
|
|
177
|
+
let tunnelWebhookUrl;
|
|
178
|
+
if (tunnel instanceof Static) {
|
|
179
|
+
if (options.webhookUrl) {
|
|
284
180
|
throw new Error(
|
|
285
|
-
"
|
|
181
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
286
182
|
);
|
|
287
183
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
this.mode = "local";
|
|
300
|
-
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
301
|
-
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
302
|
-
this.localConfig = {
|
|
303
|
-
carrier,
|
|
304
|
-
phoneNumber: local.phoneNumber,
|
|
305
|
-
webhookUrl: normalizedWebhook,
|
|
306
|
-
tunnel: local.tunnel,
|
|
307
|
-
openaiKey: local.openaiKey
|
|
308
|
-
};
|
|
309
|
-
this.apiKey = "";
|
|
310
|
-
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
311
|
-
this.restUrl = DEFAULT_REST_URL;
|
|
312
|
-
this.connection = new PatterConnection("", DEFAULT_BACKEND_URL2);
|
|
313
|
-
} else {
|
|
314
|
-
const cloudOpts = options;
|
|
315
|
-
this.mode = "cloud";
|
|
316
|
-
this.localConfig = null;
|
|
317
|
-
this.apiKey = cloudOpts.apiKey;
|
|
318
|
-
this.backendUrl = cloudOpts.backendUrl ?? DEFAULT_BACKEND_URL2;
|
|
319
|
-
this.restUrl = cloudOpts.restUrl ?? DEFAULT_REST_URL;
|
|
320
|
-
this.connection = new PatterConnection(this.apiKey, this.backendUrl);
|
|
321
|
-
}
|
|
184
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
185
|
+
}
|
|
186
|
+
const rawWebhook = tunnelWebhookUrl ?? options.webhookUrl;
|
|
187
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
188
|
+
this.localConfig = {
|
|
189
|
+
carrier,
|
|
190
|
+
phoneNumber: options.phoneNumber,
|
|
191
|
+
webhookUrl: normalizedWebhook,
|
|
192
|
+
tunnel: options.tunnel,
|
|
193
|
+
openaiKey: options.openaiKey
|
|
194
|
+
};
|
|
322
195
|
}
|
|
323
|
-
// ===
|
|
196
|
+
// === Agent definition ===
|
|
324
197
|
agent(opts) {
|
|
325
198
|
let working = { ...opts };
|
|
326
199
|
if (opts.engine) {
|
|
@@ -337,7 +210,7 @@ var Patter = class {
|
|
|
337
210
|
model: working.model ?? engine.model,
|
|
338
211
|
voice: working.voice ?? engine.voice
|
|
339
212
|
};
|
|
340
|
-
if (
|
|
213
|
+
if (!this.localConfig.openaiKey) {
|
|
341
214
|
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
342
215
|
}
|
|
343
216
|
} else if (engine instanceof ConvAI) {
|
|
@@ -387,10 +260,8 @@ var Patter = class {
|
|
|
387
260
|
}
|
|
388
261
|
return working;
|
|
389
262
|
}
|
|
263
|
+
// === Serve / test / call ===
|
|
390
264
|
async serve(opts) {
|
|
391
|
-
if (this.mode !== "local" || !this.localConfig) {
|
|
392
|
-
throw new Error("serve() is only available in local mode");
|
|
393
|
-
}
|
|
394
265
|
if (!opts.agent || typeof opts.agent !== "object") {
|
|
395
266
|
throw new TypeError("agent is required. Use phone.agent() to create one.");
|
|
396
267
|
}
|
|
@@ -415,10 +286,13 @@ var Patter = class {
|
|
|
415
286
|
if (wantsCloudflared && webhookUrl) {
|
|
416
287
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
417
288
|
}
|
|
289
|
+
const { showBanner } = await import("./banner-3GNZ6VQK.mjs");
|
|
290
|
+
showBanner();
|
|
418
291
|
if (wantsCloudflared) {
|
|
419
|
-
const { startTunnel: startTunnel2 } = await import("./tunnel-
|
|
292
|
+
const { startTunnel: startTunnel2 } = await import("./tunnel-UVR3PPAU.mjs");
|
|
420
293
|
this.tunnelHandle = await startTunnel2(port);
|
|
421
294
|
webhookUrl = this.tunnelHandle.hostname;
|
|
295
|
+
this.localConfig = { ...this.localConfig, webhookUrl };
|
|
422
296
|
}
|
|
423
297
|
if (!webhookUrl) {
|
|
424
298
|
throw new Error(
|
|
@@ -427,7 +301,7 @@ var Patter = class {
|
|
|
427
301
|
}
|
|
428
302
|
const carrier = this.localConfig.carrier;
|
|
429
303
|
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
430
|
-
const { autoConfigureCarrier } = await import("./carrier-config-
|
|
304
|
+
const { autoConfigureCarrier } = await import("./carrier-config-33HQ2W4V.mjs");
|
|
431
305
|
await autoConfigureCarrier({
|
|
432
306
|
telephonyProvider,
|
|
433
307
|
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
@@ -464,138 +338,56 @@ var Patter = class {
|
|
|
464
338
|
await this.embeddedServer.start(port);
|
|
465
339
|
}
|
|
466
340
|
async test(opts) {
|
|
467
|
-
|
|
468
|
-
throw new Error("test() is only available in local mode");
|
|
469
|
-
}
|
|
470
|
-
const { TestSession: TestSession2 } = await import("./test-mode-JZMYE5HY.mjs");
|
|
341
|
+
const { TestSession: TestSession2 } = await import("./test-mode-MVJ3SKG4.mjs");
|
|
471
342
|
const session = new TestSession2();
|
|
472
343
|
await session.run({
|
|
473
344
|
agent: opts.agent,
|
|
474
|
-
openaiKey: this.localConfig
|
|
345
|
+
openaiKey: this.localConfig.openaiKey,
|
|
475
346
|
onMessage: typeof opts.onMessage === "function" ? opts.onMessage : void 0,
|
|
476
347
|
onCallStart: opts.onCallStart,
|
|
477
348
|
onCallEnd: opts.onCallEnd
|
|
478
349
|
});
|
|
479
350
|
}
|
|
480
|
-
// === Cloud mode legacy ===
|
|
481
|
-
async connect(options) {
|
|
482
|
-
if (options.provider && options.providerKey && options.number) {
|
|
483
|
-
await this.registerNumber(
|
|
484
|
-
options.provider,
|
|
485
|
-
options.providerKey,
|
|
486
|
-
options.number,
|
|
487
|
-
options.providerSecret,
|
|
488
|
-
options.country ?? "US",
|
|
489
|
-
options.stt,
|
|
490
|
-
options.tts
|
|
491
|
-
);
|
|
492
|
-
}
|
|
493
|
-
await this.connection.connect({
|
|
494
|
-
onMessage: options.onMessage,
|
|
495
|
-
onCallStart: options.onCallStart,
|
|
496
|
-
onCallEnd: options.onCallEnd
|
|
497
|
-
});
|
|
498
|
-
}
|
|
499
351
|
async call(options) {
|
|
500
|
-
if (
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
}
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
const
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
from: phoneNumber,
|
|
519
|
-
to: localOpts.to,
|
|
520
|
-
stream_url: streamUrl,
|
|
521
|
-
stream_track: "both_tracks"
|
|
522
|
-
};
|
|
523
|
-
if (localOpts.ringTimeout !== void 0) {
|
|
524
|
-
telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
|
|
525
|
-
}
|
|
526
|
-
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
527
|
-
method: "POST",
|
|
528
|
-
headers: {
|
|
529
|
-
"Content-Type": "application/json",
|
|
530
|
-
Authorization: `Bearer ${telnyxKey}`
|
|
531
|
-
},
|
|
532
|
-
body: JSON.stringify(telnyxPayload)
|
|
533
|
-
});
|
|
534
|
-
if (!response2.ok) {
|
|
535
|
-
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
536
|
-
}
|
|
537
|
-
if (this.embeddedServer) {
|
|
538
|
-
try {
|
|
539
|
-
const body = await response2.clone().json();
|
|
540
|
-
const callId = body.data?.call_control_id;
|
|
541
|
-
if (callId) {
|
|
542
|
-
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
543
|
-
call_id: callId,
|
|
544
|
-
caller: phoneNumber,
|
|
545
|
-
callee: localOpts.to,
|
|
546
|
-
direction: "outbound"
|
|
547
|
-
});
|
|
548
|
-
}
|
|
549
|
-
} catch {
|
|
550
|
-
}
|
|
551
|
-
}
|
|
552
|
-
return;
|
|
553
|
-
}
|
|
554
|
-
const twilioSid = carrier.accountSid;
|
|
555
|
-
const twilioToken = carrier.authToken;
|
|
556
|
-
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
557
|
-
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
558
|
-
const params = new URLSearchParams({
|
|
559
|
-
To: localOpts.to,
|
|
560
|
-
From: phoneNumber,
|
|
561
|
-
Url: `https://${webhookUrl}/webhooks/twilio/voice`,
|
|
562
|
-
StatusCallback: statusCallbackUrl,
|
|
563
|
-
StatusCallbackMethod: "POST",
|
|
564
|
-
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
565
|
-
// transitions even when media never arrives.
|
|
566
|
-
StatusCallbackEvent: "initiated ringing answered completed"
|
|
567
|
-
});
|
|
568
|
-
if (localOpts.machineDetection) {
|
|
569
|
-
params.append("MachineDetection", "DetectMessageEnd");
|
|
570
|
-
params.append("AsyncAmd", "true");
|
|
571
|
-
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
572
|
-
}
|
|
573
|
-
if (localOpts.ringTimeout !== void 0) {
|
|
574
|
-
params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
|
|
575
|
-
}
|
|
576
|
-
if (localOpts.voicemailMessage && this.embeddedServer) {
|
|
577
|
-
this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
|
|
352
|
+
if (!options.to) {
|
|
353
|
+
throw new Error("'to' phone number is required");
|
|
354
|
+
}
|
|
355
|
+
if (!options.to.startsWith("+")) {
|
|
356
|
+
throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
|
|
357
|
+
}
|
|
358
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
359
|
+
const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
|
|
360
|
+
if (carrier.kind === "telnyx") {
|
|
361
|
+
const telnyxKey = carrier.apiKey;
|
|
362
|
+
const connectionId = carrier.connectionId;
|
|
363
|
+
const telnyxPayload = {
|
|
364
|
+
connection_id: connectionId,
|
|
365
|
+
from: phoneNumber,
|
|
366
|
+
to: options.to
|
|
367
|
+
};
|
|
368
|
+
if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
|
|
369
|
+
telnyxPayload.timeout_secs = Math.max(1, Math.floor(effectiveRingTimeout));
|
|
578
370
|
}
|
|
579
|
-
const
|
|
371
|
+
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
580
372
|
method: "POST",
|
|
581
373
|
headers: {
|
|
582
|
-
"Content-Type": "application/
|
|
583
|
-
Authorization: `
|
|
374
|
+
"Content-Type": "application/json",
|
|
375
|
+
Authorization: `Bearer ${telnyxKey}`
|
|
584
376
|
},
|
|
585
|
-
body:
|
|
377
|
+
body: JSON.stringify(telnyxPayload)
|
|
586
378
|
});
|
|
587
|
-
if (!
|
|
588
|
-
throw new ProvisionError(`Failed to initiate call: ${await
|
|
379
|
+
if (!response2.ok) {
|
|
380
|
+
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
589
381
|
}
|
|
590
382
|
if (this.embeddedServer) {
|
|
591
383
|
try {
|
|
592
|
-
const body = await
|
|
593
|
-
const
|
|
594
|
-
if (
|
|
384
|
+
const body = await response2.clone().json();
|
|
385
|
+
const callId = body.data?.call_control_id;
|
|
386
|
+
if (callId) {
|
|
595
387
|
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
596
|
-
call_id:
|
|
388
|
+
call_id: callId,
|
|
597
389
|
caller: phoneNumber,
|
|
598
|
-
callee:
|
|
390
|
+
callee: options.to,
|
|
599
391
|
direction: "outbound"
|
|
600
392
|
});
|
|
601
393
|
}
|
|
@@ -604,21 +396,59 @@ var Patter = class {
|
|
|
604
396
|
}
|
|
605
397
|
return;
|
|
606
398
|
}
|
|
607
|
-
const
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
399
|
+
const twilioSid = carrier.accountSid;
|
|
400
|
+
const twilioToken = carrier.authToken;
|
|
401
|
+
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
402
|
+
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
403
|
+
const streamUrl = `wss://${webhookUrl}/ws/stream/outbound`;
|
|
404
|
+
const inlineTwiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${streamUrl}"/></Connect></Response>`;
|
|
405
|
+
const params = new URLSearchParams({
|
|
406
|
+
To: options.to,
|
|
407
|
+
From: phoneNumber,
|
|
408
|
+
Twiml: inlineTwiml,
|
|
409
|
+
StatusCallback: statusCallbackUrl,
|
|
410
|
+
StatusCallbackMethod: "POST",
|
|
411
|
+
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
412
|
+
// transitions even when media never arrives.
|
|
413
|
+
StatusCallbackEvent: "initiated ringing answered completed"
|
|
414
|
+
});
|
|
415
|
+
if (options.machineDetection) {
|
|
416
|
+
params.append("MachineDetection", "DetectMessageEnd");
|
|
417
|
+
params.append("AsyncAmd", "true");
|
|
418
|
+
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
419
|
+
}
|
|
420
|
+
if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
|
|
421
|
+
params.append("Timeout", String(Math.max(1, Math.floor(effectiveRingTimeout))));
|
|
422
|
+
}
|
|
423
|
+
if (options.voicemailMessage && this.embeddedServer) {
|
|
424
|
+
this.embeddedServer.voicemailMessage = options.voicemailMessage;
|
|
425
|
+
}
|
|
426
|
+
const response = await fetch(url, {
|
|
427
|
+
method: "POST",
|
|
428
|
+
headers: {
|
|
429
|
+
"Content-Type": "application/x-www-form-urlencoded",
|
|
430
|
+
Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
|
|
431
|
+
},
|
|
432
|
+
body: params.toString()
|
|
433
|
+
});
|
|
434
|
+
if (!response.ok) {
|
|
435
|
+
throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
|
|
436
|
+
}
|
|
437
|
+
if (this.embeddedServer) {
|
|
438
|
+
try {
|
|
439
|
+
const body = await response.clone().json();
|
|
440
|
+
const callSid = body.sid;
|
|
441
|
+
if (callSid) {
|
|
442
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
443
|
+
call_id: callSid,
|
|
444
|
+
caller: phoneNumber,
|
|
445
|
+
callee: options.to,
|
|
446
|
+
direction: "outbound"
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
} catch {
|
|
615
450
|
}
|
|
616
451
|
}
|
|
617
|
-
await this.connection.requestCall(
|
|
618
|
-
cloudOpts.fromNumber ?? "",
|
|
619
|
-
cloudOpts.to,
|
|
620
|
-
cloudOpts.firstMessage ?? ""
|
|
621
|
-
);
|
|
622
452
|
}
|
|
623
453
|
async disconnect() {
|
|
624
454
|
if (this.tunnelHandle) {
|
|
@@ -629,86 +459,6 @@ var Patter = class {
|
|
|
629
459
|
await this.embeddedServer.stop();
|
|
630
460
|
this.embeddedServer = null;
|
|
631
461
|
}
|
|
632
|
-
await this.connection.disconnect();
|
|
633
|
-
}
|
|
634
|
-
// === Agent Management ===
|
|
635
|
-
async createAgent(opts) {
|
|
636
|
-
const response = await fetch(`${this.restUrl}/api/agents`, {
|
|
637
|
-
method: "POST",
|
|
638
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
639
|
-
body: JSON.stringify({
|
|
640
|
-
name: opts.name,
|
|
641
|
-
system_prompt: opts.systemPrompt,
|
|
642
|
-
model: opts.model ?? "gpt-4o-mini-realtime-preview",
|
|
643
|
-
voice: opts.voice ?? "alloy",
|
|
644
|
-
voice_provider: opts.voiceProvider ?? "openai",
|
|
645
|
-
language: opts.language ?? "en",
|
|
646
|
-
first_message: opts.firstMessage ?? null,
|
|
647
|
-
tools: opts.tools?.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters, webhook_url: t.webhookUrl })) ?? null
|
|
648
|
-
})
|
|
649
|
-
});
|
|
650
|
-
if (response.status !== 201) throw new ProvisionError(`Failed to create agent: ${await response.text()}`);
|
|
651
|
-
const data = await response.json();
|
|
652
|
-
return { id: data.id, name: data.name, systemPrompt: data.system_prompt, model: data.model, voice: data.voice, voiceProvider: data.voice_provider, language: data.language, firstMessage: data.first_message, tools: data.tools };
|
|
653
|
-
}
|
|
654
|
-
async listAgents() {
|
|
655
|
-
const response = await fetch(`${this.restUrl}/api/agents`, { headers: { "X-API-Key": this.apiKey } });
|
|
656
|
-
if (!response.ok) throw new ProvisionError(`Failed to list agents: ${response.status}`);
|
|
657
|
-
const data = await response.json();
|
|
658
|
-
return data.map((a) => ({ id: a.id, name: a.name, systemPrompt: a.system_prompt, model: a.model, voice: a.voice, voiceProvider: a.voice_provider, language: a.language, firstMessage: a.first_message, tools: a.tools }));
|
|
659
|
-
}
|
|
660
|
-
async buyNumber(opts = {}) {
|
|
661
|
-
const response = await fetch(`${this.restUrl}/api/numbers/buy`, {
|
|
662
|
-
method: "POST",
|
|
663
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
664
|
-
body: JSON.stringify({ country: opts.country ?? "US", provider: opts.provider ?? "twilio" })
|
|
665
|
-
});
|
|
666
|
-
if (response.status !== 201) throw new ProvisionError(`Failed to buy number: ${await response.text()}`);
|
|
667
|
-
const data = await response.json();
|
|
668
|
-
return { id: data.id, number: data.number, provider: data.provider, country: data.country, status: data.status, agentId: data.agent_id };
|
|
669
|
-
}
|
|
670
|
-
async assignAgent(numberId, agentId) {
|
|
671
|
-
const response = await fetch(`${this.restUrl}/api/phone-numbers/${numberId}/assign-agent`, {
|
|
672
|
-
method: "POST",
|
|
673
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
674
|
-
body: JSON.stringify({ agent_id: agentId })
|
|
675
|
-
});
|
|
676
|
-
if (response.status !== 200) throw new ProvisionError(`Failed to assign agent: ${await response.text()}`);
|
|
677
|
-
}
|
|
678
|
-
async listCalls(limit = 50) {
|
|
679
|
-
if (!Number.isInteger(limit) || limit < 1 || limit > 1e3) {
|
|
680
|
-
throw new RangeError(`limit must be an integer between 1 and 1000, got ${limit}`);
|
|
681
|
-
}
|
|
682
|
-
const response = await fetch(`${this.restUrl}/api/calls?limit=${limit}`, { headers: { "X-API-Key": this.apiKey } });
|
|
683
|
-
if (!response.ok) throw new ProvisionError(`Failed to list calls: ${response.status}`);
|
|
684
|
-
const data = await response.json();
|
|
685
|
-
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
686
|
-
}
|
|
687
|
-
// Internal
|
|
688
|
-
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
689
|
-
const credentials = { api_key: providerKey };
|
|
690
|
-
if (providerSecret) credentials.api_secret = providerSecret;
|
|
691
|
-
const response = await fetch(`${this.restUrl}/api/phone-numbers`, {
|
|
692
|
-
method: "POST",
|
|
693
|
-
headers: {
|
|
694
|
-
"Content-Type": "application/json",
|
|
695
|
-
"X-API-Key": this.apiKey
|
|
696
|
-
},
|
|
697
|
-
body: JSON.stringify({
|
|
698
|
-
number,
|
|
699
|
-
provider,
|
|
700
|
-
provider_credentials: credentials,
|
|
701
|
-
country,
|
|
702
|
-
stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
|
|
703
|
-
tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
|
|
704
|
-
})
|
|
705
|
-
});
|
|
706
|
-
if (response.status === 409) return;
|
|
707
|
-
if (response.status !== 201) {
|
|
708
|
-
throw new ProvisionError(
|
|
709
|
-
`Failed to register number: ${await response.text()}`
|
|
710
|
-
);
|
|
711
|
-
}
|
|
712
462
|
}
|
|
713
463
|
};
|
|
714
464
|
|
|
@@ -828,6 +578,46 @@ function elevenlabs(opts) {
|
|
|
828
578
|
function openaiTts(opts) {
|
|
829
579
|
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
830
580
|
}
|
|
581
|
+
function soniox(opts) {
|
|
582
|
+
return new STTConfigImpl("soniox", opts.apiKey, opts.language ?? "en");
|
|
583
|
+
}
|
|
584
|
+
function speechmatics(_opts) {
|
|
585
|
+
throw new Error(
|
|
586
|
+
"speechmatics() is Python-only right now \u2014 the TS Speechmatics adapter has not shipped yet. Use the Python SDK (sdk-py) or pick another STT provider such as deepgram() / assemblyai() / soniox()."
|
|
587
|
+
);
|
|
588
|
+
}
|
|
589
|
+
function assemblyai(opts) {
|
|
590
|
+
return new STTConfigImpl("assemblyai", opts.apiKey, opts.language ?? "en");
|
|
591
|
+
}
|
|
592
|
+
function cartesia(opts) {
|
|
593
|
+
return new TTSConfigImpl(
|
|
594
|
+
"cartesia",
|
|
595
|
+
opts.apiKey,
|
|
596
|
+
opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
function rime(opts) {
|
|
600
|
+
return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
|
|
601
|
+
}
|
|
602
|
+
function lmnt(opts) {
|
|
603
|
+
return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
|
|
604
|
+
}
|
|
605
|
+
function ultravox(opts) {
|
|
606
|
+
return {
|
|
607
|
+
provider: "ultravox",
|
|
608
|
+
apiKey: opts.apiKey,
|
|
609
|
+
model: opts.model,
|
|
610
|
+
voice: opts.voice
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
function geminiLive(opts) {
|
|
614
|
+
return {
|
|
615
|
+
provider: "gemini_live",
|
|
616
|
+
apiKey: opts.apiKey,
|
|
617
|
+
model: opts.model,
|
|
618
|
+
voice: opts.voice
|
|
619
|
+
};
|
|
620
|
+
}
|
|
831
621
|
|
|
832
622
|
// src/fallback-provider.ts
|
|
833
623
|
var AllProvidersFailedError = class extends Error {
|
|
@@ -1026,13 +816,275 @@ var FallbackLLMProvider = class {
|
|
|
1026
816
|
}
|
|
1027
817
|
};
|
|
1028
818
|
|
|
819
|
+
// src/integrations/patter-tool.ts
|
|
820
|
+
import { EventEmitter } from "events";
|
|
821
|
+
/** JSON-Schema for the tool arguments, shared by every schema exporter below. */
var PARAMETERS_SCHEMA = {
  type: "object",
  properties: {
    to: {
      type: "string",
      description: 'Destination phone number in E.164 format (e.g. "+15551234567"). Required.'
    },
    goal: {
      type: "string",
      description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call."
    },
    first_message: {
      type: "string",
      description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting."
    },
    max_duration_sec: {
      type: "integer",
      description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.",
      minimum: 5,
      maximum: 1800
    }
  },
  required: ["to"]
};
var DEFAULT_NAME = "make_phone_call";
var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
/**
 * Wraps a Patter instance as an LLM-callable "make a phone call" tool.
 *
 * `execute()` dials out through `phone.call()`, learns the call id from the
 * next `call_initiated` SSE event on `phone.metricsStore`, and resolves when
 * the `onCallEnd` callback registered with `phone.serve()` reports that id.
 */
var PatterTool = class _PatterTool {
  name;
  description;
  phone;
  agent;
  maxDurationSec;
  recording;
  started = false;
  /** In-flight `start()` promise, shared so concurrent callers run
   *  `phone.serve()` exactly once. `null` when no start is in progress. */
  startPromise = null;
  /** Resolver for the next `call_initiated` SSE event. Only set inside the
   *  dial mutex (`dialQueue`), so two parallel `execute()` calls never share
   *  it and never lose a dispatch. */
  pendingDial = null;
  /** Mutex that serializes the dial → call_id capture critical section.
   *  Each `execute()` chains a continuation onto this promise so the
   *  `pendingDial` slot is owned by exactly one caller at a time. */
  dialQueue = Promise.resolve();
  /** Captured SSE listener so `stop()` can detach it (prevents leaks when
   *  the underlying Patter instance outlives this tool). */
  sseListener = null;
  /** Captured Patter metrics store, for cleanup in `stop()`. */
  metricsStoreRef = null;
  /** call_id → pending promise machinery. */
  pending = /* @__PURE__ */ new Map();
  bus = new EventEmitter();
  /** How long to wait for the `call_initiated` SSE before failing the dial. */
  static DIAL_CAPTURE_TIMEOUT_MS = 1e4;
  /**
   * Clamp a caller-supplied duration to the 5..1800 second range advertised
   * in `PARAMETERS_SCHEMA`.
   *
   * Nullish or non-numeric input (NaN after coercion) yields `fallback`.
   * Without this guard a NaN would reach `setTimeout`, which treats it as a
   * ~1 ms delay and fails the call almost immediately.
   *
   * @param value Raw duration (number, numeric string, or nullish).
   * @param fallback Duration in seconds used when `value` is unusable.
   * @returns Duration in seconds within [5, 1800].
   */
  static clampDurationSec(value, fallback) {
    const numeric = value == null ? Number.NaN : Number(value);
    const seconds = Number.isNaN(numeric) ? fallback : numeric;
    return Math.max(5, Math.min(1800, seconds));
  }
  /**
   * @param opts.phone Patter instance used to serve and dial. Required.
   * @param opts.agent Agent config passed to `phone.agent()`; needed by `start()`.
   * @param opts.name Tool name override (default "make_phone_call").
   * @param opts.description Tool description override.
   * @param opts.maxDurationSec Default per-call timeout in seconds (clamped to 5..1800, default 180).
   * @param opts.recording Forwarded to `phone.serve()` (default false).
   * @throws {Error} When `opts.phone` is missing.
   */
  constructor(opts) {
    if (!opts.phone) {
      throw new Error("PatterTool: `phone` (a Patter instance) is required.");
    }
    this.phone = opts.phone;
    this.agent = opts.agent;
    this.name = opts.name ?? DEFAULT_NAME;
    this.description = opts.description ?? DEFAULT_DESCRIPTION;
    this.maxDurationSec = _PatterTool.clampDurationSec(opts.maxDurationSec, 180);
    this.recording = opts.recording ?? false;
  }
  // --- Schema exporters ---------------------------------------------------
  /** OpenAI Chat Completions / Assistants tool spec. */
  openaiSchema() {
    return {
      type: "function",
      function: {
        name: this.name,
        description: this.description,
        parameters: PARAMETERS_SCHEMA
      }
    };
  }
  /** Anthropic Messages API tool spec. */
  anthropicSchema() {
    return {
      name: this.name,
      description: this.description,
      input_schema: PARAMETERS_SCHEMA
    };
  }
  /**
   * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
   * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
   */
  hermesSchema() {
    return {
      name: this.name,
      description: this.description,
      parameters: PARAMETERS_SCHEMA
    };
  }
  // --- Lifecycle ----------------------------------------------------------
  /**
   * Start the underlying Patter server. Idempotent, including under
   * concurrency: callers that arrive while a start is in flight await the
   * same promise instead of calling `phone.serve()` a second time.
   *
   * @throws {Error} When no `agent` config was supplied or the metrics store
   *   is unavailable after `serve()`.
   */
  async start() {
    if (this.started) return;
    if (!this.startPromise) {
      this.startPromise = this.startOnce().finally(() => {
        // Drop the memo so a failed start can be retried later.
        this.startPromise = null;
      });
    }
    await this.startPromise;
  }
  /** Performs the actual start-up work; only ever invoked through `start()`. */
  async startOnce() {
    if (!this.agent) {
      throw new Error(
        "PatterTool.start: `agent` config is required. Pass `{ stt, llm, tts }` or an `engine` (e.g. OpenAIRealtime) when constructing PatterTool."
      );
    }
    const builtAgent = this.phone.agent(this.agent);
    await this.phone.serve({
      agent: builtAgent,
      recording: this.recording,
      onCallEnd: this.onCallEndHandler.bind(this)
    });
    const store = this.phone.metricsStore;
    if (!store) {
      throw new Error(
        "PatterTool.start: phone.metricsStore is null after serve() \u2014 is the dashboard disabled?"
      );
    }
    const listener = (event) => {
      if (event?.type !== "call_initiated" || !this.pendingDial) return;
      const callId = event.data?.call_id || "";
      if (!callId) return;
      // Clear the slot before dispatching so the resolver fires at most once.
      const dispatch = this.pendingDial;
      this.pendingDial = null;
      dispatch(callId);
    };
    store.on("sse", listener);
    this.sseListener = listener;
    this.metricsStoreRef = store;
    this.started = true;
  }
  /** Stop the underlying Patter server (and reject any pending calls). */
  async stop() {
    if (!this.started) return;
    if (this.metricsStoreRef && this.sseListener) {
      this.metricsStoreRef.off("sse", this.sseListener);
    }
    this.sseListener = null;
    this.metricsStoreRef = null;
    this.pendingDial = null;
    for (const [, p] of this.pending) {
      clearTimeout(p.timer);
      p.reject(new Error("PatterTool: shutdown while call pending"));
    }
    this.pending.clear();
    const stoppable = this.phone;
    try {
      if (typeof stoppable.stop === "function") {
        await stoppable.stop();
      }
    } finally {
      // Listeners are already detached at this point, so the tool must be
      // treated as stopped even when `phone.stop()` throws; otherwise a later
      // `execute()` would skip `start()` and never observe `call_initiated`.
      this.started = false;
    }
  }
  // --- Execution ----------------------------------------------------------
  /**
   * Place one outbound call and wait for it to end.
   *
   * @param args.to Destination number; must be a string starting with "+".
   * @param args.goal Optional per-call system prompt override.
   * @param args.first_message Optional per-call first message override.
   * @param args.max_duration_sec Optional per-call timeout (clamped to 5..1800).
   * @returns Promise resolving to `{ call_id, status, duration_seconds,
   *   cost_usd, transcript, metrics }` once `onCallEnd` reports the call.
   * @throws {Error} On invalid `to`, when `call_initiated` is not observed in
   *   time, or when the call outlives its timeout (the promise is rejected;
   *   this method itself issues no hang-up request).
   */
  async execute(args) {
    if (!this.started) await this.start();
    if (!args || typeof args.to !== "string" || !args.to.startsWith("+")) {
      throw new Error('PatterTool.execute: `to` must be an E.164 phone number (e.g. "+15551234567").');
    }
    const timeoutSec = _PatterTool.clampDurationSec(args.max_duration_sec, this.maxDurationSec);
    const baseAgent = this.agent ?? {};
    const overrideAgent = this.phone.agent({
      ...baseAgent,
      ...args.goal !== void 0 ? { systemPrompt: args.goal } : {},
      ...args.first_message !== void 0 ? { firstMessage: args.first_message } : {}
    });
    const callId = await this.acquireCallId(args.to, overrideAgent);
    return new Promise((resolve, reject) => {
      const timer = setTimeout(() => {
        this.pending.delete(callId);
        reject(new Error(`PatterTool.execute: call ${callId} exceeded ${timeoutSec}s timeout`));
      }, timeoutSec * 1e3);
      this.pending.set(callId, {
        resolve,
        reject,
        timer,
        startedAt: Date.now() / 1e3
      });
    });
  }
  /** Issue the outbound dial under the mutex and return its assigned call_id. */
  async acquireCallId(to, agent) {
    let release;
    const slot = new Promise((r) => {
      release = r;
    });
    const previous = this.dialQueue;
    this.dialQueue = previous.then(() => slot);
    await previous;
    let captureTimer = null;
    try {
      const callIdPromise = new Promise((resolve, reject) => {
        this.pendingDial = resolve;
        captureTimer = setTimeout(() => {
          this.pendingDial = null;
          reject(
            new Error(
              `PatterTool.execute: did not observe call_initiated within ${_PatterTool.DIAL_CAPTURE_TIMEOUT_MS}ms`
            )
          );
        }, _PatterTool.DIAL_CAPTURE_TIMEOUT_MS);
      });
      // The capture timer may fire while `phone.call()` is still pending,
      // i.e. before anything awaits `callIdPromise`. Attach a no-op handler
      // so that early rejection is not reported as unhandled; the `await`
      // below still observes and rethrows it.
      callIdPromise.catch(() => void 0);
      await this.phone.call({
        to,
        agent
      });
      return await callIdPromise;
    } finally {
      if (captureTimer) clearTimeout(captureTimer);
      this.pendingDial = null;
      release();
    }
  }
  /**
   * Hermes-style handler: `(args, kwargs) => Promise<string>` returning a JSON
   * string with either the result envelope or an `{"error": "..."}` payload.
   * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
   * the same wire contract.
   */
  hermesHandler() {
    return async (args) => {
      try {
        const result = await this.execute(args);
        return JSON.stringify(result);
      } catch (err) {
        return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
      }
    };
  }
  // --- Internal: onCallEnd dispatcher -------------------------------------
  /**
   * Resolve the pending `execute()` promise that matches an ended call.
   * Ends for unknown call ids are re-emitted on `bus` as `orphan_end`.
   */
  async onCallEndHandler(data) {
    const callId = data?.call_id || "";
    if (!callId) return;
    const pending = this.pending.get(callId);
    if (!pending) {
      this.bus.emit("orphan_end", { call_id: callId, data });
      return;
    }
    clearTimeout(pending.timer);
    this.pending.delete(callId);
    const metrics = data.metrics && typeof data.metrics === "object" ? data.metrics : null;
    const cost = metrics && typeof metrics.cost === "object" && metrics.cost && typeof metrics.cost.total === "number" ? metrics.cost.total : void 0;
    // Prefer the reported duration; otherwise fall back to wall-clock time
    // since the pending entry was registered.
    const duration = typeof metrics?.duration_seconds === "number" ? metrics.duration_seconds : Math.max(0, Date.now() / 1e3 - pending.startedAt);
    const transcript = Array.isArray(data.transcript) ? data.transcript : [];
    const status = data.status || "completed";
    pending.resolve({
      call_id: callId,
      status,
      duration_seconds: duration,
      cost_usd: cost,
      transcript,
      metrics
    });
  }
};
|
|
1080
|
+
|
|
1029
1081
|
// src/providers/gemini-live.ts
|
|
1030
1082
|
var GEMINI_DEFAULT_INPUT_SR = 16e3;
|
|
1031
1083
|
var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
|
|
1032
1084
|
var GeminiLiveAdapter = class {
|
|
1033
1085
|
constructor(apiKey, options = {}) {
|
|
1034
1086
|
this.apiKey = apiKey;
|
|
1035
|
-
this.model = options.model ?? "gemini-2.
|
|
1087
|
+
this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
|
|
1036
1088
|
this.voice = options.voice ?? "Puck";
|
|
1037
1089
|
this.instructions = options.instructions ?? "";
|
|
1038
1090
|
this.language = options.language ?? "en-US";
|
|
@@ -1055,18 +1107,27 @@ var GeminiLiveAdapter = class {
|
|
|
1055
1107
|
receiveLoop = null;
|
|
1056
1108
|
handlers = [];
|
|
1057
1109
|
running = false;
|
|
1110
|
+
/**
|
|
1111
|
+
* Tracks call_id -> function name so tool responses can be sent back with
|
|
1112
|
+
* the correct `name` field (Gemini expects the original function name,
|
|
1113
|
+
* not the call_id).
|
|
1114
|
+
*/
|
|
1115
|
+
pendingToolCalls = /* @__PURE__ */ new Map();
|
|
1058
1116
|
async connect() {
|
|
1059
1117
|
let genaiModule;
|
|
1060
1118
|
try {
|
|
1061
1119
|
const modName = "@google/genai";
|
|
1062
1120
|
genaiModule = await import(modName);
|
|
1063
|
-
} catch
|
|
1121
|
+
} catch {
|
|
1064
1122
|
throw new Error(
|
|
1065
|
-
|
|
1123
|
+
'\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
|
|
1066
1124
|
);
|
|
1067
1125
|
}
|
|
1068
1126
|
const { GoogleGenAI } = genaiModule;
|
|
1069
|
-
this.client = new GoogleGenAI({
|
|
1127
|
+
this.client = new GoogleGenAI({
|
|
1128
|
+
apiKey: this.apiKey,
|
|
1129
|
+
httpOptions: { apiVersion: "v1alpha" }
|
|
1130
|
+
});
|
|
1070
1131
|
const config = {
|
|
1071
1132
|
responseModalities: ["AUDIO"],
|
|
1072
1133
|
speechConfig: {
|
|
@@ -1123,9 +1184,11 @@ var GeminiLiveAdapter = class {
|
|
|
1123
1184
|
async sendFunctionResult(callId, result) {
|
|
1124
1185
|
if (!this.session) return;
|
|
1125
1186
|
const sess = this.session;
|
|
1187
|
+
const name = this.pendingToolCalls.get(callId) ?? callId;
|
|
1188
|
+
this.pendingToolCalls.delete(callId);
|
|
1126
1189
|
await sess.sendToolResponse?.({
|
|
1127
1190
|
functionResponses: [
|
|
1128
|
-
{ id: callId, name
|
|
1191
|
+
{ id: callId, name, response: { result } }
|
|
1129
1192
|
]
|
|
1130
1193
|
});
|
|
1131
1194
|
}
|
|
@@ -1169,9 +1232,14 @@ var GeminiLiveAdapter = class {
|
|
|
1169
1232
|
if (r.toolCall) {
|
|
1170
1233
|
for (const fn of r.toolCall.functionCalls ?? []) {
|
|
1171
1234
|
const args = fn.args ?? {};
|
|
1235
|
+
const callId = fn.id ?? "";
|
|
1236
|
+
const fnName = fn.name ?? "";
|
|
1237
|
+
if (callId && fnName) {
|
|
1238
|
+
this.pendingToolCalls.set(callId, fnName);
|
|
1239
|
+
}
|
|
1172
1240
|
await this.emit("function_call", {
|
|
1173
|
-
call_id:
|
|
1174
|
-
name:
|
|
1241
|
+
call_id: callId,
|
|
1242
|
+
name: fnName,
|
|
1175
1243
|
arguments: typeof args === "string" ? args : JSON.stringify(args)
|
|
1176
1244
|
});
|
|
1177
1245
|
}
|
|
@@ -1198,11 +1266,12 @@ var GeminiLiveAdapter = class {
|
|
|
1198
1266
|
await this.receiveLoop.catch(() => void 0);
|
|
1199
1267
|
this.receiveLoop = null;
|
|
1200
1268
|
}
|
|
1269
|
+
this.pendingToolCalls.clear();
|
|
1201
1270
|
}
|
|
1202
1271
|
};
|
|
1203
1272
|
|
|
1204
1273
|
// src/providers/ultravox-realtime.ts
|
|
1205
|
-
import
|
|
1274
|
+
import WebSocket from "ws";
|
|
1206
1275
|
var ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
|
|
1207
1276
|
var ULTRAVOX_DEFAULT_SR = 16e3;
|
|
1208
1277
|
var UltravoxRealtimeAdapter = class {
|
|
@@ -1239,7 +1308,6 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1239
1308
|
outputSampleRate: this.sampleRate
|
|
1240
1309
|
}
|
|
1241
1310
|
},
|
|
1242
|
-
firstSpeaker: this.firstMessage ? "FIRST_SPEAKER_AGENT" : "FIRST_SPEAKER_USER",
|
|
1243
1311
|
recordingEnabled: false
|
|
1244
1312
|
};
|
|
1245
1313
|
if (this.voice) body.voice = this.voice;
|
|
@@ -1249,6 +1317,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1249
1317
|
body.initialMessages = [
|
|
1250
1318
|
{ role: "MESSAGE_ROLE_AGENT", text: this.firstMessage }
|
|
1251
1319
|
];
|
|
1320
|
+
} else {
|
|
1321
|
+
body.firstSpeaker = "FIRST_SPEAKER_USER";
|
|
1252
1322
|
}
|
|
1253
1323
|
if (this.tools?.length) {
|
|
1254
1324
|
body.selectedTools = this.tools.map((t) => ({
|
|
@@ -1273,7 +1343,7 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1273
1343
|
}
|
|
1274
1344
|
const call = await resp.json();
|
|
1275
1345
|
if (!call.joinUrl) throw new Error("Ultravox response missing joinUrl");
|
|
1276
|
-
this.ws = new
|
|
1346
|
+
this.ws = new WebSocket(call.joinUrl);
|
|
1277
1347
|
await new Promise((resolve, reject) => {
|
|
1278
1348
|
const ws = this.ws;
|
|
1279
1349
|
const onOpen = () => {
|
|
@@ -1298,14 +1368,16 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1298
1368
|
});
|
|
1299
1369
|
}
|
|
1300
1370
|
sendAudio(pcm) {
|
|
1301
|
-
if (!this.ws || this.ws.readyState !==
|
|
1371
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1302
1372
|
this.ws.send(pcm, { binary: true });
|
|
1303
1373
|
}
|
|
1304
1374
|
async sendText(text) {
|
|
1305
|
-
this.ws
|
|
1375
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1376
|
+
this.ws.send(JSON.stringify({ type: "input_text_message", text }));
|
|
1306
1377
|
}
|
|
1307
1378
|
async sendFunctionResult(callId, result) {
|
|
1308
|
-
this.ws
|
|
1379
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1380
|
+
this.ws.send(
|
|
1309
1381
|
JSON.stringify({
|
|
1310
1382
|
type: "client_tool_result",
|
|
1311
1383
|
invocationId: callId,
|
|
@@ -1315,7 +1387,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1315
1387
|
);
|
|
1316
1388
|
}
|
|
1317
1389
|
cancelResponse() {
|
|
1318
|
-
this.ws
|
|
1390
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1391
|
+
this.ws.send(JSON.stringify({ type: "playback_clear_buffer" }));
|
|
1319
1392
|
}
|
|
1320
1393
|
onEvent(handler) {
|
|
1321
1394
|
this.handlers.push(handler);
|
|
@@ -1396,7 +1469,7 @@ async function loadCron() {
|
|
|
1396
1469
|
try {
|
|
1397
1470
|
const imported = await import(
|
|
1398
1471
|
/* @vite-ignore */
|
|
1399
|
-
"./node-cron-
|
|
1472
|
+
"./node-cron-6PRPSBG5.mjs"
|
|
1400
1473
|
);
|
|
1401
1474
|
cronModule = imported && imported.default ? imported.default : imported;
|
|
1402
1475
|
return cronModule;
|
|
@@ -1504,6 +1577,7 @@ function scheduleInterval(intervalOrOpts, callback) {
|
|
|
1504
1577
|
|
|
1505
1578
|
// src/stt/deepgram.ts
|
|
1506
1579
|
var STT = class extends DeepgramSTT {
|
|
1580
|
+
static providerKey = "deepgram";
|
|
1507
1581
|
constructor(opts = {}) {
|
|
1508
1582
|
const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
|
|
1509
1583
|
if (!key) {
|
|
@@ -1531,6 +1605,7 @@ var STT = class extends DeepgramSTT {
|
|
|
1531
1605
|
// src/providers/whisper-stt.ts
|
|
1532
1606
|
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
1533
1607
|
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
1608
|
+
var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
|
|
1534
1609
|
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
1535
1610
|
const dataSize = pcm.length;
|
|
1536
1611
|
const header = Buffer.alloc(44);
|
|
@@ -1554,33 +1629,63 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1554
1629
|
model;
|
|
1555
1630
|
language;
|
|
1556
1631
|
bufferSize;
|
|
1557
|
-
|
|
1558
|
-
|
|
1632
|
+
responseFormat;
|
|
1633
|
+
// Accumulate chunks in an array and concat once on flush — avoids the
|
|
1634
|
+
// per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
|
|
1635
|
+
// dominates CPU when the phone leg delivers 20 ms frames.
|
|
1636
|
+
chunks = [];
|
|
1637
|
+
bufferedBytes = 0;
|
|
1638
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
1559
1639
|
running = false;
|
|
1560
1640
|
pendingTranscriptions = [];
|
|
1561
|
-
|
|
1641
|
+
/**
|
|
1642
|
+
* @param apiKey OpenAI API key.
|
|
1643
|
+
* @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
|
|
1644
|
+
* @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
|
|
1645
|
+
* @param bufferSize Bytes of PCM16 to buffer before each transcription request.
|
|
1646
|
+
* @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
|
|
1647
|
+
*
|
|
1648
|
+
* Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
|
|
1649
|
+
* for cross-language parity. Pre-0.5.3 the TS positional order was
|
|
1650
|
+
* ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
|
|
1651
|
+
* the old order will need to swap ``language`` and ``model``.
|
|
1652
|
+
*/
|
|
1653
|
+
constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
|
|
1654
|
+
if (!ALLOWED_MODELS.has(model)) {
|
|
1655
|
+
throw new Error(
|
|
1656
|
+
`WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
|
|
1657
|
+
);
|
|
1658
|
+
}
|
|
1562
1659
|
this.apiKey = apiKey;
|
|
1563
1660
|
this.model = model;
|
|
1564
1661
|
this.language = language;
|
|
1565
1662
|
this.bufferSize = bufferSize;
|
|
1663
|
+
this.responseFormat = responseFormat;
|
|
1566
1664
|
}
|
|
1567
1665
|
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1568
1666
|
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
1569
|
-
return new _WhisperSTT(apiKey,
|
|
1667
|
+
return new _WhisperSTT(apiKey, language, model);
|
|
1570
1668
|
}
|
|
1571
1669
|
async connect() {
|
|
1572
1670
|
this.running = true;
|
|
1573
|
-
this.
|
|
1671
|
+
this.chunks = [];
|
|
1672
|
+
this.bufferedBytes = 0;
|
|
1574
1673
|
}
|
|
1575
1674
|
sendAudio(audio) {
|
|
1576
1675
|
if (!this.running) return;
|
|
1577
|
-
this.
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1676
|
+
this.chunks.push(audio);
|
|
1677
|
+
this.bufferedBytes += audio.length;
|
|
1678
|
+
if (this.bufferedBytes >= this.bufferSize) {
|
|
1679
|
+
const pcm = this.flushChunks();
|
|
1581
1680
|
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1582
1681
|
}
|
|
1583
1682
|
}
|
|
1683
|
+
flushChunks() {
|
|
1684
|
+
const pcm = this.chunks.length === 1 ? this.chunks[0] : Buffer.concat(this.chunks, this.bufferedBytes);
|
|
1685
|
+
this.chunks = [];
|
|
1686
|
+
this.bufferedBytes = 0;
|
|
1687
|
+
return pcm;
|
|
1688
|
+
}
|
|
1584
1689
|
trackTranscription(promise) {
|
|
1585
1690
|
const wrapped = promise.finally(() => {
|
|
1586
1691
|
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
@@ -1588,25 +1693,25 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1588
1693
|
});
|
|
1589
1694
|
this.pendingTranscriptions.push(wrapped);
|
|
1590
1695
|
}
|
|
1696
|
+
/**
|
|
1697
|
+
* Register a transcript listener. Unlike the previous implementation
|
|
1698
|
+
* which capped at 10 and silently replaced the last one, we now keep all
|
|
1699
|
+
* registered callbacks in a Set; use {@link offTranscript} to remove one.
|
|
1700
|
+
*/
|
|
1591
1701
|
onTranscript(callback) {
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
}
|
|
1597
|
-
this.callbacks.push(callback);
|
|
1702
|
+
this.callbacks.add(callback);
|
|
1703
|
+
}
|
|
1704
|
+
offTranscript(callback) {
|
|
1705
|
+
this.callbacks.delete(callback);
|
|
1598
1706
|
}
|
|
1599
1707
|
async close() {
|
|
1600
1708
|
this.running = false;
|
|
1601
|
-
if (this.
|
|
1602
|
-
const pcm = this.
|
|
1603
|
-
this.buffer = Buffer.alloc(0);
|
|
1709
|
+
if (this.bufferedBytes > 0) {
|
|
1710
|
+
const pcm = this.flushChunks();
|
|
1604
1711
|
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1605
|
-
} else {
|
|
1606
|
-
this.buffer = Buffer.alloc(0);
|
|
1607
1712
|
}
|
|
1608
1713
|
await Promise.allSettled(this.pendingTranscriptions);
|
|
1609
|
-
this.callbacks
|
|
1714
|
+
this.callbacks.clear();
|
|
1610
1715
|
}
|
|
1611
1716
|
// ------------------------------------------------------------------
|
|
1612
1717
|
// Private
|
|
@@ -1616,6 +1721,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1616
1721
|
const formData = new FormData();
|
|
1617
1722
|
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
1618
1723
|
formData.append("model", this.model);
|
|
1724
|
+
formData.append("response_format", this.responseFormat);
|
|
1619
1725
|
if (this.language) {
|
|
1620
1726
|
formData.append("language", this.language);
|
|
1621
1727
|
}
|
|
@@ -1637,7 +1743,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1637
1743
|
const transcript = {
|
|
1638
1744
|
text,
|
|
1639
1745
|
isFinal: true,
|
|
1640
|
-
confidence:
|
|
1746
|
+
confidence: extractConfidence(json)
|
|
1641
1747
|
};
|
|
1642
1748
|
for (const cb of this.callbacks) {
|
|
1643
1749
|
cb(transcript);
|
|
@@ -1647,9 +1753,23 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1647
1753
|
}
|
|
1648
1754
|
}
|
|
1649
1755
|
};
|
|
1756
|
+
/**
 * Derives a 0..1 confidence from a transcription payload.
 *
 * Each segment with a numeric `avg_logprob` contributes
 * `exp(avg_logprob)` clamped to [0, 1]; the result is the mean of those
 * values. Returns 1 when the payload has no segments or none of them
 * carries a numeric `avg_logprob`.
 *
 * @param payload Parsed transcription response; only `segments` is read.
 * @returns Mean clamped probability, or 1 when nothing can be scored.
 */
function extractConfidence(payload) {
  const { segments } = payload;
  if (!segments || segments.length === 0) {
    return 1;
  }
  const clampToUnit = (value) => Math.max(0, Math.min(1, value));
  const scores = [...segments]
    .map((segment) => segment.avg_logprob)
    .filter((logProb) => typeof logProb === "number")
    .map((logProb) => clampToUnit(Math.exp(logProb)));
  if (scores.length === 0) {
    return 1;
  }
  let total = 0;
  for (const score of scores) {
    total += score;
  }
  return total / scores.length;
}
|
|
1650
1769
|
|
|
1651
1770
|
// src/stt/whisper.ts
|
|
1652
1771
|
var STT2 = class extends WhisperSTT {
|
|
1772
|
+
static providerKey = "whisper";
|
|
1653
1773
|
constructor(opts = {}) {
|
|
1654
1774
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1655
1775
|
if (!key) {
|
|
@@ -1657,18 +1777,53 @@ var STT2 = class extends WhisperSTT {
|
|
|
1657
1777
|
"Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
1658
1778
|
);
|
|
1659
1779
|
}
|
|
1660
|
-
super(key, opts.model ?? "whisper-1", opts.
|
|
1780
|
+
super(key, opts.language, opts.model ?? "whisper-1", opts.bufferSize, opts.responseFormat ?? "json");
|
|
1781
|
+
}
|
|
1782
|
+
};
|
|
1783
|
+
|
|
1784
|
+
// src/providers/openai-transcribe-stt.ts
|
|
1785
|
+
var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
|
|
1786
|
+
var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
|
|
1787
|
+
var OpenAITranscribeSTT = class extends WhisperSTT {
  /**
   * Same positional signature as the parent class, but with a narrower model
   * allow-list (`ALLOWED_MODELS2`) that is checked before delegating.
   *
   * @param apiKey OpenAI API key.
   * @param language Optional ISO-639-1 language code (e.g. ``"en"``, ``"it"``).
   * @param model ``gpt-4o-transcribe`` (default) or ``gpt-4o-mini-transcribe``.
   *   ``"whisper-1"`` is rejected on purpose — use ``WhisperSTT`` for it.
   * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
   * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
   * @throws {Error} When `model` is not in the allow-list.
   */
  constructor(apiKey, language, model = "gpt-4o-transcribe", bufferSize = DEFAULT_BUFFER_SIZE2, responseFormat = "json") {
    // The check runs before super(); it never touches `this`, so that is legal.
    const isSupported = ALLOWED_MODELS2.has(model);
    if (!isSupported) {
      const expected = [...ALLOWED_MODELS2].join(", ");
      throw new Error(
        `OpenAITranscribeSTT: unsupported model "${model}". Expected one of ${expected}. For "whisper-1", use WhisperSTT instead.`
      );
    }
    super(apiKey, language, model, bufferSize, responseFormat);
  }
};
|
|
1805
|
+
|
|
1806
|
+
// src/stt/openai-transcribe.ts
|
|
1807
|
+
/**
 * Options-object front end for `OpenAITranscribeSTT`. The API key comes from
 * `opts.apiKey` or, failing that, the `OPENAI_API_KEY` environment variable.
 */
var STT3 = class extends OpenAITranscribeSTT {
  static providerKey = "openai_transcribe";
  /**
   * @param opts Optional `{ apiKey, language, model, bufferSize, responseFormat }`.
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    const { apiKey, language, model, bufferSize, responseFormat } = opts;
    const resolvedKey = apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "OpenAI Transcribe STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    super(resolvedKey, language, model ?? "gpt-4o-transcribe", bufferSize, responseFormat ?? "json");
  }
};
|
|
1663
1819
|
|
|
1664
1820
|
// src/providers/cartesia-stt.ts
|
|
1665
|
-
import
|
|
1821
|
+
import WebSocket2 from "ws";
|
|
1666
1822
|
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
1667
1823
|
var API_VERSION = "2025-04-16";
|
|
1668
1824
|
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1669
1825
|
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
1670
1826
|
var CONNECT_TIMEOUT_MS = 1e4;
|
|
1671
|
-
var MAX_CALLBACKS = 10;
|
|
1672
1827
|
var CartesiaSTT = class {
|
|
1673
1828
|
constructor(apiKey, options = {}) {
|
|
1674
1829
|
this.apiKey = apiKey;
|
|
@@ -1678,10 +1833,13 @@ var CartesiaSTT = class {
|
|
|
1678
1833
|
}
|
|
1679
1834
|
}
|
|
1680
1835
|
ws = null;
|
|
1681
|
-
callbacks =
|
|
1836
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
1682
1837
|
keepaliveTimer = null;
|
|
1683
|
-
/**
|
|
1684
|
-
|
|
1838
|
+
/**
|
|
1839
|
+
* Cartesia request id — set from the server transcript events.
|
|
1840
|
+
* `null` until the first transcript event arrives (matches Python's `None`).
|
|
1841
|
+
*/
|
|
1842
|
+
requestId = null;
|
|
1685
1843
|
buildWsUrl() {
|
|
1686
1844
|
const opts = this.options;
|
|
1687
1845
|
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
|
|
@@ -1708,7 +1866,7 @@ var CartesiaSTT = class {
|
|
|
1708
1866
|
}
|
|
1709
1867
|
async connect() {
|
|
1710
1868
|
const url = this.buildWsUrl();
|
|
1711
|
-
this.ws = new
|
|
1869
|
+
this.ws = new WebSocket2(url, {
|
|
1712
1870
|
headers: { "User-Agent": USER_AGENT }
|
|
1713
1871
|
});
|
|
1714
1872
|
await new Promise((resolve, reject) => {
|
|
@@ -1735,7 +1893,7 @@ var CartesiaSTT = class {
|
|
|
1735
1893
|
this.handleEvent(event);
|
|
1736
1894
|
});
|
|
1737
1895
|
this.keepaliveTimer = setInterval(() => {
|
|
1738
|
-
if (this.ws && this.ws.readyState ===
|
|
1896
|
+
if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
|
|
1739
1897
|
try {
|
|
1740
1898
|
this.ws.ping();
|
|
1741
1899
|
} catch {
|
|
@@ -1768,19 +1926,24 @@ var CartesiaSTT = class {
|
|
|
1768
1926
|
}
|
|
1769
1927
|
}
|
|
1770
1928
|
sendAudio(audio) {
|
|
1771
|
-
if (!this.ws || this.ws.readyState !==
|
|
1929
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
1772
1930
|
this.ws.send(audio);
|
|
1773
1931
|
}
|
|
1774
1932
|
onTranscript(callback) {
|
|
1775
|
-
|
|
1776
|
-
getLogger().warn(
|
|
1777
|
-
"CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1778
|
-
);
|
|
1779
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1780
|
-
return;
|
|
1781
|
-
}
|
|
1782
|
-
this.callbacks.push(callback);
|
|
1933
|
+
this.callbacks.add(callback);
|
|
1783
1934
|
}
|
|
1935
|
+
/** Remove a previously registered transcript callback. */
|
|
1936
|
+
offTranscript(callback) {
|
|
1937
|
+
this.callbacks.delete(callback);
|
|
1938
|
+
}
|
|
1939
|
+
/**
|
|
1940
|
+
* Synchronous best-effort close. Sends `finalize` and closes the socket
|
|
1941
|
+
* without waiting for the server to flush any remaining transcripts.
|
|
1942
|
+
*
|
|
1943
|
+
* Limitation: any transcript events produced between the `finalize` send
|
|
1944
|
+
* and the socket close may be dropped. Callers that need to guarantee all
|
|
1945
|
+
* transcripts are delivered should await :meth:`closeAsync` instead.
|
|
1946
|
+
*/
|
|
1784
1947
|
close() {
|
|
1785
1948
|
if (this.keepaliveTimer) {
|
|
1786
1949
|
clearInterval(this.keepaliveTimer);
|
|
@@ -1795,10 +1958,53 @@ var CartesiaSTT = class {
|
|
|
1795
1958
|
this.ws = null;
|
|
1796
1959
|
}
|
|
1797
1960
|
}
|
|
1961
|
+
/**
|
|
1962
|
+
* Graceful close that awaits the `finalize` send and the socket closing
|
|
1963
|
+
* handshake, matching the Python adapter's behavior. Use this when you
|
|
1964
|
+
* need any in-flight transcripts to be flushed before teardown.
|
|
1965
|
+
*/
|
|
1966
|
+
async closeAsync() {
|
|
1967
|
+
if (this.keepaliveTimer) {
|
|
1968
|
+
clearInterval(this.keepaliveTimer);
|
|
1969
|
+
this.keepaliveTimer = null;
|
|
1970
|
+
}
|
|
1971
|
+
const ws = this.ws;
|
|
1972
|
+
this.ws = null;
|
|
1973
|
+
if (!ws) return;
|
|
1974
|
+
if (ws.readyState === WebSocket2.OPEN) {
|
|
1975
|
+
try {
|
|
1976
|
+
await new Promise((resolve) => {
|
|
1977
|
+
ws.send("finalize", (err) => {
|
|
1978
|
+
if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
|
|
1979
|
+
resolve();
|
|
1980
|
+
});
|
|
1981
|
+
});
|
|
1982
|
+
} catch (err) {
|
|
1983
|
+
getLogger().warn(`CartesiaSTT finalize error: ${String(err)}`);
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1986
|
+
if (ws.readyState === WebSocket2.OPEN || ws.readyState === WebSocket2.CONNECTING) {
|
|
1987
|
+
await new Promise((resolve) => {
|
|
1988
|
+
const done = () => {
|
|
1989
|
+
ws.off("close", done);
|
|
1990
|
+
ws.off("error", done);
|
|
1991
|
+
resolve();
|
|
1992
|
+
};
|
|
1993
|
+
ws.once("close", done);
|
|
1994
|
+
ws.once("error", done);
|
|
1995
|
+
try {
|
|
1996
|
+
ws.close();
|
|
1997
|
+
} catch {
|
|
1998
|
+
resolve();
|
|
1999
|
+
}
|
|
2000
|
+
});
|
|
2001
|
+
}
|
|
2002
|
+
}
|
|
1798
2003
|
};
|
|
1799
2004
|
|
|
1800
2005
|
// src/stt/cartesia.ts
|
|
1801
|
-
var
|
|
2006
|
+
var STT4 = class extends CartesiaSTT {
|
|
2007
|
+
static providerKey = "cartesia_stt";
|
|
1802
2008
|
constructor(opts = {}) {
|
|
1803
2009
|
const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
|
|
1804
2010
|
if (!key) {
|
|
@@ -1817,7 +2023,7 @@ var STT3 = class extends CartesiaSTT {
|
|
|
1817
2023
|
};
|
|
1818
2024
|
|
|
1819
2025
|
// src/providers/soniox-stt.ts
|
|
1820
|
-
import
|
|
2026
|
+
import WebSocket3 from "ws";
|
|
1821
2027
|
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
1822
2028
|
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
1823
2029
|
var END_TOKEN = "<end>";
|
|
@@ -1913,7 +2119,8 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1913
2119
|
return config;
|
|
1914
2120
|
}
|
|
1915
2121
|
async connect() {
|
|
1916
|
-
this.
|
|
2122
|
+
this.final.reset();
|
|
2123
|
+
this.ws = new WebSocket3(this.baseUrl);
|
|
1917
2124
|
await new Promise((resolve, reject) => {
|
|
1918
2125
|
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
1919
2126
|
this.ws.once("open", () => {
|
|
@@ -1932,7 +2139,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1932
2139
|
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
1933
2140
|
});
|
|
1934
2141
|
this.keepaliveTimer = setInterval(() => {
|
|
1935
|
-
if (this.ws && this.ws.readyState ===
|
|
2142
|
+
if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
|
|
1936
2143
|
try {
|
|
1937
2144
|
this.ws.send(KEEPALIVE_MESSAGE);
|
|
1938
2145
|
} catch {
|
|
@@ -2005,7 +2212,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2005
2212
|
}
|
|
2006
2213
|
}
|
|
2007
2214
|
sendAudio(audio) {
|
|
2008
|
-
if (!this.ws || this.ws.readyState !==
|
|
2215
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
2009
2216
|
if (audio.length === 0) return;
|
|
2010
2217
|
this.ws.send(audio);
|
|
2011
2218
|
}
|
|
@@ -2036,7 +2243,8 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2036
2243
|
};
|
|
2037
2244
|
|
|
2038
2245
|
// src/stt/soniox.ts
|
|
2039
|
-
var
|
|
2246
|
+
var STT5 = class extends SonioxSTT {
|
|
2247
|
+
static providerKey = "soniox";
|
|
2040
2248
|
constructor(opts = {}) {
|
|
2041
2249
|
const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
|
|
2042
2250
|
if (!key) {
|
|
@@ -2051,11 +2259,21 @@ var STT4 = class extends SonioxSTT {
|
|
|
2051
2259
|
};
|
|
2052
2260
|
|
|
2053
2261
|
// src/providers/assemblyai-stt.ts
|
|
2054
|
-
import
|
|
2262
|
+
import WebSocket4 from "ws";
|
|
2055
2263
|
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
2056
|
-
var DEFAULT_MIN_TURN_SILENCE_MS =
|
|
2264
|
+
var DEFAULT_MIN_TURN_SILENCE_MS = 400;
|
|
2057
2265
|
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
2058
|
-
var
|
|
2266
|
+
var TERMINATION_WAIT_TIMEOUT_MS = 500;
|
|
2267
|
+
var MIN_CHUNK_DURATION_MS = 50;
|
|
2268
|
+
var MAX_CHUNK_DURATION_MS = 1e3;
|
|
2269
|
+
var RECONNECT_ERROR_CODES = /* @__PURE__ */ new Set([3005, 3008]);
|
|
2270
|
+
var VALID_DOMAINS = /* @__PURE__ */ new Set(["general", "medical-v1"]);
|
|
2271
|
+
var AssemblyAISTTNotConnectedError = class extends Error {
|
|
2272
|
+
constructor(message = "AssemblyAISTT is not connected") {
|
|
2273
|
+
super(message);
|
|
2274
|
+
this.name = "AssemblyAISTTNotConnectedError";
|
|
2275
|
+
}
|
|
2276
|
+
};
|
|
2059
2277
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
2060
2278
|
constructor(apiKey, options = {}) {
|
|
2061
2279
|
this.apiKey = apiKey;
|
|
@@ -2063,13 +2281,24 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2063
2281
|
if (!apiKey) {
|
|
2064
2282
|
throw new Error("AssemblyAISTT requires a non-empty apiKey");
|
|
2065
2283
|
}
|
|
2284
|
+
if (options.domain !== void 0 && !VALID_DOMAINS.has(options.domain)) {
|
|
2285
|
+
const hint = options.domain === "medical" ? ' \u2014 did you mean "medical-v1"?' : "";
|
|
2286
|
+
throw new Error(
|
|
2287
|
+
`AssemblyAISTT: invalid domain "${options.domain}"; expected one of [${Array.from(
|
|
2288
|
+
VALID_DOMAINS
|
|
2289
|
+
).map((d) => `"${d}"`).join(", ")}]${hint}`
|
|
2290
|
+
);
|
|
2291
|
+
}
|
|
2066
2292
|
}
|
|
2067
2293
|
ws = null;
|
|
2068
|
-
callbacks =
|
|
2294
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
2295
|
+
closing = false;
|
|
2296
|
+
reconnectAttempts = 0;
|
|
2297
|
+
terminationResolve = null;
|
|
2069
2298
|
/** AssemblyAI session id — set when the `Begin` message arrives. */
|
|
2070
|
-
sessionId =
|
|
2299
|
+
sessionId = null;
|
|
2071
2300
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
2072
|
-
expiresAt =
|
|
2301
|
+
expiresAt = null;
|
|
2073
2302
|
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
2074
2303
|
static forTwilio(apiKey, model = "universal-streaming-english") {
|
|
2075
2304
|
return new _AssemblyAISTT(apiKey, {
|
|
@@ -2104,11 +2333,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2104
2333
|
keyterms_prompt: opts.keytermsPrompt ? JSON.stringify(opts.keytermsPrompt) : void 0,
|
|
2105
2334
|
language_detection: languageDetection,
|
|
2106
2335
|
prompt: opts.prompt,
|
|
2107
|
-
vad_threshold
|
|
2336
|
+
// vad_threshold intentionally omitted — not a valid v3 parameter.
|
|
2108
2337
|
speaker_labels: opts.speakerLabels,
|
|
2109
2338
|
max_speakers: opts.maxSpeakers,
|
|
2110
2339
|
domain: opts.domain
|
|
2111
2340
|
};
|
|
2341
|
+
if (opts.useQueryToken) {
|
|
2342
|
+
raw.token = this.apiKey;
|
|
2343
|
+
}
|
|
2112
2344
|
const params = new URLSearchParams();
|
|
2113
2345
|
for (const [key, value] of Object.entries(raw)) {
|
|
2114
2346
|
if (value === void 0 || value === null) continue;
|
|
@@ -2121,30 +2353,41 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2121
2353
|
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
2122
2354
|
return `${base}/v3/ws?${params.toString()}`;
|
|
2123
2355
|
}
|
|
2356
|
+
buildHeaders() {
|
|
2357
|
+
const headers = {
|
|
2358
|
+
"Content-Type": "application/json",
|
|
2359
|
+
"User-Agent": "Patter/1.0"
|
|
2360
|
+
};
|
|
2361
|
+
if (!this.options.useQueryToken) {
|
|
2362
|
+
headers.Authorization = this.apiKey;
|
|
2363
|
+
}
|
|
2364
|
+
return headers;
|
|
2365
|
+
}
|
|
2124
2366
|
async connect() {
|
|
2367
|
+
this.closing = false;
|
|
2125
2368
|
const url = this.buildUrl();
|
|
2126
|
-
this.ws = new
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
}
|
|
2132
|
-
});
|
|
2369
|
+
this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
|
|
2370
|
+
await this.awaitOpen(this.ws);
|
|
2371
|
+
this.attachHandlers(this.ws);
|
|
2372
|
+
}
|
|
2373
|
+
async awaitOpen(ws) {
|
|
2133
2374
|
await new Promise((resolve, reject) => {
|
|
2134
2375
|
const timer = setTimeout(
|
|
2135
2376
|
() => reject(new Error("AssemblyAI connect timeout")),
|
|
2136
2377
|
CONNECT_TIMEOUT_MS2
|
|
2137
2378
|
);
|
|
2138
|
-
|
|
2379
|
+
ws.once("open", () => {
|
|
2139
2380
|
clearTimeout(timer);
|
|
2140
2381
|
resolve();
|
|
2141
2382
|
});
|
|
2142
|
-
|
|
2383
|
+
ws.once("error", (err) => {
|
|
2143
2384
|
clearTimeout(timer);
|
|
2144
2385
|
reject(err);
|
|
2145
2386
|
});
|
|
2146
2387
|
});
|
|
2147
|
-
|
|
2388
|
+
}
|
|
2389
|
+
attachHandlers(ws) {
|
|
2390
|
+
ws.on("message", (raw) => {
|
|
2148
2391
|
let event;
|
|
2149
2392
|
try {
|
|
2150
2393
|
event = JSON.parse(raw.toString());
|
|
@@ -2153,12 +2396,45 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2153
2396
|
}
|
|
2154
2397
|
this.handleEvent(event);
|
|
2155
2398
|
});
|
|
2399
|
+
ws.on("close", (code) => {
|
|
2400
|
+
if (!this.closing && RECONNECT_ERROR_CODES.has(code) && this.reconnectAttempts < 1) {
|
|
2401
|
+
this.reconnectAttempts += 1;
|
|
2402
|
+
getLogger().warn(
|
|
2403
|
+
`AssemblyAISTT: close code ${code} \u2014 attempting single reconnect.`
|
|
2404
|
+
);
|
|
2405
|
+
this.reconnect().catch((err) => {
|
|
2406
|
+
getLogger().error("AssemblyAISTT reconnect failed", err);
|
|
2407
|
+
});
|
|
2408
|
+
}
|
|
2409
|
+
});
|
|
2410
|
+
}
|
|
2411
|
+
async reconnect() {
|
|
2412
|
+
const url = this.buildUrl();
|
|
2413
|
+
this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
|
|
2414
|
+
await this.awaitOpen(this.ws);
|
|
2415
|
+
this.attachHandlers(this.ws);
|
|
2156
2416
|
}
|
|
2157
2417
|
handleEvent(event) {
|
|
2158
2418
|
const type = event.type;
|
|
2159
2419
|
if (type === "Begin") {
|
|
2160
|
-
this.sessionId = event.id ??
|
|
2161
|
-
this.expiresAt = event.expires_at ??
|
|
2420
|
+
this.sessionId = event.id ?? null;
|
|
2421
|
+
this.expiresAt = event.expires_at ?? null;
|
|
2422
|
+
return;
|
|
2423
|
+
}
|
|
2424
|
+
if (type === "Termination") {
|
|
2425
|
+
if (this.terminationResolve) {
|
|
2426
|
+
this.terminationResolve();
|
|
2427
|
+
this.terminationResolve = null;
|
|
2428
|
+
}
|
|
2429
|
+
return;
|
|
2430
|
+
}
|
|
2431
|
+
if (type === "SpeechStarted") {
|
|
2432
|
+
this.emit({
|
|
2433
|
+
text: "",
|
|
2434
|
+
isFinal: false,
|
|
2435
|
+
confidence: 0,
|
|
2436
|
+
eventType: "SpeechStarted"
|
|
2437
|
+
});
|
|
2162
2438
|
return;
|
|
2163
2439
|
}
|
|
2164
2440
|
if (type !== "Turn") {
|
|
@@ -2193,28 +2469,89 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2193
2469
|
}
|
|
2194
2470
|
}
|
|
2195
2471
|
sendAudio(audio) {
|
|
2196
|
-
if (!this.ws || this.ws.readyState !==
|
|
2472
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2473
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2474
|
+
"AssemblyAISTT.sendAudio: WebSocket is not open"
|
|
2475
|
+
);
|
|
2476
|
+
}
|
|
2477
|
+
const durationMs = this.estimateChunkDurationMs(audio.length);
|
|
2478
|
+
if (durationMs !== null && (durationMs < MIN_CHUNK_DURATION_MS || durationMs > MAX_CHUNK_DURATION_MS)) {
|
|
2479
|
+
getLogger().warn(
|
|
2480
|
+
`AssemblyAISTT: audio chunk duration ${durationMs.toFixed(1)}ms outside 50-1000ms bounds (may trigger error 3007).`
|
|
2481
|
+
);
|
|
2482
|
+
}
|
|
2197
2483
|
this.ws.send(audio);
|
|
2198
2484
|
}
|
|
2199
|
-
|
|
2200
|
-
if (
|
|
2201
|
-
|
|
2202
|
-
|
|
2485
|
+
estimateChunkDurationMs(byteLength) {
|
|
2486
|
+
if (byteLength <= 0) return null;
|
|
2487
|
+
const sampleRate = this.options.sampleRate ?? 16e3;
|
|
2488
|
+
if (sampleRate <= 0) return null;
|
|
2489
|
+
const bytesPerSample = (this.options.encoding ?? "pcm_s16le") === "pcm_s16le" ? 2 : 1;
|
|
2490
|
+
const samples = byteLength / bytesPerSample;
|
|
2491
|
+
return samples / sampleRate * 1e3;
|
|
2492
|
+
}
|
|
2493
|
+
/**
|
|
2494
|
+
* Send an `UpdateConfiguration` frame to change settings mid-stream.
|
|
2495
|
+
* Only defined fields are included.
|
|
2496
|
+
*/
|
|
2497
|
+
updateConfiguration(params) {
|
|
2498
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2499
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2500
|
+
"AssemblyAISTT.updateConfiguration: WebSocket is not open"
|
|
2203
2501
|
);
|
|
2204
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
2205
|
-
return;
|
|
2206
2502
|
}
|
|
2207
|
-
|
|
2503
|
+
const payload = { type: "UpdateConfiguration" };
|
|
2504
|
+
if (params.keytermsPrompt !== void 0) {
|
|
2505
|
+
payload.keyterms_prompt = JSON.stringify(params.keytermsPrompt);
|
|
2506
|
+
}
|
|
2507
|
+
if (params.prompt !== void 0) {
|
|
2508
|
+
payload.prompt = params.prompt;
|
|
2509
|
+
}
|
|
2510
|
+
if (params.minTurnSilence !== void 0) {
|
|
2511
|
+
payload.min_turn_silence = params.minTurnSilence;
|
|
2512
|
+
}
|
|
2513
|
+
if (params.maxTurnSilence !== void 0) {
|
|
2514
|
+
payload.max_turn_silence = params.maxTurnSilence;
|
|
2515
|
+
}
|
|
2516
|
+
this.ws.send(JSON.stringify(payload));
|
|
2208
2517
|
}
|
|
2209
|
-
|
|
2210
|
-
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2518
|
+
/** Force the server to finalize the current turn (for barge-in). */
|
|
2519
|
+
forceEndpoint() {
|
|
2520
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2521
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2522
|
+
"AssemblyAISTT.forceEndpoint: WebSocket is not open"
|
|
2523
|
+
);
|
|
2524
|
+
}
|
|
2525
|
+
this.ws.send(JSON.stringify({ type: "ForceEndpoint" }));
|
|
2526
|
+
}
|
|
2527
|
+
onTranscript(callback) {
|
|
2528
|
+
this.callbacks.add(callback);
|
|
2529
|
+
return () => {
|
|
2530
|
+
this.callbacks.delete(callback);
|
|
2531
|
+
};
|
|
2532
|
+
}
|
|
2533
|
+
async close() {
|
|
2534
|
+
this.closing = true;
|
|
2535
|
+
if (!this.ws) return;
|
|
2536
|
+
try {
|
|
2537
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
2538
|
+
} catch {
|
|
2539
|
+
}
|
|
2540
|
+
await new Promise((resolve) => {
|
|
2541
|
+
const timer = setTimeout(() => {
|
|
2542
|
+
this.terminationResolve = null;
|
|
2543
|
+
resolve();
|
|
2544
|
+
}, TERMINATION_WAIT_TIMEOUT_MS);
|
|
2545
|
+
this.terminationResolve = () => {
|
|
2546
|
+
clearTimeout(timer);
|
|
2547
|
+
resolve();
|
|
2548
|
+
};
|
|
2549
|
+
});
|
|
2550
|
+
try {
|
|
2215
2551
|
this.ws.close();
|
|
2216
|
-
|
|
2552
|
+
} catch {
|
|
2217
2553
|
}
|
|
2554
|
+
this.ws = null;
|
|
2218
2555
|
}
|
|
2219
2556
|
};
|
|
2220
2557
|
function averageConfidence(words) {
|
|
@@ -2227,7 +2564,8 @@ function averageConfidence(words) {
|
|
|
2227
2564
|
}
|
|
2228
2565
|
|
|
2229
2566
|
// src/stt/assemblyai.ts
|
|
2230
|
-
var
|
|
2567
|
+
var STT6 = class extends AssemblyAISTT {
|
|
2568
|
+
static providerKey = "assemblyai";
|
|
2231
2569
|
constructor(opts = {}) {
|
|
2232
2570
|
const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
|
|
2233
2571
|
if (!key) {
|
|
@@ -2289,7 +2627,8 @@ var ELEVENLABS_VOICE_ID_BY_NAME = {
|
|
|
2289
2627
|
glinda: "z9fAnlkpzviPz146aGWa",
|
|
2290
2628
|
giovanni: "zcAOhNBS3c14rBihAFp1",
|
|
2291
2629
|
mimi: "zrHiDhphv9ZnVXBqCLjz",
|
|
2292
|
-
|
|
2630
|
+
sarah: "EXAVITQu4vr4xnSDxMaL",
|
|
2631
|
+
alloy: "EXAVITQu4vr4xnSDxMaL"
|
|
2293
2632
|
};
|
|
2294
2633
|
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
|
|
2295
2634
|
function resolveVoiceId(voice) {
|
|
@@ -2297,14 +2636,78 @@ function resolveVoiceId(voice) {
|
|
|
2297
2636
|
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
2298
2637
|
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
2299
2638
|
}
|
|
2300
|
-
var ElevenLabsTTS = class {
|
|
2301
|
-
|
|
2639
|
+
var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
2640
|
+
apiKey;
|
|
2641
|
+
voiceId;
|
|
2642
|
+
modelId;
|
|
2643
|
+
outputFormat;
|
|
2644
|
+
voiceSettings;
|
|
2645
|
+
languageCode;
|
|
2646
|
+
chunkSize;
|
|
2647
|
+
constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_flash_v2_5", outputFormat = "pcm_16000") {
|
|
2302
2648
|
this.apiKey = apiKey;
|
|
2303
|
-
|
|
2304
|
-
|
|
2305
|
-
|
|
2649
|
+
if (typeof voiceIdOrOptions === "object") {
|
|
2650
|
+
const o = voiceIdOrOptions;
|
|
2651
|
+
this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2652
|
+
this.modelId = o.modelId ?? "eleven_flash_v2_5";
|
|
2653
|
+
this.outputFormat = o.outputFormat ?? "pcm_16000";
|
|
2654
|
+
this.voiceSettings = o.voiceSettings;
|
|
2655
|
+
this.languageCode = o.languageCode;
|
|
2656
|
+
this.chunkSize = o.chunkSize ?? 4096;
|
|
2657
|
+
} else {
|
|
2658
|
+
this.voiceId = resolveVoiceId(voiceIdOrOptions);
|
|
2659
|
+
this.modelId = modelId;
|
|
2660
|
+
this.outputFormat = outputFormat;
|
|
2661
|
+
this.voiceSettings = void 0;
|
|
2662
|
+
this.languageCode = void 0;
|
|
2663
|
+
this.chunkSize = 4096;
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2666
|
+
/**
|
|
2667
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
2668
|
+
*
|
|
2669
|
+
* Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
|
|
2670
|
+
* directly — the exact wire format Twilio's media stream uses — letting
|
|
2671
|
+
* the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
|
|
2672
|
+
* `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
|
|
2673
|
+
* and removes a potential aliasing source.
|
|
2674
|
+
*
|
|
2675
|
+
* `voiceSettings` defaults to a low-bandwidth-friendly profile
|
|
2676
|
+
* (speaker boost off, modest stability) which sounds cleaner at 8 kHz
|
|
2677
|
+
* μ-law than the studio default. Pass an explicit object to override.
|
|
2678
|
+
*/
|
|
2679
|
+
static forTwilio(apiKey, options = {}) {
|
|
2680
|
+
const voiceSettings = options.voiceSettings ?? {
|
|
2681
|
+
// Speaker boost adds high-frequency emphasis that aliases ugly over an
|
|
2682
|
+
// 8 kHz μ-law line. Slightly higher stability tames the excursions
|
|
2683
|
+
// that compander quantization noise can amplify.
|
|
2684
|
+
stability: 0.6,
|
|
2685
|
+
similarity_boost: 0.75,
|
|
2686
|
+
use_speaker_boost: false
|
|
2687
|
+
};
|
|
2688
|
+
return new _ElevenLabsTTS(apiKey, {
|
|
2689
|
+
...options,
|
|
2690
|
+
voiceSettings,
|
|
2691
|
+
outputFormat: "ulaw_8000"
|
|
2692
|
+
});
|
|
2693
|
+
}
|
|
2694
|
+
/**
|
|
2695
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
2696
|
+
*
|
|
2697
|
+
* Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
|
|
2698
|
+
* matches our default Telnyx handler. We pick `pcm_16000` so the audio
|
|
2699
|
+
* flows end-to-end with zero resampling or transcoding.
|
|
2700
|
+
*
|
|
2701
|
+
* Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
|
|
2702
|
+
* construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
|
|
2703
|
+
* — Telnyx supports that natively too.
|
|
2704
|
+
*/
|
|
2705
|
+
static forTelnyx(apiKey, options = {}) {
|
|
2706
|
+
return new _ElevenLabsTTS(apiKey, {
|
|
2707
|
+
...options,
|
|
2708
|
+
outputFormat: "pcm_16000"
|
|
2709
|
+
});
|
|
2306
2710
|
}
|
|
2307
|
-
voiceId;
|
|
2308
2711
|
/**
|
|
2309
2712
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2310
2713
|
*
|
|
@@ -2321,22 +2724,29 @@ var ElevenLabsTTS = class {
|
|
|
2321
2724
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2322
2725
|
*
|
|
2323
2726
|
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
2324
|
-
* configured to).
|
|
2727
|
+
* configured to). `chunkSize` controls the maximum yield size — 512 is a
|
|
2728
|
+
* good choice for low-latency telephony.
|
|
2325
2729
|
*/
|
|
2326
2730
|
async *synthesizeStream(text) {
|
|
2327
2731
|
const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
|
|
2732
|
+
const body = {
|
|
2733
|
+
text,
|
|
2734
|
+
model_id: this.modelId
|
|
2735
|
+
};
|
|
2736
|
+
if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
|
|
2737
|
+
if (this.languageCode) body["language_code"] = this.languageCode;
|
|
2328
2738
|
const response = await fetch(url, {
|
|
2329
2739
|
method: "POST",
|
|
2330
2740
|
headers: {
|
|
2331
2741
|
"xi-api-key": this.apiKey,
|
|
2332
2742
|
"Content-Type": "application/json"
|
|
2333
2743
|
},
|
|
2334
|
-
body: JSON.stringify(
|
|
2744
|
+
body: JSON.stringify(body),
|
|
2335
2745
|
signal: AbortSignal.timeout(3e4)
|
|
2336
2746
|
});
|
|
2337
2747
|
if (!response.ok) {
|
|
2338
|
-
const
|
|
2339
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${
|
|
2748
|
+
const errBody = await response.text();
|
|
2749
|
+
throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
|
|
2340
2750
|
}
|
|
2341
2751
|
if (!response.body) {
|
|
2342
2752
|
throw new Error("ElevenLabs TTS: no response body");
|
|
@@ -2346,8 +2756,10 @@ var ElevenLabsTTS = class {
|
|
|
2346
2756
|
while (true) {
|
|
2347
2757
|
const { done, value } = await reader.read();
|
|
2348
2758
|
if (done) break;
|
|
2349
|
-
if (value
|
|
2350
|
-
|
|
2759
|
+
if (!value || value.length === 0) continue;
|
|
2760
|
+
const buf = Buffer.from(value);
|
|
2761
|
+
for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
|
|
2762
|
+
yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
|
|
2351
2763
|
}
|
|
2352
2764
|
}
|
|
2353
2765
|
} finally {
|
|
@@ -2359,30 +2771,50 @@ var ElevenLabsTTS = class {
|
|
|
2359
2771
|
};
|
|
2360
2772
|
|
|
2361
2773
|
// src/tts/elevenlabs.ts
|
|
2362
|
-
|
|
2363
|
-
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2774
|
+
function resolveApiKey(apiKey) {
|
|
2775
|
+
const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
2776
|
+
if (!key) {
|
|
2777
|
+
throw new Error(
|
|
2778
|
+
"ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
2779
|
+
);
|
|
2780
|
+
}
|
|
2781
|
+
return key;
|
|
2782
|
+
}
|
|
2783
|
+
var TTS = class _TTS extends ElevenLabsTTS {
|
|
2784
|
+
static providerKey = "elevenlabs";
|
|
2785
|
+
constructor(opts = {}) {
|
|
2370
2786
|
super(
|
|
2371
|
-
|
|
2372
|
-
opts.voiceId ?? "
|
|
2373
|
-
opts.modelId ?? "
|
|
2787
|
+
resolveApiKey(opts.apiKey),
|
|
2788
|
+
opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
|
|
2789
|
+
opts.modelId ?? "eleven_flash_v2_5",
|
|
2374
2790
|
opts.outputFormat ?? "pcm_16000"
|
|
2375
2791
|
);
|
|
2376
2792
|
}
|
|
2793
|
+
static forTwilio(arg1, arg2) {
|
|
2794
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
2795
|
+
return new _TTS({ ...opts, outputFormat: "ulaw_8000" });
|
|
2796
|
+
}
|
|
2797
|
+
static forTelnyx(arg1, arg2) {
|
|
2798
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
2799
|
+
return new _TTS({ ...opts, outputFormat: "pcm_16000" });
|
|
2800
|
+
}
|
|
2377
2801
|
};
|
|
2378
2802
|
|
|
2379
2803
|
// src/providers/openai-tts.ts
|
|
2380
2804
|
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
2805
|
+
var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
|
|
2806
|
+
var LPF_ALPHA = 0.78;
|
|
2381
2807
|
var OpenAITTS = class _OpenAITTS {
|
|
2382
|
-
constructor(apiKey, voice = "alloy", model = "tts
|
|
2808
|
+
constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true) {
|
|
2383
2809
|
this.apiKey = apiKey;
|
|
2384
2810
|
this.voice = voice;
|
|
2385
2811
|
this.model = model;
|
|
2812
|
+
this.instructions = instructions;
|
|
2813
|
+
this.speed = speed;
|
|
2814
|
+
this.antiAlias = antiAlias;
|
|
2815
|
+
if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
|
|
2816
|
+
throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
|
|
2817
|
+
}
|
|
2386
2818
|
}
|
|
2387
2819
|
/**
|
|
2388
2820
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
@@ -2399,37 +2831,48 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2399
2831
|
/**
|
|
2400
2832
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2401
2833
|
*
|
|
2402
|
-
* OpenAI returns 24 kHz PCM16; each chunk is
|
|
2403
|
-
* yielding so the output is ready for
|
|
2834
|
+
* OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
|
|
2835
|
+
* decimated 3:2 to 16 kHz before yielding so the output is ready for
|
|
2836
|
+
* telephony pipelines.
|
|
2404
2837
|
*
|
|
2405
|
-
* The resampler carries state (buffered samples + odd
|
|
2406
|
-
* between chunks
|
|
2407
|
-
*
|
|
2408
|
-
* Python `audioop.ratecv` fix).
|
|
2838
|
+
* The resampler carries state (filter memory + buffered samples + odd
|
|
2839
|
+
* trailing byte) between chunks so cross-chunk sample alignment and
|
|
2840
|
+
* filter phase don't reset on every network read.
|
|
2409
2841
|
*/
|
|
2410
2842
|
async *synthesizeStream(text) {
|
|
2843
|
+
const body = {
|
|
2844
|
+
model: this.model,
|
|
2845
|
+
input: text,
|
|
2846
|
+
voice: this.voice,
|
|
2847
|
+
response_format: "pcm"
|
|
2848
|
+
};
|
|
2849
|
+
if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
|
|
2850
|
+
body.instructions = this.instructions;
|
|
2851
|
+
}
|
|
2852
|
+
if (this.speed !== null) {
|
|
2853
|
+
body.speed = this.speed;
|
|
2854
|
+
}
|
|
2411
2855
|
const response = await fetch(OPENAI_TTS_URL, {
|
|
2412
2856
|
method: "POST",
|
|
2413
2857
|
headers: {
|
|
2414
2858
|
"Authorization": `Bearer ${this.apiKey}`,
|
|
2415
2859
|
"Content-Type": "application/json"
|
|
2416
2860
|
},
|
|
2417
|
-
body: JSON.stringify(
|
|
2418
|
-
model: this.model,
|
|
2419
|
-
input: text,
|
|
2420
|
-
voice: this.voice,
|
|
2421
|
-
response_format: "pcm"
|
|
2422
|
-
}),
|
|
2423
|
-
signal: AbortSignal.timeout(3e4)
|
|
2861
|
+
body: JSON.stringify(body)
|
|
2424
2862
|
});
|
|
2425
2863
|
if (!response.ok) {
|
|
2426
|
-
const
|
|
2427
|
-
throw new Error(`OpenAI TTS error ${response.status}: ${
|
|
2864
|
+
const errBody = await response.text();
|
|
2865
|
+
throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
|
|
2428
2866
|
}
|
|
2429
2867
|
if (!response.body) {
|
|
2430
2868
|
throw new Error("OpenAI TTS: no response body");
|
|
2431
2869
|
}
|
|
2432
|
-
const ctx = {
|
|
2870
|
+
const ctx = {
|
|
2871
|
+
carryByte: null,
|
|
2872
|
+
leftover: [],
|
|
2873
|
+
lpfPrev: 0,
|
|
2874
|
+
lpfEnabled: this.antiAlias
|
|
2875
|
+
};
|
|
2433
2876
|
const reader = response.body.getReader();
|
|
2434
2877
|
try {
|
|
2435
2878
|
while (true) {
|
|
@@ -2454,8 +2897,14 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2454
2897
|
}
|
|
2455
2898
|
}
|
|
2456
2899
|
/**
|
|
2457
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE).
|
|
2458
|
-
*
|
|
2900
|
+
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
|
|
2901
|
+
* lowpass ahead of the 3:2 decimation and carries filter + sample state
|
|
2902
|
+
* across chunks so the cadence doesn't reset at every network read.
|
|
2903
|
+
*
|
|
2904
|
+
* ``ctx.lpfEnabled`` (default true on the streaming path, false for the
|
|
2905
|
+
* legacy static helper) controls whether the LPF is engaged — we keep
|
|
2906
|
+
* the helper bit-exact for the downsample-only tests while the real
|
|
2907
|
+
* streaming path gets anti-alias filtering.
|
|
2459
2908
|
*/
|
|
2460
2909
|
static resampleStreaming(audio, ctx) {
|
|
2461
2910
|
let buf;
|
|
@@ -2474,14 +2923,26 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2474
2923
|
}
|
|
2475
2924
|
const sampleCount = buf.length / 2;
|
|
2476
2925
|
const samples = ctx.leftover.slice();
|
|
2926
|
+
const lpf = ctx.lpfEnabled !== false;
|
|
2927
|
+
let y = ctx.lpfPrev;
|
|
2477
2928
|
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2478
|
-
|
|
2929
|
+
const x = buf.readInt16LE(i2 * 2);
|
|
2930
|
+
if (lpf) {
|
|
2931
|
+
y = LPF_ALPHA * x + (1 - LPF_ALPHA) * y;
|
|
2932
|
+
let s = Math.round(y);
|
|
2933
|
+
if (s > 32767) s = 32767;
|
|
2934
|
+
else if (s < -32768) s = -32768;
|
|
2935
|
+
samples.push(s);
|
|
2936
|
+
} else {
|
|
2937
|
+
samples.push(x);
|
|
2938
|
+
}
|
|
2479
2939
|
}
|
|
2940
|
+
if (lpf) ctx.lpfPrev = y;
|
|
2480
2941
|
const out = [];
|
|
2481
2942
|
let i = 0;
|
|
2482
2943
|
while (i + 2 < samples.length) {
|
|
2483
2944
|
out.push(samples[i]);
|
|
2484
|
-
out.push(Math.
|
|
2945
|
+
out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
|
|
2485
2946
|
i += 3;
|
|
2486
2947
|
}
|
|
2487
2948
|
ctx.leftover = samples.slice(i);
|
|
@@ -2493,7 +2954,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2493
2954
|
}
|
|
2494
2955
|
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
2495
2956
|
static resample24kTo16k(audio) {
|
|
2496
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2957
|
+
const ctx = { carryByte: null, leftover: [], lpfPrev: 0, lpfEnabled: false };
|
|
2497
2958
|
const out = _OpenAITTS.resampleStreaming(audio, ctx);
|
|
2498
2959
|
if (ctx.leftover.length === 0) return out;
|
|
2499
2960
|
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
@@ -2506,6 +2967,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2506
2967
|
|
|
2507
2968
|
// src/tts/openai.ts
|
|
2508
2969
|
var TTS2 = class extends OpenAITTS {
|
|
2970
|
+
static providerKey = "openai_tts";
|
|
2509
2971
|
constructor(opts = {}) {
|
|
2510
2972
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
2511
2973
|
if (!key) {
|
|
@@ -2513,15 +2975,22 @@ var TTS2 = class extends OpenAITTS {
|
|
|
2513
2975
|
"OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
2514
2976
|
);
|
|
2515
2977
|
}
|
|
2516
|
-
super(
|
|
2978
|
+
super(
|
|
2979
|
+
key,
|
|
2980
|
+
opts.voice ?? "alloy",
|
|
2981
|
+
opts.model ?? "gpt-4o-mini-tts",
|
|
2982
|
+
opts.instructions ?? null,
|
|
2983
|
+
opts.speed ?? null,
|
|
2984
|
+
opts.antiAlias ?? false
|
|
2985
|
+
);
|
|
2517
2986
|
}
|
|
2518
2987
|
};
|
|
2519
2988
|
|
|
2520
2989
|
// src/providers/cartesia-tts.ts
|
|
2521
2990
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
2522
|
-
var CARTESIA_API_VERSION = "
|
|
2991
|
+
var CARTESIA_API_VERSION = "2025-04-16";
|
|
2523
2992
|
var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
2524
|
-
var CartesiaTTS = class {
|
|
2993
|
+
var CartesiaTTS = class _CartesiaTTS {
|
|
2525
2994
|
apiKey;
|
|
2526
2995
|
model;
|
|
2527
2996
|
voice;
|
|
@@ -2534,7 +3003,7 @@ var CartesiaTTS = class {
|
|
|
2534
3003
|
apiVersion;
|
|
2535
3004
|
constructor(apiKey, opts = {}) {
|
|
2536
3005
|
this.apiKey = apiKey;
|
|
2537
|
-
this.model = opts.model ?? "sonic-
|
|
3006
|
+
this.model = opts.model ?? "sonic-3";
|
|
2538
3007
|
this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
|
|
2539
3008
|
this.language = opts.language ?? "en";
|
|
2540
3009
|
this.sampleRate = opts.sampleRate ?? 16e3;
|
|
@@ -2544,6 +3013,29 @@ var CartesiaTTS = class {
|
|
|
2544
3013
|
this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
|
|
2545
3014
|
this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
|
|
2546
3015
|
}
|
|
3016
|
+
/**
|
|
3017
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
3018
|
+
*
|
|
3019
|
+
* Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
|
|
3020
|
+
* Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
|
|
3021
|
+
* PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
|
|
3022
|
+
* step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
|
|
3023
|
+
* removes a potential aliasing source.
|
|
3024
|
+
*/
|
|
3025
|
+
static forTwilio(apiKey, options = {}) {
|
|
3026
|
+
return new _CartesiaTTS(apiKey, { ...options, sampleRate: 8e3 });
|
|
3027
|
+
}
|
|
3028
|
+
/**
|
|
3029
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
3030
|
+
*
|
|
3031
|
+
* Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
|
|
3032
|
+
* audio flows end-to-end with zero resampling or transcoding. Same as
|
|
3033
|
+
* the bare-constructor default; exists for API symmetry with
|
|
3034
|
+
* {@link CartesiaTTS.forTwilio}.
|
|
3035
|
+
*/
|
|
3036
|
+
static forTelnyx(apiKey, options = {}) {
|
|
3037
|
+
return new _CartesiaTTS(apiKey, { ...options, sampleRate: 16e3 });
|
|
3038
|
+
}
|
|
2547
3039
|
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
2548
3040
|
buildPayload(text) {
|
|
2549
3041
|
const payload = {
|
|
@@ -2616,18 +3108,31 @@ var CartesiaTTS = class {
|
|
|
2616
3108
|
};
|
|
2617
3109
|
|
|
2618
3110
|
// src/tts/cartesia.ts
|
|
2619
|
-
|
|
3111
|
+
function resolveApiKey2(apiKey) {
|
|
3112
|
+
const key = apiKey ?? process.env.CARTESIA_API_KEY;
|
|
3113
|
+
if (!key) {
|
|
3114
|
+
throw new Error(
|
|
3115
|
+
"Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
3116
|
+
);
|
|
3117
|
+
}
|
|
3118
|
+
return key;
|
|
3119
|
+
}
|
|
3120
|
+
var TTS3 = class _TTS extends CartesiaTTS {
|
|
3121
|
+
static providerKey = "cartesia_tts";
|
|
2620
3122
|
constructor(opts = {}) {
|
|
2621
|
-
const key = opts.apiKey
|
|
2622
|
-
if (!key) {
|
|
2623
|
-
throw new Error(
|
|
2624
|
-
"Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
2625
|
-
);
|
|
2626
|
-
}
|
|
3123
|
+
const key = resolveApiKey2(opts.apiKey);
|
|
2627
3124
|
const { apiKey: _ignored, ...rest } = opts;
|
|
2628
3125
|
void _ignored;
|
|
2629
3126
|
super(key, rest);
|
|
2630
3127
|
}
|
|
3128
|
+
static forTwilio(arg1, arg2) {
|
|
3129
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
3130
|
+
return new _TTS({ ...opts, sampleRate: 8e3 });
|
|
3131
|
+
}
|
|
3132
|
+
static forTelnyx(arg1, arg2) {
|
|
3133
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
3134
|
+
return new _TTS({ ...opts, sampleRate: 16e3 });
|
|
3135
|
+
}
|
|
2631
3136
|
};
|
|
2632
3137
|
|
|
2633
3138
|
// src/providers/rime-tts.ts
|
|
@@ -2759,6 +3264,7 @@ var RimeTTS = class {
|
|
|
2759
3264
|
|
|
2760
3265
|
// src/tts/rime.ts
|
|
2761
3266
|
var TTS4 = class extends RimeTTS {
|
|
3267
|
+
static providerKey = "rime";
|
|
2762
3268
|
constructor(opts = {}) {
|
|
2763
3269
|
const key = opts.apiKey ?? process.env.RIME_API_KEY;
|
|
2764
3270
|
if (!key) {
|
|
@@ -2852,6 +3358,7 @@ var LMNTTTS = class {
|
|
|
2852
3358
|
|
|
2853
3359
|
// src/tts/lmnt.ts
|
|
2854
3360
|
var TTS5 = class extends LMNTTTS {
|
|
3361
|
+
static providerKey = "lmnt";
|
|
2855
3362
|
constructor(opts = {}) {
|
|
2856
3363
|
const key = opts.apiKey ?? process.env.LMNT_API_KEY;
|
|
2857
3364
|
if (!key) {
|
|
@@ -2867,6 +3374,7 @@ var TTS5 = class extends LMNTTTS {
|
|
|
2867
3374
|
|
|
2868
3375
|
// src/llm/openai.ts
|
|
2869
3376
|
var LLM = class extends OpenAILLMProvider {
|
|
3377
|
+
static providerKey = "openai";
|
|
2870
3378
|
constructor(opts = {}) {
|
|
2871
3379
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
2872
3380
|
if (!key) {
|
|
@@ -2874,15 +3382,27 @@ var LLM = class extends OpenAILLMProvider {
|
|
|
2874
3382
|
"OpenAI LLM requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY."
|
|
2875
3383
|
);
|
|
2876
3384
|
}
|
|
2877
|
-
super(key, opts.model ?? "gpt-4o-mini"
|
|
3385
|
+
super(key, opts.model ?? "gpt-4o-mini", {
|
|
3386
|
+
temperature: opts.temperature,
|
|
3387
|
+
maxTokens: opts.maxTokens,
|
|
3388
|
+
responseFormat: opts.responseFormat,
|
|
3389
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3390
|
+
toolChoice: opts.toolChoice,
|
|
3391
|
+
seed: opts.seed,
|
|
3392
|
+
topP: opts.topP,
|
|
3393
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3394
|
+
presencePenalty: opts.presencePenalty,
|
|
3395
|
+
stop: opts.stop
|
|
3396
|
+
});
|
|
2878
3397
|
}
|
|
2879
3398
|
};
|
|
2880
3399
|
|
|
2881
3400
|
// src/providers/anthropic-llm.ts
|
|
2882
3401
|
var DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
2883
3402
|
var DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
|
|
2884
|
-
var DEFAULT_MODEL = "claude-
|
|
3403
|
+
var DEFAULT_MODEL = "claude-haiku-4-5-20251001";
|
|
2885
3404
|
var DEFAULT_MAX_TOKENS = 1024;
|
|
3405
|
+
var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
|
|
2886
3406
|
var AnthropicLLMProvider = class {
|
|
2887
3407
|
apiKey;
|
|
2888
3408
|
model;
|
|
@@ -2890,6 +3410,7 @@ var AnthropicLLMProvider = class {
|
|
|
2890
3410
|
temperature;
|
|
2891
3411
|
url;
|
|
2892
3412
|
anthropicVersion;
|
|
3413
|
+
promptCaching;
|
|
2893
3414
|
constructor(options) {
|
|
2894
3415
|
if (!options.apiKey) {
|
|
2895
3416
|
throw new Error(
|
|
@@ -2902,6 +3423,7 @@ var AnthropicLLMProvider = class {
|
|
|
2902
3423
|
this.temperature = options.temperature;
|
|
2903
3424
|
this.url = options.baseUrl ?? DEFAULT_ANTHROPIC_URL;
|
|
2904
3425
|
this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
|
|
3426
|
+
this.promptCaching = options.promptCaching ?? true;
|
|
2905
3427
|
}
|
|
2906
3428
|
async *stream(messages, tools) {
|
|
2907
3429
|
const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
|
|
@@ -2912,16 +3434,44 @@ var AnthropicLLMProvider = class {
|
|
|
2912
3434
|
max_tokens: this.maxTokens,
|
|
2913
3435
|
stream: true
|
|
2914
3436
|
};
|
|
2915
|
-
if (system)
|
|
2916
|
-
|
|
3437
|
+
if (system) {
|
|
3438
|
+
if (this.promptCaching) {
|
|
3439
|
+
const block = {
|
|
3440
|
+
type: "text",
|
|
3441
|
+
text: system,
|
|
3442
|
+
cache_control: { type: "ephemeral" }
|
|
3443
|
+
};
|
|
3444
|
+
body.system = [block];
|
|
3445
|
+
} else {
|
|
3446
|
+
body.system = system;
|
|
3447
|
+
}
|
|
3448
|
+
}
|
|
3449
|
+
if (anthropicTools && anthropicTools.length > 0) {
|
|
3450
|
+
if (this.promptCaching) {
|
|
3451
|
+
const cachedTools = anthropicTools.map(
|
|
3452
|
+
(t) => ({ ...t })
|
|
3453
|
+
);
|
|
3454
|
+
cachedTools[cachedTools.length - 1] = {
|
|
3455
|
+
...cachedTools[cachedTools.length - 1],
|
|
3456
|
+
cache_control: { type: "ephemeral" }
|
|
3457
|
+
};
|
|
3458
|
+
body.tools = cachedTools;
|
|
3459
|
+
} else {
|
|
3460
|
+
body.tools = anthropicTools;
|
|
3461
|
+
}
|
|
3462
|
+
}
|
|
2917
3463
|
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3464
|
+
const headers = {
|
|
3465
|
+
"Content-Type": "application/json",
|
|
3466
|
+
"x-api-key": this.apiKey,
|
|
3467
|
+
"anthropic-version": this.anthropicVersion
|
|
3468
|
+
};
|
|
3469
|
+
if (this.promptCaching) {
|
|
3470
|
+
headers["anthropic-beta"] = PROMPT_CACHING_BETA;
|
|
3471
|
+
}
|
|
2918
3472
|
const response = await fetch(this.url, {
|
|
2919
3473
|
method: "POST",
|
|
2920
|
-
headers
|
|
2921
|
-
"Content-Type": "application/json",
|
|
2922
|
-
"x-api-key": this.apiKey,
|
|
2923
|
-
"anthropic-version": this.anthropicVersion
|
|
2924
|
-
},
|
|
3474
|
+
headers,
|
|
2925
3475
|
body: JSON.stringify(body),
|
|
2926
3476
|
signal: AbortSignal.timeout(3e4)
|
|
2927
3477
|
});
|
|
@@ -3066,6 +3616,7 @@ function toAnthropicMessages(messages) {
|
|
|
3066
3616
|
|
|
3067
3617
|
// src/llm/anthropic.ts
|
|
3068
3618
|
var LLM2 = class extends AnthropicLLMProvider {
|
|
3619
|
+
static providerKey = "anthropic";
|
|
3069
3620
|
constructor(opts = {}) {
|
|
3070
3621
|
const key = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
|
|
3071
3622
|
if (!key) {
|
|
@@ -3079,11 +3630,15 @@ var LLM2 = class extends AnthropicLLMProvider {
|
|
|
3079
3630
|
maxTokens: opts.maxTokens,
|
|
3080
3631
|
temperature: opts.temperature,
|
|
3081
3632
|
baseUrl: opts.baseUrl,
|
|
3082
|
-
anthropicVersion: opts.anthropicVersion
|
|
3633
|
+
anthropicVersion: opts.anthropicVersion,
|
|
3634
|
+
promptCaching: opts.promptCaching
|
|
3083
3635
|
});
|
|
3084
3636
|
}
|
|
3085
3637
|
};
|
|
3086
3638
|
|
|
3639
|
+
// src/version.ts
|
|
3640
|
+
var VERSION = "0.5.3";
|
|
3641
|
+
|
|
3087
3642
|
// src/providers/groq-llm.ts
|
|
3088
3643
|
var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
|
|
3089
3644
|
var DEFAULT_MODEL2 = "llama-3.3-70b-versatile";
|
|
@@ -3091,6 +3646,16 @@ var GroqLLMProvider = class {
|
|
|
3091
3646
|
apiKey;
|
|
3092
3647
|
model;
|
|
3093
3648
|
baseUrl;
|
|
3649
|
+
temperature;
|
|
3650
|
+
maxTokens;
|
|
3651
|
+
responseFormat;
|
|
3652
|
+
parallelToolCalls;
|
|
3653
|
+
toolChoice;
|
|
3654
|
+
seed;
|
|
3655
|
+
topP;
|
|
3656
|
+
frequencyPenalty;
|
|
3657
|
+
presencePenalty;
|
|
3658
|
+
stop;
|
|
3094
3659
|
constructor(options) {
|
|
3095
3660
|
if (!options.apiKey) {
|
|
3096
3661
|
throw new Error(
|
|
@@ -3100,19 +3665,43 @@ var GroqLLMProvider = class {
|
|
|
3100
3665
|
this.apiKey = options.apiKey;
|
|
3101
3666
|
this.model = options.model ?? DEFAULT_MODEL2;
|
|
3102
3667
|
this.baseUrl = options.baseUrl ?? GROQ_BASE_URL;
|
|
3668
|
+
this.temperature = options.temperature;
|
|
3669
|
+
this.maxTokens = options.maxTokens;
|
|
3670
|
+
this.responseFormat = options.responseFormat;
|
|
3671
|
+
this.parallelToolCalls = options.parallelToolCalls;
|
|
3672
|
+
this.toolChoice = options.toolChoice;
|
|
3673
|
+
this.seed = options.seed;
|
|
3674
|
+
this.topP = options.topP;
|
|
3675
|
+
this.frequencyPenalty = options.frequencyPenalty;
|
|
3676
|
+
this.presencePenalty = options.presencePenalty;
|
|
3677
|
+
this.stop = options.stop;
|
|
3103
3678
|
}
|
|
3104
3679
|
async *stream(messages, tools) {
|
|
3105
3680
|
const body = {
|
|
3106
3681
|
model: this.model,
|
|
3107
3682
|
messages,
|
|
3108
|
-
stream: true
|
|
3683
|
+
stream: true,
|
|
3684
|
+
stream_options: { include_usage: true }
|
|
3109
3685
|
};
|
|
3686
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3687
|
+
if (this.maxTokens !== void 0) {
|
|
3688
|
+
body.max_completion_tokens = this.maxTokens;
|
|
3689
|
+
}
|
|
3690
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
3691
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
3692
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
3693
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
3694
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
3695
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
3696
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
3697
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
3110
3698
|
if (tools) body.tools = tools;
|
|
3111
3699
|
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
3112
3700
|
method: "POST",
|
|
3113
3701
|
headers: {
|
|
3114
3702
|
"Content-Type": "application/json",
|
|
3115
|
-
Authorization: `Bearer ${this.apiKey}
|
|
3703
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
3704
|
+
"User-Agent": `getpatter/${VERSION}`
|
|
3116
3705
|
},
|
|
3117
3706
|
body: JSON.stringify(body),
|
|
3118
3707
|
signal: AbortSignal.timeout(3e4)
|
|
@@ -3147,6 +3736,16 @@ async function* parseOpenAISseStream(response) {
|
|
|
3147
3736
|
} catch {
|
|
3148
3737
|
continue;
|
|
3149
3738
|
}
|
|
3739
|
+
const usage = chunk.usage ?? chunk.x_groq?.usage;
|
|
3740
|
+
if (usage) {
|
|
3741
|
+
const cached = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
3742
|
+
yield {
|
|
3743
|
+
type: "usage",
|
|
3744
|
+
inputTokens: usage.prompt_tokens,
|
|
3745
|
+
outputTokens: usage.completion_tokens,
|
|
3746
|
+
cacheReadInputTokens: cached
|
|
3747
|
+
};
|
|
3748
|
+
}
|
|
3150
3749
|
const delta = chunk.choices?.[0]?.delta;
|
|
3151
3750
|
if (!delta) continue;
|
|
3152
3751
|
if (delta.content) {
|
|
@@ -3169,6 +3768,7 @@ async function* parseOpenAISseStream(response) {
|
|
|
3169
3768
|
|
|
3170
3769
|
// src/llm/groq.ts
|
|
3171
3770
|
var LLM3 = class extends GroqLLMProvider {
|
|
3771
|
+
static providerKey = "groq";
|
|
3172
3772
|
constructor(opts = {}) {
|
|
3173
3773
|
const key = opts.apiKey ?? process.env.GROQ_API_KEY;
|
|
3174
3774
|
if (!key) {
|
|
@@ -3179,7 +3779,17 @@ var LLM3 = class extends GroqLLMProvider {
|
|
|
3179
3779
|
super({
|
|
3180
3780
|
apiKey: key,
|
|
3181
3781
|
model: opts.model,
|
|
3182
|
-
baseUrl: opts.baseUrl
|
|
3782
|
+
baseUrl: opts.baseUrl,
|
|
3783
|
+
temperature: opts.temperature,
|
|
3784
|
+
maxTokens: opts.maxTokens,
|
|
3785
|
+
responseFormat: opts.responseFormat,
|
|
3786
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3787
|
+
toolChoice: opts.toolChoice,
|
|
3788
|
+
seed: opts.seed,
|
|
3789
|
+
topP: opts.topP,
|
|
3790
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3791
|
+
presencePenalty: opts.presencePenalty,
|
|
3792
|
+
stop: opts.stop
|
|
3183
3793
|
});
|
|
3184
3794
|
}
|
|
3185
3795
|
};
|
|
@@ -3187,11 +3797,22 @@ var LLM3 = class extends GroqLLMProvider {
|
|
|
3187
3797
|
// src/providers/cerebras-llm.ts
|
|
3188
3798
|
var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
|
|
3189
3799
|
var DEFAULT_MODEL3 = "llama3.1-8b";
|
|
3800
|
+
var RETRY_BACKOFF_BASE_MS = 500;
|
|
3190
3801
|
var CerebrasLLMProvider = class {
|
|
3191
3802
|
apiKey;
|
|
3192
3803
|
model;
|
|
3193
3804
|
baseUrl;
|
|
3194
3805
|
gzipCompression;
|
|
3806
|
+
temperature;
|
|
3807
|
+
maxTokens;
|
|
3808
|
+
responseFormat;
|
|
3809
|
+
parallelToolCalls;
|
|
3810
|
+
toolChoice;
|
|
3811
|
+
seed;
|
|
3812
|
+
topP;
|
|
3813
|
+
frequencyPenalty;
|
|
3814
|
+
presencePenalty;
|
|
3815
|
+
stop;
|
|
3195
3816
|
constructor(options) {
|
|
3196
3817
|
if (!options.apiKey) {
|
|
3197
3818
|
throw new Error(
|
|
@@ -3201,18 +3822,43 @@ var CerebrasLLMProvider = class {
|
|
|
3201
3822
|
this.apiKey = options.apiKey;
|
|
3202
3823
|
this.model = options.model ?? DEFAULT_MODEL3;
|
|
3203
3824
|
this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
|
|
3204
|
-
this.gzipCompression = options.gzipCompression ??
|
|
3825
|
+
this.gzipCompression = options.gzipCompression ?? true;
|
|
3826
|
+
this.temperature = options.temperature;
|
|
3827
|
+
this.maxTokens = options.maxTokens;
|
|
3828
|
+
this.responseFormat = options.responseFormat;
|
|
3829
|
+
this.parallelToolCalls = options.parallelToolCalls;
|
|
3830
|
+
this.toolChoice = options.toolChoice;
|
|
3831
|
+
this.seed = options.seed;
|
|
3832
|
+
this.topP = options.topP;
|
|
3833
|
+
this.frequencyPenalty = options.frequencyPenalty;
|
|
3834
|
+
this.presencePenalty = options.presencePenalty;
|
|
3835
|
+
this.stop = options.stop;
|
|
3205
3836
|
}
|
|
3206
3837
|
async *stream(messages, tools) {
|
|
3207
3838
|
const body = {
|
|
3208
3839
|
model: this.model,
|
|
3209
3840
|
messages,
|
|
3210
|
-
stream: true
|
|
3841
|
+
stream: true,
|
|
3842
|
+
stream_options: { include_usage: true }
|
|
3211
3843
|
};
|
|
3844
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3845
|
+
if (this.maxTokens !== void 0) {
|
|
3846
|
+
body.max_completion_tokens = this.maxTokens;
|
|
3847
|
+
}
|
|
3848
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
3849
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
3850
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
3851
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
3852
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
3853
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
3854
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
3855
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
3212
3856
|
if (tools) body.tools = tools;
|
|
3213
3857
|
const headers = {
|
|
3214
3858
|
"Content-Type": "application/json",
|
|
3215
|
-
Authorization: `Bearer ${this.apiKey}
|
|
3859
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
3860
|
+
// Identify the SDK in upstream logs/rate-limit attribution.
|
|
3861
|
+
"User-Agent": `getpatter/${VERSION}`
|
|
3216
3862
|
};
|
|
3217
3863
|
let payload = JSON.stringify(body);
|
|
3218
3864
|
if (this.gzipCompression) {
|
|
@@ -3222,18 +3868,43 @@ var CerebrasLLMProvider = class {
|
|
|
3222
3868
|
headers["Content-Encoding"] = "gzip";
|
|
3223
3869
|
}
|
|
3224
3870
|
}
|
|
3225
|
-
const
|
|
3226
|
-
|
|
3227
|
-
|
|
3228
|
-
|
|
3229
|
-
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3871
|
+
const maxAttempts = 2;
|
|
3872
|
+
let lastErrText = "";
|
|
3873
|
+
let lastStatus = 0;
|
|
3874
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
3875
|
+
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
3876
|
+
method: "POST",
|
|
3877
|
+
headers,
|
|
3878
|
+
body: payload,
|
|
3879
|
+
signal: AbortSignal.timeout(3e4)
|
|
3880
|
+
});
|
|
3881
|
+
if (response.ok) {
|
|
3882
|
+
yield* parseOpenAISseStream(response);
|
|
3883
|
+
return;
|
|
3884
|
+
}
|
|
3885
|
+
lastStatus = response.status;
|
|
3886
|
+
lastErrText = await response.text().catch(() => "");
|
|
3887
|
+
const isRetriable = response.status === 429 || response.status >= 500;
|
|
3888
|
+
const isLastAttempt = attempt >= maxAttempts - 1;
|
|
3889
|
+
if (!isRetriable || isLastAttempt) {
|
|
3890
|
+
if (response.status === 404 && lastErrText.includes("model_not_found")) {
|
|
3891
|
+
getLogger().error(
|
|
3892
|
+
`Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText}`
|
|
3893
|
+
);
|
|
3894
|
+
} else {
|
|
3895
|
+
getLogger().error(`Cerebras API error: ${response.status} ${lastErrText}`);
|
|
3896
|
+
}
|
|
3897
|
+
return;
|
|
3898
|
+
}
|
|
3899
|
+
const advisoryMs = parseRateLimitResetMs(response.headers);
|
|
3900
|
+
const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
|
|
3901
|
+
const delayMs = Math.max(advisoryMs, exponentialMs);
|
|
3902
|
+
getLogger().warn(
|
|
3903
|
+
`Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
|
|
3904
|
+
);
|
|
3905
|
+
await new Promise((r) => setTimeout(r, delayMs));
|
|
3235
3906
|
}
|
|
3236
|
-
|
|
3907
|
+
throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
|
|
3237
3908
|
}
|
|
3238
3909
|
};
|
|
3239
3910
|
async function gzipEncode(data) {
|
|
@@ -3260,9 +3931,28 @@ async function gzipEncode(data) {
|
|
|
3260
3931
|
}
|
|
3261
3932
|
return out;
|
|
3262
3933
|
}
|
|
3934
|
+
function parseRateLimitResetMs(headers) {
|
|
3935
|
+
const candidates = [
|
|
3936
|
+
headers.get("x-ratelimit-reset-tokens-minute"),
|
|
3937
|
+
headers.get("x-ratelimit-reset-requests-minute"),
|
|
3938
|
+
// Some upstreams send the standard ``retry-after`` (seconds).
|
|
3939
|
+
headers.get("retry-after")
|
|
3940
|
+
];
|
|
3941
|
+
let bestMs = 0;
|
|
3942
|
+
for (const raw of candidates) {
|
|
3943
|
+
if (!raw) continue;
|
|
3944
|
+
const parsed = Number.parseFloat(raw);
|
|
3945
|
+
if (Number.isFinite(parsed) && parsed > 0) {
|
|
3946
|
+
const ms = parsed * 1e3;
|
|
3947
|
+
if (ms > bestMs) bestMs = ms;
|
|
3948
|
+
}
|
|
3949
|
+
}
|
|
3950
|
+
return bestMs;
|
|
3951
|
+
}
|
|
3263
3952
|
|
|
3264
3953
|
// src/llm/cerebras.ts
|
|
3265
3954
|
var LLM4 = class extends CerebrasLLMProvider {
|
|
3955
|
+
static providerKey = "cerebras";
|
|
3266
3956
|
constructor(opts = {}) {
|
|
3267
3957
|
const key = opts.apiKey ?? process.env.CEREBRAS_API_KEY;
|
|
3268
3958
|
if (!key) {
|
|
@@ -3274,7 +3964,17 @@ var LLM4 = class extends CerebrasLLMProvider {
|
|
|
3274
3964
|
apiKey: key,
|
|
3275
3965
|
model: opts.model,
|
|
3276
3966
|
baseUrl: opts.baseUrl,
|
|
3277
|
-
gzipCompression: opts.gzipCompression
|
|
3967
|
+
gzipCompression: opts.gzipCompression,
|
|
3968
|
+
temperature: opts.temperature,
|
|
3969
|
+
maxTokens: opts.maxTokens,
|
|
3970
|
+
responseFormat: opts.responseFormat,
|
|
3971
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3972
|
+
toolChoice: opts.toolChoice,
|
|
3973
|
+
seed: opts.seed,
|
|
3974
|
+
topP: opts.topP,
|
|
3975
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3976
|
+
presencePenalty: opts.presencePenalty,
|
|
3977
|
+
stop: opts.stop
|
|
3278
3978
|
});
|
|
3279
3979
|
}
|
|
3280
3980
|
};
|
|
@@ -3330,6 +4030,7 @@ var GoogleLLMProvider = class {
|
|
|
3330
4030
|
const decoder = new TextDecoder();
|
|
3331
4031
|
let buffer = "";
|
|
3332
4032
|
let nextIndex = 0;
|
|
4033
|
+
let lastUsage;
|
|
3333
4034
|
while (true) {
|
|
3334
4035
|
const { done, value } = await reader.read();
|
|
3335
4036
|
if (done) break;
|
|
@@ -3347,6 +4048,9 @@ var GoogleLLMProvider = class {
|
|
|
3347
4048
|
} catch {
|
|
3348
4049
|
continue;
|
|
3349
4050
|
}
|
|
4051
|
+
if (payload.usageMetadata) {
|
|
4052
|
+
lastUsage = payload.usageMetadata;
|
|
4053
|
+
}
|
|
3350
4054
|
const candidate = payload.candidates?.[0];
|
|
3351
4055
|
const parts = candidate?.content?.parts ?? [];
|
|
3352
4056
|
for (const part of parts) {
|
|
@@ -3369,6 +4073,14 @@ var GoogleLLMProvider = class {
|
|
|
3369
4073
|
}
|
|
3370
4074
|
}
|
|
3371
4075
|
}
|
|
4076
|
+
if (lastUsage) {
|
|
4077
|
+
yield {
|
|
4078
|
+
type: "usage",
|
|
4079
|
+
inputTokens: lastUsage.promptTokenCount,
|
|
4080
|
+
outputTokens: lastUsage.candidatesTokenCount,
|
|
4081
|
+
cacheReadInputTokens: lastUsage.cachedContentTokenCount ?? 0
|
|
4082
|
+
};
|
|
4083
|
+
}
|
|
3372
4084
|
yield { type: "done" };
|
|
3373
4085
|
}
|
|
3374
4086
|
};
|
|
@@ -3458,6 +4170,7 @@ function toGeminiContents(messages) {
|
|
|
3458
4170
|
|
|
3459
4171
|
// src/llm/google.ts
|
|
3460
4172
|
var LLM5 = class extends GoogleLLMProvider {
|
|
4173
|
+
static providerKey = "google";
|
|
3461
4174
|
constructor(opts = {}) {
|
|
3462
4175
|
const key = opts.apiKey ?? process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
|
|
3463
4176
|
if (!key) {
|
|
@@ -3475,6 +4188,279 @@ var LLM5 = class extends GoogleLLMProvider {
|
|
|
3475
4188
|
}
|
|
3476
4189
|
};
|
|
3477
4190
|
|
|
4191
|
+
// src/providers/silero-vad.ts
|
|
4192
|
+
import { createRequire } from "module";
|
|
4193
|
+
import * as fs from "fs";
|
|
4194
|
+
import * as path from "path";
|
|
4195
|
+
import { fileURLToPath } from "url";
|
|
4196
|
+
var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
|
|
4197
|
+
function resolveModuleDir() {
|
|
4198
|
+
try {
|
|
4199
|
+
const cjsDir = new Function("return typeof __dirname !== 'undefined' ? __dirname : null")();
|
|
4200
|
+
if (typeof cjsDir === "string") return cjsDir;
|
|
4201
|
+
} catch {
|
|
4202
|
+
}
|
|
4203
|
+
try {
|
|
4204
|
+
const url = import.meta.url;
|
|
4205
|
+
if (url) return path.dirname(fileURLToPath(url));
|
|
4206
|
+
} catch {
|
|
4207
|
+
}
|
|
4208
|
+
return process.cwd();
|
|
4209
|
+
}
|
|
4210
|
+
var MODULE_DIR = resolveModuleDir();
|
|
4211
|
+
function resolveDefaultModelPath() {
|
|
4212
|
+
const candidates = [
|
|
4213
|
+
path.join(MODULE_DIR, "resources", "silero_vad.onnx"),
|
|
4214
|
+
path.join(MODULE_DIR, "..", "resources", "silero_vad.onnx")
|
|
4215
|
+
];
|
|
4216
|
+
for (const c of candidates) if (fs.existsSync(c)) return c;
|
|
4217
|
+
return candidates[0];
|
|
4218
|
+
}
|
|
4219
|
+
var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
|
|
4220
|
+
async function loadOnnxRuntime() {
|
|
4221
|
+
let firstErr;
|
|
4222
|
+
try {
|
|
4223
|
+
const mod = await import("./dist-YRCCJQ26.mjs");
|
|
4224
|
+
return mod;
|
|
4225
|
+
} catch (e) {
|
|
4226
|
+
firstErr = e;
|
|
4227
|
+
}
|
|
4228
|
+
try {
|
|
4229
|
+
const req = createRequire(path.join(process.cwd(), "package.json"));
|
|
4230
|
+
return req("onnxruntime-node");
|
|
4231
|
+
} catch (e) {
|
|
4232
|
+
const detail = e?.message ?? String(e);
|
|
4233
|
+
const original = firstErr?.message ?? String(firstErr);
|
|
4234
|
+
throw new Error(
|
|
4235
|
+
`
|
|
4236
|
+
SileroVAD requires the "onnxruntime-node" package, which could not be resolved.
|
|
4237
|
+
|
|
4238
|
+
Install: npm install onnxruntime-node
|
|
4239
|
+
|
|
4240
|
+
This is an optional peer dependency of getpatter (~210 MB) \u2014 it is only
|
|
4241
|
+
needed when you use SileroVAD in pipeline mode.
|
|
4242
|
+
|
|
4243
|
+
import() failed: ${original}
|
|
4244
|
+
cwd-require failed: ${detail}
|
|
4245
|
+
`
|
|
4246
|
+
);
|
|
4247
|
+
}
|
|
4248
|
+
}
|
|
4249
|
+
var ExpFilter = class {
|
|
4250
|
+
constructor(alpha) {
|
|
4251
|
+
this.alpha = alpha;
|
|
4252
|
+
if (!(alpha > 0 && alpha <= 1)) {
|
|
4253
|
+
throw new Error("alpha must be in (0, 1].");
|
|
4254
|
+
}
|
|
4255
|
+
}
|
|
4256
|
+
filtered = null;
|
|
4257
|
+
apply(exp, sample) {
|
|
4258
|
+
if (this.filtered === null) {
|
|
4259
|
+
this.filtered = sample;
|
|
4260
|
+
} else {
|
|
4261
|
+
const a = Math.pow(this.alpha, exp);
|
|
4262
|
+
this.filtered = a * this.filtered + (1 - a) * sample;
|
|
4263
|
+
}
|
|
4264
|
+
return this.filtered;
|
|
4265
|
+
}
|
|
4266
|
+
reset() {
|
|
4267
|
+
this.filtered = null;
|
|
4268
|
+
}
|
|
4269
|
+
};
|
|
4270
|
+
var OnnxModel = class {
|
|
4271
|
+
constructor(runtime, session, sampleRate) {
|
|
4272
|
+
this.runtime = runtime;
|
|
4273
|
+
this.session = session;
|
|
4274
|
+
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4275
|
+
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4276
|
+
}
|
|
4277
|
+
this.sampleRate = sampleRate;
|
|
4278
|
+
this.windowSizeSamples = sampleRate === 8e3 ? 256 : 512;
|
|
4279
|
+
this.contextSize = sampleRate === 8e3 ? 32 : 64;
|
|
4280
|
+
this.context = new Float32Array(this.contextSize);
|
|
4281
|
+
this.rnnState = new Float32Array(2 * 1 * 128);
|
|
4282
|
+
this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
|
|
4283
|
+
this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
|
|
4284
|
+
}
|
|
4285
|
+
sampleRate;
|
|
4286
|
+
windowSizeSamples;
|
|
4287
|
+
contextSize;
|
|
4288
|
+
context;
|
|
4289
|
+
rnnState;
|
|
4290
|
+
inputBuffer;
|
|
4291
|
+
sampleRateTensor;
|
|
4292
|
+
async run(window) {
|
|
4293
|
+
if (window.length !== this.windowSizeSamples) {
|
|
4294
|
+
throw new Error(
|
|
4295
|
+
`window must have exactly ${this.windowSizeSamples} samples, got ${window.length}`
|
|
4296
|
+
);
|
|
4297
|
+
}
|
|
4298
|
+
this.inputBuffer.set(this.context, 0);
|
|
4299
|
+
this.inputBuffer.set(window, this.contextSize);
|
|
4300
|
+
const { Tensor } = this.runtime;
|
|
4301
|
+
const feeds = {
|
|
4302
|
+
input: new Tensor("float32", this.inputBuffer, [1, this.inputBuffer.length]),
|
|
4303
|
+
state: new Tensor("float32", this.rnnState, [2, 1, 128]),
|
|
4304
|
+
sr: new Tensor("int64", this.sampleRateTensor, [])
|
|
4305
|
+
};
|
|
4306
|
+
const results = await this.session.run(feeds);
|
|
4307
|
+
const outputKey = Object.keys(results).find((k) => k !== "stateN") ?? "output";
|
|
4308
|
+
const stateKey = "stateN" in results ? "stateN" : Object.keys(results).find((k) => k !== outputKey);
|
|
4309
|
+
const out = results[outputKey];
|
|
4310
|
+
const newState = stateKey ? results[stateKey] : void 0;
|
|
4311
|
+
if (newState && newState.data instanceof Float32Array) {
|
|
4312
|
+
this.rnnState = Float32Array.from(newState.data);
|
|
4313
|
+
}
|
|
4314
|
+
this.context = this.inputBuffer.slice(-this.contextSize);
|
|
4315
|
+
const data = out.data;
|
|
4316
|
+
return data[0] ?? 0;
|
|
4317
|
+
}
|
|
4318
|
+
};
|
|
4319
|
+
var SileroVAD = class _SileroVAD {
|
|
4320
|
+
constructor(model, opts) {
|
|
4321
|
+
this.model = model;
|
|
4322
|
+
this.opts = opts;
|
|
4323
|
+
}
|
|
4324
|
+
pending = new Float32Array(0);
|
|
4325
|
+
expFilter = new ExpFilter(0.35);
|
|
4326
|
+
pubSpeaking = false;
|
|
4327
|
+
speechThresholdDuration = 0;
|
|
4328
|
+
silenceThresholdDuration = 0;
|
|
4329
|
+
closed = false;
|
|
4330
|
+
/**
|
|
4331
|
+
* Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
|
|
4332
|
+
* Throws if `onnxruntime-node` is not installed.
|
|
4333
|
+
*/
|
|
4334
|
+
static async load(options = {}) {
|
|
4335
|
+
const sampleRate = options.sampleRate ?? 16e3;
|
|
4336
|
+
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4337
|
+
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4338
|
+
}
|
|
4339
|
+
const activationThreshold = options.activationThreshold ?? 0.5;
|
|
4340
|
+
const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
|
|
4341
|
+
if (deactivationThreshold <= 0) {
|
|
4342
|
+
throw new Error("deactivationThreshold must be greater than 0");
|
|
4343
|
+
}
|
|
4344
|
+
const runtime = await loadOnnxRuntime();
|
|
4345
|
+
const modelPath = options.onnxFilePath ?? DEFAULT_MODEL_PATH;
|
|
4346
|
+
const session = await runtime.InferenceSession.create(modelPath, {
|
|
4347
|
+
interOpNumThreads: 1,
|
|
4348
|
+
intraOpNumThreads: 1,
|
|
4349
|
+
executionMode: "sequential",
|
|
4350
|
+
executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
|
|
4351
|
+
});
|
|
4352
|
+
const model = new OnnxModel(runtime, session, sampleRate);
|
|
4353
|
+
return new _SileroVAD(model, {
|
|
4354
|
+
minSpeechDuration: options.minSpeechDuration ?? 0.05,
|
|
4355
|
+
minSilenceDuration: options.minSilenceDuration ?? 0.55,
|
|
4356
|
+
prefixPaddingDuration: options.prefixPaddingDuration ?? 0.5,
|
|
4357
|
+
activationThreshold,
|
|
4358
|
+
deactivationThreshold,
|
|
4359
|
+
sampleRate
|
|
4360
|
+
});
|
|
4361
|
+
}
|
|
4362
|
+
/**
|
|
4363
|
+
* Internal factory used by tests — bypasses onnxruntime-node loading.
|
|
4364
|
+
* @internal
|
|
4365
|
+
*/
|
|
4366
|
+
static fromOnnxModel(runtime, session, options) {
|
|
4367
|
+
const model = new OnnxModel(runtime, session, options.sampleRate);
|
|
4368
|
+
return new _SileroVAD(model, options);
|
|
4369
|
+
}
|
|
4370
|
+
get sampleRate() {
|
|
4371
|
+
return this.opts.sampleRate;
|
|
4372
|
+
}
|
|
4373
|
+
/**
|
|
4374
|
+
* Number of int16 PCM samples that must be provided per call to
|
|
4375
|
+
* processFrame for the model to run one inference window.
|
|
4376
|
+
*
|
|
4377
|
+
* Constraint (ported from LiveKit Agents / Silero ONNX spec):
|
|
4378
|
+
* - 16 000 Hz → 512 samples (32 ms)
|
|
4379
|
+
* - 8 000 Hz → 256 samples (32 ms)
|
|
4380
|
+
*
|
|
4381
|
+
* Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
|
|
4382
|
+
* should buffer incoming audio until at least numFramesRequired() int16
|
|
4383
|
+
* samples are available before calling processFrame. The provider
|
|
4384
|
+
* internally buffers partial windows so smaller chunks are also safe, but
|
|
4385
|
+
* passing exactly one window per call minimises heap allocation.
|
|
4386
|
+
*/
|
|
4387
|
+
numFramesRequired() {
|
|
4388
|
+
return this.opts.sampleRate === 8e3 ? 256 : 512;
|
|
4389
|
+
}
|
|
4390
|
+
async processFrame(pcmChunk, sampleRate) {
|
|
4391
|
+
if (this.closed) {
|
|
4392
|
+
throw new Error("SileroVAD is closed");
|
|
4393
|
+
}
|
|
4394
|
+
if (sampleRate !== this.opts.sampleRate) {
|
|
4395
|
+
throw new Error(
|
|
4396
|
+
`input sampleRate ${sampleRate} does not match model sampleRate ${this.opts.sampleRate}; resampling is not implemented in the Patter port`
|
|
4397
|
+
);
|
|
4398
|
+
}
|
|
4399
|
+
if (pcmChunk.length === 0) {
|
|
4400
|
+
return null;
|
|
4401
|
+
}
|
|
4402
|
+
const numSamples = Math.floor(pcmChunk.length / 2);
|
|
4403
|
+
if (numSamples === 0) {
|
|
4404
|
+
return null;
|
|
4405
|
+
}
|
|
4406
|
+
const samples = new Float32Array(numSamples);
|
|
4407
|
+
for (let i = 0; i < numSamples; i++) {
|
|
4408
|
+
samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
|
|
4409
|
+
}
|
|
4410
|
+
const merged = new Float32Array(this.pending.length + samples.length);
|
|
4411
|
+
merged.set(this.pending, 0);
|
|
4412
|
+
merged.set(samples, this.pending.length);
|
|
4413
|
+
this.pending = merged;
|
|
4414
|
+
const windowSize = this.model.windowSizeSamples;
|
|
4415
|
+
let event = null;
|
|
4416
|
+
while (this.pending.length >= windowSize) {
|
|
4417
|
+
const window = this.pending.slice(0, windowSize);
|
|
4418
|
+
this.pending = this.pending.slice(windowSize);
|
|
4419
|
+
const rawP = await this.model.run(window);
|
|
4420
|
+
const p = this.expFilter.apply(1, rawP);
|
|
4421
|
+
const windowDuration = windowSize / this.opts.sampleRate;
|
|
4422
|
+
const transition = this.advanceState(p, windowDuration);
|
|
4423
|
+
if (transition !== null) {
|
|
4424
|
+
event = transition;
|
|
4425
|
+
}
|
|
4426
|
+
}
|
|
4427
|
+
return event;
|
|
4428
|
+
}
|
|
4429
|
+
advanceState(p, windowDuration) {
|
|
4430
|
+
const opts = this.opts;
|
|
4431
|
+
if (p >= opts.activationThreshold || this.pubSpeaking && p > opts.deactivationThreshold) {
|
|
4432
|
+
this.speechThresholdDuration += windowDuration;
|
|
4433
|
+
this.silenceThresholdDuration = 0;
|
|
4434
|
+
if (!this.pubSpeaking) {
|
|
4435
|
+
if (this.speechThresholdDuration >= opts.minSpeechDuration) {
|
|
4436
|
+
this.pubSpeaking = true;
|
|
4437
|
+
return {
|
|
4438
|
+
type: "speech_start",
|
|
4439
|
+
confidence: p,
|
|
4440
|
+
durationMs: this.speechThresholdDuration * 1e3
|
|
4441
|
+
};
|
|
4442
|
+
}
|
|
4443
|
+
}
|
|
4444
|
+
} else {
|
|
4445
|
+
this.silenceThresholdDuration += windowDuration;
|
|
4446
|
+
this.speechThresholdDuration = 0;
|
|
4447
|
+
if (this.pubSpeaking && this.silenceThresholdDuration >= opts.minSilenceDuration) {
|
|
4448
|
+
this.pubSpeaking = false;
|
|
4449
|
+
return {
|
|
4450
|
+
type: "speech_end",
|
|
4451
|
+
confidence: p,
|
|
4452
|
+
durationMs: this.silenceThresholdDuration * 1e3
|
|
4453
|
+
};
|
|
4454
|
+
}
|
|
4455
|
+
}
|
|
4456
|
+
return null;
|
|
4457
|
+
}
|
|
4458
|
+
async close() {
|
|
4459
|
+
if (this.closed) return;
|
|
4460
|
+
this.closed = true;
|
|
4461
|
+
}
|
|
4462
|
+
};
|
|
4463
|
+
|
|
3478
4464
|
// src/carriers/twilio.ts
|
|
3479
4465
|
var Carrier = class {
|
|
3480
4466
|
kind = "twilio";
|
|
@@ -3823,7 +4809,7 @@ var DebouncedCall = class {
|
|
|
3823
4809
|
this.timer = setTimeout(() => {
|
|
3824
4810
|
this.timer = null;
|
|
3825
4811
|
Promise.resolve(this.callback()).catch((err) => {
|
|
3826
|
-
|
|
4812
|
+
getLogger().error("IVR silence callback raised:", err);
|
|
3827
4813
|
});
|
|
3828
4814
|
}, this.delayMs);
|
|
3829
4815
|
}
|
|
@@ -3879,7 +4865,7 @@ var IVRActivity = class {
|
|
|
3879
4865
|
try {
|
|
3880
4866
|
await this.onLoopDetected();
|
|
3881
4867
|
} catch (err) {
|
|
3882
|
-
|
|
4868
|
+
getLogger().error("IVR onLoopDetected callback raised:", err);
|
|
3883
4869
|
}
|
|
3884
4870
|
}
|
|
3885
4871
|
}
|
|
@@ -3917,7 +4903,7 @@ var IVRActivity = class {
|
|
|
3917
4903
|
try {
|
|
3918
4904
|
await this.onSilence();
|
|
3919
4905
|
} catch (err) {
|
|
3920
|
-
|
|
4906
|
+
getLogger().error("IVR onSilence callback raised:", err);
|
|
3921
4907
|
}
|
|
3922
4908
|
}
|
|
3923
4909
|
}
|
|
@@ -3967,9 +4953,9 @@ var IVRActivity = class {
|
|
|
3967
4953
|
};
|
|
3968
4954
|
|
|
3969
4955
|
// src/services/background-audio.ts
|
|
3970
|
-
import { promises as
|
|
3971
|
-
import
|
|
3972
|
-
import { fileURLToPath } from "url";
|
|
4956
|
+
import { promises as fs2 } from "fs";
|
|
4957
|
+
import path2 from "path";
|
|
4958
|
+
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
3973
4959
|
var BuiltinAudioClip = {
|
|
3974
4960
|
CITY_AMBIENCE: "city-ambience.ogg",
|
|
3975
4961
|
FOREST_AMBIENCE: "forest-ambience.ogg",
|
|
@@ -3981,8 +4967,8 @@ var BuiltinAudioClip = {
|
|
|
3981
4967
|
};
|
|
3982
4968
|
/**
 * Resolve the path of a bundled audio clip.
 *
 * The base directory is, in order of preference: the directory of this
 * module (when `import.meta.url` is available), `__dirname` (when defined),
 * or the current working directory. The clip is looked up under
 * `../resources/audio` relative to that base.
 *
 * @param {string} clip File name of the clip, e.g. a `BuiltinAudioClip` value.
 * @returns {string} Absolute path to the clip file.
 */
function builtinClipPath(clip) {
  const meta = typeof import.meta !== "undefined" ? import.meta : void 0;
  let here;
  if (meta?.url) {
    here = path2.dirname(fileURLToPath2(meta.url));
  } else if (typeof __dirname !== "undefined") {
    here = __dirname;
  } else {
    here = process.cwd();
  }
  return path2.resolve(here, "..", "resources", "audio", clip);
}
|
|
3987
4973
|
var INT16_MIN = -32768;
|
|
3988
4974
|
var INT16_MAX = 32767;
|
|
@@ -4151,7 +5137,7 @@ var BackgroundAudioPlayer = class {
|
|
|
4151
5137
|
return source.decode(source.path);
|
|
4152
5138
|
case "builtin": {
|
|
4153
5139
|
const p = builtinClipPath(source.clip);
|
|
4154
|
-
const header = await
|
|
5140
|
+
const header = await fs2.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
|
|
4155
5141
|
if (header.toString("ascii") !== "OggS") {
|
|
4156
5142
|
throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
|
|
4157
5143
|
}
|
|
@@ -4181,15 +5167,264 @@ var BackgroundAudioPlayer = class {
|
|
|
4181
5167
|
/**
 * Type guard: true when `value` is a non-null object carrying a non-null
 * object-valued `source` property.
 *
 * `typeof null === "object"`, so the `source` property is checked against
 * `null` explicitly; otherwise `{ source: null }` would be accepted.
 *
 * @param {unknown} value Candidate value.
 * @returns {boolean} Whether `value` has the shape described above.
 */
function isAudioConfig(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (!("source" in value)) {
    return false;
  }
  const { source } = value;
  return typeof source === "object" && source !== null;
}
|
|
5170
|
+
|
|
5171
|
+
// src/providers/twilio-adapter.ts
|
|
5172
|
+
var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
/**
 * Minimal REST client for the Twilio 2010-04-01 API: number provisioning,
 * number configuration, outbound calls and call termination.
 */
var TwilioAdapter = class _TwilioAdapter {
  accountSid;
  region;
  baseUrl;
  authHeader;
  /**
   * @param {string} accountSid Twilio account SID (required).
   * @param {string} authToken Twilio auth token (required).
   * @param {{ region?: string }} [opts] When `region` is set, requests go to
   *   `https://api.<region>.twilio.com` instead of the default host.
   * @throws {Error} When `accountSid` or `authToken` is missing.
   */
  constructor(accountSid, authToken, opts = {}) {
    if (!accountSid) throw new Error("TwilioAdapter: accountSid is required");
    if (!authToken) throw new Error("TwilioAdapter: authToken is required");
    this.accountSid = accountSid;
    this.region = opts.region;
    this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
    this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
  }
  /**
   * Send an authenticated request scoped to this account.
   *
   * @param {string} method HTTP method.
   * @param {string} path3 Path below `/Accounts/<sid>`, including any query string.
   * @param {URLSearchParams} [body] Form-encoded body.
   * @returns {Promise<object>} Parsed JSON response, or `{}` for an empty body.
   * @throws {Error} On a non-2xx status (message includes status and body text)
   *   or when a non-empty body is not valid JSON (parse error attached as `cause`).
   */
  async request(method, path3, body) {
    const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path3}`;
    const headers = { Authorization: this.authHeader };
    if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
    const response = await fetch(url, {
      method,
      headers,
      body: body ? body.toString() : void 0,
      // Abort the request after 30 seconds.
      signal: AbortSignal.timeout(3e4)
    });
    const text = await response.text();
    if (!response.ok) {
      throw new Error(`Twilio ${method} ${path3} failed: ${response.status} ${text}`);
    }
    if (!text) return {};
    try {
      return JSON.parse(text);
    } catch (e) {
      // Keep the original parse error reachable for callers and logs.
      throw new Error(`Twilio returned non-JSON response: ${String(e)}`, { cause: e });
    }
  }
  /**
   * Provision a local phone number in the given country.
   *
   * Lists available local numbers, then purchases the first match.
   *
   * @param {{ countryCode: string, areaCode?: string }} opts
   * @returns {Promise<{ phoneNumber: string, sid: string }>}
   * @throws {Error} When no number is available or the purchase response
   *   lacks `sid` / `phone_number`.
   */
  async provisionNumber(opts) {
    const country = encodeURIComponent(opts.countryCode);
    const queryParts = ["PageSize=1"];
    if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
    const path3 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
    const available = await this.request("GET", path3);
    const first = available.available_phone_numbers?.[0]?.phone_number;
    if (!first) {
      throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
    }
    const body = new URLSearchParams({ PhoneNumber: first });
    const purchased = await this.request(
      "POST",
      "/IncomingPhoneNumbers.json",
      body
    );
    if (!purchased.sid || !purchased.phone_number) {
      throw new Error("TwilioAdapter: malformed response from IncomingPhoneNumbers.create");
    }
    return { phoneNumber: purchased.phone_number, sid: purchased.sid };
  }
  /**
   * Update an already-purchased number to point at our voice webhook.
   *
   * @param {string} phoneNumberSid SID of the purchased number (required).
   * @param {{ voiceUrl: string, statusCallback?: string }} opts
   * @throws {Error} When `phoneNumberSid` is missing or the request fails.
   */
  async configureNumber(phoneNumberSid, opts) {
    if (!phoneNumberSid) throw new Error("TwilioAdapter: phoneNumberSid is required");
    const body = new URLSearchParams({
      VoiceUrl: opts.voiceUrl,
      VoiceMethod: "POST"
    });
    if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
    await this.request(
      "POST",
      `/IncomingPhoneNumbers/${encodeURIComponent(phoneNumberSid)}.json`,
      body
    );
  }
  /**
   * Place an outbound call. Returns the Twilio call SID.
   *
   * Either `url` (a TwiML webhook) or `streamUrl` (wrapped in inline TwiML)
   * must be supplied; `url` wins when both are present. Entries in
   * `extraParams` are applied last and therefore override earlier fields.
   *
   * @param {{ from: string, to: string, url?: string, streamUrl?: string,
   *   statusCallback?: string, machineDetection?: string,
   *   extraParams?: Record<string, string> }} opts
   * @returns {Promise<{ callSid: string }>}
   * @throws {Error} When neither `url` nor `streamUrl` is given, or the
   *   response carries no SID.
   */
  async initiateCall(opts) {
    if (!opts.url && !opts.streamUrl) {
      throw new Error("TwilioAdapter: initiateCall requires either url or streamUrl");
    }
    const body = new URLSearchParams({
      From: opts.from,
      To: opts.to
    });
    if (opts.url) {
      body.set("Url", opts.url);
    } else if (opts.streamUrl) {
      body.set("Twiml", _TwilioAdapter.generateStreamTwiml(opts.streamUrl));
    }
    if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
    if (opts.machineDetection) body.set("MachineDetection", opts.machineDetection);
    if (opts.extraParams) {
      for (const [key, value] of Object.entries(opts.extraParams)) {
        body.set(key, value);
      }
    }
    const call = await this.request("POST", "/Calls.json", body);
    if (!call.sid) {
      throw new Error("TwilioAdapter: Calls.create returned no SID");
    }
    return { callSid: call.sid };
  }
  /**
   * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
   * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
   *
   * The URL is XML-escaped before being placed in the attribute. `&` is
   * replaced first so the entities produced by later replacements are not
   * escaped a second time.
   *
   * @param {string} streamUrl WebSocket URL for the media stream.
   * @returns {string} TwiML document.
   */
  static generateStreamTwiml(streamUrl) {
    const escaped = streamUrl
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;");
    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
  }
  /**
   * Force-complete an in-progress call.
   *
   * @param {string} callSid SID of the call to end (required).
   * @throws {Error} When `callSid` is missing; request failures are logged
   *   at warn level and rethrown.
   */
  async endCall(callSid) {
    if (!callSid) throw new Error("TwilioAdapter: callSid is required");
    const body = new URLSearchParams({ Status: "completed" });
    try {
      await this.request(
        "POST",
        `/Calls/${encodeURIComponent(callSid)}.json`,
        body
      );
    } catch (err) {
      getLogger().warn(`[TwilioAdapter] endCall failed for ${callSid}: ${String(err)}`);
      throw err;
    }
  }
};
|
|
5298
|
+
|
|
5299
|
+
// src/providers/telnyx-adapter.ts
|
|
5300
|
+
import { randomUUID as randomUUID2 } from "crypto";
|
|
5301
|
+
var TELNYX_API_BASE = "https://api.telnyx.com/v2";
/**
 * Minimal REST client for the Telnyx v2 API: number ordering, number
 * configuration, outbound calls and hangup.
 */
var TelnyxAdapter = class {
  apiKey;
  connectionId;
  baseUrl = TELNYX_API_BASE;
  /**
   * @param {string} apiKey Telnyx API key (required), sent as a Bearer token.
   * @param {string} [connectionId] Default connection ID used when a call
   *   or number order does not specify one.
   * @throws {Error} When `apiKey` is missing.
   */
  constructor(apiKey, connectionId) {
    if (!apiKey) throw new Error("TelnyxAdapter: apiKey is required");
    this.apiKey = apiKey;
    this.connectionId = connectionId;
  }
  /**
   * Send an authenticated JSON request.
   *
   * @param {string} method HTTP method.
   * @param {string} path3 Path below the API base, including any query string.
   * @param {object} [body] Value serialized as the JSON request body.
   * @returns {Promise<object>} Parsed JSON response, or `{}` for an empty body.
   * @throws {Error} On a non-2xx status (message includes status and body text)
   *   or when a non-empty body is not valid JSON (parse error attached as `cause`).
   */
  async request(method, path3, body) {
    const url = `${this.baseUrl}${path3}`;
    const headers = {
      Authorization: `Bearer ${this.apiKey}`
    };
    if (body !== void 0) headers["Content-Type"] = "application/json";
    const response = await fetch(url, {
      method,
      headers,
      body: body !== void 0 ? JSON.stringify(body) : void 0,
      // Abort the request after 30 seconds.
      signal: AbortSignal.timeout(3e4)
    });
    const text = await response.text();
    if (!response.ok) {
      throw new Error(`Telnyx ${method} ${path3} failed: ${response.status} ${text}`);
    }
    if (!text) return {};
    try {
      return JSON.parse(text);
    } catch (e) {
      // Keep the original parse error reachable for callers and logs.
      throw new Error(`Telnyx returned non-JSON response: ${String(e)}`, { cause: e });
    }
  }
  /**
   * Search available numbers for ``countryCode`` and place an order for the
   * first match. Returns both the reserved E.164 number and the order ID.
   *
   * The order ID is an empty string when the order response carries none.
   *
   * @param {{ countryCode: string }} opts
   * @returns {Promise<{ phoneNumber: string, orderId: string }>}
   * @throws {Error} When the search returns no number.
   */
  async provisionNumber(opts) {
    const country = encodeURIComponent(opts.countryCode);
    const searchPath = `/available_phone_numbers?filter[phone_number][country_code]=${country}&filter[limit]=1`;
    const available = await this.request("GET", searchPath);
    const chosen = available.data?.[0]?.phone_number;
    if (!chosen) {
      throw new Error(`TelnyxAdapter: no numbers available for ${opts.countryCode}`);
    }
    const orderBody = {
      phone_numbers: [{ phone_number: chosen }]
    };
    if (this.connectionId) {
      orderBody.connection_id = this.connectionId;
    }
    const order = await this.request(
      "POST",
      "/number_orders",
      orderBody
    );
    const orderId = order.data?.id ?? "";
    return { phoneNumber: chosen, orderId };
  }
  /**
   * Attach a number to a Call Control Application.
   *
   * @param {string} phoneNumber Number to configure (required).
   * @param {{ connectionId: string }} opts
   * @throws {Error} When `phoneNumber` or `opts.connectionId` is missing.
   */
  async configureNumber(phoneNumber, opts) {
    if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
    if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
    await this.request(
      "PATCH",
      `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
      { connection_id: opts.connectionId, tech_prefix_enabled: false }
    );
  }
  /**
   * Place an outbound call on the Call Control Application.
   *
   * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
   * is configured on the Application itself (or started explicitly via a
   * ``streaming_start`` command). Passing ``stream_url`` on dial is a
   * deprecated code path that Telnyx rejects in newer API versions.
   *
   * @param {{ from: string, to: string, connectionId?: string,
   *   clientState?: string }} opts `clientState` is sent base64-encoded.
   * @returns {Promise<{ callControlId: string }>}
   * @throws {Error} When no connection ID is available or the response
   *   carries no `call_control_id`.
   */
  async initiateCall(opts) {
    const connectionId = opts.connectionId ?? this.connectionId;
    if (!connectionId) {
      throw new Error("TelnyxAdapter: connectionId must be provided to initiateCall");
    }
    const payload = {
      connection_id: connectionId,
      from: opts.from,
      to: opts.to
    };
    if (opts.clientState) {
      payload.client_state = Buffer.from(opts.clientState, "utf-8").toString("base64");
    }
    const resp = await this.request("POST", "/calls", payload);
    const callControlId = resp.data?.call_control_id;
    if (!callControlId) {
      throw new Error("TelnyxAdapter: /calls returned no call_control_id");
    }
    return { callControlId };
  }
  /**
   * Hang up an in-progress call.
   *
   * @param {string} callControlId Call control ID of the call (required).
   * @param {{ commandId?: string }} [opts] `commandId` defaults to a fresh UUID.
   * @throws {Error} When `callControlId` is missing; request failures are
   *   logged at warn level and rethrown.
   */
  async endCall(callControlId, opts = {}) {
    if (!callControlId) throw new Error("TelnyxAdapter: callControlId is required");
    const encoded = encodeURIComponent(callControlId);
    const body = {
      command_id: opts.commandId ?? randomUUID2()
    };
    try {
      await this.request(
        "POST",
        `/calls/${encoded}/actions/hangup`,
        body
      );
    } catch (err) {
      getLogger().warn(
        `[TelnyxAdapter] endCall failed for ${callControlId}: ${String(err)}`
      );
      throw err;
    }
  }
};
|
|
4184
5419
|
export {
|
|
4185
5420
|
AllProvidersFailedError,
|
|
4186
5421
|
LLM2 as AnthropicLLM,
|
|
4187
|
-
|
|
5422
|
+
STT6 as AssemblyAISTT,
|
|
4188
5423
|
AuthenticationError,
|
|
4189
5424
|
BackgroundAudioPlayer,
|
|
4190
5425
|
BuiltinAudioClip,
|
|
4191
5426
|
CallMetricsAccumulator,
|
|
4192
|
-
|
|
5427
|
+
STT4 as CartesiaSTT,
|
|
4193
5428
|
TTS3 as CartesiaTTS,
|
|
4194
5429
|
LLM4 as CerebrasLLM,
|
|
4195
5430
|
ChatContext,
|
|
@@ -4198,9 +5433,11 @@ export {
|
|
|
4198
5433
|
DEFAULT_PRICING,
|
|
4199
5434
|
DTMF_EVENTS,
|
|
4200
5435
|
STT as DeepgramSTT,
|
|
5436
|
+
DefaultToolExecutor,
|
|
4201
5437
|
ConvAI as ElevenLabsConvAI,
|
|
4202
5438
|
ElevenLabsConvAIAdapter,
|
|
4203
5439
|
TTS as ElevenLabsTTS,
|
|
5440
|
+
EventBus,
|
|
4204
5441
|
FallbackLLMProvider,
|
|
4205
5442
|
GEMINI_DEFAULT_INPUT_SR,
|
|
4206
5443
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
@@ -4212,31 +5449,48 @@ export {
|
|
|
4212
5449
|
LLMLoop,
|
|
4213
5450
|
TTS5 as LMNTTTS,
|
|
4214
5451
|
MetricsStore,
|
|
5452
|
+
Ngrok,
|
|
4215
5453
|
LLM as OpenAILLM,
|
|
4216
5454
|
OpenAILLMProvider,
|
|
4217
5455
|
Realtime as OpenAIRealtime,
|
|
4218
5456
|
OpenAIRealtimeAdapter,
|
|
4219
5457
|
TTS2 as OpenAITTS,
|
|
5458
|
+
STT3 as OpenAITranscribeSTT,
|
|
4220
5459
|
PartialStreamError,
|
|
4221
5460
|
Patter,
|
|
4222
5461
|
PatterConnectionError,
|
|
4223
5462
|
PatterError,
|
|
5463
|
+
PatterTool,
|
|
5464
|
+
PcmCarry,
|
|
4224
5465
|
PipelineHookExecutor,
|
|
4225
5466
|
ProvisionError,
|
|
5467
|
+
RateLimitError,
|
|
4226
5468
|
RemoteMessageHandler,
|
|
4227
5469
|
TTS4 as RimeTTS,
|
|
5470
|
+
SPAN_BARGEIN,
|
|
5471
|
+
SPAN_CALL,
|
|
5472
|
+
SPAN_ENDPOINT,
|
|
5473
|
+
SPAN_LLM,
|
|
5474
|
+
SPAN_STT,
|
|
5475
|
+
SPAN_TOOL,
|
|
5476
|
+
SPAN_TTS,
|
|
4228
5477
|
SentenceChunker,
|
|
4229
|
-
|
|
5478
|
+
SileroVAD,
|
|
5479
|
+
STT5 as SonioxSTT,
|
|
5480
|
+
StatefulResampler,
|
|
4230
5481
|
Static as StaticTunnel,
|
|
4231
5482
|
Carrier2 as Telnyx,
|
|
5483
|
+
TelnyxAdapter,
|
|
4232
5484
|
TestSession,
|
|
4233
5485
|
TfidfLoopDetector,
|
|
4234
5486
|
Tool,
|
|
4235
5487
|
Carrier as Twilio,
|
|
5488
|
+
TwilioAdapter,
|
|
4236
5489
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
4237
5490
|
ULTRAVOX_DEFAULT_SR,
|
|
4238
5491
|
UltravoxRealtimeAdapter,
|
|
4239
5492
|
STT2 as WhisperSTT,
|
|
5493
|
+
assemblyai,
|
|
4240
5494
|
builtinClipPath,
|
|
4241
5495
|
calculateRealtimeCost,
|
|
4242
5496
|
calculateSttCost,
|
|
@@ -4244,6 +5498,10 @@ export {
|
|
|
4244
5498
|
calculateTtsCost,
|
|
4245
5499
|
callsToCsv,
|
|
4246
5500
|
callsToJson,
|
|
5501
|
+
cartesia,
|
|
5502
|
+
createResampler16kTo8k,
|
|
5503
|
+
createResampler24kTo16k,
|
|
5504
|
+
createResampler8kTo16k,
|
|
4247
5505
|
deepgram,
|
|
4248
5506
|
defineTool,
|
|
4249
5507
|
elevenlabs,
|
|
@@ -4251,10 +5509,14 @@ export {
|
|
|
4251
5509
|
filterForTTS,
|
|
4252
5510
|
filterMarkdown,
|
|
4253
5511
|
formatDtmf,
|
|
5512
|
+
geminiLive,
|
|
4254
5513
|
getLogger,
|
|
4255
5514
|
guardrail,
|
|
5515
|
+
initTracing,
|
|
4256
5516
|
isRemoteUrl,
|
|
5517
|
+
isTracingEnabled,
|
|
4257
5518
|
isWebSocketUrl,
|
|
5519
|
+
lmnt,
|
|
4258
5520
|
makeAuthMiddleware,
|
|
4259
5521
|
mergePricing,
|
|
4260
5522
|
mixPcm,
|
|
@@ -4268,12 +5530,17 @@ export {
|
|
|
4268
5530
|
resample24kTo16k,
|
|
4269
5531
|
resample8kTo16k,
|
|
4270
5532
|
resamplePcm,
|
|
5533
|
+
rime,
|
|
4271
5534
|
scheduleCron,
|
|
4272
5535
|
scheduleInterval,
|
|
4273
5536
|
scheduleOnce,
|
|
4274
5537
|
selectSoundFromList,
|
|
4275
5538
|
setLogger,
|
|
5539
|
+
soniox,
|
|
5540
|
+
speechmatics,
|
|
5541
|
+
startSpan,
|
|
4276
5542
|
startTunnel,
|
|
4277
5543
|
tool,
|
|
5544
|
+
ultravox,
|
|
4278
5545
|
whisper
|
|
4279
5546
|
};
|