getpatter 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/{banner-FLR2HE5Z.mjs → banner-3GNZ6VQK.mjs} +1 -1
- package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
- package/dist/{chunk-7SDDK2AO.mjs → chunk-FIFIWBL7.mjs} +3255 -588
- package/dist/chunk-QHHBUCMT.mjs +25 -0
- package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
- package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
- package/dist/cli.js +126 -13
- package/dist/dist-YRCCJQ26.mjs +1631 -0
- package/dist/index.d.mts +2000 -289
- package/dist/index.d.ts +2000 -289
- package/dist/index.js +7945 -1928
- package/dist/index.mjs +1882 -618
- package/dist/node-cron-6PRPSBG5.mjs +1348 -0
- package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
- package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
- package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
- package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
- package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
- package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
- package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
- package/dist/test-mode-MVJ3SKG4.mjs +8 -0
- package/dist/tunnel-UVR3PPAU.mjs +8 -0
- package/package.json +10 -3
- package/dist/chunk-OOIUSZB4.mjs +0 -37
- package/dist/node-cron-373UVDIO.mjs +0 -935
- package/dist/test-mode-K2TTPRGE.mjs +0 -8
- package/dist/tunnel-O7ICMSTP.mjs +0 -8
package/dist/index.mjs
CHANGED
|
@@ -3,21 +3,37 @@ import {
|
|
|
3
3
|
} from "./chunk-AFUYSNDH.mjs";
|
|
4
4
|
import {
|
|
5
5
|
startTunnel
|
|
6
|
-
} from "./chunk-AKQFOFLG.mjs";
|
|
6
|
+
} from "./chunk-SEMKNPCD.mjs";
|
|
7
7
|
import {
|
|
8
|
+
AuthenticationError,
|
|
8
9
|
CallMetricsAccumulator,
|
|
9
10
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
10
11
|
DEFAULT_PRICING,
|
|
11
12
|
DeepgramSTT,
|
|
13
|
+
DefaultToolExecutor,
|
|
12
14
|
ElevenLabsConvAIAdapter,
|
|
13
15
|
EmbeddedServer,
|
|
16
|
+
EventBus,
|
|
14
17
|
LLMLoop,
|
|
15
18
|
MetricsStore,
|
|
16
19
|
OpenAILLMProvider,
|
|
17
20
|
OpenAIRealtimeAdapter,
|
|
21
|
+
PatterConnectionError,
|
|
22
|
+
PatterError,
|
|
23
|
+
PcmCarry,
|
|
18
24
|
PipelineHookExecutor,
|
|
25
|
+
ProvisionError,
|
|
26
|
+
RateLimitError,
|
|
19
27
|
RemoteMessageHandler,
|
|
28
|
+
SPAN_BARGEIN,
|
|
29
|
+
SPAN_CALL,
|
|
30
|
+
SPAN_ENDPOINT,
|
|
31
|
+
SPAN_LLM,
|
|
32
|
+
SPAN_STT,
|
|
33
|
+
SPAN_TOOL,
|
|
34
|
+
SPAN_TTS,
|
|
20
35
|
SentenceChunker,
|
|
36
|
+
StatefulResampler,
|
|
21
37
|
TestSession,
|
|
22
38
|
calculateRealtimeCost,
|
|
23
39
|
calculateSttCost,
|
|
@@ -25,7 +41,12 @@ import {
|
|
|
25
41
|
calculateTtsCost,
|
|
26
42
|
callsToCsv,
|
|
27
43
|
callsToJson,
|
|
44
|
+
createResampler16kTo8k,
|
|
45
|
+
createResampler24kTo16k,
|
|
46
|
+
createResampler8kTo16k,
|
|
47
|
+
initTracing,
|
|
28
48
|
isRemoteUrl,
|
|
49
|
+
isTracingEnabled,
|
|
29
50
|
isWebSocketUrl,
|
|
30
51
|
makeAuthMiddleware,
|
|
31
52
|
mergePricing,
|
|
@@ -35,153 +56,14 @@ import {
|
|
|
35
56
|
pcm16ToMulaw,
|
|
36
57
|
resample16kTo8k,
|
|
37
58
|
resample24kTo16k,
|
|
38
|
-
resample8kTo16k
|
|
39
|
-
|
|
59
|
+
resample8kTo16k,
|
|
60
|
+
startSpan
|
|
61
|
+
} from "./chunk-FIFIWBL7.mjs";
|
|
40
62
|
import {
|
|
41
63
|
getLogger,
|
|
42
64
|
setLogger
|
|
43
|
-
} from "./chunk-FMNRCP5X.mjs";
|
|
44
|
-
import "./chunk-OOIUSZB4.mjs";
|
|
45
|
-
|
|
46
|
-
// src/connection.ts
|
|
47
|
-
import WebSocket from "ws";
|
|
48
|
-
|
|
49
|
-
// src/errors.ts
|
|
50
|
-
var PatterError = class extends Error {
|
|
51
|
-
constructor(message) {
|
|
52
|
-
super(message);
|
|
53
|
-
this.name = "PatterError";
|
|
54
|
-
}
|
|
55
|
-
};
|
|
56
|
-
var PatterConnectionError = class extends PatterError {
|
|
57
|
-
constructor(message) {
|
|
58
|
-
super(message);
|
|
59
|
-
this.name = "PatterConnectionError";
|
|
60
|
-
}
|
|
61
|
-
};
|
|
62
|
-
var AuthenticationError = class extends PatterError {
|
|
63
|
-
constructor(message) {
|
|
64
|
-
super(message);
|
|
65
|
-
this.name = "AuthenticationError";
|
|
66
|
-
}
|
|
67
|
-
};
|
|
68
|
-
var ProvisionError = class extends PatterError {
|
|
69
|
-
constructor(message) {
|
|
70
|
-
super(message);
|
|
71
|
-
this.name = "ProvisionError";
|
|
72
|
-
}
|
|
73
|
-
};
|
|
74
|
-
|
|
75
|
-
// src/connection.ts
|
|
76
|
-
var DEFAULT_BACKEND_URL = "wss://api.getpatter.com";
|
|
77
|
-
var PatterConnection = class {
|
|
78
|
-
apiKey;
|
|
79
|
-
backendUrl;
|
|
80
|
-
wsUrl;
|
|
81
|
-
ws = null;
|
|
82
|
-
onMessage = null;
|
|
83
|
-
onCallStart = null;
|
|
84
|
-
onCallEnd = null;
|
|
85
|
-
constructor(apiKey, backendUrl = DEFAULT_BACKEND_URL) {
|
|
86
|
-
this.apiKey = apiKey;
|
|
87
|
-
this.backendUrl = backendUrl.replace(/\/+$/, "");
|
|
88
|
-
this.wsUrl = `${this.backendUrl}/ws/sdk`;
|
|
89
|
-
}
|
|
90
|
-
get isConnected() {
|
|
91
|
-
return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
|
|
92
|
-
}
|
|
93
|
-
async connect(options) {
|
|
94
|
-
this.onMessage = options.onMessage;
|
|
95
|
-
this.onCallStart = options.onCallStart ?? null;
|
|
96
|
-
this.onCallEnd = options.onCallEnd ?? null;
|
|
97
|
-
return new Promise((resolve, reject) => {
|
|
98
|
-
this.ws = new WebSocket(this.wsUrl, {
|
|
99
|
-
headers: { "X-API-Key": this.apiKey }
|
|
100
|
-
});
|
|
101
|
-
const onError = (err) => {
|
|
102
|
-
this.ws?.off("error", onError);
|
|
103
|
-
reject(new PatterConnectionError(`Failed to connect: ${err.message}`));
|
|
104
|
-
};
|
|
105
|
-
this.ws.once("open", () => {
|
|
106
|
-
this.ws?.off("error", onError);
|
|
107
|
-
this.setupListeners();
|
|
108
|
-
resolve();
|
|
109
|
-
});
|
|
110
|
-
this.ws.on("error", onError);
|
|
111
|
-
});
|
|
112
|
-
}
|
|
113
|
-
setupListeners() {
|
|
114
|
-
if (!this.ws) return;
|
|
115
|
-
this.ws.on("error", (err) => {
|
|
116
|
-
getLogger().error(`WebSocket error: ${err.message}`);
|
|
117
|
-
});
|
|
118
|
-
this.ws.on("message", async (data) => {
|
|
119
|
-
const raw = data.toString();
|
|
120
|
-
let parsed;
|
|
121
|
-
try {
|
|
122
|
-
parsed = JSON.parse(raw);
|
|
123
|
-
} catch {
|
|
124
|
-
return;
|
|
125
|
-
}
|
|
126
|
-
const msgType = parsed.type;
|
|
127
|
-
if (msgType === "message" && this.onMessage) {
|
|
128
|
-
const msg = {
|
|
129
|
-
text: parsed.text,
|
|
130
|
-
callId: parsed.call_id,
|
|
131
|
-
caller: parsed.caller ?? ""
|
|
132
|
-
};
|
|
133
|
-
try {
|
|
134
|
-
const response = await this.onMessage(msg);
|
|
135
|
-
if (response != null) {
|
|
136
|
-
await this.sendResponse(msg.callId, response);
|
|
137
|
-
}
|
|
138
|
-
} catch {
|
|
139
|
-
}
|
|
140
|
-
} else if (msgType === "call_start" && this.onCallStart) {
|
|
141
|
-
await this.onCallStart(parsed);
|
|
142
|
-
} else if (msgType === "call_end" && this.onCallEnd) {
|
|
143
|
-
await this.onCallEnd(parsed);
|
|
144
|
-
}
|
|
145
|
-
});
|
|
146
|
-
this.ws.on("close", () => {
|
|
147
|
-
this.ws = null;
|
|
148
|
-
});
|
|
149
|
-
}
|
|
150
|
-
async sendResponse(callId, text) {
|
|
151
|
-
if (!this.ws) throw new PatterConnectionError("Not connected");
|
|
152
|
-
this.ws.send(JSON.stringify({ type: "response", call_id: callId, text }));
|
|
153
|
-
}
|
|
154
|
-
async requestCall(fromNumber, toNumber, firstMessage = "") {
|
|
155
|
-
if (!this.ws) throw new PatterConnectionError("Not connected");
|
|
156
|
-
this.ws.send(
|
|
157
|
-
JSON.stringify({
|
|
158
|
-
type: "call",
|
|
159
|
-
from: fromNumber,
|
|
160
|
-
to: toNumber,
|
|
161
|
-
first_message: firstMessage
|
|
162
|
-
})
|
|
163
|
-
);
|
|
164
|
-
}
|
|
165
|
-
async disconnect() {
|
|
166
|
-
if (this.ws) {
|
|
167
|
-
this.ws.close();
|
|
168
|
-
this.ws = null;
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
parseMessage(raw) {
|
|
172
|
-
try {
|
|
173
|
-
const data = JSON.parse(raw);
|
|
174
|
-
if (data.type !== "message") return null;
|
|
175
|
-
return {
|
|
176
|
-
text: data.text,
|
|
177
|
-
callId: data.call_id,
|
|
178
|
-
caller: data.caller ?? ""
|
|
179
|
-
};
|
|
180
|
-
} catch {
|
|
181
|
-
return null;
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
};
|
|
65
|
+
} from "./chunk-VJVDG4V5.mjs";
|
|
66
|
+
import "./chunk-QHHBUCMT.mjs";
|
|
185
67
|
|
|
186
68
|
// src/engines/openai.ts
|
|
187
69
|
var Realtime = class {
|
|
@@ -241,86 +123,77 @@ var Static = class {
|
|
|
241
123
|
this.hostname = opts.hostname;
|
|
242
124
|
}
|
|
243
125
|
};
|
|
126
|
+
var Ngrok = class {
|
|
127
|
+
kind = "ngrok";
|
|
128
|
+
hostname;
|
|
129
|
+
constructor(opts = {}) {
|
|
130
|
+
this.hostname = opts.hostname ?? "";
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Returns the configured hostname or throws if the marker was constructed
|
|
134
|
+
* without one. Patter does not start ngrok itself — the user is expected
|
|
135
|
+
* to either supply a hostname or run ngrok out-of-band.
|
|
136
|
+
*/
|
|
137
|
+
start() {
|
|
138
|
+
if (!this.hostname) {
|
|
139
|
+
throw new Error(
|
|
140
|
+
'Ngrok requires a hostname; pass new Ngrok({ hostname: "abc.ngrok.io" })'
|
|
141
|
+
);
|
|
142
|
+
}
|
|
143
|
+
return this.hostname;
|
|
144
|
+
}
|
|
145
|
+
};
|
|
244
146
|
|
|
245
147
|
// src/client.ts
|
|
246
|
-
var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
|
|
247
|
-
var DEFAULT_REST_URL = "https://api.getpatter.com";
|
|
248
|
-
function sttConfigToDict(cfg) {
|
|
249
|
-
const out = {
|
|
250
|
-
provider: cfg.provider,
|
|
251
|
-
api_key: cfg.apiKey,
|
|
252
|
-
language: cfg.language
|
|
253
|
-
};
|
|
254
|
-
if (cfg.options) out.options = { ...cfg.options };
|
|
255
|
-
return out;
|
|
256
|
-
}
|
|
257
|
-
function ttsConfigToDict(cfg) {
|
|
258
|
-
const out = {
|
|
259
|
-
provider: cfg.provider,
|
|
260
|
-
api_key: cfg.apiKey,
|
|
261
|
-
voice: cfg.voice
|
|
262
|
-
};
|
|
263
|
-
if (cfg.options) out.options = { ...cfg.options };
|
|
264
|
-
return out;
|
|
265
|
-
}
|
|
266
148
|
var Patter = class {
|
|
267
|
-
apiKey;
|
|
268
|
-
backendUrl;
|
|
269
|
-
restUrl;
|
|
270
|
-
connection;
|
|
271
|
-
mode;
|
|
272
149
|
localConfig;
|
|
273
150
|
embeddedServer = null;
|
|
274
151
|
tunnelHandle = null;
|
|
152
|
+
/**
|
|
153
|
+
* Live `MetricsStore` for the embedded server. Returns `null` before
|
|
154
|
+
* `serve()` is called. Exposed so integrations like `PatterTool` can
|
|
155
|
+
* subscribe to per-call lifecycle events (`call_initiated`,
|
|
156
|
+
* `call_start`, `call_end`).
|
|
157
|
+
*/
|
|
158
|
+
get metricsStore() {
|
|
159
|
+
return this.embeddedServer?.metricsStore ?? null;
|
|
160
|
+
}
|
|
275
161
|
constructor(options) {
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
162
|
+
if (options.apiKey !== void 0) {
|
|
163
|
+
throw new Error(
|
|
164
|
+
"Patter Cloud is not yet available in this SDK release. Use local mode with `carrier:` and `phoneNumber:`. Cloud mode will return in a future release."
|
|
165
|
+
);
|
|
166
|
+
}
|
|
167
|
+
if (!options.phoneNumber) {
|
|
168
|
+
throw new Error("Local mode requires phoneNumber");
|
|
169
|
+
}
|
|
170
|
+
if (!options.carrier) {
|
|
171
|
+
throw new Error(
|
|
172
|
+
"Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
|
|
173
|
+
);
|
|
174
|
+
}
|
|
175
|
+
const carrier = options.carrier;
|
|
176
|
+
const tunnel = options.tunnel;
|
|
177
|
+
let tunnelWebhookUrl;
|
|
178
|
+
if (tunnel instanceof Static) {
|
|
179
|
+
if (options.webhookUrl) {
|
|
284
180
|
throw new Error(
|
|
285
|
-
"
|
|
181
|
+
"Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
|
|
286
182
|
);
|
|
287
183
|
}
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
this.mode = "local";
|
|
300
|
-
const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
|
|
301
|
-
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
302
|
-
this.localConfig = {
|
|
303
|
-
carrier,
|
|
304
|
-
phoneNumber: local.phoneNumber,
|
|
305
|
-
webhookUrl: normalizedWebhook,
|
|
306
|
-
tunnel: local.tunnel,
|
|
307
|
-
openaiKey: local.openaiKey
|
|
308
|
-
};
|
|
309
|
-
this.apiKey = "";
|
|
310
|
-
this.backendUrl = DEFAULT_BACKEND_URL2;
|
|
311
|
-
this.restUrl = DEFAULT_REST_URL;
|
|
312
|
-
this.connection = new PatterConnection("", DEFAULT_BACKEND_URL2);
|
|
313
|
-
} else {
|
|
314
|
-
const cloudOpts = options;
|
|
315
|
-
this.mode = "cloud";
|
|
316
|
-
this.localConfig = null;
|
|
317
|
-
this.apiKey = cloudOpts.apiKey;
|
|
318
|
-
this.backendUrl = cloudOpts.backendUrl ?? DEFAULT_BACKEND_URL2;
|
|
319
|
-
this.restUrl = cloudOpts.restUrl ?? DEFAULT_REST_URL;
|
|
320
|
-
this.connection = new PatterConnection(this.apiKey, this.backendUrl);
|
|
321
|
-
}
|
|
184
|
+
tunnelWebhookUrl = tunnel.hostname;
|
|
185
|
+
}
|
|
186
|
+
const rawWebhook = tunnelWebhookUrl ?? options.webhookUrl;
|
|
187
|
+
const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
|
|
188
|
+
this.localConfig = {
|
|
189
|
+
carrier,
|
|
190
|
+
phoneNumber: options.phoneNumber,
|
|
191
|
+
webhookUrl: normalizedWebhook,
|
|
192
|
+
tunnel: options.tunnel,
|
|
193
|
+
openaiKey: options.openaiKey
|
|
194
|
+
};
|
|
322
195
|
}
|
|
323
|
-
// ===
|
|
196
|
+
// === Agent definition ===
|
|
324
197
|
agent(opts) {
|
|
325
198
|
let working = { ...opts };
|
|
326
199
|
if (opts.engine) {
|
|
@@ -337,7 +210,7 @@ var Patter = class {
|
|
|
337
210
|
model: working.model ?? engine.model,
|
|
338
211
|
voice: working.voice ?? engine.voice
|
|
339
212
|
};
|
|
340
|
-
if (
|
|
213
|
+
if (!this.localConfig.openaiKey) {
|
|
341
214
|
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
342
215
|
}
|
|
343
216
|
} else if (engine instanceof ConvAI) {
|
|
@@ -387,10 +260,8 @@ var Patter = class {
|
|
|
387
260
|
}
|
|
388
261
|
return working;
|
|
389
262
|
}
|
|
263
|
+
// === Serve / test / call ===
|
|
390
264
|
async serve(opts) {
|
|
391
|
-
if (this.mode !== "local" || !this.localConfig) {
|
|
392
|
-
throw new Error("serve() is only available in local mode");
|
|
393
|
-
}
|
|
394
265
|
if (!opts.agent || typeof opts.agent !== "object") {
|
|
395
266
|
throw new TypeError("agent is required. Use phone.agent() to create one.");
|
|
396
267
|
}
|
|
@@ -415,12 +286,13 @@ var Patter = class {
|
|
|
415
286
|
if (wantsCloudflared && webhookUrl) {
|
|
416
287
|
throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
|
|
417
288
|
}
|
|
418
|
-
const { showBanner } = await import("./banner-FLR2HE5Z.mjs");
|
|
289
|
+
const { showBanner } = await import("./banner-3GNZ6VQK.mjs");
|
|
419
290
|
showBanner();
|
|
420
291
|
if (wantsCloudflared) {
|
|
421
|
-
const { startTunnel: startTunnel2 } = await import("./tunnel-O7ICMSTP.mjs");
|
|
292
|
+
const { startTunnel: startTunnel2 } = await import("./tunnel-UVR3PPAU.mjs");
|
|
422
293
|
this.tunnelHandle = await startTunnel2(port);
|
|
423
294
|
webhookUrl = this.tunnelHandle.hostname;
|
|
295
|
+
this.localConfig = { ...this.localConfig, webhookUrl };
|
|
424
296
|
}
|
|
425
297
|
if (!webhookUrl) {
|
|
426
298
|
throw new Error(
|
|
@@ -429,7 +301,7 @@ var Patter = class {
|
|
|
429
301
|
}
|
|
430
302
|
const carrier = this.localConfig.carrier;
|
|
431
303
|
const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
|
|
432
|
-
const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
|
|
304
|
+
const { autoConfigureCarrier } = await import("./carrier-config-33HQ2W4V.mjs");
|
|
433
305
|
await autoConfigureCarrier({
|
|
434
306
|
telephonyProvider,
|
|
435
307
|
twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
|
|
@@ -466,138 +338,56 @@ var Patter = class {
|
|
|
466
338
|
await this.embeddedServer.start(port);
|
|
467
339
|
}
|
|
468
340
|
async test(opts) {
|
|
469
|
-
|
|
470
|
-
throw new Error("test() is only available in local mode");
|
|
471
|
-
}
|
|
472
|
-
const { TestSession: TestSession2 } = await import("./test-mode-K2TTPRGE.mjs");
|
|
341
|
+
const { TestSession: TestSession2 } = await import("./test-mode-MVJ3SKG4.mjs");
|
|
473
342
|
const session = new TestSession2();
|
|
474
343
|
await session.run({
|
|
475
344
|
agent: opts.agent,
|
|
476
|
-
openaiKey: this.localConfig
|
|
345
|
+
openaiKey: this.localConfig.openaiKey,
|
|
477
346
|
onMessage: typeof opts.onMessage === "function" ? opts.onMessage : void 0,
|
|
478
347
|
onCallStart: opts.onCallStart,
|
|
479
348
|
onCallEnd: opts.onCallEnd
|
|
480
349
|
});
|
|
481
350
|
}
|
|
482
|
-
// === Cloud mode legacy ===
|
|
483
|
-
async connect(options) {
|
|
484
|
-
if (options.provider && options.providerKey && options.number) {
|
|
485
|
-
await this.registerNumber(
|
|
486
|
-
options.provider,
|
|
487
|
-
options.providerKey,
|
|
488
|
-
options.number,
|
|
489
|
-
options.providerSecret,
|
|
490
|
-
options.country ?? "US",
|
|
491
|
-
options.stt,
|
|
492
|
-
options.tts
|
|
493
|
-
);
|
|
494
|
-
}
|
|
495
|
-
await this.connection.connect({
|
|
496
|
-
onMessage: options.onMessage,
|
|
497
|
-
onCallStart: options.onCallStart,
|
|
498
|
-
onCallEnd: options.onCallEnd
|
|
499
|
-
});
|
|
500
|
-
}
|
|
501
351
|
async call(options) {
|
|
502
|
-
if (
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
const
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
from: phoneNumber,
|
|
521
|
-
to: localOpts.to,
|
|
522
|
-
stream_url: streamUrl,
|
|
523
|
-
stream_track: "both_tracks"
|
|
524
|
-
};
|
|
525
|
-
if (localOpts.ringTimeout !== void 0) {
|
|
526
|
-
telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
|
|
527
|
-
}
|
|
528
|
-
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
529
|
-
method: "POST",
|
|
530
|
-
headers: {
|
|
531
|
-
"Content-Type": "application/json",
|
|
532
|
-
Authorization: `Bearer ${telnyxKey}`
|
|
533
|
-
},
|
|
534
|
-
body: JSON.stringify(telnyxPayload)
|
|
535
|
-
});
|
|
536
|
-
if (!response2.ok) {
|
|
537
|
-
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
538
|
-
}
|
|
539
|
-
if (this.embeddedServer) {
|
|
540
|
-
try {
|
|
541
|
-
const body = await response2.clone().json();
|
|
542
|
-
const callId = body.data?.call_control_id;
|
|
543
|
-
if (callId) {
|
|
544
|
-
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
545
|
-
call_id: callId,
|
|
546
|
-
caller: phoneNumber,
|
|
547
|
-
callee: localOpts.to,
|
|
548
|
-
direction: "outbound"
|
|
549
|
-
});
|
|
550
|
-
}
|
|
551
|
-
} catch {
|
|
552
|
-
}
|
|
553
|
-
}
|
|
554
|
-
return;
|
|
555
|
-
}
|
|
556
|
-
const twilioSid = carrier.accountSid;
|
|
557
|
-
const twilioToken = carrier.authToken;
|
|
558
|
-
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
559
|
-
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
560
|
-
const params = new URLSearchParams({
|
|
561
|
-
To: localOpts.to,
|
|
562
|
-
From: phoneNumber,
|
|
563
|
-
Url: `https://${webhookUrl}/webhooks/twilio/voice`,
|
|
564
|
-
StatusCallback: statusCallbackUrl,
|
|
565
|
-
StatusCallbackMethod: "POST",
|
|
566
|
-
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
567
|
-
// transitions even when media never arrives.
|
|
568
|
-
StatusCallbackEvent: "initiated ringing answered completed"
|
|
569
|
-
});
|
|
570
|
-
if (localOpts.machineDetection) {
|
|
571
|
-
params.append("MachineDetection", "DetectMessageEnd");
|
|
572
|
-
params.append("AsyncAmd", "true");
|
|
573
|
-
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
574
|
-
}
|
|
575
|
-
if (localOpts.ringTimeout !== void 0) {
|
|
576
|
-
params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
|
|
577
|
-
}
|
|
578
|
-
if (localOpts.voicemailMessage && this.embeddedServer) {
|
|
579
|
-
this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
|
|
352
|
+
if (!options.to) {
|
|
353
|
+
throw new Error("'to' phone number is required");
|
|
354
|
+
}
|
|
355
|
+
if (!options.to.startsWith("+")) {
|
|
356
|
+
throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
|
|
357
|
+
}
|
|
358
|
+
const { phoneNumber, webhookUrl, carrier } = this.localConfig;
|
|
359
|
+
const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
|
|
360
|
+
if (carrier.kind === "telnyx") {
|
|
361
|
+
const telnyxKey = carrier.apiKey;
|
|
362
|
+
const connectionId = carrier.connectionId;
|
|
363
|
+
const telnyxPayload = {
|
|
364
|
+
connection_id: connectionId,
|
|
365
|
+
from: phoneNumber,
|
|
366
|
+
to: options.to
|
|
367
|
+
};
|
|
368
|
+
if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
|
|
369
|
+
telnyxPayload.timeout_secs = Math.max(1, Math.floor(effectiveRingTimeout));
|
|
580
370
|
}
|
|
581
|
-
const
|
|
371
|
+
const response2 = await fetch("https://api.telnyx.com/v2/calls", {
|
|
582
372
|
method: "POST",
|
|
583
373
|
headers: {
|
|
584
|
-
"Content-Type": "application/
|
|
585
|
-
Authorization: `
|
|
374
|
+
"Content-Type": "application/json",
|
|
375
|
+
Authorization: `Bearer ${telnyxKey}`
|
|
586
376
|
},
|
|
587
|
-
body:
|
|
377
|
+
body: JSON.stringify(telnyxPayload)
|
|
588
378
|
});
|
|
589
|
-
if (!
|
|
590
|
-
throw new ProvisionError(`Failed to initiate call: ${await
|
|
379
|
+
if (!response2.ok) {
|
|
380
|
+
throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
|
|
591
381
|
}
|
|
592
382
|
if (this.embeddedServer) {
|
|
593
383
|
try {
|
|
594
|
-
const body = await
|
|
595
|
-
const
|
|
596
|
-
if (
|
|
384
|
+
const body = await response2.clone().json();
|
|
385
|
+
const callId = body.data?.call_control_id;
|
|
386
|
+
if (callId) {
|
|
597
387
|
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
598
|
-
call_id:
|
|
388
|
+
call_id: callId,
|
|
599
389
|
caller: phoneNumber,
|
|
600
|
-
callee:
|
|
390
|
+
callee: options.to,
|
|
601
391
|
direction: "outbound"
|
|
602
392
|
});
|
|
603
393
|
}
|
|
@@ -606,21 +396,59 @@ var Patter = class {
|
|
|
606
396
|
}
|
|
607
397
|
return;
|
|
608
398
|
}
|
|
609
|
-
const
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
399
|
+
const twilioSid = carrier.accountSid;
|
|
400
|
+
const twilioToken = carrier.authToken;
|
|
401
|
+
const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
|
|
402
|
+
const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
|
|
403
|
+
const streamUrl = `wss://${webhookUrl}/ws/stream/outbound`;
|
|
404
|
+
const inlineTwiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${streamUrl}"/></Connect></Response>`;
|
|
405
|
+
const params = new URLSearchParams({
|
|
406
|
+
To: options.to,
|
|
407
|
+
From: phoneNumber,
|
|
408
|
+
Twiml: inlineTwiml,
|
|
409
|
+
StatusCallback: statusCallbackUrl,
|
|
410
|
+
StatusCallbackMethod: "POST",
|
|
411
|
+
// Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
|
|
412
|
+
// transitions even when media never arrives.
|
|
413
|
+
StatusCallbackEvent: "initiated ringing answered completed"
|
|
414
|
+
});
|
|
415
|
+
if (options.machineDetection) {
|
|
416
|
+
params.append("MachineDetection", "DetectMessageEnd");
|
|
417
|
+
params.append("AsyncAmd", "true");
|
|
418
|
+
params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
|
|
419
|
+
}
|
|
420
|
+
if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
|
|
421
|
+
params.append("Timeout", String(Math.max(1, Math.floor(effectiveRingTimeout))));
|
|
422
|
+
}
|
|
423
|
+
if (options.voicemailMessage && this.embeddedServer) {
|
|
424
|
+
this.embeddedServer.voicemailMessage = options.voicemailMessage;
|
|
425
|
+
}
|
|
426
|
+
const response = await fetch(url, {
|
|
427
|
+
method: "POST",
|
|
428
|
+
headers: {
|
|
429
|
+
"Content-Type": "application/x-www-form-urlencoded",
|
|
430
|
+
Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
|
|
431
|
+
},
|
|
432
|
+
body: params.toString()
|
|
433
|
+
});
|
|
434
|
+
if (!response.ok) {
|
|
435
|
+
throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
|
|
436
|
+
}
|
|
437
|
+
if (this.embeddedServer) {
|
|
438
|
+
try {
|
|
439
|
+
const body = await response.clone().json();
|
|
440
|
+
const callSid = body.sid;
|
|
441
|
+
if (callSid) {
|
|
442
|
+
this.embeddedServer.metricsStore.recordCallInitiated({
|
|
443
|
+
call_id: callSid,
|
|
444
|
+
caller: phoneNumber,
|
|
445
|
+
callee: options.to,
|
|
446
|
+
direction: "outbound"
|
|
447
|
+
});
|
|
448
|
+
}
|
|
449
|
+
} catch {
|
|
617
450
|
}
|
|
618
451
|
}
|
|
619
|
-
await this.connection.requestCall(
|
|
620
|
-
cloudOpts.fromNumber ?? "",
|
|
621
|
-
cloudOpts.to,
|
|
622
|
-
cloudOpts.firstMessage ?? ""
|
|
623
|
-
);
|
|
624
452
|
}
|
|
625
453
|
async disconnect() {
|
|
626
454
|
if (this.tunnelHandle) {
|
|
@@ -631,86 +459,6 @@ var Patter = class {
|
|
|
631
459
|
await this.embeddedServer.stop();
|
|
632
460
|
this.embeddedServer = null;
|
|
633
461
|
}
|
|
634
|
-
await this.connection.disconnect();
|
|
635
|
-
}
|
|
636
|
-
// === Agent Management ===
|
|
637
|
-
async createAgent(opts) {
|
|
638
|
-
const response = await fetch(`${this.restUrl}/api/agents`, {
|
|
639
|
-
method: "POST",
|
|
640
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
641
|
-
body: JSON.stringify({
|
|
642
|
-
name: opts.name,
|
|
643
|
-
system_prompt: opts.systemPrompt,
|
|
644
|
-
model: opts.model ?? "gpt-4o-mini-realtime-preview",
|
|
645
|
-
voice: opts.voice ?? "alloy",
|
|
646
|
-
voice_provider: opts.voiceProvider ?? "openai",
|
|
647
|
-
language: opts.language ?? "en",
|
|
648
|
-
first_message: opts.firstMessage ?? null,
|
|
649
|
-
tools: opts.tools?.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters, webhook_url: t.webhookUrl })) ?? null
|
|
650
|
-
})
|
|
651
|
-
});
|
|
652
|
-
if (response.status !== 201) throw new ProvisionError(`Failed to create agent: ${await response.text()}`);
|
|
653
|
-
const data = await response.json();
|
|
654
|
-
return { id: data.id, name: data.name, systemPrompt: data.system_prompt, model: data.model, voice: data.voice, voiceProvider: data.voice_provider, language: data.language, firstMessage: data.first_message, tools: data.tools };
|
|
655
|
-
}
|
|
656
|
-
async listAgents() {
|
|
657
|
-
const response = await fetch(`${this.restUrl}/api/agents`, { headers: { "X-API-Key": this.apiKey } });
|
|
658
|
-
if (!response.ok) throw new ProvisionError(`Failed to list agents: ${response.status}`);
|
|
659
|
-
const data = await response.json();
|
|
660
|
-
return data.map((a) => ({ id: a.id, name: a.name, systemPrompt: a.system_prompt, model: a.model, voice: a.voice, voiceProvider: a.voice_provider, language: a.language, firstMessage: a.first_message, tools: a.tools }));
|
|
661
|
-
}
|
|
662
|
-
async buyNumber(opts = {}) {
|
|
663
|
-
const response = await fetch(`${this.restUrl}/api/numbers/buy`, {
|
|
664
|
-
method: "POST",
|
|
665
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
666
|
-
body: JSON.stringify({ country: opts.country ?? "US", provider: opts.provider ?? "twilio" })
|
|
667
|
-
});
|
|
668
|
-
if (response.status !== 201) throw new ProvisionError(`Failed to buy number: ${await response.text()}`);
|
|
669
|
-
const data = await response.json();
|
|
670
|
-
return { id: data.id, number: data.number, provider: data.provider, country: data.country, status: data.status, agentId: data.agent_id };
|
|
671
|
-
}
|
|
672
|
-
async assignAgent(numberId, agentId) {
|
|
673
|
-
const response = await fetch(`${this.restUrl}/api/phone-numbers/${numberId}/assign-agent`, {
|
|
674
|
-
method: "POST",
|
|
675
|
-
headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
|
|
676
|
-
body: JSON.stringify({ agent_id: agentId })
|
|
677
|
-
});
|
|
678
|
-
if (response.status !== 200) throw new ProvisionError(`Failed to assign agent: ${await response.text()}`);
|
|
679
|
-
}
|
|
680
|
-
async listCalls(limit = 50) {
|
|
681
|
-
if (!Number.isInteger(limit) || limit < 1 || limit > 1e3) {
|
|
682
|
-
throw new RangeError(`limit must be an integer between 1 and 1000, got ${limit}`);
|
|
683
|
-
}
|
|
684
|
-
const response = await fetch(`${this.restUrl}/api/calls?limit=${limit}`, { headers: { "X-API-Key": this.apiKey } });
|
|
685
|
-
if (!response.ok) throw new ProvisionError(`Failed to list calls: ${response.status}`);
|
|
686
|
-
const data = await response.json();
|
|
687
|
-
return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
|
|
688
|
-
}
|
|
689
|
-
// Internal
|
|
690
|
-
async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
|
|
691
|
-
const credentials = { api_key: providerKey };
|
|
692
|
-
if (providerSecret) credentials.api_secret = providerSecret;
|
|
693
|
-
const response = await fetch(`${this.restUrl}/api/phone-numbers`, {
|
|
694
|
-
method: "POST",
|
|
695
|
-
headers: {
|
|
696
|
-
"Content-Type": "application/json",
|
|
697
|
-
"X-API-Key": this.apiKey
|
|
698
|
-
},
|
|
699
|
-
body: JSON.stringify({
|
|
700
|
-
number,
|
|
701
|
-
provider,
|
|
702
|
-
provider_credentials: credentials,
|
|
703
|
-
country,
|
|
704
|
-
stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
|
|
705
|
-
tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
|
|
706
|
-
})
|
|
707
|
-
});
|
|
708
|
-
if (response.status === 409) return;
|
|
709
|
-
if (response.status !== 201) {
|
|
710
|
-
throw new ProvisionError(
|
|
711
|
-
`Failed to register number: ${await response.text()}`
|
|
712
|
-
);
|
|
713
|
-
}
|
|
714
462
|
}
|
|
715
463
|
};
|
|
716
464
|
|
|
@@ -830,6 +578,46 @@ function elevenlabs(opts) {
|
|
|
830
578
|
function openaiTts(opts) {
|
|
831
579
|
return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
|
|
832
580
|
}
|
|
581
|
+
function soniox(opts) {
|
|
582
|
+
return new STTConfigImpl("soniox", opts.apiKey, opts.language ?? "en");
|
|
583
|
+
}
|
|
584
|
+
function speechmatics(_opts) {
|
|
585
|
+
throw new Error(
|
|
586
|
+
"speechmatics() is Python-only right now \u2014 the TS Speechmatics adapter has not shipped yet. Use the Python SDK (sdk-py) or pick another STT provider such as deepgram() / assemblyai() / soniox()."
|
|
587
|
+
);
|
|
588
|
+
}
|
|
589
|
+
function assemblyai(opts) {
|
|
590
|
+
return new STTConfigImpl("assemblyai", opts.apiKey, opts.language ?? "en");
|
|
591
|
+
}
|
|
592
|
+
function cartesia(opts) {
|
|
593
|
+
return new TTSConfigImpl(
|
|
594
|
+
"cartesia",
|
|
595
|
+
opts.apiKey,
|
|
596
|
+
opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
|
|
597
|
+
);
|
|
598
|
+
}
|
|
599
|
+
function rime(opts) {
|
|
600
|
+
return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
|
|
601
|
+
}
|
|
602
|
+
function lmnt(opts) {
|
|
603
|
+
return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
|
|
604
|
+
}
|
|
605
|
+
function ultravox(opts) {
|
|
606
|
+
return {
|
|
607
|
+
provider: "ultravox",
|
|
608
|
+
apiKey: opts.apiKey,
|
|
609
|
+
model: opts.model,
|
|
610
|
+
voice: opts.voice
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
/**
 * Build a plain engine descriptor for the "gemini_live" provider.
 *
 * @param opts Options object; `apiKey`, `model` and `voice` are copied
 *   through unchanged (missing ones stay `undefined`).
 * @returns `{ provider: "gemini_live", apiKey, model, voice }`.
 */
function geminiLive(opts) {
  const { apiKey, model, voice } = opts;
  return { provider: "gemini_live", apiKey, model, voice };
}
|
|
833
621
|
|
|
834
622
|
// src/fallback-provider.ts
|
|
835
623
|
var AllProvidersFailedError = class extends Error {
|
|
@@ -1028,13 +816,275 @@ var FallbackLLMProvider = class {
|
|
|
1028
816
|
}
|
|
1029
817
|
};
|
|
1030
818
|
|
|
819
|
+
// src/integrations/patter-tool.ts
|
|
820
|
+
import { EventEmitter } from "events";
|
|
821
|
+
/** JSON-Schema for the tool arguments, shared by every schema exporter. */
var PARAMETERS_SCHEMA = {
  type: "object",
  properties: {
    to: {
      type: "string",
      description: 'Destination phone number in E.164 format (e.g. "+15551234567"). Required.'
    },
    goal: {
      type: "string",
      description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call."
    },
    first_message: {
      type: "string",
      description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting."
    },
    max_duration_sec: {
      type: "integer",
      description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.",
      minimum: 5,
      maximum: 1800
    }
  },
  required: ["to"]
};
var DEFAULT_NAME = "make_phone_call";
var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
/**
 * Wraps a Patter instance as an LLM-callable "make a phone call" tool.
 *
 * `execute()` dials through `phone.call()`, captures the call id from the
 * `call_initiated` SSE event emitted on `phone.metricsStore`, and resolves
 * once the `onCallEnd` callback registered via `phone.serve()` reports that
 * call id (or rejects on timeout / shutdown).
 */
var PatterTool = class _PatterTool {
  name;
  description;
  phone;
  agent;
  maxDurationSec;
  recording;
  started = false;
  /** In-flight `start()` promise, shared so concurrent callers (e.g. two
   *  parallel `execute()` calls on a cold tool) run `serve()` and register
   *  the SSE listener exactly once. Reset to `null` when it settles. */
  startPromise = null;
  /** Resolver for the next `call_initiated` SSE event. Only set inside the
   *  dial mutex (`dialQueue`), so two parallel `execute()` calls never share
   *  it and never lose a dispatch. */
  pendingDial = null;
  /** Mutex that serializes the dial → call_id capture critical section.
   *  Each `execute()` chains a continuation onto this promise so the
   *  `pendingDial` slot is owned by exactly one caller at a time. */
  dialQueue = Promise.resolve();
  /** Captured SSE listener so `stop()` can detach it (prevents leaks when
   *  the underlying Patter instance outlives this tool). */
  sseListener = null;
  /** Captured Patter metrics store, for cleanup in `stop()`. */
  metricsStoreRef = null;
  /** call_id → `{ resolve, reject, timer, startedAt }` for calls in flight. */
  pending = /* @__PURE__ */ new Map();
  bus = new EventEmitter();
  /** How long to wait for the `call_initiated` SSE before failing the dial. */
  static DIAL_CAPTURE_TIMEOUT_MS = 1e4;
  /**
   * Clamp a duration to the 5..1800 second window advertised in the schema.
   * A value that coerces to NaN (e.g. a non-numeric string supplied by an
   * LLM) is replaced by `fallback`; otherwise NaN would flow into
   * `setTimeout` and the call would "time out" almost immediately.
   */
  static clampDurationSec(value, fallback) {
    const seconds = Number(value);
    return Math.max(5, Math.min(1800, Number.isNaN(seconds) ? fallback : seconds));
  }
  /**
   * @param opts.phone Patter instance (required).
   * @param opts.agent Agent config passed to `phone.agent()`; required by `start()`.
   * @param opts.name Tool name; defaults to "make_phone_call".
   * @param opts.description Tool description shown to the model.
   * @param opts.maxDurationSec Default per-call timeout, clamped to 5..1800 (default 180).
   * @param opts.recording Forwarded to `phone.serve()`; defaults to false.
   * @throws {Error} When `opts.phone` is missing.
   */
  constructor(opts) {
    if (!opts.phone) {
      throw new Error("PatterTool: `phone` (a Patter instance) is required.");
    }
    this.phone = opts.phone;
    this.agent = opts.agent;
    this.name = opts.name ?? DEFAULT_NAME;
    this.description = opts.description ?? DEFAULT_DESCRIPTION;
    this.maxDurationSec = _PatterTool.clampDurationSec(opts.maxDurationSec ?? 180, 180);
    this.recording = opts.recording ?? false;
  }
  // --- Schema exporters ---------------------------------------------------
  /** OpenAI Chat Completions / Assistants tool spec. */
  openaiSchema() {
    return {
      type: "function",
      function: {
        name: this.name,
        description: this.description,
        parameters: PARAMETERS_SCHEMA
      }
    };
  }
  /** Anthropic Messages API tool spec. */
  anthropicSchema() {
    return {
      name: this.name,
      description: this.description,
      input_schema: PARAMETERS_SCHEMA
    };
  }
  /**
   * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
   * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
   */
  hermesSchema() {
    return {
      name: this.name,
      description: this.description,
      parameters: PARAMETERS_SCHEMA
    };
  }
  // --- Lifecycle ----------------------------------------------------------
  /**
   * Start the underlying Patter server. Idempotent, including under
   * concurrency: callers that arrive while a start is in flight await the
   * same promise instead of calling `serve()` again.
   *
   * @throws {Error} When no `agent` config was supplied or the metrics store
   *   is missing after `serve()`. A failed start can be retried.
   */
  async start() {
    if (this.started) return;
    if (!this.startPromise) {
      this.startPromise = this.startOnce().finally(() => {
        this.startPromise = null;
      });
    }
    await this.startPromise;
  }
  /** Single start attempt: serve, then hook the `sse` stream of the metrics store. */
  async startOnce() {
    if (!this.agent) {
      throw new Error(
        "PatterTool.start: `agent` config is required. Pass `{ stt, llm, tts }` or an `engine` (e.g. OpenAIRealtime) when constructing PatterTool."
      );
    }
    const builtAgent = this.phone.agent(this.agent);
    await this.phone.serve({
      agent: builtAgent,
      recording: this.recording,
      onCallEnd: this.onCallEndHandler.bind(this)
    });
    const store = this.phone.metricsStore;
    if (!store) {
      throw new Error(
        "PatterTool.start: phone.metricsStore is null after serve() \u2014 is the dashboard disabled?"
      );
    }
    const listener = (event) => {
      if (event.type === "call_initiated" && this.pendingDial) {
        const callId = event.data.call_id || "";
        if (callId) {
          // Clear the slot before dispatching so one event feeds one dial.
          const dispatch = this.pendingDial;
          this.pendingDial = null;
          dispatch(callId);
        }
      }
    };
    store.on("sse", listener);
    this.sseListener = listener;
    this.metricsStoreRef = store;
    this.started = true;
  }
  /** Stop the underlying Patter server (and reject any pending calls). */
  async stop() {
    if (!this.started) return;
    if (this.metricsStoreRef && this.sseListener) {
      this.metricsStoreRef.off("sse", this.sseListener);
    }
    this.sseListener = null;
    this.metricsStoreRef = null;
    this.pendingDial = null;
    for (const [, p] of this.pending) {
      clearTimeout(p.timer);
      p.reject(new Error("PatterTool: shutdown while call pending"));
    }
    this.pending.clear();
    const stoppable = this.phone;
    if (typeof stoppable.stop === "function") {
      await stoppable.stop();
    }
    this.started = false;
  }
  // --- Execution ----------------------------------------------------------
  /**
   * Place one outbound call and wait for it to end.
   *
   * @param args `{ to, goal?, first_message?, max_duration_sec? }` as
   *   described by the parameters schema.
   * @returns Promise resolving to `{ call_id, status, duration_seconds,
   *   cost_usd, transcript, metrics }` once `onCallEnd` fires for the call.
   * @throws {Error} When `to` is not a string starting with "+", when the
   *   dial is not acknowledged in time, when the call outlives its timeout,
   *   or when the tool is stopped while the call is pending.
   */
  async execute(args) {
    // Validate first so a malformed request never boots the server.
    if (!args || typeof args.to !== "string" || !args.to.startsWith("+")) {
      throw new Error('PatterTool.execute: `to` must be an E.164 phone number (e.g. "+15551234567").');
    }
    if (!this.started) await this.start();
    const timeoutSec = _PatterTool.clampDurationSec(
      args.max_duration_sec ?? this.maxDurationSec,
      this.maxDurationSec
    );
    const baseAgent = this.agent ?? {};
    const overrideAgent = this.phone.agent({
      ...baseAgent,
      ...args.goal !== void 0 ? { systemPrompt: args.goal } : {},
      ...args.first_message !== void 0 ? { firstMessage: args.first_message } : {}
    });
    const callId = await this.acquireCallId(args.to, overrideAgent);
    return new Promise((resolve, reject) => {
      // NOTE(review): this timer only rejects the returned promise; nothing
      // in this class hangs up the call. Confirm the deadline is enforced
      // elsewhere, since the schema text says the call is force-ended.
      const timer = setTimeout(() => {
        this.pending.delete(callId);
        reject(new Error(`PatterTool.execute: call ${callId} exceeded ${timeoutSec}s timeout`));
      }, timeoutSec * 1e3);
      this.pending.set(callId, {
        resolve,
        reject,
        timer,
        startedAt: Date.now() / 1e3
      });
    });
  }
  /** Issue the outbound dial under the mutex and return its assigned call_id. */
  async acquireCallId(to, agent) {
    let release;
    const slot = new Promise((r) => {
      release = r;
    });
    const previous = this.dialQueue;
    this.dialQueue = previous.then(() => slot);
    await previous;
    let captureTimer = null;
    try {
      const callIdPromise = new Promise((resolve, reject) => {
        this.pendingDial = resolve;
        captureTimer = setTimeout(() => {
          this.pendingDial = null;
          reject(
            new Error(
              `PatterTool.execute: did not observe call_initiated within ${_PatterTool.DIAL_CAPTURE_TIMEOUT_MS}ms`
            )
          );
        }, _PatterTool.DIAL_CAPTURE_TIMEOUT_MS);
      });
      // The capture timer can fire while `phone.call()` is still pending,
      // i.e. before the `await callIdPromise` below attaches a handler.
      // Mark the promise as handled so that rejection is not reported as an
      // unhandled rejection; the error is still thrown by the await below.
      callIdPromise.catch(() => {
      });
      await this.phone.call({
        to,
        agent
      });
      return await callIdPromise;
    } finally {
      if (captureTimer) clearTimeout(captureTimer);
      this.pendingDial = null;
      release();
    }
  }
  /**
   * Hermes-style handler: `(args, kwargs) => Promise<string>` returning a JSON
   * string with either the result envelope or an `{"error": "..."}` payload.
   * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
   * the same wire contract.
   */
  hermesHandler() {
    return async (args) => {
      try {
        const result = await this.execute(args);
        return JSON.stringify(result);
      } catch (err) {
        return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
      }
    };
  }
  // --- Internal: onCallEnd dispatcher -------------------------------------
  /**
   * `onCallEnd` callback handed to `phone.serve()`. Settles the matching
   * pending `execute()` promise, or emits `orphan_end` on `bus` when no
   * caller is waiting for that call id. Payloads without a call id
   * (including a nullish payload) are ignored.
   */
  async onCallEndHandler(data) {
    const callId = data?.call_id || "";
    if (!callId) return;
    const pending = this.pending.get(callId);
    if (!pending) {
      this.bus.emit("orphan_end", { call_id: callId, data });
      return;
    }
    clearTimeout(pending.timer);
    this.pending.delete(callId);
    const metrics = data.metrics && typeof data.metrics === "object" ? data.metrics : null;
    const cost = metrics && typeof metrics.cost === "object" && metrics.cost && typeof metrics.cost.total === "number" ? metrics.cost.total : void 0;
    // Prefer the reported duration; otherwise measure from when we started waiting.
    const duration = typeof metrics?.duration_seconds === "number" ? metrics?.duration_seconds : Math.max(0, Date.now() / 1e3 - pending.startedAt);
    const transcript = Array.isArray(data.transcript) ? data.transcript : [];
    const status = data.status || "completed";
    pending.resolve({
      call_id: callId,
      status,
      duration_seconds: duration,
      cost_usd: cost,
      transcript,
      metrics
    });
  }
};
|
|
1080
|
+
|
|
1031
1081
|
// src/providers/gemini-live.ts
|
|
1032
1082
|
var GEMINI_DEFAULT_INPUT_SR = 16e3;
|
|
1033
1083
|
var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
|
|
1034
1084
|
var GeminiLiveAdapter = class {
|
|
1035
1085
|
constructor(apiKey, options = {}) {
|
|
1036
1086
|
this.apiKey = apiKey;
|
|
1037
|
-
this.model = options.model ?? "gemini-2.
|
|
1087
|
+
this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
|
|
1038
1088
|
this.voice = options.voice ?? "Puck";
|
|
1039
1089
|
this.instructions = options.instructions ?? "";
|
|
1040
1090
|
this.language = options.language ?? "en-US";
|
|
@@ -1057,18 +1107,27 @@ var GeminiLiveAdapter = class {
|
|
|
1057
1107
|
receiveLoop = null;
|
|
1058
1108
|
handlers = [];
|
|
1059
1109
|
running = false;
|
|
1110
|
+
/**
|
|
1111
|
+
* Tracks call_id -> function name so tool responses can be sent back with
|
|
1112
|
+
* the correct `name` field (Gemini expects the original function name,
|
|
1113
|
+
* not the call_id).
|
|
1114
|
+
*/
|
|
1115
|
+
pendingToolCalls = /* @__PURE__ */ new Map();
|
|
1060
1116
|
async connect() {
|
|
1061
1117
|
let genaiModule;
|
|
1062
1118
|
try {
|
|
1063
1119
|
const modName = "@google/genai";
|
|
1064
1120
|
genaiModule = await import(modName);
|
|
1065
|
-
} catch
|
|
1121
|
+
} catch {
|
|
1066
1122
|
throw new Error(
|
|
1067
|
-
|
|
1123
|
+
'\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
|
|
1068
1124
|
);
|
|
1069
1125
|
}
|
|
1070
1126
|
const { GoogleGenAI } = genaiModule;
|
|
1071
|
-
this.client = new GoogleGenAI({
|
|
1127
|
+
this.client = new GoogleGenAI({
|
|
1128
|
+
apiKey: this.apiKey,
|
|
1129
|
+
httpOptions: { apiVersion: "v1alpha" }
|
|
1130
|
+
});
|
|
1072
1131
|
const config = {
|
|
1073
1132
|
responseModalities: ["AUDIO"],
|
|
1074
1133
|
speechConfig: {
|
|
@@ -1125,9 +1184,11 @@ var GeminiLiveAdapter = class {
|
|
|
1125
1184
|
async sendFunctionResult(callId, result) {
|
|
1126
1185
|
if (!this.session) return;
|
|
1127
1186
|
const sess = this.session;
|
|
1187
|
+
const name = this.pendingToolCalls.get(callId) ?? callId;
|
|
1188
|
+
this.pendingToolCalls.delete(callId);
|
|
1128
1189
|
await sess.sendToolResponse?.({
|
|
1129
1190
|
functionResponses: [
|
|
1130
|
-
{ id: callId, name
|
|
1191
|
+
{ id: callId, name, response: { result } }
|
|
1131
1192
|
]
|
|
1132
1193
|
});
|
|
1133
1194
|
}
|
|
@@ -1171,9 +1232,14 @@ var GeminiLiveAdapter = class {
|
|
|
1171
1232
|
if (r.toolCall) {
|
|
1172
1233
|
for (const fn of r.toolCall.functionCalls ?? []) {
|
|
1173
1234
|
const args = fn.args ?? {};
|
|
1235
|
+
const callId = fn.id ?? "";
|
|
1236
|
+
const fnName = fn.name ?? "";
|
|
1237
|
+
if (callId && fnName) {
|
|
1238
|
+
this.pendingToolCalls.set(callId, fnName);
|
|
1239
|
+
}
|
|
1174
1240
|
await this.emit("function_call", {
|
|
1175
|
-
call_id:
|
|
1176
|
-
name:
|
|
1241
|
+
call_id: callId,
|
|
1242
|
+
name: fnName,
|
|
1177
1243
|
arguments: typeof args === "string" ? args : JSON.stringify(args)
|
|
1178
1244
|
});
|
|
1179
1245
|
}
|
|
@@ -1200,11 +1266,12 @@ var GeminiLiveAdapter = class {
|
|
|
1200
1266
|
await this.receiveLoop.catch(() => void 0);
|
|
1201
1267
|
this.receiveLoop = null;
|
|
1202
1268
|
}
|
|
1269
|
+
this.pendingToolCalls.clear();
|
|
1203
1270
|
}
|
|
1204
1271
|
};
|
|
1205
1272
|
|
|
1206
1273
|
// src/providers/ultravox-realtime.ts
|
|
1207
|
-
import
|
|
1274
|
+
import WebSocket from "ws";
|
|
1208
1275
|
var ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
|
|
1209
1276
|
var ULTRAVOX_DEFAULT_SR = 16e3;
|
|
1210
1277
|
var UltravoxRealtimeAdapter = class {
|
|
@@ -1241,7 +1308,6 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1241
1308
|
outputSampleRate: this.sampleRate
|
|
1242
1309
|
}
|
|
1243
1310
|
},
|
|
1244
|
-
firstSpeaker: this.firstMessage ? "FIRST_SPEAKER_AGENT" : "FIRST_SPEAKER_USER",
|
|
1245
1311
|
recordingEnabled: false
|
|
1246
1312
|
};
|
|
1247
1313
|
if (this.voice) body.voice = this.voice;
|
|
@@ -1251,6 +1317,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1251
1317
|
body.initialMessages = [
|
|
1252
1318
|
{ role: "MESSAGE_ROLE_AGENT", text: this.firstMessage }
|
|
1253
1319
|
];
|
|
1320
|
+
} else {
|
|
1321
|
+
body.firstSpeaker = "FIRST_SPEAKER_USER";
|
|
1254
1322
|
}
|
|
1255
1323
|
if (this.tools?.length) {
|
|
1256
1324
|
body.selectedTools = this.tools.map((t) => ({
|
|
@@ -1275,7 +1343,7 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1275
1343
|
}
|
|
1276
1344
|
const call = await resp.json();
|
|
1277
1345
|
if (!call.joinUrl) throw new Error("Ultravox response missing joinUrl");
|
|
1278
|
-
this.ws = new
|
|
1346
|
+
this.ws = new WebSocket(call.joinUrl);
|
|
1279
1347
|
await new Promise((resolve, reject) => {
|
|
1280
1348
|
const ws = this.ws;
|
|
1281
1349
|
const onOpen = () => {
|
|
@@ -1300,14 +1368,16 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1300
1368
|
});
|
|
1301
1369
|
}
|
|
1302
1370
|
sendAudio(pcm) {
|
|
1303
|
-
if (!this.ws || this.ws.readyState !==
|
|
1371
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1304
1372
|
this.ws.send(pcm, { binary: true });
|
|
1305
1373
|
}
|
|
1306
1374
|
async sendText(text) {
|
|
1307
|
-
this.ws
|
|
1375
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1376
|
+
this.ws.send(JSON.stringify({ type: "input_text_message", text }));
|
|
1308
1377
|
}
|
|
1309
1378
|
async sendFunctionResult(callId, result) {
|
|
1310
|
-
this.ws
|
|
1379
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1380
|
+
this.ws.send(
|
|
1311
1381
|
JSON.stringify({
|
|
1312
1382
|
type: "client_tool_result",
|
|
1313
1383
|
invocationId: callId,
|
|
@@ -1317,7 +1387,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
1317
1387
|
);
|
|
1318
1388
|
}
|
|
1319
1389
|
cancelResponse() {
|
|
1320
|
-
this.ws
|
|
1390
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1391
|
+
this.ws.send(JSON.stringify({ type: "playback_clear_buffer" }));
|
|
1321
1392
|
}
|
|
1322
1393
|
onEvent(handler) {
|
|
1323
1394
|
this.handlers.push(handler);
|
|
@@ -1398,7 +1469,7 @@ async function loadCron() {
|
|
|
1398
1469
|
try {
|
|
1399
1470
|
const imported = await import(
|
|
1400
1471
|
/* @vite-ignore */
|
|
1401
|
-
"./node-cron-
|
|
1472
|
+
"./node-cron-6PRPSBG5.mjs"
|
|
1402
1473
|
);
|
|
1403
1474
|
cronModule = imported && imported.default ? imported.default : imported;
|
|
1404
1475
|
return cronModule;
|
|
@@ -1506,6 +1577,7 @@ function scheduleInterval(intervalOrOpts, callback) {
|
|
|
1506
1577
|
|
|
1507
1578
|
// src/stt/deepgram.ts
|
|
1508
1579
|
var STT = class extends DeepgramSTT {
|
|
1580
|
+
static providerKey = "deepgram";
|
|
1509
1581
|
constructor(opts = {}) {
|
|
1510
1582
|
const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
|
|
1511
1583
|
if (!key) {
|
|
@@ -1533,6 +1605,7 @@ var STT = class extends DeepgramSTT {
|
|
|
1533
1605
|
// src/providers/whisper-stt.ts
|
|
1534
1606
|
var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
|
|
1535
1607
|
var DEFAULT_BUFFER_SIZE = 16e3 * 2;
|
|
1608
|
+
var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
|
|
1536
1609
|
function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
|
|
1537
1610
|
const dataSize = pcm.length;
|
|
1538
1611
|
const header = Buffer.alloc(44);
|
|
@@ -1556,33 +1629,63 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1556
1629
|
model;
|
|
1557
1630
|
language;
|
|
1558
1631
|
bufferSize;
|
|
1559
|
-
|
|
1560
|
-
|
|
1632
|
+
responseFormat;
|
|
1633
|
+
// Accumulate chunks in an array and concat once on flush — avoids the
|
|
1634
|
+
// per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
|
|
1635
|
+
// dominates CPU when the phone leg delivers 20 ms frames.
|
|
1636
|
+
chunks = [];
|
|
1637
|
+
bufferedBytes = 0;
|
|
1638
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
1561
1639
|
running = false;
|
|
1562
1640
|
pendingTranscriptions = [];
|
|
1563
|
-
|
|
1641
|
+
/**
|
|
1642
|
+
* @param apiKey OpenAI API key.
|
|
1643
|
+
* @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
|
|
1644
|
+
* @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
|
|
1645
|
+
* @param bufferSize Bytes of PCM16 to buffer before each transcription request.
|
|
1646
|
+
* @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
|
|
1647
|
+
*
|
|
1648
|
+
* Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
|
|
1649
|
+
* for cross-language parity. Pre-0.5.3 the TS positional order was
|
|
1650
|
+
* ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
|
|
1651
|
+
* the old order will need to swap ``language`` and ``model``.
|
|
1652
|
+
*/
|
|
1653
|
+
constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
|
|
1654
|
+
if (!ALLOWED_MODELS.has(model)) {
|
|
1655
|
+
throw new Error(
|
|
1656
|
+
`WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
|
|
1657
|
+
);
|
|
1658
|
+
}
|
|
1564
1659
|
this.apiKey = apiKey;
|
|
1565
1660
|
this.model = model;
|
|
1566
1661
|
this.language = language;
|
|
1567
1662
|
this.bufferSize = bufferSize;
|
|
1663
|
+
this.responseFormat = responseFormat;
|
|
1568
1664
|
}
|
|
1569
1665
|
/** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
|
|
1570
1666
|
static forTwilio(apiKey, language = "en", model = "whisper-1") {
|
|
1571
|
-
return new _WhisperSTT(apiKey,
|
|
1667
|
+
return new _WhisperSTT(apiKey, language, model);
|
|
1572
1668
|
}
|
|
1573
1669
|
async connect() {
|
|
1574
1670
|
this.running = true;
|
|
1575
|
-
this.
|
|
1671
|
+
this.chunks = [];
|
|
1672
|
+
this.bufferedBytes = 0;
|
|
1576
1673
|
}
|
|
1577
1674
|
sendAudio(audio) {
|
|
1578
1675
|
if (!this.running) return;
|
|
1579
|
-
this.
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1676
|
+
this.chunks.push(audio);
|
|
1677
|
+
this.bufferedBytes += audio.length;
|
|
1678
|
+
if (this.bufferedBytes >= this.bufferSize) {
|
|
1679
|
+
const pcm = this.flushChunks();
|
|
1583
1680
|
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1584
1681
|
}
|
|
1585
1682
|
}
|
|
1683
|
+
flushChunks() {
|
|
1684
|
+
const pcm = this.chunks.length === 1 ? this.chunks[0] : Buffer.concat(this.chunks, this.bufferedBytes);
|
|
1685
|
+
this.chunks = [];
|
|
1686
|
+
this.bufferedBytes = 0;
|
|
1687
|
+
return pcm;
|
|
1688
|
+
}
|
|
1586
1689
|
trackTranscription(promise) {
|
|
1587
1690
|
const wrapped = promise.finally(() => {
|
|
1588
1691
|
const idx = this.pendingTranscriptions.indexOf(wrapped);
|
|
@@ -1590,25 +1693,25 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1590
1693
|
});
|
|
1591
1694
|
this.pendingTranscriptions.push(wrapped);
|
|
1592
1695
|
}
|
|
1696
|
+
/**
|
|
1697
|
+
* Register a transcript listener. Unlike the previous implementation
|
|
1698
|
+
* which capped at 10 and silently replaced the last one, we now keep all
|
|
1699
|
+
* registered callbacks in a Set; use {@link offTranscript} to remove one.
|
|
1700
|
+
*/
|
|
1593
1701
|
onTranscript(callback) {
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
}
|
|
1599
|
-
this.callbacks.push(callback);
|
|
1702
|
+
this.callbacks.add(callback);
|
|
1703
|
+
}
|
|
1704
|
+
offTranscript(callback) {
|
|
1705
|
+
this.callbacks.delete(callback);
|
|
1600
1706
|
}
|
|
1601
1707
|
async close() {
|
|
1602
1708
|
this.running = false;
|
|
1603
|
-
if (this.
|
|
1604
|
-
const pcm = this.
|
|
1605
|
-
this.buffer = Buffer.alloc(0);
|
|
1709
|
+
if (this.bufferedBytes > 0) {
|
|
1710
|
+
const pcm = this.flushChunks();
|
|
1606
1711
|
this.trackTranscription(this.transcribeBuffer(pcm));
|
|
1607
|
-
} else {
|
|
1608
|
-
this.buffer = Buffer.alloc(0);
|
|
1609
1712
|
}
|
|
1610
1713
|
await Promise.allSettled(this.pendingTranscriptions);
|
|
1611
|
-
this.callbacks
|
|
1714
|
+
this.callbacks.clear();
|
|
1612
1715
|
}
|
|
1613
1716
|
// ------------------------------------------------------------------
|
|
1614
1717
|
// Private
|
|
@@ -1618,6 +1721,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1618
1721
|
const formData = new FormData();
|
|
1619
1722
|
formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
|
|
1620
1723
|
formData.append("model", this.model);
|
|
1724
|
+
formData.append("response_format", this.responseFormat);
|
|
1621
1725
|
if (this.language) {
|
|
1622
1726
|
formData.append("language", this.language);
|
|
1623
1727
|
}
|
|
@@ -1639,7 +1743,7 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1639
1743
|
const transcript = {
|
|
1640
1744
|
text,
|
|
1641
1745
|
isFinal: true,
|
|
1642
|
-
confidence:
|
|
1746
|
+
confidence: extractConfidence(json)
|
|
1643
1747
|
};
|
|
1644
1748
|
for (const cb of this.callbacks) {
|
|
1645
1749
|
cb(transcript);
|
|
@@ -1649,9 +1753,23 @@ var WhisperSTT = class _WhisperSTT {
|
|
|
1649
1753
|
}
|
|
1650
1754
|
}
|
|
1651
1755
|
};
|
|
1756
|
+
/**
 * Derive a 0..1 confidence score from a transcription response.
 *
 * Each segment's `avg_logprob` is mapped through `Math.exp` and clamped to
 * [0, 1]; the result is the mean over the segments that carry a usable
 * numeric value. When the payload has no usable segments the score defaults
 * to 1.
 *
 * Tolerates malformed payloads (nullish payload, non-array `segments`, null
 * segment entries, NaN log-probabilities) by ignoring the unusable parts
 * instead of throwing or returning NaN.
 *
 * @param payload Parsed JSON body of the transcription response.
 * @returns Confidence in the range [0, 1].
 */
function extractConfidence(payload) {
  const segments = payload?.segments;
  if (!Array.isArray(segments)) return 1;
  let total = 0;
  let counted = 0;
  for (const seg of segments) {
    const logp = seg?.avg_logprob;
    if (typeof logp !== "number" || Number.isNaN(logp)) continue;
    total += Math.max(0, Math.min(1, Math.exp(logp)));
    counted += 1;
  }
  return counted === 0 ? 1 : total / counted;
}
|
|
1652
1769
|
|
|
1653
1770
|
// src/stt/whisper.ts
|
|
1654
1771
|
var STT2 = class extends WhisperSTT {
|
|
1772
|
+
static providerKey = "whisper";
|
|
1655
1773
|
constructor(opts = {}) {
|
|
1656
1774
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
1657
1775
|
if (!key) {
|
|
@@ -1659,18 +1777,53 @@ var STT2 = class extends WhisperSTT {
|
|
|
1659
1777
|
"Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
1660
1778
|
);
|
|
1661
1779
|
}
|
|
1662
|
-
super(key, opts.model ?? "whisper-1", opts.
|
|
1780
|
+
super(key, opts.language, opts.model ?? "whisper-1", opts.bufferSize, opts.responseFormat ?? "json");
|
|
1781
|
+
}
|
|
1782
|
+
};
|
|
1783
|
+
|
|
1784
|
+
// src/providers/openai-transcribe-stt.ts
|
|
1785
|
+
/** Model names this adapter accepts. */
var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
/** Default number of buffered PCM bytes per transcription request. */
var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
/**
 * `WhisperSTT` variant restricted to the `gpt-4o-*-transcribe` models.
 */
var OpenAITranscribeSTT = class extends WhisperSTT {
  /**
   * @param apiKey OpenAI API key.
   * @param language Language code forwarded to the base class. Optional.
   * @param model ``gpt-4o-transcribe`` (default) or ``gpt-4o-mini-transcribe``.
   *   Any other value, including ``"whisper-1"``, is rejected.
   * @param bufferSize Buffer size forwarded to the base class.
   * @param responseFormat Response format forwarded to the base class
   *   (defaults to ``"json"``).
   * @throws {Error} When `model` is not in the allowed set.
   */
  constructor(apiKey, language, model = "gpt-4o-transcribe", bufferSize = DEFAULT_BUFFER_SIZE2, responseFormat = "json") {
    const isSupported = ALLOWED_MODELS2.has(model);
    if (!isSupported) {
      const supported = Array.from(ALLOWED_MODELS2).join(", ");
      throw new Error(
        `OpenAITranscribeSTT: unsupported model "${model}". Expected one of ${supported}. For "whisper-1", use WhisperSTT instead.`
      );
    }
    super(apiKey, language, model, bufferSize, responseFormat);
  }
};
|
|
1805
|
+
|
|
1806
|
+
// src/stt/openai-transcribe.ts
|
|
1807
|
+
/**
 * Options-object front end for `OpenAITranscribeSTT`.
 *
 * The API key comes from `opts.apiKey`, falling back to the
 * `OPENAI_API_KEY` environment variable.
 */
var STT3 = class extends OpenAITranscribeSTT {
  static providerKey = "openai_transcribe";
  /**
   * @param opts `{ apiKey?, language?, model?, bufferSize?, responseFormat? }`;
   *   `model` defaults to "gpt-4o-transcribe" and `responseFormat` to "json".
   * @throws {Error} When no API key is available from either source.
   */
  constructor(opts = {}) {
    const apiKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (!apiKey) {
      throw new Error(
        "OpenAI Transcribe STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    const model = opts.model ?? "gpt-4o-transcribe";
    const responseFormat = opts.responseFormat ?? "json";
    super(apiKey, opts.language, model, opts.bufferSize, responseFormat);
  }
};
|
|
1665
1819
|
|
|
1666
1820
|
// src/providers/cartesia-stt.ts
|
|
1667
|
-
import
|
|
1821
|
+
import WebSocket2 from "ws";
|
|
1668
1822
|
var DEFAULT_BASE_URL = "https://api.cartesia.ai";
|
|
1669
1823
|
var API_VERSION = "2025-04-16";
|
|
1670
1824
|
var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
|
|
1671
1825
|
var KEEPALIVE_INTERVAL_MS = 3e4;
|
|
1672
1826
|
var CONNECT_TIMEOUT_MS = 1e4;
|
|
1673
|
-
var MAX_CALLBACKS = 10;
|
|
1674
1827
|
var CartesiaSTT = class {
|
|
1675
1828
|
constructor(apiKey, options = {}) {
|
|
1676
1829
|
this.apiKey = apiKey;
|
|
@@ -1680,10 +1833,13 @@ var CartesiaSTT = class {
|
|
|
1680
1833
|
}
|
|
1681
1834
|
}
|
|
1682
1835
|
ws = null;
|
|
1683
|
-
callbacks =
|
|
1836
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
1684
1837
|
keepaliveTimer = null;
|
|
1685
|
-
/**
|
|
1686
|
-
|
|
1838
|
+
/**
|
|
1839
|
+
* Cartesia request id — set from the server transcript events.
|
|
1840
|
+
* `null` until the first transcript event arrives (matches Python's `None`).
|
|
1841
|
+
*/
|
|
1842
|
+
requestId = null;
|
|
1687
1843
|
buildWsUrl() {
|
|
1688
1844
|
const opts = this.options;
|
|
1689
1845
|
const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
|
|
@@ -1710,7 +1866,7 @@ var CartesiaSTT = class {
|
|
|
1710
1866
|
}
|
|
1711
1867
|
async connect() {
|
|
1712
1868
|
const url = this.buildWsUrl();
|
|
1713
|
-
this.ws = new
|
|
1869
|
+
this.ws = new WebSocket2(url, {
|
|
1714
1870
|
headers: { "User-Agent": USER_AGENT }
|
|
1715
1871
|
});
|
|
1716
1872
|
await new Promise((resolve, reject) => {
|
|
@@ -1737,7 +1893,7 @@ var CartesiaSTT = class {
|
|
|
1737
1893
|
this.handleEvent(event);
|
|
1738
1894
|
});
|
|
1739
1895
|
this.keepaliveTimer = setInterval(() => {
|
|
1740
|
-
if (this.ws && this.ws.readyState ===
|
|
1896
|
+
if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
|
|
1741
1897
|
try {
|
|
1742
1898
|
this.ws.ping();
|
|
1743
1899
|
} catch {
|
|
@@ -1770,19 +1926,24 @@ var CartesiaSTT = class {
|
|
|
1770
1926
|
}
|
|
1771
1927
|
}
|
|
1772
1928
|
sendAudio(audio) {
|
|
1773
|
-
if (!this.ws || this.ws.readyState !==
|
|
1929
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
1774
1930
|
this.ws.send(audio);
|
|
1775
1931
|
}
|
|
1776
1932
|
onTranscript(callback) {
|
|
1777
|
-
|
|
1778
|
-
getLogger().warn(
|
|
1779
|
-
"CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
|
|
1780
|
-
);
|
|
1781
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
1782
|
-
return;
|
|
1783
|
-
}
|
|
1784
|
-
this.callbacks.push(callback);
|
|
1933
|
+
this.callbacks.add(callback);
|
|
1785
1934
|
}
|
|
1935
|
+
/** Remove a previously registered transcript callback. */
|
|
1936
|
+
offTranscript(callback) {
|
|
1937
|
+
this.callbacks.delete(callback);
|
|
1938
|
+
}
|
|
1939
|
+
/**
|
|
1940
|
+
* Synchronous best-effort close. Sends `finalize` and closes the socket
|
|
1941
|
+
* without waiting for the server to flush any remaining transcripts.
|
|
1942
|
+
*
|
|
1943
|
+
* Limitation: any transcript events produced between the `finalize` send
|
|
1944
|
+
* and the socket close may be dropped. Callers that need to guarantee all
|
|
1945
|
+
* transcripts are delivered should await :meth:`closeAsync` instead.
|
|
1946
|
+
*/
|
|
1786
1947
|
close() {
|
|
1787
1948
|
if (this.keepaliveTimer) {
|
|
1788
1949
|
clearInterval(this.keepaliveTimer);
|
|
@@ -1797,10 +1958,53 @@ var CartesiaSTT = class {
|
|
|
1797
1958
|
this.ws = null;
|
|
1798
1959
|
}
|
|
1799
1960
|
}
|
|
1961
|
+
/**
|
|
1962
|
+
* Graceful close that awaits the `finalize` send and the socket closing
|
|
1963
|
+
* handshake, matching the Python adapter's behavior. Use this when you
|
|
1964
|
+
* need any in-flight transcripts to be flushed before teardown.
|
|
1965
|
+
*/
|
|
1966
|
+
async closeAsync() {
|
|
1967
|
+
if (this.keepaliveTimer) {
|
|
1968
|
+
clearInterval(this.keepaliveTimer);
|
|
1969
|
+
this.keepaliveTimer = null;
|
|
1970
|
+
}
|
|
1971
|
+
const ws = this.ws;
|
|
1972
|
+
this.ws = null;
|
|
1973
|
+
if (!ws) return;
|
|
1974
|
+
if (ws.readyState === WebSocket2.OPEN) {
|
|
1975
|
+
try {
|
|
1976
|
+
await new Promise((resolve) => {
|
|
1977
|
+
ws.send("finalize", (err) => {
|
|
1978
|
+
if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
|
|
1979
|
+
resolve();
|
|
1980
|
+
});
|
|
1981
|
+
});
|
|
1982
|
+
} catch (err) {
|
|
1983
|
+
getLogger().warn(`CartesiaSTT finalize error: ${String(err)}`);
|
|
1984
|
+
}
|
|
1985
|
+
}
|
|
1986
|
+
if (ws.readyState === WebSocket2.OPEN || ws.readyState === WebSocket2.CONNECTING) {
|
|
1987
|
+
await new Promise((resolve) => {
|
|
1988
|
+
const done = () => {
|
|
1989
|
+
ws.off("close", done);
|
|
1990
|
+
ws.off("error", done);
|
|
1991
|
+
resolve();
|
|
1992
|
+
};
|
|
1993
|
+
ws.once("close", done);
|
|
1994
|
+
ws.once("error", done);
|
|
1995
|
+
try {
|
|
1996
|
+
ws.close();
|
|
1997
|
+
} catch {
|
|
1998
|
+
resolve();
|
|
1999
|
+
}
|
|
2000
|
+
});
|
|
2001
|
+
}
|
|
2002
|
+
}
|
|
1800
2003
|
};
|
|
1801
2004
|
|
|
1802
2005
|
// src/stt/cartesia.ts
|
|
1803
|
-
var
|
|
2006
|
+
var STT4 = class extends CartesiaSTT {
|
|
2007
|
+
static providerKey = "cartesia_stt";
|
|
1804
2008
|
constructor(opts = {}) {
|
|
1805
2009
|
const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
|
|
1806
2010
|
if (!key) {
|
|
@@ -1819,7 +2023,7 @@ var STT3 = class extends CartesiaSTT {
|
|
|
1819
2023
|
};
|
|
1820
2024
|
|
|
1821
2025
|
// src/providers/soniox-stt.ts
|
|
1822
|
-
import
|
|
2026
|
+
import WebSocket3 from "ws";
|
|
1823
2027
|
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
|
|
1824
2028
|
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
|
|
1825
2029
|
var END_TOKEN = "<end>";
|
|
@@ -1915,7 +2119,8 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1915
2119
|
return config;
|
|
1916
2120
|
}
|
|
1917
2121
|
async connect() {
|
|
1918
|
-
this.
|
|
2122
|
+
this.final.reset();
|
|
2123
|
+
this.ws = new WebSocket3(this.baseUrl);
|
|
1919
2124
|
await new Promise((resolve, reject) => {
|
|
1920
2125
|
const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
|
|
1921
2126
|
this.ws.once("open", () => {
|
|
@@ -1934,7 +2139,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
1934
2139
|
getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
|
|
1935
2140
|
});
|
|
1936
2141
|
this.keepaliveTimer = setInterval(() => {
|
|
1937
|
-
if (this.ws && this.ws.readyState ===
|
|
2142
|
+
if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
|
|
1938
2143
|
try {
|
|
1939
2144
|
this.ws.send(KEEPALIVE_MESSAGE);
|
|
1940
2145
|
} catch {
|
|
@@ -2007,7 +2212,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2007
2212
|
}
|
|
2008
2213
|
}
|
|
2009
2214
|
sendAudio(audio) {
|
|
2010
|
-
if (!this.ws || this.ws.readyState !==
|
|
2215
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
2011
2216
|
if (audio.length === 0) return;
|
|
2012
2217
|
this.ws.send(audio);
|
|
2013
2218
|
}
|
|
@@ -2038,7 +2243,8 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
2038
2243
|
};
|
|
2039
2244
|
|
|
2040
2245
|
// src/stt/soniox.ts
|
|
2041
|
-
var
|
|
2246
|
+
var STT5 = class extends SonioxSTT {
|
|
2247
|
+
static providerKey = "soniox";
|
|
2042
2248
|
constructor(opts = {}) {
|
|
2043
2249
|
const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
|
|
2044
2250
|
if (!key) {
|
|
@@ -2053,11 +2259,21 @@ var STT4 = class extends SonioxSTT {
|
|
|
2053
2259
|
};
|
|
2054
2260
|
|
|
2055
2261
|
// src/providers/assemblyai-stt.ts
|
|
2056
|
-
import
|
|
2262
|
+
import WebSocket4 from "ws";
|
|
2057
2263
|
var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
|
|
2058
|
-
var DEFAULT_MIN_TURN_SILENCE_MS =
|
|
2264
|
+
var DEFAULT_MIN_TURN_SILENCE_MS = 400;
|
|
2059
2265
|
var CONNECT_TIMEOUT_MS2 = 1e4;
|
|
2060
|
-
var
|
|
2266
|
+
var TERMINATION_WAIT_TIMEOUT_MS = 500;
|
|
2267
|
+
var MIN_CHUNK_DURATION_MS = 50;
|
|
2268
|
+
var MAX_CHUNK_DURATION_MS = 1e3;
|
|
2269
|
+
var RECONNECT_ERROR_CODES = /* @__PURE__ */ new Set([3005, 3008]);
|
|
2270
|
+
var VALID_DOMAINS = /* @__PURE__ */ new Set(["general", "medical-v1"]);
|
|
2271
|
+
var AssemblyAISTTNotConnectedError = class extends Error {
|
|
2272
|
+
constructor(message = "AssemblyAISTT is not connected") {
|
|
2273
|
+
super(message);
|
|
2274
|
+
this.name = "AssemblyAISTTNotConnectedError";
|
|
2275
|
+
}
|
|
2276
|
+
};
|
|
2061
2277
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
2062
2278
|
constructor(apiKey, options = {}) {
|
|
2063
2279
|
this.apiKey = apiKey;
|
|
@@ -2065,13 +2281,24 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2065
2281
|
if (!apiKey) {
|
|
2066
2282
|
throw new Error("AssemblyAISTT requires a non-empty apiKey");
|
|
2067
2283
|
}
|
|
2284
|
+
if (options.domain !== void 0 && !VALID_DOMAINS.has(options.domain)) {
|
|
2285
|
+
const hint = options.domain === "medical" ? ' \u2014 did you mean "medical-v1"?' : "";
|
|
2286
|
+
throw new Error(
|
|
2287
|
+
`AssemblyAISTT: invalid domain "${options.domain}"; expected one of [${Array.from(
|
|
2288
|
+
VALID_DOMAINS
|
|
2289
|
+
).map((d) => `"${d}"`).join(", ")}]${hint}`
|
|
2290
|
+
);
|
|
2291
|
+
}
|
|
2068
2292
|
}
|
|
2069
2293
|
ws = null;
|
|
2070
|
-
callbacks =
|
|
2294
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
2295
|
+
closing = false;
|
|
2296
|
+
reconnectAttempts = 0;
|
|
2297
|
+
terminationResolve = null;
|
|
2071
2298
|
/** AssemblyAI session id — set when the `Begin` message arrives. */
|
|
2072
|
-
sessionId =
|
|
2299
|
+
sessionId = null;
|
|
2073
2300
|
/** Unix timestamp when the AssemblyAI session expires. */
|
|
2074
|
-
expiresAt =
|
|
2301
|
+
expiresAt = null;
|
|
2075
2302
|
/** Factory for Twilio calls — mulaw 8 kHz. */
|
|
2076
2303
|
static forTwilio(apiKey, model = "universal-streaming-english") {
|
|
2077
2304
|
return new _AssemblyAISTT(apiKey, {
|
|
@@ -2106,11 +2333,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2106
2333
|
keyterms_prompt: opts.keytermsPrompt ? JSON.stringify(opts.keytermsPrompt) : void 0,
|
|
2107
2334
|
language_detection: languageDetection,
|
|
2108
2335
|
prompt: opts.prompt,
|
|
2109
|
-
vad_threshold
|
|
2336
|
+
// vad_threshold intentionally omitted — not a valid v3 parameter.
|
|
2110
2337
|
speaker_labels: opts.speakerLabels,
|
|
2111
2338
|
max_speakers: opts.maxSpeakers,
|
|
2112
2339
|
domain: opts.domain
|
|
2113
2340
|
};
|
|
2341
|
+
if (opts.useQueryToken) {
|
|
2342
|
+
raw.token = this.apiKey;
|
|
2343
|
+
}
|
|
2114
2344
|
const params = new URLSearchParams();
|
|
2115
2345
|
for (const [key, value] of Object.entries(raw)) {
|
|
2116
2346
|
if (value === void 0 || value === null) continue;
|
|
@@ -2123,30 +2353,41 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2123
2353
|
const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
|
|
2124
2354
|
return `${base}/v3/ws?${params.toString()}`;
|
|
2125
2355
|
}
|
|
2356
|
+
buildHeaders() {
|
|
2357
|
+
const headers = {
|
|
2358
|
+
"Content-Type": "application/json",
|
|
2359
|
+
"User-Agent": "Patter/1.0"
|
|
2360
|
+
};
|
|
2361
|
+
if (!this.options.useQueryToken) {
|
|
2362
|
+
headers.Authorization = this.apiKey;
|
|
2363
|
+
}
|
|
2364
|
+
return headers;
|
|
2365
|
+
}
|
|
2126
2366
|
async connect() {
|
|
2367
|
+
this.closing = false;
|
|
2127
2368
|
const url = this.buildUrl();
|
|
2128
|
-
this.ws = new
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
|
|
2132
|
-
|
|
2133
|
-
}
|
|
2134
|
-
});
|
|
2369
|
+
this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
|
|
2370
|
+
await this.awaitOpen(this.ws);
|
|
2371
|
+
this.attachHandlers(this.ws);
|
|
2372
|
+
}
|
|
2373
|
+
async awaitOpen(ws) {
|
|
2135
2374
|
await new Promise((resolve, reject) => {
|
|
2136
2375
|
const timer = setTimeout(
|
|
2137
2376
|
() => reject(new Error("AssemblyAI connect timeout")),
|
|
2138
2377
|
CONNECT_TIMEOUT_MS2
|
|
2139
2378
|
);
|
|
2140
|
-
|
|
2379
|
+
ws.once("open", () => {
|
|
2141
2380
|
clearTimeout(timer);
|
|
2142
2381
|
resolve();
|
|
2143
2382
|
});
|
|
2144
|
-
|
|
2383
|
+
ws.once("error", (err) => {
|
|
2145
2384
|
clearTimeout(timer);
|
|
2146
2385
|
reject(err);
|
|
2147
2386
|
});
|
|
2148
2387
|
});
|
|
2149
|
-
|
|
2388
|
+
}
|
|
2389
|
+
attachHandlers(ws) {
|
|
2390
|
+
ws.on("message", (raw) => {
|
|
2150
2391
|
let event;
|
|
2151
2392
|
try {
|
|
2152
2393
|
event = JSON.parse(raw.toString());
|
|
@@ -2155,12 +2396,45 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2155
2396
|
}
|
|
2156
2397
|
this.handleEvent(event);
|
|
2157
2398
|
});
|
|
2399
|
+
ws.on("close", (code) => {
|
|
2400
|
+
if (!this.closing && RECONNECT_ERROR_CODES.has(code) && this.reconnectAttempts < 1) {
|
|
2401
|
+
this.reconnectAttempts += 1;
|
|
2402
|
+
getLogger().warn(
|
|
2403
|
+
`AssemblyAISTT: close code ${code} \u2014 attempting single reconnect.`
|
|
2404
|
+
);
|
|
2405
|
+
this.reconnect().catch((err) => {
|
|
2406
|
+
getLogger().error("AssemblyAISTT reconnect failed", err);
|
|
2407
|
+
});
|
|
2408
|
+
}
|
|
2409
|
+
});
|
|
2410
|
+
}
|
|
2411
|
+
async reconnect() {
|
|
2412
|
+
const url = this.buildUrl();
|
|
2413
|
+
this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
|
|
2414
|
+
await this.awaitOpen(this.ws);
|
|
2415
|
+
this.attachHandlers(this.ws);
|
|
2158
2416
|
}
|
|
2159
2417
|
handleEvent(event) {
|
|
2160
2418
|
const type = event.type;
|
|
2161
2419
|
if (type === "Begin") {
|
|
2162
|
-
this.sessionId = event.id ??
|
|
2163
|
-
this.expiresAt = event.expires_at ??
|
|
2420
|
+
this.sessionId = event.id ?? null;
|
|
2421
|
+
this.expiresAt = event.expires_at ?? null;
|
|
2422
|
+
return;
|
|
2423
|
+
}
|
|
2424
|
+
if (type === "Termination") {
|
|
2425
|
+
if (this.terminationResolve) {
|
|
2426
|
+
this.terminationResolve();
|
|
2427
|
+
this.terminationResolve = null;
|
|
2428
|
+
}
|
|
2429
|
+
return;
|
|
2430
|
+
}
|
|
2431
|
+
if (type === "SpeechStarted") {
|
|
2432
|
+
this.emit({
|
|
2433
|
+
text: "",
|
|
2434
|
+
isFinal: false,
|
|
2435
|
+
confidence: 0,
|
|
2436
|
+
eventType: "SpeechStarted"
|
|
2437
|
+
});
|
|
2164
2438
|
return;
|
|
2165
2439
|
}
|
|
2166
2440
|
if (type !== "Turn") {
|
|
@@ -2195,28 +2469,89 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
2195
2469
|
}
|
|
2196
2470
|
}
|
|
2197
2471
|
sendAudio(audio) {
|
|
2198
|
-
if (!this.ws || this.ws.readyState !==
|
|
2472
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2473
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2474
|
+
"AssemblyAISTT.sendAudio: WebSocket is not open"
|
|
2475
|
+
);
|
|
2476
|
+
}
|
|
2477
|
+
const durationMs = this.estimateChunkDurationMs(audio.length);
|
|
2478
|
+
if (durationMs !== null && (durationMs < MIN_CHUNK_DURATION_MS || durationMs > MAX_CHUNK_DURATION_MS)) {
|
|
2479
|
+
getLogger().warn(
|
|
2480
|
+
`AssemblyAISTT: audio chunk duration ${durationMs.toFixed(1)}ms outside 50-1000ms bounds (may trigger error 3007).`
|
|
2481
|
+
);
|
|
2482
|
+
}
|
|
2199
2483
|
this.ws.send(audio);
|
|
2200
2484
|
}
|
|
2201
|
-
|
|
2202
|
-
if (
|
|
2203
|
-
|
|
2204
|
-
|
|
2485
|
+
estimateChunkDurationMs(byteLength) {
|
|
2486
|
+
if (byteLength <= 0) return null;
|
|
2487
|
+
const sampleRate = this.options.sampleRate ?? 16e3;
|
|
2488
|
+
if (sampleRate <= 0) return null;
|
|
2489
|
+
const bytesPerSample = (this.options.encoding ?? "pcm_s16le") === "pcm_s16le" ? 2 : 1;
|
|
2490
|
+
const samples = byteLength / bytesPerSample;
|
|
2491
|
+
return samples / sampleRate * 1e3;
|
|
2492
|
+
}
|
|
2493
|
+
/**
|
|
2494
|
+
* Send an `UpdateConfiguration` frame to change settings mid-stream.
|
|
2495
|
+
* Only defined fields are included.
|
|
2496
|
+
*/
|
|
2497
|
+
updateConfiguration(params) {
|
|
2498
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2499
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2500
|
+
"AssemblyAISTT.updateConfiguration: WebSocket is not open"
|
|
2205
2501
|
);
|
|
2206
|
-
this.callbacks[this.callbacks.length - 1] = callback;
|
|
2207
|
-
return;
|
|
2208
2502
|
}
|
|
2209
|
-
|
|
2503
|
+
const payload = { type: "UpdateConfiguration" };
|
|
2504
|
+
if (params.keytermsPrompt !== void 0) {
|
|
2505
|
+
payload.keyterms_prompt = JSON.stringify(params.keytermsPrompt);
|
|
2506
|
+
}
|
|
2507
|
+
if (params.prompt !== void 0) {
|
|
2508
|
+
payload.prompt = params.prompt;
|
|
2509
|
+
}
|
|
2510
|
+
if (params.minTurnSilence !== void 0) {
|
|
2511
|
+
payload.min_turn_silence = params.minTurnSilence;
|
|
2512
|
+
}
|
|
2513
|
+
if (params.maxTurnSilence !== void 0) {
|
|
2514
|
+
payload.max_turn_silence = params.maxTurnSilence;
|
|
2515
|
+
}
|
|
2516
|
+
this.ws.send(JSON.stringify(payload));
|
|
2210
2517
|
}
|
|
2211
|
-
|
|
2212
|
-
|
|
2213
|
-
|
|
2214
|
-
|
|
2215
|
-
|
|
2216
|
-
|
|
2518
|
+
/** Force the server to finalize the current turn (for barge-in). */
|
|
2519
|
+
forceEndpoint() {
|
|
2520
|
+
if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
|
|
2521
|
+
throw new AssemblyAISTTNotConnectedError(
|
|
2522
|
+
"AssemblyAISTT.forceEndpoint: WebSocket is not open"
|
|
2523
|
+
);
|
|
2524
|
+
}
|
|
2525
|
+
this.ws.send(JSON.stringify({ type: "ForceEndpoint" }));
|
|
2526
|
+
}
|
|
2527
|
+
onTranscript(callback) {
|
|
2528
|
+
this.callbacks.add(callback);
|
|
2529
|
+
return () => {
|
|
2530
|
+
this.callbacks.delete(callback);
|
|
2531
|
+
};
|
|
2532
|
+
}
|
|
2533
|
+
async close() {
|
|
2534
|
+
this.closing = true;
|
|
2535
|
+
if (!this.ws) return;
|
|
2536
|
+
try {
|
|
2537
|
+
this.ws.send(JSON.stringify({ type: "Terminate" }));
|
|
2538
|
+
} catch {
|
|
2539
|
+
}
|
|
2540
|
+
await new Promise((resolve) => {
|
|
2541
|
+
const timer = setTimeout(() => {
|
|
2542
|
+
this.terminationResolve = null;
|
|
2543
|
+
resolve();
|
|
2544
|
+
}, TERMINATION_WAIT_TIMEOUT_MS);
|
|
2545
|
+
this.terminationResolve = () => {
|
|
2546
|
+
clearTimeout(timer);
|
|
2547
|
+
resolve();
|
|
2548
|
+
};
|
|
2549
|
+
});
|
|
2550
|
+
try {
|
|
2217
2551
|
this.ws.close();
|
|
2218
|
-
|
|
2552
|
+
} catch {
|
|
2219
2553
|
}
|
|
2554
|
+
this.ws = null;
|
|
2220
2555
|
}
|
|
2221
2556
|
};
|
|
2222
2557
|
function averageConfidence(words) {
|
|
@@ -2229,7 +2564,8 @@ function averageConfidence(words) {
|
|
|
2229
2564
|
}
|
|
2230
2565
|
|
|
2231
2566
|
// src/stt/assemblyai.ts
|
|
2232
|
-
var
|
|
2567
|
+
var STT6 = class extends AssemblyAISTT {
|
|
2568
|
+
static providerKey = "assemblyai";
|
|
2233
2569
|
constructor(opts = {}) {
|
|
2234
2570
|
const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
|
|
2235
2571
|
if (!key) {
|
|
@@ -2300,14 +2636,78 @@ function resolveVoiceId(voice) {
|
|
|
2300
2636
|
if (VOICE_ID_PATTERN.test(voice)) return voice;
|
|
2301
2637
|
return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
|
|
2302
2638
|
}
|
|
2303
|
-
var ElevenLabsTTS = class {
|
|
2304
|
-
|
|
2639
|
+
var ElevenLabsTTS = class _ElevenLabsTTS {
|
|
2640
|
+
apiKey;
|
|
2641
|
+
voiceId;
|
|
2642
|
+
modelId;
|
|
2643
|
+
outputFormat;
|
|
2644
|
+
voiceSettings;
|
|
2645
|
+
languageCode;
|
|
2646
|
+
chunkSize;
|
|
2647
|
+
constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_flash_v2_5", outputFormat = "pcm_16000") {
|
|
2305
2648
|
this.apiKey = apiKey;
|
|
2306
|
-
|
|
2307
|
-
|
|
2308
|
-
|
|
2649
|
+
if (typeof voiceIdOrOptions === "object") {
|
|
2650
|
+
const o = voiceIdOrOptions;
|
|
2651
|
+
this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
|
|
2652
|
+
this.modelId = o.modelId ?? "eleven_flash_v2_5";
|
|
2653
|
+
this.outputFormat = o.outputFormat ?? "pcm_16000";
|
|
2654
|
+
this.voiceSettings = o.voiceSettings;
|
|
2655
|
+
this.languageCode = o.languageCode;
|
|
2656
|
+
this.chunkSize = o.chunkSize ?? 4096;
|
|
2657
|
+
} else {
|
|
2658
|
+
this.voiceId = resolveVoiceId(voiceIdOrOptions);
|
|
2659
|
+
this.modelId = modelId;
|
|
2660
|
+
this.outputFormat = outputFormat;
|
|
2661
|
+
this.voiceSettings = void 0;
|
|
2662
|
+
this.languageCode = void 0;
|
|
2663
|
+
this.chunkSize = 4096;
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2666
|
+
/**
|
|
2667
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
2668
|
+
*
|
|
2669
|
+
* Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
|
|
2670
|
+
* directly — the exact wire format Twilio's media stream uses — letting
|
|
2671
|
+
* the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
|
|
2672
|
+
* `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
|
|
2673
|
+
* and removes a potential aliasing source.
|
|
2674
|
+
*
|
|
2675
|
+
* `voiceSettings` defaults to a low-bandwidth-friendly profile
|
|
2676
|
+
* (speaker boost off, modest stability) which sounds cleaner at 8 kHz
|
|
2677
|
+
* μ-law than the studio default. Pass an explicit object to override.
|
|
2678
|
+
*/
|
|
2679
|
+
static forTwilio(apiKey, options = {}) {
|
|
2680
|
+
const voiceSettings = options.voiceSettings ?? {
|
|
2681
|
+
// Speaker boost adds high-frequency emphasis that aliases ugly over an
|
|
2682
|
+
// 8 kHz μ-law line. Slightly higher stability tames the excursions
|
|
2683
|
+
// that compander quantization noise can amplify.
|
|
2684
|
+
stability: 0.6,
|
|
2685
|
+
similarity_boost: 0.75,
|
|
2686
|
+
use_speaker_boost: false
|
|
2687
|
+
};
|
|
2688
|
+
return new _ElevenLabsTTS(apiKey, {
|
|
2689
|
+
...options,
|
|
2690
|
+
voiceSettings,
|
|
2691
|
+
outputFormat: "ulaw_8000"
|
|
2692
|
+
});
|
|
2693
|
+
}
|
|
2694
|
+
/**
|
|
2695
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
2696
|
+
*
|
|
2697
|
+
* Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
|
|
2698
|
+
* matches our default Telnyx handler. We pick `pcm_16000` so the audio
|
|
2699
|
+
* flows end-to-end with zero resampling or transcoding.
|
|
2700
|
+
*
|
|
2701
|
+
* Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
|
|
2702
|
+
* construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
|
|
2703
|
+
* — Telnyx supports that natively too.
|
|
2704
|
+
*/
|
|
2705
|
+
static forTelnyx(apiKey, options = {}) {
|
|
2706
|
+
return new _ElevenLabsTTS(apiKey, {
|
|
2707
|
+
...options,
|
|
2708
|
+
outputFormat: "pcm_16000"
|
|
2709
|
+
});
|
|
2309
2710
|
}
|
|
2310
|
-
voiceId;
|
|
2311
2711
|
/**
|
|
2312
2712
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
2313
2713
|
*
|
|
@@ -2324,22 +2724,29 @@ var ElevenLabsTTS = class {
|
|
|
2324
2724
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2325
2725
|
*
|
|
2326
2726
|
* The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
|
|
2327
|
-
* configured to).
|
|
2727
|
+
* configured to). `chunkSize` controls the maximum yield size — 512 is a
|
|
2728
|
+
* good choice for low-latency telephony.
|
|
2328
2729
|
*/
|
|
2329
2730
|
async *synthesizeStream(text) {
|
|
2330
2731
|
const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
|
|
2732
|
+
const body = {
|
|
2733
|
+
text,
|
|
2734
|
+
model_id: this.modelId
|
|
2735
|
+
};
|
|
2736
|
+
if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
|
|
2737
|
+
if (this.languageCode) body["language_code"] = this.languageCode;
|
|
2331
2738
|
const response = await fetch(url, {
|
|
2332
2739
|
method: "POST",
|
|
2333
2740
|
headers: {
|
|
2334
2741
|
"xi-api-key": this.apiKey,
|
|
2335
2742
|
"Content-Type": "application/json"
|
|
2336
2743
|
},
|
|
2337
|
-
body: JSON.stringify(
|
|
2744
|
+
body: JSON.stringify(body),
|
|
2338
2745
|
signal: AbortSignal.timeout(3e4)
|
|
2339
2746
|
});
|
|
2340
2747
|
if (!response.ok) {
|
|
2341
|
-
const
|
|
2342
|
-
throw new Error(`ElevenLabs TTS error ${response.status}: ${
|
|
2748
|
+
const errBody = await response.text();
|
|
2749
|
+
throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
|
|
2343
2750
|
}
|
|
2344
2751
|
if (!response.body) {
|
|
2345
2752
|
throw new Error("ElevenLabs TTS: no response body");
|
|
@@ -2349,8 +2756,10 @@ var ElevenLabsTTS = class {
|
|
|
2349
2756
|
while (true) {
|
|
2350
2757
|
const { done, value } = await reader.read();
|
|
2351
2758
|
if (done) break;
|
|
2352
|
-
if (value
|
|
2353
|
-
|
|
2759
|
+
if (!value || value.length === 0) continue;
|
|
2760
|
+
const buf = Buffer.from(value);
|
|
2761
|
+
for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
|
|
2762
|
+
yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
|
|
2354
2763
|
}
|
|
2355
2764
|
}
|
|
2356
2765
|
} finally {
|
|
@@ -2362,30 +2771,50 @@ var ElevenLabsTTS = class {
|
|
|
2362
2771
|
};
|
|
2363
2772
|
|
|
2364
2773
|
// src/tts/elevenlabs.ts
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2774
|
+
function resolveApiKey(apiKey) {
|
|
2775
|
+
const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
|
|
2776
|
+
if (!key) {
|
|
2777
|
+
throw new Error(
|
|
2778
|
+
"ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
|
|
2779
|
+
);
|
|
2780
|
+
}
|
|
2781
|
+
return key;
|
|
2782
|
+
}
|
|
2783
|
+
var TTS = class _TTS extends ElevenLabsTTS {
|
|
2784
|
+
static providerKey = "elevenlabs";
|
|
2785
|
+
constructor(opts = {}) {
|
|
2373
2786
|
super(
|
|
2374
|
-
|
|
2787
|
+
resolveApiKey(opts.apiKey),
|
|
2375
2788
|
opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
|
|
2376
|
-
opts.modelId ?? "
|
|
2789
|
+
opts.modelId ?? "eleven_flash_v2_5",
|
|
2377
2790
|
opts.outputFormat ?? "pcm_16000"
|
|
2378
2791
|
);
|
|
2379
2792
|
}
|
|
2793
|
+
static forTwilio(arg1, arg2) {
|
|
2794
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
2795
|
+
return new _TTS({ ...opts, outputFormat: "ulaw_8000" });
|
|
2796
|
+
}
|
|
2797
|
+
static forTelnyx(arg1, arg2) {
|
|
2798
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
2799
|
+
return new _TTS({ ...opts, outputFormat: "pcm_16000" });
|
|
2800
|
+
}
|
|
2380
2801
|
};
|
|
2381
2802
|
|
|
2382
2803
|
// src/providers/openai-tts.ts
|
|
2383
2804
|
var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
|
|
2805
|
+
var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
|
|
2806
|
+
var LPF_ALPHA = 0.78;
|
|
2384
2807
|
var OpenAITTS = class _OpenAITTS {
|
|
2385
|
-
constructor(apiKey, voice = "alloy", model = "tts
|
|
2808
|
+
constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true) {
|
|
2386
2809
|
this.apiKey = apiKey;
|
|
2387
2810
|
this.voice = voice;
|
|
2388
2811
|
this.model = model;
|
|
2812
|
+
this.instructions = instructions;
|
|
2813
|
+
this.speed = speed;
|
|
2814
|
+
this.antiAlias = antiAlias;
|
|
2815
|
+
if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
|
|
2816
|
+
throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
|
|
2817
|
+
}
|
|
2389
2818
|
}
|
|
2390
2819
|
/**
|
|
2391
2820
|
* Synthesise text to speech and return the full audio as a single Buffer.
|
|
@@ -2402,37 +2831,48 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2402
2831
|
/**
|
|
2403
2832
|
* Synthesise text and yield audio chunks as they arrive (streaming).
|
|
2404
2833
|
*
|
|
2405
|
-
* OpenAI returns 24 kHz PCM16; each chunk is
|
|
2406
|
-
* yielding so the output is ready for
|
|
2834
|
+
* OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
|
|
2835
|
+
* decimated 3:2 to 16 kHz before yielding so the output is ready for
|
|
2836
|
+
* telephony pipelines.
|
|
2407
2837
|
*
|
|
2408
|
-
* The resampler carries state (buffered samples + odd
|
|
2409
|
-
* between chunks
|
|
2410
|
-
*
|
|
2411
|
-
* Python `audioop.ratecv` fix).
|
|
2838
|
+
* The resampler carries state (filter memory + buffered samples + odd
|
|
2839
|
+
* trailing byte) between chunks so cross-chunk sample alignment and
|
|
2840
|
+
* filter phase don't reset on every network read.
|
|
2412
2841
|
*/
|
|
2413
2842
|
async *synthesizeStream(text) {
|
|
2843
|
+
const body = {
|
|
2844
|
+
model: this.model,
|
|
2845
|
+
input: text,
|
|
2846
|
+
voice: this.voice,
|
|
2847
|
+
response_format: "pcm"
|
|
2848
|
+
};
|
|
2849
|
+
if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
|
|
2850
|
+
body.instructions = this.instructions;
|
|
2851
|
+
}
|
|
2852
|
+
if (this.speed !== null) {
|
|
2853
|
+
body.speed = this.speed;
|
|
2854
|
+
}
|
|
2414
2855
|
const response = await fetch(OPENAI_TTS_URL, {
|
|
2415
2856
|
method: "POST",
|
|
2416
2857
|
headers: {
|
|
2417
2858
|
"Authorization": `Bearer ${this.apiKey}`,
|
|
2418
2859
|
"Content-Type": "application/json"
|
|
2419
2860
|
},
|
|
2420
|
-
body: JSON.stringify(
|
|
2421
|
-
model: this.model,
|
|
2422
|
-
input: text,
|
|
2423
|
-
voice: this.voice,
|
|
2424
|
-
response_format: "pcm"
|
|
2425
|
-
}),
|
|
2426
|
-
signal: AbortSignal.timeout(3e4)
|
|
2861
|
+
body: JSON.stringify(body)
|
|
2427
2862
|
});
|
|
2428
2863
|
if (!response.ok) {
|
|
2429
|
-
const
|
|
2430
|
-
throw new Error(`OpenAI TTS error ${response.status}: ${
|
|
2864
|
+
const errBody = await response.text();
|
|
2865
|
+
throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
|
|
2431
2866
|
}
|
|
2432
2867
|
if (!response.body) {
|
|
2433
2868
|
throw new Error("OpenAI TTS: no response body");
|
|
2434
2869
|
}
|
|
2435
|
-
const ctx = {
|
|
2870
|
+
const ctx = {
|
|
2871
|
+
carryByte: null,
|
|
2872
|
+
leftover: [],
|
|
2873
|
+
lpfPrev: 0,
|
|
2874
|
+
lpfEnabled: this.antiAlias
|
|
2875
|
+
};
|
|
2436
2876
|
const reader = response.body.getReader();
|
|
2437
2877
|
try {
|
|
2438
2878
|
while (true) {
|
|
@@ -2457,8 +2897,14 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2457
2897
|
}
|
|
2458
2898
|
}
|
|
2459
2899
|
/**
|
|
2460
|
-
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE).
|
|
2461
|
-
*
|
|
2900
|
+
* Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
|
|
2901
|
+
* lowpass ahead of the 3:2 decimation and carries filter + sample state
|
|
2902
|
+
* across chunks so the cadence doesn't reset at every network read.
|
|
2903
|
+
*
|
|
2904
|
+
* ``ctx.lpfEnabled`` (default true on the streaming path, false for the
|
|
2905
|
+
* legacy static helper) controls whether the LPF is engaged — we keep
|
|
2906
|
+
* the helper bit-exact for the downsample-only tests while the real
|
|
2907
|
+
* streaming path gets anti-alias filtering.
|
|
2462
2908
|
*/
|
|
2463
2909
|
static resampleStreaming(audio, ctx) {
|
|
2464
2910
|
let buf;
|
|
@@ -2477,14 +2923,26 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2477
2923
|
}
|
|
2478
2924
|
const sampleCount = buf.length / 2;
|
|
2479
2925
|
const samples = ctx.leftover.slice();
|
|
2926
|
+
const lpf = ctx.lpfEnabled !== false;
|
|
2927
|
+
let y = ctx.lpfPrev;
|
|
2480
2928
|
for (let i2 = 0; i2 < sampleCount; i2++) {
|
|
2481
|
-
|
|
2929
|
+
const x = buf.readInt16LE(i2 * 2);
|
|
2930
|
+
if (lpf) {
|
|
2931
|
+
y = LPF_ALPHA * x + (1 - LPF_ALPHA) * y;
|
|
2932
|
+
let s = Math.round(y);
|
|
2933
|
+
if (s > 32767) s = 32767;
|
|
2934
|
+
else if (s < -32768) s = -32768;
|
|
2935
|
+
samples.push(s);
|
|
2936
|
+
} else {
|
|
2937
|
+
samples.push(x);
|
|
2938
|
+
}
|
|
2482
2939
|
}
|
|
2940
|
+
if (lpf) ctx.lpfPrev = y;
|
|
2483
2941
|
const out = [];
|
|
2484
2942
|
let i = 0;
|
|
2485
2943
|
while (i + 2 < samples.length) {
|
|
2486
2944
|
out.push(samples[i]);
|
|
2487
|
-
out.push(Math.
|
|
2945
|
+
out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
|
|
2488
2946
|
i += 3;
|
|
2489
2947
|
}
|
|
2490
2948
|
ctx.leftover = samples.slice(i);
|
|
@@ -2496,7 +2954,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2496
2954
|
}
|
|
2497
2955
|
/** @deprecated use {@link resampleStreaming} with persistent state. */
|
|
2498
2956
|
static resample24kTo16k(audio) {
|
|
2499
|
-
const ctx = { carryByte: null, leftover: [] };
|
|
2957
|
+
const ctx = { carryByte: null, leftover: [], lpfPrev: 0, lpfEnabled: false };
|
|
2500
2958
|
const out = _OpenAITTS.resampleStreaming(audio, ctx);
|
|
2501
2959
|
if (ctx.leftover.length === 0) return out;
|
|
2502
2960
|
const tail = Buffer.alloc(ctx.leftover.length * 2);
|
|
@@ -2509,6 +2967,7 @@ var OpenAITTS = class _OpenAITTS {
|
|
|
2509
2967
|
|
|
2510
2968
|
// src/tts/openai.ts
|
|
2511
2969
|
var TTS2 = class extends OpenAITTS {
|
|
2970
|
+
static providerKey = "openai_tts";
|
|
2512
2971
|
constructor(opts = {}) {
|
|
2513
2972
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
2514
2973
|
if (!key) {
|
|
@@ -2516,15 +2975,22 @@ var TTS2 = class extends OpenAITTS {
|
|
|
2516
2975
|
"OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
2517
2976
|
);
|
|
2518
2977
|
}
|
|
2519
|
-
super(
|
|
2978
|
+
super(
|
|
2979
|
+
key,
|
|
2980
|
+
opts.voice ?? "alloy",
|
|
2981
|
+
opts.model ?? "gpt-4o-mini-tts",
|
|
2982
|
+
opts.instructions ?? null,
|
|
2983
|
+
opts.speed ?? null,
|
|
2984
|
+
opts.antiAlias ?? false
|
|
2985
|
+
);
|
|
2520
2986
|
}
|
|
2521
2987
|
};
|
|
2522
2988
|
|
|
2523
2989
|
// src/providers/cartesia-tts.ts
|
|
2524
2990
|
var CARTESIA_BASE_URL = "https://api.cartesia.ai";
|
|
2525
|
-
var CARTESIA_API_VERSION = "
|
|
2991
|
+
var CARTESIA_API_VERSION = "2025-04-16";
|
|
2526
2992
|
var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
|
|
2527
|
-
var CartesiaTTS = class {
|
|
2993
|
+
var CartesiaTTS = class _CartesiaTTS {
|
|
2528
2994
|
apiKey;
|
|
2529
2995
|
model;
|
|
2530
2996
|
voice;
|
|
@@ -2537,7 +3003,7 @@ var CartesiaTTS = class {
|
|
|
2537
3003
|
apiVersion;
|
|
2538
3004
|
constructor(apiKey, opts = {}) {
|
|
2539
3005
|
this.apiKey = apiKey;
|
|
2540
|
-
this.model = opts.model ?? "sonic-
|
|
3006
|
+
this.model = opts.model ?? "sonic-3";
|
|
2541
3007
|
this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
|
|
2542
3008
|
this.language = opts.language ?? "en";
|
|
2543
3009
|
this.sampleRate = opts.sampleRate ?? 16e3;
|
|
@@ -2547,6 +3013,29 @@ var CartesiaTTS = class {
|
|
|
2547
3013
|
this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
|
|
2548
3014
|
this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
|
|
2549
3015
|
}
|
|
3016
|
+
/**
|
|
3017
|
+
* Construct an instance pre-configured for Twilio Media Streams.
|
|
3018
|
+
*
|
|
3019
|
+
* Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
|
|
3020
|
+
* Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
|
|
3021
|
+
* PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
|
|
3022
|
+
* step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
|
|
3023
|
+
* removes a potential aliasing source.
|
|
3024
|
+
*/
|
|
3025
|
+
static forTwilio(apiKey, options = {}) {
|
|
3026
|
+
return new _CartesiaTTS(apiKey, { ...options, sampleRate: 8e3 });
|
|
3027
|
+
}
|
|
3028
|
+
/**
|
|
3029
|
+
* Construct an instance pre-configured for Telnyx bidirectional media.
|
|
3030
|
+
*
|
|
3031
|
+
* Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
|
|
3032
|
+
* audio flows end-to-end with zero resampling or transcoding. Same as
|
|
3033
|
+
* the bare-constructor default; exists for API symmetry with
|
|
3034
|
+
* {@link CartesiaTTS.forTwilio}.
|
|
3035
|
+
*/
|
|
3036
|
+
static forTelnyx(apiKey, options = {}) {
|
|
3037
|
+
return new _CartesiaTTS(apiKey, { ...options, sampleRate: 16e3 });
|
|
3038
|
+
}
|
|
2550
3039
|
/** Build the JSON payload for the Cartesia bytes endpoint. */
|
|
2551
3040
|
buildPayload(text) {
|
|
2552
3041
|
const payload = {
|
|
@@ -2619,18 +3108,31 @@ var CartesiaTTS = class {
|
|
|
2619
3108
|
};
|
|
2620
3109
|
|
|
2621
3110
|
// src/tts/cartesia.ts
|
|
2622
|
-
|
|
3111
|
+
function resolveApiKey2(apiKey) {
|
|
3112
|
+
const key = apiKey ?? process.env.CARTESIA_API_KEY;
|
|
3113
|
+
if (!key) {
|
|
3114
|
+
throw new Error(
|
|
3115
|
+
"Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
3116
|
+
);
|
|
3117
|
+
}
|
|
3118
|
+
return key;
|
|
3119
|
+
}
|
|
3120
|
+
var TTS3 = class _TTS extends CartesiaTTS {
|
|
3121
|
+
static providerKey = "cartesia_tts";
|
|
2623
3122
|
constructor(opts = {}) {
|
|
2624
|
-
const key = opts.apiKey
|
|
2625
|
-
if (!key) {
|
|
2626
|
-
throw new Error(
|
|
2627
|
-
"Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
|
|
2628
|
-
);
|
|
2629
|
-
}
|
|
3123
|
+
const key = resolveApiKey2(opts.apiKey);
|
|
2630
3124
|
const { apiKey: _ignored, ...rest } = opts;
|
|
2631
3125
|
void _ignored;
|
|
2632
3126
|
super(key, rest);
|
|
2633
3127
|
}
|
|
3128
|
+
static forTwilio(arg1, arg2) {
|
|
3129
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
3130
|
+
return new _TTS({ ...opts, sampleRate: 8e3 });
|
|
3131
|
+
}
|
|
3132
|
+
static forTelnyx(arg1, arg2) {
|
|
3133
|
+
const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
|
|
3134
|
+
return new _TTS({ ...opts, sampleRate: 16e3 });
|
|
3135
|
+
}
|
|
2634
3136
|
};
|
|
2635
3137
|
|
|
2636
3138
|
// src/providers/rime-tts.ts
|
|
@@ -2762,6 +3264,7 @@ var RimeTTS = class {
|
|
|
2762
3264
|
|
|
2763
3265
|
// src/tts/rime.ts
|
|
2764
3266
|
var TTS4 = class extends RimeTTS {
|
|
3267
|
+
static providerKey = "rime";
|
|
2765
3268
|
constructor(opts = {}) {
|
|
2766
3269
|
const key = opts.apiKey ?? process.env.RIME_API_KEY;
|
|
2767
3270
|
if (!key) {
|
|
@@ -2855,6 +3358,7 @@ var LMNTTTS = class {
|
|
|
2855
3358
|
|
|
2856
3359
|
// src/tts/lmnt.ts
|
|
2857
3360
|
var TTS5 = class extends LMNTTTS {
|
|
3361
|
+
static providerKey = "lmnt";
|
|
2858
3362
|
constructor(opts = {}) {
|
|
2859
3363
|
const key = opts.apiKey ?? process.env.LMNT_API_KEY;
|
|
2860
3364
|
if (!key) {
|
|
@@ -2870,6 +3374,7 @@ var TTS5 = class extends LMNTTTS {
|
|
|
2870
3374
|
|
|
2871
3375
|
// src/llm/openai.ts
|
|
2872
3376
|
var LLM = class extends OpenAILLMProvider {
|
|
3377
|
+
static providerKey = "openai";
|
|
2873
3378
|
constructor(opts = {}) {
|
|
2874
3379
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
2875
3380
|
if (!key) {
|
|
@@ -2877,15 +3382,27 @@ var LLM = class extends OpenAILLMProvider {
|
|
|
2877
3382
|
"OpenAI LLM requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY."
|
|
2878
3383
|
);
|
|
2879
3384
|
}
|
|
2880
|
-
super(key, opts.model ?? "gpt-4o-mini"
|
|
3385
|
+
super(key, opts.model ?? "gpt-4o-mini", {
|
|
3386
|
+
temperature: opts.temperature,
|
|
3387
|
+
maxTokens: opts.maxTokens,
|
|
3388
|
+
responseFormat: opts.responseFormat,
|
|
3389
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3390
|
+
toolChoice: opts.toolChoice,
|
|
3391
|
+
seed: opts.seed,
|
|
3392
|
+
topP: opts.topP,
|
|
3393
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3394
|
+
presencePenalty: opts.presencePenalty,
|
|
3395
|
+
stop: opts.stop
|
|
3396
|
+
});
|
|
2881
3397
|
}
|
|
2882
3398
|
};
|
|
2883
3399
|
|
|
2884
3400
|
// src/providers/anthropic-llm.ts
|
|
2885
3401
|
var DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
|
|
2886
3402
|
var DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
|
|
2887
|
-
var DEFAULT_MODEL = "claude-
|
|
3403
|
+
var DEFAULT_MODEL = "claude-haiku-4-5-20251001";
|
|
2888
3404
|
var DEFAULT_MAX_TOKENS = 1024;
|
|
3405
|
+
var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
|
|
2889
3406
|
var AnthropicLLMProvider = class {
|
|
2890
3407
|
apiKey;
|
|
2891
3408
|
model;
|
|
@@ -2893,6 +3410,7 @@ var AnthropicLLMProvider = class {
|
|
|
2893
3410
|
temperature;
|
|
2894
3411
|
url;
|
|
2895
3412
|
anthropicVersion;
|
|
3413
|
+
promptCaching;
|
|
2896
3414
|
constructor(options) {
|
|
2897
3415
|
if (!options.apiKey) {
|
|
2898
3416
|
throw new Error(
|
|
@@ -2905,6 +3423,7 @@ var AnthropicLLMProvider = class {
|
|
|
2905
3423
|
this.temperature = options.temperature;
|
|
2906
3424
|
this.url = options.baseUrl ?? DEFAULT_ANTHROPIC_URL;
|
|
2907
3425
|
this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
|
|
3426
|
+
this.promptCaching = options.promptCaching ?? true;
|
|
2908
3427
|
}
|
|
2909
3428
|
async *stream(messages, tools) {
|
|
2910
3429
|
const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
|
|
@@ -2915,16 +3434,44 @@ var AnthropicLLMProvider = class {
|
|
|
2915
3434
|
max_tokens: this.maxTokens,
|
|
2916
3435
|
stream: true
|
|
2917
3436
|
};
|
|
2918
|
-
if (system)
|
|
2919
|
-
|
|
3437
|
+
if (system) {
|
|
3438
|
+
if (this.promptCaching) {
|
|
3439
|
+
const block = {
|
|
3440
|
+
type: "text",
|
|
3441
|
+
text: system,
|
|
3442
|
+
cache_control: { type: "ephemeral" }
|
|
3443
|
+
};
|
|
3444
|
+
body.system = [block];
|
|
3445
|
+
} else {
|
|
3446
|
+
body.system = system;
|
|
3447
|
+
}
|
|
3448
|
+
}
|
|
3449
|
+
if (anthropicTools && anthropicTools.length > 0) {
|
|
3450
|
+
if (this.promptCaching) {
|
|
3451
|
+
const cachedTools = anthropicTools.map(
|
|
3452
|
+
(t) => ({ ...t })
|
|
3453
|
+
);
|
|
3454
|
+
cachedTools[cachedTools.length - 1] = {
|
|
3455
|
+
...cachedTools[cachedTools.length - 1],
|
|
3456
|
+
cache_control: { type: "ephemeral" }
|
|
3457
|
+
};
|
|
3458
|
+
body.tools = cachedTools;
|
|
3459
|
+
} else {
|
|
3460
|
+
body.tools = anthropicTools;
|
|
3461
|
+
}
|
|
3462
|
+
}
|
|
2920
3463
|
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3464
|
+
const headers = {
|
|
3465
|
+
"Content-Type": "application/json",
|
|
3466
|
+
"x-api-key": this.apiKey,
|
|
3467
|
+
"anthropic-version": this.anthropicVersion
|
|
3468
|
+
};
|
|
3469
|
+
if (this.promptCaching) {
|
|
3470
|
+
headers["anthropic-beta"] = PROMPT_CACHING_BETA;
|
|
3471
|
+
}
|
|
2921
3472
|
const response = await fetch(this.url, {
|
|
2922
3473
|
method: "POST",
|
|
2923
|
-
headers
|
|
2924
|
-
"Content-Type": "application/json",
|
|
2925
|
-
"x-api-key": this.apiKey,
|
|
2926
|
-
"anthropic-version": this.anthropicVersion
|
|
2927
|
-
},
|
|
3474
|
+
headers,
|
|
2928
3475
|
body: JSON.stringify(body),
|
|
2929
3476
|
signal: AbortSignal.timeout(3e4)
|
|
2930
3477
|
});
|
|
@@ -3069,6 +3616,7 @@ function toAnthropicMessages(messages) {
|
|
|
3069
3616
|
|
|
3070
3617
|
// src/llm/anthropic.ts
|
|
3071
3618
|
var LLM2 = class extends AnthropicLLMProvider {
|
|
3619
|
+
static providerKey = "anthropic";
|
|
3072
3620
|
constructor(opts = {}) {
|
|
3073
3621
|
const key = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
|
|
3074
3622
|
if (!key) {
|
|
@@ -3082,11 +3630,15 @@ var LLM2 = class extends AnthropicLLMProvider {
|
|
|
3082
3630
|
maxTokens: opts.maxTokens,
|
|
3083
3631
|
temperature: opts.temperature,
|
|
3084
3632
|
baseUrl: opts.baseUrl,
|
|
3085
|
-
anthropicVersion: opts.anthropicVersion
|
|
3633
|
+
anthropicVersion: opts.anthropicVersion,
|
|
3634
|
+
promptCaching: opts.promptCaching
|
|
3086
3635
|
});
|
|
3087
3636
|
}
|
|
3088
3637
|
};
|
|
3089
3638
|
|
|
3639
|
+
// src/version.ts
|
|
3640
|
+
var VERSION = "0.5.3";
|
|
3641
|
+
|
|
3090
3642
|
// src/providers/groq-llm.ts
|
|
3091
3643
|
var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
|
|
3092
3644
|
var DEFAULT_MODEL2 = "llama-3.3-70b-versatile";
|
|
@@ -3094,6 +3646,16 @@ var GroqLLMProvider = class {
|
|
|
3094
3646
|
apiKey;
|
|
3095
3647
|
model;
|
|
3096
3648
|
baseUrl;
|
|
3649
|
+
temperature;
|
|
3650
|
+
maxTokens;
|
|
3651
|
+
responseFormat;
|
|
3652
|
+
parallelToolCalls;
|
|
3653
|
+
toolChoice;
|
|
3654
|
+
seed;
|
|
3655
|
+
topP;
|
|
3656
|
+
frequencyPenalty;
|
|
3657
|
+
presencePenalty;
|
|
3658
|
+
stop;
|
|
3097
3659
|
constructor(options) {
|
|
3098
3660
|
if (!options.apiKey) {
|
|
3099
3661
|
throw new Error(
|
|
@@ -3103,19 +3665,43 @@ var GroqLLMProvider = class {
|
|
|
3103
3665
|
this.apiKey = options.apiKey;
|
|
3104
3666
|
this.model = options.model ?? DEFAULT_MODEL2;
|
|
3105
3667
|
this.baseUrl = options.baseUrl ?? GROQ_BASE_URL;
|
|
3668
|
+
this.temperature = options.temperature;
|
|
3669
|
+
this.maxTokens = options.maxTokens;
|
|
3670
|
+
this.responseFormat = options.responseFormat;
|
|
3671
|
+
this.parallelToolCalls = options.parallelToolCalls;
|
|
3672
|
+
this.toolChoice = options.toolChoice;
|
|
3673
|
+
this.seed = options.seed;
|
|
3674
|
+
this.topP = options.topP;
|
|
3675
|
+
this.frequencyPenalty = options.frequencyPenalty;
|
|
3676
|
+
this.presencePenalty = options.presencePenalty;
|
|
3677
|
+
this.stop = options.stop;
|
|
3106
3678
|
}
|
|
3107
3679
|
async *stream(messages, tools) {
|
|
3108
3680
|
const body = {
|
|
3109
3681
|
model: this.model,
|
|
3110
3682
|
messages,
|
|
3111
|
-
stream: true
|
|
3683
|
+
stream: true,
|
|
3684
|
+
stream_options: { include_usage: true }
|
|
3112
3685
|
};
|
|
3686
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3687
|
+
if (this.maxTokens !== void 0) {
|
|
3688
|
+
body.max_completion_tokens = this.maxTokens;
|
|
3689
|
+
}
|
|
3690
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
3691
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
3692
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
3693
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
3694
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
3695
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
3696
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
3697
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
3113
3698
|
if (tools) body.tools = tools;
|
|
3114
3699
|
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
3115
3700
|
method: "POST",
|
|
3116
3701
|
headers: {
|
|
3117
3702
|
"Content-Type": "application/json",
|
|
3118
|
-
Authorization: `Bearer ${this.apiKey}
|
|
3703
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
3704
|
+
"User-Agent": `getpatter/${VERSION}`
|
|
3119
3705
|
},
|
|
3120
3706
|
body: JSON.stringify(body),
|
|
3121
3707
|
signal: AbortSignal.timeout(3e4)
|
|
@@ -3150,6 +3736,16 @@ async function* parseOpenAISseStream(response) {
|
|
|
3150
3736
|
} catch {
|
|
3151
3737
|
continue;
|
|
3152
3738
|
}
|
|
3739
|
+
const usage = chunk.usage ?? chunk.x_groq?.usage;
|
|
3740
|
+
if (usage) {
|
|
3741
|
+
const cached = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
3742
|
+
yield {
|
|
3743
|
+
type: "usage",
|
|
3744
|
+
inputTokens: usage.prompt_tokens,
|
|
3745
|
+
outputTokens: usage.completion_tokens,
|
|
3746
|
+
cacheReadInputTokens: cached
|
|
3747
|
+
};
|
|
3748
|
+
}
|
|
3153
3749
|
const delta = chunk.choices?.[0]?.delta;
|
|
3154
3750
|
if (!delta) continue;
|
|
3155
3751
|
if (delta.content) {
|
|
@@ -3172,6 +3768,7 @@ async function* parseOpenAISseStream(response) {
|
|
|
3172
3768
|
|
|
3173
3769
|
// src/llm/groq.ts
|
|
3174
3770
|
var LLM3 = class extends GroqLLMProvider {
|
|
3771
|
+
static providerKey = "groq";
|
|
3175
3772
|
constructor(opts = {}) {
|
|
3176
3773
|
const key = opts.apiKey ?? process.env.GROQ_API_KEY;
|
|
3177
3774
|
if (!key) {
|
|
@@ -3182,19 +3779,40 @@ var LLM3 = class extends GroqLLMProvider {
|
|
|
3182
3779
|
super({
|
|
3183
3780
|
apiKey: key,
|
|
3184
3781
|
model: opts.model,
|
|
3185
|
-
baseUrl: opts.baseUrl
|
|
3782
|
+
baseUrl: opts.baseUrl,
|
|
3783
|
+
temperature: opts.temperature,
|
|
3784
|
+
maxTokens: opts.maxTokens,
|
|
3785
|
+
responseFormat: opts.responseFormat,
|
|
3786
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3787
|
+
toolChoice: opts.toolChoice,
|
|
3788
|
+
seed: opts.seed,
|
|
3789
|
+
topP: opts.topP,
|
|
3790
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3791
|
+
presencePenalty: opts.presencePenalty,
|
|
3792
|
+
stop: opts.stop
|
|
3186
3793
|
});
|
|
3187
3794
|
}
|
|
3188
3795
|
};
|
|
3189
3796
|
|
|
3190
3797
|
// src/providers/cerebras-llm.ts
|
|
3191
3798
|
var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
|
|
3192
|
-
var DEFAULT_MODEL3 = "
|
|
3799
|
+
var DEFAULT_MODEL3 = "gpt-oss-120b";
|
|
3800
|
+
var RETRY_BACKOFF_BASE_MS = 500;
|
|
3193
3801
|
var CerebrasLLMProvider = class {
|
|
3194
3802
|
apiKey;
|
|
3195
3803
|
model;
|
|
3196
3804
|
baseUrl;
|
|
3197
3805
|
gzipCompression;
|
|
3806
|
+
temperature;
|
|
3807
|
+
maxTokens;
|
|
3808
|
+
responseFormat;
|
|
3809
|
+
parallelToolCalls;
|
|
3810
|
+
toolChoice;
|
|
3811
|
+
seed;
|
|
3812
|
+
topP;
|
|
3813
|
+
frequencyPenalty;
|
|
3814
|
+
presencePenalty;
|
|
3815
|
+
stop;
|
|
3198
3816
|
constructor(options) {
|
|
3199
3817
|
if (!options.apiKey) {
|
|
3200
3818
|
throw new Error(
|
|
@@ -3204,18 +3822,43 @@ var CerebrasLLMProvider = class {
|
|
|
3204
3822
|
this.apiKey = options.apiKey;
|
|
3205
3823
|
this.model = options.model ?? DEFAULT_MODEL3;
|
|
3206
3824
|
this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
|
|
3207
|
-
this.gzipCompression = options.gzipCompression ??
|
|
3825
|
+
this.gzipCompression = options.gzipCompression ?? true;
|
|
3826
|
+
this.temperature = options.temperature;
|
|
3827
|
+
this.maxTokens = options.maxTokens;
|
|
3828
|
+
this.responseFormat = options.responseFormat;
|
|
3829
|
+
this.parallelToolCalls = options.parallelToolCalls;
|
|
3830
|
+
this.toolChoice = options.toolChoice;
|
|
3831
|
+
this.seed = options.seed;
|
|
3832
|
+
this.topP = options.topP;
|
|
3833
|
+
this.frequencyPenalty = options.frequencyPenalty;
|
|
3834
|
+
this.presencePenalty = options.presencePenalty;
|
|
3835
|
+
this.stop = options.stop;
|
|
3208
3836
|
}
|
|
3209
3837
|
async *stream(messages, tools) {
|
|
3210
3838
|
const body = {
|
|
3211
3839
|
model: this.model,
|
|
3212
3840
|
messages,
|
|
3213
|
-
stream: true
|
|
3841
|
+
stream: true,
|
|
3842
|
+
stream_options: { include_usage: true }
|
|
3214
3843
|
};
|
|
3844
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
3845
|
+
if (this.maxTokens !== void 0) {
|
|
3846
|
+
body.max_completion_tokens = this.maxTokens;
|
|
3847
|
+
}
|
|
3848
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
3849
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
3850
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
3851
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
3852
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
3853
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
3854
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
3855
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
3215
3856
|
if (tools) body.tools = tools;
|
|
3216
3857
|
const headers = {
|
|
3217
3858
|
"Content-Type": "application/json",
|
|
3218
|
-
Authorization: `Bearer ${this.apiKey}
|
|
3859
|
+
Authorization: `Bearer ${this.apiKey}`,
|
|
3860
|
+
// Identify the SDK in upstream logs/rate-limit attribution.
|
|
3861
|
+
"User-Agent": `getpatter/${VERSION}`
|
|
3219
3862
|
};
|
|
3220
3863
|
let payload = JSON.stringify(body);
|
|
3221
3864
|
if (this.gzipCompression) {
|
|
@@ -3225,18 +3868,43 @@ var CerebrasLLMProvider = class {
|
|
|
3225
3868
|
headers["Content-Encoding"] = "gzip";
|
|
3226
3869
|
}
|
|
3227
3870
|
}
|
|
3228
|
-
const
|
|
3229
|
-
|
|
3230
|
-
|
|
3231
|
-
|
|
3232
|
-
|
|
3233
|
-
|
|
3234
|
-
|
|
3235
|
-
|
|
3236
|
-
|
|
3237
|
-
|
|
3871
|
+
const maxAttempts = 2;
|
|
3872
|
+
let lastErrText = "";
|
|
3873
|
+
let lastStatus = 0;
|
|
3874
|
+
for (let attempt = 0; attempt < maxAttempts; attempt++) {
|
|
3875
|
+
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
3876
|
+
method: "POST",
|
|
3877
|
+
headers,
|
|
3878
|
+
body: payload,
|
|
3879
|
+
signal: AbortSignal.timeout(3e4)
|
|
3880
|
+
});
|
|
3881
|
+
if (response.ok) {
|
|
3882
|
+
yield* parseOpenAISseStream(response);
|
|
3883
|
+
return;
|
|
3884
|
+
}
|
|
3885
|
+
lastStatus = response.status;
|
|
3886
|
+
lastErrText = await response.text().catch(() => "");
|
|
3887
|
+
const isRetriable = response.status === 429 || response.status >= 500;
|
|
3888
|
+
const isLastAttempt = attempt >= maxAttempts - 1;
|
|
3889
|
+
if (!isRetriable || isLastAttempt) {
|
|
3890
|
+
if (response.status === 404 && lastErrText.includes("model_not_found")) {
|
|
3891
|
+
getLogger().error(
|
|
3892
|
+
`Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText}`
|
|
3893
|
+
);
|
|
3894
|
+
} else {
|
|
3895
|
+
getLogger().error(`Cerebras API error: ${response.status} ${lastErrText}`);
|
|
3896
|
+
}
|
|
3897
|
+
return;
|
|
3898
|
+
}
|
|
3899
|
+
const advisoryMs = parseRateLimitResetMs(response.headers);
|
|
3900
|
+
const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
|
|
3901
|
+
const delayMs = Math.max(advisoryMs, exponentialMs);
|
|
3902
|
+
getLogger().warn(
|
|
3903
|
+
`Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
|
|
3904
|
+
);
|
|
3905
|
+
await new Promise((r) => setTimeout(r, delayMs));
|
|
3238
3906
|
}
|
|
3239
|
-
|
|
3907
|
+
throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
|
|
3240
3908
|
}
|
|
3241
3909
|
};
|
|
3242
3910
|
async function gzipEncode(data) {
|
|
@@ -3263,9 +3931,28 @@ async function gzipEncode(data) {
|
|
|
3263
3931
|
}
|
|
3264
3932
|
return out;
|
|
3265
3933
|
}
|
|
3934
|
+
function parseRateLimitResetMs(headers) {
|
|
3935
|
+
const candidates = [
|
|
3936
|
+
headers.get("x-ratelimit-reset-tokens-minute"),
|
|
3937
|
+
headers.get("x-ratelimit-reset-requests-minute"),
|
|
3938
|
+
// Some upstreams send the standard ``retry-after`` (seconds).
|
|
3939
|
+
headers.get("retry-after")
|
|
3940
|
+
];
|
|
3941
|
+
let bestMs = 0;
|
|
3942
|
+
for (const raw of candidates) {
|
|
3943
|
+
if (!raw) continue;
|
|
3944
|
+
const parsed = Number.parseFloat(raw);
|
|
3945
|
+
if (Number.isFinite(parsed) && parsed > 0) {
|
|
3946
|
+
const ms = parsed * 1e3;
|
|
3947
|
+
if (ms > bestMs) bestMs = ms;
|
|
3948
|
+
}
|
|
3949
|
+
}
|
|
3950
|
+
return bestMs;
|
|
3951
|
+
}
|
|
3266
3952
|
|
|
3267
3953
|
// src/llm/cerebras.ts
|
|
3268
3954
|
var LLM4 = class extends CerebrasLLMProvider {
|
|
3955
|
+
static providerKey = "cerebras";
|
|
3269
3956
|
constructor(opts = {}) {
|
|
3270
3957
|
const key = opts.apiKey ?? process.env.CEREBRAS_API_KEY;
|
|
3271
3958
|
if (!key) {
|
|
@@ -3277,7 +3964,17 @@ var LLM4 = class extends CerebrasLLMProvider {
|
|
|
3277
3964
|
apiKey: key,
|
|
3278
3965
|
model: opts.model,
|
|
3279
3966
|
baseUrl: opts.baseUrl,
|
|
3280
|
-
gzipCompression: opts.gzipCompression
|
|
3967
|
+
gzipCompression: opts.gzipCompression,
|
|
3968
|
+
temperature: opts.temperature,
|
|
3969
|
+
maxTokens: opts.maxTokens,
|
|
3970
|
+
responseFormat: opts.responseFormat,
|
|
3971
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
3972
|
+
toolChoice: opts.toolChoice,
|
|
3973
|
+
seed: opts.seed,
|
|
3974
|
+
topP: opts.topP,
|
|
3975
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
3976
|
+
presencePenalty: opts.presencePenalty,
|
|
3977
|
+
stop: opts.stop
|
|
3281
3978
|
});
|
|
3282
3979
|
}
|
|
3283
3980
|
};
|
|
@@ -3333,6 +4030,7 @@ var GoogleLLMProvider = class {
|
|
|
3333
4030
|
const decoder = new TextDecoder();
|
|
3334
4031
|
let buffer = "";
|
|
3335
4032
|
let nextIndex = 0;
|
|
4033
|
+
let lastUsage;
|
|
3336
4034
|
while (true) {
|
|
3337
4035
|
const { done, value } = await reader.read();
|
|
3338
4036
|
if (done) break;
|
|
@@ -3350,6 +4048,9 @@ var GoogleLLMProvider = class {
|
|
|
3350
4048
|
} catch {
|
|
3351
4049
|
continue;
|
|
3352
4050
|
}
|
|
4051
|
+
if (payload.usageMetadata) {
|
|
4052
|
+
lastUsage = payload.usageMetadata;
|
|
4053
|
+
}
|
|
3353
4054
|
const candidate = payload.candidates?.[0];
|
|
3354
4055
|
const parts = candidate?.content?.parts ?? [];
|
|
3355
4056
|
for (const part of parts) {
|
|
@@ -3372,6 +4073,14 @@ var GoogleLLMProvider = class {
|
|
|
3372
4073
|
}
|
|
3373
4074
|
}
|
|
3374
4075
|
}
|
|
4076
|
+
if (lastUsage) {
|
|
4077
|
+
yield {
|
|
4078
|
+
type: "usage",
|
|
4079
|
+
inputTokens: lastUsage.promptTokenCount,
|
|
4080
|
+
outputTokens: lastUsage.candidatesTokenCount,
|
|
4081
|
+
cacheReadInputTokens: lastUsage.cachedContentTokenCount ?? 0
|
|
4082
|
+
};
|
|
4083
|
+
}
|
|
3375
4084
|
yield { type: "done" };
|
|
3376
4085
|
}
|
|
3377
4086
|
};
|
|
@@ -3461,6 +4170,7 @@ function toGeminiContents(messages) {
|
|
|
3461
4170
|
|
|
3462
4171
|
// src/llm/google.ts
|
|
3463
4172
|
var LLM5 = class extends GoogleLLMProvider {
|
|
4173
|
+
static providerKey = "google";
|
|
3464
4174
|
constructor(opts = {}) {
|
|
3465
4175
|
const key = opts.apiKey ?? process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
|
|
3466
4176
|
if (!key) {
|
|
@@ -3478,6 +4188,279 @@ var LLM5 = class extends GoogleLLMProvider {
|
|
|
3478
4188
|
}
|
|
3479
4189
|
};
|
|
3480
4190
|
|
|
4191
|
+
// src/providers/silero-vad.ts
|
|
4192
|
+
import { createRequire } from "module";
|
|
4193
|
+
import * as fs from "fs";
|
|
4194
|
+
import * as path from "path";
|
|
4195
|
+
import { fileURLToPath } from "url";
|
|
4196
|
+
var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
|
|
4197
|
+
function resolveModuleDir() {
|
|
4198
|
+
try {
|
|
4199
|
+
const cjsDir = new Function("return typeof __dirname !== 'undefined' ? __dirname : null")();
|
|
4200
|
+
if (typeof cjsDir === "string") return cjsDir;
|
|
4201
|
+
} catch {
|
|
4202
|
+
}
|
|
4203
|
+
try {
|
|
4204
|
+
const url = import.meta.url;
|
|
4205
|
+
if (url) return path.dirname(fileURLToPath(url));
|
|
4206
|
+
} catch {
|
|
4207
|
+
}
|
|
4208
|
+
return process.cwd();
|
|
4209
|
+
}
|
|
4210
|
+
var MODULE_DIR = resolveModuleDir();
|
|
4211
|
+
function resolveDefaultModelPath() {
|
|
4212
|
+
const candidates = [
|
|
4213
|
+
path.join(MODULE_DIR, "resources", "silero_vad.onnx"),
|
|
4214
|
+
path.join(MODULE_DIR, "..", "resources", "silero_vad.onnx")
|
|
4215
|
+
];
|
|
4216
|
+
for (const c of candidates) if (fs.existsSync(c)) return c;
|
|
4217
|
+
return candidates[0];
|
|
4218
|
+
}
|
|
4219
|
+
var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
|
|
4220
|
+
async function loadOnnxRuntime() {
|
|
4221
|
+
let firstErr;
|
|
4222
|
+
try {
|
|
4223
|
+
const mod = await import("./dist-YRCCJQ26.mjs");
|
|
4224
|
+
return mod;
|
|
4225
|
+
} catch (e) {
|
|
4226
|
+
firstErr = e;
|
|
4227
|
+
}
|
|
4228
|
+
try {
|
|
4229
|
+
const req = createRequire(path.join(process.cwd(), "package.json"));
|
|
4230
|
+
return req("onnxruntime-node");
|
|
4231
|
+
} catch (e) {
|
|
4232
|
+
const detail = e?.message ?? String(e);
|
|
4233
|
+
const original = firstErr?.message ?? String(firstErr);
|
|
4234
|
+
throw new Error(
|
|
4235
|
+
`
|
|
4236
|
+
SileroVAD requires the "onnxruntime-node" package, which could not be resolved.
|
|
4237
|
+
|
|
4238
|
+
Install: npm install onnxruntime-node
|
|
4239
|
+
|
|
4240
|
+
This is an optional peer dependency of getpatter (~210 MB) \u2014 it is only
|
|
4241
|
+
needed when you use SileroVAD in pipeline mode.
|
|
4242
|
+
|
|
4243
|
+
import() failed: ${original}
|
|
4244
|
+
cwd-require failed: ${detail}
|
|
4245
|
+
`
|
|
4246
|
+
);
|
|
4247
|
+
}
|
|
4248
|
+
}
|
|
4249
|
+
var ExpFilter = class {
|
|
4250
|
+
constructor(alpha) {
|
|
4251
|
+
this.alpha = alpha;
|
|
4252
|
+
if (!(alpha > 0 && alpha <= 1)) {
|
|
4253
|
+
throw new Error("alpha must be in (0, 1].");
|
|
4254
|
+
}
|
|
4255
|
+
}
|
|
4256
|
+
filtered = null;
|
|
4257
|
+
apply(exp, sample) {
|
|
4258
|
+
if (this.filtered === null) {
|
|
4259
|
+
this.filtered = sample;
|
|
4260
|
+
} else {
|
|
4261
|
+
const a = Math.pow(this.alpha, exp);
|
|
4262
|
+
this.filtered = a * this.filtered + (1 - a) * sample;
|
|
4263
|
+
}
|
|
4264
|
+
return this.filtered;
|
|
4265
|
+
}
|
|
4266
|
+
reset() {
|
|
4267
|
+
this.filtered = null;
|
|
4268
|
+
}
|
|
4269
|
+
};
|
|
4270
|
+
var OnnxModel = class {
|
|
4271
|
+
constructor(runtime, session, sampleRate) {
|
|
4272
|
+
this.runtime = runtime;
|
|
4273
|
+
this.session = session;
|
|
4274
|
+
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4275
|
+
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4276
|
+
}
|
|
4277
|
+
this.sampleRate = sampleRate;
|
|
4278
|
+
this.windowSizeSamples = sampleRate === 8e3 ? 256 : 512;
|
|
4279
|
+
this.contextSize = sampleRate === 8e3 ? 32 : 64;
|
|
4280
|
+
this.context = new Float32Array(this.contextSize);
|
|
4281
|
+
this.rnnState = new Float32Array(2 * 1 * 128);
|
|
4282
|
+
this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
|
|
4283
|
+
this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
|
|
4284
|
+
}
|
|
4285
|
+
sampleRate;
|
|
4286
|
+
windowSizeSamples;
|
|
4287
|
+
contextSize;
|
|
4288
|
+
context;
|
|
4289
|
+
rnnState;
|
|
4290
|
+
inputBuffer;
|
|
4291
|
+
sampleRateTensor;
|
|
4292
|
+
async run(window) {
|
|
4293
|
+
if (window.length !== this.windowSizeSamples) {
|
|
4294
|
+
throw new Error(
|
|
4295
|
+
`window must have exactly ${this.windowSizeSamples} samples, got ${window.length}`
|
|
4296
|
+
);
|
|
4297
|
+
}
|
|
4298
|
+
this.inputBuffer.set(this.context, 0);
|
|
4299
|
+
this.inputBuffer.set(window, this.contextSize);
|
|
4300
|
+
const { Tensor } = this.runtime;
|
|
4301
|
+
const feeds = {
|
|
4302
|
+
input: new Tensor("float32", this.inputBuffer, [1, this.inputBuffer.length]),
|
|
4303
|
+
state: new Tensor("float32", this.rnnState, [2, 1, 128]),
|
|
4304
|
+
sr: new Tensor("int64", this.sampleRateTensor, [])
|
|
4305
|
+
};
|
|
4306
|
+
const results = await this.session.run(feeds);
|
|
4307
|
+
const outputKey = Object.keys(results).find((k) => k !== "stateN") ?? "output";
|
|
4308
|
+
const stateKey = "stateN" in results ? "stateN" : Object.keys(results).find((k) => k !== outputKey);
|
|
4309
|
+
const out = results[outputKey];
|
|
4310
|
+
const newState = stateKey ? results[stateKey] : void 0;
|
|
4311
|
+
if (newState && newState.data instanceof Float32Array) {
|
|
4312
|
+
this.rnnState = Float32Array.from(newState.data);
|
|
4313
|
+
}
|
|
4314
|
+
this.context = this.inputBuffer.slice(-this.contextSize);
|
|
4315
|
+
const data = out.data;
|
|
4316
|
+
return data[0] ?? 0;
|
|
4317
|
+
}
|
|
4318
|
+
};
|
|
4319
|
+
var SileroVAD = class _SileroVAD {
|
|
4320
|
+
constructor(model, opts) {
|
|
4321
|
+
this.model = model;
|
|
4322
|
+
this.opts = opts;
|
|
4323
|
+
}
|
|
4324
|
+
pending = new Float32Array(0);
|
|
4325
|
+
expFilter = new ExpFilter(0.35);
|
|
4326
|
+
pubSpeaking = false;
|
|
4327
|
+
speechThresholdDuration = 0;
|
|
4328
|
+
silenceThresholdDuration = 0;
|
|
4329
|
+
closed = false;
|
|
4330
|
+
/**
|
|
4331
|
+
* Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
|
|
4332
|
+
* Throws if `onnxruntime-node` is not installed.
|
|
4333
|
+
*/
|
|
4334
|
+
static async load(options = {}) {
|
|
4335
|
+
const sampleRate = options.sampleRate ?? 16e3;
|
|
4336
|
+
if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
|
|
4337
|
+
throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
|
|
4338
|
+
}
|
|
4339
|
+
const activationThreshold = options.activationThreshold ?? 0.5;
|
|
4340
|
+
const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
|
|
4341
|
+
if (deactivationThreshold <= 0) {
|
|
4342
|
+
throw new Error("deactivationThreshold must be greater than 0");
|
|
4343
|
+
}
|
|
4344
|
+
const runtime = await loadOnnxRuntime();
|
|
4345
|
+
const modelPath = options.onnxFilePath ?? DEFAULT_MODEL_PATH;
|
|
4346
|
+
const session = await runtime.InferenceSession.create(modelPath, {
|
|
4347
|
+
interOpNumThreads: 1,
|
|
4348
|
+
intraOpNumThreads: 1,
|
|
4349
|
+
executionMode: "sequential",
|
|
4350
|
+
executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
|
|
4351
|
+
});
|
|
4352
|
+
const model = new OnnxModel(runtime, session, sampleRate);
|
|
4353
|
+
return new _SileroVAD(model, {
|
|
4354
|
+
minSpeechDuration: options.minSpeechDuration ?? 0.05,
|
|
4355
|
+
minSilenceDuration: options.minSilenceDuration ?? 0.55,
|
|
4356
|
+
prefixPaddingDuration: options.prefixPaddingDuration ?? 0.5,
|
|
4357
|
+
activationThreshold,
|
|
4358
|
+
deactivationThreshold,
|
|
4359
|
+
sampleRate
|
|
4360
|
+
});
|
|
4361
|
+
}
|
|
4362
|
+
/**
|
|
4363
|
+
* Internal factory used by tests — bypasses onnxruntime-node loading.
|
|
4364
|
+
* @internal
|
|
4365
|
+
*/
|
|
4366
|
+
static fromOnnxModel(runtime, session, options) {
|
|
4367
|
+
const model = new OnnxModel(runtime, session, options.sampleRate);
|
|
4368
|
+
return new _SileroVAD(model, options);
|
|
4369
|
+
}
|
|
4370
|
+
get sampleRate() {
|
|
4371
|
+
return this.opts.sampleRate;
|
|
4372
|
+
}
|
|
4373
|
+
/**
|
|
4374
|
+
* Number of int16 PCM samples that must be provided per call to
|
|
4375
|
+
* processFrame for the model to run one inference window.
|
|
4376
|
+
*
|
|
4377
|
+
* Constraint (ported from LiveKit Agents / Silero ONNX spec):
|
|
4378
|
+
* - 16 000 Hz → 512 samples (32 ms)
|
|
4379
|
+
* - 8 000 Hz → 256 samples (32 ms)
|
|
4380
|
+
*
|
|
4381
|
+
* Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
|
|
4382
|
+
* should buffer incoming audio until at least numFramesRequired() int16
|
|
4383
|
+
* samples are available before calling processFrame. The provider
|
|
4384
|
+
* internally buffers partial windows so smaller chunks are also safe, but
|
|
4385
|
+
* passing exactly one window per call minimises heap allocation.
|
|
4386
|
+
*/
|
|
4387
|
+
numFramesRequired() {
|
|
4388
|
+
return this.opts.sampleRate === 8e3 ? 256 : 512;
|
|
4389
|
+
}
|
|
4390
|
+
async processFrame(pcmChunk, sampleRate) {
|
|
4391
|
+
if (this.closed) {
|
|
4392
|
+
throw new Error("SileroVAD is closed");
|
|
4393
|
+
}
|
|
4394
|
+
if (sampleRate !== this.opts.sampleRate) {
|
|
4395
|
+
throw new Error(
|
|
4396
|
+
`input sampleRate ${sampleRate} does not match model sampleRate ${this.opts.sampleRate}; resampling is not implemented in the Patter port`
|
|
4397
|
+
);
|
|
4398
|
+
}
|
|
4399
|
+
if (pcmChunk.length === 0) {
|
|
4400
|
+
return null;
|
|
4401
|
+
}
|
|
4402
|
+
const numSamples = Math.floor(pcmChunk.length / 2);
|
|
4403
|
+
if (numSamples === 0) {
|
|
4404
|
+
return null;
|
|
4405
|
+
}
|
|
4406
|
+
const samples = new Float32Array(numSamples);
|
|
4407
|
+
for (let i = 0; i < numSamples; i++) {
|
|
4408
|
+
samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
|
|
4409
|
+
}
|
|
4410
|
+
const merged = new Float32Array(this.pending.length + samples.length);
|
|
4411
|
+
merged.set(this.pending, 0);
|
|
4412
|
+
merged.set(samples, this.pending.length);
|
|
4413
|
+
this.pending = merged;
|
|
4414
|
+
const windowSize = this.model.windowSizeSamples;
|
|
4415
|
+
let event = null;
|
|
4416
|
+
while (this.pending.length >= windowSize) {
|
|
4417
|
+
const window = this.pending.slice(0, windowSize);
|
|
4418
|
+
this.pending = this.pending.slice(windowSize);
|
|
4419
|
+
const rawP = await this.model.run(window);
|
|
4420
|
+
const p = this.expFilter.apply(1, rawP);
|
|
4421
|
+
const windowDuration = windowSize / this.opts.sampleRate;
|
|
4422
|
+
const transition = this.advanceState(p, windowDuration);
|
|
4423
|
+
if (transition !== null) {
|
|
4424
|
+
event = transition;
|
|
4425
|
+
}
|
|
4426
|
+
}
|
|
4427
|
+
return event;
|
|
4428
|
+
}
|
|
4429
|
+
advanceState(p, windowDuration) {
|
|
4430
|
+
const opts = this.opts;
|
|
4431
|
+
if (p >= opts.activationThreshold || this.pubSpeaking && p > opts.deactivationThreshold) {
|
|
4432
|
+
this.speechThresholdDuration += windowDuration;
|
|
4433
|
+
this.silenceThresholdDuration = 0;
|
|
4434
|
+
if (!this.pubSpeaking) {
|
|
4435
|
+
if (this.speechThresholdDuration >= opts.minSpeechDuration) {
|
|
4436
|
+
this.pubSpeaking = true;
|
|
4437
|
+
return {
|
|
4438
|
+
type: "speech_start",
|
|
4439
|
+
confidence: p,
|
|
4440
|
+
durationMs: this.speechThresholdDuration * 1e3
|
|
4441
|
+
};
|
|
4442
|
+
}
|
|
4443
|
+
}
|
|
4444
|
+
} else {
|
|
4445
|
+
this.silenceThresholdDuration += windowDuration;
|
|
4446
|
+
this.speechThresholdDuration = 0;
|
|
4447
|
+
if (this.pubSpeaking && this.silenceThresholdDuration >= opts.minSilenceDuration) {
|
|
4448
|
+
this.pubSpeaking = false;
|
|
4449
|
+
return {
|
|
4450
|
+
type: "speech_end",
|
|
4451
|
+
confidence: p,
|
|
4452
|
+
durationMs: this.silenceThresholdDuration * 1e3
|
|
4453
|
+
};
|
|
4454
|
+
}
|
|
4455
|
+
}
|
|
4456
|
+
return null;
|
|
4457
|
+
}
|
|
4458
|
+
async close() {
|
|
4459
|
+
if (this.closed) return;
|
|
4460
|
+
this.closed = true;
|
|
4461
|
+
}
|
|
4462
|
+
};
|
|
4463
|
+
|
|
3481
4464
|
// src/carriers/twilio.ts
|
|
3482
4465
|
var Carrier = class {
|
|
3483
4466
|
kind = "twilio";
|
|
@@ -3826,7 +4809,7 @@ var DebouncedCall = class {
|
|
|
3826
4809
|
this.timer = setTimeout(() => {
|
|
3827
4810
|
this.timer = null;
|
|
3828
4811
|
Promise.resolve(this.callback()).catch((err) => {
|
|
3829
|
-
|
|
4812
|
+
getLogger().error("IVR silence callback raised:", err);
|
|
3830
4813
|
});
|
|
3831
4814
|
}, this.delayMs);
|
|
3832
4815
|
}
|
|
@@ -3882,7 +4865,7 @@ var IVRActivity = class {
|
|
|
3882
4865
|
try {
|
|
3883
4866
|
await this.onLoopDetected();
|
|
3884
4867
|
} catch (err) {
|
|
3885
|
-
|
|
4868
|
+
getLogger().error("IVR onLoopDetected callback raised:", err);
|
|
3886
4869
|
}
|
|
3887
4870
|
}
|
|
3888
4871
|
}
|
|
@@ -3920,7 +4903,7 @@ var IVRActivity = class {
|
|
|
3920
4903
|
try {
|
|
3921
4904
|
await this.onSilence();
|
|
3922
4905
|
} catch (err) {
|
|
3923
|
-
|
|
4906
|
+
getLogger().error("IVR onSilence callback raised:", err);
|
|
3924
4907
|
}
|
|
3925
4908
|
}
|
|
3926
4909
|
}
|
|
@@ -3970,9 +4953,9 @@ var IVRActivity = class {
|
|
|
3970
4953
|
};
|
|
3971
4954
|
|
|
3972
4955
|
// src/services/background-audio.ts
|
|
3973
|
-
import { promises as
|
|
3974
|
-
import
|
|
3975
|
-
import { fileURLToPath } from "url";
|
|
4956
|
+
import { promises as fs2 } from "fs";
|
|
4957
|
+
import path2 from "path";
|
|
4958
|
+
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
3976
4959
|
var BuiltinAudioClip = {
|
|
3977
4960
|
CITY_AMBIENCE: "city-ambience.ogg",
|
|
3978
4961
|
FOREST_AMBIENCE: "forest-ambience.ogg",
|
|
@@ -3984,8 +4967,8 @@ var BuiltinAudioClip = {
|
|
|
3984
4967
|
};
|
|
3985
4968
|
/**
 * Resolve the on-disk path of a bundled audio clip.
 *
 * The base directory is this module's directory when `import.meta.url` is
 * available, else `__dirname`, else the current working directory. The clip
 * is looked up under `../resources/audio` relative to that base.
 *
 * @param {string} clip - clip file name, e.g. a `BuiltinAudioClip` value.
 * @returns {string} absolute path to the clip.
 */
function builtinClipPath(clip) {
  const moduleUrl = typeof import.meta !== "undefined" ? import.meta.url : void 0;
  let baseDir;
  if (moduleUrl) {
    baseDir = path2.dirname(fileURLToPath2(moduleUrl));
  } else if (typeof __dirname !== "undefined") {
    baseDir = __dirname;
  } else {
    baseDir = process.cwd();
  }
  return path2.resolve(baseDir, "..", "resources", "audio", clip);
}
|
|
3990
4973
|
var INT16_MIN = -32768;
|
|
3991
4974
|
var INT16_MAX = 32767;
|
|
@@ -4154,7 +5137,7 @@ var BackgroundAudioPlayer = class {
|
|
|
4154
5137
|
return source.decode(source.path);
|
|
4155
5138
|
case "builtin": {
|
|
4156
5139
|
const p = builtinClipPath(source.clip);
|
|
4157
|
-
const header = await
|
|
5140
|
+
const header = await fs2.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
|
|
4158
5141
|
if (header.toString("ascii") !== "OggS") {
|
|
4159
5142
|
throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
|
|
4160
5143
|
}
|
|
@@ -4184,15 +5167,264 @@ var BackgroundAudioPlayer = class {
|
|
|
4184
5167
|
/**
 * Type guard: does `value` look like an audio config object?
 *
 * A config is a non-null object carrying a non-null object `source`.
 * `typeof null === "object"`, so the null check on `source` is explicit;
 * without it `{ source: null }` would be accepted.
 *
 * @param {unknown} value - candidate value.
 * @returns {boolean} `true` when `value` has an object-valued `source`.
 */
function isAudioConfig(value) {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  if (!("source" in value)) {
    return false;
  }
  return typeof value.source === "object" && value.source !== null;
}
|
|
5170
|
+
|
|
5171
|
+
// src/providers/twilio-adapter.ts
|
|
5172
|
+
var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
/**
 * Minimal Twilio REST client built on `fetch`.
 *
 * Requests are scoped to `/Accounts/{accountSid}` and authenticated with
 * HTTP Basic auth derived from the account SID and auth token.
 */
var TwilioAdapter = class _TwilioAdapter {
  accountSid;
  region;
  baseUrl;
  authHeader;
  /**
   * @param {string} accountSid - Twilio account SID (required).
   * @param {string} authToken - Twilio auth token (required).
   * @param {{ region?: string }} [opts] - when `region` is set, requests go
   *   to `https://api.{region}.twilio.com` instead of the default host.
   * @throws {Error} when `accountSid` or `authToken` is missing.
   */
  constructor(accountSid, authToken, opts = {}) {
    if (!accountSid) throw new Error("TwilioAdapter: accountSid is required");
    if (!authToken) throw new Error("TwilioAdapter: authToken is required");
    this.accountSid = accountSid;
    this.region = opts.region;
    this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
    this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
  }
  /**
   * Send one request to the account-scoped API and parse the JSON reply.
   *
   * @param {string} method - HTTP method.
   * @param {string} path3 - path (and optional query) below the account root.
   * @param {URLSearchParams} [body] - form-encoded body, if any.
   * @returns {Promise<object>} parsed JSON, or `{}` for an empty body.
   * @throws {Error} on a non-2xx status or a non-JSON response body; the
   *   request is aborted after 30 seconds.
   */
  async request(method, path3, body) {
    const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path3}`;
    const headers = { Authorization: this.authHeader };
    if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
    const response = await fetch(url, {
      method,
      headers,
      body: body ? body.toString() : void 0,
      signal: AbortSignal.timeout(3e4)
    });
    const text = await response.text();
    if (!response.ok) {
      throw new Error(`Twilio ${method} ${path3} failed: ${response.status} ${text}`);
    }
    if (!text) return {};
    try {
      return JSON.parse(text);
    } catch (e) {
      // Keep the parse error reachable for callers that inspect `cause`.
      throw new Error(`Twilio returned non-JSON response: ${String(e)}`, { cause: e });
    }
  }
  /**
   * Provision a local phone number in the given country.
   *
   * Lists available local numbers, then purchases the first match.
   *
   * @param {{ countryCode: string, areaCode?: string }} opts
   * @returns {Promise<{ phoneNumber: string, sid: string }>}
   * @throws {Error} when no number is available or the purchase response
   *   lacks `sid` / `phone_number`.
   */
  async provisionNumber(opts) {
    const country = encodeURIComponent(opts.countryCode);
    const queryParts = ["PageSize=1"];
    if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
    const path3 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
    const available = await this.request("GET", path3);
    const first = available.available_phone_numbers?.[0]?.phone_number;
    if (!first) {
      throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
    }
    const body = new URLSearchParams({ PhoneNumber: first });
    const purchased = await this.request(
      "POST",
      "/IncomingPhoneNumbers.json",
      body
    );
    if (!purchased.sid || !purchased.phone_number) {
      throw new Error("TwilioAdapter: malformed response from IncomingPhoneNumbers.create");
    }
    return { phoneNumber: purchased.phone_number, sid: purchased.sid };
  }
  /**
   * Update an already-purchased number to point at our voice webhook.
   *
   * @param {string} phoneNumberSid - SID of the purchased number (required).
   * @param {{ voiceUrl: string, statusCallback?: string }} opts
   * @returns {Promise<void>}
   */
  async configureNumber(phoneNumberSid, opts) {
    if (!phoneNumberSid) throw new Error("TwilioAdapter: phoneNumberSid is required");
    const body = new URLSearchParams({
      VoiceUrl: opts.voiceUrl,
      VoiceMethod: "POST"
    });
    if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
    await this.request(
      "POST",
      `/IncomingPhoneNumbers/${encodeURIComponent(phoneNumberSid)}.json`,
      body
    );
  }
  /**
   * Place an outbound call. Returns the Twilio call SID.
   *
   * `url` takes precedence; otherwise inline TwiML is generated from
   * `streamUrl`. Entries in `extraParams` are applied last and therefore
   * override any parameter set earlier.
   *
   * @param {{ from: string, to: string, url?: string, streamUrl?: string,
   *   statusCallback?: string, machineDetection?: string,
   *   extraParams?: Record<string, string> }} opts
   * @returns {Promise<{ callSid: string }>}
   * @throws {Error} when neither `url` nor `streamUrl` is given, or the
   *   response carries no SID.
   */
  async initiateCall(opts) {
    if (!opts.url && !opts.streamUrl) {
      throw new Error("TwilioAdapter: initiateCall requires either url or streamUrl");
    }
    const body = new URLSearchParams({
      From: opts.from,
      To: opts.to
    });
    if (opts.url) {
      body.set("Url", opts.url);
    } else if (opts.streamUrl) {
      body.set("Twiml", _TwilioAdapter.generateStreamTwiml(opts.streamUrl));
    }
    if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
    if (opts.machineDetection) body.set("MachineDetection", opts.machineDetection);
    if (opts.extraParams) {
      for (const [key, value] of Object.entries(opts.extraParams)) {
        body.set(key, value);
      }
    }
    const call = await this.request("POST", "/Calls.json", body);
    if (!call.sid) {
      throw new Error("TwilioAdapter: Calls.create returned no SID");
    }
    return { callSid: call.sid };
  }
  /**
   * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
   * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
   *
   * The URL is XML-escaped before being placed in the attribute. `&` is
   * replaced first so the entities produced by later replacements are not
   * escaped a second time.
   *
   * @param {string} streamUrl - stream endpoint URL.
   * @returns {string} the TwiML document.
   */
  static generateStreamTwiml(streamUrl) {
    const escaped = streamUrl
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;")
      .replace(/'/g, "&apos;");
    return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
  }
  /**
   * Force-complete an in-progress call.
   *
   * Failures are logged as a warning and then rethrown.
   *
   * @param {string} callSid - SID of the call to end (required).
   * @returns {Promise<void>}
   */
  async endCall(callSid) {
    if (!callSid) throw new Error("TwilioAdapter: callSid is required");
    const body = new URLSearchParams({ Status: "completed" });
    try {
      await this.request(
        "POST",
        `/Calls/${encodeURIComponent(callSid)}.json`,
        body
      );
    } catch (err) {
      getLogger().warn(`[TwilioAdapter] endCall failed for ${callSid}: ${String(err)}`);
      throw err;
    }
  }
};
|
|
5298
|
+
|
|
5299
|
+
// src/providers/telnyx-adapter.ts
|
|
5300
|
+
import { randomUUID as randomUUID2 } from "crypto";
|
|
5301
|
+
var TELNYX_API_BASE = "https://api.telnyx.com/v2";
/**
 * Minimal Telnyx v2 REST client built on `fetch`.
 *
 * Requests are authenticated with a Bearer API key and carry JSON bodies.
 */
var TelnyxAdapter = class {
  apiKey;
  connectionId;
  baseUrl = TELNYX_API_BASE;
  /**
   * @param {string} apiKey - Telnyx API key (required).
   * @param {string} [connectionId] - default connection id used by
   *   `provisionNumber` and `initiateCall` when none is passed per call.
   * @throws {Error} when `apiKey` is missing.
   */
  constructor(apiKey, connectionId) {
    if (!apiKey) throw new Error("TelnyxAdapter: apiKey is required");
    this.apiKey = apiKey;
    this.connectionId = connectionId;
  }
  /**
   * Send one request to the Telnyx API and parse the JSON reply.
   *
   * @param {string} method - HTTP method.
   * @param {string} path3 - path (and optional query) below the API base.
   * @param {object} [body] - JSON-serialisable body; omitted when undefined.
   * @returns {Promise<object>} parsed JSON, or `{}` for an empty body.
   * @throws {Error} on a non-2xx status or a non-JSON response body; the
   *   request is aborted after 30 seconds.
   */
  async request(method, path3, body) {
    const url = `${this.baseUrl}${path3}`;
    const headers = {
      Authorization: `Bearer ${this.apiKey}`
    };
    if (body !== void 0) headers["Content-Type"] = "application/json";
    const response = await fetch(url, {
      method,
      headers,
      body: body !== void 0 ? JSON.stringify(body) : void 0,
      signal: AbortSignal.timeout(3e4)
    });
    const text = await response.text();
    if (!response.ok) {
      throw new Error(`Telnyx ${method} ${path3} failed: ${response.status} ${text}`);
    }
    if (!text) return {};
    try {
      return JSON.parse(text);
    } catch (e) {
      // Keep the parse error reachable for callers that inspect `cause`.
      throw new Error(`Telnyx returned non-JSON response: ${String(e)}`, { cause: e });
    }
  }
  /**
   * Search available numbers for ``countryCode`` and place an order for the
   * first match. Returns both the reserved E.164 number and the order ID.
   *
   * `orderId` falls back to an empty string when the order response carries
   * no `data.id`.
   *
   * @param {{ countryCode: string }} opts
   * @returns {Promise<{ phoneNumber: string, orderId: string }>}
   * @throws {Error} when the search returns no number.
   */
  async provisionNumber(opts) {
    const country = encodeURIComponent(opts.countryCode);
    const searchPath = `/available_phone_numbers?filter[phone_number][country_code]=${country}&filter[limit]=1`;
    const available = await this.request("GET", searchPath);
    const chosen = available.data?.[0]?.phone_number;
    if (!chosen) {
      throw new Error(`TelnyxAdapter: no numbers available for ${opts.countryCode}`);
    }
    const orderBody = {
      phone_numbers: [{ phone_number: chosen }]
    };
    if (this.connectionId) {
      orderBody.connection_id = this.connectionId;
    }
    const order = await this.request(
      "POST",
      "/number_orders",
      orderBody
    );
    const orderId = order.data?.id ?? "";
    return { phoneNumber: chosen, orderId };
  }
  /**
   * Attach a number to a Call Control Application.
   *
   * @param {string} phoneNumber - number to configure (required).
   * @param {{ connectionId: string }} opts - target connection (required).
   * @returns {Promise<void>}
   */
  async configureNumber(phoneNumber, opts) {
    if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
    if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
    await this.request(
      "PATCH",
      `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
      { connection_id: opts.connectionId, tech_prefix_enabled: false }
    );
  }
  /**
   * Place an outbound call on the Call Control Application.
   *
   * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
   * is configured on the Application itself (or started explicitly via a
   * ``streaming_start`` command). Passing ``stream_url`` on dial is a
   * deprecated code path that Telnyx rejects in newer API versions.
   *
   * @param {{ from: string, to: string, connectionId?: string,
   *   clientState?: string }} opts - `connectionId` overrides the adapter
   *   default; `clientState` is sent base64-encoded.
   * @returns {Promise<{ callControlId: string }>}
   * @throws {Error} when no connection id is available or the response has
   *   no `call_control_id`.
   */
  async initiateCall(opts) {
    const connectionId = opts.connectionId ?? this.connectionId;
    if (!connectionId) {
      throw new Error("TelnyxAdapter: connectionId must be provided to initiateCall");
    }
    const payload = {
      connection_id: connectionId,
      from: opts.from,
      to: opts.to
    };
    if (opts.clientState) {
      payload.client_state = Buffer.from(opts.clientState, "utf-8").toString("base64");
    }
    const resp = await this.request("POST", "/calls", payload);
    const callControlId = resp.data?.call_control_id;
    if (!callControlId) {
      throw new Error("TelnyxAdapter: /calls returned no call_control_id");
    }
    return { callControlId };
  }
  /**
   * Hang up an in-progress call.
   *
   * A random `command_id` is generated unless `opts.commandId` is given.
   * Failures are logged as a warning and then rethrown.
   *
   * @param {string} callControlId - id of the call to hang up (required).
   * @param {{ commandId?: string }} [opts]
   * @returns {Promise<void>}
   */
  async endCall(callControlId, opts = {}) {
    if (!callControlId) throw new Error("TelnyxAdapter: callControlId is required");
    const encoded = encodeURIComponent(callControlId);
    const body = {
      command_id: opts.commandId ?? randomUUID2()
    };
    try {
      await this.request(
        "POST",
        `/calls/${encoded}/actions/hangup`,
        body
      );
    } catch (err) {
      getLogger().warn(
        `[TelnyxAdapter] endCall failed for ${callControlId}: ${String(err)}`
      );
      throw err;
    }
  }
};
|
|
4187
5419
|
export {
|
|
4188
5420
|
AllProvidersFailedError,
|
|
4189
5421
|
LLM2 as AnthropicLLM,
|
|
4190
|
-
|
|
5422
|
+
STT6 as AssemblyAISTT,
|
|
4191
5423
|
AuthenticationError,
|
|
4192
5424
|
BackgroundAudioPlayer,
|
|
4193
5425
|
BuiltinAudioClip,
|
|
4194
5426
|
CallMetricsAccumulator,
|
|
4195
|
-
|
|
5427
|
+
STT4 as CartesiaSTT,
|
|
4196
5428
|
TTS3 as CartesiaTTS,
|
|
4197
5429
|
LLM4 as CerebrasLLM,
|
|
4198
5430
|
ChatContext,
|
|
@@ -4201,9 +5433,11 @@ export {
|
|
|
4201
5433
|
DEFAULT_PRICING,
|
|
4202
5434
|
DTMF_EVENTS,
|
|
4203
5435
|
STT as DeepgramSTT,
|
|
5436
|
+
DefaultToolExecutor,
|
|
4204
5437
|
ConvAI as ElevenLabsConvAI,
|
|
4205
5438
|
ElevenLabsConvAIAdapter,
|
|
4206
5439
|
TTS as ElevenLabsTTS,
|
|
5440
|
+
EventBus,
|
|
4207
5441
|
FallbackLLMProvider,
|
|
4208
5442
|
GEMINI_DEFAULT_INPUT_SR,
|
|
4209
5443
|
GEMINI_DEFAULT_OUTPUT_SR,
|
|
@@ -4215,31 +5449,48 @@ export {
|
|
|
4215
5449
|
LLMLoop,
|
|
4216
5450
|
TTS5 as LMNTTTS,
|
|
4217
5451
|
MetricsStore,
|
|
5452
|
+
Ngrok,
|
|
4218
5453
|
LLM as OpenAILLM,
|
|
4219
5454
|
OpenAILLMProvider,
|
|
4220
5455
|
Realtime as OpenAIRealtime,
|
|
4221
5456
|
OpenAIRealtimeAdapter,
|
|
4222
5457
|
TTS2 as OpenAITTS,
|
|
5458
|
+
STT3 as OpenAITranscribeSTT,
|
|
4223
5459
|
PartialStreamError,
|
|
4224
5460
|
Patter,
|
|
4225
5461
|
PatterConnectionError,
|
|
4226
5462
|
PatterError,
|
|
5463
|
+
PatterTool,
|
|
5464
|
+
PcmCarry,
|
|
4227
5465
|
PipelineHookExecutor,
|
|
4228
5466
|
ProvisionError,
|
|
5467
|
+
RateLimitError,
|
|
4229
5468
|
RemoteMessageHandler,
|
|
4230
5469
|
TTS4 as RimeTTS,
|
|
5470
|
+
SPAN_BARGEIN,
|
|
5471
|
+
SPAN_CALL,
|
|
5472
|
+
SPAN_ENDPOINT,
|
|
5473
|
+
SPAN_LLM,
|
|
5474
|
+
SPAN_STT,
|
|
5475
|
+
SPAN_TOOL,
|
|
5476
|
+
SPAN_TTS,
|
|
4231
5477
|
SentenceChunker,
|
|
4232
|
-
|
|
5478
|
+
SileroVAD,
|
|
5479
|
+
STT5 as SonioxSTT,
|
|
5480
|
+
StatefulResampler,
|
|
4233
5481
|
Static as StaticTunnel,
|
|
4234
5482
|
Carrier2 as Telnyx,
|
|
5483
|
+
TelnyxAdapter,
|
|
4235
5484
|
TestSession,
|
|
4236
5485
|
TfidfLoopDetector,
|
|
4237
5486
|
Tool,
|
|
4238
5487
|
Carrier as Twilio,
|
|
5488
|
+
TwilioAdapter,
|
|
4239
5489
|
ULTRAVOX_DEFAULT_API_BASE,
|
|
4240
5490
|
ULTRAVOX_DEFAULT_SR,
|
|
4241
5491
|
UltravoxRealtimeAdapter,
|
|
4242
5492
|
STT2 as WhisperSTT,
|
|
5493
|
+
assemblyai,
|
|
4243
5494
|
builtinClipPath,
|
|
4244
5495
|
calculateRealtimeCost,
|
|
4245
5496
|
calculateSttCost,
|
|
@@ -4247,6 +5498,10 @@ export {
|
|
|
4247
5498
|
calculateTtsCost,
|
|
4248
5499
|
callsToCsv,
|
|
4249
5500
|
callsToJson,
|
|
5501
|
+
cartesia,
|
|
5502
|
+
createResampler16kTo8k,
|
|
5503
|
+
createResampler24kTo16k,
|
|
5504
|
+
createResampler8kTo16k,
|
|
4250
5505
|
deepgram,
|
|
4251
5506
|
defineTool,
|
|
4252
5507
|
elevenlabs,
|
|
@@ -4254,10 +5509,14 @@ export {
|
|
|
4254
5509
|
filterForTTS,
|
|
4255
5510
|
filterMarkdown,
|
|
4256
5511
|
formatDtmf,
|
|
5512
|
+
geminiLive,
|
|
4257
5513
|
getLogger,
|
|
4258
5514
|
guardrail,
|
|
5515
|
+
initTracing,
|
|
4259
5516
|
isRemoteUrl,
|
|
5517
|
+
isTracingEnabled,
|
|
4260
5518
|
isWebSocketUrl,
|
|
5519
|
+
lmnt,
|
|
4261
5520
|
makeAuthMiddleware,
|
|
4262
5521
|
mergePricing,
|
|
4263
5522
|
mixPcm,
|
|
@@ -4271,12 +5530,17 @@ export {
|
|
|
4271
5530
|
resample24kTo16k,
|
|
4272
5531
|
resample8kTo16k,
|
|
4273
5532
|
resamplePcm,
|
|
5533
|
+
rime,
|
|
4274
5534
|
scheduleCron,
|
|
4275
5535
|
scheduleInterval,
|
|
4276
5536
|
scheduleOnce,
|
|
4277
5537
|
selectSoundFromList,
|
|
4278
5538
|
setLogger,
|
|
5539
|
+
soniox,
|
|
5540
|
+
speechmatics,
|
|
5541
|
+
startSpan,
|
|
4279
5542
|
startTunnel,
|
|
4280
5543
|
tool,
|
|
5544
|
+
ultravox,
|
|
4281
5545
|
whisper
|
|
4282
5546
|
};
|