@dtelecom/agents-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,613 @@
+ "use strict";
+ var __create = Object.create;
+ var __defProp = Object.defineProperty;
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+ var __getOwnPropNames = Object.getOwnPropertyNames;
+ var __getProtoOf = Object.getPrototypeOf;
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
+ var __export = (target, all) => {
+   for (var name in all)
+     __defProp(target, name, { get: all[name], enumerable: true });
+ };
+ var __copyProps = (to, from, except, desc) => {
+   if (from && typeof from === "object" || typeof from === "function") {
+     for (let key of __getOwnPropNames(from))
+       if (!__hasOwnProp.call(to, key) && key !== except)
+         __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+   }
+   return to;
+ };
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+   // If the importer is in node compatibility mode or this is not an ESM
+   // file that has been converted to a CommonJS file using a Babel-
+   // compatible transform (i.e. "__esModule" has not been set), then set
+   // "default" to the CommonJS "module.exports" for node compatibility.
+   isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+   mod
+ ));
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+
+ // src/providers/index.ts
+ var providers_exports = {};
+ __export(providers_exports, {
+   CartesiaTTS: () => CartesiaTTS,
+   DeepgramSTT: () => DeepgramSTT,
+   OpenRouterLLM: () => OpenRouterLLM
+ });
+ module.exports = __toCommonJS(providers_exports);
+
+ // src/providers/deepgram-stt.ts
+ var import_ws = __toESM(require("ws"));
+
+ // src/core/base-stt-stream.ts
+ var import_events = require("events");
+ var BaseSTTStream = class extends import_events.EventEmitter {
+   // eslint-disable-next-line @typescript-eslint/no-explicit-any
+   on(event, cb) {
+     return super.on(event, cb);
+   }
+   // eslint-disable-next-line @typescript-eslint/no-explicit-any
+   emit(event, ...args) {
+     return super.emit(event, ...args);
+   }
+ };
+
+ // src/utils/logger.ts
+ var LEVELS = {
+   debug: 0,
+   info: 1,
+   warn: 2,
+   error: 3,
+   silent: 4
+ };
+ function detectLevel() {
+   const debug = typeof process !== "undefined" && process.env?.DEBUG;
+   if (debug && (debug === "*" || debug.includes("@dtelecom/agents"))) {
+     return "debug";
+   }
+   return "info";
+ }
+ var globalLevel = detectLevel();
+ function timestamp() {
+   const d = /* @__PURE__ */ new Date();
+   const h = String(d.getHours()).padStart(2, "0");
+   const m = String(d.getMinutes()).padStart(2, "0");
+   const s = String(d.getSeconds()).padStart(2, "0");
+   const ms = String(d.getMilliseconds()).padStart(3, "0");
+   return `${h}:${m}:${s}.${ms}`;
+ }
+ function createLogger(tag) {
+   const prefix = `[@dtelecom/agents:${tag}]`;
+   return {
+     debug(...args) {
+       if (LEVELS[globalLevel] <= LEVELS.debug) console.debug(timestamp(), prefix, ...args);
+     },
+     info(...args) {
+       if (LEVELS[globalLevel] <= LEVELS.info) console.info(timestamp(), prefix, ...args);
+     },
+     warn(...args) {
+       if (LEVELS[globalLevel] <= LEVELS.warn) console.warn(timestamp(), prefix, ...args);
+     },
+     error(...args) {
+       if (LEVELS[globalLevel] <= LEVELS.error) console.error(timestamp(), prefix, ...args);
+     }
+   };
+ }
+
+ // src/providers/deepgram-stt.ts
+ var log = createLogger("DeepgramSTT");
+ var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
+ var KEEPALIVE_INTERVAL_MS = 5e3;
+ var DeepgramSTT = class {
+   options;
+   constructor(options) {
+     if (!options.apiKey) {
+       throw new Error("DeepgramSTT requires an apiKey");
+     }
+     this.options = options;
+   }
+   createStream(options) {
+     const language = options?.language ?? this.options.language ?? "en";
+     return new DeepgramSTTStream(this.options, language);
+   }
+ };
+ var DeepgramSTTStream = class extends BaseSTTStream {
+   ws = null;
+   apiKey;
+   wsUrl;
+   _ready = false;
+   _closed = false;
+   pendingAudio = [];
+   keepAliveTimer = null;
+   lastAudioSentAt = 0;
+   /** Buffer of is_final=true transcripts for the current utterance */
+   utteranceBuffer = [];
+   /** Timestamp of the last non-empty interim result (approximates end of speech) */
+   lastInterimAt = 0;
+   constructor(options, language) {
+     super();
+     this.apiKey = options.apiKey;
+     this.wsUrl = buildWsUrl(options, language);
+     this.connect();
+   }
+   sendAudio(pcm16) {
+     if (this._closed) return;
+     if (!this._ready) {
+       this.pendingAudio.push(pcm16);
+       return;
+     }
+     if (this.ws?.readyState === import_ws.default.OPEN) {
+       this.ws.send(pcm16);
+       this.lastAudioSentAt = performance.now();
+     }
+   }
+   async close() {
+     if (this._closed) return;
+     this._closed = true;
+     this._ready = false;
+     this.pendingAudio = [];
+     this.stopKeepAlive();
+     if (this.ws?.readyState === import_ws.default.OPEN) {
+       try {
+         this.ws.send(JSON.stringify({ type: "CloseStream" }));
+       } catch {
+       }
+     }
+     if (this.ws) {
+       this.ws.close();
+       this.ws = null;
+     }
+     log.debug("DeepgramSTT stream closed");
+   }
+   connect() {
+     log.debug(`Connecting to Deepgram: ${this.wsUrl.replace(/token=[^&]+/, "token=***")}`);
+     this.ws = new import_ws.default(this.wsUrl, {
+       headers: {
+         Authorization: `Token ${this.apiKey}`
+       }
+     });
+     this.ws.on("open", () => {
+       log.info("Deepgram WebSocket connected");
+       this._ready = true;
+       for (const buf of this.pendingAudio) {
+         if (this.ws?.readyState === import_ws.default.OPEN) {
+           this.ws.send(buf);
+         }
+       }
+       this.pendingAudio = [];
+       this.startKeepAlive();
+     });
+     this.ws.on("message", (data) => {
+       try {
+         const msg = JSON.parse(data.toString());
+         this.handleMessage(msg);
+       } catch (err) {
+         log.error("Failed to parse Deepgram message:", err);
+       }
+     });
+     this.ws.on("error", (err) => {
+       log.error("Deepgram WebSocket error:", err);
+       this.emit("error", err instanceof Error ? err : new Error(String(err)));
+     });
+     this.ws.on("close", (code, reason) => {
+       log.debug(`Deepgram WebSocket closed: ${code} ${reason.toString()}`);
+       this._ready = false;
+       this.stopKeepAlive();
+       if (!this._closed) {
+         log.info("Deepgram connection lost, reconnecting in 1s...");
+         setTimeout(() => {
+           if (!this._closed) this.connect();
+         }, 1e3);
+       }
+     });
+   }
+   handleMessage(msg) {
+     const type = msg.type;
+     if (type === "Results") {
+       this.handleResults(msg);
+     } else if (type === "UtteranceEnd") {
+       this.flushUtterance();
+     } else if (type === "Metadata") {
+       log.debug("Deepgram session metadata received");
+     } else if (type === "SpeechStarted") {
+       log.debug("Speech started detected");
+     }
+   }
+   handleResults(msg) {
+     const channel = msg.channel;
+     const transcript = channel?.alternatives?.[0]?.transcript ?? "";
+     const confidence = channel?.alternatives?.[0]?.confidence;
+     const isFinal = msg.is_final ?? false;
+     const speechFinal = msg.speech_final ?? false;
+     if (!transcript) return;
+     if (!isFinal) {
+       this.lastInterimAt = performance.now();
+       const fullInterim = this.utteranceBuffer.length > 0 ? this.utteranceBuffer.join(" ") + " " + transcript : transcript;
+       this.emit("transcription", {
+         text: fullInterim,
+         isFinal: false,
+         confidence: confidence ?? void 0
+       });
+       return;
+     }
+     this.utteranceBuffer.push(transcript);
+     if (speechFinal) {
+       this.flushUtterance();
+     }
+   }
+   /** Emit the buffered utterance as a single final transcription result. */
+   flushUtterance() {
+     if (this.utteranceBuffer.length === 0) return;
+     const now = performance.now();
+     const fullText = this.utteranceBuffer.join(" ");
+     this.utteranceBuffer = [];
+     const sttDuration = this.lastInterimAt > 0 ? now - this.lastInterimAt : void 0;
+     if (sttDuration !== void 0) {
+       log.info(`stt_final: ${sttDuration.toFixed(0)}ms "${fullText.slice(0, 50)}"`);
+     }
+     this.lastInterimAt = 0;
+     this.emit("transcription", {
+       text: fullText,
+       isFinal: true,
+       sttDuration
+     });
+   }
+   startKeepAlive() {
+     this.stopKeepAlive();
+     this.keepAliveTimer = setInterval(() => {
+       if (this.ws?.readyState === import_ws.default.OPEN) {
+         this.ws.send(JSON.stringify({ type: "KeepAlive" }));
+       }
+     }, KEEPALIVE_INTERVAL_MS);
+   }
+   stopKeepAlive() {
+     if (this.keepAliveTimer) {
+       clearInterval(this.keepAliveTimer);
+       this.keepAliveTimer = null;
+     }
+   }
+ };
+ function buildWsUrl(options, language) {
+   const params = new URLSearchParams();
+   params.set("model", options.model ?? "nova-3");
+   params.set("language", language);
+   params.set("encoding", "linear16");
+   params.set("sample_rate", "16000");
+   params.set("channels", "1");
+   params.set("interim_results", String(options.interimResults ?? true));
+   params.set("punctuate", String(options.punctuate ?? true));
+   if (options.endpointing === false) {
+     params.set("endpointing", "false");
+   } else {
+     params.set("endpointing", String(options.endpointing ?? 300));
+   }
+   if (options.smartFormat) {
+     params.set("smart_format", "true");
+   }
+   if (options.utteranceEndMs !== void 0) {
+     params.set("utterance_end_ms", String(options.utteranceEndMs));
+   } else if (options.interimResults !== false) {
+     params.set("utterance_end_ms", "1000");
+   }
+   if (options.keywords?.length) {
+     for (const kw of options.keywords) {
+       params.append("keywords", kw);
+     }
+   }
+   return `${DEEPGRAM_WS_URL}?${params.toString()}`;
+ }
+
+ // src/providers/openrouter-llm.ts
+ var log2 = createLogger("OpenRouterLLM");
+ var OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions";
+ var OpenRouterLLM = class {
+   apiKey;
+   model;
+   maxTokens;
+   temperature;
+   provider;
+   constructor(options) {
+     if (!options.apiKey) {
+       throw new Error("OpenRouterLLM requires an apiKey");
+     }
+     this.apiKey = options.apiKey;
+     this.model = options.model;
+     this.maxTokens = options.maxTokens ?? 512;
+     this.temperature = options.temperature ?? 0.7;
+     if (options.providerRouting) {
+       this.provider = {
+         sort: options.providerRouting.sort,
+         order: options.providerRouting.order,
+         allow_fallbacks: options.providerRouting.allowFallbacks
+       };
+     }
+   }
+   /**
+    * Warm up the LLM by sending the system prompt and a short message.
+    * Primes the HTTP/TLS connection and model loading on the provider side.
+    */
+   async warmup(systemPrompt) {
+     log2.info("Warming up LLM connection...");
+     const start = performance.now();
+     const messages = [
+       { role: "system", content: systemPrompt },
+       { role: "user", content: "Hello" }
+     ];
+     try {
+       const gen = this.chat(messages);
+       for await (const chunk of gen) {
+         if (chunk.type === "done") break;
+       }
+       log2.info(`LLM warmup complete in ${(performance.now() - start).toFixed(0)}ms`);
+     } catch (err) {
+       log2.warn("LLM warmup failed (non-fatal):", err);
+     }
+   }
+   async *chat(messages, signal) {
+     const body = {
+       model: this.model,
+       messages,
+       max_tokens: this.maxTokens,
+       temperature: this.temperature,
+       stream: true
+     };
+     if (this.provider) {
+       body.provider = this.provider;
+     }
+     log2.debug(`LLM request: model=${this.model}, messages=${messages.length}`);
+     const response = await fetch(OPENROUTER_URL, {
+       method: "POST",
+       headers: {
+         "Content-Type": "application/json",
+         "Authorization": `Bearer ${this.apiKey}`
+       },
+       body: JSON.stringify(body),
+       signal
+     });
+     if (!response.ok) {
+       const errorText = await response.text();
+       throw new Error(`OpenRouter API error ${response.status}: ${errorText}`);
+     }
+     if (!response.body) {
+       throw new Error("OpenRouter response has no body");
+     }
+     const reader = response.body.getReader();
+     const decoder = new TextDecoder();
+     let buffer = "";
+     try {
+       while (true) {
+         if (signal?.aborted) break;
+         const { done, value } = await reader.read();
+         if (done) break;
+         buffer += decoder.decode(value, { stream: true });
+         const lines = buffer.split("\n");
+         buffer = lines.pop() ?? "";
+         for (const line of lines) {
+           const trimmed = line.trim();
+           if (!trimmed || !trimmed.startsWith("data: ")) continue;
+           const data = trimmed.slice(6);
+           if (data === "[DONE]") {
+             yield { type: "done" };
+             return;
+           }
+           try {
+             const parsed = JSON.parse(data);
+             const choice = parsed.choices?.[0];
+             if (!choice) continue;
+             const delta = choice.delta;
+             if (delta?.content) {
+               yield { type: "token", token: delta.content };
+             }
+             if (parsed.usage) {
+               yield {
+                 type: "done",
+                 usage: {
+                   promptTokens: parsed.usage.prompt_tokens,
+                   completionTokens: parsed.usage.completion_tokens
+                 }
+               };
+               return;
+             }
+           } catch {
+           }
+         }
+       }
+     } finally {
+       reader.releaseLock();
+     }
+     yield { type: "done" };
+   }
+ };
+
+ // src/providers/cartesia-tts.ts
+ var import_ws2 = __toESM(require("ws"));
+ var log3 = createLogger("CartesiaTTS");
+ var CARTESIA_WS_BASE = "wss://api.cartesia.ai/tts/websocket";
+ var DEFAULT_API_VERSION = "2024-06-10";
+ var DEFAULT_MODEL = "sonic-3";
+ var DEFAULT_SAMPLE_RATE = 48e3;
+ var CartesiaTTS = class {
+   apiKey;
+   voiceId;
+   modelId;
+   sampleRate;
+   apiVersion;
+   language;
+   speed;
+   emotion;
+   ws = null;
+   _connected = false;
+   connectPromise = null;
+   /** Active contexts keyed by context_id */
+   contexts = /* @__PURE__ */ new Map();
+   contextCounter = 0;
+   constructor(options) {
+     if (!options.apiKey) {
+       throw new Error("CartesiaTTS requires an apiKey");
+     }
+     if (!options.voiceId) {
+       throw new Error("CartesiaTTS requires a voiceId");
+     }
+     this.apiKey = options.apiKey;
+     this.voiceId = options.voiceId;
+     this.modelId = options.modelId ?? DEFAULT_MODEL;
+     this.sampleRate = options.sampleRate ?? DEFAULT_SAMPLE_RATE;
+     this.apiVersion = options.apiVersion ?? DEFAULT_API_VERSION;
+     this.language = options.language;
+     this.speed = options.speed;
+     this.emotion = options.emotion;
+   }
+   /** Pre-connect the WebSocket so first synthesize() doesn't pay connection cost. */
+   async warmup() {
+     log3.info("Warming up TTS connection...");
+     const start = performance.now();
+     try {
+       await this.ensureConnection();
+       log3.info(`TTS warmup complete in ${(performance.now() - start).toFixed(0)}ms`);
+     } catch (err) {
+       log3.warn("TTS warmup failed (non-fatal):", err);
+     }
+   }
+   async *synthesize(text, signal) {
+     log3.debug(`Synthesizing: "${text.slice(0, 60)}"`);
+     await this.ensureConnection();
+     if (!this.ws || this.ws.readyState !== import_ws2.default.OPEN) {
+       throw new Error("Cartesia WebSocket not connected");
+     }
+     const contextId = `ctx-${++this.contextCounter}-${Date.now()}`;
+     const ctx = { chunks: [], done: false, error: null, wake: null };
+     this.contexts.set(contextId, ctx);
+     const request = {
+       model_id: this.modelId,
+       transcript: text,
+       voice: { mode: "id", id: this.voiceId },
+       output_format: {
+         container: "raw",
+         encoding: "pcm_s16le",
+         sample_rate: this.sampleRate
+       },
+       context_id: contextId,
+       continue: false
+     };
+     if (this.language) {
+       request.language = this.language;
+     }
+     if (this.speed !== void 0 || this.emotion !== void 0) {
+       const genConfig = {};
+       if (this.speed !== void 0) genConfig.speed = this.speed;
+       if (this.emotion !== void 0) genConfig.emotion = this.emotion;
+       request.generation_config = genConfig;
+     }
+     const onAbort = () => {
+       ctx.done = true;
+       ctx.wake?.();
+       if (this.ws?.readyState === import_ws2.default.OPEN) {
+         try {
+           this.ws.send(JSON.stringify({ context_id: contextId, cancel: true }));
+         } catch {
+         }
+       }
+     };
+     signal?.addEventListener("abort", onAbort, { once: true });
+     this.ws.send(JSON.stringify(request));
+     try {
+       while (true) {
+         if (signal?.aborted) break;
+         if (ctx.error) throw ctx.error;
+         if (ctx.chunks.length > 0) {
+           yield ctx.chunks.shift();
+           continue;
+         }
+         if (ctx.done) break;
+         await new Promise((resolve) => {
+           ctx.wake = resolve;
+         });
+         ctx.wake = null;
+       }
+       while (ctx.chunks.length > 0) {
+         yield ctx.chunks.shift();
+       }
+     } finally {
+       signal?.removeEventListener("abort", onAbort);
+       this.contexts.delete(contextId);
+     }
+   }
+   /** Ensure the persistent WebSocket is connected. */
+   ensureConnection() {
+     if (this._connected && this.ws?.readyState === import_ws2.default.OPEN) {
+       return Promise.resolve();
+     }
+     if (this.connectPromise) return this.connectPromise;
+     this.connectPromise = new Promise((resolve, reject) => {
+       const url = `${CARTESIA_WS_BASE}?api_key=${this.apiKey}&cartesia_version=${this.apiVersion}`;
+       log3.debug("Connecting to Cartesia...");
+       this.ws = new import_ws2.default(url);
+       this.ws.on("open", () => {
+         this._connected = true;
+         this.connectPromise = null;
+         log3.info("Cartesia WebSocket connected");
+         resolve();
+       });
+       this.ws.on("message", (data) => {
+         try {
+           const msg = JSON.parse(data.toString());
+           this.handleMessage(msg);
+         } catch (err) {
+           log3.error("Failed to parse Cartesia message:", err);
+         }
+       });
+       this.ws.on("error", (err) => {
+         const error = err instanceof Error ? err : new Error(String(err));
+         log3.error("Cartesia WebSocket error:", error);
+         for (const ctx of this.contexts.values()) {
+           ctx.error = error;
+           ctx.wake?.();
+         }
+         this._connected = false;
+         this.connectPromise = null;
+         reject(error);
+       });
+       this.ws.on("close", (code, reason) => {
+         log3.debug(`Cartesia WebSocket closed: ${code} ${reason.toString()}`);
+         this._connected = false;
+         this.connectPromise = null;
+         for (const ctx of this.contexts.values()) {
+           ctx.done = true;
+           ctx.wake?.();
+         }
+       });
+     });
+     return this.connectPromise;
+   }
+   handleMessage(msg) {
+     const contextId = msg.context_id;
+     if (!contextId) return;
+     const ctx = this.contexts.get(contextId);
+     if (!ctx) return;
+     const type = msg.type;
+     if (type === "chunk") {
+       const b64 = msg.data;
+       if (b64) {
+         const pcm = Buffer.from(b64, "base64");
+         ctx.chunks.push(pcm);
+         ctx.wake?.();
+       }
+     } else if (type === "done") {
+       log3.debug(`Cartesia synthesis done for ${contextId} (${ctx.chunks.length} chunks pending)`);
+       ctx.done = true;
+       ctx.wake?.();
+     } else if (type === "error") {
+       const errorMsg = msg.error ?? "Unknown Cartesia error";
+       log3.error(`Cartesia error for ${contextId}: ${errorMsg}`);
+       ctx.error = new Error(`Cartesia TTS error: ${errorMsg}`);
+       ctx.wake?.();
+     }
+   }
+ };
+ // Annotate the CommonJS export names for ESM import in node:
+ 0 && (module.exports = {
+   CartesiaTTS,
+   DeepgramSTT,
+   OpenRouterLLM
+ });
+ //# sourceMappingURL=index.js.map
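For reference, a minimal sketch of how the three exported providers could be wired together, based only on the compiled code above. The import path, environment variable names, the model and voice IDs, and the audio source/sink are illustrative assumptions, not part of the package.

// Hypothetical wiring of the exported providers; not shipped with the package.
// The subpath below is an assumption about how the bundle is exposed.
const { DeepgramSTT, OpenRouterLLM, CartesiaTTS } = require("@dtelecom/agents-js/providers");

const stt = new DeepgramSTT({ apiKey: process.env.DEEPGRAM_API_KEY }); // assumed env var
const llm = new OpenRouterLLM({
  apiKey: process.env.OPENROUTER_API_KEY, // assumed env var
  model: "openai/gpt-4o-mini" // assumed model slug
});
const tts = new CartesiaTTS({
  apiKey: process.env.CARTESIA_API_KEY, // assumed env var
  voiceId: "your-voice-id" // assumed voice ID
});

async function respond(userText) {
  // Collect streamed LLM tokens, then synthesize the reply as raw pcm_s16le audio
  // (48 kHz by default, per DEFAULT_SAMPLE_RATE above).
  let reply = "";
  for await (const chunk of llm.chat([{ role: "user", content: userText }])) {
    if (chunk.type === "token") reply += chunk.token;
  }
  for await (const pcm of tts.synthesize(reply)) {
    // pcm is a Buffer of raw PCM; forward it to your audio sink (assumption).
  }
}

const stream = stt.createStream({ language: "en" });
stream.on("transcription", (result) => {
  // Final results arrive once per utterance; interim results have isFinal === false.
  if (result.isFinal) respond(result.text).catch(console.error);
});
stream.on("error", console.error);
// Feed 16 kHz mono pcm16 frames captured elsewhere into the stream:
// stream.sendAudio(pcm16Buffer);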