@dtelecom/agents-js 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -296,8 +296,63 @@ var OpenRouterLLM = class {
296
296
  let buffer = "";
297
297
  const structured = !!this.responseFormat;
298
298
  let jsonBuffer = "";
299
- let lastSegmentIndex = 0;
300
- const segmentRe = /\{"lang"\s*:\s*"(\w+)"\s*,\s*"text"\s*:\s*"((?:[^"\\]|\\.)*)"\s*\}/g;
299
+ let segmentsYielded = false;
300
+ let lastUsage;
301
+ let inSegmentsArray = false;
302
+ let objectStart = -1;
303
+ let braceDepth = 0;
304
+ let scanIndex = 0;
305
+ let inString = false;
306
+ let escaped = false;
307
/**
 * Incrementally pulls completed `{ lang, text }` objects out of the
 * `"segments"` array of a streamed structured-JSON response.
 *
 * The caller passes the whole accumulated buffer on every call; scanning
 * resumes at `scanIndex`, so each character is examined once. All scanner
 * state (`scanIndex`, `inString`, `escaped`, `inSegmentsArray`,
 * `braceDepth`, `objectStart`) lives in the enclosing scope so that
 * strings, escapes and objects split across chunks are handled correctly.
 *
 * @param {string} buf - Full JSON text received so far (must only grow
 *   between calls).
 * @returns {{lang: string, text: string}[]} Segments whose closing brace
 *   was seen during this call, in stream order; empty if none completed.
 */
function extractSegments(buf) {
  const results = [];
  for (let i = scanIndex; i < buf.length; i++) {
    const ch = buf[i];

    // The character after a backslash inside a string is never structural.
    if (escaped) {
      escaped = false;
      continue;
    }
    if (ch === "\\" && inString) {
      escaped = true;
      continue;
    }
    if (ch === '"') {
      inString = !inString;
      continue;
    }
    // Braces and brackets inside string values must not affect depth tracking.
    if (inString) continue;

    if (!inSegmentsArray) {
      if (ch === "[") {
        // Only the array that is the direct value of the "segments" key
        // counts. Checking merely that "segments" occurred somewhere earlier
        // would also accept later sibling arrays (or arrays following a
        // string *value* "segments") and emit their objects as segments.
        const before = buf.slice(0, i).trimEnd();
        if (before.endsWith(":") && before.slice(0, -1).trimEnd().endsWith('"segments"')) {
          inSegmentsArray = true;
        }
      }
      continue;
    }

    if (ch === "{") {
      // Remember where the top-level segment object starts; nested objects
      // only change the depth.
      if (braceDepth === 0) objectStart = i;
      braceDepth++;
    } else if (ch === "}") {
      // Guard against a stray "}" driving the depth negative, which would
      // desynchronise the scanner for the rest of the stream.
      if (braceDepth > 0) braceDepth--;
      if (braceDepth === 0 && objectStart >= 0) {
        const objStr = buf.slice(objectStart, i + 1);
        try {
          const seg = JSON.parse(objStr);
          // Require non-empty strings: the regex-based extractor this
          // function replaces only ever matched string values.
          if (typeof seg.lang === "string" && seg.lang && typeof seg.text === "string" && seg.text) {
            results.push({ lang: seg.lang, text: seg.text });
          }
        } catch {
          // Best effort: a malformed segment object is skipped so the
          // remaining segments can still be extracted.
        }
        objectStart = -1;
      }
    } else if (ch === "]" && braceDepth === 0) {
      // End of the segments array (a "]" inside a segment object has depth > 0).
      inSegmentsArray = false;
    }
  }
  scanIndex = buf.length;
  return results;
}
301
356
  try {
302
357
  while (true) {
303
358
  if (signal?.aborted) break;
@@ -310,10 +365,7 @@ var OpenRouterLLM = class {
310
365
  const trimmed = line.trim();
311
366
  if (!trimmed || !trimmed.startsWith("data: ")) continue;
312
367
  const data = trimmed.slice(6);
313
- if (data === "[DONE]") {
314
- yield { type: "done" };
315
- return;
316
- }
368
+ if (data === "[DONE]") break;
317
369
  try {
318
370
  const parsed = JSON.parse(data);
319
371
  const choice = parsed.choices?.[0];
@@ -322,31 +374,20 @@ var OpenRouterLLM = class {
322
374
  if (delta?.content) {
323
375
  if (structured) {
324
376
  jsonBuffer += delta.content;
325
- segmentRe.lastIndex = lastSegmentIndex;
326
- let match;
327
- while ((match = segmentRe.exec(jsonBuffer)) !== null) {
328
- const lang = match[1];
329
- const text = match[2].replace(/\\(.)/g, (_, c) => {
330
- if (c === "n") return "\n";
331
- if (c === "t") return " ";
332
- return c;
333
- });
334
- lastSegmentIndex = segmentRe.lastIndex;
335
- yield { type: "segment", segment: { lang, text } };
377
+ const segments = extractSegments(jsonBuffer);
378
+ for (const seg of segments) {
379
+ yield { type: "segment", segment: seg };
380
+ segmentsYielded = true;
336
381
  }
337
382
  } else {
338
383
  yield { type: "token", token: delta.content };
339
384
  }
340
385
  }
341
386
  if (parsed.usage) {
342
- yield {
343
- type: "done",
344
- usage: {
345
- promptTokens: parsed.usage.prompt_tokens,
346
- completionTokens: parsed.usage.completion_tokens
347
- }
387
+ lastUsage = {
388
+ promptTokens: parsed.usage.prompt_tokens,
389
+ completionTokens: parsed.usage.completion_tokens
348
390
  };
349
- return;
350
391
  }
351
392
  } catch {
352
393
  }
@@ -355,10 +396,10 @@ var OpenRouterLLM = class {
355
396
  } finally {
356
397
  reader.releaseLock();
357
398
  }
358
- if (structured && lastSegmentIndex === 0 && jsonBuffer.length > 0) {
359
- log2.warn(`Structured response yielded no segments. Raw buffer (first 200 chars): "${jsonBuffer.slice(0, 200)}"`);
399
+ if (structured && !segmentsYielded && jsonBuffer.length > 0) {
400
+ log2.warn(`LLM returned no segments. Raw JSON: "${jsonBuffer.slice(0, 300)}"`);
360
401
  }
361
- yield { type: "done" };
402
+ yield { type: "done", ...lastUsage ? { usage: lastUsage } : {} };
362
403
  }
363
404
  };
364
405
 
@@ -681,13 +722,21 @@ var DeepgramTTS = class {
681
722
  }
682
723
  async *synthesize(text, signal) {
683
724
  if (signal?.aborted) return;
684
- const segments = this.multiLanguage ? parseLangSegments(text, this.defaultLang) : [{ lang: this.defaultLang, text }];
725
+ const rawSegments = this.multiLanguage ? parseLangSegments(text, this.defaultLang) : [{ lang: this.defaultLang, text }];
726
+ const segments = [];
727
+ for (const seg of rawSegments) {
728
+ if (!seg.text.trim()) continue;
729
+ if (!/\p{L}/u.test(seg.text) && segments.length > 0) {
730
+ segments[segments.length - 1].text += seg.text;
731
+ } else {
732
+ segments.push(seg);
733
+ }
734
+ }
685
735
  const silenceBytes = Math.round(this.sampleRate * 0.2) * 2;
686
736
  const silence = Buffer.alloc(silenceBytes);
687
737
  let prevLang = null;
688
738
  for (const segment of segments) {
689
739
  if (signal?.aborted) break;
690
- if (!segment.text.trim()) continue;
691
740
  const lang = this.models[segment.lang] ? segment.lang : this.defaultLang;
692
741
  if (prevLang !== null && lang !== prevLang) {
693
742
  yield silence;
@@ -933,7 +982,8 @@ var DtelecomSTTStream = class extends BaseSTTStream {
933
982
  this.handleVadEvent(msg);
934
983
  } else if (type === "pong") {
935
984
  } else if (type === "error") {
936
- const errorMsg = msg.message || msg.error || "Unknown STT error";
985
+ const errData = msg.error;
986
+ const errorMsg = msg.message || (typeof errData === "string" ? errData : JSON.stringify(errData)) || "Unknown STT error";
937
987
  log5.error(`dTelecom STT error: ${errorMsg}`);
938
988
  this.emit("error", new Error(errorMsg));
939
989
  }
@@ -995,18 +1045,7 @@ var DtelecomSTTStream = class extends BaseSTTStream {
995
1045
 
996
1046
  // src/providers/dtelecom-tts.ts
997
1047
  import WebSocket5 from "ws";
998
- import { resample } from "wave-resampler";
999
1048
  var log6 = createLogger("DtelecomTTS");
1000
// resample24to48(input): removed in 0.1.16 (every line of this function is a "-" line in this hunk).
// What the removed code did: view `input` as 16-bit samples (input.length / 2 elements), return an
// empty Buffer for empty input, otherwise call `resample` (imported from "wave-resampler") from 24e3
// to 48e3 with { method: "sinc", LPF: false }, round each result into an Int16Array and return it
// as a Buffer over that array's memory.
// NOTE(review): the same diff makes the binary-message handler push received buffers unchanged, while
// DtelecomTTS.synthesize still sizes its inter-language silence with 48e3 — presumably the server now
// sends 48 kHz PCM directly; confirm against the TTS server contract.
- function resample24to48(input) {
1001
- const samples = new Int16Array(input.buffer, input.byteOffset, input.length / 2);
1002
- if (samples.length === 0) return Buffer.alloc(0);
1003
- const resampled = resample(samples, 24e3, 48e3, { method: "sinc", LPF: false });
1004
- const output = new Int16Array(resampled.length);
1005
- for (let i = 0; i < resampled.length; i++) {
1006
- output[i] = Math.round(resampled[i]);
1007
- }
1008
- return Buffer.from(output.buffer, output.byteOffset, output.byteLength);
1009
- }
1010
1049
  var DtelecomTTS = class {
1011
1050
  serverUrl;
1012
1051
  voices;
@@ -1058,13 +1097,21 @@ var DtelecomTTS = class {
1058
1097
  }
1059
1098
  async *synthesize(text, signal) {
1060
1099
  if (signal?.aborted) return;
1061
- const segments = parseLangSegments(text, this.defaultLang);
1100
+ const rawSegments = parseLangSegments(text, this.defaultLang);
1101
+ const segments = [];
1102
+ for (const seg of rawSegments) {
1103
+ if (!seg.text.trim()) continue;
1104
+ if (!/\p{L}/u.test(seg.text) && segments.length > 0) {
1105
+ segments[segments.length - 1].text += seg.text;
1106
+ } else {
1107
+ segments.push(seg);
1108
+ }
1109
+ }
1062
1110
  const silenceBytes = Math.round(48e3 * 0.2) * 2;
1063
1111
  const silence = Buffer.alloc(silenceBytes);
1064
1112
  let prevLang = null;
1065
1113
  for (const segment of segments) {
1066
1114
  if (signal?.aborted) break;
1067
- if (!segment.text.trim()) continue;
1068
1115
  const lang = this.voices[segment.lang] ? segment.lang : this.defaultLang;
1069
1116
  if (prevLang !== null && lang !== prevLang) {
1070
1117
  yield silence;
@@ -1074,7 +1121,6 @@ var DtelecomTTS = class {
1074
1121
  }
1075
1122
  }
1076
1123
  async *synthesizeSegment(lang, text, signal) {
1077
- log6.debug(`Synthesizing [${lang}]: "${text.slice(0, 60)}"`);
1078
1124
  await this.ensureConnection();
1079
1125
  const ws = this.ws;
1080
1126
  if (!ws || ws.readyState !== WebSocket5.OPEN) {
@@ -1100,6 +1146,7 @@ var DtelecomTTS = class {
1100
1146
  msg.lang_code = voiceConfig.langCode;
1101
1147
  msg.speed = this.speed;
1102
1148
  }
1149
+ log6.info(`TTS send [${lang}]: voice=${voiceConfig?.voice ?? "default"} lang_code=${voiceConfig?.langCode ?? "default"} "${text.slice(0, 60)}"`);
1103
1150
  ws.send(JSON.stringify(msg));
1104
1151
  try {
1105
1152
  while (true) {
@@ -1153,8 +1200,7 @@ var DtelecomTTS = class {
1153
1200
  if (!state) return;
1154
1201
  if (isBinary) {
1155
1202
  const buf = Buffer.isBuffer(data) ? data : Buffer.from(data);
1156
- const resampled = resample24to48(buf);
1157
- state.chunks.push(resampled);
1203
+ state.chunks.push(buf);
1158
1204
  state.wake?.();
1159
1205
  } else {
1160
1206
  try {