@dtelecom/agents-js 0.1.15 → 0.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -387,8 +387,63 @@ var OpenRouterLLM = class {
387
387
  let buffer = "";
388
388
  const structured = !!this.responseFormat;
389
389
  let jsonBuffer = "";
390
- let lastSegmentIndex = 0;
391
- const segmentRe = /\{"lang"\s*:\s*"(\w+)"\s*,\s*"text"\s*:\s*"((?:[^"\\]|\\.)*)"\s*\}/g;
390
+ let segmentsYielded = false;
391
+ let lastUsage;
392
+ let inSegmentsArray = false;
393
+ let objectStart = -1;
394
+ let braceDepth = 0;
395
+ let scanIndex = 0;
396
+ let inString = false;
397
+ let escaped = false;
398
+ function extractSegments(buf) {
399
+ const results = [];
400
+ for (let i = scanIndex; i < buf.length; i++) {
401
+ const ch = buf[i];
402
+ if (escaped) {
403
+ escaped = false;
404
+ continue;
405
+ }
406
+ if (ch === "\\" && inString) {
407
+ escaped = true;
408
+ continue;
409
+ }
410
+ if (ch === '"') {
411
+ inString = !inString;
412
+ continue;
413
+ }
414
+ if (inString) continue;
415
+ if (!inSegmentsArray) {
416
+ if (ch === "[") {
417
+ const before = buf.slice(0, i).trimEnd();
418
+ if (before.endsWith(":") && buf.slice(0, i).includes('"segments"')) {
419
+ inSegmentsArray = true;
420
+ }
421
+ }
422
+ continue;
423
+ }
424
+ if (ch === "{") {
425
+ if (braceDepth === 0) objectStart = i;
426
+ braceDepth++;
427
+ } else if (ch === "}") {
428
+ braceDepth--;
429
+ if (braceDepth === 0 && objectStart >= 0) {
430
+ const objStr = buf.slice(objectStart, i + 1);
431
+ try {
432
+ const seg = JSON.parse(objStr);
433
+ if (seg.lang && seg.text) {
434
+ results.push({ lang: seg.lang, text: seg.text });
435
+ }
436
+ } catch {
437
+ }
438
+ objectStart = -1;
439
+ }
440
+ } else if (ch === "]" && braceDepth === 0) {
441
+ inSegmentsArray = false;
442
+ }
443
+ }
444
+ scanIndex = buf.length;
445
+ return results;
446
+ }
392
447
  try {
393
448
  while (true) {
394
449
  if (signal?.aborted) break;
@@ -401,10 +456,7 @@ var OpenRouterLLM = class {
401
456
  const trimmed = line.trim();
402
457
  if (!trimmed || !trimmed.startsWith("data: ")) continue;
403
458
  const data = trimmed.slice(6);
404
- if (data === "[DONE]") {
405
- yield { type: "done" };
406
- return;
407
- }
459
+ if (data === "[DONE]") break;
408
460
  try {
409
461
  const parsed = JSON.parse(data);
410
462
  const choice = parsed.choices?.[0];
@@ -413,31 +465,20 @@ var OpenRouterLLM = class {
413
465
  if (delta?.content) {
414
466
  if (structured) {
415
467
  jsonBuffer += delta.content;
416
- segmentRe.lastIndex = lastSegmentIndex;
417
- let match;
418
- while ((match = segmentRe.exec(jsonBuffer)) !== null) {
419
- const lang = match[1];
420
- const text = match[2].replace(/\\(.)/g, (_, c) => {
421
- if (c === "n") return "\n";
422
- if (c === "t") return " ";
423
- return c;
424
- });
425
- lastSegmentIndex = segmentRe.lastIndex;
426
- yield { type: "segment", segment: { lang, text } };
468
+ const segments = extractSegments(jsonBuffer);
469
+ for (const seg of segments) {
470
+ yield { type: "segment", segment: seg };
471
+ segmentsYielded = true;
427
472
  }
428
473
  } else {
429
474
  yield { type: "token", token: delta.content };
430
475
  }
431
476
  }
432
477
  if (parsed.usage) {
433
- yield {
434
- type: "done",
435
- usage: {
436
- promptTokens: parsed.usage.prompt_tokens,
437
- completionTokens: parsed.usage.completion_tokens
438
- }
478
+ lastUsage = {
479
+ promptTokens: parsed.usage.prompt_tokens,
480
+ completionTokens: parsed.usage.completion_tokens
439
481
  };
440
- return;
441
482
  }
442
483
  } catch {
443
484
  }
@@ -446,10 +487,10 @@ var OpenRouterLLM = class {
446
487
  } finally {
447
488
  reader.releaseLock();
448
489
  }
449
- if (structured && lastSegmentIndex === 0 && jsonBuffer.length > 0) {
450
- log2.warn(`Structured response yielded no segments. Raw buffer (first 200 chars): "${jsonBuffer.slice(0, 200)}"`);
490
+ if (structured && !segmentsYielded && jsonBuffer.length > 0) {
491
+ log2.warn(`LLM returned no segments. Raw JSON: "${jsonBuffer.slice(0, 300)}"`);
451
492
  }
452
- yield { type: "done" };
493
+ yield { type: "done", ...lastUsage ? { usage: lastUsage } : {} };
453
494
  }
454
495
  };
455
496
 
@@ -772,13 +813,21 @@ var DeepgramTTS = class {
772
813
  }
773
814
  async *synthesize(text, signal) {
774
815
  if (signal?.aborted) return;
775
- const segments = this.multiLanguage ? parseLangSegments(text, this.defaultLang) : [{ lang: this.defaultLang, text }];
816
+ const rawSegments = this.multiLanguage ? parseLangSegments(text, this.defaultLang) : [{ lang: this.defaultLang, text }];
817
+ const segments = [];
818
+ for (const seg of rawSegments) {
819
+ if (!seg.text.trim()) continue;
820
+ if (!/\p{L}/u.test(seg.text) && segments.length > 0) {
821
+ segments[segments.length - 1].text += seg.text;
822
+ } else {
823
+ segments.push(seg);
824
+ }
825
+ }
776
826
  const silenceBytes = Math.round(this.sampleRate * 0.2) * 2;
777
827
  const silence = Buffer.alloc(silenceBytes);
778
828
  let prevLang = null;
779
829
  for (const segment of segments) {
780
830
  if (signal?.aborted) break;
781
- if (!segment.text.trim()) continue;
782
831
  const lang = this.models[segment.lang] ? segment.lang : this.defaultLang;
783
832
  if (prevLang !== null && lang !== prevLang) {
784
833
  yield silence;
@@ -1024,7 +1073,8 @@ var DtelecomSTTStream = class extends BaseSTTStream {
1024
1073
  this.handleVadEvent(msg);
1025
1074
  } else if (type === "pong") {
1026
1075
  } else if (type === "error") {
1027
- const errorMsg = msg.message || msg.error || "Unknown STT error";
1076
+ const errData = msg.error;
1077
+ const errorMsg = msg.message || (typeof errData === "string" ? errData : JSON.stringify(errData)) || "Unknown STT error";
1028
1078
  log5.error(`dTelecom STT error: ${errorMsg}`);
1029
1079
  this.emit("error", new Error(errorMsg));
1030
1080
  }
@@ -1086,18 +1136,7 @@ var DtelecomSTTStream = class extends BaseSTTStream {
1086
1136
 
1087
1137
  // src/providers/dtelecom-tts.ts
1088
1138
  var import_ws5 = __toESM(require("ws"));
1089
- var import_wave_resampler = require("wave-resampler");
1090
1139
  var log6 = createLogger("DtelecomTTS");
1091
- function resample24to48(input) {
1092
- const samples = new Int16Array(input.buffer, input.byteOffset, input.length / 2);
1093
- if (samples.length === 0) return Buffer.alloc(0);
1094
- const resampled = (0, import_wave_resampler.resample)(samples, 24e3, 48e3, { method: "sinc", LPF: false });
1095
- const output = new Int16Array(resampled.length);
1096
- for (let i = 0; i < resampled.length; i++) {
1097
- output[i] = Math.round(resampled[i]);
1098
- }
1099
- return Buffer.from(output.buffer, output.byteOffset, output.byteLength);
1100
- }
1101
1140
  var DtelecomTTS = class {
1102
1141
  serverUrl;
1103
1142
  voices;
@@ -1149,13 +1188,21 @@ var DtelecomTTS = class {
1149
1188
  }
1150
1189
  async *synthesize(text, signal) {
1151
1190
  if (signal?.aborted) return;
1152
- const segments = parseLangSegments(text, this.defaultLang);
1191
+ const rawSegments = parseLangSegments(text, this.defaultLang);
1192
+ const segments = [];
1193
+ for (const seg of rawSegments) {
1194
+ if (!seg.text.trim()) continue;
1195
+ if (!/\p{L}/u.test(seg.text) && segments.length > 0) {
1196
+ segments[segments.length - 1].text += seg.text;
1197
+ } else {
1198
+ segments.push(seg);
1199
+ }
1200
+ }
1153
1201
  const silenceBytes = Math.round(48e3 * 0.2) * 2;
1154
1202
  const silence = Buffer.alloc(silenceBytes);
1155
1203
  let prevLang = null;
1156
1204
  for (const segment of segments) {
1157
1205
  if (signal?.aborted) break;
1158
- if (!segment.text.trim()) continue;
1159
1206
  const lang = this.voices[segment.lang] ? segment.lang : this.defaultLang;
1160
1207
  if (prevLang !== null && lang !== prevLang) {
1161
1208
  yield silence;
@@ -1165,7 +1212,6 @@ var DtelecomTTS = class {
1165
1212
  }
1166
1213
  }
1167
1214
  async *synthesizeSegment(lang, text, signal) {
1168
- log6.debug(`Synthesizing [${lang}]: "${text.slice(0, 60)}"`);
1169
1215
  await this.ensureConnection();
1170
1216
  const ws = this.ws;
1171
1217
  if (!ws || ws.readyState !== import_ws5.default.OPEN) {
@@ -1191,6 +1237,7 @@ var DtelecomTTS = class {
1191
1237
  msg.lang_code = voiceConfig.langCode;
1192
1238
  msg.speed = this.speed;
1193
1239
  }
1240
+ log6.info(`TTS send [${lang}]: voice=${voiceConfig?.voice ?? "default"} lang_code=${voiceConfig?.langCode ?? "default"} "${text.slice(0, 60)}"`);
1194
1241
  ws.send(JSON.stringify(msg));
1195
1242
  try {
1196
1243
  while (true) {
@@ -1244,8 +1291,7 @@ var DtelecomTTS = class {
1244
1291
  if (!state) return;
1245
1292
  if (isBinary) {
1246
1293
  const buf = Buffer.isBuffer(data) ? data : Buffer.from(data);
1247
- const resampled = resample24to48(buf);
1248
- state.chunks.push(resampled);
1294
+ state.chunks.push(buf);
1249
1295
  state.wake?.();
1250
1296
  } else {
1251
1297
  try {