@dtelecom/agents-js 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -621,6 +621,49 @@ var AUDIO_DRAIN_MS = 800;
621
621
  function sleep2(ms) {
622
622
  return new Promise((resolve) => setTimeout(resolve, ms));
623
623
  }
624
/**
 * Start synthesizing `text` right away in the background and return a factory
 * for async generators that replay the buffered chunks.
 *
 * Synthesis begins as soon as this function is called; chunks are collected in
 * an in-memory buffer. Each generator created by the returned factory keeps its
 * own read position, so it yields every chunk from the start and then waits for
 * more until the producer finishes.
 *
 * @param {{ synthesize: Function }} tts - Object whose `synthesize(text, signal)`
 *   returns an async iterable of chunks.
 * @param {string} text - Text passed through to `tts.synthesize`.
 * @param {AbortSignal} [signal] - Optional abort signal. Once aborted, the
 *   producer stops buffering and consumers finish silently.
 * @returns {() => AsyncGenerator} Factory producing a generator over the chunks.
 *   The generator throws the synthesis error (unless it was an `AbortError`)
 *   after all chunks buffered before the failure have been yielded.
 */
function prefetchTTS(tts, text, signal) {
  const buffer = [];
  // Every generator currently parked waiting for data registers its resolver here.
  const waiters = new Set();
  let done = false;
  // Separate flag so that a falsy thrown value is still reported to consumers.
  let failed = false;
  let error = null;

  const notify = () => {
    if (waiters.size === 0) return;
    // Snapshot and clear first: a resumed consumer may re-register immediately.
    const pending = [...waiters];
    waiters.clear();
    for (const resume of pending) resume();
  };

  // Wake parked consumers on abort even if the TTS stream never reacts to the signal.
  // Optional call keeps minimal signal-like objects (only `aborted`) working.
  signal?.addEventListener?.("abort", notify, { once: true });

  // Fire-and-forget producer: failures are captured and surfaced through the generator.
  void (async () => {
    try {
      const stream = tts.synthesize(text, signal);
      for await (const chunk of stream) {
        if (signal?.aborted) break;
        buffer.push(chunk);
        notify();
      }
    } catch (e) {
      // Aborts are an expected way to stop, not a failure.
      if (!(e instanceof Error && e.name === "AbortError")) {
        failed = true;
        error = e;
      }
    } finally {
      done = true;
      // The signal may be shared by many prefetches; do not leave listeners behind.
      signal?.removeEventListener?.("abort", notify);
      notify();
    }
  })();

  return async function* () {
    let index = 0;
    while (true) {
      if (signal?.aborted) return;
      // Drain buffered chunks before reporting a failure, matching what a
      // direct (non-prefetched) stream would have delivered.
      if (index < buffer.length) {
        yield buffer[index++];
        continue;
      }
      if (failed) throw error;
      if (done) return;
      await new Promise((resume) => {
        waiters.add(resume);
      });
    }
  };
}
624
667
  var Pipeline = class extends EventEmitter {
625
668
  stt;
626
669
  llm;
@@ -635,6 +678,7 @@ var Pipeline = class extends EventEmitter {
635
678
  nameVariants;
636
679
  beforeRespond;
637
680
  memory;
681
+ tools;
638
682
  /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
639
683
  cleanText(text) {
640
684
  return this.tts?.cleanText ? this.tts.cleanText(text) : text;
@@ -657,6 +701,7 @@ var Pipeline = class extends EventEmitter {
657
701
  this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
658
702
  this.beforeRespond = options.beforeRespond;
659
703
  this.memory = options.memory;
704
+ this.tools = options.tools;
660
705
  this.context = new ContextManager({
661
706
  instructions: options.instructions,
662
707
  maxContextTokens: options.maxContextTokens
@@ -673,28 +718,18 @@ var Pipeline = class extends EventEmitter {
673
718
  this.splitter.reset();
674
719
  this.setAgentState("idle");
675
720
  };
676
- this._warmupPromise = this.warmup(options.instructions);
721
+ this._ttsWarmupPromise = this.tts?.warmup ? this.tts.warmup().catch((err) => {
722
+ log7.warn("TTS warmup failed (non-fatal):", err);
723
+ }) : Promise.resolve();
724
+ this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
725
+ log7.warn("LLM warmup failed (non-fatal):", err);
726
+ }) : Promise.resolve();
727
+ this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
728
+ });
677
729
  }
678
- /** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
679
730
  _warmupPromise;
680
- async warmup(instructions) {
681
- const tasks = [];
682
- if (this.llm.warmup) {
683
- tasks.push(
684
- this.llm.warmup(instructions).catch((err) => {
685
- log7.warn("LLM warmup failed:", err);
686
- })
687
- );
688
- }
689
- if (this.tts?.warmup) {
690
- tasks.push(
691
- this.tts.warmup().catch((err) => {
692
- log7.warn("TTS warmup failed:", err);
693
- })
694
- );
695
- }
696
- await Promise.all(tasks);
697
- }
731
+ _ttsWarmupPromise;
732
+ _llmWarmupPromise;
698
733
  get processing() {
699
734
  return this._processing;
700
735
  }
@@ -870,7 +905,7 @@ var Pipeline = class extends EventEmitter {
870
905
  segBuf.length = 0;
871
906
  pushSentence(combined);
872
907
  };
873
- const llmStream = this.llm.chat(messages, signal);
908
+ const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
874
909
  try {
875
910
  while (!signal.aborted) {
876
911
  const { value: chunk, done } = await llmStream.next();
@@ -899,6 +934,9 @@ var Pipeline = class extends EventEmitter {
899
934
  for (const sentence of sentences) {
900
935
  pushSentence(sentence);
901
936
  }
937
+ } else if (chunk.type === "tool_call" && chunk.toolCall) {
938
+ log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
939
+ this.emit("toolCall", chunk.toolCall);
902
940
  }
903
941
  }
904
942
  } finally {
@@ -921,29 +959,55 @@ var Pipeline = class extends EventEmitter {
921
959
  };
922
960
  const consumer = async () => {
923
961
  this.audioOutput.beginResponse();
962
+ const state = { prefetched: null };
924
963
  try {
925
964
  while (true) {
926
965
  if (signal.aborted) break;
927
- if (sentenceQueue.length > 0) {
928
- const sentence = sentenceQueue.shift();
966
+ let sentence;
967
+ let existingStream;
968
+ if (state.prefetched) {
969
+ sentence = state.prefetched.sentence;
970
+ existingStream = state.prefetched.streamFn();
971
+ state.prefetched = null;
972
+ } else if (sentenceQueue.length > 0) {
973
+ sentence = sentenceQueue.shift();
929
974
  if (!/\w/.test(sentence)) {
930
975
  log7.debug(`Skipping non-word sentence: "${sentence}"`);
931
976
  continue;
932
977
  }
978
+ existingStream = void 0;
979
+ } else if (producerDone) {
980
+ break;
981
+ } else {
982
+ await new Promise((resolve) => {
983
+ wakeConsumer = resolve;
984
+ });
985
+ wakeConsumer = null;
986
+ continue;
987
+ }
988
+ const tryPrefetch = () => {
989
+ if (state.prefetched || !this.tts) return;
990
+ if (sentenceQueue.length > 0) {
991
+ const next = sentenceQueue.shift();
992
+ if (/\w/.test(next)) {
993
+ state.prefetched = { sentence: next, streamFn: prefetchTTS(this.tts, next, signal) };
994
+ }
995
+ }
996
+ };
997
+ tryPrefetch();
998
+ try {
933
999
  await this.synthesizeAndPlay(sentence, signal, (t) => {
934
1000
  if (!tFirstAudioPlayed) {
935
1001
  tFirstAudioPlayed = t;
936
1002
  this.setAgentState("speaking");
937
1003
  }
938
1004
  this.emit("sentence", this.cleanText(sentence), sentence);
939
- });
940
- continue;
1005
+ tryPrefetch();
1006
+ }, existingStream);
1007
+ } catch (ttsErr) {
1008
+ if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
1009
+ log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
941
1010
  }
942
- if (producerDone) break;
943
- await new Promise((resolve) => {
944
- wakeConsumer = resolve;
945
- });
946
- wakeConsumer = null;
947
1011
  }
948
1012
  } finally {
949
1013
  if (!signal.aborted) {
@@ -996,7 +1060,7 @@ var Pipeline = class extends EventEmitter {
996
1060
  return;
997
1061
  }
998
1062
  this._processing = true;
999
- await this._warmupPromise;
1063
+ await this._ttsWarmupPromise;
1000
1064
  log7.info(`say(): "${text.slice(0, 60)}"`);
1001
1065
  try {
1002
1066
  const signal = this.bargeIn.startCycle();
@@ -1033,7 +1097,7 @@ var Pipeline = class extends EventEmitter {
1033
1097
  }
1034
1098
  }
1035
1099
  }
1036
- async synthesizeAndPlay(text, signal, onFirstAudio) {
1100
+ async synthesizeAndPlay(text, signal, onFirstAudio, existingStream) {
1037
1101
  if (!this.tts || signal.aborted) {
1038
1102
  log7.info(`[Agent says]: ${text}`);
1039
1103
  return;
@@ -1042,7 +1106,7 @@ var Pipeline = class extends EventEmitter {
1042
1106
  const ttsStart = performance.now();
1043
1107
  let firstChunk = true;
1044
1108
  let ttsChunkCount = 0;
1045
- const ttsStream = this.tts.synthesize(text, signal);
1109
+ const ttsStream = existingStream ?? this.tts.synthesize(text, signal);
1046
1110
  const measuredStream = async function* () {
1047
1111
  for await (const chunk of ttsStream) {
1048
1112
  ttsChunkCount++;
@@ -1150,12 +1214,14 @@ var VoiceAgent = class extends EventEmitter2 {
1150
1214
  agentName: this.config.agentName,
1151
1215
  nameVariants: this.config.nameVariants,
1152
1216
  memory: this.memory ?? void 0,
1153
- maxContextTokens: this.config.maxContextTokens
1217
+ maxContextTokens: this.config.maxContextTokens,
1218
+ tools: this.config.tools
1154
1219
  });
1155
1220
  this.pipeline.on("transcription", (result) => this.emit("transcription", result));
1156
1221
  this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
1157
1222
  this.pipeline.on("response", (text) => this.emit("response", text));
1158
1223
  this.pipeline.on("agentState", (state) => this.emit("agentState", state));
1224
+ this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
1159
1225
  this.pipeline.on("error", (error) => this.emit("error", error));
1160
1226
  for (const participant of this.connection.room.remoteParticipants.values()) {
1161
1227
  for (const [, pub] of participant.trackPublications) {