npm - @dtelecom/agents-js - Versions diffs - 0.2.1 → 0.3.0 - Mend

@dtelecom/agents-js 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/index.d.mts +6 -5
package/dist/index.d.ts +6 -5
package/dist/index.js +100 -34
package/dist/index.js.map +1 -1
package/dist/index.mjs +100 -34
package/dist/index.mjs.map +1 -1
package/dist/memory/index.d.mts +1 -1
package/dist/memory/index.d.ts +1 -1
package/dist/providers/index.d.mts +1 -1
package/dist/providers/index.d.ts +1 -1
package/dist/providers/index.js +42 -0
package/dist/providers/index.js.map +1 -1
package/dist/providers/index.mjs +42 -0
package/dist/providers/index.mjs.map +1 -1
package/dist/{types-BBKtiPvm.d.mts → types-Di_jxIgs.d.mts} +25 -5
package/dist/{types-BBKtiPvm.d.ts → types-Di_jxIgs.d.ts} +25 -5
package/package.json +1 -1

package/dist/index.d.mts CHANGED

@@ -1,8 +1,8 @@
 import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
 import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
 import { EventEmitter } from 'events';
-import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
-export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
+import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.mjs';
+export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.mjs';
 declare class VoiceAgent extends EventEmitter {
     private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
     private readonly nameVariants;
     private readonly beforeRespond?;
     private readonly memory?;
+    private readonly tools?;
     /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
     private cleanText;
     /** Active STT streams, keyed by participant identity */
@@ -121,9 +122,9 @@ declare class Pipeline extends EventEmitter {
     /** Queued turn while current one is still processing */
     private pendingTurn;
     constructor(options: PipelineOptions);
-    /** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
-    private _warmupPromise;
-    private warmup;
+    private readonly _warmupPromise;
+    private readonly _ttsWarmupPromise;
+    private readonly _llmWarmupPromise;
     get processing(): boolean;
     get running(): boolean;
     get agentState(): AgentState;

package/dist/index.d.ts CHANGED

@@ -1,8 +1,8 @@
 import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
 import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
 import { EventEmitter } from 'events';
-import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
-export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
+import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-Di_jxIgs.js';
+export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-Di_jxIgs.js';
 declare class VoiceAgent extends EventEmitter {
     private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
     private readonly nameVariants;
     private readonly beforeRespond?;
     private readonly memory?;
+    private readonly tools?;
     /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
     private cleanText;
     /** Active STT streams, keyed by participant identity */
@@ -121,9 +122,9 @@ declare class Pipeline extends EventEmitter {
     /** Queued turn while current one is still processing */
     private pendingTurn;
     constructor(options: PipelineOptions);
-    /** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
-    private _warmupPromise;
-    private warmup;
+    private readonly _warmupPromise;
+    private readonly _ttsWarmupPromise;
+    private readonly _llmWarmupPromise;
     get processing(): boolean;
     get running(): boolean;
     get agentState(): AgentState;

package/dist/index.js CHANGED

@@ -1151,6 +1151,49 @@ var AUDIO_DRAIN_MS = 800;
 function sleep2(ms) {
   return new Promise((resolve) => setTimeout(resolve, ms));
 }
+function prefetchTTS(tts, text, signal) {
+  const buffer = [];
+  let done = false;
+  let error = null;
+  let wake = null;
+  const notify = () => {
+    if (wake) {
+      const w = wake;
+      wake = null;
+      w();
+    }
+  };
+  void (async () => {
+    try {
+      const stream = tts.synthesize(text, signal);
+      for await (const chunk of stream) {
+        if (signal?.aborted) break;
+        buffer.push(chunk);
+        notify();
+      }
+    } catch (e) {
+      if (!(e instanceof Error && e.name === "AbortError")) error = e;
+    } finally {
+      done = true;
+      notify();
+    }
+  })();
+  return async function* () {
+    let index = 0;
+    while (true) {
+      if (signal?.aborted) return;
+      if (error) throw error;
+      if (index < buffer.length) {
+        yield buffer[index++];
+        continue;
+      }
+      if (done) return;
+      await new Promise((r) => {
+        wake = r;
+      });
+    }
+  };
+}
 var Pipeline = class extends import_events.EventEmitter {
   stt;
   llm;
@@ -1165,6 +1208,7 @@ var Pipeline = class extends import_events.EventEmitter {
   nameVariants;
   beforeRespond;
   memory;
+  tools;
   /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
   cleanText(text) {
     return this.tts?.cleanText ? this.tts.cleanText(text) : text;
@@ -1187,6 +1231,7 @@ var Pipeline = class extends import_events.EventEmitter {
     this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
     this.beforeRespond = options.beforeRespond;
     this.memory = options.memory;
+    this.tools = options.tools;
     this.context = new ContextManager({
       instructions: options.instructions,
       maxContextTokens: options.maxContextTokens
@@ -1203,28 +1248,18 @@ var Pipeline = class extends import_events.EventEmitter {
       this.splitter.reset();
       this.setAgentState("idle");
     };
-    this._warmupPromise = this.warmup(options.instructions);
+    this._ttsWarmupPromise = this.tts?.warmup ? this.tts.warmup().catch((err) => {
+      log7.warn("TTS warmup failed (non-fatal):", err);
+    }) : Promise.resolve();
+    this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
+      log7.warn("LLM warmup failed (non-fatal):", err);
+    }) : Promise.resolve();
+    this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
+    });
   }
-  /** One-shot warmup — safe to call from constructor, resolves when both LLM and TTS are ready. */
   _warmupPromise;
-  async warmup(instructions) {
-    const tasks = [];
-    if (this.llm.warmup) {
-      tasks.push(
-        this.llm.warmup(instructions).catch((err) => {
-          log7.warn("LLM warmup failed:", err);
-        })
-      );
-    }
-    if (this.tts?.warmup) {
-      tasks.push(
-        this.tts.warmup().catch((err) => {
-          log7.warn("TTS warmup failed:", err);
-        })
-      );
-    }
-    await Promise.all(tasks);
-  }
+  _ttsWarmupPromise;
+  _llmWarmupPromise;
   get processing() {
     return this._processing;
   }
@@ -1400,7 +1435,7 @@ var Pipeline = class extends import_events.EventEmitter {
             segBuf.length = 0;
             pushSentence(combined);
           };
-          const llmStream = this.llm.chat(messages, signal);
+          const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
           try {
             while (!signal.aborted) {
               const { value: chunk, done } = await llmStream.next();
@@ -1429,6 +1464,9 @@ var Pipeline = class extends import_events.EventEmitter {
                 for (const sentence of sentences) {
                   pushSentence(sentence);
                 }
+              } else if (chunk.type === "tool_call" && chunk.toolCall) {
+                log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
+                this.emit("toolCall", chunk.toolCall);
               }
             }
           } finally {
@@ -1451,29 +1489,55 @@ var Pipeline = class extends import_events.EventEmitter {
       };
       const consumer = async () => {
         this.audioOutput.beginResponse();
+        const state = { prefetched: null };
         try {
           while (true) {
             if (signal.aborted) break;
-            if (sentenceQueue.length > 0) {
-              const sentence = sentenceQueue.shift();
+            let sentence;
+            let existingStream;
+            if (state.prefetched) {
+              sentence = state.prefetched.sentence;
+              existingStream = state.prefetched.streamFn();
+              state.prefetched = null;
+            } else if (sentenceQueue.length > 0) {
+              sentence = sentenceQueue.shift();
               if (!/\w/.test(sentence)) {
                 log7.debug(`Skipping non-word sentence: "${sentence}"`);
                 continue;
               }
+              existingStream = void 0;
+            } else if (producerDone) {
+              break;
+            } else {
+              await new Promise((resolve) => {
+                wakeConsumer = resolve;
+              });
+              wakeConsumer = null;
+              continue;
+            }
+            const tryPrefetch = () => {
+              if (state.prefetched || !this.tts) return;
+              if (sentenceQueue.length > 0) {
+                const next = sentenceQueue.shift();
+                if (/\w/.test(next)) {
+                  state.prefetched = { sentence: next, streamFn: prefetchTTS(this.tts, next, signal) };
+                }
+              }
+            };
+            tryPrefetch();
+            try {
               await this.synthesizeAndPlay(sentence, signal, (t) => {
                 if (!tFirstAudioPlayed) {
                   tFirstAudioPlayed = t;
                   this.setAgentState("speaking");
                 }
                 this.emit("sentence", this.cleanText(sentence), sentence);
-              });
-              continue;
+                tryPrefetch();
+              }, existingStream);
+            } catch (ttsErr) {
+              if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
+              log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
             }
-            if (producerDone) break;
-            await new Promise((resolve) => {
-              wakeConsumer = resolve;
-            });
-            wakeConsumer = null;
           }
         } finally {
           if (!signal.aborted) {
@@ -1526,7 +1590,7 @@ var Pipeline = class extends import_events.EventEmitter {
       return;
     }
     this._processing = true;
-    await this._warmupPromise;
+    await this._ttsWarmupPromise;
     log7.info(`say(): "${text.slice(0, 60)}"`);
     try {
       const signal = this.bargeIn.startCycle();
@@ -1563,7 +1627,7 @@ var Pipeline = class extends import_events.EventEmitter {
       }
     }
   }
-  async synthesizeAndPlay(text, signal, onFirstAudio) {
+  async synthesizeAndPlay(text, signal, onFirstAudio, existingStream) {
     if (!this.tts || signal.aborted) {
       log7.info(`[Agent says]: ${text}`);
       return;
@@ -1572,7 +1636,7 @@ var Pipeline = class extends import_events.EventEmitter {
       const ttsStart = performance.now();
       let firstChunk = true;
       let ttsChunkCount = 0;
-      const ttsStream = this.tts.synthesize(text, signal);
+      const ttsStream = existingStream ?? this.tts.synthesize(text, signal);
       const measuredStream = async function* () {
         for await (const chunk of ttsStream) {
           ttsChunkCount++;
@@ -1681,12 +1745,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
       agentName: this.config.agentName,
       nameVariants: this.config.nameVariants,
       memory: this.memory ?? void 0,
-      maxContextTokens: this.config.maxContextTokens
+      maxContextTokens: this.config.maxContextTokens,
+      tools: this.config.tools
     });
     this.pipeline.on("transcription", (result) => this.emit("transcription", result));
     this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
     this.pipeline.on("response", (text) => this.emit("response", text));
     this.pipeline.on("agentState", (state) => this.emit("agentState", state));
+    this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
     this.pipeline.on("error", (error) => this.emit("error", error));
     for (const participant of this.connection.room.remoteParticipants.values()) {
       for (const [, pub] of participant.trackPublications) {