npm - @dtelecom/agents-js - Version diff - 0.2.2 → 0.3.1 - Mend

@dtelecom/agents-js 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/dist/index.d.mts +4 -2
package/dist/index.d.ts +4 -2
package/dist/index.js +44 -15
package/dist/index.js.map +1 -1
package/dist/index.mjs +44 -15
package/dist/index.mjs.map +1 -1
package/dist/memory/index.d.mts +1 -1
package/dist/memory/index.d.ts +1 -1
package/dist/providers/index.d.mts +1 -1
package/dist/providers/index.d.ts +1 -1
package/dist/providers/index.js +42 -0
package/dist/providers/index.js.map +1 -1
package/dist/providers/index.mjs +42 -0
package/dist/providers/index.mjs.map +1 -1
package/dist/{types-BBKtiPvm.d.mts → types-BJylZd8Q.d.mts} +28 -5
package/dist/{types-BBKtiPvm.d.ts → types-BJylZd8Q.d.ts} +28 -5
package/package.json +1 -1

package/dist/index.d.mts CHANGED

@@ -1,8 +1,8 @@
 import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
 import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
 import { EventEmitter } from 'events';
-import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.mjs';
-export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.mjs';
+import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.mjs';
+export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.mjs';
 declare class VoiceAgent extends EventEmitter {
     private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
     private readonly nameVariants;
     private readonly beforeRespond?;
     private readonly memory?;
+    private readonly tools?;
     /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
     private cleanText;
     /** Active STT streams, keyed by participant identity */
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
     private readonly _warmupPromise;
     private readonly _ttsWarmupPromise;
     private readonly _llmWarmupPromise;
+    private readonly _audioReadyPromise;
     get processing(): boolean;
     get running(): boolean;
     get agentState(): AgentState;

package/dist/index.d.ts CHANGED

@@ -1,8 +1,8 @@
 import * as _dtelecom_server_sdk_node from '@dtelecom/server-sdk-node';
 import { Room, AudioSource, RemoteAudioTrack, AudioFrame } from '@dtelecom/server-sdk-node';
 import { EventEmitter } from 'events';
-import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BBKtiPvm.js';
-export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin } from './types-BBKtiPvm.js';
+import { A as AgentConfig, a as AgentStartOptions, M as Message, L as LLMPlugin, P as PipelineOptions, b as AgentState, S as STTStream, T as TranscriptionResult } from './types-BJylZd8Q.js';
+export { c as AgentEvents, d as AudioOutput, D as DataMessageHandler, e as LLMChatOptions, f as LLMChunk, g as MemoryConfig, h as PipelineEvents, R as RespondMode, i as STTPlugin, j as STTStreamOptions, k as TTSPlugin, l as ToolCallResult, m as ToolDefinition } from './types-BJylZd8Q.js';
 declare class VoiceAgent extends EventEmitter {
     private readonly config;
@@ -111,6 +111,7 @@ declare class Pipeline extends EventEmitter {
     private readonly nameVariants;
     private readonly beforeRespond?;
     private readonly memory?;
+    private readonly tools?;
     /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
     private cleanText;
     /** Active STT streams, keyed by participant identity */
@@ -124,6 +125,7 @@ declare class Pipeline extends EventEmitter {
     private readonly _warmupPromise;
     private readonly _ttsWarmupPromise;
     private readonly _llmWarmupPromise;
+    private readonly _audioReadyPromise;
     get processing(): boolean;
     get running(): boolean;
     get agentState(): AgentState;

package/dist/index.js CHANGED

@@ -682,11 +682,21 @@ var AudioOutput = class {
   _responding = false;
   _stopped = false;
   silenceInterval = null;
+  /** Resolves when the RTP transport is ready and initial silence has been sent. */
+  whenReady;
+  _resolveReady;
   /** When set, raw PCM from TTS is saved to this directory as WAV files for debugging. */
   dumpDir = null;
   dumpCounter = 0;
   constructor(source) {
     this.source = source;
+    if (source.ready) {
+      this.whenReady = Promise.resolve();
+    } else {
+      this.whenReady = new Promise((resolve) => {
+        this._resolveReady = resolve;
+      });
+    }
   }
   get playing() {
     return this._playing;
@@ -714,8 +724,11 @@ var AudioOutput = class {
   startSilence() {
     if (this.silenceInterval) return;
     const startKeepalive = () => {
-      log3.debug("Transport ready \u2014 sending initial silence + starting 3s keepalive");
-      this.sendSilenceFrame();
+      log3.debug("Transport ready \u2014 sending initial silence burst + starting 3s keepalive");
+      for (let i = 0; i < 15; i++) {
+        this.sendSilenceFrame();
+      }
+      this._resolveReady?.();
       this.silenceInterval = setInterval(() => {
         if (!this._playing && !this._responding && !this._stopped) {
           this.sendSilenceFrame();
@@ -1208,6 +1221,7 @@ var Pipeline = class extends import_events.EventEmitter {
   nameVariants;
   beforeRespond;
   memory;
+  tools;
   /** Strip provider-specific markup (e.g. SSML lang tags) for display. */
   cleanText(text) {
     return this.tts?.cleanText ? this.tts.cleanText(text) : text;
@@ -1230,6 +1244,7 @@ var Pipeline = class extends import_events.EventEmitter {
     this.nameVariants = (options.nameVariants ?? []).map((n) => n.toLowerCase());
     this.beforeRespond = options.beforeRespond;
     this.memory = options.memory;
+    this.tools = options.tools;
     this.context = new ContextManager({
       instructions: options.instructions,
       maxContextTokens: options.maxContextTokens
@@ -1252,12 +1267,14 @@ var Pipeline = class extends import_events.EventEmitter {
     this._llmWarmupPromise = this.llm.warmup ? this.llm.warmup(options.instructions).catch((err) => {
       log7.warn("LLM warmup failed (non-fatal):", err);
     }) : Promise.resolve();
-    this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise]).then(() => {
+    this._audioReadyPromise = this.audioOutput.whenReady;
+    this._warmupPromise = Promise.all([this._ttsWarmupPromise, this._llmWarmupPromise, this._audioReadyPromise]).then(() => {
     });
   }
   _warmupPromise;
   _ttsWarmupPromise;
   _llmWarmupPromise;
+  _audioReadyPromise;
   get processing() {
     return this._processing;
   }
@@ -1415,6 +1432,7 @@ var Pipeline = class extends import_events.EventEmitter {
         wake();
       };
       const MAX_LLM_RETRIES = 2;
+      let toolCallEmitted = false;
       const producer = async () => {
         const defaultLang = this.tts?.defaultLanguage;
         for (let attempt = 0; attempt <= MAX_LLM_RETRIES; attempt++) {
@@ -1433,7 +1451,7 @@ var Pipeline = class extends import_events.EventEmitter {
             segBuf.length = 0;
             pushSentence(combined);
           };
-          const llmStream = this.llm.chat(messages, signal);
+          const llmStream = this.llm.chat(messages, signal, { tools: this.tools });
           try {
             while (!signal.aborted) {
               const { value: chunk, done } = await llmStream.next();
@@ -1462,6 +1480,10 @@ var Pipeline = class extends import_events.EventEmitter {
                 for (const sentence of sentences) {
                   pushSentence(sentence);
                 }
+              } else if (chunk.type === "tool_call" && chunk.toolCall) {
+                log7.info(`Tool call: ${chunk.toolCall.name}(${chunk.toolCall.arguments})`);
+                toolCallEmitted = true;
+                this.emit("toolCall", chunk.toolCall);
               }
             }
           } finally {
@@ -1473,7 +1495,7 @@ var Pipeline = class extends import_events.EventEmitter {
             if (remaining) {
               pushSentence(remaining);
             }
-            if (fullResponse.trim()) {
+            if (fullResponse.trim() || toolCallEmitted) {
               break;
             }
             log7.warn(`LLM produced no output (attempt ${attempt + 1}/${MAX_LLM_RETRIES + 1})`);
@@ -1520,14 +1542,19 @@ var Pipeline = class extends import_events.EventEmitter {
               }
             };
             tryPrefetch();
-            await this.synthesizeAndPlay(sentence, signal, (t) => {
-              if (!tFirstAudioPlayed) {
-                tFirstAudioPlayed = t;
-                this.setAgentState("speaking");
-              }
-              this.emit("sentence", this.cleanText(sentence), sentence);
-              tryPrefetch();
-            }, existingStream);
+            try {
+              await this.synthesizeAndPlay(sentence, signal, (t) => {
+                if (!tFirstAudioPlayed) {
+                  tFirstAudioPlayed = t;
+                  this.setAgentState("speaking");
+                }
+                this.emit("sentence", this.cleanText(sentence), sentence);
+                tryPrefetch();
+              }, existingStream);
+            } catch (ttsErr) {
+              if (ttsErr instanceof Error && ttsErr.name === "AbortError") throw ttsErr;
+              log7.warn(`TTS error for sentence (skipping): "${sentence.slice(0, 40)}"`, ttsErr);
+            }
           }
         } finally {
           if (!signal.aborted) {
@@ -1580,7 +1607,7 @@ var Pipeline = class extends import_events.EventEmitter {
       return;
     }
     this._processing = true;
-    await this._ttsWarmupPromise;
+    await Promise.all([this._ttsWarmupPromise, this._audioReadyPromise]);
     log7.info(`say(): "${text.slice(0, 60)}"`);
     try {
       const signal = this.bargeIn.startCycle();
@@ -1735,12 +1762,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
       agentName: this.config.agentName,
       nameVariants: this.config.nameVariants,
       memory: this.memory ?? void 0,
-      maxContextTokens: this.config.maxContextTokens
+      maxContextTokens: this.config.maxContextTokens,
+      tools: this.config.tools
     });
     this.pipeline.on("transcription", (result) => this.emit("transcription", result));
     this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
     this.pipeline.on("response", (text) => this.emit("response", text));
     this.pipeline.on("agentState", (state) => this.emit("agentState", state));
+    this.pipeline.on("toolCall", (tc) => this.emit("toolCall", tc));
     this.pipeline.on("error", (error) => this.emit("error", error));
     for (const participant of this.connection.room.remoteParticipants.values()) {
       for (const [, pub] of participant.trackPublications) {