npm - @dtelecom/agents-js - Versions diffs - 0.1.15 → 0.1.16 - Mend

@dtelecom/agents-js 0.1.15 → 0.1.16

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (13)

package/dist/index.d.mts +12 -0
package/dist/index.d.ts +12 -0
package/dist/index.js +79 -53
package/dist/index.js.map +1 -1
package/dist/index.mjs +79 -53
package/dist/index.mjs.map +1 -1
package/dist/providers/index.d.mts +3 -3
package/dist/providers/index.d.ts +3 -3
package/dist/providers/index.js +92 -46
package/dist/providers/index.js.map +1 -1
package/dist/providers/index.mjs +92 -46
package/dist/providers/index.mjs.map +1 -1
package/package.json +1 -2

package/dist/index.d.mts CHANGED Viewed

@@ -23,6 +23,14 @@ declare class VoiceAgent extends EventEmitter {
      * Use for greetings or announcements. Supports barge-in.
      */
     say(text: string): Promise<void>;
+    /**
+     * Switch STT language on all active streams.
+     * Use for bilingual lessons — e.g. switch to 'es' with forceWhisper when
+     * expecting Spanish, back to 'auto' for Parakeet auto-detect otherwise.
+     */
+    setSTTLanguage(language: string, options?: {
+        forceWhisper?: boolean;
+    }): void;
     /** Start the agent — connect to room and begin listening. */
     start(options: AgentStartOptions): Promise<void>;
     /** Stop the agent — disconnect and clean up. */
@@ -124,6 +132,10 @@ declare class Pipeline extends EventEmitter {
     removeParticipant(identity: string): Promise<void>;
     stop(): Promise<void>;
     getContextManager(): ContextManager;
+    /** Switch STT language on all active streams (e.g. for bilingual lessons). */
+    setSTTLanguage(language: string, options?: {
+        forceWhisper?: boolean;
+    }): void;
     private lastFinalAt;
     private lastSttDuration;
     private handleTranscription;

package/dist/index.d.ts CHANGED Viewed

@@ -23,6 +23,14 @@ declare class VoiceAgent extends EventEmitter {
      * Use for greetings or announcements. Supports barge-in.
      */
     say(text: string): Promise<void>;
+    /**
+     * Switch STT language on all active streams.
+     * Use for bilingual lessons — e.g. switch to 'es' with forceWhisper when
+     * expecting Spanish, back to 'auto' for Parakeet auto-detect otherwise.
+     */
+    setSTTLanguage(language: string, options?: {
+        forceWhisper?: boolean;
+    }): void;
     /** Start the agent — connect to room and begin listening. */
     start(options: AgentStartOptions): Promise<void>;
     /** Stop the agent — disconnect and clean up. */
@@ -124,6 +132,10 @@ declare class Pipeline extends EventEmitter {
     removeParticipant(identity: string): Promise<void>;
     stop(): Promise<void>;
     getContextManager(): ContextManager;
+    /** Switch STT language on all active streams (e.g. for bilingual lessons). */
+    setSTTLanguage(language: string, options?: {
+        forceWhisper?: boolean;
+    }): void;
     private lastFinalAt;
     private lastSttDuration;
     private handleTranscription;

package/dist/index.js CHANGED Viewed

@@ -1274,11 +1274,20 @@ var Pipeline = class extends import_events.EventEmitter {
   getContextManager() {
     return this.context;
   }
+  /** Switch STT language on all active streams (e.g. for bilingual lessons). */
+  setSTTLanguage(language, options) {
+    for (const [identity, stream] of this.sttStreams) {
+      if (stream.setLanguage) {
+        stream.setLanguage(language, options);
+        log7.info(`STT language \u2192 ${language}${options?.forceWhisper ? " (whisper)" : ""} for "${identity}"`);
+      }
+    }
+  }
   lastFinalAt = 0;
   lastSttDuration = 0;
   async handleTranscription(speaker, result) {
     this.emit("transcription", { ...result, speaker });
-    if (!result.isFinal && result.text.trim()) {
+    if (!result.isFinal) {
       this.setAgentState("listening");
     }
     if (this.audioOutput.playing && result.text.trim().length > 0) {
@@ -1363,60 +1372,69 @@ var Pipeline = class extends import_events.EventEmitter {
         sentenceQueue.push(text2);
         wake();
       };
+      const MAX_LLM_RETRIES = 2;
       const producer = async () => {
-        let isFirstChunk = true;
         const defaultLang = this.tts?.defaultLanguage;
-        const segBuf = [];
-        const flushSegments = () => {
-          if (segBuf.length === 0) return;
-          const combined = segBuf.map(
-            (s) => s.lang !== defaultLang ? `<lang xml:lang="${s.lang}">${s.text}</lang>` : s.text
-          ).join(" ");
-          segBuf.length = 0;
-          pushSentence(combined);
-        };
-        const llmStream = this.llm.chat(messages, signal);
-        try {
-          while (!signal.aborted) {
-            const { value: chunk, done } = await llmStream.next();
-            if (done || !chunk) break;
-            if (signal.aborted) break;
-            if (chunk.type === "segment" && chunk.segment) {
-              if (isFirstChunk) {
-                tLlmFirstToken = performance.now();
-                isFirstChunk = false;
-                log7.info(`llm_first_segment: ${(tLlmFirstToken - tSpeechEnd).toFixed(0)}ms`);
-              }
-              if (fullResponse) fullResponse += " ";
-              fullResponse += chunk.segment.text;
-              segBuf.push(chunk.segment);
-              if (/[.!?]["'»)]*\s*$/.test(chunk.segment.text)) {
-                flushSegments();
-              }
-            } else if (chunk.type === "token" && chunk.token) {
-              if (isFirstChunk) {
-                tLlmFirstToken = performance.now();
-                isFirstChunk = false;
-                log7.info(`llm_first_token: ${(tLlmFirstToken - tSpeechEnd).toFixed(0)}ms`);
-              }
-              fullResponse += chunk.token;
-              const sentences = this.splitter.push(chunk.token);
-              for (const sentence of sentences) {
-                pushSentence(sentence);
+        for (let attempt = 0; attempt <= MAX_LLM_RETRIES; attempt++) {
+          if (signal.aborted) break;
+          if (attempt > 0) {
+            log7.warn(`LLM retry ${attempt}/${MAX_LLM_RETRIES}...`);
+            this.splitter.reset();
+          }
+          let isFirstChunk = true;
+          const segBuf = [];
+          const flushSegments = () => {
+            if (segBuf.length === 0) return;
+            const combined = segBuf.map(
+              (s) => s.lang !== defaultLang ? `<lang xml:lang="${s.lang}">${s.text}</lang>` : s.text
+            ).join(" ");
+            segBuf.length = 0;
+            pushSentence(combined);
+          };
+          const llmStream = this.llm.chat(messages, signal);
+          try {
+            while (!signal.aborted) {
+              const { value: chunk, done } = await llmStream.next();
+              if (done || !chunk) break;
+              if (signal.aborted) break;
+              if (chunk.type === "segment" && chunk.segment) {
+                if (isFirstChunk) {
+                  tLlmFirstToken = performance.now();
+                  isFirstChunk = false;
+                  log7.info(`llm_first_segment: ${(tLlmFirstToken - tSpeechEnd).toFixed(0)}ms`);
+                }
+                if (fullResponse) fullResponse += " ";
+                fullResponse += chunk.segment.text;
+                segBuf.push(chunk.segment);
+                if (/[.!?]["'»)]*\s*$/.test(chunk.segment.text)) {
+                  flushSegments();
+                }
+              } else if (chunk.type === "token" && chunk.token) {
+                if (isFirstChunk) {
+                  tLlmFirstToken = performance.now();
+                  isFirstChunk = false;
+                  log7.info(`llm_first_token: ${(tLlmFirstToken - tSpeechEnd).toFixed(0)}ms`);
+                }
+                fullResponse += chunk.token;
+                const sentences = this.splitter.push(chunk.token);
+                for (const sentence of sentences) {
+                  pushSentence(sentence);
+                }
               }
             }
+          } finally {
+            await llmStream.return(void 0);
           }
-        } finally {
-          await llmStream.return(void 0);
-        }
-        if (!signal.aborted) {
-          flushSegments();
-          const remaining = this.splitter.flush();
-          if (remaining) {
-            pushSentence(remaining);
-          }
-          if (!fullResponse.trim()) {
-            log7.warn("LLM produced no output (empty response or no segments detected)");
+          if (!signal.aborted) {
+            flushSegments();
+            const remaining = this.splitter.flush();
+            if (remaining) {
+              pushSentence(remaining);
+            }
+            if (fullResponse.trim()) {
+              break;
+            }
+            log7.warn(`LLM produced no output (attempt ${attempt + 1}/${MAX_LLM_RETRIES + 1})`);
           }
         }
         producerDone = true;
@@ -1438,7 +1456,7 @@ var Pipeline = class extends import_events.EventEmitter {
                   tFirstAudioPlayed = t;
                   this.setAgentState("speaking");
                 }
-                this.emit("sentence", this.cleanText(sentence));
+                this.emit("sentence", this.cleanText(sentence), sentence);
               });
               continue;
             }
@@ -1507,7 +1525,7 @@ var Pipeline = class extends import_events.EventEmitter {
       this.setAgentState("thinking");
       await this.synthesizeAndPlay(text, signal, () => {
         this.setAgentState("speaking");
-        this.emit("sentence", this.cleanText(text));
+        this.emit("sentence", this.cleanText(text), text);
       });
       if (!signal.aborted) {
         await this.audioOutput.writeSilence(40);
@@ -1606,6 +1624,14 @@ var VoiceAgent = class extends import_events2.EventEmitter {
     }
     await this.pipeline.say(text);
   }
+  /**
+   * Switch STT language on all active streams.
+   * Use for bilingual lessons — e.g. switch to 'es' with forceWhisper when
+   * expecting Spanish, back to 'auto' for Parakeet auto-detect otherwise.
+   */
+  setSTTLanguage(language, options) {
+    this.pipeline?.setSTTLanguage(language, options);
+  }
   /** Start the agent — connect to room and begin listening. */
   async start(options) {
     if (this._running) {
@@ -1647,7 +1673,7 @@ var VoiceAgent = class extends import_events2.EventEmitter {
       maxContextTokens: this.config.maxContextTokens
     });
     this.pipeline.on("transcription", (result) => this.emit("transcription", result));
-    this.pipeline.on("sentence", (text) => this.emit("sentence", text));
+    this.pipeline.on("sentence", (text, raw) => this.emit("sentence", text, raw));
     this.pipeline.on("response", (text) => this.emit("response", text));
     this.pipeline.on("agentState", (state) => this.emit("agentState", state));
     this.pipeline.on("error", (error) => this.emit("error", error));