npm - neoagent - Versions diffs - 2.2.0 → 2.2.1-beta.0 - Mend

neoagent 2.2.0 → 2.2.1-beta.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (20)

package/package.json +1 -1
package/server/db/database.js +35 -0
package/server/http/routes.js +1 -0
package/server/public/assets/fonts/MaterialIcons-Regular.otf +0 -0
package/server/public/flutter_bootstrap.js +1 -1
package/server/public/main.dart.js +71727 -70915
package/server/routes/widgets.js +101 -0
package/server/services/ai/engine.js +7 -2
package/server/services/ai/toolResult.js +25 -0
package/server/services/ai/tools.js +182 -0
package/server/services/manager.js +31 -0
package/server/services/scheduler/cron.js +85 -32
package/server/services/scheduler/cron_utils.js +216 -0
package/server/services/voice/bufferedLiveRelayAdapter.js +85 -17
package/server/services/voice/liveSession.js +109 -9
package/server/services/voice/providers.js +44 -18
package/server/services/voice/runtimeManager.js +75 -25
package/server/services/voice/turnRunner.js +53 -25
package/server/services/websocket.js +26 -1
package/server/services/widgets/service.js +550 -0

package/server/services/voice/runtimeManager.js CHANGED Viewed

@@ -116,6 +116,9 @@ class VoiceRuntimeManager {
   async closeSession(sessionId, reason = 'closed') {
     const session = this.getSession(sessionId);
     if (!session) return;
+    if (reason === 'socket_disconnected') {
+      await this.abortActiveRun(session.id, 'voice_disconnect');
+    }
     this.sessions.delete(session.id);
     await session.adapter?.close?.(session.id);
     await session.close(reason);
@@ -128,13 +131,14 @@ class VoiceRuntimeManager {
     session.resetTurnState();
     await session.adapter.onInputStart(session, {
       mimeType: options.mimeType,
+      turnId: options.turnId,
     });
     await session.setState('listening');
   }
   async appendInputAudio(sessionId, audioBytes, options = {}) {
     const session = this.#requireSession(sessionId);
-    await session.adapter.appendAudioChunk(session, audioBytes, options);
+    return session.adapter.appendAudioChunk(session, audioBytes, options);
   }
   async commitInput(sessionId, options = {}) {
@@ -143,7 +147,10 @@ class VoiceRuntimeManager {
       return { transcript: '' };
     }
     await session.setState('transcribing');
-    const transcript = await session.adapter.commitInput(session);
+    const transcript = await session.adapter.commitInput(session, {
+      turnId: options.turnId,
+      finalSequence: options.finalSequence,
+    });
     if (!transcript) {
       await session.setState('idle');
       return { transcript: '' };
@@ -280,7 +287,9 @@ class VoiceRuntimeManager {
       kind,
     });
-    await session.setState('speaking', { kind });
+    if (kind === 'final') {
+      await session.setState('speaking', { kind });
+    }
     const voiceOptions = normalizeVoiceSynthesisOptions({
       provider: session.voiceSettings?.liveProvider,
@@ -290,39 +299,80 @@ class VoiceRuntimeManager {
     let index = 0;
     let streamError = null;
+    const ttsAttempts = this.#buildTtsAttemptOrder(session, voiceOptions);
     try {
-      await synthesizeVoiceReplyStream(
-        content,
-        {
-          ...voiceOptions,
-          apiKey: session.voiceSettings?.liveApiKey,
-          baseUrl: session.voiceSettings?.liveBaseUrl,
-        },
-        async ({ audioBytes, mimeType }) => {
-          if (session.closed || session.interrupted) return;
-          socket.emit('voice:audio_chunk', {
-            sessionId,
-            kind,
-            index,
-            audioBase64: audioBytes.toString('base64'),
-            mimeType,
-          });
-          index += 1;
-        },
-      );
+      for (const attempt of ttsAttempts) {
+        index = 0;
+        streamError = null;
+        try {
+          await synthesizeVoiceReplyStream(
+            content,
+            attempt,
+            async ({ audioBytes, mimeType }) => {
+              if (session.closed || session.interrupted) return;
+              socket.emit('voice:audio_chunk', {
+                sessionId,
+                kind,
+                index,
+                audioBase64: audioBytes.toString('base64'),
+                mimeType,
+              });
+              index += 1;
+            },
+          );
+          streamError = null;
+          break;
+        } catch (error) {
+          streamError = String(error?.message || error || 'Voice playback failed.');
+        }
+      }
     } catch (error) {
       streamError = String(error?.message || error || 'Voice playback failed.');
+    }
+    if (!streamError && !session.closed && !session.interrupted) {
+      socket.emit('voice:audio_done', { sessionId, kind, totalChunks: index });
+    } else if (kind === 'final' && !session.closed && !session.interrupted) {
       socket.emit('voice:error', {
         sessionId,
         error: streamError,
+        recoverable: true,
+        phase: 'tts',
       });
+      await session.setState('degraded', { kind, phase: 'tts' });
     }
-    if (!streamError && !session.closed && !session.interrupted) {
-      socket.emit('voice:audio_done', { sessionId, kind, totalChunks: index });
+    if (kind === 'final' && !streamError) {
+      await session.setState('idle');
     }
+  }
-    await session.setState('idle');
+  #buildTtsAttemptOrder(session, voiceOptions) {
+    const attempts = [];
+    const providers = [
+      voiceOptions.provider,
+      ...['openai', 'deepgram', 'gemini'].filter((provider) => provider !== voiceOptions.provider),
+    ];
+    for (const provider of providers) {
+      const normalized = normalizeVoiceSynthesisOptions({
+        provider,
+        model: provider === voiceOptions.provider ? voiceOptions.model : null,
+        voice: provider === voiceOptions.provider ? voiceOptions.voice : null,
+      });
+      const runtime = provider === voiceOptions.provider
+        ? {
+            apiKey: session.voiceSettings?.liveApiKey,
+            baseUrl: session.voiceSettings?.liveBaseUrl,
+          }
+        : this.#getProviderRuntime(session.userId, provider, session.agentId);
+      attempts.push({
+        ...normalized,
+        apiKey: runtime.apiKey,
+        baseUrl: runtime.baseUrl,
+        timeoutMs: 12000,
+      });
+    }
+    return attempts;
   }
 }

package/server/services/voice/turnRunner.js CHANGED Viewed

@@ -42,21 +42,6 @@ async function runVoiceTranscriptTurn({
     model: ttsModel,
     voice: ttsVoice,
   });
-  const ttsProviderId = voiceOptions.provider === 'gemini'
-    ? 'google'
-    : voiceOptions.provider;
-  let ttsRuntime = { apiKey: '', baseUrl: '' };
-  if (ttsProviderId !== 'deepgram') {
-    try {
-      const runtime = getProviderRuntimeConfig(userId, ttsProviderId, agentId);
-      ttsRuntime = {
-        apiKey: typeof runtime.apiKey === 'string' ? runtime.apiKey.trim() : '',
-        baseUrl: typeof runtime.baseUrl === 'string' ? runtime.baseUrl.trim() : '',
-      };
-    } catch {
-      ttsRuntime = { apiKey: '', baseUrl: '' };
-    }
-  }
   const storedUserContent = transcriptText;
   const normalizedMetadata = metadata && typeof metadata === 'object' ? metadata : {};
@@ -143,15 +128,40 @@ async function runVoiceTranscriptTurn({
   let synthesized;
   let ttsError = null;
+  let providerUsed = voiceOptions.provider;
+  let modelUsed = voiceOptions.model;
+  let voiceUsed = voiceOptions.voice;
   if (synthesize !== false) {
-    try {
-      synthesized = await synthesizeVoiceReply(replyText, {
-        ...voiceOptions,
-        apiKey: ttsRuntime.apiKey,
-        baseUrl: ttsRuntime.baseUrl,
+    const attemptProviders = [
+      voiceOptions.provider,
+      ...['openai', 'deepgram', 'gemini'].filter((provider) => provider !== voiceOptions.provider),
+    ];
+    let lastTtsError = null;
+    for (const provider of attemptProviders) {
+      const normalized = normalizeVoiceSynthesisOptions({
+        provider,
+        model: provider === voiceOptions.provider ? voiceOptions.model : null,
+        voice: provider === voiceOptions.provider ? voiceOptions.voice : null,
       });
-    } catch (error) {
-      ttsError = String(error?.message || error || 'Speech synthesis failed.');
+      const runtime = resolveProviderRuntime(userId, agentId, provider);
+      try {
+        synthesized = await synthesizeVoiceReply(replyText, {
+          ...normalized,
+          apiKey: runtime.apiKey,
+          baseUrl: runtime.baseUrl,
+          timeoutMs: 12000,
+        });
+        providerUsed = normalized.provider;
+        modelUsed = normalized.model;
+        voiceUsed = normalized.voice;
+        ttsError = null;
+        break;
+      } catch (error) {
+        lastTtsError = error;
+      }
+    }
+    if (!synthesized) {
+      ttsError = String(lastTtsError?.message || lastTtsError || 'Speech synthesis failed.');
       synthesized = {
         mimeType: 'audio/mpeg',
         audioBytes: Buffer.alloc(0),
@@ -168,15 +178,33 @@ async function runVoiceTranscriptTurn({
     runId: runResult?.runId || null,
     transcript: transcriptText,
     replyText,
-    ttsProvider: voiceOptions.provider,
-    ttsModel: voiceOptions.model,
-    ttsVoice: voiceOptions.voice,
+    ttsProvider: providerUsed,
+    ttsModel: modelUsed,
+    ttsVoice: voiceUsed,
     audioMimeType: synthesized.mimeType,
     audioBase64: synthesized.audioBytes.toString('base64'),
     ttsError,
   };
 }
+function resolveProviderRuntime(userId, agentId, provider) {
+  const providerId = String(provider || '').trim().toLowerCase() === 'gemini'
+    ? 'google'
+    : String(provider || '').trim().toLowerCase();
+  if (!providerId || providerId === 'deepgram') {
+    return { apiKey: '', baseUrl: '' };
+  }
+  try {
+    const runtime = getProviderRuntimeConfig(userId, providerId, agentId);
+    return {
+      apiKey: typeof runtime.apiKey === 'string' ? runtime.apiKey.trim() : '',
+      baseUrl: typeof runtime.baseUrl === 'string' ? runtime.baseUrl.trim() : '',
+    };
+  } catch {
+    return { apiKey: '', baseUrl: '' };
+  }
+}
 module.exports = {
   runVoiceTranscriptTurn,
 };

package/server/services/websocket.js CHANGED Viewed

@@ -516,6 +516,7 @@ function setupWebSocket(io, services) {
         }
         await voiceRuntimeManager.beginInput(sessionId, {
           mimeType: toOptionalString(data?.mimeType, 128),
+          turnId: toOptionalString(data?.turnId, 128),
         });
       } catch (err) {
         console.error(`[WS] voice:input_start failed for user ${userId}:`, err);
@@ -554,8 +555,30 @@ function setupWebSocket(io, services) {
             error: `audio chunk is too large (max ${MAX_VOICE_AUDIO_CHUNK_BYTES} bytes)`,
           });
         }
-        await voiceRuntimeManager.appendInputAudio(sessionId, audioBytes, {
+        const turnId = toOptionalString(data?.turnId, 128);
+        const sequence = toBoundedInt(data?.sequence, -1, -1, 1_000_000);
+        if (!turnId) {
+          return socket.emit('voice:error', {
+            sessionId,
+            error: 'turnId is required',
+          });
+        }
+        if (sequence < 0) {
+          return socket.emit('voice:error', {
+            sessionId,
+            error: 'sequence is required',
+          });
+        }
+        const appendResult = await voiceRuntimeManager.appendInputAudio(sessionId, audioBytes, {
           mimeType: toOptionalString(data?.mimeType, 128),
+          turnId,
+          sequence,
+        });
+        socket.emit('voice:chunk_ack', {
+          sessionId,
+          turnId,
+          sequence,
+          receivedThrough: appendResult?.receivedThrough ?? sequence,
         });
       } catch (err) {
         console.error(`[WS] voice:audio_chunk failed for user ${userId}:`, err);
@@ -618,6 +641,8 @@ function setupWebSocket(io, services) {
         }
         await voiceRuntimeManager.commitInput(sessionId, {
+          turnId: toOptionalString(data?.turnId, 128),
+          finalSequence: toBoundedInt(data?.finalSequence, -1, -1, 1_000_000),
           promptHint: toOptionalString(data?.promptHint, 2000),
           metadata,
         });