npm - @mastra/voice-google-gemini-live - Versions diffs - 0.0.0-remove-unused-model-providers-api-20251030210744 → 0.0.0-safe-stringify-telemetry-20251205024938 - Mend

@mastra/voice-google-gemini-live 0.0.0-remove-unused-model-providers-api-20251030210744 → 0.0.0-safe-stringify-telemetry-20251205024938

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (10)

package/dist/index.js (changed)

@@ -1260,7 +1260,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
   sessionDurationTimeout;
   // Tool integration properties
   tools;
-  requestContext;
+  runtimeContext;
   // Store the configuration options
   options;
   /**
@@ -1497,67 +1497,70 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
   /**
    * Establish connection to the Gemini Live API
    */
-  async connect({ requestContext } = {}) {
-    if (this.state === "connected") {
-      this.log("Already connected to Gemini Live API");
-      return;
-    }
-    this.requestContext = requestContext;
-    this.emit("session", { state: "connecting" });
-    try {
-      let wsUrl;
-      let headers = {};
-      if (this.options.vertexAI) {
-        wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
-        await this.authManager.initialize();
-        const accessToken = await this.authManager.getAccessToken();
-        headers = { headers: { Authorization: `Bearer ${accessToken}` } };
-        this.log("Using Vertex AI authentication with OAuth token");
-      } else {
-        wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
-        headers = {
-          headers: {
-            "x-goog-api-key": this.options.apiKey || "",
-            "Content-Type": "application/json"
-          }
-        };
-        this.log("Using Live API authentication with API key");
-      }
-      this.log("Connecting to:", wsUrl);
-      this.ws = new WebSocket(wsUrl, void 0, headers);
-      this.connectionManager.setWebSocket(this.ws);
-      this.setupEventListeners();
-      await this.connectionManager.waitForOpen();
-      if (this.isResuming && this.sessionHandle) {
-        await this.sendSessionResumption();
-      } else {
-        this.sendInitialConfig();
-        this.sessionStartTime = Date.now();
-        this.sessionId = randomUUID();
+  async connect({ runtimeContext } = {}) {
+    return this.traced(async () => {
+      if (this.state === "connected") {
+        this.log("Already connected to Gemini Live API");
+        return;
       }
-      await this.waitForSessionCreated();
-      this.state = "connected";
-      this.emit("session", {
-        state: "connected",
-        config: {
+      this.runtimeContext = runtimeContext;
+      this.emit("session", { state: "connecting" });
+      try {
+        let wsUrl;
+        let headers = {};
+        if (this.options.vertexAI) {
+          const location = this.getVertexLocation();
+          wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
+          await this.authManager.initialize();
+          const accessToken = await this.authManager.getAccessToken();
+          headers = { headers: { Authorization: `Bearer ${accessToken}` } };
+          this.log("Using Vertex AI authentication with OAuth token");
+        } else {
+          wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
+          headers = {
+            headers: {
+              "x-goog-api-key": this.options.apiKey || "",
+              "Content-Type": "application/json"
+            }
+          };
+          this.log("Using Live API authentication with API key");
+        }
+        this.log("Connecting to:", wsUrl);
+        this.ws = new WebSocket(wsUrl, void 0, headers);
+        this.connectionManager.setWebSocket(this.ws);
+        this.setupEventListeners();
+        await this.connectionManager.waitForOpen();
+        if (this.isResuming && this.sessionHandle) {
+          await this.sendSessionResumption();
+        } else {
+          this.sendInitialConfig();
+          this.sessionStartTime = Date.now();
+          this.sessionId = randomUUID();
+        }
+        await this.waitForSessionCreated();
+        this.state = "connected";
+        this.emit("session", {
+          state: "connected",
+          config: {
+            sessionId: this.sessionId,
+            isResuming: this.isResuming,
+            toolCount: Object.keys(this.tools || {}).length
+          }
+        });
+        this.log("Successfully connected to Gemini Live API", {
           sessionId: this.sessionId,
           isResuming: this.isResuming,
           toolCount: Object.keys(this.tools || {}).length
+        });
+        if (this.options.sessionConfig?.maxDuration) {
+          this.startSessionDurationMonitor();
         }
-      });
-      this.log("Successfully connected to Gemini Live API", {
-        sessionId: this.sessionId,
-        isResuming: this.isResuming,
-        toolCount: Object.keys(this.tools || {}).length
-      });
-      if (this.options.sessionConfig?.maxDuration) {
-        this.startSessionDurationMonitor();
+      } catch (error) {
+        this.state = "disconnected";
+        this.log("Connection failed", error);
+        throw error;
       }
-    } catch (error) {
-      this.state = "disconnected";
-      this.log("Connection failed", error);
-      throw error;
-    }
+    }, "gemini-live.connect")();
   }
   /**
    * Disconnect from the Gemini Live API
@@ -1595,164 +1598,172 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
    * Send text to be converted to speech
    */
   async speak(input, options) {
-    this.validateConnectionState();
-    if (typeof input !== "string") {
-      const chunks = [];
-      for await (const chunk of input) {
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
+    return this.traced(async () => {
+      this.validateConnectionState();
+      if (typeof input !== "string") {
+        const chunks = [];
+        for await (const chunk of input) {
+          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
+        }
+        input = Buffer.concat(chunks).toString("utf-8");
       }
-      input = Buffer.concat(chunks).toString("utf-8");
-    }
-    if (input.trim().length === 0) {
-      throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
-    }
-    this.addToContext("user", input);
-    const textMessage = {
-      client_content: {
-        turns: [
-          {
-            role: "user",
-            parts: [
-              {
-                text: input
-              }
-            ]
-          }
-        ],
-        turnComplete: true
+      if (input.trim().length === 0) {
+        throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
       }
-    };
-    if (options && (options.speaker || options.languageCode || options.responseModalities)) {
-      const updateMessage = {
-        type: "session.update",
-        session: {
-          generation_config: {
-            ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
-            speech_config: {
-              ...options.languageCode ? { language_code: options.languageCode } : {},
-              ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+      this.addToContext("user", input);
+      const textMessage = {
+        client_content: {
+          turns: [
+            {
+              role: "user",
+              parts: [
+                {
+                  text: input
+                }
+              ]
             }
-          }
+          ],
+          turnComplete: true
         }
       };
+      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
+        const updateMessage = {
+          type: "session.update",
+          session: {
+            generation_config: {
+              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
+              speech_config: {
+                ...options.languageCode ? { language_code: options.languageCode } : {},
+                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+              }
+            }
+          }
+        };
+        try {
+          this.sendEvent("session.update", updateMessage);
+          this.log("Applied per-turn runtime options", options);
+        } catch (error) {
+          this.log("Failed to apply per-turn runtime options", error);
+        }
+      }
       try {
-        this.sendEvent("session.update", updateMessage);
-        this.log("Applied per-turn runtime options", options);
+        this.sendEvent("client_content", textMessage);
+        this.log("Text message sent", { text: input });
       } catch (error) {
-        this.log("Failed to apply per-turn runtime options", error);
+        this.log("Failed to send text message", error);
+        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
       }
-    }
-    try {
-      this.sendEvent("client_content", textMessage);
-      this.log("Text message sent", { text: input });
-    } catch (error) {
-      this.log("Failed to send text message", error);
-      throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
-    }
+    }, "gemini-live.speak")();
   }
   /**
    * Send audio stream for processing
    */
   async send(audioData) {
-    this.validateConnectionState();
-    if ("readable" in audioData && typeof audioData.on === "function") {
-      const stream = audioData;
-      stream.on("data", (chunk) => {
-        try {
-          const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
-          const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
-          this.sendEvent("realtime_input", message);
-        } catch (error) {
-          this.log("Failed to process audio chunk", error);
-          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
-        }
-      });
-      stream.on("error", (error) => {
-        this.log("Audio stream error", error);
-        this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
-      });
-      stream.on("end", () => {
-        this.log("Audio stream ended");
-      });
-    } else {
-      const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
-      const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
-      const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
-      this.sendEvent("realtime_input", message);
-    }
+    return this.traced(async () => {
+      this.validateConnectionState();
+      if ("readable" in audioData && typeof audioData.on === "function") {
+        const stream = audioData;
+        stream.on("data", (chunk) => {
+          try {
+            const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
+            const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+            this.sendEvent("realtime_input", message);
+          } catch (error) {
+            this.log("Failed to process audio chunk", error);
+            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
+          }
+        });
+        stream.on("error", (error) => {
+          this.log("Audio stream error", error);
+          this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
+        });
+        stream.on("end", () => {
+          this.log("Audio stream ended");
+        });
+      } else {
+        const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
+        const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
+        const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+        this.sendEvent("realtime_input", message);
+      }
+    }, "gemini-live.send")();
   }
   /**
    * Process speech from audio stream (traditional STT interface)
    */
   async listen(audioStream, _options) {
-    this.validateConnectionState();
-    let transcriptionText = "";
-    const onWriting = (data) => {
-      if (data.role === "user") {
-        transcriptionText += data.text;
-        this.log("Received transcription text:", { text: data.text, total: transcriptionText });
-      }
-    };
-    const onError = (error) => {
-      throw new Error(`Transcription failed: ${error.message}`);
-    };
-    const onSession = (data) => {
-      if (data.state === "disconnected") {
-        throw new Error("Session disconnected during transcription");
-      }
-    };
-    this.on("writing", onWriting);
-    this.on("error", onError);
-    this.on("session", onSession);
-    try {
-      const result = await this.audioStreamManager.handleAudioTranscription(
-        audioStream,
-        (base64Audio) => {
-          return new Promise((resolve, reject) => {
-            try {
-              const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
-              const cleanup = () => {
-                this.off("turnComplete", onTurnComplete);
-                this.off("error", onErr);
-              };
-              const onTurnComplete = () => {
-                cleanup();
-                resolve(transcriptionText.trim());
-              };
-              const onErr = (e) => {
-                cleanup();
-                reject(new Error(e.message));
-              };
-              this.on("turnComplete", onTurnComplete);
-              this.on("error", onErr);
-              this.sendEvent("client_content", message);
-              this.log("Sent audio for transcription");
-            } catch (err) {
-              reject(err);
-            }
-          });
-        },
-        (error) => {
-          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+    return this.traced(async () => {
+      this.validateConnectionState();
+      let transcriptionText = "";
+      const onWriting = (data) => {
+        if (data.role === "user") {
+          transcriptionText += data.text;
+          this.log("Received transcription text:", { text: data.text, total: transcriptionText });
         }
-      );
-      return result;
-    } finally {
-      this.off("writing", onWriting);
-      this.off("error", onError);
-      this.off("session", onSession);
-    }
+      };
+      const onError = (error) => {
+        throw new Error(`Transcription failed: ${error.message}`);
+      };
+      const onSession = (data) => {
+        if (data.state === "disconnected") {
+          throw new Error("Session disconnected during transcription");
+        }
+      };
+      this.on("writing", onWriting);
+      this.on("error", onError);
+      this.on("session", onSession);
+      try {
+        const result = await this.audioStreamManager.handleAudioTranscription(
+          audioStream,
+          (base64Audio) => {
+            return new Promise((resolve, reject) => {
+              try {
+                const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
+                const cleanup = () => {
+                  this.off("turnComplete", onTurnComplete);
+                  this.off("error", onErr);
+                };
+                const onTurnComplete = () => {
+                  cleanup();
+                  resolve(transcriptionText.trim());
+                };
+                const onErr = (e) => {
+                  cleanup();
+                  reject(new Error(e.message));
+                };
+                this.on("turnComplete", onTurnComplete);
+                this.on("error", onErr);
+                this.sendEvent("client_content", message);
+                this.log("Sent audio for transcription");
+              } catch (err) {
+                reject(err);
+              }
+            });
+          },
+          (error) => {
+            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+          }
+        );
+        return result;
+      } finally {
+        this.off("writing", onWriting);
+        this.off("error", onError);
+        this.off("session", onSession);
+      }
+    }, "gemini-live.listen")();
   }
   /**
    * Get available speakers/voices
    */
   async getSpeakers() {
-    return [
-      { voiceId: "Puck", description: "Conversational, friendly" },
-      { voiceId: "Charon", description: "Deep, authoritative" },
-      { voiceId: "Kore", description: "Neutral, professional" },
-      { voiceId: "Fenrir", description: "Warm, approachable" }
-    ];
+    return this.traced(async () => {
+      return [
+        { voiceId: "Puck", description: "Conversational, friendly" },
+        { voiceId: "Charon", description: "Deep, authoritative" },
+        { voiceId: "Kore", description: "Neutral, professional" },
+        { voiceId: "Fenrir", description: "Warm, approachable" }
+      ];
+    }, "gemini-live.getSpeakers")();
   }
   /**
    * Resume a previous session using a session handle
@@ -2257,6 +2268,18 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
             role: "assistant"
           });
         }
+        if (part.functionCall) {
+          this.log("Found function call in serverContent.modelTurn.parts", part.functionCall);
+          const toolCallData = {
+            toolCall: {
+              name: part.functionCall.name,
+              args: part.functionCall.args || {},
+              id: part.functionCall.id || randomUUID()
+            }
+          };
+          void this.handleToolCall(toolCallData);
+          continue;
+        }
         if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
           try {
             const audioData = part.inlineData.data;
@@ -2331,9 +2354,24 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     if (!data.toolCall) {
       return;
     }
-    const toolName = data.toolCall.name || "";
-    const toolArgs = data.toolCall.args || {};
-    const toolId = data.toolCall.id || randomUUID();
+    let toolCalls = [];
+    if (data.toolCall.functionCalls && Array.isArray(data.toolCall.functionCalls)) {
+      toolCalls = data.toolCall.functionCalls;
+    } else if (data.toolCall.name) {
+      toolCalls = [{ name: data.toolCall.name, args: data.toolCall.args, id: data.toolCall.id }];
+    }
+    for (const toolCall of toolCalls) {
+      const toolName = toolCall.name || "";
+      const toolArgs = toolCall.args || {};
+      const toolId = toolCall.id || randomUUID();
+      await this.processSingleToolCall(toolName, toolArgs, toolId);
+    }
+  }
+  /**
+   * Process a single tool call
+   * @private
+   */
+  async processSingleToolCall(toolName, toolArgs, toolId) {
     this.log("Processing tool call", { toolName, toolArgs, toolId });
     this.emit("toolCall", {
       name: toolName,
@@ -2354,7 +2392,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
       if (tool.execute) {
         this.log("Executing tool", { toolName, toolArgs });
         result = await tool.execute(
-          { context: toolArgs, requestContext: this.requestContext },
+          { context: toolArgs, runtimeContext: this.runtimeContext },
           {
             toolCallId: toolId,
             messages: []
@@ -2366,23 +2404,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
         result = { error: "Tool has no execute function" };
       }
       const toolResultMessage = {
-        tool_result: {
-          tool_call_id: toolId,
-          result
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: result
+            }
+          ]
         }
       };
-      this.sendEvent("tool_result", toolResultMessage);
+      this.sendEvent("toolResponse", toolResultMessage);
       this.log("Tool result sent", { toolName, toolId, result });
     } catch (error) {
       const errorMessage = error instanceof Error ? error.message : "Unknown error";
       this.log("Tool execution failed", { toolName, error: errorMessage });
       const errorResultMessage = {
-        tool_result: {
-          tool_call_id: toolId,
-          result: { error: errorMessage }
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: { error: errorMessage }
+            }
+          ]
         }
       };
-      this.sendEvent("tool_result", errorResultMessage);
+      this.sendEvent("toolResponse", errorResultMessage);
       this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
         toolName,
         toolArgs,
@@ -2442,6 +2488,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     }
     return "text";
   }
+  /**
+   * Resolve Vertex AI location with sensible default
+   * @private
+   */
+  getVertexLocation() {
+    return this.options.location?.trim() || "us-central1";
+  }
+  /**
+   * Resolve the correct model identifier for Gemini API or Vertex AI
+   * @private
+   */
+  resolveModelIdentifier() {
+    const model = this.options.model ?? DEFAULT_MODEL;
+    if (!this.options.vertexAI) {
+      return `models/${model}`;
+    }
+    if (!this.options.project) {
+      throw this.createAndEmitError(
+        "project_id_missing" /* PROJECT_ID_MISSING */,
+        "Google Cloud project ID is required when using Vertex AI."
+      );
+    }
+    const location = this.getVertexLocation();
+    return `projects/${this.options.project}/locations/${location}/publishers/google/models/${model}`;
+  }
   /**
    * Send initial configuration to Gemini Live API
    * @private
@@ -2452,7 +2523,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     }
     const setupMessage = {
       setup: {
-        model: `models/${this.options.model}`
+        model: this.resolveModelIdentifier()
       }
     };
     if (this.options.instructions) {
@@ -2601,6 +2672,8 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
       message = data;
     } else if (type === "realtime_input" && data.realtime_input) {
       message = data;
+    } else if (type === "toolResponse" && data.toolResponse) {
+      message = data;
     } else if (type === "session.update" && data.session) {
       message = data;
     } else {
@@ -2651,7 +2724,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
    * Get the current tools configured for this voice instance
    * @returns Object containing the current tools
    */
-  listTools() {
+  getTools() {
     return this.tools;
   }
   log(message, ...args) {