@mastra/voice-google-gemini-live 0.11.0-beta.1 → 0.11.1-alpha.0

package/dist/index.cjs CHANGED
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  sessionDurationTimeout;
  // Tool integration properties
  tools;
- requestContext;
+ runtimeContext;
  // Store the configuration options
  options;
  /**
@@ -1499,67 +1499,70 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  /**
  * Establish connection to the Gemini Live API
  */
- async connect({ requestContext } = {}) {
- if (this.state === "connected") {
- this.log("Already connected to Gemini Live API");
- return;
- }
- this.requestContext = requestContext;
- this.emit("session", { state: "connecting" });
- try {
- let wsUrl;
- let headers = {};
- if (this.options.vertexAI) {
- wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
- await this.authManager.initialize();
- const accessToken = await this.authManager.getAccessToken();
- headers = { headers: { Authorization: `Bearer ${accessToken}` } };
- this.log("Using Vertex AI authentication with OAuth token");
- } else {
- wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
- headers = {
- headers: {
- "x-goog-api-key": this.options.apiKey || "",
- "Content-Type": "application/json"
- }
- };
- this.log("Using Live API authentication with API key");
- }
- this.log("Connecting to:", wsUrl);
- this.ws = new ws.WebSocket(wsUrl, void 0, headers);
- this.connectionManager.setWebSocket(this.ws);
- this.setupEventListeners();
- await this.connectionManager.waitForOpen();
- if (this.isResuming && this.sessionHandle) {
- await this.sendSessionResumption();
- } else {
- this.sendInitialConfig();
- this.sessionStartTime = Date.now();
- this.sessionId = crypto.randomUUID();
+ async connect({ runtimeContext } = {}) {
+ return this.traced(async () => {
+ if (this.state === "connected") {
+ this.log("Already connected to Gemini Live API");
+ return;
  }
- await this.waitForSessionCreated();
- this.state = "connected";
- this.emit("session", {
- state: "connected",
- config: {
+ this.runtimeContext = runtimeContext;
+ this.emit("session", { state: "connecting" });
+ try {
+ let wsUrl;
+ let headers = {};
+ if (this.options.vertexAI) {
+ const location = this.getVertexLocation();
+ wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
+ await this.authManager.initialize();
+ const accessToken = await this.authManager.getAccessToken();
+ headers = { headers: { Authorization: `Bearer ${accessToken}` } };
+ this.log("Using Vertex AI authentication with OAuth token");
+ } else {
+ wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
+ headers = {
+ headers: {
+ "x-goog-api-key": this.options.apiKey || "",
+ "Content-Type": "application/json"
+ }
+ };
+ this.log("Using Live API authentication with API key");
+ }
+ this.log("Connecting to:", wsUrl);
+ this.ws = new ws.WebSocket(wsUrl, void 0, headers);
+ this.connectionManager.setWebSocket(this.ws);
+ this.setupEventListeners();
+ await this.connectionManager.waitForOpen();
+ if (this.isResuming && this.sessionHandle) {
+ await this.sendSessionResumption();
+ } else {
+ this.sendInitialConfig();
+ this.sessionStartTime = Date.now();
+ this.sessionId = crypto.randomUUID();
+ }
+ await this.waitForSessionCreated();
+ this.state = "connected";
+ this.emit("session", {
+ state: "connected",
+ config: {
+ sessionId: this.sessionId,
+ isResuming: this.isResuming,
+ toolCount: Object.keys(this.tools || {}).length
+ }
+ });
+ this.log("Successfully connected to Gemini Live API", {
  sessionId: this.sessionId,
  isResuming: this.isResuming,
  toolCount: Object.keys(this.tools || {}).length
+ });
+ if (this.options.sessionConfig?.maxDuration) {
+ this.startSessionDurationMonitor();
  }
- });
- this.log("Successfully connected to Gemini Live API", {
- sessionId: this.sessionId,
- isResuming: this.isResuming,
- toolCount: Object.keys(this.tools || {}).length
- });
- if (this.options.sessionConfig?.maxDuration) {
- this.startSessionDurationMonitor();
+ } catch (error) {
+ this.state = "disconnected";
+ this.log("Connection failed", error);
+ throw error;
  }
- } catch (error) {
- this.state = "disconnected";
- this.log("Connection failed", error);
- throw error;
- }
+ }, "gemini-live.connect")();
  }
  /**
  * Disconnect from the Gemini Live API
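A minimal caller-side sketch of the renamed connect() option. The RuntimeContext import path, the GOOGLE_API_KEY variable, and the constructor shape are assumptions for illustration; only the { runtimeContext } key is taken from the diff above:

    import { RuntimeContext } from "@mastra/core/runtime-context";
    import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";

    const voice = new GeminiLiveVoice({ apiKey: process.env.GOOGLE_API_KEY });

    // 0.11.0-beta.1 read { requestContext }; this build reads { runtimeContext }.
    const runtimeContext = new RuntimeContext();
    await voice.connect({ runtimeContext });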
@@ -1597,164 +1600,172 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  * Send text to be converted to speech
  */
  async speak(input, options) {
- this.validateConnectionState();
- if (typeof input !== "string") {
- const chunks = [];
- for await (const chunk of input) {
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
+ return this.traced(async () => {
+ this.validateConnectionState();
+ if (typeof input !== "string") {
+ const chunks = [];
+ for await (const chunk of input) {
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
+ }
+ input = Buffer.concat(chunks).toString("utf-8");
  }
- input = Buffer.concat(chunks).toString("utf-8");
- }
- if (input.trim().length === 0) {
- throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
- }
- this.addToContext("user", input);
- const textMessage = {
- client_content: {
- turns: [
- {
- role: "user",
- parts: [
- {
- text: input
- }
- ]
- }
- ],
- turnComplete: true
+ if (input.trim().length === 0) {
+ throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
  }
- };
- if (options && (options.speaker || options.languageCode || options.responseModalities)) {
- const updateMessage = {
- type: "session.update",
- session: {
- generation_config: {
- ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
- speech_config: {
- ...options.languageCode ? { language_code: options.languageCode } : {},
- ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+ this.addToContext("user", input);
+ const textMessage = {
+ client_content: {
+ turns: [
+ {
+ role: "user",
+ parts: [
+ {
+ text: input
+ }
+ ]
  }
- }
+ ],
+ turnComplete: true
  }
  };
+ if (options && (options.speaker || options.languageCode || options.responseModalities)) {
+ const updateMessage = {
+ type: "session.update",
+ session: {
+ generation_config: {
+ ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
+ speech_config: {
+ ...options.languageCode ? { language_code: options.languageCode } : {},
+ ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+ }
+ }
+ }
+ };
+ try {
+ this.sendEvent("session.update", updateMessage);
+ this.log("Applied per-turn runtime options", options);
+ } catch (error) {
+ this.log("Failed to apply per-turn runtime options", error);
+ }
+ }
  try {
- this.sendEvent("session.update", updateMessage);
- this.log("Applied per-turn runtime options", options);
+ this.sendEvent("client_content", textMessage);
+ this.log("Text message sent", { text: input });
  } catch (error) {
- this.log("Failed to apply per-turn runtime options", error);
+ this.log("Failed to send text message", error);
+ throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
  }
- }
- try {
- this.sendEvent("client_content", textMessage);
- this.log("Text message sent", { text: input });
- } catch (error) {
- this.log("Failed to send text message", error);
- throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
- }
+ }, "gemini-live.speak")();
  }
  /**
  * Send audio stream for processing
  */
  async send(audioData) {
- this.validateConnectionState();
- if ("readable" in audioData && typeof audioData.on === "function") {
- const stream = audioData;
- stream.on("data", (chunk) => {
- try {
- const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
- this.sendEvent("realtime_input", message);
- } catch (error) {
- this.log("Failed to process audio chunk", error);
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
- }
- });
- stream.on("error", (error) => {
- this.log("Audio stream error", error);
- this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
- });
- stream.on("end", () => {
- this.log("Audio stream ended");
- });
- } else {
- const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
- const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
- this.sendEvent("realtime_input", message);
- }
+ return this.traced(async () => {
+ this.validateConnectionState();
+ if ("readable" in audioData && typeof audioData.on === "function") {
+ const stream = audioData;
+ stream.on("data", (chunk) => {
+ try {
+ const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+ this.sendEvent("realtime_input", message);
+ } catch (error) {
+ this.log("Failed to process audio chunk", error);
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
+ }
+ });
+ stream.on("error", (error) => {
+ this.log("Audio stream error", error);
+ this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
+ });
+ stream.on("end", () => {
+ this.log("Audio stream ended");
+ });
+ } else {
+ const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
+ const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+ this.sendEvent("realtime_input", message);
+ }
+ }, "gemini-live.send")();
  }
  /**
  * Process speech from audio stream (traditional STT interface)
  */
  async listen(audioStream, _options) {
- this.validateConnectionState();
- let transcriptionText = "";
- const onWriting = (data) => {
- if (data.role === "user") {
- transcriptionText += data.text;
- this.log("Received transcription text:", { text: data.text, total: transcriptionText });
- }
- };
- const onError = (error) => {
- throw new Error(`Transcription failed: ${error.message}`);
- };
- const onSession = (data) => {
- if (data.state === "disconnected") {
- throw new Error("Session disconnected during transcription");
- }
- };
- this.on("writing", onWriting);
- this.on("error", onError);
- this.on("session", onSession);
- try {
- const result = await this.audioStreamManager.handleAudioTranscription(
- audioStream,
- (base64Audio) => {
- return new Promise((resolve, reject) => {
- try {
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
- const cleanup = () => {
- this.off("turnComplete", onTurnComplete);
- this.off("error", onErr);
- };
- const onTurnComplete = () => {
- cleanup();
- resolve(transcriptionText.trim());
- };
- const onErr = (e) => {
- cleanup();
- reject(new Error(e.message));
- };
- this.on("turnComplete", onTurnComplete);
- this.on("error", onErr);
- this.sendEvent("client_content", message);
- this.log("Sent audio for transcription");
- } catch (err) {
- reject(err);
- }
- });
- },
- (error) => {
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+ return this.traced(async () => {
+ this.validateConnectionState();
+ let transcriptionText = "";
+ const onWriting = (data) => {
+ if (data.role === "user") {
+ transcriptionText += data.text;
+ this.log("Received transcription text:", { text: data.text, total: transcriptionText });
  }
- );
- return result;
- } finally {
- this.off("writing", onWriting);
- this.off("error", onError);
- this.off("session", onSession);
- }
+ };
+ const onError = (error) => {
+ throw new Error(`Transcription failed: ${error.message}`);
+ };
+ const onSession = (data) => {
+ if (data.state === "disconnected") {
+ throw new Error("Session disconnected during transcription");
+ }
+ };
+ this.on("writing", onWriting);
+ this.on("error", onError);
+ this.on("session", onSession);
+ try {
+ const result = await this.audioStreamManager.handleAudioTranscription(
+ audioStream,
+ (base64Audio) => {
+ return new Promise((resolve, reject) => {
+ try {
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
+ const cleanup = () => {
+ this.off("turnComplete", onTurnComplete);
+ this.off("error", onErr);
+ };
+ const onTurnComplete = () => {
+ cleanup();
+ resolve(transcriptionText.trim());
+ };
+ const onErr = (e) => {
+ cleanup();
+ reject(new Error(e.message));
+ };
+ this.on("turnComplete", onTurnComplete);
+ this.on("error", onErr);
+ this.sendEvent("client_content", message);
+ this.log("Sent audio for transcription");
+ } catch (err) {
+ reject(err);
+ }
+ });
+ },
+ (error) => {
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+ }
+ );
+ return result;
+ } finally {
+ this.off("writing", onWriting);
+ this.off("error", onError);
+ this.off("session", onSession);
+ }
+ }, "gemini-live.listen")();
  }
  /**
  * Get available speakers/voices
  */
  async getSpeakers() {
- return [
- { voiceId: "Puck", description: "Conversational, friendly" },
- { voiceId: "Charon", description: "Deep, authoritative" },
- { voiceId: "Kore", description: "Neutral, professional" },
- { voiceId: "Fenrir", description: "Warm, approachable" }
- ];
+ return this.traced(async () => {
+ return [
+ { voiceId: "Puck", description: "Conversational, friendly" },
+ { voiceId: "Charon", description: "Deep, authoritative" },
+ { voiceId: "Kore", description: "Neutral, professional" },
+ { voiceId: "Fenrir", description: "Warm, approachable" }
+ ];
+ }, "gemini-live.getSpeakers")();
  }
  /**
  * Resume a previous session using a session handle
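Each of the methods above now returns this.traced(async () => { ... }, "gemini-live.<method>")(), i.e. the body is wrapped in a tracing helper and the returned wrapper is invoked immediately. The helper itself comes from the MastraVoice base class and is not part of this diff; a rough sketch of the general pattern, with the timing log standing in for whatever span recording the real helper does:

    // Illustrative only: traced(fn, spanName) returns a wrapper that runs fn inside a named span.
    function traced(fn, spanName) {
      return async (...args) => {
        const startedAt = Date.now();
        try {
          return await fn(...args);
        } finally {
          // Stand-in for real span recording.
          console.debug(`[trace] ${spanName} finished in ${Date.now() - startedAt}ms`);
        }
      };
    }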
@@ -2382,7 +2393,13 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  let result;
  if (tool.execute) {
  this.log("Executing tool", { toolName, toolArgs });
- result = await tool.execute(toolArgs, { requestContext: this.requestContext });
+ result = await tool.execute(
+ { context: toolArgs, runtimeContext: this.runtimeContext },
+ {
+ toolCallId: toolId,
+ messages: []
+ }
+ );
  this.log("Tool executed successfully", { toolName, result });
  } else {
  this.log("Tool has no execute function", { toolName });
@@ -2473,6 +2490,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  }
  return "text";
  }
+ /**
+ * Resolve Vertex AI location with sensible default
+ * @private
+ */
+ getVertexLocation() {
+ return this.options.location?.trim() || "us-central1";
+ }
+ /**
+ * Resolve the correct model identifier for Gemini API or Vertex AI
+ * @private
+ */
+ resolveModelIdentifier() {
+ const model = this.options.model ?? DEFAULT_MODEL;
+ if (!this.options.vertexAI) {
+ return `models/${model}`;
+ }
+ if (!this.options.project) {
+ throw this.createAndEmitError(
+ "project_id_missing" /* PROJECT_ID_MISSING */,
+ "Google Cloud project ID is required when using Vertex AI."
+ );
+ }
+ const location = this.getVertexLocation();
+ return `projects/${this.options.project}/locations/${location}/publishers/google/models/${model}`;
+ }
  /**
  * Send initial configuration to Gemini Live API
  * @private
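The two helpers added above resolve different model identifiers per backend: models/<model> for the Gemini API, and projects/<project>/locations/<location>/publishers/google/models/<model> for Vertex AI. A standalone restatement of that logic; the default model string is a placeholder, since the package's DEFAULT_MODEL constant is not shown in this diff:

    function resolveModelIdentifier(options) {
      const model = options.model ?? "gemini-2.0-flash-exp"; // placeholder for DEFAULT_MODEL
      if (!options.vertexAI) {
        return `models/${model}`;
      }
      if (!options.project) {
        throw new Error("Google Cloud project ID is required when using Vertex AI.");
      }
      const location = options.location?.trim() || "us-central1";
      return `projects/${options.project}/locations/${location}/publishers/google/models/${model}`;
    }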
@@ -2483,7 +2525,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  }
  const setupMessage = {
  setup: {
- model: `models/${this.options.model}`
+ model: this.resolveModelIdentifier()
  }
  };
  if (this.options.instructions) {
@@ -2659,14 +2701,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  * inputSchema: z.object({
  * location: z.string().describe("The city and state, e.g. San Francisco, CA"),
  * }),
- * execute: async (inputData) => {
+ * execute: async ({ context }) => {
  * // Fetch weather data from an API
  * const response = await fetch(
- * `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
+ * `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
  * );
  * const data = await response.json();
  * return {
- * message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
+ * message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
  * };
  * },
  * });
@@ -2684,7 +2726,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
  * Get the current tools configured for this voice instance
  * @returns Object containing the current tools
  */
- listTools() {
+ getTools() {
  return this.tools;
  }
  log(message, ...args) {
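Caller-side, the accessor rename in the last hunk is the only visible change; a one-line sketch assuming an already constructed voice instance:

    // Previously voice.listTools(); the method is now named getTools().
    const tools = voice.getTools();
    console.log(Object.keys(tools ?? {}));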