@mastra/voice-google-gemini-live 0.0.0-fix-backport-setserver-20251201151948 → 0.0.0-fix-request-context-as-query-key-20251209093005

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
   sessionDurationTimeout;
   // Tool integration properties
   tools;
-  runtimeContext;
+  requestContext;
   // Store the configuration options
   options;
   /**
@@ -1499,70 +1499,68 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
   /**
    * Establish connection to the Gemini Live API
    */
-  async connect({ runtimeContext } = {}) {
-    return this.traced(async () => {
-      if (this.state === "connected") {
-        this.log("Already connected to Gemini Live API");
-        return;
-      }
-      this.runtimeContext = runtimeContext;
-      this.emit("session", { state: "connecting" });
-      try {
-        let wsUrl;
-        let headers = {};
-        if (this.options.vertexAI) {
-          const location = this.getVertexLocation();
-          wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
-          await this.authManager.initialize();
-          const accessToken = await this.authManager.getAccessToken();
-          headers = { headers: { Authorization: `Bearer ${accessToken}` } };
-          this.log("Using Vertex AI authentication with OAuth token");
-        } else {
-          wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
-          headers = {
-            headers: {
-              "x-goog-api-key": this.options.apiKey || "",
-              "Content-Type": "application/json"
-            }
-          };
-          this.log("Using Live API authentication with API key");
-        }
-        this.log("Connecting to:", wsUrl);
-        this.ws = new ws.WebSocket(wsUrl, void 0, headers);
-        this.connectionManager.setWebSocket(this.ws);
-        this.setupEventListeners();
-        await this.connectionManager.waitForOpen();
-        if (this.isResuming && this.sessionHandle) {
-          await this.sendSessionResumption();
-        } else {
-          this.sendInitialConfig();
-          this.sessionStartTime = Date.now();
-          this.sessionId = crypto.randomUUID();
-        }
-        await this.waitForSessionCreated();
-        this.state = "connected";
-        this.emit("session", {
-          state: "connected",
-          config: {
-            sessionId: this.sessionId,
-            isResuming: this.isResuming,
-            toolCount: Object.keys(this.tools || {}).length
+  async connect({ requestContext } = {}) {
+    if (this.state === "connected") {
+      this.log("Already connected to Gemini Live API");
+      return;
+    }
+    this.requestContext = requestContext;
+    this.emit("session", { state: "connecting" });
+    try {
+      let wsUrl;
+      let headers = {};
+      if (this.options.vertexAI) {
+        const location = this.getVertexLocation();
+        wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
+        await this.authManager.initialize();
+        const accessToken = await this.authManager.getAccessToken();
+        headers = { headers: { Authorization: `Bearer ${accessToken}` } };
+        this.log("Using Vertex AI authentication with OAuth token");
+      } else {
+        wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
+        headers = {
+          headers: {
+            "x-goog-api-key": this.options.apiKey || "",
+            "Content-Type": "application/json"
           }
-        });
-        this.log("Successfully connected to Gemini Live API", {
+        };
+        this.log("Using Live API authentication with API key");
+      }
+      this.log("Connecting to:", wsUrl);
+      this.ws = new ws.WebSocket(wsUrl, void 0, headers);
+      this.connectionManager.setWebSocket(this.ws);
+      this.setupEventListeners();
+      await this.connectionManager.waitForOpen();
+      if (this.isResuming && this.sessionHandle) {
+        await this.sendSessionResumption();
+      } else {
+        this.sendInitialConfig();
+        this.sessionStartTime = Date.now();
+        this.sessionId = crypto.randomUUID();
+      }
+      await this.waitForSessionCreated();
+      this.state = "connected";
+      this.emit("session", {
+        state: "connected",
+        config: {
           sessionId: this.sessionId,
           isResuming: this.isResuming,
           toolCount: Object.keys(this.tools || {}).length
-        });
-        if (this.options.sessionConfig?.maxDuration) {
-          this.startSessionDurationMonitor();
         }
-      } catch (error) {
-        this.state = "disconnected";
-        this.log("Connection failed", error);
-        throw error;
+      });
+      this.log("Successfully connected to Gemini Live API", {
+        sessionId: this.sessionId,
+        isResuming: this.isResuming,
+        toolCount: Object.keys(this.tools || {}).length
+      });
+      if (this.options.sessionConfig?.maxDuration) {
+        this.startSessionDurationMonitor();
       }
-    }, "gemini-live.connect")();
+    } catch (error) {
+      this.state = "disconnected";
+      this.log("Connection failed", error);
+      throw error;
+    }
   }
   /**
    * Disconnect from the Gemini Live API
@@ -1600,172 +1598,164 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
    * Send text to be converted to speech
    */
   async speak(input, options) {
-    return this.traced(async () => {
-      this.validateConnectionState();
-      if (typeof input !== "string") {
-        const chunks = [];
-        for await (const chunk of input) {
-          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
-        }
-        input = Buffer.concat(chunks).toString("utf-8");
-      }
-      if (input.trim().length === 0) {
-        throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
+    this.validateConnectionState();
+    if (typeof input !== "string") {
+      const chunks = [];
+      for await (const chunk of input) {
+        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
       }
-      this.addToContext("user", input);
-      const textMessage = {
-        client_content: {
-          turns: [
-            {
-              role: "user",
-              parts: [
-                {
-                  text: input
-                }
-              ]
-            }
-          ],
-          turnComplete: true
-        }
-      };
-      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
-        const updateMessage = {
-          type: "session.update",
-          session: {
-            generation_config: {
-              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
-              speech_config: {
-                ...options.languageCode ? { language_code: options.languageCode } : {},
-                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+      input = Buffer.concat(chunks).toString("utf-8");
+    }
+    if (input.trim().length === 0) {
+      throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
+    }
+    this.addToContext("user", input);
+    const textMessage = {
+      client_content: {
+        turns: [
+          {
+            role: "user",
+            parts: [
+              {
+                text: input
               }
+            ]
+          }
+        ],
+        turnComplete: true
+      }
+    };
+    if (options && (options.speaker || options.languageCode || options.responseModalities)) {
+      const updateMessage = {
+        type: "session.update",
+        session: {
+          generation_config: {
+            ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
+            speech_config: {
+              ...options.languageCode ? { language_code: options.languageCode } : {},
+              ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
             }
          }
-        };
-        try {
-          this.sendEvent("session.update", updateMessage);
-          this.log("Applied per-turn runtime options", options);
-        } catch (error) {
-          this.log("Failed to apply per-turn runtime options", error);
        }
-      }
+      };
       try {
-        this.sendEvent("client_content", textMessage);
-        this.log("Text message sent", { text: input });
+        this.sendEvent("session.update", updateMessage);
+        this.log("Applied per-turn runtime options", options);
      } catch (error) {
-        this.log("Failed to send text message", error);
-        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
+        this.log("Failed to apply per-turn runtime options", error);
      }
-    }, "gemini-live.speak")();
+    }
+    try {
+      this.sendEvent("client_content", textMessage);
+      this.log("Text message sent", { text: input });
+    } catch (error) {
+      this.log("Failed to send text message", error);
+      throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
+    }
   }
   /**
    * Send audio stream for processing
    */
   async send(audioData) {
-    return this.traced(async () => {
-      this.validateConnectionState();
-      if ("readable" in audioData && typeof audioData.on === "function") {
-        const stream = audioData;
-        stream.on("data", (chunk) => {
-          try {
-            const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
-            const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
-            this.sendEvent("realtime_input", message);
-          } catch (error) {
-            this.log("Failed to process audio chunk", error);
-            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
-          }
-        });
-        stream.on("error", (error) => {
-          this.log("Audio stream error", error);
-          this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
-        });
-        stream.on("end", () => {
-          this.log("Audio stream ended");
-        });
-      } else {
-        const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
-        const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
-        const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
-        this.sendEvent("realtime_input", message);
-      }
-    }, "gemini-live.send")();
+    this.validateConnectionState();
+    if ("readable" in audioData && typeof audioData.on === "function") {
+      const stream = audioData;
+      stream.on("data", (chunk) => {
+        try {
+          const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
+          const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+          this.sendEvent("realtime_input", message);
+        } catch (error) {
+          this.log("Failed to process audio chunk", error);
+          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
+        }
+      });
+      stream.on("error", (error) => {
+        this.log("Audio stream error", error);
+        this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
+      });
+      stream.on("end", () => {
+        this.log("Audio stream ended");
+      });
+    } else {
+      const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
+      const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
+      const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+      this.sendEvent("realtime_input", message);
+    }
   }
   /**
    * Process speech from audio stream (traditional STT interface)
    */
   async listen(audioStream, _options) {
-    return this.traced(async () => {
-      this.validateConnectionState();
-      let transcriptionText = "";
-      const onWriting = (data) => {
-        if (data.role === "user") {
-          transcriptionText += data.text;
-          this.log("Received transcription text:", { text: data.text, total: transcriptionText });
-        }
-      };
-      const onError = (error) => {
-        throw new Error(`Transcription failed: ${error.message}`);
-      };
-      const onSession = (data) => {
-        if (data.state === "disconnected") {
-          throw new Error("Session disconnected during transcription");
-        }
-      };
-      this.on("writing", onWriting);
-      this.on("error", onError);
-      this.on("session", onSession);
-      try {
-        const result = await this.audioStreamManager.handleAudioTranscription(
-          audioStream,
-          (base64Audio) => {
-            return new Promise((resolve, reject) => {
-              try {
-                const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
-                const cleanup = () => {
-                  this.off("turnComplete", onTurnComplete);
-                  this.off("error", onErr);
-                };
-                const onTurnComplete = () => {
-                  cleanup();
-                  resolve(transcriptionText.trim());
-                };
-                const onErr = (e) => {
-                  cleanup();
-                  reject(new Error(e.message));
-                };
-                this.on("turnComplete", onTurnComplete);
-                this.on("error", onErr);
-                this.sendEvent("client_content", message);
-                this.log("Sent audio for transcription");
-              } catch (err) {
-                reject(err);
-              }
-            });
-          },
-          (error) => {
-            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
-          }
-        );
-        return result;
-      } finally {
-        this.off("writing", onWriting);
-        this.off("error", onError);
-        this.off("session", onSession);
+    this.validateConnectionState();
+    let transcriptionText = "";
+    const onWriting = (data) => {
+      if (data.role === "user") {
+        transcriptionText += data.text;
+        this.log("Received transcription text:", { text: data.text, total: transcriptionText });
       }
-    }, "gemini-live.listen")();
+    };
+    const onError = (error) => {
+      throw new Error(`Transcription failed: ${error.message}`);
+    };
+    const onSession = (data) => {
+      if (data.state === "disconnected") {
+        throw new Error("Session disconnected during transcription");
+      }
+    };
+    this.on("writing", onWriting);
+    this.on("error", onError);
+    this.on("session", onSession);
+    try {
+      const result = await this.audioStreamManager.handleAudioTranscription(
+        audioStream,
+        (base64Audio) => {
+          return new Promise((resolve, reject) => {
+            try {
+              const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
+              const cleanup = () => {
+                this.off("turnComplete", onTurnComplete);
+                this.off("error", onErr);
+              };
+              const onTurnComplete = () => {
+                cleanup();
+                resolve(transcriptionText.trim());
+              };
+              const onErr = (e) => {
+                cleanup();
+                reject(new Error(e.message));
+              };
+              this.on("turnComplete", onTurnComplete);
+              this.on("error", onErr);
+              this.sendEvent("client_content", message);
+              this.log("Sent audio for transcription");
+            } catch (err) {
+              reject(err);
+            }
+          });
+        },
+        (error) => {
+          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+        }
+      );
+      return result;
+    } finally {
+      this.off("writing", onWriting);
+      this.off("error", onError);
+      this.off("session", onSession);
+    }
   }
   /**
    * Get available speakers/voices
    */
   async getSpeakers() {
-    return this.traced(async () => {
-      return [
-        { voiceId: "Puck", description: "Conversational, friendly" },
-        { voiceId: "Charon", description: "Deep, authoritative" },
-        { voiceId: "Kore", description: "Neutral, professional" },
-        { voiceId: "Fenrir", description: "Warm, approachable" }
-      ];
-    }, "gemini-live.getSpeakers")();
+    return [
+      { voiceId: "Puck", description: "Conversational, friendly" },
+      { voiceId: "Charon", description: "Deep, authoritative" },
+      { voiceId: "Kore", description: "Neutral, professional" },
+      { voiceId: "Fenrir", description: "Warm, approachable" }
+    ];
   }
   /**
    * Resume a previous session using a session handle
@@ -2393,13 +2383,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
       let result;
       if (tool.execute) {
         this.log("Executing tool", { toolName, toolArgs });
-        result = await tool.execute(
-          { context: toolArgs, runtimeContext: this.runtimeContext },
-          {
-            toolCallId: toolId,
-            messages: []
-          }
-        );
+        result = await tool.execute(toolArgs, { requestContext: this.requestContext });
         this.log("Tool executed successfully", { toolName, result });
       } else {
         this.log("Tool has no execute function", { toolName });
@@ -2701,14 +2685,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
    *   inputSchema: z.object({
    *     location: z.string().describe("The city and state, e.g. San Francisco, CA"),
    *   }),
-   *   execute: async ({ context }) => {
+   *   execute: async (inputData) => {
    *     // Fetch weather data from an API
    *     const response = await fetch(
-   *       `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
+   *       `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
    *     );
    *     const data = await response.json();
    *     return {
-   *       message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
+   *       message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
    *     };
    *   },
    * });
@@ -2726,7 +2710,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
    * Get the current tools configured for this voice instance
    * @returns Object containing the current tools
    */
-  getTools() {
+  listTools() {
     return this.tools;
   }
   log(message, ...args) {
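
For consumers of the package, the caller-visible changes in this diff are the `connect()` option rename (`runtimeContext` → `requestContext`), the two-argument tool `execute` call, and the `getTools()` → `listTools()` rename. The sketch below shows how calling code lines up with the new surface. It is an illustration only, not taken from the package's documentation: the `requestContext` payload is hypothetical, and the constructor option is assumed from the `this.options.apiKey` reference in the diff.

// Hedged usage sketch (TypeScript). Only connect({ requestContext }),
// listTools(), and the tool.execute(toolArgs, { requestContext }) call
// shape are grounded in this diff; the rest are placeholders.
import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";

const voice = new GeminiLiveVoice({ apiKey: process.env.GOOGLE_API_KEY });

// Hypothetical per-request payload; the diff treats it as opaque and only
// stores it for forwarding to tool execute calls.
const requestContext = { userId: "user-123" };

// Previously: await voice.connect({ runtimeContext });
await voice.connect({ requestContext });

// Previously: voice.getTools();
const tools = voice.listTools();

// Tool execute functions are now invoked as
//   tool.execute(toolArgs, { requestContext })
// instead of
//   tool.execute({ context: toolArgs, runtimeContext }, { toolCallId, messages: [] }).

await voice.disconnect();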