@mastra/voice-google-gemini-live 0.0.0-fix-issue-10434-concurrent-write-corruption-20251124213939 → 0.0.0-fix-backport-setserver-20251201151948

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1262
1262
  sessionDurationTimeout;
1263
1263
  // Tool integration properties
1264
1264
  tools;
1265
- requestContext;
1265
+ runtimeContext;
1266
1266
  // Store the configuration options
1267
1267
  options;
1268
1268
  /**
@@ -1499,68 +1499,70 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1499
1499
  /**
1500
1500
  * Establish connection to the Gemini Live API
1501
1501
  */
1502
- async connect({ requestContext } = {}) {
1503
- if (this.state === "connected") {
1504
- this.log("Already connected to Gemini Live API");
1505
- return;
1506
- }
1507
- this.requestContext = requestContext;
1508
- this.emit("session", { state: "connecting" });
1509
- try {
1510
- let wsUrl;
1511
- let headers = {};
1512
- if (this.options.vertexAI) {
1513
- const location = this.getVertexLocation();
1514
- wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
1515
- await this.authManager.initialize();
1516
- const accessToken = await this.authManager.getAccessToken();
1517
- headers = { headers: { Authorization: `Bearer ${accessToken}` } };
1518
- this.log("Using Vertex AI authentication with OAuth token");
1519
- } else {
1520
- wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
1521
- headers = {
1522
- headers: {
1523
- "x-goog-api-key": this.options.apiKey || "",
1524
- "Content-Type": "application/json"
1525
- }
1526
- };
1527
- this.log("Using Live API authentication with API key");
1528
- }
1529
- this.log("Connecting to:", wsUrl);
1530
- this.ws = new ws.WebSocket(wsUrl, void 0, headers);
1531
- this.connectionManager.setWebSocket(this.ws);
1532
- this.setupEventListeners();
1533
- await this.connectionManager.waitForOpen();
1534
- if (this.isResuming && this.sessionHandle) {
1535
- await this.sendSessionResumption();
1536
- } else {
1537
- this.sendInitialConfig();
1538
- this.sessionStartTime = Date.now();
1539
- this.sessionId = crypto.randomUUID();
1502
+ async connect({ runtimeContext } = {}) {
1503
+ return this.traced(async () => {
1504
+ if (this.state === "connected") {
1505
+ this.log("Already connected to Gemini Live API");
1506
+ return;
1540
1507
  }
1541
- await this.waitForSessionCreated();
1542
- this.state = "connected";
1543
- this.emit("session", {
1544
- state: "connected",
1545
- config: {
1508
+ this.runtimeContext = runtimeContext;
1509
+ this.emit("session", { state: "connecting" });
1510
+ try {
1511
+ let wsUrl;
1512
+ let headers = {};
1513
+ if (this.options.vertexAI) {
1514
+ const location = this.getVertexLocation();
1515
+ wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
1516
+ await this.authManager.initialize();
1517
+ const accessToken = await this.authManager.getAccessToken();
1518
+ headers = { headers: { Authorization: `Bearer ${accessToken}` } };
1519
+ this.log("Using Vertex AI authentication with OAuth token");
1520
+ } else {
1521
+ wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
1522
+ headers = {
1523
+ headers: {
1524
+ "x-goog-api-key": this.options.apiKey || "",
1525
+ "Content-Type": "application/json"
1526
+ }
1527
+ };
1528
+ this.log("Using Live API authentication with API key");
1529
+ }
1530
+ this.log("Connecting to:", wsUrl);
1531
+ this.ws = new ws.WebSocket(wsUrl, void 0, headers);
1532
+ this.connectionManager.setWebSocket(this.ws);
1533
+ this.setupEventListeners();
1534
+ await this.connectionManager.waitForOpen();
1535
+ if (this.isResuming && this.sessionHandle) {
1536
+ await this.sendSessionResumption();
1537
+ } else {
1538
+ this.sendInitialConfig();
1539
+ this.sessionStartTime = Date.now();
1540
+ this.sessionId = crypto.randomUUID();
1541
+ }
1542
+ await this.waitForSessionCreated();
1543
+ this.state = "connected";
1544
+ this.emit("session", {
1545
+ state: "connected",
1546
+ config: {
1547
+ sessionId: this.sessionId,
1548
+ isResuming: this.isResuming,
1549
+ toolCount: Object.keys(this.tools || {}).length
1550
+ }
1551
+ });
1552
+ this.log("Successfully connected to Gemini Live API", {
1546
1553
  sessionId: this.sessionId,
1547
1554
  isResuming: this.isResuming,
1548
1555
  toolCount: Object.keys(this.tools || {}).length
1556
+ });
1557
+ if (this.options.sessionConfig?.maxDuration) {
1558
+ this.startSessionDurationMonitor();
1549
1559
  }
1550
- });
1551
- this.log("Successfully connected to Gemini Live API", {
1552
- sessionId: this.sessionId,
1553
- isResuming: this.isResuming,
1554
- toolCount: Object.keys(this.tools || {}).length
1555
- });
1556
- if (this.options.sessionConfig?.maxDuration) {
1557
- this.startSessionDurationMonitor();
1560
+ } catch (error) {
1561
+ this.state = "disconnected";
1562
+ this.log("Connection failed", error);
1563
+ throw error;
1558
1564
  }
1559
- } catch (error) {
1560
- this.state = "disconnected";
1561
- this.log("Connection failed", error);
1562
- throw error;
1563
- }
1565
+ }, "gemini-live.connect")();
1564
1566
  }
1565
1567
  /**
1566
1568
  * Disconnect from the Gemini Live API
@@ -1598,164 +1600,172 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1598
1600
  * Send text to be converted to speech
1599
1601
  */
1600
1602
  async speak(input, options) {
1601
- this.validateConnectionState();
1602
- if (typeof input !== "string") {
1603
- const chunks = [];
1604
- for await (const chunk of input) {
1605
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
1603
+ return this.traced(async () => {
1604
+ this.validateConnectionState();
1605
+ if (typeof input !== "string") {
1606
+ const chunks = [];
1607
+ for await (const chunk of input) {
1608
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
1609
+ }
1610
+ input = Buffer.concat(chunks).toString("utf-8");
1606
1611
  }
1607
- input = Buffer.concat(chunks).toString("utf-8");
1608
- }
1609
- if (input.trim().length === 0) {
1610
- throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
1611
- }
1612
- this.addToContext("user", input);
1613
- const textMessage = {
1614
- client_content: {
1615
- turns: [
1616
- {
1617
- role: "user",
1618
- parts: [
1619
- {
1620
- text: input
1621
- }
1622
- ]
1623
- }
1624
- ],
1625
- turnComplete: true
1612
+ if (input.trim().length === 0) {
1613
+ throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
1626
1614
  }
1627
- };
1628
- if (options && (options.speaker || options.languageCode || options.responseModalities)) {
1629
- const updateMessage = {
1630
- type: "session.update",
1631
- session: {
1632
- generation_config: {
1633
- ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
1634
- speech_config: {
1635
- ...options.languageCode ? { language_code: options.languageCode } : {},
1636
- ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
1615
+ this.addToContext("user", input);
1616
+ const textMessage = {
1617
+ client_content: {
1618
+ turns: [
1619
+ {
1620
+ role: "user",
1621
+ parts: [
1622
+ {
1623
+ text: input
1624
+ }
1625
+ ]
1637
1626
  }
1638
- }
1627
+ ],
1628
+ turnComplete: true
1639
1629
  }
1640
1630
  };
1631
+ if (options && (options.speaker || options.languageCode || options.responseModalities)) {
1632
+ const updateMessage = {
1633
+ type: "session.update",
1634
+ session: {
1635
+ generation_config: {
1636
+ ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
1637
+ speech_config: {
1638
+ ...options.languageCode ? { language_code: options.languageCode } : {},
1639
+ ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
1640
+ }
1641
+ }
1642
+ }
1643
+ };
1644
+ try {
1645
+ this.sendEvent("session.update", updateMessage);
1646
+ this.log("Applied per-turn runtime options", options);
1647
+ } catch (error) {
1648
+ this.log("Failed to apply per-turn runtime options", error);
1649
+ }
1650
+ }
1641
1651
  try {
1642
- this.sendEvent("session.update", updateMessage);
1643
- this.log("Applied per-turn runtime options", options);
1652
+ this.sendEvent("client_content", textMessage);
1653
+ this.log("Text message sent", { text: input });
1644
1654
  } catch (error) {
1645
- this.log("Failed to apply per-turn runtime options", error);
1655
+ this.log("Failed to send text message", error);
1656
+ throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
1646
1657
  }
1647
- }
1648
- try {
1649
- this.sendEvent("client_content", textMessage);
1650
- this.log("Text message sent", { text: input });
1651
- } catch (error) {
1652
- this.log("Failed to send text message", error);
1653
- throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
1654
- }
1658
+ }, "gemini-live.speak")();
1655
1659
  }
1656
1660
  /**
1657
1661
  * Send audio stream for processing
1658
1662
  */
1659
1663
  async send(audioData) {
1660
- this.validateConnectionState();
1661
- if ("readable" in audioData && typeof audioData.on === "function") {
1662
- const stream = audioData;
1663
- stream.on("data", (chunk) => {
1664
- try {
1665
- const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
1666
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1667
- this.sendEvent("realtime_input", message);
1668
- } catch (error) {
1669
- this.log("Failed to process audio chunk", error);
1670
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
1671
- }
1672
- });
1673
- stream.on("error", (error) => {
1674
- this.log("Audio stream error", error);
1675
- this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
1676
- });
1677
- stream.on("end", () => {
1678
- this.log("Audio stream ended");
1679
- });
1680
- } else {
1681
- const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
1682
- const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
1683
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1684
- this.sendEvent("realtime_input", message);
1685
- }
1664
+ return this.traced(async () => {
1665
+ this.validateConnectionState();
1666
+ if ("readable" in audioData && typeof audioData.on === "function") {
1667
+ const stream = audioData;
1668
+ stream.on("data", (chunk) => {
1669
+ try {
1670
+ const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
1671
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1672
+ this.sendEvent("realtime_input", message);
1673
+ } catch (error) {
1674
+ this.log("Failed to process audio chunk", error);
1675
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
1676
+ }
1677
+ });
1678
+ stream.on("error", (error) => {
1679
+ this.log("Audio stream error", error);
1680
+ this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
1681
+ });
1682
+ stream.on("end", () => {
1683
+ this.log("Audio stream ended");
1684
+ });
1685
+ } else {
1686
+ const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
1687
+ const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
1688
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1689
+ this.sendEvent("realtime_input", message);
1690
+ }
1691
+ }, "gemini-live.send")();
1686
1692
  }
1687
1693
  /**
1688
1694
  * Process speech from audio stream (traditional STT interface)
1689
1695
  */
1690
1696
  async listen(audioStream, _options) {
1691
- this.validateConnectionState();
1692
- let transcriptionText = "";
1693
- const onWriting = (data) => {
1694
- if (data.role === "user") {
1695
- transcriptionText += data.text;
1696
- this.log("Received transcription text:", { text: data.text, total: transcriptionText });
1697
- }
1698
- };
1699
- const onError = (error) => {
1700
- throw new Error(`Transcription failed: ${error.message}`);
1701
- };
1702
- const onSession = (data) => {
1703
- if (data.state === "disconnected") {
1704
- throw new Error("Session disconnected during transcription");
1705
- }
1706
- };
1707
- this.on("writing", onWriting);
1708
- this.on("error", onError);
1709
- this.on("session", onSession);
1710
- try {
1711
- const result = await this.audioStreamManager.handleAudioTranscription(
1712
- audioStream,
1713
- (base64Audio) => {
1714
- return new Promise((resolve, reject) => {
1715
- try {
1716
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
1717
- const cleanup = () => {
1718
- this.off("turnComplete", onTurnComplete);
1719
- this.off("error", onErr);
1720
- };
1721
- const onTurnComplete = () => {
1722
- cleanup();
1723
- resolve(transcriptionText.trim());
1724
- };
1725
- const onErr = (e) => {
1726
- cleanup();
1727
- reject(new Error(e.message));
1728
- };
1729
- this.on("turnComplete", onTurnComplete);
1730
- this.on("error", onErr);
1731
- this.sendEvent("client_content", message);
1732
- this.log("Sent audio for transcription");
1733
- } catch (err) {
1734
- reject(err);
1735
- }
1736
- });
1737
- },
1738
- (error) => {
1739
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
1697
+ return this.traced(async () => {
1698
+ this.validateConnectionState();
1699
+ let transcriptionText = "";
1700
+ const onWriting = (data) => {
1701
+ if (data.role === "user") {
1702
+ transcriptionText += data.text;
1703
+ this.log("Received transcription text:", { text: data.text, total: transcriptionText });
1740
1704
  }
1741
- );
1742
- return result;
1743
- } finally {
1744
- this.off("writing", onWriting);
1745
- this.off("error", onError);
1746
- this.off("session", onSession);
1747
- }
1705
+ };
1706
+ const onError = (error) => {
1707
+ throw new Error(`Transcription failed: ${error.message}`);
1708
+ };
1709
+ const onSession = (data) => {
1710
+ if (data.state === "disconnected") {
1711
+ throw new Error("Session disconnected during transcription");
1712
+ }
1713
+ };
1714
+ this.on("writing", onWriting);
1715
+ this.on("error", onError);
1716
+ this.on("session", onSession);
1717
+ try {
1718
+ const result = await this.audioStreamManager.handleAudioTranscription(
1719
+ audioStream,
1720
+ (base64Audio) => {
1721
+ return new Promise((resolve, reject) => {
1722
+ try {
1723
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
1724
+ const cleanup = () => {
1725
+ this.off("turnComplete", onTurnComplete);
1726
+ this.off("error", onErr);
1727
+ };
1728
+ const onTurnComplete = () => {
1729
+ cleanup();
1730
+ resolve(transcriptionText.trim());
1731
+ };
1732
+ const onErr = (e) => {
1733
+ cleanup();
1734
+ reject(new Error(e.message));
1735
+ };
1736
+ this.on("turnComplete", onTurnComplete);
1737
+ this.on("error", onErr);
1738
+ this.sendEvent("client_content", message);
1739
+ this.log("Sent audio for transcription");
1740
+ } catch (err) {
1741
+ reject(err);
1742
+ }
1743
+ });
1744
+ },
1745
+ (error) => {
1746
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
1747
+ }
1748
+ );
1749
+ return result;
1750
+ } finally {
1751
+ this.off("writing", onWriting);
1752
+ this.off("error", onError);
1753
+ this.off("session", onSession);
1754
+ }
1755
+ }, "gemini-live.listen")();
1748
1756
  }
1749
1757
  /**
1750
1758
  * Get available speakers/voices
1751
1759
  */
1752
1760
  async getSpeakers() {
1753
- return [
1754
- { voiceId: "Puck", description: "Conversational, friendly" },
1755
- { voiceId: "Charon", description: "Deep, authoritative" },
1756
- { voiceId: "Kore", description: "Neutral, professional" },
1757
- { voiceId: "Fenrir", description: "Warm, approachable" }
1758
- ];
1761
+ return this.traced(async () => {
1762
+ return [
1763
+ { voiceId: "Puck", description: "Conversational, friendly" },
1764
+ { voiceId: "Charon", description: "Deep, authoritative" },
1765
+ { voiceId: "Kore", description: "Neutral, professional" },
1766
+ { voiceId: "Fenrir", description: "Warm, approachable" }
1767
+ ];
1768
+ }, "gemini-live.getSpeakers")();
1759
1769
  }
1760
1770
  /**
1761
1771
  * Resume a previous session using a session handle
@@ -2383,7 +2393,13 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
2383
2393
  let result;
2384
2394
  if (tool.execute) {
2385
2395
  this.log("Executing tool", { toolName, toolArgs });
2386
- result = await tool.execute(toolArgs, { requestContext: this.requestContext });
2396
+ result = await tool.execute(
2397
+ { context: toolArgs, runtimeContext: this.runtimeContext },
2398
+ {
2399
+ toolCallId: toolId,
2400
+ messages: []
2401
+ }
2402
+ );
2387
2403
  this.log("Tool executed successfully", { toolName, result });
2388
2404
  } else {
2389
2405
  this.log("Tool has no execute function", { toolName });
@@ -2685,14 +2701,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
2685
2701
  * inputSchema: z.object({
2686
2702
  * location: z.string().describe("The city and state, e.g. San Francisco, CA"),
2687
2703
  * }),
2688
- * execute: async (inputData) => {
2704
+ * execute: async ({ context }) => {
2689
2705
  * // Fetch weather data from an API
2690
2706
  * const response = await fetch(
2691
- * `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
2707
+ * `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
2692
2708
  * );
2693
2709
  * const data = await response.json();
2694
2710
  * return {
2695
- * message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
2711
+ * message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
2696
2712
  * };
2697
2713
  * },
2698
2714
  * });
@@ -2710,7 +2726,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
2710
2726
  * Get the current tools configured for this voice instance
2711
2727
  * @returns Object containing the current tools
2712
2728
  */
2713
- listTools() {
2729
+ getTools() {
2714
2730
  return this.tools;
2715
2731
  }
2716
2732
  log(message, ...args) {