@mastra/voice-google-gemini-live 0.0.0-add-save-score-validation-on-stores-20250911031242 → 0.0.0-add-crumb-action-20251028111500

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -564,7 +564,7 @@ var AudioStreamManager = class {
564
564
  */
565
565
  log(message, ...args) {
566
566
  if (this.debug) {
567
- console.log(`[AudioStreamManager] ${message}`, ...args);
567
+ console.info(`[AudioStreamManager] ${message}`, ...args);
568
568
  }
569
569
  }
570
570
  /**
@@ -843,7 +843,7 @@ var ConnectionManager = class {
843
843
  */
844
844
  log(message, ...args) {
845
845
  if (this.debug) {
846
- console.log(`[ConnectionManager] ${message}`, ...args);
846
+ console.info(`[ConnectionManager] ${message}`, ...args);
847
847
  }
848
848
  }
849
849
  };
@@ -1117,7 +1117,7 @@ var AuthManager = class {
1117
1117
  */
1118
1118
  log(message, ...args) {
1119
1119
  if (this.config.debug) {
1120
- console.log(`[AuthManager] ${message}`, ...args);
1120
+ console.info(`[AuthManager] ${message}`, ...args);
1121
1121
  }
1122
1122
  }
1123
1123
  };
@@ -1233,7 +1233,7 @@ var EventManager = class {
1233
1233
  */
1234
1234
  log(message, ...args) {
1235
1235
  if (this.debug) {
1236
- console.log(`[EventManager] ${message}`, ...args);
1236
+ console.info(`[EventManager] ${message}`, ...args);
1237
1237
  }
1238
1238
  }
1239
1239
  };
@@ -1500,68 +1500,66 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1500
1500
  * Establish connection to the Gemini Live API
1501
1501
  */
1502
1502
  async connect({ runtimeContext } = {}) {
1503
- return this.traced(async () => {
1504
- if (this.state === "connected") {
1505
- this.log("Already connected to Gemini Live API");
1506
- return;
1507
- }
1508
- this.runtimeContext = runtimeContext;
1509
- this.emit("session", { state: "connecting" });
1510
- try {
1511
- let wsUrl;
1512
- let headers = {};
1513
- if (this.options.vertexAI) {
1514
- wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
1515
- await this.authManager.initialize();
1516
- const accessToken = await this.authManager.getAccessToken();
1517
- headers = { headers: { Authorization: `Bearer ${accessToken}` } };
1518
- this.log("Using Vertex AI authentication with OAuth token");
1519
- } else {
1520
- wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
1521
- headers = {
1522
- headers: {
1523
- "x-goog-api-key": this.options.apiKey || "",
1524
- "Content-Type": "application/json"
1525
- }
1526
- };
1527
- this.log("Using Live API authentication with API key");
1528
- }
1529
- this.log("Connecting to:", wsUrl);
1530
- this.ws = new ws.WebSocket(wsUrl, void 0, headers);
1531
- this.connectionManager.setWebSocket(this.ws);
1532
- this.setupEventListeners();
1533
- await this.connectionManager.waitForOpen();
1534
- if (this.isResuming && this.sessionHandle) {
1535
- await this.sendSessionResumption();
1536
- } else {
1537
- this.sendInitialConfig();
1538
- this.sessionStartTime = Date.now();
1539
- this.sessionId = crypto.randomUUID();
1540
- }
1541
- await this.waitForSessionCreated();
1542
- this.state = "connected";
1543
- this.emit("session", {
1544
- state: "connected",
1545
- config: {
1546
- sessionId: this.sessionId,
1547
- isResuming: this.isResuming,
1548
- toolCount: Object.keys(this.tools || {}).length
1503
+ if (this.state === "connected") {
1504
+ this.log("Already connected to Gemini Live API");
1505
+ return;
1506
+ }
1507
+ this.runtimeContext = runtimeContext;
1508
+ this.emit("session", { state: "connecting" });
1509
+ try {
1510
+ let wsUrl;
1511
+ let headers = {};
1512
+ if (this.options.vertexAI) {
1513
+ wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
1514
+ await this.authManager.initialize();
1515
+ const accessToken = await this.authManager.getAccessToken();
1516
+ headers = { headers: { Authorization: `Bearer ${accessToken}` } };
1517
+ this.log("Using Vertex AI authentication with OAuth token");
1518
+ } else {
1519
+ wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
1520
+ headers = {
1521
+ headers: {
1522
+ "x-goog-api-key": this.options.apiKey || "",
1523
+ "Content-Type": "application/json"
1549
1524
  }
1550
- });
1551
- this.log("Successfully connected to Gemini Live API", {
1525
+ };
1526
+ this.log("Using Live API authentication with API key");
1527
+ }
1528
+ this.log("Connecting to:", wsUrl);
1529
+ this.ws = new ws.WebSocket(wsUrl, void 0, headers);
1530
+ this.connectionManager.setWebSocket(this.ws);
1531
+ this.setupEventListeners();
1532
+ await this.connectionManager.waitForOpen();
1533
+ if (this.isResuming && this.sessionHandle) {
1534
+ await this.sendSessionResumption();
1535
+ } else {
1536
+ this.sendInitialConfig();
1537
+ this.sessionStartTime = Date.now();
1538
+ this.sessionId = crypto.randomUUID();
1539
+ }
1540
+ await this.waitForSessionCreated();
1541
+ this.state = "connected";
1542
+ this.emit("session", {
1543
+ state: "connected",
1544
+ config: {
1552
1545
  sessionId: this.sessionId,
1553
1546
  isResuming: this.isResuming,
1554
1547
  toolCount: Object.keys(this.tools || {}).length
1555
- });
1556
- if (this.options.sessionConfig?.maxDuration) {
1557
- this.startSessionDurationMonitor();
1558
1548
  }
1559
- } catch (error) {
1560
- this.state = "disconnected";
1561
- this.log("Connection failed", error);
1562
- throw error;
1549
+ });
1550
+ this.log("Successfully connected to Gemini Live API", {
1551
+ sessionId: this.sessionId,
1552
+ isResuming: this.isResuming,
1553
+ toolCount: Object.keys(this.tools || {}).length
1554
+ });
1555
+ if (this.options.sessionConfig?.maxDuration) {
1556
+ this.startSessionDurationMonitor();
1563
1557
  }
1564
- }, "gemini-live.connect")();
1558
+ } catch (error) {
1559
+ this.state = "disconnected";
1560
+ this.log("Connection failed", error);
1561
+ throw error;
1562
+ }
1565
1563
  }
1566
1564
  /**
1567
1565
  * Disconnect from the Gemini Live API
@@ -1599,172 +1597,164 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
1599
1597
  * Send text to be converted to speech
1600
1598
  */
1601
1599
  async speak(input, options) {
1602
- return this.traced(async () => {
1603
- this.validateConnectionState();
1604
- if (typeof input !== "string") {
1605
- const chunks = [];
1606
- for await (const chunk of input) {
1607
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
1608
- }
1609
- input = Buffer.concat(chunks).toString("utf-8");
1610
- }
1611
- if (input.trim().length === 0) {
1612
- throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
1600
+ this.validateConnectionState();
1601
+ if (typeof input !== "string") {
1602
+ const chunks = [];
1603
+ for await (const chunk of input) {
1604
+ chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
1613
1605
  }
1614
- this.addToContext("user", input);
1615
- const textMessage = {
1616
- client_content: {
1617
- turns: [
1618
- {
1619
- role: "user",
1620
- parts: [
1621
- {
1622
- text: input
1623
- }
1624
- ]
1625
- }
1626
- ],
1627
- turnComplete: true
1628
- }
1629
- };
1630
- if (options && (options.speaker || options.languageCode || options.responseModalities)) {
1631
- const updateMessage = {
1632
- type: "session.update",
1633
- session: {
1634
- generation_config: {
1635
- ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
1636
- speech_config: {
1637
- ...options.languageCode ? { language_code: options.languageCode } : {},
1638
- ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
1606
+ input = Buffer.concat(chunks).toString("utf-8");
1607
+ }
1608
+ if (input.trim().length === 0) {
1609
+ throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
1610
+ }
1611
+ this.addToContext("user", input);
1612
+ const textMessage = {
1613
+ client_content: {
1614
+ turns: [
1615
+ {
1616
+ role: "user",
1617
+ parts: [
1618
+ {
1619
+ text: input
1639
1620
  }
1621
+ ]
1622
+ }
1623
+ ],
1624
+ turnComplete: true
1625
+ }
1626
+ };
1627
+ if (options && (options.speaker || options.languageCode || options.responseModalities)) {
1628
+ const updateMessage = {
1629
+ type: "session.update",
1630
+ session: {
1631
+ generation_config: {
1632
+ ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
1633
+ speech_config: {
1634
+ ...options.languageCode ? { language_code: options.languageCode } : {},
1635
+ ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
1640
1636
  }
1641
1637
  }
1642
- };
1643
- try {
1644
- this.sendEvent("session.update", updateMessage);
1645
- this.log("Applied per-turn runtime options", options);
1646
- } catch (error) {
1647
- this.log("Failed to apply per-turn runtime options", error);
1648
1638
  }
1649
- }
1639
+ };
1650
1640
  try {
1651
- this.sendEvent("client_content", textMessage);
1652
- this.log("Text message sent", { text: input });
1641
+ this.sendEvent("session.update", updateMessage);
1642
+ this.log("Applied per-turn runtime options", options);
1653
1643
  } catch (error) {
1654
- this.log("Failed to send text message", error);
1655
- throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
1644
+ this.log("Failed to apply per-turn runtime options", error);
1656
1645
  }
1657
- }, "gemini-live.speak")();
1646
+ }
1647
+ try {
1648
+ this.sendEvent("client_content", textMessage);
1649
+ this.log("Text message sent", { text: input });
1650
+ } catch (error) {
1651
+ this.log("Failed to send text message", error);
1652
+ throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
1653
+ }
1658
1654
  }
1659
1655
  /**
1660
1656
  * Send audio stream for processing
1661
1657
  */
1662
1658
  async send(audioData) {
1663
- return this.traced(async () => {
1664
- this.validateConnectionState();
1665
- if ("readable" in audioData && typeof audioData.on === "function") {
1666
- const stream = audioData;
1667
- stream.on("data", (chunk) => {
1668
- try {
1669
- const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
1670
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1671
- this.sendEvent("realtime_input", message);
1672
- } catch (error) {
1673
- this.log("Failed to process audio chunk", error);
1674
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
1675
- }
1676
- });
1677
- stream.on("error", (error) => {
1678
- this.log("Audio stream error", error);
1679
- this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
1680
- });
1681
- stream.on("end", () => {
1682
- this.log("Audio stream ended");
1683
- });
1684
- } else {
1685
- const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
1686
- const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
1687
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1688
- this.sendEvent("realtime_input", message);
1689
- }
1690
- }, "gemini-live.send")();
1659
+ this.validateConnectionState();
1660
+ if ("readable" in audioData && typeof audioData.on === "function") {
1661
+ const stream = audioData;
1662
+ stream.on("data", (chunk) => {
1663
+ try {
1664
+ const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
1665
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1666
+ this.sendEvent("realtime_input", message);
1667
+ } catch (error) {
1668
+ this.log("Failed to process audio chunk", error);
1669
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
1670
+ }
1671
+ });
1672
+ stream.on("error", (error) => {
1673
+ this.log("Audio stream error", error);
1674
+ this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
1675
+ });
1676
+ stream.on("end", () => {
1677
+ this.log("Audio stream ended");
1678
+ });
1679
+ } else {
1680
+ const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
1681
+ const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
1682
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
1683
+ this.sendEvent("realtime_input", message);
1684
+ }
1691
1685
  }
1692
1686
  /**
1693
1687
  * Process speech from audio stream (traditional STT interface)
1694
1688
  */
1695
1689
  async listen(audioStream, _options) {
1696
- return this.traced(async () => {
1697
- this.validateConnectionState();
1698
- let transcriptionText = "";
1699
- const onWriting = (data) => {
1700
- if (data.role === "user") {
1701
- transcriptionText += data.text;
1702
- this.log("Received transcription text:", { text: data.text, total: transcriptionText });
1703
- }
1704
- };
1705
- const onError = (error) => {
1706
- throw new Error(`Transcription failed: ${error.message}`);
1707
- };
1708
- const onSession = (data) => {
1709
- if (data.state === "disconnected") {
1710
- throw new Error("Session disconnected during transcription");
1711
- }
1712
- };
1713
- this.on("writing", onWriting);
1714
- this.on("error", onError);
1715
- this.on("session", onSession);
1716
- try {
1717
- const result = await this.audioStreamManager.handleAudioTranscription(
1718
- audioStream,
1719
- (base64Audio) => {
1720
- return new Promise((resolve, reject) => {
1721
- try {
1722
- const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
1723
- const cleanup = () => {
1724
- this.off("turnComplete", onTurnComplete);
1725
- this.off("error", onErr);
1726
- };
1727
- const onTurnComplete = () => {
1728
- cleanup();
1729
- resolve(transcriptionText.trim());
1730
- };
1731
- const onErr = (e) => {
1732
- cleanup();
1733
- reject(new Error(e.message));
1734
- };
1735
- this.on("turnComplete", onTurnComplete);
1736
- this.on("error", onErr);
1737
- this.sendEvent("client_content", message);
1738
- this.log("Sent audio for transcription");
1739
- } catch (err) {
1740
- reject(err);
1741
- }
1742
- });
1743
- },
1744
- (error) => {
1745
- this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
1746
- }
1747
- );
1748
- return result;
1749
- } finally {
1750
- this.off("writing", onWriting);
1751
- this.off("error", onError);
1752
- this.off("session", onSession);
1690
+ this.validateConnectionState();
1691
+ let transcriptionText = "";
1692
+ const onWriting = (data) => {
1693
+ if (data.role === "user") {
1694
+ transcriptionText += data.text;
1695
+ this.log("Received transcription text:", { text: data.text, total: transcriptionText });
1753
1696
  }
1754
- }, "gemini-live.listen")();
1697
+ };
1698
+ const onError = (error) => {
1699
+ throw new Error(`Transcription failed: ${error.message}`);
1700
+ };
1701
+ const onSession = (data) => {
1702
+ if (data.state === "disconnected") {
1703
+ throw new Error("Session disconnected during transcription");
1704
+ }
1705
+ };
1706
+ this.on("writing", onWriting);
1707
+ this.on("error", onError);
1708
+ this.on("session", onSession);
1709
+ try {
1710
+ const result = await this.audioStreamManager.handleAudioTranscription(
1711
+ audioStream,
1712
+ (base64Audio) => {
1713
+ return new Promise((resolve, reject) => {
1714
+ try {
1715
+ const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
1716
+ const cleanup = () => {
1717
+ this.off("turnComplete", onTurnComplete);
1718
+ this.off("error", onErr);
1719
+ };
1720
+ const onTurnComplete = () => {
1721
+ cleanup();
1722
+ resolve(transcriptionText.trim());
1723
+ };
1724
+ const onErr = (e) => {
1725
+ cleanup();
1726
+ reject(new Error(e.message));
1727
+ };
1728
+ this.on("turnComplete", onTurnComplete);
1729
+ this.on("error", onErr);
1730
+ this.sendEvent("client_content", message);
1731
+ this.log("Sent audio for transcription");
1732
+ } catch (err) {
1733
+ reject(err);
1734
+ }
1735
+ });
1736
+ },
1737
+ (error) => {
1738
+ this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
1739
+ }
1740
+ );
1741
+ return result;
1742
+ } finally {
1743
+ this.off("writing", onWriting);
1744
+ this.off("error", onError);
1745
+ this.off("session", onSession);
1746
+ }
1755
1747
  }
1756
1748
  /**
1757
1749
  * Get available speakers/voices
1758
1750
  */
1759
1751
  async getSpeakers() {
1760
- return this.traced(async () => {
1761
- return [
1762
- { voiceId: "Puck", description: "Conversational, friendly" },
1763
- { voiceId: "Charon", description: "Deep, authoritative" },
1764
- { voiceId: "Kore", description: "Neutral, professional" },
1765
- { voiceId: "Fenrir", description: "Warm, approachable" }
1766
- ];
1767
- }, "gemini-live.getSpeakers")();
1752
+ return [
1753
+ { voiceId: "Puck", description: "Conversational, friendly" },
1754
+ { voiceId: "Charon", description: "Deep, authoritative" },
1755
+ { voiceId: "Kore", description: "Neutral, professional" },
1756
+ { voiceId: "Fenrir", description: "Warm, approachable" }
1757
+ ];
1768
1758
  }
1769
1759
  /**
1770
1760
  * Resume a previous session using a session handle
@@ -2668,7 +2658,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
2668
2658
  }
2669
2659
  log(message, ...args) {
2670
2660
  if (this.debug) {
2671
- console.log(`[GeminiLiveVoice] ${message}`, ...args);
2661
+ console.info(`[GeminiLiveVoice] ${message}`, ...args);
2672
2662
  }
2673
2663
  }
2674
2664
  /**