@mastra/voice-google-gemini-live 0.0.0-extract-tool-ui-inp-playground-ui-20251024041825 → 0.0.0-feat-add-query-option-to-playground-20251209160219
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +150 -3
- package/README.md +3 -3
- package/dist/index.cjs +270 -223
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +22 -7
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +270 -223
- package/dist/index.js.map +1 -1
- package/dist/managers/AudioStreamManager.d.ts +1 -1
- package/dist/managers/AudioStreamManager.d.ts.map +1 -1
- package/dist/managers/EventManager.d.ts +1 -1
- package/dist/managers/EventManager.d.ts.map +1 -1
- package/dist/managers/SessionManager.d.ts +1 -1
- package/dist/managers/SessionManager.d.ts.map +1 -1
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +13 -10
package/dist/index.cjs
CHANGED
|
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1262
1262
|
sessionDurationTimeout;
|
|
1263
1263
|
// Tool integration properties
|
|
1264
1264
|
tools;
|
|
1265
|
-
|
|
1265
|
+
requestContext;
|
|
1266
1266
|
// Store the configuration options
|
|
1267
1267
|
options;
|
|
1268
1268
|
/**
|
|
@@ -1499,69 +1499,68 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1499
1499
|
/**
|
|
1500
1500
|
* Establish connection to the Gemini Live API
|
|
1501
1501
|
*/
|
|
1502
|
-
async connect({
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
}
|
|
1526
|
-
};
|
|
1527
|
-
this.log("Using Live API authentication with API key");
|
|
1528
|
-
}
|
|
1529
|
-
this.log("Connecting to:", wsUrl);
|
|
1530
|
-
this.ws = new ws.WebSocket(wsUrl, void 0, headers);
|
|
1531
|
-
this.connectionManager.setWebSocket(this.ws);
|
|
1532
|
-
this.setupEventListeners();
|
|
1533
|
-
await this.connectionManager.waitForOpen();
|
|
1534
|
-
if (this.isResuming && this.sessionHandle) {
|
|
1535
|
-
await this.sendSessionResumption();
|
|
1536
|
-
} else {
|
|
1537
|
-
this.sendInitialConfig();
|
|
1538
|
-
this.sessionStartTime = Date.now();
|
|
1539
|
-
this.sessionId = crypto.randomUUID();
|
|
1540
|
-
}
|
|
1541
|
-
await this.waitForSessionCreated();
|
|
1542
|
-
this.state = "connected";
|
|
1543
|
-
this.emit("session", {
|
|
1544
|
-
state: "connected",
|
|
1545
|
-
config: {
|
|
1546
|
-
sessionId: this.sessionId,
|
|
1547
|
-
isResuming: this.isResuming,
|
|
1548
|
-
toolCount: Object.keys(this.tools || {}).length
|
|
1502
|
+
async connect({ requestContext } = {}) {
|
|
1503
|
+
if (this.state === "connected") {
|
|
1504
|
+
this.log("Already connected to Gemini Live API");
|
|
1505
|
+
return;
|
|
1506
|
+
}
|
|
1507
|
+
this.requestContext = requestContext;
|
|
1508
|
+
this.emit("session", { state: "connecting" });
|
|
1509
|
+
try {
|
|
1510
|
+
let wsUrl;
|
|
1511
|
+
let headers = {};
|
|
1512
|
+
if (this.options.vertexAI) {
|
|
1513
|
+
const location = this.getVertexLocation();
|
|
1514
|
+
wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
|
|
1515
|
+
await this.authManager.initialize();
|
|
1516
|
+
const accessToken = await this.authManager.getAccessToken();
|
|
1517
|
+
headers = { headers: { Authorization: `Bearer ${accessToken}` } };
|
|
1518
|
+
this.log("Using Vertex AI authentication with OAuth token");
|
|
1519
|
+
} else {
|
|
1520
|
+
wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
|
|
1521
|
+
headers = {
|
|
1522
|
+
headers: {
|
|
1523
|
+
"x-goog-api-key": this.options.apiKey || "",
|
|
1524
|
+
"Content-Type": "application/json"
|
|
1549
1525
|
}
|
|
1550
|
-
}
|
|
1551
|
-
this.log("
|
|
1526
|
+
};
|
|
1527
|
+
this.log("Using Live API authentication with API key");
|
|
1528
|
+
}
|
|
1529
|
+
this.log("Connecting to:", wsUrl);
|
|
1530
|
+
this.ws = new ws.WebSocket(wsUrl, void 0, headers);
|
|
1531
|
+
this.connectionManager.setWebSocket(this.ws);
|
|
1532
|
+
this.setupEventListeners();
|
|
1533
|
+
await this.connectionManager.waitForOpen();
|
|
1534
|
+
if (this.isResuming && this.sessionHandle) {
|
|
1535
|
+
await this.sendSessionResumption();
|
|
1536
|
+
} else {
|
|
1537
|
+
this.sendInitialConfig();
|
|
1538
|
+
this.sessionStartTime = Date.now();
|
|
1539
|
+
this.sessionId = crypto.randomUUID();
|
|
1540
|
+
}
|
|
1541
|
+
await this.waitForSessionCreated();
|
|
1542
|
+
this.state = "connected";
|
|
1543
|
+
this.emit("session", {
|
|
1544
|
+
state: "connected",
|
|
1545
|
+
config: {
|
|
1552
1546
|
sessionId: this.sessionId,
|
|
1553
1547
|
isResuming: this.isResuming,
|
|
1554
1548
|
toolCount: Object.keys(this.tools || {}).length
|
|
1555
|
-
});
|
|
1556
|
-
if (this.options.sessionConfig?.maxDuration) {
|
|
1557
|
-
this.startSessionDurationMonitor();
|
|
1558
1549
|
}
|
|
1559
|
-
}
|
|
1560
|
-
|
|
1561
|
-
this.
|
|
1562
|
-
|
|
1550
|
+
});
|
|
1551
|
+
this.log("Successfully connected to Gemini Live API", {
|
|
1552
|
+
sessionId: this.sessionId,
|
|
1553
|
+
isResuming: this.isResuming,
|
|
1554
|
+
toolCount: Object.keys(this.tools || {}).length
|
|
1555
|
+
});
|
|
1556
|
+
if (this.options.sessionConfig?.maxDuration) {
|
|
1557
|
+
this.startSessionDurationMonitor();
|
|
1563
1558
|
}
|
|
1564
|
-
}
|
|
1559
|
+
} catch (error) {
|
|
1560
|
+
this.state = "disconnected";
|
|
1561
|
+
this.log("Connection failed", error);
|
|
1562
|
+
throw error;
|
|
1563
|
+
}
|
|
1565
1564
|
}
|
|
1566
1565
|
/**
|
|
1567
1566
|
* Disconnect from the Gemini Live API
|
|
@@ -1599,172 +1598,164 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1599
1598
|
* Send text to be converted to speech
|
|
1600
1599
|
*/
|
|
1601
1600
|
async speak(input, options) {
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
|
|
1607
|
-
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
|
|
1608
|
-
}
|
|
1609
|
-
input = Buffer.concat(chunks).toString("utf-8");
|
|
1610
|
-
}
|
|
1611
|
-
if (input.trim().length === 0) {
|
|
1612
|
-
throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
|
|
1601
|
+
this.validateConnectionState();
|
|
1602
|
+
if (typeof input !== "string") {
|
|
1603
|
+
const chunks = [];
|
|
1604
|
+
for await (const chunk of input) {
|
|
1605
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
|
|
1613
1606
|
}
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1624
|
-
|
|
1625
|
-
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
}
|
|
1629
|
-
};
|
|
1630
|
-
if (options && (options.speaker || options.languageCode || options.responseModalities)) {
|
|
1631
|
-
const updateMessage = {
|
|
1632
|
-
type: "session.update",
|
|
1633
|
-
session: {
|
|
1634
|
-
generation_config: {
|
|
1635
|
-
...options.responseModalities ? { response_modalities: options.responseModalities } : {},
|
|
1636
|
-
speech_config: {
|
|
1637
|
-
...options.languageCode ? { language_code: options.languageCode } : {},
|
|
1638
|
-
...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
|
|
1607
|
+
input = Buffer.concat(chunks).toString("utf-8");
|
|
1608
|
+
}
|
|
1609
|
+
if (input.trim().length === 0) {
|
|
1610
|
+
throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
|
|
1611
|
+
}
|
|
1612
|
+
this.addToContext("user", input);
|
|
1613
|
+
const textMessage = {
|
|
1614
|
+
client_content: {
|
|
1615
|
+
turns: [
|
|
1616
|
+
{
|
|
1617
|
+
role: "user",
|
|
1618
|
+
parts: [
|
|
1619
|
+
{
|
|
1620
|
+
text: input
|
|
1639
1621
|
}
|
|
1622
|
+
]
|
|
1623
|
+
}
|
|
1624
|
+
],
|
|
1625
|
+
turnComplete: true
|
|
1626
|
+
}
|
|
1627
|
+
};
|
|
1628
|
+
if (options && (options.speaker || options.languageCode || options.responseModalities)) {
|
|
1629
|
+
const updateMessage = {
|
|
1630
|
+
type: "session.update",
|
|
1631
|
+
session: {
|
|
1632
|
+
generation_config: {
|
|
1633
|
+
...options.responseModalities ? { response_modalities: options.responseModalities } : {},
|
|
1634
|
+
speech_config: {
|
|
1635
|
+
...options.languageCode ? { language_code: options.languageCode } : {},
|
|
1636
|
+
...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
|
|
1640
1637
|
}
|
|
1641
1638
|
}
|
|
1642
|
-
};
|
|
1643
|
-
try {
|
|
1644
|
-
this.sendEvent("session.update", updateMessage);
|
|
1645
|
-
this.log("Applied per-turn runtime options", options);
|
|
1646
|
-
} catch (error) {
|
|
1647
|
-
this.log("Failed to apply per-turn runtime options", error);
|
|
1648
1639
|
}
|
|
1649
|
-
}
|
|
1640
|
+
};
|
|
1650
1641
|
try {
|
|
1651
|
-
this.sendEvent("
|
|
1652
|
-
this.log("
|
|
1642
|
+
this.sendEvent("session.update", updateMessage);
|
|
1643
|
+
this.log("Applied per-turn runtime options", options);
|
|
1653
1644
|
} catch (error) {
|
|
1654
|
-
this.log("Failed to
|
|
1655
|
-
throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
|
|
1645
|
+
this.log("Failed to apply per-turn runtime options", error);
|
|
1656
1646
|
}
|
|
1657
|
-
}
|
|
1647
|
+
}
|
|
1648
|
+
try {
|
|
1649
|
+
this.sendEvent("client_content", textMessage);
|
|
1650
|
+
this.log("Text message sent", { text: input });
|
|
1651
|
+
} catch (error) {
|
|
1652
|
+
this.log("Failed to send text message", error);
|
|
1653
|
+
throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
|
|
1654
|
+
}
|
|
1658
1655
|
}
|
|
1659
1656
|
/**
|
|
1660
1657
|
* Send audio stream for processing
|
|
1661
1658
|
*/
|
|
1662
1659
|
async send(audioData) {
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
}
|
|
1690
|
-
}, "gemini-live.send")();
|
|
1660
|
+
this.validateConnectionState();
|
|
1661
|
+
if ("readable" in audioData && typeof audioData.on === "function") {
|
|
1662
|
+
const stream = audioData;
|
|
1663
|
+
stream.on("data", (chunk) => {
|
|
1664
|
+
try {
|
|
1665
|
+
const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
|
|
1666
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1667
|
+
this.sendEvent("realtime_input", message);
|
|
1668
|
+
} catch (error) {
|
|
1669
|
+
this.log("Failed to process audio chunk", error);
|
|
1670
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
|
|
1671
|
+
}
|
|
1672
|
+
});
|
|
1673
|
+
stream.on("error", (error) => {
|
|
1674
|
+
this.log("Audio stream error", error);
|
|
1675
|
+
this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
|
|
1676
|
+
});
|
|
1677
|
+
stream.on("end", () => {
|
|
1678
|
+
this.log("Audio stream ended");
|
|
1679
|
+
});
|
|
1680
|
+
} else {
|
|
1681
|
+
const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
|
|
1682
|
+
const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
|
|
1683
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1684
|
+
this.sendEvent("realtime_input", message);
|
|
1685
|
+
}
|
|
1691
1686
|
}
|
|
1692
1687
|
/**
|
|
1693
1688
|
* Process speech from audio stream (traditional STT interface)
|
|
1694
1689
|
*/
|
|
1695
1690
|
async listen(audioStream, _options) {
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
this.log("Received transcription text:", { text: data.text, total: transcriptionText });
|
|
1703
|
-
}
|
|
1704
|
-
};
|
|
1705
|
-
const onError = (error) => {
|
|
1706
|
-
throw new Error(`Transcription failed: ${error.message}`);
|
|
1707
|
-
};
|
|
1708
|
-
const onSession = (data) => {
|
|
1709
|
-
if (data.state === "disconnected") {
|
|
1710
|
-
throw new Error("Session disconnected during transcription");
|
|
1711
|
-
}
|
|
1712
|
-
};
|
|
1713
|
-
this.on("writing", onWriting);
|
|
1714
|
-
this.on("error", onError);
|
|
1715
|
-
this.on("session", onSession);
|
|
1716
|
-
try {
|
|
1717
|
-
const result = await this.audioStreamManager.handleAudioTranscription(
|
|
1718
|
-
audioStream,
|
|
1719
|
-
(base64Audio) => {
|
|
1720
|
-
return new Promise((resolve, reject) => {
|
|
1721
|
-
try {
|
|
1722
|
-
const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
|
|
1723
|
-
const cleanup = () => {
|
|
1724
|
-
this.off("turnComplete", onTurnComplete);
|
|
1725
|
-
this.off("error", onErr);
|
|
1726
|
-
};
|
|
1727
|
-
const onTurnComplete = () => {
|
|
1728
|
-
cleanup();
|
|
1729
|
-
resolve(transcriptionText.trim());
|
|
1730
|
-
};
|
|
1731
|
-
const onErr = (e) => {
|
|
1732
|
-
cleanup();
|
|
1733
|
-
reject(new Error(e.message));
|
|
1734
|
-
};
|
|
1735
|
-
this.on("turnComplete", onTurnComplete);
|
|
1736
|
-
this.on("error", onErr);
|
|
1737
|
-
this.sendEvent("client_content", message);
|
|
1738
|
-
this.log("Sent audio for transcription");
|
|
1739
|
-
} catch (err) {
|
|
1740
|
-
reject(err);
|
|
1741
|
-
}
|
|
1742
|
-
});
|
|
1743
|
-
},
|
|
1744
|
-
(error) => {
|
|
1745
|
-
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
|
|
1746
|
-
}
|
|
1747
|
-
);
|
|
1748
|
-
return result;
|
|
1749
|
-
} finally {
|
|
1750
|
-
this.off("writing", onWriting);
|
|
1751
|
-
this.off("error", onError);
|
|
1752
|
-
this.off("session", onSession);
|
|
1691
|
+
this.validateConnectionState();
|
|
1692
|
+
let transcriptionText = "";
|
|
1693
|
+
const onWriting = (data) => {
|
|
1694
|
+
if (data.role === "user") {
|
|
1695
|
+
transcriptionText += data.text;
|
|
1696
|
+
this.log("Received transcription text:", { text: data.text, total: transcriptionText });
|
|
1753
1697
|
}
|
|
1754
|
-
}
|
|
1698
|
+
};
|
|
1699
|
+
const onError = (error) => {
|
|
1700
|
+
throw new Error(`Transcription failed: ${error.message}`);
|
|
1701
|
+
};
|
|
1702
|
+
const onSession = (data) => {
|
|
1703
|
+
if (data.state === "disconnected") {
|
|
1704
|
+
throw new Error("Session disconnected during transcription");
|
|
1705
|
+
}
|
|
1706
|
+
};
|
|
1707
|
+
this.on("writing", onWriting);
|
|
1708
|
+
this.on("error", onError);
|
|
1709
|
+
this.on("session", onSession);
|
|
1710
|
+
try {
|
|
1711
|
+
const result = await this.audioStreamManager.handleAudioTranscription(
|
|
1712
|
+
audioStream,
|
|
1713
|
+
(base64Audio) => {
|
|
1714
|
+
return new Promise((resolve, reject) => {
|
|
1715
|
+
try {
|
|
1716
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
|
|
1717
|
+
const cleanup = () => {
|
|
1718
|
+
this.off("turnComplete", onTurnComplete);
|
|
1719
|
+
this.off("error", onErr);
|
|
1720
|
+
};
|
|
1721
|
+
const onTurnComplete = () => {
|
|
1722
|
+
cleanup();
|
|
1723
|
+
resolve(transcriptionText.trim());
|
|
1724
|
+
};
|
|
1725
|
+
const onErr = (e) => {
|
|
1726
|
+
cleanup();
|
|
1727
|
+
reject(new Error(e.message));
|
|
1728
|
+
};
|
|
1729
|
+
this.on("turnComplete", onTurnComplete);
|
|
1730
|
+
this.on("error", onErr);
|
|
1731
|
+
this.sendEvent("client_content", message);
|
|
1732
|
+
this.log("Sent audio for transcription");
|
|
1733
|
+
} catch (err) {
|
|
1734
|
+
reject(err);
|
|
1735
|
+
}
|
|
1736
|
+
});
|
|
1737
|
+
},
|
|
1738
|
+
(error) => {
|
|
1739
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
|
|
1740
|
+
}
|
|
1741
|
+
);
|
|
1742
|
+
return result;
|
|
1743
|
+
} finally {
|
|
1744
|
+
this.off("writing", onWriting);
|
|
1745
|
+
this.off("error", onError);
|
|
1746
|
+
this.off("session", onSession);
|
|
1747
|
+
}
|
|
1755
1748
|
}
|
|
1756
1749
|
/**
|
|
1757
1750
|
* Get available speakers/voices
|
|
1758
1751
|
*/
|
|
1759
1752
|
async getSpeakers() {
|
|
1760
|
-
return
|
|
1761
|
-
|
|
1762
|
-
|
|
1763
|
-
|
|
1764
|
-
|
|
1765
|
-
|
|
1766
|
-
];
|
|
1767
|
-
}, "gemini-live.getSpeakers")();
|
|
1753
|
+
return [
|
|
1754
|
+
{ voiceId: "Puck", description: "Conversational, friendly" },
|
|
1755
|
+
{ voiceId: "Charon", description: "Deep, authoritative" },
|
|
1756
|
+
{ voiceId: "Kore", description: "Neutral, professional" },
|
|
1757
|
+
{ voiceId: "Fenrir", description: "Warm, approachable" }
|
|
1758
|
+
];
|
|
1768
1759
|
}
|
|
1769
1760
|
/**
|
|
1770
1761
|
* Resume a previous session using a session handle
|
|
@@ -2269,6 +2260,18 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2269
2260
|
role: "assistant"
|
|
2270
2261
|
});
|
|
2271
2262
|
}
|
|
2263
|
+
if (part.functionCall) {
|
|
2264
|
+
this.log("Found function call in serverContent.modelTurn.parts", part.functionCall);
|
|
2265
|
+
const toolCallData = {
|
|
2266
|
+
toolCall: {
|
|
2267
|
+
name: part.functionCall.name,
|
|
2268
|
+
args: part.functionCall.args || {},
|
|
2269
|
+
id: part.functionCall.id || crypto.randomUUID()
|
|
2270
|
+
}
|
|
2271
|
+
};
|
|
2272
|
+
void this.handleToolCall(toolCallData);
|
|
2273
|
+
continue;
|
|
2274
|
+
}
|
|
2272
2275
|
if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
|
|
2273
2276
|
try {
|
|
2274
2277
|
const audioData = part.inlineData.data;
|
|
@@ -2343,9 +2346,24 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2343
2346
|
if (!data.toolCall) {
|
|
2344
2347
|
return;
|
|
2345
2348
|
}
|
|
2346
|
-
|
|
2347
|
-
|
|
2348
|
-
|
|
2349
|
+
let toolCalls = [];
|
|
2350
|
+
if (data.toolCall.functionCalls && Array.isArray(data.toolCall.functionCalls)) {
|
|
2351
|
+
toolCalls = data.toolCall.functionCalls;
|
|
2352
|
+
} else if (data.toolCall.name) {
|
|
2353
|
+
toolCalls = [{ name: data.toolCall.name, args: data.toolCall.args, id: data.toolCall.id }];
|
|
2354
|
+
}
|
|
2355
|
+
for (const toolCall of toolCalls) {
|
|
2356
|
+
const toolName = toolCall.name || "";
|
|
2357
|
+
const toolArgs = toolCall.args || {};
|
|
2358
|
+
const toolId = toolCall.id || crypto.randomUUID();
|
|
2359
|
+
await this.processSingleToolCall(toolName, toolArgs, toolId);
|
|
2360
|
+
}
|
|
2361
|
+
}
|
|
2362
|
+
/**
|
|
2363
|
+
* Process a single tool call
|
|
2364
|
+
* @private
|
|
2365
|
+
*/
|
|
2366
|
+
async processSingleToolCall(toolName, toolArgs, toolId) {
|
|
2349
2367
|
this.log("Processing tool call", { toolName, toolArgs, toolId });
|
|
2350
2368
|
this.emit("toolCall", {
|
|
2351
2369
|
name: toolName,
|
|
@@ -2365,36 +2383,38 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2365
2383
|
let result;
|
|
2366
2384
|
if (tool.execute) {
|
|
2367
2385
|
this.log("Executing tool", { toolName, toolArgs });
|
|
2368
|
-
result = await tool.execute(
|
|
2369
|
-
{ context: toolArgs, runtimeContext: this.runtimeContext },
|
|
2370
|
-
{
|
|
2371
|
-
toolCallId: toolId,
|
|
2372
|
-
messages: []
|
|
2373
|
-
}
|
|
2374
|
-
);
|
|
2386
|
+
result = await tool.execute(toolArgs, { requestContext: this.requestContext });
|
|
2375
2387
|
this.log("Tool executed successfully", { toolName, result });
|
|
2376
2388
|
} else {
|
|
2377
2389
|
this.log("Tool has no execute function", { toolName });
|
|
2378
2390
|
result = { error: "Tool has no execute function" };
|
|
2379
2391
|
}
|
|
2380
2392
|
const toolResultMessage = {
|
|
2381
|
-
|
|
2382
|
-
|
|
2383
|
-
|
|
2393
|
+
toolResponse: {
|
|
2394
|
+
functionResponses: [
|
|
2395
|
+
{
|
|
2396
|
+
id: toolId,
|
|
2397
|
+
response: result
|
|
2398
|
+
}
|
|
2399
|
+
]
|
|
2384
2400
|
}
|
|
2385
2401
|
};
|
|
2386
|
-
this.sendEvent("
|
|
2402
|
+
this.sendEvent("toolResponse", toolResultMessage);
|
|
2387
2403
|
this.log("Tool result sent", { toolName, toolId, result });
|
|
2388
2404
|
} catch (error) {
|
|
2389
2405
|
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
2390
2406
|
this.log("Tool execution failed", { toolName, error: errorMessage });
|
|
2391
2407
|
const errorResultMessage = {
|
|
2392
|
-
|
|
2393
|
-
|
|
2394
|
-
|
|
2408
|
+
toolResponse: {
|
|
2409
|
+
functionResponses: [
|
|
2410
|
+
{
|
|
2411
|
+
id: toolId,
|
|
2412
|
+
response: { error: errorMessage }
|
|
2413
|
+
}
|
|
2414
|
+
]
|
|
2395
2415
|
}
|
|
2396
2416
|
};
|
|
2397
|
-
this.sendEvent("
|
|
2417
|
+
this.sendEvent("toolResponse", errorResultMessage);
|
|
2398
2418
|
this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
|
|
2399
2419
|
toolName,
|
|
2400
2420
|
toolArgs,
|
|
@@ -2454,6 +2474,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2454
2474
|
}
|
|
2455
2475
|
return "text";
|
|
2456
2476
|
}
|
|
2477
|
+
/**
|
|
2478
|
+
* Resolve Vertex AI location with sensible default
|
|
2479
|
+
* @private
|
|
2480
|
+
*/
|
|
2481
|
+
getVertexLocation() {
|
|
2482
|
+
return this.options.location?.trim() || "us-central1";
|
|
2483
|
+
}
|
|
2484
|
+
/**
|
|
2485
|
+
* Resolve the correct model identifier for Gemini API or Vertex AI
|
|
2486
|
+
* @private
|
|
2487
|
+
*/
|
|
2488
|
+
resolveModelIdentifier() {
|
|
2489
|
+
const model = this.options.model ?? DEFAULT_MODEL;
|
|
2490
|
+
if (!this.options.vertexAI) {
|
|
2491
|
+
return `models/${model}`;
|
|
2492
|
+
}
|
|
2493
|
+
if (!this.options.project) {
|
|
2494
|
+
throw this.createAndEmitError(
|
|
2495
|
+
"project_id_missing" /* PROJECT_ID_MISSING */,
|
|
2496
|
+
"Google Cloud project ID is required when using Vertex AI."
|
|
2497
|
+
);
|
|
2498
|
+
}
|
|
2499
|
+
const location = this.getVertexLocation();
|
|
2500
|
+
return `projects/${this.options.project}/locations/${location}/publishers/google/models/${model}`;
|
|
2501
|
+
}
|
|
2457
2502
|
/**
|
|
2458
2503
|
* Send initial configuration to Gemini Live API
|
|
2459
2504
|
* @private
|
|
@@ -2464,7 +2509,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2464
2509
|
}
|
|
2465
2510
|
const setupMessage = {
|
|
2466
2511
|
setup: {
|
|
2467
|
-
model:
|
|
2512
|
+
model: this.resolveModelIdentifier()
|
|
2468
2513
|
}
|
|
2469
2514
|
};
|
|
2470
2515
|
if (this.options.instructions) {
|
|
@@ -2613,6 +2658,8 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2613
2658
|
message = data;
|
|
2614
2659
|
} else if (type === "realtime_input" && data.realtime_input) {
|
|
2615
2660
|
message = data;
|
|
2661
|
+
} else if (type === "toolResponse" && data.toolResponse) {
|
|
2662
|
+
message = data;
|
|
2616
2663
|
} else if (type === "session.update" && data.session) {
|
|
2617
2664
|
message = data;
|
|
2618
2665
|
} else {
|
|
@@ -2638,14 +2685,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2638
2685
|
* inputSchema: z.object({
|
|
2639
2686
|
* location: z.string().describe("The city and state, e.g. San Francisco, CA"),
|
|
2640
2687
|
* }),
|
|
2641
|
-
* execute: async (
|
|
2688
|
+
* execute: async (inputData) => {
|
|
2642
2689
|
* // Fetch weather data from an API
|
|
2643
2690
|
* const response = await fetch(
|
|
2644
|
-
* `https://api.weather.com?location=${encodeURIComponent(
|
|
2691
|
+
* `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
|
|
2645
2692
|
* );
|
|
2646
2693
|
* const data = await response.json();
|
|
2647
2694
|
* return {
|
|
2648
|
-
* message: `The current temperature in ${
|
|
2695
|
+
* message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
|
|
2649
2696
|
* };
|
|
2650
2697
|
* },
|
|
2651
2698
|
* });
|
|
@@ -2663,7 +2710,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2663
2710
|
* Get the current tools configured for this voice instance
|
|
2664
2711
|
* @returns Object containing the current tools
|
|
2665
2712
|
*/
|
|
2666
|
-
|
|
2713
|
+
listTools() {
|
|
2667
2714
|
return this.tools;
|
|
2668
2715
|
}
|
|
2669
2716
|
log(message, ...args) {
|