@mastra/voice-google-gemini-live 0.11.0-beta.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -54
- package/README.md +3 -3
- package/dist/index.cjs +259 -206
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +12 -7
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +259 -206
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +12 -10
package/dist/index.cjs
CHANGED
|
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1262
1262
|
sessionDurationTimeout;
|
|
1263
1263
|
// Tool integration properties
|
|
1264
1264
|
tools;
|
|
1265
|
-
|
|
1265
|
+
runtimeContext;
|
|
1266
1266
|
// Store the configuration options
|
|
1267
1267
|
options;
|
|
1268
1268
|
/**
|
|
@@ -1499,67 +1499,69 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1499
1499
|
/**
|
|
1500
1500
|
* Establish connection to the Gemini Live API
|
|
1501
1501
|
*/
|
|
1502
|
-
async connect({
|
|
1503
|
-
|
|
1504
|
-
this.
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
this.requestContext = requestContext;
|
|
1508
|
-
this.emit("session", { state: "connecting" });
|
|
1509
|
-
try {
|
|
1510
|
-
let wsUrl;
|
|
1511
|
-
let headers = {};
|
|
1512
|
-
if (this.options.vertexAI) {
|
|
1513
|
-
wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
|
|
1514
|
-
await this.authManager.initialize();
|
|
1515
|
-
const accessToken = await this.authManager.getAccessToken();
|
|
1516
|
-
headers = { headers: { Authorization: `Bearer ${accessToken}` } };
|
|
1517
|
-
this.log("Using Vertex AI authentication with OAuth token");
|
|
1518
|
-
} else {
|
|
1519
|
-
wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
|
|
1520
|
-
headers = {
|
|
1521
|
-
headers: {
|
|
1522
|
-
"x-goog-api-key": this.options.apiKey || "",
|
|
1523
|
-
"Content-Type": "application/json"
|
|
1524
|
-
}
|
|
1525
|
-
};
|
|
1526
|
-
this.log("Using Live API authentication with API key");
|
|
1527
|
-
}
|
|
1528
|
-
this.log("Connecting to:", wsUrl);
|
|
1529
|
-
this.ws = new ws.WebSocket(wsUrl, void 0, headers);
|
|
1530
|
-
this.connectionManager.setWebSocket(this.ws);
|
|
1531
|
-
this.setupEventListeners();
|
|
1532
|
-
await this.connectionManager.waitForOpen();
|
|
1533
|
-
if (this.isResuming && this.sessionHandle) {
|
|
1534
|
-
await this.sendSessionResumption();
|
|
1535
|
-
} else {
|
|
1536
|
-
this.sendInitialConfig();
|
|
1537
|
-
this.sessionStartTime = Date.now();
|
|
1538
|
-
this.sessionId = crypto.randomUUID();
|
|
1502
|
+
async connect({ runtimeContext } = {}) {
|
|
1503
|
+
return this.traced(async () => {
|
|
1504
|
+
if (this.state === "connected") {
|
|
1505
|
+
this.log("Already connected to Gemini Live API");
|
|
1506
|
+
return;
|
|
1539
1507
|
}
|
|
1540
|
-
|
|
1541
|
-
this.state
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
|
|
1508
|
+
this.runtimeContext = runtimeContext;
|
|
1509
|
+
this.emit("session", { state: "connecting" });
|
|
1510
|
+
try {
|
|
1511
|
+
let wsUrl;
|
|
1512
|
+
let headers = {};
|
|
1513
|
+
if (this.options.vertexAI) {
|
|
1514
|
+
wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
|
|
1515
|
+
await this.authManager.initialize();
|
|
1516
|
+
const accessToken = await this.authManager.getAccessToken();
|
|
1517
|
+
headers = { headers: { Authorization: `Bearer ${accessToken}` } };
|
|
1518
|
+
this.log("Using Vertex AI authentication with OAuth token");
|
|
1519
|
+
} else {
|
|
1520
|
+
wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
|
|
1521
|
+
headers = {
|
|
1522
|
+
headers: {
|
|
1523
|
+
"x-goog-api-key": this.options.apiKey || "",
|
|
1524
|
+
"Content-Type": "application/json"
|
|
1525
|
+
}
|
|
1526
|
+
};
|
|
1527
|
+
this.log("Using Live API authentication with API key");
|
|
1528
|
+
}
|
|
1529
|
+
this.log("Connecting to:", wsUrl);
|
|
1530
|
+
this.ws = new ws.WebSocket(wsUrl, void 0, headers);
|
|
1531
|
+
this.connectionManager.setWebSocket(this.ws);
|
|
1532
|
+
this.setupEventListeners();
|
|
1533
|
+
await this.connectionManager.waitForOpen();
|
|
1534
|
+
if (this.isResuming && this.sessionHandle) {
|
|
1535
|
+
await this.sendSessionResumption();
|
|
1536
|
+
} else {
|
|
1537
|
+
this.sendInitialConfig();
|
|
1538
|
+
this.sessionStartTime = Date.now();
|
|
1539
|
+
this.sessionId = crypto.randomUUID();
|
|
1540
|
+
}
|
|
1541
|
+
await this.waitForSessionCreated();
|
|
1542
|
+
this.state = "connected";
|
|
1543
|
+
this.emit("session", {
|
|
1544
|
+
state: "connected",
|
|
1545
|
+
config: {
|
|
1546
|
+
sessionId: this.sessionId,
|
|
1547
|
+
isResuming: this.isResuming,
|
|
1548
|
+
toolCount: Object.keys(this.tools || {}).length
|
|
1549
|
+
}
|
|
1550
|
+
});
|
|
1551
|
+
this.log("Successfully connected to Gemini Live API", {
|
|
1545
1552
|
sessionId: this.sessionId,
|
|
1546
1553
|
isResuming: this.isResuming,
|
|
1547
1554
|
toolCount: Object.keys(this.tools || {}).length
|
|
1555
|
+
});
|
|
1556
|
+
if (this.options.sessionConfig?.maxDuration) {
|
|
1557
|
+
this.startSessionDurationMonitor();
|
|
1548
1558
|
}
|
|
1549
|
-
})
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
toolCount: Object.keys(this.tools || {}).length
|
|
1554
|
-
});
|
|
1555
|
-
if (this.options.sessionConfig?.maxDuration) {
|
|
1556
|
-
this.startSessionDurationMonitor();
|
|
1559
|
+
} catch (error) {
|
|
1560
|
+
this.state = "disconnected";
|
|
1561
|
+
this.log("Connection failed", error);
|
|
1562
|
+
throw error;
|
|
1557
1563
|
}
|
|
1558
|
-
}
|
|
1559
|
-
this.state = "disconnected";
|
|
1560
|
-
this.log("Connection failed", error);
|
|
1561
|
-
throw error;
|
|
1562
|
-
}
|
|
1564
|
+
}, "gemini-live.connect")();
|
|
1563
1565
|
}
|
|
1564
1566
|
/**
|
|
1565
1567
|
* Disconnect from the Gemini Live API
|
|
@@ -1597,164 +1599,172 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
1597
1599
|
* Send text to be converted to speech
|
|
1598
1600
|
*/
|
|
1599
1601
|
async speak(input, options) {
|
|
1600
|
-
this.
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1602
|
+
return this.traced(async () => {
|
|
1603
|
+
this.validateConnectionState();
|
|
1604
|
+
if (typeof input !== "string") {
|
|
1605
|
+
const chunks = [];
|
|
1606
|
+
for await (const chunk of input) {
|
|
1607
|
+
chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
|
|
1608
|
+
}
|
|
1609
|
+
input = Buffer.concat(chunks).toString("utf-8");
|
|
1605
1610
|
}
|
|
1606
|
-
input
|
|
1607
|
-
|
|
1608
|
-
if (input.trim().length === 0) {
|
|
1609
|
-
throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
|
|
1610
|
-
}
|
|
1611
|
-
this.addToContext("user", input);
|
|
1612
|
-
const textMessage = {
|
|
1613
|
-
client_content: {
|
|
1614
|
-
turns: [
|
|
1615
|
-
{
|
|
1616
|
-
role: "user",
|
|
1617
|
-
parts: [
|
|
1618
|
-
{
|
|
1619
|
-
text: input
|
|
1620
|
-
}
|
|
1621
|
-
]
|
|
1622
|
-
}
|
|
1623
|
-
],
|
|
1624
|
-
turnComplete: true
|
|
1611
|
+
if (input.trim().length === 0) {
|
|
1612
|
+
throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
|
|
1625
1613
|
}
|
|
1626
|
-
|
|
1627
|
-
|
|
1628
|
-
|
|
1629
|
-
|
|
1630
|
-
|
|
1631
|
-
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1614
|
+
this.addToContext("user", input);
|
|
1615
|
+
const textMessage = {
|
|
1616
|
+
client_content: {
|
|
1617
|
+
turns: [
|
|
1618
|
+
{
|
|
1619
|
+
role: "user",
|
|
1620
|
+
parts: [
|
|
1621
|
+
{
|
|
1622
|
+
text: input
|
|
1623
|
+
}
|
|
1624
|
+
]
|
|
1636
1625
|
}
|
|
1637
|
-
|
|
1626
|
+
],
|
|
1627
|
+
turnComplete: true
|
|
1638
1628
|
}
|
|
1639
1629
|
};
|
|
1630
|
+
if (options && (options.speaker || options.languageCode || options.responseModalities)) {
|
|
1631
|
+
const updateMessage = {
|
|
1632
|
+
type: "session.update",
|
|
1633
|
+
session: {
|
|
1634
|
+
generation_config: {
|
|
1635
|
+
...options.responseModalities ? { response_modalities: options.responseModalities } : {},
|
|
1636
|
+
speech_config: {
|
|
1637
|
+
...options.languageCode ? { language_code: options.languageCode } : {},
|
|
1638
|
+
...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
};
|
|
1643
|
+
try {
|
|
1644
|
+
this.sendEvent("session.update", updateMessage);
|
|
1645
|
+
this.log("Applied per-turn runtime options", options);
|
|
1646
|
+
} catch (error) {
|
|
1647
|
+
this.log("Failed to apply per-turn runtime options", error);
|
|
1648
|
+
}
|
|
1649
|
+
}
|
|
1640
1650
|
try {
|
|
1641
|
-
this.sendEvent("
|
|
1642
|
-
this.log("
|
|
1651
|
+
this.sendEvent("client_content", textMessage);
|
|
1652
|
+
this.log("Text message sent", { text: input });
|
|
1643
1653
|
} catch (error) {
|
|
1644
|
-
this.log("Failed to
|
|
1654
|
+
this.log("Failed to send text message", error);
|
|
1655
|
+
throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
|
|
1645
1656
|
}
|
|
1646
|
-
}
|
|
1647
|
-
try {
|
|
1648
|
-
this.sendEvent("client_content", textMessage);
|
|
1649
|
-
this.log("Text message sent", { text: input });
|
|
1650
|
-
} catch (error) {
|
|
1651
|
-
this.log("Failed to send text message", error);
|
|
1652
|
-
throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
|
|
1653
|
-
}
|
|
1657
|
+
}, "gemini-live.speak")();
|
|
1654
1658
|
}
|
|
1655
1659
|
/**
|
|
1656
1660
|
* Send audio stream for processing
|
|
1657
1661
|
*/
|
|
1658
1662
|
async send(audioData) {
|
|
1659
|
-
this.
|
|
1660
|
-
|
|
1661
|
-
|
|
1662
|
-
|
|
1663
|
-
|
|
1664
|
-
|
|
1665
|
-
|
|
1666
|
-
|
|
1667
|
-
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1682
|
-
|
|
1683
|
-
|
|
1684
|
-
|
|
1663
|
+
return this.traced(async () => {
|
|
1664
|
+
this.validateConnectionState();
|
|
1665
|
+
if ("readable" in audioData && typeof audioData.on === "function") {
|
|
1666
|
+
const stream = audioData;
|
|
1667
|
+
stream.on("data", (chunk) => {
|
|
1668
|
+
try {
|
|
1669
|
+
const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
|
|
1670
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1671
|
+
this.sendEvent("realtime_input", message);
|
|
1672
|
+
} catch (error) {
|
|
1673
|
+
this.log("Failed to process audio chunk", error);
|
|
1674
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
|
|
1675
|
+
}
|
|
1676
|
+
});
|
|
1677
|
+
stream.on("error", (error) => {
|
|
1678
|
+
this.log("Audio stream error", error);
|
|
1679
|
+
this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
|
|
1680
|
+
});
|
|
1681
|
+
stream.on("end", () => {
|
|
1682
|
+
this.log("Audio stream ended");
|
|
1683
|
+
});
|
|
1684
|
+
} else {
|
|
1685
|
+
const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
|
|
1686
|
+
const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
|
|
1687
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
|
|
1688
|
+
this.sendEvent("realtime_input", message);
|
|
1689
|
+
}
|
|
1690
|
+
}, "gemini-live.send")();
|
|
1685
1691
|
}
|
|
1686
1692
|
/**
|
|
1687
1693
|
* Process speech from audio stream (traditional STT interface)
|
|
1688
1694
|
*/
|
|
1689
1695
|
async listen(audioStream, _options) {
|
|
1690
|
-
this.
|
|
1691
|
-
|
|
1692
|
-
|
|
1693
|
-
|
|
1694
|
-
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
};
|
|
1698
|
-
const onError = (error) => {
|
|
1699
|
-
throw new Error(`Transcription failed: ${error.message}`);
|
|
1700
|
-
};
|
|
1701
|
-
const onSession = (data) => {
|
|
1702
|
-
if (data.state === "disconnected") {
|
|
1703
|
-
throw new Error("Session disconnected during transcription");
|
|
1704
|
-
}
|
|
1705
|
-
};
|
|
1706
|
-
this.on("writing", onWriting);
|
|
1707
|
-
this.on("error", onError);
|
|
1708
|
-
this.on("session", onSession);
|
|
1709
|
-
try {
|
|
1710
|
-
const result = await this.audioStreamManager.handleAudioTranscription(
|
|
1711
|
-
audioStream,
|
|
1712
|
-
(base64Audio) => {
|
|
1713
|
-
return new Promise((resolve, reject) => {
|
|
1714
|
-
try {
|
|
1715
|
-
const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
|
|
1716
|
-
const cleanup = () => {
|
|
1717
|
-
this.off("turnComplete", onTurnComplete);
|
|
1718
|
-
this.off("error", onErr);
|
|
1719
|
-
};
|
|
1720
|
-
const onTurnComplete = () => {
|
|
1721
|
-
cleanup();
|
|
1722
|
-
resolve(transcriptionText.trim());
|
|
1723
|
-
};
|
|
1724
|
-
const onErr = (e) => {
|
|
1725
|
-
cleanup();
|
|
1726
|
-
reject(new Error(e.message));
|
|
1727
|
-
};
|
|
1728
|
-
this.on("turnComplete", onTurnComplete);
|
|
1729
|
-
this.on("error", onErr);
|
|
1730
|
-
this.sendEvent("client_content", message);
|
|
1731
|
-
this.log("Sent audio for transcription");
|
|
1732
|
-
} catch (err) {
|
|
1733
|
-
reject(err);
|
|
1734
|
-
}
|
|
1735
|
-
});
|
|
1736
|
-
},
|
|
1737
|
-
(error) => {
|
|
1738
|
-
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
|
|
1696
|
+
return this.traced(async () => {
|
|
1697
|
+
this.validateConnectionState();
|
|
1698
|
+
let transcriptionText = "";
|
|
1699
|
+
const onWriting = (data) => {
|
|
1700
|
+
if (data.role === "user") {
|
|
1701
|
+
transcriptionText += data.text;
|
|
1702
|
+
this.log("Received transcription text:", { text: data.text, total: transcriptionText });
|
|
1739
1703
|
}
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
1744
|
-
|
|
1745
|
-
|
|
1746
|
-
|
|
1704
|
+
};
|
|
1705
|
+
const onError = (error) => {
|
|
1706
|
+
throw new Error(`Transcription failed: ${error.message}`);
|
|
1707
|
+
};
|
|
1708
|
+
const onSession = (data) => {
|
|
1709
|
+
if (data.state === "disconnected") {
|
|
1710
|
+
throw new Error("Session disconnected during transcription");
|
|
1711
|
+
}
|
|
1712
|
+
};
|
|
1713
|
+
this.on("writing", onWriting);
|
|
1714
|
+
this.on("error", onError);
|
|
1715
|
+
this.on("session", onSession);
|
|
1716
|
+
try {
|
|
1717
|
+
const result = await this.audioStreamManager.handleAudioTranscription(
|
|
1718
|
+
audioStream,
|
|
1719
|
+
(base64Audio) => {
|
|
1720
|
+
return new Promise((resolve, reject) => {
|
|
1721
|
+
try {
|
|
1722
|
+
const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
|
|
1723
|
+
const cleanup = () => {
|
|
1724
|
+
this.off("turnComplete", onTurnComplete);
|
|
1725
|
+
this.off("error", onErr);
|
|
1726
|
+
};
|
|
1727
|
+
const onTurnComplete = () => {
|
|
1728
|
+
cleanup();
|
|
1729
|
+
resolve(transcriptionText.trim());
|
|
1730
|
+
};
|
|
1731
|
+
const onErr = (e) => {
|
|
1732
|
+
cleanup();
|
|
1733
|
+
reject(new Error(e.message));
|
|
1734
|
+
};
|
|
1735
|
+
this.on("turnComplete", onTurnComplete);
|
|
1736
|
+
this.on("error", onErr);
|
|
1737
|
+
this.sendEvent("client_content", message);
|
|
1738
|
+
this.log("Sent audio for transcription");
|
|
1739
|
+
} catch (err) {
|
|
1740
|
+
reject(err);
|
|
1741
|
+
}
|
|
1742
|
+
});
|
|
1743
|
+
},
|
|
1744
|
+
(error) => {
|
|
1745
|
+
this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
|
|
1746
|
+
}
|
|
1747
|
+
);
|
|
1748
|
+
return result;
|
|
1749
|
+
} finally {
|
|
1750
|
+
this.off("writing", onWriting);
|
|
1751
|
+
this.off("error", onError);
|
|
1752
|
+
this.off("session", onSession);
|
|
1753
|
+
}
|
|
1754
|
+
}, "gemini-live.listen")();
|
|
1747
1755
|
}
|
|
1748
1756
|
/**
|
|
1749
1757
|
* Get available speakers/voices
|
|
1750
1758
|
*/
|
|
1751
1759
|
async getSpeakers() {
|
|
1752
|
-
return
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
|
|
1756
|
-
|
|
1757
|
-
|
|
1760
|
+
return this.traced(async () => {
|
|
1761
|
+
return [
|
|
1762
|
+
{ voiceId: "Puck", description: "Conversational, friendly" },
|
|
1763
|
+
{ voiceId: "Charon", description: "Deep, authoritative" },
|
|
1764
|
+
{ voiceId: "Kore", description: "Neutral, professional" },
|
|
1765
|
+
{ voiceId: "Fenrir", description: "Warm, approachable" }
|
|
1766
|
+
];
|
|
1767
|
+
}, "gemini-live.getSpeakers")();
|
|
1758
1768
|
}
|
|
1759
1769
|
/**
|
|
1760
1770
|
* Resume a previous session using a session handle
|
|
@@ -2259,6 +2269,18 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2259
2269
|
role: "assistant"
|
|
2260
2270
|
});
|
|
2261
2271
|
}
|
|
2272
|
+
if (part.functionCall) {
|
|
2273
|
+
this.log("Found function call in serverContent.modelTurn.parts", part.functionCall);
|
|
2274
|
+
const toolCallData = {
|
|
2275
|
+
toolCall: {
|
|
2276
|
+
name: part.functionCall.name,
|
|
2277
|
+
args: part.functionCall.args || {},
|
|
2278
|
+
id: part.functionCall.id || crypto.randomUUID()
|
|
2279
|
+
}
|
|
2280
|
+
};
|
|
2281
|
+
void this.handleToolCall(toolCallData);
|
|
2282
|
+
continue;
|
|
2283
|
+
}
|
|
2262
2284
|
if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
|
|
2263
2285
|
try {
|
|
2264
2286
|
const audioData = part.inlineData.data;
|
|
@@ -2333,9 +2355,24 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2333
2355
|
if (!data.toolCall) {
|
|
2334
2356
|
return;
|
|
2335
2357
|
}
|
|
2336
|
-
|
|
2337
|
-
|
|
2338
|
-
|
|
2358
|
+
let toolCalls = [];
|
|
2359
|
+
if (data.toolCall.functionCalls && Array.isArray(data.toolCall.functionCalls)) {
|
|
2360
|
+
toolCalls = data.toolCall.functionCalls;
|
|
2361
|
+
} else if (data.toolCall.name) {
|
|
2362
|
+
toolCalls = [{ name: data.toolCall.name, args: data.toolCall.args, id: data.toolCall.id }];
|
|
2363
|
+
}
|
|
2364
|
+
for (const toolCall of toolCalls) {
|
|
2365
|
+
const toolName = toolCall.name || "";
|
|
2366
|
+
const toolArgs = toolCall.args || {};
|
|
2367
|
+
const toolId = toolCall.id || crypto.randomUUID();
|
|
2368
|
+
await this.processSingleToolCall(toolName, toolArgs, toolId);
|
|
2369
|
+
}
|
|
2370
|
+
}
|
|
2371
|
+
/**
|
|
2372
|
+
* Process a single tool call
|
|
2373
|
+
* @private
|
|
2374
|
+
*/
|
|
2375
|
+
async processSingleToolCall(toolName, toolArgs, toolId) {
|
|
2339
2376
|
this.log("Processing tool call", { toolName, toolArgs, toolId });
|
|
2340
2377
|
this.emit("toolCall", {
|
|
2341
2378
|
name: toolName,
|
|
@@ -2355,30 +2392,44 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2355
2392
|
let result;
|
|
2356
2393
|
if (tool.execute) {
|
|
2357
2394
|
this.log("Executing tool", { toolName, toolArgs });
|
|
2358
|
-
result = await tool.execute(
|
|
2395
|
+
result = await tool.execute(
|
|
2396
|
+
{ context: toolArgs, runtimeContext: this.runtimeContext },
|
|
2397
|
+
{
|
|
2398
|
+
toolCallId: toolId,
|
|
2399
|
+
messages: []
|
|
2400
|
+
}
|
|
2401
|
+
);
|
|
2359
2402
|
this.log("Tool executed successfully", { toolName, result });
|
|
2360
2403
|
} else {
|
|
2361
2404
|
this.log("Tool has no execute function", { toolName });
|
|
2362
2405
|
result = { error: "Tool has no execute function" };
|
|
2363
2406
|
}
|
|
2364
2407
|
const toolResultMessage = {
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2408
|
+
toolResponse: {
|
|
2409
|
+
functionResponses: [
|
|
2410
|
+
{
|
|
2411
|
+
id: toolId,
|
|
2412
|
+
response: result
|
|
2413
|
+
}
|
|
2414
|
+
]
|
|
2368
2415
|
}
|
|
2369
2416
|
};
|
|
2370
|
-
this.sendEvent("
|
|
2417
|
+
this.sendEvent("toolResponse", toolResultMessage);
|
|
2371
2418
|
this.log("Tool result sent", { toolName, toolId, result });
|
|
2372
2419
|
} catch (error) {
|
|
2373
2420
|
const errorMessage = error instanceof Error ? error.message : "Unknown error";
|
|
2374
2421
|
this.log("Tool execution failed", { toolName, error: errorMessage });
|
|
2375
2422
|
const errorResultMessage = {
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2423
|
+
toolResponse: {
|
|
2424
|
+
functionResponses: [
|
|
2425
|
+
{
|
|
2426
|
+
id: toolId,
|
|
2427
|
+
response: { error: errorMessage }
|
|
2428
|
+
}
|
|
2429
|
+
]
|
|
2379
2430
|
}
|
|
2380
2431
|
};
|
|
2381
|
-
this.sendEvent("
|
|
2432
|
+
this.sendEvent("toolResponse", errorResultMessage);
|
|
2382
2433
|
this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
|
|
2383
2434
|
toolName,
|
|
2384
2435
|
toolArgs,
|
|
@@ -2597,6 +2648,8 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2597
2648
|
message = data;
|
|
2598
2649
|
} else if (type === "realtime_input" && data.realtime_input) {
|
|
2599
2650
|
message = data;
|
|
2651
|
+
} else if (type === "toolResponse" && data.toolResponse) {
|
|
2652
|
+
message = data;
|
|
2600
2653
|
} else if (type === "session.update" && data.session) {
|
|
2601
2654
|
message = data;
|
|
2602
2655
|
} else {
|
|
@@ -2622,14 +2675,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2622
2675
|
* inputSchema: z.object({
|
|
2623
2676
|
* location: z.string().describe("The city and state, e.g. San Francisco, CA"),
|
|
2624
2677
|
* }),
|
|
2625
|
-
* execute: async (
|
|
2678
|
+
* execute: async ({ context }) => {
|
|
2626
2679
|
* // Fetch weather data from an API
|
|
2627
2680
|
* const response = await fetch(
|
|
2628
|
-
* `https://api.weather.com?location=${encodeURIComponent(
|
|
2681
|
+
* `https://api.weather.com?location=${encodeURIComponent(context.location)}`,
|
|
2629
2682
|
* );
|
|
2630
2683
|
* const data = await response.json();
|
|
2631
2684
|
* return {
|
|
2632
|
-
* message: `The current temperature in ${
|
|
2685
|
+
* message: `The current temperature in ${context.location} is ${data.temperature}°F with ${data.conditions}.`,
|
|
2633
2686
|
* };
|
|
2634
2687
|
* },
|
|
2635
2688
|
* });
|
|
@@ -2647,7 +2700,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
|
|
|
2647
2700
|
* Get the current tools configured for this voice instance
|
|
2648
2701
|
* @returns Object containing the current tools
|
|
2649
2702
|
*/
|
|
2650
|
-
|
|
2703
|
+
getTools() {
|
|
2651
2704
|
return this.tools;
|
|
2652
2705
|
}
|
|
2653
2706
|
log(message, ...args) {
|