@mastra/voice-google-gemini-live 0.0.0-remove-unused-model-providers-api-20251030210744 → 0.0.0-safe-stringify-telemetry-20251205024938
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -8
- package/dist/index.cjs +277 -204
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +19 -4
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +277 -204
- package/dist/index.js.map +1 -1
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +13 -8
package/dist/index.cjs
CHANGED
@@ -1262,7 +1262,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
   sessionDurationTimeout;
   // Tool integration properties
   tools;
-  …
+  runtimeContext;
   // Store the configuration options
   options;
   /**

@@ -1499,67 +1499,70 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
   /**
    * Establish connection to the Gemini Live API
    */
-  async connect({ …
-  …
-    this.…
-  …
-    this.requestContext = requestContext;
-    this.emit("session", { state: "connecting" });
-    try {
-      let wsUrl;
-      let headers = {};
-      if (this.options.vertexAI) {
-        wsUrl = `wss://${this.options.location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.PredictionService.ServerStreamingPredict`;
-        await this.authManager.initialize();
-        const accessToken = await this.authManager.getAccessToken();
-        headers = { headers: { Authorization: `Bearer ${accessToken}` } };
-        this.log("Using Vertex AI authentication with OAuth token");
-      } else {
-        wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
-        headers = {
-          headers: {
-            "x-goog-api-key": this.options.apiKey || "",
-            "Content-Type": "application/json"
-          }
-        };
-        this.log("Using Live API authentication with API key");
-      }
-      this.log("Connecting to:", wsUrl);
-      this.ws = new ws.WebSocket(wsUrl, void 0, headers);
-      this.connectionManager.setWebSocket(this.ws);
-      this.setupEventListeners();
-      await this.connectionManager.waitForOpen();
-      if (this.isResuming && this.sessionHandle) {
-        await this.sendSessionResumption();
-      } else {
-        this.sendInitialConfig();
-        this.sessionStartTime = Date.now();
-        this.sessionId = crypto.randomUUID();
+  async connect({ runtimeContext } = {}) {
+    return this.traced(async () => {
+      if (this.state === "connected") {
+        this.log("Already connected to Gemini Live API");
+        return;
       }
-      …
-      this.state …
-      …
+      this.runtimeContext = runtimeContext;
+      this.emit("session", { state: "connecting" });
+      try {
+        let wsUrl;
+        let headers = {};
+        if (this.options.vertexAI) {
+          const location = this.getVertexLocation();
+          wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
+          await this.authManager.initialize();
+          const accessToken = await this.authManager.getAccessToken();
+          headers = { headers: { Authorization: `Bearer ${accessToken}` } };
+          this.log("Using Vertex AI authentication with OAuth token");
+        } else {
+          wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
+          headers = {
+            headers: {
+              "x-goog-api-key": this.options.apiKey || "",
+              "Content-Type": "application/json"
+            }
+          };
+          this.log("Using Live API authentication with API key");
+        }
+        this.log("Connecting to:", wsUrl);
+        this.ws = new ws.WebSocket(wsUrl, void 0, headers);
+        this.connectionManager.setWebSocket(this.ws);
+        this.setupEventListeners();
+        await this.connectionManager.waitForOpen();
+        if (this.isResuming && this.sessionHandle) {
+          await this.sendSessionResumption();
+        } else {
+          this.sendInitialConfig();
+          this.sessionStartTime = Date.now();
+          this.sessionId = crypto.randomUUID();
+        }
+        await this.waitForSessionCreated();
+        this.state = "connected";
+        this.emit("session", {
+          state: "connected",
+          config: {
+            sessionId: this.sessionId,
+            isResuming: this.isResuming,
+            toolCount: Object.keys(this.tools || {}).length
+          }
+        });
+        this.log("Successfully connected to Gemini Live API", {
           sessionId: this.sessionId,
           isResuming: this.isResuming,
           toolCount: Object.keys(this.tools || {}).length
+        });
+        if (this.options.sessionConfig?.maxDuration) {
+          this.startSessionDurationMonitor();
         }
-      })
-      …
-      toolCount: Object.keys(this.tools || {}).length
-      });
-      if (this.options.sessionConfig?.maxDuration) {
-        this.startSessionDurationMonitor();
+      } catch (error) {
+        this.state = "disconnected";
+        this.log("Connection failed", error);
+        throw error;
       }
-    }
-    this.state = "disconnected";
-    this.log("Connection failed", error);
-    throw error;
-    }
+    }, "gemini-live.connect")();
   }
   /**
    * Disconnect from the Gemini Live API

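The rewritten `connect()` accepts an options bag (defaulting to `{}`), stores the caller's `runtimeContext` for later tool execution, and wraps its body in `this.traced(..., "gemini-live.connect")` for telemetry. A minimal caller sketch, assuming the package exports `GeminiLiveVoice` and that `RuntimeContext` comes from `@mastra/core/runtime-context` (neither import path appears in this diff):

```js
import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";
import { RuntimeContext } from "@mastra/core/runtime-context";

const voice = new GeminiLiveVoice({ apiKey: process.env.GOOGLE_API_KEY });

// Values set here are forwarded to tool.execute() via this.runtimeContext
// (see the tool-execution hunk further down).
const runtimeContext = new RuntimeContext();
runtimeContext.set("userId", "user-123"); // illustrative key/value

await voice.connect({ runtimeContext }); // connect() with no argument still works
```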
@@ -1597,164 +1600,172 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
    * Send text to be converted to speech
    */
   async speak(input, options) {
-    this.…
-    …
+    return this.traced(async () => {
+      this.validateConnectionState();
+      if (typeof input !== "string") {
+        const chunks = [];
+        for await (const chunk of input) {
+          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
+        }
+        input = Buffer.concat(chunks).toString("utf-8");
       }
-    input…
-    …
-    if (input.trim().length === 0) {
-      throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
-    }
-    this.addToContext("user", input);
-    const textMessage = {
-      client_content: {
-        turns: [
-          {
-            role: "user",
-            parts: [
-              {
-                text: input
-              }
-            ]
-          }
-        ],
-        turnComplete: true
+      if (input.trim().length === 0) {
+        throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
       }
-    …
+      this.addToContext("user", input);
+      const textMessage = {
+        client_content: {
+          turns: [
+            {
+              role: "user",
+              parts: [
+                {
+                  text: input
+                }
+              ]
             }
-    …
+          ],
+          turnComplete: true
         }
       };
+      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
+        const updateMessage = {
+          type: "session.update",
+          session: {
+            generation_config: {
+              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
+              speech_config: {
+                ...options.languageCode ? { language_code: options.languageCode } : {},
+                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+              }
+            }
+          }
+        };
+        try {
+          this.sendEvent("session.update", updateMessage);
+          this.log("Applied per-turn runtime options", options);
+        } catch (error) {
+          this.log("Failed to apply per-turn runtime options", error);
+        }
+      }
       try {
-        this.sendEvent("…
-        this.log("…
+        this.sendEvent("client_content", textMessage);
+        this.log("Text message sent", { text: input });
       } catch (error) {
-        this.log("Failed to …
+        this.log("Failed to send text message", error);
+        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
       }
-    }
-    try {
-      this.sendEvent("client_content", textMessage);
-      this.log("Text message sent", { text: input });
-    } catch (error) {
-      this.log("Failed to send text message", error);
-      throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
-    }
+    }, "gemini-live.speak")();
   }
   /**
    * Send audio stream for processing
    */
   async send(audioData) {
-    this.…
-    …
+    return this.traced(async () => {
+      this.validateConnectionState();
+      if ("readable" in audioData && typeof audioData.on === "function") {
+        const stream = audioData;
+        stream.on("data", (chunk) => {
+          try {
+            const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
+            const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+            this.sendEvent("realtime_input", message);
+          } catch (error) {
+            this.log("Failed to process audio chunk", error);
+            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
+          }
+        });
+        stream.on("error", (error) => {
+          this.log("Audio stream error", error);
+          this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
+        });
+        stream.on("end", () => {
+          this.log("Audio stream ended");
+        });
+      } else {
+        const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
+        const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
+        const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+        this.sendEvent("realtime_input", message);
+      }
+    }, "gemini-live.send")();
   }
   /**
    * Process speech from audio stream (traditional STT interface)
    */
   async listen(audioStream, _options) {
-    this.…
-    …
-    };
-    const onError = (error) => {
-      throw new Error(`Transcription failed: ${error.message}`);
-    };
-    const onSession = (data) => {
-      if (data.state === "disconnected") {
-        throw new Error("Session disconnected during transcription");
-      }
-    };
-    this.on("writing", onWriting);
-    this.on("error", onError);
-    this.on("session", onSession);
-    try {
-      const result = await this.audioStreamManager.handleAudioTranscription(
-        audioStream,
-        (base64Audio) => {
-          return new Promise((resolve, reject) => {
-            try {
-              const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
-              const cleanup = () => {
-                this.off("turnComplete", onTurnComplete);
-                this.off("error", onErr);
-              };
-              const onTurnComplete = () => {
-                cleanup();
-                resolve(transcriptionText.trim());
-              };
-              const onErr = (e) => {
-                cleanup();
-                reject(new Error(e.message));
-              };
-              this.on("turnComplete", onTurnComplete);
-              this.on("error", onErr);
-              this.sendEvent("client_content", message);
-              this.log("Sent audio for transcription");
-            } catch (err) {
-              reject(err);
-            }
-          });
-        },
-        (error) => {
-          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+    return this.traced(async () => {
+      this.validateConnectionState();
+      let transcriptionText = "";
+      const onWriting = (data) => {
+        if (data.role === "user") {
+          transcriptionText += data.text;
+          this.log("Received transcription text:", { text: data.text, total: transcriptionText });
         }
-      …
+      };
+      const onError = (error) => {
+        throw new Error(`Transcription failed: ${error.message}`);
+      };
+      const onSession = (data) => {
+        if (data.state === "disconnected") {
+          throw new Error("Session disconnected during transcription");
+        }
+      };
+      this.on("writing", onWriting);
+      this.on("error", onError);
+      this.on("session", onSession);
+      try {
+        const result = await this.audioStreamManager.handleAudioTranscription(
+          audioStream,
+          (base64Audio) => {
+            return new Promise((resolve, reject) => {
+              try {
+                const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
+                const cleanup = () => {
+                  this.off("turnComplete", onTurnComplete);
+                  this.off("error", onErr);
+                };
+                const onTurnComplete = () => {
+                  cleanup();
+                  resolve(transcriptionText.trim());
+                };
+                const onErr = (e) => {
+                  cleanup();
+                  reject(new Error(e.message));
+                };
+                this.on("turnComplete", onTurnComplete);
+                this.on("error", onErr);
+                this.sendEvent("client_content", message);
+                this.log("Sent audio for transcription");
+              } catch (err) {
+                reject(err);
+              }
+            });
+          },
+          (error) => {
+            this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+          }
+        );
+        return result;
+      } finally {
+        this.off("writing", onWriting);
+        this.off("error", onError);
+        this.off("session", onSession);
+      }
+    }, "gemini-live.listen")();
   }
   /**
    * Get available speakers/voices
    */
   async getSpeakers() {
-    return …
-    …
+    return this.traced(async () => {
+      return [
+        { voiceId: "Puck", description: "Conversational, friendly" },
+        { voiceId: "Charon", description: "Deep, authoritative" },
+        { voiceId: "Kore", description: "Neutral, professional" },
+        { voiceId: "Fenrir", description: "Warm, approachable" }
+      ];
+    }, "gemini-live.getSpeakers")();
   }
   /**
    * Resume a previous session using a session handle

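`speak()` now applies per-turn overrides: when `speaker`, `languageCode`, or `responseModalities` is passed, it sends a `session.update` before the text turn. A usage sketch; the option names come from the hunk above, the values are illustrative:

```js
await voice.speak("Hello from Mastra", {
  speaker: "Puck",               // -> speech_config.voice_config.prebuilt_voice_config.voice_name
  languageCode: "en-US",         // -> speech_config.language_code
  responseModalities: ["AUDIO"]  // -> generation_config.response_modalities
});
```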
@@ -2259,6 +2270,18 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
           role: "assistant"
         });
       }
+      if (part.functionCall) {
+        this.log("Found function call in serverContent.modelTurn.parts", part.functionCall);
+        const toolCallData = {
+          toolCall: {
+            name: part.functionCall.name,
+            args: part.functionCall.args || {},
+            id: part.functionCall.id || crypto.randomUUID()
+          }
+        };
+        void this.handleToolCall(toolCallData);
+        continue;
+      }
       if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
         try {
           const audioData = part.inlineData.data;

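Function calls can now arrive inline in `serverContent.modelTurn.parts`; any part carrying `functionCall` is routed to `handleToolCall` and skipped by the audio/text handling below it. A sketch of the shape being matched, with illustrative values:

```js
// A modelTurn part that now triggers tool handling instead of audio/text processing.
const part = {
  functionCall: {
    name: "getWeather",       // forwarded as toolCall.name
    args: { city: "Berlin" }, // defaults to {} when absent
    id: "call-1"              // crypto.randomUUID() is substituted when absent
  }
};
```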
@@ -2333,9 +2356,24 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
     if (!data.toolCall) {
       return;
     }
-    …
+    let toolCalls = [];
+    if (data.toolCall.functionCalls && Array.isArray(data.toolCall.functionCalls)) {
+      toolCalls = data.toolCall.functionCalls;
+    } else if (data.toolCall.name) {
+      toolCalls = [{ name: data.toolCall.name, args: data.toolCall.args, id: data.toolCall.id }];
+    }
+    for (const toolCall of toolCalls) {
+      const toolName = toolCall.name || "";
+      const toolArgs = toolCall.args || {};
+      const toolId = toolCall.id || crypto.randomUUID();
+      await this.processSingleToolCall(toolName, toolArgs, toolId);
+    }
+  }
+  /**
+   * Process a single tool call
+   * @private
+   */
+  async processSingleToolCall(toolName, toolArgs, toolId) {
     this.log("Processing tool call", { toolName, toolArgs, toolId });
     this.emit("toolCall", {
       name: toolName,

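`handleToolCall` now normalizes the two payload shapes the server may emit, a batched `functionCalls` array and a single flattened call, and dispatches each through the new `processSingleToolCall`. The two accepted inputs, with illustrative values:

```js
// Batched shape: several calls in one toolCall message.
const batched = {
  toolCall: {
    functionCalls: [
      { name: "getWeather", args: { city: "Berlin" }, id: "call-1" },
      { name: "getTime", args: {}, id: "call-2" }
    ]
  }
};

// Flattened shape: a single call with name/args/id at the top level.
const single = { toolCall: { name: "getWeather", args: { city: "Berlin" } } };
```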
@@ -2356,7 +2394,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
       if (tool.execute) {
         this.log("Executing tool", { toolName, toolArgs });
         result = await tool.execute(
-          { context: toolArgs, …
+          { context: toolArgs, runtimeContext: this.runtimeContext },
           {
             toolCallId: toolId,
             messages: []

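Because `runtimeContext` is now forwarded in the first argument to `tool.execute`, a tool can read request-scoped values set at `connect()` time. A sketch of a compatible tool; `createTool` from `@mastra/core/tools` and the zod schema are assumptions about the surrounding Mastra setup, not part of this diff:

```js
import { createTool } from "@mastra/core/tools";
import { z } from "zod";

// Sketch only: execute receives { context, runtimeContext } per the hunk above.
export const weatherTool = createTool({
  id: "getWeather",
  description: "Look up the weather for a city",
  inputSchema: z.object({ city: z.string() }),
  execute: async ({ context, runtimeContext }) => {
    const units = runtimeContext?.get("units") ?? "metric"; // set by the caller at connect() time
    return { city: context.city, units, tempC: 21 };        // fabricated demo payload
  }
});
```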
@@ -2368,23 +2406,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
         result = { error: "Tool has no execute function" };
       }
       const toolResultMessage = {
-        …
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: result
+            }
+          ]
         }
       };
-      this.sendEvent("…
+      this.sendEvent("toolResponse", toolResultMessage);
       this.log("Tool result sent", { toolName, toolId, result });
     } catch (error) {
       const errorMessage = error instanceof Error ? error.message : "Unknown error";
       this.log("Tool execution failed", { toolName, error: errorMessage });
       const errorResultMessage = {
-        …
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: { error: errorMessage }
+            }
+          ]
         }
       };
-      this.sendEvent("…
+      this.sendEvent("toolResponse", errorResultMessage);
       this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
         toolName,
         toolArgs,

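Success and error results are now wrapped in the Live API's `toolResponse.functionResponses` envelope and sent through the new `toolResponse` branch of `sendEvent` (see the hunk near the end of this file). The serialized messages, with illustrative values:

```js
// Success and error share one envelope; only `response` differs.
const ok  = { toolResponse: { functionResponses: [{ id: "call-1", response: { tempC: 21 } }] } };
const err = { toolResponse: { functionResponses: [{ id: "call-1", response: { error: "boom" } }] } };
```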
@@ -2444,6 +2490,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
     }
     return "text";
   }
+  /**
+   * Resolve Vertex AI location with sensible default
+   * @private
+   */
+  getVertexLocation() {
+    return this.options.location?.trim() || "us-central1";
+  }
+  /**
+   * Resolve the correct model identifier for Gemini API or Vertex AI
+   * @private
+   */
+  resolveModelIdentifier() {
+    const model = this.options.model ?? DEFAULT_MODEL;
+    if (!this.options.vertexAI) {
+      return `models/${model}`;
+    }
+    if (!this.options.project) {
+      throw this.createAndEmitError(
+        "project_id_missing" /* PROJECT_ID_MISSING */,
+        "Google Cloud project ID is required when using Vertex AI."
+      );
+    }
+    const location = this.getVertexLocation();
+    return `projects/${this.options.project}/locations/${location}/publishers/google/models/${model}`;
+  }
   /**
    * Send initial configuration to Gemini Live API
    * @private

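`getVertexLocation()` and `resolveModelIdentifier()` centralize the location default and the model path. Illustrative outputs; the model name is a placeholder, since the value of `DEFAULT_MODEL` does not appear in this diff:

```js
// Gemini API (vertexAI falsy):
new GeminiLiveVoice({ apiKey: "...", model: "gemini-2.0-flash-exp" });
// resolveModelIdentifier() -> "models/gemini-2.0-flash-exp"

// Vertex AI: project is required (PROJECT_ID_MISSING is thrown otherwise);
// an unset location falls back to "us-central1".
new GeminiLiveVoice({ vertexAI: true, project: "my-project", model: "gemini-2.0-flash-exp" });
// resolveModelIdentifier() ->
// "projects/my-project/locations/us-central1/publishers/google/models/gemini-2.0-flash-exp"
```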
@@ -2454,7 +2525,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
     }
     const setupMessage = {
       setup: {
-        model: …
+        model: this.resolveModelIdentifier()
       }
     };
     if (this.options.instructions) {

@@ -2603,6 +2674,8 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
       message = data;
     } else if (type === "realtime_input" && data.realtime_input) {
       message = data;
+    } else if (type === "toolResponse" && data.toolResponse) {
+      message = data;
     } else if (type === "session.update" && data.session) {
       message = data;
     } else {

@@ -2653,7 +2726,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends voice.MastraVoice {
    * Get the current tools configured for this voice instance
    * @returns Object containing the current tools
    */
-  …
+  getTools() {
     return this.tools;
   }
   log(message, ...args) {