@mastra/voice-google-gemini-live 0.0.0-issue-7087-20250910004053 → 0.0.0-jail-fs-20260105160110
This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- package/CHANGELOG.md +292 -3
- package/README.md +3 -3
- package/dist/docs/README.md +31 -0
- package/dist/docs/SKILL.md +32 -0
- package/dist/docs/SOURCE_MAP.json +6 -0
- package/dist/docs/voice/01-overview.md +1019 -0
- package/dist/docs/voice/02-speech-to-speech.md +106 -0
- package/dist/docs/voice/03-reference.md +303 -0
- package/dist/index.cjs +275 -228
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +23 -8
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +275 -228
- package/dist/index.js.map +1 -1
- package/dist/managers/AudioStreamManager.d.ts +2 -2
- package/dist/managers/AudioStreamManager.d.ts.map +1 -1
- package/dist/managers/EventManager.d.ts +1 -1
- package/dist/managers/EventManager.d.ts.map +1 -1
- package/dist/managers/SessionManager.d.ts +1 -1
- package/dist/managers/SessionManager.d.ts.map +1 -1
- package/dist/managers/index.d.ts +11 -11
- package/dist/types.d.ts +10 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/utils/errors.d.ts +1 -1
- package/package.json +15 -10
package/dist/index.js (CHANGED)
```diff
@@ -562,7 +562,7 @@ var AudioStreamManager = class {
    */
   log(message, ...args) {
     if (this.debug) {
-      console.
+      console.info(`[AudioStreamManager] ${message}`, ...args);
     }
   }
   /**
```
```diff
@@ -841,7 +841,7 @@ var ConnectionManager = class {
    */
   log(message, ...args) {
     if (this.debug) {
-      console.
+      console.info(`[ConnectionManager] ${message}`, ...args);
     }
   }
 };
```
```diff
@@ -1115,7 +1115,7 @@ var AuthManager = class {
    */
   log(message, ...args) {
     if (this.config.debug) {
-      console.
+      console.info(`[AuthManager] ${message}`, ...args);
     }
   }
 };
```
```diff
@@ -1231,7 +1231,7 @@ var EventManager = class {
    */
   log(message, ...args) {
     if (this.debug) {
-      console.
+      console.info(`[EventManager] ${message}`, ...args);
     }
   }
 };
```
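All four managers now share one debug-gated logging shape: `console.info` with a `[ManagerName]` prefix, emitted only when the manager's debug flag is set. A minimal TypeScript sketch of the pattern; the class name and constructor option below are illustrative stand-ins, not the package's actual manager API:

```ts
// Illustrative sketch of the debug-gated logging pattern shown in the hunks
// above. The real managers (AudioStreamManager, ConnectionManager, AuthManager,
// EventManager) live under package/dist/managers and read their own debug flags.
class ExampleManager {
  constructor(private readonly debug: boolean = false) {}

  log(message: string, ...args: unknown[]): void {
    if (this.debug) {
      // The prefix identifies which manager produced the line.
      console.info(`[ExampleManager] ${message}`, ...args);
    }
  }
}

new ExampleManager(true).log("socket opened", { attempt: 1 });
```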
```diff
@@ -1260,7 +1260,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
   sessionDurationTimeout;
   // Tool integration properties
   tools;
-
+  requestContext;
   // Store the configuration options
   options;
   /**
```
```diff
@@ -1497,69 +1497,68 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
   /**
    * Establish connection to the Gemini Live API
    */
-  async connect({
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          }
-        };
-        this.log("Using Live API authentication with API key");
-      }
-      this.log("Connecting to:", wsUrl);
-      this.ws = new WebSocket(wsUrl, void 0, headers);
-      this.connectionManager.setWebSocket(this.ws);
-      this.setupEventListeners();
-      await this.connectionManager.waitForOpen();
-      if (this.isResuming && this.sessionHandle) {
-        await this.sendSessionResumption();
-      } else {
-        this.sendInitialConfig();
-        this.sessionStartTime = Date.now();
-        this.sessionId = randomUUID();
-      }
-      await this.waitForSessionCreated();
-      this.state = "connected";
-      this.emit("session", {
-        state: "connected",
-        config: {
-          sessionId: this.sessionId,
-          isResuming: this.isResuming,
-          toolCount: Object.keys(this.tools || {}).length
+  async connect({ requestContext } = {}) {
+    if (this.state === "connected") {
+      this.log("Already connected to Gemini Live API");
+      return;
+    }
+    this.requestContext = requestContext;
+    this.emit("session", { state: "connecting" });
+    try {
+      let wsUrl;
+      let headers = {};
+      if (this.options.vertexAI) {
+        const location = this.getVertexLocation();
+        wsUrl = `wss://${location}-aiplatform.googleapis.com/ws/google.cloud.aiplatform.v1beta1.LlmBidiService/BidiGenerateContent`;
+        await this.authManager.initialize();
+        const accessToken = await this.authManager.getAccessToken();
+        headers = { headers: { Authorization: `Bearer ${accessToken}` } };
+        this.log("Using Vertex AI authentication with OAuth token");
+      } else {
+        wsUrl = `wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContent`;
+        headers = {
+          headers: {
+            "x-goog-api-key": this.options.apiKey || "",
+            "Content-Type": "application/json"
           }
-        }
-        this.log("
+        };
+        this.log("Using Live API authentication with API key");
+      }
+      this.log("Connecting to:", wsUrl);
+      this.ws = new WebSocket(wsUrl, void 0, headers);
+      this.connectionManager.setWebSocket(this.ws);
+      this.setupEventListeners();
+      await this.connectionManager.waitForOpen();
+      if (this.isResuming && this.sessionHandle) {
+        await this.sendSessionResumption();
+      } else {
+        this.sendInitialConfig();
+        this.sessionStartTime = Date.now();
+        this.sessionId = randomUUID();
+      }
+      await this.waitForSessionCreated();
+      this.state = "connected";
+      this.emit("session", {
+        state: "connected",
+        config: {
           sessionId: this.sessionId,
           isResuming: this.isResuming,
           toolCount: Object.keys(this.tools || {}).length
-      });
-      if (this.options.sessionConfig?.maxDuration) {
-        this.startSessionDurationMonitor();
         }
-      }
-
-      this.
-
+      });
+      this.log("Successfully connected to Gemini Live API", {
+        sessionId: this.sessionId,
+        isResuming: this.isResuming,
+        toolCount: Object.keys(this.tools || {}).length
+      });
+      if (this.options.sessionConfig?.maxDuration) {
+        this.startSessionDurationMonitor();
       }
-  }
+    } catch (error) {
+      this.state = "disconnected";
+      this.log("Connection failed", error);
+      throw error;
+    }
   }
   /**
    * Disconnect from the Gemini Live API
```
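The rewritten `connect()` short-circuits when already connected, stores the optional `requestContext` for later tool execution, picks the Vertex AI (OAuth) or Live API (API-key) WebSocket endpoint, and wraps the whole handshake in a try/catch that resets `state` to `"disconnected"` on failure. A sketch of how a caller might drive it; the constructor options are inferred from the `this.options.*` reads in this diff (`apiKey`, `vertexAI`, `project`, `location`, `model`) and should be treated as assumptions rather than documented API:

```ts
import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";

// Option names inferred from this diff; verify against the package docs.
const voice = new GeminiLiveVoice({
  apiKey: process.env.GOOGLE_API_KEY, // Live API path: sent as "x-goog-api-key"
  // vertexAI: true, project: "my-gcp-project", location: "us-central1", // OAuth path
});

// connect() emits "session" events: { state: "connecting" }, then "connected".
voice.on("session", ({ state }) => console.info("session state:", state));

// requestContext is optional; it is stashed on the instance and later handed
// to tool execute() calls as { requestContext } (see the tool-call hunks below).
await voice.connect({ requestContext: { userId: "user-123" } });
```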
```diff
@@ -1597,172 +1596,164 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
    * Send text to be converted to speech
    */
   async speak(input, options) {
-
-
-
-
-
-        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
-      }
-      input = Buffer.concat(chunks).toString("utf-8");
-    }
-    if (input.trim().length === 0) {
-      throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
+    this.validateConnectionState();
+    if (typeof input !== "string") {
+      const chunks = [];
+      for await (const chunk of input) {
+        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
       }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        }
-      };
-      if (options && (options.speaker || options.languageCode || options.responseModalities)) {
-        const updateMessage = {
-          type: "session.update",
-          session: {
-            generation_config: {
-              ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
-              speech_config: {
-                ...options.languageCode ? { language_code: options.languageCode } : {},
-                ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
+      input = Buffer.concat(chunks).toString("utf-8");
+    }
+    if (input.trim().length === 0) {
+      throw this.createAndEmitError("invalid_audio_format" /* INVALID_AUDIO_FORMAT */, "Input text is empty");
+    }
+    this.addToContext("user", input);
+    const textMessage = {
+      client_content: {
+        turns: [
+          {
+            role: "user",
+            parts: [
+              {
+                text: input
               }
+            ]
+          }
+        ],
+        turnComplete: true
+      }
+    };
+    if (options && (options.speaker || options.languageCode || options.responseModalities)) {
+      const updateMessage = {
+        type: "session.update",
+        session: {
+          generation_config: {
+            ...options.responseModalities ? { response_modalities: options.responseModalities } : {},
+            speech_config: {
+              ...options.languageCode ? { language_code: options.languageCode } : {},
+              ...options.speaker ? { voice_config: { prebuilt_voice_config: { voice_name: options.speaker } } } : {}
             }
           }
-      };
-      try {
-        this.sendEvent("session.update", updateMessage);
-        this.log("Applied per-turn runtime options", options);
-      } catch (error) {
-        this.log("Failed to apply per-turn runtime options", error);
         }
-      }
+      };
       try {
-        this.sendEvent("
-        this.log("
+        this.sendEvent("session.update", updateMessage);
+        this.log("Applied per-turn runtime options", options);
       } catch (error) {
-        this.log("Failed to
-        throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
+        this.log("Failed to apply per-turn runtime options", error);
       }
-    }
+    }
+    try {
+      this.sendEvent("client_content", textMessage);
+      this.log("Text message sent", { text: input });
+    } catch (error) {
+      this.log("Failed to send text message", error);
+      throw this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to send text message", error);
+    }
   }
   /**
    * Send audio stream for processing
    */
   async send(audioData) {
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    }
-    }, "gemini-live.send")();
+    this.validateConnectionState();
+    if ("readable" in audioData && typeof audioData.on === "function") {
+      const stream = audioData;
+      stream.on("data", (chunk) => {
+        try {
+          const base64Audio = this.audioStreamManager.processAudioChunk(chunk);
+          const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+          this.sendEvent("realtime_input", message);
+        } catch (error) {
+          this.log("Failed to process audio chunk", error);
+          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Failed to process audio chunk", error);
+        }
+      });
+      stream.on("error", (error) => {
+        this.log("Audio stream error", error);
+        this.createAndEmitError("audio_stream_error" /* AUDIO_STREAM_ERROR */, "Audio stream error", error);
+      });
+      stream.on("end", () => {
+        this.log("Audio stream ended");
+      });
+    } else {
+      const validateAudio = this.audioStreamManager.validateAndConvertAudioInput(audioData);
+      const base64Audio = this.audioStreamManager.int16ArrayToBase64(validateAudio);
+      const message = this.audioStreamManager.createAudioMessage(base64Audio, "realtime");
+      this.sendEvent("realtime_input", message);
+    }
   }
   /**
    * Process speech from audio stream (traditional STT interface)
    */
   async listen(audioStream, _options) {
-
-
-
-
-
-
-        this.log("Received transcription text:", { text: data.text, total: transcriptionText });
-      }
-    };
-    const onError = (error) => {
-      throw new Error(`Transcription failed: ${error.message}`);
-    };
-    const onSession = (data) => {
-      if (data.state === "disconnected") {
-        throw new Error("Session disconnected during transcription");
-      }
-    };
-    this.on("writing", onWriting);
-    this.on("error", onError);
-    this.on("session", onSession);
-    try {
-      const result = await this.audioStreamManager.handleAudioTranscription(
-        audioStream,
-        (base64Audio) => {
-          return new Promise((resolve, reject) => {
-            try {
-              const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
-              const cleanup = () => {
-                this.off("turnComplete", onTurnComplete);
-                this.off("error", onErr);
-              };
-              const onTurnComplete = () => {
-                cleanup();
-                resolve(transcriptionText.trim());
-              };
-              const onErr = (e) => {
-                cleanup();
-                reject(new Error(e.message));
-              };
-              this.on("turnComplete", onTurnComplete);
-              this.on("error", onErr);
-              this.sendEvent("client_content", message);
-              this.log("Sent audio for transcription");
-            } catch (err) {
-              reject(err);
-            }
-          });
-        },
-        (error) => {
-          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
-        }
-      );
-      return result;
-    } finally {
-      this.off("writing", onWriting);
-      this.off("error", onError);
-      this.off("session", onSession);
+    this.validateConnectionState();
+    let transcriptionText = "";
+    const onWriting = (data) => {
+      if (data.role === "user") {
+        transcriptionText += data.text;
+        this.log("Received transcription text:", { text: data.text, total: transcriptionText });
       }
-  }
+    };
+    const onError = (error) => {
+      throw new Error(`Transcription failed: ${error.message}`);
+    };
+    const onSession = (data) => {
+      if (data.state === "disconnected") {
+        throw new Error("Session disconnected during transcription");
+      }
+    };
+    this.on("writing", onWriting);
+    this.on("error", onError);
+    this.on("session", onSession);
+    try {
+      const result = await this.audioStreamManager.handleAudioTranscription(
+        audioStream,
+        (base64Audio) => {
+          return new Promise((resolve, reject) => {
+            try {
+              const message = this.audioStreamManager.createAudioMessage(base64Audio, "input");
+              const cleanup = () => {
+                this.off("turnComplete", onTurnComplete);
+                this.off("error", onErr);
+              };
+              const onTurnComplete = () => {
+                cleanup();
+                resolve(transcriptionText.trim());
+              };
+              const onErr = (e) => {
+                cleanup();
+                reject(new Error(e.message));
+              };
+              this.on("turnComplete", onTurnComplete);
+              this.on("error", onErr);
+              this.sendEvent("client_content", message);
+              this.log("Sent audio for transcription");
+            } catch (err) {
+              reject(err);
+            }
+          });
+        },
+        (error) => {
+          this.createAndEmitError("audio_processing_error" /* AUDIO_PROCESSING_ERROR */, "Audio transcription failed", error);
+        }
+      );
+      return result;
+    } finally {
+      this.off("writing", onWriting);
+      this.off("error", onError);
+      this.off("session", onSession);
+    }
   }
   /**
    * Get available speakers/voices
    */
   async getSpeakers() {
-    return
-
-
-
-
-
-    ];
-    }, "gemini-live.getSpeakers")();
+    return [
+      { voiceId: "Puck", description: "Conversational, friendly" },
+      { voiceId: "Charon", description: "Deep, authoritative" },
+      { voiceId: "Kore", description: "Neutral, professional" },
+      { voiceId: "Fenrir", description: "Warm, approachable" }
+    ];
   }
   /**
    * Resume a previous session using a session handle
```
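Taken together, the rewritten methods split cleanly: `speak()` sends a `client_content` turn (preceded by a `session.update` when per-turn options are given), `send()` forwards audio as `realtime_input` messages, `listen()` resolves with the accumulated user transcription once the model signals `turnComplete`, and `getSpeakers()` now returns a fixed list of four prebuilt voices. A usage sketch against a connected instance; the zero-filled buffers stand in for real 16-bit PCM audio, which is an assumption about the format `validateAndConvertAudioInput` expects:

```ts
import { Readable } from "node:stream";
import { GeminiLiveVoice } from "@mastra/voice-google-gemini-live";

const voice = new GeminiLiveVoice({ apiKey: process.env.GOOGLE_API_KEY });
await voice.connect();

// Per-turn overrides map onto the session.update payload built in speak():
// speaker -> voice_name, languageCode -> language_code, responseModalities.
await voice.speak("Hello from Mastra!", { speaker: "Puck", languageCode: "en-US" });

// Realtime input: a Readable stream is chunked into realtime_input messages.
const micStream = Readable.from([Buffer.alloc(3200)]); // stand-in for live PCM
await voice.send(micStream);

// Traditional STT: resolves with the transcript on turnComplete.
const transcript = await voice.listen(Readable.from([Buffer.alloc(3200)]));
console.info({ transcript, speakers: await voice.getSpeakers() });
```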
```diff
@@ -2267,6 +2258,18 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
             role: "assistant"
           });
         }
+        if (part.functionCall) {
+          this.log("Found function call in serverContent.modelTurn.parts", part.functionCall);
+          const toolCallData = {
+            toolCall: {
+              name: part.functionCall.name,
+              args: part.functionCall.args || {},
+              id: part.functionCall.id || randomUUID()
+            }
+          };
+          void this.handleToolCall(toolCallData);
+          continue;
+        }
         if (part.inlineData?.mimeType?.includes("audio") && typeof part.inlineData.data === "string") {
           try {
             const audioData = part.inlineData.data;
```
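This hunk routes function calls that arrive inline in `serverContent.modelTurn.parts` through the same `handleToolCall` path as top-level tool calls, so both surfaces emit a `toolCall` event before execution. A small sketch of observing those events, reusing the `voice` instance from the sketches above; the diff confirms a `name` field on the payload, while any further fields are assumptions:

```ts
// Observing tool calls surfaced by the voice instance. Only `name` is visible
// in this diff's emit("toolCall", { name: toolName, ... }); treat the rest of
// the payload shape as unspecified here.
voice.on("toolCall", (call: { name: string; [key: string]: unknown }) => {
  console.info("model requested tool:", call.name);
});
```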
```diff
@@ -2341,9 +2344,24 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     if (!data.toolCall) {
       return;
     }
-
-
-
+    let toolCalls = [];
+    if (data.toolCall.functionCalls && Array.isArray(data.toolCall.functionCalls)) {
+      toolCalls = data.toolCall.functionCalls;
+    } else if (data.toolCall.name) {
+      toolCalls = [{ name: data.toolCall.name, args: data.toolCall.args, id: data.toolCall.id }];
+    }
+    for (const toolCall of toolCalls) {
+      const toolName = toolCall.name || "";
+      const toolArgs = toolCall.args || {};
+      const toolId = toolCall.id || randomUUID();
+      await this.processSingleToolCall(toolName, toolArgs, toolId);
+    }
+  }
+  /**
+   * Process a single tool call
+   * @private
+   */
+  async processSingleToolCall(toolName, toolArgs, toolId) {
     this.log("Processing tool call", { toolName, toolArgs, toolId });
     this.emit("toolCall", {
       name: toolName,
```
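The handler now accepts both wire shapes: a batched `toolCall.functionCalls` array and a single top-level `{ name, args, id }` object, normalizing either into a list before dispatching each entry to `processSingleToolCall`. The same normalization as a standalone pure function (the type names here are illustrative):

```ts
// Mirrors the normalization above: a batched functionCalls array wins,
// otherwise a single top-level call is wrapped into a one-element array.
interface FunctionCall {
  name?: string;
  args?: Record<string, unknown>;
  id?: string;
}

function normalizeToolCalls(
  toolCall: FunctionCall & { functionCalls?: FunctionCall[] },
): FunctionCall[] {
  if (Array.isArray(toolCall.functionCalls)) {
    return toolCall.functionCalls;
  }
  if (toolCall.name) {
    return [{ name: toolCall.name, args: toolCall.args, id: toolCall.id }];
  }
  return [];
}
```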
```diff
@@ -2363,36 +2381,38 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
       let result;
       if (tool.execute) {
         this.log("Executing tool", { toolName, toolArgs });
-        result = await tool.execute(
-          { context: toolArgs, runtimeContext: this.runtimeContext },
-          {
-            toolCallId: toolId,
-            messages: []
-          }
-        );
+        result = await tool.execute(toolArgs, { requestContext: this.requestContext });
         this.log("Tool executed successfully", { toolName, result });
       } else {
         this.log("Tool has no execute function", { toolName });
         result = { error: "Tool has no execute function" };
       }
       const toolResultMessage = {
-
-
-
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: result
+            }
+          ]
         }
       };
-      this.sendEvent("
+      this.sendEvent("toolResponse", toolResultMessage);
       this.log("Tool result sent", { toolName, toolId, result });
     } catch (error) {
       const errorMessage = error instanceof Error ? error.message : "Unknown error";
       this.log("Tool execution failed", { toolName, error: errorMessage });
       const errorResultMessage = {
-
-
-
+        toolResponse: {
+          functionResponses: [
+            {
+              id: toolId,
+              response: { error: errorMessage }
+            }
+          ]
         }
       };
-      this.sendEvent("
+      this.sendEvent("toolResponse", errorResultMessage);
       this.createAndEmitError("tool_execution_error" /* TOOL_EXECUTION_ERROR */, `Tool execution failed: ${errorMessage}`, {
         toolName,
         toolArgs,
```
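The tool contract changes here: `execute` was previously invoked as `execute({ context, runtimeContext }, { toolCallId, messages })` and is now invoked as `execute(toolArgs, { requestContext })`, with results (or errors) sent back in a `toolResponse.functionResponses` message keyed by the call id. A sketch of a tool written against the new shape; only the `execute` call signature is confirmed by this diff, so the surrounding object literal is an assumption about how tools are declared:

```ts
// A tool compatible with the new invocation shown above. Errors thrown here are
// caught by processSingleToolCall and returned to the model as
// { toolResponse: { functionResponses: [{ id, response: { error } }] } }.
const getTimeTool = {
  description: "Return the current server time", // illustrative metadata
  execute: async (
    args: Record<string, unknown>,
    { requestContext }: { requestContext?: unknown } = {},
  ) => {
    // requestContext is whatever the caller passed to connect({ requestContext }).
    return { now: new Date().toISOString(), echoed: args, requestContext };
  },
};
```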
```diff
@@ -2452,6 +2472,31 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     }
     return "text";
   }
+  /**
+   * Resolve Vertex AI location with sensible default
+   * @private
+   */
+  getVertexLocation() {
+    return this.options.location?.trim() || "us-central1";
+  }
+  /**
+   * Resolve the correct model identifier for Gemini API or Vertex AI
+   * @private
+   */
+  resolveModelIdentifier() {
+    const model = this.options.model ?? DEFAULT_MODEL;
+    if (!this.options.vertexAI) {
+      return `models/${model}`;
+    }
+    if (!this.options.project) {
+      throw this.createAndEmitError(
+        "project_id_missing" /* PROJECT_ID_MISSING */,
+        "Google Cloud project ID is required when using Vertex AI."
+      );
+    }
+    const location = this.getVertexLocation();
+    return `projects/${this.options.project}/locations/${location}/publishers/google/models/${model}`;
+  }
   /**
    * Send initial configuration to Gemini Live API
    * @private
```
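The two new private helpers make model addressing explicit: plain Live API sessions use a short `models/<model>` path, while Vertex AI requires a project id and expands to the full publisher path, with the location defaulting to `us-central1`. The same resolution logic as a standalone function; `DEFAULT_MODEL` below is a placeholder, since the package's actual default constant is defined elsewhere in `index.js`:

```ts
// Pure-function mirror of resolveModelIdentifier()/getVertexLocation() above.
const DEFAULT_MODEL = "gemini-model-placeholder"; // stand-in, not from this diff

function resolveModelIdentifier(opts: {
  vertexAI?: boolean;
  project?: string;
  location?: string;
  model?: string;
}): string {
  const model = opts.model ?? DEFAULT_MODEL;
  if (!opts.vertexAI) {
    return `models/${model}`;
  }
  if (!opts.project) {
    throw new Error("Google Cloud project ID is required when using Vertex AI.");
  }
  const location = opts.location?.trim() || "us-central1";
  return `projects/${opts.project}/locations/${location}/publishers/google/models/${model}`;
}

// resolveModelIdentifier({ vertexAI: true, project: "my-proj", model: "m" })
// -> "projects/my-proj/locations/us-central1/publishers/google/models/m"
```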
```diff
@@ -2462,7 +2507,7 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
     }
     const setupMessage = {
       setup: {
-        model:
+        model: this.resolveModelIdentifier()
       }
     };
     if (this.options.instructions) {
```
```diff
@@ -2611,6 +2656,8 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
       message = data;
     } else if (type === "realtime_input" && data.realtime_input) {
       message = data;
+    } else if (type === "toolResponse" && data.toolResponse) {
+      message = data;
     } else if (type === "session.update" && data.session) {
       message = data;
     } else {
```
```diff
@@ -2636,14 +2683,14 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
    *   inputSchema: z.object({
    *     location: z.string().describe("The city and state, e.g. San Francisco, CA"),
    *   }),
-   *   execute: async (
+   *   execute: async (inputData) => {
    *     // Fetch weather data from an API
    *     const response = await fetch(
-   *       `https://api.weather.com?location=${encodeURIComponent(
+   *       `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
    *     );
    *     const data = await response.json();
    *     return {
-   *       message: `The current temperature in ${
+   *       message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
    *     };
    *   },
    * });
```
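The repaired JSDoc example now threads `inputData` through the fetch URL and the return message consistently. Extracted as standalone TypeScript, under the assumption that the documented shape (a Zod `inputSchema` plus `execute(inputData)`) matches what the comment illustrates; `api.weather.com` is the doc's placeholder endpoint, not a real API:

```ts
import { z } from "zod";

// Standalone version of the JSDoc example above. The endpoint and the response
// fields (temperature, conditions) are the documentation's placeholders.
const weatherTool = {
  description: "Get the current weather for a location",
  inputSchema: z.object({
    location: z.string().describe("The city and state, e.g. San Francisco, CA"),
  }),
  execute: async (inputData: { location: string }) => {
    const response = await fetch(
      `https://api.weather.com?location=${encodeURIComponent(inputData.location)}`,
    );
    const data = await response.json();
    return {
      message: `The current temperature in ${inputData.location} is ${data.temperature}°F with ${data.conditions}.`,
    };
  },
};
```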
```diff
@@ -2661,12 +2708,12 @@ var GeminiLiveVoice = class _GeminiLiveVoice extends MastraVoice {
    * Get the current tools configured for this voice instance
    * @returns Object containing the current tools
    */
-
+  listTools() {
     return this.tools;
   }
   log(message, ...args) {
     if (this.debug) {
-      console.
+      console.info(`[GeminiLiveVoice] ${message}`, ...args);
     }
   }
   /**
```