@juspay/neurolink 9.71.0 → 9.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +330 -312
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +2 -0
- package/dist/core/toolRouting.d.ts +59 -0
- package/dist/core/toolRouting.js +232 -0
- package/dist/lib/core/constants.d.ts +1 -0
- package/dist/lib/core/constants.js +2 -0
- package/dist/lib/core/toolRouting.d.ts +59 -0
- package/dist/lib/core/toolRouting.js +233 -0
- package/dist/lib/neurolink.d.ts +31 -1
- package/dist/lib/neurolink.js +188 -1
- package/dist/lib/telemetry/attributes.js +3 -1
- package/dist/lib/types/config.d.ts +8 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/livekit.d.ts +134 -0
- package/dist/lib/types/toolRouting.d.ts +91 -0
- package/dist/lib/types/toolRouting.js +19 -0
- package/dist/lib/voice/livekit/brain.js +1 -1
- package/dist/lib/voice/livekit/config.d.ts +12 -1
- package/dist/lib/voice/livekit/config.js +54 -0
- package/dist/lib/voice/livekit/eventBridge.js +4 -4
- package/dist/lib/voice/livekit/index.d.ts +9 -2
- package/dist/lib/voice/livekit/index.js +9 -2
- package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
- package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
- package/dist/lib/voice/livekit/roomContext.js +57 -0
- package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/lib/voice/livekit/roomDispatch.js +31 -0
- package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
- package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/lib/voice/livekit/vertexAuth.js +73 -0
- package/dist/lib/voice/livekit/voiceAgent.js +47 -37
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
- package/dist/neurolink.d.ts +31 -1
- package/dist/neurolink.js +188 -1
- package/dist/telemetry/attributes.js +3 -1
- package/dist/types/config.d.ts +8 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/livekit.d.ts +134 -0
- package/dist/types/toolRouting.d.ts +91 -0
- package/dist/types/toolRouting.js +18 -0
- package/dist/voice/livekit/brain.js +1 -1
- package/dist/voice/livekit/config.d.ts +12 -1
- package/dist/voice/livekit/config.js +54 -0
- package/dist/voice/livekit/eventBridge.js +4 -4
- package/dist/voice/livekit/index.d.ts +9 -2
- package/dist/voice/livekit/index.js +9 -2
- package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/voice/livekit/realtimeEventBridge.js +160 -0
- package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/voice/livekit/realtimeMcpTools.js +193 -0
- package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
- package/dist/voice/livekit/roomContext.d.ts +23 -0
- package/dist/voice/livekit/roomContext.js +56 -0
- package/dist/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/voice/livekit/roomDispatch.js +30 -0
- package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/voice/livekit/schemaSanitizer.js +143 -0
- package/dist/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/voice/livekit/vertexAuth.js +72 -0
- package/dist/voice/livekit/voiceAgent.js +47 -37
- package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/voice/livekit/voiceAgentWorker.js +64 -0
- package/package.json +2 -1
|
@@ -25,6 +25,22 @@ function readEnv(name, fallback) {
|
|
|
25
25
|
}
|
|
26
26
|
return fallback;
|
|
27
27
|
}
|
|
28
|
+
/**
 * Read an environment variable that has no fallback.
 *
 * @param name - Name of the environment variable.
 * @returns The whitespace-trimmed value, or `undefined` when the variable is
 *   unset or contains only whitespace.
 */
function readOptionalEnv(name) {
    const raw = process.env[name];
    if (typeof raw !== "string") {
        return undefined;
    }
    const trimmed = raw.trim();
    return trimmed.length > 0 ? trimmed : undefined;
}
|
|
34
|
+
/**
 * Read a strictly positive, finite number from the environment.
 *
 * @param name - Name of the environment variable.
 * @param fallback - Value returned when the variable is unset, blank,
 *   non-numeric, non-finite, zero, or negative.
 * @returns The parsed number, or `fallback`.
 */
function readNumberEnv(name, fallback) {
    const text = process.env[name];
    if (typeof text !== "string") {
        return fallback;
    }
    const trimmed = text.trim();
    if (trimmed.length === 0) {
        return fallback;
    }
    const value = Number(trimmed);
    const usable = Number.isFinite(value) && value > 0;
    return usable ? value : fallback;
}
|
|
28
44
|
/**
|
|
29
45
|
* Resolve LiveKit server connection settings from the environment.
|
|
30
46
|
*
|
|
@@ -51,6 +67,44 @@ export function resolveBrainDefaults() {
|
|
|
51
67
|
model: readEnv("VOICE_LLM_MODEL", DEFAULT_LLM_MODEL),
|
|
52
68
|
};
|
|
53
69
|
}
|
|
70
|
+
const DEFAULT_REALTIME_MODEL = "gemini-live-2.5-flash";
const DEFAULT_REALTIME_LOCATION = "global";
const DEFAULT_RESPONSE_MODALITY = "AUDIO";
const DEFAULT_SYSTEM_PROMPT = "You are a concise voice assistant for a merchant dashboard. Keep replies short and spoken.";
const DEFAULT_GREETING = "Briefly greet the merchant and ask how you can help with their store today.";
const DEFAULT_LIGHTHOUSE_URL = "http://localhost:5173";
const DEFAULT_MCP_PATH = "/ai/mcp/v2";
/**
 * Resolve the realtime (Gemini Live speech-to-speech) voice configuration from
 * the environment.
 *
 * Most values fall back to a built-in default, so a worker can start with no
 * realtime-specific env. Two values have no default and are `undefined` when
 * not configured:
 *  - `project`: resolved from `VERTEX_PROJECT`, then the service-account
 *    project (`GOOGLE_AUTH_BREEZE_PROJECT_ID`), then `GOOGLE_CLOUD_PROJECT_ID`.
 *  - `voice`: resolved from `VOICE_S2S_VOICE`.
 *
 * The MCP URL is `VOICE_MCP_URL` if set, otherwise
 * `${LIGHTHOUSE_URL}/ai/mcp/v2`. Trailing slashes on `LIGHTHOUSE_URL` are
 * stripped before the path is appended so the derived URL never contains `//`.
 *
 * `toolsEnabled` is `true` only when `VOICE_S2S_TOOLS` equals `"true"`
 * (case-insensitive). The `*Ms` values must be positive finite numbers,
 * otherwise their defaults are used.
 *
 * @returns The resolved realtime voice configuration object.
 */
export function resolveRealtimeVoiceConfig() {
    const lighthouseUrl = readEnv("LIGHTHOUSE_URL", DEFAULT_LIGHTHOUSE_URL);
    // DEFAULT_MCP_PATH already starts with "/", so drop any trailing slash on the
    // base to avoid producing "http://host//ai/mcp/v2".
    const lighthouseBase = lighthouseUrl.replace(/\/+$/, "");
    const mcpUrl = readEnv("VOICE_MCP_URL", `${lighthouseBase}${DEFAULT_MCP_PATH}`);
    return {
        project: readOptionalEnv("VERTEX_PROJECT") ??
            readOptionalEnv("GOOGLE_AUTH_BREEZE_PROJECT_ID") ??
            readOptionalEnv("GOOGLE_CLOUD_PROJECT_ID"),
        location: readEnv("VERTEX_LOCATION", DEFAULT_REALTIME_LOCATION),
        model: readEnv("VOICE_LIVE_MODEL", DEFAULT_REALTIME_MODEL),
        voice: readOptionalEnv("VOICE_S2S_VOICE"),
        responseModality: readEnv("VOICE_RESPONSE_MODALITY", DEFAULT_RESPONSE_MODALITY).toUpperCase(),
        systemPrompt: readEnv("VOICE_SYSTEM_PROMPT", DEFAULT_SYSTEM_PROMPT),
        greeting: readEnv("VOICE_GREETING", DEFAULT_GREETING),
        toolsEnabled: readEnv("VOICE_S2S_TOOLS", "").toLowerCase() === "true",
        mcpUrl,
        emptyRoomGraceMs: readNumberEnv("VOICE_EMPTY_ROOM_GRACE_MS", 30_000),
        joinDeadlineMs: readNumberEnv("VOICE_JOIN_DEADLINE_MS", 60_000),
        hitlTimeoutMs: readNumberEnv("VOICE_HITL_TIMEOUT_MS", 45_000),
        metricsIntervalMs: readNumberEnv("VOICE_METRICS_INTERVAL_MS", 10_000),
    };
}
|
|
54
108
|
const EOU_TRUTHY = new Set(["1", "true", "english", "en", "on", "yes"]);
|
|
55
109
|
/**
|
|
56
110
|
* Resolve the semantic end-of-utterance (EOU) turn-detection settings.
|
|
@@ -191,7 +191,7 @@ export async function attachEventBridge(params) {
|
|
|
191
191
|
const json = JSON.stringify(envelope);
|
|
192
192
|
const bytes = encoder.encode(json);
|
|
193
193
|
const onError = (transport) => (error) => {
|
|
194
|
-
logger.warn("
|
|
194
|
+
logger.warn("voice.bridge.publishFailed", {
|
|
195
195
|
transport,
|
|
196
196
|
type: event.type,
|
|
197
197
|
error: error instanceof Error ? error.message : String(error),
|
|
@@ -304,7 +304,7 @@ export async function attachEventBridge(params) {
|
|
|
304
304
|
parsed = JSON.parse(decoder.decode(payload));
|
|
305
305
|
}
|
|
306
306
|
catch (error) {
|
|
307
|
-
logger.warn("
|
|
307
|
+
logger.warn("voice.bridge.controlMalformed", {
|
|
308
308
|
error: error instanceof Error ? error.message : String(error),
|
|
309
309
|
});
|
|
310
310
|
return;
|
|
@@ -332,7 +332,7 @@ export async function attachEventBridge(params) {
|
|
|
332
332
|
};
|
|
333
333
|
room.on(RoomEvent.DataReceived, onData);
|
|
334
334
|
publish({ type: "status", data: { state: "listening" } });
|
|
335
|
-
logger.info("
|
|
335
|
+
logger.info("voice.bridge.attached", {
|
|
336
336
|
eventsTopic,
|
|
337
337
|
controlTopic,
|
|
338
338
|
filtered: include !== undefined,
|
|
@@ -353,7 +353,7 @@ export async function attachEventBridge(params) {
|
|
|
353
353
|
emitter.off(STREAM_END_EVENT, onStreamComplete);
|
|
354
354
|
emitter.off(STREAM_ERROR_EVENT, onStreamError);
|
|
355
355
|
room.off(RoomEvent.DataReceived, onData);
|
|
356
|
-
logger.info("
|
|
356
|
+
logger.info("voice.bridge.disposed", {});
|
|
357
357
|
},
|
|
358
358
|
};
|
|
359
359
|
}
|
|
@@ -8,8 +8,15 @@
|
|
|
8
8
|
* See docs/features/livekit-voice-agent.md.
|
|
9
9
|
*/
|
|
10
10
|
export { createVoiceBrain } from "./brain.js";
|
|
11
|
-
export { resolveLiveKitServerConfig, resolveBrainDefaults } from "./config.js";
|
|
11
|
+
export { resolveLiveKitServerConfig, resolveBrainDefaults, resolveRealtimeVoiceConfig, } from "./config.js";
|
|
12
12
|
export { attachEventBridge } from "./eventBridge.js";
|
|
13
|
+
export { attachRealtimeEventBridge } from "./realtimeEventBridge.js";
|
|
13
14
|
export { mintJoinToken } from "./tokens.js";
|
|
15
|
+
export { createVoiceRoom, dispatchVoiceAgent } from "./roomDispatch.js";
|
|
14
16
|
export { defineVoiceAgent } from "./voiceAgent.js";
|
|
15
|
-
export {
|
|
17
|
+
export { defineRealtimeVoiceAgent } from "./realtimeVoiceAgent.js";
|
|
18
|
+
export { startVoiceAgentWorker, startRealtimeVoiceAgentWorker, installVoiceWorkerProcessGuards, } from "./voiceAgentWorker.js";
|
|
19
|
+
export { ensureVertexAdc, clearGeminiApiKeyEnv } from "./vertexAuth.js";
|
|
20
|
+
export { readCallContextFromRoom } from "./roomContext.js";
|
|
21
|
+
export { sanitizeSchema, sanitizeToolParameters, findSchemaIssue, } from "./schemaSanitizer.js";
|
|
22
|
+
export { buildRealtimeMcpTools, mcpResultToText } from "./realtimeMcpTools.js";
|
|
@@ -8,9 +8,16 @@
|
|
|
8
8
|
* See docs/features/livekit-voice-agent.md.
|
|
9
9
|
*/
|
|
10
10
|
export { createVoiceBrain } from "./brain.js";
|
|
11
|
-
export { resolveLiveKitServerConfig, resolveBrainDefaults } from "./config.js";
|
|
11
|
+
export { resolveLiveKitServerConfig, resolveBrainDefaults, resolveRealtimeVoiceConfig, } from "./config.js";
|
|
12
12
|
export { attachEventBridge } from "./eventBridge.js";
|
|
13
|
+
export { attachRealtimeEventBridge } from "./realtimeEventBridge.js";
|
|
13
14
|
export { mintJoinToken } from "./tokens.js";
|
|
15
|
+
export { createVoiceRoom, dispatchVoiceAgent } from "./roomDispatch.js";
|
|
14
16
|
export { defineVoiceAgent } from "./voiceAgent.js";
|
|
15
|
-
export {
|
|
17
|
+
export { defineRealtimeVoiceAgent } from "./realtimeVoiceAgent.js";
|
|
18
|
+
export { startVoiceAgentWorker, startRealtimeVoiceAgentWorker, installVoiceWorkerProcessGuards, } from "./voiceAgentWorker.js";
|
|
19
|
+
export { ensureVertexAdc, clearGeminiApiKeyEnv } from "./vertexAuth.js";
|
|
20
|
+
export { readCallContextFromRoom } from "./roomContext.js";
|
|
21
|
+
export { sanitizeSchema, sanitizeToolParameters, findSchemaIssue, } from "./schemaSanitizer.js";
|
|
22
|
+
export { buildRealtimeMcpTools, mcpResultToText } from "./realtimeMcpTools.js";
|
|
16
23
|
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime data-channel event bridge.
|
|
3
|
+
*
|
|
4
|
+
* The cascaded bridge (`eventBridge.ts`) is driven by NeuroLink's event emitter.
|
|
5
|
+
*/
|
|
6
|
+
import type { RealtimeEventBridgeHandle, RealtimeEventBridgeParams } from "../../types/index.js";
|
|
7
|
+
/**
 * Attach the realtime event bridge to a room.
 *
 * Resolves to a handle exposing `publishEvent` (worker → browser),
 * `requestConfirmation` (HITL round-trip), and `dispose` (remove the control
 * listener, decline anything still pending).
 *
 * @param params - Bridge parameters; see `RealtimeEventBridgeParams`.
 * @returns A promise for the bridge handle.
 */
export declare function attachRealtimeEventBridge(params: RealtimeEventBridgeParams): Promise<RealtimeEventBridgeHandle>;
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime data-channel event bridge.
|
|
3
|
+
*
|
|
4
|
+
* The cascaded bridge (`eventBridge.ts`) is driven by NeuroLink's event emitter.
|
|
5
|
+
*/
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { logger } from "../../utils/logger.js";
|
|
8
|
+
const DEFAULT_EVENTS_TOPIC = "ai-events";
const DEFAULT_CONTROL_TOPIC = "ai-control";
const DEFAULT_MAX_INLINE_BYTES = 12_000;
const DEFAULT_HITL_TIMEOUT_MS = 45_000;
const hitlControlMessageSchema = z.object({
    action: z.enum(["hitl:accept", "hitl:reject"]),
    confirmationId: z.string(),
});
/**
 * Attach the realtime event bridge to a room.
 *
 * Returns `publishEvent` (worker → browser), `requestConfirmation` (HITL
 * round-trip), and `dispose` (remove the control listener, decline anything
 * still pending).
 *
 * Publishing is best-effort: a failure (synchronous or asynchronous) is logged
 * as `realtime.bridge.publishFailed` and never thrown to the caller.
 *
 * @param params - `room` is required. `eventsTopic`, `controlTopic`,
 *   `maxInlineBytes` and `hitlTimeoutMs` are optional and default to
 *   `"ai-events"`, `"ai-control"`, `12_000` and `45_000` respectively.
 * @returns A promise for `{ publishEvent, requestConfirmation, dispose }`.
 */
export async function attachRealtimeEventBridge(params) {
    const { room } = params;
    const eventsTopic = params.eventsTopic ?? DEFAULT_EVENTS_TOPIC;
    const controlTopic = params.controlTopic ?? DEFAULT_CONTROL_TOPIC;
    const maxInlineBytes = params.maxInlineBytes ?? DEFAULT_MAX_INLINE_BYTES;
    const hitlTimeoutMs = params.hitlTimeoutMs ?? DEFAULT_HITL_TIMEOUT_MS;
    const { RoomEvent } = await import("@livekit/rtc-node");
    const encoder = new TextEncoder();
    const decoder = new TextDecoder();
    let seq = 0;
    let disposed = false;
    const startedAt = Date.now();
    // Shared failure handler. Publishing is best-effort, so failures are logged
    // rather than rethrown.
    const onPublishError = (transport, type) => (error) => {
        logger.warn("realtime.bridge.publishFailed", {
            transport,
            type,
            error: error instanceof Error ? error.message : String(error),
        });
    };
    const publishEvent = (type, data) => {
        if (disposed) {
            return;
        }
        const participant = room.localParticipant;
        if (!participant) {
            return;
        }
        try {
            seq += 1;
            const json = JSON.stringify({ seq, ts: Date.now(), type, data });
            const bytes = encoder.encode(json);
            // Envelopes up to maxInlineBytes go out as a data packet; larger ones
            // are sent via sendText instead.
            const via = bytes.byteLength <= maxInlineBytes ? "data" : "stream";
            logger.debug("realtime.bridge.publish", {
                ms: Date.now() - startedAt,
                seq,
                type,
                via,
                bytes: bytes.byteLength,
            });
            // The publish calls are asynchronous: a rejection would bypass the
            // surrounding try/catch and surface as an unhandled rejection, so a
            // catch handler is attached explicitly.
            if (via === "data") {
                void Promise.resolve(participant.publishData(bytes, {
                    reliable: true,
                    topic: eventsTopic,
                })).catch(onPublishError("data", type));
            }
            else {
                void Promise.resolve(participant.sendText(json, { topic: eventsTopic })).catch(onPublishError("stream", type));
            }
        }
        catch (error) {
            /* non-fatal — the UI bridge is best-effort (e.g. unserializable data) */
            onPublishError("sync", type)(error);
        }
    };
    // HITL: WRITE-labeled tools pause for user confirmation. We publish a
    // `hitl-prompt`, the UI replies on the control topic, and we resolve the
    // pending request by confirmationId. A timeout is treated as a decline so a
    // turn can never hang waiting on the user.
    const pendingHitl = new Map();
    let hitlSeq = 0;
    const onData = (...args) => {
        // DataReceived listener arguments: payload is first, topic is fourth.
        const payload = args[0];
        const topic = args[3];
        if (topic !== controlTopic || !(payload instanceof Uint8Array)) {
            return;
        }
        let raw;
        try {
            raw = JSON.parse(decoder.decode(payload));
        }
        catch {
            // Malformed control payloads are ignored.
            return;
        }
        const message = hitlControlMessageSchema.safeParse(raw);
        if (!message.success) {
            return;
        }
        const { action, confirmationId } = message.data;
        const resolver = pendingHitl.get(confirmationId);
        if (resolver) {
            logger.debug("realtime.bridge.controlReceived", {
                action,
                confirmationId,
            });
            pendingHitl.delete(confirmationId);
            resolver(action === "hitl:accept");
        }
        else {
            // Unknown, already answered, or timed-out confirmation.
            logger.warn("realtime.bridge.controlUnmatched", {
                action,
                confirmationId,
            });
        }
    };
    room.on(RoomEvent.DataReceived, onData);
    const requestConfirmation = (toolName, args) => {
        if (disposed) {
            logger.warn("realtime.bridge.hitlDisposed", { toolName });
            return Promise.resolve(false);
        }
        hitlSeq += 1;
        const confirmationId = `hitl_${seq}_${hitlSeq}`;
        publishEvent("hitl-prompt", {
            confirmationId,
            toolName,
            actionType: `Run ${toolName}`,
            arguments: args ?? {},
        });
        logger.info("realtime.bridge.hitlPrompt", { toolName, confirmationId });
        return new Promise((resolve) => {
            // No answer within hitlTimeoutMs counts as a decline.
            const timer = setTimeout(() => {
                pendingHitl.delete(confirmationId);
                logger.warn("realtime.bridge.hitlTimeout", {
                    toolName,
                    confirmationId,
                    timeoutMs: hitlTimeoutMs,
                });
                resolve(false);
            }, hitlTimeoutMs);
            pendingHitl.set(confirmationId, (approved) => {
                clearTimeout(timer);
                logger.info("realtime.bridge.hitlResolved", {
                    toolName,
                    confirmationId,
                    approved,
                });
                resolve(approved);
            });
        });
    };
    return {
        publishEvent,
        requestConfirmation,
        dispose() {
            if (disposed) {
                return;
            }
            disposed = true;
            room.off(RoomEvent.DataReceived, onData);
            // Decline every outstanding confirmation; each resolver also clears
            // its own timeout.
            for (const [, resolver] of pendingHitl) {
                resolver(false);
            }
            pendingHitl.clear();
        },
    };
}
|
|
161
|
+
//# sourceMappingURL=realtimeEventBridge.js.map
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP tools → Gemini function tools bridge (realtime mode).
|
|
3
|
+
*
|
|
4
|
+
* In speech-to-speech mode Gemini owns the tool loop, so an external MCP
|
|
5
|
+
* server's tools are registered as LiveKit/Gemini function tools rather than
|
|
6
|
+
* NeuroLink tools. This connects to the MCP server as the calling user
|
|
7
|
+
* (`x-auth-token` + base64 `x-context`), sanitizes each tool's schema into the
|
|
8
|
+
* Gemini-safe subset, wires WRITE-labeled (destructive) tools through a HITL
|
|
9
|
+
* confirmation, and forwards tool start/result events to the browser bridge.
|
|
10
|
+
*
|
|
11
|
+
* `@livekit/agents` and `@modelcontextprotocol/sdk` are imported dynamically so
|
|
12
|
+
* the core package does not require them unless the realtime agent is used.
|
|
13
|
+
*
|
|
14
|
+
* See docs/features/livekit-voice-agent.md.
|
|
15
|
+
*/
|
|
16
|
+
import type { llm as llmNs } from "@livekit/agents";
|
|
17
|
+
import type { BuildRealtimeMcpToolsParams } from "../../types/index.js";
|
|
18
|
+
/**
 * Flatten an MCP tool result's text content into a single string for the model.
 *
 * @param result - The raw tool result; it is validated before use.
 * @returns The joined text parts, prefixed with `Tool error:` when the result
 *   is flagged as an error, or `"Tool returned no content."` when there is no
 *   usable text.
 */
export declare function mcpResultToText(result: unknown): string;
|
|
20
|
+
/**
 * Connect to the MCP server as the calling user and bridge every tool into a
 * LiveKit `ToolContext`. Returns the tool map + the client (close on shutdown).
 * The server already scopes the tool list by `x-context`, so no client-side
 * filtering is needed.
 *
 * @param params - Connection and bridge parameters; see
 *   `BuildRealtimeMcpToolsParams`.
 * @returns A promise for `{ tools, client }`; the caller is responsible for
 *   calling `client.close()`.
 */
export declare function buildRealtimeMcpTools(params: BuildRealtimeMcpToolsParams): Promise<{
    tools: llmNs.ToolContext;
    client: {
        close: () => Promise<void>;
    };
}>;
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP tools → Gemini function tools bridge (realtime mode).
|
|
3
|
+
*
|
|
4
|
+
* In speech-to-speech mode Gemini owns the tool loop, so an external MCP
|
|
5
|
+
* server's tools are registered as LiveKit/Gemini function tools rather than
|
|
6
|
+
* NeuroLink tools. This connects to the MCP server as the calling user
|
|
7
|
+
* (`x-auth-token` + base64 `x-context`), sanitizes each tool's schema into the
|
|
8
|
+
* Gemini-safe subset, wires WRITE-labeled (destructive) tools through a HITL
|
|
9
|
+
* confirmation, and forwards tool start/result events to the browser bridge.
|
|
10
|
+
*
|
|
11
|
+
* `@livekit/agents` and `@modelcontextprotocol/sdk` are imported dynamically so
|
|
12
|
+
* the core package does not require them unless the realtime agent is used.
|
|
13
|
+
*
|
|
14
|
+
* See docs/features/livekit-voice-agent.md.
|
|
15
|
+
*/
|
|
16
|
+
import { z } from "zod";
|
|
17
|
+
import { logger } from "../../utils/logger.js";
|
|
18
|
+
import { findSchemaIssue, sanitizeToolParameters } from "./schemaSanitizer.js";
|
|
19
|
+
/**
 * Shape of the MCP tool result fields that get rendered: the optional error
 * flag and the list of content parts. Results come from an external MCP server
 * over the network, so they are validated with this schema; only text parts
 * are used downstream (image, resource and other kinds are dropped).
 */
const toolResultSchema = z.object({
    isError: z.boolean().optional(),
    content: z
        .array(z.object({
        type: z.string().optional(),
        text: z.string().optional(),
    }))
        .optional(),
});
/**
 * Collapse the text parts of an MCP tool result into one string for the model.
 *
 * @param result - Raw tool result of unknown shape.
 * @returns Newline-joined, trimmed text of all `type: "text"` parts. Error
 *   results are prefixed with `Tool error:`. Results that fail validation or
 *   carry no text yield `"Tool returned no content."`.
 */
export function mcpResultToText(result) {
    const parsed = toolResultSchema.safeParse(result);
    if (!parsed.success) {
        return "Tool returned no content.";
    }
    const { content, isError } = parsed.data;
    const pieces = [];
    for (const part of content ?? []) {
        // Keep only non-empty text parts.
        if (part.type === "text" && part.text) {
            pieces.push(part.text);
        }
    }
    const joined = pieces.join("\n").trim();
    if (isError) {
        return `Tool error: ${joined || "unknown error"}`;
    }
    return joined || "Tool returned no content.";
}
|
|
46
|
+
/**
 * Build the function-result text handed back to Gemini after the user rejects
 * a HITL-gated tool call.
 *
 * The wording deliberately limits the rejection to the current attempt: a
 * "never retry" phrasing made the model refuse the tool permanently, even when
 * the user asked for it again later.
 *
 * @param toolName - Name of the tool the user declined.
 * @returns The decline message for the model.
 */
function buildHitlDeclineMessage(toolName) {
    const segments = [
        `The user reviewed the request to run "${toolName}" and chose to reject it, `,
        `so it was NOT executed this time. Do not run it right now. Briefly let the user know `,
        `you didn't proceed because they declined, and ask if there's anything else. `,
        `If the user later asks for "${toolName}" again or says to go ahead, you may run it then — `,
        `this rejection applies only to this request, not to all future requests.`,
    ];
    return segments.join("");
}
|
|
58
|
+
/**
|
|
59
|
+
* Register a tool handler under the name variants the model might call:
|
|
60
|
+
* 1. the Gemini-safe full name (hyphens → underscores; Gemini rewrites these
|
|
61
|
+
* itself, so the rewritten form must be registered or the round-trip fails)
|
|
62
|
+
* 2. the bare short name (the model often drops the server prefix)
|
|
63
|
+
* First registration wins, so a unique full name is never shadowed by a generic
|
|
64
|
+
* short name. Returns the aliases actually registered.
|
|
65
|
+
*/
|
|
66
|
+
function registerToolAliases(toolContext, mcpToolName, handler) {
|
|
67
|
+
const safeFullName = mcpToolName.replace(/[^a-zA-Z0-9_]/g, "_");
|
|
68
|
+
const shortName = safeFullName.includes("_")
|
|
69
|
+
? safeFullName.slice(safeFullName.lastIndexOf("_") + 1)
|
|
70
|
+
: safeFullName;
|
|
71
|
+
const candidates = [safeFullName, shortName].filter((name, index, all) => name.length > 0 && all.indexOf(name) === index);
|
|
72
|
+
const registered = [];
|
|
73
|
+
for (const alias of candidates) {
|
|
74
|
+
if (alias in toolContext) {
|
|
75
|
+
continue; // taken by another tool — don't clobber
|
|
76
|
+
}
|
|
77
|
+
toolContext[alias] = handler;
|
|
78
|
+
registered.push(alias);
|
|
79
|
+
}
|
|
80
|
+
return registered;
|
|
81
|
+
}
|
|
82
|
+
/**
 * Connect to the MCP server as the calling user and bridge every tool into a
 * LiveKit `ToolContext`. Returns the tool map + the client (close on shutdown).
 * The server already scopes the tool list by `x-context`, so no client-side
 * filtering is needed.
 *
 * For each listed tool the input schema is sanitized; tools whose sanitized
 * schema still has an issue are skipped (and logged). Tools annotated with
 * `destructiveHint === true` ask `requestConfirmation` before running, and a
 * declined request returns a decline message instead of calling the tool.
 * Every invocation publishes `tool-start` and `tool-result` events.
 *
 * If connecting or listing tools fails, the client is closed (best-effort)
 * and the original error is rethrown.
 *
 * @param params - `mcpUrl`, `authToken` and `xContext` configure the
 *   connection; `publishEvent` and `requestConfirmation` are the bridge
 *   callbacks used during tool execution.
 * @returns A promise for `{ tools, client }`.
 */
export async function buildRealtimeMcpTools(params) {
    const { mcpUrl, authToken, xContext, publishEvent, requestConfirmation } = params;
    const { llm } = await import("@livekit/agents");
    const { Client: McpClient } = await import("@modelcontextprotocol/sdk/client/index.js");
    const { StreamableHTTPClientTransport } = await import("@modelcontextprotocol/sdk/client/streamableHttp.js");
    const transport = new StreamableHTTPClientTransport(new URL(mcpUrl), {
        requestInit: {
            headers: { "x-auth-token": authToken, "x-context": xContext },
        },
    });
    const client = new McpClient({ name: "neurolink-realtime-voice", version: "1.0.0" }, { capabilities: {} });
    logger.info("realtime.mcp.connecting", {
        mcpUrl,
        hasAuthToken: authToken !== "",
        hasContext: xContext !== "",
    });
    try {
        // Connect inside the try so a failed handshake also goes through the
        // close-and-rethrow cleanup below.
        await client.connect(transport);
        const { tools: mcpTools } = await client.listTools();
        logger.info("realtime.mcp.toolsListed", { count: mcpTools.length });
        const toolContext = {};
        const skipped = [];
        for (const mcpTool of mcpTools) {
            const parameters = sanitizeToolParameters(mcpTool.inputSchema);
            const issue = findSchemaIssue(parameters);
            if (issue) {
                skipped.push(mcpTool.name);
                logger.warn("realtime.mcp.toolSkipped", {
                    tool: mcpTool.name,
                    issue,
                    rawInputSchema: JSON.stringify(mcpTool.inputSchema),
                });
                continue;
            }
            // Destructive tools are gated behind a user confirmation (HITL).
            const requiresConfirmation = mcpTool.annotations?.destructiveHint === true;
            const handler = llm.tool({
                description: mcpTool.description ?? mcpTool.name,
                parameters,
                execute: async (args) => {
                    if (requiresConfirmation) {
                        const approved = await requestConfirmation(mcpTool.name, args ?? {});
                        if (!approved) {
                            logger.info("realtime.tool.hitlRejected", {
                                tool: mcpTool.name,
                            });
                            return buildHitlDeclineMessage(mcpTool.name);
                        }
                        logger.info("realtime.tool.hitlApproved", { tool: mcpTool.name });
                    }
                    logger.info("realtime.tool.invoke", {
                        tool: mcpTool.name,
                        args: args ?? {},
                    });
                    publishEvent("tool-start", { name: mcpTool.name });
                    const startedAt = Date.now();
                    try {
                        const result = await client.callTool({
                            name: mcpTool.name,
                            arguments: args ?? {},
                        });
                        const text = mcpResultToText(result);
                        logger.info("realtime.tool.result", {
                            tool: mcpTool.name,
                            isError: result.isError === true,
                            chars: text.length,
                            ms: Date.now() - startedAt,
                        });
                        // Forward the RAW result so the client can extract a
                        // chart/UIComponent and clear the in-progress indicator.
                        publishEvent("tool-result", { name: mcpTool.name, result });
                        return text;
                    }
                    catch (error) {
                        logger.error("realtime.tool.error", {
                            tool: mcpTool.name,
                            ms: Date.now() - startedAt,
                            error: error instanceof Error ? error.message : String(error),
                        });
                        // Still publish a result so the browser gets a matching
                        // tool-result for the tool-start.
                        publishEvent("tool-result", { name: mcpTool.name, result: null });
                        // The failure is returned as text for the model rather
                        // than thrown.
                        return `Tool ${mcpTool.name} failed: ${String(error)}`;
                    }
                },
            });
            const aliases = registerToolAliases(toolContext, mcpTool.name, handler);
            logger.info("realtime.mcp.toolRegistered", {
                tool: mcpTool.name,
                aliases,
                hitl: requiresConfirmation,
            });
        }
        logger.info("realtime.mcp.connected", {
            // Counts registered aliases, not distinct tools.
            registered: Object.keys(toolContext).length,
            skipped: skipped.length,
            skippedTools: skipped,
        });
        return { tools: toolContext, client };
    }
    catch (error) {
        // Best-effort cleanup; a close failure is logged and must not mask the
        // original error.
        await client.close().catch((closeError) => {
            logger.warn("realtime.mcp.closeFailed", {
                error: closeError instanceof Error ? closeError.message : String(closeError),
            });
        });
        throw error;
    }
}
|
|
194
|
+
//# sourceMappingURL=realtimeMcpTools.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit Agents agent definition — realtime (Gemini Live speech-to-speech).
|
|
3
|
+
*
|
|
4
|
+
* `defineRealtimeVoiceAgent` returns the agent object placed as the default
|
|
5
|
+
* export of a worker entry file. Unlike the cascaded `defineVoiceAgent`
|
|
6
|
+
* (Silero VAD → STT → NeuroLink → TTS), here a single realtime model (Gemini
|
|
7
|
+
* Live on Vertex) does STT + reasoning + TTS + turn detection over one realtime session.
|
|
8
|
+
*
|
|
9
|
+
* `@livekit/agents`, `@livekit/agents-plugin-google`, `@livekit/rtc-node`, and
|
|
10
|
+
* `@google/genai` are imported dynamically so the core package does not require
|
|
11
|
+
* them unless the realtime agent is used. Type-only imports are erased at build.
|
|
12
|
+
*
|
|
13
|
+
* See docs/features/livekit-voice-agent.md.
|
|
14
|
+
*/
|
|
15
|
+
import type { JobContext } from "@livekit/agents";
|
|
16
|
+
import type { RealtimeVoiceAgentConfig } from "../../types/index.js";
|
|
17
|
+
/**
 * Define a realtime (Gemini Live S2S) LiveKit voice agent.
 *
 * Place the result as the default export of the worker entry file and launch it
 * with `startRealtimeVoiceAgentWorker`. With no `config` everything is resolved
 * from the environment (see `resolveRealtimeVoiceConfig`).
 *
 * @param config - Optional agent configuration.
 * @returns An agent object whose `entry` function takes the LiveKit
 *   `JobContext` for a job.
 */
export declare function defineRealtimeVoiceAgent(config?: RealtimeVoiceAgentConfig): {
    entry: (ctx: JobContext) => Promise<void>;
};
|