@juspay/neurolink 9.71.0 → 9.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +330 -312
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +2 -0
- package/dist/core/toolRouting.d.ts +59 -0
- package/dist/core/toolRouting.js +232 -0
- package/dist/lib/core/constants.d.ts +1 -0
- package/dist/lib/core/constants.js +2 -0
- package/dist/lib/core/toolRouting.d.ts +59 -0
- package/dist/lib/core/toolRouting.js +233 -0
- package/dist/lib/neurolink.d.ts +31 -1
- package/dist/lib/neurolink.js +188 -1
- package/dist/lib/telemetry/attributes.js +3 -1
- package/dist/lib/types/config.d.ts +8 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/livekit.d.ts +134 -0
- package/dist/lib/types/toolRouting.d.ts +91 -0
- package/dist/lib/types/toolRouting.js +19 -0
- package/dist/lib/voice/livekit/brain.js +1 -1
- package/dist/lib/voice/livekit/config.d.ts +12 -1
- package/dist/lib/voice/livekit/config.js +54 -0
- package/dist/lib/voice/livekit/eventBridge.js +4 -4
- package/dist/lib/voice/livekit/index.d.ts +9 -2
- package/dist/lib/voice/livekit/index.js +9 -2
- package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
- package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
- package/dist/lib/voice/livekit/roomContext.js +57 -0
- package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/lib/voice/livekit/roomDispatch.js +31 -0
- package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
- package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/lib/voice/livekit/vertexAuth.js +73 -0
- package/dist/lib/voice/livekit/voiceAgent.js +47 -37
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
- package/dist/neurolink.d.ts +31 -1
- package/dist/neurolink.js +188 -1
- package/dist/telemetry/attributes.js +3 -1
- package/dist/types/config.d.ts +8 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/livekit.d.ts +134 -0
- package/dist/types/toolRouting.d.ts +91 -0
- package/dist/types/toolRouting.js +18 -0
- package/dist/voice/livekit/brain.js +1 -1
- package/dist/voice/livekit/config.d.ts +12 -1
- package/dist/voice/livekit/config.js +54 -0
- package/dist/voice/livekit/eventBridge.js +4 -4
- package/dist/voice/livekit/index.d.ts +9 -2
- package/dist/voice/livekit/index.js +9 -2
- package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/voice/livekit/realtimeEventBridge.js +160 -0
- package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/voice/livekit/realtimeMcpTools.js +193 -0
- package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
- package/dist/voice/livekit/roomContext.d.ts +23 -0
- package/dist/voice/livekit/roomContext.js +56 -0
- package/dist/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/voice/livekit/roomDispatch.js +30 -0
- package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/voice/livekit/schemaSanitizer.js +143 -0
- package/dist/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/voice/livekit/vertexAuth.js +72 -0
- package/dist/voice/livekit/voiceAgent.js +47 -37
- package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/voice/livekit/voiceAgentWorker.js +64 -0
- package/package.json +2 -1
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit Agents agent definition — realtime (Gemini Live speech-to-speech).
|
|
3
|
+
*
|
|
4
|
+
* `defineRealtimeVoiceAgent` returns the agent object placed as the default
|
|
5
|
+
* export of a worker entry file. Unlike the cascaded `defineVoiceAgent`
|
|
6
|
+
* (Silero VAD → STT → NeuroLink → TTS), here a single realtime model (Gemini
|
|
7
|
+
* Live on Vertex) does STT + reasoning + TTS + turn detection over one WebSocket.
|
|
8
|
+
*
|
|
9
|
+
* `@livekit/agents`, `@livekit/agents-plugin-google`, `@livekit/rtc-node`, and
|
|
10
|
+
* `@google/genai` are imported dynamically so the core package does not require
|
|
11
|
+
* them unless the realtime agent is used. Type-only imports are erased at build.
|
|
12
|
+
*
|
|
13
|
+
* See docs/features/livekit-voice-agent.md.
|
|
14
|
+
*/
|
|
15
|
+
import { z } from "zod";
|
|
16
|
+
import { logger } from "../../utils/logger.js";
|
|
17
|
+
import { resolveRealtimeVoiceConfig } from "./config.js";
|
|
18
|
+
import { ensureVertexAdc, clearGeminiApiKeyEnv } from "./vertexAuth.js";
|
|
19
|
+
import { readCallContextFromRoom } from "./roomContext.js";
|
|
20
|
+
import { attachRealtimeEventBridge } from "./realtimeEventBridge.js";
|
|
21
|
+
import { buildRealtimeMcpTools } from "./realtimeMcpTools.js";
|
|
22
|
+
// Shape a logger "log-event" payload must have before it is forwarded to the
// caller-supplied `onLog` sink; payloads that fail to parse are dropped.
const realtimeLogEventSchema = z.object({
    level: z.enum(["debug", "info", "warn", "error"]),
    message: z.string(),
    timestamp: z.number(),
    data: z.unknown().optional(),
});
|
|
28
|
+
/**
 * Render room-event arguments as a short string for debug logs.
 *
 * `JSON.stringify` throws on BigInt values and on circular references, and the
 * arguments here are whatever objects the room emitter passes to listeners, so
 * serialization must never be allowed to throw out of an event handler.
 *
 * @param {unknown[]} args - Arguments received by the room event listener.
 * @returns {string} At most 200 characters of JSON, or `"[unserializable]"`.
 */
function describeEventArgs(args) {
    try {
        const json = JSON.stringify(args, (_key, value) => typeof value === "bigint" ? value.toString() : value);
        return json.slice(0, 200);
    }
    catch {
        return "[unserializable]";
    }
}
/**
 * Install the per-call job lifecycle: shut down when the caller leaves, log
 * connection transitions, run the empty-room / join-deadline watchdog, and reap
 * the worker (parent) + this job (child) on shutdown.
 *
 * In `connect` mode the worker does NOT exit when a job shuts down, so the child
 * must SIGTERM its parent and hard-exit itself.
 *
 * @param ctx - Job context; this function uses `ctx.room`, `ctx.shutdown(reason)`
 *   and `ctx.addShutdownCallback(fn)`.
 * @param cfg - Resolved realtime config; reads `emptyRoomGraceMs` and
 *   `joinDeadlineMs`.
 * @returns {Promise<void>} Resolves once all listeners and timers are installed.
 */
async function installRealtimeJobLifecycle(ctx, cfg) {
    const { RoomEvent } = await import("@livekit/rtc-node");
    const joinedAt = Date.now();
    // The job ends as soon as the last remote participant is gone.
    ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
        if (ctx.room.remoteParticipants.size === 0) {
            logger.info("realtime.room.participantLeft", {
                remotes: 0,
                action: "shutdown",
            });
            ctx.shutdown("participant left");
        }
    });
    // Builds a listener that records a connection transition at debug level.
    const logConn = (label) => (...args) => {
        if (!logger.shouldLog("debug")) {
            return;
        }
        logger.debug("realtime.room.connection", {
            event: label,
            remotes: ctx.room.remoteParticipants.size,
            ...(args.length ? { detail: describeEventArgs(args) } : {}),
        });
    };
    ctx.room.on(RoomEvent.Disconnected, (...args) => {
        try {
            logConn("Disconnected")(...args);
        }
        finally {
            // Shutdown must happen even if the diagnostic log above fails.
            ctx.shutdown("room disconnected");
        }
    });
    ctx.room.on(RoomEvent.Reconnecting, logConn("Reconnecting"));
    ctx.room.on(RoomEvent.Reconnected, logConn("Reconnected"));
    ctx.room.on(RoomEvent.ConnectionStateChanged, logConn("ConnectionStateChanged"));
    ctx.room.on(RoomEvent.ConnectionQualityChanged, logConn("ConnectionQualityChanged"));
    // Watchdog state: whether anyone ever joined, and since when the room has
    // been empty after having had a participant.
    let sawParticipant = ctx.room.remoteParticipants.size > 0;
    let emptySince = null;
    const emptyRoomWatchdog = setInterval(() => {
        const remotes = ctx.room.remoteParticipants.size;
        if (remotes > 0) {
            sawParticipant = true;
            emptySince = null;
            return;
        }
        if (sawParticipant) {
            // Someone was here and left: allow a grace period before shutting down.
            emptySince ??= Date.now();
            if (Date.now() - emptySince >= cfg.emptyRoomGraceMs) {
                logger.info("realtime.watchdog.emptyRoom", {
                    graceMs: cfg.emptyRoomGraceMs,
                    action: "shutdown",
                });
                ctx.shutdown("empty-room watchdog");
            }
        }
        else if (Date.now() - joinedAt >= cfg.joinDeadlineMs) {
            // Nobody ever joined within the deadline.
            logger.info("realtime.watchdog.joinDeadline", {
                joinDeadlineMs: cfg.joinDeadlineMs,
                action: "shutdown",
            });
            ctx.shutdown("join-deadline watchdog");
        }
    }, 5000);
    // Do not let the watchdog timer alone keep the process alive.
    emptyRoomWatchdog.unref?.();
    ctx.addShutdownCallback(async () => {
        clearInterval(emptyRoomWatchdog);
    });
    if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
        ctx.addShutdownCallback(async () => {
            const parentPid = process.ppid;
            logger.info("realtime.reap.parent", {
                mode: "connect",
                jobPid: process.pid,
                parentPid,
            });
            // Never signal pid 0/1; only a real parent process is terminated.
            if (typeof parentPid === "number" && parentPid > 1) {
                try {
                    process.kill(parentPid, "SIGTERM");
                }
                catch {
                    logger.debug("realtime.reap.parentGone", { parentPid });
                }
            }
            // Hard-exit this job one second after the shutdown callback runs.
            setTimeout(() => {
                process.exit(0);
            }, 1000);
        });
    }
}
|
|
119
|
+
/**
 * Merge caller overrides over the env-resolved realtime config.
 *
 * Every override falls back to the environment value only when it is `null` or
 * `undefined`; all other keys of the environment config pass through untouched.
 *
 * @param overrides - Caller-supplied config; `tools.enabled` and `tools.mcpUrl`
 *   map onto `toolsEnabled` and `mcpUrl`.
 * @returns The effective realtime configuration.
 */
function resolveConfig(overrides) {
    const envConfig = resolveRealtimeVoiceConfig();
    const { project, location, model, voice, responseModality, systemPrompt, greeting, tools, } = overrides;
    return {
        ...envConfig,
        project: project ?? envConfig.project,
        location: location ?? envConfig.location,
        model: model ?? envConfig.model,
        voice: voice ?? envConfig.voice,
        responseModality: responseModality ?? envConfig.responseModality,
        systemPrompt: systemPrompt ?? envConfig.systemPrompt,
        greeting: greeting ?? envConfig.greeting,
        toolsEnabled: tools?.enabled ?? envConfig.toolsEnabled,
        mcpUrl: tools?.mcpUrl ?? envConfig.mcpUrl,
    };
}
|
|
135
|
+
/**
 * Define a realtime (Gemini Live S2S) LiveKit voice agent.
 *
 * Place the result as the default export of the worker entry file and launch it
 * with `startRealtimeVoiceAgentWorker`. With no `config` everything is resolved
 * from the environment (see `resolveRealtimeVoiceConfig`).
 *
 * @param config - Optional overrides. Besides the fields merged by
 *   `resolveConfig`, this reads `eventsTopic` and `controlTopic` (forwarded to
 *   the event bridge only when defined) and `onLog` (a sink that receives
 *   validated logger events together with `{ room }`).
 * @returns An object `{ entry }` where `entry(ctx)` runs one call.
 */
export function defineRealtimeVoiceAgent(config = {}) {
    const cfg = resolveConfig(config);
    const eventsTopic = config.eventsTopic;
    const controlTopic = config.controlTopic;
    const onLog = config.onLog;
    async function entry(ctx) {
        // The Gemini Live WS authenticates to Vertex via ADC; materialise it and
        // force ADC (not an API key) auth before any realtime connection.
        ensureVertexAdc();
        clearGeminiApiKeyEnv();
        logger.info("realtime.config", {
            model: cfg.model,
            location: cfg.location,
            project: cfg.project ?? null,
            voice: cfg.voice ?? null,
            modality: cfg.responseModality,
            toolsEnabled: cfg.toolsEnabled,
            mcpUrl: cfg.mcpUrl,
        });
        await ctx.connect();
        const connectedAt = Date.now();
        // Milliseconds elapsed since the room connection completed.
        const sinceConnect = () => Date.now() - connectedAt;
        if (onLog) {
            const room = ctx.room.name ?? "unknown";
            logger.setEventEmitter({
                emit: (event, ...args) => {
                    if (event === "log-event") {
                        const decoded = realtimeLogEventSchema.safeParse(args[0]);
                        if (decoded.success) {
                            try {
                                onLog(decoded.data, { room });
                            }
                            catch {
                                /* a log sink must never break the call; deliberately not
                                   logged, since logging here would re-enter this emitter */
                            }
                        }
                    }
                    return true;
                },
            });
            ctx.addShutdownCallback(async () => {
                logger.clearEventEmitter();
            });
        }
        logger.info("realtime.room.joined", {
            room: ctx.room.name ?? "unknown",
            model: cfg.model,
            location: cfg.location,
            ms: sinceConnect(),
        });
        await installRealtimeJobLifecycle(ctx, cfg);
        // Optional peers are loaded lazily so the core package does not require them.
        const { voice } = await import("@livekit/agents");
        const google = await import("@livekit/agents-plugin-google");
        const { Modality } = await import("@google/genai");
        const bridge = await attachRealtimeEventBridge({
            room: ctx.room,
            hitlTimeoutMs: cfg.hitlTimeoutMs,
            ...(eventsTopic !== undefined ? { eventsTopic } : {}),
            ...(controlTopic !== undefined ? { controlTopic } : {}),
        });
        ctx.addShutdownCallback(async () => {
            bridge.dispose();
        });
        // --- MCP tools (optional) ------------------------------------------------
        // Stays undefined when tools are disabled, the room carries no context, or
        // setup fails; the agent then runs without tools.
        let agentTools;
        if (cfg.toolsEnabled) {
            const { authToken, xContext } = readCallContextFromRoom(ctx.room.metadata);
            if (xContext === "") {
                logger.warn("realtime.mcp.noContext", {
                    reason: "room-metadata-missing-context",
                    action: "running-without-tools",
                });
            }
            else {
                try {
                    const toolset = await buildRealtimeMcpTools({
                        mcpUrl: cfg.mcpUrl,
                        authToken,
                        xContext,
                        publishEvent: bridge.publishEvent,
                        requestConfirmation: bridge.requestConfirmation,
                    });
                    agentTools = toolset.tools;
                    logger.info("realtime.mcp.enabled", {
                        mcpUrl: cfg.mcpUrl,
                        toolCount: Object.keys(toolset.tools).length,
                        hasAuthToken: authToken !== "",
                    });
                    ctx.addShutdownCallback(async () => {
                        await toolset.client.close();
                    });
                }
                catch (error) {
                    logger.error("realtime.mcp.setupFailed", {
                        mcpUrl: cfg.mcpUrl,
                        error: error instanceof Error ? error.message : String(error),
                        action: "running-without-tools",
                    });
                }
            }
        }
        // --- Realtime model ------------------------------------------------------
        // Only a configured modality that matches a `Modality` value is passed on.
        const modality = Object.values(Modality).find((value) => value === cfg.responseModality);
        const modelOptions = {
            vertexai: true,
            model: cfg.model,
            // Emit text transcripts of BOTH sides; LiveKit forwards them to the room.
            inputAudioTranscription: {},
            outputAudioTranscription: {},
        };
        if (modality !== undefined) {
            modelOptions.modalities = [modality];
        }
        if (cfg.project) {
            modelOptions.project = cfg.project;
        }
        if (cfg.location) {
            modelOptions.location = cfg.location;
        }
        if (cfg.voice) {
            modelOptions.voice = cfg.voice;
        }
        const session = new voice.AgentSession({
            llm: new google.realtime.RealtimeModel(modelOptions),
        });
        const agent = new voice.Agent({
            instructions: cfg.systemPrompt,
            ...(agentTools ? { tools: agentTools } : {}),
        });
        // Tee the assistant transcript: one branch is published to the UI as
        // "text" deltas, the other is returned unchanged for downstream use.
        agent.transcriptionNode = async (textStream, _modelSettings) => {
            const [forUi, forDownstream] = textStream.tee();
            void (async () => {
                const reader = forUi.getReader();
                let chunkCount = 0;
                try {
                    for (;;) {
                        const { done, value } = await reader.read();
                        if (done) {
                            break;
                        }
                        const chunk = typeof value === "string" ? value : (value.text ?? "");
                        if (chunk) {
                            chunkCount += 1;
                            if (chunkCount === 1) {
                                logger.debug("realtime.session.assistantFirstChunk", {
                                    ms: sinceConnect(),
                                    preview: chunk.slice(0, 30),
                                });
                            }
                            bridge.publishEvent("text", { delta: chunk });
                        }
                    }
                }
                catch (error) {
                    /* best-effort live streaming: never fail the call, but cancel this
                       branch so the tee stops buffering chunks nobody will read */
                    await reader.cancel().catch(() => { });
                    logger.debug("realtime.session.transcriptStreamFailed", {
                        ms: sinceConnect(),
                        error: error instanceof Error ? error.message : String(error),
                    });
                }
            })();
            return forDownstream;
        };
        // --- Usage metrics -------------------------------------------------------
        session.on(voice.AgentSessionEventTypes.MetricsCollected, (ev) => {
            const metrics = ev.metrics;
            if (metrics.type !== "realtime_model_metrics") {
                return;
            }
            const inAudio = metrics.inputTokenDetails.audioTokens;
            const outAudio = metrics.outputTokenDetails.audioTokens;
            // Text tokens are reported as the non-audio remainder, floored at zero.
            const inText = Math.max(0, metrics.inputTokens - inAudio);
            const outText = Math.max(0, metrics.outputTokens - outAudio);
            logger.info("realtime.usage", {
                inputTokens: metrics.inputTokens,
                inputAudioTokens: inAudio,
                inputTextTokens: inText,
                outputTokens: metrics.outputTokens,
                outputAudioTokens: outAudio,
                outputTextTokens: outText,
            });
        });
        // --- Session events → browser ------------------------------------------
        session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
            logger.debug("realtime.session.userTranscript", {
                ms: sinceConnect(),
                final: ev.isFinal,
                transcript: ev.transcript,
            });
            bridge.publishEvent("user-text", {
                text: ev.transcript,
                final: ev.isFinal,
            });
        });
        session.on(voice.AgentSessionEventTypes.AgentStateChanged, (ev) => {
            logger.debug("realtime.session.stateChanged", {
                ms: sinceConnect(),
                from: ev.oldState,
                to: ev.newState,
            });
            bridge.publishEvent("status", { state: ev.newState });
        });
        session.on(voice.AgentSessionEventTypes.ConversationItemAdded, (ev) => {
            // A non-empty assistant message marks the end of an assistant turn.
            if (ev.item.type === "message" &&
                ev.item.role === "assistant" &&
                ev.item.textContent) {
                logger.debug("realtime.session.turnDone", { ms: sinceConnect() });
                bridge.publishEvent("done", {});
            }
        });
        session.on(voice.AgentSessionEventTypes.Error, (ev) => {
            logger.error("realtime.session.error", {
                ms: sinceConnect(),
                error: ev.error,
            });
            bridge.publishEvent("status", { state: "error" });
        });
        await session.start({ agent, room: ctx.room });
        logger.info("realtime.session.start", { ms: sinceConnect() });
        bridge.publishEvent("status", { state: "listening" });
        // A blank greeting means the agent waits for the caller to speak first.
        if (cfg.greeting.trim().length > 0) {
            logger.info("realtime.session.greeting", { ms: sinceConnect() });
            session.generateReply({ instructions: cfg.greeting });
        }
    }
    return { entry };
}
|
|
362
|
+
//# sourceMappingURL=realtimeVoiceAgent.js.map
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-call context from LiveKit room metadata.
|
|
3
|
+
*
|
|
4
|
+
* The manager (e.g. a Lighthouse `/start` endpoint) pre-creates the room with
|
|
5
|
+
* `base64(JSON({ authToken, mcpContext }))` metadata, built from the caller's
|
|
6
|
+
* session. The worker reads it on join — nothing per-call comes from worker env.
|
|
7
|
+
* Returns the MCP `x-auth-token` and the base64(JSON) `x-context` the server
|
|
8
|
+
* expects.
|
|
9
|
+
*
|
|
10
|
+
* The metadata is untrusted input, so it is decoded with a zod schema rather
|
|
11
|
+
* than a trusted `JSON.parse` cast.
|
|
12
|
+
*
|
|
13
|
+
* See docs/features/livekit-voice-agent.md.
|
|
14
|
+
*/
|
|
15
|
+
import type { LiveKitRoomCallContext } from "../../types/index.js";
|
|
16
|
+
/**
|
|
17
|
+
* Decode `{ authToken, mcpContext }` from a room's base64(JSON) metadata.
|
|
18
|
+
*
|
|
19
|
+
* `authToken` may be empty (demo/guest, where the MCP server gates on the
|
|
20
|
+
* context's `demoMode`); `xContext` is the re-encoded base64(JSON) of
|
|
21
|
+
* `mcpContext`, or `""` when no context was supplied or the metadata is invalid.
|
|
22
|
+
*/
|
|
23
|
+
export declare function readCallContextFromRoom(roomMetadata: string | undefined): LiveKitRoomCallContext;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-call context from LiveKit room metadata.
|
|
3
|
+
*
|
|
4
|
+
* The manager (e.g. a Lighthouse `/start` endpoint) pre-creates the room with
|
|
5
|
+
* `base64(JSON({ authToken, mcpContext }))` metadata, built from the caller's
|
|
6
|
+
* session. The worker reads it on join — nothing per-call comes from worker env.
|
|
7
|
+
* Returns the MCP `x-auth-token` and the base64(JSON) `x-context` the server
|
|
8
|
+
* expects.
|
|
9
|
+
*
|
|
10
|
+
* The metadata is untrusted input, so it is decoded with a zod schema rather
|
|
11
|
+
* than a trusted `JSON.parse` cast.
|
|
12
|
+
*
|
|
13
|
+
* See docs/features/livekit-voice-agent.md.
|
|
14
|
+
*/
|
|
15
|
+
import { Buffer } from "node:buffer";
|
|
16
|
+
import { z } from "zod";
|
|
17
|
+
import { logger } from "../../utils/logger.js";
|
|
18
|
+
/**
 * Shape the manager writes into room metadata. Both fields are optional and
 * `mcpContext` is opaque here (validated only as "anything").
 */
const roomMetadataSchema = z.object({
    authToken: z.string().optional(),
    mcpContext: z.unknown().optional(),
});
|
|
23
|
+
/**
 * Decode the base64(JSON) metadata string into an `unknown`.
 *
 * @param {string} encoded - Base64 text whose decoded UTF-8 content is JSON.
 * @returns {unknown} The parsed value, or `undefined` (after logging an error)
 *   when the decoded text is not valid JSON.
 */
function decodeBase64Json(encoded) {
    let parsed;
    try {
        const jsonText = Buffer.from(encoded, "base64").toString("utf-8");
        parsed = JSON.parse(jsonText);
    }
    catch (error) {
        logger.error(`[RealtimeVoiceAgent] room metadata is not valid base64 JSON: ${String(error)}`);
        parsed = undefined;
    }
    return parsed;
}
|
|
33
|
+
/**
 * Decode `{ authToken, mcpContext }` from a room's base64(JSON) metadata.
 *
 * `authToken` may be empty (demo/guest, where the MCP server gates on the
 * context's `demoMode`); `xContext` is the re-encoded base64(JSON) of
 * `mcpContext`, or `""` when no context was supplied or the metadata is invalid.
 *
 * @param {string | undefined} roomMetadata - Raw metadata string from the room.
 * @returns {{ authToken: string, xContext: string }} Never throws; both fields
 *   are `""` when the metadata is missing, undecodable, or of the wrong shape.
 */
export function readCallContextFromRoom(roomMetadata) {
    const empty = { authToken: "", xContext: "" };
    if (!roomMetadata) {
        logger.warn("[RealtimeVoiceAgent] room has no metadata — MCP auth/context unavailable.");
        return empty;
    }
    const raw = decodeBase64Json(roomMetadata);
    if (raw === undefined) {
        // JSON.parse never yields `undefined`, so this can only be the decode
        // failure that decodeBase64Json has already logged; do not report it a
        // second time as a (misleading) shape error.
        return empty;
    }
    const decoded = roomMetadataSchema.safeParse(raw);
    if (!decoded.success) {
        logger.error(`[RealtimeVoiceAgent] room metadata has unexpected shape: ${decoded.error.message}`);
        return empty;
    }
    const { authToken, mcpContext } = decoded.data;
    // The context is re-encoded as base64(JSON) for the `x-context` header.
    const xContext = mcpContext === undefined || mcpContext === null
        ? ""
        : Buffer.from(JSON.stringify(mcpContext), "utf-8").toString("base64");
    return { authToken: authToken ?? "", xContext };
}
|
|
57
|
+
//# sourceMappingURL=roomContext.js.map
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit server-side room operations: create a room with metadata, and
|
|
3
|
+
* dispatch a named agent to a room.
|
|
4
|
+
*
|
|
5
|
+
* Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
|
|
6
|
+
* consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
|
|
7
|
+
* — they never depend on the SDK directly. Mirrors `mintJoinToken`.
|
|
8
|
+
*/
|
|
9
|
+
import type { LiveKitServerCredentials } from "../../types/index.js";
|
|
10
|
+
export declare function createVoiceRoom(req: LiveKitServerCredentials & {
|
|
11
|
+
room: string;
|
|
12
|
+
metadata?: string;
|
|
13
|
+
emptyTimeoutSeconds?: number;
|
|
14
|
+
departureTimeoutSeconds?: number;
|
|
15
|
+
}): Promise<void>;
|
|
16
|
+
/**
|
|
17
|
+
* Explicitly dispatch a named agent to a room. The long-lived worker registered
|
|
18
|
+
* under `agentName` receives the job and forks a child to run the call.
|
|
19
|
+
*/
|
|
20
|
+
export declare function dispatchVoiceAgent(req: LiveKitServerCredentials & {
|
|
21
|
+
room: string;
|
|
22
|
+
agentName: string;
|
|
23
|
+
metadata?: string;
|
|
24
|
+
}): Promise<void>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit server-side room operations: create a room with metadata, and
|
|
3
|
+
* dispatch a named agent to a room.
|
|
4
|
+
*
|
|
5
|
+
* Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
|
|
6
|
+
* consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
|
|
7
|
+
* — they never depend on the SDK directly. Mirrors `mintJoinToken`.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Map a WebSocket URL onto its HTTP counterpart (`ws:` → `http:`, `wss:` →
 * `https:`) for the server-side clients built below.
 *
 * Only a leading `ws:` / `wss:` scheme is rewritten, matched case-insensitively
 * because URL schemes are case-insensitive. Anything else, including a
 * scheme-less host that merely starts with "ws", is returned unchanged.
 *
 * @param {string} url - LiveKit server URL.
 * @returns {string} The URL with an HTTP(S) scheme.
 */
const toHttpUrl = (url) => url.replace(/^ws(s?):/i, (_match, secure) => (secure ? "https:" : "http:"));
|
|
10
|
+
/**
 * Create a LiveKit room, optionally carrying metadata.
 *
 * @param req - Server credentials (`url`, `apiKey`, `apiSecret`) plus `room`,
 *   and optional `metadata` (default `""`), `emptyTimeoutSeconds` (default 300)
 *   and `departureTimeoutSeconds` (default 20).
 * @returns {Promise<void>} Resolves once the create-room call completes.
 */
export async function createVoiceRoom(req) {
    const sdk = await import("livekit-server-sdk");
    const { url, apiKey, apiSecret, room, metadata, emptyTimeoutSeconds, departureTimeoutSeconds, } = req;
    const roomService = new sdk.RoomServiceClient(toHttpUrl(url), apiKey, apiSecret);
    const roomOptions = {
        name: room,
        metadata: metadata ?? "",
        emptyTimeout: emptyTimeoutSeconds ?? 300,
        departureTimeout: departureTimeoutSeconds ?? 20,
    };
    await roomService.createRoom(roomOptions);
}
|
|
20
|
+
/**
 * Explicitly dispatch a named agent to a room. The long-lived worker registered
 * under `agentName` receives the job and forks a child to run the call.
 *
 * @param req - Server credentials (`url`, `apiKey`, `apiSecret`) plus `room`,
 *   `agentName`, and optional `metadata` (default `""`).
 * @returns {Promise<void>} Resolves once the dispatch call completes.
 */
export async function dispatchVoiceAgent(req) {
    const sdk = await import("livekit-server-sdk");
    const { url, apiKey, apiSecret, room, agentName, metadata } = req;
    const dispatcher = new sdk.AgentDispatchClient(toHttpUrl(url), apiKey, apiSecret);
    const dispatchOptions = { metadata: metadata ?? "" };
    await dispatcher.createDispatch(room, agentName, dispatchOptions);
}
|
|
31
|
+
//# sourceMappingURL=roomDispatch.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini function-calling schema sanitizer.
|
|
3
|
+
*
|
|
4
|
+
* Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
|
|
5
|
+
* accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
|
|
6
|
+
* `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
|
|
7
|
+
* every node gets a concrete `type`, unions collapse to their first concrete
|
|
8
|
+
* branch, and unsupported keywords are dropped.
|
|
9
|
+
*
|
|
10
|
+
* Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
|
|
11
|
+
* narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
|
|
15
|
+
* with a concrete `type` on every node.
|
|
16
|
+
*/
|
|
17
|
+
export declare function sanitizeSchema(node: unknown): Record<string, unknown>;
|
|
18
|
+
/** Tool parameters must be an object schema; force it and sanitize the tree. */
|
|
19
|
+
export declare function sanitizeToolParameters(schema: unknown): Record<string, unknown>;
|
|
20
|
+
/**
|
|
21
|
+
* Walk a (sanitized) schema and return the first node the google plugin would
|
|
22
|
+
* turn into `undefined` — which genai then crashes on. Returns a human-readable
|
|
23
|
+
* path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
|
|
24
|
+
* should always be `null`; if not, it names the exact offending path.
|
|
25
|
+
*/
|
|
26
|
+
export declare function findSchemaIssue(node: unknown, pathPrefix?: string): string | null;
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini function-calling schema sanitizer.
|
|
3
|
+
*
|
|
4
|
+
* Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
|
|
5
|
+
* accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
|
|
6
|
+
* `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
|
|
7
|
+
* every node gets a concrete `type`, unions collapse to their first concrete
|
|
8
|
+
* branch, and unsupported keywords are dropped.
|
|
9
|
+
*
|
|
10
|
+
* Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
|
|
11
|
+
* narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
|
|
12
|
+
*/
|
|
13
|
+
/** JSON Schema `type` names kept as-is; any other named type is coerced to `"string"`. */
const GEMINI_TYPES = new Set([
    "string",
    "number",
    "integer",
    "boolean",
    "array",
    "object",
]);
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value) {
    return typeof value === "object" && value !== null && !Array.isArray(value);
}
/**
 * The first concrete (non-`"null"`-typed) branch of an `anyOf`/`oneOf`/`allOf`
 * union, or `undefined` when there is no union to collapse (no such keyword, or
 * no branch that is an object with a non-`"null"` type).
 */
function firstConcreteUnionBranch(schema) {
    const union = schema.anyOf ?? schema.oneOf ?? schema.allOf;
    if (!Array.isArray(union)) {
        return undefined;
    }
    return union.find((branch) => isRecord(branch) && branch.type !== "null");
}
/**
 * Pick the single concrete `type` for a schema node.
 *
 * Order of precedence: a string `type` (unsupported names become `"string"`),
 * the first supported entry of an array `type`, then structural hints
 * (`properties` → `"object"`, `items` → `"array"`), and finally `"string"`.
 */
function resolveSchemaType(schema) {
    if (typeof schema.type === "string") {
        return GEMINI_TYPES.has(schema.type) ? schema.type : "string";
    }
    if (Array.isArray(schema.type)) {
        // Scan the whole list: `"null"` and unknown names are skipped rather than
        // ending the search, so ["null", "custom", "integer"] resolves to "integer".
        const supported = schema.type.find((entry) => typeof entry === "string" && GEMINI_TYPES.has(entry));
        if (supported !== undefined) {
            return supported;
        }
    }
    if (isRecord(schema.properties)) {
        return "object";
    }
    if (schema.items !== undefined) {
        return "array";
    }
    return "string";
}
/**
 * Fill in the object-specific members of `out` from `schema`: sanitized
 * `properties`, a `required` list restricted to declared properties, and
 * `additionalProperties: true` when no properties remain.
 */
function sanitizeObjectMembers(schema, out) {
    const properties = isRecord(schema.properties) ? schema.properties : {};
    // Object.fromEntries defines own properties, so a key such as "__proto__"
    // cannot replace the prototype of the property map.
    const sanitizedProperties = Object.fromEntries(Object.entries(properties).map(([key, value]) => [key, sanitizeSchema(value)]));
    out.properties = sanitizedProperties;
    if (Array.isArray(schema.required)) {
        // Own-property check: `in` would also accept inherited names such as
        // "toString" or "constructor" that are not declared properties.
        out.required = schema.required.filter((name) => typeof name === "string" && Object.hasOwn(sanitizedProperties, name));
    }
    if (Object.keys(sanitizedProperties).length === 0) {
        out.additionalProperties = true;
    }
}
/**
 * Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
 * with a concrete `type` on every node.
 *
 * @param {unknown} node - Any value; non-objects become `{ type: "string" }`.
 * @returns {Record<string, unknown>} A new schema holding only `description`,
 *   `type`, `enum`, and the object/array members relevant to that type.
 */
export function sanitizeSchema(node) {
    if (!isRecord(node)) {
        return { type: "string" };
    }
    const out = {};
    if (typeof node.description === "string") {
        out.description = node.description;
    }
    if (typeof node.type !== "string") {
        // No single named type: collapse a union to its first concrete branch,
        // letting a non-empty description on this node override the branch's.
        const branch = firstConcreteUnionBranch(node);
        if (branch !== undefined) {
            const merged = sanitizeSchema(branch);
            return out.description
                ? { ...merged, description: out.description }
                : merged;
        }
    }
    const type = resolveSchemaType(node);
    out.type = type;
    if (Array.isArray(node.enum)) {
        // Copied so the result shares no array with the input schema.
        out.enum = [...node.enum];
    }
    if (type === "object") {
        sanitizeObjectMembers(node, out);
    }
    if (type === "array") {
        out.items = sanitizeSchema(node.items);
    }
    return out;
}
/**
 * Tool parameters must be an object schema; force it and sanitize the tree.
 *
 * @param {unknown} schema - The tool's input schema; `null`/`undefined` is
 *   treated as `{}`.
 * @returns {Record<string, unknown>} The sanitized schema, or an open empty
 *   object schema when the sanitized root is not of type `"object"`.
 */
export function sanitizeToolParameters(schema) {
    const sanitized = sanitizeSchema(schema ?? {});
    if (sanitized.type !== "object") {
        return { type: "object", properties: {}, additionalProperties: true };
    }
    return sanitized;
}
/**
 * Walk a (sanitized) schema and return the first node the google plugin would
 * turn into `undefined` — which genai then crashes on. Returns a human-readable
 * path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
 * should always be `null`; if not, it names the exact offending path.
 *
 * @param {unknown} node - Schema node to inspect.
 * @param {string} [pathPrefix="$"] - Path of `node`, used in the message.
 * @returns {string | null} `"<path>: <reason>"` for the first issue, else `null`.
 */
export function findSchemaIssue(node, pathPrefix = "$") {
    if (!isRecord(node)) {
        return `${pathPrefix}: not an object schema`;
    }
    if (typeof node.type !== "string") {
        return `${pathPrefix}: missing string "type"`;
    }
    if (node.type === "object") {
        const properties = isRecord(node.properties) ? node.properties : undefined;
        const isEmpty = properties === undefined || Object.keys(properties).length === 0;
        if (isEmpty &&
            (node.additionalProperties === undefined ||
                node.additionalProperties === null)) {
            return `${pathPrefix}: empty object schema without additionalProperties (plugin → undefined)`;
        }
        if (properties !== undefined) {
            for (const [key, value] of Object.entries(properties)) {
                const childIssue = findSchemaIssue(value, `${pathPrefix}.${key}`);
                if (childIssue) {
                    return childIssue;
                }
            }
        }
    }
    if (node.type === "array") {
        return findSchemaIssue(node.items, `${pathPrefix}[]`);
    }
    return null;
}
|
|
144
|
+
//# sourceMappingURL=schemaSanitizer.js.map
|