@juspay/neurolink 9.72.0 → 9.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/browser/neurolink.min.js +51 -51
- package/dist/lib/types/livekit.d.ts +134 -0
- package/dist/lib/voice/livekit/brain.js +1 -1
- package/dist/lib/voice/livekit/config.d.ts +12 -1
- package/dist/lib/voice/livekit/config.js +54 -0
- package/dist/lib/voice/livekit/eventBridge.js +4 -4
- package/dist/lib/voice/livekit/index.d.ts +9 -2
- package/dist/lib/voice/livekit/index.js +9 -2
- package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
- package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
- package/dist/lib/voice/livekit/roomContext.js +57 -0
- package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/lib/voice/livekit/roomDispatch.js +31 -0
- package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
- package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/lib/voice/livekit/vertexAuth.js +73 -0
- package/dist/lib/voice/livekit/voiceAgent.js +47 -37
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
- package/dist/types/livekit.d.ts +134 -0
- package/dist/voice/livekit/brain.js +1 -1
- package/dist/voice/livekit/config.d.ts +12 -1
- package/dist/voice/livekit/config.js +54 -0
- package/dist/voice/livekit/eventBridge.js +4 -4
- package/dist/voice/livekit/index.d.ts +9 -2
- package/dist/voice/livekit/index.js +9 -2
- package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/voice/livekit/realtimeEventBridge.js +160 -0
- package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/voice/livekit/realtimeMcpTools.js +193 -0
- package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
- package/dist/voice/livekit/roomContext.d.ts +23 -0
- package/dist/voice/livekit/roomContext.js +56 -0
- package/dist/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/voice/livekit/roomDispatch.js +30 -0
- package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/voice/livekit/schemaSanitizer.js +143 -0
- package/dist/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/voice/livekit/vertexAuth.js +72 -0
- package/dist/voice/livekit/voiceAgent.js +47 -37
- package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/voice/livekit/voiceAgentWorker.js +64 -0
- package/package.json +2 -1
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit server-side room operations: create a room with metadata, and
|
|
3
|
+
* dispatch a named agent to a room.
|
|
4
|
+
*
|
|
5
|
+
* Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
|
|
6
|
+
* consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
|
|
7
|
+
* — they never depend on the SDK directly. Mirrors `mintJoinToken`.
|
|
8
|
+
*/
|
|
9
|
+
import type { LiveKitServerCredentials } from "../../types/index.js";
|
|
10
|
+
export declare function createVoiceRoom(req: LiveKitServerCredentials & {
|
|
11
|
+
room: string;
|
|
12
|
+
metadata?: string;
|
|
13
|
+
emptyTimeoutSeconds?: number;
|
|
14
|
+
departureTimeoutSeconds?: number;
|
|
15
|
+
}): Promise<void>;
|
|
16
|
+
/**
|
|
17
|
+
* Explicitly dispatch a named agent to a room. The long-lived worker registered
|
|
18
|
+
* under `agentName` receives the job and forks a child to run the call.
|
|
19
|
+
*/
|
|
20
|
+
export declare function dispatchVoiceAgent(req: LiveKitServerCredentials & {
|
|
21
|
+
room: string;
|
|
22
|
+
agentName: string;
|
|
23
|
+
metadata?: string;
|
|
24
|
+
}): Promise<void>;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LiveKit server-side room operations: create a room with metadata, and
|
|
3
|
+
* dispatch a named agent to a room.
|
|
4
|
+
*
|
|
5
|
+
* Wraps `livekit-server-sdk` (an optional dependency, imported dynamically) so
|
|
6
|
+
* consumers route all LiveKit *server* calls through `@juspay/neurolink/livekit`
|
|
7
|
+
* — they never depend on the SDK directly. Mirrors `mintJoinToken`.
|
|
8
|
+
*/
|
|
9
|
+
/**
 * Convert a LiveKit WebSocket URL into the HTTP(S) base URL passed to the
 * server SDK clients: `ws:` becomes `http:` and `wss:` becomes `https:`.
 *
 * Only a leading `ws:`/`wss:` scheme is rewritten (case-insensitively). A
 * scheme-less host that merely starts with "ws", or a URL that is already
 * HTTP(S), is returned unchanged.
 */
const toHttpUrl = (url) => url.replace(/^ws(s?):/i, (_match, secure) => (secure ? "https:" : "http:"));
|
|
10
|
+
/**
 * Create a LiveKit room through the server API.
 *
 * `livekit-server-sdk` is imported lazily, so it is only loaded when this
 * function is actually called.
 *
 * @param req Server credentials (`url`, `apiKey`, `apiSecret`) plus the room
 *   `name` and optional settings. `metadata` defaults to an empty string,
 *   `emptyTimeoutSeconds` to 300 and `departureTimeoutSeconds` to 20.
 * @returns A promise that resolves once the create-room call completes; the
 *   SDK's return value is discarded.
 */
export async function createVoiceRoom(req) {
    const sdk = await import("livekit-server-sdk");
    const roomService = new sdk.RoomServiceClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
    const roomOptions = {
        name: req.room,
        metadata: req.metadata ?? "",
        emptyTimeout: req.emptyTimeoutSeconds ?? 300,
        departureTimeout: req.departureTimeoutSeconds ?? 20,
    };
    await roomService.createRoom(roomOptions);
}
|
|
20
|
+
/**
 * Explicitly dispatch the agent registered under `req.agentName` to the room
 * `req.room`.
 *
 * `livekit-server-sdk` is imported lazily. `metadata` defaults to an empty
 * string when omitted.
 *
 * @param req Server credentials (`url`, `apiKey`, `apiSecret`) plus `room`,
 *   `agentName` and optional `metadata`.
 * @returns A promise that resolves once the dispatch call completes; the
 *   SDK's return value is discarded.
 */
export async function dispatchVoiceAgent(req) {
    const sdk = await import("livekit-server-sdk");
    const dispatcher = new sdk.AgentDispatchClient(toHttpUrl(req.url), req.apiKey, req.apiSecret);
    const dispatchOptions = { metadata: req.metadata ?? "" };
    await dispatcher.createDispatch(req.room, req.agentName, dispatchOptions);
}
|
|
31
|
+
//# sourceMappingURL=roomDispatch.js.map
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini function-calling schema sanitizer.
|
|
3
|
+
*
|
|
4
|
+
* Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
|
|
5
|
+
* accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
|
|
6
|
+
* `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
|
|
7
|
+
* every node gets a concrete `type`, unions collapse to their first concrete
|
|
8
|
+
* branch, and unsupported keywords are dropped.
|
|
9
|
+
*
|
|
10
|
+
* Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
|
|
11
|
+
* narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
|
|
12
|
+
*/
|
|
13
|
+
/**
|
|
14
|
+
* Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
|
|
15
|
+
* with a concrete `type` on every node.
|
|
16
|
+
*/
|
|
17
|
+
export declare function sanitizeSchema(node: unknown): Record<string, unknown>;
|
|
18
|
+
/** Tool parameters must be an object schema; force it and sanitize the tree. */
|
|
19
|
+
export declare function sanitizeToolParameters(schema: unknown): Record<string, unknown>;
|
|
20
|
+
/**
|
|
21
|
+
* Walk a (sanitized) schema and return the first node the google plugin would
|
|
22
|
+
* turn into `undefined` — which genai then crashes on. Returns a human-readable
|
|
23
|
+
* path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
|
|
24
|
+
* should always be `null`; if not, it names the exact offending path.
|
|
25
|
+
*/
|
|
26
|
+
export declare function findSchemaIssue(node: unknown, pathPrefix?: string): string | null;
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gemini function-calling schema sanitizer.
|
|
3
|
+
*
|
|
4
|
+
* Normalises an MCP tool's JSON Schema into the subset Gemini's function-calling
|
|
5
|
+
* accepts. `@google/genai`'s processJsonSchema crashes on untyped nodes,
|
|
6
|
+
* `$ref`/`$defs`, and some `anyOf`/`oneOf` shapes, so we rebuild a clean tree:
|
|
7
|
+
* every node gets a concrete `type`, unions collapse to their first concrete
|
|
8
|
+
* branch, and unsupported keywords are dropped.
|
|
9
|
+
*
|
|
10
|
+
* Pure, dependency-free, and assertion-free — values arrive as `unknown` and are
|
|
11
|
+
* narrowed with the `isRecord` guard. Safe to reuse for any Gemini tool path.
|
|
12
|
+
*/
|
|
13
|
+
const GEMINI_TYPES = new Set([
|
|
14
|
+
"string",
|
|
15
|
+
"number",
|
|
16
|
+
"integer",
|
|
17
|
+
"boolean",
|
|
18
|
+
"array",
|
|
19
|
+
"object",
|
|
20
|
+
]);
|
|
21
|
+
function isRecord(value) {
|
|
22
|
+
return typeof value === "object" && value !== null && !Array.isArray(value);
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* The first concrete (non-`"null"`-typed) branch of an `anyOf`/`oneOf`/`allOf`
|
|
26
|
+
* union, or `undefined` when there is no union to collapse.
|
|
27
|
+
*/
|
|
28
|
+
function firstConcreteUnionBranch(schema) {
|
|
29
|
+
const union = schema.anyOf ?? schema.oneOf ?? schema.allOf;
|
|
30
|
+
if (!Array.isArray(union)) {
|
|
31
|
+
return undefined;
|
|
32
|
+
}
|
|
33
|
+
return union.find((branch) => isRecord(branch) && branch.type !== "null");
|
|
34
|
+
}
|
|
35
|
+
function resolveSchemaType(schema) {
|
|
36
|
+
if (typeof schema.type === "string") {
|
|
37
|
+
return GEMINI_TYPES.has(schema.type) ? schema.type : "string";
|
|
38
|
+
}
|
|
39
|
+
if (Array.isArray(schema.type)) {
|
|
40
|
+
const named = schema.type.find((entry) => typeof entry === "string" && entry !== "null");
|
|
41
|
+
if (named !== undefined && GEMINI_TYPES.has(named)) {
|
|
42
|
+
return named;
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
if (isRecord(schema.properties)) {
|
|
46
|
+
return "object";
|
|
47
|
+
}
|
|
48
|
+
if (schema.items !== undefined) {
|
|
49
|
+
return "array";
|
|
50
|
+
}
|
|
51
|
+
return "string";
|
|
52
|
+
}
|
|
53
|
+
function sanitizeObjectMembers(schema, out) {
|
|
54
|
+
const properties = isRecord(schema.properties) ? schema.properties : {};
|
|
55
|
+
const sanitizedProperties = {};
|
|
56
|
+
for (const [key, value] of Object.entries(properties)) {
|
|
57
|
+
sanitizedProperties[key] = sanitizeSchema(value);
|
|
58
|
+
}
|
|
59
|
+
out.properties = sanitizedProperties;
|
|
60
|
+
if (Array.isArray(schema.required)) {
|
|
61
|
+
out.required = schema.required.filter((name) => typeof name === "string" && name in sanitizedProperties);
|
|
62
|
+
}
|
|
63
|
+
if (Object.keys(sanitizedProperties).length === 0) {
|
|
64
|
+
out.additionalProperties = true;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Rebuild a JSON Schema node into the Gemini-safe subset. Returns a fresh object
|
|
69
|
+
* with a concrete `type` on every node.
|
|
70
|
+
*/
|
|
71
|
+
export function sanitizeSchema(node) {
|
|
72
|
+
if (!isRecord(node)) {
|
|
73
|
+
return { type: "string" };
|
|
74
|
+
}
|
|
75
|
+
const out = {};
|
|
76
|
+
if (typeof node.description === "string") {
|
|
77
|
+
out.description = node.description;
|
|
78
|
+
}
|
|
79
|
+
if (typeof node.type !== "string") {
|
|
80
|
+
const branch = firstConcreteUnionBranch(node);
|
|
81
|
+
if (branch !== undefined) {
|
|
82
|
+
const merged = sanitizeSchema(branch);
|
|
83
|
+
return out.description
|
|
84
|
+
? { ...merged, description: out.description }
|
|
85
|
+
: merged;
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
const type = resolveSchemaType(node);
|
|
89
|
+
out.type = type;
|
|
90
|
+
if (Array.isArray(node.enum)) {
|
|
91
|
+
out.enum = node.enum;
|
|
92
|
+
}
|
|
93
|
+
if (type === "object") {
|
|
94
|
+
sanitizeObjectMembers(node, out);
|
|
95
|
+
}
|
|
96
|
+
if (type === "array") {
|
|
97
|
+
out.items = sanitizeSchema(node.items);
|
|
98
|
+
}
|
|
99
|
+
return out;
|
|
100
|
+
}
|
|
101
|
+
/** Tool parameters must be an object schema; force it and sanitize the tree. */
|
|
102
|
+
export function sanitizeToolParameters(schema) {
|
|
103
|
+
const sanitized = sanitizeSchema(schema ?? {});
|
|
104
|
+
if (sanitized.type !== "object") {
|
|
105
|
+
return { type: "object", properties: {}, additionalProperties: true };
|
|
106
|
+
}
|
|
107
|
+
return sanitized;
|
|
108
|
+
}
|
|
109
|
+
/**
|
|
110
|
+
* Walk a (sanitized) schema and return the first node the google plugin would
|
|
111
|
+
* turn into `undefined` — which genai then crashes on. Returns a human-readable
|
|
112
|
+
* path/reason, or `null` if the schema is safe. After `sanitizeSchema` this
|
|
113
|
+
* should always be `null`; if not, it names the exact offending path.
|
|
114
|
+
*/
|
|
115
|
+
export function findSchemaIssue(node, pathPrefix = "$") {
|
|
116
|
+
if (!isRecord(node)) {
|
|
117
|
+
return `${pathPrefix}: not an object schema`;
|
|
118
|
+
}
|
|
119
|
+
if (typeof node.type !== "string") {
|
|
120
|
+
return `${pathPrefix}: missing string "type"`;
|
|
121
|
+
}
|
|
122
|
+
if (node.type === "object") {
|
|
123
|
+
const properties = isRecord(node.properties) ? node.properties : undefined;
|
|
124
|
+
const isEmpty = properties === undefined || Object.keys(properties).length === 0;
|
|
125
|
+
if (isEmpty &&
|
|
126
|
+
(node.additionalProperties === undefined ||
|
|
127
|
+
node.additionalProperties === null)) {
|
|
128
|
+
return `${pathPrefix}: empty object schema without additionalProperties (plugin → undefined)`;
|
|
129
|
+
}
|
|
130
|
+
if (properties !== undefined) {
|
|
131
|
+
for (const [key, value] of Object.entries(properties)) {
|
|
132
|
+
const childIssue = findSchemaIssue(value, `${pathPrefix}.${key}`);
|
|
133
|
+
if (childIssue) {
|
|
134
|
+
return childIssue;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
}
|
|
139
|
+
if (node.type === "array") {
|
|
140
|
+
return findSchemaIssue(node.items, `${pathPrefix}[]`);
|
|
141
|
+
}
|
|
142
|
+
return null;
|
|
143
|
+
}
|
|
144
|
+
//# sourceMappingURL=schemaSanitizer.js.map
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vertex authentication helpers for the realtime voice agent.
|
|
3
|
+
*
|
|
4
|
+
* The Gemini Live WebSocket authenticates to Vertex via Application Default
|
|
5
|
+
* Credentials (ADC). These helpers materialise ADC from the split
|
|
6
|
+
* `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
|
|
7
|
+
* any Gemini Developer API key from the environment so `@google/genai` uses
|
|
8
|
+
* Vertex/ADC auth (not an API key) for the realtime WebSocket.
|
|
9
|
+
*
|
|
10
|
+
* See docs/features/livekit-voice-agent.md.
|
|
11
|
+
*/
|
|
12
|
+
/**
|
|
13
|
+
* Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
|
|
14
|
+
*
|
|
15
|
+
* The google realtime plugin authenticates Vertex via ADC (it does not accept
|
|
16
|
+
* inline credentials), so this writes a temp service-account JSON and points
|
|
17
|
+
* `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
|
|
18
|
+
* No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
|
|
19
|
+
* fields are absent (auth then relies on ambient ADC).
|
|
20
|
+
*/
|
|
21
|
+
export declare function ensureVertexAdc(): void;
|
|
22
|
+
/**
|
|
23
|
+
* Force pure Vertex/ADC auth for the Gemini Live WebSocket.
|
|
24
|
+
*
|
|
25
|
+
* `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
|
|
26
|
+
* WebSocket auth even when `vertexai: true` and project/location are set, which
|
|
27
|
+
* Vertex rejects at the handshake (WS close 1006). The realtime worker only
|
|
28
|
+
* ever talks to Vertex, so remove these keys (only affects this process).
|
|
29
|
+
*/
|
|
30
|
+
export declare function clearGeminiApiKeyEnv(): void;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vertex authentication helpers for the realtime voice agent.
|
|
3
|
+
*
|
|
4
|
+
* The Gemini Live WebSocket authenticates to Vertex via Application Default
|
|
5
|
+
* Credentials (ADC). These helpers materialise ADC from the split
|
|
6
|
+
* `GOOGLE_AUTH_*` env fields when no credentials file is configured, and remove
|
|
7
|
+
* any Gemini Developer API key from the environment so `@google/genai` uses
|
|
8
|
+
* Vertex/ADC auth (not an API key) for the realtime WebSocket.
|
|
9
|
+
*
|
|
10
|
+
* See docs/features/livekit-voice-agent.md.
|
|
11
|
+
*/
|
|
12
|
+
import { mkdtempSync, rmSync, writeFileSync } from "node:fs";
|
|
13
|
+
import os from "node:os";
|
|
14
|
+
import path from "node:path";
|
|
15
|
+
import { logger } from "../../utils/logger.js";
|
|
16
|
+
/**
|
|
17
|
+
* Materialise Vertex ADC from the split `GOOGLE_AUTH_*` env fields.
|
|
18
|
+
*
|
|
19
|
+
* The google realtime plugin authenticates Vertex via ADC (it does not accept
|
|
20
|
+
* inline credentials), so this writes a temp service-account JSON and points
|
|
21
|
+
* `GOOGLE_APPLICATION_CREDENTIALS` at it — unless ADC is already configured.
|
|
22
|
+
* No-op when `GOOGLE_APPLICATION_CREDENTIALS` is set or the `GOOGLE_AUTH_*`
|
|
23
|
+
* fields are absent (auth then relies on ambient ADC).
|
|
24
|
+
*/
|
|
25
|
+
export function ensureVertexAdc() {
|
|
26
|
+
if (process.env.GOOGLE_APPLICATION_CREDENTIALS) {
|
|
27
|
+
return;
|
|
28
|
+
}
|
|
29
|
+
const clientEmail = process.env.GOOGLE_AUTH_CLIENT_EMAIL;
|
|
30
|
+
const rawPrivateKey = process.env.GOOGLE_AUTH_PRIVATE_KEY;
|
|
31
|
+
if (!clientEmail || !rawPrivateKey) {
|
|
32
|
+
logger.warn("[RealtimeVoiceAgent] No GOOGLE_APPLICATION_CREDENTIALS and no GOOGLE_AUTH_* fields — Vertex auth will rely on ambient ADC.");
|
|
33
|
+
return;
|
|
34
|
+
}
|
|
35
|
+
const credentials = {
|
|
36
|
+
type: process.env.GOOGLE_AUTH_TYPE ?? "service_account",
|
|
37
|
+
project_id: process.env.GOOGLE_AUTH_BREEZE_PROJECT_ID ??
|
|
38
|
+
process.env.GOOGLE_CLOUD_PROJECT_ID,
|
|
39
|
+
private_key_id: process.env.GOOGLE_AUTH_PRIVATE_KEY_ID,
|
|
40
|
+
private_key: rawPrivateKey.replace(/\\n/g, "\n"),
|
|
41
|
+
client_email: clientEmail,
|
|
42
|
+
token_uri: process.env.GOOGLE_AUTH_TOKEN_URI ??
|
|
43
|
+
"https://oauth2.googleapis.com/token",
|
|
44
|
+
};
|
|
45
|
+
const credentialsDir = mkdtempSync(path.join(os.tmpdir(), "vertex-adc-"));
|
|
46
|
+
const credentialsPath = path.join(credentialsDir, "adc.json");
|
|
47
|
+
writeFileSync(credentialsPath, JSON.stringify(credentials), {
|
|
48
|
+
mode: 0o600,
|
|
49
|
+
flag: "wx",
|
|
50
|
+
});
|
|
51
|
+
process.on("exit", () => {
|
|
52
|
+
rmSync(credentialsDir, { recursive: true, force: true });
|
|
53
|
+
});
|
|
54
|
+
process.env.GOOGLE_APPLICATION_CREDENTIALS = credentialsPath;
|
|
55
|
+
logger.info(`[RealtimeVoiceAgent] Vertex ADC written to ${credentialsPath} (project ${credentials.project_id}).`);
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Force pure Vertex/ADC auth for the Gemini Live WebSocket.
|
|
59
|
+
*
|
|
60
|
+
* `@google/genai` 1.52+ uses a Gemini Developer API key for the realtime
|
|
61
|
+
* WebSocket auth even when `vertexai: true` and project/location are set, which
|
|
62
|
+
* Vertex rejects at the handshake (WS close 1006). The realtime worker only
|
|
63
|
+
* ever talks to Vertex, so remove these keys (only affects this process).
|
|
64
|
+
*/
|
|
65
|
+
export function clearGeminiApiKeyEnv() {
|
|
66
|
+
for (const key of ["GOOGLE_API_KEY", "GOOGLE_AI_API_KEY", "GEMINI_API_KEY"]) {
|
|
67
|
+
if (process.env[key]) {
|
|
68
|
+
delete process.env[key];
|
|
69
|
+
logger.info(`[RealtimeVoiceAgent] cleared ${key} so genai uses Vertex/ADC auth (not API key) for the Live WS.`);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
//# sourceMappingURL=vertexAuth.js.map
|
|
@@ -192,9 +192,20 @@ export function defineVoiceAgent(config) {
|
|
|
192
192
|
async function entry(ctx) {
|
|
193
193
|
const entryStartedAt = Date.now();
|
|
194
194
|
await ctx.connect();
|
|
195
|
-
logger.debug(
|
|
196
|
-
|
|
197
|
-
|
|
195
|
+
logger.debug("voice.agent.roomJoined", {
|
|
196
|
+
room: ctx.room.name,
|
|
197
|
+
ms: Date.now() - entryStartedAt,
|
|
198
|
+
});
|
|
199
|
+
const { RoomEvent } = await import("@livekit/rtc-node");
|
|
200
|
+
ctx.room.on(RoomEvent.ParticipantDisconnected, () => {
|
|
201
|
+
if (ctx.room.remoteParticipants.size === 0) {
|
|
202
|
+
logger.info("voice.agent.participantLeft", {
|
|
203
|
+
room: ctx.room.name,
|
|
204
|
+
action: "shutdown",
|
|
205
|
+
});
|
|
206
|
+
ctx.shutdown("participant left");
|
|
207
|
+
}
|
|
208
|
+
});
|
|
198
209
|
let userStoppedSpeakingAt;
|
|
199
210
|
const neurolink = await config.createNeuroLink();
|
|
200
211
|
const brain = createVoiceBrain({
|
|
@@ -245,11 +256,6 @@ export function defineVoiceAgent(config) {
|
|
|
245
256
|
final: false,
|
|
246
257
|
});
|
|
247
258
|
}
|
|
248
|
-
/**
|
|
249
|
-
* Lock the user bubble at turn-end and reset the buffer for the next turn.
|
|
250
|
-
* `replacesPrevious` tells the client this committed turn absorbed a prior
|
|
251
|
-
* interrupted turn, so it should remove the orphaned previous user bubble.
|
|
252
|
-
*/
|
|
253
259
|
function commitUserTranscript(finalText, replacesPrevious = false) {
|
|
254
260
|
if (transcriptEmitter !== undefined) {
|
|
255
261
|
transcriptEmitter.emit("voice:user-transcript", {
|
|
@@ -274,7 +280,9 @@ export function defineVoiceAgent(config) {
|
|
|
274
280
|
pendingPrefix = "";
|
|
275
281
|
commitUserTranscript(promptText, hadPrefix);
|
|
276
282
|
if (userStoppedSpeakingAt !== undefined) {
|
|
277
|
-
logger.debug(
|
|
283
|
+
logger.debug("voice.agent.endpointingWaited", {
|
|
284
|
+
ms: Date.now() - userStoppedSpeakingAt,
|
|
285
|
+
});
|
|
278
286
|
}
|
|
279
287
|
return brainTurnStream(brain, promptText, conversationId, () => {
|
|
280
288
|
// Interrupted before producing any reply → carry this turn's text
|
|
@@ -299,7 +307,7 @@ export function defineVoiceAgent(config) {
|
|
|
299
307
|
};
|
|
300
308
|
if (eouTurnDetector !== undefined) {
|
|
301
309
|
turnHandling.turnDetection = eouTurnDetector;
|
|
302
|
-
logger.info("
|
|
310
|
+
logger.info("voice.agent.eouEnabled", { language: "english" });
|
|
303
311
|
}
|
|
304
312
|
else if (config.turn?.mode) {
|
|
305
313
|
turnHandling.turnDetection = config.turn.mode;
|
|
@@ -319,20 +327,11 @@ export function defineVoiceAgent(config) {
|
|
|
319
327
|
tts,
|
|
320
328
|
llm: new PlaceholderLLM(),
|
|
321
329
|
turnHandling,
|
|
322
|
-
// Do NOT speculatively call the LLM on preflight transcripts before the
|
|
323
|
-
// turn ends — with NeuroLink as the brain each call is a real LLM request,
|
|
324
|
-
// and it makes the agent feel like it responds while you're still talking.
|
|
325
330
|
preemptiveGeneration: false,
|
|
326
331
|
});
|
|
327
332
|
const agent = new NeuroLinkVoiceAgent({
|
|
328
333
|
instructions: config.systemPrompt ?? "",
|
|
329
334
|
});
|
|
330
|
-
// Inactivity watchdog: shut the per-call Job down after a stretch with no
|
|
331
|
-
// user or agent activity (mirrors Clairvoyance). On timeout `ctx.shutdown`
|
|
332
|
-
// runs the shutdown callbacks (disposing the bridge) and the Job process
|
|
333
|
-
// exits — freeing its RAM and the EOU model — while the browser observes a
|
|
334
|
-
// room disconnect. Reset on every interaction below. Configure via
|
|
335
|
-
// VOICE_INACTIVITY_TIMEOUT_MS (default 10 min); <= 0 disables the watchdog.
|
|
336
335
|
const inactivityTimeoutMs = Number(process.env.VOICE_INACTIVITY_TIMEOUT_MS ?? 600_000);
|
|
337
336
|
const inactivityEnabled = Number.isFinite(inactivityTimeoutMs) && inactivityTimeoutMs > 0;
|
|
338
337
|
let inactivityTimer;
|
|
@@ -350,7 +349,11 @@ export function defineVoiceAgent(config) {
|
|
|
350
349
|
clearInactivityTimer();
|
|
351
350
|
inactivityTimer = setTimeout(() => {
|
|
352
351
|
inactivityFired = true;
|
|
353
|
-
logger.info(
|
|
352
|
+
logger.info("voice.agent.inactivityTimeout", {
|
|
353
|
+
room: ctx.room.name,
|
|
354
|
+
timeoutMs: inactivityTimeoutMs,
|
|
355
|
+
action: "shutdown",
|
|
356
|
+
});
|
|
354
357
|
ctx.shutdown("inactivity timeout");
|
|
355
358
|
}, inactivityTimeoutMs);
|
|
356
359
|
// The watchdog must not, by itself, keep the event loop alive.
|
|
@@ -359,46 +362,53 @@ export function defineVoiceAgent(config) {
|
|
|
359
362
|
ctx.addShutdownCallback(async () => {
|
|
360
363
|
clearInactivityTimer();
|
|
361
364
|
});
|
|
362
|
-
|
|
363
|
-
|
|
365
|
+
if (process.env.LK_REALTIME_CONNECT_MODE === "true") {
|
|
366
|
+
ctx.addShutdownCallback(async () => {
|
|
367
|
+
const parentPid = process.ppid;
|
|
368
|
+
setTimeout(() => {
|
|
369
|
+
try {
|
|
370
|
+
if (typeof parentPid === "number" && parentPid > 1) {
|
|
371
|
+
process.kill(parentPid, "SIGTERM");
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
catch {
|
|
375
|
+
// Parent already gone — fall through to the hard exit below.
|
|
376
|
+
}
|
|
377
|
+
process.exit(0);
|
|
378
|
+
}, 500).unref?.();
|
|
379
|
+
});
|
|
380
|
+
}
|
|
364
381
|
session.on(voice.AgentSessionEventTypes.UserStateChanged, (ev) => {
|
|
365
382
|
noteActivity();
|
|
366
383
|
if (ev.oldState === "speaking" && ev.newState !== "speaking") {
|
|
367
384
|
userStoppedSpeakingAt = Date.now();
|
|
368
385
|
}
|
|
369
386
|
});
|
|
370
|
-
// Reset the inactivity watchdog on any agent speech/processing and on every
|
|
371
|
-
// committed conversation item (user turn or agent reply), so the timeout
|
|
372
|
-
// only fires during a genuine lull in the conversation.
|
|
373
387
|
session.on(voice.AgentSessionEventTypes.AgentStateChanged, () => {
|
|
374
388
|
noteActivity();
|
|
375
389
|
});
|
|
376
390
|
session.on(voice.AgentSessionEventTypes.ConversationItemAdded, () => {
|
|
377
391
|
noteActivity();
|
|
378
392
|
});
|
|
379
|
-
// Forward user STT transcripts to the data-channel bridge as a single
|
|
380
|
-
// live-updating bubble. `UserInputTranscribed` fires `isFinal: true` per
|
|
381
|
-
// finalized SEGMENT (several per turn), so we never forward those as the
|
|
382
|
-
// turn-final; `emitUserTranscriptSegment` accumulates them into the per-turn
|
|
383
|
-
// buffer and emits `final: false`. The lone `final: true` is sent from
|
|
384
|
-
// `llmNode` at the real turn boundary.
|
|
385
393
|
if (transcriptEventsEnabled) {
|
|
386
394
|
session.on(voice.AgentSessionEventTypes.UserInputTranscribed, (ev) => {
|
|
387
395
|
emitUserTranscriptSegment(ev.transcript, ev.isFinal);
|
|
388
396
|
});
|
|
389
397
|
}
|
|
390
|
-
logger.info("
|
|
398
|
+
logger.info("voice.agent.sessionStarting", {
|
|
391
399
|
room: ctx.room.name,
|
|
392
400
|
provider,
|
|
393
401
|
model,
|
|
394
402
|
});
|
|
395
403
|
await session.start({ agent, room: ctx.room });
|
|
396
|
-
|
|
397
|
-
|
|
404
|
+
if (config.greeting !== undefined && config.greeting.trim().length > 0) {
|
|
405
|
+
const greetingStream = brainTurnStream(brain, config.greeting, conversationId);
|
|
406
|
+
session.say(greetingStream, {
|
|
407
|
+
addToChatCtx: true,
|
|
408
|
+
allowInterruptions: true,
|
|
409
|
+
});
|
|
410
|
+
}
|
|
398
411
|
noteActivity();
|
|
399
|
-
// Data-channel event bridge: forward NeuroLink events (text, tool calls,
|
|
400
|
-
// results, HITL prompts, status) to the browser, and accept HITL responses
|
|
401
|
-
// back. Only when enabled and the instance exposes its event emitter.
|
|
402
412
|
if (config.events?.enabled === true && neurolink.getEventEmitter) {
|
|
403
413
|
const bridge = await attachEventBridge({
|
|
404
414
|
room: ctx.room,
|
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* See docs/features/livekit-voice-agent.md.
|
|
12
12
|
*/
|
|
13
13
|
import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
|
|
14
|
+
export declare function installVoiceWorkerProcessGuards(metricsIntervalMs?: number): void;
|
|
14
15
|
/**
|
|
15
16
|
* Launch the LiveKit voice agent worker.
|
|
16
17
|
*
|
|
@@ -25,3 +26,4 @@ import type { LiveKitWorkerLaunchOptions } from "../../types/index.js";
|
|
|
25
26
|
* ```
|
|
26
27
|
*/
|
|
27
28
|
export declare function startVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
|
|
29
|
+
export declare function startRealtimeVoiceAgentWorker(options: LiveKitWorkerLaunchOptions): Promise<void>;
|
|
@@ -11,8 +11,58 @@
|
|
|
11
11
|
* See docs/features/livekit-voice-agent.md.
|
|
12
12
|
*/
|
|
13
13
|
import { resolveEouTurnDetection, resolveLiveKitServerConfig, } from "./config.js";
|
|
14
|
+
import { logger } from "../../utils/logger.js";
|
|
14
15
|
const DEFAULT_AGENT_NAME = "neurolink-voice";
|
|
15
16
|
const EOU_METHOD_MULTILINGUAL = "lk_end_of_utterance_multilingual";
|
|
17
|
+
const IS_JOB_CHILD = process.argv.some((arg) => arg.includes("job_proc"));
|
|
18
|
+
const PROC_ROLE = IS_JOB_CHILD ? "job(child)" : "worker(parent)";
|
|
19
|
+
let processGuardsInstalled = false;
|
|
20
|
+
export function installVoiceWorkerProcessGuards(metricsIntervalMs = Number(process.env.VOICE_METRICS_INTERVAL_MS ?? 10000)) {
|
|
21
|
+
if (processGuardsInstalled) {
|
|
22
|
+
return;
|
|
23
|
+
}
|
|
24
|
+
processGuardsInstalled = true;
|
|
25
|
+
const procInfo = {
|
|
26
|
+
role: PROC_ROLE,
|
|
27
|
+
pid: process.pid,
|
|
28
|
+
ppid: process.ppid,
|
|
29
|
+
};
|
|
30
|
+
process.on("uncaughtException", (error) => {
|
|
31
|
+
logger.error("voiceWorker.uncaughtException", {
|
|
32
|
+
...procInfo,
|
|
33
|
+
error: error?.stack ?? String(error),
|
|
34
|
+
});
|
|
35
|
+
if (IS_JOB_CHILD) {
|
|
36
|
+
setTimeout(() => process.exit(1), 1000).unref?.();
|
|
37
|
+
}
|
|
38
|
+
});
|
|
39
|
+
process.on("unhandledRejection", (reason) => {
|
|
40
|
+
logger.error("voiceWorker.unhandledRejection", {
|
|
41
|
+
...procInfo,
|
|
42
|
+
error: reason instanceof Error ? reason.stack : String(reason),
|
|
43
|
+
});
|
|
44
|
+
});
|
|
45
|
+
for (const signal of ["SIGTERM", "SIGINT", "SIGHUP"]) {
|
|
46
|
+
process.on(signal, () => {
|
|
47
|
+
logger.warn("voiceWorker.signal", { ...procInfo, signal });
|
|
48
|
+
setTimeout(() => process.exit(0), 1500);
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
if (Number.isFinite(metricsIntervalMs) && metricsIntervalMs > 0) {
|
|
52
|
+
const mb = (bytes) => Math.round((bytes / 1024 / 1024) * 10) / 10;
|
|
53
|
+
const timer = setInterval(() => {
|
|
54
|
+
const usage = process.memoryUsage();
|
|
55
|
+
logger.debug("voiceWorker.mem", {
|
|
56
|
+
...procInfo,
|
|
57
|
+
rssMb: mb(usage.rss),
|
|
58
|
+
heapUsedMb: mb(usage.heapUsed),
|
|
59
|
+
heapTotalMb: mb(usage.heapTotal),
|
|
60
|
+
externalMb: mb(usage.external),
|
|
61
|
+
});
|
|
62
|
+
}, metricsIntervalMs);
|
|
63
|
+
timer.unref?.();
|
|
64
|
+
}
|
|
65
|
+
}
|
|
16
66
|
/**
|
|
17
67
|
* Register the English EOU inference runner in the worker process.
|
|
18
68
|
*
|
|
@@ -55,4 +105,18 @@ export async function startVoiceAgentWorker(options) {
|
|
|
55
105
|
apiSecret: server.apiSecret,
|
|
56
106
|
}));
|
|
57
107
|
}
|
|
108
|
+
/**
 * Launch the voice agent worker for the realtime agent.
 *
 * Before delegating to `startVoiceAgentWorker(options)` this:
 *   1. installs the process guards;
 *   2. removes `LIVEKIT_EOU_TURN_DETECTION` from the environment when it is
 *      set, logging the reason "s2s-in-model-turn-detection";
 *   3. sets `LK_REALTIME_CONNECT_MODE=true` when argv contains "connect".
 *
 * @param options Launch options, passed through unchanged.
 * @returns A promise that settles when `startVoiceAgentWorker` does.
 */
export async function startRealtimeVoiceAgentWorker(options) {
    installVoiceWorkerProcessGuards();
    const eouRequested = Boolean(process.env.LIVEKIT_EOU_TURN_DETECTION);
    if (eouRequested) {
        delete process.env.LIVEKIT_EOU_TURN_DETECTION;
        logger.info("realtime.worker.eouDisabled", {
            reason: "s2s-in-model-turn-detection",
        });
    }
    const connectMode = process.argv.includes("connect");
    if (connectMode) {
        process.env.LK_REALTIME_CONNECT_MODE = "true";
        logger.info("realtime.worker.connectMode", { enabled: true });
    }
    await startVoiceAgentWorker(options);
}
|
|
58
122
|
//# sourceMappingURL=voiceAgentWorker.js.map
|