@juspay/neurolink 9.71.0 → 9.73.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +330 -312
- package/dist/core/constants.d.ts +1 -0
- package/dist/core/constants.js +2 -0
- package/dist/core/toolRouting.d.ts +59 -0
- package/dist/core/toolRouting.js +232 -0
- package/dist/lib/core/constants.d.ts +1 -0
- package/dist/lib/core/constants.js +2 -0
- package/dist/lib/core/toolRouting.d.ts +59 -0
- package/dist/lib/core/toolRouting.js +233 -0
- package/dist/lib/neurolink.d.ts +31 -1
- package/dist/lib/neurolink.js +188 -1
- package/dist/lib/telemetry/attributes.js +3 -1
- package/dist/lib/types/config.d.ts +8 -0
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/livekit.d.ts +134 -0
- package/dist/lib/types/toolRouting.d.ts +91 -0
- package/dist/lib/types/toolRouting.js +19 -0
- package/dist/lib/voice/livekit/brain.js +1 -1
- package/dist/lib/voice/livekit/config.d.ts +12 -1
- package/dist/lib/voice/livekit/config.js +54 -0
- package/dist/lib/voice/livekit/eventBridge.js +4 -4
- package/dist/lib/voice/livekit/index.d.ts +9 -2
- package/dist/lib/voice/livekit/index.js +9 -2
- package/dist/lib/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/lib/voice/livekit/realtimeEventBridge.js +161 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/lib/voice/livekit/realtimeMcpTools.js +194 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/lib/voice/livekit/realtimeVoiceAgent.js +362 -0
- package/dist/lib/voice/livekit/roomContext.d.ts +23 -0
- package/dist/lib/voice/livekit/roomContext.js +57 -0
- package/dist/lib/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/lib/voice/livekit/roomDispatch.js +31 -0
- package/dist/lib/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/lib/voice/livekit/schemaSanitizer.js +144 -0
- package/dist/lib/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/lib/voice/livekit/vertexAuth.js +73 -0
- package/dist/lib/voice/livekit/voiceAgent.js +47 -37
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +64 -0
- package/dist/neurolink.d.ts +31 -1
- package/dist/neurolink.js +188 -1
- package/dist/telemetry/attributes.js +3 -1
- package/dist/types/config.d.ts +8 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/livekit.d.ts +134 -0
- package/dist/types/toolRouting.d.ts +91 -0
- package/dist/types/toolRouting.js +18 -0
- package/dist/voice/livekit/brain.js +1 -1
- package/dist/voice/livekit/config.d.ts +12 -1
- package/dist/voice/livekit/config.js +54 -0
- package/dist/voice/livekit/eventBridge.js +4 -4
- package/dist/voice/livekit/index.d.ts +9 -2
- package/dist/voice/livekit/index.js +9 -2
- package/dist/voice/livekit/realtimeEventBridge.d.ts +14 -0
- package/dist/voice/livekit/realtimeEventBridge.js +160 -0
- package/dist/voice/livekit/realtimeMcpTools.d.ts +31 -0
- package/dist/voice/livekit/realtimeMcpTools.js +193 -0
- package/dist/voice/livekit/realtimeVoiceAgent.d.ts +26 -0
- package/dist/voice/livekit/realtimeVoiceAgent.js +361 -0
- package/dist/voice/livekit/roomContext.d.ts +23 -0
- package/dist/voice/livekit/roomContext.js +56 -0
- package/dist/voice/livekit/roomDispatch.d.ts +24 -0
- package/dist/voice/livekit/roomDispatch.js +30 -0
- package/dist/voice/livekit/schemaSanitizer.d.ts +26 -0
- package/dist/voice/livekit/schemaSanitizer.js +143 -0
- package/dist/voice/livekit/vertexAuth.d.ts +30 -0
- package/dist/voice/livekit/vertexAuth.js +72 -0
- package/dist/voice/livekit/voiceAgent.js +47 -37
- package/dist/voice/livekit/voiceAgentWorker.d.ts +2 -0
- package/dist/voice/livekit/voiceAgentWorker.js +64 -0
- package/package.json +2 -1
package/dist/types/config.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ import type { ConversationMemoryConfig } from "./conversation.js";
|
|
|
9
9
|
import type { ObservabilityConfig } from "./observability.js";
|
|
10
10
|
import type { AuthProvider, AuthProviderType, AuthProviderConfig, Auth0Config, ClerkConfig, FirebaseConfig, SupabaseConfig, WorkOSConfig, BetterAuthConfig, JWTConfig, OAuth2Config, CognitoConfig, KeycloakConfig, AuthenticatedContext } from "./auth.js";
|
|
11
11
|
import type { NeurolinkCredentials } from "./providers.js";
|
|
12
|
+
import type { ToolRoutingConfig } from "./toolRouting.js";
|
|
12
13
|
/**
|
|
13
14
|
* Main NeuroLink configuration type
|
|
14
15
|
*/
|
|
@@ -66,6 +67,13 @@ export type NeurolinkConstructorConfig = {
|
|
|
66
67
|
* provider is preserved across the chain; only the model name changes.
|
|
67
68
|
*/
|
|
68
69
|
modelChain?: string[];
|
|
70
|
+
/**
|
|
71
|
+
* Pre-call tool routing: a cheap router LLM picks the tool servers
|
|
72
|
+
* relevant to each stream() turn and the unpicked servers' tools are
|
|
73
|
+
* dropped from the request via `excludeTools`. Fails open (all tools) on
|
|
74
|
+
* any router failure. See {@link ToolRoutingConfig}.
|
|
75
|
+
*/
|
|
76
|
+
toolRouting?: ToolRoutingConfig;
|
|
69
77
|
};
|
|
70
78
|
/**
|
|
71
79
|
* Configuration for MCP enhancement modules wired into generate()/stream() paths.
|
package/dist/types/index.d.ts
CHANGED
|
@@ -50,6 +50,7 @@ export * from "./stream.js";
|
|
|
50
50
|
export * from "./subscription.js";
|
|
51
51
|
export * from "./task.js";
|
|
52
52
|
export * from "./taskClassification.js";
|
|
53
|
+
export * from "./toolRouting.js";
|
|
53
54
|
export * from "./tools.js";
|
|
54
55
|
export * from "./voice.js";
|
|
55
56
|
export * from "./universalProviderOptions.js";
|
package/dist/types/index.js
CHANGED
|
@@ -51,6 +51,7 @@ export * from "./stream.js";
|
|
|
51
51
|
export * from "./subscription.js";
|
|
52
52
|
export * from "./task.js";
|
|
53
53
|
export * from "./taskClassification.js";
|
|
54
|
+
export * from "./toolRouting.js";
|
|
54
55
|
export * from "./tools.js";
|
|
55
56
|
export * from "./voice.js";
|
|
56
57
|
export * from "./universalProviderOptions.js";
|
package/dist/types/livekit.d.ts
CHANGED
|
@@ -155,6 +155,7 @@ export type LiveKitVoiceAgentConfig = {
|
|
|
155
155
|
conversationIdPrefix?: string;
|
|
156
156
|
/** Optional user id recorded alongside memory. */
|
|
157
157
|
userId?: string;
|
|
158
|
+
greeting?: string;
|
|
158
159
|
/** Silero VAD tuning (stricter = ignores background noise). */
|
|
159
160
|
vad?: LiveKitVadConfig;
|
|
160
161
|
/** Turn-detection tuning (VAD vs STT endpointing, delays). */
|
|
@@ -367,3 +368,136 @@ export type LiveKitEventBridgeHandle = {
|
|
|
367
368
|
/** Remove all listeners and stop publishing. Idempotent. */
|
|
368
369
|
dispose: () => void;
|
|
369
370
|
};
|
|
371
|
+
/**
|
|
372
|
+
* Credentials for LiveKit server-side REST calls (room create, agent dispatch).
|
|
373
|
+
* `url` accepts `ws(s)://` or `http(s)://`; helpers convert it to https.
|
|
374
|
+
*/
|
|
375
|
+
export type LiveKitServerCredentials = {
|
|
376
|
+
url: string;
|
|
377
|
+
apiKey: string;
|
|
378
|
+
apiSecret: string;
|
|
379
|
+
};
|
|
380
|
+
/**
|
|
381
|
+
* Realtime voice configuration resolved from the environment.
|
|
382
|
+
*
|
|
383
|
+
* In speech-to-speech mode one realtime model (Gemini Live on Vertex) does STT,
|
|
384
|
+
* reasoning, TTS, and turn detection — so there is no separate STT/TTS/VAD/EOU
|
|
385
|
+
* config. `resolveRealtimeVoiceConfig` fills every field from `process.env`
|
|
386
|
+
* (with defaults); `RealtimeVoiceAgentConfig` lets a caller override any of them.
|
|
387
|
+
*/
|
|
388
|
+
export type RealtimeVoiceConfig = {
|
|
389
|
+
/** Vertex project id (from VERTEX_PROJECT / GOOGLE_AUTH_* / GOOGLE_CLOUD_PROJECT_ID). */
|
|
390
|
+
project: string | undefined;
|
|
391
|
+
/** Vertex location; native-audio Live is served on `global`, not regionally. */
|
|
392
|
+
location: string;
|
|
393
|
+
/** Realtime model id (e.g. "gemini-live-2.5-flash"). */
|
|
394
|
+
model: string;
|
|
395
|
+
/** Optional Gemini voice name; omit for the plugin default. */
|
|
396
|
+
voice: string | undefined;
|
|
397
|
+
/** Response modality: "AUDIO" (native S2S) or "TEXT" (half-cascade). */
|
|
398
|
+
responseModality: string;
|
|
399
|
+
/** System prompt / instructions for the agent. */
|
|
400
|
+
systemPrompt: string;
|
|
401
|
+
/** Opening line the agent speaks on connect ("" disables). */
|
|
402
|
+
greeting: string;
|
|
403
|
+
/** Whether to bridge Lighthouse MCP tools as Gemini function tools. */
|
|
404
|
+
toolsEnabled: boolean;
|
|
405
|
+
/** Full URL of the MCP server the tools are bridged from. */
|
|
406
|
+
mcpUrl: string;
|
|
407
|
+
/** Grace period after the caller leaves before the job shuts down (ms). */
|
|
408
|
+
emptyRoomGraceMs: number;
|
|
409
|
+
/** Deadline for a participant to join before the job shuts down (ms). */
|
|
410
|
+
joinDeadlineMs: number;
|
|
411
|
+
/** How long a HITL confirmation waits before being treated as a decline (ms). */
|
|
412
|
+
hitlTimeoutMs: number;
|
|
413
|
+
/** Interval for the RSS/heap metrics log (ms). */
|
|
414
|
+
metricsIntervalMs: number;
|
|
415
|
+
};
|
|
416
|
+
/** A single log record handed to a `RealtimeVoiceAgentConfig.onLog` sink. */
|
|
417
|
+
export type RealtimeVoiceLogEntry = {
|
|
418
|
+
level: "debug" | "info" | "warn" | "error";
|
|
419
|
+
message: string;
|
|
420
|
+
timestamp: number;
|
|
421
|
+
data?: unknown;
|
|
422
|
+
};
|
|
423
|
+
export type RealtimeVoiceLogContext = {
|
|
424
|
+
room: string;
|
|
425
|
+
};
|
|
426
|
+
/**
|
|
427
|
+
* Options for `defineRealtimeVoiceAgent`. Every field is optional: omitted
|
|
428
|
+
* values fall back to `resolveRealtimeVoiceConfig()` (i.e. the environment), so
|
|
429
|
+
* a caller can use `defineRealtimeVoiceAgent()` with no arguments and configure
|
|
430
|
+
* everything via env.
|
|
431
|
+
*/
|
|
432
|
+
export type RealtimeVoiceAgentConfig = {
|
|
433
|
+
project?: string;
|
|
434
|
+
location?: string;
|
|
435
|
+
model?: string;
|
|
436
|
+
voice?: string;
|
|
437
|
+
responseModality?: string;
|
|
438
|
+
systemPrompt?: string;
|
|
439
|
+
greeting?: string;
|
|
440
|
+
/** MCP tool bridging overrides. */
|
|
441
|
+
tools?: {
|
|
442
|
+
enabled?: boolean;
|
|
443
|
+
mcpUrl?: string;
|
|
444
|
+
};
|
|
445
|
+
/** Data-channel topic for outbound events (default "ai-events"). */
|
|
446
|
+
eventsTopic?: string;
|
|
447
|
+
/** Data-channel topic for inbound control messages (default "ai-control"). */
|
|
448
|
+
controlTopic?: string;
|
|
449
|
+
/**
|
|
450
|
+
* Optional sink for the agent's own logs. When set, the realtime agent wires
|
|
451
|
+
* NeuroLink's logger to this callback for the duration of the call, so a host
|
|
452
|
+
* can forward worker logs into its logging pipeline. Each record is tagged
|
|
453
|
+
* with per-call context (the room name). Subject to the logger's level gate:
|
|
454
|
+
* without debug mode only `error` records are emitted (set `NEUROLINK_DEBUG`).
|
|
455
|
+
*/
|
|
456
|
+
onLog?: (entry: RealtimeVoiceLogEntry, ctx: RealtimeVoiceLogContext) => void;
|
|
457
|
+
};
|
|
458
|
+
/** Auth token + base64 MCP execution context decoded from a room's metadata. */
|
|
459
|
+
export type LiveKitRoomCallContext = {
|
|
460
|
+
/** Lighthouse access JWT used as `x-auth-token` to the MCP server. */
|
|
461
|
+
authToken: string;
|
|
462
|
+
/** base64(JSON) MCP execution context used as `x-context` (or "" if absent). */
|
|
463
|
+
xContext: string;
|
|
464
|
+
};
|
|
465
|
+
/** Publishes a single voice event envelope onto the room data channel. */
|
|
466
|
+
export type RealtimeEventPublisher = (type: LiveKitVoiceEventType, data: Record<string, unknown>) => void;
|
|
467
|
+
/** Requests a HITL confirmation and resolves to the user's decision. */
|
|
468
|
+
export type RealtimeConfirmationRequester = (toolName: string, args: Record<string, unknown>) => Promise<boolean>;
|
|
469
|
+
/** Handle returned by `attachRealtimeEventBridge`. */
|
|
470
|
+
export type RealtimeEventBridgeHandle = {
|
|
471
|
+
/** Publish an outbound event to the browser (data packet or text stream). */
|
|
472
|
+
publishEvent: RealtimeEventPublisher;
|
|
473
|
+
/** Open a HITL prompt and await the browser's decision (timeout = decline). */
|
|
474
|
+
requestConfirmation: RealtimeConfirmationRequester;
|
|
475
|
+
/** Remove the control-channel listener and clear pending confirmations. */
|
|
476
|
+
dispose: () => void;
|
|
477
|
+
};
|
|
478
|
+
/** Inputs to `attachRealtimeEventBridge`. */
|
|
479
|
+
export type RealtimeEventBridgeParams = {
|
|
480
|
+
/** The LiveKit room for this call (from the job context). */
|
|
481
|
+
room: LiveKitBridgeRoom;
|
|
482
|
+
/** HITL confirmation timeout in ms before a request is auto-declined. */
|
|
483
|
+
hitlTimeoutMs?: number;
|
|
484
|
+
/** Outbound events topic (default "ai-events"). */
|
|
485
|
+
eventsTopic?: string;
|
|
486
|
+
/** Inbound control topic (default "ai-control"). */
|
|
487
|
+
controlTopic?: string;
|
|
488
|
+
/** Payloads larger than this are sent via the chunked text stream (default 12000). */
|
|
489
|
+
maxInlineBytes?: number;
|
|
490
|
+
};
|
|
491
|
+
/** Inputs to `buildRealtimeMcpTools`. */
|
|
492
|
+
export type BuildRealtimeMcpToolsParams = {
|
|
493
|
+
/** Full URL of the MCP server (e.g. ".../ai/mcp/v2"). */
|
|
494
|
+
mcpUrl: string;
|
|
495
|
+
/** Lighthouse access JWT forwarded as `x-auth-token`. */
|
|
496
|
+
authToken: string;
|
|
497
|
+
/** base64(JSON) execution context forwarded as `x-context`. */
|
|
498
|
+
xContext: string;
|
|
499
|
+
/** Publishes tool start/result events to the browser. */
|
|
500
|
+
publishEvent: RealtimeEventPublisher;
|
|
501
|
+
/** Opens a HITL confirmation for destructive tools and awaits the decision. */
|
|
502
|
+
requestConfirmation: RealtimeConfirmationRequester;
|
|
503
|
+
};
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-call tool routing — configuration and catalog types.
|
|
3
|
+
*
|
|
4
|
+
* Host applications can register large numbers of custom tools (typically MCP
|
|
5
|
+
* server tools) whose names are prefixed with their server id
|
|
6
|
+
* (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
|
|
7
|
+
* LLM call runs once per `stream()` turn, picks the servers relevant to the
|
|
8
|
+
* user query, and the tools of every unpicked server are appended to the
|
|
9
|
+
* request's `excludeTools` denylist before the main model call.
|
|
10
|
+
*
|
|
11
|
+
* Denylist semantics are deliberate: the router only knows the declared
|
|
12
|
+
* server catalog — a strict subset of the real tool set. Excluding unpicked
|
|
13
|
+
* servers leaves NeuroLink's built-in direct tools, always-include servers,
|
|
14
|
+
* and any tools outside the catalog untouched. The whole mechanism fails
|
|
15
|
+
* open: any router failure resolves to an empty exclusion list (all tools),
|
|
16
|
+
* identical to routing being disabled.
|
|
17
|
+
*/
|
|
18
|
+
import type { GenerateOptions, GenerateResult } from "./generate.js";
|
|
19
|
+
/** One routable server as declared by the host application. */
|
|
20
|
+
export type ToolRoutingServerDescriptor = {
|
|
21
|
+
/**
|
|
22
|
+
* Server id. Must be the prefix used when the host registered the server's
|
|
23
|
+
* tools (`${id}_${toolName}`) — tool names are grouped by this prefix.
|
|
24
|
+
*/
|
|
25
|
+
id: string;
|
|
26
|
+
/** Routing-grade server description shown to the router LLM. */
|
|
27
|
+
description: string;
|
|
28
|
+
};
|
|
29
|
+
/**
|
|
30
|
+
* LLM settings for the router call. Fields omitted here fall back to the
|
|
31
|
+
* stream call's own provider/model/region, so the router uses the same model
|
|
32
|
+
* as the main chat call unless explicitly overridden.
|
|
33
|
+
*/
|
|
34
|
+
export type ToolRoutingModelConfig = {
|
|
35
|
+
provider?: string;
|
|
36
|
+
model?: string;
|
|
37
|
+
region?: string;
|
|
38
|
+
/** Router sampling temperature. Default: 0. */
|
|
39
|
+
temperature?: number;
|
|
40
|
+
};
|
|
41
|
+
/** Constructor-level configuration for pre-call tool routing. */
|
|
42
|
+
export type ToolRoutingConfig = {
|
|
43
|
+
/** Master switch. Routing runs only when true AND the server catalog is non-empty. */
|
|
44
|
+
enabled: boolean;
|
|
45
|
+
/**
|
|
46
|
+
* Routable server catalog. Hosts that only know their servers after
|
|
47
|
+
* constructing NeuroLink can supply it later via
|
|
48
|
+
* `neurolink.setToolRoutingServers()` instead.
|
|
49
|
+
*/
|
|
50
|
+
servers?: ToolRoutingServerDescriptor[];
|
|
51
|
+
/**
|
|
52
|
+
* Server ids whose tools are always kept and never offered to the router
|
|
53
|
+
* (e.g. utility / reasoning / chart servers every turn may need).
|
|
54
|
+
*/
|
|
55
|
+
alwaysIncludeServerIds?: string[];
|
|
56
|
+
/** Hard ceiling for the router LLM call before failing open. Default: 15000. */
|
|
57
|
+
timeoutMs?: number;
|
|
58
|
+
/** Router LLM override. Defaults to the stream call's provider/model/region at temperature 0. */
|
|
59
|
+
routerModel?: ToolRoutingModelConfig;
|
|
60
|
+
/**
|
|
61
|
+
* Override for the instruction text placed before the user query in the
|
|
62
|
+
* router prompt (role + task framing). When omitted, the SDK built-in
|
|
63
|
+
* default is used. The server catalog, user query, and output rules are
|
|
64
|
+
* always appended by the SDK regardless of this value.
|
|
65
|
+
*/
|
|
66
|
+
routerPromptPrefix?: string;
|
|
67
|
+
};
|
|
68
|
+
/** Catalog entry pairing a server descriptor with its registered tool names. */
|
|
69
|
+
export type ToolRoutingCatalogEntry = {
|
|
70
|
+
id: string;
|
|
71
|
+
description: string;
|
|
72
|
+
/** Registered tool names for this server, i.e. `${serverId}_${toolName}`. */
|
|
73
|
+
toolNames: string[];
|
|
74
|
+
};
|
|
75
|
+
/** Parameters for `resolveToolRoutingExclusions()`. */
|
|
76
|
+
export type ToolRoutingResolutionParams = {
|
|
77
|
+
/** Full catalog; always-include servers are filtered out internally. */
|
|
78
|
+
catalog: ToolRoutingCatalogEntry[];
|
|
79
|
+
/** Server ids never offered to the router. */
|
|
80
|
+
alwaysIncludeServerIds: string[];
|
|
81
|
+
/** Current user query (the stream input text, before memory enrichment). */
|
|
82
|
+
userQuery: string;
|
|
83
|
+
/** Instruction text placed before the user query. Defaults to the SDK built-in. */
|
|
84
|
+
routerPromptPrefix?: string;
|
|
85
|
+
/** Router LLM settings, already resolved against the stream call's options. */
|
|
86
|
+
routerModel: ToolRoutingModelConfig;
|
|
87
|
+
/** Timeout for the router call in milliseconds. */
|
|
88
|
+
timeoutMs: number;
|
|
89
|
+
/** Invokes the router LLM — `NeuroLink.generate` bound by the caller. */
|
|
90
|
+
generateFn: (options: GenerateOptions) => Promise<GenerateResult>;
|
|
91
|
+
};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pre-call tool routing — configuration and catalog types.
|
|
3
|
+
*
|
|
4
|
+
* Host applications can register large numbers of custom tools (typically MCP
|
|
5
|
+
* server tools) whose names are prefixed with their server id
|
|
6
|
+
* (`${serverId}_${toolName}`). When tool routing is enabled, a cheap router
|
|
7
|
+
* LLM call runs once per `stream()` turn, picks the servers relevant to the
|
|
8
|
+
* user query, and the tools of every unpicked server are appended to the
|
|
9
|
+
* request's `excludeTools` denylist before the main model call.
|
|
10
|
+
*
|
|
11
|
+
* Denylist semantics are deliberate: the router only knows the declared
|
|
12
|
+
* server catalog — a strict subset of the real tool set. Excluding unpicked
|
|
13
|
+
* servers leaves NeuroLink's built-in direct tools, always-include servers,
|
|
14
|
+
* and any tools outside the catalog untouched. The whole mechanism fails
|
|
15
|
+
* open: any router failure resolves to an empty exclusion list (all tools),
|
|
16
|
+
* identical to routing being disabled.
|
|
17
|
+
*/
|
|
18
|
+
export {};
|
|
@@ -61,7 +61,7 @@ export function createVoiceBrain(config) {
|
|
|
61
61
|
});
|
|
62
62
|
for await (const chunk of result.stream) {
|
|
63
63
|
if (signal?.aborted) {
|
|
64
|
-
logger.debug("
|
|
64
|
+
logger.debug("voice.brain.turnAborted", { reason: "signal-aborted" });
|
|
65
65
|
return;
|
|
66
66
|
}
|
|
67
67
|
const delta = extractTextDelta(chunk);
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* See docs/features/livekit-voice-agent.md.
|
|
9
9
|
*/
|
|
10
|
-
import type { LiveKitServerConfig, LiveKitBrainDefaults } from "../../types/index.js";
|
|
10
|
+
import type { LiveKitServerConfig, LiveKitBrainDefaults, RealtimeVoiceConfig } from "../../types/index.js";
|
|
11
11
|
/**
|
|
12
12
|
* Resolve LiveKit server connection settings from the environment.
|
|
13
13
|
*
|
|
@@ -23,6 +23,17 @@ export declare function resolveLiveKitServerConfig(): LiveKitServerConfig;
|
|
|
23
23
|
* `VOICE_LLM_MODEL`.
|
|
24
24
|
*/
|
|
25
25
|
export declare function resolveBrainDefaults(): LiveKitBrainDefaults;
|
|
26
|
+
/**
|
|
27
|
+
* Resolve the realtime (Gemini Live speech-to-speech) voice configuration from
|
|
28
|
+
* the environment.
|
|
29
|
+
*
|
|
30
|
+
* Every value has a safe default so a worker can run with no realtime-specific
|
|
31
|
+
* env at all. Vertex `project` is resolved from `VERTEX_PROJECT`, then the
|
|
32
|
+
* service-account project (`GOOGLE_AUTH_BREEZE_PROJECT_ID`), then
|
|
33
|
+
* `GOOGLE_CLOUD_PROJECT_ID`. The MCP URL is `VOICE_MCP_URL` if set, otherwise
|
|
34
|
+
* `${LIGHTHOUSE_URL}/ai/mcp/v2`.
|
|
35
|
+
*/
|
|
36
|
+
export declare function resolveRealtimeVoiceConfig(): RealtimeVoiceConfig;
|
|
26
37
|
/**
|
|
27
38
|
* Resolve the semantic end-of-utterance (EOU) turn-detection settings.
|
|
28
39
|
*
|
|
@@ -25,6 +25,22 @@ function readEnv(name, fallback) {
|
|
|
25
25
|
}
|
|
26
26
|
return fallback;
|
|
27
27
|
}
|
|
28
|
+
function readOptionalEnv(name) {
|
|
29
|
+
const value = process.env[name];
|
|
30
|
+
return typeof value === "string" && value.trim().length > 0
|
|
31
|
+
? value.trim()
|
|
32
|
+
: undefined;
|
|
33
|
+
}
|
|
34
|
+
function readNumberEnv(name, fallback) {
|
|
35
|
+
const raw = process.env[name];
|
|
36
|
+
if (typeof raw === "string" && raw.trim().length > 0) {
|
|
37
|
+
const parsed = Number(raw.trim());
|
|
38
|
+
if (Number.isFinite(parsed) && parsed > 0) {
|
|
39
|
+
return parsed;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
return fallback;
|
|
43
|
+
}
|
|
28
44
|
/**
|
|
29
45
|
* Resolve LiveKit server connection settings from the environment.
|
|
30
46
|
*
|
|
@@ -51,6 +67,44 @@ export function resolveBrainDefaults() {
|
|
|
51
67
|
model: readEnv("VOICE_LLM_MODEL", DEFAULT_LLM_MODEL),
|
|
52
68
|
};
|
|
53
69
|
}
|
|
70
|
+
const DEFAULT_REALTIME_MODEL = "gemini-live-2.5-flash";
|
|
71
|
+
const DEFAULT_REALTIME_LOCATION = "global";
|
|
72
|
+
const DEFAULT_RESPONSE_MODALITY = "AUDIO";
|
|
73
|
+
const DEFAULT_SYSTEM_PROMPT = "You are a concise voice assistant for a merchant dashboard. Keep replies short and spoken.";
|
|
74
|
+
const DEFAULT_GREETING = "Briefly greet the merchant and ask how you can help with their store today.";
|
|
75
|
+
const DEFAULT_LIGHTHOUSE_URL = "http://localhost:5173";
|
|
76
|
+
const DEFAULT_MCP_PATH = "/ai/mcp/v2";
|
|
77
|
+
/**
|
|
78
|
+
* Resolve the realtime (Gemini Live speech-to-speech) voice configuration from
|
|
79
|
+
* the environment.
|
|
80
|
+
*
|
|
81
|
+
* Every value has a safe default so a worker can run with no realtime-specific
|
|
82
|
+
* env at all. Vertex `project` is resolved from `VERTEX_PROJECT`, then the
|
|
83
|
+
* service-account project (`GOOGLE_AUTH_BREEZE_PROJECT_ID`), then
|
|
84
|
+
* `GOOGLE_CLOUD_PROJECT_ID`. The MCP URL is `VOICE_MCP_URL` if set, otherwise
|
|
85
|
+
* `${LIGHTHOUSE_URL}/ai/mcp/v2`.
|
|
86
|
+
*/
|
|
87
|
+
export function resolveRealtimeVoiceConfig() {
|
|
88
|
+
const lighthouseUrl = readEnv("LIGHTHOUSE_URL", DEFAULT_LIGHTHOUSE_URL);
|
|
89
|
+
const mcpUrl = readEnv("VOICE_MCP_URL", `${lighthouseUrl}${DEFAULT_MCP_PATH}`);
|
|
90
|
+
return {
|
|
91
|
+
project: readOptionalEnv("VERTEX_PROJECT") ??
|
|
92
|
+
readOptionalEnv("GOOGLE_AUTH_BREEZE_PROJECT_ID") ??
|
|
93
|
+
readOptionalEnv("GOOGLE_CLOUD_PROJECT_ID"),
|
|
94
|
+
location: readEnv("VERTEX_LOCATION", DEFAULT_REALTIME_LOCATION),
|
|
95
|
+
model: readEnv("VOICE_LIVE_MODEL", DEFAULT_REALTIME_MODEL),
|
|
96
|
+
voice: readOptionalEnv("VOICE_S2S_VOICE"),
|
|
97
|
+
responseModality: readEnv("VOICE_RESPONSE_MODALITY", DEFAULT_RESPONSE_MODALITY).toUpperCase(),
|
|
98
|
+
systemPrompt: readEnv("VOICE_SYSTEM_PROMPT", DEFAULT_SYSTEM_PROMPT),
|
|
99
|
+
greeting: readEnv("VOICE_GREETING", DEFAULT_GREETING),
|
|
100
|
+
toolsEnabled: readEnv("VOICE_S2S_TOOLS", "").toLowerCase() === "true",
|
|
101
|
+
mcpUrl,
|
|
102
|
+
emptyRoomGraceMs: readNumberEnv("VOICE_EMPTY_ROOM_GRACE_MS", 30_000),
|
|
103
|
+
joinDeadlineMs: readNumberEnv("VOICE_JOIN_DEADLINE_MS", 60_000),
|
|
104
|
+
hitlTimeoutMs: readNumberEnv("VOICE_HITL_TIMEOUT_MS", 45_000),
|
|
105
|
+
metricsIntervalMs: readNumberEnv("VOICE_METRICS_INTERVAL_MS", 10_000),
|
|
106
|
+
};
|
|
107
|
+
}
|
|
54
108
|
const EOU_TRUTHY = new Set(["1", "true", "english", "en", "on", "yes"]);
|
|
55
109
|
/**
|
|
56
110
|
* Resolve the semantic end-of-utterance (EOU) turn-detection settings.
|
|
@@ -191,7 +191,7 @@ export async function attachEventBridge(params) {
|
|
|
191
191
|
const json = JSON.stringify(envelope);
|
|
192
192
|
const bytes = encoder.encode(json);
|
|
193
193
|
const onError = (transport) => (error) => {
|
|
194
|
-
logger.warn("
|
|
194
|
+
logger.warn("voice.bridge.publishFailed", {
|
|
195
195
|
transport,
|
|
196
196
|
type: event.type,
|
|
197
197
|
error: error instanceof Error ? error.message : String(error),
|
|
@@ -304,7 +304,7 @@ export async function attachEventBridge(params) {
|
|
|
304
304
|
parsed = JSON.parse(decoder.decode(payload));
|
|
305
305
|
}
|
|
306
306
|
catch (error) {
|
|
307
|
-
logger.warn("
|
|
307
|
+
logger.warn("voice.bridge.controlMalformed", {
|
|
308
308
|
error: error instanceof Error ? error.message : String(error),
|
|
309
309
|
});
|
|
310
310
|
return;
|
|
@@ -332,7 +332,7 @@ export async function attachEventBridge(params) {
|
|
|
332
332
|
};
|
|
333
333
|
room.on(RoomEvent.DataReceived, onData);
|
|
334
334
|
publish({ type: "status", data: { state: "listening" } });
|
|
335
|
-
logger.info("
|
|
335
|
+
logger.info("voice.bridge.attached", {
|
|
336
336
|
eventsTopic,
|
|
337
337
|
controlTopic,
|
|
338
338
|
filtered: include !== undefined,
|
|
@@ -353,7 +353,7 @@ export async function attachEventBridge(params) {
|
|
|
353
353
|
emitter.off(STREAM_END_EVENT, onStreamComplete);
|
|
354
354
|
emitter.off(STREAM_ERROR_EVENT, onStreamError);
|
|
355
355
|
room.off(RoomEvent.DataReceived, onData);
|
|
356
|
-
logger.info("
|
|
356
|
+
logger.info("voice.bridge.disposed", {});
|
|
357
357
|
},
|
|
358
358
|
};
|
|
359
359
|
}
|
|
@@ -8,8 +8,15 @@
|
|
|
8
8
|
* See docs/features/livekit-voice-agent.md.
|
|
9
9
|
*/
|
|
10
10
|
export { createVoiceBrain } from "./brain.js";
|
|
11
|
-
export { resolveLiveKitServerConfig, resolveBrainDefaults } from "./config.js";
|
|
11
|
+
export { resolveLiveKitServerConfig, resolveBrainDefaults, resolveRealtimeVoiceConfig, } from "./config.js";
|
|
12
12
|
export { attachEventBridge } from "./eventBridge.js";
|
|
13
|
+
export { attachRealtimeEventBridge } from "./realtimeEventBridge.js";
|
|
13
14
|
export { mintJoinToken } from "./tokens.js";
|
|
15
|
+
export { createVoiceRoom, dispatchVoiceAgent } from "./roomDispatch.js";
|
|
14
16
|
export { defineVoiceAgent } from "./voiceAgent.js";
|
|
15
|
-
export {
|
|
17
|
+
export { defineRealtimeVoiceAgent } from "./realtimeVoiceAgent.js";
|
|
18
|
+
export { startVoiceAgentWorker, startRealtimeVoiceAgentWorker, installVoiceWorkerProcessGuards, } from "./voiceAgentWorker.js";
|
|
19
|
+
export { ensureVertexAdc, clearGeminiApiKeyEnv } from "./vertexAuth.js";
|
|
20
|
+
export { readCallContextFromRoom } from "./roomContext.js";
|
|
21
|
+
export { sanitizeSchema, sanitizeToolParameters, findSchemaIssue, } from "./schemaSanitizer.js";
|
|
22
|
+
export { buildRealtimeMcpTools, mcpResultToText } from "./realtimeMcpTools.js";
|
|
@@ -8,8 +8,15 @@
|
|
|
8
8
|
* See docs/features/livekit-voice-agent.md.
|
|
9
9
|
*/
|
|
10
10
|
export { createVoiceBrain } from "./brain.js";
|
|
11
|
-
export { resolveLiveKitServerConfig, resolveBrainDefaults } from "./config.js";
|
|
11
|
+
export { resolveLiveKitServerConfig, resolveBrainDefaults, resolveRealtimeVoiceConfig, } from "./config.js";
|
|
12
12
|
export { attachEventBridge } from "./eventBridge.js";
|
|
13
|
+
export { attachRealtimeEventBridge } from "./realtimeEventBridge.js";
|
|
13
14
|
export { mintJoinToken } from "./tokens.js";
|
|
15
|
+
export { createVoiceRoom, dispatchVoiceAgent } from "./roomDispatch.js";
|
|
14
16
|
export { defineVoiceAgent } from "./voiceAgent.js";
|
|
15
|
-
export {
|
|
17
|
+
export { defineRealtimeVoiceAgent } from "./realtimeVoiceAgent.js";
|
|
18
|
+
export { startVoiceAgentWorker, startRealtimeVoiceAgentWorker, installVoiceWorkerProcessGuards, } from "./voiceAgentWorker.js";
|
|
19
|
+
export { ensureVertexAdc, clearGeminiApiKeyEnv } from "./vertexAuth.js";
|
|
20
|
+
export { readCallContextFromRoom } from "./roomContext.js";
|
|
21
|
+
export { sanitizeSchema, sanitizeToolParameters, findSchemaIssue, } from "./schemaSanitizer.js";
|
|
22
|
+
export { buildRealtimeMcpTools, mcpResultToText } from "./realtimeMcpTools.js";
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime data-channel event bridge.
|
|
3
|
+
*
|
|
4
|
+
* The cascaded bridge (`eventBridge.ts`) is driven by NeuroLink's event emitter.
|
|
5
|
+
*/
|
|
6
|
+
import type { RealtimeEventBridgeHandle, RealtimeEventBridgeParams } from "../../types/index.js";
|
|
7
|
+
/**
|
|
8
|
+
* Attach the realtime event bridge to a room.
|
|
9
|
+
*
|
|
10
|
+
* Returns `publishEvent` (worker → browser), `requestConfirmation` (HITL
|
|
11
|
+
* round-trip), and `dispose` (remove the control listener, decline anything
|
|
12
|
+
* still pending).
|
|
13
|
+
*/
|
|
14
|
+
export declare function attachRealtimeEventBridge(params: RealtimeEventBridgeParams): Promise<RealtimeEventBridgeHandle>;
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Realtime data-channel event bridge.
|
|
3
|
+
*
|
|
4
|
+
* The cascaded bridge (`eventBridge.ts`) is driven by NeuroLink's event emitter.
|
|
5
|
+
*/
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { logger } from "../../utils/logger.js";
|
|
8
|
+
const DEFAULT_EVENTS_TOPIC = "ai-events";
|
|
9
|
+
const DEFAULT_CONTROL_TOPIC = "ai-control";
|
|
10
|
+
const DEFAULT_MAX_INLINE_BYTES = 12_000;
|
|
11
|
+
const DEFAULT_HITL_TIMEOUT_MS = 45_000;
|
|
12
|
+
const hitlControlMessageSchema = z.object({
|
|
13
|
+
action: z.enum(["hitl:accept", "hitl:reject"]),
|
|
14
|
+
confirmationId: z.string(),
|
|
15
|
+
});
|
|
16
|
+
/**
 * Attach the realtime event bridge to a room.
 *
 * Returns `publishEvent` (worker → browser), `requestConfirmation` (HITL
 * round-trip), and `dispose` (remove the control listener, decline anything
 * still pending).
 *
 * @param params - Bridge options. `room` is the room to attach to;
 *   `eventsTopic`, `controlTopic`, `maxInlineBytes` and `hitlTimeoutMs` fall
 *   back to the module defaults when they are null or undefined.
 * @returns A promise for the bridge handle:
 *   - `publishEvent(type, data)` serializes `{ seq, ts, type, data }` and sends
 *     it on the events topic. It is best-effort: it never throws and never
 *     leaves a rejected promise unhandled.
 *   - `requestConfirmation(toolName, args)` publishes a `hitl-prompt` event and
 *     resolves to `true` only when a matching `hitl:accept` arrives on the
 *     control topic; a reject, a timeout, or `dispose` resolves it to `false`.
 *   - `dispose()` is idempotent.
 */
export async function attachRealtimeEventBridge(params) {
    const { room } = params;
    const eventsTopic = params.eventsTopic ?? DEFAULT_EVENTS_TOPIC;
    const controlTopic = params.controlTopic ?? DEFAULT_CONTROL_TOPIC;
    const maxInlineBytes = params.maxInlineBytes ?? DEFAULT_MAX_INLINE_BYTES;
    const hitlTimeoutMs = params.hitlTimeoutMs ?? DEFAULT_HITL_TIMEOUT_MS;
    const { RoomEvent } = await import("@livekit/rtc-node");
    const encoder = new TextEncoder();
    const decoder = new TextDecoder();
    let seq = 0;
    let disposed = false;
    const startedAt = Date.now();
    // The UI bridge is best-effort: a failed publish is recorded at debug level
    // and otherwise ignored, whether it failed synchronously or asynchronously.
    const onPublishFailure = (type, error) => {
        logger.debug("realtime.bridge.publishFailed", {
            type,
            error: error instanceof Error ? error.message : String(error),
        });
    };
    const publishEvent = (type, data) => {
        if (disposed) {
            return;
        }
        const participant = room.localParticipant;
        if (!participant) {
            return;
        }
        try {
            seq += 1;
            const json = JSON.stringify({ seq, ts: Date.now(), type, data });
            const bytes = encoder.encode(json);
            // Small payloads go out as one data packet; larger ones as a text stream.
            const via = bytes.byteLength <= maxInlineBytes ? "data" : "stream";
            logger.debug("realtime.bridge.publish", {
                ms: Date.now() - startedAt,
                seq,
                type,
                via,
                bytes: bytes.byteLength,
            });
            const sending = via === "data"
                ? participant.publishData(bytes, {
                    reliable: true,
                    topic: eventsTopic,
                })
                : participant.sendText(json, { topic: eventsTopic });
            // The send is fire-and-forget, so the surrounding try/catch cannot see
            // a rejection. Attach a handler so it never becomes an unhandled
            // rejection.
            void Promise.resolve(sending).catch((error) => onPublishFailure(type, error));
        }
        catch (error) {
            /* non-fatal — the UI bridge is best-effort */
            onPublishFailure(type, error);
        }
    };
    // HITL: WRITE-labeled tools pause for user confirmation. We publish a
    // `hitl-prompt`, the UI replies on the control topic, and we resolve the
    // pending request by confirmationId. A timeout is treated as a decline so a
    // turn can never hang waiting on the user.
    const pendingHitl = new Map();
    let hitlSeq = 0;
    const onData = (...args) => {
        // Only the payload (first argument) and the topic (fourth argument) are used.
        const payload = args[0];
        const topic = args[3];
        if (topic !== controlTopic || !(payload instanceof Uint8Array)) {
            return;
        }
        let raw;
        try {
            raw = JSON.parse(decoder.decode(payload));
        }
        catch {
            // Malformed control payloads are ignored.
            return;
        }
        const message = hitlControlMessageSchema.safeParse(raw);
        if (!message.success) {
            return;
        }
        const { action, confirmationId } = message.data;
        const resolver = pendingHitl.get(confirmationId);
        if (resolver) {
            logger.debug("realtime.bridge.controlReceived", {
                action,
                confirmationId,
            });
            // Remove first so a duplicate reply is reported as unmatched.
            pendingHitl.delete(confirmationId);
            resolver(action === "hitl:accept");
        }
        else {
            logger.warn("realtime.bridge.controlUnmatched", {
                action,
                confirmationId,
            });
        }
    };
    room.on(RoomEvent.DataReceived, onData);
    const requestConfirmation = (toolName, args) => {
        if (disposed) {
            logger.warn("realtime.bridge.hitlDisposed", { toolName });
            return Promise.resolve(false);
        }
        hitlSeq += 1;
        const confirmationId = `hitl_${seq}_${hitlSeq}`;
        publishEvent("hitl-prompt", {
            confirmationId,
            toolName,
            actionType: `Run ${toolName}`,
            arguments: args ?? {},
        });
        logger.info("realtime.bridge.hitlPrompt", { toolName, confirmationId });
        return new Promise((resolve) => {
            // No reply within the timeout counts as a decline.
            const timer = setTimeout(() => {
                pendingHitl.delete(confirmationId);
                logger.warn("realtime.bridge.hitlTimeout", {
                    toolName,
                    confirmationId,
                    timeoutMs: hitlTimeoutMs,
                });
                resolve(false);
            }, hitlTimeoutMs);
            // Invoked by the control listener, or by `dispose` with `false`.
            pendingHitl.set(confirmationId, (approved) => {
                clearTimeout(timer);
                logger.info("realtime.bridge.hitlResolved", {
                    toolName,
                    confirmationId,
                    approved,
                });
                resolve(approved);
            });
        });
    };
    return {
        publishEvent,
        requestConfirmation,
        dispose() {
            if (disposed) {
                return;
            }
            disposed = true;
            room.off(RoomEvent.DataReceived, onData);
            // Decline everything still waiting; each resolver clears its own timer.
            for (const [, resolver] of pendingHitl) {
                resolver(false);
            }
            pendingHitl.clear();
        },
    };
}
|