@juspay/neurolink 9.69.3 → 9.70.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +355 -347
- package/dist/core/modules/GenerationHandler.js +75 -23
- package/dist/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/core/modules/structuredOutputPolicy.js +50 -0
- package/dist/lib/core/modules/GenerationHandler.js +75 -23
- package/dist/lib/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/lib/core/modules/structuredOutputPolicy.js +51 -0
- package/dist/lib/neurolink.js +58 -0
- package/dist/lib/providers/anthropic.js +34 -7
- package/dist/lib/providers/googleVertex.js +17 -2
- package/dist/lib/types/generate.d.ts +47 -19
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/livekit.d.ts +369 -0
- package/dist/lib/types/livekit.js +13 -0
- package/dist/lib/types/utilities.d.ts +16 -0
- package/dist/lib/utils/json/coerce.d.ts +10 -0
- package/dist/lib/utils/json/coerce.js +141 -0
- package/dist/lib/utils/json/extract.d.ts +10 -0
- package/dist/lib/utils/json/extract.js +61 -11
- package/dist/lib/utils/tokenLimits.d.ts +20 -0
- package/dist/lib/utils/tokenLimits.js +55 -0
- package/dist/lib/voice/livekit/brain.d.ts +21 -0
- package/dist/lib/voice/livekit/brain.js +75 -0
- package/dist/lib/voice/livekit/config.d.ts +41 -0
- package/dist/lib/voice/livekit/config.js +80 -0
- package/dist/lib/voice/livekit/eventBridge.d.ts +27 -0
- package/dist/lib/voice/livekit/eventBridge.js +360 -0
- package/dist/lib/voice/livekit/index.d.ts +15 -0
- package/dist/lib/voice/livekit/index.js +16 -0
- package/dist/lib/voice/livekit/tokens.d.ts +19 -0
- package/dist/lib/voice/livekit/tokens.js +51 -0
- package/dist/lib/voice/livekit/voiceAgent.d.ts +32 -0
- package/dist/lib/voice/livekit/voiceAgent.js +415 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +27 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +58 -0
- package/dist/neurolink.js +58 -0
- package/dist/providers/anthropic.js +34 -7
- package/dist/providers/googleVertex.js +17 -2
- package/dist/types/generate.d.ts +47 -19
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/livekit.d.ts +369 -0
- package/dist/types/livekit.js +12 -0
- package/dist/types/utilities.d.ts +16 -0
- package/dist/utils/json/coerce.d.ts +10 -0
- package/dist/utils/json/coerce.js +140 -0
- package/dist/utils/json/extract.d.ts +10 -0
- package/dist/utils/json/extract.js +61 -11
- package/dist/utils/tokenLimits.d.ts +20 -0
- package/dist/utils/tokenLimits.js +55 -0
- package/dist/voice/livekit/brain.d.ts +21 -0
- package/dist/voice/livekit/brain.js +74 -0
- package/dist/voice/livekit/config.d.ts +41 -0
- package/dist/voice/livekit/config.js +79 -0
- package/dist/voice/livekit/eventBridge.d.ts +27 -0
- package/dist/voice/livekit/eventBridge.js +359 -0
- package/dist/voice/livekit/index.d.ts +15 -0
- package/dist/voice/livekit/index.js +15 -0
- package/dist/voice/livekit/tokens.d.ts +19 -0
- package/dist/voice/livekit/tokens.js +50 -0
- package/dist/voice/livekit/voiceAgent.d.ts +32 -0
- package/dist/voice/livekit/voiceAgent.js +414 -0
- package/dist/voice/livekit/voiceAgentWorker.d.ts +27 -0
- package/dist/voice/livekit/voiceAgentWorker.js +57 -0
- package/package.json +23 -6
|
@@ -14,6 +14,7 @@ import { FileDetector } from "../utils/fileDetector.js";
|
|
|
14
14
|
import { processUnifiedFilesArray } from "../utils/messageBuilder.js";
|
|
15
15
|
import { logger } from "../utils/logger.js";
|
|
16
16
|
import { hasRestrictedOutputLimit, RESTRICTED_OUTPUT_TOKEN_LIMIT, } from "../utils/modelDetection.js";
|
|
17
|
+
import { resolveClaudeMaxTokens } from "../utils/tokenLimits.js";
|
|
17
18
|
import { validateApiKey, createVertexProjectConfig, createGoogleAuthConfig, } from "../utils/providerConfig.js";
|
|
18
19
|
import { convertZodToJsonSchema, inlineJsonSchema, ensureNestedSchemaTypes, } from "../utils/schemaConversion.js";
|
|
19
20
|
import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
|
|
@@ -2293,7 +2294,11 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2293
2294
|
: undefined;
|
|
2294
2295
|
const requestParams = {
|
|
2295
2296
|
model: modelName,
|
|
2296
|
-
|
|
2297
|
+
// Default to the model's real output ceiling (e.g. 64K for Sonnet 4.x)
|
|
2298
|
+
// instead of the legacy 4096, which silently truncated large structured
|
|
2299
|
+
// responses mid-JSON. resolveClaudeMaxTokens also clamps over-large
|
|
2300
|
+
// caller values so the native Vertex path never fails with an HTTP 400.
|
|
2301
|
+
max_tokens: resolveClaudeMaxTokens(modelName, options.maxTokens),
|
|
2297
2302
|
messages: messages,
|
|
2298
2303
|
...(tools && tools.length > 0 && { tools }),
|
|
2299
2304
|
...(useFinalResultTool && { tool_choice: { type: "any" } }),
|
|
@@ -2813,7 +2818,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2813
2818
|
: undefined;
|
|
2814
2819
|
const requestParams = {
|
|
2815
2820
|
model: modelName,
|
|
2816
|
-
|
|
2821
|
+
// Default to the model's real output ceiling (see stream path note).
|
|
2822
|
+
max_tokens: resolveClaudeMaxTokens(modelName, options.maxTokens),
|
|
2817
2823
|
messages,
|
|
2818
2824
|
...(tools && tools.length > 0 && { tools }),
|
|
2819
2825
|
...(useFinalResultTool && { tool_choice: { type: "any" } }),
|
|
@@ -2835,6 +2841,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2835
2841
|
const allToolCalls = [];
|
|
2836
2842
|
let totalInputTokens = 0;
|
|
2837
2843
|
let totalOutputTokens = 0;
|
|
2844
|
+
// Track the final Anthropic stop_reason so we can surface finishReason
|
|
2845
|
+
// (notably "length" on token truncation) — the legacy native path always
|
|
2846
|
+
// reported "stop", hiding truncation from callers.
|
|
2847
|
+
let lastStopReason;
|
|
2838
2848
|
const currentMessages = [...messages];
|
|
2839
2849
|
while (step < maxSteps) {
|
|
2840
2850
|
step++;
|
|
@@ -2849,6 +2859,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2849
2859
|
// Update token counts
|
|
2850
2860
|
totalInputTokens += response.usage?.input_tokens || 0;
|
|
2851
2861
|
totalOutputTokens += response.usage?.output_tokens || 0;
|
|
2862
|
+
lastStopReason = response.stop_reason;
|
|
2852
2863
|
// Check if we need to handle tool use
|
|
2853
2864
|
const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
|
|
2854
2865
|
// Check for final_result tool call (for structured output)
|
|
@@ -2997,6 +3008,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2997
3008
|
const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
|
|
2998
3009
|
const result = {
|
|
2999
3010
|
content: finalText,
|
|
3011
|
+
// Surface truncation: Anthropic "max_tokens" → unified "length" so the
|
|
3012
|
+
// SDK boundary can flag/observe incomplete structured output. Anything
|
|
3013
|
+
// else (end_turn / stop_sequence / tool_use) is a normal stop.
|
|
3014
|
+
finishReason: lastStopReason === "max_tokens" ? "length" : "stop",
|
|
3000
3015
|
provider: this.providerName,
|
|
3001
3016
|
model: modelName,
|
|
3002
3017
|
usage: {
|
package/dist/types/generate.d.ts
CHANGED
|
@@ -249,30 +249,32 @@ export type GenerateOptions = {
|
|
|
249
249
|
/**
|
|
250
250
|
* Zod schema for structured output validation
|
|
251
251
|
*
|
|
252
|
-
* @important Google Gemini
|
|
253
|
-
* Google
|
|
254
|
-
*
|
|
255
|
-
*
|
|
256
|
-
*
|
|
257
|
-
*
|
|
258
|
-
*
|
|
259
|
-
*
|
|
260
|
-
*
|
|
261
|
-
*
|
|
252
|
+
* @important Google GEMINI limitation (Gemini models only)
|
|
253
|
+
* Gemini models (Google AI Studio, and Vertex GEMINI models) cannot combine
|
|
254
|
+
* function calling with schema-enforced structured output — a Gemini API
|
|
255
|
+
* limitation ("Function calling with a response mime type:
|
|
256
|
+
* 'application/json' is unsupported"). Vertex CLAUDE models and all other
|
|
257
|
+
* providers support tools + schema simultaneously.
|
|
258
|
+
*
|
|
259
|
+
* You do NOT need to set `disableTools` yourself: when the combination is
|
|
260
|
+
* impossible, NeuroLink automatically falls back to text-mode JSON coercion
|
|
261
|
+
* (see `coerceJsonToSchema`), and `disableTools: true` remains available as
|
|
262
|
+
* an explicit override.
|
|
262
263
|
*
|
|
263
264
|
* @example
|
|
264
265
|
* ```typescript
|
|
265
|
-
* // ✅
|
|
266
|
+
* // ✅ Vertex + Claude: tools AND schema together are fully supported
|
|
266
267
|
* const result = await neurolink.generate({
|
|
267
268
|
* schema: MySchema,
|
|
268
269
|
* provider: "vertex",
|
|
269
|
-
*
|
|
270
|
+
* model: "claude-sonnet-4-6",
|
|
270
271
|
* });
|
|
271
272
|
*
|
|
272
|
-
* // ✅
|
|
273
|
+
* // ✅ Gemini + tools: SDK auto-falls back to coerced text-mode JSON
|
|
273
274
|
* const result = await neurolink.generate({
|
|
274
275
|
* schema: MySchema,
|
|
275
|
-
* provider: "
|
|
276
|
+
* provider: "google-ai",
|
|
277
|
+
* model: "gemini-2.5-pro",
|
|
276
278
|
* });
|
|
277
279
|
* ```
|
|
278
280
|
*
|
|
@@ -300,16 +302,18 @@ export type GenerateOptions = {
|
|
|
300
302
|
/**
|
|
301
303
|
* Disable tool execution (including built-in tools)
|
|
302
304
|
*
|
|
303
|
-
*
|
|
304
|
-
*
|
|
305
|
-
*
|
|
305
|
+
* Optional with schemas: the tools↔schema exclusion applies only to Google
|
|
306
|
+
* GEMINI models (Google AI Studio / Vertex Gemini — a Gemini API
|
|
307
|
+
* limitation), and NeuroLink handles it automatically by falling back to
|
|
308
|
+
* text-mode JSON coercion. Vertex CLAUDE models support tools + schema
|
|
309
|
+
* together. Set this only when you explicitly want a tool-free call.
|
|
306
310
|
*
|
|
307
311
|
* @example
|
|
308
312
|
* ```typescript
|
|
309
|
-
* //
|
|
313
|
+
* // Explicit override: schema-only call with no tools at all
|
|
310
314
|
* await neurolink.generate({
|
|
311
315
|
* schema: MySchema,
|
|
312
|
-
* provider: "
|
|
316
|
+
* provider: "google-ai",
|
|
313
317
|
* disableTools: true
|
|
314
318
|
* });
|
|
315
319
|
* ```
|
|
@@ -551,6 +555,13 @@ export type AdditionalMemoryUser = {
|
|
|
551
555
|
*/
|
|
552
556
|
export type GenerateResult = {
|
|
553
557
|
content: string;
|
|
558
|
+
/**
|
|
559
|
+
* Parsed structured object when a `schema` was requested. Populated from
|
|
560
|
+
* AI-SDK experimental_output, or from text-mode coercion (balanced-scan +
|
|
561
|
+
* jsonrepair). Prefer this over JSON.parse(content) — it never requires the
|
|
562
|
+
* caller to re-parse hand-escaped model text.
|
|
563
|
+
*/
|
|
564
|
+
structuredData?: unknown;
|
|
554
565
|
outputs?: {
|
|
555
566
|
text: string;
|
|
556
567
|
};
|
|
@@ -638,6 +649,17 @@ export type GenerateResult = {
|
|
|
638
649
|
provider?: string;
|
|
639
650
|
model?: string;
|
|
640
651
|
finishReason?: string;
|
|
652
|
+
/**
|
|
653
|
+
* True when the schema JSON in `content`/`structuredData` was repaired from
|
|
654
|
+
* malformed model text (jsonrepair ran). The result is still valid JSON.
|
|
655
|
+
*/
|
|
656
|
+
jsonRepaired?: boolean;
|
|
657
|
+
/**
|
|
658
|
+
* True when the schema JSON appears truncated — the model hit the output
|
|
659
|
+
* token cap (finishReason="length") or the recovered object came from an
|
|
660
|
+
* unclosed span. `structuredData` may be incomplete; raise `maxTokens`.
|
|
661
|
+
*/
|
|
662
|
+
jsonTruncated?: boolean;
|
|
641
663
|
usage?: TokenUsage;
|
|
642
664
|
responseTime?: number;
|
|
643
665
|
toolCalls?: Array<{
|
|
@@ -1090,7 +1112,13 @@ export type TextGenerationOptions = {
|
|
|
1090
1112
|
*/
|
|
1091
1113
|
export type TextGenerationResult = {
|
|
1092
1114
|
content: string;
|
|
1115
|
+
/** Parsed structured object when a `schema` was requested (see GenerateResult.structuredData). */
|
|
1116
|
+
structuredData?: unknown;
|
|
1093
1117
|
finishReason?: string;
|
|
1118
|
+
/** True when the schema JSON was repaired from malformed model text. */
|
|
1119
|
+
jsonRepaired?: boolean;
|
|
1120
|
+
/** True when the schema JSON appears truncated (output hit the token cap). */
|
|
1121
|
+
jsonTruncated?: boolean;
|
|
1094
1122
|
provider?: string;
|
|
1095
1123
|
model?: string;
|
|
1096
1124
|
usage?: TokenUsage;
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/index.js
CHANGED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the LiveKit voice agent integration.
|
|
3
|
+
*
|
|
4
|
+
* The integration uses LiveKit (WebRTC transport, VAD, turn detection,
|
|
5
|
+
* interruption, worker-per-call scaling) as the real-time loop, and NeuroLink
|
|
6
|
+
* as the brain (LLM, tools, memory). These types describe the brain seam, the
|
|
7
|
+
* worker configuration, and the join-token request — all transport-agnostic
|
|
8
|
+
* except where a LiveKit-specific concept is named explicitly.
|
|
9
|
+
*
|
|
10
|
+
* See docs/features/livekit-voice-agent.md.
|
|
11
|
+
*/
|
|
12
|
+
import type { NeuroLinkEvents, TypedEventEmitter } from "./common.js";
|
|
13
|
+
import type { StreamOptions, StreamResult } from "./stream.js";
|
|
14
|
+
/**
|
|
15
|
+
* Minimal structural shape of the NeuroLink instance the brain depends on.
|
|
16
|
+
*
|
|
17
|
+
* Declared structurally (rather than importing the `NeuroLink` class) so the
|
|
18
|
+
* brain layer stays decoupled from SDK construction and can be unit-tested with
|
|
19
|
+
* a lightweight stub. The real `NeuroLink` instance satisfies this shape.
|
|
20
|
+
*
|
|
21
|
+
* `getEventEmitter` is optional so lightweight stubs remain valid; the real
|
|
22
|
+
* `NeuroLink` instance provides it, and the data-channel event bridge uses it
|
|
23
|
+
* to forward tool/text/HITL events to the browser.
|
|
24
|
+
*/
|
|
25
|
+
export type LiveKitNeuroLinkStreamer = {
|
|
26
|
+
stream: (options: StreamOptions) => Promise<StreamResult>;
|
|
27
|
+
getEventEmitter?: () => TypedEventEmitter<NeuroLinkEvents>;
|
|
28
|
+
};
|
|
29
|
+
/**
|
|
30
|
+
* Configuration for the transport-agnostic voice brain.
|
|
31
|
+
*
|
|
32
|
+
* The brain owns the conversation: it calls `neurolink.stream()` with a stable
|
|
33
|
+
* `conversationId` so NeuroLink's memory layer is the source of truth, and it
|
|
34
|
+
* leaves tool-calling to the NeuroLink instance.
|
|
35
|
+
*/
|
|
36
|
+
export type LiveKitBrainConfig = {
|
|
37
|
+
/** Configured NeuroLink instance (memory + tools registered on it). */
|
|
38
|
+
neurolink: LiveKitNeuroLinkStreamer;
|
|
39
|
+
/** LLM provider name passed to `stream()` (e.g. "bedrock"). */
|
|
40
|
+
provider?: string;
|
|
41
|
+
/** LLM model name passed to `stream()` (e.g. "claude-sonnet-4-6"). */
|
|
42
|
+
model?: string;
|
|
43
|
+
/** System prompt applied to every turn. */
|
|
44
|
+
systemPrompt?: string;
|
|
45
|
+
/** Sampling temperature for spoken-style responses. */
|
|
46
|
+
temperature?: number;
|
|
47
|
+
/** Upper bound on tokens per turn. */
|
|
48
|
+
maxTokens?: number;
|
|
49
|
+
/** Optional user identifier recorded alongside memory. */
|
|
50
|
+
userId?: string;
|
|
51
|
+
};
|
|
52
|
+
/** A single user turn handed to the brain. */
|
|
53
|
+
export type LiveKitBrainTurn = {
|
|
54
|
+
/** Final transcript of the user's utterance. */
|
|
55
|
+
transcript: string;
|
|
56
|
+
/** Stable conversation id keying NeuroLink memory for this session. */
|
|
57
|
+
conversationId: string;
|
|
58
|
+
/** Cancellation signal; aborting stops the in-flight LLM and tool calls. */
|
|
59
|
+
signal?: AbortSignal;
|
|
60
|
+
};
|
|
61
|
+
/**
|
|
62
|
+
* The brain's public surface: stream the assistant reply as text deltas.
|
|
63
|
+
* The transport layer converts these deltas into audio (TTS).
|
|
64
|
+
*/
|
|
65
|
+
export type LiveKitVoiceBrain = {
|
|
66
|
+
streamReply: (turn: LiveKitBrainTurn) => AsyncGenerator<string, void, unknown>;
|
|
67
|
+
};
|
|
68
|
+
/** Speech-to-text plugin selection for the LiveKit worker. */
|
|
69
|
+
export type LiveKitSttConfig = {
|
|
70
|
+
provider: string;
|
|
71
|
+
model?: string;
|
|
72
|
+
language?: string;
|
|
73
|
+
/**
|
|
74
|
+
* Soniox only: maximum delay (ms) between speech cessation and the STT
|
|
75
|
+
* endpoint. Raise it so Soniox does not finalize on short pauses — that lets
|
|
76
|
+
* VAD silence (not the STT endpoint) decide when the turn ends.
|
|
77
|
+
*/
|
|
78
|
+
maxEndpointDelayMs?: number;
|
|
79
|
+
};
|
|
80
|
+
/** Text-to-speech plugin selection for the LiveKit worker. */
|
|
81
|
+
export type LiveKitTtsConfig = {
|
|
82
|
+
provider: string;
|
|
83
|
+
voice?: string;
|
|
84
|
+
model?: string;
|
|
85
|
+
};
|
|
86
|
+
/**
|
|
87
|
+
* Silero VAD tuning. Stricter values reject background noise (higher threshold,
|
|
88
|
+
* longer minimum speech). Durations are in seconds.
|
|
89
|
+
*/
|
|
90
|
+
export type LiveKitVadConfig = {
|
|
91
|
+
/** Probability cutoff for "this is speech" (default 0.6). Higher = stricter. */
|
|
92
|
+
activationThreshold?: number;
|
|
93
|
+
/** Minimum speech length before a turn starts, seconds (default 0.2). */
|
|
94
|
+
minSpeechDuration?: number;
|
|
95
|
+
/** Silence before a turn ends, seconds (default 0.6) — tolerates pauses. */
|
|
96
|
+
minSilenceDuration?: number;
|
|
97
|
+
};
|
|
98
|
+
/**
|
|
99
|
+
* Turn-detection (end-of-utterance) tuning.
|
|
100
|
+
*
|
|
101
|
+
* `mode` selects what decides the user's turn is over:
|
|
102
|
+
* - `"vad"` — Silero VAD silence (see `LiveKitVadConfig.minSilenceDuration`).
|
|
103
|
+
* Tolerates natural mid-sentence pauses; the turn only ends after a full
|
|
104
|
+
* silence window. This mirrors Clairvoyance's behavior.
|
|
105
|
+
* - `"stt"` — the STT provider's own endpoint detection (e.g. Soniox). Often
|
|
106
|
+
* much faster and more aggressive — short pauses can prematurely split one utterance
|
|
107
|
+
* into several turns.
|
|
108
|
+
* - `"realtime_llm"` / `"manual"` — advanced/manual strategies.
|
|
109
|
+
*
|
|
110
|
+
* `minEndpointingDelay` / `maxEndpointingDelay` are the framework's endpointing
|
|
111
|
+
* window in milliseconds (in VAD mode the effective end delay is
|
|
112
|
+
* `max(VAD silence, minEndpointingDelay)`).
|
|
113
|
+
*/
|
|
114
|
+
export type LiveKitTurnConfig = {
|
|
115
|
+
mode?: "stt" | "vad" | "realtime_llm" | "manual";
|
|
116
|
+
minEndpointingDelay?: number;
|
|
117
|
+
maxEndpointingDelay?: number;
|
|
118
|
+
};
|
|
119
|
+
/**
|
|
120
|
+
* Interruption (barge-in) tuning. Requiring real words / a minimum duration
|
|
121
|
+
* stops background noise from cutting off the assistant.
|
|
122
|
+
*/
|
|
123
|
+
export type LiveKitInterruptionConfig = {
|
|
124
|
+
/** Minimum recognized words to count as an interruption (default 2). */
|
|
125
|
+
minWords?: number;
|
|
126
|
+
/** Minimum audio duration to count as an interruption, ms (default 600). */
|
|
127
|
+
minDuration?: number;
|
|
128
|
+
};
|
|
129
|
+
/**
|
|
130
|
+
* Options for `defineVoiceAgent` — the agent definition placed as the default
|
|
131
|
+
* export of the worker entry file.
|
|
132
|
+
*
|
|
133
|
+
* LiveKit runs each call as a Job in its own child process and re-imports the
|
|
134
|
+
* entry file there, so the NeuroLink instance cannot be passed as a live object
|
|
135
|
+
* from a parent. Instead, `createNeuroLink` is invoked **inside each job
|
|
136
|
+
* process** to build the brain (and register its tools) for that call.
|
|
137
|
+
*/
|
|
138
|
+
export type LiveKitVoiceAgentConfig = {
|
|
139
|
+
/**
|
|
140
|
+
* Factory that builds the NeuroLink instance for a job process.
|
|
141
|
+
* Called once per call, inside the job's own process.
|
|
142
|
+
*/
|
|
143
|
+
createNeuroLink: () => LiveKitNeuroLinkStreamer | Promise<LiveKitNeuroLinkStreamer>;
|
|
144
|
+
/** Realtime speech-to-text selection. */
|
|
145
|
+
stt: LiveKitSttConfig;
|
|
146
|
+
/** Realtime text-to-speech selection. */
|
|
147
|
+
tts: LiveKitTtsConfig;
|
|
148
|
+
/** LLM provider/model overrides (default to env-resolved values). */
|
|
149
|
+
provider?: string;
|
|
150
|
+
model?: string;
|
|
151
|
+
systemPrompt?: string;
|
|
152
|
+
temperature?: number;
|
|
153
|
+
maxTokens?: number;
|
|
154
|
+
/** Prefix used when deriving a per-room conversation id (default "voice"). */
|
|
155
|
+
conversationIdPrefix?: string;
|
|
156
|
+
/** Optional user id recorded alongside memory. */
|
|
157
|
+
userId?: string;
|
|
158
|
+
/** Silero VAD tuning (stricter = ignores background noise). */
|
|
159
|
+
vad?: LiveKitVadConfig;
|
|
160
|
+
/** Turn-detection tuning (VAD vs STT endpointing, delays). */
|
|
161
|
+
turn?: LiveKitTurnConfig;
|
|
162
|
+
/** Interruption tuning (require words/duration so noise can't barge in). */
|
|
163
|
+
interruption?: LiveKitInterruptionConfig;
|
|
164
|
+
/**
|
|
165
|
+
* Data-channel event bridge: forward NeuroLink events (text, tool calls,
|
|
166
|
+
* tool results, HITL prompts, status) to the browser over the LiveKit data
|
|
167
|
+
* channel, and accept control messages (HITL responses) back. Disabled
|
|
168
|
+
* unless `enabled` is `true`.
|
|
169
|
+
*/
|
|
170
|
+
events?: LiveKitEventBridgeConfig;
|
|
171
|
+
};
|
|
172
|
+
/** Options for `startVoiceAgentWorker` — launches the LiveKit Agents worker. */
|
|
173
|
+
export type LiveKitWorkerLaunchOptions = {
|
|
174
|
+
/**
|
|
175
|
+
* Absolute path to the entry file whose default export is the result of
|
|
176
|
+
* `defineVoiceAgent`. LiveKit re-imports this file in each job process.
|
|
177
|
+
*/
|
|
178
|
+
agentFile: string;
|
|
179
|
+
/** Name the worker registers under for dispatch (default "neurolink-voice"). */
|
|
180
|
+
agentName?: string;
|
|
181
|
+
};
|
|
182
|
+
/** Resolved LiveKit server connection settings. */
|
|
183
|
+
export type LiveKitServerConfig = {
|
|
184
|
+
/** LiveKit server URL (ws/wss). */
|
|
185
|
+
url: string;
|
|
186
|
+
/** API key for token signing and worker registration. */
|
|
187
|
+
apiKey: string;
|
|
188
|
+
/** API secret for token signing and worker registration. */
|
|
189
|
+
apiSecret: string;
|
|
190
|
+
};
|
|
191
|
+
/** LLM defaults resolved from the environment for the brain. */
|
|
192
|
+
export type LiveKitBrainDefaults = {
|
|
193
|
+
provider: string;
|
|
194
|
+
model: string;
|
|
195
|
+
};
|
|
196
|
+
/** Arguments for minting a browser join token. */
|
|
197
|
+
export type LiveKitTokenRequest = {
|
|
198
|
+
/** Participant identity (e.g. the authenticated user id). */
|
|
199
|
+
identity: string;
|
|
200
|
+
/** Room name to join (auto-created on first join). */
|
|
201
|
+
room: string;
|
|
202
|
+
/** LiveKit API key. */
|
|
203
|
+
apiKey: string;
|
|
204
|
+
/** LiveKit API secret. */
|
|
205
|
+
apiSecret: string;
|
|
206
|
+
/** Token lifetime in seconds (default 600; clamped to a 3600 max). */
|
|
207
|
+
ttlSeconds?: number;
|
|
208
|
+
};
|
|
209
|
+
/** Discriminant tags for outbound voice events. */
|
|
210
|
+
export type LiveKitVoiceEventType = "user-text" | "text" | "tool-start" | "tool-result" | "status" | "hitl-prompt" | "done";
|
|
211
|
+
/**
|
|
212
|
+
* A user STT transcript for display. Interim partials stream with
|
|
213
|
+
* `final: false`; the end-of-utterance result has `final: true`. The client
|
|
214
|
+
* updates one live bubble and commits it on `final`.
|
|
215
|
+
*
|
|
216
|
+
* `replacesPrevious` is set on the committed (`final: true`) text of a turn that
|
|
217
|
+
* absorbed a previous turn the user interrupted before it produced any reply
|
|
218
|
+
* (strict barge-in merging). The client removes the orphaned previous user bubble
|
|
219
|
+
* so the merged utterance shows as one bubble.
|
|
220
|
+
*/
|
|
221
|
+
export type LiveKitVoiceUserTextEvent = {
|
|
222
|
+
type: "user-text";
|
|
223
|
+
data: {
|
|
224
|
+
text: string;
|
|
225
|
+
final: boolean;
|
|
226
|
+
replacesPrevious?: boolean;
|
|
227
|
+
};
|
|
228
|
+
};
|
|
229
|
+
/** A streamed chunk of the assistant's spoken/written reply. */
|
|
230
|
+
export type LiveKitVoiceTextEvent = {
|
|
231
|
+
type: "text";
|
|
232
|
+
data: {
|
|
233
|
+
delta: string;
|
|
234
|
+
};
|
|
235
|
+
};
|
|
236
|
+
/** A tool invocation has started (best-effort status; may not always fire). */
|
|
237
|
+
export type LiveKitVoiceToolStartEvent = {
|
|
238
|
+
type: "tool-start";
|
|
239
|
+
data: {
|
|
240
|
+
id?: string;
|
|
241
|
+
name: string;
|
|
242
|
+
input?: unknown;
|
|
243
|
+
};
|
|
244
|
+
};
|
|
245
|
+
/**
|
|
246
|
+
* A tool invocation has finished. `result` carries the tool's structured
|
|
247
|
+
* output (for example, a chart payload) for the client to render.
|
|
248
|
+
*/
|
|
249
|
+
export type LiveKitVoiceToolResultEvent = {
|
|
250
|
+
type: "tool-result";
|
|
251
|
+
data: {
|
|
252
|
+
id?: string;
|
|
253
|
+
name: string;
|
|
254
|
+
result?: unknown;
|
|
255
|
+
success?: boolean;
|
|
256
|
+
error?: string;
|
|
257
|
+
};
|
|
258
|
+
};
|
|
259
|
+
/** Coarse agent state, useful for UI indicators (e.g. "thinking…"). */
|
|
260
|
+
export type LiveKitVoiceStatusEvent = {
|
|
261
|
+
type: "status";
|
|
262
|
+
data: {
|
|
263
|
+
state: "thinking" | "speaking" | "listening" | "error";
|
|
264
|
+
detail?: string;
|
|
265
|
+
};
|
|
266
|
+
};
|
|
267
|
+
/** A human-in-the-loop confirmation the user must approve or reject. */
|
|
268
|
+
export type LiveKitVoiceHitlPromptEvent = {
|
|
269
|
+
type: "hitl-prompt";
|
|
270
|
+
data: {
|
|
271
|
+
confirmationId: string;
|
|
272
|
+
toolName: string;
|
|
273
|
+
actionType?: string;
|
|
274
|
+
arguments?: unknown;
|
|
275
|
+
timeoutMs?: number;
|
|
276
|
+
allowModification?: boolean;
|
|
277
|
+
};
|
|
278
|
+
};
|
|
279
|
+
/** The current turn has finished. */
|
|
280
|
+
export type LiveKitVoiceDoneEvent = {
|
|
281
|
+
type: "done";
|
|
282
|
+
data: {
|
|
283
|
+
reason?: string;
|
|
284
|
+
};
|
|
285
|
+
};
|
|
286
|
+
/** Discriminated union of all outbound voice events (before enveloping). */
|
|
287
|
+
export type LiveKitVoiceEvent = LiveKitVoiceUserTextEvent | LiveKitVoiceTextEvent | LiveKitVoiceToolStartEvent | LiveKitVoiceToolResultEvent | LiveKitVoiceStatusEvent | LiveKitVoiceHitlPromptEvent | LiveKitVoiceDoneEvent;
|
|
288
|
+
/**
|
|
289
|
+
* Wire format published to the browser: a `LiveKitVoiceEvent` plus a monotonic
|
|
290
|
+
* sequence number and a timestamp so the client can order and de-duplicate.
|
|
291
|
+
*/
|
|
292
|
+
export type LiveKitVoiceEventEnvelope = LiveKitVoiceEvent & {
|
|
293
|
+
seq: number;
|
|
294
|
+
ts: number;
|
|
295
|
+
};
|
|
296
|
+
/** Control messages sent from the browser back to the agent. */
|
|
297
|
+
export type LiveKitVoiceControlMessage = {
|
|
298
|
+
action: "hitl:accept";
|
|
299
|
+
confirmationId: string;
|
|
300
|
+
modifiedArguments?: unknown;
|
|
301
|
+
} | {
|
|
302
|
+
action: "hitl:reject";
|
|
303
|
+
confirmationId: string;
|
|
304
|
+
reason?: string;
|
|
305
|
+
};
|
|
306
|
+
/**
|
|
307
|
+
* Configuration for the data-channel event bridge, set on
|
|
308
|
+
* `LiveKitVoiceAgentConfig.events`.
|
|
309
|
+
*/
|
|
310
|
+
export type LiveKitEventBridgeConfig = {
|
|
311
|
+
/** Master switch — the bridge is inert unless this is `true`. */
|
|
312
|
+
enabled?: boolean;
|
|
313
|
+
/** Data-channel topic for outbound events (default "ai-events"). */
|
|
314
|
+
eventsTopic?: string;
|
|
315
|
+
/** Data-channel topic for inbound control messages (default "ai-control"). */
|
|
316
|
+
controlTopic?: string;
|
|
317
|
+
/** If set, only these event types are forwarded (default: all). */
|
|
318
|
+
include?: LiveKitVoiceEventType[];
|
|
319
|
+
/**
|
|
320
|
+
* Payloads encoded larger than this many bytes are sent via the chunked text
|
|
321
|
+
* stream API instead of a single reliable data packet (default 12000).
|
|
322
|
+
*/
|
|
323
|
+
maxInlineBytes?: number;
|
|
324
|
+
};
|
|
325
|
+
/**
|
|
326
|
+
* Minimal structural view of the LiveKit room the bridge needs: a local
|
|
327
|
+
* participant to publish on, and event (un)subscription. Declared structurally
|
|
328
|
+
* so `src/lib/types` carries no dependency on `@livekit/rtc-node`; the real
|
|
329
|
+
* `Room` from a job context satisfies this shape.
|
|
330
|
+
*/
|
|
331
|
+
export type LiveKitBridgeRoom = {
|
|
332
|
+
localParticipant?: {
|
|
333
|
+
publishData(data: Uint8Array, options: {
|
|
334
|
+
reliable?: boolean;
|
|
335
|
+
topic?: string;
|
|
336
|
+
}): Promise<void>;
|
|
337
|
+
sendText(text: string, options?: {
|
|
338
|
+
topic?: string;
|
|
339
|
+
}): Promise<unknown>;
|
|
340
|
+
};
|
|
341
|
+
on(event: string, listener: (...args: unknown[]) => void): unknown;
|
|
342
|
+
off(event: string, listener: (...args: unknown[]) => void): unknown;
|
|
343
|
+
};
|
|
344
|
+
/**
|
|
345
|
+
* Normalized tool fields extracted from a `tool:start` / `tool:end` emitter
|
|
346
|
+
* payload, used internally by the event bridge.
|
|
347
|
+
*/
|
|
348
|
+
export type LiveKitToolEventFields = {
|
|
349
|
+
name: string;
|
|
350
|
+
id?: string;
|
|
351
|
+
input?: unknown;
|
|
352
|
+
result?: unknown;
|
|
353
|
+
success?: boolean;
|
|
354
|
+
error?: string;
|
|
355
|
+
};
|
|
356
|
+
/** Inputs to `attachEventBridge`. */
|
|
357
|
+
export type LiveKitEventBridgeParams = {
|
|
358
|
+
/** The LiveKit room for this call (from the job context). */
|
|
359
|
+
room: LiveKitBridgeRoom;
|
|
360
|
+
/** NeuroLink's event emitter (`neurolink.getEventEmitter()`). */
|
|
361
|
+
emitter: TypedEventEmitter<NeuroLinkEvents>;
|
|
362
|
+
/** Bridge options (topics, filtering, chunking threshold). */
|
|
363
|
+
options?: LiveKitEventBridgeConfig;
|
|
364
|
+
};
|
|
365
|
+
/** Handle returned by `attachEventBridge` for teardown. */
|
|
366
|
+
export type LiveKitEventBridgeHandle = {
|
|
367
|
+
/** Remove all listeners and stop publishing. Idempotent. */
|
|
368
|
+
dispose: () => void;
|
|
369
|
+
};
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Types for the LiveKit voice agent integration.
|
|
3
|
+
*
|
|
4
|
+
* The integration uses LiveKit (WebRTC transport, VAD, turn detection,
|
|
5
|
+
* interruption, worker-per-call scaling) as the real-time loop, and NeuroLink
|
|
6
|
+
* as the brain (LLM, tools, memory). These types describe the brain seam, the
|
|
7
|
+
* worker configuration, and the join-token request — all transport-agnostic
|
|
8
|
+
* except where a LiveKit-specific concept is named explicitly.
|
|
9
|
+
*
|
|
10
|
+
* See docs/features/livekit-voice-agent.md.
|
|
11
|
+
*/
|
|
12
|
+
export {};
|
|
@@ -263,3 +263,19 @@ export type StepToolResult = {
|
|
|
263
263
|
result?: unknown;
|
|
264
264
|
error?: string;
|
|
265
265
|
};
|
|
266
|
+
/**
|
|
267
|
+
* Result of coercing arbitrary model text into canonical, valid JSON.
|
|
268
|
+
* `content` is a JSON.stringify of the recovered object; `structuredData` is
|
|
269
|
+
* the parsed object itself.
|
|
270
|
+
*/
|
|
271
|
+
export type JsonCoercionResult = {
|
|
272
|
+
content: string;
|
|
273
|
+
structuredData: unknown;
|
|
274
|
+
/** True when jsonrepair altered the model text to make it parse. */
|
|
275
|
+
repaired: boolean;
|
|
276
|
+
/**
|
|
277
|
+
* True when the recovered object came from a truncated (unclosed) span —
|
|
278
|
+
* the response likely hit the output-token cap and data may be incomplete.
|
|
279
|
+
*/
|
|
280
|
+
truncated: boolean;
|
|
281
|
+
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { JsonCoercionResult, ValidationSchema } from "../../types/index.js";
|
|
2
|
+
/**
|
|
3
|
+
* Try to produce canonical JSON from `text`. Returns null when no JSON object
|
|
4
|
+
* could be recovered (caller should then keep the raw text).
|
|
5
|
+
*
|
|
6
|
+
* When `schema` is a Zod schema, candidates that satisfy it are preferred; a
|
|
7
|
+
* syntactically-valid-but-schema-failing object is still returned (we guarantee
|
|
8
|
+
* JSON *validity*, leaving schema/content checks to the caller's own pipeline).
|
|
9
|
+
*/
|
|
10
|
+
export declare function coerceJsonToSchema(text: string, schema?: ValidationSchema): JsonCoercionResult | null;
|