@juspay/neurolink 9.69.3 → 9.70.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +355 -347
- package/dist/core/modules/GenerationHandler.js +75 -23
- package/dist/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/core/modules/structuredOutputPolicy.js +50 -0
- package/dist/lib/core/modules/GenerationHandler.js +75 -23
- package/dist/lib/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/lib/core/modules/structuredOutputPolicy.js +51 -0
- package/dist/lib/neurolink.js +58 -0
- package/dist/lib/providers/anthropic.js +34 -7
- package/dist/lib/providers/googleVertex.js +17 -2
- package/dist/lib/types/generate.d.ts +47 -19
- package/dist/lib/types/index.d.ts +1 -0
- package/dist/lib/types/index.js +1 -0
- package/dist/lib/types/livekit.d.ts +369 -0
- package/dist/lib/types/livekit.js +13 -0
- package/dist/lib/types/utilities.d.ts +16 -0
- package/dist/lib/utils/json/coerce.d.ts +10 -0
- package/dist/lib/utils/json/coerce.js +141 -0
- package/dist/lib/utils/json/extract.d.ts +10 -0
- package/dist/lib/utils/json/extract.js +61 -11
- package/dist/lib/utils/tokenLimits.d.ts +20 -0
- package/dist/lib/utils/tokenLimits.js +55 -0
- package/dist/lib/voice/livekit/brain.d.ts +21 -0
- package/dist/lib/voice/livekit/brain.js +75 -0
- package/dist/lib/voice/livekit/config.d.ts +41 -0
- package/dist/lib/voice/livekit/config.js +80 -0
- package/dist/lib/voice/livekit/eventBridge.d.ts +27 -0
- package/dist/lib/voice/livekit/eventBridge.js +360 -0
- package/dist/lib/voice/livekit/index.d.ts +15 -0
- package/dist/lib/voice/livekit/index.js +16 -0
- package/dist/lib/voice/livekit/tokens.d.ts +19 -0
- package/dist/lib/voice/livekit/tokens.js +51 -0
- package/dist/lib/voice/livekit/voiceAgent.d.ts +32 -0
- package/dist/lib/voice/livekit/voiceAgent.js +415 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.d.ts +27 -0
- package/dist/lib/voice/livekit/voiceAgentWorker.js +58 -0
- package/dist/neurolink.js +58 -0
- package/dist/providers/anthropic.js +34 -7
- package/dist/providers/googleVertex.js +17 -2
- package/dist/types/generate.d.ts +47 -19
- package/dist/types/index.d.ts +1 -0
- package/dist/types/index.js +1 -0
- package/dist/types/livekit.d.ts +369 -0
- package/dist/types/livekit.js +12 -0
- package/dist/types/utilities.d.ts +16 -0
- package/dist/utils/json/coerce.d.ts +10 -0
- package/dist/utils/json/coerce.js +140 -0
- package/dist/utils/json/extract.d.ts +10 -0
- package/dist/utils/json/extract.js +61 -11
- package/dist/utils/tokenLimits.d.ts +20 -0
- package/dist/utils/tokenLimits.js +55 -0
- package/dist/voice/livekit/brain.d.ts +21 -0
- package/dist/voice/livekit/brain.js +74 -0
- package/dist/voice/livekit/config.d.ts +41 -0
- package/dist/voice/livekit/config.js +79 -0
- package/dist/voice/livekit/eventBridge.d.ts +27 -0
- package/dist/voice/livekit/eventBridge.js +359 -0
- package/dist/voice/livekit/index.d.ts +15 -0
- package/dist/voice/livekit/index.js +15 -0
- package/dist/voice/livekit/tokens.d.ts +19 -0
- package/dist/voice/livekit/tokens.js +50 -0
- package/dist/voice/livekit/voiceAgent.d.ts +32 -0
- package/dist/voice/livekit/voiceAgent.js +414 -0
- package/dist/voice/livekit/voiceAgentWorker.d.ts +27 -0
- package/dist/voice/livekit/voiceAgentWorker.js +57 -0
- package/package.json +23 -6
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coerce arbitrary model text into canonical, syntactically-valid JSON.
|
|
3
|
+
*
|
|
4
|
+
* Used on the text-mode path (providers/models that could not use AI-SDK
|
|
5
|
+
* structured output, e.g. real Gemini + tools). The model hand-writes JSON and
|
|
6
|
+
* frequently mis-escapes the content field (bare newline, unescaped quote,
|
|
7
|
+
* invalid escape like \d). A balanced-brace scan finds the object span; if
|
|
8
|
+
* JSON.parse rejects it, jsonrepair fixes common escaping mistakes; the result
|
|
9
|
+
* is re-serialised with JSON.stringify so downstream consumers always receive
|
|
10
|
+
* valid JSON.
|
|
11
|
+
*
|
|
12
|
+
* NOTE: jsonrepair is a heuristic. On content where a lone backslash is
|
|
13
|
+
* meaningful (regex/script/Windows path) it may drop the backslash, producing
|
|
14
|
+
* valid-but-semantically-altered content. This only affects the residual
|
|
15
|
+
* text-mode path — the primary Vertex+Claude path uses experimental_output and
|
|
16
|
+
* never reaches here. When jsonrepair changes the input we log at debug level
|
|
17
|
+
* so the event is observable.
|
|
18
|
+
*/
|
|
19
|
+
import { jsonrepair } from "jsonrepair";
|
|
20
|
+
import { logger } from "../logger.js";
|
|
21
|
+
import { nextBalancedJsonSpan } from "./extract.js";
|
|
22
|
+
/**
 * Report whether `schema` carries a callable `safeParse` member (the
 * Zod-style validation entry point).
 *
 * The property is read directly, so callers must pass a non-nullish value;
 * `null`/`undefined` raises a TypeError.
 */
function hasSafeParse(schema) {
    const { safeParse } = schema;
    return typeof safeParse === "function";
}
|
|
26
|
+
/**
 * Turn `candidate` into a parsed JSON value, repairing it if necessary.
 *
 * Strategy:
 *   1. `JSON.parse` the text as-is; on success report `repaired: false`.
 *   2. Otherwise run it through `jsonrepair` and parse the repaired text.
 *      `repaired` is true only when the repaired text differs from the input.
 *
 * A debug log entry (lengths only, never the content) is written when the
 * repair changed the text and debug logging is enabled.
 *
 * @returns `{ value, repaired }` on success, or `undefined` when neither the
 *   original nor the repaired text could be parsed.
 */
function parseOrRepair(candidate) {
    // Fast path: the text is already valid JSON.
    try {
        const value = JSON.parse(candidate);
        return { value, repaired: false };
    }
    catch {
        // Not valid as-is; continue with the repair attempt below.
    }
    try {
        const fixedText = jsonrepair(candidate);
        const value = JSON.parse(fixedText);
        const changed = fixedText !== candidate;
        if (changed && logger.shouldLog("debug")) {
            logger.debug("[coerceJsonToSchema] jsonrepair altered model output", {
                originalLength: candidate.length,
                repairedLength: fixedText.length,
            });
        }
        return { value, repaired: changed };
    }
    catch {
        // Repair (or parsing/logging of the repaired text) failed: give up.
        return undefined;
    }
}
|
|
52
|
+
/**
 * Recover a JSON object/array from free-form `text` and re-serialise it.
 *
 * @param text   Raw model output. Non-strings and blank strings yield `null`.
 * @param schema Optional validator. When it exposes a `safeParse` function,
 *   the first candidate whose parsed value passes `safeParse` is chosen; if
 *   none passes, the first parseable candidate is returned anyway. Without a
 *   usable `safeParse`, the first parseable candidate wins immediately.
 * @returns `null` when nothing could be recovered, otherwise
 *   `{ content, structuredData, repaired, truncated }` where `content` is
 *   `JSON.stringify(structuredData)`, `repaired` tells whether the repair step
 *   altered the chosen candidate, and `truncated` tells whether the chosen
 *   candidate was the "first opening bracket to end of text" slice.
 */
export function coerceJsonToSchema(text, schema) {
    const isUsableText = typeof text === "string" && text.trim().length > 0;
    if (!isUsableText) {
        return null;
    }
    // Candidate substrings, tried in this order:
    //   1. every balanced object/array span found by scanning left to right;
    //   2. first "{"/"[" through the last "}"/"]" (drops surrounding prose);
    //   3. first "{"/"[" through the end of the text, flagged `truncated`.
    // Candidate 3 is appended whenever an opening bracket exists; it is only
    // *tried* when it differs textually from the earlier candidates and none
    // of them ended the search.
    const candidates = [];
    for (let hit = nextBalancedJsonSpan(text, 0); hit; hit = nextBalancedJsonSpan(text, hit.end)) {
        candidates.push({ text: hit.span, truncated: false });
    }
    const firstOpen = text.search(/[{\[]/);
    const lastClose = Math.max(text.lastIndexOf("}"), text.lastIndexOf("]"));
    if (firstOpen >= 0) {
        if (lastClose > firstOpen) {
            candidates.push({
                text: text.slice(firstOpen, lastClose + 1),
                truncated: false,
            });
        }
        candidates.push({ text: text.slice(firstOpen), truncated: true });
    }
    const attempted = new Set();
    let fallback; // first candidate that parsed to an object/array
    let validated; // first candidate that also satisfied the schema
    for (const { text: snippet, truncated } of candidates) {
        // Identical substrings can be produced by more than one strategy.
        if (attempted.has(snippet)) {
            continue;
        }
        attempted.add(snippet);
        const parsed = parseOrRepair(snippet);
        const isContainer = parsed !== undefined &&
            parsed.value !== null &&
            typeof parsed.value === "object";
        if (!isContainer) {
            continue;
        }
        const entry = {
            value: parsed.value,
            repaired: parsed.repaired,
            truncated,
        };
        fallback = fallback ?? entry;
        if (!(schema && hasSafeParse(schema))) {
            // Nothing to validate against: the first parseable container wins.
            break;
        }
        if (schema.safeParse(entry.value).success) {
            validated = entry;
            break;
        }
    }
    const winner = validated ?? fallback;
    if (winner === undefined) {
        return null;
    }
    return {
        content: JSON.stringify(winner.value),
        structuredData: winner.value,
        repaired: winner.repaired,
        truncated: winner.truncated,
    };
}
|
|
@@ -4,6 +4,16 @@
|
|
|
4
4
|
* Utilities for extracting JSON from mixed text content.
|
|
5
5
|
* Particularly useful for parsing AI responses that contain JSON within prose.
|
|
6
6
|
*/
|
|
7
|
+
/**
 * Locate the earliest balanced `{...}` or `[...]` span in `text`, considering
 * only opening brackets at index `fromIndex` (default 0) or later.
 *
 * Double quotes toggle an "inside string" state and a backslash skips the
 * character that follows it, so brackets inside string literals are ignored.
 *
 * @returns the matched substring (`span`) and the index immediately after it
 *   (`end`), or `null` when no balanced span is found.
 */
export declare function nextBalancedJsonSpan(text: string, fromIndex?: number): { span: string; end: number } | null;
|
|
7
17
|
/**
|
|
8
18
|
* Extract JSON string from text that may contain surrounding content.
|
|
9
19
|
*
|
|
@@ -5,6 +5,53 @@
|
|
|
5
5
|
* Particularly useful for parsing AI responses that contain JSON within prose.
|
|
6
6
|
*/
|
|
7
7
|
import { parseJsonOrNull } from "./safeParse.js";
|
|
8
|
+
/**
 * Locate the earliest balanced `{...}` or `[...]` span in `text`, considering
 * only opening brackets at index `fromIndex` or later.
 *
 * For each opening bracket the scanner walks forward counting only brackets of
 * the same kind as the opener. Double quotes toggle an "inside string" state
 * and a backslash skips the following character, so brackets inside string
 * literals do not change the depth. If the depth never returns to zero the
 * scan restarts from the next opening bracket.
 *
 * @returns `{ span, end }` (the substring and the index just past it), or
 *   `null` when no balanced span exists.
 */
export function nextBalancedJsonSpan(text, fromIndex = 0) {
    const total = text.length;
    for (let open = fromIndex; open < total; open += 1) {
        const opener = text[open];
        const isObject = opener === "{";
        if (!isObject && opener !== "[") {
            continue;
        }
        const closer = isObject ? "}" : "]";
        let nesting = 0;
        let quoted = false;
        let skipNext = false;
        for (let pos = open; pos < total; pos += 1) {
            const current = text[pos];
            if (skipNext) {
                // Character following a backslash: never structural.
                skipNext = false;
                continue;
            }
            if (current === "\\") {
                skipNext = true;
                continue;
            }
            if (current === '"') {
                quoted = !quoted;
                continue;
            }
            if (quoted) {
                continue;
            }
            if (current === opener) {
                nesting += 1;
            }
            else if (current === closer) {
                nesting -= 1;
                if (nesting === 0) {
                    const end = pos + 1;
                    return { span: text.slice(open, end), end };
                }
            }
        }
        // This opener never closed; fall through and try the next one.
    }
    return null;
}
|
|
8
55
|
/**
|
|
9
56
|
* Extract JSON string from text that may contain surrounding content.
|
|
10
57
|
*
|
|
@@ -45,21 +92,24 @@ export function extractJsonStringFromText(text) {
|
|
|
45
92
|
// Continue to other patterns
|
|
46
93
|
}
|
|
47
94
|
}
|
|
48
|
-
//
|
|
49
|
-
//
|
|
50
|
-
//
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
95
|
+
// Scan for balanced JSON object/array spans (quote/escape aware) and return
|
|
96
|
+
// the first one that parses. Unlike a non-greedy regex, this never stops at a
|
|
97
|
+
// "}" that lives inside a string value, so nested objects are preserved.
|
|
98
|
+
let searchFrom = 0;
|
|
99
|
+
for (;;) {
|
|
100
|
+
const found = nextBalancedJsonSpan(text, searchFrom);
|
|
101
|
+
if (!found) {
|
|
102
|
+
break;
|
|
103
|
+
}
|
|
56
104
|
try {
|
|
57
|
-
JSON.parse(
|
|
58
|
-
return
|
|
105
|
+
JSON.parse(found.span);
|
|
106
|
+
return found.span;
|
|
59
107
|
}
|
|
60
108
|
catch {
|
|
61
|
-
//
|
|
109
|
+
// Not valid JSON — resume scanning just past this opening character so a
|
|
110
|
+
// valid inner object/array can still be found.
|
|
62
111
|
}
|
|
112
|
+
searchFrom = found.end - found.span.length + 1;
|
|
63
113
|
}
|
|
64
114
|
return null;
|
|
65
115
|
}
|
|
@@ -7,6 +7,26 @@ import { PROVIDER_MAX_TOKENS } from "../core/constants.js";
|
|
|
7
7
|
* Get the safe maximum tokens for a provider and model
|
|
8
8
|
*/
|
|
9
9
|
export declare function getSafeMaxTokens(provider: keyof typeof PROVIDER_MAX_TOKENS | string, model?: string, requestedMaxTokens?: number): number | undefined;
|
|
10
|
+
/**
 * Look up the output-token ceiling to use for an Anthropic Claude model id.
 *
 * Intended for the native Anthropic and Vertex+Claude request paths, which
 * pass `max_tokens` through to the API unmodified and therefore need a value
 * that does not exceed the model's limit.
 *
 * Matching is case-insensitive and pattern based: Opus 4.x maps to 32000,
 * Sonnet/Haiku 4.x and Claude 3.7 Sonnet to 64000, Claude 3.5 Sonnet/Haiku to
 * 8192, Claude 3 Opus/Sonnet/Haiku to 4096. A bare family name maps to that
 * family's 4.x value. Unrecognised or missing ids fall back to 8192.
 */
export declare function getClaudeMaxOutputTokens(model: string | undefined): number;
|
|
23
|
+
/**
 * Compute the `max_tokens` value for a native Anthropic/Claude request.
 *
 * A positive `requested` value is honoured but capped at the ceiling reported
 * by `getClaudeMaxOutputTokens(model)`; when `requested` is omitted (or not a
 * positive number) the ceiling itself is returned.
 */
export declare function resolveClaudeMaxTokens(model: string | undefined, requested?: number): number;
|
|
10
30
|
/**
|
|
11
31
|
* Validate if maxTokens is safe for a provider/model combination
|
|
12
32
|
*/
|
|
@@ -76,6 +76,61 @@ export function getSafeMaxTokens(provider, model, requestedMaxTokens) {
|
|
|
76
76
|
// Use the requested value if it's within limits
|
|
77
77
|
return requestedMaxTokens;
|
|
78
78
|
}
|
|
79
|
+
/**
 * Look up the output-token ceiling to use for an Anthropic Claude model id.
 *
 * The id is lower-cased and checked against an ordered rule list; the first
 * rule that matches decides the result. Order matters: versioned patterns are
 * checked before the bare family-name fallbacks.
 *
 * @param model Model identifier; `undefined`/`null` is treated as "".
 * @returns 32000, 64000, 8192 or 4096 according to the first matching rule,
 *   or 8192 when no rule matches.
 */
export function getClaudeMaxOutputTokens(model) {
    const id = (model ?? "").toLowerCase();
    const rules = [
        // Claude 4.x family.
        [(name) => /opus[-_.]?4/.test(name), 32000],
        [(name) => /sonnet[-_.]?4/.test(name), 64000],
        [(name) => /haiku[-_.]?4/.test(name), 64000],
        // Claude 3.7 Sonnet.
        [(name) => /3[-_.]?7[-_.]?sonnet/.test(name), 64000],
        // Claude 3.5 Sonnet / Haiku.
        [(name) => /3[-_.]?5[-_.]?(sonnet|haiku)/.test(name), 8192],
        // Claude 3 Opus / Sonnet / Haiku.
        [(name) => /claude-3-(opus|sonnet|haiku)/.test(name), 4096],
        [(name) => /3[-_.]?opus/.test(name), 4096],
        // Bare family aliases: use the same values as the 4.x rules above.
        [(name) => name.includes("opus"), 32000],
        [(name) => name.includes("sonnet"), 64000],
        [(name) => name.includes("haiku"), 64000],
    ];
    for (const [matches, ceiling] of rules) {
        if (matches(id)) {
            return ceiling;
        }
    }
    // Unknown identifier.
    return 8192;
}
|
|
121
|
+
/**
 * Compute the `max_tokens` value for a native Anthropic/Claude request.
 *
 * @param model     Model identifier, forwarded to `getClaudeMaxOutputTokens`.
 * @param requested Caller-supplied limit. Used only when it is neither
 *   `null`/`undefined` nor `<= 0` (NaN also fails the `> 0` check).
 * @returns `min(requested, ceiling)` for a usable `requested`, otherwise the
 *   model ceiling itself.
 */
export function resolveClaudeMaxTokens(model, requested) {
    const ceiling = getClaudeMaxOutputTokens(model);
    const callerSpecified = requested != null && requested > 0;
    return callerSpecified ? Math.min(requested, ceiling) : ceiling;
}
|
|
79
134
|
/**
|
|
80
135
|
* Validate if maxTokens is safe for a provider/model combination
|
|
81
136
|
*/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transport-agnostic voice brain.
|
|
3
|
+
*
|
|
4
|
+
* Given a user transcript and a stable conversation id, it streams the
|
|
5
|
+
* assistant's reply as text deltas by calling `neurolink.stream()`. NeuroLink
|
|
6
|
+
* owns history (via the conversation id and its memory layer) and tools (the
|
|
7
|
+
* tool-calling loop runs inside `stream()`); this module never touches a
|
|
8
|
+
* transport. The LiveKit worker (or any other transport) consumes the text
|
|
9
|
+
* deltas and converts them to audio.
|
|
10
|
+
*
|
|
11
|
+
* See docs/features/livekit-voice-agent.md.
|
|
12
|
+
*/
|
|
13
|
+
import type { LiveKitBrainConfig, LiveKitVoiceBrain } from "../../types/index.js";
|
|
14
|
+
/**
 * Build a voice brain around an already-configured NeuroLink instance.
 *
 * The result exposes `streamReply`, an async generator that yields the text
 * deltas of one conversational turn. If `turn.signal` is already aborted the
 * generator ends without calling the model; if it is aborted while streaming,
 * the generator stops at the next chunk (barge-in support).
 */
export declare function createVoiceBrain(config: LiveKitBrainConfig): LiveKitVoiceBrain;
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transport-agnostic voice brain.
|
|
3
|
+
*
|
|
4
|
+
* Given a user transcript and a stable conversation id, it streams the
|
|
5
|
+
* assistant's reply as text deltas by calling `neurolink.stream()`. NeuroLink
|
|
6
|
+
* owns history (via the conversation id and its memory layer) and tools (the
|
|
7
|
+
* tool-calling loop runs inside `stream()`); this module never touches a
|
|
8
|
+
* transport. The LiveKit worker (or any other transport) consumes the text
|
|
9
|
+
* deltas and converts them to audio.
|
|
10
|
+
*
|
|
11
|
+
* See docs/features/livekit-voice-agent.md.
|
|
12
|
+
*/
|
|
13
|
+
import { logger } from "../../utils/logger.js";
|
|
14
|
+
/**
 * Pull the text payload out of a stream chunk using runtime checks only.
 *
 * A chunk counts as text when it is a non-null object that has a `content`
 * property holding a non-empty string. Anything else (primitives, `null`,
 * objects without `content`, non-string or empty `content`) produces
 * `undefined` so the caller can skip it.
 */
function extractTextDelta(chunk) {
    const isCandidate = chunk !== null && typeof chunk === "object" && "content" in chunk;
    if (!isCandidate) {
        return undefined;
    }
    const text = chunk.content;
    return typeof text === "string" && text.length > 0 ? text : undefined;
}
|
|
30
|
+
/**
 * Build a voice brain around an already-configured NeuroLink instance.
 *
 * The configuration is read once, when the brain is created. The returned
 * object exposes `streamReply(turn)`, an async generator that:
 *   - ends immediately, without calling the model, if `turn.signal` is
 *     already aborted;
 *   - calls `neurolink.stream()` with the transcript as input, the configured
 *     provider/model/prompt/sampling options, a context carrying the
 *     conversation id as both `sessionId` and `conversationId` (plus `userId`
 *     when configured), the abort signal, and tools left enabled;
 *   - yields every non-empty text delta from the resulting stream, stopping
 *     at the next chunk once the signal is aborted.
 */
export function createVoiceBrain(config) {
    const { neurolink, provider, model, systemPrompt, temperature, maxTokens, userId, } = config;
    async function* streamReply(turn) {
        const { transcript: userText, conversationId: threadId, signal: abortSignal, } = turn;
        if (abortSignal?.aborted) {
            return;
        }
        // `userId` is only attached when one was configured.
        const context = userId === undefined
            ? { sessionId: threadId, conversationId: threadId }
            : { sessionId: threadId, conversationId: threadId, userId };
        const reply = await neurolink.stream({
            input: { text: userText },
            provider,
            model,
            systemPrompt,
            temperature,
            maxTokens,
            context,
            abortSignal,
            disableTools: false,
        });
        for await (const piece of reply.stream) {
            if (abortSignal?.aborted) {
                logger.debug("[LiveKitBrain] Turn aborted mid-stream; stopping");
                return;
            }
            const spoken = extractTextDelta(piece);
            if (spoken === undefined) {
                // Non-text chunk (or empty text): nothing to speak.
                continue;
            }
            yield spoken;
        }
    }
    return { streamReply };
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment resolution for the LiveKit voice agent.
|
|
3
|
+
*
|
|
4
|
+
* Reads LiveKit connection settings and LLM defaults from `process.env` with
|
|
5
|
+
* descriptive errors for missing required values. No type assertions: presence
|
|
6
|
+
* is verified with explicit string checks.
|
|
7
|
+
*
|
|
8
|
+
* See docs/features/livekit-voice-agent.md.
|
|
9
|
+
*/
|
|
10
|
+
import type { LiveKitServerConfig, LiveKitBrainDefaults } from "../../types/index.js";
|
|
11
|
+
/**
 * Read the LiveKit server connection settings from the environment.
 *
 * `LIVEKIT_URL`, `LIVEKIT_API_KEY` and `LIVEKIT_API_SECRET` are all required;
 * an unset or blank variable causes an Error naming that variable. The same
 * three variables are used whether the target is LiveKit Cloud, a self-hosted
 * server, or `livekit-server --dev`.
 */
export declare function resolveLiveKitServerConfig(): LiveKitServerConfig;
|
|
19
|
+
/**
 * Read the default LLM provider and model for the voice brain.
 *
 * `VOICE_LLM_PROVIDER` and `VOICE_LLM_MODEL` override the built-in defaults
 * (Bedrock with a Claude model) when set to a non-blank value.
 */
export declare function resolveBrainDefaults(): LiveKitBrainDefaults;
|
|
26
|
+
/**
 * Read the semantic end-of-utterance (EOU) turn-detection settings.
 *
 * `enabled` is true only when `LIVEKIT_EOU_TURN_DETECTION` is one of
 * `1`/`true`/`english`/`en`/`on`/`yes` (case-insensitive, surrounding
 * whitespace ignored); the feature is therefore opt-in.
 *
 * `unlikelyThreshold` is the numeric value of `LIVEKIT_EOU_UNLIKELY_THRESHOLD`
 * when that variable holds a finite number, otherwise `undefined`. It is meant
 * to override the EOU model's confidence threshold (lower = end the turn more
 * eagerly).
 */
export declare function resolveEouTurnDetection(): { enabled: boolean; unlikelyThreshold: number | undefined };
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Environment resolution for the LiveKit voice agent.
|
|
3
|
+
*
|
|
4
|
+
* Reads LiveKit connection settings and LLM defaults from `process.env` with
|
|
5
|
+
* descriptive errors for missing required values. No type assertions: presence
|
|
6
|
+
* is verified with explicit string checks.
|
|
7
|
+
*
|
|
8
|
+
* See docs/features/livekit-voice-agent.md.
|
|
9
|
+
*/
|
|
10
|
+
/** Provider used by the voice brain when `VOICE_LLM_PROVIDER` is not set. */
const DEFAULT_LLM_PROVIDER = "bedrock";
/** Model used by the voice brain when `VOICE_LLM_MODEL` is not set. */
const DEFAULT_LLM_MODEL = "claude-sonnet-4-6";
|
|
12
|
+
/**
 * Return the trimmed value of the environment variable `name`.
 *
 * @throws Error naming the variable when it is unset or contains only
 *   whitespace.
 */
function requireEnv(name) {
    const raw = process.env[name];
    const trimmed = typeof raw === "string" ? raw.trim() : "";
    if (trimmed.length === 0) {
        throw new Error(`${name} is not set in environment (required for the LiveKit voice agent)`);
    }
    return trimmed;
}
|
|
20
|
+
/**
 * Return the trimmed value of the environment variable `name`, or `fallback`
 * when the variable is unset or contains only whitespace.
 */
function readEnv(name, fallback) {
    const raw = process.env[name];
    const trimmed = typeof raw === "string" ? raw.trim() : "";
    return trimmed.length > 0 ? trimmed : fallback;
}
|
|
28
|
+
/**
 * Read the LiveKit server connection settings from the environment.
 *
 * The variables are read in the order `LIVEKIT_URL`, `LIVEKIT_API_KEY`,
 * `LIVEKIT_API_SECRET`; the first one that is unset or blank causes
 * `requireEnv` to throw an Error naming it.
 *
 * @returns `{ url, apiKey, apiSecret }` with each value trimmed.
 */
export function resolveLiveKitServerConfig() {
    const url = requireEnv("LIVEKIT_URL");
    const apiKey = requireEnv("LIVEKIT_API_KEY");
    const apiSecret = requireEnv("LIVEKIT_API_SECRET");
    return { url, apiKey, apiSecret };
}
|
|
42
|
+
/**
 * Read the default LLM provider and model for the voice brain.
 *
 * `VOICE_LLM_PROVIDER` and `VOICE_LLM_MODEL` are used when set to a non-blank
 * value; otherwise the module defaults `DEFAULT_LLM_PROVIDER` and
 * `DEFAULT_LLM_MODEL` apply.
 *
 * @returns `{ provider, model }`.
 */
export function resolveBrainDefaults() {
    const provider = readEnv("VOICE_LLM_PROVIDER", DEFAULT_LLM_PROVIDER);
    const model = readEnv("VOICE_LLM_MODEL", DEFAULT_LLM_MODEL);
    return { provider, model };
}
|
|
54
|
+
/** Values of `LIVEKIT_EOU_TURN_DETECTION` (lower-cased, trimmed) that enable EOU. */
const EOU_TRUTHY = new Set(["1", "true", "english", "en", "on", "yes"]);
/**
 * Read the semantic end-of-utterance (EOU) turn-detection settings.
 *
 * - `enabled`: true only when `LIVEKIT_EOU_TURN_DETECTION` is set to one of
 *   the `EOU_TRUTHY` values (case-insensitive, surrounding whitespace
 *   ignored). Unset or any other value leaves the feature off.
 * - `unlikelyThreshold`: `Number(...)` of the trimmed
 *   `LIVEKIT_EOU_UNLIKELY_THRESHOLD` when that yields a finite number;
 *   `undefined` when the variable is unset, blank, or not a finite number.
 *   No range check is applied here.
 *
 * @returns `{ enabled, unlikelyThreshold }`.
 */
export function resolveEouTurnDetection() {
    const { LIVEKIT_EOU_TURN_DETECTION: flag, LIVEKIT_EOU_UNLIKELY_THRESHOLD: thresholdText, } = process.env;
    const enabled = typeof flag === "string" && EOU_TRUTHY.has(flag.trim().toLowerCase());
    const hasThresholdText = typeof thresholdText === "string" && thresholdText.trim().length > 0;
    const candidate = hasThresholdText ? Number(thresholdText.trim()) : Number.NaN;
    const unlikelyThreshold = Number.isFinite(candidate) ? candidate : undefined;
    return { enabled, unlikelyThreshold };
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Data-channel event bridge.
|
|
3
|
+
*
|
|
4
|
+
* Connects NeuroLink's event emitter to the LiveKit room's data channel: it
|
|
5
|
+
* forwards NeuroLink events (text deltas, tool start/result, HITL prompts,
|
|
6
|
+
* stream lifecycle) to the browser as small versioned envelopes, and accepts
|
|
7
|
+
* control messages (HITL responses) back, re-emitting them onto the emitter so
|
|
8
|
+
* NeuroLink's HITL manager resolves.
|
|
9
|
+
*
|
|
10
|
+
* This is the WebRTC counterpart of the chat-mode SSE controller: same event
|
|
11
|
+
* source, different transport. The browser renders the envelopes (transcript,
|
|
12
|
+
* tool status, charts, confirmation prompts).
|
|
13
|
+
*
|
|
14
|
+
* `@livekit/rtc-node` is an optional dependency, imported dynamically only for
|
|
15
|
+
* the `RoomEvent` enum value. All payloads arrive typed as `unknown` and are
|
|
16
|
+
* narrowed with runtime guards — no type assertions.
|
|
17
|
+
*
|
|
18
|
+
* See docs/features/livekit-voice-agent.md.
|
|
19
|
+
*/
|
|
20
|
+
import type { LiveKitEventBridgeHandle, LiveKitEventBridgeParams } from "../../types/index.js";
|
|
21
|
+
/**
 * Attach the data-channel event bridge to a room.
 *
 * Resolves to a handle whose `dispose()` removes the listeners registered by
 * the bridge and stops publishing. Calling `dispose()` more than once is
 * safe.
 */
export declare function attachEventBridge(params: LiveKitEventBridgeParams): Promise<LiveKitEventBridgeHandle>;
|