@juspay/neurolink 9.50.1 → 9.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/browser/neurolink.min.js +282 -282
- package/dist/cli/commands/proxy.js +60 -15
- package/dist/cli/utils/serverUtils.d.ts +2 -1
- package/dist/cli/utils/serverUtils.js +7 -3
- package/dist/context/contextCompactor.js +2 -2
- package/dist/context/stages/slidingWindowTruncator.d.ts +1 -1
- package/dist/context/stages/slidingWindowTruncator.js +3 -3
- package/dist/core/modules/Utilities.d.ts +5 -0
- package/dist/core/modules/Utilities.js +29 -18
- package/dist/lib/context/contextCompactor.js +2 -2
- package/dist/lib/context/stages/slidingWindowTruncator.d.ts +1 -1
- package/dist/lib/context/stages/slidingWindowTruncator.js +3 -3
- package/dist/lib/core/modules/Utilities.d.ts +5 -0
- package/dist/lib/core/modules/Utilities.js +29 -18
- package/dist/lib/mcp/externalServerManager.d.ts +5 -0
- package/dist/lib/mcp/externalServerManager.js +24 -2
- package/dist/lib/neurolink.js +37 -3
- package/dist/lib/proxy/accountQuota.d.ts +6 -0
- package/dist/lib/proxy/accountQuota.js +24 -3
- package/dist/lib/proxy/proxyPaths.d.ts +25 -0
- package/dist/lib/proxy/proxyPaths.js +35 -0
- package/dist/lib/proxy/requestLogger.d.ts +1 -1
- package/dist/lib/proxy/requestLogger.js +2 -2
- package/dist/lib/services/server/ai/observability/instrumentation.js +39 -1
- package/dist/lib/types/cli.d.ts +1 -0
- package/dist/lib/types/externalMcp.d.ts +7 -0
- package/dist/mcp/externalServerManager.d.ts +5 -0
- package/dist/mcp/externalServerManager.js +24 -2
- package/dist/neurolink.js +37 -3
- package/dist/proxy/accountQuota.d.ts +6 -0
- package/dist/proxy/accountQuota.js +24 -3
- package/dist/proxy/proxyPaths.d.ts +25 -0
- package/dist/proxy/proxyPaths.js +34 -0
- package/dist/proxy/requestLogger.d.ts +1 -1
- package/dist/proxy/requestLogger.js +2 -2
- package/dist/services/server/ai/observability/instrumentation.js +39 -1
- package/dist/types/cli.d.ts +1 -0
- package/dist/types/externalMcp.d.ts +7 -0
- package/package.json +1 -1
|
@@ -26,7 +26,14 @@ const PROXY_TELEMETRY_SCRIPT_PATH = fileURLToPath(new URL("../../../scripts/obse
|
|
|
26
26
|
// =============================================================================
|
|
27
27
|
// STATE MANAGEMENT
|
|
28
28
|
// =============================================================================
|
|
29
|
-
|
|
29
|
+
let proxyStateManager = new StateFileManager("proxy-state.json");
|
|
30
|
+
/**
|
|
31
|
+
* Reinitialise the state manager with a custom base directory.
|
|
32
|
+
* Called when --dev redirects writable paths to .neurolink-dev/.
|
|
33
|
+
*/
|
|
34
|
+
function setProxyStateDir(baseDir) {
|
|
35
|
+
proxyStateManager = new StateFileManager("proxy-state.json", baseDir);
|
|
36
|
+
}
|
|
30
37
|
function saveProxyState(state) {
|
|
31
38
|
proxyStateManager.save(state);
|
|
32
39
|
}
|
|
@@ -333,12 +340,12 @@ async function loadProxyStartEnv(argv, spinner) {
|
|
|
333
340
|
process.exit(1);
|
|
334
341
|
}
|
|
335
342
|
}
|
|
336
|
-
async function createProxyNeurolinkRuntime() {
|
|
343
|
+
async function createProxyNeurolinkRuntime(logsDir) {
|
|
337
344
|
process.env.NEUROLINK_SKIP_MCP = "true";
|
|
338
345
|
const { NeuroLink } = await import("../../lib/neurolink.js");
|
|
339
346
|
const neurolink = new NeuroLink();
|
|
340
347
|
const { initRequestLogger, cleanupLogs } = await import("../../lib/proxy/requestLogger.js");
|
|
341
|
-
initRequestLogger(true);
|
|
348
|
+
initRequestLogger(true, logsDir);
|
|
342
349
|
cleanupLogs(7, 500);
|
|
343
350
|
return { neurolink, cleanupLogs };
|
|
344
351
|
}
|
|
@@ -701,7 +708,7 @@ function registerProxyShutdownHandlers(params) {
|
|
|
701
708
|
catch {
|
|
702
709
|
// non-fatal — proxy shutdown must not block on OTel
|
|
703
710
|
}
|
|
704
|
-
if (signal === "SIGINT") {
|
|
711
|
+
if (signal === "SIGINT" && !params.isDev) {
|
|
705
712
|
try {
|
|
706
713
|
const shutdownHost = params.host === "0.0.0.0" ? "localhost" : params.host;
|
|
707
714
|
await clearClaudeProxySettings(`http://${shutdownHost}:${params.port}`);
|
|
@@ -733,7 +740,11 @@ async function startProxyRuntime(params) {
|
|
|
733
740
|
port: params.port,
|
|
734
741
|
hostname: params.host,
|
|
735
742
|
});
|
|
736
|
-
|
|
743
|
+
// Skip the fail-open guard in dev mode — it monitors the proxy and clears
|
|
744
|
+
// global Claude settings on exit, which is exactly what we want to avoid.
|
|
745
|
+
const guardPid = params.argv.dev
|
|
746
|
+
? undefined
|
|
747
|
+
: spawnFailOpenGuard(params.host, params.port, process.pid);
|
|
737
748
|
const readinessHost = params.host === "0.0.0.0" ? "127.0.0.1" : params.host;
|
|
738
749
|
await waitForProxyReadiness({
|
|
739
750
|
host: readinessHost,
|
|
@@ -767,10 +778,16 @@ async function startProxyRuntime(params) {
|
|
|
767
778
|
if (params.spinner) {
|
|
768
779
|
params.spinner.succeed(chalk.green("Claude proxy started successfully"));
|
|
769
780
|
}
|
|
781
|
+
const isDev = params.argv.dev ?? false;
|
|
770
782
|
const normalizedHost = params.host === "0.0.0.0" ? "localhost" : params.host;
|
|
771
783
|
const url = `http://${normalizedHost}:${params.port}`;
|
|
772
784
|
printProxyBanner(url, params.strategy);
|
|
773
|
-
|
|
785
|
+
if (isDev) {
|
|
786
|
+
logger.always(` ${chalk.bold("Mode:")} ${chalk.magenta("dev (isolated — state in .neurolink-dev/)")}`);
|
|
787
|
+
}
|
|
788
|
+
else {
|
|
789
|
+
logger.always(` ${chalk.bold("Mode:")} ${chalk.cyan(params.passthrough ? "passthrough" : "full")}`);
|
|
790
|
+
}
|
|
774
791
|
if (params.passthrough) {
|
|
775
792
|
logger.always(chalk.yellow(" ! Passthrough mode forwards client auth directly to Anthropic"));
|
|
776
793
|
logger.always(chalk.dim(" Stored proxy OAuth/API credentials are ignored; clients need their own valid Anthropic auth."));
|
|
@@ -778,29 +795,52 @@ async function startProxyRuntime(params) {
|
|
|
778
795
|
if (params.loadedEnvFile) {
|
|
779
796
|
logger.always(` ${chalk.bold("Env File:")} ${chalk.cyan(params.loadedEnvFile)}`);
|
|
780
797
|
}
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
798
|
+
if (!isDev) {
|
|
799
|
+
try {
|
|
800
|
+
await setClaudeProxySettings(url);
|
|
801
|
+
logger.always(chalk.green(" ✓ Auto-configured Claude Code settings"));
|
|
802
|
+
logger.always(chalk.dim(" Restart Claude Code to connect through proxy"));
|
|
803
|
+
}
|
|
804
|
+
catch (error) {
|
|
805
|
+
logger.debug("[proxy] Failed to auto-configure Claude Code: " +
|
|
806
|
+
(error instanceof Error ? error.message : String(error)));
|
|
807
|
+
}
|
|
785
808
|
}
|
|
786
|
-
|
|
787
|
-
logger.
|
|
788
|
-
(error instanceof Error ? error.message : String(error)));
|
|
809
|
+
else {
|
|
810
|
+
logger.always(chalk.dim(" ⊘ Dev mode: skipping client auto-configuration"));
|
|
789
811
|
}
|
|
790
812
|
const maintenance = startProxyBackgroundMaintenance(params.cleanupLogs);
|
|
791
813
|
registerProxyShutdownHandlers({
|
|
792
814
|
server,
|
|
793
815
|
host: params.host,
|
|
794
816
|
port: params.port,
|
|
817
|
+
isDev,
|
|
795
818
|
...maintenance,
|
|
796
819
|
});
|
|
797
820
|
}
|
|
798
821
|
async function startProxyCommandHandler(argv) {
|
|
799
822
|
const spinner = argv.quiet ? null : ora("Starting Claude proxy...").start();
|
|
823
|
+
const isDev = argv.dev ?? false;
|
|
800
824
|
try {
|
|
801
|
-
|
|
825
|
+
// In dev mode: redirect writable state to .neurolink-dev/ and skip singleton check
|
|
826
|
+
let devPaths;
|
|
827
|
+
if (isDev) {
|
|
828
|
+
const { resolveProxyPaths } = await import("../../lib/proxy/proxyPaths.js");
|
|
829
|
+
devPaths = resolveProxyPaths(true);
|
|
830
|
+
setProxyStateDir(devPaths.stateDir);
|
|
831
|
+
const { initAccountQuota } = await import("../../lib/proxy/accountQuota.js");
|
|
832
|
+
initAccountQuota(devPaths.quotaFile);
|
|
833
|
+
// Ensure the dev state directory exists
|
|
834
|
+
const { mkdirSync, existsSync } = await import("fs");
|
|
835
|
+
if (!existsSync(devPaths.stateDir)) {
|
|
836
|
+
mkdirSync(devPaths.stateDir, { recursive: true, mode: 0o700 });
|
|
837
|
+
}
|
|
838
|
+
}
|
|
839
|
+
if (!isDev) {
|
|
840
|
+
await ensureProxyStartAllowed(spinner);
|
|
841
|
+
}
|
|
802
842
|
const loadedEnvFile = await loadProxyStartEnv(argv, spinner);
|
|
803
|
-
const { neurolink, cleanupLogs } = await createProxyNeurolinkRuntime();
|
|
843
|
+
const { neurolink, cleanupLogs } = await createProxyNeurolinkRuntime(devPaths?.logsDir);
|
|
804
844
|
const { proxyConfig, strategy, modelRouter, passthrough } = await loadProxyStartConfiguration(argv, spinner);
|
|
805
845
|
if (spinner) {
|
|
806
846
|
spinner.text = "Configuring server...";
|
|
@@ -904,6 +944,11 @@ export const proxyStartCommand = {
|
|
|
904
944
|
type: "boolean",
|
|
905
945
|
default: false,
|
|
906
946
|
description: "Run in transparent passthrough mode (no retry, no rotation, no polyfill)",
|
|
947
|
+
})
|
|
948
|
+
.option("dev", {
|
|
949
|
+
type: "boolean",
|
|
950
|
+
default: false,
|
|
951
|
+
description: "Run in isolated dev mode — state files scoped to .neurolink-dev/ in cwd, no client auto-configuration, no singleton check",
|
|
907
952
|
})
|
|
908
953
|
.example("neurolink proxy start", "Start proxy on default port 55669 with fill-first strategy")
|
|
909
954
|
.example("neurolink proxy start -p 8080 -s fill-first", "Start proxy on port 8080 with fill-first")
|
|
@@ -44,8 +44,9 @@ export declare class StateFileManager<T> {
|
|
|
44
44
|
/**
|
|
45
45
|
* Create a new state file manager
|
|
46
46
|
* @param filename - Name of the state file (e.g., "serve-state.json")
|
|
47
|
+
* @param baseDir - Optional base directory (defaults to ~/.neurolink)
|
|
47
48
|
*/
|
|
48
|
-
constructor(filename: string);
|
|
49
|
+
constructor(filename: string, baseDir?: string);
|
|
49
50
|
/**
|
|
50
51
|
* Get the full path to the state file
|
|
51
52
|
*/
|
|
@@ -92,9 +92,10 @@ export class StateFileManager {
|
|
|
92
92
|
/**
|
|
93
93
|
* Create a new state file manager
|
|
94
94
|
* @param filename - Name of the state file (e.g., "serve-state.json")
|
|
95
|
+
* @param baseDir - Optional base directory (defaults to ~/.neurolink)
|
|
95
96
|
*/
|
|
96
|
-
constructor(filename) {
|
|
97
|
-
this.filePath = path.join(getNeuroLinkDir(), filename);
|
|
97
|
+
constructor(filename, baseDir) {
|
|
98
|
+
this.filePath = path.join(baseDir ?? getNeuroLinkDir(), filename);
|
|
98
99
|
}
|
|
99
100
|
/**
|
|
100
101
|
* Get the full path to the state file
|
|
@@ -107,7 +108,10 @@ export class StateFileManager {
|
|
|
107
108
|
* @param state - State object to save
|
|
108
109
|
*/
|
|
109
110
|
save(state) {
|
|
110
|
-
|
|
111
|
+
const dir = path.dirname(this.filePath);
|
|
112
|
+
if (!fs.existsSync(dir)) {
|
|
113
|
+
fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
|
|
114
|
+
}
|
|
111
115
|
fs.writeFileSync(this.filePath, JSON.stringify(state, null, 2));
|
|
112
116
|
}
|
|
113
117
|
/**
|
|
@@ -23,7 +23,7 @@ const DEFAULT_CONFIG = {
|
|
|
23
23
|
enableSummarize: true,
|
|
24
24
|
enableTruncate: true,
|
|
25
25
|
pruneProtectTokens: 40_000,
|
|
26
|
-
pruneMinimumSavings:
|
|
26
|
+
pruneMinimumSavings: 500,
|
|
27
27
|
pruneProtectedTools: ["skill"],
|
|
28
28
|
summarizationProvider: "vertex",
|
|
29
29
|
summarizationModel: "gemini-2.5-flash",
|
|
@@ -151,7 +151,7 @@ export class ContextCompactor {
|
|
|
151
151
|
targetTokens: targetTokens,
|
|
152
152
|
provider: provider,
|
|
153
153
|
adaptiveBuffer: 0.15,
|
|
154
|
-
maxIterations:
|
|
154
|
+
maxIterations: 6,
|
|
155
155
|
});
|
|
156
156
|
if (truncResult.truncated) {
|
|
157
157
|
currentMessages = truncResult.messages;
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Features:
|
|
9
9
|
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
-
* instead of fixed 50%, with iterative refinement up to
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 6 passes.
|
|
11
11
|
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
12
|
* message content proportionally instead of returning no-op.
|
|
13
13
|
*/
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Features:
|
|
9
9
|
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
-
* instead of fixed 50%, with iterative refinement up to
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 6 passes.
|
|
11
11
|
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
12
|
* message content proportionally instead of returning no-op.
|
|
13
13
|
*/
|
|
@@ -153,8 +153,8 @@ export function truncateWithSlidingWindow(messages, config) {
|
|
|
153
153
|
messagesRemoved: evenRemoveCount,
|
|
154
154
|
};
|
|
155
155
|
}
|
|
156
|
-
// Not enough -- increase fraction by
|
|
157
|
-
currentFraction = Math.min(0.95, currentFraction + 0.
|
|
156
|
+
// Not enough -- increase fraction by 10% for finer-grained escalation
|
|
157
|
+
currentFraction = Math.min(0.95, currentFraction + 0.1);
|
|
158
158
|
continue;
|
|
159
159
|
}
|
|
160
160
|
// No token targets -- single-pass with calculated fraction
|
|
@@ -57,6 +57,11 @@ export declare class Utilities {
|
|
|
57
57
|
* Supports number or string formats (e.g., '30s', '2m', '1h')
|
|
58
58
|
*/
|
|
59
59
|
getTimeout(options: TextGenerationOptions | StreamOptions): number;
|
|
60
|
+
/**
|
|
61
|
+
* Get timeout scaled by estimated input token count.
|
|
62
|
+
* For large contexts (>100K tokens), increase timeout proportionally.
|
|
63
|
+
*/
|
|
64
|
+
getContextAwareTimeout(options: TextGenerationOptions | StreamOptions, estimatedTokens?: number): number;
|
|
60
65
|
/**
|
|
61
66
|
* Check if a schema is a Zod schema
|
|
62
67
|
*/
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
import { z } from "zod";
|
|
20
20
|
import { logger } from "../../utils/logger.js";
|
|
21
21
|
import { getSafeMaxTokens } from "../../utils/tokenLimits.js";
|
|
22
|
-
import { TimeoutError } from "../../utils/timeout.js";
|
|
22
|
+
import { TimeoutError, getDefaultTimeout, parseTimeout, } from "../../utils/timeout.js";
|
|
23
23
|
import { validateStreamOptions as validateStreamOpts, validateTextGenerationOptions, ValidationError, createValidationSummary, } from "../../utils/parameterValidation.js";
|
|
24
24
|
import { STEP_LIMITS } from "../constants.js";
|
|
25
25
|
/**
|
|
@@ -157,25 +157,36 @@ export class Utilities {
|
|
|
157
157
|
* Supports number or string formats (e.g., '30s', '2m', '1h')
|
|
158
158
|
*/
|
|
159
159
|
getTimeout(options) {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
// Parse string timeout (e.g., '30s', '2m', '1h')
|
|
167
|
-
const timeoutStr = options.timeout.toLowerCase();
|
|
168
|
-
const value = parseInt(timeoutStr);
|
|
169
|
-
if (timeoutStr.includes("h")) {
|
|
170
|
-
return value * 60 * 60 * 1000;
|
|
171
|
-
}
|
|
172
|
-
else if (timeoutStr.includes("m")) {
|
|
173
|
-
return value * 60 * 1000;
|
|
160
|
+
// If caller specified a timeout, use it (supports number ms and string formats)
|
|
161
|
+
if (options.timeout !== undefined && options.timeout !== null) {
|
|
162
|
+
const parsed = parseTimeout(options.timeout);
|
|
163
|
+
if (parsed !== undefined) {
|
|
164
|
+
return parsed;
|
|
165
|
+
}
|
|
174
166
|
}
|
|
175
|
-
|
|
176
|
-
|
|
167
|
+
// Use per-provider default (e.g., vertex=60s, ollama=5m) instead of global 30s.
|
|
168
|
+
// Always use "generate" operation here — streaming operations have their own
|
|
169
|
+
// longer timeout (DEFAULT_TIMEOUTS.streaming = 2m) applied by the streaming
|
|
170
|
+
// infrastructure in BaseProvider.stream(). Both TextGenerationOptions and
|
|
171
|
+
// StreamOptions share the same `input` property, so there is no reliable
|
|
172
|
+
// discriminator to detect streaming at this level.
|
|
173
|
+
const providerDefault = parseTimeout(getDefaultTimeout(this.providerName, "generate"));
|
|
174
|
+
return providerDefault ?? this.defaultTimeout;
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Get timeout scaled by estimated input token count.
|
|
178
|
+
* For large contexts (>100K tokens), increase timeout proportionally.
|
|
179
|
+
*/
|
|
180
|
+
getContextAwareTimeout(options, estimatedTokens) {
|
|
181
|
+
const baseTimeout = this.getTimeout(options);
|
|
182
|
+
if (!estimatedTokens || estimatedTokens <= 100_000) {
|
|
183
|
+
return baseTimeout;
|
|
177
184
|
}
|
|
178
|
-
|
|
185
|
+
// Scale: >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x)
|
|
186
|
+
// Use (estimatedTokens - 1) so exact multiples stay in the lower tier
|
|
187
|
+
// (e.g., 100_000 → 1x, 100_001 → 1.5x)
|
|
188
|
+
const scale = 1 + Math.floor((estimatedTokens - 1) / 100_000) * 0.5;
|
|
189
|
+
return Math.round(baseTimeout * Math.min(scale, 4));
|
|
179
190
|
}
|
|
180
191
|
/**
|
|
181
192
|
* Check if a schema is a Zod schema
|
|
@@ -23,7 +23,7 @@ const DEFAULT_CONFIG = {
|
|
|
23
23
|
enableSummarize: true,
|
|
24
24
|
enableTruncate: true,
|
|
25
25
|
pruneProtectTokens: 40_000,
|
|
26
|
-
pruneMinimumSavings:
|
|
26
|
+
pruneMinimumSavings: 500,
|
|
27
27
|
pruneProtectedTools: ["skill"],
|
|
28
28
|
summarizationProvider: "vertex",
|
|
29
29
|
summarizationModel: "gemini-2.5-flash",
|
|
@@ -151,7 +151,7 @@ export class ContextCompactor {
|
|
|
151
151
|
targetTokens: targetTokens,
|
|
152
152
|
provider: provider,
|
|
153
153
|
adaptiveBuffer: 0.15,
|
|
154
|
-
maxIterations:
|
|
154
|
+
maxIterations: 6,
|
|
155
155
|
});
|
|
156
156
|
if (truncResult.truncated) {
|
|
157
157
|
currentMessages = truncResult.messages;
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Features:
|
|
9
9
|
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
-
* instead of fixed 50%, with iterative refinement up to
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 6 passes.
|
|
11
11
|
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
12
|
* message content proportionally instead of returning no-op.
|
|
13
13
|
*/
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
*
|
|
8
8
|
* Features:
|
|
9
9
|
* - Adaptive truncation (PERF-001): calculates fraction from actual overage
|
|
10
|
-
* instead of fixed 50%, with iterative refinement up to
|
|
10
|
+
* instead of fixed 50%, with iterative refinement up to 6 passes.
|
|
11
11
|
* - Small conversation handling (BUG-005): for <= 4 messages, truncates
|
|
12
12
|
* message content proportionally instead of returning no-op.
|
|
13
13
|
*/
|
|
@@ -153,8 +153,8 @@ export function truncateWithSlidingWindow(messages, config) {
|
|
|
153
153
|
messagesRemoved: evenRemoveCount,
|
|
154
154
|
};
|
|
155
155
|
}
|
|
156
|
-
// Not enough -- increase fraction by
|
|
157
|
-
currentFraction = Math.min(0.95, currentFraction + 0.
|
|
156
|
+
// Not enough -- increase fraction by 10% for finer-grained escalation
|
|
157
|
+
currentFraction = Math.min(0.95, currentFraction + 0.1);
|
|
158
158
|
continue;
|
|
159
159
|
}
|
|
160
160
|
// No token targets -- single-pass with calculated fraction
|
|
@@ -57,6 +57,11 @@ export declare class Utilities {
|
|
|
57
57
|
* Supports number or string formats (e.g., '30s', '2m', '1h')
|
|
58
58
|
*/
|
|
59
59
|
getTimeout(options: TextGenerationOptions | StreamOptions): number;
|
|
60
|
+
/**
|
|
61
|
+
* Get timeout scaled by estimated input token count.
|
|
62
|
+
* For large contexts (>100K tokens), increase timeout proportionally.
|
|
63
|
+
*/
|
|
64
|
+
getContextAwareTimeout(options: TextGenerationOptions | StreamOptions, estimatedTokens?: number): number;
|
|
60
65
|
/**
|
|
61
66
|
* Check if a schema is a Zod schema
|
|
62
67
|
*/
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
import { z } from "zod";
|
|
20
20
|
import { logger } from "../../utils/logger.js";
|
|
21
21
|
import { getSafeMaxTokens } from "../../utils/tokenLimits.js";
|
|
22
|
-
import { TimeoutError } from "../../utils/timeout.js";
|
|
22
|
+
import { TimeoutError, getDefaultTimeout, parseTimeout, } from "../../utils/timeout.js";
|
|
23
23
|
import { validateStreamOptions as validateStreamOpts, validateTextGenerationOptions, ValidationError, createValidationSummary, } from "../../utils/parameterValidation.js";
|
|
24
24
|
import { STEP_LIMITS } from "../constants.js";
|
|
25
25
|
/**
|
|
@@ -157,25 +157,36 @@ export class Utilities {
|
|
|
157
157
|
* Supports number or string formats (e.g., '30s', '2m', '1h')
|
|
158
158
|
*/
|
|
159
159
|
getTimeout(options) {
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
// Parse string timeout (e.g., '30s', '2m', '1h')
|
|
167
|
-
const timeoutStr = options.timeout.toLowerCase();
|
|
168
|
-
const value = parseInt(timeoutStr);
|
|
169
|
-
if (timeoutStr.includes("h")) {
|
|
170
|
-
return value * 60 * 60 * 1000;
|
|
171
|
-
}
|
|
172
|
-
else if (timeoutStr.includes("m")) {
|
|
173
|
-
return value * 60 * 1000;
|
|
160
|
+
// If caller specified a timeout, use it (supports number ms and string formats)
|
|
161
|
+
if (options.timeout !== undefined && options.timeout !== null) {
|
|
162
|
+
const parsed = parseTimeout(options.timeout);
|
|
163
|
+
if (parsed !== undefined) {
|
|
164
|
+
return parsed;
|
|
165
|
+
}
|
|
174
166
|
}
|
|
175
|
-
|
|
176
|
-
|
|
167
|
+
// Use per-provider default (e.g., vertex=60s, ollama=5m) instead of global 30s.
|
|
168
|
+
// Always use "generate" operation here — streaming operations have their own
|
|
169
|
+
// longer timeout (DEFAULT_TIMEOUTS.streaming = 2m) applied by the streaming
|
|
170
|
+
// infrastructure in BaseProvider.stream(). Both TextGenerationOptions and
|
|
171
|
+
// StreamOptions share the same `input` property, so there is no reliable
|
|
172
|
+
// discriminator to detect streaming at this level.
|
|
173
|
+
const providerDefault = parseTimeout(getDefaultTimeout(this.providerName, "generate"));
|
|
174
|
+
return providerDefault ?? this.defaultTimeout;
|
|
175
|
+
}
|
|
176
|
+
/**
|
|
177
|
+
* Get timeout scaled by estimated input token count.
|
|
178
|
+
* For large contexts (>100K tokens), increase timeout proportionally.
|
|
179
|
+
*/
|
|
180
|
+
getContextAwareTimeout(options, estimatedTokens) {
|
|
181
|
+
const baseTimeout = this.getTimeout(options);
|
|
182
|
+
if (!estimatedTokens || estimatedTokens <= 100_000) {
|
|
183
|
+
return baseTimeout;
|
|
177
184
|
}
|
|
178
|
-
|
|
185
|
+
// Scale: >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x)
|
|
186
|
+
// Use (estimatedTokens - 1) so exact multiples stay in the lower tier
|
|
187
|
+
// (e.g., 100_000 → 1x, 100_001 → 1.5x)
|
|
188
|
+
const scale = 1 + Math.floor((estimatedTokens - 1) / 100_000) * 0.5;
|
|
189
|
+
return Math.round(baseTimeout * Math.min(scale, 4));
|
|
179
190
|
}
|
|
180
191
|
/**
|
|
181
192
|
* Check if a schema is a Zod schema
|
|
@@ -36,6 +36,11 @@ export declare class ExternalServerManager extends EventEmitter {
|
|
|
36
36
|
* Get current HITL manager
|
|
37
37
|
*/
|
|
38
38
|
getHITLManager(): HITLManager | undefined;
|
|
39
|
+
/**
|
|
40
|
+
* Resolve the human-readable server name for an event payload.
|
|
41
|
+
* Falls back to serverId if the instance or config.name isn't available.
|
|
42
|
+
*/
|
|
43
|
+
getServerName(serverId: string): string;
|
|
39
44
|
/**
|
|
40
45
|
* Load MCP server configurations from .mcp-config.json file with parallel loading support
|
|
41
46
|
* Automatically registers servers found in the configuration
|
|
@@ -194,10 +194,16 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
194
194
|
this.toolDiscovery = new ToolDiscoveryService();
|
|
195
195
|
// Forward tool discovery events
|
|
196
196
|
this.toolDiscovery.on("toolRegistered", (event) => {
|
|
197
|
-
this.emit("toolDiscovered",
|
|
197
|
+
this.emit("toolDiscovered", {
|
|
198
|
+
...event,
|
|
199
|
+
serverName: this.getServerName(event.serverId),
|
|
200
|
+
});
|
|
198
201
|
});
|
|
199
202
|
this.toolDiscovery.on("toolUnregistered", (event) => {
|
|
200
|
-
this.emit("toolRemoved",
|
|
203
|
+
this.emit("toolRemoved", {
|
|
204
|
+
...event,
|
|
205
|
+
serverName: this.getServerName(event.serverId),
|
|
206
|
+
});
|
|
201
207
|
});
|
|
202
208
|
// Handle process cleanup
|
|
203
209
|
process.on("SIGINT", () => this.shutdown());
|
|
@@ -223,6 +229,14 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
223
229
|
getHITLManager() {
|
|
224
230
|
return this.hitlManager;
|
|
225
231
|
}
|
|
232
|
+
/**
|
|
233
|
+
* Resolve the human-readable server name for an event payload.
|
|
234
|
+
* Falls back to serverId if the instance or config.name isn't available.
|
|
235
|
+
*/
|
|
236
|
+
getServerName(serverId) {
|
|
237
|
+
const instance = this.servers.get(serverId);
|
|
238
|
+
return instance?.config?.name || serverId;
|
|
239
|
+
}
|
|
226
240
|
/**
|
|
227
241
|
* Load MCP server configurations from .mcp-config.json file with parallel loading support
|
|
228
242
|
* Automatically registers servers found in the configuration
|
|
@@ -712,6 +726,8 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
712
726
|
};
|
|
713
727
|
}
|
|
714
728
|
mcpLogger.info(`[ExternalServerManager] Removing server: ${serverId}`);
|
|
729
|
+
// Capture name before deletion removes the instance
|
|
730
|
+
const serverName = this.getServerName(serverId);
|
|
715
731
|
// Stop the server
|
|
716
732
|
await this.stopServer(serverId);
|
|
717
733
|
// Remove from registry
|
|
@@ -719,6 +735,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
719
735
|
// Emit event
|
|
720
736
|
this.emit("disconnected", {
|
|
721
737
|
serverId,
|
|
738
|
+
serverName,
|
|
722
739
|
reason: "Manually removed",
|
|
723
740
|
timestamp: new Date(),
|
|
724
741
|
});
|
|
@@ -816,6 +833,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
816
833
|
// Emit connected event
|
|
817
834
|
this.emit("connected", {
|
|
818
835
|
serverId,
|
|
836
|
+
serverName: this.getServerName(serverId),
|
|
819
837
|
toolCount: instance.toolsMap.size,
|
|
820
838
|
timestamp: new Date(),
|
|
821
839
|
});
|
|
@@ -921,6 +939,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
921
939
|
// Emit status change event
|
|
922
940
|
this.emit("statusChanged", {
|
|
923
941
|
serverId,
|
|
942
|
+
serverName: this.getServerName(serverId),
|
|
924
943
|
oldStatus,
|
|
925
944
|
newStatus,
|
|
926
945
|
timestamp: new Date(),
|
|
@@ -941,6 +960,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
941
960
|
// Emit failed event
|
|
942
961
|
this.emit("failed", {
|
|
943
962
|
serverId,
|
|
963
|
+
serverName: this.getServerName(serverId),
|
|
944
964
|
error: error.message,
|
|
945
965
|
timestamp: new Date(),
|
|
946
966
|
});
|
|
@@ -965,6 +985,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
965
985
|
// Emit disconnected event
|
|
966
986
|
this.emit("disconnected", {
|
|
967
987
|
serverId,
|
|
988
|
+
serverName: this.getServerName(serverId),
|
|
968
989
|
reason,
|
|
969
990
|
timestamp: new Date(),
|
|
970
991
|
});
|
|
@@ -1078,6 +1099,7 @@ export class ExternalServerManager extends EventEmitter {
|
|
|
1078
1099
|
// Emit health check event
|
|
1079
1100
|
this.emit("healthCheck", {
|
|
1080
1101
|
serverId,
|
|
1102
|
+
serverName: this.getServerName(serverId),
|
|
1081
1103
|
health,
|
|
1082
1104
|
timestamp: new Date(),
|
|
1083
1105
|
});
|
package/dist/lib/neurolink.js
CHANGED
|
@@ -50,7 +50,7 @@ import { createMemoryRetrievalTools } from "./memory/memoryRetrievalTools.js";
|
|
|
50
50
|
import { getMetricsAggregator, MetricsAggregator, } from "./observability/metricsAggregator.js";
|
|
51
51
|
import { SpanStatus, SpanType } from "./observability/types/spanTypes.js";
|
|
52
52
|
import { SpanSerializer } from "./observability/utils/spanSerializer.js";
|
|
53
|
-
import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
|
|
53
|
+
import { flushOpenTelemetry, getLangfuseHealthStatus, initializeOpenTelemetry, isOpenTelemetryInitialized, runWithCurrentLangfuseContext, setLangfuseContext, shutdownOpenTelemetry, } from "./services/server/ai/observability/instrumentation.js";
|
|
54
54
|
import { TaskManager } from "./tasks/taskManager.js";
|
|
55
55
|
import { createTaskTools } from "./tasks/tools/taskTools.js";
|
|
56
56
|
import { ATTR } from "./telemetry/attributes.js";
|
|
@@ -1129,7 +1129,10 @@ Current user's request: ${currentInput}`;
|
|
|
1129
1129
|
* Supports additional users with per-user prompt and maxWords overrides.
|
|
1130
1130
|
*/
|
|
1131
1131
|
storeMemoryInBackground(originalPrompt, responseContent, userId, additionalUsers) {
|
|
1132
|
-
setImmediate
|
|
1132
|
+
// Preserve AsyncLocalStorage context across setImmediate boundary so that
|
|
1133
|
+
// memory writes appear under the originating Langfuse trace instead of
|
|
1134
|
+
// becoming orphan spans.
|
|
1135
|
+
const wrappedMemoryWrite = runWithCurrentLangfuseContext(async () => {
|
|
1133
1136
|
try {
|
|
1134
1137
|
const client = this.ensureMemoryReady();
|
|
1135
1138
|
if (!client) {
|
|
@@ -1145,12 +1148,18 @@ Current user's request: ${currentInput}`;
|
|
|
1145
1148
|
: undefined;
|
|
1146
1149
|
writeOps.push(client.add(user.userId, content, addOptions));
|
|
1147
1150
|
}
|
|
1148
|
-
|
|
1151
|
+
// withTimeout races against Promise.all — if the timeout fires, the
|
|
1152
|
+
// await resolves with an error but the underlying client.add() calls
|
|
1153
|
+
// may still complete in the background. This is acceptable: the memory
|
|
1154
|
+
// client API (Mem0) doesn't support AbortSignal, and these are
|
|
1155
|
+
// fire-and-forget background writes where a stale completion is harmless.
|
|
1156
|
+
await withTimeout(Promise.all(writeOps), 30_000, new Error("Background memory write timed out after 30s"));
|
|
1149
1157
|
}
|
|
1150
1158
|
catch (error) {
|
|
1151
1159
|
logger.warn("Memory storage failed:", error);
|
|
1152
1160
|
}
|
|
1153
1161
|
});
|
|
1162
|
+
setImmediate(wrappedMemoryWrite);
|
|
1154
1163
|
}
|
|
1155
1164
|
/**
|
|
1156
1165
|
* Set up HITL event forwarding to main emitter
|
|
@@ -3723,6 +3732,21 @@ Current user's request: ${currentInput}`;
|
|
|
3723
3732
|
conversationMessageCount: conversationMessages.length,
|
|
3724
3733
|
shouldCompact: budgetResult.shouldCompact,
|
|
3725
3734
|
});
|
|
3735
|
+
// Scale timeout for large contexts if caller didn't set one explicitly.
|
|
3736
|
+
// Providers read options.timeout via getTimeout(), so setting it here
|
|
3737
|
+
// propagates to any downstream provider call.
|
|
3738
|
+
if (options.timeout === undefined &&
|
|
3739
|
+
budgetResult.estimatedInputTokens > 100_000) {
|
|
3740
|
+
// >100K → 1.5x, >200K → 2x, >300K → 2.5x (capped at 4x) of 60s base
|
|
3741
|
+
const scale = 1 + Math.floor((budgetResult.estimatedInputTokens - 1) / 100_000) * 0.5;
|
|
3742
|
+
const scaledMs = Math.round(60_000 * Math.min(scale, 4));
|
|
3743
|
+
options.timeout = scaledMs;
|
|
3744
|
+
logger.info("[TokenBudget] Scaled timeout for large context", {
|
|
3745
|
+
requestId,
|
|
3746
|
+
estimatedTokens: budgetResult.estimatedInputTokens,
|
|
3747
|
+
scaledTimeoutMs: scaledMs,
|
|
3748
|
+
});
|
|
3749
|
+
}
|
|
3726
3750
|
const compactionSessionId = this.getCompactionSessionId(options);
|
|
3727
3751
|
const lastCompactionCount = this.lastCompactionMessageCount.get(compactionSessionId) ?? 0;
|
|
3728
3752
|
if (!budgetResult.shouldCompact ||
|
|
@@ -3798,6 +3822,8 @@ Current user's request: ${currentInput}`;
|
|
|
3798
3822
|
toolDefinitions: availableTools,
|
|
3799
3823
|
});
|
|
3800
3824
|
if (!finalBudget.withinBudget) {
|
|
3825
|
+
// Clear watermark so handleContextOverflow recovery can re-compact
|
|
3826
|
+
this.lastCompactionMessageCount.delete(compactionSessionId);
|
|
3801
3827
|
throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
|
|
3802
3828
|
`Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
|
|
3803
3829
|
`Budget: ${finalBudget.availableInputTokens} tokens. ` +
|
|
@@ -3993,6 +4019,8 @@ Current user's request: ${currentInput}`;
|
|
|
3993
4019
|
: undefined,
|
|
3994
4020
|
});
|
|
3995
4021
|
if (!finalBudget.withinBudget) {
|
|
4022
|
+
// Clear watermark so handleContextOverflow recovery can re-compact
|
|
4023
|
+
this.lastCompactionMessageCount.delete(dpgCompactionSessionId);
|
|
3996
4024
|
throw new ContextBudgetExceededError(`Context exceeds model budget after all compaction stages. ` +
|
|
3997
4025
|
`Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
|
|
3998
4026
|
`Budget: ${finalBudget.availableInputTokens} tokens.`, {
|
|
@@ -5016,6 +5044,8 @@ Current user's request: ${currentInput}`;
|
|
|
5016
5044
|
toolDefinitions: availableTools,
|
|
5017
5045
|
});
|
|
5018
5046
|
if (!finalBudget.withinBudget) {
|
|
5047
|
+
// Clear watermark so handleContextOverflow recovery can re-compact
|
|
5048
|
+
this.lastCompactionMessageCount.delete(streamCompactionSessionId);
|
|
5019
5049
|
throw new ContextBudgetExceededError(`Stream context exceeds model budget after all compaction stages. ` +
|
|
5020
5050
|
`Estimated: ${finalBudget.estimatedInputTokens} tokens, ` +
|
|
5021
5051
|
`Budget: ${finalBudget.availableInputTokens} tokens.`, {
|
|
@@ -7508,6 +7538,7 @@ Current user's request: ${currentInput}`;
|
|
|
7508
7538
|
// Emit server added event
|
|
7509
7539
|
this.emitter.emit("externalMCP:serverAdded", {
|
|
7510
7540
|
serverId,
|
|
7541
|
+
serverName: config.name || serverId,
|
|
7511
7542
|
config,
|
|
7512
7543
|
toolCount: result.metadata?.toolsDiscovered || 0,
|
|
7513
7544
|
timestamp: Date.now(),
|
|
@@ -7535,12 +7566,15 @@ Current user's request: ${currentInput}`;
|
|
|
7535
7566
|
this.invalidateToolCache(); // Invalidate cache when an external server is removed
|
|
7536
7567
|
try {
|
|
7537
7568
|
mcpLogger.info(`[NeuroLink] Removing external MCP server: ${serverId}`);
|
|
7569
|
+
// Capture the configured name before removal destroys the instance
|
|
7570
|
+
const serverName = this.externalServerManager.getServerName(serverId);
|
|
7538
7571
|
const result = await this.externalServerManager.removeServer(serverId);
|
|
7539
7572
|
if (result.success) {
|
|
7540
7573
|
mcpLogger.info(`[NeuroLink] External MCP server removed successfully: ${serverId}`);
|
|
7541
7574
|
// Emit server removed event
|
|
7542
7575
|
this.emitter.emit("externalMCP:serverRemoved", {
|
|
7543
7576
|
serverId,
|
|
7577
|
+
serverName,
|
|
7544
7578
|
timestamp: Date.now(),
|
|
7545
7579
|
});
|
|
7546
7580
|
}
|