@juspay/neurolink 9.70.0 → 9.70.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/dist/browser/neurolink.min.js +355 -347
- package/dist/core/modules/GenerationHandler.js +75 -23
- package/dist/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/core/modules/structuredOutputPolicy.js +50 -0
- package/dist/lib/core/modules/GenerationHandler.js +75 -23
- package/dist/lib/core/modules/structuredOutputPolicy.d.ts +28 -0
- package/dist/lib/core/modules/structuredOutputPolicy.js +51 -0
- package/dist/lib/neurolink.js +58 -0
- package/dist/lib/providers/anthropic.js +34 -7
- package/dist/lib/providers/googleVertex.js +17 -2
- package/dist/lib/types/generate.d.ts +47 -19
- package/dist/lib/types/utilities.d.ts +16 -0
- package/dist/lib/utils/json/coerce.d.ts +10 -0
- package/dist/lib/utils/json/coerce.js +141 -0
- package/dist/lib/utils/json/extract.d.ts +10 -0
- package/dist/lib/utils/json/extract.js +61 -11
- package/dist/lib/utils/tokenLimits.d.ts +20 -0
- package/dist/lib/utils/tokenLimits.js +55 -0
- package/dist/neurolink.js +58 -0
- package/dist/providers/anthropic.js +34 -7
- package/dist/providers/googleVertex.js +17 -2
- package/dist/types/generate.d.ts +47 -19
- package/dist/types/utilities.d.ts +16 -0
- package/dist/utils/json/coerce.d.ts +10 -0
- package/dist/utils/json/coerce.js +140 -0
- package/dist/utils/json/extract.d.ts +10 -0
- package/dist/utils/json/extract.js +61 -11
- package/dist/utils/tokenLimits.d.ts +20 -0
- package/dist/utils/tokenLimits.js +55 -0
- package/package.json +4 -1
package/dist/neurolink.js
CHANGED
|
@@ -66,6 +66,7 @@ import { CircuitBreaker, ERROR_CODES, ErrorFactory, isAbortError, isRetriableErr
|
|
|
66
66
|
import { hasLifecycleErrorFired, markLifecycleErrorFired, } from "./utils/lifecycleCallbacks.js";
|
|
67
67
|
import { resolveLifecycleTimeoutMs } from "./utils/lifecycleTimeout.js";
|
|
68
68
|
import { cloneOptionsForCallIsolation } from "./utils/cloneOptions.js";
|
|
69
|
+
import { coerceJsonToSchema } from "./utils/json/coerce.js";
|
|
69
70
|
// Factory processing imports
|
|
70
71
|
import { createCleanStreamOptions, enhanceTextGenerationOptions, processFactoryOptions, processStreamingFactoryOptions, validateFactoryConfig, } from "./utils/factoryProcessing.js";
|
|
71
72
|
import { logger, mcpLogger } from "./utils/logger.js";
|
|
@@ -3345,6 +3346,60 @@ Current user's request: ${currentInput}`;
|
|
|
3345
3346
|
}
|
|
3346
3347
|
finalizeGenerateRequestResult(params) {
|
|
3347
3348
|
const { generateSpan, options, textOptions, textResult, factoryResult, originalPrompt, startTime, } = params;
|
|
3349
|
+
// Provider-agnostic JSON coercion for schema requests. Structured-output
|
|
3350
|
+
// enforcement makes valid JSON the overwhelming case; for every other
|
|
3351
|
+
// provider path — including generate() overrides (Vertex, Anthropic,
|
|
3352
|
+
// Bedrock, Google AI Studio) — object/array roots are recovered here via
|
|
3353
|
+
// balanced-scan + jsonrepair and scalar JSON roots via plain JSON.parse,
|
|
3354
|
+
// with the parsed value exposed as `structuredData`. If nothing
|
|
3355
|
+
// JSON-shaped is recoverable (pure prose), the raw text is returned,
|
|
3356
|
+
// `structuredData` stays undefined, and a WARN makes the case observable.
|
|
3357
|
+
// Runs BEFORE the end-of-generation emits below so event consumers see
|
|
3358
|
+
// the same coerced content/structuredData the caller receives.
|
|
3359
|
+
if (textOptions.schema &&
|
|
3360
|
+
textResult.structuredData === undefined &&
|
|
3361
|
+
typeof textResult.content === "string") {
|
|
3362
|
+
const coerced = coerceJsonToSchema(textResult.content, textOptions.schema);
|
|
3363
|
+
if (coerced) {
|
|
3364
|
+
textResult.content = coerced.content;
|
|
3365
|
+
textResult.structuredData = coerced.structuredData;
|
|
3366
|
+
if (coerced.repaired) {
|
|
3367
|
+
textResult.jsonRepaired = true;
|
|
3368
|
+
}
|
|
3369
|
+
if (coerced.truncated) {
|
|
3370
|
+
textResult.jsonTruncated = true;
|
|
3371
|
+
}
|
|
3372
|
+
}
|
|
3373
|
+
else {
|
|
3374
|
+
try {
|
|
3375
|
+
const scalar = JSON.parse(textResult.content);
|
|
3376
|
+
if (scalar !== null && scalar !== undefined) {
|
|
3377
|
+
textResult.structuredData = scalar;
|
|
3378
|
+
}
|
|
3379
|
+
}
|
|
3380
|
+
catch {
|
|
3381
|
+
logger.warn("[NeuroLink] schema requested but no JSON could be recovered from model output; returning raw text", { provider: textResult.provider, model: textResult.model });
|
|
3382
|
+
}
|
|
3383
|
+
}
|
|
3384
|
+
}
|
|
3385
|
+
// Surface truncation when a schema was requested: either the provider
|
|
3386
|
+
// reported finishReason="length" or the recovered JSON came from an
|
|
3387
|
+
// unclosed span. Either way `structuredData` may be incomplete — warn at
|
|
3388
|
+
// WARN level so it is observable in production (not just debug logs).
|
|
3389
|
+
if (textOptions.schema) {
|
|
3390
|
+
if (textResult.finishReason === "length") {
|
|
3391
|
+
textResult.jsonTruncated = true;
|
|
3392
|
+
}
|
|
3393
|
+
if (textResult.jsonTruncated) {
|
|
3394
|
+
logger.warn("[NeuroLink] Structured output may be truncated (finishReason=length or unclosed JSON); " +
|
|
3395
|
+
"increase maxTokens to fit the full response.", {
|
|
3396
|
+
provider: textResult.provider,
|
|
3397
|
+
model: textResult.model,
|
|
3398
|
+
finishReason: textResult.finishReason,
|
|
3399
|
+
outputTokens: textResult.usage?.output,
|
|
3400
|
+
});
|
|
3401
|
+
}
|
|
3402
|
+
}
|
|
3348
3403
|
// Skip the top-level `generation:end` emission when the provider already
|
|
3349
3404
|
// emitted it from its native generate path (Vertex / Google AI Studio).
|
|
3350
3405
|
// Without this guard, native-path providers would surface TWO events
|
|
@@ -3378,7 +3433,10 @@ Current user's request: ${currentInput}`;
|
|
|
3378
3433
|
this.emitter.emit("message", `Generation completed in ${Date.now() - startTime}ms`);
|
|
3379
3434
|
const generateResult = {
|
|
3380
3435
|
content: textResult.content,
|
|
3436
|
+
structuredData: textResult.structuredData,
|
|
3381
3437
|
finishReason: textResult.finishReason,
|
|
3438
|
+
jsonRepaired: textResult.jsonRepaired,
|
|
3439
|
+
jsonTruncated: textResult.jsonTruncated,
|
|
3382
3440
|
provider: textResult.provider,
|
|
3383
3441
|
model: textResult.model,
|
|
3384
3442
|
usage: textResult.usage
|
|
@@ -21,6 +21,7 @@ import { emitToolEndFromStepFinish } from "../utils/toolEndEmitter.js";
|
|
|
21
21
|
import { NoOutputGeneratedError } from "../utils/generationErrors.js";
|
|
22
22
|
import { buildNoOutputSentinel, stampNoOutputSpan, } from "../utils/noOutputSentinel.js";
|
|
23
23
|
import { convertZodToJsonSchema } from "../utils/schemaConversion.js";
|
|
24
|
+
import { resolveClaudeMaxTokens } from "../utils/tokenLimits.js";
|
|
24
25
|
import { createChunkQueue, createDeferredAnalytics, stringifyToolInput, } from "./openaiChatCompletionsClient.js";
|
|
25
26
|
/**
|
|
26
27
|
* Beta headers for Claude Code integration.
|
|
@@ -493,10 +494,19 @@ const mapAnthropicStopReason = (raw) => {
|
|
|
493
494
|
return "stop";
|
|
494
495
|
}
|
|
495
496
|
};
|
|
496
|
-
// Anthropic's Messages API requires max_tokens on every request.
|
|
497
|
-
//
|
|
498
|
-
//
|
|
499
|
-
|
|
497
|
+
// Anthropic's Messages API requires max_tokens on every request. When the
|
|
498
|
+
// caller omits it, default to the model's real output ceiling via
|
|
499
|
+
// resolveClaudeMaxTokens (e.g. 64K for Sonnet 4.x) instead of the legacy 4096,
|
|
500
|
+
// which silently truncated large structured responses mid-JSON.
|
|
501
|
+
//
|
|
502
|
+
// Client-level request timeout. The Anthropic SDK throws "Streaming is required
|
|
503
|
+
// for long requests" from a NON-streaming `messages.create` when `max_tokens`
|
|
504
|
+
// is large AND no client-level timeout is configured (it can't estimate a safe
|
|
505
|
+
// timeout). Setting an explicit client timeout — equal to the SDK's own default
|
|
506
|
+
// for the non-throwing path — suppresses that pre-flight throw so large
|
|
507
|
+
// max_tokens (our model-ceiling default) works. Per-request duration is still
|
|
508
|
+
// bounded by the abort signal NeuroLink composes for each call.
|
|
509
|
+
const ANTHROPIC_CLIENT_TIMEOUT_MS = 600_000;
|
|
500
510
|
/**
|
|
501
511
|
* Anthropic Provider v2 - BaseProvider Implementation
|
|
502
512
|
* Enhanced with OAuth support, subscription tiers, and beta headers for Claude Code integration.
|
|
@@ -602,6 +612,7 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
602
612
|
apiKey: "oauth-authenticated", // Placeholder, actual auth is in fetch wrapper
|
|
603
613
|
// Note: No headers passed - fetch wrapper sets oauth-2025-04-20 beta header
|
|
604
614
|
fetch: oauthFetch,
|
|
615
|
+
timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
|
|
605
616
|
});
|
|
606
617
|
logger.debug("[AnthropicProvider] Anthropic SDK client created with OAuth fetch wrapper");
|
|
607
618
|
logger.debug("Anthropic Provider initialized with OAuth", {
|
|
@@ -647,6 +658,7 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
647
658
|
defaultHeaders: headers,
|
|
648
659
|
...(normalizedBaseURL && { baseURL: normalizedBaseURL }),
|
|
649
660
|
fetch: createProxyFetch(),
|
|
661
|
+
timeout: ANTHROPIC_CLIENT_TIMEOUT_MS,
|
|
650
662
|
});
|
|
651
663
|
logger.debug("Anthropic Provider initialized with API key", {
|
|
652
664
|
modelName: this.modelName,
|
|
@@ -1122,7 +1134,7 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
1122
1134
|
const params = {
|
|
1123
1135
|
model: modelId,
|
|
1124
1136
|
messages,
|
|
1125
|
-
max_tokens: options.maxOutputTokens
|
|
1137
|
+
max_tokens: resolveClaudeMaxTokens(modelId, options.maxOutputTokens),
|
|
1126
1138
|
...(system ? { system } : {}),
|
|
1127
1139
|
...(options.temperature !== undefined && options.temperature !== null
|
|
1128
1140
|
? { temperature: options.temperature }
|
|
@@ -1137,7 +1149,22 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
1137
1149
|
...(toolChoice ? { tool_choice: toolChoice } : {}),
|
|
1138
1150
|
...(thinking ? { thinking } : {}),
|
|
1139
1151
|
};
|
|
1140
|
-
|
|
1152
|
+
// The 60s anthropic generate default was tuned for the old ~4096
|
|
1153
|
+
// max_tokens. Now that the default ceiling is the model's real max,
|
|
1154
|
+
// a large structured response needs more wall-clock to be produced —
|
|
1155
|
+
// otherwise the inner controller aborts mid-generation (the AI-SDK
|
|
1156
|
+
// doGenerate layer doesn't see the caller's `timeout`). Raise the
|
|
1157
|
+
// floor to 5 min when a large output budget is in play — but only
|
|
1158
|
+
// when the caller did NOT set an explicit timeout: an explicit value
|
|
1159
|
+
// is a contract and must never be silently extended. The abort
|
|
1160
|
+
// signal stays the real bound.
|
|
1161
|
+
const callerTimeout = options
|
|
1162
|
+
.timeout;
|
|
1163
|
+
const callerSpecifiedTimeout = callerTimeout !== undefined && callerTimeout !== null;
|
|
1164
|
+
const generateTimeoutMs = params.max_tokens > 8192 && !callerSpecifiedTimeout
|
|
1165
|
+
? Math.max(getTimeoutForOptions(options), 300_000)
|
|
1166
|
+
: getTimeoutForOptions(options);
|
|
1167
|
+
const timeoutController = createTimeoutController(generateTimeoutMs, providerName, "generate");
|
|
1141
1168
|
let response;
|
|
1142
1169
|
try {
|
|
1143
1170
|
response = await client.messages.create(params, {
|
|
@@ -1356,7 +1383,7 @@ export class AnthropicProvider extends BaseProvider {
|
|
|
1356
1383
|
const params = {
|
|
1357
1384
|
model: modelId,
|
|
1358
1385
|
messages: conversation,
|
|
1359
|
-
max_tokens: options.maxTokens
|
|
1386
|
+
max_tokens: resolveClaudeMaxTokens(modelId, options.maxTokens),
|
|
1360
1387
|
stream: true,
|
|
1361
1388
|
...(payload.system ? { system: payload.system } : {}),
|
|
1362
1389
|
...(options.temperature !== undefined && options.temperature !== null
|
|
@@ -14,6 +14,7 @@ import { FileDetector } from "../utils/fileDetector.js";
|
|
|
14
14
|
import { processUnifiedFilesArray } from "../utils/messageBuilder.js";
|
|
15
15
|
import { logger } from "../utils/logger.js";
|
|
16
16
|
import { hasRestrictedOutputLimit, RESTRICTED_OUTPUT_TOKEN_LIMIT, } from "../utils/modelDetection.js";
|
|
17
|
+
import { resolveClaudeMaxTokens } from "../utils/tokenLimits.js";
|
|
17
18
|
import { validateApiKey, createVertexProjectConfig, createGoogleAuthConfig, } from "../utils/providerConfig.js";
|
|
18
19
|
import { convertZodToJsonSchema, inlineJsonSchema, ensureNestedSchemaTypes, } from "../utils/schemaConversion.js";
|
|
19
20
|
import { createNativeThinkingConfig } from "../utils/thinkingConfig.js";
|
|
@@ -2293,7 +2294,11 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2293
2294
|
: undefined;
|
|
2294
2295
|
const requestParams = {
|
|
2295
2296
|
model: modelName,
|
|
2296
|
-
|
|
2297
|
+
// Default to the model's real output ceiling (e.g. 64K for Sonnet 4.x)
|
|
2298
|
+
// instead of the legacy 4096, which silently truncated large structured
|
|
2299
|
+
// responses mid-JSON. resolveClaudeMaxTokens also clamps over-large
|
|
2300
|
+
// caller values so the native Vertex path never 400s.
|
|
2301
|
+
max_tokens: resolveClaudeMaxTokens(modelName, options.maxTokens),
|
|
2297
2302
|
messages: messages,
|
|
2298
2303
|
...(tools && tools.length > 0 && { tools }),
|
|
2299
2304
|
...(useFinalResultTool && { tool_choice: { type: "any" } }),
|
|
@@ -2813,7 +2818,8 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2813
2818
|
: undefined;
|
|
2814
2819
|
const requestParams = {
|
|
2815
2820
|
model: modelName,
|
|
2816
|
-
|
|
2821
|
+
// Default to the model's real output ceiling (see stream path note).
|
|
2822
|
+
max_tokens: resolveClaudeMaxTokens(modelName, options.maxTokens),
|
|
2817
2823
|
messages,
|
|
2818
2824
|
...(tools && tools.length > 0 && { tools }),
|
|
2819
2825
|
...(useFinalResultTool && { tool_choice: { type: "any" } }),
|
|
@@ -2835,6 +2841,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2835
2841
|
const allToolCalls = [];
|
|
2836
2842
|
let totalInputTokens = 0;
|
|
2837
2843
|
let totalOutputTokens = 0;
|
|
2844
|
+
// Track the final Anthropic stop_reason so we can surface finishReason
|
|
2845
|
+
// (notably "length" on token truncation) — the legacy native path always
|
|
2846
|
+
// reported "stop", hiding truncation from callers.
|
|
2847
|
+
let lastStopReason;
|
|
2838
2848
|
const currentMessages = [...messages];
|
|
2839
2849
|
while (step < maxSteps) {
|
|
2840
2850
|
step++;
|
|
@@ -2849,6 +2859,7 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2849
2859
|
// Update token counts
|
|
2850
2860
|
totalInputTokens += response.usage?.input_tokens || 0;
|
|
2851
2861
|
totalOutputTokens += response.usage?.output_tokens || 0;
|
|
2862
|
+
lastStopReason = response.stop_reason;
|
|
2852
2863
|
// Check if we need to handle tool use
|
|
2853
2864
|
const toolUseBlocks = response.content.filter((block) => block.type === "tool_use");
|
|
2854
2865
|
// Check for final_result tool call (for structured output)
|
|
@@ -2997,6 +3008,10 @@ export class GoogleVertexProvider extends BaseProvider {
|
|
|
2997
3008
|
const externalToolExecutions = toolExecutions.filter((te) => te.name !== "final_result");
|
|
2998
3009
|
const result = {
|
|
2999
3010
|
content: finalText,
|
|
3011
|
+
// Surface truncation: Anthropic "max_tokens" → unified "length" so the
|
|
3012
|
+
// SDK boundary can flag/observe incomplete structured output. Anything
|
|
3013
|
+
// else (end_turn / stop_sequence / tool_use) is a normal stop.
|
|
3014
|
+
finishReason: lastStopReason === "max_tokens" ? "length" : "stop",
|
|
3000
3015
|
provider: this.providerName,
|
|
3001
3016
|
model: modelName,
|
|
3002
3017
|
usage: {
|
package/dist/types/generate.d.ts
CHANGED
|
@@ -249,30 +249,32 @@ export type GenerateOptions = {
|
|
|
249
249
|
/**
|
|
250
250
|
* Zod schema for structured output validation
|
|
251
251
|
*
|
|
252
|
-
* @important Google Gemini
|
|
253
|
-
* Google
|
|
254
|
-
*
|
|
255
|
-
*
|
|
256
|
-
*
|
|
257
|
-
*
|
|
258
|
-
*
|
|
259
|
-
*
|
|
260
|
-
*
|
|
261
|
-
*
|
|
252
|
+
* @important Google GEMINI limitation (Gemini models only)
|
|
253
|
+
* Gemini models (Google AI Studio, and Vertex GEMINI models) cannot combine
|
|
254
|
+
* function calling with schema-enforced structured output — a Gemini API
|
|
255
|
+
* limitation ("Function calling with a response mime type:
|
|
256
|
+
* 'application/json' is unsupported"). Vertex CLAUDE models and all other
|
|
257
|
+
* providers support tools + schema simultaneously.
|
|
258
|
+
*
|
|
259
|
+
* You do NOT need to set `disableTools` yourself: when the combination is
|
|
260
|
+
* impossible, NeuroLink automatically falls back to text-mode JSON coercion
|
|
261
|
+
* (see `coerceJsonToSchema`), and `disableTools: true` remains available as
|
|
262
|
+
* an explicit override.
|
|
262
263
|
*
|
|
263
264
|
* @example
|
|
264
265
|
* ```typescript
|
|
265
|
-
* // ✅
|
|
266
|
+
* // ✅ Vertex + Claude: tools AND schema together are fully supported
|
|
266
267
|
* const result = await neurolink.generate({
|
|
267
268
|
* schema: MySchema,
|
|
268
269
|
* provider: "vertex",
|
|
269
|
-
*
|
|
270
|
+
* model: "claude-sonnet-4-6",
|
|
270
271
|
* });
|
|
271
272
|
*
|
|
272
|
-
* // ✅
|
|
273
|
+
* // ✅ Gemini + tools: SDK auto-falls back to coerced text-mode JSON
|
|
273
274
|
* const result = await neurolink.generate({
|
|
274
275
|
* schema: MySchema,
|
|
275
|
-
* provider: "
|
|
276
|
+
* provider: "google-ai",
|
|
277
|
+
* model: "gemini-2.5-pro",
|
|
276
278
|
* });
|
|
277
279
|
* ```
|
|
278
280
|
*
|
|
@@ -300,16 +302,18 @@ export type GenerateOptions = {
|
|
|
300
302
|
/**
|
|
301
303
|
* Disable tool execution (including built-in tools)
|
|
302
304
|
*
|
|
303
|
-
*
|
|
304
|
-
*
|
|
305
|
-
*
|
|
305
|
+
* Optional with schemas: the tools↔schema exclusion applies only to Google
|
|
306
|
+
* GEMINI models (Google AI Studio / Vertex Gemini — a Gemini API
|
|
307
|
+
* limitation), and NeuroLink handles it automatically by falling back to
|
|
308
|
+
* text-mode JSON coercion. Vertex CLAUDE models support tools + schema
|
|
309
|
+
* together. Set this only when you explicitly want a tool-free call.
|
|
306
310
|
*
|
|
307
311
|
* @example
|
|
308
312
|
* ```typescript
|
|
309
|
-
* //
|
|
313
|
+
* // Explicit override: schema-only call with no tools at all
|
|
310
314
|
* await neurolink.generate({
|
|
311
315
|
* schema: MySchema,
|
|
312
|
-
* provider: "
|
|
316
|
+
* provider: "google-ai",
|
|
313
317
|
* disableTools: true
|
|
314
318
|
* });
|
|
315
319
|
* ```
|
|
@@ -551,6 +555,13 @@ export type AdditionalMemoryUser = {
|
|
|
551
555
|
*/
|
|
552
556
|
export type GenerateResult = {
|
|
553
557
|
content: string;
|
|
558
|
+
/**
|
|
559
|
+
* Parsed structured object when a `schema` was requested. Populated from
|
|
560
|
+
* AI-SDK experimental_output, or from text-mode coercion (balanced-scan +
|
|
561
|
+
* jsonrepair). Prefer this over JSON.parse(content) — it never requires the
|
|
562
|
+
* caller to re-parse hand-escaped model text.
|
|
563
|
+
*/
|
|
564
|
+
structuredData?: unknown;
|
|
554
565
|
outputs?: {
|
|
555
566
|
text: string;
|
|
556
567
|
};
|
|
@@ -638,6 +649,17 @@ export type GenerateResult = {
|
|
|
638
649
|
provider?: string;
|
|
639
650
|
model?: string;
|
|
640
651
|
finishReason?: string;
|
|
652
|
+
/**
|
|
653
|
+
* True when the schema JSON in `content`/`structuredData` was repaired from
|
|
654
|
+
* malformed model text (jsonrepair ran). The result is still valid JSON.
|
|
655
|
+
*/
|
|
656
|
+
jsonRepaired?: boolean;
|
|
657
|
+
/**
|
|
658
|
+
* True when the schema JSON appears truncated — the model hit the output
|
|
659
|
+
* token cap (finishReason="length") or the recovered object came from an
|
|
660
|
+
* unclosed span. `structuredData` may be incomplete; raise `maxTokens`.
|
|
661
|
+
*/
|
|
662
|
+
jsonTruncated?: boolean;
|
|
641
663
|
usage?: TokenUsage;
|
|
642
664
|
responseTime?: number;
|
|
643
665
|
toolCalls?: Array<{
|
|
@@ -1090,7 +1112,13 @@ export type TextGenerationOptions = {
|
|
|
1090
1112
|
*/
|
|
1091
1113
|
export type TextGenerationResult = {
|
|
1092
1114
|
content: string;
|
|
1115
|
+
/** Parsed structured object when a `schema` was requested (see GenerateResult.structuredData). */
|
|
1116
|
+
structuredData?: unknown;
|
|
1093
1117
|
finishReason?: string;
|
|
1118
|
+
/** True when the schema JSON was repaired from malformed model text. */
|
|
1119
|
+
jsonRepaired?: boolean;
|
|
1120
|
+
/** True when the schema JSON appears truncated (output hit the token cap). */
|
|
1121
|
+
jsonTruncated?: boolean;
|
|
1094
1122
|
provider?: string;
|
|
1095
1123
|
model?: string;
|
|
1096
1124
|
usage?: TokenUsage;
|
|
@@ -263,3 +263,19 @@ export type StepToolResult = {
|
|
|
263
263
|
result?: unknown;
|
|
264
264
|
error?: string;
|
|
265
265
|
};
|
|
266
|
+
/**
 * Outcome of coercing free-form model text into canonical, valid JSON.
 *
 * `content` and `structuredData` describe the same recovered value: one as
 * serialised text, the other as the parsed value itself.
 */
export type JsonCoercionResult = {
    /** `JSON.stringify` of the recovered value. */
    content: string;
    /** The recovered value, already parsed. */
    structuredData: unknown;
    /** Set when jsonrepair had to change the model text before it would parse. */
    repaired: boolean;
    /**
     * Set when the value was recovered from an unclosed (truncated) span — the
     * response probably hit the output-token cap, so data may be missing.
     */
    truncated: boolean;
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { JsonCoercionResult, ValidationSchema } from "../../types/index.js";
|
|
2
|
+
/**
|
|
3
|
+
* Try to produce canonical JSON from `text`. Returns null when no JSON object/array
|
|
4
|
+
* could be recovered (caller should then keep the raw text).
|
|
5
|
+
*
|
|
6
|
+
* When `schema` is a Zod schema, candidates that satisfy it are preferred; a
|
|
7
|
+
* syntactically-valid-but-schema-failing object is still returned (we guarantee
|
|
8
|
+
* JSON *validity*, leaving schema/content checks to the caller's own pipeline).
|
|
9
|
+
*/
|
|
10
|
+
export declare function coerceJsonToSchema(text: string, schema?: ValidationSchema): JsonCoercionResult | null;
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Coerce arbitrary model text into canonical, syntactically-valid JSON.
|
|
3
|
+
*
|
|
4
|
+
* Used on the text-mode path (providers/models that could not use AI-SDK
|
|
5
|
+
* structured output, e.g. real Gemini + tools). The model hand-writes JSON and
|
|
6
|
+
* frequently mis-escapes the content field (bare newline, unescaped quote,
|
|
7
|
+
* invalid escape like \d). A balanced-brace scan finds the object span; if
|
|
8
|
+
* JSON.parse rejects it, jsonrepair fixes common escaping mistakes; the result
|
|
9
|
+
* is re-serialised with JSON.stringify so downstream consumers always receive
|
|
10
|
+
* valid JSON.
|
|
11
|
+
*
|
|
12
|
+
* NOTE: jsonrepair is a heuristic. On content where a lone backslash is
|
|
13
|
+
* meaningful (regex/script/Windows path) it may drop the backslash, producing
|
|
14
|
+
* valid-but-semantically-altered content. This only affects the residual
|
|
15
|
+
* text-mode path — the primary Vertex+Claude path uses experimental_output and
|
|
16
|
+
* never reaches here. When jsonrepair changes the input we log at debug level
|
|
17
|
+
* so the event is observable.
|
|
18
|
+
*/
|
|
19
|
+
import { jsonrepair } from "jsonrepair";
|
|
20
|
+
import { logger } from "../logger.js";
|
|
21
|
+
import { nextBalancedJsonSpan } from "./extract.js";
|
|
22
|
+
/**
 * Report whether `schema` exposes a Zod-style `safeParse` method that can be
 * used to validate a candidate value.
 *
 * Null-safe: `null` and `undefined` yield `false` instead of throwing on the
 * property access, so the guard can be applied directly to an optional schema.
 *
 * @param schema - Any schema-like value, possibly absent.
 * @returns `true` only when `schema.safeParse` is a function.
 */
function hasSafeParse(schema) {
    return (schema !== null &&
        schema !== undefined &&
        typeof schema.safeParse === "function");
}
|
|
26
|
+
/**
|
|
27
|
+
* Parse `candidate` as JSON, repairing common escaping mistakes on failure.
|
|
28
|
+
* Returns the parsed value plus whether jsonrepair had to alter the text.
|
|
29
|
+
*/
|
|
30
|
+
function parseOrRepair(candidate) {
|
|
31
|
+
try {
|
|
32
|
+
return { value: JSON.parse(candidate), repaired: false };
|
|
33
|
+
}
|
|
34
|
+
catch {
|
|
35
|
+
// fall through to repair
|
|
36
|
+
}
|
|
37
|
+
try {
|
|
38
|
+
const repaired = jsonrepair(candidate);
|
|
39
|
+
const value = JSON.parse(repaired);
|
|
40
|
+
if (repaired !== candidate && logger.shouldLog("debug")) {
|
|
41
|
+
logger.debug("[coerceJsonToSchema] jsonrepair altered model output", {
|
|
42
|
+
originalLength: candidate.length,
|
|
43
|
+
repairedLength: repaired.length,
|
|
44
|
+
});
|
|
45
|
+
}
|
|
46
|
+
return { value, repaired: repaired !== candidate };
|
|
47
|
+
}
|
|
48
|
+
catch {
|
|
49
|
+
return undefined;
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
/**
|
|
53
|
+
* Try to produce canonical JSON from `text`. Returns null when no JSON object
|
|
54
|
+
* could be recovered (caller should then keep the raw text).
|
|
55
|
+
*
|
|
56
|
+
* When `schema` is a Zod schema, candidates that satisfy it are preferred; a
|
|
57
|
+
* syntactically-valid-but-schema-failing object is still returned (we guarantee
|
|
58
|
+
* JSON *validity*, leaving schema/content checks to the caller's own pipeline).
|
|
59
|
+
*/
|
|
60
|
+
export function coerceJsonToSchema(text, schema) {
|
|
61
|
+
if (typeof text !== "string" || text.trim().length === 0) {
|
|
62
|
+
return null;
|
|
63
|
+
}
|
|
64
|
+
// Ordered candidate substrings, best-formed first:
|
|
65
|
+
// 1. every balanced object/array span (clean, common case)
|
|
66
|
+
// 2. first "{" or "[" to last "}" or "]" (drops surrounding prose; lets
|
|
67
|
+
// jsonrepair fix escaping inside) — root ARRAYS matter for array schemas
|
|
68
|
+
// 3. first "{" or "[" to end of text (TRUNCATED output —
|
|
69
|
+
// finishReason=length — where the closing bracket was cut off;
|
|
70
|
+
// jsonrepair closes it)
|
|
71
|
+
// `truncated` marks the first-open-to-end candidate: it is only reachable
|
|
72
|
+
// when no balanced span and no first-to-last span matched, i.e. there was no
|
|
73
|
+
// closing bracket at all — the signature of token-truncated output.
|
|
74
|
+
const candidates = [];
|
|
75
|
+
let searchFrom = 0;
|
|
76
|
+
for (;;) {
|
|
77
|
+
const found = nextBalancedJsonSpan(text, searchFrom);
|
|
78
|
+
if (!found) {
|
|
79
|
+
break;
|
|
80
|
+
}
|
|
81
|
+
candidates.push({ text: found.span, truncated: false });
|
|
82
|
+
searchFrom = found.end;
|
|
83
|
+
}
|
|
84
|
+
const openIndexes = [text.indexOf("{"), text.indexOf("[")].filter((i) => i >= 0);
|
|
85
|
+
const firstOpen = openIndexes.length > 0 ? Math.min(...openIndexes) : -1;
|
|
86
|
+
const lastClose = Math.max(text.lastIndexOf("}"), text.lastIndexOf("]"));
|
|
87
|
+
if (firstOpen >= 0 && lastClose > firstOpen) {
|
|
88
|
+
candidates.push({
|
|
89
|
+
text: text.slice(firstOpen, lastClose + 1),
|
|
90
|
+
truncated: false,
|
|
91
|
+
});
|
|
92
|
+
}
|
|
93
|
+
if (firstOpen >= 0) {
|
|
94
|
+
candidates.push({ text: text.slice(firstOpen), truncated: true });
|
|
95
|
+
}
|
|
96
|
+
let firstValid;
|
|
97
|
+
let schemaMatch;
|
|
98
|
+
const seen = new Set();
|
|
99
|
+
for (const candidate of candidates) {
|
|
100
|
+
if (seen.has(candidate.text)) {
|
|
101
|
+
continue;
|
|
102
|
+
}
|
|
103
|
+
seen.add(candidate.text);
|
|
104
|
+
const outcome = parseOrRepair(candidate.text);
|
|
105
|
+
if (outcome === undefined ||
|
|
106
|
+
outcome.value === null ||
|
|
107
|
+
typeof outcome.value !== "object") {
|
|
108
|
+
continue;
|
|
109
|
+
}
|
|
110
|
+
const record = {
|
|
111
|
+
value: outcome.value,
|
|
112
|
+
repaired: outcome.repaired,
|
|
113
|
+
truncated: candidate.truncated,
|
|
114
|
+
};
|
|
115
|
+
if (firstValid === undefined) {
|
|
116
|
+
firstValid = record;
|
|
117
|
+
}
|
|
118
|
+
if (schema && hasSafeParse(schema)) {
|
|
119
|
+
const safeParseable = schema;
|
|
120
|
+
if (safeParseable.safeParse(outcome.value).success) {
|
|
121
|
+
schemaMatch = record;
|
|
122
|
+
break;
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
else {
|
|
126
|
+
// No Zod schema to discriminate — first parseable object wins.
|
|
127
|
+
break;
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
const chosen = schemaMatch ?? firstValid;
|
|
131
|
+
if (chosen === undefined) {
|
|
132
|
+
return null;
|
|
133
|
+
}
|
|
134
|
+
return {
|
|
135
|
+
content: JSON.stringify(chosen.value),
|
|
136
|
+
structuredData: chosen.value,
|
|
137
|
+
repaired: chosen.repaired,
|
|
138
|
+
truncated: chosen.truncated,
|
|
139
|
+
};
|
|
140
|
+
}
|
|
@@ -4,6 +4,16 @@
|
|
|
4
4
|
* Utilities for extracting JSON from mixed text content.
|
|
5
5
|
* Particularly useful for parsing AI responses that contain JSON within prose.
|
|
6
6
|
*/
|
|
7
|
+
/**
|
|
8
|
+
* Find the first balanced JSON object/array span starting at or after
|
|
9
|
+
* `fromIndex`. Quote- and escape-aware: braces inside string literals do not
|
|
10
|
+
* affect depth. Returns the matched substring and the index just past it, or
|
|
11
|
+
* null if no balanced span exists.
|
|
12
|
+
*/
|
|
13
|
+
export declare function nextBalancedJsonSpan(text: string, fromIndex?: number): {
|
|
14
|
+
span: string;
|
|
15
|
+
end: number;
|
|
16
|
+
} | null;
|
|
7
17
|
/**
|
|
8
18
|
* Extract JSON string from text that may contain surrounding content.
|
|
9
19
|
*
|
|
@@ -5,6 +5,53 @@
|
|
|
5
5
|
* Particularly useful for parsing AI responses that contain JSON within prose.
|
|
6
6
|
*/
|
|
7
7
|
import { parseJsonOrNull } from "./safeParse.js";
|
|
8
|
+
/**
 * Locate the first balanced JSON object/array span that begins at or after
 * `fromIndex`.
 *
 * Every "{" or "[" from `fromIndex` onward is tried as a potential start. From
 * that start the scan counts only brackets of the same kind as the opener,
 * skips the character following any backslash, and ignores everything between
 * double quotes, so brackets inside string literals do not affect nesting.
 * An opener that never balances is abandoned and the next one is tried.
 *
 * @param text - Text to scan.
 * @param fromIndex - Index at which to start looking for an opener (default 0).
 * @returns `{ span, end }` — the matched substring and the index just past
 *   it — or `null` when no balanced span exists.
 */
export function nextBalancedJsonSpan(text, fromIndex = 0) {
    const total = text.length;
    for (let begin = fromIndex; begin < total; begin += 1) {
        const opener = text[begin];
        const closer = opener === "{" ? "}" : opener === "[" ? "]" : null;
        if (closer === null) {
            continue;
        }
        let nesting = 0;
        let quoted = false;
        let cursor = begin;
        while (cursor < total) {
            const ch = text[cursor];
            cursor += 1;
            if (ch === "\\") {
                // Consume the escaped character without inspecting it.
                cursor += 1;
                continue;
            }
            if (ch === '"') {
                quoted = !quoted;
                continue;
            }
            if (quoted) {
                continue;
            }
            if (ch === opener) {
                nesting += 1;
            }
            else if (ch === closer) {
                nesting -= 1;
                if (nesting === 0) {
                    // `cursor` already points just past the closing bracket.
                    return { span: text.slice(begin, cursor), end: cursor };
                }
            }
        }
        // This opener never balanced — fall through to the next one.
    }
    return null;
}
|
|
8
55
|
/**
|
|
9
56
|
* Extract JSON string from text that may contain surrounding content.
|
|
10
57
|
*
|
|
@@ -45,21 +92,24 @@ export function extractJsonStringFromText(text) {
|
|
|
45
92
|
// Continue to other patterns
|
|
46
93
|
}
|
|
47
94
|
}
|
|
48
|
-
//
|
|
49
|
-
//
|
|
50
|
-
//
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
95
|
+
// Scan for balanced JSON object/array spans (quote/escape aware) and return
|
|
96
|
+
// the first one that parses. Unlike a non-greedy regex, this never stops at a
|
|
97
|
+
// "}" that lives inside a string value, so nested objects are preserved.
|
|
98
|
+
let searchFrom = 0;
|
|
99
|
+
for (;;) {
|
|
100
|
+
const found = nextBalancedJsonSpan(text, searchFrom);
|
|
101
|
+
if (!found) {
|
|
102
|
+
break;
|
|
103
|
+
}
|
|
56
104
|
try {
|
|
57
|
-
JSON.parse(
|
|
58
|
-
return
|
|
105
|
+
JSON.parse(found.span);
|
|
106
|
+
return found.span;
|
|
59
107
|
}
|
|
60
108
|
catch {
|
|
61
|
-
//
|
|
109
|
+
// Not valid JSON — resume scanning just past this opening character so a
|
|
110
|
+
// valid inner object/array can still be found.
|
|
62
111
|
}
|
|
112
|
+
searchFrom = found.end - found.span.length + 1;
|
|
63
113
|
}
|
|
64
114
|
return null;
|
|
65
115
|
}
|
|
@@ -7,6 +7,26 @@ import { PROVIDER_MAX_TOKENS } from "../core/constants.js";
|
|
|
7
7
|
* Get the safe maximum tokens for a provider and model
|
|
8
8
|
*/
|
|
9
9
|
export declare function getSafeMaxTokens(provider: keyof typeof PROVIDER_MAX_TOKENS | string, model?: string, requestedMaxTokens?: number): number | undefined;
|
|
10
|
+
/**
|
|
11
|
+
* Maximum output tokens supported by a given Anthropic Claude model.
|
|
12
|
+
*
|
|
13
|
+
* The native Vertex+Claude and native Anthropic message paths send `max_tokens`
|
|
14
|
+
* straight to the Anthropic API, which returns 400 if the value exceeds the
|
|
15
|
+
* model's published output ceiling. (The AI-SDK path clamps automatically;
|
|
16
|
+
* these native paths do not.) This table lets those paths default to the
|
|
17
|
+
* model's real ceiling — 64K for Sonnet/Haiku 4.x, 32K for Opus 4.x — instead of
|
|
18
|
+
* the legacy 4096 that silently truncated large structured responses.
|
|
19
|
+
*
|
|
20
|
+
* Unknown identifiers fall back to a safe modern floor (8192).
|
|
21
|
+
*/
|
|
22
|
+
export declare function getClaudeMaxOutputTokens(model: string | undefined): number;
|
|
23
|
+
/**
|
|
24
|
+
* Resolve the `max_tokens` to send on a native Anthropic/Claude request: honour
|
|
25
|
+
* the caller's value but clamp it to the model's published ceiling, and default
|
|
26
|
+
* to that ceiling when the caller did not specify one. Prevents both silent
|
|
27
|
+
* truncation (the legacy 4096 default) and 400s from over-large requests.
|
|
28
|
+
*/
|
|
29
|
+
export declare function resolveClaudeMaxTokens(model: string | undefined, requested?: number): number;
|
|
10
30
|
/**
|
|
11
31
|
* Validate if maxTokens is safe for a provider/model combination
|
|
12
32
|
*/
|