@jsonstudio/llms 0.6.795 → 0.6.802
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/conversion/hub/pipeline/context-limit.d.ts +13 -0
- package/dist/conversion/hub/pipeline/context-limit.js +55 -0
- package/dist/conversion/shared/snapshot-hooks.js +54 -1
- package/dist/router/virtual-router/engine-selection.js +49 -4
- package/dist/tools/apply-patch/validator.js +150 -0
- package/package.json +1 -1
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { ProcessedRequest, StandardizedRequest } from '../types/standardized.js';
|
|
2
|
+
import type { TargetMetadata } from '../../../router/virtual-router/types.js';
|
|
3
|
+
/**
 * Checks a request's estimated input size against the routed target's context window.
 *
 * Behaviour, per the accompanying `context-limit.js` implementation in this release:
 * - returns `{}` when `target.maxContextTokens` does not yield a positive whole-token limit;
 * - returns only `maxContextTokens` when the token estimate is not a positive finite number;
 * - otherwise returns all four fields, where
 *   `allowedTokens = max(1, floor(maxContextTokens * (1 - safetyRatio)))`.
 *
 * @param options.requestId Copied onto the thrown error.
 * @param options.routeName Route label used in the error message and details
 *   (reported as 'unknown' there when omitted).
 * @param options.target Routing target; `maxContextTokens`, `providerKey`,
 *   `providerType` and `modelId` are read from it.
 * @param options.request Request whose input tokens are estimated.
 * @throws An `Error` named `ContextLimitError` (`code: 'CONTEXT_TOO_LONG'`, `status: 400`)
 *   when the estimate is greater than or equal to `allowedTokens`.
 */
export declare function enforceTargetContextLimitOrThrow(options: {
|
|
4
|
+
requestId: string;
|
|
5
|
+
routeName?: string;
|
|
6
|
+
target: TargetMetadata;
|
|
7
|
+
request: StandardizedRequest | ProcessedRequest;
|
|
8
|
+
}): {
|
|
9
|
+
estimatedInputTokens?: number;
|
|
10
|
+
maxContextTokens?: number;
|
|
11
|
+
allowedTokens?: number;
|
|
12
|
+
safetyRatio?: number;
|
|
13
|
+
};
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { computeRequestTokens } from '../../../router/virtual-router/token-estimator.js';
|
|
2
|
+
function readSafetyRatioFromEnv() {
|
|
3
|
+
const raw = process?.env?.RCC_CONTEXT_TOKEN_SAFETY_RATIO ??
|
|
4
|
+
process?.env?.ROUTECODEX_CONTEXT_TOKEN_SAFETY_RATIO ??
|
|
5
|
+
'';
|
|
6
|
+
const value = Number(raw);
|
|
7
|
+
if (!Number.isFinite(value)) {
|
|
8
|
+
return 0;
|
|
9
|
+
}
|
|
10
|
+
// Keep within sane bounds; default is 0 (exact limit).
|
|
11
|
+
return Math.max(0, Math.min(0.5, value));
|
|
12
|
+
}
|
|
13
|
+
/**
 * Rejects a request whose estimated input size does not fit the routed target's
 * context window.
 *
 * The check is skipped (nothing is thrown) when the target declares no usable
 * `maxContextTokens` or when the token estimate is not a positive finite number.
 *
 * @param options `{ requestId, routeName?, target, request }`.
 * @returns `{}` when the target has no usable limit, `{ maxContextTokens }` when the
 *   estimate is unusable, otherwise the full set of numbers used for the decision.
 * @throws Error named `ContextLimitError` with `code: 'CONTEXT_TOO_LONG'` and
 *   `status: 400` when the estimate is greater than or equal to the allowed budget.
 */
export function enforceTargetContextLimitOrThrow(options) {
    const isPositiveFinite = (candidate) => typeof candidate === 'number' && Number.isFinite(candidate) && candidate > 0;
    const configuredLimit = options.target?.maxContextTokens;
    // A fractional limit below 1 floors to 0 and is treated the same as "no limit".
    const maxContextTokens = isPositiveFinite(configuredLimit) ? Math.floor(configuredLimit) : undefined;
    if (!maxContextTokens) {
        return {};
    }
    const estimatedInputTokens = computeRequestTokens(options.request, '');
    if (!isPositiveFinite(estimatedInputTokens)) {
        return { maxContextTokens };
    }
    const safetyRatio = readSafetyRatioFromEnv();
    // Never let the budget drop below a single token, whatever the ratio.
    const budget = Math.floor(maxContextTokens * (1 - safetyRatio));
    const allowedTokens = Math.max(1, budget);
    if (estimatedInputTokens < allowedTokens) {
        return { estimatedInputTokens, maxContextTokens, allowedTokens, safetyRatio };
    }
    const providerKey = options.target?.providerKey || 'unknown';
    const modelId = options.target?.modelId || options.request?.model || 'unknown';
    const routeName = options.routeName || 'unknown';
    const message = [
        `Context too long for ${providerKey}.${modelId}: `,
        `estimatedInputTokens=${estimatedInputTokens} exceeds allowed=${allowedTokens} `,
        `(maxContextTokens=${maxContextTokens}, safetyRatio=${safetyRatio}, route=${routeName})`
    ].join('');
    // `details` carries the defaulted model/route labels; the top-level fields keep
    // the raw (possibly undefined) values supplied by the caller.
    const details = {
        estimatedInputTokens,
        allowedTokens,
        maxContextTokens,
        safetyRatio,
        providerKey: options.target?.providerKey,
        modelId,
        routeName
    };
    throw Object.assign(new Error(message), {
        name: 'ContextLimitError',
        code: 'CONTEXT_TOO_LONG',
        status: 400,
        requestId: options.requestId,
        providerKey: options.target?.providerKey,
        providerType: options.target?.providerType,
        routeName: options.routeName,
        details
    });
}
|
|
@@ -110,6 +110,59 @@ function extractNestedGroupRequestId(value) {
|
|
|
110
110
|
}
|
|
111
111
|
return undefined;
|
|
112
112
|
}
|
|
113
|
+
function extractNestedEntryEndpoint(value) {
|
|
114
|
+
if (!value || typeof value !== 'object') {
|
|
115
|
+
return undefined;
|
|
116
|
+
}
|
|
117
|
+
const obj = value;
|
|
118
|
+
const direct = readStringField(obj.entryEndpoint) ||
|
|
119
|
+
readStringField(obj.entry_endpoint) ||
|
|
120
|
+
readStringField(obj.endpoint);
|
|
121
|
+
if (direct) {
|
|
122
|
+
return direct;
|
|
123
|
+
}
|
|
124
|
+
const meta = obj.meta;
|
|
125
|
+
if (meta && typeof meta === 'object') {
|
|
126
|
+
const m = meta;
|
|
127
|
+
const fromMeta = readStringField(m.entryEndpoint) ||
|
|
128
|
+
readStringField(m.entry_endpoint) ||
|
|
129
|
+
readStringField(m.endpoint);
|
|
130
|
+
if (fromMeta) {
|
|
131
|
+
return fromMeta;
|
|
132
|
+
}
|
|
133
|
+
const ctx = m.context;
|
|
134
|
+
if (ctx && typeof ctx === 'object') {
|
|
135
|
+
const c = ctx;
|
|
136
|
+
const fromCtx = readStringField(c.entryEndpoint) ||
|
|
137
|
+
readStringField(c.entry_endpoint) ||
|
|
138
|
+
readStringField(c.endpoint);
|
|
139
|
+
if (fromCtx) {
|
|
140
|
+
return fromCtx;
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
const metadata = obj.metadata;
|
|
145
|
+
if (metadata && typeof metadata === 'object') {
|
|
146
|
+
const md = metadata;
|
|
147
|
+
const fromMetadata = readStringField(md.entryEndpoint) ||
|
|
148
|
+
readStringField(md.entry_endpoint) ||
|
|
149
|
+
readStringField(md.endpoint);
|
|
150
|
+
if (fromMetadata) {
|
|
151
|
+
return fromMetadata;
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
const runtime = obj.runtime;
|
|
155
|
+
if (runtime && typeof runtime === 'object') {
|
|
156
|
+
const r = runtime;
|
|
157
|
+
const fromRuntime = readStringField(r.entryEndpoint) ||
|
|
158
|
+
readStringField(r.entry_endpoint) ||
|
|
159
|
+
readStringField(r.endpoint);
|
|
160
|
+
if (fromRuntime) {
|
|
161
|
+
return fromRuntime;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
return undefined;
|
|
165
|
+
}
|
|
113
166
|
function toErrorCode(error) {
|
|
114
167
|
if (!error || typeof error !== 'object') {
|
|
115
168
|
return undefined;
|
|
@@ -205,7 +258,7 @@ async function promotePendingDir(options) {
|
|
|
205
258
|
}
|
|
206
259
|
async function writeSnapshotFile(options) {
|
|
207
260
|
const root = resolveSnapshotRoot();
|
|
208
|
-
const folder = resolveSnapshotFolder(options.endpoint);
|
|
261
|
+
const folder = resolveSnapshotFolder(extractNestedEntryEndpoint(options.data) || options.endpoint);
|
|
209
262
|
const stageToken = sanitizeToken(options.stage, 'snapshot');
|
|
210
263
|
const groupRequestToken = sanitizeToken(options.groupRequestId ||
|
|
211
264
|
extractNestedGroupRequestId(options.data) ||
|
|
@@ -220,7 +220,11 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
220
220
|
targets = targets.filter((key) => !excludedKeys.has(key));
|
|
221
221
|
}
|
|
222
222
|
if (targets.length > 0) {
|
|
223
|
-
|
|
223
|
+
const cooled = targets.filter((key) => !deps.isProviderCoolingDown(key));
|
|
224
|
+
// 单 provider 兜底:当一个 tier 只有一个候选 key 时,不因 cooldown 造成路由池为空。
|
|
225
|
+
if (cooled.length > 0 || targets.length !== 1) {
|
|
226
|
+
targets = cooled;
|
|
227
|
+
}
|
|
224
228
|
}
|
|
225
229
|
if (allowedProviders && allowedProviders.size > 0) {
|
|
226
230
|
targets = targets.filter((key) => {
|
|
@@ -331,14 +335,22 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
331
335
|
const selectWithQuota = (candidates) => {
|
|
332
336
|
if (!quotaView) {
|
|
333
337
|
if (tier.mode === 'priority') {
|
|
334
|
-
|
|
338
|
+
const selected = selectFirstAvailable(candidates);
|
|
339
|
+
if (!selected && candidates.length === 1) {
|
|
340
|
+
return candidates[0];
|
|
341
|
+
}
|
|
342
|
+
return selected;
|
|
335
343
|
}
|
|
336
|
-
|
|
344
|
+
const selected = deps.loadBalancer.select({
|
|
337
345
|
routeName: `${routeName}:${tier.id}`,
|
|
338
346
|
candidates,
|
|
339
347
|
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
340
348
|
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
341
349
|
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
350
|
+
if (!selected && candidates.length === 1) {
|
|
351
|
+
return candidates[0];
|
|
352
|
+
}
|
|
353
|
+
return selected;
|
|
342
354
|
}
|
|
343
355
|
const buckets = new Map();
|
|
344
356
|
for (const key of candidates) {
|
|
@@ -389,6 +401,33 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
|
|
|
389
401
|
}
|
|
390
402
|
}
|
|
391
403
|
}
|
|
404
|
+
// default 路由永不因 quota gating 而“空池”:
|
|
405
|
+
// 当 quotaView 过滤后没有任何可用候选时,默认路由允许忽略 quotaView,
|
|
406
|
+
// 继续按健康/负载均衡选择一个 providerKey(但不覆盖 forced/required 约束)。
|
|
407
|
+
const quotaBypassAllowed = routeName === DEFAULT_ROUTE && (!requiredProviderKeys || requiredProviderKeys.size === 0);
|
|
408
|
+
if (quotaBypassAllowed) {
|
|
409
|
+
if (tier.mode === 'priority') {
|
|
410
|
+
const selected = selectFirstAvailable(candidates);
|
|
411
|
+
if (selected) {
|
|
412
|
+
return selected;
|
|
413
|
+
}
|
|
414
|
+
}
|
|
415
|
+
else {
|
|
416
|
+
const selected = deps.loadBalancer.select({
|
|
417
|
+
routeName: `${routeName}:${tier.id}:quota-bypass`,
|
|
418
|
+
candidates,
|
|
419
|
+
stickyKey: options.allowAliasRotation ? undefined : stickyKey,
|
|
420
|
+
availabilityCheck: (key) => deps.healthManager.isAvailable(key)
|
|
421
|
+
}, tier.mode === 'round-robin' ? 'round-robin' : undefined);
|
|
422
|
+
if (selected) {
|
|
423
|
+
return selected;
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
// 单 provider 兜底:当只剩一个候选 key 时,不因 quota/blacklist/cooldown 或健康状态过滤导致无 provider。
|
|
428
|
+
if (candidates.length === 1) {
|
|
429
|
+
return candidates[0];
|
|
430
|
+
}
|
|
392
431
|
return null;
|
|
393
432
|
};
|
|
394
433
|
for (const candidatePool of prioritizedPools) {
|
|
@@ -416,10 +455,13 @@ export function selectFromStickyPool(stickyKeySet, metadata, features, state, de
|
|
|
416
455
|
]));
|
|
417
456
|
const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
|
|
418
457
|
let candidates = Array.from(stickyKeySet).filter((key) => !deps.isProviderCoolingDown(key));
|
|
458
|
+
if (!candidates.length && stickyKeySet.size === 1) {
|
|
459
|
+
candidates = Array.from(stickyKeySet);
|
|
460
|
+
}
|
|
419
461
|
const quotaView = deps.quotaView;
|
|
420
462
|
const now = quotaView ? Date.now() : 0;
|
|
421
463
|
if (quotaView) {
|
|
422
|
-
|
|
464
|
+
const filtered = candidates.filter((key) => {
|
|
423
465
|
const entry = quotaView(key);
|
|
424
466
|
if (!entry) {
|
|
425
467
|
return true;
|
|
@@ -435,6 +477,9 @@ export function selectFromStickyPool(stickyKeySet, metadata, features, state, de
|
|
|
435
477
|
}
|
|
436
478
|
return true;
|
|
437
479
|
});
|
|
480
|
+
if (filtered.length > 0 || candidates.length !== 1) {
|
|
481
|
+
candidates = filtered;
|
|
482
|
+
}
|
|
438
483
|
}
|
|
439
484
|
if (allowedProviders.size > 0) {
|
|
440
485
|
candidates = candidates.filter((key) => {
|
|
@@ -304,6 +304,129 @@ const tryParseJson = (value) => {
|
|
|
304
304
|
return undefined;
|
|
305
305
|
}
|
|
306
306
|
};
|
|
307
|
+
/**
 * Best-effort repair of almost-valid JSON text whose string values contain
 * characters that must be escaped.
 *
 * - An unescaped `"` inside a string (e.g. JSX such as className="...") is escaped
 *   unless the next non-whitespace character is a JSON delimiter (`:`, `,`, `}`,
 *   `]`) or the end of input, in which case it is taken as the closing quote.
 * - Raw control characters (U+0000-U+001F, e.g. literal newlines or tabs) inside a
 *   string are rewritten as JSON escapes. JSON never allows them unescaped, so
 *   without this a multi-line snippet still fails to parse after the quote repair.
 *
 * Deterministic; does not attempt to fix structural issues. Text outside string
 * literals is copied through unchanged.
 *
 * @param input JSON-like text.
 * @returns The text with offending in-string characters escaped.
 */
const escapeUnescapedQuotesInJsonStrings = (input) => {
    const whitespace = /\s/; // hoisted: one regex for the whole scan
    const shortEscapes = { '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t' };
    const escapeControl = (ch) => shortEscapes[ch] ?? `\\u${ch.charCodeAt(0).toString(16).padStart(4, '0')}`;
    let out = '';
    let inString = false;
    let escaped = false;
    for (let i = 0; i < input.length; i += 1) {
        const ch = input[i] ?? '';
        if (!inString) {
            if (ch === '"') {
                inString = true;
                escaped = false;
            }
            out += ch;
            continue;
        }
        if (escaped) {
            // Character following a backslash: copy verbatim, whatever it is.
            out += ch;
            escaped = false;
            continue;
        }
        if (ch === '\\') {
            out += ch;
            escaped = true;
            continue;
        }
        if (ch === '"') {
            // Look past whitespace to decide whether this quote closes the string.
            let j = i + 1;
            while (j < input.length && whitespace.test(input[j] ?? '')) {
                j += 1;
            }
            const next = j < input.length ? input[j] : '';
            if (next === '' || next === ':' || next === ',' || next === '}' || next === ']') {
                inString = false;
                out += ch;
            }
            else {
                out += '\\"';
            }
            continue;
        }
        out += ch.charCodeAt(0) < 0x20 ? escapeControl(ch) : ch;
    }
    return out;
};
|
|
352
|
+
/**
 * Best-effort bracket/brace balancing for JSON-like strings.
 *
 * Only operates outside string literals:
 * - a closer that does not match the innermost open container first gets the
 *   missing closer(s) inserted, down to its matching opener;
 * - a closer with no matching opener anywhere on the stack is dropped. Emitting it
 *   (and unwinding the whole stack for it) can only produce unbalanced output such
 *   as `[1]}`;
 * - containers still open at end of input are closed in reverse order.
 *
 * @param input JSON-like text.
 * @returns The text with container delimiters balanced.
 */
const balanceJsonContainers = (input) => {
    const closeFor = (open) => (open === '{' ? '}' : ']');
    const stack = [];
    let out = '';
    let inString = false;
    let escaped = false;
    for (let i = 0; i < input.length; i += 1) {
        const ch = input[i] ?? '';
        if (inString) {
            // Inside a string literal everything is copied; only track where it ends.
            out += ch;
            if (escaped) {
                escaped = false;
                continue;
            }
            if (ch === '\\') {
                escaped = true;
                continue;
            }
            if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
            out += ch;
            continue;
        }
        if (ch === '{' || ch === '[') {
            stack.push(ch);
            out += ch;
            continue;
        }
        if (ch === '}' || ch === ']') {
            const expectedOpen = ch === '}' ? '{' : '[';
            const matchIndex = stack.lastIndexOf(expectedOpen);
            if (matchIndex === -1) {
                // Stray closer: nothing it could close, so leave it out.
                continue;
            }
            // Close every container opened after the matching opener.
            while (stack.length - 1 > matchIndex) {
                out += closeFor(stack.pop());
            }
            stack.pop();
            out += ch;
            continue;
        }
        out += ch;
    }
    while (stack.length) {
        out += closeFor(stack.pop());
    }
    return out;
};
|
|
408
|
+
/**
 * Parses JSON, falling back to a repair pass when the strict parse yields nothing.
 *
 * The fallback only applies to strings that, once trimmed, start with `{` or `[`.
 * The text is run through `escapeUnescapedQuotesInJsonStrings` and then
 * `balanceJsonContainers`; if that changed anything, the result is parsed.
 *
 * @param value Candidate JSON (normally a string).
 * @returns The parsed value, or `undefined` when neither the strict nor the
 *   repaired parse succeeds.
 */
const tryParseJsonLoose = (value) => {
    const strict = tryParseJson(value);
    if (strict !== undefined) {
        return strict;
    }
    // Non-strings collapse to '', which fails the container-prefix check below.
    const text = typeof value === 'string' ? value.trim() : '';
    const looksLikeContainer = text[0] === '{' || text[0] === '[';
    if (!looksLikeContainer) {
        return undefined;
    }
    const candidate = balanceJsonContainers(escapeUnescapedQuotesInJsonStrings(text));
    // Nothing was repaired, so a second parse attempt would be pointless.
    if (candidate === '' || candidate === text) {
        return undefined;
    }
    try {
        return JSON.parse(candidate);
    }
    catch {
        return undefined;
    }
};
|
|
307
430
|
const coerceChangesArray = (value) => {
|
|
308
431
|
const parsed = tryParseJson(value);
|
|
309
432
|
if (!parsed)
|
|
@@ -402,6 +525,33 @@ export function validateApplyPatchArgs(argsString, rawArgs) {
|
|
|
402
525
|
if (instructionsField && looksLikePatch(instructionsField)) {
|
|
403
526
|
return { patchText: normalizeApplyPatchText(instructionsField) };
|
|
404
527
|
}
|
|
528
|
+
// Common wrapper shape (seen in codex samples): { _raw: "{...json...}" }.
|
|
529
|
+
// `_raw` may contain either patch text or a JSON-encoded structured payload.
|
|
530
|
+
const rawEnvelope = asString(rec._raw);
|
|
531
|
+
if (rawEnvelope) {
|
|
532
|
+
const trimmed = rawEnvelope.trim();
|
|
533
|
+
if (looksLikePatch(trimmed)) {
|
|
534
|
+
return { patchText: normalizeApplyPatchText(trimmed) };
|
|
535
|
+
}
|
|
536
|
+
const parsed = tryParseJsonLoose(trimmed);
|
|
537
|
+
if (parsed && isRecord(parsed)) {
|
|
538
|
+
return extractFromRecord(parsed);
|
|
539
|
+
}
|
|
540
|
+
if (Array.isArray(parsed) && parsed.length > 0) {
|
|
541
|
+
const changesArray = parsed.filter((entry) => isRecord(entry));
|
|
542
|
+
if (changesArray.length && changesArray.some((c) => typeof c.kind === 'string')) {
|
|
543
|
+
const payload = { changes: changesArray };
|
|
544
|
+
try {
|
|
545
|
+
return { patchText: buildStructuredPatch(payload) };
|
|
546
|
+
}
|
|
547
|
+
catch (error) {
|
|
548
|
+
if (!(error instanceof StructuredApplyPatchError))
|
|
549
|
+
throw error;
|
|
550
|
+
return { failureReason: error.reason || 'structured_apply_patch_error' };
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
}
|
|
405
555
|
const payload = coerceStructuredPayload(rec);
|
|
406
556
|
if (payload) {
|
|
407
557
|
try {
|