@jsonstudio/llms 0.6.795 → 0.6.802

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ import type { ProcessedRequest, StandardizedRequest } from '../types/standardized.js';
2
+ import type { TargetMetadata } from '../../../router/virtual-router/types.js';
3
/**
 * Checks the estimated input size of a request against the context window of the
 * selected routing target.
 *
 * @param options - Identifies the request and the target being checked.
 * @returns An empty object when the target declares no positive, finite
 *   `maxContextTokens`; only `maxContextTokens` when no positive, finite token
 *   estimate is available; otherwise every field describing the check that passed.
 * @throws An `Error` named `ContextLimitError` (`code: 'CONTEXT_TOO_LONG'`,
 *   `status: 400`) when the estimate is greater than or equal to the allowed budget.
 */
export declare function enforceTargetContextLimitOrThrow(options: {
    /** Request identifier; copied onto the thrown error. */
    requestId: string;
    /** Route name used in the error message and error metadata. */
    routeName?: string;
    /** Routing target whose `maxContextTokens` is enforced. */
    target: TargetMetadata;
    /** Request whose input tokens are estimated. */
    request: StandardizedRequest | ProcessedRequest;
}): {
    /** Estimated number of input tokens for the request. */
    estimatedInputTokens?: number;
    /** Context limit of the target, rounded down to an integer. */
    maxContextTokens?: number;
    /** Token budget left after applying the safety ratio (at least 1). */
    allowedTokens?: number;
    /** Fraction of the context limit held back as a safety margin. */
    safetyRatio?: number;
};
@@ -0,0 +1,55 @@
1
+ import { computeRequestTokens } from '../../../router/virtual-router/token-estimator.js';
2
/**
 * Reads the context-token safety ratio from the environment.
 *
 * `RCC_CONTEXT_TOKEN_SAFETY_RATIO` takes precedence over
 * `ROUTECODEX_CONTEXT_TOKEN_SAFETY_RATIO`. A variable that is unset or blank is
 * skipped, so a blank primary variable does not hide a configured fallback.
 *
 * @returns The ratio clamped to the range [0, 0.5]; 0 when nothing usable is
 *   configured, when the value is not a finite number, or when no `process`
 *   global exists.
 */
function readSafetyRatioFromEnv() {
    // Optional chaining does not protect against an undeclared identifier:
    // `process?.env` still throws a ReferenceError when `process` does not exist.
    // `typeof` is the only safe probe.
    const env = typeof process !== 'undefined' && process && process.env ? process.env : undefined;
    if (!env) {
        return 0;
    }
    const candidates = [
        env.RCC_CONTEXT_TOKEN_SAFETY_RATIO,
        env.ROUTECODEX_CONTEXT_TOKEN_SAFETY_RATIO
    ];
    // First variable that actually carries a value wins.
    const raw = candidates.find((entry) => typeof entry === 'string' && entry.trim() !== '');
    if (raw === undefined) {
        return 0;
    }
    const value = Number(raw);
    if (!Number.isFinite(value)) {
        return 0;
    }
    // Keep within sane bounds; default is 0 (exact limit).
    return Math.max(0, Math.min(0.5, value));
}
13
/**
 * Rejects a request whose estimated input size does not fit the target's context window.
 *
 * The allowed budget is `floor(maxContextTokens * (1 - safetyRatio))`, never below 1.
 *
 * @param options - `requestId`, optional `routeName`, the routing `target` and the `request`.
 * @returns `{}` when the target has no positive, finite `maxContextTokens`;
 *   `{ maxContextTokens }` when the token estimate is not a positive, finite number;
 *   otherwise the estimate, limit, allowed budget and safety ratio of the passing check.
 * @throws An `Error` named `ContextLimitError` (`code: 'CONTEXT_TOO_LONG'`, `status: 400`)
 *   when the estimate is greater than or equal to the allowed budget.
 */
export function enforceTargetContextLimitOrThrow(options) {
    const target = options.target;
    const declaredLimit = target?.maxContextTokens;
    const limitIsUsable = typeof declaredLimit === 'number' && Number.isFinite(declaredLimit) && declaredLimit > 0;
    const maxContextTokens = limitIsUsable ? Math.floor(declaredLimit) : undefined;
    // Also covers limits in (0, 1), which floor to 0.
    if (!maxContextTokens) {
        return {};
    }

    const estimatedInputTokens = computeRequestTokens(options.request, '');
    const estimateIsUsable = typeof estimatedInputTokens === 'number' &&
        Number.isFinite(estimatedInputTokens) &&
        estimatedInputTokens > 0;
    if (!estimateIsUsable) {
        return { maxContextTokens };
    }

    const safetyRatio = readSafetyRatioFromEnv();
    const budget = Math.floor(maxContextTokens * (1 - safetyRatio));
    const allowedTokens = Math.max(1, budget);
    const overLimit = estimatedInputTokens >= allowedTokens;
    if (!overLimit) {
        return { estimatedInputTokens, maxContextTokens, allowedTokens, safetyRatio };
    }

    // Fallback labels are used for the message and `details`; the top-level
    // error fields keep the raw (possibly undefined) values.
    const providerLabel = target?.providerKey || 'unknown';
    const modelId = target?.modelId || options.request?.model || 'unknown';
    const routeName = options.routeName || 'unknown';
    const message = [
        `Context too long for ${providerLabel}.${modelId}: `,
        `estimatedInputTokens=${estimatedInputTokens} exceeds allowed=${allowedTokens} `,
        `(maxContextTokens=${maxContextTokens}, safetyRatio=${safetyRatio}, route=${routeName})`
    ].join('');
    const extras = {
        name: 'ContextLimitError',
        code: 'CONTEXT_TOO_LONG',
        status: 400,
        requestId: options.requestId,
        providerKey: target?.providerKey,
        providerType: target?.providerType,
        routeName: options.routeName,
        details: {
            estimatedInputTokens,
            allowedTokens,
            maxContextTokens,
            safetyRatio,
            providerKey: target?.providerKey,
            modelId,
            routeName
        }
    };
    throw Object.assign(new Error(message), extras);
}
@@ -110,6 +110,59 @@ function extractNestedGroupRequestId(value) {
110
110
  }
111
111
  return undefined;
112
112
  }
113
/**
 * Searches a snapshot payload for an entry-endpoint string.
 *
 * Each candidate object is probed for `entryEndpoint`, `entry_endpoint` and
 * `endpoint`, in that order. Candidates are visited as: the value itself,
 * `value.meta`, `value.meta.context`, `value.metadata`, `value.runtime`.
 *
 * @param value - Arbitrary payload; anything that is not a truthy object yields `undefined`.
 * @returns The first truthy result produced by `readStringField`, or `undefined`.
 */
function extractNestedEntryEndpoint(value) {
    if (!value || typeof value !== 'object') {
        return undefined;
    }
    const isObjectLike = (candidate) => Boolean(candidate) && typeof candidate === 'object';
    // The same three-key lookup applied at every nesting level.
    const pickEndpoint = (record) => readStringField(record.entryEndpoint) ||
        readStringField(record.entry_endpoint) ||
        readStringField(record.endpoint);

    const root = value;
    const atRoot = pickEndpoint(root);
    if (atRoot) {
        return atRoot;
    }

    const meta = root.meta;
    if (isObjectLike(meta)) {
        const atMeta = pickEndpoint(meta);
        if (atMeta) {
            return atMeta;
        }
        const context = meta.context;
        if (isObjectLike(context)) {
            const atContext = pickEndpoint(context);
            if (atContext) {
                return atContext;
            }
        }
    }

    for (const sectionName of ['metadata', 'runtime']) {
        const section = root[sectionName];
        if (!isObjectLike(section)) {
            continue;
        }
        const atSection = pickEndpoint(section);
        if (atSection) {
            return atSection;
        }
    }
    return undefined;
}
113
166
  function toErrorCode(error) {
114
167
  if (!error || typeof error !== 'object') {
115
168
  return undefined;
@@ -205,7 +258,7 @@ async function promotePendingDir(options) {
205
258
  }
206
259
  async function writeSnapshotFile(options) {
207
260
  const root = resolveSnapshotRoot();
208
- const folder = resolveSnapshotFolder(options.endpoint);
261
+ const folder = resolveSnapshotFolder(extractNestedEntryEndpoint(options.data) || options.endpoint);
209
262
  const stageToken = sanitizeToken(options.stage, 'snapshot');
210
263
  const groupRequestToken = sanitizeToken(options.groupRequestId ||
211
264
  extractNestedGroupRequestId(options.data) ||
@@ -220,7 +220,11 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
220
220
  targets = targets.filter((key) => !excludedKeys.has(key));
221
221
  }
222
222
  if (targets.length > 0) {
223
- targets = targets.filter((key) => !deps.isProviderCoolingDown(key));
223
+ const cooled = targets.filter((key) => !deps.isProviderCoolingDown(key));
224
+ // 单 provider 兜底:当一个 tier 只有一个候选 key 时,不因 cooldown 造成路由池为空。
225
+ if (cooled.length > 0 || targets.length !== 1) {
226
+ targets = cooled;
227
+ }
224
228
  }
225
229
  if (allowedProviders && allowedProviders.size > 0) {
226
230
  targets = targets.filter((key) => {
@@ -331,14 +335,22 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
331
335
  const selectWithQuota = (candidates) => {
332
336
  if (!quotaView) {
333
337
  if (tier.mode === 'priority') {
334
- return selectFirstAvailable(candidates);
338
+ const selected = selectFirstAvailable(candidates);
339
+ if (!selected && candidates.length === 1) {
340
+ return candidates[0];
341
+ }
342
+ return selected;
335
343
  }
336
- return deps.loadBalancer.select({
344
+ const selected = deps.loadBalancer.select({
337
345
  routeName: `${routeName}:${tier.id}`,
338
346
  candidates,
339
347
  stickyKey: options.allowAliasRotation ? undefined : stickyKey,
340
348
  availabilityCheck: (key) => deps.healthManager.isAvailable(key)
341
349
  }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
350
+ if (!selected && candidates.length === 1) {
351
+ return candidates[0];
352
+ }
353
+ return selected;
342
354
  }
343
355
  const buckets = new Map();
344
356
  for (const key of candidates) {
@@ -389,6 +401,33 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
389
401
  }
390
402
  }
391
403
  }
404
+ // default 路由永不因 quota gating 而“空池”:
405
+ // 当 quotaView 过滤后没有任何可用候选时,默认路由允许忽略 quotaView,
406
+ // 继续按健康/负载均衡选择一个 providerKey(但不覆盖 forced/required 约束)。
407
+ const quotaBypassAllowed = routeName === DEFAULT_ROUTE && (!requiredProviderKeys || requiredProviderKeys.size === 0);
408
+ if (quotaBypassAllowed) {
409
+ if (tier.mode === 'priority') {
410
+ const selected = selectFirstAvailable(candidates);
411
+ if (selected) {
412
+ return selected;
413
+ }
414
+ }
415
+ else {
416
+ const selected = deps.loadBalancer.select({
417
+ routeName: `${routeName}:${tier.id}:quota-bypass`,
418
+ candidates,
419
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
420
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
421
+ }, tier.mode === 'round-robin' ? 'round-robin' : undefined);
422
+ if (selected) {
423
+ return selected;
424
+ }
425
+ }
426
+ }
427
+ // 单 provider 兜底:当只剩一个候选 key 时,不因 quota/blacklist/cooldown 或健康状态过滤导致无 provider。
428
+ if (candidates.length === 1) {
429
+ return candidates[0];
430
+ }
392
431
  return null;
393
432
  };
394
433
  for (const candidatePool of prioritizedPools) {
@@ -416,10 +455,13 @@ export function selectFromStickyPool(stickyKeySet, metadata, features, state, de
416
455
  ]));
417
456
  const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
418
457
  let candidates = Array.from(stickyKeySet).filter((key) => !deps.isProviderCoolingDown(key));
458
+ if (!candidates.length && stickyKeySet.size === 1) {
459
+ candidates = Array.from(stickyKeySet);
460
+ }
419
461
  const quotaView = deps.quotaView;
420
462
  const now = quotaView ? Date.now() : 0;
421
463
  if (quotaView) {
422
- candidates = candidates.filter((key) => {
464
+ const filtered = candidates.filter((key) => {
423
465
  const entry = quotaView(key);
424
466
  if (!entry) {
425
467
  return true;
@@ -435,6 +477,9 @@ export function selectFromStickyPool(stickyKeySet, metadata, features, state, de
435
477
  }
436
478
  return true;
437
479
  });
480
+ if (filtered.length > 0 || candidates.length !== 1) {
481
+ candidates = filtered;
482
+ }
438
483
  }
439
484
  if (allowedProviders.size > 0) {
440
485
  candidates = candidates.filter((key) => {
@@ -304,6 +304,129 @@ const tryParseJson = (value) => {
304
304
  return undefined;
305
305
  }
306
306
  };
307
/**
 * Best-effort repair for almost-valid JSON whose string values contain raw `"`
 * characters (for example JSX such as className="...").
 *
 * Inside a string literal, a `"` counts as the closing quote only when the next
 * non-whitespace character is `:`, `,`, `}`, `]` or the end of input; any other
 * `"` is rewritten as `\"`. Structural problems are left untouched.
 *
 * @param input - JSON-like text.
 * @returns The text with the offending quotes escaped.
 */
const escapeUnescapedQuotesInJsonStrings = (input) => {
    const closingContext = [':', ',', '}', ']'];
    // First character at or after `from` that is not whitespace ('' at end of input).
    const peekPastWhitespace = (from) => {
        let cursor = from;
        while (cursor < input.length && /\s/.test(input[cursor] ?? '')) {
            cursor += 1;
        }
        return cursor < input.length ? input[cursor] : '';
    };

    const pieces = [];
    let insideString = false;
    let afterBackslash = false;
    for (let pos = 0; pos < input.length; pos += 1) {
        const ch = input[pos] ?? '';
        if (!insideString) {
            // Outside strings everything is copied; a quote opens a string.
            if (ch === '"') {
                insideString = true;
                afterBackslash = false;
            }
            pieces.push(ch);
        }
        else if (afterBackslash) {
            // Character following a backslash is taken verbatim.
            pieces.push(ch);
            afterBackslash = false;
        }
        else if (ch === '\\') {
            pieces.push(ch);
            afterBackslash = true;
        }
        else if (ch !== '"') {
            pieces.push(ch);
        }
        else {
            const upcoming = peekPastWhitespace(pos + 1);
            const closesString = upcoming === '' || closingContext.includes(upcoming);
            if (closesString) {
                insideString = false;
                pieces.push(ch);
            }
            else {
                pieces.push('\\"');
            }
        }
    }
    return pieces.join('');
};
352
/**
 * Best-effort bracket/brace balancing for JSON-like text.
 *
 * Only characters outside string literals are considered:
 * - a closer that does not match the innermost open container first closes the
 *   containers nested inside its matching opener;
 * - a closer with no matching opener anywhere on the stack is dropped, because
 *   emitting it could never produce parseable output;
 * - containers still open at the end of input are closed in nesting order.
 *
 * @param input - JSON-like text.
 * @returns The text with balanced `{}` / `[]` containers.
 */
const balanceJsonContainers = (input) => {
    let out = '';
    let inString = false;
    let escaped = false;
    const stack = [];
    const closeFor = (open) => (open === '{' ? '}' : ']');
    for (let i = 0; i < input.length; i += 1) {
        const ch = input[i] ?? '';
        if (inString) {
            // String contents are copied verbatim; only track where the literal ends.
            out += ch;
            if (escaped) {
                escaped = false;
            }
            else if (ch === '\\') {
                escaped = true;
            }
            else if (ch === '"') {
                inString = false;
            }
            continue;
        }
        if (ch === '"') {
            inString = true;
            out += ch;
            continue;
        }
        if (ch === '{' || ch === '[') {
            stack.push(ch);
            out += ch;
            continue;
        }
        if (ch === '}' || ch === ']') {
            const expectedOpen = ch === '}' ? '{' : '[';
            if (!stack.includes(expectedOpen)) {
                // Stray closer: nothing to match, so skip it instead of closing
                // every open container and emitting an unmatched token.
                continue;
            }
            // Close containers nested inside the one this closer belongs to.
            while (stack[stack.length - 1] !== expectedOpen) {
                out += closeFor(stack.pop());
            }
            stack.pop();
            out += ch;
            continue;
        }
        out += ch;
    }
    // Close whatever is still open, innermost first.
    while (stack.length) {
        out += closeFor(stack.pop());
    }
    return out;
};
408
/**
 * Parses JSON, falling back to a repaired copy of the text when strict parsing fails.
 *
 * The fallback only applies to strings that, once trimmed, start with `{` or `[`.
 * The text is passed through `escapeUnescapedQuotesInJsonStrings` and then
 * `balanceJsonContainers`; if that changes nothing, no second parse is attempted.
 *
 * @param value - Candidate JSON input.
 * @returns The parsed value, or `undefined` when neither attempt succeeds.
 */
const tryParseJsonLoose = (value) => {
    const strictResult = tryParseJson(value);
    if (strictResult !== undefined) {
        return strictResult;
    }
    if (typeof value !== 'string') {
        return undefined;
    }
    const text = value.trim();
    // An empty string fails this check as well.
    const looksLikeContainer = text.startsWith('{') || text.startsWith('[');
    if (!looksLikeContainer) {
        return undefined;
    }
    const repaired = balanceJsonContainers(escapeUnescapedQuotesInJsonStrings(text));
    if (!repaired || repaired === text) {
        // Nothing was repaired, so parsing again would fail the same way.
        return undefined;
    }
    try {
        return JSON.parse(repaired);
    }
    catch {
        return undefined;
    }
};
307
430
  const coerceChangesArray = (value) => {
308
431
  const parsed = tryParseJson(value);
309
432
  if (!parsed)
@@ -402,6 +525,33 @@ export function validateApplyPatchArgs(argsString, rawArgs) {
402
525
  if (instructionsField && looksLikePatch(instructionsField)) {
403
526
  return { patchText: normalizeApplyPatchText(instructionsField) };
404
527
  }
528
+ // Common wrapper shape (seen in codex samples): { _raw: "{...json...}" }.
529
+ // `_raw` may contain either patch text or a JSON-encoded structured payload.
530
+ const rawEnvelope = asString(rec._raw);
531
+ if (rawEnvelope) {
532
+ const trimmed = rawEnvelope.trim();
533
+ if (looksLikePatch(trimmed)) {
534
+ return { patchText: normalizeApplyPatchText(trimmed) };
535
+ }
536
+ const parsed = tryParseJsonLoose(trimmed);
537
+ if (parsed && isRecord(parsed)) {
538
+ return extractFromRecord(parsed);
539
+ }
540
+ if (Array.isArray(parsed) && parsed.length > 0) {
541
+ const changesArray = parsed.filter((entry) => isRecord(entry));
542
+ if (changesArray.length && changesArray.some((c) => typeof c.kind === 'string')) {
543
+ const payload = { changes: changesArray };
544
+ try {
545
+ return { patchText: buildStructuredPatch(payload) };
546
+ }
547
+ catch (error) {
548
+ if (!(error instanceof StructuredApplyPatchError))
549
+ throw error;
550
+ return { failureReason: error.reason || 'structured_apply_patch_error' };
551
+ }
552
+ }
553
+ }
554
+ }
405
555
  const payload = coerceStructuredPayload(rec);
406
556
  if (payload) {
407
557
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@jsonstudio/llms",
3
- "version": "0.6.795",
3
+ "version": "0.6.802",
4
4
  "type": "module",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.js",