@jsonstudio/llms 0.6.631 → 0.6.743

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/dist/conversion/codecs/anthropic-openai-codec.js +0 -5
  2. package/dist/conversion/codecs/openai-openai-codec.js +0 -6
  3. package/dist/conversion/codecs/responses-openai-codec.js +1 -7
  4. package/dist/conversion/hub/node-support.js +5 -4
  5. package/dist/conversion/hub/pipeline/hub-pipeline.d.ts +14 -1
  6. package/dist/conversion/hub/pipeline/hub-pipeline.js +82 -18
  7. package/dist/conversion/hub/pipeline/session-identifiers.js +132 -2
  8. package/dist/conversion/hub/pipeline/stages/req_inbound/req_inbound_stage3_context_capture/index.js +130 -15
  9. package/dist/conversion/hub/pipeline/stages/resp_inbound/resp_inbound_stage1_sse_decode/index.js +47 -0
  10. package/dist/conversion/hub/pipeline/stages/resp_process/resp_process_stage1_tool_governance/index.js +4 -2
  11. package/dist/conversion/hub/process/chat-process.js +2 -0
  12. package/dist/conversion/hub/response/provider-response.js +6 -1
  13. package/dist/conversion/hub/snapshot-recorder.js +8 -1
  14. package/dist/conversion/pipeline/codecs/v2/shared/openai-chat-helpers.js +0 -7
  15. package/dist/conversion/responses/responses-openai-bridge.js +47 -7
  16. package/dist/conversion/shared/compaction-detect.d.ts +2 -0
  17. package/dist/conversion/shared/compaction-detect.js +53 -0
  18. package/dist/conversion/shared/errors.d.ts +1 -1
  19. package/dist/conversion/shared/reasoning-tool-normalizer.js +7 -0
  20. package/dist/conversion/shared/snapshot-hooks.d.ts +2 -0
  21. package/dist/conversion/shared/snapshot-hooks.js +180 -4
  22. package/dist/conversion/shared/snapshot-utils.d.ts +4 -0
  23. package/dist/conversion/shared/snapshot-utils.js +4 -0
  24. package/dist/conversion/shared/tool-filter-pipeline.js +3 -9
  25. package/dist/conversion/shared/tool-governor.d.ts +2 -0
  26. package/dist/conversion/shared/tool-governor.js +101 -13
  27. package/dist/conversion/shared/tool-harvester.js +42 -2
  28. package/dist/conversion/shared/tooling.d.ts +33 -0
  29. package/dist/conversion/shared/tooling.js +27 -0
  30. package/dist/filters/index.d.ts +0 -2
  31. package/dist/filters/index.js +0 -2
  32. package/dist/filters/special/request-tools-normalize.d.ts +11 -0
  33. package/dist/filters/special/request-tools-normalize.js +13 -50
  34. package/dist/filters/special/response-apply-patch-toon-decode.js +410 -67
  35. package/dist/filters/special/response-tool-arguments-stringify.js +25 -16
  36. package/dist/filters/special/response-tool-arguments-toon-decode.js +8 -76
  37. package/dist/filters/utils/snapshot-writer.js +42 -4
  38. package/dist/guidance/index.js +8 -2
  39. package/dist/router/virtual-router/engine-health.js +0 -4
  40. package/dist/router/virtual-router/engine-selection.d.ts +2 -1
  41. package/dist/router/virtual-router/engine-selection.js +101 -9
  42. package/dist/router/virtual-router/engine.d.ts +5 -1
  43. package/dist/router/virtual-router/engine.js +188 -5
  44. package/dist/router/virtual-router/routing-instructions.d.ts +6 -0
  45. package/dist/router/virtual-router/routing-instructions.js +18 -3
  46. package/dist/router/virtual-router/sticky-session-store.d.ts +1 -0
  47. package/dist/router/virtual-router/sticky-session-store.js +36 -0
  48. package/dist/router/virtual-router/types.d.ts +22 -0
  49. package/dist/servertool/engine.js +335 -9
  50. package/dist/servertool/handlers/compaction-detect.d.ts +1 -0
  51. package/dist/servertool/handlers/compaction-detect.js +1 -0
  52. package/dist/servertool/handlers/gemini-empty-reply-continue.js +29 -5
  53. package/dist/servertool/handlers/iflow-model-error-retry.js +17 -0
  54. package/dist/servertool/handlers/stop-message-auto.js +199 -19
  55. package/dist/servertool/server-side-tools.d.ts +0 -1
  56. package/dist/servertool/server-side-tools.js +0 -1
  57. package/dist/servertool/types.d.ts +1 -0
  58. package/dist/tools/apply-patch-structured.js +52 -15
  59. package/dist/tools/tool-registry.js +537 -15
  60. package/dist/utils/toon.d.ts +4 -0
  61. package/dist/utils/toon.js +75 -0
  62. package/package.json +4 -2
  63. package/dist/test-output/virtual-router/results.json +0 -1
  64. package/dist/test-output/virtual-router/summary.json +0 -12
@@ -13,22 +13,60 @@ function isSnapshotEnabled() {
13
13
  const v = String(process?.env?.RCC_FILTER_SNAPSHOT || process?.env?.RCC_HOOKS_VERBOSITY || '').toLowerCase();
14
14
  return v === '1' || v === 'true' || v === 'verbose';
15
15
  }
/**
 * Turns an arbitrary value into a token that can be used as a single
 * file-system path segment.
 *
 * @param value    Candidate token. Anything that is not a string, or that is
 *                 blank after trimming, yields `fallback`.
 * @param fallback Value returned unchanged whenever no usable token can be
 *                 derived from `value`.
 * @returns The trimmed value with every character outside `[A-Za-z0-9_.-]`
 *          replaced by `_`, or `fallback`.
 */
function sanitizeToken(value, fallback) {
    if (typeof value !== 'string') {
        return fallback;
    }
    const cleaned = value.trim().replace(/[^A-Za-z0-9_.-]/g, '_');
    // "." and ".." pass the character filter untouched, but as path segments
    // they mean "this directory" / "parent directory". Joining them into a
    // path would silently relocate the output, so treat them as unusable.
    if (!cleaned || cleaned === '.' || cleaned === '..') {
        return fallback;
    }
    return cleaned;
}
/**
 * Extracts the string `code` property from a thrown value.
 *
 * @param error Any caught value.
 * @returns `error.code` (not trimmed) when `error` is a non-null object whose
 *          `code` is a string containing at least one non-whitespace
 *          character; otherwise `undefined`.
 */
function toErrorCode(error) {
    const isObject = typeof error === 'object' && error !== null;
    if (!isObject) {
        return undefined;
    }
    const { code } = error;
    if (typeof code !== 'string') {
        return undefined;
    }
    return code.trim().length > 0 ? code : undefined;
}
/**
 * Writes `contents` into `dir` under a name derived from `baseName` without
 * overwriting a file that already exists.
 *
 * Up to 64 names are tried in order: `<stem><ext>`, then `<stem>_1<ext>`
 * through `<stem>_63<ext>`. Each attempt opens the file with the `wx` flag,
 * so an existing file makes the attempt fail with `EEXIST` and the next name
 * is tried. Any other error is rethrown. If all 64 names are taken, the
 * contents go to `<stem>_<timestamp>_<random><ext>`; that last write does not
 * use the exclusive flag.
 *
 * @param dir      Directory to write into.
 * @param baseName Desired file name; a missing stem defaults to `snapshot`
 *                 and a missing extension to `.json`.
 * @param contents Text to write, encoded as UTF-8.
 */
async function writeUniqueFile(dir, baseName, contents) {
    const { name: parsedStem, ext: parsedExt } = path.parse(baseName);
    const stem = parsedStem || 'snapshot';
    const ext = parsedExt || '.json';
    let attempt = 0;
    while (attempt < 64) {
        const suffix = attempt === 0 ? '' : `_${attempt}`;
        const target = path.join(dir, `${stem}${suffix}${ext}`);
        try {
            await fsp.writeFile(target, contents, { encoding: 'utf-8', flag: 'wx' });
            return;
        }
        catch (error) {
            // Only a name collision moves on to the next candidate.
            if (toErrorCode(error) !== 'EEXIST') {
                throw error;
            }
        }
        attempt += 1;
    }
    const uniqueTail = `${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    await fsp.writeFile(path.join(dir, `${stem}_${uniqueTail}${ext}`), contents, 'utf-8');
}
16
53
  export async function writeFilterSnapshot(options) {
17
54
  try {
18
55
  if (!isSnapshotEnabled())
19
56
  return;
20
- const rid = options.requestId || `req_${Date.now()}`;
57
+ const rid = sanitizeToken(options.requestId || '', `req_${Date.now()}`);
21
58
  const baseOverride = process?.env?.RCC_SNAPSHOT_DIR;
22
59
  const base = baseOverride && baseOverride.trim()
23
60
  ? baseOverride.trim()
24
61
  : path.join(os.homedir(), '.routecodex', 'codex-samples');
25
62
  const folder = mapEndpointToFolder(options.endpoint);
26
- const dir = path.join(base, folder);
63
+ const provider = sanitizeToken(options.profile || '', '__pending__');
64
+ const dir = path.join(base, folder, provider, rid);
27
65
  await fsp.mkdir(dir, { recursive: true });
28
66
  const parts = ['filters', options.stage.replace(/\s+/g, ''), options.tag || (options.name ? `after_${options.name}` : 'after')]
29
67
  .filter(Boolean)
30
68
  .join('_');
31
- const file = path.join(dir, `${rid}_${parts}.json`);
69
+ const file = `${sanitizeToken(parts, 'filters')}.json`;
32
70
  const payload = {
33
71
  meta: {
34
72
  requestId: rid,
@@ -41,7 +79,7 @@ export async function writeFilterSnapshot(options) {
41
79
  },
42
80
  data: options.data
43
81
  };
44
- await fsp.writeFile(file, JSON.stringify(payload, null, 2), 'utf-8');
82
+ await writeUniqueFile(dir, file, JSON.stringify(payload, null, 2));
45
83
  }
46
84
  catch { /* ignore snapshot errors */ }
47
85
  }
@@ -66,7 +66,9 @@ function augmentApplyPatch(fn) {
66
66
  'Each change describes one operation, e.g. `{ "file": "src/foo.ts", "kind": "insert_after", "anchor": "const foo = 1;", "lines": ["const bar = 2;"] }`.',
67
67
  'Supported kinds: insert_after, insert_before, replace, delete, create_file, delete_file.',
68
68
  'Paths must stay relative to the workspace root (no leading "/" or drive letters).',
69
- 'Insert operations require `anchor` text; replace/delete require exact `target` snippets; `lines` omit "+/-" prefixes.'
69
+ 'Insert operations require `anchor` text; replace/delete require exact `target` snippets; `lines` omit "+/-" prefixes.',
70
+ 'If you must emit raw patch text, use "*** Begin Patch" / "*** End Patch" with "*** Update/Add/Delete File" headers (no "diff --git").',
71
+ 'Do not output "*** Create File:"; use "*** Add File:".'
70
72
  ].join('\n');
71
73
  const params = ensureObjectSchema(fn.parameters);
72
74
  const props = params.properties;
@@ -230,7 +232,10 @@ export function augmentAnthropicTools(tools) {
230
232
  'Before using apply_patch, always read the latest content of the target file (via shell or another tool) and base your changes on that content.',
231
233
  'Provide structured changes (insert_after / insert_before / replace / delete / create_file / delete_file) instead of raw patch text.',
232
234
  'Each change must include the target file (relative path) plus anchor/target snippets and the replacement lines.',
233
- '所有路径必须相对工作区根目录,禁止输出以 / 或盘符开头的绝对路径。'
235
+ '所有路径必须相对工作区根目录,禁止输出以 / 或盘符开头的绝对路径。',
236
+ 'Example: {\"changes\":[{\"file\":\"src/app.ts\",\"kind\":\"replace\",\"target\":\"const answer = 41;\",\"lines\":[\"const answer = 42;\"]}]}(修改同一文件时尽量只修改一段连续区域,多处不相邻修改请拆成多次 apply_patch 调用).',
237
+ 'Raw patch text must use "*** Begin Patch" / "*** End Patch" + "*** Update/Add/Delete File" headers(不要输出 "diff --git")。',
238
+ '不要输出 "*** Create File:";请使用 "*** Add File:".'
234
239
  ].join('\n');
235
240
  copy.description = appendOnce(desc, guidance, marker);
236
241
  }
@@ -267,6 +272,7 @@ export function buildSystemToolGuidance() {
267
272
  lines.push(bullet('File writes are FORBIDDEN via shell (no redirection, no here-doc, no sed -i, no ed -s, no tee). Use apply_patch ONLY. / 通过 shell 写文件一律禁止(不得使用重定向、heredoc、sed -i、ed -s、tee);必须使用 apply_patch。'));
268
273
  lines.push(bullet('apply_patch: Before writing, always read the target file first and compute changes against the latest content using appropriate tools. / apply_patch 在写入前必须先通过合适的工具读取目标文件最新内容,并基于该内容生成变更。'));
269
274
  lines.push(bullet('apply_patch: Provide structured JSON arguments with a `changes` array (insert_after / insert_before / replace / delete / create_file / delete_file); omit "+/-" prefixes in `lines`; file paths必须是相对路径。 / apply_patch 仅接受结构化 JSON。'));
275
+ lines.push(bullet('apply_patch: For a given file, prefer one contiguous change block per call; if you need to touch non-adjacent regions, split them into multiple apply_patch calls. / apply_patch 修改同一文件时尽量只提交一段连续补丁,多个不相邻位置请拆成多次调用。'));
270
276
  lines.push(bullet('update_plan: Keep exactly one step in_progress; others pending/completed. / 仅一个 in_progress 步骤。'));
271
277
  lines.push(bullet('view_image: Path must be an image file (.png .jpg .jpeg .gif .webp .bmp .svg). / 仅图片路径。'));
272
278
  lines.push(bullet('Do NOT use view_image for text files (.md/.ts/.js/.json). Use shell: {"command":["cat","<path>"]}. / 文本文件请用 shell: cat。'));
@@ -275,10 +275,6 @@ export function applyQuotaRecoveryImpl(event, healthManager, clearProviderCooldo
275
275
  healthManager.recordSuccess(providerKey);
276
276
  resetRateLimitBackoffForProvider(providerKey);
277
277
  clearProviderCooldown(providerKey);
278
- debug?.log?.('[virtual-router] quota recovery', {
279
- providerKey,
280
- reason: detail.reason
281
- });
282
278
  }
283
279
  catch {
284
280
  // 恢复失败不得影响主路由流程
@@ -1,4 +1,4 @@
1
- import type { ClassificationResult, RoutePoolTier, RouterMetadataInput, RoutingFeatures } from './types.js';
1
+ import type { ClassificationResult, RoutePoolTier, RouterMetadataInput, RoutingFeatures, ProviderQuotaView } from './types.js';
2
2
  import type { RoutingInstructionState } from './routing-instructions.js';
3
3
  import type { ContextAdvisor } from './context-advisor.js';
4
4
  import type { RouteLoadBalancer } from './load-balancer.js';
@@ -12,6 +12,7 @@ type SelectionDeps = {
12
12
  loadBalancer: RouteLoadBalancer;
13
13
  isProviderCoolingDown: (providerKey: string) => boolean;
14
14
  resolveStickyKey: (metadata: RouterMetadataInput) => string | undefined;
15
+ quotaView?: ProviderQuotaView;
15
16
  };
16
17
  export declare function selectProviderImpl(requestedRoute: string, metadata: RouterMetadataInput, classification: ClassificationResult, features: RoutingFeatures, activeState: RoutingInstructionState, deps: SelectionDeps, options?: {
17
18
  routingState?: RoutingInstructionState;
@@ -1,11 +1,32 @@
1
1
  import { DEFAULT_ROUTE, ROUTE_PRIORITY, VirtualRouterError, VirtualRouterErrorCode } from './types.js';
2
2
  export function selectProviderImpl(requestedRoute, metadata, classification, features, activeState, deps, options = {}) {
3
3
  const state = options.routingState ?? activeState;
4
+ const quotaView = deps.quotaView;
5
+ const quotaNow = quotaView ? Date.now() : 0;
6
+ const isAllowedByQuota = (key) => {
7
+ if (!quotaView) {
8
+ return true;
9
+ }
10
+ const entry = quotaView(key);
11
+ if (!entry) {
12
+ return true;
13
+ }
14
+ if (!entry.inPool) {
15
+ return false;
16
+ }
17
+ if (entry.cooldownUntil && entry.cooldownUntil > quotaNow) {
18
+ return false;
19
+ }
20
+ if (entry.blacklistUntil && entry.blacklistUntil > quotaNow) {
21
+ return false;
22
+ }
23
+ return true;
24
+ };
4
25
  const excludedProviderKeys = extractExcludedProviderKeySet(features.metadata);
5
26
  const forcedResolution = state.forcedTarget ? resolveInstructionTarget(state.forcedTarget, deps.providerRegistry) : null;
6
27
  if (forcedResolution && forcedResolution.mode === 'exact') {
7
28
  const forcedKey = forcedResolution.keys[0];
8
- if (!excludedProviderKeys.has(forcedKey) && !deps.isProviderCoolingDown(forcedKey)) {
29
+ if (!excludedProviderKeys.has(forcedKey) && !deps.isProviderCoolingDown(forcedKey) && isAllowedByQuota(forcedKey)) {
9
30
  return {
10
31
  providerKey: forcedKey,
11
32
  routeUsed: requestedRoute,
@@ -22,7 +43,8 @@ export function selectProviderImpl(requestedRoute, metadata, classification, fea
22
43
  const stickyKey = stickyResolution.keys[0];
23
44
  if (deps.healthManager.isAvailable(stickyKey) &&
24
45
  !excludedProviderKeys.has(stickyKey) &&
25
- !deps.isProviderCoolingDown(stickyKey)) {
46
+ !deps.isProviderCoolingDown(stickyKey) &&
47
+ isAllowedByQuota(stickyKey)) {
26
48
  return {
27
49
  providerKey: stickyKey,
28
50
  routeUsed: requestedRoute,
@@ -34,7 +56,8 @@ export function selectProviderImpl(requestedRoute, metadata, classification, fea
34
56
  if (stickyResolution && stickyResolution.mode === 'filter' && stickyResolution.keys.length > 0) {
35
57
  const liveKeys = stickyResolution.keys.filter((key) => deps.healthManager.isAvailable(key) &&
36
58
  !excludedProviderKeys.has(key) &&
37
- !deps.isProviderCoolingDown(key));
59
+ !deps.isProviderCoolingDown(key) &&
60
+ isAllowedByQuota(key));
38
61
  if (liveKeys.length > 0) {
39
62
  stickyKeySet = new Set(liveKeys);
40
63
  }
@@ -247,13 +270,62 @@ function trySelectFromTier(routeName, tier, stickyKey, estimatedTokens, features
247
270
  }
248
271
  const contextResult = deps.contextAdvisor.classify(targets, estimatedTokens, (key) => deps.providerRegistry.get(key));
249
272
  const prioritizedPools = buildContextCandidatePools(contextResult);
273
+ const quotaView = deps.quotaView;
274
+ const now = quotaView ? Date.now() : 0;
275
+ const selectWithQuota = (candidates) => {
276
+ if (!quotaView) {
277
+ return deps.loadBalancer.select({
278
+ routeName: `${routeName}:${tier.id}`,
279
+ candidates,
280
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
281
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
282
+ });
283
+ }
284
+ const buckets = new Map();
285
+ for (const key of candidates) {
286
+ const entry = quotaView(key);
287
+ if (!entry) {
288
+ const list = buckets.get(100) ?? [];
289
+ list.push(key);
290
+ buckets.set(100, list);
291
+ continue;
292
+ }
293
+ if (!entry.inPool) {
294
+ continue;
295
+ }
296
+ if (entry.cooldownUntil && entry.cooldownUntil > now) {
297
+ continue;
298
+ }
299
+ if (entry.blacklistUntil && entry.blacklistUntil > now) {
300
+ continue;
301
+ }
302
+ const tierPriority = typeof entry.priorityTier === 'number' && Number.isFinite(entry.priorityTier)
303
+ ? entry.priorityTier
304
+ : 100;
305
+ const list = buckets.get(tierPriority) ?? [];
306
+ list.push(key);
307
+ buckets.set(tierPriority, list);
308
+ }
309
+ const sortedPriorities = Array.from(buckets.keys()).sort((a, b) => a - b);
310
+ for (const priority of sortedPriorities) {
311
+ const bucketCandidates = buckets.get(priority) ?? [];
312
+ if (!bucketCandidates.length) {
313
+ continue;
314
+ }
315
+ const selected = deps.loadBalancer.select({
316
+ routeName: `${routeName}:${tier.id}`,
317
+ candidates: bucketCandidates,
318
+ stickyKey: options.allowAliasRotation ? undefined : stickyKey,
319
+ availabilityCheck: (key) => deps.healthManager.isAvailable(key)
320
+ });
321
+ if (selected) {
322
+ return selected;
323
+ }
324
+ }
325
+ return null;
326
+ };
250
327
  for (const candidatePool of prioritizedPools) {
251
- const providerKey = deps.loadBalancer.select({
252
- routeName: `${routeName}:${tier.id}`,
253
- candidates: candidatePool,
254
- stickyKey: options.allowAliasRotation ? undefined : stickyKey,
255
- availabilityCheck: (key) => deps.healthManager.isAvailable(key)
256
- });
328
+ const providerKey = selectWithQuota(candidatePool);
257
329
  if (providerKey) {
258
330
  return { providerKey, poolTargets: tier.targets, tierId: tier.id };
259
331
  }
@@ -277,6 +349,26 @@ export function selectFromStickyPool(stickyKeySet, metadata, features, state, de
277
349
  ]));
278
350
  const disabledModels = new Map(Array.from(state.disabledModels.entries()).map(([provider, models]) => [provider, new Set(models)]));
279
351
  let candidates = Array.from(stickyKeySet).filter((key) => !deps.isProviderCoolingDown(key));
352
+ const quotaView = deps.quotaView;
353
+ const now = quotaView ? Date.now() : 0;
354
+ if (quotaView) {
355
+ candidates = candidates.filter((key) => {
356
+ const entry = quotaView(key);
357
+ if (!entry) {
358
+ return true;
359
+ }
360
+ if (!entry.inPool) {
361
+ return false;
362
+ }
363
+ if (entry.cooldownUntil && entry.cooldownUntil > now) {
364
+ return false;
365
+ }
366
+ if (entry.blacklistUntil && entry.blacklistUntil > now) {
367
+ return false;
368
+ }
369
+ return true;
370
+ });
371
+ }
280
372
  if (allowedProviders.size > 0) {
281
373
  candidates = candidates.filter((key) => {
282
374
  const providerId = extractProviderId(key);
@@ -1,6 +1,7 @@
1
- import { type RoutingDecision, type RoutingDiagnostics, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent, type VirtualRouterHealthStore } from './types.js';
1
+ import { type RoutingDecision, type RoutingDiagnostics, type StopMessageStateSnapshot, type RouterMetadataInput, type VirtualRouterConfig, type TargetMetadata, type ProviderFailureEvent, type ProviderErrorEvent, type VirtualRouterHealthStore } from './types.js';
2
2
  import type { ProcessedRequest, StandardizedRequest } from '../../conversion/hub/types/standardized.js';
3
3
  import { type RoutingInstructionState } from './routing-instructions.js';
4
+ import type { ProviderQuotaView } from './types.js';
4
5
  interface RoutingInstructionStateStore {
5
6
  loadSync(key: string): RoutingInstructionState | null;
6
7
  saveAsync(key: string, state: RoutingInstructionState | null): void;
@@ -22,9 +23,11 @@ export declare class VirtualRouterEngine {
22
23
  private healthStore?;
23
24
  private routingStateStore;
24
25
  private routingInstructionState;
26
+ private quotaView?;
25
27
  constructor(deps?: {
26
28
  healthStore?: VirtualRouterHealthStore;
27
29
  routingStateStore?: RoutingInstructionStateStore;
30
+ quotaView?: ProviderQuotaView;
28
31
  });
29
32
  initialize(config: VirtualRouterConfig): void;
30
33
  route(request: StandardizedRequest | ProcessedRequest, metadata: RouterMetadataInput): {
@@ -32,6 +35,7 @@ export declare class VirtualRouterEngine {
32
35
  decision: RoutingDecision;
33
36
  diagnostics: RoutingDiagnostics;
34
37
  };
38
+ getStopMessageState(metadata: RouterMetadataInput): StopMessageStateSnapshot | null;
35
39
  handleProviderFailure(event: ProviderFailureEvent): void;
36
40
  handleProviderError(event: ProviderErrorEvent): void;
37
41
  getStatus(): {
@@ -32,6 +32,7 @@ export class VirtualRouterEngine {
32
32
  saveAsync: saveRoutingInstructionStateAsync
33
33
  };
34
34
  routingInstructionState = new Map();
35
+ quotaView;
35
36
  constructor(deps) {
36
37
  if (deps?.healthStore) {
37
38
  this.healthStore = deps.healthStore;
@@ -39,6 +40,9 @@ export class VirtualRouterEngine {
39
40
  if (deps?.routingStateStore) {
40
41
  this.routingStateStore = deps.routingStateStore;
41
42
  }
43
+ if (deps?.quotaView) {
44
+ this.quotaView = deps.quotaView;
45
+ }
42
46
  }
43
47
  initialize(config) {
44
48
  this.validateConfig(config);
@@ -76,13 +80,121 @@ export class VirtualRouterEngine {
76
80
  stickyTarget: undefined
77
81
  };
78
82
  }
79
- const instructions = parseRoutingInstructions(request.messages);
83
+ const sessionScope = this.resolveSessionScope(metadata);
84
+ if (sessionScope) {
85
+ const sessionState = this.getRoutingInstructionState(sessionScope);
86
+ if (typeof sessionState.stopMessageText === 'string' ||
87
+ typeof sessionState.stopMessageMaxRepeats === 'number') {
88
+ routingState = {
89
+ ...routingState,
90
+ stopMessageText: sessionState.stopMessageText,
91
+ stopMessageMaxRepeats: sessionState.stopMessageMaxRepeats,
92
+ stopMessageUsed: sessionState.stopMessageUsed,
93
+ stopMessageUpdatedAt: sessionState.stopMessageUpdatedAt,
94
+ stopMessageLastUsedAt: sessionState.stopMessageLastUsedAt
95
+ };
96
+ }
97
+ }
98
+ const parsedInstructions = parseRoutingInstructions(request.messages);
99
+ let instructions = parsedInstructions;
100
+ if (sessionScope && parsedInstructions.length > 0) {
101
+ const sessionState = this.getRoutingInstructionState(sessionScope);
102
+ const hasStopMessageClear = parsedInstructions.some((entry) => entry.type === 'stopMessageClear');
103
+ const stopMessageSets = parsedInstructions.filter((entry) => entry.type === 'stopMessageSet');
104
+ if (!hasStopMessageClear && stopMessageSets.length > 0) {
105
+ const sessionText = typeof sessionState.stopMessageText === 'string' ? sessionState.stopMessageText.trim() : '';
106
+ const sessionMax = typeof sessionState.stopMessageMaxRepeats === 'number' && Number.isFinite(sessionState.stopMessageMaxRepeats)
107
+ ? Math.floor(sessionState.stopMessageMaxRepeats)
108
+ : undefined;
109
+ const allSame = stopMessageSets.every((entry) => {
110
+ const entryText = typeof entry.stopMessageText === 'string' ? entry.stopMessageText.trim() : '';
111
+ const entryMax = typeof entry.stopMessageMaxRepeats === 'number' && Number.isFinite(entry.stopMessageMaxRepeats)
112
+ ? Math.floor(entry.stopMessageMaxRepeats)
113
+ : undefined;
114
+ return Boolean(entryText) && entryText === sessionText && entryMax === sessionMax;
115
+ });
116
+ if (allSame) {
117
+ instructions = parsedInstructions.filter((entry) => entry.type !== 'stopMessageSet');
118
+ }
119
+ }
120
+ }
121
+ if (parsedInstructions.length > 0) {
122
+ request.messages = cleanMessagesFromRoutingInstructions(request.messages);
123
+ }
80
124
  if (instructions.length > 0) {
81
125
  routingState = applyRoutingInstructions(instructions, routingState);
82
126
  const effectiveKey = stickyKey || 'default';
83
127
  this.routingInstructionState.set(effectiveKey, routingState);
84
- request.messages = cleanMessagesFromRoutingInstructions(request.messages);
85
128
  this.persistRoutingInstructionState(effectiveKey, routingState);
129
+ // 对 stopMessage 指令补充一份基于 session/conversation 的持久化状态,
130
+ // 便于 server-side 工具通过 session:*/conversation:* scope 读取到相同配置。
131
+ if (sessionScope) {
132
+ const hasStopMessageSet = instructions.some((entry) => entry.type === 'stopMessageSet');
133
+ const hasStopMessageClear = instructions.some((entry) => entry.type === 'stopMessageClear');
134
+ if (hasStopMessageSet || hasStopMessageClear) {
135
+ const sessionState = this.getRoutingInstructionState(sessionScope);
136
+ let nextSessionState = {
137
+ ...sessionState
138
+ };
139
+ let shouldPersistSessionState = false;
140
+ if (hasStopMessageClear) {
141
+ nextSessionState.stopMessageText = undefined;
142
+ nextSessionState.stopMessageMaxRepeats = undefined;
143
+ nextSessionState.stopMessageUsed = undefined;
144
+ nextSessionState.stopMessageUpdatedAt = undefined;
145
+ nextSessionState.stopMessageLastUsedAt = undefined;
146
+ shouldPersistSessionState = true;
147
+ }
148
+ else if (hasStopMessageSet) {
149
+ const text = typeof routingState.stopMessageText === 'string' ? routingState.stopMessageText : '';
150
+ const maxRepeats = routingState.stopMessageMaxRepeats;
151
+ const sameText = typeof sessionState.stopMessageText === 'string' &&
152
+ sessionState.stopMessageText.trim() === text.trim();
153
+ const sameMax = typeof sessionState.stopMessageMaxRepeats === 'number' &&
154
+ typeof maxRepeats === 'number' &&
155
+ Math.floor(sessionState.stopMessageMaxRepeats) === Math.floor(maxRepeats);
156
+ const isSameInstruction = Boolean(text) && sameText && sameMax;
157
+ nextSessionState.stopMessageText = text || undefined;
158
+ nextSessionState.stopMessageMaxRepeats = maxRepeats;
159
+ if (!isSameInstruction) {
160
+ nextSessionState.stopMessageUsed = 0;
161
+ nextSessionState.stopMessageUpdatedAt =
162
+ typeof routingState.stopMessageUpdatedAt === 'number'
163
+ ? routingState.stopMessageUpdatedAt
164
+ : Date.now();
165
+ nextSessionState.stopMessageLastUsedAt = undefined;
166
+ shouldPersistSessionState = true;
167
+ }
168
+ }
169
+ if (shouldPersistSessionState) {
170
+ this.routingInstructionState.set(sessionScope, nextSessionState);
171
+ this.persistRoutingInstructionState(sessionScope, nextSessionState);
172
+ }
173
+ else {
174
+ nextSessionState = sessionState;
175
+ }
176
+ // 日志展示使用 session scope 的 stopMessage 状态,避免每次解析重复刷新时间/次数。
177
+ if (typeof nextSessionState.stopMessageText === 'string' ||
178
+ typeof nextSessionState.stopMessageMaxRepeats === 'number') {
179
+ routingState.stopMessageText = nextSessionState.stopMessageText;
180
+ routingState.stopMessageMaxRepeats = nextSessionState.stopMessageMaxRepeats;
181
+ routingState.stopMessageUsed = nextSessionState.stopMessageUsed;
182
+ routingState.stopMessageUpdatedAt = nextSessionState.stopMessageUpdatedAt;
183
+ routingState.stopMessageLastUsedAt = nextSessionState.stopMessageLastUsedAt;
184
+ }
185
+ }
186
+ }
187
+ }
188
+ if (instructions.length === 0 && sessionScope) {
189
+ const sessionState = this.getRoutingInstructionState(sessionScope);
190
+ if (typeof sessionState.stopMessageText === 'string' ||
191
+ typeof sessionState.stopMessageMaxRepeats === 'number') {
192
+ routingState.stopMessageText = sessionState.stopMessageText;
193
+ routingState.stopMessageMaxRepeats = sessionState.stopMessageMaxRepeats;
194
+ routingState.stopMessageUsed = sessionState.stopMessageUsed;
195
+ routingState.stopMessageUpdatedAt = sessionState.stopMessageUpdatedAt;
196
+ routingState.stopMessageLastUsedAt = sessionState.stopMessageLastUsedAt;
197
+ }
86
198
  }
87
199
  const routingMode = this.resolveRoutingMode([...metadataInstructions, ...instructions], routingState);
88
200
  const features = buildRoutingFeatures(request, metadata);
@@ -183,6 +295,44 @@ export class VirtualRouterEngine {
183
295
  }
184
296
  };
185
297
  }
298
+ getStopMessageState(metadata) {
299
+ const sessionScope = this.resolveSessionScope(metadata);
300
+ const sessionState = sessionScope ? this.getRoutingInstructionState(sessionScope) : null;
301
+ const stickyKey = this.resolveStickyKey(metadata);
302
+ const stickyState = stickyKey ? this.getRoutingInstructionState(stickyKey) : null;
303
+ const effectiveState = sessionState && typeof sessionState.stopMessageText === 'string' && sessionState.stopMessageText.trim()
304
+ ? sessionState
305
+ : stickyState;
306
+ if (!effectiveState) {
307
+ return null;
308
+ }
309
+ const text = typeof effectiveState.stopMessageText === 'string' ? effectiveState.stopMessageText.trim() : '';
310
+ const maxRepeats = typeof effectiveState.stopMessageMaxRepeats === 'number' &&
311
+ Number.isFinite(effectiveState.stopMessageMaxRepeats)
312
+ ? Math.max(1, Math.floor(effectiveState.stopMessageMaxRepeats))
313
+ : 0;
314
+ if (!text || maxRepeats <= 0) {
315
+ return null;
316
+ }
317
+ return {
318
+ stopMessageText: text,
319
+ stopMessageMaxRepeats: maxRepeats,
320
+ ...(typeof effectiveState.stopMessageSource === 'string' && effectiveState.stopMessageSource.trim()
321
+ ? { stopMessageSource: effectiveState.stopMessageSource.trim() }
322
+ : {}),
323
+ ...(typeof effectiveState.stopMessageUsed === 'number' && Number.isFinite(effectiveState.stopMessageUsed)
324
+ ? { stopMessageUsed: Math.max(0, Math.floor(effectiveState.stopMessageUsed)) }
325
+ : {}),
326
+ ...(typeof effectiveState.stopMessageUpdatedAt === 'number' &&
327
+ Number.isFinite(effectiveState.stopMessageUpdatedAt)
328
+ ? { stopMessageUpdatedAt: effectiveState.stopMessageUpdatedAt }
329
+ : {}),
330
+ ...(typeof effectiveState.stopMessageLastUsedAt === 'number' &&
331
+ Number.isFinite(effectiveState.stopMessageLastUsedAt)
332
+ ? { stopMessageLastUsedAt: effectiveState.stopMessageLastUsedAt }
333
+ : {})
334
+ };
335
+ }
186
336
  handleProviderFailure(event) {
187
337
  handleProviderFailureImpl(event, this.healthManager, this.providerHealthConfig(), (key, ttl) => this.markProviderCooldown(key, ttl));
188
338
  }
@@ -195,6 +345,12 @@ export class VirtualRouterEngine {
195
345
  // ignore persistence errors
196
346
  }
197
347
  }
348
+ // 当 Host 注入 quotaView 时,VirtualRouter 的入池/优先级决策应以 quota 为准;
349
+ // 此时不再在 engine-health 内部进行 429/backoff/series cooldown 等健康决策,
350
+ // 以避免与 daemon/quota-center 的长期熔断策略重复维护并导致日志噪声。
351
+ if (this.quotaView) {
352
+ return;
353
+ }
198
354
  // 配额恢复事件优先处理:一旦识别到 virtualRouterQuotaRecovery,
199
355
  // 直接清理健康状态/冷却 TTL,避免继续走常规错误映射逻辑。
200
356
  const handledByQuota = applyQuotaRecoveryImpl(event, this.healthManager, (key) => this.clearProviderCooldown(key), this.debug);
@@ -278,7 +434,8 @@ export class VirtualRouterEngine {
278
434
  contextAdvisor: this.contextAdvisor,
279
435
  loadBalancer: this.loadBalancer,
280
436
  isProviderCoolingDown: (key) => this.isProviderCoolingDown(key),
281
- resolveStickyKey: (m) => this.resolveStickyKey(m)
437
+ resolveStickyKey: (m) => this.resolveStickyKey(m),
438
+ quotaView: this.quotaView
282
439
  }, { routingState });
283
440
  }
284
441
  incrementRouteStat(routeName, providerKey) {
@@ -327,8 +484,34 @@ export class VirtualRouterEngine {
327
484
  }
328
485
  getRoutingInstructionState(stickyKey) {
329
486
  const key = stickyKey || 'default';
330
- if (this.routingInstructionState.has(key)) {
331
- return this.routingInstructionState.get(key);
487
+ const existing = this.routingInstructionState.get(key);
488
+ // 对 session:/conversation: 作用域,在每次读取时尝试从磁盘刷新 stopMessage 相关字段,
489
+ // 确保 servertool(如 stop_message_auto)通过 sticky-session-store 更新的使用次数
490
+ // 能在 VirtualRouter 日志中实时反映出来。
491
+ if (existing && (key.startsWith('session:') || key.startsWith('conversation:'))) {
492
+ try {
493
+ const persisted = loadRoutingInstructionStateSync(key);
494
+ if (persisted) {
495
+ // 以持久化状态为准(包括清空后的 undefined),避免 stopMessage 状态“卡死”在内存中。
496
+ existing.stopMessageText = persisted.stopMessageText;
497
+ existing.stopMessageMaxRepeats = persisted.stopMessageMaxRepeats;
498
+ existing.stopMessageUsed = persisted.stopMessageUsed;
499
+ existing.stopMessageUpdatedAt = persisted.stopMessageUpdatedAt;
500
+ existing.stopMessageLastUsedAt = persisted.stopMessageLastUsedAt;
501
+ }
502
+ else {
503
+ // 文件被删除或无法解析时,将内存中的 stopMessage 状态一并清空。
504
+ existing.stopMessageText = undefined;
505
+ existing.stopMessageMaxRepeats = undefined;
506
+ existing.stopMessageUsed = undefined;
507
+ existing.stopMessageUpdatedAt = undefined;
508
+ existing.stopMessageLastUsedAt = undefined;
509
+ }
510
+ }
511
+ catch {
512
+ // 刷新失败不影响原有内存状态
513
+ }
514
+ return existing;
332
515
  }
333
516
  let initial = null;
334
517
  // 仅对 session:/conversation: 作用域的 key 尝试从磁盘恢复持久化状态
@@ -28,6 +28,12 @@ export interface RoutingInstructionState {
28
28
  disabledProviders: Set<string>;
29
29
  disabledKeys: Map<string, Set<string | number>>;
30
30
  disabledModels: Map<string, Set<string>>;
31
+ /**
32
+ * Source of the current stopMessage configuration.
33
+ * - 'explicit':由用户通过 <** stopMessage:"..." **> 指令显式设置
34
+ * - 'auto':由系统基于空响应/错误自动推导(例如 Gemini 空回复)
35
+ */
36
+ stopMessageSource?: string;
31
37
  stopMessageText?: string;
32
38
  stopMessageMaxRepeats?: number;
33
39
  stopMessageUsed?: number;
@@ -398,11 +398,19 @@ export function applyRoutingInstructions(instructions, currentState) {
398
398
  ? Math.floor(instruction.stopMessageMaxRepeats)
399
399
  : 0;
400
400
  if (text && maxRepeats > 0) {
401
+ const sameText = typeof newState.stopMessageText === 'string' &&
402
+ newState.stopMessageText.trim() === text;
403
+ const sameMax = typeof newState.stopMessageMaxRepeats === 'number' &&
404
+ Math.floor(newState.stopMessageMaxRepeats) === maxRepeats;
405
+ const isSameInstruction = sameText && sameMax;
401
406
  newState.stopMessageText = text;
402
407
  newState.stopMessageMaxRepeats = maxRepeats;
403
- newState.stopMessageUsed = 0;
404
- newState.stopMessageUpdatedAt = Date.now();
405
- newState.stopMessageLastUsedAt = undefined;
408
+ newState.stopMessageSource = 'explicit';
409
+ if (!isSameInstruction) {
410
+ newState.stopMessageUsed = 0;
411
+ newState.stopMessageUpdatedAt = Date.now();
412
+ newState.stopMessageLastUsedAt = undefined;
413
+ }
406
414
  }
407
415
  break;
408
416
  }
@@ -410,6 +418,7 @@ export function applyRoutingInstructions(instructions, currentState) {
410
418
  newState.stopMessageText = undefined;
411
419
  newState.stopMessageMaxRepeats = undefined;
412
420
  newState.stopMessageUsed = undefined;
421
+ newState.stopMessageSource = undefined;
413
422
  newState.stopMessageUpdatedAt = undefined;
414
423
  newState.stopMessageLastUsedAt = undefined;
415
424
  break;
@@ -453,6 +462,9 @@ export function serializeRoutingInstructionState(state) {
453
462
  provider,
454
463
  models: Array.from(models)
455
464
  })),
465
+ ...(typeof state.stopMessageSource === 'string' && state.stopMessageSource.trim()
466
+ ? { stopMessageSource: state.stopMessageSource }
467
+ : {}),
456
468
  ...(typeof state.stopMessageText === 'string' && state.stopMessageText.trim()
457
469
  ? { stopMessageText: state.stopMessageText }
458
470
  : {}),
@@ -508,6 +520,9 @@ export function deserializeRoutingInstructionState(data) {
508
520
  }
509
521
  }
510
522
  }
523
+ if (typeof data.stopMessageSource === 'string' && data.stopMessageSource.trim()) {
524
+ state.stopMessageSource = data.stopMessageSource.trim();
525
+ }
511
526
  if (typeof data.stopMessageText === 'string' && data.stopMessageText.trim()) {
512
527
  state.stopMessageText = data.stopMessageText;
513
528
  }
@@ -1,3 +1,4 @@
1
1
  import type { RoutingInstructionState } from './routing-instructions.js';
2
2
  export declare function loadRoutingInstructionStateSync(key: string | undefined): RoutingInstructionState | null;
3
3
  export declare function saveRoutingInstructionStateAsync(key: string | undefined, state: RoutingInstructionState | null): void;
4
+ export declare function saveRoutingInstructionStateSync(key: string | undefined, state: RoutingInstructionState | null): void;