omp-cache-optimizer 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/index.ts +273 -346
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -312,6 +312,15 @@ type CacheUsageSample = {
312
312
  missingUsageFields: boolean;
313
313
  };
314
314
 
315
+ type PromptRewriteContext = {
316
+ options?: BuildSystemPromptOptions;
317
+ routeSnapshot?: PiRouteSnapshot;
318
+ routedModel?: PiModel;
319
+ timestamp: number;
320
+ };
321
+
322
+ const PROMPT_REWRITE_CONTEXT_TTL_MS = 10_000;
323
+
315
324
  /** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
316
325
  const MAX_RECENT_SAMPLES = 50;
317
326
 
@@ -968,16 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
968
977
  */
969
978
  function getCompat(model: PiModel | undefined): CacheCompat {
970
979
  if (!model) return {} as CacheCompat;
971
-
972
- // The host runtime merges provider.compat with model.compat (model wins on conflicts).
973
- // We approximate this by reading from ctx.model which should already have merged compat.
974
- // However, for safety, we check both levels if available.
975
- const modelCompat = (model.compat ?? {}) as CacheCompat;
976
980
 
977
- // Note: ctx.model from the host runtime should already contain merged compat,
978
- // but we document the two-level structure for clarity.
979
- // but we document the two-level structure for clarity
980
- return modelCompat;
981
+ const record = model as PiModel & { compatConfig?: Record<string, unknown> };
982
+ return {
983
+ ...((record.compatConfig ?? {}) as CacheCompat),
984
+ ...((record.compat ?? {}) as CacheCompat),
985
+ };
986
+ }
987
+
988
+ function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
989
+ if (!sessionHash || !model) return undefined;
990
+ return `${sessionHash}:${modelKey(model)}`;
991
+ }
992
+
993
+ function rememberPromptRewriteContext(
994
+ contexts: Map<string, PromptRewriteContext>,
995
+ key: string | undefined,
996
+ context: PromptRewriteContext,
997
+ ): void {
998
+ if (!key) return;
999
+ contexts.set(key, context);
1000
+ }
1001
+
1002
+ function getPromptRewriteContext(
1003
+ contexts: Map<string, PromptRewriteContext>,
1004
+ key: string | undefined,
1005
+ now = Date.now(),
1006
+ ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
1007
+ ): PromptRewriteContext | undefined {
1008
+ if (!key) return undefined;
1009
+ const context = contexts.get(key);
1010
+ if (!context) return undefined;
1011
+ if (now - context.timestamp > ttlMs) {
1012
+ contexts.delete(key);
1013
+ return undefined;
1014
+ }
1015
+ return context;
981
1016
  }
982
1017
 
983
1018
  /**
@@ -1033,18 +1068,18 @@ function isRuntimeOptimizerEnabled(): boolean {
1033
1068
  }
1034
1069
 
1035
1070
  function getOptimizerRuntimeModeLines(): string[] {
1036
- const state = runtimeOptimizerEnabled ? "enabled" : "disabled";
1071
+ const state = runtimeOptimizerEnabled ? "已启用" : "已关闭";
1037
1072
  const lines: string[] = [];
1038
- lines.push(`Runtime state: ${state}`);
1039
- lines.push(`• Prompt rewrite: ${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "on" : "off"}`);
1040
- lines.push(`• OpenAI prompt_cache_key fallback: ${shouldInjectOpenAIPromptCacheKey() ? "on" : "off"}`);
1041
- lines.push(`• Footer cache stats: on${runtimeOptimizerEnabled ? "" : " (comparison mode)"}`);
1042
- lines.push(`• Compat warnings: ${runtimeOptimizerEnabled ? "on" : "off"}`);
1043
- lines.push(`• ${PI_CACHE_RETENTION_ENV}: ${process.env[PI_CACHE_RETENTION_ENV] ?? "(unset)"}`);
1073
+ lines.push(`运行状态:${state}`);
1074
+ lines.push(`• Prompt 重写:${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "开启" : "关闭"}`);
1075
+ lines.push(`• OpenAI prompt_cache_key 回退:${shouldInjectOpenAIPromptCacheKey() ? "开启" : "关闭"}`);
1076
+ lines.push(`• Footer 缓存统计:开启${runtimeOptimizerEnabled ? "" : "(对比模式)"}`);
1077
+ lines.push(`• Compat 提示:${runtimeOptimizerEnabled ? "开启" : "关闭"}`);
1078
+ lines.push(`• ${PI_CACHE_RETENTION_ENV}:${process.env[PI_CACHE_RETENTION_ENV] ?? "(未设置)"}`);
1044
1079
  if (!runtimeOptimizerEnabled) {
1045
- lines.push("This is a current-process switch. Run /reload or restart OMP to return to startup behavior.");
1080
+ lines.push("这是当前进程内开关。运行 /reload 或重启 OMP 可恢复到启动时行为。");
1046
1081
  } else if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) || !shouldInjectOpenAIPromptCacheKey()) {
1047
- lines.push("Some features are still disabled by environment variables.");
1082
+ lines.push("仍有部分能力被环境变量关闭。");
1048
1083
  }
1049
1084
  return lines;
1050
1085
  }
@@ -1179,9 +1214,9 @@ function buildAdaptiveThinkingCompatSuggestion(_missing: string[]): Record<strin
1179
1214
  }
1180
1215
 
1181
1216
  function appendAdaptiveThinkingCompatAdviceLines(lines: string[], _missing: string[], placement: CompatAdvicePlacement = {}): void {
1182
- lines.push("- Adaptive thinking: OMP's built-in model catalog sets this automatically for official Claude models.");
1183
- lines.push(" Custom channels fronting Anthropic should rely on the bundled catalog metadata;");
1184
- lines.push(" if the upstream rejects adaptive thinking, verify the model id matches an official release.");
1217
+ lines.push("- 自适应思考:OMP 内置模型目录会为官方 Claude 模型自动设置。");
1218
+ lines.push(" 自定义 Anthropic 渠道应依赖内置 catalog 元数据;");
1219
+ lines.push(" 如果上游拒绝 adaptive thinking,请确认模型 id 是否匹配官方发布版本。");
1185
1220
  appendCredentialSafeProviderGuidance(lines, placement, {});
1186
1221
  }
1187
1222
 
@@ -1191,10 +1226,10 @@ function buildAdaptiveThinkingCompatWarningText(key: string, _missing: string[])
1191
1226
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
1192
1227
  const modelsJsonPath = getModelsJsonDisplayPath();
1193
1228
  const lines: string[] = [
1194
- `ℹ️ omp-cache-optimizer: ${key} is an adaptive-generation Claude model.`,
1195
- `OMP's built-in catalog handles adaptive thinking automatically; no models.yml compat key is needed`,
1196
- `for official models. Custom channels fronting Anthropic may need explicit catalog metadata.`,
1197
- `See ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? "<id>"}".`,
1229
+ `ℹ️ omp-cache-optimizer:${key} 是支持自适应生成的 Claude 模型。`,
1230
+ "OMP 内置 catalog 会自动处理自适应思考;官方模型不需要额外的 models.yml compat 键。",
1231
+ "如果是转发 Anthropic 的自定义渠道,可能仍需要显式 catalog 元数据。",
1232
+ `可参考 ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? '<id>'}"。`,
1198
1233
  "",
1199
1234
  ];
1200
1235
  appendAdaptiveThinkingCompatAdviceLines(lines, [], { providerLabel, modelId });
@@ -1972,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1972
2007
  return true;
1973
2008
  }
1974
2009
  if (Array.isArray(record.system) && record.system.length > 0) {
1975
- // Replace first text block, keep structure
1976
- const first = asRecord(record.system[0]);
1977
- if (first && typeof first.text === "string") {
1978
- first.text = text;
1979
- return true;
1980
- }
1981
- // Fallback: convert to single-block string form
1982
2010
  record.system = [{ type: "text", text }];
1983
2011
  return true;
1984
2012
  }
@@ -1986,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
1986
2014
  // google-generative-ai: payload.systemInstruction
1987
2015
  const systemInstruction = asRecord(record.systemInstruction);
1988
2016
  if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
1989
- const firstPart = asRecord(systemInstruction.parts[0]);
1990
- if (firstPart && typeof firstPart.text === "string") {
1991
- firstPart.text = text;
1992
- return true;
1993
- }
2017
+ systemInstruction.parts = [{ text }];
2018
+ return true;
1994
2019
  }
1995
2020
 
1996
2021
  // openai-completions / openai-responses: payload.messages[] first system/developer message
@@ -2005,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
2005
2030
  return true;
2006
2031
  }
2007
2032
  if (Array.isArray(r.content) && r.content.length > 0) {
2008
- const first = asRecord(r.content[0]);
2009
- if (first && typeof first.text === "string") {
2010
- first.text = text;
2011
- return true;
2012
- }
2033
+ r.content = text;
2034
+ return true;
2013
2035
  }
2014
2036
  }
2015
2037
  }
@@ -2081,7 +2103,7 @@ function buildSafeOpenAIProxyCompatSuggestion(_missing: string[]): Record<string
2081
2103
  }
2082
2104
 
2083
2105
  function getPromptCacheRetentionUnsupportedHint(): string {
2084
- return "If this channel returns `400 Unsupported parameter: prompt_cache_retention`, remove/avoid `supportsLongPromptCacheRetention`; this extension does not write that field directly, but OMP may send it when long retention is requested and compat says the proxy supports it.";
2106
+ return "如果这个渠道返回 `400 Unsupported parameter: prompt_cache_retention`,请移除或避免 `supportsLongPromptCacheRetention`;扩展本身不会直接写这个字段,但当 compat 声明支持长缓存保留时,OMP 可能会发送它。";
2085
2107
  }
2086
2108
 
2087
2109
  function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
@@ -2135,20 +2157,20 @@ function appendCredentialSafeProviderGuidance(lines: string[], placement: Compat
2135
2157
  if (!providerLabel) return;
2136
2158
 
2137
2159
  lines.push("");
2138
- lines.push("If this channel has no models.yml provider entry yet:");
2139
- lines.push("- Keep existing authentication as-is; do not copy credentials, tokens, or API keys.");
2140
- lines.push(`- Add only cache/routing compat overrides in ${getModelsJsonDisplayPath()}.`);
2160
+ lines.push("如果这个渠道在 models.yml 里还没有 provider 配置:");
2161
+ lines.push("- 保留现有认证方式;不要复制 credential、token 或 API key。");
2162
+ lines.push(`- 只在 ${getModelsJsonDisplayPath()} 里添加缓存/路由 compat 覆盖。`);
2141
2163
 
2142
2164
  if (Object.keys(compatSuggestion).length === 0) {
2143
- lines.push("- No safe copyable override is available for the missing flags shown above.");
2165
+ lines.push("- 上面这些缺失项目前没有安全可复制的 override。");
2144
2166
  return;
2145
2167
  }
2146
2168
 
2147
- lines.push("Provider-level minimal override:");
2169
+ lines.push("Provider 级最小覆盖:");
2148
2170
  lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
2149
2171
 
2150
2172
  if (placement.modelId) {
2151
- lines.push("Single-model override (use this if only this model should change):");
2173
+ lines.push("单模型 override(只想影响当前模型时使用):");
2152
2174
  lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
2153
2175
  }
2154
2176
  }
@@ -2159,21 +2181,19 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
2159
2181
 
2160
2182
  if (hasSafeSuggestion) {
2161
2183
  if (options.includeJsonIntro !== false) {
2162
- lines.push("Safe default suggestion:");
2184
+ lines.push("安全默认建议:");
2163
2185
  }
2164
2186
  lines.push(JSON.stringify(suggestion, null, 2));
2165
2187
  }
2166
2188
 
2167
- // OMP divergence: session affinity is handled by multi-credential auth, not compat.
2168
- // No per-flag advice lines remain; only the optional long-retention guidance below.
2169
2189
  appendCredentialSafeProviderGuidance(lines, options, suggestion);
2170
2190
  }
2171
2191
 
2172
2192
  function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: string[]): void {
2173
2193
  if (!optional.includes("supportsLongPromptCacheRetention")) return;
2174
2194
  lines.push("");
2175
- lines.push("Optional (not required, not auto-fixed):");
2176
- lines.push("- supportsLongPromptCacheRetention: enable only after your endpoint/proxy explicitly supports OpenAI long prompt cache retention.");
2195
+ lines.push("可选项(非必需,不会自动修复):");
2196
+ lines.push("- supportsLongPromptCacheRetention:仅当 endpoint / proxy 明确支持 OpenAI long prompt cache retention 时再开启。");
2177
2197
  lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
2178
2198
  }
2179
2199
 
@@ -2190,17 +2210,15 @@ function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: s
2190
2210
  * exercise it via __internals_for_tests.
2191
2211
  */
2192
2212
  function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
2193
- // Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
2194
- // If no slash is found, fall back to the key itself.
2195
2213
  const slashIdx = key.indexOf("/");
2196
2214
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
2197
2215
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
2198
2216
 
2199
2217
  const modelsJsonPath = getModelsJsonDisplayPath();
2200
2218
  const lines: string[] = [
2201
- `💡 omp-cache-optimizer: ${key} is a third-party GPT/OpenAI-compatible proxy but merged compat lacks ${missing.join(" and ")}.`,
2202
- `Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
2203
- ``,
2219
+ `💡 omp-cache-optimizer:${key} 是第三方 GPT/OpenAI 兼容代理,但合并后的 compat 缺少 ${missing.join(" ")}。`,
2220
+ `编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
2221
+ "",
2204
2222
  ];
2205
2223
 
2206
2224
  appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
@@ -2259,20 +2277,16 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
2259
2277
  function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
2260
2278
  const suggestion = buildDeepSeekCompatSuggestion(missing);
2261
2279
  if (Object.keys(suggestion).length > 0) {
2262
- lines.push("Recommended DeepSeek compat snippet:");
2280
+ lines.push("推荐的 DeepSeek compat 片段:");
2263
2281
  lines.push(JSON.stringify(suggestion, null, 2));
2264
2282
  }
2265
2283
 
2266
2284
  if (missing.includes("requiresReasoningContentForToolCalls")) {
2267
- lines.push("- requiresReasoningContentForToolCalls: true keeps replayed assistant tool-call turns compatible with DeepSeek reasoning_content requirements.");
2285
+ lines.push("- requiresReasoningContentForToolCalls:保持带工具调用的 assistant 重放与 DeepSeek reasoning_content 要求兼容。");
2268
2286
  }
2269
2287
  if (missing.includes("supportsLongPromptCacheRetention")) {
2270
- lines.push("- supportsLongPromptCacheRetention: enable for DeepSeek-compatible endpoints that support long cache retention.");
2288
+ lines.push("- supportsLongPromptCacheRetention:仅当 DeepSeek 兼容 endpoint 支持长缓存保留时再开启。");
2271
2289
  }
2272
- // OMP divergence: thinkingFormat is no longer flagged. DeepSeek reasoning format
2273
- // is auto-detected by OMP's openai-completions transport; the "deepseek" value
2274
- // is not a valid OMP thinkingFormat (OMP uses openai|openrouter|zai|qwen|...).
2275
- // Session affinity is handled by OMP multi-credential auth, not compat keys.
2276
2290
 
2277
2291
  appendCredentialSafeProviderGuidance(lines, placement, suggestion);
2278
2292
  }
@@ -2283,8 +2297,8 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
2283
2297
  const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
2284
2298
  const modelsJsonPath = getModelsJsonDisplayPath();
2285
2299
  const lines: string[] = [
2286
- `💡 omp-cache-optimizer: ${key} is DeepSeek-like but merged compat lacks ${missing.join(" and ")}.`,
2287
- `Proxies may reduce or hide cache hits. Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (at the same level as baseUrl/api/apiKey/models).`,
2300
+ `💡 omp-cache-optimizer:${key} 看起来是 DeepSeek 风格模型,但合并后的 compat 缺少 ${missing.join(" ")}。`,
2301
+ `这可能让代理降低或隐藏缓存命中。编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
2288
2302
  "",
2289
2303
  ];
2290
2304
 
@@ -2332,8 +2346,8 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
2332
2346
  if (getCompat(model).cacheControlFormat === "anthropic") return undefined;
2333
2347
 
2334
2348
  return (
2335
- `💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
2336
- "OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
2349
+ `💡 omp-cache-optimizer:${modelKey(model)} 看起来是 Claude/Anthropic 风格模型,但 OpenAI 兼容 compat 缺少 cacheControlFormat: "anthropic"。` +
2350
+ "只有当 endpoint 支持并启用了这个 compat 字段时,OMP 才能放置 Anthropic cache_control 断点。"
2337
2351
  );
2338
2352
  },
2339
2353
  },
@@ -3330,30 +3344,26 @@ function formatTokenCount(value: number): string {
3330
3344
  return `${millions.toFixed(2)}M`;
3331
3345
  }
3332
3346
 
3347
+ function localizeAdapterLabel(label: string): string {
3348
+ return label.endsWith(" cache") ? `${label.slice(0, -6)} 缓存` : label;
3349
+ }
3350
+
3333
3351
  function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
3334
3352
  const percent = stats.totalInputTokens > 0
3335
3353
  ? ` (${Math.round((stats.cachedInputTokens / stats.totalInputTokens) * 100)}%)`
3336
3354
  : "";
3337
3355
  const writeText = adapter.showCacheWrite && stats.cacheWriteInputTokens > 0
3338
- ? ` · write ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
3356
+ ? ` · 写入 ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
3339
3357
  : "";
3340
3358
 
3341
- return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
3359
+ return `${localizeAdapterLabel(adapter.label)} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
3342
3360
  }
3343
3361
 
3344
- /**
3345
- * Compute a hit-ratio percentage string for a value between 0 and 1.
3346
- * Returns e.g. "75%", "0%", "100%", or "N/A" for zero total.
3347
- */
3348
3362
  function formatHitRatio(hits: number, total: number): string {
3349
- if (total <= 0) return "N/A";
3363
+ if (total <= 0) return "无数据";
3350
3364
  return `${Math.round((hits / total) * 100)}%`;
3351
3365
  }
3352
3366
 
3353
- /**
3354
- * Format a token-to-M abbreviation for stats output.
3355
- * Example: 1500000 → "1.50M"
3356
- */
3357
3367
  function formatTokenM(value: number): string {
3358
3368
  const millions = Math.max(0, Math.round(value)) / 1_000_000;
3359
3369
  if (millions === 0) return "0";
@@ -3362,27 +3372,18 @@ function formatTokenM(value: number): string {
3362
3372
  return millions.toFixed(2);
3363
3373
  }
3364
3374
 
3365
- /**
3366
- * Check if an assistant message's usage fields appear to be missing or empty.
3367
- * Returns true when normalized fields (input, cacheRead, cacheWrite) are all
3368
- * absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
3369
- * for the given adapter.
3370
- */
3371
3375
  function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
3372
3376
  const usage = usageRecordFromAssistant(message);
3373
3377
  if (!usage) return true;
3374
3378
 
3375
- // Check normalized fields
3376
3379
  const input = getNonNegativeNumber(usage, "input");
3377
3380
  const cacheRead = getNonNegativeNumber(usage, "cacheRead");
3378
3381
  const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
3379
3382
 
3380
- // If normalized fields exist with non-zero values, usage is present
3381
3383
  if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
3382
3384
  return false;
3383
3385
  }
3384
3386
 
3385
- // Check raw usage for the adapter's provider family
3386
3387
  const rawUsage = adapter.normalizeUsage(message);
3387
3388
  if (!rawUsage || (rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0)) {
3388
3389
  return true;
@@ -3391,64 +3392,55 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
3391
3392
  return false;
3392
3393
  }
3393
3394
 
3394
- /**
3395
- * Build a summary string for the recent trend (last N samples).
3396
- * Example: "Recent 10: 7/10 hits · 65% tok cached · no missing usage"
3397
- */
3398
3395
  function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
3399
3396
  const recent = samples.slice(-maxCount);
3400
- if (recent.length === 0) return `Recent ${maxCount}: no samples yet`;
3397
+ if (recent.length === 0) return `最近 ${maxCount} 次:暂无样本`;
3401
3398
 
3402
3399
  const hits = recent.filter((s) => s.hit).length;
3403
3400
  const totalCached = recent.reduce((sum, s) => sum + s.cachedInputTokens, 0);
3404
3401
  const totalInput = recent.reduce((sum, s) => sum + s.totalInputTokens, 0);
3405
3402
  const missingCount = recent.filter((s) => s.missingUsageFields).length;
3406
3403
 
3407
- const hitRatio = formatHitRatio(hits, recent.length);
3408
- const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "N/A";
3404
+ const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "无数据";
3409
3405
 
3410
- let result = `Recent ${recent.length}/${maxCount}: ${hits}/${recent.length} hits · ${tokenRatio} tok cached`;
3406
+ let result = `最近 ${recent.length}/${maxCount} 次:${hits}/${recent.length} 次命中 · ${tokenRatio} tok 已缓存`;
3411
3407
  if (missingCount > 0) {
3412
- result += ` · ${missingCount} missing usage`;
3408
+ result += ` · ${missingCount} usage 缺失`;
3413
3409
  }
3414
3410
  return result;
3415
3411
  }
3416
3412
 
3417
- /**
3418
- * Build the output for `/cache-optimizer stats`.
3419
- */
3420
3413
  function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
3421
3414
  const lines: string[] = [];
3422
3415
 
3423
3416
  if (!model || !adapter) {
3424
- lines.push("ℹ️ No cache-adapter-matched model active. Select a model with a recognized provider family.");
3417
+ lines.push("ℹ️ 当前活动模型未匹配到缓存适配器。请选择可识别模型家族后再查看统计。");
3425
3418
  return lines.join("\n");
3426
3419
  }
3427
3420
 
3428
3421
  const key = modelKey(model);
3429
3422
  const currentStats = stats ?? emptyCacheStats();
3430
3423
 
3431
- lines.push(`Model key: ${key}`);
3432
- lines.push(`Adapter: ${adapter.label}`);
3424
+ lines.push(`模型键:${key}`);
3425
+ lines.push(`适配器:${localizeAdapterLabel(adapter.label)}`);
3433
3426
  lines.push("");
3434
- lines.push("── Today ──");
3435
- lines.push(`Requests: ${currentStats.hitRequests} hit / ${currentStats.totalRequests} total · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
3436
- lines.push(`Cached tokens: ${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M input · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "N/A"}`);
3427
+ lines.push("── 今日 ──");
3428
+ lines.push(`请求数:${currentStats.hitRequests} 次命中 / ${currentStats.totalRequests} 次总计 · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
3429
+ lines.push(`缓存 tokens:${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M 输入 · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "无数据"}`);
3437
3430
  if (currentStats.cacheWriteInputTokens > 0) {
3438
- lines.push(`Cache write: ${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
3431
+ lines.push(`缓存写入:${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
3439
3432
  }
3440
3433
 
3441
3434
  lines.push("");
3442
- lines.push("── Recent trend ──");
3435
+ lines.push("── 近期趋势 ──");
3443
3436
  lines.push(formatRecentTrendSummary(recentSamples, 10));
3444
3437
  lines.push(formatRecentTrendSummary(recentSamples, 30));
3445
3438
 
3446
- // Check if any sample has missingUsageFields flagged
3447
3439
  const missingAny = recentSamples.some((s) => s.missingUsageFields);
3448
3440
  if (missingAny) {
3449
3441
  lines.push("");
3450
- lines.push("⚠️ Some recent responses had missing or empty cache usage fields. Footer may under-report hits.");
3451
- lines.push(" The proxy may not return prompt_cache_hit_tokens or usage.input/cacheRead in responses.");
3442
+ lines.push("⚠️ 近期有响应缺少或返回了空的缓存 usage 字段,footer 命中率可能偏低。");
3443
+ lines.push(" 代理可能没有返回 prompt_cache_hit_tokens,或没有返回 usage.input/cacheRead 等字段。");
3452
3444
  }
3453
3445
 
3454
3446
  return lines.join("\n");
@@ -3889,19 +3881,14 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3889
3881
  const baseUrl = lower(model.baseUrl || "");
3890
3882
  const provider = lower(model.provider);
3891
3883
 
3892
- // Router/channel diagnostics only apply to OpenAI-compatible proxy APIs.
3893
- // Native APIs like mistral-conversations, azure-openai-responses,
3894
- // anthropic-messages, or bedrock-converse-stream are intentionally excluded.
3895
3884
  if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
3896
3885
  return notes;
3897
3886
  }
3898
3887
 
3899
- // Official OpenAI bypass — no notes needed.
3900
3888
  if (isOfficialOpenAIBaseUrl(model)) {
3901
3889
  return notes;
3902
3890
  }
3903
3891
 
3904
- // ── 1. OpenRouter ────────────────────────────────────────────────
3905
3892
  if (
3906
3893
  baseUrl.includes("openrouter.ai") ||
3907
3894
  baseUrl.includes("openrouter") ||
@@ -3913,32 +3900,28 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3913
3900
  const hasOrder = !!routing?.order;
3914
3901
 
3915
3902
  notes.push(
3916
- "🔀 Router/channel: OpenRouter detected. OpenRouter is a multi-provider router; " +
3917
- "low cache hit rates are common when each turn lands on a different upstream provider.",
3903
+ "🔀 路由/渠道:检测到 OpenRouter。OpenRouter 是多上游路由器;如果每一轮落到不同上游,缓存命中率偏低很常见。",
3918
3904
  );
3919
3905
 
3920
3906
  if (!hasOnly && !hasOrder) {
3921
3907
  notes.push(
3922
- " Suggestion: Add an openRouterRouting config to fix the upstream provider. " +
3923
- "Example for models.yml -> providers[\"<providerId>\"] -> compat:",
3908
+ ' 建议:添加 openRouterRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
3924
3909
  );
3925
3910
  notes.push(
3926
3911
  ` { "supportsLongPromptCacheRetention": true, ` +
3927
3912
  `"openRouterRouting": { "only": ["<provider-slug>"] } }`,
3928
3913
  );
3929
3914
  notes.push(
3930
- ' Replace <provider-slug> with the actual OpenRouter provider slug (e.g. "openai", "anthropic").',
3915
+ ' <provider-slug> 替换成真实的 OpenRouter provider slug(如 "openai"、"anthropic")。',
3931
3916
  );
3932
3917
  notes.push(
3933
- " Alternatively, use openRouterRouting.order: [\"<provider-slug>\", \"...\"] for fallback order. " +
3934
- "Only set supportsLongPromptCacheRetention if your upstream supports long cache retention.",
3918
+ ' 也可以用 openRouterRouting.order: ["<provider-slug>", "..."] 作为回退顺序。只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。',
3935
3919
  );
3936
3920
  }
3937
3921
 
3938
3922
  return notes;
3939
3923
  }
3940
3924
 
3941
- // ── 2. Vercel AI Gateway ─────────────────────────────────────────
3942
3925
  if (
3943
3926
  baseUrl.includes("ai-gateway.vercel.sh") ||
3944
3927
  provider.includes("vercel") ||
@@ -3950,81 +3933,54 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
3950
3933
  const hasOrder = !!routing?.order;
3951
3934
 
3952
3935
  notes.push(
3953
- "🔀 Router/channel: Vercel AI Gateway detected. The gateway may route to different " +
3954
- "provider endpoints per request, reducing cache locality.",
3936
+ "🔀 路由/渠道:检测到 Vercel AI Gateway。这个网关可能把不同请求分发到不同 provider endpoint,降低缓存局部性。",
3955
3937
  );
3956
3938
 
3957
3939
  if (!hasOnly && !hasOrder) {
3958
3940
  notes.push(
3959
- " Suggestion: Add a vercelGatewayRouting config to fix the upstream. " +
3960
- "Example for models.yml -> providers[\"<providerId>\"] -> compat:",
3941
+ ' 建议:添加 vercelGatewayRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
3961
3942
  );
3962
3943
  notes.push(
3963
3944
  ` { "supportsLongPromptCacheRetention": true, ` +
3964
3945
  `"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
3965
3946
  );
3966
3947
  notes.push(
3967
- " Replace <provider-id> with the actual Vercel provider ID (e.g. \"openai\").",
3948
+ ' <provider-id> 替换成真实的 Vercel provider ID(如 "openai")。',
3968
3949
  );
3969
3950
  notes.push(
3970
- " Only set supportsLongPromptCacheRetention if your upstream supports it.",
3951
+ " 只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。",
3971
3952
  );
3972
3953
  }
3973
3954
 
3974
3955
  return notes;
3975
3956
  }
3976
3957
 
3977
- // ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
3978
3958
  const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
3979
3959
  if (
3980
3960
  aggregationPatterns.some((p) => baseUrl.includes(p)) ||
3981
3961
  aggregationPatterns.some((p) => provider.includes(p))
3982
3962
  ) {
3983
3963
  notes.push(
3984
- "🔀 Router/channel: Self-hosted aggregation proxy detected (LiteLLM / OneAPI / NewAPI / VoAPI). " +
3985
- "These proxies route to multiple upstream accounts or instances, which can split the cache.",
3986
- );
3987
- notes.push(
3988
- " Suggestions:",
3989
- );
3990
- notes.push(
3991
- " • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
3992
- );
3993
- notes.push(
3994
- " • Forward prompt_cache_key and session-affinity headers to the upstream.",
3995
- );
3996
- notes.push(
3997
- " • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
3998
- );
3999
- notes.push(
4000
- ` Safe compat default: { "supportsLongPromptCacheRetention": true }`,
4001
- );
4002
- notes.push(
4003
- ` Add supportsLongPromptCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
3964
+ "🔀 路由/渠道:检测到自建聚合代理(LiteLLM / OneAPI / NewAPI / VoAPI)。这类代理常把请求分到多个上游账号或实例,导致缓存被拆散。",
4004
3965
  );
3966
+ notes.push(" 建议:");
3967
+ notes.push(" • 确保代理能按 session 固定到单一上游(session_id affinity)。");
3968
+ notes.push(" • 向上游透传 prompt_cache_key 与会话亲和性相关 header。");
3969
+ notes.push(" • 在响应里返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
3970
+ notes.push(` 可作为起点的 compat:{ "supportsLongPromptCacheRetention": true }`);
3971
+ notes.push(" 只有在代理明确支持 prompt_cache_retention 时才加 supportsLongPromptCacheRetention。");
4005
3972
 
4006
3973
  return notes;
4007
3974
  }
4008
3975
 
4009
- // ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
4010
3976
  if (api === "openai-completions" && baseUrl) {
4011
3977
  const missing = describeMissingCacheCompatForModel(model);
4012
- notes.push(
4013
- "🔀 Router/channel: Third-party OpenAI-compatible proxy. If cache hit rates are low:",
4014
- );
4015
- notes.push(
4016
- " • Verify the proxy routes to the same upstream account/instance per session.",
4017
- );
4018
- notes.push(
4019
- " • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
4020
- );
4021
- notes.push(
4022
- " • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
4023
- );
3978
+ notes.push("🔀 路由/渠道:第三方 OpenAI 兼容代理。如果缓存命中率偏低:");
3979
+ notes.push(" • 确认代理会把同一 session 路由到同一个上游账号/实例。");
3980
+ notes.push(" • 确认代理会透传 prompt_cache_key,并发送会话亲和性相关 header。");
3981
+ notes.push(" • 确认代理会返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
4024
3982
  if (missing.length > 0) {
4025
- notes.push(
4026
- ` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
4027
- );
3983
+ notes.push(` • 上面这些 compat 字段(${missing.join(", ")})有助于提升缓存稳定性。`);
4028
3984
  }
4029
3985
 
4030
3986
  return notes;
@@ -4038,38 +3994,38 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
4038
3994
 
4039
3995
  if (isMistralConversationsApi(api)) {
4040
3996
  return [
4041
- "ℹ️ Compat check not applicable for this model.",
4042
- " Native Mistral `mistral-conversations` uses provider-native transport; OpenAI-compatible proxy compat flags do not apply.",
3997
+ "ℹ️ 当前模型不适用 compat 检查。",
3998
+ " 原生 Mistral `mistral-conversations` 使用 provider 原生传输;OpenAI 兼容代理 compat 不适用。",
4043
3999
  ];
4044
4000
  }
4045
4001
 
4046
4002
  if (api === "azure-openai-responses") {
4047
4003
  return [
4048
- "ℹ️ Compat check not applicable for this model.",
4049
- " Native Azure OpenAI Responses uses the Responses transport; OpenAI-compatible proxy compat flags do not apply.",
4004
+ "ℹ️ 当前模型不适用 compat 检查。",
4005
+ " 原生 Azure OpenAI Responses 使用 Responses 传输;OpenAI 兼容代理 compat 不适用。",
4050
4006
  ];
4051
4007
  }
4052
4008
 
4053
4009
  if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
4054
4010
  return [
4055
- "ℹ️ Compat check not applicable for this model.",
4056
- " Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
4011
+ "ℹ️ 当前模型不适用 compat 检查。",
4012
+ " 原生 Responses 传输已经使用运行时核心请求链路;OpenAI 兼容代理 compat 不适用。",
4057
4013
  ];
4058
4014
  }
4059
4015
 
4060
- return ["ℹ️ Compat check not applicable for this model."];
4016
+ return ["ℹ️ 当前模型不适用 compat 检查。"];
4061
4017
  }
4062
4018
 
4063
4019
  function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
4064
4020
  const lines: string[] = [];
4065
- lines.push(`Provider: ${model.provider}`);
4066
- lines.push(`Model: ${model.id}`);
4067
- if (model.name && model.name !== model.id) lines.push(`Name: ${model.name}`);
4068
- lines.push(`API: ${model.api}`);
4069
- lines.push(`Base URL: ${model.baseUrl || "(default)"}`);
4021
+ lines.push(`提供方:${model.provider}`);
4022
+ lines.push(`模型: ${model.id}`);
4023
+ if (model.name && model.name !== model.id) lines.push(`名称: ${model.name}`);
4024
+ lines.push(`API: ${model.api}`);
4025
+ lines.push(`Base URL: ${model.baseUrl || "(默认)"}`);
4070
4026
 
4071
4027
  const compat = getCompat(model);
4072
- lines.push(`Compat: ${JSON.stringify(compat)}`);
4028
+ lines.push(`Compat: ${JSON.stringify(compat)}`);
4073
4029
 
4074
4030
  const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
4075
4031
  const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
@@ -4082,10 +4038,10 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4082
4038
  const advisoryMissing = missing.filter(m => !safeFixableMissing.includes(m));
4083
4039
 
4084
4040
  if (safeFixableMissing.length > 0) {
4085
- lines.push(`⚠️ Missing compat flags: ${safeFixableMissing.join(", ")}`);
4041
+ lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissing.join(", ")}`);
4086
4042
  }
4087
4043
  if (advisoryMissing.length > 0) {
4088
- lines.push(`ℹ️ Optional: ${advisoryMissing.join(", ")} (enable only if needed)`);
4044
+ lines.push(`ℹ️ 可选项:${advisoryMissing.join(", ")}(仅在确认支持时启用)`);
4089
4045
  }
4090
4046
 
4091
4047
  if (missing.length > 0) {
@@ -4093,7 +4049,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4093
4049
  const slashIdx = key.indexOf("/");
4094
4050
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
4095
4051
  const modelsJsonPath = getModelsJsonDisplayPath();
4096
- lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat (same level as baseUrl/api/apiKey/models).`);
4052
+ lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`);
4097
4053
  if (adaptiveThinkingApplicable) {
4098
4054
  appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4099
4055
  } else if (deepSeekCompatApplicable) {
@@ -4103,7 +4059,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4103
4059
  appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
4104
4060
  }
4105
4061
  } else if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
4106
- lines.push("✅ Compat fully configured.");
4062
+ lines.push("✅ compat 配置完整。");
4107
4063
  appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
4108
4064
  } else {
4109
4065
  lines.push(...getCompatCheckNotApplicableLines(model));
@@ -4112,14 +4068,13 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4112
4068
  if (isPromptCacheRetention400Applicable(model)) {
4113
4069
  lines.push("");
4114
4070
  if (options.promptCacheRetention400) {
4115
- lines.push("⚠️ A 400 response was observed while supportsLongPromptCacheRetention is enabled.");
4071
+ lines.push("⚠️ 在启用 supportsLongPromptCacheRetention 时观测到一次 400 响应。");
4116
4072
  lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
4117
4073
  } else {
4118
- lines.push(`ℹ️ Long retention is enabled. ${getPromptCacheRetentionUnsupportedHint()}`);
4074
+ lines.push(`ℹ️ 已启用长缓存保留。${getPromptCacheRetentionUnsupportedHint()}`);
4119
4075
  }
4120
4076
  }
4121
4077
 
4122
- // ── Router/channel diagnostics ──
4123
4078
  const routerNotes = describeRouterChannelDiagnostics(model);
4124
4079
  if (routerNotes.length > 0) {
4125
4080
  lines.push("");
@@ -4128,31 +4083,24 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
4128
4083
  }
4129
4084
  }
4130
4085
 
4131
- // ── Integrity diagnostics ──
4132
4086
  if (lastPromptIntegrityWarningAt > 0) {
4133
4087
  const ago = Date.now() - lastPromptIntegrityWarningAt;
4134
4088
  const mins = Math.floor(ago / 60000);
4135
4089
  if (mins < 5) {
4136
4090
  lines.push("");
4137
- lines.push("⚠️ Recent prompt integrity issue detected:");
4138
- lines.push(` Last detected ${mins > 0 ? `${mins} min` : `${Math.floor(ago / 1000)}s`} ago. The prompt reorder was`);
4139
- lines.push(` skipped on that turn to preserve structural markers.`);
4140
- lines.push(` Common causes: extension system prompt format change, substring collision.`);
4141
- lines.push(` Steps:`);
4142
- lines.push(` 1. Run /reload to reset (may clear transient issues).`);
4143
- lines.push(` 2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 & /reload to disable reorder.`);
4144
- lines.push(` 3. If persistent, file an issue with this doctor output.`);
4091
+ lines.push("⚠️ 最近检测到 prompt 完整性问题:");
4092
+ lines.push(` 最近一次检测于 ${mins > 0 ? `${mins} 分钟` : `${Math.floor(ago / 1000)} 秒`}前;该轮已跳过 prompt 重排以保留结构标记。`);
4093
+ lines.push(" 常见原因:扩展的 system prompt 格式变化,或子串碰撞。");
4094
+ lines.push(" 建议步骤:");
4095
+ lines.push(" 1. 运行 /reload 重置(可清除瞬态问题)。");
4096
+ lines.push(" 2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。");
4097
+ lines.push(" 3. 若持续复现,请带 doctor 输出提 issue。");
4145
4098
  }
4146
4099
  }
4147
4100
 
4148
4101
  return lines.join("\n");
4149
4102
  }
4150
4103
 
4151
- /**
4152
- * Build a "Cache diagnosis" section for low-hit causes, appended to doctor output.
4153
- * This is a separate function because it depends on per-session state (recent samples,
4154
- * per-model stats) that is not available at the module level.
4155
- */
4156
4104
  function buildLowHitDiagnosis(
4157
4105
  model: PiModel,
4158
4106
  adapter: CacheProviderAdapter | undefined,
@@ -4161,101 +4109,74 @@ function buildLowHitDiagnosis(
4161
4109
  ): string[] {
4162
4110
  const lines: string[] = [];
4163
4111
 
4164
- // 1. Missing compat flags (adapter-aware: DeepSeek has extra reasoning compat)
4165
4112
  const fixSugLHD = buildFixSuggestion(model);
4166
4113
  const safeFixableMissingLHD = fixSugLHD ? Object.keys(fixSugLHD.compatKeys) : [];
4167
-
4168
- // 2. Router/channel risk (reuse existing check)
4169
4114
  const routerNotes = describeRouterChannelDiagnostics(model);
4170
-
4171
- // 3. Recent samples missing usage fields
4172
4115
  const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;
4173
-
4174
- // 4. Recent trend analysis
4175
4116
  const recent10 = samples.slice(-10);
4176
4117
  const recent10Hits = recent10.filter((s) => s.hit).length;
4177
4118
  const recent10Total = recent10.length;
4178
4119
  const recent10Cached = recent10.reduce((sum, s) => sum + s.cachedInputTokens, 0);
4179
4120
  const recent10Input = recent10.reduce((sum, s) => sum + s.totalInputTokens, 0);
4180
-
4181
- // 5. Today's overall trend from persisted stats
4182
4121
  const todayStats = stats ?? emptyCacheStats();
4183
4122
 
4184
4123
  const hasMissingCompat = safeFixableMissingLHD.length > 0;
4185
4124
  const hasRouterRisk = routerNotes.length > 0;
4186
4125
  const hasUsageMissing = missingUsageSamples > 0;
4187
-
4188
- // Today's cached-token ratio is used both inside and outside the recent-sample
4189
- // branch. Keep it block-external so doctor/stats never throw for low-hit
4190
- // models that have persisted counters but no recent in-memory samples.
4191
4126
  const todayHitRatio = todayStats.totalInputTokens > 0
4192
4127
  ? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
4193
4128
  : 0;
4194
4129
 
4195
- // Determine if there are actual issues worth flagging
4196
4130
  const hasActualIssues = hasMissingCompat || hasUsageMissing ||
4197
- // Low hit trend (today total > 3 and hit ratio < 30%)
4198
4131
  (todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
4199
4132
  (todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
4200
- // Low hit rate in recent samples (recent10Total >= 3 and all misses)
4201
4133
  (recent10Total >= 3 && recent10Hits === 0);
4202
4134
 
4203
- // Skip section if no issues
4204
4135
  if (!hasActualIssues && !(hasRouterRisk && (hasMissingCompat || hasUsageMissing))) {
4205
4136
  return lines;
4206
4137
  }
4207
4138
 
4208
4139
  lines.push("");
4209
- lines.push("── Cache diagnosis ──");
4140
+ lines.push("── 缓存诊断 ──");
4210
4141
 
4211
- // Priority 1: missing compat flags
4212
4142
  if (hasMissingCompat) {
4213
- lines.push(`⚠️ Missing compat flags: ${safeFixableMissingLHD.join(", ")}`);
4214
- lines.push(" These flags enable prompt caching and session-affinity routing.");
4215
- lines.push(" Run /cache-optimizer compat for edit instructions.");
4143
+ lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissingLHD.join(", ")}`);
4144
+ lines.push(" 这些字段有助于稳定 prompt 缓存与上游路由粘性。");
4145
+ lines.push(" 可运行 /cache-optimizer compat 查看编辑建议。");
4216
4146
  }
4217
4147
 
4218
- // Priority 2: router/channel risk (only flag when there are other issues)
4219
- // Router notes are already shown in the main doctor output, so we only
4220
- // mention them in the diagnosis section when they compound a problem.
4221
4148
  if (hasRouterRisk && (hasMissingCompat || hasUsageMissing || hasActualIssues)) {
4222
- lines.push("🔀 Router/channel proxy detected — see routing notes above.");
4149
+ lines.push("🔀 检测到路由/代理风险 —— 详见上方路由诊断。");
4223
4150
  }
4224
4151
 
4225
- // Priority 3: usage fields missing
4226
4152
  if (hasUsageMissing) {
4227
- lines.push(`⚠️ ${missingUsageSamples}/${samples.length} recent responses had missing/empty usage fields.`);
4228
- lines.push(" Footer may under-report cache hit rate.");
4229
- lines.push(" Verify the proxy returns prompt-level usage (prompt_tokens, input_tokens_details).");
4153
+ lines.push(`⚠️ 最近 ${samples.length} 条样本里有 ${missingUsageSamples} 条缺少或返回了空的 usage 字段。`);
4154
+ lines.push(" Footer 命中率可能会被低估。");
4155
+ lines.push(" 请确认代理会返回 prompt usage(如 prompt_tokens、input_tokens_details)。");
4230
4156
  }
4231
4157
 
4232
- // Priority 4: recent trend low
4233
4158
  if (recent10Total > 0) {
4234
- const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
4235
4159
  if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
4236
- lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${recent10Total} recent samples).`);
4237
- lines.push(" Likely causes: proxy routing to different backends per request,");
4238
- lines.push(" or prompt prefix changes across turns.");
4239
- lines.push(" Verify upstream routing stickiness and supportsLongPromptCacheRetention compat.");
4160
+ lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(最近 ${recent10Total} 条样本)。`);
4161
+ lines.push(" 常见原因:代理把请求路由到不同后端,或 prompt 前缀在各轮之间变化。");
4162
+ lines.push(" 请检查上游路由粘性,以及 supportsLongPromptCacheRetention 配置是否正确。");
4240
4163
  } else if (todayHitRatio < 30 && todayStats.totalRequests > 3) {
4241
- lines.push(`📉 Cache hit rate is low: ${todayHitRatio}% today (${todayStats.totalRequests} total requests).`);
4242
- lines.push(" Check compat flags and proxy upstream routing.");
4164
+ lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(共 ${todayStats.totalRequests} 次请求)。`);
4165
+ lines.push(" 请检查 compat 配置与代理上游路由。");
4243
4166
  }
4244
4167
 
4245
- // Show brief trend summary if there are enough samples
4246
4168
  if (recent10Total >= 3) {
4247
4169
  const trend = formatRecentTrendSummary(samples, 10);
4248
4170
  lines.push(`📊 ${trend}`);
4249
4171
  }
4250
4172
  }
4251
4173
 
4252
- // For fully configured but low hit models, emphasize sticky routing
4253
4174
  if (!hasMissingCompat && !hasRouterRisk && todayStats.totalRequests > 3 && todayHitRatio < 30) {
4254
- lines.push("💡 Compat is configured but cache hit rate remains low.");
4255
- lines.push(" Possible causes:");
4256
- lines.push(" • Proxy still routes to multiple backends — check session affinity on the proxy side.");
4257
- lines.push(" • Prompt prefix varies per turn — check dynamic context in system prompt.");
4258
- lines.push(" • Provider does not return cache usage fields — footer can't measure hits.");
4175
+ lines.push("💡 compat 已配置完整,但缓存命中率仍然偏低。");
4176
+ lines.push(" 可能原因:");
4177
+ lines.push(" • 代理仍把请求分发到多个后端 —— 请检查代理侧的会话粘性。");
4178
+ lines.push(" • prompt 前缀每轮都在变化 —— 请检查 system prompt 中的动态上下文。");
4179
+ lines.push(" • provider 没有返回缓存 usage 字段 —— footer 无法准确测量命中。");
4259
4180
  }
4260
4181
 
4261
4182
  return lines;
@@ -4282,16 +4203,16 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
4282
4203
  const slashIdx = key.indexOf("/");
4283
4204
  const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
4284
4205
  const modelsJsonPath = getModelsJsonDisplayPath();
4285
- lines.push(`Active model: ${key}`);
4206
+ lines.push(`当前模型:${key}`);
4286
4207
  if (safeFixableMissingC.length > 0) {
4287
- lines.push(`Safe-fixable: ${safeFixableMissingC.join(", ")}`);
4208
+ lines.push(`可安全修复:${safeFixableMissingC.join(", ")}`);
4288
4209
  }
4289
4210
  if (advisoryMissingC.length > 0) {
4290
- lines.push(`Optional: ${advisoryMissingC.join(", ")} (enable only if needed)`);
4211
+ lines.push(`可选项:${advisoryMissingC.join(", ")}(仅在确认支持时启用)`);
4291
4212
  }
4292
4213
  lines.push("");
4293
- lines.push(`Edit ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
4294
- lines.push(`(at the same level as baseUrl/api/apiKey/models).`);
4214
+ lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
4215
+ lines.push("(与 baseUrl/api/apiKey/models 同级)。");
4295
4216
  if (adaptiveThinkingApplicable) {
4296
4217
  appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
4297
4218
  } else if (deepSeekCompatApplicable) {
@@ -4302,10 +4223,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
4302
4223
  }
4303
4224
  }
4304
4225
 
4305
- // When compat is fully configured but router/optional notes exist, prefix the status.
4306
4226
  if ((routerNotes.length > 0 || optionalOpenAIProxyCompat.length > 0) && missing.length === 0) {
4307
4227
  if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
4308
- lines.push("✅ Compat fully configured.");
4228
+ lines.push("✅ compat 配置完整。");
4309
4229
  if (isPromptCacheRetention400Applicable(model)) {
4310
4230
  lines.push(getPromptCacheRetentionUnsupportedHint());
4311
4231
  }
@@ -5422,6 +5342,10 @@ export const __internals_for_tests = {
5422
5342
  hashSessionId,
5423
5343
  makeSessionModelKey,
5424
5344
  modelKeyFromSessionKey,
5345
+ makePromptRewriteContextKey,
5346
+ rememberPromptRewriteContext,
5347
+ getPromptRewriteContext,
5348
+ PROMPT_REWRITE_CONTEXT_TTL_MS,
5425
5349
  filterRestorableStatsForSession,
5426
5350
  parsePersistedRoutedModelRef,
5427
5351
  routedModelRefToPiModel,
@@ -5495,11 +5419,10 @@ export default function (pi: ExtensionAPI) {
5495
5419
  let latestCacheHint: PiCacheHintSnapshot | undefined;
5496
5420
  // OMP divergence: prompt rewriting moved from before_agent_start to
5497
5421
  // before_provider_request (OMP's before_agent_start can only inject messages,
5498
- // not mutate systemPrompt). We cache systemPromptOptions + route snapshot here
5499
- // so before_provider_request can apply the 3-step pipeline to the payload.
5500
- let pendingPromptOptions: BuildSystemPromptOptions | undefined;
5501
- let pendingRouteSnapshot: PiRouteSnapshot | undefined;
5502
- let pendingRoutedModel: PiModel | undefined;
5422
+ // not mutate systemPrompt). Store prompt options per session/model so an
5423
+ // overlapping turn or sub-agent cannot overwrite another request's rewrite
5424
+ // context before before_provider_request fires.
5425
+ const promptRewriteContexts = new Map<string, PromptRewriteContext>();
5503
5426
  const PERSIST_DEBOUNCE_MS = 2000;
5504
5427
  /** In-memory recent usage samples per model key (not persisted, cleared on reload). */
5505
5428
  const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
@@ -5823,7 +5746,7 @@ export default function (pi: ExtensionAPI) {
5823
5746
  const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
5824
5747
  statusText = runtimeOptimizerEnabled
5825
5748
  ? statsText
5826
- : `Cache Optimizer disabled · ${statsText}`;
5749
+ : `缓存优化已关闭 · ${statsText}`;
5827
5750
  }
5828
5751
  }
5829
5752
 
@@ -5834,7 +5757,7 @@ export default function (pi: ExtensionAPI) {
5834
5757
  const sk = displayModel ? sessionModelKey(displayModel) : undefined;
5835
5758
  const stats = sk ? cacheStatsByModel[sk] : undefined;
5836
5759
  const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
5837
- statusText = runtimeOptimizerEnabled ? statsText : `Cache Optimizer disabled · ${statsText}`;
5760
+ statusText = runtimeOptimizerEnabled ? statsText : `缓存优化已关闭 · ${statsText}`;
5838
5761
  }
5839
5762
 
5840
5763
  // If optimizeSystemPrompt detected structural truncation on this or
@@ -5842,7 +5765,7 @@ export default function (pi: ExtensionAPI) {
5842
5765
  // /reload before continuing. The flag resets after emission so a
5843
5766
  // single-turn glitch does not permanently taint the footer.
5844
5767
  if (promptTruncationDetected && statusText !== undefined) {
5845
- statusText = statusText + " ⚠️ integrity";
5768
+ statusText = statusText + " ⚠️ 完整性";
5846
5769
  promptTruncationDetected = false;
5847
5770
  lastPromptIntegrityWarningAt = Date.now();
5848
5771
 
@@ -5850,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
5850
5773
  if (!integrityNotificationShown) {
5851
5774
  integrityNotificationShown = true;
5852
5775
  ctx.ui.notify(
5853
- `⚠️ ${LOG_PREFIX}: A prompt structural marker was lost during reorder on this turn. ` +
5854
- `The original prompt was used instead to preserve integrity.\n\n` +
5855
- `Recovery steps:\n` +
5856
- `1. Run /reload to reset (may clear transient issues).\n` +
5857
- `2. Set PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 and /reload to disable reorder.\n` +
5858
- `3. If persistent, run /cache-optimizer doctor and file an issue (no API keys/prompts).`,
5776
+ `⚠️ ${LOG_PREFIX}:本轮重排导致一个 prompt 结构标记丢失。` +
5777
+ `为保证完整性,已回退到原始 prompt。\n\n` +
5778
+ `恢复步骤:\n` +
5779
+ `1. 运行 /reload 重置(可清除瞬态问题)。\n` +
5780
+ `2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。\n` +
5781
+ `3. 若持续复现,请运行 /cache-optimizer doctor 并提 issue(不要包含 API key / prompt)。`,
5859
5782
  "warning",
5860
5783
  );
5861
5784
  }
@@ -5874,7 +5797,7 @@ export default function (pi: ExtensionAPI) {
5874
5797
  // OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
5875
5798
  // commands still mention them as optional guidance.
5876
5799
  if (buildFixSuggestion(displayModel) !== undefined) {
5877
- statusText = statusText + " ⚠️ compat";
5800
+ statusText = statusText + " ⚠️ 配置";
5878
5801
  }
5879
5802
  }
5880
5803
 
@@ -5913,17 +5836,18 @@ export default function (pi: ExtensionAPI) {
5913
5836
  ? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
5914
5837
  : undefined;
5915
5838
 
5916
- // OMP divergence: before_agent_start in OMP can only inject messages (return
5917
- // { message }), NOT mutate systemPrompt. We cache the prompt options + route
5918
- // snapshot here so before_provider_request can apply the 3-step pipeline to
5919
- // the provider payload. If OMP does not supply systemPromptOptions, skill
5920
- // compression and stable-prefix reorder are skipped (only churn strip runs).
5921
5839
  const eventRecord = asRecord(event);
5922
- pendingPromptOptions = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5923
- pendingRouteSnapshot = routeSnapshot;
5924
- pendingRoutedModel = routedModel ?? _ctx.model;
5925
-
5840
+ const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
5926
5841
  const model = routedModel ?? _ctx.model;
5842
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
5843
+ rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
5844
+ options,
5845
+ routeSnapshot,
5846
+ routedModel: model,
5847
+ timestamp: Date.now(),
5848
+ });
5849
+
5850
+ const modelForHint = model;
5927
5851
  const promptCacheKey = getSessionPromptCacheKey(_ctx);
5928
5852
  const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
5929
5853
  const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
@@ -5931,9 +5855,9 @@ export default function (pi: ExtensionAPI) {
5931
5855
  sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
5932
5856
  virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
5933
5857
  virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
5934
- upstreamProvider: routeSnapshot?.provider ?? model?.provider,
5935
- upstreamModelId: routeSnapshot?.modelId ?? model?.id,
5936
- api: model?.api,
5858
+ upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
5859
+ upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
5860
+ api: modelForHint?.api,
5937
5861
  systemPrompt: rawSystemPrompt,
5938
5862
  promptCacheKey,
5939
5863
  cacheRetention,
@@ -5961,21 +5885,24 @@ export default function (pi: ExtensionAPI) {
5961
5885
  requestModel &&
5962
5886
  !isResponsesPromptRewriteBypassApi(requestModel.api)
5963
5887
  ) {
5888
+ const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
5889
+ const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
5890
+ const promptOptions = rewriteContext?.options;
5964
5891
  const original = extractSystemPrompt(resultPayload);
5965
5892
  if (original && original.trim().length > 0) {
5966
5893
  // Step 1: strip per-turn churn from <session-overview>.
5967
5894
  const stripped = stripSessionOverviewChurn(original);
5968
5895
 
5969
5896
  // Step 2: compress skills XML → one-line index (requires cached options).
5970
- const compressed = pendingPromptOptions
5971
- ? compressSkillsInSystemPrompt(stripped, pendingPromptOptions)
5897
+ const compressed = promptOptions
5898
+ ? compressSkillsInSystemPrompt(stripped, promptOptions)
5972
5899
  : stripped;
5973
5900
 
5974
5901
  // Step 3: lift stable content above dynamic content (requires cached options).
5975
5902
  let finalPrompt = compressed;
5976
5903
  let changed = false;
5977
- if (pendingPromptOptions) {
5978
- const optimized = optimizeSystemPrompt(compressed, pendingPromptOptions);
5904
+ if (promptOptions) {
5905
+ const optimized = optimizeSystemPrompt(compressed, promptOptions);
5979
5906
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
5980
5907
  finalPrompt = optimized.systemPrompt;
5981
5908
  changed = true;
@@ -6020,9 +5947,9 @@ export default function (pi: ExtensionAPI) {
6020
5947
  if (warnedPromptCacheRetention400Models.has(key)) return;
6021
5948
  warnedPromptCacheRetention400Models.add(key);
6022
5949
  ctx.ui.notify(
6023
- `⚠️ ${LOG_PREFIX}: ${key} returned HTTP 400 while supportsLongPromptCacheRetention is enabled. ` +
5950
+ `⚠️ ${LOG_PREFIX}:${key} 在启用 supportsLongPromptCacheRetention 时返回了 HTTP 400。` +
6024
5951
  getPromptCacheRetentionUnsupportedHint() +
6025
- ` Run /cache-optimizer doctor for the exact edit location.`,
5952
+ ` 可运行 /cache-optimizer doctor 查看精确编辑位置。`,
6026
5953
  "warning",
6027
5954
  );
6028
5955
  });
@@ -6111,16 +6038,16 @@ export default function (pi: ExtensionAPI) {
6111
6038
  resetCurrentSessionStats();
6112
6039
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6113
6040
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6114
- cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6041
+ cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
6115
6042
  } else if (subcommand === "disable") {
6116
6043
  setRuntimeOptimizerEnabled(false);
6117
6044
  resetCurrentSessionStats();
6118
6045
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6119
6046
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6120
- cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6047
+ cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
6121
6048
  } else if (subcommand === "doctor") {
6122
6049
  if (!model) {
6123
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6050
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
6124
6051
  return;
6125
6052
  }
6126
6053
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
@@ -6135,7 +6062,7 @@ export default function (pi: ExtensionAPI) {
6135
6062
  cmdCtx.ui.notify(fullDiagnosis, "info");
6136
6063
  } else if (subcommand === "stats") {
6137
6064
  if (!model) {
6138
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6065
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
6139
6066
  return;
6140
6067
  }
6141
6068
  const adapter = selectAdapterForModel(model);
@@ -6146,7 +6073,7 @@ export default function (pi: ExtensionAPI) {
6146
6073
  cmdCtx.ui.notify(output, "info");
6147
6074
  } else if (subcommand === "compat") {
6148
6075
  if (!model) {
6149
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6076
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
6150
6077
  return;
6151
6078
  }
6152
6079
  const compatResult = buildCompatDiagnosis(model);
@@ -6155,19 +6082,19 @@ export default function (pi: ExtensionAPI) {
6155
6082
  } else {
6156
6083
  cmdCtx.ui.notify(
6157
6084
  isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
6158
- ? "✅ Compat fully configured."
6085
+ ? "✅ compat 配置完整。"
6159
6086
  : getCompatCheckNotApplicableLines(model).join("\n"),
6160
6087
  "info",
6161
6088
  );
6162
6089
  }
6163
6090
  } else if (subcommand === "reset") {
6164
6091
  if (!model) {
6165
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6092
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
6166
6093
  return;
6167
6094
  }
6168
6095
  const adapter = selectAdapterForModel(model);
6169
6096
  if (!adapter) {
6170
- cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6097
+ cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
6171
6098
  return;
6172
6099
  }
6173
6100
 
@@ -6185,21 +6112,21 @@ export default function (pi: ExtensionAPI) {
6185
6112
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6186
6113
 
6187
6114
  cmdCtx.ui.notify(
6188
- `✅ Reset local session cache stats for "${displayKey}". ` +
6189
- "Upstream provider prompt cache was not modified. " +
6190
- "New requests will start a fresh stats bucket for this OMP session.",
6115
+ `✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
6116
+ "上游 provider prompt cache 未被修改。" +
6117
+ "后续请求会为当前 OMP session 开始新的统计桶。",
6191
6118
  "info",
6192
6119
  );
6193
6120
  } else if (subcommand === "fix") {
6194
6121
  if (!model) {
6195
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6122
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6196
6123
  return;
6197
6124
  }
6198
6125
 
6199
6126
  const suggestion = buildFixSuggestion(model);
6200
6127
  if (!suggestion) {
6201
6128
  const key = modelKey(model);
6202
- cmdCtx.ui.notify(`✅ Nothing to fix for "${key}". Compat already configured.`, "info");
6129
+ cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
6203
6130
  return;
6204
6131
  }
6205
6132
 
@@ -6210,14 +6137,14 @@ export default function (pi: ExtensionAPI) {
6210
6137
  const compatResult = buildCompatDiagnosis(model);
6211
6138
  const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
6212
6139
  cmdCtx.ui.notify(
6213
- `📝 Manual fix for ${getModelsJsonDisplayPath()}:\n\n` +
6214
- `Provider: ${suggestion.providerLabel}\n` +
6215
- `Model: ${suggestion.modelId}\n\n` +
6216
- `Add these compat keys (model level, under the model entry):\n\n` +
6140
+ `📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
6141
+ `提供方:${suggestion.providerLabel}\n` +
6142
+ `模型:${suggestion.modelId}\n\n` +
6143
+ `在模型级 compat(模型条目下)添加这些键:\n\n` +
6217
6144
  `compat:\n${yamlSnippet}\n\n` +
6218
- `Or at provider level (under providers["${suggestion.providerLabel}"]):\n\n` +
6145
+ `或放到 provider 级(providers["${suggestion.providerLabel}"] 下):\n\n` +
6219
6146
  `compat:\n${yamlSnippet}\n\n` +
6220
- `After editing, run /reload.\n` +
6147
+ `编辑后运行 /reload。\n` +
6221
6148
  (compatResult ? `\n${compatResult}` : ""),
6222
6149
  "info",
6223
6150
  );
@@ -6225,31 +6152,31 @@ export default function (pi: ExtensionAPI) {
6225
6152
  // Try interactive selection menu when UI supports it
6226
6153
  if (cmdCtx.hasUI) {
6227
6154
  const menuOptions = [
6228
- "Enable Turn on runtime optimizations",
6229
- "Disable Turn off runtime optimizations",
6230
- "Doctor Show cache configuration",
6231
- "Stats Show cache stats and trend",
6232
- "Compat Show compat suggestion",
6233
- "Fix Auto-fix compat issues (writes models.yml)",
6234
- "Reset Reset local session stats",
6235
- "Cancel",
6155
+ "启用 —— 打开运行时优化",
6156
+ "关闭 —— 关闭运行时优化",
6157
+ "诊断 —— 查看缓存配置",
6158
+ "统计 —— 查看缓存统计与趋势",
6159
+ "兼容 —— 查看 compat 建议",
6160
+ "修复 —— 查看 compat 修复建议(会写 models.yml 时另行提示)",
6161
+ "重置 —— 重置本地 session 统计",
6162
+ "取消",
6236
6163
  ];
6237
- const choice = await cmdCtx.ui.select("Cache Optimizer", menuOptions);
6164
+ const choice = await cmdCtx.ui.select("缓存优化器", menuOptions);
6238
6165
  if (choice === menuOptions[0]) {
6239
6166
  setRuntimeOptimizerEnabled(true);
6240
6167
  resetCurrentSessionStats();
6241
6168
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6242
6169
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6243
- cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
6170
+ cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
6244
6171
  } else if (choice === menuOptions[1]) {
6245
6172
  setRuntimeOptimizerEnabled(false);
6246
6173
  resetCurrentSessionStats();
6247
6174
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6248
6175
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6249
- cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
6176
+ cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
6250
6177
  } else if (choice === menuOptions[2]) {
6251
6178
  if (!model) {
6252
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6179
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6253
6180
  } else {
6254
6181
  const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
6255
6182
  const adapter = selectAdapterForModel(model);
@@ -6264,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
6264
6191
  }
6265
6192
  } else if (choice === menuOptions[3]) {
6266
6193
  if (!model) {
6267
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6194
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6268
6195
  } else {
6269
6196
  const adapter = selectAdapterForModel(model);
6270
6197
  const sk = model ? sessionModelKey(model) : undefined;
@@ -6275,7 +6202,7 @@ export default function (pi: ExtensionAPI) {
6275
6202
  }
6276
6203
  } else if (choice === menuOptions[4]) {
6277
6204
  if (!model) {
6278
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6205
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6279
6206
  } else {
6280
6207
  const compatResult = buildCompatDiagnosis(model);
6281
6208
  if (compatResult) {
@@ -6283,7 +6210,7 @@ export default function (pi: ExtensionAPI) {
6283
6210
  } else {
6284
6211
  cmdCtx.ui.notify(
6285
6212
  isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
6286
- ? "✅ Compat fully configured."
6213
+ ? "✅ compat 配置完整。"
6287
6214
  : getCompatCheckNotApplicableLines(model).join("\n"),
6288
6215
  "info",
6289
6216
  );
@@ -6292,13 +6219,13 @@ export default function (pi: ExtensionAPI) {
6292
6219
  } else if (choice === menuOptions[5]) {
6293
6220
  // Fix — auto-fix compat issues
6294
6221
  if (!model) {
6295
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6222
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6296
6223
  return;
6297
6224
  }
6298
6225
  const suggestion = buildFixSuggestion(model);
6299
6226
  if (!suggestion) {
6300
6227
  const key = modelKey(model);
6301
- cmdCtx.ui.notify(`✅ Nothing to fix for "${key}". Compat already configured.`, "info");
6228
+ cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
6302
6229
  return;
6303
6230
  }
6304
6231
 
@@ -6306,30 +6233,30 @@ export default function (pi: ExtensionAPI) {
6306
6233
  const compatResult = buildCompatDiagnosis(model);
6307
6234
  const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
6308
6235
  cmdCtx.ui.notify(
6309
- `📝 Manual fix for ${getModelsJsonDisplayPath()}:\n\n` +
6310
- `Provider: ${suggestion.providerLabel}\n` +
6311
- `Model: ${suggestion.modelId}\n\n` +
6312
- `Add these compat keys:\n\n` +
6236
+ `📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
6237
+ `提供方:${suggestion.providerLabel}\n` +
6238
+ `模型:${suggestion.modelId}\n\n` +
6239
+ `添加这些 compat 键:\n\n` +
6313
6240
  `compat:\n${yamlSnippet}\n\n` +
6314
- `After editing, run /reload.\n` +
6241
+ `编辑后运行 /reload。\n` +
6315
6242
  (compatResult ? `\n${compatResult}` : ""),
6316
6243
  "info",
6317
6244
  );
6318
6245
  } else if (choice === menuOptions[6]) {
6319
6246
  if (!model) {
6320
- cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
6247
+ cmdCtx.ui.notify("当前没有活动模型。请先用 /model omp --model 选择模型。", "warning");
6321
6248
  } else {
6322
6249
  const adapter = selectAdapterForModel(model);
6323
6250
  if (!adapter) {
6324
- cmdCtx.ui.notify("ℹ️ Active model does not match a cache adapter. No stats to reset.", "info");
6251
+ cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
6325
6252
  } else {
6326
6253
  const displayKey = modelKey(model);
6327
6254
  resetStatsForModel(model);
6328
6255
  await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
6329
6256
  await publishStatus(cmdCtx as unknown as ExtensionContext, model);
6330
6257
  cmdCtx.ui.notify(
6331
- `✅ Reset local session cache stats for "${displayKey}". ` +
6332
- "Upstream provider prompt cache was not modified.",
6258
+ `✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
6259
+ "上游 provider prompt cache 未被修改。",
6333
6260
  "info",
6334
6261
  );
6335
6262
  }
@@ -6341,14 +6268,14 @@ export default function (pi: ExtensionAPI) {
6341
6268
 
6342
6269
  // Fallback: text help when no interactive UI
6343
6270
  const diagnosis: string[] = [];
6344
- diagnosis.push("📋 /cache-optimizer commands:");
6345
- diagnosis.push(" enable Enable prompt/cache optimizations for this OMP process");
6346
- diagnosis.push(" disable Disable prompt/cache optimizations for this OMP process");
6347
- diagnosis.push(" doctor Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
6348
- diagnosis.push(" stats Show active model stats bucket and recent trend");
6349
- diagnosis.push(" compat Show compat suggestion with edit location");
6350
- diagnosis.push(" fix Auto-fix compat issues (writes models.yml, requires UI)");
6351
- diagnosis.push(" reset Reset local session stats for current model (does not affect upstream)");
6271
+ diagnosis.push("📋 /cache-optimizer 命令:");
6272
+ diagnosis.push(" enable —— 为当前 OMP 进程开启 prompt/cache 优化");
6273
+ diagnosis.push(" disable —— 为当前 OMP 进程关闭 prompt/cache 优化");
6274
+ diagnosis.push(" doctor —— 查看当前模型/provider/api/baseUrl/compat 与低命中诊断");
6275
+ diagnosis.push(" stats —— 查看当前活动模型的统计桶与近期趋势");
6276
+ diagnosis.push(" compat —— 查看 compat 建议与编辑位置");
6277
+ diagnosis.push(" fix —— 查看 compat 修复建议(需要 UI 时另有提示)");
6278
+ diagnosis.push(" reset —— 重置当前模型的本地 session 统计(不影响上游)");
6352
6279
  diagnosis.push("");
6353
6280
  diagnosis.push(formatOptimizerRuntimeMode());
6354
6281
  diagnosis.push("");
@@ -6356,17 +6283,17 @@ export default function (pi: ExtensionAPI) {
6356
6283
  const displayKey = modelKey(model);
6357
6284
  const missing = describeMissingCacheCompatForModel(model);
6358
6285
  if (missing.length > 0) {
6359
- diagnosis.push(`⚠️ Active model "${displayKey}" missing compat: ${missing.join(", ")}`);
6360
- diagnosis.push('Run "/cache-optimizer compat" for edit instructions.');
6286
+ diagnosis.push(`⚠️ 当前模型 "${displayKey}" 缺少 compat:${missing.join(", ")}`);
6287
+ diagnosis.push('可运行 "/cache-optimizer compat" 查看编辑建议。');
6361
6288
  } else if (isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)) {
6362
- diagnosis.push(`✅ Active model "${displayKey}": compat fully configured.`);
6289
+ diagnosis.push(`✅ 当前模型 "${displayKey}"compat 配置完整。`);
6363
6290
  } else {
6364
- diagnosis.push(`ℹ️ Active model "${displayKey}": compat check not applicable.`);
6291
+ diagnosis.push(`ℹ️ 当前模型 "${displayKey}":不适用 compat 检查。`);
6365
6292
  const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
6366
6293
  for (const line of detailLines) diagnosis.push(line);
6367
6294
  }
6368
6295
  } else {
6369
- diagnosis.push("No active model selected.");
6296
+ diagnosis.push("当前没有活动模型。");
6370
6297
  }
6371
6298
  cmdCtx.ui.notify(diagnosis.join("\n"), "info");
6372
6299
  }