omp-cache-optimizer 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +273 -346
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -312,6 +312,15 @@ type CacheUsageSample = {
|
|
|
312
312
|
missingUsageFields: boolean;
|
|
313
313
|
};
|
|
314
314
|
|
|
315
|
+
type PromptRewriteContext = {
|
|
316
|
+
options?: BuildSystemPromptOptions;
|
|
317
|
+
routeSnapshot?: PiRouteSnapshot;
|
|
318
|
+
routedModel?: PiModel;
|
|
319
|
+
timestamp: number;
|
|
320
|
+
};
|
|
321
|
+
|
|
322
|
+
const PROMPT_REWRITE_CONTEXT_TTL_MS = 10_000;
|
|
323
|
+
|
|
315
324
|
/** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
|
|
316
325
|
const MAX_RECENT_SAMPLES = 50;
|
|
317
326
|
|
|
@@ -968,16 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
|
|
|
968
977
|
*/
|
|
969
978
|
function getCompat(model: PiModel | undefined): CacheCompat {
|
|
970
979
|
if (!model) return {} as CacheCompat;
|
|
971
|
-
|
|
972
|
-
// The host runtime merges provider.compat with model.compat (model wins on conflicts).
|
|
973
|
-
// We approximate this by reading from ctx.model which should already have merged compat.
|
|
974
|
-
// However, for safety, we check both levels if available.
|
|
975
|
-
const modelCompat = (model.compat ?? {}) as CacheCompat;
|
|
976
980
|
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
+
const record = model as PiModel & { compatConfig?: Record<string, unknown> };
|
|
982
|
+
return {
|
|
983
|
+
...((record.compatConfig ?? {}) as CacheCompat),
|
|
984
|
+
...((record.compat ?? {}) as CacheCompat),
|
|
985
|
+
};
|
|
986
|
+
}
|
|
987
|
+
|
|
988
|
+
function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
|
|
989
|
+
if (!sessionHash || !model) return undefined;
|
|
990
|
+
return `${sessionHash}:${modelKey(model)}`;
|
|
991
|
+
}
|
|
992
|
+
|
|
993
|
+
function rememberPromptRewriteContext(
|
|
994
|
+
contexts: Map<string, PromptRewriteContext>,
|
|
995
|
+
key: string | undefined,
|
|
996
|
+
context: PromptRewriteContext,
|
|
997
|
+
): void {
|
|
998
|
+
if (!key) return;
|
|
999
|
+
contexts.set(key, context);
|
|
1000
|
+
}
|
|
1001
|
+
|
|
1002
|
+
function getPromptRewriteContext(
|
|
1003
|
+
contexts: Map<string, PromptRewriteContext>,
|
|
1004
|
+
key: string | undefined,
|
|
1005
|
+
now = Date.now(),
|
|
1006
|
+
ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
|
|
1007
|
+
): PromptRewriteContext | undefined {
|
|
1008
|
+
if (!key) return undefined;
|
|
1009
|
+
const context = contexts.get(key);
|
|
1010
|
+
if (!context) return undefined;
|
|
1011
|
+
if (now - context.timestamp > ttlMs) {
|
|
1012
|
+
contexts.delete(key);
|
|
1013
|
+
return undefined;
|
|
1014
|
+
}
|
|
1015
|
+
return context;
|
|
981
1016
|
}
|
|
982
1017
|
|
|
983
1018
|
/**
|
|
@@ -1033,18 +1068,18 @@ function isRuntimeOptimizerEnabled(): boolean {
|
|
|
1033
1068
|
}
|
|
1034
1069
|
|
|
1035
1070
|
function getOptimizerRuntimeModeLines(): string[] {
|
|
1036
|
-
const state = runtimeOptimizerEnabled ? "
|
|
1071
|
+
const state = runtimeOptimizerEnabled ? "已启用" : "已关闭";
|
|
1037
1072
|
const lines: string[] = [];
|
|
1038
|
-
lines.push(
|
|
1039
|
-
lines.push(`• Prompt
|
|
1040
|
-
lines.push(`• OpenAI prompt_cache_key
|
|
1041
|
-
lines.push(`• Footer
|
|
1042
|
-
lines.push(`• Compat
|
|
1043
|
-
lines.push(`• ${PI_CACHE_RETENTION_ENV}
|
|
1073
|
+
lines.push(`运行状态:${state}`);
|
|
1074
|
+
lines.push(`• Prompt 重写:${runtimeOptimizerEnabled && !isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) ? "开启" : "关闭"}`);
|
|
1075
|
+
lines.push(`• OpenAI prompt_cache_key 回退:${shouldInjectOpenAIPromptCacheKey() ? "开启" : "关闭"}`);
|
|
1076
|
+
lines.push(`• Footer 缓存统计:开启${runtimeOptimizerEnabled ? "" : "(对比模式)"}`);
|
|
1077
|
+
lines.push(`• Compat 提示:${runtimeOptimizerEnabled ? "开启" : "关闭"}`);
|
|
1078
|
+
lines.push(`• ${PI_CACHE_RETENTION_ENV}:${process.env[PI_CACHE_RETENTION_ENV] ?? "(未设置)"}`);
|
|
1044
1079
|
if (!runtimeOptimizerEnabled) {
|
|
1045
|
-
lines.push("
|
|
1080
|
+
lines.push("这是当前进程内开关。运行 /reload 或重启 OMP 可恢复到启动时行为。");
|
|
1046
1081
|
} else if (isEnabledEnv(process.env[NO_PROMPT_REWRITE_ENV]) || !shouldInjectOpenAIPromptCacheKey()) {
|
|
1047
|
-
lines.push("
|
|
1082
|
+
lines.push("仍有部分能力被环境变量关闭。");
|
|
1048
1083
|
}
|
|
1049
1084
|
return lines;
|
|
1050
1085
|
}
|
|
@@ -1179,9 +1214,9 @@ function buildAdaptiveThinkingCompatSuggestion(_missing: string[]): Record<strin
|
|
|
1179
1214
|
}
|
|
1180
1215
|
|
|
1181
1216
|
function appendAdaptiveThinkingCompatAdviceLines(lines: string[], _missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
1182
|
-
lines.push("-
|
|
1183
|
-
lines.push("
|
|
1184
|
-
lines.push("
|
|
1217
|
+
lines.push("- 自适应思考:OMP 内置模型目录会为官方 Claude 模型自动设置。");
|
|
1218
|
+
lines.push(" 自定义 Anthropic 渠道应依赖内置 catalog 元数据;");
|
|
1219
|
+
lines.push(" 如果上游拒绝 adaptive thinking,请确认模型 id 是否匹配官方发布版本。");
|
|
1185
1220
|
appendCredentialSafeProviderGuidance(lines, placement, {});
|
|
1186
1221
|
}
|
|
1187
1222
|
|
|
@@ -1191,10 +1226,10 @@ function buildAdaptiveThinkingCompatWarningText(key: string, _missing: string[])
|
|
|
1191
1226
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
1192
1227
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
1193
1228
|
const lines: string[] = [
|
|
1194
|
-
`ℹ️ omp-cache-optimizer
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1229
|
+
`ℹ️ omp-cache-optimizer:${key} 是支持自适应生成的 Claude 模型。`,
|
|
1230
|
+
"OMP 内置 catalog 会自动处理自适应思考;官方模型不需要额外的 models.yml compat 键。",
|
|
1231
|
+
"如果是转发 Anthropic 的自定义渠道,可能仍需要显式 catalog 元数据。",
|
|
1232
|
+
`可参考 ${modelsJsonPath} -> providers["${providerLabel}"] -> models -> "${modelId ?? '<id>'}"。`,
|
|
1198
1233
|
"",
|
|
1199
1234
|
];
|
|
1200
1235
|
appendAdaptiveThinkingCompatAdviceLines(lines, [], { providerLabel, modelId });
|
|
@@ -1972,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
1972
2007
|
return true;
|
|
1973
2008
|
}
|
|
1974
2009
|
if (Array.isArray(record.system) && record.system.length > 0) {
|
|
1975
|
-
// Replace first text block, keep structure
|
|
1976
|
-
const first = asRecord(record.system[0]);
|
|
1977
|
-
if (first && typeof first.text === "string") {
|
|
1978
|
-
first.text = text;
|
|
1979
|
-
return true;
|
|
1980
|
-
}
|
|
1981
|
-
// Fallback: convert to single-block string form
|
|
1982
2010
|
record.system = [{ type: "text", text }];
|
|
1983
2011
|
return true;
|
|
1984
2012
|
}
|
|
@@ -1986,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
1986
2014
|
// google-generative-ai: payload.systemInstruction
|
|
1987
2015
|
const systemInstruction = asRecord(record.systemInstruction);
|
|
1988
2016
|
if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
|
|
1989
|
-
|
|
1990
|
-
|
|
1991
|
-
firstPart.text = text;
|
|
1992
|
-
return true;
|
|
1993
|
-
}
|
|
2017
|
+
systemInstruction.parts = [{ text }];
|
|
2018
|
+
return true;
|
|
1994
2019
|
}
|
|
1995
2020
|
|
|
1996
2021
|
// openai-completions / openai-responses: payload.messages[] first system/developer message
|
|
@@ -2005,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
2005
2030
|
return true;
|
|
2006
2031
|
}
|
|
2007
2032
|
if (Array.isArray(r.content) && r.content.length > 0) {
|
|
2008
|
-
|
|
2009
|
-
|
|
2010
|
-
first.text = text;
|
|
2011
|
-
return true;
|
|
2012
|
-
}
|
|
2033
|
+
r.content = text;
|
|
2034
|
+
return true;
|
|
2013
2035
|
}
|
|
2014
2036
|
}
|
|
2015
2037
|
}
|
|
@@ -2081,7 +2103,7 @@ function buildSafeOpenAIProxyCompatSuggestion(_missing: string[]): Record<string
|
|
|
2081
2103
|
}
|
|
2082
2104
|
|
|
2083
2105
|
function getPromptCacheRetentionUnsupportedHint(): string {
|
|
2084
|
-
return "
|
|
2106
|
+
return "如果这个渠道返回 `400 Unsupported parameter: prompt_cache_retention`,请移除或避免 `supportsLongPromptCacheRetention`;扩展本身不会直接写这个字段,但当 compat 声明支持长缓存保留时,OMP 可能会发送它。";
|
|
2085
2107
|
}
|
|
2086
2108
|
|
|
2087
2109
|
function hasPromptCacheRetentionUnsupportedSignal(headers: Record<string, string> | undefined): boolean {
|
|
@@ -2135,20 +2157,20 @@ function appendCredentialSafeProviderGuidance(lines: string[], placement: Compat
|
|
|
2135
2157
|
if (!providerLabel) return;
|
|
2136
2158
|
|
|
2137
2159
|
lines.push("");
|
|
2138
|
-
lines.push("
|
|
2139
|
-
lines.push("-
|
|
2140
|
-
lines.push(`-
|
|
2160
|
+
lines.push("如果这个渠道在 models.yml 里还没有 provider 配置:");
|
|
2161
|
+
lines.push("- 保留现有认证方式;不要复制 credential、token 或 API key。");
|
|
2162
|
+
lines.push(`- 只在 ${getModelsJsonDisplayPath()} 里添加缓存/路由 compat 覆盖。`);
|
|
2141
2163
|
|
|
2142
2164
|
if (Object.keys(compatSuggestion).length === 0) {
|
|
2143
|
-
lines.push("-
|
|
2165
|
+
lines.push("- 上面这些缺失项目前没有安全可复制的 override。");
|
|
2144
2166
|
return;
|
|
2145
2167
|
}
|
|
2146
2168
|
|
|
2147
|
-
lines.push("Provider
|
|
2169
|
+
lines.push("Provider 级最小覆盖:");
|
|
2148
2170
|
lines.push(JSON.stringify(buildProviderCompatOverride(providerLabel, compatSuggestion), null, 2));
|
|
2149
2171
|
|
|
2150
2172
|
if (placement.modelId) {
|
|
2151
|
-
lines.push("
|
|
2173
|
+
lines.push("单模型 override(只想影响当前模型时使用):");
|
|
2152
2174
|
lines.push(JSON.stringify(buildModelCompatOverride(providerLabel, placement.modelId, compatSuggestion), null, 2));
|
|
2153
2175
|
}
|
|
2154
2176
|
}
|
|
@@ -2159,21 +2181,19 @@ function appendOpenAIProxyCompatAdviceLines(lines: string[], missing: string[],
|
|
|
2159
2181
|
|
|
2160
2182
|
if (hasSafeSuggestion) {
|
|
2161
2183
|
if (options.includeJsonIntro !== false) {
|
|
2162
|
-
lines.push("
|
|
2184
|
+
lines.push("安全默认建议:");
|
|
2163
2185
|
}
|
|
2164
2186
|
lines.push(JSON.stringify(suggestion, null, 2));
|
|
2165
2187
|
}
|
|
2166
2188
|
|
|
2167
|
-
// OMP divergence: session affinity is handled by multi-credential auth, not compat.
|
|
2168
|
-
// No per-flag advice lines remain; only the optional long-retention guidance below.
|
|
2169
2189
|
appendCredentialSafeProviderGuidance(lines, options, suggestion);
|
|
2170
2190
|
}
|
|
2171
2191
|
|
|
2172
2192
|
function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: string[]): void {
|
|
2173
2193
|
if (!optional.includes("supportsLongPromptCacheRetention")) return;
|
|
2174
2194
|
lines.push("");
|
|
2175
|
-
lines.push("
|
|
2176
|
-
lines.push("- supportsLongPromptCacheRetention
|
|
2195
|
+
lines.push("可选项(非必需,不会自动修复):");
|
|
2196
|
+
lines.push("- supportsLongPromptCacheRetention:仅当 endpoint / proxy 明确支持 OpenAI long prompt cache retention 时再开启。");
|
|
2177
2197
|
lines.push(`- ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
2178
2198
|
}
|
|
2179
2199
|
|
|
@@ -2190,17 +2210,15 @@ function appendOptionalOpenAIProxyCompatAdviceLines(lines: string[], optional: s
|
|
|
2190
2210
|
* exercise it via __internals_for_tests.
|
|
2191
2211
|
*/
|
|
2192
2212
|
function buildOpenAIProxyCompatWarningText(key: string, missing: string[]): string {
|
|
2193
|
-
// Extract provider id from the model key (e.g. "otokapi/gpt-5.5" -> "otokapi").
|
|
2194
|
-
// If no slash is found, fall back to the key itself.
|
|
2195
2213
|
const slashIdx = key.indexOf("/");
|
|
2196
2214
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
2197
2215
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
2198
2216
|
|
|
2199
2217
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
2200
2218
|
const lines: string[] = [
|
|
2201
|
-
`💡 omp-cache-optimizer
|
|
2202
|
-
|
|
2203
|
-
|
|
2219
|
+
`💡 omp-cache-optimizer:${key} 是第三方 GPT/OpenAI 兼容代理,但合并后的 compat 缺少 ${missing.join(" 和 ")}。`,
|
|
2220
|
+
`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
|
|
2221
|
+
"",
|
|
2204
2222
|
];
|
|
2205
2223
|
|
|
2206
2224
|
appendOpenAIProxyCompatAdviceLines(lines, missing, { providerLabel, modelId });
|
|
@@ -2259,20 +2277,16 @@ function buildDeepSeekCompatSuggestion(missing: string[]): Record<string, unknow
|
|
|
2259
2277
|
function appendDeepSeekCompatAdviceLines(lines: string[], missing: string[], placement: CompatAdvicePlacement = {}): void {
|
|
2260
2278
|
const suggestion = buildDeepSeekCompatSuggestion(missing);
|
|
2261
2279
|
if (Object.keys(suggestion).length > 0) {
|
|
2262
|
-
lines.push("
|
|
2280
|
+
lines.push("推荐的 DeepSeek compat 片段:");
|
|
2263
2281
|
lines.push(JSON.stringify(suggestion, null, 2));
|
|
2264
2282
|
}
|
|
2265
2283
|
|
|
2266
2284
|
if (missing.includes("requiresReasoningContentForToolCalls")) {
|
|
2267
|
-
lines.push("- requiresReasoningContentForToolCalls
|
|
2285
|
+
lines.push("- requiresReasoningContentForToolCalls:保持带工具调用的 assistant 重放与 DeepSeek 的 reasoning_content 要求兼容。");
|
|
2268
2286
|
}
|
|
2269
2287
|
if (missing.includes("supportsLongPromptCacheRetention")) {
|
|
2270
|
-
lines.push("- supportsLongPromptCacheRetention
|
|
2288
|
+
lines.push("- supportsLongPromptCacheRetention:仅当 DeepSeek 兼容 endpoint 支持长缓存保留时再开启。");
|
|
2271
2289
|
}
|
|
2272
|
-
// OMP divergence: thinkingFormat is no longer flagged. DeepSeek reasoning format
|
|
2273
|
-
// is auto-detected by OMP's openai-completions transport; the "deepseek" value
|
|
2274
|
-
// is not a valid OMP thinkingFormat (OMP uses openai|openrouter|zai|qwen|...).
|
|
2275
|
-
// Session affinity is handled by OMP multi-credential auth, not compat keys.
|
|
2276
2290
|
|
|
2277
2291
|
appendCredentialSafeProviderGuidance(lines, placement, suggestion);
|
|
2278
2292
|
}
|
|
@@ -2283,8 +2297,8 @@ function buildDeepSeekCompatWarningText(key: string, missing: string[]): string
|
|
|
2283
2297
|
const modelId = slashIdx > 0 ? key.slice(slashIdx + 1) : undefined;
|
|
2284
2298
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
2285
2299
|
const lines: string[] = [
|
|
2286
|
-
`💡 omp-cache-optimizer
|
|
2287
|
-
|
|
2300
|
+
`💡 omp-cache-optimizer:${key} 看起来是 DeepSeek 风格模型,但合并后的 compat 缺少 ${missing.join(" 和 ")}。`,
|
|
2301
|
+
`这可能让代理降低或隐藏缓存命中。编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`,
|
|
2288
2302
|
"",
|
|
2289
2303
|
];
|
|
2290
2304
|
|
|
@@ -2332,8 +2346,8 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
2332
2346
|
if (getCompat(model).cacheControlFormat === "anthropic") return undefined;
|
|
2333
2347
|
|
|
2334
2348
|
return (
|
|
2335
|
-
`💡
|
|
2336
|
-
"
|
|
2349
|
+
`💡 omp-cache-optimizer:${modelKey(model)} 看起来是 Claude/Anthropic 风格模型,但 OpenAI 兼容 compat 缺少 cacheControlFormat: "anthropic"。` +
|
|
2350
|
+
"只有当 endpoint 支持并启用了这个 compat 字段时,OMP 才能放置 Anthropic 的 cache_control 断点。"
|
|
2337
2351
|
);
|
|
2338
2352
|
},
|
|
2339
2353
|
},
|
|
@@ -3330,30 +3344,26 @@ function formatTokenCount(value: number): string {
|
|
|
3330
3344
|
return `${millions.toFixed(2)}M`;
|
|
3331
3345
|
}
|
|
3332
3346
|
|
|
3347
|
+
function localizeAdapterLabel(label: string): string {
|
|
3348
|
+
return label.endsWith(" cache") ? `${label.slice(0, -6)} 缓存` : label;
|
|
3349
|
+
}
|
|
3350
|
+
|
|
3333
3351
|
function formatCacheStats(adapter: CacheProviderAdapter, stats: CacheStats): string {
|
|
3334
3352
|
const percent = stats.totalInputTokens > 0
|
|
3335
3353
|
? ` (${Math.round((stats.cachedInputTokens / stats.totalInputTokens) * 100)}%)`
|
|
3336
3354
|
: "";
|
|
3337
3355
|
const writeText = adapter.showCacheWrite && stats.cacheWriteInputTokens > 0
|
|
3338
|
-
? ` ·
|
|
3356
|
+
? ` · 写入 ${formatTokenCount(stats.cacheWriteInputTokens)} tok`
|
|
3339
3357
|
: "";
|
|
3340
3358
|
|
|
3341
|
-
return `${adapter.label} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
3359
|
+
return `${localizeAdapterLabel(adapter.label)} ${stats.hitRequests}/${stats.totalRequests} · ${formatTokenCount(stats.cachedInputTokens)}/${formatTokenCount(stats.totalInputTokens)} tok${percent}${writeText}`;
|
|
3342
3360
|
}
|
|
3343
3361
|
|
|
3344
|
-
/**
|
|
3345
|
-
* Compute a hit-ratio percentage string for a value between 0 and 1.
|
|
3346
|
-
* Returns e.g. "75%", "0%", "100%", or "N/A" for zero total.
|
|
3347
|
-
*/
|
|
3348
3362
|
function formatHitRatio(hits: number, total: number): string {
|
|
3349
|
-
if (total <= 0) return "
|
|
3363
|
+
if (total <= 0) return "无数据";
|
|
3350
3364
|
return `${Math.round((hits / total) * 100)}%`;
|
|
3351
3365
|
}
|
|
3352
3366
|
|
|
3353
|
-
/**
|
|
3354
|
-
* Format a token-to-M abbreviation for stats output.
|
|
3355
|
-
* Example: 1500000 → "1.50M"
|
|
3356
|
-
*/
|
|
3357
3367
|
function formatTokenM(value: number): string {
|
|
3358
3368
|
const millions = Math.max(0, Math.round(value)) / 1_000_000;
|
|
3359
3369
|
if (millions === 0) return "0";
|
|
@@ -3362,27 +3372,18 @@ function formatTokenM(value: number): string {
|
|
|
3362
3372
|
return millions.toFixed(2);
|
|
3363
3373
|
}
|
|
3364
3374
|
|
|
3365
|
-
/**
|
|
3366
|
-
* Check if an assistant message's usage fields appear to be missing or empty.
|
|
3367
|
-
* Returns true when normalized fields (input, cacheRead, cacheWrite) are all
|
|
3368
|
-
* absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
|
|
3369
|
-
* for the given adapter.
|
|
3370
|
-
*/
|
|
3371
3375
|
function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter): boolean {
|
|
3372
3376
|
const usage = usageRecordFromAssistant(message);
|
|
3373
3377
|
if (!usage) return true;
|
|
3374
3378
|
|
|
3375
|
-
// Check normalized fields
|
|
3376
3379
|
const input = getNonNegativeNumber(usage, "input");
|
|
3377
3380
|
const cacheRead = getNonNegativeNumber(usage, "cacheRead");
|
|
3378
3381
|
const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
|
|
3379
3382
|
|
|
3380
|
-
// If normalized fields exist with non-zero values, usage is present
|
|
3381
3383
|
if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
|
|
3382
3384
|
return false;
|
|
3383
3385
|
}
|
|
3384
3386
|
|
|
3385
|
-
// Check raw usage for the adapter's provider family
|
|
3386
3387
|
const rawUsage = adapter.normalizeUsage(message);
|
|
3387
3388
|
if (!rawUsage || (rawUsage.cacheRead === 0 && rawUsage.cacheWrite === 0 && rawUsage.totalInput === 0)) {
|
|
3388
3389
|
return true;
|
|
@@ -3391,64 +3392,55 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
|
|
|
3391
3392
|
return false;
|
|
3392
3393
|
}
|
|
3393
3394
|
|
|
3394
|
-
/**
|
|
3395
|
-
* Build a summary string for the recent trend (last N samples).
|
|
3396
|
-
* Example: "Recent 10: 7/10 hits · 65% tok cached · no missing usage"
|
|
3397
|
-
*/
|
|
3398
3395
|
function formatRecentTrendSummary(samples: CacheUsageSample[], maxCount: number): string {
|
|
3399
3396
|
const recent = samples.slice(-maxCount);
|
|
3400
|
-
if (recent.length === 0) return
|
|
3397
|
+
if (recent.length === 0) return `最近 ${maxCount} 次:暂无样本`;
|
|
3401
3398
|
|
|
3402
3399
|
const hits = recent.filter((s) => s.hit).length;
|
|
3403
3400
|
const totalCached = recent.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
3404
3401
|
const totalInput = recent.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
3405
3402
|
const missingCount = recent.filter((s) => s.missingUsageFields).length;
|
|
3406
3403
|
|
|
3407
|
-
const
|
|
3408
|
-
const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "N/A";
|
|
3404
|
+
const tokenRatio = totalInput > 0 ? formatHitRatio(totalCached, totalInput) : "无数据";
|
|
3409
3405
|
|
|
3410
|
-
let result =
|
|
3406
|
+
let result = `最近 ${recent.length}/${maxCount} 次:${hits}/${recent.length} 次命中 · ${tokenRatio} tok 已缓存`;
|
|
3411
3407
|
if (missingCount > 0) {
|
|
3412
|
-
result += ` · ${missingCount}
|
|
3408
|
+
result += ` · ${missingCount} 条 usage 缺失`;
|
|
3413
3409
|
}
|
|
3414
3410
|
return result;
|
|
3415
3411
|
}
|
|
3416
3412
|
|
|
3417
|
-
/**
|
|
3418
|
-
* Build the output for `/cache-optimizer stats`.
|
|
3419
|
-
*/
|
|
3420
3413
|
function buildStatsOutput(model: PiModel | undefined, adapter: CacheProviderAdapter | undefined, stats: CacheStats | undefined, recentSamples: CacheUsageSample[]): string {
|
|
3421
3414
|
const lines: string[] = [];
|
|
3422
3415
|
|
|
3423
3416
|
if (!model || !adapter) {
|
|
3424
|
-
lines.push("ℹ️
|
|
3417
|
+
lines.push("ℹ️ 当前活动模型未匹配到缓存适配器。请选择可识别模型家族后再查看统计。");
|
|
3425
3418
|
return lines.join("\n");
|
|
3426
3419
|
}
|
|
3427
3420
|
|
|
3428
3421
|
const key = modelKey(model);
|
|
3429
3422
|
const currentStats = stats ?? emptyCacheStats();
|
|
3430
3423
|
|
|
3431
|
-
lines.push(
|
|
3432
|
-
lines.push(
|
|
3424
|
+
lines.push(`模型键:${key}`);
|
|
3425
|
+
lines.push(`适配器:${localizeAdapterLabel(adapter.label)}`);
|
|
3433
3426
|
lines.push("");
|
|
3434
|
-
lines.push("──
|
|
3435
|
-
lines.push(
|
|
3436
|
-
lines.push(
|
|
3427
|
+
lines.push("── 今日 ──");
|
|
3428
|
+
lines.push(`请求数:${currentStats.hitRequests} 次命中 / ${currentStats.totalRequests} 次总计 · ${formatHitRatio(currentStats.hitRequests, currentStats.totalRequests)}`);
|
|
3429
|
+
lines.push(`缓存 tokens:${formatTokenM(currentStats.cachedInputTokens)}M / ${formatTokenM(currentStats.totalInputTokens)}M 输入 · ${currentStats.totalInputTokens > 0 ? `${Math.round((currentStats.cachedInputTokens / currentStats.totalInputTokens) * 100)}%` : "无数据"}`);
|
|
3437
3430
|
if (currentStats.cacheWriteInputTokens > 0) {
|
|
3438
|
-
lines.push(
|
|
3431
|
+
lines.push(`缓存写入:${formatTokenM(currentStats.cacheWriteInputTokens)}M tok`);
|
|
3439
3432
|
}
|
|
3440
3433
|
|
|
3441
3434
|
lines.push("");
|
|
3442
|
-
lines.push("──
|
|
3435
|
+
lines.push("── 近期趋势 ──");
|
|
3443
3436
|
lines.push(formatRecentTrendSummary(recentSamples, 10));
|
|
3444
3437
|
lines.push(formatRecentTrendSummary(recentSamples, 30));
|
|
3445
3438
|
|
|
3446
|
-
// Check if any sample has missingUsageFields flagged
|
|
3447
3439
|
const missingAny = recentSamples.some((s) => s.missingUsageFields);
|
|
3448
3440
|
if (missingAny) {
|
|
3449
3441
|
lines.push("");
|
|
3450
|
-
lines.push("⚠️
|
|
3451
|
-
lines.push("
|
|
3442
|
+
lines.push("⚠️ 近期有响应缺少或返回了空的缓存 usage 字段,footer 命中率可能偏低。");
|
|
3443
|
+
lines.push(" 代理可能没有返回 prompt_cache_hit_tokens,或没有返回 usage.input/cacheRead 等字段。");
|
|
3452
3444
|
}
|
|
3453
3445
|
|
|
3454
3446
|
return lines.join("\n");
|
|
@@ -3889,19 +3881,14 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3889
3881
|
const baseUrl = lower(model.baseUrl || "");
|
|
3890
3882
|
const provider = lower(model.provider);
|
|
3891
3883
|
|
|
3892
|
-
// Router/channel diagnostics only apply to OpenAI-compatible proxy APIs.
|
|
3893
|
-
// Native APIs like mistral-conversations, azure-openai-responses,
|
|
3894
|
-
// anthropic-messages, or bedrock-converse-stream are intentionally excluded.
|
|
3895
3884
|
if (api === "azure-openai-responses" || isMistralConversationsApi(api) || !isOpenAICompatibleApi(api)) {
|
|
3896
3885
|
return notes;
|
|
3897
3886
|
}
|
|
3898
3887
|
|
|
3899
|
-
// Official OpenAI bypass — no notes needed.
|
|
3900
3888
|
if (isOfficialOpenAIBaseUrl(model)) {
|
|
3901
3889
|
return notes;
|
|
3902
3890
|
}
|
|
3903
3891
|
|
|
3904
|
-
// ── 1. OpenRouter ────────────────────────────────────────────────
|
|
3905
3892
|
if (
|
|
3906
3893
|
baseUrl.includes("openrouter.ai") ||
|
|
3907
3894
|
baseUrl.includes("openrouter") ||
|
|
@@ -3913,32 +3900,28 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3913
3900
|
const hasOrder = !!routing?.order;
|
|
3914
3901
|
|
|
3915
3902
|
notes.push(
|
|
3916
|
-
"🔀
|
|
3917
|
-
"low cache hit rates are common when each turn lands on a different upstream provider.",
|
|
3903
|
+
"🔀 路由/渠道:检测到 OpenRouter。OpenRouter 是多上游路由器;如果每一轮落到不同上游,缓存命中率偏低很常见。",
|
|
3918
3904
|
);
|
|
3919
3905
|
|
|
3920
3906
|
if (!hasOnly && !hasOrder) {
|
|
3921
3907
|
notes.push(
|
|
3922
|
-
|
|
3923
|
-
"Example for models.yml -> providers[\"<providerId>\"] -> compat:",
|
|
3908
|
+
' 建议:添加 openRouterRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
|
|
3924
3909
|
);
|
|
3925
3910
|
notes.push(
|
|
3926
3911
|
` { "supportsLongPromptCacheRetention": true, ` +
|
|
3927
3912
|
`"openRouterRouting": { "only": ["<provider-slug>"] } }`,
|
|
3928
3913
|
);
|
|
3929
3914
|
notes.push(
|
|
3930
|
-
'
|
|
3915
|
+
' 把 <provider-slug> 替换成真实的 OpenRouter provider slug(如 "openai"、"anthropic")。',
|
|
3931
3916
|
);
|
|
3932
3917
|
notes.push(
|
|
3933
|
-
|
|
3934
|
-
"Only set supportsLongPromptCacheRetention if your upstream supports long cache retention.",
|
|
3918
|
+
' 也可以用 openRouterRouting.order: ["<provider-slug>", "..."] 作为回退顺序。只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。',
|
|
3935
3919
|
);
|
|
3936
3920
|
}
|
|
3937
3921
|
|
|
3938
3922
|
return notes;
|
|
3939
3923
|
}
|
|
3940
3924
|
|
|
3941
|
-
// ── 2. Vercel AI Gateway ─────────────────────────────────────────
|
|
3942
3925
|
if (
|
|
3943
3926
|
baseUrl.includes("ai-gateway.vercel.sh") ||
|
|
3944
3927
|
provider.includes("vercel") ||
|
|
@@ -3950,81 +3933,54 @@ function describeRouterChannelDiagnostics(model: PiModel): string[] {
|
|
|
3950
3933
|
const hasOrder = !!routing?.order;
|
|
3951
3934
|
|
|
3952
3935
|
notes.push(
|
|
3953
|
-
"🔀
|
|
3954
|
-
"provider endpoints per request, reducing cache locality.",
|
|
3936
|
+
"🔀 路由/渠道:检测到 Vercel AI Gateway。这个网关可能把不同请求分发到不同 provider endpoint,降低缓存局部性。",
|
|
3955
3937
|
);
|
|
3956
3938
|
|
|
3957
3939
|
if (!hasOnly && !hasOrder) {
|
|
3958
3940
|
notes.push(
|
|
3959
|
-
|
|
3960
|
-
"Example for models.yml -> providers[\"<providerId>\"] -> compat:",
|
|
3941
|
+
' 建议:添加 vercelGatewayRouting,把上游固定住。位置:models.yml -> providers["<providerId>"] -> compat:',
|
|
3961
3942
|
);
|
|
3962
3943
|
notes.push(
|
|
3963
3944
|
` { "supportsLongPromptCacheRetention": true, ` +
|
|
3964
3945
|
`"vercelGatewayRouting": { "only": ["<provider-id>"] } }`,
|
|
3965
3946
|
);
|
|
3966
3947
|
notes.push(
|
|
3967
|
-
|
|
3948
|
+
' 把 <provider-id> 替换成真实的 Vercel provider ID(如 "openai")。',
|
|
3968
3949
|
);
|
|
3969
3950
|
notes.push(
|
|
3970
|
-
"
|
|
3951
|
+
" 只有在上游支持长缓存保留时才设置 supportsLongPromptCacheRetention。",
|
|
3971
3952
|
);
|
|
3972
3953
|
}
|
|
3973
3954
|
|
|
3974
3955
|
return notes;
|
|
3975
3956
|
}
|
|
3976
3957
|
|
|
3977
|
-
// ── 3. LiteLLM / OneAPI / NewAPI / VoAPI (self-hosted aggregation) ──
|
|
3978
3958
|
const aggregationPatterns = ["litellm", "oneapi", "one-api", "newapi", "new-api", "voapi", "vo-api"];
|
|
3979
3959
|
if (
|
|
3980
3960
|
aggregationPatterns.some((p) => baseUrl.includes(p)) ||
|
|
3981
3961
|
aggregationPatterns.some((p) => provider.includes(p))
|
|
3982
3962
|
) {
|
|
3983
3963
|
notes.push(
|
|
3984
|
-
"🔀
|
|
3985
|
-
"These proxies route to multiple upstream accounts or instances, which can split the cache.",
|
|
3986
|
-
);
|
|
3987
|
-
notes.push(
|
|
3988
|
-
" Suggestions:",
|
|
3989
|
-
);
|
|
3990
|
-
notes.push(
|
|
3991
|
-
" • Ensure the proxy can fix to a single upstream per session (session_id affinity).",
|
|
3992
|
-
);
|
|
3993
|
-
notes.push(
|
|
3994
|
-
" • Forward prompt_cache_key and session-affinity headers to the upstream.",
|
|
3995
|
-
);
|
|
3996
|
-
notes.push(
|
|
3997
|
-
" • Return cache usage fields (prompt_cache_hit_tokens, etc.) in the response.",
|
|
3998
|
-
);
|
|
3999
|
-
notes.push(
|
|
4000
|
-
` Safe compat default: { "supportsLongPromptCacheRetention": true }`,
|
|
4001
|
-
);
|
|
4002
|
-
notes.push(
|
|
4003
|
-
` Add supportsLongPromptCacheRetention only if the proxy explicitly supports prompt_cache_retention.`,
|
|
3964
|
+
"🔀 路由/渠道:检测到自建聚合代理(LiteLLM / OneAPI / NewAPI / VoAPI)。这类代理常把请求分到多个上游账号或实例,导致缓存被拆散。",
|
|
4004
3965
|
);
|
|
3966
|
+
notes.push(" 建议:");
|
|
3967
|
+
notes.push(" • 确保代理能按 session 固定到单一上游(session_id affinity)。");
|
|
3968
|
+
notes.push(" • 向上游透传 prompt_cache_key 与会话亲和性相关 header。");
|
|
3969
|
+
notes.push(" • 在响应里返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
|
|
3970
|
+
notes.push(` 可作为起点的 compat:{ "supportsLongPromptCacheRetention": true }`);
|
|
3971
|
+
notes.push(" 只有在代理明确支持 prompt_cache_retention 时才加 supportsLongPromptCacheRetention。");
|
|
4005
3972
|
|
|
4006
3973
|
return notes;
|
|
4007
3974
|
}
|
|
4008
3975
|
|
|
4009
|
-
// ── 4. Generic third-party OpenAI-compatible proxy ─────────────────
|
|
4010
3976
|
if (api === "openai-completions" && baseUrl) {
|
|
4011
3977
|
const missing = describeMissingCacheCompatForModel(model);
|
|
4012
|
-
notes.push(
|
|
4013
|
-
|
|
4014
|
-
);
|
|
4015
|
-
notes.push(
|
|
4016
|
-
" • Verify the proxy routes to the same upstream account/instance per session.",
|
|
4017
|
-
);
|
|
4018
|
-
notes.push(
|
|
4019
|
-
" • Ensure the proxy forwards prompt_cache_key and sends session-affinity headers.",
|
|
4020
|
-
);
|
|
4021
|
-
notes.push(
|
|
4022
|
-
" • Check that the proxy returns cache usage fields (prompt_cache_hit_tokens etc.).",
|
|
4023
|
-
);
|
|
3978
|
+
notes.push("🔀 路由/渠道:第三方 OpenAI 兼容代理。如果缓存命中率偏低:");
|
|
3979
|
+
notes.push(" • 确认代理会把同一 session 路由到同一个上游账号/实例。");
|
|
3980
|
+
notes.push(" • 确认代理会透传 prompt_cache_key,并发送会话亲和性相关 header。");
|
|
3981
|
+
notes.push(" • 确认代理会返回缓存 usage 字段(如 prompt_cache_hit_tokens)。");
|
|
4024
3982
|
if (missing.length > 0) {
|
|
4025
|
-
notes.push(
|
|
4026
|
-
` • The compat flags above (${missing.join(", ")}) are recommended for cache stability.`,
|
|
4027
|
-
);
|
|
3983
|
+
notes.push(` • 上面这些 compat 字段(${missing.join(", ")})有助于提升缓存稳定性。`);
|
|
4028
3984
|
}
|
|
4029
3985
|
|
|
4030
3986
|
return notes;
|
|
@@ -4038,38 +3994,38 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
|
|
|
4038
3994
|
|
|
4039
3995
|
if (isMistralConversationsApi(api)) {
|
|
4040
3996
|
return [
|
|
4041
|
-
"ℹ️
|
|
4042
|
-
"
|
|
3997
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
3998
|
+
" 原生 Mistral `mistral-conversations` 使用 provider 原生传输;OpenAI 兼容代理 compat 不适用。",
|
|
4043
3999
|
];
|
|
4044
4000
|
}
|
|
4045
4001
|
|
|
4046
4002
|
if (api === "azure-openai-responses") {
|
|
4047
4003
|
return [
|
|
4048
|
-
"ℹ️
|
|
4049
|
-
"
|
|
4004
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
4005
|
+
" 原生 Azure OpenAI Responses 使用 Responses 传输;OpenAI 兼容代理 compat 不适用。",
|
|
4050
4006
|
];
|
|
4051
4007
|
}
|
|
4052
4008
|
|
|
4053
4009
|
if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
|
|
4054
4010
|
return [
|
|
4055
|
-
"ℹ️
|
|
4056
|
-
"
|
|
4011
|
+
"ℹ️ 当前模型不适用 compat 检查。",
|
|
4012
|
+
" 原生 Responses 传输已经使用运行时核心请求链路;OpenAI 兼容代理 compat 不适用。",
|
|
4057
4013
|
];
|
|
4058
4014
|
}
|
|
4059
4015
|
|
|
4060
|
-
return ["ℹ️
|
|
4016
|
+
return ["ℹ️ 当前模型不适用 compat 检查。"];
|
|
4061
4017
|
}
|
|
4062
4018
|
|
|
4063
4019
|
function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400?: boolean } = {}): string {
|
|
4064
4020
|
const lines: string[] = [];
|
|
4065
|
-
lines.push(
|
|
4066
|
-
lines.push(
|
|
4067
|
-
if (model.name && model.name !== model.id) lines.push(
|
|
4068
|
-
lines.push(`API
|
|
4069
|
-
lines.push(`Base URL
|
|
4021
|
+
lines.push(`提供方:${model.provider}`);
|
|
4022
|
+
lines.push(`模型: ${model.id}`);
|
|
4023
|
+
if (model.name && model.name !== model.id) lines.push(`名称: ${model.name}`);
|
|
4024
|
+
lines.push(`API: ${model.api}`);
|
|
4025
|
+
lines.push(`Base URL: ${model.baseUrl || "(默认)"}`);
|
|
4070
4026
|
|
|
4071
4027
|
const compat = getCompat(model);
|
|
4072
|
-
lines.push(`Compat
|
|
4028
|
+
lines.push(`Compat: ${JSON.stringify(compat)}`);
|
|
4073
4029
|
|
|
4074
4030
|
const adaptiveThinkingApplicable = isAdaptiveThinkingCompatApplicable(model);
|
|
4075
4031
|
const deepSeekCompatApplicable = isDeepSeekCompatCheckApplicable(model);
|
|
@@ -4082,10 +4038,10 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4082
4038
|
const advisoryMissing = missing.filter(m => !safeFixableMissing.includes(m));
|
|
4083
4039
|
|
|
4084
4040
|
if (safeFixableMissing.length > 0) {
|
|
4085
|
-
lines.push(`⚠️
|
|
4041
|
+
lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissing.join(", ")}`);
|
|
4086
4042
|
}
|
|
4087
4043
|
if (advisoryMissing.length > 0) {
|
|
4088
|
-
lines.push(`ℹ️
|
|
4044
|
+
lines.push(`ℹ️ 可选项:${advisoryMissing.join(", ")}(仅在确认支持时启用)`);
|
|
4089
4045
|
}
|
|
4090
4046
|
|
|
4091
4047
|
if (missing.length > 0) {
|
|
@@ -4093,7 +4049,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4093
4049
|
const slashIdx = key.indexOf("/");
|
|
4094
4050
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
4095
4051
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
4096
|
-
lines.push(
|
|
4052
|
+
lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat(与 baseUrl/api/apiKey/models 同级)。`);
|
|
4097
4053
|
if (adaptiveThinkingApplicable) {
|
|
4098
4054
|
appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
4099
4055
|
} else if (deepSeekCompatApplicable) {
|
|
@@ -4103,7 +4059,7 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4103
4059
|
appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
|
|
4104
4060
|
}
|
|
4105
4061
|
} else if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
4106
|
-
lines.push("✅
|
|
4062
|
+
lines.push("✅ compat 配置完整。");
|
|
4107
4063
|
appendOptionalOpenAIProxyCompatAdviceLines(lines, optionalOpenAIProxyCompat);
|
|
4108
4064
|
} else {
|
|
4109
4065
|
lines.push(...getCompatCheckNotApplicableLines(model));
|
|
@@ -4112,14 +4068,13 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4112
4068
|
if (isPromptCacheRetention400Applicable(model)) {
|
|
4113
4069
|
lines.push("");
|
|
4114
4070
|
if (options.promptCacheRetention400) {
|
|
4115
|
-
lines.push("⚠️
|
|
4071
|
+
lines.push("⚠️ 在启用 supportsLongPromptCacheRetention 时观测到一次 400 响应。");
|
|
4116
4072
|
lines.push(` ${getPromptCacheRetentionUnsupportedHint()}`);
|
|
4117
4073
|
} else {
|
|
4118
|
-
lines.push(`ℹ️
|
|
4074
|
+
lines.push(`ℹ️ 已启用长缓存保留。${getPromptCacheRetentionUnsupportedHint()}`);
|
|
4119
4075
|
}
|
|
4120
4076
|
}
|
|
4121
4077
|
|
|
4122
|
-
// ── Router/channel diagnostics ──
|
|
4123
4078
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
4124
4079
|
if (routerNotes.length > 0) {
|
|
4125
4080
|
lines.push("");
|
|
@@ -4128,31 +4083,24 @@ function buildDoctorDiagnosis(model: PiModel, options: { promptCacheRetention400
|
|
|
4128
4083
|
}
|
|
4129
4084
|
}
|
|
4130
4085
|
|
|
4131
|
-
// ── Integrity diagnostics ──
|
|
4132
4086
|
if (lastPromptIntegrityWarningAt > 0) {
|
|
4133
4087
|
const ago = Date.now() - lastPromptIntegrityWarningAt;
|
|
4134
4088
|
const mins = Math.floor(ago / 60000);
|
|
4135
4089
|
if (mins < 5) {
|
|
4136
4090
|
lines.push("");
|
|
4137
|
-
lines.push("⚠️
|
|
4138
|
-
lines.push(`
|
|
4139
|
-
lines.push(
|
|
4140
|
-
lines.push(
|
|
4141
|
-
lines.push(
|
|
4142
|
-
lines.push(
|
|
4143
|
-
lines.push(
|
|
4144
|
-
lines.push(` 3. If persistent, file an issue with this doctor output.`);
|
|
4091
|
+
lines.push("⚠️ 最近检测到 prompt 完整性问题:");
|
|
4092
|
+
lines.push(` 最近一次检测于 ${mins > 0 ? `${mins} 分钟` : `${Math.floor(ago / 1000)} 秒`}前;该轮已跳过 prompt 重排以保留结构标记。`);
|
|
4093
|
+
lines.push(" 常见原因:扩展的 system prompt 格式变化,或子串碰撞。");
|
|
4094
|
+
lines.push(" 建议步骤:");
|
|
4095
|
+
lines.push(" 1. 运行 /reload 重置(可清除瞬态问题)。");
|
|
4096
|
+
lines.push(" 2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。");
|
|
4097
|
+
lines.push(" 3. 若持续复现,请带 doctor 输出提 issue。");
|
|
4145
4098
|
}
|
|
4146
4099
|
}
|
|
4147
4100
|
|
|
4148
4101
|
return lines.join("\n");
|
|
4149
4102
|
}
|
|
4150
4103
|
|
|
4151
|
-
/**
|
|
4152
|
-
* Build a "Cache diagnosis" section for low-hit causes, appended to doctor output.
|
|
4153
|
-
* This is a separate function because it depends on per-session state (recent samples,
|
|
4154
|
-
* per-model stats) that is not available at the module level.
|
|
4155
|
-
*/
|
|
4156
4104
|
function buildLowHitDiagnosis(
|
|
4157
4105
|
model: PiModel,
|
|
4158
4106
|
adapter: CacheProviderAdapter | undefined,
|
|
@@ -4161,101 +4109,74 @@ function buildLowHitDiagnosis(
|
|
|
4161
4109
|
): string[] {
|
|
4162
4110
|
const lines: string[] = [];
|
|
4163
4111
|
|
|
4164
|
-
// 1. Missing compat flags (adapter-aware: DeepSeek has extra reasoning compat)
|
|
4165
4112
|
const fixSugLHD = buildFixSuggestion(model);
|
|
4166
4113
|
const safeFixableMissingLHD = fixSugLHD ? Object.keys(fixSugLHD.compatKeys) : [];
|
|
4167
|
-
|
|
4168
|
-
// 2. Router/channel risk (reuse existing check)
|
|
4169
4114
|
const routerNotes = describeRouterChannelDiagnostics(model);
|
|
4170
|
-
|
|
4171
|
-
// 3. Recent samples missing usage fields
|
|
4172
4115
|
const missingUsageSamples = samples.filter((s) => s.missingUsageFields).length;
|
|
4173
|
-
|
|
4174
|
-
// 4. Recent trend analysis
|
|
4175
4116
|
const recent10 = samples.slice(-10);
|
|
4176
4117
|
const recent10Hits = recent10.filter((s) => s.hit).length;
|
|
4177
4118
|
const recent10Total = recent10.length;
|
|
4178
4119
|
const recent10Cached = recent10.reduce((sum, s) => sum + s.cachedInputTokens, 0);
|
|
4179
4120
|
const recent10Input = recent10.reduce((sum, s) => sum + s.totalInputTokens, 0);
|
|
4180
|
-
|
|
4181
|
-
// 5. Today's overall trend from persisted stats
|
|
4182
4121
|
const todayStats = stats ?? emptyCacheStats();
|
|
4183
4122
|
|
|
4184
4123
|
const hasMissingCompat = safeFixableMissingLHD.length > 0;
|
|
4185
4124
|
const hasRouterRisk = routerNotes.length > 0;
|
|
4186
4125
|
const hasUsageMissing = missingUsageSamples > 0;
|
|
4187
|
-
|
|
4188
|
-
// Today's cached-token ratio is used both inside and outside the recent-sample
|
|
4189
|
-
// branch. Keep it block-external so doctor/stats never throw for low-hit
|
|
4190
|
-
// models that have persisted counters but no recent in-memory samples.
|
|
4191
4126
|
const todayHitRatio = todayStats.totalInputTokens > 0
|
|
4192
4127
|
? Math.round((todayStats.cachedInputTokens / todayStats.totalInputTokens) * 100)
|
|
4193
4128
|
: 0;
|
|
4194
4129
|
|
|
4195
|
-
// Determine if there are actual issues worth flagging
|
|
4196
4130
|
const hasActualIssues = hasMissingCompat || hasUsageMissing ||
|
|
4197
|
-
// Low hit trend (today total > 3 and hit ratio < 30%)
|
|
4198
4131
|
(todayStats.totalRequests > 3 && todayStats.totalInputTokens > 0 &&
|
|
4199
4132
|
(todayStats.cachedInputTokens / todayStats.totalInputTokens) < 0.3) ||
|
|
4200
|
-
// Low hit rate in recent samples (recent10Total >= 3 and all misses)
|
|
4201
4133
|
(recent10Total >= 3 && recent10Hits === 0);
|
|
4202
4134
|
|
|
4203
|
-
// Skip section if no issues
|
|
4204
4135
|
if (!hasActualIssues && !(hasRouterRisk && (hasMissingCompat || hasUsageMissing))) {
|
|
4205
4136
|
return lines;
|
|
4206
4137
|
}
|
|
4207
4138
|
|
|
4208
4139
|
lines.push("");
|
|
4209
|
-
lines.push("──
|
|
4140
|
+
lines.push("── 缓存诊断 ──");
|
|
4210
4141
|
|
|
4211
|
-
// Priority 1: missing compat flags
|
|
4212
4142
|
if (hasMissingCompat) {
|
|
4213
|
-
lines.push(`⚠️
|
|
4214
|
-
lines.push("
|
|
4215
|
-
lines.push("
|
|
4143
|
+
lines.push(`⚠️ 缺少 compat 字段:${safeFixableMissingLHD.join(", ")}`);
|
|
4144
|
+
lines.push(" 这些字段有助于稳定 prompt 缓存与上游路由粘性。");
|
|
4145
|
+
lines.push(" 可运行 /cache-optimizer compat 查看编辑建议。");
|
|
4216
4146
|
}
|
|
4217
4147
|
|
|
4218
|
-
// Priority 2: router/channel risk (only flag when there are other issues)
|
|
4219
|
-
// Router notes are already shown in the main doctor output, so we only
|
|
4220
|
-
// mention them in the diagnosis section when they compound a problem.
|
|
4221
4148
|
if (hasRouterRisk && (hasMissingCompat || hasUsageMissing || hasActualIssues)) {
|
|
4222
|
-
lines.push("🔀
|
|
4149
|
+
lines.push("🔀 检测到路由/代理风险 —— 详见上方路由诊断。");
|
|
4223
4150
|
}
|
|
4224
4151
|
|
|
4225
|
-
// Priority 3: usage fields missing
|
|
4226
4152
|
if (hasUsageMissing) {
|
|
4227
|
-
lines.push(`⚠️
|
|
4228
|
-
lines.push(" Footer
|
|
4229
|
-
lines.push("
|
|
4153
|
+
lines.push(`⚠️ 最近 ${samples.length} 条样本里有 ${missingUsageSamples} 条缺少或返回了空的 usage 字段。`);
|
|
4154
|
+
lines.push(" Footer 命中率可能会被低估。");
|
|
4155
|
+
lines.push(" 请确认代理会返回 prompt 级 usage(如 prompt_tokens、input_tokens_details)。");
|
|
4230
4156
|
}
|
|
4231
4157
|
|
|
4232
|
-
// Priority 4: recent trend low
|
|
4233
4158
|
if (recent10Total > 0) {
|
|
4234
|
-
const hitRatio = recent10Input > 0 ? Math.round((recent10Cached / recent10Input) * 100) : 0;
|
|
4235
4159
|
if (recent10Hits === 0 && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
4236
|
-
lines.push(`📉
|
|
4237
|
-
lines.push("
|
|
4238
|
-
lines.push("
|
|
4239
|
-
lines.push(" Verify upstream routing stickiness and supportsLongPromptCacheRetention compat.");
|
|
4160
|
+
lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(最近 ${recent10Total} 条样本)。`);
|
|
4161
|
+
lines.push(" 常见原因:代理把请求路由到不同后端,或 prompt 前缀在各轮之间变化。");
|
|
4162
|
+
lines.push(" 请检查上游路由粘性,以及 supportsLongPromptCacheRetention 配置是否正确。");
|
|
4240
4163
|
} else if (todayHitRatio < 30 && todayStats.totalRequests > 3) {
|
|
4241
|
-
lines.push(`📉
|
|
4242
|
-
lines.push("
|
|
4164
|
+
lines.push(`📉 今日缓存命中率偏低:${todayHitRatio}%(共 ${todayStats.totalRequests} 次请求)。`);
|
|
4165
|
+
lines.push(" 请检查 compat 配置与代理上游路由。");
|
|
4243
4166
|
}
|
|
4244
4167
|
|
|
4245
|
-
// Show brief trend summary if there are enough samples
|
|
4246
4168
|
if (recent10Total >= 3) {
|
|
4247
4169
|
const trend = formatRecentTrendSummary(samples, 10);
|
|
4248
4170
|
lines.push(`📊 ${trend}`);
|
|
4249
4171
|
}
|
|
4250
4172
|
}
|
|
4251
4173
|
|
|
4252
|
-
// For fully configured but low hit models, emphasize sticky routing
|
|
4253
4174
|
if (!hasMissingCompat && !hasRouterRisk && todayStats.totalRequests > 3 && todayHitRatio < 30) {
|
|
4254
|
-
lines.push("💡
|
|
4255
|
-
lines.push("
|
|
4256
|
-
lines.push(" •
|
|
4257
|
-
lines.push(" •
|
|
4258
|
-
lines.push(" •
|
|
4175
|
+
lines.push("💡 compat 已配置完整,但缓存命中率仍然偏低。");
|
|
4176
|
+
lines.push(" 可能原因:");
|
|
4177
|
+
lines.push(" • 代理仍把请求分发到多个后端 —— 请检查代理侧的会话粘性。");
|
|
4178
|
+
lines.push(" • prompt 前缀每轮都在变化 —— 请检查 system prompt 中的动态上下文。");
|
|
4179
|
+
lines.push(" • provider 没有返回缓存 usage 字段 —— footer 无法准确测量命中。");
|
|
4259
4180
|
}
|
|
4260
4181
|
|
|
4261
4182
|
return lines;
|
|
@@ -4282,16 +4203,16 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
4282
4203
|
const slashIdx = key.indexOf("/");
|
|
4283
4204
|
const providerLabel = slashIdx > 0 ? key.slice(0, slashIdx) : key;
|
|
4284
4205
|
const modelsJsonPath = getModelsJsonDisplayPath();
|
|
4285
|
-
lines.push(
|
|
4206
|
+
lines.push(`当前模型:${key}`);
|
|
4286
4207
|
if (safeFixableMissingC.length > 0) {
|
|
4287
|
-
lines.push(
|
|
4208
|
+
lines.push(`可安全修复:${safeFixableMissingC.join(", ")}`);
|
|
4288
4209
|
}
|
|
4289
4210
|
if (advisoryMissingC.length > 0) {
|
|
4290
|
-
lines.push(
|
|
4211
|
+
lines.push(`可选项:${advisoryMissingC.join(", ")}(仅在确认支持时启用)`);
|
|
4291
4212
|
}
|
|
4292
4213
|
lines.push("");
|
|
4293
|
-
lines.push(
|
|
4294
|
-
lines.push(
|
|
4214
|
+
lines.push(`编辑 ${modelsJsonPath} -> providers["${providerLabel}"] -> compat`);
|
|
4215
|
+
lines.push("(与 baseUrl/api/apiKey/models 同级)。");
|
|
4295
4216
|
if (adaptiveThinkingApplicable) {
|
|
4296
4217
|
appendAdaptiveThinkingCompatAdviceLines(lines, missing, { providerLabel, modelId: model.id });
|
|
4297
4218
|
} else if (deepSeekCompatApplicable) {
|
|
@@ -4302,10 +4223,9 @@ function buildCompatDiagnosis(model: PiModel): string | undefined {
|
|
|
4302
4223
|
}
|
|
4303
4224
|
}
|
|
4304
4225
|
|
|
4305
|
-
// When compat is fully configured but router/optional notes exist, prefix the status.
|
|
4306
4226
|
if ((routerNotes.length > 0 || optionalOpenAIProxyCompat.length > 0) && missing.length === 0) {
|
|
4307
4227
|
if (adaptiveThinkingApplicable || deepSeekCompatApplicable || isCompatCheckApplicable(model)) {
|
|
4308
|
-
lines.push("✅
|
|
4228
|
+
lines.push("✅ compat 配置完整。");
|
|
4309
4229
|
if (isPromptCacheRetention400Applicable(model)) {
|
|
4310
4230
|
lines.push(getPromptCacheRetentionUnsupportedHint());
|
|
4311
4231
|
}
|
|
@@ -5422,6 +5342,10 @@ export const __internals_for_tests = {
|
|
|
5422
5342
|
hashSessionId,
|
|
5423
5343
|
makeSessionModelKey,
|
|
5424
5344
|
modelKeyFromSessionKey,
|
|
5345
|
+
makePromptRewriteContextKey,
|
|
5346
|
+
rememberPromptRewriteContext,
|
|
5347
|
+
getPromptRewriteContext,
|
|
5348
|
+
PROMPT_REWRITE_CONTEXT_TTL_MS,
|
|
5425
5349
|
filterRestorableStatsForSession,
|
|
5426
5350
|
parsePersistedRoutedModelRef,
|
|
5427
5351
|
routedModelRefToPiModel,
|
|
@@ -5495,11 +5419,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
5495
5419
|
let latestCacheHint: PiCacheHintSnapshot | undefined;
|
|
5496
5420
|
// OMP divergence: prompt rewriting moved from before_agent_start to
|
|
5497
5421
|
// before_provider_request (OMP's before_agent_start can only inject messages,
|
|
5498
|
-
// not mutate systemPrompt).
|
|
5499
|
-
//
|
|
5500
|
-
|
|
5501
|
-
|
|
5502
|
-
let pendingRoutedModel: PiModel | undefined;
|
|
5422
|
+
// not mutate systemPrompt). Store prompt options per session/model so an
|
|
5423
|
+
// overlapping turn or sub-agent cannot overwrite another request's rewrite
|
|
5424
|
+
// context before before_provider_request fires.
|
|
5425
|
+
const promptRewriteContexts = new Map<string, PromptRewriteContext>();
|
|
5503
5426
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
5504
5427
|
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
5505
5428
|
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
@@ -5823,7 +5746,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5823
5746
|
const statsText = formatCacheStats(realEntry.adapter, realEntry.stats);
|
|
5824
5747
|
statusText = runtimeOptimizerEnabled
|
|
5825
5748
|
? statsText
|
|
5826
|
-
:
|
|
5749
|
+
: `缓存优化已关闭 · ${statsText}`;
|
|
5827
5750
|
}
|
|
5828
5751
|
}
|
|
5829
5752
|
|
|
@@ -5834,7 +5757,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5834
5757
|
const sk = displayModel ? sessionModelKey(displayModel) : undefined;
|
|
5835
5758
|
const stats = sk ? cacheStatsByModel[sk] : undefined;
|
|
5836
5759
|
const statsText = formatCacheStats(adapter, stats ?? emptyCacheStats());
|
|
5837
|
-
statusText = runtimeOptimizerEnabled ? statsText :
|
|
5760
|
+
statusText = runtimeOptimizerEnabled ? statsText : `缓存优化已关闭 · ${statsText}`;
|
|
5838
5761
|
}
|
|
5839
5762
|
|
|
5840
5763
|
// If optimizeSystemPrompt detected structural truncation on this or
|
|
@@ -5842,7 +5765,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5842
5765
|
// /reload before continuing. The flag resets after emission so a
|
|
5843
5766
|
// single-turn glitch does not permanently taint the footer.
|
|
5844
5767
|
if (promptTruncationDetected && statusText !== undefined) {
|
|
5845
|
-
statusText = statusText + " ⚠️
|
|
5768
|
+
statusText = statusText + " ⚠️ 完整性";
|
|
5846
5769
|
promptTruncationDetected = false;
|
|
5847
5770
|
lastPromptIntegrityWarningAt = Date.now();
|
|
5848
5771
|
|
|
@@ -5850,12 +5773,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
5850
5773
|
if (!integrityNotificationShown) {
|
|
5851
5774
|
integrityNotificationShown = true;
|
|
5852
5775
|
ctx.ui.notify(
|
|
5853
|
-
`⚠️ ${LOG_PREFIX}
|
|
5854
|
-
|
|
5855
|
-
|
|
5856
|
-
`1.
|
|
5857
|
-
`2.
|
|
5858
|
-
`3.
|
|
5776
|
+
`⚠️ ${LOG_PREFIX}:本轮重排导致一个 prompt 结构标记丢失。` +
|
|
5777
|
+
`为保证完整性,已回退到原始 prompt。\n\n` +
|
|
5778
|
+
`恢复步骤:\n` +
|
|
5779
|
+
`1. 运行 /reload 重置(可清除瞬态问题)。\n` +
|
|
5780
|
+
`2. 设置 PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE=1 后 /reload,禁用重排。\n` +
|
|
5781
|
+
`3. 若持续复现,请运行 /cache-optimizer doctor 并提 issue(不要包含 API key / prompt)。`,
|
|
5859
5782
|
"warning",
|
|
5860
5783
|
);
|
|
5861
5784
|
}
|
|
@@ -5874,7 +5797,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5874
5797
|
// OpenAI-compatible proxies) do NOT trigger the marker — the doctor/compat
|
|
5875
5798
|
// commands still mention them as optional guidance.
|
|
5876
5799
|
if (buildFixSuggestion(displayModel) !== undefined) {
|
|
5877
|
-
statusText = statusText + " ⚠️
|
|
5800
|
+
statusText = statusText + " ⚠️ 配置";
|
|
5878
5801
|
}
|
|
5879
5802
|
}
|
|
5880
5803
|
|
|
@@ -5913,17 +5836,18 @@ export default function (pi: ExtensionAPI) {
|
|
|
5913
5836
|
? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
|
|
5914
5837
|
: undefined;
|
|
5915
5838
|
|
|
5916
|
-
// OMP divergence: before_agent_start in OMP can only inject messages (return
|
|
5917
|
-
// { message }), NOT mutate systemPrompt. We cache the prompt options + route
|
|
5918
|
-
// snapshot here so before_provider_request can apply the 3-step pipeline to
|
|
5919
|
-
// the provider payload. If OMP does not supply systemPromptOptions, skill
|
|
5920
|
-
// compression and stable-prefix reorder are skipped (only churn strip runs).
|
|
5921
5839
|
const eventRecord = asRecord(event);
|
|
5922
|
-
|
|
5923
|
-
pendingRouteSnapshot = routeSnapshot;
|
|
5924
|
-
pendingRoutedModel = routedModel ?? _ctx.model;
|
|
5925
|
-
|
|
5840
|
+
const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
|
|
5926
5841
|
const model = routedModel ?? _ctx.model;
|
|
5842
|
+
const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
|
|
5843
|
+
rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
|
|
5844
|
+
options,
|
|
5845
|
+
routeSnapshot,
|
|
5846
|
+
routedModel: model,
|
|
5847
|
+
timestamp: Date.now(),
|
|
5848
|
+
});
|
|
5849
|
+
|
|
5850
|
+
const modelForHint = model;
|
|
5927
5851
|
const promptCacheKey = getSessionPromptCacheKey(_ctx);
|
|
5928
5852
|
const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
|
|
5929
5853
|
const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
|
|
@@ -5931,9 +5855,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
5931
5855
|
sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
|
|
5932
5856
|
virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
|
|
5933
5857
|
virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
|
|
5934
|
-
upstreamProvider: routeSnapshot?.provider ??
|
|
5935
|
-
upstreamModelId: routeSnapshot?.modelId ??
|
|
5936
|
-
api:
|
|
5858
|
+
upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
|
|
5859
|
+
upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
|
|
5860
|
+
api: modelForHint?.api,
|
|
5937
5861
|
systemPrompt: rawSystemPrompt,
|
|
5938
5862
|
promptCacheKey,
|
|
5939
5863
|
cacheRetention,
|
|
@@ -5961,21 +5885,24 @@ export default function (pi: ExtensionAPI) {
|
|
|
5961
5885
|
requestModel &&
|
|
5962
5886
|
!isResponsesPromptRewriteBypassApi(requestModel.api)
|
|
5963
5887
|
) {
|
|
5888
|
+
const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
|
|
5889
|
+
const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
|
|
5890
|
+
const promptOptions = rewriteContext?.options;
|
|
5964
5891
|
const original = extractSystemPrompt(resultPayload);
|
|
5965
5892
|
if (original && original.trim().length > 0) {
|
|
5966
5893
|
// Step 1: strip per-turn churn from <session-overview>.
|
|
5967
5894
|
const stripped = stripSessionOverviewChurn(original);
|
|
5968
5895
|
|
|
5969
5896
|
// Step 2: compress skills XML → one-line index (requires cached options).
|
|
5970
|
-
const compressed =
|
|
5971
|
-
? compressSkillsInSystemPrompt(stripped,
|
|
5897
|
+
const compressed = promptOptions
|
|
5898
|
+
? compressSkillsInSystemPrompt(stripped, promptOptions)
|
|
5972
5899
|
: stripped;
|
|
5973
5900
|
|
|
5974
5901
|
// Step 3: lift stable content above dynamic content (requires cached options).
|
|
5975
5902
|
let finalPrompt = compressed;
|
|
5976
5903
|
let changed = false;
|
|
5977
|
-
if (
|
|
5978
|
-
const optimized = optimizeSystemPrompt(compressed,
|
|
5904
|
+
if (promptOptions) {
|
|
5905
|
+
const optimized = optimizeSystemPrompt(compressed, promptOptions);
|
|
5979
5906
|
if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
|
|
5980
5907
|
finalPrompt = optimized.systemPrompt;
|
|
5981
5908
|
changed = true;
|
|
@@ -6020,9 +5947,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
6020
5947
|
if (warnedPromptCacheRetention400Models.has(key)) return;
|
|
6021
5948
|
warnedPromptCacheRetention400Models.add(key);
|
|
6022
5949
|
ctx.ui.notify(
|
|
6023
|
-
`⚠️ ${LOG_PREFIX}
|
|
5950
|
+
`⚠️ ${LOG_PREFIX}:${key} 在启用 supportsLongPromptCacheRetention 时返回了 HTTP 400。` +
|
|
6024
5951
|
getPromptCacheRetentionUnsupportedHint() +
|
|
6025
|
-
`
|
|
5952
|
+
` 可运行 /cache-optimizer doctor 查看精确编辑位置。`,
|
|
6026
5953
|
"warning",
|
|
6027
5954
|
);
|
|
6028
5955
|
});
|
|
@@ -6111,16 +6038,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6111
6038
|
resetCurrentSessionStats();
|
|
6112
6039
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6113
6040
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6114
|
-
cmdCtx.ui.notify(`✅
|
|
6041
|
+
cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6115
6042
|
} else if (subcommand === "disable") {
|
|
6116
6043
|
setRuntimeOptimizerEnabled(false);
|
|
6117
6044
|
resetCurrentSessionStats();
|
|
6118
6045
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6119
6046
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6120
|
-
cmdCtx.ui.notify(`⏸️
|
|
6047
|
+
cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6121
6048
|
} else if (subcommand === "doctor") {
|
|
6122
6049
|
if (!model) {
|
|
6123
|
-
cmdCtx.ui.notify("
|
|
6050
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6124
6051
|
return;
|
|
6125
6052
|
}
|
|
6126
6053
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
@@ -6135,7 +6062,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6135
6062
|
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
6136
6063
|
} else if (subcommand === "stats") {
|
|
6137
6064
|
if (!model) {
|
|
6138
|
-
cmdCtx.ui.notify("
|
|
6065
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6139
6066
|
return;
|
|
6140
6067
|
}
|
|
6141
6068
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6146,7 +6073,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6146
6073
|
cmdCtx.ui.notify(output, "info");
|
|
6147
6074
|
} else if (subcommand === "compat") {
|
|
6148
6075
|
if (!model) {
|
|
6149
|
-
cmdCtx.ui.notify("
|
|
6076
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6150
6077
|
return;
|
|
6151
6078
|
}
|
|
6152
6079
|
const compatResult = buildCompatDiagnosis(model);
|
|
@@ -6155,19 +6082,19 @@ export default function (pi: ExtensionAPI) {
|
|
|
6155
6082
|
} else {
|
|
6156
6083
|
cmdCtx.ui.notify(
|
|
6157
6084
|
isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
6158
|
-
? "✅
|
|
6085
|
+
? "✅ compat 配置完整。"
|
|
6159
6086
|
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
6160
6087
|
"info",
|
|
6161
6088
|
);
|
|
6162
6089
|
}
|
|
6163
6090
|
} else if (subcommand === "reset") {
|
|
6164
6091
|
if (!model) {
|
|
6165
|
-
cmdCtx.ui.notify("
|
|
6092
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6166
6093
|
return;
|
|
6167
6094
|
}
|
|
6168
6095
|
const adapter = selectAdapterForModel(model);
|
|
6169
6096
|
if (!adapter) {
|
|
6170
|
-
cmdCtx.ui.notify("ℹ️
|
|
6097
|
+
cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
|
|
6171
6098
|
return;
|
|
6172
6099
|
}
|
|
6173
6100
|
|
|
@@ -6185,21 +6112,21 @@ export default function (pi: ExtensionAPI) {
|
|
|
6185
6112
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6186
6113
|
|
|
6187
6114
|
cmdCtx.ui.notify(
|
|
6188
|
-
`✅
|
|
6189
|
-
"
|
|
6190
|
-
"
|
|
6115
|
+
`✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
|
|
6116
|
+
"上游 provider 的 prompt cache 未被修改。" +
|
|
6117
|
+
"后续请求会为当前 OMP session 开始新的统计桶。",
|
|
6191
6118
|
"info",
|
|
6192
6119
|
);
|
|
6193
6120
|
} else if (subcommand === "fix") {
|
|
6194
6121
|
if (!model) {
|
|
6195
|
-
cmdCtx.ui.notify("
|
|
6122
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6196
6123
|
return;
|
|
6197
6124
|
}
|
|
6198
6125
|
|
|
6199
6126
|
const suggestion = buildFixSuggestion(model);
|
|
6200
6127
|
if (!suggestion) {
|
|
6201
6128
|
const key = modelKey(model);
|
|
6202
|
-
cmdCtx.ui.notify(`✅
|
|
6129
|
+
cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
|
|
6203
6130
|
return;
|
|
6204
6131
|
}
|
|
6205
6132
|
|
|
@@ -6210,14 +6137,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
6210
6137
|
const compatResult = buildCompatDiagnosis(model);
|
|
6211
6138
|
const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
|
|
6212
6139
|
cmdCtx.ui.notify(
|
|
6213
|
-
`📝
|
|
6214
|
-
|
|
6215
|
-
|
|
6216
|
-
|
|
6140
|
+
`📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
|
|
6141
|
+
`提供方:${suggestion.providerLabel}\n` +
|
|
6142
|
+
`模型:${suggestion.modelId}\n\n` +
|
|
6143
|
+
`在模型级 compat(模型条目下)添加这些键:\n\n` +
|
|
6217
6144
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6218
|
-
|
|
6145
|
+
`或放到 provider 级(providers["${suggestion.providerLabel}"] 下):\n\n` +
|
|
6219
6146
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6220
|
-
|
|
6147
|
+
`编辑后运行 /reload。\n` +
|
|
6221
6148
|
(compatResult ? `\n${compatResult}` : ""),
|
|
6222
6149
|
"info",
|
|
6223
6150
|
);
|
|
@@ -6225,31 +6152,31 @@ export default function (pi: ExtensionAPI) {
|
|
|
6225
6152
|
// Try interactive selection menu when UI supports it
|
|
6226
6153
|
if (cmdCtx.hasUI) {
|
|
6227
6154
|
const menuOptions = [
|
|
6228
|
-
"
|
|
6229
|
-
"
|
|
6230
|
-
"
|
|
6231
|
-
"
|
|
6232
|
-
"
|
|
6233
|
-
"
|
|
6234
|
-
"
|
|
6235
|
-
"
|
|
6155
|
+
"启用 —— 打开运行时优化",
|
|
6156
|
+
"关闭 —— 关闭运行时优化",
|
|
6157
|
+
"诊断 —— 查看缓存配置",
|
|
6158
|
+
"统计 —— 查看缓存统计与趋势",
|
|
6159
|
+
"兼容 —— 查看 compat 建议",
|
|
6160
|
+
"修复 —— 查看 compat 修复建议(会写 models.yml 时另行提示)",
|
|
6161
|
+
"重置 —— 重置本地 session 统计",
|
|
6162
|
+
"取消",
|
|
6236
6163
|
];
|
|
6237
|
-
const choice = await cmdCtx.ui.select("
|
|
6164
|
+
const choice = await cmdCtx.ui.select("缓存优化器", menuOptions);
|
|
6238
6165
|
if (choice === menuOptions[0]) {
|
|
6239
6166
|
setRuntimeOptimizerEnabled(true);
|
|
6240
6167
|
resetCurrentSessionStats();
|
|
6241
6168
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6242
6169
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6243
|
-
cmdCtx.ui.notify(`✅
|
|
6170
|
+
cmdCtx.ui.notify(`✅ 已为当前 OMP 进程开启缓存优化。已重置当前 session 统计,方便做前后对比。\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6244
6171
|
} else if (choice === menuOptions[1]) {
|
|
6245
6172
|
setRuntimeOptimizerEnabled(false);
|
|
6246
6173
|
resetCurrentSessionStats();
|
|
6247
6174
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6248
6175
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6249
|
-
cmdCtx.ui.notify(`⏸️
|
|
6176
|
+
cmdCtx.ui.notify(`⏸️ 已为当前 OMP 进程关闭缓存优化。已重置当前 session 统计,并会在关闭状态下继续采集用于对比。\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6250
6177
|
} else if (choice === menuOptions[2]) {
|
|
6251
6178
|
if (!model) {
|
|
6252
|
-
cmdCtx.ui.notify("
|
|
6179
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6253
6180
|
} else {
|
|
6254
6181
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
6255
6182
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6264,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6264
6191
|
}
|
|
6265
6192
|
} else if (choice === menuOptions[3]) {
|
|
6266
6193
|
if (!model) {
|
|
6267
|
-
cmdCtx.ui.notify("
|
|
6194
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6268
6195
|
} else {
|
|
6269
6196
|
const adapter = selectAdapterForModel(model);
|
|
6270
6197
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
@@ -6275,7 +6202,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6275
6202
|
}
|
|
6276
6203
|
} else if (choice === menuOptions[4]) {
|
|
6277
6204
|
if (!model) {
|
|
6278
|
-
cmdCtx.ui.notify("
|
|
6205
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6279
6206
|
} else {
|
|
6280
6207
|
const compatResult = buildCompatDiagnosis(model);
|
|
6281
6208
|
if (compatResult) {
|
|
@@ -6283,7 +6210,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6283
6210
|
} else {
|
|
6284
6211
|
cmdCtx.ui.notify(
|
|
6285
6212
|
isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)
|
|
6286
|
-
? "✅
|
|
6213
|
+
? "✅ compat 配置完整。"
|
|
6287
6214
|
: getCompatCheckNotApplicableLines(model).join("\n"),
|
|
6288
6215
|
"info",
|
|
6289
6216
|
);
|
|
@@ -6292,13 +6219,13 @@ export default function (pi: ExtensionAPI) {
|
|
|
6292
6219
|
} else if (choice === menuOptions[5]) {
|
|
6293
6220
|
// Fix — auto-fix compat issues
|
|
6294
6221
|
if (!model) {
|
|
6295
|
-
cmdCtx.ui.notify("
|
|
6222
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6296
6223
|
return;
|
|
6297
6224
|
}
|
|
6298
6225
|
const suggestion = buildFixSuggestion(model);
|
|
6299
6226
|
if (!suggestion) {
|
|
6300
6227
|
const key = modelKey(model);
|
|
6301
|
-
cmdCtx.ui.notify(`✅
|
|
6228
|
+
cmdCtx.ui.notify(`✅ "${key}" 当前无需修复,compat 已配置完成。`, "info");
|
|
6302
6229
|
return;
|
|
6303
6230
|
}
|
|
6304
6231
|
|
|
@@ -6306,30 +6233,30 @@ export default function (pi: ExtensionAPI) {
|
|
|
6306
6233
|
const compatResult = buildCompatDiagnosis(model);
|
|
6307
6234
|
const yamlSnippet = formatCompatKeysForInsertion(suggestion.compatKeys);
|
|
6308
6235
|
cmdCtx.ui.notify(
|
|
6309
|
-
`📝
|
|
6310
|
-
|
|
6311
|
-
|
|
6312
|
-
|
|
6236
|
+
`📝 ${getModelsJsonDisplayPath()} 的手动修复建议:\n\n` +
|
|
6237
|
+
`提供方:${suggestion.providerLabel}\n` +
|
|
6238
|
+
`模型:${suggestion.modelId}\n\n` +
|
|
6239
|
+
`添加这些 compat 键:\n\n` +
|
|
6313
6240
|
`compat:\n${yamlSnippet}\n\n` +
|
|
6314
|
-
|
|
6241
|
+
`编辑后运行 /reload。\n` +
|
|
6315
6242
|
(compatResult ? `\n${compatResult}` : ""),
|
|
6316
6243
|
"info",
|
|
6317
6244
|
);
|
|
6318
6245
|
} else if (choice === menuOptions[6]) {
|
|
6319
6246
|
if (!model) {
|
|
6320
|
-
cmdCtx.ui.notify("
|
|
6247
|
+
cmdCtx.ui.notify("当前没有活动模型。请先用 /model 或 omp --model 选择模型。", "warning");
|
|
6321
6248
|
} else {
|
|
6322
6249
|
const adapter = selectAdapterForModel(model);
|
|
6323
6250
|
if (!adapter) {
|
|
6324
|
-
cmdCtx.ui.notify("ℹ️
|
|
6251
|
+
cmdCtx.ui.notify("ℹ️ 当前活动模型未匹配到缓存适配器,无需重置统计。", "info");
|
|
6325
6252
|
} else {
|
|
6326
6253
|
const displayKey = modelKey(model);
|
|
6327
6254
|
resetStatsForModel(model);
|
|
6328
6255
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6329
6256
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6330
6257
|
cmdCtx.ui.notify(
|
|
6331
|
-
`✅
|
|
6332
|
-
"
|
|
6258
|
+
`✅ 已重置 "${displayKey}" 的本地 session 缓存统计。` +
|
|
6259
|
+
"上游 provider 的 prompt cache 未被修改。",
|
|
6333
6260
|
"info",
|
|
6334
6261
|
);
|
|
6335
6262
|
}
|
|
@@ -6341,14 +6268,14 @@ export default function (pi: ExtensionAPI) {
|
|
|
6341
6268
|
|
|
6342
6269
|
// Fallback: text help when no interactive UI
|
|
6343
6270
|
const diagnosis: string[] = [];
|
|
6344
|
-
diagnosis.push("📋 /cache-optimizer
|
|
6345
|
-
diagnosis.push(" enable
|
|
6346
|
-
diagnosis.push(" disable
|
|
6347
|
-
diagnosis.push(" doctor
|
|
6348
|
-
diagnosis.push(" stats
|
|
6349
|
-
diagnosis.push(" compat
|
|
6350
|
-
diagnosis.push(" fix
|
|
6351
|
-
diagnosis.push(" reset
|
|
6271
|
+
diagnosis.push("📋 /cache-optimizer 命令:");
|
|
6272
|
+
diagnosis.push(" enable —— 为当前 OMP 进程开启 prompt/cache 优化");
|
|
6273
|
+
diagnosis.push(" disable —— 为当前 OMP 进程关闭 prompt/cache 优化");
|
|
6274
|
+
diagnosis.push(" doctor —— 查看当前模型/provider/api/baseUrl/compat 与低命中诊断");
|
|
6275
|
+
diagnosis.push(" stats —— 查看当前活动模型的统计桶与近期趋势");
|
|
6276
|
+
diagnosis.push(" compat —— 查看 compat 建议与编辑位置");
|
|
6277
|
+
diagnosis.push(" fix —— 查看 compat 修复建议(需要 UI 时另有提示)");
|
|
6278
|
+
diagnosis.push(" reset —— 重置当前模型的本地 session 统计(不影响上游)");
|
|
6352
6279
|
diagnosis.push("");
|
|
6353
6280
|
diagnosis.push(formatOptimizerRuntimeMode());
|
|
6354
6281
|
diagnosis.push("");
|
|
@@ -6356,17 +6283,17 @@ export default function (pi: ExtensionAPI) {
|
|
|
6356
6283
|
const displayKey = modelKey(model);
|
|
6357
6284
|
const missing = describeMissingCacheCompatForModel(model);
|
|
6358
6285
|
if (missing.length > 0) {
|
|
6359
|
-
diagnosis.push(`⚠️
|
|
6360
|
-
diagnosis.push('
|
|
6286
|
+
diagnosis.push(`⚠️ 当前模型 "${displayKey}" 缺少 compat:${missing.join(", ")}`);
|
|
6287
|
+
diagnosis.push('可运行 "/cache-optimizer compat" 查看编辑建议。');
|
|
6361
6288
|
} else if (isAdaptiveThinkingCompatApplicable(model) || isDeepSeekCompatCheckApplicable(model) || isCompatCheckApplicable(model)) {
|
|
6362
|
-
diagnosis.push(`✅
|
|
6289
|
+
diagnosis.push(`✅ 当前模型 "${displayKey}":compat 配置完整。`);
|
|
6363
6290
|
} else {
|
|
6364
|
-
diagnosis.push(`ℹ️
|
|
6291
|
+
diagnosis.push(`ℹ️ 当前模型 "${displayKey}":不适用 compat 检查。`);
|
|
6365
6292
|
const detailLines = getCompatCheckNotApplicableLines(model).slice(1);
|
|
6366
6293
|
for (const line of detailLines) diagnosis.push(line);
|
|
6367
6294
|
}
|
|
6368
6295
|
} else {
|
|
6369
|
-
diagnosis.push("
|
|
6296
|
+
diagnosis.push("当前没有活动模型。");
|
|
6370
6297
|
}
|
|
6371
6298
|
cmdCtx.ui.notify(diagnosis.join("\n"), "info");
|
|
6372
6299
|
}
|