omp-cache-optimizer 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/index.ts +62 -61
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
用于提升 OMP 中 provider 侧 KV Cache / Prompt Cache 命中率的扩展:把稳定 prompt 内容前置,给 OpenAI-compatible 请求补保守的 `prompt_cache_key`,提示代理渠道常见缓存路由兼容问题,并在底部显示只读缓存统计。
|
|
10
10
|
|
|
11
|
-
> 本包从 `pi-cache-optimizer` fork
|
|
11
|
+
> 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从旧状态目录 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
|
|
12
12
|
|
|
13
13
|
## 与原项目的关键差异
|
|
14
14
|
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
omp install npm:omp-cache-optimizer
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
如果之前安装过原版本:
|
|
67
67
|
|
|
68
68
|
```bash
|
|
69
69
|
omp remove npm:pi-cache-optimizer && omp install npm:omp-cache-optimizer
|
|
@@ -102,7 +102,7 @@ OMP 0.79.7 及之后,`omp update` 默认只更新 OMP 本体。若要更新已
|
|
|
102
102
|
|
|
103
103
|
LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completions` 代理,常会把同一个 session 分散到多个上游后端,导致 provider 侧 prompt cache 被拆散。
|
|
104
104
|
|
|
105
|
-
**OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat
|
|
105
|
+
**OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(原项目中的旧字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
|
|
106
106
|
|
|
107
107
|
`models.yml` 示例:
|
|
108
108
|
|
|
@@ -129,13 +129,13 @@ providers:
|
|
|
129
129
|
|
|
130
130
|
## Anthropic adaptive thinking 模型
|
|
131
131
|
|
|
132
|
-
**OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking`
|
|
132
|
+
**OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与原项目中的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
|
|
133
133
|
|
|
134
134
|
`/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测 adaptive thinking 模型并显示信息性说明。自定义渠道 fronting Anthropic 时,请确保模型 id 匹配官方发布版本,以便 OMP catalog 正确识别。
|
|
135
135
|
|
|
136
136
|
## 使用 `/cache-optimizer fix` 手动修复
|
|
137
137
|
|
|
138
|
-
**OMP 差异**:当前 `/cache-optimizer fix`
|
|
138
|
+
**OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原项目中的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
|
|
139
139
|
|
|
140
140
|
当前行为:
|
|
141
141
|
|
|
@@ -274,7 +274,7 @@ registry?.registerRouter({
|
|
|
274
274
|
});
|
|
275
275
|
```
|
|
276
276
|
|
|
277
|
-
cache hints 协议(`Symbol.for("omp.cache.hints.v1")
|
|
277
|
+
cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与原项目一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
|
|
278
278
|
|
|
279
279
|
## 卸载
|
|
280
280
|
|
package/index.ts
CHANGED
|
@@ -64,7 +64,7 @@ const LOG_PREFIX = "omp-cache-optimizer";
|
|
|
64
64
|
const STATUS_KEY = "omp-cache-stats";
|
|
65
65
|
const STATE_DIR = join(homedir(), ".omp", "agent");
|
|
66
66
|
const STATE_FILE_PATH = join(STATE_DIR, "omp-cache-optimizer-stats.json");
|
|
67
|
-
// Legacy
|
|
67
|
+
// Legacy source-project state file path: read for one-way migration only, never written.
|
|
68
68
|
const LEGACY_PI_STATE_FILE_PATH = join(homedir(), ".pi", "agent", "pi-cache-optimizer-stats.json");
|
|
69
69
|
const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
|
|
70
70
|
const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
|
|
@@ -77,7 +77,7 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
|
77
77
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
78
78
|
const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
|
|
79
79
|
// Inter-extension protocol symbols are versioned under the omp.* namespace. The v1
|
|
80
|
-
// shape is identical to the legacy
|
|
80
|
+
// shape is identical to the legacy symbols; router/hints integrators on OMP
|
|
81
81
|
// should register under omp.routing.registry.v1 / omp.cache.hints.v1.
|
|
82
82
|
const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("omp.routing.registry.v1");
|
|
83
83
|
const PI_CACHE_HINTS_SYMBOL = Symbol.for("omp.cache.hints.v1");
|
|
@@ -104,7 +104,7 @@ function getLastPromptIntegrityWarningAt(): number {
|
|
|
104
104
|
}
|
|
105
105
|
|
|
106
106
|
// Minimum count of skills before compression is worth applying.
|
|
107
|
-
// Below this,
|
|
107
|
+
// Below this, the runtime's verbose XML block is small enough that the overhead of
|
|
108
108
|
// an additional one-line index isn't worth the loss of per-skill
|
|
109
109
|
// description hints. The 31-skill snapshot in this repo was 13.3 KB; one
|
|
110
110
|
// or two skills is well under 1 KB and not worth touching.
|
|
@@ -122,7 +122,7 @@ const SKILL_COMPRESSION_MIN_COUNT = 4;
|
|
|
122
122
|
// The threshold also caps the upstream string-vs-array regression we saw with
|
|
123
123
|
// trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
|
|
124
124
|
// `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
|
|
125
|
-
//
|
|
125
|
+
// the runtime then iterates char-by-char). Even if a similar bug recurs upstream, this
|
|
126
126
|
// extension will not lift its single-character byproducts into the stable
|
|
127
127
|
// prefix candidate list.
|
|
128
128
|
//
|
|
@@ -268,7 +268,7 @@ type PersistedCacheStatsV3 = {
|
|
|
268
268
|
|
|
269
269
|
/**
|
|
270
270
|
* V4 format: session-scoped stats buckets.
|
|
271
|
-
* Each
|
|
271
|
+
* Each session in the host runtime gets its own stats isolated by a hashed session id.
|
|
272
272
|
*
|
|
273
273
|
* sessions: sessionHash → modelKey (provider/id) → CacheStats
|
|
274
274
|
* legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
|
|
@@ -375,7 +375,7 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
|
|
|
375
375
|
/**
|
|
376
376
|
* Compressed alternative to `formatSkillsForPrompt`.
|
|
377
377
|
*
|
|
378
|
-
*
|
|
378
|
+
* The host runtime emits a four-line XML block per skill (`<name>`, `<description>`,
|
|
379
379
|
* `<location>`) plus a three-sentence preamble. With 31 skills active in
|
|
380
380
|
* this repo that block measured 13.3 KB — 61.5 % of the total system
|
|
381
381
|
* prompt. The full description text matters when the model has to decide
|
|
@@ -454,7 +454,7 @@ function formatSkillsForPromptCompressed(
|
|
|
454
454
|
}
|
|
455
455
|
|
|
456
456
|
/**
|
|
457
|
-
* Replace
|
|
457
|
+
* Replace the runtime's verbose `<available_skills>` block in `prompt` with the
|
|
458
458
|
* compressed one-index form. Idempotent: if the verbose form is not
|
|
459
459
|
* present (compression already applied, or skill count below threshold),
|
|
460
460
|
* the prompt is returned unchanged.
|
|
@@ -465,7 +465,7 @@ function formatSkillsForPromptCompressed(
|
|
|
465
465
|
* - opts.skills present and visible-skill count >= SKILL_COMPRESSION_MIN_COUNT
|
|
466
466
|
* - Verbose block (built from the same `opts.skills`) is found in
|
|
467
467
|
* `prompt` (substring match, no regex). This anchors the substitution
|
|
468
|
-
* to
|
|
468
|
+
* to the runtime's own emitter; if the format changes, we no-op rather
|
|
469
469
|
* than mangle.
|
|
470
470
|
*/
|
|
471
471
|
function compressSkillsInSystemPrompt(
|
|
@@ -589,14 +589,14 @@ function stripSessionOverviewChurn(prompt: string): string {
|
|
|
589
589
|
* prompt rather than ship a corrupted one.
|
|
590
590
|
*
|
|
591
591
|
* Three marker categories are recognized (covers ~99% of real-world
|
|
592
|
-
* extension injection patterns in the
|
|
592
|
+
* extension injection patterns in the host runtime ecosystem):
|
|
593
593
|
*
|
|
594
594
|
* 1. XML-style opening tags `<tagname>` (lowercase, alpha-num + `_`/`-`)
|
|
595
595
|
* 2. XML-style closing tags `</tagname>`
|
|
596
596
|
* 3. HTML comment START/END `<!-- NAME:START -->` / `<!-- NAME:END -->`
|
|
597
597
|
*
|
|
598
598
|
* Tags with attributes (e.g., `<task id="42">`) are not currently emitted
|
|
599
|
-
* by any
|
|
599
|
+
* by any runtime extension we know of and are skipped to keep the regex tight.
|
|
600
600
|
* Markdown headers, horizontal rules, and timestamp patterns are not
|
|
601
601
|
* usable as guards because they have no closing form to verify.
|
|
602
602
|
*
|
|
@@ -676,7 +676,7 @@ function optimizeSystemPrompt(
|
|
|
676
676
|
// protected without code changes when new extensions ship.
|
|
677
677
|
//
|
|
678
678
|
// Our skills compression runs BEFORE optimizeSystemPrompt and replaces
|
|
679
|
-
//
|
|
679
|
+
// the runtime's verbose `<available_skills>` block with a compressed text
|
|
680
680
|
// section that has no XML tag. So `original` here (post-compression)
|
|
681
681
|
// does not contain `<available_skills>` and the result doesn't either
|
|
682
682
|
// — no false positive.
|
|
@@ -969,12 +969,13 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
|
|
|
969
969
|
function getCompat(model: PiModel | undefined): CacheCompat {
|
|
970
970
|
if (!model) return {} as CacheCompat;
|
|
971
971
|
|
|
972
|
-
//
|
|
973
|
-
// We approximate this by reading from ctx.model which should already have merged compat
|
|
974
|
-
// However, for safety, we check both levels if available
|
|
972
|
+
// The host runtime merges provider.compat with model.compat (model wins on conflicts).
|
|
973
|
+
// We approximate this by reading from ctx.model which should already have merged compat.
|
|
974
|
+
// Only model.compat is read below; provider-level compat is not consulted separately.
|
|
975
975
|
const modelCompat = (model.compat ?? {}) as CacheCompat;
|
|
976
|
-
|
|
977
|
-
// Note: ctx.model from
|
|
976
|
+
|
|
977
|
+
// Note: ctx.model from the host runtime should already contain merged compat,
|
|
978
|
+
// so returning model.compat as-is is sufficient here,
|
|
978
979
|
// but we document the two-level structure for clarity
|
|
979
980
|
return modelCompat;
|
|
980
981
|
}
|
|
@@ -1141,7 +1142,7 @@ function isGeminiLikeAssistantMessage(message: unknown, model: PiModel | undefin
|
|
|
1141
1142
|
* Check whether the model id uses Anthropic's adaptive generation (thinking)
|
|
1142
1143
|
* that required `forceAdaptiveThinking: true` in compat under the legacy flag (no longer present in OMP).
|
|
1143
1144
|
*
|
|
1144
|
-
* Adaptive-generation models (from
|
|
1145
|
+
* Adaptive-generation models (from the bundled model catalog) include:
|
|
1145
1146
|
* claude-opus-4-6, claude-opus-4-7, claude-opus-4-8 (also dotted 4.6/4.7/4.8)
|
|
1146
1147
|
* claude-sonnet-4-6
|
|
1147
1148
|
* claude-fable-5
|
|
@@ -1162,7 +1163,7 @@ function isAdaptiveGenerationModel(model: PiModel | undefined): boolean {
|
|
|
1162
1163
|
// OMP divergence: adaptive thinking is set automatically by the OMP built-in model
|
|
1163
1164
|
// catalog (via disableAdaptiveThinking, with reversed semantics) and is NOT
|
|
1164
1165
|
// user-configurable from models.yml (see omp models.md §Anthropic compatibility).
|
|
1165
|
-
// The
|
|
1166
|
+
// The legacy `forceAdaptiveThinking` flag no longer exists. We keep model detection
|
|
1166
1167
|
// (isAdaptiveGenerationModel) for informational doctor output, but drop the fixable
|
|
1167
1168
|
// compat-suggestion path entirely.
|
|
1168
1169
|
function isAdaptiveThinkingCompatApplicable(_model: PiModel): boolean {
|
|
@@ -1734,14 +1735,14 @@ function readCacheWriteFromDetails(details: UnknownRecord | undefined): number |
|
|
|
1734
1735
|
return getFirstNonNegativeNumber(details?.cache_write_tokens, details?.cacheWriteTokens);
|
|
1735
1736
|
}
|
|
1736
1737
|
|
|
1737
|
-
//
|
|
1738
|
+
// The host runtime normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
|
|
1738
1739
|
// cache_read_input_tokens, etc.) into a common shape:
|
|
1739
1740
|
// input = uncached prompt portion (total prompt minus cacheRead minus cacheWrite)
|
|
1740
1741
|
// cacheRead = tokens read from a previously-cached prefix
|
|
1741
1742
|
// cacheWrite= tokens newly written into cache in this request
|
|
1742
1743
|
//
|
|
1743
1744
|
// We reconstruct the total prompt-token count as input + cacheRead + cacheWrite.
|
|
1744
|
-
//
|
|
1745
|
+
// The host runtime guarantees that input, cacheRead, and cacheWrite are always present on
|
|
1745
1746
|
// assistant messages processed through its provider pipeline (at least as zero).
|
|
1746
1747
|
//
|
|
1747
1748
|
// Only DeepSeek sets allowInputOnly=true so that a cache miss (cacheRead=0) still
|
|
@@ -1757,10 +1758,10 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
|
|
|
1757
1758
|
|
|
1758
1759
|
if (!hasCacheSignal && (input === undefined || !allowInputOnly)) return undefined;
|
|
1759
1760
|
|
|
1760
|
-
// Under healthy
|
|
1761
|
+
// Under healthy runtime normalization input is the uncached portion, so
|
|
1761
1762
|
// totalInput = input + cacheRead + cacheWrite gives the full prompt token count.
|
|
1762
1763
|
// Guard against degenerate reads where a broken proxy omits prompt_tokens and
|
|
1763
|
-
//
|
|
1764
|
+
// normalized input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
|
|
1764
1765
|
const computed = (input ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
|
|
1765
1766
|
const floor = (cacheRead ?? 0) + (cacheWrite ?? 0);
|
|
1766
1767
|
return {
|
|
@@ -1771,8 +1772,8 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
|
|
|
1771
1772
|
}
|
|
1772
1773
|
|
|
1773
1774
|
// Raw fallback for DeepSeek responses that still carry their native usage fields.
|
|
1774
|
-
// In practice
|
|
1775
|
-
// reached when
|
|
1775
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1776
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1776
1777
|
function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1777
1778
|
const usage = usageRecordFromAssistant(message);
|
|
1778
1779
|
if (!usage) return undefined;
|
|
@@ -1789,8 +1790,8 @@ function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1789
1790
|
}
|
|
1790
1791
|
|
|
1791
1792
|
// Raw fallback for OpenAI-family responses that still carry their native usage fields.
|
|
1792
|
-
// In practice
|
|
1793
|
-
// reached when
|
|
1793
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1794
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1794
1795
|
function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1795
1796
|
const usage = usageRecordFromAssistant(message);
|
|
1796
1797
|
if (!usage) return undefined;
|
|
@@ -1812,8 +1813,8 @@ function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1812
1813
|
}
|
|
1813
1814
|
|
|
1814
1815
|
// Raw fallback for Anthropic/Claude responses that still carry their native usage fields.
|
|
1815
|
-
// In practice
|
|
1816
|
-
// reached when
|
|
1816
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1817
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1817
1818
|
function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1818
1819
|
const usage = usageRecordFromAssistant(message);
|
|
1819
1820
|
if (!usage) return undefined;
|
|
@@ -1832,8 +1833,8 @@ function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1832
1833
|
}
|
|
1833
1834
|
|
|
1834
1835
|
// Raw fallback for Gemini/Vertex responses that still carry their native usage fields.
|
|
1835
|
-
// In practice
|
|
1836
|
-
// reached when
|
|
1836
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1837
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1837
1838
|
function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1838
1839
|
const record = getAssistantRecord(message);
|
|
1839
1840
|
if (!record) return undefined;
|
|
@@ -1867,8 +1868,8 @@ function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1867
1868
|
return { cacheRead, cacheWrite: 0, totalInput };
|
|
1868
1869
|
}
|
|
1869
1870
|
|
|
1870
|
-
// Try
|
|
1871
|
-
// provider pipeline). Fall back to provider-specific raw-field readers when
|
|
1871
|
+
// Try normalized usage first (always present for messages that went through the runtime's
|
|
1872
|
+
// provider pipeline). Fall back to provider-specific raw-field readers when normalized
|
|
1872
1873
|
// fields are absent (e.g. messages from custom/foreign providers whose raw usage shape
|
|
1873
1874
|
// matches the official API).
|
|
1874
1875
|
function normalizeWithFallback(
|
|
@@ -2038,7 +2039,7 @@ function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
|
2038
2039
|
}
|
|
2039
2040
|
|
|
2040
2041
|
function describeMissingOpenAIFamilyProxyCompat(_model: PiModel): string[] {
|
|
2041
|
-
// OMP divergence:
|
|
2042
|
+
// OMP divergence: the legacy `sendSessionAffinityHeaders` flag has no compat equivalent.
|
|
2042
2043
|
// OMP achieves upstream stickiness via multi-credential auth + session affinity
|
|
2043
2044
|
// in agent.db (see omp models.md §Auth). There is no required compat key for
|
|
2044
2045
|
// OpenAI-family proxies on OMP, so this returns an empty list. Optional long
|
|
@@ -2332,7 +2333,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
2332
2333
|
|
|
2333
2334
|
return (
|
|
2334
2335
|
`💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
|
|
2335
|
-
"
|
|
2336
|
+
"OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
|
|
2336
2337
|
);
|
|
2337
2338
|
},
|
|
2338
2339
|
},
|
|
@@ -3363,7 +3364,7 @@ function formatTokenM(value: number): string {
|
|
|
3363
3364
|
|
|
3364
3365
|
/**
|
|
3365
3366
|
* Check if an assistant message's usage fields appear to be missing or empty.
|
|
3366
|
-
* Returns true when
|
|
3367
|
+
* Returns true when normalized fields (input, cacheRead, cacheWrite) are all
|
|
3367
3368
|
* absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
|
|
3368
3369
|
* for the given adapter.
|
|
3369
3370
|
*/
|
|
@@ -3371,12 +3372,12 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
|
|
|
3371
3372
|
const usage = usageRecordFromAssistant(message);
|
|
3372
3373
|
if (!usage) return true;
|
|
3373
3374
|
|
|
3374
|
-
// Check
|
|
3375
|
+
// Check normalized fields
|
|
3375
3376
|
const input = getNonNegativeNumber(usage, "input");
|
|
3376
3377
|
const cacheRead = getNonNegativeNumber(usage, "cacheRead");
|
|
3377
3378
|
const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
|
|
3378
3379
|
|
|
3379
|
-
// If
|
|
3380
|
+
// If either cache field is present (even as zero) or input is positive, usage is present
|
|
3380
3381
|
if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
|
|
3381
3382
|
return false;
|
|
3382
3383
|
}
|
|
@@ -4052,7 +4053,7 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
|
|
|
4052
4053
|
if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
|
|
4053
4054
|
return [
|
|
4054
4055
|
"ℹ️ Compat check not applicable for this model.",
|
|
4055
|
-
" Native Responses transports already use
|
|
4056
|
+
" Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
|
|
4056
4057
|
];
|
|
4057
4058
|
}
|
|
4058
4059
|
|
|
@@ -4934,9 +4935,9 @@ function chooseFixPlacement(
|
|
|
4934
4935
|
Object.keys(compatKeys),
|
|
4935
4936
|
);
|
|
4936
4937
|
|
|
4937
|
-
// Provider-level writes cannot override a model-level compat key because
|
|
4938
|
+
// Provider-level writes cannot override a model-level compat key because the runtime's
|
|
4938
4939
|
// merge order is provider.compat then model.compat. If the active model already
|
|
4939
|
-
// has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
|
|
4940
|
+
// has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
|
|
4940
4941
|
// at model level even when the key would otherwise be provider-safe.
|
|
4941
4942
|
if (decision.placement === "provider" && existingModelKeys.length > 0) {
|
|
4942
4943
|
return {
|
|
@@ -5103,7 +5104,7 @@ function selfCheckFix(
|
|
|
5103
5104
|
}
|
|
5104
5105
|
|
|
5105
5106
|
// Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
|
|
5106
|
-
// mirroring
|
|
5107
|
+
// mirroring the runtime's mergeCompat behavior (model wins on conflicts). The fix may
|
|
5107
5108
|
// have written either level, so validation must check the merged result.
|
|
5108
5109
|
const provCompatRaw = (provider as Record<string, unknown>).compat;
|
|
5109
5110
|
const provCompat = (provCompatRaw && typeof provCompatRaw === 'object' && !Array.isArray(provCompatRaw))
|
|
@@ -5226,7 +5227,7 @@ function backupTimestamp(): string {
|
|
|
5226
5227
|
|
|
5227
5228
|
// Internal helpers exported only so the task verification script
|
|
5228
5229
|
// (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
|
|
5229
|
-
// extension's public API;
|
|
5230
|
+
// extension's public API; the host runtime only invokes the default export below.
|
|
5230
5231
|
export const __internals_for_tests = {
|
|
5231
5232
|
buildStableCandidates,
|
|
5232
5233
|
optimizeSystemPrompt,
|
|
@@ -5709,7 +5710,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5709
5710
|
|
|
5710
5711
|
if (reason === "reload") {
|
|
5711
5712
|
// /reload: preserve session-scoped stats (same session hash).
|
|
5712
|
-
//
|
|
5713
|
+
// OMP extension reload creates a fresh closure, so cacheStatsByModel
|
|
5713
5714
|
// starts empty. Read persisted data and filter for current session.
|
|
5714
5715
|
lastStatusText = undefined;
|
|
5715
5716
|
lastPromptIntegrityWarningAt = 0;
|
|
@@ -6098,7 +6099,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6098
6099
|
// (no args) — interactive menu (with UI) or help summary
|
|
6099
6100
|
// ────────────────────────────────────────────────────────────────
|
|
6100
6101
|
pi.registerCommand("cache-optimizer", {
|
|
6101
|
-
description: "Diagnose
|
|
6102
|
+
description: "Diagnose OMP cache configuration",
|
|
6102
6103
|
handler: async (args: string, cmdCtx) => {
|
|
6103
6104
|
syncSessionHash(cmdCtx);
|
|
6104
6105
|
const selectedModel = cmdCtx.model;
|
|
@@ -6110,16 +6111,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6110
6111
|
resetCurrentSessionStats();
|
|
6111
6112
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6112
6113
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6113
|
-
cmdCtx.ui.notify(`✅
|
|
6114
|
+
cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6114
6115
|
} else if (subcommand === "disable") {
|
|
6115
6116
|
setRuntimeOptimizerEnabled(false);
|
|
6116
6117
|
resetCurrentSessionStats();
|
|
6117
6118
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6118
6119
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6119
|
-
cmdCtx.ui.notify(`⏸️
|
|
6120
|
+
cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6120
6121
|
} else if (subcommand === "doctor") {
|
|
6121
6122
|
if (!model) {
|
|
6122
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6123
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6123
6124
|
return;
|
|
6124
6125
|
}
|
|
6125
6126
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
@@ -6134,7 +6135,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6134
6135
|
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
6135
6136
|
} else if (subcommand === "stats") {
|
|
6136
6137
|
if (!model) {
|
|
6137
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6138
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6138
6139
|
return;
|
|
6139
6140
|
}
|
|
6140
6141
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6145,7 +6146,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6145
6146
|
cmdCtx.ui.notify(output, "info");
|
|
6146
6147
|
} else if (subcommand === "compat") {
|
|
6147
6148
|
if (!model) {
|
|
6148
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6149
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6149
6150
|
return;
|
|
6150
6151
|
}
|
|
6151
6152
|
const compatResult = buildCompatDiagnosis(model);
|
|
@@ -6161,7 +6162,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6161
6162
|
}
|
|
6162
6163
|
} else if (subcommand === "reset") {
|
|
6163
6164
|
if (!model) {
|
|
6164
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6165
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6165
6166
|
return;
|
|
6166
6167
|
}
|
|
6167
6168
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6186,12 +6187,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
6186
6187
|
cmdCtx.ui.notify(
|
|
6187
6188
|
`✅ Reset local session cache stats for "${displayKey}". ` +
|
|
6188
6189
|
"Upstream provider prompt cache was not modified. " +
|
|
6189
|
-
"New requests will start a fresh stats bucket for this
|
|
6190
|
+
"New requests will start a fresh stats bucket for this OMP session.",
|
|
6190
6191
|
"info",
|
|
6191
6192
|
);
|
|
6192
6193
|
} else if (subcommand === "fix") {
|
|
6193
6194
|
if (!model) {
|
|
6194
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6195
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6195
6196
|
return;
|
|
6196
6197
|
}
|
|
6197
6198
|
|
|
@@ -6239,16 +6240,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6239
6240
|
resetCurrentSessionStats();
|
|
6240
6241
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6241
6242
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6242
|
-
cmdCtx.ui.notify(`✅
|
|
6243
|
+
cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6243
6244
|
} else if (choice === menuOptions[1]) {
|
|
6244
6245
|
setRuntimeOptimizerEnabled(false);
|
|
6245
6246
|
resetCurrentSessionStats();
|
|
6246
6247
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6247
6248
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6248
|
-
cmdCtx.ui.notify(`⏸️
|
|
6249
|
+
cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6249
6250
|
} else if (choice === menuOptions[2]) {
|
|
6250
6251
|
if (!model) {
|
|
6251
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6252
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6252
6253
|
} else {
|
|
6253
6254
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
6254
6255
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6263,7 +6264,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6263
6264
|
}
|
|
6264
6265
|
} else if (choice === menuOptions[3]) {
|
|
6265
6266
|
if (!model) {
|
|
6266
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6267
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6267
6268
|
} else {
|
|
6268
6269
|
const adapter = selectAdapterForModel(model);
|
|
6269
6270
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
@@ -6274,7 +6275,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6274
6275
|
}
|
|
6275
6276
|
} else if (choice === menuOptions[4]) {
|
|
6276
6277
|
if (!model) {
|
|
6277
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6278
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6278
6279
|
} else {
|
|
6279
6280
|
const compatResult = buildCompatDiagnosis(model);
|
|
6280
6281
|
if (compatResult) {
|
|
@@ -6291,7 +6292,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6291
6292
|
} else if (choice === menuOptions[5]) {
|
|
6292
6293
|
// Fix — build a compat fix suggestion (manual suggestion mode per README; no automatic write)
|
|
6293
6294
|
if (!model) {
|
|
6294
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6295
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6295
6296
|
return;
|
|
6296
6297
|
}
|
|
6297
6298
|
const suggestion = buildFixSuggestion(model);
|
|
@@ -6316,7 +6317,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6316
6317
|
);
|
|
6317
6318
|
} else if (choice === menuOptions[6]) {
|
|
6318
6319
|
if (!model) {
|
|
6319
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6320
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6320
6321
|
} else {
|
|
6321
6322
|
const adapter = selectAdapterForModel(model);
|
|
6322
6323
|
if (!adapter) {
|
|
@@ -6341,8 +6342,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
6341
6342
|
// Fallback: text help when no interactive UI
|
|
6342
6343
|
const diagnosis: string[] = [];
|
|
6343
6344
|
diagnosis.push("📋 /cache-optimizer commands:");
|
|
6344
|
-
diagnosis.push(" enable — Enable prompt/cache optimizations for this
|
|
6345
|
-
diagnosis.push(" disable — Disable prompt/cache optimizations for this
|
|
6345
|
+
diagnosis.push(" enable — Enable prompt/cache optimizations for this OMP process");
|
|
6346
|
+
diagnosis.push(" disable — Disable prompt/cache optimizations for this OMP process");
|
|
6346
6347
|
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
|
|
6347
6348
|
diagnosis.push(" stats — Show active model stats bucket and recent trend");
|
|
6348
6349
|
diagnosis.push(" compat — Show compat suggestion with edit location");
|
package/package.json
CHANGED