omp-cache-optimizer 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/index.ts +133 -103
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
|
|
9
9
|
用于提升 OMP 中 provider 侧 KV Cache / Prompt Cache 命中率的扩展:把稳定 prompt 内容前置,给 OpenAI-compatible 请求补保守的 `prompt_cache_key`,提示代理渠道常见缓存路由兼容问题,并在底部显示只读缓存统计。
|
|
10
10
|
|
|
11
|
-
> 本包从 `pi-cache-optimizer` fork
|
|
11
|
+
> 本包从 `pi-cache-optimizer` fork 而来。已有底部统计会自动从旧状态目录 `~/.pi/agent/` 迁移到 `~/.omp/agent/`。正常运行时扩展不会触碰你的 `~/.omp/agent/models.yml`;`/cache-optimizer fix` 当前显示可复制的 YAML compat 片段供手动编辑(自动写入的外科 YAML 编辑器计划在后续版本实现)。
|
|
12
12
|
|
|
13
13
|
## 与原项目的关键差异
|
|
14
14
|
|
|
@@ -63,7 +63,7 @@
|
|
|
63
63
|
omp install npm:omp-cache-optimizer
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
-
|
|
66
|
+
如果之前安装过原版本:
|
|
67
67
|
|
|
68
68
|
```bash
|
|
69
69
|
omp remove npm:pi-cache-optimizer && omp install npm:omp-cache-optimizer
|
|
@@ -102,7 +102,7 @@ OMP 0.79.7 及之后,`omp update` 默认只更新 OMP 本体。若要更新已
|
|
|
102
102
|
|
|
103
103
|
LiteLLM / OneAPI / NewAPI / 类 OpenRouter 渠道等第三方 `openai-completions` 代理,常会把同一个 session 分散到多个上游后端,导致 provider 侧 prompt cache 被拆散。
|
|
104
104
|
|
|
105
|
-
**OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat
|
|
105
|
+
**OMP 差异**:OMP 不再使用 `sendSessionAffinityHeaders` compat 字段(原项目中的旧字段),而是通过多凭据 auth + `agent.db` 中的会话亲和性实现上游粘性。长缓存保留改用 `supportsLongPromptCacheRetention` 字段。
|
|
106
106
|
|
|
107
107
|
`models.yml` 示例:
|
|
108
108
|
|
|
@@ -129,13 +129,13 @@ providers:
|
|
|
129
129
|
|
|
130
130
|
## Anthropic adaptive thinking 模型
|
|
131
131
|
|
|
132
|
-
**OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking`
|
|
132
|
+
**OMP 差异**:OMP 的内置 model catalog 已为官方 Claude 模型自动设置 adaptive thinking(通过 `disableAdaptiveThinking` 字段,语义与原项目中的 `forceAdaptiveThinking` 相反),且不可从 `models.yml` 用户配置。因此本扩展对 adaptive thinking 的检测改为信息性提示,不再提供自动修复。
|
|
133
133
|
|
|
134
134
|
`/cache-optimizer doctor` 和 `/cache-optimizer compat` 会检测 adaptive thinking 模型并显示信息性说明。自定义渠道 fronting Anthropic 时,请确保模型 id 匹配官方发布版本,以便 OMP catalog 正确识别。
|
|
135
135
|
|
|
136
136
|
## 使用 `/cache-optimizer fix` 手动修复
|
|
137
137
|
|
|
138
|
-
**OMP 差异**:当前 `/cache-optimizer fix`
|
|
138
|
+
**OMP 差异**:当前 `/cache-optimizer fix` 降级为手动建议模式。原项目中的自动写入安全协议(backup → 预览 + 确认 → 原子 temp+rename → 写入后自检 → 失败回滚)将在后续 PR 中为 YAML 重新实现。
|
|
139
139
|
|
|
140
140
|
当前行为:
|
|
141
141
|
|
|
@@ -274,7 +274,7 @@ registry?.registerRouter({
|
|
|
274
274
|
});
|
|
275
275
|
```
|
|
276
276
|
|
|
277
|
-
cache hints 协议(`Symbol.for("omp.cache.hints.v1")
|
|
277
|
+
cache hints 协议(`Symbol.for("omp.cache.hints.v1")`)形状与原项目一致,用于预响应阶段透传优化后的 system prompt / prompt cache key / cache retention hint。
|
|
278
278
|
|
|
279
279
|
## 卸载
|
|
280
280
|
|
package/index.ts
CHANGED
|
@@ -64,7 +64,7 @@ const LOG_PREFIX = "omp-cache-optimizer";
|
|
|
64
64
|
const STATUS_KEY = "omp-cache-stats";
|
|
65
65
|
const STATE_DIR = join(homedir(), ".omp", "agent");
|
|
66
66
|
const STATE_FILE_PATH = join(STATE_DIR, "omp-cache-optimizer-stats.json");
|
|
67
|
-
// Legacy
|
|
67
|
+
// Legacy source-project state file path: read for one-way migration only, never written.
|
|
68
68
|
const LEGACY_PI_STATE_FILE_PATH = join(homedir(), ".pi", "agent", "pi-cache-optimizer-stats.json");
|
|
69
69
|
const LEGACY_STATE_FILE_PATH = join(STATE_DIR, "deepseek-cache-optimizer-stats.json");
|
|
70
70
|
const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "gemini"];
|
|
@@ -77,7 +77,7 @@ const OPENAI_PROMPT_CACHE_KEY_MAX_LENGTH = 64;
|
|
|
77
77
|
const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
|
|
78
78
|
const NO_PROMPT_REWRITE_ENV = "PI_CACHE_OPTIMIZER_NO_PROMPT_REWRITE";
|
|
79
79
|
// Inter-extension protocol symbols are versioned under the omp.* namespace. The v1
|
|
80
|
-
// shape is identical to the legacy
|
|
80
|
+
// shape is identical to the legacy symbols; router/hints integrators on OMP
|
|
81
81
|
// should register under omp.routing.registry.v1 / omp.cache.hints.v1.
|
|
82
82
|
const PI_ROUTING_REGISTRY_SYMBOL = Symbol.for("omp.routing.registry.v1");
|
|
83
83
|
const PI_CACHE_HINTS_SYMBOL = Symbol.for("omp.cache.hints.v1");
|
|
@@ -104,7 +104,7 @@ function getLastPromptIntegrityWarningAt(): number {
|
|
|
104
104
|
}
|
|
105
105
|
|
|
106
106
|
// Minimum count of skills before compression is worth applying.
|
|
107
|
-
// Below this,
|
|
107
|
+
// Below this, the runtime's verbose XML block is small enough that the overhead of
|
|
108
108
|
// an additional one-line index isn't worth the loss of per-skill
|
|
109
109
|
// description hints. The 31-skill snapshot in this repo was 13.3 KB; one
|
|
110
110
|
// or two skills is well under 1 KB and not worth touching.
|
|
@@ -122,7 +122,7 @@ const SKILL_COMPRESSION_MIN_COUNT = 4;
|
|
|
122
122
|
// The threshold also caps the upstream string-vs-array regression we saw with
|
|
123
123
|
// trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
|
|
124
124
|
// `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
|
|
125
|
-
//
|
|
125
|
+
// the runtime then iterates char-by-char). Even if a similar bug recurs upstream, this
|
|
126
126
|
// extension will not lift its single-character byproducts into the stable
|
|
127
127
|
// prefix candidate list.
|
|
128
128
|
//
|
|
@@ -268,7 +268,7 @@ type PersistedCacheStatsV3 = {
|
|
|
268
268
|
|
|
269
269
|
/**
|
|
270
270
|
* V4 format: session-scoped stats buckets.
|
|
271
|
-
* Each
|
|
271
|
+
* Each session in the host runtime gets its own stats isolated by a hashed session id.
|
|
272
272
|
*
|
|
273
273
|
* sessions: sessionHash → modelKey (provider/id) → CacheStats
|
|
274
274
|
* legacyFamily: unchanged from v3 (migration/fallback when ctx.model is unknown)
|
|
@@ -312,6 +312,15 @@ type CacheUsageSample = {
|
|
|
312
312
|
missingUsageFields: boolean;
|
|
313
313
|
};
|
|
314
314
|
|
|
315
|
+
/**
 * Snapshot of the inputs the system-prompt rewrite needs, stored per
 * session/model key so a later provider request can look it up.
 */
type PromptRewriteContext = {
  /** Capture time in epoch milliseconds; lookups compare it against the TTL. */
  timestamp: number;
  /** System prompt build options, when the triggering event carried them. */
  options?: BuildSystemPromptOptions;
  /** Route snapshot that was current when the context was captured, if any. */
  routeSnapshot?: PiRouteSnapshot;
  /** Model the request is expected to reach. */
  routedModel?: PiModel;
};

/** Lifetime of a stored PromptRewriteContext: ten seconds, in milliseconds. */
const PROMPT_REWRITE_CONTEXT_TTL_MS = 10 * 1_000;
|
|
323
|
+
|
|
315
324
|
/** Maximum number of recent samples kept per model key (in-memory only, not persisted). */
|
|
316
325
|
const MAX_RECENT_SAMPLES = 50;
|
|
317
326
|
|
|
@@ -375,7 +384,7 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
|
|
|
375
384
|
/**
|
|
376
385
|
* Compressed alternative to `formatSkillsForPrompt`.
|
|
377
386
|
*
|
|
378
|
-
*
|
|
387
|
+
* The host runtime emits a four-line XML block per skill (`<name>`, `<description>`,
|
|
379
388
|
* `<location>`) plus a three-sentence preamble. With 31 skills active in
|
|
380
389
|
* this repo that block measured 13.3 KB — 61.5 % of the total system
|
|
381
390
|
* prompt. The full description text matters when the model has to decide
|
|
@@ -454,7 +463,7 @@ function formatSkillsForPromptCompressed(
|
|
|
454
463
|
}
|
|
455
464
|
|
|
456
465
|
/**
|
|
457
|
-
* Replace
|
|
466
|
+
* Replace the runtime's verbose `<available_skills>` block in `prompt` with the
|
|
458
467
|
* compressed one-index form. Idempotent: if the verbose form is not
|
|
459
468
|
* present (compression already applied, or skill count below threshold),
|
|
460
469
|
* the prompt is returned unchanged.
|
|
@@ -465,7 +474,7 @@ function formatSkillsForPromptCompressed(
|
|
|
465
474
|
* - opts.skills present and visible-skill count >= SKILL_COMPRESSION_MIN_COUNT
|
|
466
475
|
* - Verbose block (built from the same `opts.skills`) is found in
|
|
467
476
|
* `prompt` (substring match, no regex). This anchors the substitution
|
|
468
|
-
* to
|
|
477
|
+
* to the runtime's own emitter; if the format changes, we no-op rather
|
|
469
478
|
* than mangle.
|
|
470
479
|
*/
|
|
471
480
|
function compressSkillsInSystemPrompt(
|
|
@@ -589,14 +598,14 @@ function stripSessionOverviewChurn(prompt: string): string {
|
|
|
589
598
|
* prompt rather than ship a corrupted one.
|
|
590
599
|
*
|
|
591
600
|
* Three marker categories are recognized (covers ~99% of real-world
|
|
592
|
-
* extension injection patterns in the
|
|
601
|
+
* extension injection patterns in the host runtime ecosystem):
|
|
593
602
|
*
|
|
594
603
|
* 1. XML-style opening tags `<tagname>` (lowercase, alpha-num + `_`/`-`)
|
|
595
604
|
* 2. XML-style closing tags `</tagname>`
|
|
596
605
|
* 3. HTML comment START/END `<!-- NAME:START -->` / `<!-- NAME:END -->`
|
|
597
606
|
*
|
|
598
607
|
* Tags with attributes (e.g., `<task id="42">`) are not currently emitted
|
|
599
|
-
* by any
|
|
608
|
+
* by any runtime extension we know of and are skipped to keep the regex tight.
|
|
600
609
|
* Markdown headers, horizontal rules, and timestamp patterns are not
|
|
601
610
|
* usable as guards because they have no closing form to verify.
|
|
602
611
|
*
|
|
@@ -676,7 +685,7 @@ function optimizeSystemPrompt(
|
|
|
676
685
|
// protected without code changes when new extensions ship.
|
|
677
686
|
//
|
|
678
687
|
// Our skills compression runs BEFORE optimizeSystemPrompt and replaces
|
|
679
|
-
//
|
|
688
|
+
// the runtime's verbose `<available_skills>` block with a compressed text
|
|
680
689
|
// section that has no XML tag. So `original` here (post-compression)
|
|
681
690
|
// does not contain `<available_skills>` and the result doesn't either
|
|
682
691
|
// — no false positive.
|
|
@@ -968,15 +977,42 @@ function getNonNegativeNumber(record: UnknownRecord, key: string): number | unde
|
|
|
968
977
|
*/
|
|
969
978
|
function getCompat(model: PiModel | undefined): CacheCompat {
  if (!model) return {} as CacheCompat;

  // Only plain objects take part in the merge. Spreading an array or a string
  // would copy index/character keys ("0", "1", ...) into the result, so a
  // malformed compat value is treated as "no compat" instead.
  const asCompatLayer = (value: unknown): CacheCompat =>
    (value !== null && typeof value === "object" && !Array.isArray(value) ? value : {}) as CacheCompat;

  const record = model as PiModel & { compatConfig?: unknown };

  // Merge order matters: `compatConfig` is applied first and `compat` second,
  // so a key present in both resolves to the value from `compat`.
  return {
    ...asCompatLayer(record.compatConfig),
    ...asCompatLayer(record.compat),
  };
}
|
|
987
|
+
|
|
988
|
+
/**
 * Build the lookup key for a stored prompt-rewrite context.
 *
 * @param sessionHash Hashed session id; an empty or missing value yields no key.
 * @param model Model the key is scoped to; a missing model yields no key.
 * @returns `<sessionHash>:<modelKey(model)>`, or `undefined` when either part is missing.
 */
function makePromptRewriteContextKey(sessionHash: string | undefined, model: PiModel | undefined): string | undefined {
  return sessionHash && model ? `${sessionHash}:${modelKey(model)}` : undefined;
}
|
|
992
|
+
|
|
993
|
+
/**
 * Store a prompt-rewrite context under `key`, replacing any previous entry.
 *
 * Before inserting, entries older than `ttlMs` relative to the new context's
 * timestamp are dropped. Without this sweep an entry is only evicted when the
 * same key is read again after it expired, so keys that are never read again
 * would keep their (potentially large) options objects for the life of the
 * process.
 *
 * @param contexts Map the context is stored in; mutated in place.
 * @param key Storage key; when empty or undefined the call is a no-op.
 * @param context Context to store; its `timestamp` is the reference time for the sweep.
 * @param ttlMs Maximum age of entries kept by the sweep. Defaults to the same
 *   TTL that lookups use, so the sweep only removes entries a default lookup
 *   would already reject.
 */
function rememberPromptRewriteContext(
  contexts: Map<string, PromptRewriteContext>,
  key: string | undefined,
  context: PromptRewriteContext,
  ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
): void {
  if (!key) return;

  // Deleting the current entry while iterating a Map is well-defined in JS.
  for (const [storedKey, stored] of contexts) {
    if (context.timestamp - stored.timestamp > ttlMs) {
      contexts.delete(storedKey);
    }
  }

  contexts.set(key, context);
}
|
|
1001
|
+
|
|
1002
|
+
/**
 * Look up a stored prompt-rewrite context, treating stale entries as absent.
 *
 * @param contexts Map the contexts are stored in; an expired entry is removed from it.
 * @param key Storage key; an empty or undefined key never matches.
 * @param now Reference time in epoch milliseconds (defaults to the current time).
 * @param ttlMs Maximum accepted age; an entry aged exactly `ttlMs` is still returned.
 * @returns The stored context, or `undefined` when missing or expired.
 */
function getPromptRewriteContext(
  contexts: Map<string, PromptRewriteContext>,
  key: string | undefined,
  now = Date.now(),
  ttlMs = PROMPT_REWRITE_CONTEXT_TTL_MS,
): PromptRewriteContext | undefined {
  const entry = key ? contexts.get(key) : undefined;
  if (!key || !entry) return undefined;

  const ageMs = now - entry.timestamp;
  const expired = ageMs > ttlMs;
  if (!expired) return entry;

  // Evict on read so the stale entry is not consulted again.
  contexts.delete(key);
  return undefined;
}
|
|
981
1017
|
|
|
982
1018
|
/**
|
|
@@ -1141,7 +1177,7 @@ function isGeminiLikeAssistantMessage(message: unknown, model: PiModel | undefin
|
|
|
1141
1177
|
* Check whether the model id uses Anthropic's adaptive generation (thinking)
|
|
1142
1178
|
* which, in the legacy runtime, required `forceAdaptiveThinking: true` in compat (that flag no longer exists in OMP).
|
|
1143
1179
|
*
|
|
1144
|
-
* Adaptive-generation models (from
|
|
1180
|
+
* Adaptive-generation models (from the bundled model catalog) include:
|
|
1145
1181
|
* claude-opus-4-6, claude-opus-4-7, claude-opus-4-8 (also dotted 4.6/4.7/4.8)
|
|
1146
1182
|
* claude-sonnet-4-6
|
|
1147
1183
|
* claude-fable-5
|
|
@@ -1162,7 +1198,7 @@ function isAdaptiveGenerationModel(model: PiModel | undefined): boolean {
|
|
|
1162
1198
|
// OMP divergence: adaptive thinking is set automatically by the OMP built-in model
|
|
1163
1199
|
// catalog (via disableAdaptiveThinking, with reversed semantics) and is NOT
|
|
1164
1200
|
// user-configurable from models.yml (see omp models.md §Anthropic compatibility).
|
|
1165
|
-
// The
|
|
1201
|
+
// The legacy `forceAdaptiveThinking` flag no longer exists. We keep model detection
|
|
1166
1202
|
// (isAdaptiveGenerationModel) for informational doctor output, but drop the fixable
|
|
1167
1203
|
// compat-suggestion path entirely.
|
|
1168
1204
|
function isAdaptiveThinkingCompatApplicable(_model: PiModel): boolean {
|
|
@@ -1734,14 +1770,14 @@ function readCacheWriteFromDetails(details: UnknownRecord | undefined): number |
|
|
|
1734
1770
|
return getFirstNonNegativeNumber(details?.cache_write_tokens, details?.cacheWriteTokens);
|
|
1735
1771
|
}
|
|
1736
1772
|
|
|
1737
|
-
//
|
|
1773
|
+
// The host runtime normalizes provider-specific raw usage (prompt_cache_hit_tokens, cached_tokens,
|
|
1738
1774
|
// cache_read_input_tokens, etc.) into a common shape:
|
|
1739
1775
|
// input = uncached prompt portion (total prompt minus cacheRead minus cacheWrite)
|
|
1740
1776
|
// cacheRead = tokens read from a previously-cached prefix
|
|
1741
1777
|
// cacheWrite= tokens newly written into cache in this request
|
|
1742
1778
|
//
|
|
1743
1779
|
// We reconstruct the total prompt-token count as input + cacheRead + cacheWrite.
|
|
1744
|
-
//
|
|
1780
|
+
// The host runtime guarantees that input, cacheRead, and cacheWrite are always present on
|
|
1745
1781
|
// assistant messages processed through its provider pipeline (at least as zero).
|
|
1746
1782
|
//
|
|
1747
1783
|
// Only DeepSeek sets allowInputOnly=true so that a cache miss (cacheRead=0) still
|
|
@@ -1757,10 +1793,10 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
|
|
|
1757
1793
|
|
|
1758
1794
|
if (!hasCacheSignal && (input === undefined || !allowInputOnly)) return undefined;
|
|
1759
1795
|
|
|
1760
|
-
// Under healthy
|
|
1796
|
+
// Under healthy runtime normalization input is the uncached portion, so
|
|
1761
1797
|
// totalInput = input + cacheRead + cacheWrite gives the full prompt token count.
|
|
1762
1798
|
// Guard against degenerate reads where a broken proxy omits prompt_tokens and
|
|
1763
|
-
//
|
|
1799
|
+
// normalized input falls to zero: totalInput must never be less than cacheRead + cacheWrite.
|
|
1764
1800
|
const computed = (input ?? 0) + (cacheRead ?? 0) + (cacheWrite ?? 0);
|
|
1765
1801
|
const floor = (cacheRead ?? 0) + (cacheWrite ?? 0);
|
|
1766
1802
|
return {
|
|
@@ -1771,8 +1807,8 @@ function getPiNormalizedUsage(message: unknown, allowInputOnly = false): UsageSn
|
|
|
1771
1807
|
}
|
|
1772
1808
|
|
|
1773
1809
|
// Raw fallback for DeepSeek responses that still carry their native usage fields.
|
|
1774
|
-
// In practice
|
|
1775
|
-
// reached when
|
|
1810
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1811
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1776
1812
|
function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1777
1813
|
const usage = usageRecordFromAssistant(message);
|
|
1778
1814
|
if (!usage) return undefined;
|
|
@@ -1789,8 +1825,8 @@ function getDeepSeekRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1789
1825
|
}
|
|
1790
1826
|
|
|
1791
1827
|
// Raw fallback for OpenAI-family responses that still carry their native usage fields.
|
|
1792
|
-
// In practice
|
|
1793
|
-
// reached when
|
|
1828
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1829
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1794
1830
|
function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1795
1831
|
const usage = usageRecordFromAssistant(message);
|
|
1796
1832
|
if (!usage) return undefined;
|
|
@@ -1812,8 +1848,8 @@ function getOpenAIRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1812
1848
|
}
|
|
1813
1849
|
|
|
1814
1850
|
// Raw fallback for Anthropic/Claude responses that still carry their native usage fields.
|
|
1815
|
-
// In practice
|
|
1816
|
-
// reached when
|
|
1851
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1852
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1817
1853
|
function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1818
1854
|
const usage = usageRecordFromAssistant(message);
|
|
1819
1855
|
if (!usage) return undefined;
|
|
@@ -1832,8 +1868,8 @@ function getAnthropicRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1832
1868
|
}
|
|
1833
1869
|
|
|
1834
1870
|
// Raw fallback for Gemini/Vertex responses that still carry their native usage fields.
|
|
1835
|
-
// In practice
|
|
1836
|
-
// reached when
|
|
1871
|
+
// In practice the runtime normalizes usage before message_end fires, so this path is only
|
|
1872
|
+
// reached when normalized fields are absent (e.g. custom/foreign providers).
|
|
1837
1873
|
function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
1838
1874
|
const record = getAssistantRecord(message);
|
|
1839
1875
|
if (!record) return undefined;
|
|
@@ -1867,8 +1903,8 @@ function getGeminiRawUsage(message: unknown): UsageSnapshot | undefined {
|
|
|
1867
1903
|
return { cacheRead, cacheWrite: 0, totalInput };
|
|
1868
1904
|
}
|
|
1869
1905
|
|
|
1870
|
-
// Try
|
|
1871
|
-
// provider pipeline). Fall back to provider-specific raw-field readers when
|
|
1906
|
+
// Try normalized usage first (always present for messages that went through the runtime's
|
|
1907
|
+
// provider pipeline). Fall back to provider-specific raw-field readers when normalized
|
|
1872
1908
|
// fields are absent (e.g. messages from custom/foreign providers whose raw usage shape
|
|
1873
1909
|
// matches the official API).
|
|
1874
1910
|
function normalizeWithFallback(
|
|
@@ -1971,13 +2007,6 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
1971
2007
|
return true;
|
|
1972
2008
|
}
|
|
1973
2009
|
if (Array.isArray(record.system) && record.system.length > 0) {
|
|
1974
|
-
// Replace first text block, keep structure
|
|
1975
|
-
const first = asRecord(record.system[0]);
|
|
1976
|
-
if (first && typeof first.text === "string") {
|
|
1977
|
-
first.text = text;
|
|
1978
|
-
return true;
|
|
1979
|
-
}
|
|
1980
|
-
// Fallback: convert to single-block string form
|
|
1981
2010
|
record.system = [{ type: "text", text }];
|
|
1982
2011
|
return true;
|
|
1983
2012
|
}
|
|
@@ -1985,11 +2014,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
1985
2014
|
// google-generative-ai: payload.systemInstruction
|
|
1986
2015
|
const systemInstruction = asRecord(record.systemInstruction);
|
|
1987
2016
|
if (systemInstruction && Array.isArray(systemInstruction.parts) && systemInstruction.parts.length > 0) {
|
|
1988
|
-
|
|
1989
|
-
|
|
1990
|
-
firstPart.text = text;
|
|
1991
|
-
return true;
|
|
1992
|
-
}
|
|
2017
|
+
systemInstruction.parts = [{ text }];
|
|
2018
|
+
return true;
|
|
1993
2019
|
}
|
|
1994
2020
|
|
|
1995
2021
|
// openai-completions / openai-responses: payload.messages[] first system/developer message
|
|
@@ -2004,11 +2030,8 @@ function setSystemPrompt(payload: unknown, text: string): boolean {
|
|
|
2004
2030
|
return true;
|
|
2005
2031
|
}
|
|
2006
2032
|
if (Array.isArray(r.content) && r.content.length > 0) {
|
|
2007
|
-
|
|
2008
|
-
|
|
2009
|
-
first.text = text;
|
|
2010
|
-
return true;
|
|
2011
|
-
}
|
|
2033
|
+
r.content = text;
|
|
2034
|
+
return true;
|
|
2012
2035
|
}
|
|
2013
2036
|
}
|
|
2014
2037
|
}
|
|
@@ -2038,7 +2061,7 @@ function isOfficialOpenAIBaseUrl(model: PiModel): boolean {
|
|
|
2038
2061
|
}
|
|
2039
2062
|
|
|
2040
2063
|
function describeMissingOpenAIFamilyProxyCompat(_model: PiModel): string[] {
|
|
2041
|
-
// OMP divergence:
|
|
2064
|
+
// OMP divergence: the legacy `sendSessionAffinityHeaders` flag has no compat equivalent.
|
|
2042
2065
|
// OMP achieves upstream stickiness via multi-credential auth + session affinity
|
|
2043
2066
|
// in agent.db (see omp models.md §Auth). There is no required compat key for
|
|
2044
2067
|
// OpenAI-family proxies on OMP, so this returns an empty list. Optional long
|
|
@@ -2332,7 +2355,7 @@ const CACHE_PROVIDER_ADAPTERS: CacheProviderAdapter[] = [
|
|
|
2332
2355
|
|
|
2333
2356
|
return (
|
|
2334
2357
|
`💡 Cache optimizer: ${modelKey(model)} looks Claude/Anthropic-like but OpenAI-compatible compat lacks cacheControlFormat: "anthropic". ` +
|
|
2335
|
-
"
|
|
2358
|
+
"OMP may not place Anthropic cache_control breakpoints unless this endpoint supports and enables that compat flag."
|
|
2336
2359
|
);
|
|
2337
2360
|
},
|
|
2338
2361
|
},
|
|
@@ -3363,7 +3386,7 @@ function formatTokenM(value: number): string {
|
|
|
3363
3386
|
|
|
3364
3387
|
/**
|
|
3365
3388
|
* Check if an assistant message's usage fields appear to be missing or empty.
|
|
3366
|
-
* Returns true when
|
|
3389
|
+
* Returns true when normalized fields (input, cacheRead, cacheWrite) are all
|
|
3367
3390
|
* absent/zero AND raw usage fields (prompt_tokens, etc.) are also absent/zero
|
|
3368
3391
|
* for the given adapter.
|
|
3369
3392
|
*/
|
|
@@ -3371,12 +3394,12 @@ function hasMissingUsageFields(message: unknown, adapter: CacheProviderAdapter):
|
|
|
3371
3394
|
const usage = usageRecordFromAssistant(message);
|
|
3372
3395
|
if (!usage) return true;
|
|
3373
3396
|
|
|
3374
|
-
// Check
|
|
3397
|
+
// Check normalized fields
|
|
3375
3398
|
const input = getNonNegativeNumber(usage, "input");
|
|
3376
3399
|
const cacheRead = getNonNegativeNumber(usage, "cacheRead");
|
|
3377
3400
|
const cacheWrite = getNonNegativeNumber(usage, "cacheWrite");
|
|
3378
3401
|
|
|
3379
|
-
// If
|
|
3402
|
+
// If normalized fields exist with non-zero values, usage is present
|
|
3380
3403
|
if (cacheRead !== undefined || cacheWrite !== undefined || (input !== undefined && input > 0)) {
|
|
3381
3404
|
return false;
|
|
3382
3405
|
}
|
|
@@ -4052,7 +4075,7 @@ function getCompatCheckNotApplicableLines(model: PiModel): string[] {
|
|
|
4052
4075
|
if (api === "openai-codex-responses" || (api === "openai-responses" && isOfficialOpenAIBaseUrl(model))) {
|
|
4053
4076
|
return [
|
|
4054
4077
|
"ℹ️ Compat check not applicable for this model.",
|
|
4055
|
-
" Native Responses transports already use
|
|
4078
|
+
" Native Responses transports already use core runtime request handling; OpenAI-compatible proxy compat flags do not apply.",
|
|
4056
4079
|
];
|
|
4057
4080
|
}
|
|
4058
4081
|
|
|
@@ -4934,9 +4957,9 @@ function chooseFixPlacement(
|
|
|
4934
4957
|
Object.keys(compatKeys),
|
|
4935
4958
|
);
|
|
4936
4959
|
|
|
4937
|
-
// Provider-level writes cannot override a model-level compat key because
|
|
4960
|
+
// Provider-level writes cannot override a model-level compat key because the runtime's
|
|
4938
4961
|
// merge order is provider.compat then model.compat. If the active model already
|
|
4939
|
-
// has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
|
|
4962
|
+
// has one of the keys we need to repair (e.g. thinkingFormat: "legacy"), write
|
|
4940
4963
|
// at model level even when the key would otherwise be provider-safe.
|
|
4941
4964
|
if (decision.placement === "provider" && existingModelKeys.length > 0) {
|
|
4942
4965
|
return {
|
|
@@ -5103,7 +5126,7 @@ function selfCheckFix(
|
|
|
5103
5126
|
}
|
|
5104
5127
|
|
|
5105
5128
|
// Step 5: Compute the EFFECTIVE merged compat (provider-level + model-level),
|
|
5106
|
-
// mirroring
|
|
5129
|
+
// mirroring the runtime's mergeCompat behavior (model wins on conflicts). The fix may
|
|
5107
5130
|
// have written either level, so validation must check the merged result.
|
|
5108
5131
|
const provCompatRaw = (provider as Record<string, unknown>).compat;
|
|
5109
5132
|
const provCompat = (provCompatRaw && typeof provCompatRaw === 'object' && !Array.isArray(provCompatRaw))
|
|
@@ -5226,7 +5249,7 @@ function backupTimestamp(): string {
|
|
|
5226
5249
|
|
|
5227
5250
|
// Internal helpers exported only so the task verification script
|
|
5228
5251
|
// (.trellis/tasks/.../verify.ts) can exercise them. They are not part of the
|
|
5229
|
-
// extension's public API;
|
|
5252
|
+
// extension's public API; the host runtime only invokes the default export below.
|
|
5230
5253
|
export const __internals_for_tests = {
|
|
5231
5254
|
buildStableCandidates,
|
|
5232
5255
|
optimizeSystemPrompt,
|
|
@@ -5421,6 +5444,10 @@ export const __internals_for_tests = {
|
|
|
5421
5444
|
hashSessionId,
|
|
5422
5445
|
makeSessionModelKey,
|
|
5423
5446
|
modelKeyFromSessionKey,
|
|
5447
|
+
makePromptRewriteContextKey,
|
|
5448
|
+
rememberPromptRewriteContext,
|
|
5449
|
+
getPromptRewriteContext,
|
|
5450
|
+
PROMPT_REWRITE_CONTEXT_TTL_MS,
|
|
5424
5451
|
filterRestorableStatsForSession,
|
|
5425
5452
|
parsePersistedRoutedModelRef,
|
|
5426
5453
|
routedModelRefToPiModel,
|
|
@@ -5494,11 +5521,10 @@ export default function (pi: ExtensionAPI) {
|
|
|
5494
5521
|
let latestCacheHint: PiCacheHintSnapshot | undefined;
|
|
5495
5522
|
// OMP divergence: prompt rewriting moved from before_agent_start to
|
|
5496
5523
|
// before_provider_request (OMP's before_agent_start can only inject messages,
|
|
5497
|
-
// not mutate systemPrompt).
|
|
5498
|
-
//
|
|
5499
|
-
|
|
5500
|
-
|
|
5501
|
-
let pendingRoutedModel: PiModel | undefined;
|
|
5524
|
+
// not mutate systemPrompt). Store prompt options per session/model so an
|
|
5525
|
+
// overlapping turn or sub-agent cannot overwrite another request's rewrite
|
|
5526
|
+
// context before before_provider_request fires.
|
|
5527
|
+
const promptRewriteContexts = new Map<string, PromptRewriteContext>();
|
|
5502
5528
|
const PERSIST_DEBOUNCE_MS = 2000;
|
|
5503
5529
|
/** In-memory recent usage samples per model key (not persisted, cleared on reload). */
|
|
5504
5530
|
const recentSamplesByModelKey = new Map<string, CacheUsageSample[]>();
|
|
@@ -5709,7 +5735,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
5709
5735
|
|
|
5710
5736
|
if (reason === "reload") {
|
|
5711
5737
|
// /reload: preserve session-scoped stats (same session hash).
|
|
5712
|
-
//
|
|
5738
|
+
// OMP extension reload creates a fresh closure, so cacheStatsByModel
|
|
5713
5739
|
// starts empty. Read persisted data and filter for current session.
|
|
5714
5740
|
lastStatusText = undefined;
|
|
5715
5741
|
lastPromptIntegrityWarningAt = 0;
|
|
@@ -5912,17 +5938,18 @@ export default function (pi: ExtensionAPI) {
|
|
|
5912
5938
|
? findModelInRegistry(_ctx.modelRegistry, routeSnapshot.provider, routeSnapshot.modelId) ?? routeSnapshotToPiModel(routeSnapshot, _ctx.model)
|
|
5913
5939
|
: undefined;
|
|
5914
5940
|
|
|
5915
|
-
// OMP divergence: before_agent_start in OMP can only inject messages (return
|
|
5916
|
-
// { message }), NOT mutate systemPrompt. We cache the prompt options + route
|
|
5917
|
-
// snapshot here so before_provider_request can apply the 3-step pipeline to
|
|
5918
|
-
// the provider payload. If OMP does not supply systemPromptOptions, skill
|
|
5919
|
-
// compression and stable-prefix reorder are skipped (only churn strip runs).
|
|
5920
5941
|
const eventRecord = asRecord(event);
|
|
5921
|
-
|
|
5922
|
-
pendingRouteSnapshot = routeSnapshot;
|
|
5923
|
-
pendingRoutedModel = routedModel ?? _ctx.model;
|
|
5924
|
-
|
|
5942
|
+
const options = (eventRecord?.systemPromptOptions as BuildSystemPromptOptions | undefined) ?? undefined;
|
|
5925
5943
|
const model = routedModel ?? _ctx.model;
|
|
5944
|
+
const contextKey = makePromptRewriteContextKey(sessionHashFromContext(_ctx), model);
|
|
5945
|
+
rememberPromptRewriteContext(promptRewriteContexts, contextKey, {
|
|
5946
|
+
options,
|
|
5947
|
+
routeSnapshot,
|
|
5948
|
+
routedModel: model,
|
|
5949
|
+
timestamp: Date.now(),
|
|
5950
|
+
});
|
|
5951
|
+
|
|
5952
|
+
const modelForHint = model;
|
|
5926
5953
|
const promptCacheKey = getSessionPromptCacheKey(_ctx);
|
|
5927
5954
|
const cacheRetention = process.env[PI_CACHE_RETENTION_ENV] === LONG_CACHE_RETENTION_VALUE ? LONG_CACHE_RETENTION_VALUE : undefined;
|
|
5928
5955
|
const rawSystemPrompt = typeof eventRecord?.systemPrompt === "string" ? eventRecord.systemPrompt : "";
|
|
@@ -5930,9 +5957,9 @@ export default function (pi: ExtensionAPI) {
|
|
|
5930
5957
|
sessionIdHash: currentSessionHashSet ? currentSessionHash : sessionHashFromContext(_ctx),
|
|
5931
5958
|
virtualProvider: routeSnapshot?.virtualProvider ?? _ctx.model?.provider,
|
|
5932
5959
|
virtualModelId: routeSnapshot?.virtualModelId ?? _ctx.model?.id,
|
|
5933
|
-
upstreamProvider: routeSnapshot?.provider ??
|
|
5934
|
-
upstreamModelId: routeSnapshot?.modelId ??
|
|
5935
|
-
api:
|
|
5960
|
+
upstreamProvider: routeSnapshot?.provider ?? modelForHint?.provider,
|
|
5961
|
+
upstreamModelId: routeSnapshot?.modelId ?? modelForHint?.id,
|
|
5962
|
+
api: modelForHint?.api,
|
|
5936
5963
|
systemPrompt: rawSystemPrompt,
|
|
5937
5964
|
promptCacheKey,
|
|
5938
5965
|
cacheRetention,
|
|
@@ -5960,21 +5987,24 @@ export default function (pi: ExtensionAPI) {
|
|
|
5960
5987
|
requestModel &&
|
|
5961
5988
|
!isResponsesPromptRewriteBypassApi(requestModel.api)
|
|
5962
5989
|
) {
|
|
5990
|
+
const contextKey = makePromptRewriteContextKey(sessionHashFromContext(ctx), requestModel);
|
|
5991
|
+
const rewriteContext = getPromptRewriteContext(promptRewriteContexts, contextKey);
|
|
5992
|
+
const promptOptions = rewriteContext?.options;
|
|
5963
5993
|
const original = extractSystemPrompt(resultPayload);
|
|
5964
5994
|
if (original && original.trim().length > 0) {
|
|
5965
5995
|
// Step 1: strip per-turn churn from <session-overview>.
|
|
5966
5996
|
const stripped = stripSessionOverviewChurn(original);
|
|
5967
5997
|
|
|
5968
5998
|
// Step 2: compress skills XML → one-line index (requires cached options).
|
|
5969
|
-
const compressed =
|
|
5970
|
-
? compressSkillsInSystemPrompt(stripped,
|
|
5999
|
+
const compressed = promptOptions
|
|
6000
|
+
? compressSkillsInSystemPrompt(stripped, promptOptions)
|
|
5971
6001
|
: stripped;
|
|
5972
6002
|
|
|
5973
6003
|
// Step 3: lift stable content above dynamic content (requires cached options).
|
|
5974
6004
|
let finalPrompt = compressed;
|
|
5975
6005
|
let changed = false;
|
|
5976
|
-
if (
|
|
5977
|
-
const optimized = optimizeSystemPrompt(compressed,
|
|
6006
|
+
if (promptOptions) {
|
|
6007
|
+
const optimized = optimizeSystemPrompt(compressed, promptOptions);
|
|
5978
6008
|
if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
|
|
5979
6009
|
finalPrompt = optimized.systemPrompt;
|
|
5980
6010
|
changed = true;
|
|
@@ -6098,7 +6128,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6098
6128
|
// (no args) — interactive menu (with UI) or help summary
|
|
6099
6129
|
// ────────────────────────────────────────────────────────────────
|
|
6100
6130
|
pi.registerCommand("cache-optimizer", {
|
|
6101
|
-
description: "Diagnose
|
|
6131
|
+
description: "Diagnose OMP cache configuration",
|
|
6102
6132
|
handler: async (args: string, cmdCtx) => {
|
|
6103
6133
|
syncSessionHash(cmdCtx);
|
|
6104
6134
|
const selectedModel = cmdCtx.model;
|
|
@@ -6110,16 +6140,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6110
6140
|
resetCurrentSessionStats();
|
|
6111
6141
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6112
6142
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6113
|
-
cmdCtx.ui.notify(`✅
|
|
6143
|
+
cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6114
6144
|
} else if (subcommand === "disable") {
|
|
6115
6145
|
setRuntimeOptimizerEnabled(false);
|
|
6116
6146
|
resetCurrentSessionStats();
|
|
6117
6147
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6118
6148
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6119
|
-
cmdCtx.ui.notify(`⏸️
|
|
6149
|
+
cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6120
6150
|
} else if (subcommand === "doctor") {
|
|
6121
6151
|
if (!model) {
|
|
6122
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6152
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6123
6153
|
return;
|
|
6124
6154
|
}
|
|
6125
6155
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
@@ -6134,7 +6164,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6134
6164
|
cmdCtx.ui.notify(fullDiagnosis, "info");
|
|
6135
6165
|
} else if (subcommand === "stats") {
|
|
6136
6166
|
if (!model) {
|
|
6137
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6167
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6138
6168
|
return;
|
|
6139
6169
|
}
|
|
6140
6170
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6145,7 +6175,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6145
6175
|
cmdCtx.ui.notify(output, "info");
|
|
6146
6176
|
} else if (subcommand === "compat") {
|
|
6147
6177
|
if (!model) {
|
|
6148
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6178
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6149
6179
|
return;
|
|
6150
6180
|
}
|
|
6151
6181
|
const compatResult = buildCompatDiagnosis(model);
|
|
@@ -6161,7 +6191,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6161
6191
|
}
|
|
6162
6192
|
} else if (subcommand === "reset") {
|
|
6163
6193
|
if (!model) {
|
|
6164
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6194
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6165
6195
|
return;
|
|
6166
6196
|
}
|
|
6167
6197
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6186,12 +6216,12 @@ export default function (pi: ExtensionAPI) {
|
|
|
6186
6216
|
cmdCtx.ui.notify(
|
|
6187
6217
|
`✅ Reset local session cache stats for "${displayKey}". ` +
|
|
6188
6218
|
"Upstream provider prompt cache was not modified. " +
|
|
6189
|
-
"New requests will start a fresh stats bucket for this
|
|
6219
|
+
"New requests will start a fresh stats bucket for this OMP session.",
|
|
6190
6220
|
"info",
|
|
6191
6221
|
);
|
|
6192
6222
|
} else if (subcommand === "fix") {
|
|
6193
6223
|
if (!model) {
|
|
6194
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6224
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6195
6225
|
return;
|
|
6196
6226
|
}
|
|
6197
6227
|
|
|
@@ -6239,16 +6269,16 @@ export default function (pi: ExtensionAPI) {
|
|
|
6239
6269
|
resetCurrentSessionStats();
|
|
6240
6270
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6241
6271
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6242
|
-
cmdCtx.ui.notify(`✅
|
|
6272
|
+
cmdCtx.ui.notify(`✅ OMP Cache Optimizer enabled for this OMP process. Current-session stats were reset for before/after comparison.\n${formatOptimizerRuntimeMode()}`, "info");
|
|
6243
6273
|
} else if (choice === menuOptions[1]) {
|
|
6244
6274
|
setRuntimeOptimizerEnabled(false);
|
|
6245
6275
|
resetCurrentSessionStats();
|
|
6246
6276
|
await flushPersistCacheStats(cmdCtx as unknown as ExtensionContext);
|
|
6247
6277
|
await publishStatus(cmdCtx as unknown as ExtensionContext, model);
|
|
6248
|
-
cmdCtx.ui.notify(`⏸️
|
|
6278
|
+
cmdCtx.ui.notify(`⏸️ OMP Cache Optimizer disabled for this OMP process. Current-session stats were reset and will keep collecting while disabled for comparison.\n${formatOptimizerRuntimeMode()}`, "warning");
|
|
6249
6279
|
} else if (choice === menuOptions[2]) {
|
|
6250
6280
|
if (!model) {
|
|
6251
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6281
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6252
6282
|
} else {
|
|
6253
6283
|
const diagnosis = buildDoctorDiagnosis(model, { promptCacheRetention400: promptCacheRetention400Models.has(modelKey(model)) });
|
|
6254
6284
|
const adapter = selectAdapterForModel(model);
|
|
@@ -6263,7 +6293,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6263
6293
|
}
|
|
6264
6294
|
} else if (choice === menuOptions[3]) {
|
|
6265
6295
|
if (!model) {
|
|
6266
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6296
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6267
6297
|
} else {
|
|
6268
6298
|
const adapter = selectAdapterForModel(model);
|
|
6269
6299
|
const sk = model ? sessionModelKey(model) : undefined;
|
|
@@ -6274,7 +6304,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6274
6304
|
}
|
|
6275
6305
|
} else if (choice === menuOptions[4]) {
|
|
6276
6306
|
if (!model) {
|
|
6277
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6307
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6278
6308
|
} else {
|
|
6279
6309
|
const compatResult = buildCompatDiagnosis(model);
|
|
6280
6310
|
if (compatResult) {
|
|
@@ -6291,7 +6321,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6291
6321
|
} else if (choice === menuOptions[5]) {
|
|
6292
6322
|
// Fix — auto-fix compat issues
|
|
6293
6323
|
if (!model) {
|
|
6294
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6324
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6295
6325
|
return;
|
|
6296
6326
|
}
|
|
6297
6327
|
const suggestion = buildFixSuggestion(model);
|
|
@@ -6316,7 +6346,7 @@ export default function (pi: ExtensionAPI) {
|
|
|
6316
6346
|
);
|
|
6317
6347
|
} else if (choice === menuOptions[6]) {
|
|
6318
6348
|
if (!model) {
|
|
6319
|
-
cmdCtx.ui.notify("No active model selected. Select a model first with /model or
|
|
6349
|
+
cmdCtx.ui.notify("No active model selected. Select a model first with /model or omp --model.", "warning");
|
|
6320
6350
|
} else {
|
|
6321
6351
|
const adapter = selectAdapterForModel(model);
|
|
6322
6352
|
if (!adapter) {
|
|
@@ -6341,8 +6371,8 @@ export default function (pi: ExtensionAPI) {
|
|
|
6341
6371
|
// Fallback: text help when no interactive UI
|
|
6342
6372
|
const diagnosis: string[] = [];
|
|
6343
6373
|
diagnosis.push("📋 /cache-optimizer commands:");
|
|
6344
|
-
diagnosis.push(" enable — Enable prompt/cache optimizations for this
|
|
6345
|
-
diagnosis.push(" disable — Disable prompt/cache optimizations for this
|
|
6374
|
+
diagnosis.push(" enable — Enable prompt/cache optimizations for this OMP process");
|
|
6375
|
+
diagnosis.push(" disable — Disable prompt/cache optimizations for this OMP process");
|
|
6346
6376
|
diagnosis.push(" doctor — Show current model/provider/api/baseUrl/compat and low-hit diagnosis");
|
|
6347
6377
|
diagnosis.push(" stats — Show active model stats bucket and recent trend");
|
|
6348
6378
|
diagnosis.push(" compat — Show compat suggestion with edit location");
|
package/package.json
CHANGED