pi-cache-optimizer 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ import {
7
7
  } from "node:fs";
8
8
  import { mkdir, readFile, rename, unlink, writeFile } from "node:fs/promises";
9
9
  import { homedir } from "node:os";
10
- import { join } from "node:path";
10
+ import { dirname, join } from "node:path";
11
11
  import type { BuildSystemPromptOptions, ExtensionAPI, ExtensionContext } from "@earendil-works/pi-coding-agent";
12
12
 
13
13
  /**
@@ -47,8 +47,47 @@ const CACHE_PROVIDER_IDS: CacheProviderId[] = ["deepseek", "openai", "claude", "
47
47
  const OPENAI_CACHE_KEY_ENV = "PI_CACHE_OPTIMIZER_OPENAI_CACHE_KEY";
48
48
  const OPENAI_PROMPT_CACHE_KEY_PREFIX = "pi-dsco-";
49
49
  const NO_AUTO_CONFIG_ENV = "PI_CACHE_OPTIMIZER_NO_AUTO_CONFIG";
50
+ const NO_SKILL_COMPRESSION_ENV = "PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION";
50
51
  const DEEPSEEK_API_KEY_ENV = "DEEPSEEK_API_KEY";
51
52
 
53
+ // One-shot warning flag: if optimizeSystemPrompt ever detects that its blind-replace
54
+ // logic has accidentally truncated the trellis `<workflow-state>` block
55
+ // (or any structural marker from an upstream extension), we flip this.
56
+ // publishStatus reads it once, appends a footer warning, then resets it.
57
+ // The flag surface is kept separate from the regular cache-stats counter
58
+ // so that a one-turn glitch doesn't poison the persisted metrics.
59
+ let promptTruncationDetected = false;
60
+
61
+ // Minimum count of skills before compression is worth applying.
62
+ // Below this, pi's verbose XML block is small enough that the overhead of
63
+ // an additional one-line index isn't worth the loss of per-skill
64
+ // description hints. The 31-skill snapshot in this repo was 13.3 KB; one
65
+ // or two skills is well under 1 KB and not worth touching.
66
+ const SKILL_COMPRESSION_MIN_COUNT = 4;
67
+
68
+ // Minimum trimmed length for a candidate to qualify as a stable-prefix "part".
69
+ //
70
+ // `optimizeSystemPrompt` removes each accepted candidate from the dynamic
71
+ // remainder via `rest.replace(part, "")`. Short or character-class candidates
72
+ // (think: `S`, `- u`, `- (`, `- }`) match the FIRST occurrence of those bytes
73
+ // anywhere in `rest`, ripping unrelated text out of the prompt and yielding a
74
+ // non-deterministic dynamic remainder per request. Both behaviors poison the
75
+ // provider's prompt-prefix cache.
76
+ //
77
+ // The threshold also caps the upstream string-vs-array regression we saw with
78
+ // trellis 0.5.16 / 0.6.0-beta.17 (subagent tool registration passing
79
+ // `promptGuidelines: "<long string>"` instead of `["<long string>"]`, which
80
+ // pi then iterates char-by-char). Even if a similar bug recurs upstream, this
81
+ // extension will not lift its single-character byproducts into the stable
82
+ // prefix candidate list.
83
+ //
84
+ // 8 chars is comfortably above all single-bullet (`- X` = 3 chars) and
85
+ // short-token noise while leaving every legitimate guideline / tool snippet /
86
+ // context-file payload above the bar. If a real future guideline is shorter
87
+ // than 8 chars, the cost is that it is not lifted into the stable prefix; the
88
+ // dynamic-remainder path still includes it untouched.
89
+ const MIN_STABLE_CANDIDATE_LENGTH = 8;
90
+
52
91
  const ASSISTANT_MESSAGE_MODEL_TOKEN_KEYS = ["model", "name"];
53
92
  const OPENAI_REASONING_MODEL_PATTERN = /(^|[/\s:_-])o[1345]($|[-_.:/\s])/;
54
93
 
@@ -143,6 +182,121 @@ function formatSkillsForPrompt(skills: NonNullable<BuildSystemPromptOptions["ski
143
182
  return lines.join("\n");
144
183
  }
145
184
 
185
/**
 * Compressed alternative to `formatSkillsForPrompt`.
 *
 * Pi emits a multi-line XML block per skill (`<name>`, `<description>`,
 * `<location>`) plus a preamble. With 31 skills active in this repo that
 * block measured 13.3 KB — 61.5 % of the total system prompt. The
 * description text helps the model pick a skill, but the model can read
 * SKILL.md on demand, so names plus a location pattern are enough to
 * identify candidates.
 *
 * What is kept:
 *   1. The instruction to read SKILL.md when a task matches a skill name.
 *   2. The relative-path resolution rule (parent of SKILL.md is the skill
 *      directory).
 *   3. Every visible skill's name, listed under its skills-root directory.
 *
 * What is dropped:
 *   - Per-skill description text.
 *   - The `<available_skills>` XML envelope and per-skill XML structure.
 *
 * Determinism (needed for prompt-prefix cache stability): groups are
 * ordered by root path and names are sorted within each group, so the same
 * skill set always yields the same string regardless of input order.
 *
 * NOTE(review): the `<root>/<name>/SKILL.md` header assumes every skill
 * lives in a directory named after `skill.name` — confirm against pi's
 * skill loader.
 *
 * @param skills Skills from the system-prompt options. Entries with
 *   `disableModelInvocation` set are omitted from the output.
 * @returns The compressed skills section (starting with two blank lines),
 *   or `""` when no skill is visible.
 */
function formatSkillsForPromptCompressed(
  skills: NonNullable<BuildSystemPromptOptions["skills"]>,
): string {
  const visibleSkills = skills.filter((skill) => !skill.disableModelInvocation);
  if (visibleSkills.length === 0) return "";

  // Layout of the wrapped name list.
  const indent = " ";
  const maxWidth = 80;

  // skill.filePath = .../<skill-name>/SKILL.md, so dirname is the skill
  // directory and dirname-of-dirname is the skills root.
  const namesByRoot = new Map<string, string[]>();
  for (const skill of visibleSkills) {
    const root = dirname(dirname(skill.filePath));
    const bucket = namesByRoot.get(root);
    if (bucket) {
      bucket.push(skill.name);
    } else {
      namesByRoot.set(root, [skill.name]);
    }
  }

  // Order groups by root (UTF-16 code-unit order) so Map insertion order
  // never leaks into the output.
  const sortedGroups = [...namesByRoot.entries()].sort(([a], [b]) =>
    a < b ? -1 : a > b ? 1 : 0,
  );

  const lines: string[] = [
    "",
    "",
    "The following skills provide specialized instructions for specific tasks. When a skill name matches the task you are doing, read the SKILL.md at the listed location to load the full instructions. When a SKILL.md references a relative path, resolve it against the skill directory (parent of SKILL.md / dirname of the path) and use that absolute path in tool commands.",
  ];

  for (const [root, names] of sortedGroups) {
    lines.push("", `Skills under ${root}/<name>/SKILL.md:`);

    // `current` holds the names already placed on the line being built,
    // without the indent. Tracking it explicitly (rather than comparing the
    // buffer length against a magic number) guarantees that even a single
    // one-character name is emitted.
    let current = "";
    for (const name of [...names].sort()) {
      if (current === "") {
        current = name;
        continue;
      }
      const extended = `${current}, ${name}`;
      if (indent.length + extended.length > maxWidth) {
        // Line is full: flush it with a trailing comma and start a new one.
        lines.push(`${indent}${current},`);
        current = name;
      } else {
        current = extended;
      }
    }
    if (current !== "") lines.push(indent + current);
  }

  return lines.join("\n");
}
265
+
266
/**
 * Replace pi's verbose `<available_skills>` block in `prompt` with the
 * compressed index produced by `formatSkillsForPromptCompressed`.
 *
 * The prompt is returned unchanged when any of these holds:
 *   - the `PI_CACHE_OPTIMIZER_NO_SKILL_COMPRESSION` opt-out is enabled
 *     (as judged by `isEnabledEnv`);
 *   - `opts.skills` is missing or empty;
 *   - fewer than `SKILL_COMPRESSION_MIN_COUNT` skills are visible;
 *   - the verbose block built from the same `opts.skills` is not found
 *     verbatim in `prompt` (substring match, no regex) — e.g. compression
 *     was already applied or pi changed its format, so we no-op rather
 *     than mangle;
 *   - the compressed form is empty or not actually shorter.
 *
 * @param prompt The system prompt to rewrite.
 * @param opts   The options the prompt was built from; only `skills` is read.
 * @returns The prompt with the first occurrence of the verbose skills block
 *   swapped for the compressed one, or `prompt` itself.
 */
function compressSkillsInSystemPrompt(
  prompt: string,
  opts: BuildSystemPromptOptions,
): string {
  if (isEnabledEnv(process.env[NO_SKILL_COMPRESSION_ENV])) return prompt;

  const skills = opts.skills;
  if (!skills || skills.length === 0) return prompt;

  const visibleCount = skills.filter((skill) => !skill.disableModelInvocation).length;
  if (visibleCount < SKILL_COMPRESSION_MIN_COUNT) return prompt;

  const verbose = formatSkillsForPrompt(skills);
  if (!verbose || !prompt.includes(verbose)) return prompt;

  const compressed = formatSkillsForPromptCompressed(skills);
  if (!compressed || compressed.length >= verbose.length) return prompt;

  // Use a replacer function, not a replacement string: with a string,
  // `String.prototype.replace` expands `$&`, `$'`, "$`" and `$$`, so a skill
  // name or skills-root path containing `$` would corrupt the prompt.
  return prompt.replace(verbose, () => compressed);
}
299
+
146
300
  function buildStableCandidates(opts: BuildSystemPromptOptions): string[] {
147
301
  const candidates: string[] = [];
148
302
 
@@ -172,12 +326,67 @@ function buildStableCandidates(opts: BuildSystemPromptOptions): string[] {
172
326
  }
173
327
 
174
328
  if (opts.skills && opts.skills.length > 0) {
329
+ // Push BOTH forms so `optimizeSystemPrompt` finds whichever is
330
+ // actually present in the prompt. The `rest.includes(part)`
331
+ // short-circuit skips the form that isn't there. The two strings
332
+ // are mutually distinguishable (the verbose form contains the
333
+ // literal `<available_skills>` envelope; the compressed form
334
+ // contains `Skills under ` and no XML tags) so they cannot
335
+ // accidentally match each other.
175
336
  candidates.push(formatSkillsForPrompt(opts.skills));
337
+ candidates.push(formatSkillsForPromptCompressed(opts.skills));
176
338
  }
177
339
 
178
340
  return candidates;
179
341
  }
180
342
 
343
/**
 * Strip per-turn churn from the trellis `<session-overview>` block.
 *
 * Trellis injects a session overview that includes `RECENT COMMITS`
 * (shifts on every git commit), `Working directory: Clean/N uncommitted`
 * (shifts on every edit/commit) and `Line count: N / 2000` (shifts on
 * every journal append). Carrying those in the system prompt poisons the
 * provider's prompt-prefix cache for everything that follows.
 *
 * Only text between the first `<session-overview>` and the next
 * `</session-overview>` is touched:
 *   - the first `## RECENT COMMITS` section is removed, from its heading up
 *     to (not including) the next `## ` heading or the end of the block;
 *   - every `Working directory: ...` line is removed;
 *   - every `Line count: ...` line is removed.
 *
 * Both LF and CRLF line endings are recognised, so an overview produced
 * with Windows line endings is cleaned the same way.
 *
 * @param prompt The full system prompt.
 * @returns The prompt with the churn fields removed, or `prompt` unchanged
 *   when the opening or closing tag is absent.
 */
function stripSessionOverviewChurn(prompt: string): string {
  const startTag = "<session-overview>";
  const endTag = "</session-overview>";

  const startIdx = prompt.indexOf(startTag);
  if (startIdx === -1) return prompt;

  const innerStart = startIdx + startTag.length;
  const endIdx = prompt.indexOf(endTag, innerStart);
  if (endIdx === -1) return prompt;

  const cleaned = prompt
    .slice(innerStart, endIdx)
    // Drop the RECENT COMMITS section. The model can get commit history
    // via `git log`; repeating it in every system prompt is redundant.
    // No `m` flag: `$` means end of the overview body, not end of line.
    .replace(/\r?\n## RECENT COMMITS\r?\n[\s\S]*?(?=\r?\n## |$)/, "")
    // Drop "Working directory: ..." (git status tail churn).
    .replace(/\r?\nWorking directory:[^\r\n]*/g, "")
    // Drop "Line count: N / NNNN" (journal tail churn).
    .replace(/\r?\nLine count:[^\r\n]*/g, "");

  return prompt.slice(0, innerStart) + cleaned + prompt.slice(endIdx);
}
389
+
181
390
  function optimizeSystemPrompt(
182
391
  original: string,
183
392
  opts: BuildSystemPromptOptions,
@@ -187,9 +396,11 @@ function optimizeSystemPrompt(
187
396
  let rest = original;
188
397
 
189
398
  // Stable layer: content likely to be identical across sessions/turns.
399
+ // Short / single-char candidates are dropped: see MIN_STABLE_CANDIDATE_LENGTH.
190
400
  for (const candidate of buildStableCandidates(opts)) {
191
401
  const part = candidate.trim();
192
- if (!part || seen.has(part) || !rest.includes(part)) continue;
402
+ if (!part || part.length < MIN_STABLE_CANDIDATE_LENGTH) continue;
403
+ if (seen.has(part) || !rest.includes(part)) continue;
193
404
 
194
405
  stableParts.push(part);
195
406
  seen.add(part);
@@ -205,10 +416,27 @@ function optimizeSystemPrompt(
205
416
  return { systemPrompt: original, stablePrefix: "", changed: false };
206
417
  }
207
418
 
419
+ const systemPrompt =
420
+ stablePrefix +
421
+ (dynamicRemainder.length > 0 ? "\n\n---\n\n" + dynamicRemainder : "");
422
+
423
+ // Sanity check: if trellis (or another extension) injected structural
424
+ // markers into the prompt that happen to share a substring with one of
425
+ // our stable candidates, the blind `rest.replace(part, "")` could
426
+ // silently eat part of the dynamic layer. We anchor on
427
+ // `<workflow-state>` because it is the most stable structural marker
428
+ // trellis emits and is never a stable candidate itself.
429
+ //
430
+ // When the marker was present in the original but is missing in the
431
+ // result, the reorder is unsafe — fall back to the original prompt
432
+ // so the model gets a complete prompt, and flag the footer warning.
433
+ if (original.includes("<workflow-state>") && !systemPrompt.includes("<workflow-state>")) {
434
+ promptTruncationDetected = true;
435
+ return { systemPrompt: original, stablePrefix: "", changed: false };
436
+ }
437
+
208
438
  return {
209
- systemPrompt:
210
- stablePrefix +
211
- (dynamicRemainder.length > 0 ? "\n\n---\n\n" + dynamicRemainder : ""),
439
+ systemPrompt,
212
440
  stablePrefix,
213
441
  changed: true,
214
442
  };
@@ -1005,6 +1233,20 @@ function emitDeepseekApiKeyHintIfNeeded(
1005
1233
  );
1006
1234
  }
1007
1235
 
1236
/**
 * Test-only handle on otherwise private helpers and tuning constants.
 *
 * Exported solely so the task verification script
 * (.trellis/tasks/.../verify.ts) can exercise these pieces directly. This
 * object is not part of the extension's public API; pi only invokes the
 * default export of this module.
 */
export const __internals_for_tests = {
  // Prompt-rewriting helpers.
  buildStableCandidates,
  optimizeSystemPrompt,
  stripSessionOverviewChurn,
  // Skill-section formatters.
  formatSkillsForPrompt,
  formatSkillsForPromptCompressed,
  compressSkillsInSystemPrompt,
  // Thresholds.
  MIN_STABLE_CANDIDATE_LENGTH,
  SKILL_COMPRESSION_MIN_COUNT,
};
1249
+
1008
1250
  export default function (pi: ExtensionAPI) {
1009
1251
  const warnedModels = new Set<string>();
1010
1252
  let cacheStatsByProvider: Partial<Record<CacheProviderId, CacheStats>> = emptyAllCacheStats();
@@ -1086,7 +1328,17 @@ export default function (pi: ExtensionAPI) {
1086
1328
  await rollOverStatsIfNeeded(ctx);
1087
1329
 
1088
1330
  const adapter = selectAdapterForModel(model);
1089
- const statusText = adapter ? formatCacheStats(adapter, getStatsForAdapter(adapter)) : undefined;
1331
+ let statusText: string | undefined = adapter ? formatCacheStats(adapter, getStatsForAdapter(adapter)) : undefined;
1332
+
1333
+ // If optimizeSystemPrompt detected structural truncation on this or
1334
+ // a recent turn, flag it once in the footer so the user knows to
1335
+ // /reload before continuing. The flag resets after emission so a
1336
+ // single-turn glitch does not permanently taint the footer.
1337
+ if (promptTruncationDetected && statusText !== undefined) {
1338
+ statusText = statusText + " ⚠️ integrity";
1339
+ promptTruncationDetected = false;
1340
+ }
1341
+
1090
1342
  if (statusText === lastStatusText) return;
1091
1343
 
1092
1344
  lastStatusText = statusText;
@@ -1111,13 +1363,45 @@ export default function (pi: ExtensionAPI) {
1111
1363
  });
1112
1364
 
1113
1365
  pi.on("before_agent_start", async (event, _ctx) => {
1114
- const optimized = optimizeSystemPrompt(event.systemPrompt, event.systemPromptOptions);
1366
+ // Step 1: strip per-turn churn from <session-overview>.
1367
+ // Removing RECENT COMMITS, Working directory status, and
1368
+ // Journal line count makes more of the session-overview stable
1369
+ // across turns, which DeepSeek's prefix cache can then retain.
1370
+ const strippedPrompt = stripSessionOverviewChurn(event.systemPrompt);
1371
+
1372
+ // Step 2: compress skills XML → one-line index.
1373
+ // The compressed form is just as cache-stable as the verbose one
1374
+ // because both are derived deterministically from the same
1375
+ // `event.systemPromptOptions.skills`.
1376
+ // No-op if opted out, below SKILL_COMPRESSION_MIN_COUNT, or if pi
1377
+ // emitted a format we don't recognize.
1378
+ const compressedPrompt = compressSkillsInSystemPrompt(
1379
+ strippedPrompt,
1380
+ event.systemPromptOptions,
1381
+ );
1382
+
1383
+ // Step 3: lift stable content above dynamic content for cache
1384
+ // stability. Operates on the (stripped + compressed) prompt so the
1385
+ // cache key derived from `stablePrefix` reflects what actually
1386
+ // ships to the provider.
1387
+ const optimized = optimizeSystemPrompt(compressedPrompt, event.systemPromptOptions);
1115
1388
  latestPromptCacheKey = buildPromptCacheKey(optimized.stablePrefix);
1116
1389
 
1117
1390
  if (optimized.changed && optimized.systemPrompt.trim().length > 0) {
1118
1391
  return { systemPrompt: optimized.systemPrompt };
1119
1392
  }
1120
1393
 
1394
+ // Reorder didn't apply but compression might have. Return the
1395
+ // compressed (or stripped) prompt directly so we still benefit from
1396
+ // the volume cut even when reorder is a no-op (e.g., short sessions
1397
+ // where no stable candidate is long enough).
1398
+ if (compressedPrompt !== strippedPrompt && compressedPrompt.trim().length > 0) {
1399
+ return { systemPrompt: compressedPrompt };
1400
+ }
1401
+ if (strippedPrompt !== event.systemPrompt && strippedPrompt.trim().length > 0) {
1402
+ return { systemPrompt: strippedPrompt };
1403
+ }
1404
+
1121
1405
  return {};
1122
1406
  });
1123
1407
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.0.1",
3
+ "version": "2.1.0",
4
4
  "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -16,10 +16,12 @@
16
16
  "author": "freescheme",
17
17
  "license": "MIT",
18
18
  "files": [
19
- "extension.ts"
19
+ "index.ts"
20
20
  ],
21
21
  "pi": {
22
- "extensions": ["./extension.ts"],
22
+ "extensions": [
23
+ "./index.ts"
24
+ ],
23
25
  "image": "https://img.shields.io/badge/Pi-Cache%20Optimizer-4A90D9"
24
26
  },
25
27
  "peerDependencies": {
@@ -27,6 +29,6 @@
27
29
  },
28
30
  "repository": {
29
31
  "type": "git",
30
- "url": "git+https://github.com/jiangge/pi-deepseek-cache-optimizer.git"
32
+ "url": "git+https://github.com/jiangge/pi-cache-optimizer.git"
31
33
  }
32
34
  }