pi-cache-optimizer 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.ts +34 -0
  2. package/package.json +1 -1
package/index.ts CHANGED
@@ -1433,6 +1433,40 @@ export default function (pi: ExtensionAPI) {
1433
1433
  });
1434
1434
 
1435
1435
  pi.on("before_agent_start", async (event, _ctx) => {
1436
+ // ────────────────────────────────────────────────────────────────
1437
+ // OpenAI Responses API bypass (codex-responses + responses)
1438
+ //
1439
+ // OpenAI's Responses API endpoints — both the Codex backend
1440
+ // (openai-codex-responses, chatgpt.com) and the public
1441
+ // Responses API (openai-responses, api.openai.com / Copilot) —
1442
+ // have two properties that make client-side prompt reordering
1443
+ // unnecessary and potentially harmful:
1444
+ //
1445
+ // 1. Server-managed caching: both APIs receive `prompt_cache_key`
1446
+ // (= Pi session id) in every request body, so the server
1447
+ // already maintains a stable cache without prefix ordering.
1448
+ // Client-side reordering adds no cache benefit.
1449
+ //
1450
+ // 2. Stricter content-safety filtering: the Codex backend in
1451
+ // particular has a product-level safety filter that flags
1452
+ // reordered prompts (tool snippets / guidelines lifted above
1453
+ // the assistant role) as potential prompt-injection, returning
1454
+ // `content_filter` and blocking tool calls (notably
1455
+ // `subagent`). The public Responses API shares the same
1456
+ // filter framework and could behave similarly.
1457
+ //
1458
+ // We therefore skip ALL prompt modifications (churn strip, skill
1459
+ // compression, reorder) for these APIs. Third-party providers
1460
+ // that use openai-completions are unaffected.
1461
+ // ────────────────────────────────────────────────────────────────
1462
+ const model = _ctx.model;
1463
+ if (model) {
1464
+ const api = lower(model.api);
1465
+ if (api === "openai-codex-responses" || api === "openai-responses") {
1466
+ return {};
1467
+ }
1468
+ }
1469
+
1436
1470
  // Step 1: strip per-turn churn from <session-overview>.
1437
1471
  // Removing RECENT COMMITS, Working directory status, and
1438
1472
  // Journal line count makes more of the session-overview stable
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-cache-optimizer",
3
- "version": "2.2.1",
3
+ "version": "2.3.0",
4
4
  "description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
5
5
  "keywords": [
6
6
  "pi-package",