pi-cache-optimizer 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +34 -0
- package/package.json +1 -1
package/index.ts
CHANGED
|
@@ -1433,6 +1433,40 @@ export default function (pi: ExtensionAPI) {
|
|
|
1433
1433
|
});
|
|
1434
1434
|
|
|
1435
1435
|
pi.on("before_agent_start", async (event, _ctx) => {
|
|
1436
|
+
// ────────────────────────────────────────────────────────────────
|
|
1437
|
+
// OpenAI Responses API bypass (codex-responses + responses)
|
|
1438
|
+
//
|
|
1439
|
+
// OpenAI's Responses API endpoints — both the Codex backend
|
|
1440
|
+
// (openai-codex-responses, chatgpt.com) and the public
|
|
1441
|
+
// Responses API (openai-responses, api.openai.com / Copilot) —
|
|
1442
|
+
// have two properties that make client-side prompt reordering
|
|
1443
|
+
// unnecessary and potentially harmful:
|
|
1444
|
+
//
|
|
1445
|
+
// 1. Server-managed caching: both APIs send `prompt_cache_key`
|
|
1446
|
+
// (= Pi session id) in every request body, so the server
|
|
1447
|
+
// already maintains a stable cache without prefix ordering.
|
|
1448
|
+
// Client-side reordering adds no cache benefit.
|
|
1449
|
+
//
|
|
1450
|
+
// 2. Stricter content-safety filtering: the Codex backend in
|
|
1451
|
+
// particular has a product-level safety filter that flags
|
|
1452
|
+
// reordered prompts (tool snippets / guidelines lifted above
|
|
1453
|
+
// the assistant role) as potential prompt-injection, returning
|
|
1454
|
+
// `content_filter` and blocking tool calls (notably
|
|
1455
|
+
// `subagent`). The public Responses API shares the same
|
|
1456
|
+
// filter framework and could behave similarly.
|
|
1457
|
+
//
|
|
1458
|
+
// We therefore skip ALL prompt modifications (churn strip, skill
|
|
1459
|
+
// compression, reorder) for these APIs. Third-party providers
|
|
1460
|
+
// that use openai-completions are unaffected.
|
|
1461
|
+
// ────────────────────────────────────────────────────────────────
|
|
1462
|
+
const model = _ctx.model;
|
|
1463
|
+
if (model) {
|
|
1464
|
+
const api = lower(model.api);
|
|
1465
|
+
if (api === "openai-codex-responses" || api === "openai-responses") {
|
|
1466
|
+
return {};
|
|
1467
|
+
}
|
|
1468
|
+
}
|
|
1469
|
+
|
|
1436
1470
|
// Step 1: strip per-turn churn from <session-overview>.
|
|
1437
1471
|
// Removing RECENT COMMITS, Working directory status, and
|
|
1438
1472
|
// Journal line count makes more of the session-overview stable
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-cache-optimizer",
|
|
3
|
-
"version": "2.2.1",
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "Pi extension that improves provider-side KV/prompt cache hit rates (DeepSeek, OpenAI, Claude, Gemini) by reordering the system prompt, requesting long retention, and showing footer cache stats. Renamed from pi-deepseek-cache-optimizer.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|