pi-reasoning-zip 0.2.1 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -1
- package/README.md +5 -2
- package/dist/compactorClient.js +29 -17
- package/dist/index.js +6 -5
- package/dist/messageTransform.js +5 -1
- package/package.json +1 -1
- package/scripts/smoke-extension.mjs +1 -1
- package/src/compactorClient.ts +37 -17
- package/src/index.ts +43 -7
- package/src/messageTransform.ts +4 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.5] - 2026-07-03
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- Skip Pi `thinkingSignature` and redacted thinking blocks instead of rewriting opaque provider metadata.
|
|
15
|
+
- Enforce `thresholds.targetRatio` when accepting compacted thinking output.
|
|
16
|
+
|
|
17
|
+
## [0.2.4] - 2026-07-03
|
|
18
|
+
|
|
19
|
+
### Fixed
|
|
20
|
+
|
|
21
|
+
- Prompt injection now targets real Pi `before_provider_request` events using `ctx.model.provider`.
|
|
22
|
+
|
|
23
|
+
## [0.2.3] - 2026-07-03
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
|
|
27
|
+
- Added strict-endpoint fallback retry for compactor requests that reject `chat_template_kwargs`.
|
|
28
|
+
|
|
29
|
+
## [0.2.2] - 2026-07-03
|
|
30
|
+
|
|
31
|
+
### Changed
|
|
32
|
+
|
|
33
|
+
- Replaced broad extension hook `any` casts with narrow local structural hook types.
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
|
|
37
|
+
- Added real compactor timeout abort test.
|
|
38
|
+
|
|
39
|
+
### Fixed
|
|
40
|
+
|
|
41
|
+
- Disabled compactor-side Qwen/llama.cpp thinking via `chat_template_kwargs.enable_thinking=false` so compact traces are returned in `message.content`.
|
|
42
|
+
|
|
10
43
|
## [0.2.1] - 2026-07-02
|
|
11
44
|
|
|
12
45
|
### Changed
|
|
@@ -45,7 +78,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
45
78
|
- Added package motto to README.
|
|
46
79
|
- Added npm release metadata: description, keywords, repository links, exports, and Node engine.
|
|
47
80
|
|
|
48
|
-
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.1...HEAD
|
|
81
|
+
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.5...HEAD
|
|
82
|
+
[0.2.5]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.4...v0.2.5
|
|
83
|
+
[0.2.4]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.3...v0.2.4
|
|
84
|
+
[0.2.3]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.2...v0.2.3
|
|
85
|
+
[0.2.2]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.1...v0.2.2
|
|
49
86
|
[0.2.1]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.0...v0.2.1
|
|
50
87
|
[0.2.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.1.0...v0.2.0
|
|
51
88
|
[0.1.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/releases/tag/v0.1.0
|
package/README.md
CHANGED
|
@@ -60,7 +60,7 @@ The npm library entrypoint still builds to `dist/index.js`, but Pi package metad
|
|
|
60
60
|
- **llama.cpp-first targeting** — defaults to llama.cpp-like providers such as `llama-server=http://127.0.0.1:7484`.
|
|
61
61
|
- **Prompt minimization** — optional grug-style request injection for target local providers.
|
|
62
62
|
- **Fail-open safety** — preserves original messages on errors, timeouts, invalid output, or unknown payloads.
|
|
63
|
-
- **Opaque reasoning guard** — skips signed
|
|
63
|
+
- **Opaque reasoning guard** — skips signed, encrypted, signature-bearing, redacted, or provider-opaque reasoning metadata.
|
|
64
64
|
|
|
65
65
|
## Commands
|
|
66
66
|
|
|
@@ -125,6 +125,8 @@ The compactor must expose an OpenAI-compatible chat completions endpoint:
|
|
|
125
125
|
POST {baseUrl}/chat/completions
|
|
126
126
|
```
|
|
127
127
|
|
|
128
|
+
The extension first sends `chat_template_kwargs: { "enable_thinking": false }` so llama.cpp/Qwen-style compactor calls return the compact trace in `message.content` instead of spending tokens on compactor-side reasoning. If a stricter OpenAI-compatible endpoint rejects that extra field with HTTP 400/422, the request is retried once without it.
|
|
129
|
+
|
|
128
130
|
The extension asks the compactor to produce terse output like:
|
|
129
131
|
|
|
130
132
|
```text
|
|
@@ -140,7 +142,7 @@ next:
|
|
|
140
142
|
- ...
|
|
141
143
|
```
|
|
142
144
|
|
|
143
|
-
If the compactor returns `none`, empty output, output longer than the original, or output over `thresholds.maxTraceChars`, the original block is preserved.
|
|
145
|
+
If the compactor returns `none`, empty output, output above `thresholds.targetRatio` of the original length, output longer than the original, or output over `thresholds.maxTraceChars`, the original block is preserved.
|
|
144
146
|
|
|
145
147
|
## Safety model
|
|
146
148
|
|
|
@@ -157,6 +159,7 @@ It skips:
|
|
|
157
159
|
- non-assistant messages
|
|
158
160
|
- messages without array content
|
|
159
161
|
- short thinking below `thresholds.minChars`
|
|
162
|
+
- signature-bearing or redacted thinking blocks
|
|
160
163
|
- unknown providers by default in `llama-only`
|
|
161
164
|
- hosted/non-local providers in `local-only`
|
|
162
165
|
|
package/dist/compactorClient.js
CHANGED
|
@@ -1,25 +1,37 @@
|
|
|
1
1
|
import { buildCompactionPrompt } from "./compactPrompt.js";
|
|
2
|
+
function buildPayload(thinking, settings, disableThinking) {
|
|
3
|
+
return {
|
|
4
|
+
model: settings.compactor.model,
|
|
5
|
+
messages: [
|
|
6
|
+
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
7
|
+
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
8
|
+
],
|
|
9
|
+
max_tokens: settings.compactor.maxTokens,
|
|
10
|
+
temperature: settings.compactor.temperature,
|
|
11
|
+
...(disableThinking ? { chat_template_kwargs: { enable_thinking: false } } : {}),
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
async function postCompactionRequest(thinking, settings, signal, disableThinking) {
|
|
15
|
+
return await fetch(`${settings.compactor.baseUrl}/chat/completions`, {
|
|
16
|
+
method: "POST",
|
|
17
|
+
headers: {
|
|
18
|
+
"content-type": "application/json",
|
|
19
|
+
authorization: `Bearer ${settings.compactor.apiKey}`,
|
|
20
|
+
},
|
|
21
|
+
body: JSON.stringify(buildPayload(thinking, settings, disableThinking)),
|
|
22
|
+
signal,
|
|
23
|
+
});
|
|
24
|
+
}
|
|
2
25
|
export async function compactWithOpenAI(thinking, settings) {
|
|
3
26
|
const controller = new AbortController();
|
|
4
27
|
const timeout = setTimeout(() => controller.abort(), settings.compactor.timeoutMs);
|
|
5
28
|
try {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
body: JSON.stringify({
|
|
13
|
-
model: settings.compactor.model,
|
|
14
|
-
messages: [
|
|
15
|
-
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
16
|
-
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
17
|
-
],
|
|
18
|
-
max_tokens: settings.compactor.maxTokens,
|
|
19
|
-
temperature: settings.compactor.temperature,
|
|
20
|
-
}),
|
|
21
|
-
signal: controller.signal,
|
|
22
|
-
});
|
|
29
|
+
let response = await postCompactionRequest(thinking, settings, controller.signal, true);
|
|
30
|
+
// Some strict OpenAI-compatible endpoints reject llama.cpp/Qwen-specific
|
|
31
|
+
// chat_template_kwargs. Retry once without it for compatibility.
|
|
32
|
+
if (response.status === 400 || response.status === 422) {
|
|
33
|
+
response = await postCompactionRequest(thinking, settings, controller.signal, false);
|
|
34
|
+
}
|
|
23
35
|
if (!response.ok)
|
|
24
36
|
throw new Error(`Compactor HTTP ${response.status}`);
|
|
25
37
|
const json = (await response.json());
|
package/dist/index.js
CHANGED
|
@@ -25,12 +25,13 @@ function readSettingsSection(path) {
|
|
|
25
25
|
function readRawSettings(cwd) {
|
|
26
26
|
return readSettingsSection(projectSettingsPath(cwd)) ?? readSettingsSection(globalSettingsPath());
|
|
27
27
|
}
|
|
28
|
-
function eventProvider(event) {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return message.provider;
|
|
28
|
+
function eventProvider(event, ctx) {
|
|
29
|
+
if (typeof event.message?.provider === "string")
|
|
30
|
+
return event.message.provider;
|
|
32
31
|
if (typeof event.provider === "string")
|
|
33
32
|
return event.provider;
|
|
33
|
+
if (typeof ctx.model?.provider === "string")
|
|
34
|
+
return ctx.model.provider;
|
|
34
35
|
return undefined;
|
|
35
36
|
}
|
|
36
37
|
export default function reasoningZipExtension(pi) {
|
|
@@ -44,7 +45,7 @@ export default function reasoningZipExtension(pi) {
|
|
|
44
45
|
});
|
|
45
46
|
extension.on("before_provider_request", (event, ctx) => {
|
|
46
47
|
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
47
|
-
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event), settings);
|
|
48
|
+
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event, ctx), settings);
|
|
48
49
|
return nextPayload === event.payload ? undefined : nextPayload;
|
|
49
50
|
});
|
|
50
51
|
}
|
package/dist/messageTransform.js
CHANGED
|
@@ -5,8 +5,10 @@ function isThinkingBlock(block) {
|
|
|
5
5
|
function hasOpaqueReasoningMetadata(block) {
|
|
6
6
|
return (typeof block.signature === "string" ||
|
|
7
7
|
typeof block.reasoning_signature === "string" ||
|
|
8
|
+
typeof block.thinkingSignature === "string" ||
|
|
8
9
|
typeof block.encrypted_content === "string" ||
|
|
9
|
-
Array.isArray(block.reasoning_details)
|
|
10
|
+
Array.isArray(block.reasoning_details) ||
|
|
11
|
+
block.redacted === true);
|
|
10
12
|
}
|
|
11
13
|
function acceptableCompaction(original, compacted, settings) {
|
|
12
14
|
const text = compacted.trim();
|
|
@@ -14,6 +16,8 @@ function acceptableCompaction(original, compacted, settings) {
|
|
|
14
16
|
return undefined;
|
|
15
17
|
if (text.length >= original.length)
|
|
16
18
|
return undefined;
|
|
19
|
+
if (text.length / original.length > settings.thresholds.targetRatio)
|
|
20
|
+
return undefined;
|
|
17
21
|
if (text.length > settings.thresholds.maxTraceChars)
|
|
18
22
|
return undefined;
|
|
19
23
|
return text;
|
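package/package.json
CHANGED
|
[hunk not captured in this extract; presumably the version bump 0.2.1 → 0.2.5]
|
package/scripts/smoke-extension.mjs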
CHANGED
|
@@ -15,7 +15,7 @@ try {
|
|
|
15
15
|
JSON.stringify({
|
|
16
16
|
reasoningZip: {
|
|
17
17
|
mode: "llama-only",
|
|
18
|
-
thresholds: { minChars: 5, maxTraceChars: 100 },
|
|
18
|
+
thresholds: { minChars: 5, targetRatio: 1, maxTraceChars: 100 },
|
|
19
19
|
compactor: { baseUrl: "http://mock.local/v1", model: "mock", timeoutMs: 1000 },
|
|
20
20
|
},
|
|
21
21
|
}),
|
package/src/compactorClient.ts
CHANGED
|
@@ -1,27 +1,47 @@
|
|
|
1
1
|
import { buildCompactionPrompt } from "./compactPrompt.js";
|
|
2
2
|
import type { ReasoningZipSettings } from "./types.js";
|
|
3
3
|
|
|
4
|
+
function buildPayload(thinking: string, settings: ReasoningZipSettings, disableThinking: boolean): Record<string, unknown> {
|
|
5
|
+
return {
|
|
6
|
+
model: settings.compactor.model,
|
|
7
|
+
messages: [
|
|
8
|
+
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
9
|
+
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
10
|
+
],
|
|
11
|
+
max_tokens: settings.compactor.maxTokens,
|
|
12
|
+
temperature: settings.compactor.temperature,
|
|
13
|
+
...(disableThinking ? { chat_template_kwargs: { enable_thinking: false } } : {}),
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
async function postCompactionRequest(
|
|
18
|
+
thinking: string,
|
|
19
|
+
settings: ReasoningZipSettings,
|
|
20
|
+
signal: AbortSignal,
|
|
21
|
+
disableThinking: boolean,
|
|
22
|
+
): Promise<Response> {
|
|
23
|
+
return await fetch(`${settings.compactor.baseUrl}/chat/completions`, {
|
|
24
|
+
method: "POST",
|
|
25
|
+
headers: {
|
|
26
|
+
"content-type": "application/json",
|
|
27
|
+
authorization: `Bearer ${settings.compactor.apiKey}`,
|
|
28
|
+
},
|
|
29
|
+
body: JSON.stringify(buildPayload(thinking, settings, disableThinking)),
|
|
30
|
+
signal,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
|
|
4
34
|
export async function compactWithOpenAI(thinking: string, settings: ReasoningZipSettings): Promise<string> {
|
|
5
35
|
const controller = new AbortController();
|
|
6
36
|
const timeout = setTimeout(() => controller.abort(), settings.compactor.timeoutMs);
|
|
7
37
|
try {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
model: settings.compactor.model,
|
|
16
|
-
messages: [
|
|
17
|
-
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
18
|
-
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
19
|
-
],
|
|
20
|
-
max_tokens: settings.compactor.maxTokens,
|
|
21
|
-
temperature: settings.compactor.temperature,
|
|
22
|
-
}),
|
|
23
|
-
signal: controller.signal,
|
|
24
|
-
});
|
|
38
|
+
let response = await postCompactionRequest(thinking, settings, controller.signal, true);
|
|
39
|
+
|
|
40
|
+
// Some strict OpenAI-compatible endpoints reject llama.cpp/Qwen-specific
|
|
41
|
+
// chat_template_kwargs. Retry once without it for compatibility.
|
|
42
|
+
if (response.status === 400 || response.status === 422) {
|
|
43
|
+
response = await postCompactionRequest(thinking, settings, controller.signal, false);
|
|
44
|
+
}
|
|
25
45
|
|
|
26
46
|
if (!response.ok) throw new Error(`Compactor HTTP ${response.status}`);
|
|
27
47
|
const json = (await response.json()) as { choices?: Array<{ message?: { content?: unknown } }> };
|
package/src/index.ts
CHANGED
|
@@ -6,6 +6,42 @@ import { compactWithOpenAI } from "./compactorClient.js";
|
|
|
6
6
|
import { compactAssistantMessage } from "./messageTransform.js";
|
|
7
7
|
import { injectReasoningZipPrompt } from "./promptInjection.js";
|
|
8
8
|
import { resolveReasoningZipSettings } from "./settings.js";
|
|
9
|
+
import type { PiMessage } from "./types.js";
|
|
10
|
+
|
|
11
|
+
// Minimal structural types for the two Pi hooks we consume.
|
|
12
|
+
// Avoids `pi as any` while staying independent of upstream type changes.
|
|
13
|
+
|
|
14
|
+
interface HookContext {
|
|
15
|
+
cwd?: string;
|
|
16
|
+
model?: { provider?: string };
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface MessageEndEvent {
|
|
20
|
+
message: PiMessage;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
interface BeforeProviderRequestEvent {
|
|
24
|
+
payload: unknown;
|
|
25
|
+
message?: { provider?: string };
|
|
26
|
+
provider?: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
interface ReasoningZipExtension {
|
|
30
|
+
on(
|
|
31
|
+
event: "message_end",
|
|
32
|
+
handler: (
|
|
33
|
+
event: MessageEndEvent,
|
|
34
|
+
ctx: HookContext,
|
|
35
|
+
) => Promise<{ message: PiMessage } | undefined>,
|
|
36
|
+
): void;
|
|
37
|
+
on(
|
|
38
|
+
event: "before_provider_request",
|
|
39
|
+
handler: (
|
|
40
|
+
event: BeforeProviderRequestEvent,
|
|
41
|
+
ctx: HookContext,
|
|
42
|
+
) => unknown | undefined,
|
|
43
|
+
): void;
|
|
44
|
+
}
|
|
9
45
|
|
|
10
46
|
function globalSettingsPath(): string {
|
|
11
47
|
return join(process.env.PI_CODING_AGENT_DIR ?? join(homedir(), ".pi", "agent"), "settings.json");
|
|
@@ -29,26 +65,26 @@ function readRawSettings(cwd: string | undefined): unknown {
|
|
|
29
65
|
return readSettingsSection(projectSettingsPath(cwd)) ?? readSettingsSection(globalSettingsPath());
|
|
30
66
|
}
|
|
31
67
|
|
|
32
|
-
function eventProvider(event:
|
|
33
|
-
|
|
34
|
-
if (typeof message?.provider === "string") return message.provider;
|
|
68
|
+
function eventProvider(event: BeforeProviderRequestEvent, ctx: HookContext): string | undefined {
|
|
69
|
+
if (typeof event.message?.provider === "string") return event.message.provider;
|
|
35
70
|
if (typeof event.provider === "string") return event.provider;
|
|
71
|
+
if (typeof ctx.model?.provider === "string") return ctx.model.provider;
|
|
36
72
|
return undefined;
|
|
37
73
|
}
|
|
38
74
|
|
|
39
75
|
export default function reasoningZipExtension(pi: ExtensionAPI) {
|
|
40
|
-
const extension = pi as any;
|
|
76
|
+
const extension = pi as unknown as ReasoningZipExtension;
|
|
41
77
|
|
|
42
|
-
extension.on("message_end", async (event
|
|
78
|
+
extension.on("message_end", async (event, ctx) => {
|
|
43
79
|
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
44
80
|
const result = await compactAssistantMessage(event.message, settings, (thinking) => compactWithOpenAI(thinking, settings));
|
|
45
81
|
if (result.changed) return { message: result.message };
|
|
46
82
|
return undefined;
|
|
47
83
|
});
|
|
48
84
|
|
|
49
|
-
extension.on("before_provider_request", (event
|
|
85
|
+
extension.on("before_provider_request", (event, ctx) => {
|
|
50
86
|
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
51
|
-
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event), settings);
|
|
87
|
+
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event, ctx), settings);
|
|
52
88
|
return nextPayload === event.payload ? undefined : nextPayload;
|
|
53
89
|
});
|
|
54
90
|
}
|
package/src/messageTransform.ts
CHANGED
|
@@ -11,8 +11,10 @@ function hasOpaqueReasoningMetadata(block: PiMessageBlock): boolean {
|
|
|
11
11
|
return (
|
|
12
12
|
typeof block.signature === "string" ||
|
|
13
13
|
typeof block.reasoning_signature === "string" ||
|
|
14
|
+
typeof block.thinkingSignature === "string" ||
|
|
14
15
|
typeof block.encrypted_content === "string" ||
|
|
15
|
-
Array.isArray(block.reasoning_details)
|
|
16
|
+
Array.isArray(block.reasoning_details) ||
|
|
17
|
+
block.redacted === true
|
|
16
18
|
);
|
|
17
19
|
}
|
|
18
20
|
|
|
@@ -20,6 +22,7 @@ function acceptableCompaction(original: string, compacted: string, settings: Rea
|
|
|
20
22
|
const text = compacted.trim();
|
|
21
23
|
if (!text || text === "none") return undefined;
|
|
22
24
|
if (text.length >= original.length) return undefined;
|
|
25
|
+
if (text.length / original.length > settings.thresholds.targetRatio) return undefined;
|
|
23
26
|
if (text.length > settings.thresholds.maxTraceChars) return undefined;
|
|
24
27
|
return text;
|
|
25
28
|
}
|