pi-reasoning-zip 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +37 -1
- package/README.md +9 -4
- package/dist/compactorClient.js +29 -17
- package/dist/index.js +6 -5
- package/extensions/index.ts +3 -0
- package/package.json +4 -2
- package/src/compactPrompt.ts +12 -0
- package/src/compactorClient.ts +54 -0
- package/src/index.ts +90 -0
- package/src/messageTransform.ts +59 -0
- package/src/promptInjection.ts +43 -0
- package/src/settings.ts +71 -0
- package/src/target.ts +36 -0
- package/src/types.ts +38 -0
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,38 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [0.2.4] - 2026-07-03
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
|
|
14
|
+
- Prompt injection now targets real Pi `before_provider_request` events using `ctx.model.provider`.
|
|
15
|
+
|
|
16
|
+
## [0.2.3] - 2026-07-03
|
|
17
|
+
|
|
18
|
+
### Added
|
|
19
|
+
|
|
20
|
+
- Added strict-endpoint fallback retry for compactor requests that reject `chat_template_kwargs`.
|
|
21
|
+
|
|
22
|
+
## [0.2.2] - 2026-07-03
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
|
|
26
|
+
- Replaced broad extension hook `any` casts with narrow local structural hook types.
|
|
27
|
+
|
|
28
|
+
### Added
|
|
29
|
+
|
|
30
|
+
- Added real compactor timeout abort test.
|
|
31
|
+
|
|
32
|
+
### Fixed
|
|
33
|
+
|
|
34
|
+
- Disabled compactor-side Qwen/llama.cpp thinking via `chat_template_kwargs.enable_thinking=false` so compact traces are returned in `message.content`.
|
|
35
|
+
|
|
36
|
+
## [0.2.1] - 2026-07-02
|
|
37
|
+
|
|
38
|
+
### Changed
|
|
39
|
+
|
|
40
|
+
- Pi package metadata now loads readable source from `./extensions` and ships `src` for transparency.
|
|
41
|
+
|
|
10
42
|
## [0.2.0] - 2026-07-02
|
|
11
43
|
|
|
12
44
|
### Added
|
|
@@ -39,6 +71,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
39
71
|
- Added package motto to README.
|
|
40
72
|
- Added npm release metadata: description, keywords, repository links, exports, and Node engine.
|
|
41
73
|
|
|
42
|
-
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.0...HEAD
|
|
74
|
+
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.4...HEAD
|
|
75
|
+
[0.2.4]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.3...v0.2.4
|
|
76
|
+
[0.2.3]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.2...v0.2.3
|
|
77
|
+
[0.2.2]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.1...v0.2.2
|
|
78
|
+
[0.2.1]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.0...v0.2.1
|
|
43
79
|
[0.2.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.1.0...v0.2.0
|
|
44
80
|
[0.1.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/releases/tag/v0.1.0
|
package/README.md
CHANGED
|
@@ -44,13 +44,14 @@ npm run check
|
|
|
44
44
|
npm run smoke
|
|
45
45
|
```
|
|
46
46
|
|
|
47
|
-
For local development you can also load the
|
|
47
|
+
For local development you can also load the readable source extension directly:
|
|
48
48
|
|
|
49
49
|
```bash
|
|
50
|
-
|
|
51
|
-
pi -e ./dist/index.js
|
|
50
|
+
pi -e ./extensions
|
|
52
51
|
```
|
|
53
52
|
|
|
53
|
+
The npm library entrypoint still builds to `dist/index.js`, but Pi package metadata points at `./extensions` so Pi can inspect the source it loads.
|
|
54
|
+
|
|
54
55
|
## Features
|
|
55
56
|
|
|
56
57
|
- **Forward-only compaction** — modifies only the new assistant message being finalized.
|
|
@@ -124,6 +125,8 @@ The compactor must expose an OpenAI-compatible chat completions endpoint:
|
|
|
124
125
|
POST {baseUrl}/chat/completions
|
|
125
126
|
```
|
|
126
127
|
|
|
128
|
+
The extension first sends `chat_template_kwargs: { "enable_thinking": false }` so llama.cpp/Qwen-style compactor calls return the compact trace in `message.content` instead of spending tokens on compactor-side reasoning. If a stricter OpenAI-compatible endpoint rejects that extra field with HTTP 400/422, the request is retried once without it.
|
|
129
|
+
|
|
127
130
|
The extension asks the compactor to produce terse output like:
|
|
128
131
|
|
|
129
132
|
```text
|
|
@@ -188,6 +191,7 @@ npm test
|
|
|
188
191
|
npm run build
|
|
189
192
|
npm run check
|
|
190
193
|
npm run smoke
|
|
194
|
+
pi -e ./extensions --no-extensions --offline --list-models
|
|
191
195
|
npm pack --dry-run
|
|
192
196
|
```
|
|
193
197
|
|
|
@@ -220,11 +224,12 @@ npm pack --dry-run
|
|
|
220
224
|
[0.1.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/releases/tag/v0.1.0
|
|
221
225
|
```
|
|
222
226
|
|
|
223
|
-
4. Verify build, smoke test, package contents, and npm publish metadata.
|
|
227
|
+
4. Verify build, source-extension load, smoke test, package contents, and npm publish metadata.
|
|
224
228
|
|
|
225
229
|
```bash
|
|
226
230
|
npm run check
|
|
227
231
|
npm run smoke
|
|
232
|
+
pi -e ./extensions --no-extensions --offline --list-models
|
|
228
233
|
npm pack --dry-run
|
|
229
234
|
npm publish --dry-run
|
|
230
235
|
```
|
package/dist/compactorClient.js
CHANGED
|
@@ -1,25 +1,37 @@
|
|
|
1
1
|
import { buildCompactionPrompt } from "./compactPrompt.js";
|
|
2
|
+
function buildPayload(thinking, settings, disableThinking) {
|
|
3
|
+
return {
|
|
4
|
+
model: settings.compactor.model,
|
|
5
|
+
messages: [
|
|
6
|
+
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
7
|
+
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
8
|
+
],
|
|
9
|
+
max_tokens: settings.compactor.maxTokens,
|
|
10
|
+
temperature: settings.compactor.temperature,
|
|
11
|
+
...(disableThinking ? { chat_template_kwargs: { enable_thinking: false } } : {}),
|
|
12
|
+
};
|
|
13
|
+
}
|
|
14
|
+
async function postCompactionRequest(thinking, settings, signal, disableThinking) {
|
|
15
|
+
return await fetch(`${settings.compactor.baseUrl}/chat/completions`, {
|
|
16
|
+
method: "POST",
|
|
17
|
+
headers: {
|
|
18
|
+
"content-type": "application/json",
|
|
19
|
+
authorization: `Bearer ${settings.compactor.apiKey}`,
|
|
20
|
+
},
|
|
21
|
+
body: JSON.stringify(buildPayload(thinking, settings, disableThinking)),
|
|
22
|
+
signal,
|
|
23
|
+
});
|
|
24
|
+
}
|
|
2
25
|
export async function compactWithOpenAI(thinking, settings) {
|
|
3
26
|
const controller = new AbortController();
|
|
4
27
|
const timeout = setTimeout(() => controller.abort(), settings.compactor.timeoutMs);
|
|
5
28
|
try {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
body: JSON.stringify({
|
|
13
|
-
model: settings.compactor.model,
|
|
14
|
-
messages: [
|
|
15
|
-
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
16
|
-
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
17
|
-
],
|
|
18
|
-
max_tokens: settings.compactor.maxTokens,
|
|
19
|
-
temperature: settings.compactor.temperature,
|
|
20
|
-
}),
|
|
21
|
-
signal: controller.signal,
|
|
22
|
-
});
|
|
29
|
+
let response = await postCompactionRequest(thinking, settings, controller.signal, true);
|
|
30
|
+
// Some strict OpenAI-compatible endpoints reject llama.cpp/Qwen-specific
|
|
31
|
+
// chat_template_kwargs. Retry once without it for compatibility.
|
|
32
|
+
if (response.status === 400 || response.status === 422) {
|
|
33
|
+
response = await postCompactionRequest(thinking, settings, controller.signal, false);
|
|
34
|
+
}
|
|
23
35
|
if (!response.ok)
|
|
24
36
|
throw new Error(`Compactor HTTP ${response.status}`);
|
|
25
37
|
const json = (await response.json());
|
package/dist/index.js
CHANGED
|
@@ -25,12 +25,13 @@ function readSettingsSection(path) {
|
|
|
25
25
|
function readRawSettings(cwd) {
|
|
26
26
|
return readSettingsSection(projectSettingsPath(cwd)) ?? readSettingsSection(globalSettingsPath());
|
|
27
27
|
}
|
|
28
|
-
function eventProvider(event) {
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
return message.provider;
|
|
28
|
+
function eventProvider(event, ctx) {
|
|
29
|
+
if (typeof event.message?.provider === "string")
|
|
30
|
+
return event.message.provider;
|
|
32
31
|
if (typeof event.provider === "string")
|
|
33
32
|
return event.provider;
|
|
33
|
+
if (typeof ctx.model?.provider === "string")
|
|
34
|
+
return ctx.model.provider;
|
|
34
35
|
return undefined;
|
|
35
36
|
}
|
|
36
37
|
export default function reasoningZipExtension(pi) {
|
|
@@ -44,7 +45,7 @@ export default function reasoningZipExtension(pi) {
|
|
|
44
45
|
});
|
|
45
46
|
extension.on("before_provider_request", (event, ctx) => {
|
|
46
47
|
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
47
|
-
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event), settings);
|
|
48
|
+
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event, ctx), settings);
|
|
48
49
|
return nextPayload === event.payload ? undefined : nextPayload;
|
|
49
50
|
});
|
|
50
51
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pi-reasoning-zip",
|
|
3
|
-
"version": "0.2.0",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Compact reasoning blocks to keep the context short.",
|
|
6
6
|
"license": "MIT",
|
|
@@ -10,6 +10,8 @@
|
|
|
10
10
|
"types": "dist/index.d.ts",
|
|
11
11
|
"files": [
|
|
12
12
|
"dist",
|
|
13
|
+
"src",
|
|
14
|
+
"extensions",
|
|
13
15
|
"scripts/smoke-extension.mjs",
|
|
14
16
|
"README.md",
|
|
15
17
|
"CHANGELOG.md",
|
|
@@ -57,7 +59,7 @@
|
|
|
57
59
|
},
|
|
58
60
|
"pi": {
|
|
59
61
|
"extensions": [
|
|
60
|
-
"
|
|
62
|
+
"./extensions"
|
|
61
63
|
],
|
|
62
64
|
"image": "https://raw.githubusercontent.com/Ryu-CZ/pi-reasoning-zip/main/media/banner.webp"
|
|
63
65
|
},
|
package/src/compactPrompt.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
export function buildCompactionPrompt(thinking: string): string {
|
|
2
|
+
return `Compress this model reasoning into a compact decision trace for future coding-agent context.
|
|
3
|
+
|
|
4
|
+
Keep exact paths, commands, symbols, errors, decisions, constraints, failed attempts, and next actions.
|
|
5
|
+
Drop self-talk, repeated planning, obvious reasoning, filler, and prose.
|
|
6
|
+
Use terse bullets under: facts, decisions, constraints, failed, next.
|
|
7
|
+
Target 10-20% of original length.
|
|
8
|
+
If no useful content remains, output exactly: none
|
|
9
|
+
|
|
10
|
+
Reasoning:
|
|
11
|
+
${thinking}`;
|
|
12
|
+
}
|
package/src/compactorClient.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import { buildCompactionPrompt } from "./compactPrompt.js";
|
|
2
|
+
import type { ReasoningZipSettings } from "./types.js";
|
|
3
|
+
|
|
4
|
+
function buildPayload(thinking: string, settings: ReasoningZipSettings, disableThinking: boolean): Record<string, unknown> {
|
|
5
|
+
return {
|
|
6
|
+
model: settings.compactor.model,
|
|
7
|
+
messages: [
|
|
8
|
+
{ role: "system", content: "You compress reasoning traces. Output only compact trace." },
|
|
9
|
+
{ role: "user", content: buildCompactionPrompt(thinking) },
|
|
10
|
+
],
|
|
11
|
+
max_tokens: settings.compactor.maxTokens,
|
|
12
|
+
temperature: settings.compactor.temperature,
|
|
13
|
+
...(disableThinking ? { chat_template_kwargs: { enable_thinking: false } } : {}),
|
|
14
|
+
};
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
async function postCompactionRequest(
|
|
18
|
+
thinking: string,
|
|
19
|
+
settings: ReasoningZipSettings,
|
|
20
|
+
signal: AbortSignal,
|
|
21
|
+
disableThinking: boolean,
|
|
22
|
+
): Promise<Response> {
|
|
23
|
+
return await fetch(`${settings.compactor.baseUrl}/chat/completions`, {
|
|
24
|
+
method: "POST",
|
|
25
|
+
headers: {
|
|
26
|
+
"content-type": "application/json",
|
|
27
|
+
authorization: `Bearer ${settings.compactor.apiKey}`,
|
|
28
|
+
},
|
|
29
|
+
body: JSON.stringify(buildPayload(thinking, settings, disableThinking)),
|
|
30
|
+
signal,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export async function compactWithOpenAI(thinking: string, settings: ReasoningZipSettings): Promise<string> {
|
|
35
|
+
const controller = new AbortController();
|
|
36
|
+
const timeout = setTimeout(() => controller.abort(), settings.compactor.timeoutMs);
|
|
37
|
+
try {
|
|
38
|
+
let response = await postCompactionRequest(thinking, settings, controller.signal, true);
|
|
39
|
+
|
|
40
|
+
// Some strict OpenAI-compatible endpoints reject llama.cpp/Qwen-specific
|
|
41
|
+
// chat_template_kwargs. Retry once without it for compatibility.
|
|
42
|
+
if (response.status === 400 || response.status === 422) {
|
|
43
|
+
response = await postCompactionRequest(thinking, settings, controller.signal, false);
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
if (!response.ok) throw new Error(`Compactor HTTP ${response.status}`);
|
|
47
|
+
const json = (await response.json()) as { choices?: Array<{ message?: { content?: unknown } }> };
|
|
48
|
+
const content = json.choices?.[0]?.message?.content;
|
|
49
|
+
if (typeof content !== "string") throw new Error("Compactor response missing message content");
|
|
50
|
+
return content.trim();
|
|
51
|
+
} finally {
|
|
52
|
+
clearTimeout(timeout);
|
|
53
|
+
}
|
|
54
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import { homedir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { compactWithOpenAI } from "./compactorClient.js";
|
|
6
|
+
import { compactAssistantMessage } from "./messageTransform.js";
|
|
7
|
+
import { injectReasoningZipPrompt } from "./promptInjection.js";
|
|
8
|
+
import { resolveReasoningZipSettings } from "./settings.js";
|
|
9
|
+
import type { PiMessage } from "./types.js";
|
|
10
|
+
|
|
11
|
+
// Minimal structural types for the two Pi hooks we consume.
|
|
12
|
+
// Avoids `pi as any` while staying independent of upstream type changes.
|
|
13
|
+
|
|
14
|
+
interface HookContext {
|
|
15
|
+
cwd?: string;
|
|
16
|
+
model?: { provider?: string };
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
interface MessageEndEvent {
|
|
20
|
+
message: PiMessage;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
interface BeforeProviderRequestEvent {
|
|
24
|
+
payload: unknown;
|
|
25
|
+
message?: { provider?: string };
|
|
26
|
+
provider?: string;
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
interface ReasoningZipExtension {
|
|
30
|
+
on(
|
|
31
|
+
event: "message_end",
|
|
32
|
+
handler: (
|
|
33
|
+
event: MessageEndEvent,
|
|
34
|
+
ctx: HookContext,
|
|
35
|
+
) => Promise<{ message: PiMessage } | undefined>,
|
|
36
|
+
): void;
|
|
37
|
+
on(
|
|
38
|
+
event: "before_provider_request",
|
|
39
|
+
handler: (
|
|
40
|
+
event: BeforeProviderRequestEvent,
|
|
41
|
+
ctx: HookContext,
|
|
42
|
+
) => unknown | undefined,
|
|
43
|
+
): void;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
function globalSettingsPath(): string {
|
|
47
|
+
return join(process.env.PI_CODING_AGENT_DIR ?? join(homedir(), ".pi", "agent"), "settings.json");
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
function projectSettingsPath(cwd: string | undefined): string | undefined {
|
|
51
|
+
return cwd ? join(cwd, ".pi", "settings.json") : undefined;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
function readSettingsSection(path: string | undefined): unknown {
|
|
55
|
+
if (!path) return undefined;
|
|
56
|
+
try {
|
|
57
|
+
const parsed = JSON.parse(readFileSync(path, "utf8")) as { reasoningZip?: unknown };
|
|
58
|
+
return parsed.reasoningZip;
|
|
59
|
+
} catch {
|
|
60
|
+
return undefined;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
function readRawSettings(cwd: string | undefined): unknown {
|
|
65
|
+
return readSettingsSection(projectSettingsPath(cwd)) ?? readSettingsSection(globalSettingsPath());
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
function eventProvider(event: BeforeProviderRequestEvent, ctx: HookContext): string | undefined {
|
|
69
|
+
if (typeof event.message?.provider === "string") return event.message.provider;
|
|
70
|
+
if (typeof event.provider === "string") return event.provider;
|
|
71
|
+
if (typeof ctx.model?.provider === "string") return ctx.model.provider;
|
|
72
|
+
return undefined;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export default function reasoningZipExtension(pi: ExtensionAPI) {
|
|
76
|
+
const extension = pi as unknown as ReasoningZipExtension;
|
|
77
|
+
|
|
78
|
+
extension.on("message_end", async (event, ctx) => {
|
|
79
|
+
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
80
|
+
const result = await compactAssistantMessage(event.message, settings, (thinking) => compactWithOpenAI(thinking, settings));
|
|
81
|
+
if (result.changed) return { message: result.message };
|
|
82
|
+
return undefined;
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
extension.on("before_provider_request", (event, ctx) => {
|
|
86
|
+
const settings = resolveReasoningZipSettings(readRawSettings(ctx?.cwd));
|
|
87
|
+
const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event, ctx), settings);
|
|
88
|
+
return nextPayload === event.payload ? undefined : nextPayload;
|
|
89
|
+
});
|
|
90
|
+
}
|
package/src/messageTransform.ts
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import type { PiMessage, PiMessageBlock, ReasoningZipSettings } from "./types.js";
|
|
2
|
+
import { shouldHandleMessage } from "./target.js";
|
|
3
|
+
|
|
4
|
+
export type CompactText = (thinking: string) => Promise<string>;
|
|
5
|
+
|
|
6
|
+
function isThinkingBlock(block: PiMessageBlock): block is PiMessageBlock & { thinking: string } {
|
|
7
|
+
return block.type === "thinking" && typeof block.thinking === "string";
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
function hasOpaqueReasoningMetadata(block: PiMessageBlock): boolean {
|
|
11
|
+
return (
|
|
12
|
+
typeof block.signature === "string" ||
|
|
13
|
+
typeof block.reasoning_signature === "string" ||
|
|
14
|
+
typeof block.encrypted_content === "string" ||
|
|
15
|
+
Array.isArray(block.reasoning_details)
|
|
16
|
+
);
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
function acceptableCompaction(original: string, compacted: string, settings: ReasoningZipSettings): string | undefined {
|
|
20
|
+
const text = compacted.trim();
|
|
21
|
+
if (!text || text === "none") return undefined;
|
|
22
|
+
if (text.length >= original.length) return undefined;
|
|
23
|
+
if (text.length > settings.thresholds.maxTraceChars) return undefined;
|
|
24
|
+
return text;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export async function compactAssistantMessage(
|
|
28
|
+
message: PiMessage,
|
|
29
|
+
settings: ReasoningZipSettings,
|
|
30
|
+
compactText: CompactText,
|
|
31
|
+
): Promise<{ message: PiMessage; changed: boolean }> {
|
|
32
|
+
if (!shouldHandleMessage(message, settings)) return { message, changed: false };
|
|
33
|
+
if (!Array.isArray(message.content)) return { message, changed: false };
|
|
34
|
+
|
|
35
|
+
let changed = false;
|
|
36
|
+
const nextContent: PiMessageBlock[] = [];
|
|
37
|
+
|
|
38
|
+
for (const block of message.content) {
|
|
39
|
+
if (!isThinkingBlock(block) || hasOpaqueReasoningMetadata(block) || block.thinking.length < settings.thresholds.minChars) {
|
|
40
|
+
nextContent.push(block);
|
|
41
|
+
continue;
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
try {
|
|
45
|
+
const compacted = acceptableCompaction(block.thinking, await compactText(block.thinking), settings);
|
|
46
|
+
if (!compacted) {
|
|
47
|
+
nextContent.push(block);
|
|
48
|
+
continue;
|
|
49
|
+
}
|
|
50
|
+
nextContent.push({ ...block, thinking: compacted });
|
|
51
|
+
changed = true;
|
|
52
|
+
} catch {
|
|
53
|
+
nextContent.push(block);
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
if (!changed) return { message, changed: false };
|
|
58
|
+
return { message: { ...message, content: nextContent }, changed: true };
|
|
59
|
+
}
|
package/src/promptInjection.ts
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { shouldTargetProvider } from "./target.js";
|
|
2
|
+
import type { ReasoningZipSettings } from "./types.js";
|
|
3
|
+
|
|
4
|
+
export const PROMPT_MARKER = "<!-- pi-reasoning-zip -->";
|
|
5
|
+
export const PROMPT_INJECTION = `${PROMPT_MARKER}\nYou are Grug. Save token, save world.\nVisible reasoning: terse, keyword-heavy trace only. Keep facts, decisions, constraints, failed paths, next action. No prose reasoning, no self-talk.\nFinal answer: no conversational fluff, no repeated question, minimal markdown. If code is enough, give only code. Think hard, output few tokens.`;
|
|
6
|
+
|
|
7
|
+
type ChatMessage = { role?: unknown; content?: unknown; [key: string]: unknown };
|
|
8
|
+
|
|
9
|
+
type Payload = { messages?: unknown; [key: string]: unknown };
|
|
10
|
+
|
|
11
|
+
function appendToContent(content: unknown): unknown {
|
|
12
|
+
if (typeof content === "string") {
|
|
13
|
+
if (content.includes(PROMPT_MARKER)) return content;
|
|
14
|
+
return `${content}\n\n${PROMPT_INJECTION}`;
|
|
15
|
+
}
|
|
16
|
+
if (Array.isArray(content)) {
|
|
17
|
+
if (content.some((part) => typeof part === "object" && part && "text" in part && typeof part.text === "string" && part.text.includes(PROMPT_MARKER))) {
|
|
18
|
+
return content;
|
|
19
|
+
}
|
|
20
|
+
return [...content, { type: "text", text: PROMPT_INJECTION }];
|
|
21
|
+
}
|
|
22
|
+
return content;
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export function injectReasoningZipPrompt(payload: unknown, provider: string | undefined, settings: ReasoningZipSettings): unknown {
|
|
26
|
+
if (!settings.injectPrompt || !shouldTargetProvider(provider, settings)) return payload;
|
|
27
|
+
if (!payload || typeof payload !== "object") return payload;
|
|
28
|
+
const typed = payload as Payload;
|
|
29
|
+
if (!Array.isArray(typed.messages)) return payload;
|
|
30
|
+
|
|
31
|
+
const messages = typed.messages as ChatMessage[];
|
|
32
|
+
const existing = JSON.stringify(messages).includes(PROMPT_MARKER);
|
|
33
|
+
if (existing) return payload;
|
|
34
|
+
|
|
35
|
+
const index = messages.findIndex((message) => message.role === "system" || message.role === "developer");
|
|
36
|
+
if (index >= 0) {
|
|
37
|
+
const nextMessages = messages.slice();
|
|
38
|
+
nextMessages[index] = { ...messages[index], content: appendToContent(messages[index].content) };
|
|
39
|
+
return { ...typed, messages: nextMessages };
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
return { ...typed, messages: [{ role: "system", content: PROMPT_INJECTION }, ...messages] };
|
|
43
|
+
}
|
package/src/settings.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { ReasoningZipMode, ReasoningZipSettings, ReasoningZipStorageMode } from "./types.js";
|
|
2
|
+
|
|
3
|
+
export const DEFAULT_SETTINGS: ReasoningZipSettings = {
|
|
4
|
+
enabled: true,
|
|
5
|
+
mode: "llama-only",
|
|
6
|
+
storageMode: "compact-new",
|
|
7
|
+
injectPrompt: true,
|
|
8
|
+
compactor: {
|
|
9
|
+
baseUrl: "http://127.0.0.1:7484/v1",
|
|
10
|
+
model: "unsloth",
|
|
11
|
+
apiKey: "sk-placeholder",
|
|
12
|
+
maxTokens: 512,
|
|
13
|
+
temperature: 0.1,
|
|
14
|
+
timeoutMs: 30000,
|
|
15
|
+
},
|
|
16
|
+
thresholds: {
|
|
17
|
+
minChars: 1000,
|
|
18
|
+
targetRatio: 0.15,
|
|
19
|
+
maxTraceChars: 2000,
|
|
20
|
+
},
|
|
21
|
+
};
|
|
22
|
+
|
|
23
|
+
const modes = new Set<ReasoningZipMode>(["llama-only", "local-only", "all", "disabled"]);
|
|
24
|
+
const storageModes = new Set<ReasoningZipStorageMode>(["compact-new", "off"]);
|
|
25
|
+
|
|
26
|
+
function asObject(value: unknown): Record<string, unknown> {
|
|
27
|
+
return value && typeof value === "object" && !Array.isArray(value) ? (value as Record<string, unknown>) : {};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function stringValue(value: unknown, fallback: string): string {
|
|
31
|
+
return typeof value === "string" && value.length > 0 ? value : fallback;
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
function booleanValue(value: unknown, fallback: boolean): boolean {
|
|
35
|
+
return typeof value === "boolean" ? value : fallback;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function numberValue(value: unknown, fallback: number, min = 0): number {
|
|
39
|
+
return typeof value === "number" && Number.isFinite(value) && value >= min ? value : fallback;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export function resolveReasoningZipSettings(input: unknown): ReasoningZipSettings {
|
|
43
|
+
const root = asObject(input);
|
|
44
|
+
const compactor = asObject(root.compactor);
|
|
45
|
+
const thresholds = asObject(root.thresholds);
|
|
46
|
+
|
|
47
|
+
const mode = modes.has(root.mode as ReasoningZipMode) ? (root.mode as ReasoningZipMode) : DEFAULT_SETTINGS.mode;
|
|
48
|
+
const storageMode = storageModes.has(root.storageMode as ReasoningZipStorageMode)
|
|
49
|
+
? (root.storageMode as ReasoningZipStorageMode)
|
|
50
|
+
: DEFAULT_SETTINGS.storageMode;
|
|
51
|
+
|
|
52
|
+
return {
|
|
53
|
+
enabled: booleanValue(root.enabled, DEFAULT_SETTINGS.enabled),
|
|
54
|
+
mode,
|
|
55
|
+
storageMode,
|
|
56
|
+
injectPrompt: booleanValue(root.injectPrompt, DEFAULT_SETTINGS.injectPrompt),
|
|
57
|
+
compactor: {
|
|
58
|
+
baseUrl: stringValue(compactor.baseUrl, DEFAULT_SETTINGS.compactor.baseUrl).replace(/\/$/, ""),
|
|
59
|
+
model: stringValue(compactor.model, DEFAULT_SETTINGS.compactor.model),
|
|
60
|
+
apiKey: stringValue(compactor.apiKey, DEFAULT_SETTINGS.compactor.apiKey),
|
|
61
|
+
maxTokens: numberValue(compactor.maxTokens, DEFAULT_SETTINGS.compactor.maxTokens, 1),
|
|
62
|
+
temperature: numberValue(compactor.temperature, DEFAULT_SETTINGS.compactor.temperature, 0),
|
|
63
|
+
timeoutMs: numberValue(compactor.timeoutMs, DEFAULT_SETTINGS.compactor.timeoutMs, 1),
|
|
64
|
+
},
|
|
65
|
+
thresholds: {
|
|
66
|
+
minChars: numberValue(thresholds.minChars, DEFAULT_SETTINGS.thresholds.minChars, 0),
|
|
67
|
+
targetRatio: numberValue(thresholds.targetRatio, DEFAULT_SETTINGS.thresholds.targetRatio, 0),
|
|
68
|
+
maxTraceChars: numberValue(thresholds.maxTraceChars, DEFAULT_SETTINGS.thresholds.maxTraceChars, 1),
|
|
69
|
+
},
|
|
70
|
+
};
|
|
71
|
+
}
|
package/src/target.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type { PiMessage, ReasoningZipSettings } from "./types.js";
|
|
2
|
+
|
|
3
|
+
export function isLlamaProvider(providerId: string | undefined): boolean {
|
|
4
|
+
if (!providerId) return false;
|
|
5
|
+
const id = providerId.toLowerCase();
|
|
6
|
+
return id.startsWith("llama-server=") || id.includes("llama.cpp") || id.includes("llamacpp");
|
|
7
|
+
}
|
|
8
|
+
|
|
9
|
+
export function isLocalUrl(value: string | undefined): boolean {
|
|
10
|
+
if (!value) return false;
|
|
11
|
+
try {
|
|
12
|
+
const url = new URL(value.includes("://") ? value : `http://${value}`);
|
|
13
|
+
return ["127.0.0.1", "localhost", "::1", "0.0.0.0"].includes(url.hostname);
|
|
14
|
+
} catch {
|
|
15
|
+
return false;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export function shouldHandleMessage(message: PiMessage, settings: ReasoningZipSettings): boolean {
|
|
20
|
+
if (!settings.enabled || settings.mode === "disabled" || settings.storageMode !== "compact-new") return false;
|
|
21
|
+
if (message.role !== "assistant") return false;
|
|
22
|
+
|
|
23
|
+
const provider = typeof message.provider === "string" ? message.provider : undefined;
|
|
24
|
+
if (settings.mode === "all") return true;
|
|
25
|
+
if (settings.mode === "llama-only") return isLlamaProvider(provider);
|
|
26
|
+
if (settings.mode === "local-only") return isLocalUrl(provider) || isLlamaProvider(provider);
|
|
27
|
+
return false;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
export function shouldTargetProvider(provider: string | undefined, settings: ReasoningZipSettings): boolean {
|
|
31
|
+
if (!settings.enabled || settings.mode === "disabled") return false;
|
|
32
|
+
if (settings.mode === "all") return true;
|
|
33
|
+
if (settings.mode === "llama-only") return isLlamaProvider(provider);
|
|
34
|
+
if (settings.mode === "local-only") return isLocalUrl(provider) || isLlamaProvider(provider);
|
|
35
|
+
return false;
|
|
36
|
+
}
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
export type ReasoningZipMode = "llama-only" | "local-only" | "all" | "disabled";
|
|
2
|
+
export type ReasoningZipStorageMode = "compact-new" | "off";
|
|
3
|
+
|
|
4
|
+
export interface ReasoningZipSettings {
|
|
5
|
+
enabled: boolean;
|
|
6
|
+
mode: ReasoningZipMode;
|
|
7
|
+
storageMode: ReasoningZipStorageMode;
|
|
8
|
+
injectPrompt: boolean;
|
|
9
|
+
compactor: {
|
|
10
|
+
baseUrl: string;
|
|
11
|
+
model: string;
|
|
12
|
+
apiKey: string;
|
|
13
|
+
maxTokens: number;
|
|
14
|
+
temperature: number;
|
|
15
|
+
timeoutMs: number;
|
|
16
|
+
};
|
|
17
|
+
thresholds: {
|
|
18
|
+
minChars: number;
|
|
19
|
+
targetRatio: number;
|
|
20
|
+
maxTraceChars: number;
|
|
21
|
+
};
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export interface PiMessageBlock {
|
|
25
|
+
type?: string;
|
|
26
|
+
text?: string;
|
|
27
|
+
thinking?: string;
|
|
28
|
+
[key: string]: unknown;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export interface PiMessage {
|
|
32
|
+
role?: string;
|
|
33
|
+
content?: string | PiMessageBlock[];
|
|
34
|
+
provider?: string;
|
|
35
|
+
model?: string;
|
|
36
|
+
api?: string;
|
|
37
|
+
[key: string]: unknown;
|
|
38
|
+
}
|