pi-reasoning-zip 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +44 -0
- package/LICENSE +21 -0
- package/README.md +251 -0
- package/dist/compactPrompt.d.ts +1 -0
- package/dist/compactPrompt.js +12 -0
- package/dist/compactorClient.d.ts +2 -0
- package/dist/compactorClient.js +34 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +50 -0
- package/dist/messageTransform.d.ts +6 -0
- package/dist/messageTransform.js +49 -0
- package/dist/promptInjection.d.ts +4 -0
- package/dist/promptInjection.js +37 -0
- package/dist/settings.d.ts +3 -0
- package/dist/settings.js +61 -0
- package/dist/target.d.ts +5 -0
- package/dist/target.js +42 -0
- package/dist/types.d.ts +35 -0
- package/dist/types.js +1 -0
- package/package.json +67 -0
- package/scripts/smoke-extension.mjs +83 -0
package/CHANGELOG.md
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.2.0] - 2026-07-02
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added developer release checklist to README.
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
|
|
18
|
+
- Updated package description to "Compact reasoning blocks to keep the context short."
|
|
19
|
+
|
|
20
|
+
## [0.1.0] - 2026-07-02
|
|
21
|
+
|
|
22
|
+
### Added
|
|
23
|
+
|
|
24
|
+
- Scaffolded Pi extension package for `pi-reasoning-zip`.
|
|
25
|
+
- Settings resolver for `reasoningZip` defaults and partial config.
|
|
26
|
+
- Provider targeting helpers for llama.cpp/local/all modes.
|
|
27
|
+
- Pure assistant `thinking` block compaction transform.
|
|
28
|
+
- OpenAI-compatible local compactor client and compaction prompt builder.
|
|
29
|
+
- Conservative grug-style prompt injection for target provider requests.
|
|
30
|
+
- Vitest coverage for settings, targeting, prompt injection, prompt building, and message transformation.
|
|
31
|
+
- MIT license metadata and license file.
|
|
32
|
+
- Compiled-extension smoke harness for hook registration, compaction, and prompt injection.
|
|
33
|
+
- GitHub/package banner image metadata.
|
|
34
|
+
|
|
35
|
+
### Changed
|
|
36
|
+
|
|
37
|
+
- Trimmed npm package contents to runtime build, smoke harness, README, changelog, and license only.
|
|
38
|
+
- Reworked README structure to match the concise Pi package style used by sibling packages.
|
|
39
|
+
- Added package motto to README.
|
|
40
|
+
- Added npm release metadata: description, keywords, repository links, exports, and Node engine.
|
|
41
|
+
|
|
42
|
+
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.2.0...HEAD
|
|
43
|
+
[0.2.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/v0.1.0...v0.2.0
|
|
44
|
+
[0.1.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/releases/tag/v0.1.0
|
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Ryu-CZ
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/Ryu-CZ/pi-reasoning-zip/main/media/banner.webp" alt="pi-reasoning-zip banner" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
# pi-reasoning-zip
|
|
6
|
+
|
|
7
|
+
<p>
|
|
8
|
+
<a href="https://www.npmjs.com/package/pi-reasoning-zip">
|
|
9
|
+
<img src="https://img.shields.io/npm/v/pi-reasoning-zip" alt="npm version">
|
|
10
|
+
</a>
|
|
11
|
+
<a href="https://www.npmjs.com/package/pi-reasoning-zip">
|
|
12
|
+
<img src="https://img.shields.io/npm/dt/pi-reasoning-zip" alt="npm downloads">
|
|
13
|
+
</a>
|
|
14
|
+
<a href="LICENSE">
|
|
15
|
+
<img src="https://img.shields.io/npm/l/pi-reasoning-zip" alt="license">
|
|
16
|
+
</a>
|
|
17
|
+
<a href="https://pi.dev/packages/pi-reasoning-zip">
|
|
18
|
+
<img src="https://img.shields.io/badge/pi-package-1a1a2e" alt="pi package">
|
|
19
|
+
</a>
|
|
20
|
+
</p>
|
|
21
|
+
|
|
22
|
+
Compact reasoning blocks to keep the context short.
|
|
23
|
+
|
|
24
|
+
`pi-reasoning-zip` compresses new Pi-visible assistant `thinking` blocks into compact reasoning traces before they are stored in the session.
|
|
25
|
+
|
|
26
|
+
```text
|
|
27
|
+
verbose thinking block -> facts / decisions / constraints / failed / next
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Use this when local models expose long reasoning and you want future Pi turns to replay compact traces instead of raw reasoning haystacks.
|
|
31
|
+
|
|
32
|
+
## Install
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# From npm
|
|
36
|
+
pi install npm:pi-reasoning-zip
|
|
37
|
+
|
|
38
|
+
# From git
|
|
39
|
+
pi install git:github.com/Ryu-CZ/pi-reasoning-zip
|
|
40
|
+
|
|
41
|
+
# Development
|
|
42
|
+
npm install
|
|
43
|
+
npm run check
|
|
44
|
+
npm run smoke
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
For local development you can also load the built extension directly:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
npm run build
|
|
51
|
+
pi -e ./dist/index.js
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## Features
|
|
55
|
+
|
|
56
|
+
- **Forward-only compaction** — modifies only the new assistant message being finalized.
|
|
57
|
+
- **Stored compact traces** — future turns naturally replay compact `thinking` because that is what Pi stored.
|
|
58
|
+
- **Local compactor** — calls a configured OpenAI-compatible `/chat/completions` endpoint directly.
|
|
59
|
+
- **llama.cpp-first targeting** — defaults to llama.cpp-like providers such as `llama-server=http://127.0.0.1:7484`.
|
|
60
|
+
- **Prompt minimization** — optional grug-style request injection for target local providers.
|
|
61
|
+
- **Fail-open safety** — preserves original messages on errors, timeouts, invalid output, or unknown payloads.
|
|
62
|
+
- **Opaque reasoning guard** — skips signed/encrypted/provider-opaque reasoning metadata.
|
|
63
|
+
|
|
64
|
+
## Commands
|
|
65
|
+
|
|
66
|
+
None.
|
|
67
|
+
|
|
68
|
+
`pi-reasoning-zip` works through Pi lifecycle hooks:
|
|
69
|
+
|
|
70
|
+
| Hook | Purpose |
|
|
71
|
+
|---|---|
|
|
72
|
+
| `message_end` | Compact eligible new assistant `thinking` blocks before storage |
|
|
73
|
+
| `before_provider_request` | Optionally inject terse-reasoning guidance for target local providers |
|
|
74
|
+
|
|
75
|
+
## Configuration
|
|
76
|
+
|
|
77
|
+
Settings live in project `.pi/settings.json` or global `~/.pi/agent/settings.json` under the `reasoningZip` key. Project settings take precedence.
|
|
78
|
+
|
|
79
|
+
```json
|
|
80
|
+
{
|
|
81
|
+
"reasoningZip": {
|
|
82
|
+
"enabled": true,
|
|
83
|
+
"mode": "llama-only",
|
|
84
|
+
"storageMode": "compact-new",
|
|
85
|
+
"injectPrompt": true,
|
|
86
|
+
"compactor": {
|
|
87
|
+
"baseUrl": "http://127.0.0.1:7484/v1",
|
|
88
|
+
"model": "unsloth",
|
|
89
|
+
"apiKey": "sk-placeholder",
|
|
90
|
+
"maxTokens": 512,
|
|
91
|
+
"temperature": 0.1,
|
|
92
|
+
"timeoutMs": 30000
|
|
93
|
+
},
|
|
94
|
+
"thresholds": {
|
|
95
|
+
"minChars": 1000,
|
|
96
|
+
"targetRatio": 0.15,
|
|
97
|
+
"maxTraceChars": 2000
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Modes
|
|
104
|
+
|
|
105
|
+
| Mode | Behavior |
|
|
106
|
+
|---|---|
|
|
107
|
+
| `llama-only` | Compact llama.cpp-like providers only |
|
|
108
|
+
| `local-only` | Compact local URL providers and llama.cpp-like providers |
|
|
109
|
+
| `all` | Compact any eligible plain Pi `thinking` block |
|
|
110
|
+
| `disabled` | No-op |
|
|
111
|
+
|
|
112
|
+
### Storage modes
|
|
113
|
+
|
|
114
|
+
| Storage mode | Behavior |
|
|
115
|
+
|---|---|
|
|
116
|
+
| `compact-new` | Compact new assistant thinking before storage |
|
|
117
|
+
| `off` | Do not alter assistant messages |
|
|
118
|
+
|
|
119
|
+
## Compactor endpoint
|
|
120
|
+
|
|
121
|
+
The compactor must expose an OpenAI-compatible chat completions endpoint:
|
|
122
|
+
|
|
123
|
+
```text
|
|
124
|
+
POST {baseUrl}/chat/completions
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
The extension asks the compactor to produce terse output like:
|
|
128
|
+
|
|
129
|
+
```text
|
|
130
|
+
facts:
|
|
131
|
+
- ...
|
|
132
|
+
decisions:
|
|
133
|
+
- ...
|
|
134
|
+
constraints:
|
|
135
|
+
- ...
|
|
136
|
+
failed:
|
|
137
|
+
- ...
|
|
138
|
+
next:
|
|
139
|
+
- ...
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
If the compactor returns `none`, empty output, output longer than the original, or output over `thresholds.maxTraceChars`, the original block is preserved.
|
|
143
|
+
|
|
144
|
+
## Safety model
|
|
145
|
+
|
|
146
|
+
This extension does **not**:
|
|
147
|
+
|
|
148
|
+
- rewrite previous sessions
|
|
149
|
+
- backfill older entries in the current session
|
|
150
|
+
- mutate replayed context with the `context` hook
|
|
151
|
+
- claim to reduce hidden provider-side reasoning tokens
|
|
152
|
+
- touch signed, encrypted, or opaque provider reasoning metadata
|
|
153
|
+
|
|
154
|
+
It skips:
|
|
155
|
+
|
|
156
|
+
- non-assistant messages
|
|
157
|
+
- messages without array content
|
|
158
|
+
- short thinking below `thresholds.minChars`
|
|
159
|
+
- providers not recognized as llama.cpp-like when `mode` is `llama-only` (the default)
|
|
160
|
+
- hosted/non-local providers in `local-only`
|
|
161
|
+
|
|
162
|
+
## Smoke tests
|
|
163
|
+
|
|
164
|
+
Automated local smoke test:
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
npm run smoke
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
This loads `dist/index.js`, registers the Pi hooks against a mock extension API, uses a temporary `.pi/settings.json`, mocks the OpenAI-compatible compactor, and verifies thinking compaction plus targeted prompt injection.
|
|
171
|
+
|
|
172
|
+
Manual Pi smoke test:
|
|
173
|
+
|
|
174
|
+
1. Start a local llama.cpp/OpenAI-compatible server that can compact text.
|
|
175
|
+
2. Configure `reasoningZip.compactor.baseUrl` and `reasoningZip.compactor.model`.
|
|
176
|
+
3. Enable `mode: "llama-only"` and use a llama.cpp provider in Pi.
|
|
177
|
+
4. Ask a prompt that produces long visible reasoning/thinking.
|
|
178
|
+
5. Inspect the session JSONL.
|
|
179
|
+
6. Confirm the new assistant message contains compact `thinking`, not raw verbose reasoning.
|
|
180
|
+
7. Confirm older session entries were not changed.
|
|
181
|
+
8. Send another prompt and confirm Pi replays the compact trace because that is what was stored.
|
|
182
|
+
|
|
183
|
+
## Development
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
npm run typecheck
|
|
187
|
+
npm test
|
|
188
|
+
npm run build
|
|
189
|
+
npm run check
|
|
190
|
+
npm run smoke
|
|
191
|
+
npm pack --dry-run
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Release checklist
|
|
195
|
+
|
|
196
|
+
1. Update the version in `package.json` and `package-lock.json`.
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
npm version <patch|minor|major> --no-git-tag-version
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
2. Move completed `CHANGELOG.md` entries from `[Unreleased]` to the new version section.
|
|
203
|
+
|
|
204
|
+
```md
|
|
205
|
+
## [Unreleased]
|
|
206
|
+
|
|
207
|
+
## [x.y.z] - YYYY-MM-DD
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
3. Update changelog links at the bottom.
|
|
211
|
+
|
|
212
|
+
```md
|
|
213
|
+
[Unreleased]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/vx.y.z...HEAD
|
|
214
|
+
[x.y.z]: https://github.com/Ryu-CZ/pi-reasoning-zip/compare/vprevious...vx.y.z
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
For the first release, link the version to the release page:
|
|
218
|
+
|
|
219
|
+
```md
|
|
220
|
+
[0.1.0]: https://github.com/Ryu-CZ/pi-reasoning-zip/releases/tag/v0.1.0
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
4. Verify build, smoke test, package contents, and npm publish metadata.
|
|
224
|
+
|
|
225
|
+
```bash
|
|
226
|
+
npm run check
|
|
227
|
+
npm run smoke
|
|
228
|
+
npm pack --dry-run
|
|
229
|
+
npm publish --dry-run
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
5. Commit and tag the release.
|
|
233
|
+
|
|
234
|
+
```bash
|
|
235
|
+
git add package.json package-lock.json CHANGELOG.md
|
|
236
|
+
git commit -m "chore: release vx.y.z"
|
|
237
|
+
git tag -a vx.y.z -m "vx.y.z"
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
6. Push branch and tag.
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
git push origin main
|
|
244
|
+
git push origin vx.y.z
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
7. Publish to npm when ready.
|
|
248
|
+
|
|
249
|
+
```bash
|
|
250
|
+
npm publish
|
|
251
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function buildCompactionPrompt(thinking: string): string;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
 * Build the user prompt that asks the compactor model to condense a reasoning block.
 *
 * The prompt is a fixed set of instruction lines followed by a `Reasoning:`
 * header and the reasoning text itself, interpolated verbatim.
 *
 * @param {string} thinking - Raw reasoning text to be compressed.
 * @returns {string} The complete prompt, lines joined with "\n".
 */
export function buildCompactionPrompt(thinking) {
    const promptLines = [
        "Compress this model reasoning into a compact decision trace for future coding-agent context.",
        "",
        "Keep exact paths, commands, symbols, errors, decisions, constraints, failed attempts, and next actions.",
        "Drop self-talk, repeated planning, obvious reasoning, filler, and prose.",
        "Use terse bullets under: facts, decisions, constraints, failed, next.",
        "Target 10-20% of original length.",
        "If no useful content remains, output exactly: none",
        "",
        "Reasoning:",
        // Template interpolation keeps the same string coercion as the inline form.
        `${thinking}`,
    ];
    return promptLines.join("\n");
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import { buildCompactionPrompt } from "./compactPrompt.js";
|
|
2
|
+
/**
 * Ask an OpenAI-compatible chat completions endpoint to compact a reasoning block.
 *
 * Sends a POST to `{baseUrl}/chat/completions` with a system message and the
 * prompt built by `buildCompactionPrompt`. The request is aborted once
 * `timeoutMs` elapses.
 *
 * @param {string} thinking - Reasoning text to compact.
 * @param {object} settings - Resolved settings; only `settings.compactor` is read.
 * @returns {Promise<string>} The trimmed content of the first choice.
 * @throws {Error} On a non-OK HTTP status or when the first choice has no
 *   string `message.content`; fetch/abort/JSON errors propagate unchanged.
 */
export async function compactWithOpenAI(thinking, settings) {
    const { compactor } = settings;
    const abort = new AbortController();
    const timer = setTimeout(() => abort.abort(), compactor.timeoutMs);
    try {
        const requestBody = {
            model: compactor.model,
            messages: [
                { role: "system", content: "You compress reasoning traces. Output only compact trace." },
                { role: "user", content: buildCompactionPrompt(thinking) },
            ],
            max_tokens: compactor.maxTokens,
            temperature: compactor.temperature,
        };
        const response = await fetch(`${compactor.baseUrl}/chat/completions`, {
            method: "POST",
            headers: {
                "content-type": "application/json",
                authorization: `Bearer ${compactor.apiKey}`,
            },
            body: JSON.stringify(requestBody),
            signal: abort.signal,
        });
        if (!response.ok) {
            throw new Error(`Compactor HTTP ${response.status}`);
        }
        const payload = await response.json();
        const content = payload.choices?.[0]?.message?.content;
        if (typeof content === "string") {
            return content.trim();
        }
        throw new Error("Compactor response missing message content");
    }
    finally {
        // Always release the timer so a finished request cannot trigger a late abort.
        clearTimeout(timer);
    }
}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { homedir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { compactWithOpenAI } from "./compactorClient.js";
|
|
5
|
+
import { compactAssistantMessage } from "./messageTransform.js";
|
|
6
|
+
import { injectReasoningZipPrompt } from "./promptInjection.js";
|
|
7
|
+
import { resolveReasoningZipSettings } from "./settings.js";
|
|
8
|
+
/**
 * Locate the global settings file.
 *
 * Uses the directory named by the `PI_CODING_AGENT_DIR` environment variable
 * when it is set, otherwise `<home>/.pi/agent`.
 *
 * @returns {string} Path to `settings.json` inside that directory.
 */
function globalSettingsPath() {
    const agentDir = process.env.PI_CODING_AGENT_DIR ?? join(homedir(), ".pi", "agent");
    return join(agentDir, "settings.json");
}
|
|
11
|
+
/**
 * Locate the project-level settings file for a working directory.
 *
 * @param {string | undefined} cwd - Working directory, if known.
 * @returns {string | undefined} `<cwd>/.pi/settings.json`, or `undefined` when
 *   `cwd` is missing or empty.
 */
function projectSettingsPath(cwd) {
    if (!cwd) {
        return undefined;
    }
    return join(cwd, ".pi", "settings.json");
}
|
|
14
|
+
/**
 * Read the `reasoningZip` section from a JSON settings file.
 *
 * This is a best-effort read: a missing file, unreadable file, malformed JSON
 * or a non-object document all yield `undefined` instead of an error.
 *
 * @param {string | undefined} path - Settings file path; falsy means "no file".
 * @returns {unknown} The raw `reasoningZip` value, or `undefined`.
 */
function readSettingsSection(path) {
    if (!path) {
        return undefined;
    }
    let section;
    try {
        const raw = readFileSync(path, "utf8");
        section = JSON.parse(raw).reasoningZip;
    }
    catch {
        // Deliberately ignored: an absent or broken settings file means "no settings here".
        section = undefined;
    }
    return section;
}
|
|
25
|
+
/**
 * Load the raw `reasoningZip` settings section for a working directory.
 *
 * The project section wins as a whole when it is present; the global section
 * is read only when the project one is `null`/`undefined`. The two sections
 * are not merged.
 *
 * @param {string | undefined} cwd - Working directory of the current session.
 * @returns {unknown} The selected raw section, or `undefined` if neither exists.
 */
function readRawSettings(cwd) {
    const projectSection = readSettingsSection(projectSettingsPath(cwd));
    if (projectSection !== undefined && projectSection !== null) {
        return projectSection;
    }
    return readSettingsSection(globalSettingsPath());
}
|
|
28
|
+
/**
 * Extract the provider identifier from a hook event.
 *
 * `event.message.provider` is preferred; `event.provider` is the fallback.
 * Only string values count.
 *
 * @param {object} event - Hook event object.
 * @returns {string | undefined} The provider id, or `undefined` when neither
 *   location holds a string.
 */
function eventProvider(event) {
    const fromMessage = event.message?.provider;
    if (typeof fromMessage === "string") {
        return fromMessage;
    }
    const fromEvent = event.provider;
    return typeof fromEvent === "string" ? fromEvent : undefined;
}
|
|
36
|
+
/**
 * Extension entry point: registers the two lifecycle hooks on the host API.
 *
 * - `message_end`: runs the assistant-message compaction and returns
 *   `{ message }` only when something changed.
 * - `before_provider_request`: runs prompt injection and returns the new
 *   payload only when it differs (by identity) from the incoming one.
 *
 * Settings are re-read on every event from `ctx.cwd`.
 *
 * @param {{ on: Function }} pi - Host extension API exposing `on(name, handler)`.
 * @returns {void}
 */
export default function reasoningZipExtension(pi) {
    const loadSettings = (ctx) => resolveReasoningZipSettings(readRawSettings(ctx?.cwd));

    async function onMessageEnd(event, ctx) {
        const settings = loadSettings(ctx);
        const compactText = (thinking) => compactWithOpenAI(thinking, settings);
        const result = await compactAssistantMessage(event.message, settings, compactText);
        // Returning undefined tells the host to keep the message as it is.
        return result.changed ? { message: result.message } : undefined;
    }

    function onBeforeProviderRequest(event, ctx) {
        const settings = loadSettings(ctx);
        const nextPayload = injectReasoningZipPrompt(event.payload, eventProvider(event), settings);
        if (nextPayload === event.payload) {
            return undefined;
        }
        return nextPayload;
    }

    pi.on("message_end", onMessageEnd);
    pi.on("before_provider_request", onBeforeProviderRequest);
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { PiMessage, ReasoningZipSettings } from "./types.js";
|
|
2
|
+
export type CompactText = (thinking: string) => Promise<string>;
|
|
3
|
+
export declare function compactAssistantMessage(message: PiMessage, settings: ReasoningZipSettings, compactText: CompactText): Promise<{
|
|
4
|
+
message: PiMessage;
|
|
5
|
+
changed: boolean;
|
|
6
|
+
}>;
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { shouldHandleMessage } from "./target.js";
|
|
2
|
+
/**
 * Tell whether a content block is a plain thinking block.
 *
 * @param {object} block - A message content block.
 * @returns {boolean} True when `type` is "thinking" and `thinking` is a string.
 */
function isThinkingBlock(block) {
    if (block.type !== "thinking") {
        return false;
    }
    return typeof block.thinking === "string";
}
|
|
5
|
+
/**
 * Detect fields that mark a thinking block as carrying opaque reasoning metadata.
 *
 * A block counts as opaque when `signature`, `reasoning_signature` or
 * `encrypted_content` is a string, or when `reasoning_details` is an array.
 *
 * @param {object} block - A message content block.
 * @returns {boolean} True when any of those markers is present.
 */
function hasOpaqueReasoningMetadata(block) {
    const opaqueStringFields = ["signature", "reasoning_signature", "encrypted_content"];
    if (opaqueStringFields.some((field) => typeof block[field] === "string")) {
        return true;
    }
    return Array.isArray(block.reasoning_details);
}
|
|
11
|
+
/**
 * Validate a compactor result before it is allowed to replace the original text.
 *
 * The result is rejected when, after trimming, it is empty, is the `none`
 * sentinel, is not strictly shorter than the original, or exceeds
 * `settings.thresholds.maxTraceChars`.
 *
 * @param {string} original - The original thinking text.
 * @param {string} compacted - Raw text returned by the compactor.
 * @param {object} settings - Resolved settings; reads `thresholds.maxTraceChars`.
 * @returns {string | undefined} The trimmed trace, or `undefined` when rejected.
 */
function acceptableCompaction(original, compacted, settings) {
    const text = compacted.trim();
    // Models frequently capitalise the sentinel ("None"); accepting that as a
    // trace would overwrite the original reasoning with a meaningless word.
    if (text === "" || text.toLowerCase() === "none") {
        return undefined;
    }
    if (text.length >= original.length) {
        return undefined;
    }
    if (text.length > settings.thresholds.maxTraceChars) {
        return undefined;
    }
    return text;
}
|
|
21
|
+
/**
 * Compact the eligible thinking blocks of one message without mutating it.
 *
 * A block is eligible when it is a thinking block, carries no opaque reasoning
 * metadata and is at least `thresholds.minChars` long. Eligible blocks are
 * compacted one at a time; if `compactText` rejects or the result is not
 * acceptable, that block is kept as it was.
 *
 * @param {object} message - The message to inspect.
 * @param {object} settings - Resolved settings.
 * @param {(thinking: string) => Promise<string>} compactText - Compaction callback.
 * @returns {Promise<{ message: object, changed: boolean }>} The original message
 *   with `changed: false`, or a copy with replaced content and `changed: true`.
 */
export async function compactAssistantMessage(message, settings, compactText) {
    const untouched = { message, changed: false };
    if (!shouldHandleMessage(message, settings) || !Array.isArray(message.content)) {
        return untouched;
    }
    const isEligible = (block) => isThinkingBlock(block)
        && !hasOpaqueReasoningMetadata(block)
        && block.thinking.length >= settings.thresholds.minChars;
    const rewritten = [];
    let replacedAny = false;
    for (const block of message.content) {
        let replacement = block;
        if (isEligible(block)) {
            try {
                const trace = acceptableCompaction(block.thinking, await compactText(block.thinking), settings);
                if (trace) {
                    replacement = { ...block, thinking: trace };
                    replacedAny = true;
                }
            }
            catch {
                // Fail open: a compactor error leaves this block exactly as received.
            }
        }
        rewritten.push(replacement);
    }
    return replacedAny ? { message: { ...message, content: rewritten }, changed: true } : untouched;
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ReasoningZipSettings } from "./types.js";
|
|
2
|
+
export declare const PROMPT_MARKER = "<!-- pi-reasoning-zip -->";
|
|
3
|
+
export declare const PROMPT_INJECTION = "<!-- pi-reasoning-zip -->\nYou are Grug. Save token, save world.\nVisible reasoning: terse, keyword-heavy trace only. Keep facts, decisions, constraints, failed paths, next action. No prose reasoning, no self-talk.\nFinal answer: no conversational fluff, no repeated question, minimal markdown. If code is enough, give only code. Think hard, output few tokens.";
|
|
4
|
+
export declare function injectReasoningZipPrompt(payload: unknown, provider: string | undefined, settings: ReasoningZipSettings): unknown;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { shouldTargetProvider } from "./target.js";
|
|
2
|
+
export const PROMPT_MARKER = "<!-- pi-reasoning-zip -->";
|
|
3
|
+
export const PROMPT_INJECTION = `${PROMPT_MARKER}\nYou are Grug. Save token, save world.\nVisible reasoning: terse, keyword-heavy trace only. Keep facts, decisions, constraints, failed paths, next action. No prose reasoning, no self-talk.\nFinal answer: no conversational fluff, no repeated question, minimal markdown. If code is enough, give only code. Think hard, output few tokens.`;
|
|
4
|
+
/**
 * Append the injection text to a message's content, at most once.
 *
 * - String content: returned with `"\n\n" + PROMPT_INJECTION` appended.
 * - Array content: returned as a new array with an extra text part.
 * - Anything else: returned unchanged.
 *
 * Content that already contains `PROMPT_MARKER` is returned as-is (same reference).
 *
 * @param {unknown} content - The `content` of a system/developer message.
 * @returns {unknown} The content with the injection appended, or the input itself.
 */
function appendToContent(content) {
    if (typeof content === "string") {
        return content.includes(PROMPT_MARKER) ? content : `${content}\n\n${PROMPT_INJECTION}`;
    }
    if (!Array.isArray(content)) {
        return content;
    }
    const alreadyMarked = content.some((part) => {
        if (typeof part !== "object" || !part) {
            return false;
        }
        return "text" in part && typeof part.text === "string" && part.text.includes(PROMPT_MARKER);
    });
    return alreadyMarked ? content : [...content, { type: "text", text: PROMPT_INJECTION }];
}
|
|
18
|
+
/**
 * Inject the terse-reasoning guidance into a provider request payload.
 *
 * The payload is returned untouched (same reference) when injection is turned
 * off, the provider is not targeted, the payload has no `messages` array, the
 * marker is already present anywhere in the messages, or the first
 * system/developer message has content that cannot be appended to. Otherwise
 * a shallow copy is returned: the guidance is appended to the first
 * system/developer message, or a new system message is prepended when there
 * is none. The incoming payload is never mutated.
 *
 * @param {unknown} payload - Provider request payload.
 * @param {string | undefined} provider - Provider identifier of the request.
 * @param {object} settings - Resolved settings; reads `injectPrompt`.
 * @returns {unknown} The original payload or an updated shallow copy.
 */
export function injectReasoningZipPrompt(payload, provider, settings) {
    if (!settings.injectPrompt || !shouldTargetProvider(provider, settings)) {
        return payload;
    }
    if (!payload || typeof payload !== "object") {
        return payload;
    }
    const { messages } = payload;
    if (!Array.isArray(messages)) {
        return payload;
    }
    // Cheap whole-conversation scan: a marker anywhere means we already injected.
    if (JSON.stringify(messages).includes(PROMPT_MARKER)) {
        return payload;
    }
    // Payload shape is not under our control; tolerate null/primitive entries
    // instead of throwing out of the hook.
    const index = messages.findIndex((message) => message?.role === "system" || message?.role === "developer");
    if (index < 0) {
        return { ...payload, messages: [{ role: "system", content: PROMPT_INJECTION }, ...messages] };
    }
    const target = messages[index];
    const nextContent = appendToContent(target.content);
    if (nextContent === target.content) {
        // Content of an unsupported shape: nothing was appended, so report "no change".
        return payload;
    }
    const nextMessages = messages.slice();
    nextMessages[index] = { ...target, content: nextContent };
    return { ...payload, messages: nextMessages };
}
|
package/dist/settings.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
export const DEFAULT_SETTINGS = {
|
|
2
|
+
enabled: true,
|
|
3
|
+
mode: "llama-only",
|
|
4
|
+
storageMode: "compact-new",
|
|
5
|
+
injectPrompt: true,
|
|
6
|
+
compactor: {
|
|
7
|
+
baseUrl: "http://127.0.0.1:7484/v1",
|
|
8
|
+
model: "unsloth",
|
|
9
|
+
apiKey: "sk-placeholder",
|
|
10
|
+
maxTokens: 512,
|
|
11
|
+
temperature: 0.1,
|
|
12
|
+
timeoutMs: 30000,
|
|
13
|
+
},
|
|
14
|
+
thresholds: {
|
|
15
|
+
minChars: 1000,
|
|
16
|
+
targetRatio: 0.15,
|
|
17
|
+
maxTraceChars: 2000,
|
|
18
|
+
},
|
|
19
|
+
};
|
|
20
|
+
const modes = new Set(["llama-only", "local-only", "all", "disabled"]);
|
|
21
|
+
const storageModes = new Set(["compact-new", "off"]);
|
|
22
|
+
/**
 * Coerce a value to a plain record.
 *
 * @param {unknown} value - Candidate value.
 * @returns {object} `value` itself when it is a non-null, non-array object;
 *   otherwise a fresh empty object.
 */
function asObject(value) {
    if (!value || typeof value !== "object" || Array.isArray(value)) {
        return {};
    }
    return value;
}
|
|
25
|
+
/**
 * Pick a non-empty string setting or fall back.
 *
 * @param {unknown} value - Candidate value.
 * @param {string} fallback - Returned when `value` is not a non-empty string.
 * @returns {string} `value` or `fallback`.
 */
function stringValue(value, fallback) {
    if (typeof value !== "string" || value.length === 0) {
        return fallback;
    }
    return value;
}
|
|
28
|
+
/**
 * Pick a boolean setting or fall back.
 *
 * @param {unknown} value - Candidate value.
 * @param {boolean} fallback - Returned when `value` is not a boolean.
 * @returns {boolean} `value` or `fallback`.
 */
function booleanValue(value, fallback) {
    if (typeof value === "boolean") {
        return value;
    }
    return fallback;
}
|
|
31
|
+
/**
 * Pick a finite numeric setting that is at least `min`, or fall back.
 *
 * @param {unknown} value - Candidate value.
 * @param {number} fallback - Returned when `value` is unusable.
 * @param {number} [min=0] - Inclusive lower bound.
 * @returns {number} `value` or `fallback`.
 */
function numberValue(value, fallback, min = 0) {
    const usable = typeof value === "number" && Number.isFinite(value) && value >= min;
    return usable ? value : fallback;
}
|
|
34
|
+
/**
 * Resolve raw (possibly partial or malformed) settings into a complete settings object.
 *
 * Every field is validated on its own; a missing or invalid value falls back
 * to the matching entry in `DEFAULT_SETTINGS`. Trailing slashes are removed
 * from `compactor.baseUrl` so a path can be appended with a single `/`.
 *
 * @param {unknown} input - Raw `reasoningZip` section, or anything else.
 * @returns {object} A fully populated settings object.
 */
export function resolveReasoningZipSettings(input) {
    const root = asObject(input);
    const compactor = asObject(root.compactor);
    const thresholds = asObject(root.thresholds);
    const mode = modes.has(root.mode) ? root.mode : DEFAULT_SETTINGS.mode;
    const storageMode = storageModes.has(root.storageMode)
        ? root.storageMode
        : DEFAULT_SETTINGS.storageMode;
    return {
        enabled: booleanValue(root.enabled, DEFAULT_SETTINGS.enabled),
        mode,
        storageMode,
        injectPrompt: booleanValue(root.injectPrompt, DEFAULT_SETTINGS.injectPrompt),
        compactor: {
            // Strip every trailing slash, not just one, so "…/v1//" cannot turn
            // into "…/v1//chat/completions".
            baseUrl: stringValue(compactor.baseUrl, DEFAULT_SETTINGS.compactor.baseUrl).replace(/\/+$/, ""),
            model: stringValue(compactor.model, DEFAULT_SETTINGS.compactor.model),
            apiKey: stringValue(compactor.apiKey, DEFAULT_SETTINGS.compactor.apiKey),
            maxTokens: numberValue(compactor.maxTokens, DEFAULT_SETTINGS.compactor.maxTokens, 1),
            temperature: numberValue(compactor.temperature, DEFAULT_SETTINGS.compactor.temperature, 0),
            timeoutMs: numberValue(compactor.timeoutMs, DEFAULT_SETTINGS.compactor.timeoutMs, 1),
        },
        thresholds: {
            minChars: numberValue(thresholds.minChars, DEFAULT_SETTINGS.thresholds.minChars, 0),
            targetRatio: numberValue(thresholds.targetRatio, DEFAULT_SETTINGS.thresholds.targetRatio, 0),
            maxTraceChars: numberValue(thresholds.maxTraceChars, DEFAULT_SETTINGS.thresholds.maxTraceChars, 1),
        },
    };
}
|
package/dist/target.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { PiMessage, ReasoningZipSettings } from "./types.js";
|
|
2
|
+
export declare function isLlamaProvider(providerId: string | undefined): boolean;
|
|
3
|
+
export declare function isLocalUrl(value: string | undefined): boolean;
|
|
4
|
+
export declare function shouldHandleMessage(message: PiMessage, settings: ReasoningZipSettings): boolean;
|
|
5
|
+
export declare function shouldTargetProvider(provider: string | undefined, settings: ReasoningZipSettings): boolean;
|
package/dist/target.js
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Tell whether a provider id looks like a llama.cpp provider.
 *
 * Matching is case-insensitive: the id must start with `llama-server=` or
 * contain `llama.cpp` or `llamacpp`.
 *
 * @param {string | undefined} providerId - Provider identifier.
 * @returns {boolean} True for llama.cpp-like ids; false for empty/missing ids.
 */
export function isLlamaProvider(providerId) {
    if (!providerId) {
        return false;
    }
    const normalized = providerId.toLowerCase();
    if (normalized.startsWith("llama-server=")) {
        return true;
    }
    return ["llama.cpp", "llamacpp"].some((needle) => normalized.includes(needle));
}
|
|
7
|
+
/**
 * Tell whether a URL (or bare `host:port`) points at the local machine.
 *
 * A value without a scheme separator is parsed as if prefixed with `http://`.
 * The host must be one of `127.0.0.1`, `localhost`, `0.0.0.0` or the IPv6
 * loopback `::1`.
 *
 * @param {string | undefined} value - URL or host string.
 * @returns {boolean} True for a recognised local host; false for
 *   empty, unparsable or non-local values.
 */
export function isLocalUrl(value) {
    if (!value) {
        return false;
    }
    try {
        const { hostname } = new URL(value.includes("://") ? value : `http://${value}`);
        // URL.hostname keeps the square brackets around IPv6 literals, so the
        // loopback address arrives as "[::1]"; the bare "::1" alone never matches.
        return ["127.0.0.1", "localhost", "::1", "[::1]", "0.0.0.0"].includes(hostname);
    }
    catch {
        // Not parsable as a URL: treat as non-local.
        return false;
    }
}
|
|
18
|
+
/**
 * Decide whether a message is a candidate for thinking-block compaction.
 *
 * Compaction requires the extension to be enabled, a mode other than
 * "disabled", `storageMode === "compact-new"` and an assistant message. The
 * per-mode provider decision is delegated to `shouldTargetProvider`, so that
 * compaction and prompt injection always agree on which providers are targeted.
 *
 * @param {object} message - The message being finalized.
 * @param {object} settings - Resolved settings.
 * @returns {boolean} True when the message should be compacted.
 */
export function shouldHandleMessage(message, settings) {
    if (!settings.enabled || settings.mode === "disabled" || settings.storageMode !== "compact-new") {
        return false;
    }
    if (message.role !== "assistant") {
        return false;
    }
    // A non-string provider is treated as an unknown provider.
    const provider = typeof message.provider === "string" ? message.provider : undefined;
    return shouldTargetProvider(provider, settings);
}
|
|
32
|
+
/**
 * Decide whether a provider is targeted under the current mode.
 *
 * - `all`: every provider.
 * - `llama-only`: llama.cpp-like providers only.
 * - `local-only`: local-URL providers and llama.cpp-like providers.
 * - `disabled`, or any other mode, or `enabled` false: nothing.
 *
 * @param {string | undefined} provider - Provider identifier.
 * @param {object} settings - Resolved settings; reads `enabled` and `mode`.
 * @returns {boolean} True when the provider is targeted.
 */
export function shouldTargetProvider(provider, settings) {
    if (!settings.enabled) {
        return false;
    }
    switch (settings.mode) {
        case "all":
            return true;
        case "llama-only":
            return isLlamaProvider(provider);
        case "local-only":
            return isLocalUrl(provider) || isLlamaProvider(provider);
        default:
            // Covers "disabled" as well as any unrecognised mode.
            return false;
    }
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
export type ReasoningZipMode = "llama-only" | "local-only" | "all" | "disabled";
|
|
2
|
+
export type ReasoningZipStorageMode = "compact-new" | "off";
|
|
3
|
+
export interface ReasoningZipSettings {
|
|
4
|
+
enabled: boolean;
|
|
5
|
+
mode: ReasoningZipMode;
|
|
6
|
+
storageMode: ReasoningZipStorageMode;
|
|
7
|
+
injectPrompt: boolean;
|
|
8
|
+
compactor: {
|
|
9
|
+
baseUrl: string;
|
|
10
|
+
model: string;
|
|
11
|
+
apiKey: string;
|
|
12
|
+
maxTokens: number;
|
|
13
|
+
temperature: number;
|
|
14
|
+
timeoutMs: number;
|
|
15
|
+
};
|
|
16
|
+
thresholds: {
|
|
17
|
+
minChars: number;
|
|
18
|
+
targetRatio: number;
|
|
19
|
+
maxTraceChars: number;
|
|
20
|
+
};
|
|
21
|
+
}
|
|
22
|
+
export interface PiMessageBlock {
|
|
23
|
+
type?: string;
|
|
24
|
+
text?: string;
|
|
25
|
+
thinking?: string;
|
|
26
|
+
[key: string]: unknown;
|
|
27
|
+
}
|
|
28
|
+
export interface PiMessage {
|
|
29
|
+
role?: string;
|
|
30
|
+
content?: string | PiMessageBlock[];
|
|
31
|
+
provider?: string;
|
|
32
|
+
model?: string;
|
|
33
|
+
api?: string;
|
|
34
|
+
[key: string]: unknown;
|
|
35
|
+
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pi-reasoning-zip",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"private": false,
|
|
5
|
+
"description": "Compact reasoning blocks to keep the context short.",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"author": "Ryu-CZ <thandeus@gmail.com> (https://github.com/Ryu-CZ)",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"main": "dist/index.js",
|
|
10
|
+
"types": "dist/index.d.ts",
|
|
11
|
+
"files": [
|
|
12
|
+
"dist",
|
|
13
|
+
"scripts/smoke-extension.mjs",
|
|
14
|
+
"README.md",
|
|
15
|
+
"CHANGELOG.md",
|
|
16
|
+
"LICENSE"
|
|
17
|
+
],
|
|
18
|
+
"keywords": [
|
|
19
|
+
"pi-package",
|
|
20
|
+
"pi",
|
|
21
|
+
"pi-extension",
|
|
22
|
+
"reasoning",
|
|
23
|
+
"thinking",
|
|
24
|
+
"context",
|
|
25
|
+
"llama-cpp"
|
|
26
|
+
],
|
|
27
|
+
"repository": {
|
|
28
|
+
"type": "git",
|
|
29
|
+
"url": "git+https://git@github.com/Ryu-CZ/pi-reasoning-zip.git"
|
|
30
|
+
},
|
|
31
|
+
"bugs": {
|
|
32
|
+
"url": "https://github.com/Ryu-CZ/pi-reasoning-zip/issues"
|
|
33
|
+
},
|
|
34
|
+
"homepage": "https://github.com/Ryu-CZ/pi-reasoning-zip#readme",
|
|
35
|
+
"exports": {
|
|
36
|
+
".": {
|
|
37
|
+
"types": "./dist/index.d.ts",
|
|
38
|
+
"import": "./dist/index.js"
|
|
39
|
+
},
|
|
40
|
+
"./package.json": "./package.json"
|
|
41
|
+
},
|
|
42
|
+
"scripts": {
|
|
43
|
+
"build": "tsc -p tsconfig.json",
|
|
44
|
+
"test": "vitest run",
|
|
45
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
46
|
+
"check": "npm run typecheck && npm test && npm run build",
|
|
47
|
+
"prepack": "npm run build",
|
|
48
|
+
"smoke": "npm run build && node scripts/smoke-extension.mjs"
|
|
49
|
+
},
|
|
50
|
+
"peerDependencies": {
|
|
51
|
+
"@earendil-works/pi-coding-agent": "*"
|
|
52
|
+
},
|
|
53
|
+
"devDependencies": {
|
|
54
|
+
"@earendil-works/pi-coding-agent": "^0.80.3",
|
|
55
|
+
"typescript": "^5.8.0",
|
|
56
|
+
"vitest": "^3.2.0"
|
|
57
|
+
},
|
|
58
|
+
"pi": {
|
|
59
|
+
"extensions": [
|
|
60
|
+
"dist/index.js"
|
|
61
|
+
],
|
|
62
|
+
"image": "https://raw.githubusercontent.com/Ryu-CZ/pi-reasoning-zip/main/media/banner.webp"
|
|
63
|
+
},
|
|
64
|
+
"engines": {
|
|
65
|
+
"node": ">=22"
|
|
66
|
+
}
|
|
67
|
+
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { mkdtemp, mkdir, rm, writeFile } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import reasoningZipExtension from "../dist/index.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Minimal assertion helper used by this smoke script.
 *
 * @param {unknown} condition - Value that must be truthy for the check to pass.
 * @param {string} message - Text of the Error thrown when the check fails.
 * @throws {Error} When `condition` is falsy.
 */
function assert(condition, message) {
  if (condition) {
    return;
  }
  throw new Error(message);
}
|
|
9
|
+
|
|
10
|
+
// Smoke test for the built extension: load it against a stub host API, then
// drive both registered hooks while the compactor HTTP endpoint is mocked.

// Fixture values shared between the hook inputs and the mocked compactor.
const TARGET_PROVIDER = "llama-server=http://127.0.0.1:7484";
const RAW_THINKING = "abcdefghijklmnopqrstuvwxyz";
const COMPACTED_THINKING = "facts:\n- smoke compacted";

// Throwaway working directory; removed in the outer `finally` below.
const cwd = await mkdtemp(join(tmpdir(), "pi-reasoning-zip-smoke-"));
try {
  // Write <cwd>/.pi/settings.json; the same cwd is handed to every hook call.
  const settingsDir = join(cwd, ".pi");
  const settings = {
    reasoningZip: {
      mode: "llama-only",
      thresholds: { minChars: 5, maxTraceChars: 100 },
      compactor: { baseUrl: "http://mock.local/v1", model: "mock", timeoutMs: 1000 },
    },
  };
  await mkdir(settingsDir, { recursive: true });
  await writeFile(join(settingsDir, "settings.json"), JSON.stringify(settings), "utf8");

  // Stub host API: remember each handler under the event name it registers for.
  const handlers = new Map();
  reasoningZipExtension({
    on(name, handler) {
      return handlers.set(name, handler);
    },
  });

  assert(handlers.has("message_end"), "message_end hook was not registered");
  assert(handlers.has("before_provider_request"), "before_provider_request hook was not registered");

  const onMessageEnd = handlers.get("message_end");
  const onBeforeProviderRequest = handlers.get("before_provider_request");

  // Stand-in for the compactor endpoint: validates the outgoing request and
  // answers with a fixed chat-completion style payload.
  const mockCompactorFetch = async (url, init) => {
    const requestedUrl = String(url);
    assert(requestedUrl === "http://mock.local/v1/chat/completions", "compactor URL mismatch");
    const { messages } = JSON.parse(init.body);
    assert(messages[1].content.includes(RAW_THINKING), "compactor prompt missing thinking text");
    return {
      ok: true,
      json: async () => ({ choices: [{ message: { content: COMPACTED_THINKING } }] }),
    };
  };

  const originalFetch = globalThis.fetch;
  globalThis.fetch = mockCompactorFetch;

  try {
    // message_end: the thinking block should be replaced by the mocked
    // compactor output while the text block and metadata stay untouched.
    const assistantMessage = {
      role: "assistant",
      provider: TARGET_PROVIDER,
      content: [
        { type: "text", text: "answer" },
        { type: "thinking", thinking: RAW_THINKING },
      ],
      metadata: { keep: true },
    };
    const compacted = await onMessageEnd({ message: assistantMessage }, { cwd });

    assert(compacted?.message?.content?.[0]?.text === "answer", "text block was not preserved");
    const resultMessage = compacted.message;
    assert(resultMessage.content[1].thinking === COMPACTED_THINKING, "thinking block was not compacted");
    assert(resultMessage.metadata.keep === true, "assistant metadata was not preserved");

    // before_provider_request: its return value is used synchronously here
    // (not awaited). For the targeted provider the first message must carry
    // the injected marker.
    const targetedRequest = {
      provider: TARGET_PROVIDER,
      payload: { messages: [{ role: "system", content: "sys" }, { role: "user", content: "hi" }] },
    };
    const injected = onBeforeProviderRequest(targetedRequest, { cwd });
    assert(injected.messages[0].content.includes("<!-- pi-reasoning-zip -->"), "prompt marker was not injected");

    // A provider that is not targeted must yield `undefined` from the hook.
    const untargetedRequest = {
      provider: "openai",
      payload: { messages: [{ role: "system", content: "sys" }] },
    };
    const skipped = onBeforeProviderRequest(untargetedRequest, { cwd });
    assert(skipped === undefined, "non-target provider should not be modified");
  } finally {
    // Always restore the real fetch, even when an assertion above throws.
    globalThis.fetch = originalFetch;
  }

  console.log("pi-reasoning-zip smoke passed");
} finally {
  await rm(cwd, { recursive: true, force: true });
}
|