@mjasnikovs/pi-task 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
/** User-toggleable pi-task settings; each one is a plain on/off flag. */
export interface PiTaskConfig {
    /** Whether the remote UI server is started for a session. */
    remote: boolean;
    /** Whether reasoning blocks are compressed after each message. */
    compressReasoning: boolean;
    /** Whether a git commit is made after each auto sub-task. */
    autoCommit: boolean;
}
/** Returns the configuration currently held in memory. */
export declare function getConfig(): PiTaskConfig;
/** Persists `config` to the config file and makes it the in-memory configuration. */
export declare function saveConfig(config: PiTaskConfig): Promise<void>;
@@ -0,0 +1,37 @@
1
+ import * as fs from 'node:fs';
2
+ import * as fsp from 'node:fs/promises';
3
+ import * as path from 'node:path';
4
+ import * as os from 'node:os';
5
/** Values used when there is no config file, or when a key is missing or invalid. */
const DEFAULTS = {
    remote: true,
    compressReasoning: true,
    autoCommit: true
};
const CONFIG_PATH = path.join(os.homedir(), '.config', 'pi-task', 'config.json');
/**
 * Build a config from untrusted, already-parsed JSON.
 *
 * Only the keys present in DEFAULTS are considered, and only when the file
 * holds a real boolean for them. Anything else (a non-object document, an
 * array, unknown keys, strings such as "false") falls back to the default, so
 * a hand-edited file can never put a truthy non-boolean into a flag.
 *
 * @param parsed Result of JSON.parse on the config file.
 * @returns A fresh object containing exactly the DEFAULTS keys.
 */
function sanitizeConfig(parsed) {
    const config = { ...DEFAULTS };
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed))
        return config;
    for (const key of Object.keys(DEFAULTS)) {
        if (typeof parsed[key] === 'boolean')
            config[key] = parsed[key];
    }
    return config;
}
// The state object lives on globalThis, so a second evaluation of this module
// reuses the config that was already loaded instead of reading the file again.
const _g = globalThis;
if (!_g.__piTaskConfig) {
    _g.__piTaskConfig = { config: { ...DEFAULTS }, loaded: false };
}
const G = _g.__piTaskConfig;
// Load synchronously on module evaluation so getConfig() is always ready
// before any session_start handler fires.
if (!G.loaded) {
    try {
        G.config = sanitizeConfig(JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8')));
    }
    catch {
        // Missing or unreadable file, or malformed JSON: run on defaults.
        G.config = { ...DEFAULTS };
    }
    G.loaded = true;
}
29
+ export function getConfig() {
30
+ return G.config;
31
+ }
32
/**
 * Write `config` to the config file as pretty-printed JSON, then adopt a
 * shallow copy of it as the in-memory configuration.
 *
 * The config directory is created first if it does not exist. A failed write
 * rejects and leaves the in-memory configuration untouched.
 */
export async function saveConfig(config) {
    await fsp.mkdir(path.dirname(CONFIG_PATH), { recursive: true });
    // Serialise only after the directory step has completed, so the file
    // reflects the object's state at write time.
    const serialized = `${JSON.stringify(config, null, 2)}\n`;
    await fsp.writeFile(CONFIG_PATH, serialized, 'utf8');
    G.config = { ...config };
}
@@ -0,0 +1,2 @@
1
+ import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
2
/** Registers the `task-config` command on the given extension API. */
export declare function registerConfig(pi: ExtensionAPI): void;
@@ -0,0 +1,54 @@
1
+ import { SettingsList } from '@earendil-works/pi-tui';
2
+ import { registerBridgeCommand } from '../remote/bridge.js';
3
+ import { getConfig, saveConfig } from './config.js';
4
/**
 * One entry per setting shown by the config command. `id` is the key of the
 * flag in the config object; `label` and `description` are display text.
 */
const ITEMS = [
    {
        id: 'remote',
        label: 'remote',
        description: 'Remote UI server (QR code, phone access)'
    },
    {
        id: 'compressReasoning',
        label: 'compress reasoning',
        description: 'Compress <think> blocks after each message'
    },
    {
        id: 'autoCommit',
        label: 'auto-commit',
        description: 'git commit after each /task-auto sub-task'
    }
];
17
+ function makeTheme(theme) {
18
+ return {
19
+ label: (text, selected) => (selected ? theme.fg('accent', text) : text),
20
+ value: text => (text === 'on' ? theme.fg('success', text) : theme.fg('muted', text)),
21
+ description: text => theme.fg('muted', text),
22
+ cursor: theme.fg('accent', '>'),
23
+ hint: text => theme.fg('dim', text)
24
+ };
25
+ }
26
+ async function handleTaskConfig(_args, ctx) {
27
+ const cfg = { ...getConfig() };
28
+ if (ctx.mode !== 'tui') {
29
+ const lines = ITEMS.map(({ id, label }) => `${label.padEnd(22)} ${cfg[id] ? 'on' : 'off'}`);
30
+ ctx.ui.notify(lines.join(' | '), 'info');
31
+ return;
32
+ }
33
+ await ctx.ui.custom((_tui, theme, _kb, done) => {
34
+ const listTheme = makeTheme(theme);
35
+ const items = ITEMS.map(({ id, label, description }) => ({
36
+ id,
37
+ label,
38
+ description,
39
+ currentValue: cfg[id] ? 'on' : 'off',
40
+ values: ['on', 'off']
41
+ }));
42
+ const list = new SettingsList(items, 10, listTheme, (id, newValue) => {
43
+ cfg[id] = newValue === 'on';
44
+ saveConfig(cfg).catch(() => { });
45
+ }, () => done(undefined));
46
+ return list;
47
+ }, { overlay: true, overlayOptions: { width: 54 } });
48
+ }
49
+ export function registerConfig(pi) {
50
+ registerBridgeCommand(pi, 'task-config', {
51
+ description: 'Configure pi-task settings (remote, compress reasoning, auto-commit).',
52
+ handler: handleTaskConfig
53
+ });
54
+ }
package/dist/index.js CHANGED
@@ -1,12 +1,14 @@
1
+ import { registerConfig } from './config/register.js';
1
2
  import { registerTask } from './task/orchestrator.js';
2
3
  import { registerTaskAuto } from './task/auto-orchestrator.js';
3
4
  import { registerWorkers } from './workers/index.js';
4
5
  import { registerRemote } from './remote/register.js';
5
- import { registerContextCompression } from './context/compress.js';
6
+ import { registerThinkingCompression } from './thinking/compress.js';
6
7
  export default function (pi) {
8
+ registerConfig(pi);
7
9
  registerTask(pi);
8
10
  registerTaskAuto(pi);
9
11
  registerWorkers(pi);
10
12
  registerRemote(pi);
11
- registerContextCompression(pi);
13
+ registerThinkingCompression(pi);
12
14
  }
@@ -1,3 +1,4 @@
1
+ import { getConfig } from '../config/config.js';
1
2
  import { getBridge, dispatchRemoteLine, dispatchRemoteNewSession, makeShimmedCtx } from './bridge.js';
2
3
  import { setupEvents } from './events.js';
3
4
  import { reset, addUserTurn } from './session-state.js';
@@ -61,7 +62,9 @@ export function registerRemote(pi) {
61
62
  === true) {
62
63
  bridge.currentCtx = makeShimmedCtx(ctx);
63
64
  }
64
- void ensureServer().catch(err => ctx.ui.notify(`Failed to start remote: ${err.message}`, 'error'));
65
+ if (getConfig().remote) {
66
+ void ensureServer().catch(err => ctx.ui.notify(`Failed to start remote: ${err.message}`, 'error'));
67
+ }
65
68
  });
66
69
  pi.on('session_shutdown', (event, _ctx) => {
67
70
  if (event.reason === 'quit') {
@@ -76,8 +79,12 @@ export function registerRemote(pi) {
76
79
  }
77
80
  });
78
81
  pi.registerCommand('remote', {
79
- description: 'Show the remote QR code & URLs (the server is always running)',
82
+ description: 'Show the remote QR code & URLs.',
80
83
  handler: async (args, ctx) => {
84
+ if (!getConfig().remote) {
85
+ ctx.ui.notify('Remote is disabled — enable it in /task-config.', 'info');
86
+ return;
87
+ }
81
88
  if (args.trim() === 'stop') {
82
89
  if (S.server) {
83
90
  const port = S.server.port;
@@ -16,6 +16,7 @@ import { writeTaskFile, readTaskFile, updateTaskFrontMatter } from './task-io.js
16
16
  import { gitCommitAll } from './auto-commit.js';
17
17
  import { runPhaseChild, USER_CANCELLED } from './child-runner.js';
18
18
  import { SessionUI, registerBridgeCommand } from '../remote/bridge.js';
19
+ import { getConfig } from '../config/config.js';
19
20
  import { startAutoLoader } from './widget.js';
20
21
  // Matches pi's @-file completion token (a path after @, until whitespace).
21
22
  const MENTION_RE = /(?:^|\s)@([^\s]+)/g;
@@ -181,7 +182,9 @@ function defaultDeps(ctx, cwd, signal, title) {
181
182
  resumeId: opts?.resumeId,
182
183
  onStart: opts?.onStart
183
184
  }),
184
- commit: (cwd2, message) => gitCommitAll(cwd2, message, signal)
185
+ commit: (cwd2, message) => getConfig().autoCommit ?
186
+ gitCommitAll(cwd2, message, signal)
187
+ : Promise.resolve({ committed: false, reason: 'auto-commit disabled' })
185
188
  };
186
189
  }
187
190
  // ─── Loop ────────────────────────────────────────────────────────────────────
@@ -0,0 +1,2 @@
1
+ import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
2
/** Installs the `message_end` handler that compresses reasoning blocks. */
export declare function registerThinkingCompression(pi: ExtensionAPI): void;
@@ -0,0 +1,118 @@
1
+ import { getConfig } from '../config/config.js';
2
+ import { collectCompressible, MIN_THINKING_CHARS, rebuildWithCompressed } from './rewrite.js';
3
/** Upper bound on one compression request, so a hung model call cannot stall a turn forever. */
const REQUEST_TIMEOUT_MS = 2 * 60 * 1000;
/** Status slot that every loader update is written to. */
const STATUS_KEY = 'pi-task-thinking';
/** Spinner frames, cycled one per loader tick. */
const SPINNER = [...'⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏'];
/** Instruction placed in front of the reasoning text sent for compression. */
const PROMPT = [
    'Compress this reasoning. Keep every decision/conclusion/constraint/fact relied on later.',
    'Drop restated questions, false starts, self-talk. Output only the compressed reasoning. /no_think'
].join(' ');
9
+ async function compressOne(text, model, auth) {
10
+ const headers = { 'Content-Type': 'application/json', ...auth.headers };
11
+ if (auth.apiKey)
12
+ headers.Authorization = `Bearer ${auth.apiKey}`;
13
+ const res = await fetch(`${model.baseUrl}/chat/completions`, {
14
+ method: 'POST',
15
+ headers,
16
+ body: JSON.stringify({
17
+ model: model.id,
18
+ messages: [{ role: 'user', content: `${PROMPT}\n\n---\n\n${text}` }],
19
+ temperature: 0,
20
+ stream: false
21
+ }),
22
+ signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
23
+ });
24
+ if (!res.ok)
25
+ throw new Error(`compress HTTP ${res.status}`);
26
+ const data = (await res.json());
27
+ const raw = data.choices?.[0]?.message?.content ?? '';
28
+ return raw.replaceAll('<think>', '').replaceAll('</think>', '').trim();
29
+ }
30
+ /** Animated footer loader. Safe in any mode — `setStatus` is a no-op outside the
31
+ * TUI. Each tick reports which block is compressing and its size. */
32
+ class Loader {
33
+ ui;
34
+ timer = null;
35
+ frame = 0;
36
+ constructor(ui) {
37
+ this.ui = ui;
38
+ }
39
+ start(label) {
40
+ this.stop();
41
+ const tick = () => {
42
+ this.ui.setStatus(STATUS_KEY, `${SPINNER[this.frame % SPINNER.length]} ${label()}`);
43
+ this.frame++;
44
+ };
45
+ tick();
46
+ this.timer = setInterval(tick, 120);
47
+ }
48
+ /** Show a final, non-animated line, then clear it after a short beat. */
49
+ finish(text) {
50
+ this.stop();
51
+ this.ui.setStatus(STATUS_KEY, text);
52
+ if (text !== undefined) {
53
+ setTimeout(() => this.ui.setStatus(STATUS_KEY, undefined), 4000);
54
+ }
55
+ }
56
+ stop() {
57
+ if (this.timer) {
58
+ clearInterval(this.timer);
59
+ this.timer = null;
60
+ }
61
+ }
62
+ }
63
+ async function resolveAuth(ctx, model) {
64
+ try {
65
+ // eslint-disable-next-line @typescript-eslint/no-unsafe-argument -- ctx.model is Model<any>; the registry wants Model<Api>
66
+ const r = await ctx.modelRegistry.getApiKeyAndHeaders(model);
67
+ return r.ok ? { apiKey: r.apiKey, headers: r.headers } : {};
68
+ }
69
+ catch {
70
+ return {};
71
+ }
72
+ }
73
/**
 * Percentage by which `to` is smaller than `from`, rounded to a whole number.
 * Returns 0 when `from` is not positive, instead of dividing by zero and
 * yielding NaN or an infinity.
 */
const pct = (from, to) => (from > 0 ? Math.round((100 * (from - to)) / from) : 0);
74
/**
 * Install a `message_end` handler that compresses the reasoning blocks of the
 * message that just ended.
 *
 * The handler does nothing when the `compressReasoning` setting is off, when
 * the message has no compressible block, or when the context has no model.
 * Blocks are compressed one after another; a block whose request fails, or
 * whose result is empty or not shorter, is left as it was. When at least one
 * block shrank, the handler returns `{ message }` with the rewritten message.
 */
export function registerThinkingCompression(pi) {
    const onMessageEnd = async (event, ctx) => {
        if (!getConfig().compressReasoning)
            return;
        const message = event.message;
        const targets = collectCompressible(message, MIN_THINKING_CHARS);
        if (targets.length === 0)
            return;
        const model = ctx.model;
        if (!model)
            return;
        const loader = new Loader(ctx.ui);
        const auth = await resolveAuth(ctx, model);
        const endpoint = { id: model.id, baseUrl: model.baseUrl };
        const replacements = new Map();
        const total = targets.length;
        let before = 0;
        let after = 0;
        let position = 0;
        for (const target of targets) {
            position += 1;
            const size = target.text.length;
            const label = total > 1 ?
                `compressing reasoning ${position}/${total} (${size}c)…`
                : `compressing reasoning (${size}c)…`;
            loader.start(() => label);
            try {
                const compressed = await compressOne(target.text, endpoint, auth);
                const shrank = compressed.length > 0 && compressed.length < size;
                if (shrank) {
                    replacements.set(target.index, compressed);
                    before += size;
                    after += compressed.length;
                }
            }
            catch {
                // This block stays verbatim; carry on with the next one.
            }
        }
        if (replacements.size === 0) {
            loader.finish(undefined);
            return;
        }
        loader.finish(`✓ reasoning ${before}→${after}c (−${pct(before, after)}%)`);
        // Only the text of the thinking blocks is swapped; everything else
        // in the message is carried over unchanged.
        return { message: rebuildWithCompressed(message, replacements) };
    };
    pi.on('message_end', onMessageEnd);
}
@@ -0,0 +1,29 @@
1
/** Structural shape of a thinking content block. Declared locally (instead of
 * importing pi-ai's `ThinkingContent`) so the helpers below stay pure and can
 * be unit-tested with plain objects. */
export interface ThinkingBlock {
    type: 'thinking';
    /** The reasoning text itself. */
    thinking: string;
    thinkingSignature?: string;
    redacted?: boolean;
}
/** Structural shape of a message as far as these helpers read it. */
export interface AssistantMessageLike {
    role?: string;
    content?: unknown;
}
/** One block selected for compression. */
export interface CompressTarget {
    /** Position of the block inside the message's `content` array. */
    index: number;
    /** Original reasoning text of that block. */
    text: string;
}
/** Minimum thinking length worth a model round-trip; shorter blocks are skipped. */
export declare const MIN_THINKING_CHARS = 120;
/** Type guard: `b` is an object with `type: 'thinking'` and a string `thinking`. */
export declare function isThinkingBlock(b: unknown): b is ThinkingBlock;
/** Whether `b` may be rewritten and its trimmed text is at least `minChars` long. */
export declare function isCompressible(b: ThinkingBlock, minChars: number): boolean;
/** The compressible thinking blocks of an assistant message, each with its position. */
export declare function collectCompressible(message: AssistantMessageLike, minChars: number): CompressTarget[];
/** Produce a message in which the blocks at the given indices carry the
 * compressed text. Block `type` and `thinkingSignature` are kept, so the local
 * provider still replays the (now shorter) reasoning. A replacement is applied
 * only when it is strictly shorter than the block's current text. When nothing
 * changes, the very same message object is returned. */
export declare function rebuildWithCompressed<T extends AssistantMessageLike>(message: T, byIndex: ReadonlyMap<number, string>): T;
@@ -0,0 +1,53 @@
1
/** Minimum thinking length worth a model round-trip; shorter blocks are skipped. */
export const MIN_THINKING_CHARS = 120;
/** Signature values that mark a thinking block as safe to rewrite.
 *
 * Under `openai-completions` (llama.cpp/local) the "signature" is only the
 * *name* of the field (`reasoning_content`) the reasoning is replayed under,
 * not a cryptographic signature, so the text can be changed freely. A long,
 * non-sentinel signature is Anthropic-style extended thinking: there the
 * signature cryptographically signs the original text and the block feeds the
 * next turn's continuation, so rewriting would break it. Those blocks are
 * skipped. */
const SENTINEL_SIGNATURES = new Set([
    '',
    'reasoning_content',
    'reasoning',
    'reasoning_text'
]);
10
/** True when `b` is a non-null object tagged `type: 'thinking'` whose
 * `thinking` property is a string. */
export function isThinkingBlock(b) {
    if (b === null || typeof b !== 'object')
        return false;
    return b.type === 'thinking' && typeof b.thinking === 'string';
}
16
/** Whether a thinking block may be compressed: it is not redacted, its
 * signature (a missing one counts as '') is one of the sentinel values, and
 * its trimmed text is at least `minChars` characters long. */
export function isCompressible(b, minChars) {
    const offLimits = b.redacted || !SENTINEL_SIGNATURES.has(b.thinkingSignature ?? '');
    return offLimits ? false : b.thinking.trim().length >= minChars;
}
23
+ /** Compressible thinking blocks of an assistant message, with their positions. */
24
+ export function collectCompressible(message, minChars) {
25
+ if (message.role !== 'assistant' || !Array.isArray(message.content))
26
+ return [];
27
+ const out = [];
28
+ message.content.forEach((b, index) => {
29
+ if (isThinkingBlock(b) && isCompressible(b, minChars))
30
+ out.push({ index, text: b.thinking });
31
+ });
32
+ return out;
33
+ }
34
+ /** Rebuild an assistant message with compressed text swapped into the given
35
+ * block indices. The block `type` and `thinkingSignature` are preserved so the
36
+ * local provider still replays the (now shorter) reasoning, and a replacement is
37
+ * only applied when it actually shrinks the block. Returns the same object when
38
+ * nothing changed. */
39
+ export function rebuildWithCompressed(message, byIndex) {
40
+ if (byIndex.size === 0 || !Array.isArray(message.content))
41
+ return message;
42
+ let changed = false;
43
+ const content = message.content.map((b, index) => {
44
+ const compressed = byIndex.get(index);
45
+ if (compressed === undefined || !isThinkingBlock(b))
46
+ return b;
47
+ if (compressed.length >= b.thinking.length)
48
+ return b;
49
+ changed = true;
50
+ return { ...b, thinking: compressed };
51
+ });
52
+ return changed ? { ...message, content } : message;
53
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mjasnikovs/pi-task",
3
- "version": "0.8.1",
3
+ "version": "0.10.0",
4
4
  "description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -1,18 +0,0 @@
1
- /** Stable content hash for a thinking block. Determinism of the compressor at
2
- * temperature 0 (validated against the local model) makes this a safe cache
3
- * key: identical reasoning compresses to identical output, so each unique
4
- * block is sent to the model exactly once, ever. */
5
- export declare function hashText(text: string): string;
6
- /** Disk-backed `hash -> compressed text` store. The on-disk file lets the
7
- * "compress once" guarantee survive process restarts, not just jiti reloads. */
8
- export declare class CompressionCache {
9
- private readonly file;
10
- private mem;
11
- private loaded;
12
- constructor(file: string);
13
- private load;
14
- get(hash: string): string | undefined;
15
- has(hash: string): boolean;
16
- set(hash: string, compressed: string): void;
17
- get size(): number;
18
- }
@@ -1,56 +0,0 @@
1
- import { createHash } from 'node:crypto';
2
- import * as fs from 'node:fs';
3
- import * as path from 'node:path';
4
- /** Stable content hash for a thinking block. Determinism of the compressor at
5
- * temperature 0 (validated against the local model) makes this a safe cache
6
- * key: identical reasoning compresses to identical output, so each unique
7
- * block is sent to the model exactly once, ever. */
8
- export function hashText(text) {
9
- return createHash('sha256').update(text).digest('hex');
10
- }
11
- /** Disk-backed `hash -> compressed text` store. The on-disk file lets the
12
- * "compress once" guarantee survive process restarts, not just jiti reloads. */
13
- export class CompressionCache {
14
- file;
15
- mem = new Map();
16
- loaded = false;
17
- constructor(file) {
18
- this.file = file;
19
- }
20
- load() {
21
- if (this.loaded)
22
- return;
23
- this.loaded = true;
24
- try {
25
- const obj = JSON.parse(fs.readFileSync(this.file, 'utf8'));
26
- for (const [k, v] of Object.entries(obj))
27
- this.mem.set(k, v);
28
- }
29
- catch {
30
- // No cache file yet (or unreadable) — start empty.
31
- }
32
- }
33
- get(hash) {
34
- this.load();
35
- return this.mem.get(hash);
36
- }
37
- has(hash) {
38
- this.load();
39
- return this.mem.has(hash);
40
- }
41
- set(hash, compressed) {
42
- this.load();
43
- this.mem.set(hash, compressed);
44
- try {
45
- fs.mkdirSync(path.dirname(this.file), { recursive: true });
46
- fs.writeFileSync(this.file, JSON.stringify(Object.fromEntries(this.mem)));
47
- }
48
- catch {
49
- // Best-effort persistence; the in-memory copy still serves this run.
50
- }
51
- }
52
- get size() {
53
- this.load();
54
- return this.mem.size;
55
- }
56
- }
@@ -1,2 +0,0 @@
1
- import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
2
- export declare function registerContextCompression(pi: ExtensionAPI): void;
@@ -1,153 +0,0 @@
1
- import * as os from 'node:os';
2
- import * as path from 'node:path';
3
- import { CompressionCache } from './cache.js';
4
- import { applyRewrites, selectCandidates } from './rewrite.js';
5
- /** Keep the most-recent messages verbatim — recent reasoning is most likely to
6
- * be relied on next turn, and compressing it would chase a moving target. */
7
- const KEEP_LAST = 8;
8
- /** Only compress sizeable blocks. Validation against the real session corpus
9
- * (median thinking block 127 chars) showed small blocks barely shrink yet still
10
- * cost ~5-15s on the local model — net-negative. Big blocks compress ~5x. */
11
- const MIN_CHARS = 1500;
12
- /** Hard cap so a stuck request can never wedge the background queue. */
13
- const REQUEST_TIMEOUT_MS = 120_000;
14
- /** Poll interval while the agent is busy — see the GPU note in `drain`. */
15
- const IDLE_BACKOFF_MS = 750;
16
- const PROMPT = 'Compress this reasoning. Keep every decision/conclusion/constraint/fact relied on later. '
17
- + 'Drop restated questions, false starts, self-talk. Output only the compressed reasoning. /no_think';
18
- const OPTS = { keepLast: KEEP_LAST, minChars: MIN_CHARS };
19
- const delay = (ms) => new Promise(resolve => setTimeout(resolve, ms));
20
- async function compressOne(text, model, auth) {
21
- const headers = { 'Content-Type': 'application/json', ...auth.headers };
22
- if (auth.apiKey)
23
- headers.Authorization = `Bearer ${auth.apiKey}`;
24
- const res = await fetch(`${model.baseUrl}/chat/completions`, {
25
- method: 'POST',
26
- headers,
27
- body: JSON.stringify({
28
- model: model.id,
29
- messages: [{ role: 'user', content: `${PROMPT}\n\n---\n\n${text}` }],
30
- temperature: 0,
31
- stream: false
32
- }),
33
- signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
34
- });
35
- if (!res.ok)
36
- throw new Error(`compress HTTP ${res.status}`);
37
- const data = (await res.json());
38
- const raw = data.choices?.[0]?.message?.content ?? '';
39
- return raw.replaceAll('<think>', '').replaceAll('</think>', '').trim();
40
- }
41
- /** Owns the compression cache and a serial background queue. Persisted on
42
- * globalThis so it survives the jiti module re-evaluation that happens on every
43
- * `/new` (mirrors the pattern in remote/register.ts). */
44
- class ThinkingCompressor {
45
- cache;
46
- pending = [];
47
- inflight = new Set();
48
- draining = false;
49
- model = null;
50
- isIdle = () => true;
51
- resolveAuth = () => Promise.resolve({});
52
- auth = null;
53
- authModelId = null;
54
- constructor(cacheFile) {
55
- this.cache = new CompressionCache(cacheFile);
56
- }
57
- /** Refresh per-call context (model, idleness, auth resolver) from the latest
58
- * `context` event. Cheap and synchronous — no blocking work on this path. */
59
- bind(model, isIdle, resolveAuth) {
60
- this.model = model;
61
- this.isIdle = isIdle;
62
- this.resolveAuth = resolveAuth;
63
- if (this.authModelId !== model.id) {
64
- // Model changed — invalidate cached auth so it is re-resolved lazily.
65
- this.auth = null;
66
- this.authModelId = model.id;
67
- }
68
- }
69
- enqueue(hash, text) {
70
- if (this.cache.has(hash) || this.inflight.has(hash))
71
- return;
72
- if (this.pending.some(p => p.hash === hash))
73
- return;
74
- this.pending.push({ hash, text });
75
- void this.drain();
76
- }
77
- async getAuth() {
78
- if (this.auth)
79
- return this.auth;
80
- try {
81
- this.auth = await this.resolveAuth();
82
- }
83
- catch {
84
- this.auth = {};
85
- }
86
- return this.auth;
87
- }
88
- async drain() {
89
- if (this.draining)
90
- return;
91
- this.draining = true;
92
- try {
93
- while (this.pending.length > 0) {
94
- const model = this.model;
95
- if (!model)
96
- break;
97
- // The local model is a single-GPU llama.cpp server: a compression
98
- // request fired mid-turn would queue behind (and stall) the user's
99
- // turn. So compression only runs while the agent is idle.
100
- if (!this.isIdle()) {
101
- await delay(IDLE_BACKOFF_MS);
102
- continue;
103
- }
104
- const job = this.pending.shift();
105
- if (this.cache.has(job.hash))
106
- continue;
107
- this.inflight.add(job.hash);
108
- try {
109
- const compressed = await compressOne(job.text, model, await this.getAuth());
110
- // Only cache a genuine shrink; otherwise leave the block verbatim
111
- // (a later turn will re-enqueue and retry).
112
- if (compressed.length > 0 && compressed.length < job.text.length) {
113
- this.cache.set(job.hash, compressed);
114
- }
115
- }
116
- catch {
117
- // Transient (model busy/down) — drop the job; re-enqueued next turn.
118
- }
119
- finally {
120
- this.inflight.delete(job.hash);
121
- }
122
- }
123
- }
124
- finally {
125
- this.draining = false;
126
- }
127
- }
128
- }
129
- export function registerContextCompression(pi) {
130
- const cacheFile = path.join(os.homedir(), '.pi', 'agent', 'cache', 'pi-task', 'thinking-compression.json');
131
- const g = globalThis;
132
- const compressor = g.__piThinkingCompressor ?? new ThinkingCompressor(cacheFile);
133
- g.__piThinkingCompressor = compressor;
134
- pi.on('context', (event, ctx) => {
135
- const model = ctx.model;
136
- if (!model)
137
- return;
138
- compressor.bind({ id: model.id, baseUrl: model.baseUrl }, () => ctx.isIdle(), async () => {
139
- // eslint-disable-next-line @typescript-eslint/no-unsafe-argument -- ctx.model is Model<any>; the registry wants Model<Api>
140
- const r = await ctx.modelRegistry.getApiKeyAndHeaders(model);
141
- return r.ok ? { apiKey: r.apiKey, headers: r.headers } : {};
142
- });
143
- // Background: ensure every eligible block is queued for one-time compression.
144
- for (const c of selectCandidates(event.messages, OPTS)) {
145
- compressor.enqueue(c.hash, c.text);
146
- }
147
- // Critical path: apply only what is already cached. Pure + synchronous.
148
- const { messages, rewritten } = applyRewrites(event.messages, OPTS, h => compressor.cache.get(h));
149
- if (rewritten === 0)
150
- return;
151
- return { messages };
152
- });
153
- }
@@ -1,39 +0,0 @@
1
- /** Minimal structural view of a thinking content block. We avoid importing the
2
- * exact pi-ai `ThinkingContent` type so these helpers stay pure and trivially
3
- * unit-testable with plain objects. */
4
- export interface ThinkingBlock {
5
- type: 'thinking';
6
- thinking: string;
7
- thinkingSignature?: string;
8
- redacted?: boolean;
9
- }
10
- /** Minimal structural view of an AgentMessage. `AgentMessage[]` is assignable
11
- * to `Msg[]`, so the `context` handler passes pi's real messages straight in. */
12
- export interface Msg {
13
- role?: string;
14
- content?: unknown;
15
- }
16
- export interface Candidate {
17
- hash: string;
18
- text: string;
19
- }
20
- export interface SelectOptions {
21
- /** Number of most-recent messages to leave completely untouched. */
22
- keepLast: number;
23
- /** Minimum trimmed thinking length worth compressing. */
24
- minChars: number;
25
- }
26
- export declare function isThinkingBlock(b: unknown): b is ThinkingBlock;
27
- export declare function isRewritable(b: ThinkingBlock, minChars: number): boolean;
28
- /** Eligible thinking blocks older than the keep-last window. May contain
29
- * duplicates (the same reasoning across turns) — callers dedupe by hash. */
30
- export declare function selectCandidates(messages: readonly Msg[], opts: SelectOptions): Candidate[];
31
- /** Return a copy of `messages` with cached compressions swapped into eligible
32
- * thinking blocks. Unchanged messages keep their identity. `thinkingSignature`
33
- * and block `type` are preserved so the local provider still replays the (now
34
- * shorter) reasoning. A compression is only applied when it actually shrinks
35
- * the block, so this can never expand context. */
36
- export declare function applyRewrites<T extends Msg>(messages: readonly T[], opts: SelectOptions, lookup: (hash: string) => string | undefined): {
37
- messages: T[];
38
- rewritten: number;
39
- };
@@ -1,63 +0,0 @@
1
- import { hashText } from './cache.js';
2
- /** In `openai-completions` (llama.cpp/local), the "signature" is a field *name*
3
- * (`reasoning_content`) the prior reasoning is replayed under — not a crypto
4
- * signature — so rewriting the text is safe. A long, non-sentinel signature
5
- * means Anthropic-style extended thinking, where the signature cryptographically
6
- * signs the original text; rewriting it would be rejected, so we skip those. */
7
- const SENTINEL_SIGNATURES = new Set(['', 'reasoning_content', 'reasoning', 'reasoning_text']);
8
- export function isThinkingBlock(b) {
9
- return (typeof b === 'object'
10
- && b !== null
11
- && b.type === 'thinking'
12
- && typeof b.thinking === 'string');
13
- }
14
- export function isRewritable(b, minChars) {
15
- if (b.redacted)
16
- return false;
17
- if (!SENTINEL_SIGNATURES.has(b.thinkingSignature ?? ''))
18
- return false;
19
- return b.thinking.trim().length >= minChars;
20
- }
21
- /** Eligible thinking blocks older than the keep-last window. May contain
22
- * duplicates (the same reasoning across turns) — callers dedupe by hash. */
23
- export function selectCandidates(messages, opts) {
24
- const cutoff = messages.length - opts.keepLast;
25
- const out = [];
26
- for (let i = 0; i < cutoff; i++) {
27
- const m = messages[i];
28
- if (m.role !== 'assistant' || !Array.isArray(m.content))
29
- continue;
30
- for (const b of m.content) {
31
- if (isThinkingBlock(b) && isRewritable(b, opts.minChars)) {
32
- out.push({ hash: hashText(b.thinking), text: b.thinking });
33
- }
34
- }
35
- }
36
- return out;
37
- }
38
- /** Return a copy of `messages` with cached compressions swapped into eligible
39
- * thinking blocks. Unchanged messages keep their identity. `thinkingSignature`
40
- * and block `type` are preserved so the local provider still replays the (now
41
- * shorter) reasoning. A compression is only applied when it actually shrinks
42
- * the block, so this can never expand context. */
43
- export function applyRewrites(messages, opts, lookup) {
44
- const cutoff = messages.length - opts.keepLast;
45
- let rewritten = 0;
46
- const out = messages.map((m, i) => {
47
- if (i >= cutoff || m.role !== 'assistant' || !Array.isArray(m.content))
48
- return m;
49
- let changed = false;
50
- const content = m.content.map(b => {
51
- if (!isThinkingBlock(b) || !isRewritable(b, opts.minChars))
52
- return b;
53
- const compressed = lookup(hashText(b.thinking));
54
- if (compressed === undefined || compressed.length >= b.thinking.length)
55
- return b;
56
- changed = true;
57
- rewritten++;
58
- return { ...b, thinking: compressed };
59
- });
60
- return changed ? { ...m, content } : m;
61
- });
62
- return { messages: out, rewritten };
63
- }