@mjasnikovs/pi-task 0.7.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context/cache.d.ts +18 -0
- package/dist/context/cache.js +56 -0
- package/dist/context/compress.d.ts +2 -0
- package/dist/context/compress.js +153 -0
- package/dist/context/rewrite.d.ts +39 -0
- package/dist/context/rewrite.js +63 -0
- package/dist/index.js +2 -0
- package/dist/remote/broadcast.d.ts +0 -1
- package/dist/remote/broadcast.js +0 -3
- package/dist/remote/protocol.d.ts +1 -1
- package/dist/remote/server.js +1 -3
- package/dist/remote/ui.js +3 -19
- package/dist/think-test/cli.d.ts +1 -0
- package/dist/think-test/cli.js +98 -0
- package/dist/think-test/client.d.ts +26 -0
- package/dist/think-test/client.js +37 -0
- package/dist/think-test/compressor.d.ts +5 -0
- package/dist/think-test/compressor.js +25 -0
- package/dist/think-test/judge.d.ts +4 -0
- package/dist/think-test/judge.js +11 -0
- package/dist/think-test/score.d.ts +8 -0
- package/dist/think-test/score.js +22 -0
- package/dist/think-test/serialize.d.ts +19 -0
- package/dist/think-test/serialize.js +41 -0
- package/dist/think-test/transcript.d.ts +7 -0
- package/dist/think-test/transcript.js +41 -0
- package/dist/think-test/transform.d.ts +6 -0
- package/dist/think-test/transform.js +24 -0
- package/dist/think-test/types.d.ts +45 -0
- package/dist/think-test/types.js +1 -0
- package/dist/workers/html-clean.js +12 -7
- package/package.json +11 -12
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/** Stable content hash for a thinking block. Determinism of the compressor at
|
|
2
|
+
* temperature 0 (validated against the local model) makes this a safe cache
|
|
3
|
+
* key: identical reasoning compresses to identical output, so each unique
|
|
4
|
+
* block is sent to the model exactly once, ever. */
|
|
5
|
+
export declare function hashText(text: string): string;
|
|
6
|
+
/** Disk-backed `hash -> compressed text` store. The on-disk file lets the
|
|
7
|
+
* "compress once" guarantee survive process restarts, not just jiti reloads. */
|
|
8
|
+
export declare class CompressionCache {
|
|
9
|
+
private readonly file;
|
|
10
|
+
private mem;
|
|
11
|
+
private loaded;
|
|
12
|
+
constructor(file: string);
|
|
13
|
+
private load;
|
|
14
|
+
get(hash: string): string | undefined;
|
|
15
|
+
has(hash: string): boolean;
|
|
16
|
+
set(hash: string, compressed: string): void;
|
|
17
|
+
get size(): number;
|
|
18
|
+
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import * as fs from 'node:fs';
|
|
3
|
+
import * as path from 'node:path';
|
|
4
|
+
/** Stable content hash for a thinking block. Determinism of the compressor at
|
|
5
|
+
* temperature 0 (validated against the local model) makes this a safe cache
|
|
6
|
+
* key: identical reasoning compresses to identical output, so each unique
|
|
7
|
+
* block is sent to the model exactly once, ever. */
|
|
8
|
+
export function hashText(text) {
    // SHA-256 over the raw text, rendered as lowercase hex, is the cache key.
    const hasher = createHash('sha256');
    hasher.update(text);
    return hasher.digest('hex');
}
|
|
11
|
+
/** Disk-backed `hash -> compressed text` store. The on-disk file lets the
|
|
12
|
+
* "compress once" guarantee survive process restarts, not just jiti reloads. */
|
|
13
|
+
export class CompressionCache {
    /** Path of the JSON file that backs the cache. */
    file;
    /** In-memory `hash -> compressed text` map; this is what lookups read. */
    mem = new Map();
    /** Flipped on first access so the disk file is read at most once per instance. */
    loaded = false;
    /**
     * @param file Path of the JSON cache file. It is not touched until the
     * first `get`/`has`/`set`/`size` call.
     */
    constructor(file) {
        this.file = file;
    }
    /**
     * Lazily populate `mem` from disk. A missing, unreadable, or malformed file
     * leaves the cache empty. Only string values from a plain JSON object are
     * accepted, because callers read `.length` on the value and splice it into
     * messages; anything else is ignored rather than trusted.
     */
    load() {
        if (this.loaded)
            return;
        this.loaded = true;
        let parsed;
        try {
            parsed = JSON.parse(fs.readFileSync(this.file, 'utf8'));
        }
        catch {
            // No cache file yet (or unreadable / not JSON) — start empty.
            return;
        }
        if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed))
            return;
        for (const [k, v] of Object.entries(parsed)) {
            if (typeof v === 'string')
                this.mem.set(k, v);
        }
    }
    /** Return the compressed text stored for `hash`, or `undefined` when absent. */
    get(hash) {
        this.load();
        return this.mem.get(hash);
    }
    /** Report whether a compression is stored for `hash`. */
    has(hash) {
        this.load();
        return this.mem.has(hash);
    }
    /**
     * Store `compressed` under `hash` and persist the whole map. Persistence is
     * best-effort: the in-memory entry is kept even when the disk write fails.
     * The file is written to a sibling temp file and renamed into place, so an
     * interrupted write cannot leave truncated JSON behind (which `load` would
     * treat as an empty cache).
     */
    set(hash, compressed) {
        this.load();
        this.mem.set(hash, compressed);
        const tmp = `${this.file}.${process.pid}.tmp`;
        try {
            fs.mkdirSync(path.dirname(this.file), { recursive: true });
            fs.writeFileSync(tmp, JSON.stringify(Object.fromEntries(this.mem)));
            fs.renameSync(tmp, this.file);
        }
        catch {
            // Best-effort persistence; the in-memory copy still serves this run.
            try {
                fs.unlinkSync(tmp);
            }
            catch {
                // Temp file was never created or is already gone.
            }
        }
    }
    /** Number of cached compressions. */
    get size() {
        this.load();
        return this.mem.size;
    }
}
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import * as os from 'node:os';
|
|
2
|
+
import * as path from 'node:path';
|
|
3
|
+
import { CompressionCache } from './cache.js';
|
|
4
|
+
import { applyRewrites, selectCandidates } from './rewrite.js';
|
|
5
|
+
/** Keep the most-recent messages verbatim — recent reasoning is most likely to
|
|
6
|
+
* be relied on next turn, and compressing it would chase a moving target. */
|
|
7
|
+
const KEEP_LAST = 8;
|
|
8
|
+
/** Only compress sizeable blocks. Validation against the real session corpus
|
|
9
|
+
* (median thinking block 127 chars) showed small blocks barely shrink yet still
|
|
10
|
+
* cost ~5-15s on the local model — net-negative. Big blocks compress ~5x. */
|
|
11
|
+
const MIN_CHARS = 1500;
|
|
12
|
+
/** Hard cap so a stuck request can never wedge the background queue. */
|
|
13
|
+
const REQUEST_TIMEOUT_MS = 120_000;
|
|
14
|
+
/** Poll interval while the agent is busy — see the GPU note in `drain`. */
|
|
15
|
+
const IDLE_BACKOFF_MS = 750;
|
|
16
|
+
const PROMPT = 'Compress this reasoning. Keep every decision/conclusion/constraint/fact relied on later. '
|
|
17
|
+
+ 'Drop restated questions, false starts, self-talk. Output only the compressed reasoning. /no_think';
|
|
18
|
+
const OPTS = { keepLast: KEEP_LAST, minChars: MIN_CHARS };
|
|
19
|
+
// Promise-based sleep: settles (with no value) once `ms` milliseconds have elapsed.
const delay = (ms) => new Promise((wake) => {
    setTimeout(wake, ms);
});
|
|
20
|
+
/**
 * Send one thinking block to the model's `/chat/completions` endpoint and
 * return the compressed text with any `<think>` / `</think>` tags removed and
 * surrounding whitespace trimmed.
 *
 * @param text  Reasoning text to compress.
 * @param model Object providing `id` and `baseUrl`.
 * @param auth  Object with optional `apiKey` and `headers`.
 * @throws Error `compress HTTP <status>` on a non-2xx response; fetch/abort
 *         errors (including the request timeout) propagate unchanged.
 */
async function compressOne(text, model, auth) {
    // Caller headers are spread after the default so they can override it; an
    // API key, when present, always sets Authorization.
    const headers = { 'Content-Type': 'application/json', ...auth.headers };
    if (auth.apiKey) {
        headers.Authorization = `Bearer ${auth.apiKey}`;
    }
    const payload = {
        model: model.id,
        messages: [{ role: 'user', content: `${PROMPT}\n\n---\n\n${text}` }],
        temperature: 0,
        stream: false
    };
    const response = await fetch(`${model.baseUrl}/chat/completions`, {
        method: 'POST',
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(REQUEST_TIMEOUT_MS)
    });
    if (!response.ok) {
        throw new Error(`compress HTTP ${response.status}`);
    }
    const data = (await response.json());
    let output = data.choices?.[0]?.message?.content ?? '';
    for (const tag of ['<think>', '</think>']) {
        output = output.replaceAll(tag, '');
    }
    return output.trim();
}
|
|
41
|
+
/** Owns the compression cache and a serial background queue. Persisted on
|
|
42
|
+
* globalThis so it survives the jiti module re-evaluation that happens on every
|
|
43
|
+
* `/new` (mirrors the pattern in remote/register.ts). */
|
|
44
|
+
class ThinkingCompressor {
|
|
45
|
+
cache;
|
|
46
|
+
pending = [];
|
|
47
|
+
inflight = new Set();
|
|
48
|
+
draining = false;
|
|
49
|
+
model = null;
|
|
50
|
+
isIdle = () => true;
|
|
51
|
+
resolveAuth = () => Promise.resolve({});
|
|
52
|
+
auth = null;
|
|
53
|
+
authModelId = null;
|
|
54
|
+
constructor(cacheFile) {
|
|
55
|
+
this.cache = new CompressionCache(cacheFile);
|
|
56
|
+
}
|
|
57
|
+
/** Refresh per-call context (model, idleness, auth resolver) from the latest
|
|
58
|
+
* `context` event. Cheap and synchronous — no blocking work on this path. */
|
|
59
|
+
bind(model, isIdle, resolveAuth) {
|
|
60
|
+
this.model = model;
|
|
61
|
+
this.isIdle = isIdle;
|
|
62
|
+
this.resolveAuth = resolveAuth;
|
|
63
|
+
if (this.authModelId !== model.id) {
|
|
64
|
+
// Model changed — invalidate cached auth so it is re-resolved lazily.
|
|
65
|
+
this.auth = null;
|
|
66
|
+
this.authModelId = model.id;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
enqueue(hash, text) {
|
|
70
|
+
if (this.cache.has(hash) || this.inflight.has(hash))
|
|
71
|
+
return;
|
|
72
|
+
if (this.pending.some(p => p.hash === hash))
|
|
73
|
+
return;
|
|
74
|
+
this.pending.push({ hash, text });
|
|
75
|
+
void this.drain();
|
|
76
|
+
}
|
|
77
|
+
async getAuth() {
|
|
78
|
+
if (this.auth)
|
|
79
|
+
return this.auth;
|
|
80
|
+
try {
|
|
81
|
+
this.auth = await this.resolveAuth();
|
|
82
|
+
}
|
|
83
|
+
catch {
|
|
84
|
+
this.auth = {};
|
|
85
|
+
}
|
|
86
|
+
return this.auth;
|
|
87
|
+
}
|
|
88
|
+
async drain() {
|
|
89
|
+
if (this.draining)
|
|
90
|
+
return;
|
|
91
|
+
this.draining = true;
|
|
92
|
+
try {
|
|
93
|
+
while (this.pending.length > 0) {
|
|
94
|
+
const model = this.model;
|
|
95
|
+
if (!model)
|
|
96
|
+
break;
|
|
97
|
+
// The local model is a single-GPU llama.cpp server: a compression
|
|
98
|
+
// request fired mid-turn would queue behind (and stall) the user's
|
|
99
|
+
// turn. So compression only runs while the agent is idle.
|
|
100
|
+
if (!this.isIdle()) {
|
|
101
|
+
await delay(IDLE_BACKOFF_MS);
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
const job = this.pending.shift();
|
|
105
|
+
if (this.cache.has(job.hash))
|
|
106
|
+
continue;
|
|
107
|
+
this.inflight.add(job.hash);
|
|
108
|
+
try {
|
|
109
|
+
const compressed = await compressOne(job.text, model, await this.getAuth());
|
|
110
|
+
// Only cache a genuine shrink; otherwise leave the block verbatim
|
|
111
|
+
// (a later turn will re-enqueue and retry).
|
|
112
|
+
if (compressed.length > 0 && compressed.length < job.text.length) {
|
|
113
|
+
this.cache.set(job.hash, compressed);
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
catch {
|
|
117
|
+
// Transient (model busy/down) — drop the job; re-enqueued next turn.
|
|
118
|
+
}
|
|
119
|
+
finally {
|
|
120
|
+
this.inflight.delete(job.hash);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
finally {
|
|
125
|
+
this.draining = false;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
/**
 * Hook thinking-block compression into pi's `context` event.
 *
 * One `ThinkingCompressor` is kept on `globalThis.__piThinkingCompressor` and
 * reused if already present. On each event the handler rebinds the compressor
 * to the current model, queues eligible blocks for background compression, and
 * synchronously swaps in whatever compressions are already cached.
 *
 * @param pi Extension API object exposing `on(eventName, handler)`.
 */
export function registerContextCompression(pi) {
    const cacheFile = path.join(os.homedir(), '.pi', 'agent', 'cache', 'pi-task', 'thinking-compression.json');
    const store = globalThis;
    store.__piThinkingCompressor = store.__piThinkingCompressor ?? new ThinkingCompressor(cacheFile);
    const compressor = store.__piThinkingCompressor;
    const lookupCached = (hash) => compressor.cache.get(hash);
    pi.on('context', (event, ctx) => {
        const model = ctx.model;
        if (!model) {
            return;
        }
        const checkIdle = () => ctx.isIdle();
        const fetchAuth = async () => {
            // eslint-disable-next-line @typescript-eslint/no-unsafe-argument -- ctx.model is Model<any>; the registry wants Model<Api>
            const r = await ctx.modelRegistry.getApiKeyAndHeaders(model);
            if (r.ok) {
                return { apiKey: r.apiKey, headers: r.headers };
            }
            return {};
        };
        compressor.bind({ id: model.id, baseUrl: model.baseUrl }, checkIdle, fetchAuth);
        // Background: ensure every eligible block is queued for one-time compression.
        for (const { hash, text } of selectCandidates(event.messages, OPTS)) {
            compressor.enqueue(hash, text);
        }
        // Critical path: apply only what is already cached. Pure + synchronous.
        const { messages, rewritten } = applyRewrites(event.messages, OPTS, lookupCached);
        return rewritten === 0 ? undefined : { messages };
    });
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Minimal structural view of a thinking content block. We avoid importing the
|
|
2
|
+
* exact pi-ai `ThinkingContent` type so these helpers stay pure and trivially
|
|
3
|
+
* unit-testable with plain objects. */
|
|
4
|
+
export interface ThinkingBlock {
|
|
5
|
+
type: 'thinking';
|
|
6
|
+
thinking: string;
|
|
7
|
+
thinkingSignature?: string;
|
|
8
|
+
redacted?: boolean;
|
|
9
|
+
}
|
|
10
|
+
/** Minimal structural view of an AgentMessage. `AgentMessage[]` is assignable
|
|
11
|
+
* to `Msg[]`, so the `context` handler passes pi's real messages straight in. */
|
|
12
|
+
export interface Msg {
|
|
13
|
+
role?: string;
|
|
14
|
+
content?: unknown;
|
|
15
|
+
}
|
|
16
|
+
export interface Candidate {
|
|
17
|
+
hash: string;
|
|
18
|
+
text: string;
|
|
19
|
+
}
|
|
20
|
+
export interface SelectOptions {
|
|
21
|
+
/** Number of most-recent messages to leave completely untouched. */
|
|
22
|
+
keepLast: number;
|
|
23
|
+
/** Minimum trimmed thinking length worth compressing. */
|
|
24
|
+
minChars: number;
|
|
25
|
+
}
|
|
26
|
+
/** Type guard: true when `b` is a non-null object whose `type` is `'thinking'`
 * and whose `thinking` property is a string. */
export declare function isThinkingBlock(b: unknown): b is ThinkingBlock;
|
|
27
|
+
/** True when the block's text may be replaced: it is not `redacted`, its
 * `thinkingSignature` is absent or one of the sentinel field names (rather than
 * a cryptographic signature), and its trimmed `thinking` text is at least
 * `minChars` characters long. */
export declare function isRewritable(b: ThinkingBlock, minChars: number): boolean;
|
|
28
|
+
/** Eligible thinking blocks older than the keep-last window. May contain
|
|
29
|
+
* duplicates (the same reasoning across turns) — callers dedupe by hash. */
|
|
30
|
+
export declare function selectCandidates(messages: readonly Msg[], opts: SelectOptions): Candidate[];
|
|
31
|
+
/** Return a copy of `messages` with cached compressions swapped into eligible
|
|
32
|
+
* thinking blocks. Unchanged messages keep their identity. `thinkingSignature`
|
|
33
|
+
* and block `type` are preserved so the local provider still replays the (now
|
|
34
|
+
* shorter) reasoning. A compression is only applied when it actually shrinks
|
|
35
|
+
* the block, so this can never expand context. */
|
|
36
|
+
export declare function applyRewrites<T extends Msg>(messages: readonly T[], opts: SelectOptions, lookup: (hash: string) => string | undefined): {
|
|
37
|
+
messages: T[];
|
|
38
|
+
rewritten: number;
|
|
39
|
+
};
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { hashText } from './cache.js';
|
|
2
|
+
/** In `openai-completions` (llama.cpp/local), the "signature" is a field *name*
|
|
3
|
+
* (`reasoning_content`) the prior reasoning is replayed under — not a crypto
|
|
4
|
+
* signature — so rewriting the text is safe. A long, non-sentinel signature
|
|
5
|
+
* means Anthropic-style extended thinking, where the signature cryptographically
|
|
6
|
+
* signs the original text; rewriting it would be rejected, so we skip those. */
|
|
7
|
+
const SENTINEL_SIGNATURES = new Set(['', 'reasoning_content', 'reasoning', 'reasoning_text']);
|
|
8
|
+
// Structural check for a thinking content block: a non-null object tagged
// `type: 'thinking'` that carries its reasoning as a string.
export function isThinkingBlock(b) {
    if (typeof b !== 'object' || b === null) {
        return false;
    }
    const taggedThinking = b.type === 'thinking';
    return taggedThinking && typeof b.thinking === 'string';
}
|
|
14
|
+
// A block may be rewritten only when it is not redacted, its signature is one
// of the sentinel field names (or absent), and its trimmed text reaches
// `minChars`.
export function isRewritable(b, minChars) {
    const signature = b.thinkingSignature ?? '';
    const editable = !b.redacted && SENTINEL_SIGNATURES.has(signature);
    return editable && b.thinking.trim().length >= minChars;
}
|
|
21
|
+
/** Eligible thinking blocks older than the keep-last window. May contain
|
|
22
|
+
* duplicates (the same reasoning across turns) — callers dedupe by hash. */
|
|
23
|
+
export function selectCandidates(messages, opts) {
|
|
24
|
+
const cutoff = messages.length - opts.keepLast;
|
|
25
|
+
const out = [];
|
|
26
|
+
for (let i = 0; i < cutoff; i++) {
|
|
27
|
+
const m = messages[i];
|
|
28
|
+
if (m.role !== 'assistant' || !Array.isArray(m.content))
|
|
29
|
+
continue;
|
|
30
|
+
for (const b of m.content) {
|
|
31
|
+
if (isThinkingBlock(b) && isRewritable(b, opts.minChars)) {
|
|
32
|
+
out.push({ hash: hashText(b.thinking), text: b.thinking });
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
return out;
|
|
37
|
+
}
|
|
38
|
+
/** Return a copy of `messages` with cached compressions swapped into eligible
|
|
39
|
+
* thinking blocks. Unchanged messages keep their identity. `thinkingSignature`
|
|
40
|
+
* and block `type` are preserved so the local provider still replays the (now
|
|
41
|
+
* shorter) reasoning. A compression is only applied when it actually shrinks
|
|
42
|
+
* the block, so this can never expand context. */
|
|
43
|
+
export function applyRewrites(messages, opts, lookup) {
|
|
44
|
+
const cutoff = messages.length - opts.keepLast;
|
|
45
|
+
let rewritten = 0;
|
|
46
|
+
const out = messages.map((m, i) => {
|
|
47
|
+
if (i >= cutoff || m.role !== 'assistant' || !Array.isArray(m.content))
|
|
48
|
+
return m;
|
|
49
|
+
let changed = false;
|
|
50
|
+
const content = m.content.map(b => {
|
|
51
|
+
if (!isThinkingBlock(b) || !isRewritable(b, opts.minChars))
|
|
52
|
+
return b;
|
|
53
|
+
const compressed = lookup(hashText(b.thinking));
|
|
54
|
+
if (compressed === undefined || compressed.length >= b.thinking.length)
|
|
55
|
+
return b;
|
|
56
|
+
changed = true;
|
|
57
|
+
rewritten++;
|
|
58
|
+
return { ...b, thinking: compressed };
|
|
59
|
+
});
|
|
60
|
+
return changed ? { ...m, content } : m;
|
|
61
|
+
});
|
|
62
|
+
return { messages: out, rewritten };
|
|
63
|
+
}
|
package/dist/index.js
CHANGED
|
@@ -2,9 +2,11 @@ import { registerTask } from './task/orchestrator.js';
|
|
|
2
2
|
import { registerTaskAuto } from './task/auto-orchestrator.js';
|
|
3
3
|
import { registerWorkers } from './workers/index.js';
|
|
4
4
|
import { registerRemote } from './remote/register.js';
|
|
5
|
+
import { registerContextCompression } from './context/compress.js';
|
|
5
6
|
/** Extension entry point: runs each feature's registration against `pi`, in order. */
export default function (pi) {
    const registrars = [
        registerTask,
        registerTaskAuto,
        registerWorkers,
        registerRemote,
        registerContextCompression
    ];
    for (const register of registrars) {
        register(pi);
    }
}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import type { WebSocket } from 'ws';
|
|
2
2
|
export declare function addClient(ws: WebSocket): void;
|
|
3
3
|
export declare function removeClient(ws: WebSocket): void;
|
|
4
|
-
export declare function clientCount(): number;
|
|
5
4
|
export declare function broadcast(msg: unknown): void;
|
|
6
5
|
export declare function sendTo(ws: WebSocket, msg: unknown): void;
|
package/dist/remote/broadcast.js
CHANGED
|
@@ -9,9 +9,6 @@ export function addClient(ws) {
|
|
|
9
9
|
export function removeClient(ws) {
|
|
10
10
|
clients.delete(ws);
|
|
11
11
|
}
|
|
12
|
-
export function clientCount() {
|
|
13
|
-
return clients.size;
|
|
14
|
-
}
|
|
15
12
|
export function broadcast(msg) {
|
|
16
13
|
const json = JSON.stringify(msg);
|
|
17
14
|
for (const ws of clients) {
|
|
@@ -44,7 +44,7 @@ export interface ResetMessage {
|
|
|
44
44
|
* session-state.ts (its serializer); re-exported here as part of the wire type. */
|
|
45
45
|
export type { SnapshotMessage } from './session-state.js';
|
|
46
46
|
/** Server → browser messages. The live text_delta / tool_* / agent_* /
|
|
47
|
-
*
|
|
47
|
+
* user_message deltas are emitted by the SessionState mutators
|
|
48
48
|
* and not all enumerated here; the snapshot below carries the full state. */
|
|
49
49
|
export type ServerMessage = PromptMessage | PromptResolvedMessage | WidgetMessage | NotifyMessage | ViewerMessage | ContextMessage | ResetMessage | import('./session-state.js').SnapshotMessage;
|
|
50
50
|
/** Browser → server messages. */
|
package/dist/remote/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createServer } from 'node:http';
|
|
2
2
|
import { networkInterfaces } from 'node:os';
|
|
3
3
|
import { WebSocketServer } from 'ws';
|
|
4
|
-
import { addClient, removeClient,
|
|
4
|
+
import { addClient, removeClient, sendTo } from './broadcast.js';
|
|
5
5
|
import { answerPrompt } from './bridge.js';
|
|
6
6
|
import { getState, snapshot } from './session-state.js';
|
|
7
7
|
import { isClientMessage } from './protocol.js';
|
|
@@ -121,7 +121,6 @@ export async function startServer(onMessage, getHtml) {
|
|
|
121
121
|
handle.onFirstConnect = null;
|
|
122
122
|
// One authoritative snapshot — the client replaces its whole view with it.
|
|
123
123
|
sendTo(ws, snapshot());
|
|
124
|
-
broadcast({ type: 'client_count', count: clientCount() });
|
|
125
124
|
ws.on('message', data => {
|
|
126
125
|
let msg;
|
|
127
126
|
try {
|
|
@@ -144,7 +143,6 @@ export async function startServer(onMessage, getHtml) {
|
|
|
144
143
|
});
|
|
145
144
|
ws.on('close', () => {
|
|
146
145
|
removeClient(ws);
|
|
147
|
-
broadcast({ type: 'client_count', count: clientCount() });
|
|
148
146
|
});
|
|
149
147
|
});
|
|
150
148
|
await new Promise(resolve => httpServer.listen(port, '0.0.0.0', resolve));
|
package/dist/remote/ui.js
CHANGED
|
@@ -57,10 +57,6 @@ export function html(wsUrl) {
|
|
|
57
57
|
96% { text-shadow: 1px 0 var(--teal), -1px 0 var(--red); transform: translate(-1px, 0); }
|
|
58
58
|
}
|
|
59
59
|
@media (prefers-reduced-motion: reduce) { #header .title { animation: none; } }
|
|
60
|
-
#header .status { color: var(--subtext0); font-size: 11px; display: inline-flex; align-items: center; gap: 5px; }
|
|
61
|
-
#header .cdot { color: var(--yellow); }
|
|
62
|
-
#header .cdot.up { color: var(--green); }
|
|
63
|
-
#header .cdot.down { color: var(--red); }
|
|
64
60
|
#header .hgroup { display: flex; align-items: center; gap: 10px; }
|
|
65
61
|
#bell {
|
|
66
62
|
background: none; border: none; color: var(--subtext1); cursor: pointer;
|
|
@@ -233,7 +229,6 @@ export function html(wsUrl) {
|
|
|
233
229
|
<div id="header">
|
|
234
230
|
<span class="title">pi-task remote</span>
|
|
235
231
|
<div class="hgroup">
|
|
236
|
-
<span class="status" id="client-status"><span class="cdot" id="conn-dot">○</span></span>
|
|
237
232
|
<button id="bell" aria-label="Toggle notifications" title="Notifications">◯</button>
|
|
238
233
|
</div>
|
|
239
234
|
</div>
|
|
@@ -272,12 +267,6 @@ export function html(wsUrl) {
|
|
|
272
267
|
function setContextBar(usage) {
|
|
273
268
|
if (usage && usage.percent != null) contextFill.style.width = usage.percent + '%';
|
|
274
269
|
}
|
|
275
|
-
const connDot = document.getElementById('conn-dot');
|
|
276
|
-
// state: 'connecting' (○ yellow) | 'up' (● green) | 'down' (● red)
|
|
277
|
-
function setConn(state) {
|
|
278
|
-
connDot.textContent = state === 'connecting' ? '\\u25CB' : '\\u25CF';
|
|
279
|
-
connDot.className = 'cdot' + (state === 'up' ? ' up' : state === 'down' ? ' down' : '');
|
|
280
|
-
}
|
|
281
270
|
const reconnectOverlay = document.getElementById('reconnect-overlay');
|
|
282
271
|
const reconnectMsg = document.getElementById('reconnect-msg');
|
|
283
272
|
const cmdSuggestions = document.getElementById('cmd-suggestions');
|
|
@@ -941,9 +930,6 @@ export function html(wsUrl) {
|
|
|
941
930
|
// Seeds the bar for a client that joined mid-session.
|
|
942
931
|
setContextBar(msg.contextUsage);
|
|
943
932
|
break;
|
|
944
|
-
case 'client_count':
|
|
945
|
-
setConn('up');
|
|
946
|
-
break;
|
|
947
933
|
case 'prompt':
|
|
948
934
|
showPrompt(msg);
|
|
949
935
|
break;
|
|
@@ -983,9 +969,9 @@ export function html(wsUrl) {
|
|
|
983
969
|
cmdActive = []; cmdIndex = -1; renderSuggestions();
|
|
984
970
|
// Slash commands are handled server-side and produce no chat turn.
|
|
985
971
|
if (text.startsWith('/')) return;
|
|
986
|
-
//
|
|
987
|
-
//
|
|
988
|
-
|
|
972
|
+
// The server records the message via addUserTurn and broadcasts a
|
|
973
|
+
// user_message back to every client (us included), which renders the
|
|
974
|
+
// bubble. Don't render it here too, or the sender sees it twice.
|
|
989
975
|
setEnabled(false);
|
|
990
976
|
showThinking();
|
|
991
977
|
}
|
|
@@ -1013,7 +999,6 @@ export function html(wsUrl) {
|
|
|
1013
999
|
if (reconnectAnim) { clearInterval(reconnectAnim); reconnectAnim = null; }
|
|
1014
1000
|
reconnectOverlay.classList.remove('visible');
|
|
1015
1001
|
reconnectDelay = 1000;
|
|
1016
|
-
setConn('up');
|
|
1017
1002
|
setEnabled(true);
|
|
1018
1003
|
});
|
|
1019
1004
|
ws.addEventListener('message', (e) => {
|
|
@@ -1021,7 +1006,6 @@ export function html(wsUrl) {
|
|
|
1021
1006
|
});
|
|
1022
1007
|
ws.addEventListener('close', () => {
|
|
1023
1008
|
setEnabled(false);
|
|
1024
|
-
setConn('down');
|
|
1025
1009
|
reconnectOverlay.classList.add('visible');
|
|
1026
1010
|
// Animate the same braille spinner used elsewhere, with a live countdown.
|
|
1027
1011
|
const until = Date.now() + reconnectDelay;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
// src/think-test/cli.ts
|
|
2
|
+
import { readFileSync } from 'node:fs';
|
|
3
|
+
import { parseTranscript, decisionPoints } from './transcript.js';
|
|
4
|
+
import { applyMode } from './transform.js';
|
|
5
|
+
import { toOpenAiMessages } from './serialize.js';
|
|
6
|
+
import { createHttpClient } from './client.js';
|
|
7
|
+
import { buildCompressionMap } from './compressor.js';
|
|
8
|
+
import { scoreSamples, aggregate } from './score.js';
|
|
9
|
+
const ENDPOINT = process.env.PI_THINKTEST_ENDPOINT ?? 'http://localhost:8080/v1/chat/completions';
|
|
10
|
+
const MODES = ['full', 'none', 'compressed'];
|
|
11
|
+
/**
 * Read a numeric `--<name>=<value>` option from `process.argv`.
 *
 * @param name Option name without the leading dashes.
 * @param def  Value returned when the option is absent.
 * @returns The parsed number, or `def` when the option was not passed.
 * @throws Error `--<name> must be a number` when the value is empty, blank,
 *         or does not parse to a finite number.
 */
function flag(name, def) {
    const prefix = `--${name}=`;
    const hit = process.argv.find(a => a.startsWith(prefix));
    if (!hit)
        return def;
    // Take everything after the first '=' so input like `--n=5=3` is rejected
    // instead of being silently truncated to 5.
    const rawValue = hit.slice(prefix.length).trim();
    // Number('') is 0, which would turn `--n=` into a silent zero; treat an
    // empty value as invalid instead.
    const value = rawValue === '' ? Number.NaN : Number(rawValue);
    if (!Number.isFinite(value))
        throw new Error(`--${name} must be a number`);
    return value;
}
|
|
20
|
+
/**
 * Load the captured tool-schema fixture that sits next to this module.
 *
 * @returns The parsed, non-empty array of tool schemas.
 * @throws Error when the fixture cannot be read or is not a non-empty JSON
 *         array; a JSON syntax error from parsing propagates as-is.
 */
function readFixtureTools() {
    const fixtureUrl = new URL('./__fixtures__/tools.json', import.meta.url);
    const loadText = () => {
        try {
            return readFileSync(fixtureUrl, 'utf8');
        }
        catch {
            throw new Error('missing src/think-test/__fixtures__/tools.json — capture it first (see plan Task 6). '
                + 'Without the tools schema the model cannot emit tool_calls and the test is meaningless.');
        }
    };
    const tools = JSON.parse(loadText());
    const usable = Array.isArray(tools) && tools.length !== 0;
    if (!usable) {
        throw new Error('tools.json must be a non-empty JSON array of tool schemas');
    }
    return tools;
}
|
|
36
|
+
/**
 * CLI driver. For each decision point in the session transcript it replays the
 * prior context in every mode of `MODES`, samples the model `n` times per
 * mode, scores the samples against the recorded action, and prints per-turn
 * and aggregate agreement rates.
 *
 * Progress goes to stderr; the final aggregate table goes to stdout.
 */
async function main() {
    const sessionPath = process.argv[2];
    if (!sessionPath) {
        console.error('usage: bun run think-test <session.jsonl> [--n=5] [--limit=0]');
        process.exit(1);
    }
    const n = flag('n', 5);
    const limit = flag('limit', 0); // 0 = all turns
    const tools = readFixtureTools();
    const readSession = () => {
        try {
            return readFileSync(sessionPath, 'utf8');
        }
        catch {
            throw new Error(`cannot read session file: ${sessionPath}`);
        }
    };
    const messages = parseTranscript(readSession());
    const withPrior = decisionPoints(messages).filter(p => p.prior.length > 0);
    if (withPrior.length === 0) {
        throw new Error('no decision points with prior context found in transcript');
    }
    const points = limit > 0 ? withPrior.slice(0, limit) : withPrior;
    const sampler = createHttpClient(fetch, ENDPOINT, tools);
    const textClient = createHttpClient(fetch, ENDPOINT, []); // compression + judging: no tools, want text
    // Only compress thinking the selected points reference (deduped inside
    // buildCompressionMap). With --limit this avoids compressing the whole
    // transcript; a full run covers every turn anyway.
    const referenced = points.flatMap(p => p.prior);
    console.error(`compressing prior thinking referenced by ${points.length} points…`);
    const compressed = await buildCompressionMap(referenced, textClient);
    // Dump the compressions for manual inspection (guards against a weak prompt).
    console.error('--- compression samples (first 3) ---');
    for (const [orig, comp] of Array.from(compressed).slice(0, 3)) {
        console.error(`[${orig.length}→${comp.length}] ${comp.slice(0, 200)}`);
    }
    const perTurn = { full: [], none: [], compressed: [] };
    for (let turn = 0; turn < points.length; turn += 1) {
        const pt = points[turn];
        for (const mode of MODES) {
            const body = toOpenAiMessages(applyMode(pt.prior, mode, compressed));
            const samples = [];
            // Sampled sequentially, one request at a time.
            while (samples.length < n) {
                samples.push(await sampler.complete(body, { temperature: 1 }));
            }
            const rate = await scoreSamples(pt.recordedTool, pt.recordedArgs, samples, textClient);
            perTurn[mode].push(rate);
        }
        const summary = MODES.map(m => `${m}=${perTurn[m][turn].toFixed(2)}`).join(' ');
        console.error(`turn ${turn + 1}/${points.length} (${pt.recordedTool}) ` + summary);
    }
    console.log('\n=== aggregate agreement vs recorded action ===');
    for (const mode of MODES) {
        const label = mode.padEnd(11);
        console.log(`${label} ${aggregate(perTurn[mode]).toFixed(3)}`);
    }
    console.log('\nread: full=ceiling, none=floor, compressed=verdict');
}
// Any failure is reported as a single line on stderr and a non-zero exit code.
main().catch((err) => {
    const message = err instanceof Error ? err.message : String(err);
    console.error(message);
    process.exit(1);
});
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { Action } from './types.js';
|
|
2
|
+
import type { OpenAiMessage } from './serialize.js';
|
|
3
|
+
export interface CompletionOpts {
|
|
4
|
+
temperature: number;
|
|
5
|
+
}
|
|
6
|
+
export interface ModelClient {
|
|
7
|
+
complete(messages: OpenAiMessage[], opts: CompletionOpts): Promise<Action>;
|
|
8
|
+
}
|
|
9
|
+
export interface RawChoice {
|
|
10
|
+
message: {
|
|
11
|
+
content?: string | null;
|
|
12
|
+
tool_calls?: {
|
|
13
|
+
function: {
|
|
14
|
+
name: string;
|
|
15
|
+
arguments: string;
|
|
16
|
+
};
|
|
17
|
+
}[];
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
/** Normalize one completion choice into an Action: a tool call (with parsed
|
|
21
|
+
* args, `{}` on malformed JSON) or, absent any tool call, the text. */
|
|
22
|
+
export declare function parseChoice(choice: RawChoice): Action;
|
|
23
|
+
/** HTTP client for the local llama-server. `tools` is the captured schema array
|
|
24
|
+
* — without it the model cannot emit tool_calls and the test is meaningless.
|
|
25
|
+
* `fetchFn`/`endpoint` are injectable for tests. */
|
|
26
|
+
export declare function createHttpClient(fetchFn: typeof fetch, endpoint: string, tools: unknown[]): ModelClient;
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/** Normalize one completion choice into an Action: a tool call (with parsed
|
|
2
|
+
* args, `{}` on malformed JSON) or, absent any tool call, the text. */
|
|
3
|
+
export function parseChoice(choice) {
|
|
4
|
+
const tc = choice.message.tool_calls?.[0];
|
|
5
|
+
if (tc) {
|
|
6
|
+
let args;
|
|
7
|
+
try {
|
|
8
|
+
args = JSON.parse(tc.function.arguments);
|
|
9
|
+
}
|
|
10
|
+
catch {
|
|
11
|
+
args = {};
|
|
12
|
+
}
|
|
13
|
+
return { tool: tc.function.name, args };
|
|
14
|
+
}
|
|
15
|
+
return { text: choice.message.content ?? '' };
|
|
16
|
+
}
|
|
17
|
+
/** HTTP client for the local llama-server. `tools` is the captured schema array
|
|
18
|
+
* — without it the model cannot emit tool_calls and the test is meaningless.
|
|
19
|
+
* `fetchFn`/`endpoint` are injectable for tests. */
|
|
20
|
+
export function createHttpClient(fetchFn, endpoint, tools) {
|
|
21
|
+
return {
|
|
22
|
+
async complete(messages, opts) {
|
|
23
|
+
const res = await fetchFn(endpoint, {
|
|
24
|
+
method: 'POST',
|
|
25
|
+
headers: { 'content-type': 'application/json' },
|
|
26
|
+
body: JSON.stringify({ messages, tools, temperature: opts.temperature })
|
|
27
|
+
});
|
|
28
|
+
if (!res.ok)
|
|
29
|
+
throw new Error(`model HTTP ${res.status}`);
|
|
30
|
+
const json = (await res.json());
|
|
31
|
+
const choice = json.choices?.[0];
|
|
32
|
+
if (!choice)
|
|
33
|
+
throw new Error('model returned no choices');
|
|
34
|
+
return parseChoice(choice);
|
|
35
|
+
}
|
|
36
|
+
};
|
|
37
|
+
}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { SessionMsg } from './types.js';
|
|
2
|
+
import type { ModelClient } from './client.js';
|
|
3
|
+
/**
 * Compress each distinct thinking block found in `messages` once
 * (compress-once semantics, even here in the test).
 *
 * @param messages Session messages to scan for thinking blocks.
 * @param client   Model client used to produce the compressed text.
 * @returns A map keyed by the original thinking text whose values are the
 *          compressed text.
 */
export declare function buildCompressionMap(
    messages: SessionMsg[],
    client: ModelClient
): Promise<Map<string, string>>;
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
const COMPRESS_PROMPT = (thinking) => `Compress the following reasoning trace. Keep every decision, conclusion, `
|
|
2
|
+
+ `constraint, and fact the author will rely on later. Drop restated questions, `
|
|
3
|
+
+ `false starts, self-talk, and verbosity. Output only the compressed reasoning, `
|
|
4
|
+
+ `no preamble.\n\n---\n${thinking}\n---\n\n/no_think`;
|
|
5
|
+
/** Compress every distinct thinking block once (compress-once semantics, even
|
|
6
|
+
* here in the test) and return a map from original text → compressed text. */
|
|
7
|
+
export async function buildCompressionMap(messages, client) {
|
|
8
|
+
const unique = new Set();
|
|
9
|
+
for (const m of messages) {
|
|
10
|
+
if (m.role !== 'assistant')
|
|
11
|
+
continue;
|
|
12
|
+
for (const c of m.content) {
|
|
13
|
+
if (c.type === 'thinking')
|
|
14
|
+
unique.add(c.thinking);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
const map = new Map();
|
|
18
|
+
for (const original of unique) {
|
|
19
|
+
const action = await client.complete([{ role: 'user', content: COMPRESS_PROMPT(original) }], {
|
|
20
|
+
temperature: 0
|
|
21
|
+
});
|
|
22
|
+
map.set(original, (action.text ?? '').trim());
|
|
23
|
+
}
|
|
24
|
+
return map;
|
|
25
|
+
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { ModelClient } from './client.js';
|
|
2
|
+
/**
 * Ask the model whether two argument objects are equivalent in intent.
 * Deterministic (temperature 0). Only meaningful when the tool name already
 * matched.
 *
 * @param tool   Name of the tool both calls were made to.
 * @param a      Arguments of the first call.
 * @param b      Arguments of the second call.
 * @param client Model client that acts as the judge.
 * @returns Whether the judge deems the two argument objects equivalent.
 */
export declare function judgeArgs(
    tool: string,
    a: Record<string, unknown>,
    b: Record<string, unknown>,
    client: ModelClient
): Promise<boolean>;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
const JUDGE_PROMPT = (tool, a, b) => `Two calls to the tool "${tool}" were made. Are their arguments equivalent `
|
|
2
|
+
+ `in intent (same target/effect), ignoring cosmetic differences? Answer with `
|
|
3
|
+
+ `YES or NO only.\n\nA: ${JSON.stringify(a)}\nB: ${JSON.stringify(b)}\n\n/no_think`;
|
|
4
|
+
/** Ask the model whether two arg objects are equivalent in intent. Deterministic
|
|
5
|
+
* (temperature 0). Only meaningful when the tool name already matched. */
|
|
6
|
+
export async function judgeArgs(tool, a, b, client) {
|
|
7
|
+
const action = await client.complete([{ role: 'user', content: JUDGE_PROMPT(tool, a, b) }], {
|
|
8
|
+
temperature: 0
|
|
9
|
+
});
|
|
10
|
+
return /\byes\b/i.test(action.text ?? '');
|
|
11
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Action } from './types.js';
|
|
2
|
+
import type { ModelClient } from './client.js';
|
|
3
|
+
/**
 * Agreement rate for one decision point's samples against the recorded action.
 *
 * A sample agrees when its tool name matches AND the judge deems its args
 * equivalent to the recorded ones; the judge is only consulted on a tool-name
 * match.
 *
 * @param recordedTool Tool name of the recorded action.
 * @param recordedArgs Arguments of the recorded action.
 * @param samples      Model-produced actions to compare.
 * @param judge        Model client used for the args-equivalence check.
 * @returns The fraction of samples that agree.
 */
export declare function scoreSamples(
    recordedTool: string,
    recordedArgs: Record<string, unknown>,
    samples: Action[],
    judge: ModelClient
): Promise<number>;
|
|
7
|
+
/**
 * Mean of per-turn agreement rates.
 *
 * @param perTurnRates One agreement rate per scored turn.
 * @returns The arithmetic mean, or 0 for an empty list.
 */
export declare function aggregate(
    perTurnRates: number[]
): number;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { judgeArgs } from './judge.js';
|
|
2
|
+
/**
 * Agreement rate for one decision point's samples against the recorded action.
 *
 * A sample counts as a hit when its `tool` equals `recordedTool` AND
 * `judgeArgs` reports its args (or `{}` when absent) equivalent to
 * `recordedArgs`. Samples with a different tool never reach the judge.
 * Samples are judged one after another, in order.
 *
 * @returns hits divided by the number of samples; 0 when there are no samples.
 */
export async function scoreSamples(recordedTool, recordedArgs, samples, judge) {
    if (samples.length === 0) {
        return 0;
    }
    let agreed = 0;
    for (const sample of samples) {
        if (sample.tool === recordedTool) {
            const equivalent = await judgeArgs(recordedTool, recordedArgs, sample.args ?? {}, judge);
            if (equivalent) {
                agreed += 1;
            }
        }
    }
    return agreed / samples.length;
}
|
|
17
|
+
/**
 * Arithmetic mean of the per-turn agreement rates.
 *
 * @param perTurnRates List of rates to average.
 * @returns The sum of the rates divided by their count; 0 for an empty list.
 */
export function aggregate(perTurnRates) {
    const count = perTurnRates.length;
    if (count === 0) {
        return 0;
    }
    let sum = 0;
    perTurnRates.forEach((rate) => {
        sum += rate;
    });
    return sum / count;
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { SessionMsg } from './types.js';
|
|
2
|
+
/** One message in the OpenAI chat-completions wire format. */
export interface OpenAiMessage {
    /** Who the message is attributed to. */
    role: 'user' | 'assistant' | 'tool';
    /** Plain-text body of the message. */
    content: string;
    /** Reasoning text carried alongside the message, when there is any. */
    reasoning_content?: string;
    /** Id of the tool call a `tool` message answers. */
    tool_call_id?: string;
    /** Tool calls attached to the message; `arguments` is a string. */
    tool_calls?: Array<{
        id: string;
        type: 'function';
        function: {
            name: string;
            arguments: string;
        };
    }>;
}
|
|
16
|
+
/**
 * Convert session messages to the OpenAI chat-completions wire format used by
 * llama-server, replicating pi's `reasoning_content` carry-back so prior
 * thinking is preserved exactly as in a real request.
 *
 * @param messages Session messages, in order.
 * @returns The messages in wire format.
 */
export declare function toOpenAiMessages(
    messages: SessionMsg[]
): OpenAiMessage[];
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/** Concatenate, in order and with no separator, the `text` of every block
 * whose `type` is `'text'`. Blocks of any other type are ignored. */
function joinText(blocks) {
    const pieces = [];
    blocks.forEach((block) => {
        if (block.type === 'text') {
            pieces.push(block.text);
        }
    });
    return pieces.join('');
}
|
|
7
|
+
/**
 * Convert session messages to the OpenAI chat-completions wire format used by
 * llama-server, replicating pi's `reasoning_content` carry-back so prior
 * thinking is preserved exactly as in a real request.
 *
 * Mapping, one output message per input message:
 *  - `user`: string content is passed through; block content is reduced to
 *    its joined text.
 *  - `assistant`: `content` is the joined text; thinking blocks are joined
 *    with `\n` into `reasoning_content` (omitted when the joined string is
 *    empty); toolCall blocks become `tool_calls` with JSON-stringified
 *    arguments (omitted when there are none).
 *  - any other role: emitted as a `tool` message carrying `toolCallId` and
 *    the joined text.
 */
export function toOpenAiMessages(messages) {
    const wire = [];
    for (const message of messages) {
        switch (message.role) {
            case 'user': {
                const body = typeof message.content === 'string' ? message.content : joinText(message.content);
                wire.push({ role: 'user', content: body });
                break;
            }
            case 'assistant': {
                const text = joinText(message.content);
                const reasoningParts = [];
                const calls = [];
                for (const block of message.content) {
                    if (block.type === 'thinking') {
                        reasoningParts.push(block.thinking);
                    }
                    else if (block.type === 'toolCall') {
                        calls.push({
                            id: block.id,
                            type: 'function',
                            function: { name: block.name, arguments: JSON.stringify(block.arguments) }
                        });
                    }
                }
                const reasoning = reasoningParts.join('\n');
                // Key order stays role → content → reasoning_content → tool_calls.
                wire.push({
                    role: 'assistant',
                    content: text,
                    ...(reasoning.length > 0 ? { reasoning_content: reasoning } : {}),
                    ...(calls.length > 0 ? { tool_calls: calls } : {})
                });
                break;
            }
            default:
                wire.push({ role: 'tool', tool_call_id: message.toolCallId, content: joinText(message.content) });
        }
    }
    return wire;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { SessionMsg, DecisionPoint } from './types.js';
|
|
2
|
+
/**
 * Parse a pi session `.jsonl` into the ordered message list.
 *
 * Non-message rows (session header, model_change, blanks, malformed lines)
 * are dropped.
 *
 * @param jsonl Raw contents of the session file.
 * @returns The session's messages, in file order.
 */
export declare function parseTranscript(
    jsonl: string
): SessionMsg[];
|
|
5
|
+
/**
 * Extract the scoring units from a session: every assistant turn whose
 * content includes a toolCall becomes one decision point, made of the prior
 * context plus the recorded tool name + args.
 *
 * @param messages Session messages, in order.
 * @returns One decision point per qualifying assistant turn.
 */
export declare function decisionPoints(
    messages: SessionMsg[]
): DecisionPoint[];
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Parse a pi session `.jsonl` into the ordered message list, dropping
 * non-message rows (session header, model_change, blanks, malformed lines).
 *
 * A row is kept only when it parses as JSON, is a non-null object, has
 * `type === 'message'` and carries a truthy `message`. Rows that parse to a
 * JSON scalar or to `null` are dropped like any other non-message row rather
 * than crashing on the property access.
 *
 * @param jsonl Raw file contents; lines are split on `\n` and trimmed, so
 *              `\r\n` line endings are tolerated.
 * @returns The `message` payload of each kept row, in file order.
 */
export function parseTranscript(jsonl) {
    const out = [];
    for (const line of jsonl.split('\n')) {
        const trimmed = line.trim();
        if (!trimmed)
            continue;
        let rec;
        try {
            rec = JSON.parse(trimmed);
        }
        catch {
            continue;
        }
        // `null`, numbers, strings and booleans are valid JSON but not rows.
        if (rec === null || typeof rec !== 'object')
            continue;
        if (rec.type !== 'message' || !rec.message)
            continue;
        out.push(rec.message);
    }
    return out;
}
|
|
23
|
+
/**
 * Collect the scoring units of a session.
 *
 * Every assistant turn whose content includes a toolCall yields one entry:
 * the turn's `index`, the `prior` messages (everything before that index), and
 * the name + arguments of the turn's FIRST toolCall block as
 * `recordedTool` / `recordedArgs`.
 */
export function decisionPoints(messages) {
    return messages.reduce((found, turn, position) => {
        if (turn.role === 'assistant') {
            const call = turn.content.find((block) => block.type === 'toolCall');
            if (call) {
                found.push({
                    index: position,
                    prior: messages.slice(0, position),
                    recordedTool: call.name,
                    recordedArgs: call.arguments
                });
            }
        }
        return found;
    }, []);
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { SessionMsg, ThinkMode } from './types.js';
|
|
2
|
+
/**
 * Produce the message list to send for a given arm.
 *
 *  - `full` is identity.
 *  - `none` removes thinking.
 *  - `compressed` swaps each thinking block's text for its precomputed summary
 *    (keyed by the original text), leaving it verbatim on a cache miss.
 *
 * @param messages   Session messages to transform.
 * @param mode       Which arm to produce.
 * @param compressed Original thinking text → precomputed summary.
 */
export declare function applyMode(
    messages: SessionMsg[],
    mode: ThinkMode,
    compressed: Map<string, string>
): SessionMsg[];
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/** Return a new message list in which every assistant message is a shallow
 * copy whose `content` no longer contains `thinking` blocks. Messages with any
 * other role are returned as the same object. */
function stripThinking(messages) {
    const keepNonThinking = (blocks) => blocks.filter((block) => !(block.type === 'thinking'));
    return messages.map((message) => {
        if (message.role !== 'assistant') {
            return message;
        }
        return { ...message, content: keepNonThinking(message.content) };
    });
}
|
|
4
|
+
function compressThinking(messages, compressed) {
|
|
5
|
+
return messages.map(m => m.role === 'assistant' ?
|
|
6
|
+
{
|
|
7
|
+
...m,
|
|
8
|
+
content: m.content.map(c => c.type === 'thinking' ?
|
|
9
|
+
{ ...c, thinking: compressed.get(c.thinking) ?? c.thinking }
|
|
10
|
+
: c)
|
|
11
|
+
}
|
|
12
|
+
: m);
|
|
13
|
+
}
|
|
14
|
+
/**
 * Produce the message list to send for a given arm.
 *
 *  - `full`: the input list itself, untouched.
 *  - `none`: thinking removed via `stripThinking`.
 *  - anything else (i.e. `compressed`): each thinking block's text swapped for
 *    its precomputed summary via `compressThinking` (keyed by the original
 *    text), leaving it verbatim on a cache miss.
 */
export function applyMode(messages, mode, compressed) {
    switch (mode) {
        case 'full':
            return messages;
        case 'none':
            return stripThinking(messages);
        default:
            return compressThinking(messages, compressed);
    }
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/** The arm being produced: thinking kept as-is, removed, or compressed. */
export type ThinkMode =
    | 'full'
    | 'none'
    | 'compressed';
/** An assistant content block holding reasoning text. */
export interface ThinkingBlock {
    type: 'thinking';
    /** The reasoning text itself. */
    thinking: string;
    thinkingSignature?: string;
}
/** A content block holding plain text. */
export interface TextBlock {
    type: 'text';
    text: string;
}
/** An assistant content block recording a call to a tool. */
export interface ToolCallBlock {
    type: 'toolCall';
    /** Identifier of this call. */
    id: string;
    /** Name of the tool that was called. */
    name: string;
    /** Arguments passed to the tool, as a key/value record. */
    arguments: Record<string, unknown>;
}
/** Any block that may appear in an assistant message's content. */
export type AssistantBlock =
    | ThinkingBlock
    | TextBlock
    | ToolCallBlock;
/** A user message; content is either a plain string or text blocks. */
export interface UserMsg {
    role: 'user';
    content: string | TextBlock[];
}
/** An assistant message made of thinking, text and tool-call blocks. */
export interface AssistantMsg {
    role: 'assistant';
    content: AssistantBlock[];
}
/** The result of a tool call, carrying the id and name of the tool call. */
export interface ToolResultMsg {
    role: 'toolResult';
    toolCallId: string;
    toolName: string;
    content: TextBlock[];
}
/** Any message that can appear in a session. */
export type SessionMsg =
    | UserMsg
    | AssistantMsg
    | ToolResultMsg;
/** An assistant turn whose recorded action was a tool call — the unit we score. */
export interface DecisionPoint {
    /** Position of the assistant turn in the session's message list. */
    index: number;
    /** The messages that precede the turn. */
    prior: SessionMsg[];
    /** Name of the tool the recorded turn called. */
    recordedTool: string;
    /** Arguments the recorded turn passed to that tool. */
    recordedArgs: Record<string, unknown>;
}
/** The normalized next action returned by the model for one sample. */
export interface Action {
    tool?: string;
    args?: Record<string, unknown>;
    text?: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { readFileSync } from 'node:fs';
|
|
2
2
|
import { fileURLToPath } from 'node:url';
|
|
3
3
|
import { dirname, join } from 'node:path';
|
|
4
|
-
import {
|
|
4
|
+
import { parseHTML } from 'linkedom';
|
|
5
5
|
import { Readability } from '@mozilla/readability';
|
|
6
6
|
import TurndownService from 'turndown';
|
|
7
7
|
const turndown = new TurndownService({
|
|
@@ -10,22 +10,22 @@ const turndown = new TurndownService({
|
|
|
10
10
|
bulletListMarker: '-'
|
|
11
11
|
});
|
|
12
12
|
export function cleanHtml(html, baseUrl) {
|
|
13
|
-
const
|
|
14
|
-
const reader = new Readability(
|
|
13
|
+
const { document } = parseHTML(html);
|
|
14
|
+
const reader = new Readability(document);
|
|
15
15
|
const parsed = reader.parse();
|
|
16
16
|
if (parsed && parsed.content) {
|
|
17
17
|
return {
|
|
18
|
-
title: parsed.title ||
|
|
18
|
+
title: parsed.title || document.title || new URL(baseUrl).hostname,
|
|
19
19
|
markdown: turndown.turndown(parsed.content).trim(),
|
|
20
20
|
finalUrl: baseUrl
|
|
21
21
|
};
|
|
22
22
|
}
|
|
23
23
|
// Fallback: turndown the body
|
|
24
|
-
const body =
|
|
24
|
+
const body = document.body;
|
|
25
25
|
const bodyHtml = body ? body.innerHTML : '';
|
|
26
26
|
const markdown = turndown.turndown(bodyHtml).trim();
|
|
27
27
|
return {
|
|
28
|
-
title:
|
|
28
|
+
title: document.title || new URL(baseUrl).hostname,
|
|
29
29
|
markdown,
|
|
30
30
|
finalUrl: baseUrl
|
|
31
31
|
};
|
|
@@ -75,7 +75,12 @@ function decoderFor(contentType) {
|
|
|
75
75
|
const charset = match?.[1]?.trim().replace(/^["']|["']$/g, '');
|
|
76
76
|
if (charset) {
|
|
77
77
|
try {
|
|
78
|
-
|
|
78
|
+
// The runtime accepts any charset label string; the type is narrowed
|
|
79
|
+
// to a known-encoding union by Bun/Node's lib (DOM's looser signature
|
|
80
|
+
// is no longer pulled in transitively). Cast to the actual param type.
|
|
81
|
+
return new TextDecoder(charset, {
|
|
82
|
+
fatal: false
|
|
83
|
+
});
|
|
79
84
|
}
|
|
80
85
|
catch {
|
|
81
86
|
// Unknown/unsupported label — fall through to UTF-8.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mjasnikovs/pi-task",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.8.0",
|
|
4
4
|
"description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -23,13 +23,13 @@
|
|
|
23
23
|
"@earendil-works/pi-tui": "0.78.1"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
|
-
"@mozilla/readability": "
|
|
26
|
+
"@mozilla/readability": "0.6.0",
|
|
27
27
|
"@sinclair/typebox": "0.34.49",
|
|
28
|
-
"
|
|
29
|
-
"qrcode": "
|
|
30
|
-
"turndown": "
|
|
31
|
-
"web-push": "
|
|
32
|
-
"ws": "
|
|
28
|
+
"linkedom": "0.18.12",
|
|
29
|
+
"qrcode": "1.5.4",
|
|
30
|
+
"turndown": "7.2.4",
|
|
31
|
+
"web-push": "3.6.7",
|
|
32
|
+
"ws": "8.21.0"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|
|
35
35
|
"@earendil-works/pi-agent-core": "0.78.1",
|
|
@@ -38,11 +38,10 @@
|
|
|
38
38
|
"@eslint/js": "10.0.1",
|
|
39
39
|
"@sinclair/typebox": "0.34.49",
|
|
40
40
|
"@types/bun": "1.3.12",
|
|
41
|
-
"@types/
|
|
42
|
-
"@types/
|
|
43
|
-
"@types/
|
|
44
|
-
"@types/
|
|
45
|
-
"@types/ws": "^8.5.14",
|
|
41
|
+
"@types/qrcode": "1.5.6",
|
|
42
|
+
"@types/turndown": "5.0.6",
|
|
43
|
+
"@types/web-push": "3.6.4",
|
|
44
|
+
"@types/ws": "8.18.1",
|
|
46
45
|
"eslint": "10.2.1",
|
|
47
46
|
"globals": "17.5.0",
|
|
48
47
|
"prettier": "3.8.3",
|