@mjasnikovs/pi-task 0.7.2 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/assets/pi-logo.svg +4 -0
- package/dist/remote/broadcast.d.ts +0 -1
- package/dist/remote/broadcast.js +0 -3
- package/dist/remote/protocol.d.ts +1 -1
- package/dist/remote/server.js +1 -3
- package/dist/remote/ui.js +3 -19
- package/dist/shared/leaked-tool-call.d.ts +36 -0
- package/dist/shared/leaked-tool-call.js +60 -0
- package/dist/task/child-runner.d.ts +19 -1
- package/dist/task/child-runner.js +68 -17
- package/dist/task/failure-classifier.js +10 -1
- package/dist/task/phases.js +4 -0
- package/dist/workers/html-clean.js +12 -7
- package/dist/workers/pi-worker-core.d.ts +6 -0
- package/dist/workers/pi-worker-core.js +36 -21
- package/package.json +11 -12
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
<img src="./assets/pipeline.svg" alt="pi-task pipeline: a /task request runs through refine, research, grill, compose and critique, then the final spec is delivered to your main pi session in the same chat. Every phase boundary is persisted to .pi-tasks/TASK_NNNN.md, so the task is crash-safe and resumable." width="820"/>
|
|
4
4
|
|
|
5
|
-
#
|
|
5
|
+
# <img src="./assets/pi-logo.svg" alt="" height="30" align="top"/> pi-task
|
|
6
6
|
|
|
7
7
|
**Deterministic spec-orchestration for local models — with bundled web, docs, fetch, and worker sub-agent tools.**
|
|
8
8
|
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 180 180" width="180" height="180" role="img" aria-label="pi-task logo">
|
|
2
|
+
<rect width="180" height="180" rx="38" fill="#1e1e2e"/>
|
|
3
|
+
<text x="90" y="130" font-family="Georgia, serif" font-size="100" text-anchor="middle" fill="#cba6f7">π</text>
|
|
4
|
+
</svg>
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import type { WebSocket } from 'ws';
|
|
2
2
|
export declare function addClient(ws: WebSocket): void;
|
|
3
3
|
export declare function removeClient(ws: WebSocket): void;
|
|
4
|
-
export declare function clientCount(): number;
|
|
5
4
|
export declare function broadcast(msg: unknown): void;
|
|
6
5
|
export declare function sendTo(ws: WebSocket, msg: unknown): void;
|
package/dist/remote/broadcast.js
CHANGED
|
@@ -9,9 +9,6 @@ export function addClient(ws) {
|
|
|
9
9
|
export function removeClient(ws) {
|
|
10
10
|
clients.delete(ws);
|
|
11
11
|
}
|
|
12
|
-
export function clientCount() {
|
|
13
|
-
return clients.size;
|
|
14
|
-
}
|
|
15
12
|
export function broadcast(msg) {
|
|
16
13
|
const json = JSON.stringify(msg);
|
|
17
14
|
for (const ws of clients) {
|
|
@@ -44,7 +44,7 @@ export interface ResetMessage {
|
|
|
44
44
|
* session-state.ts (its serializer); re-exported here as part of the wire type. */
|
|
45
45
|
export type { SnapshotMessage } from './session-state.js';
|
|
46
46
|
/** Server → browser messages. The live text_delta / tool_* / agent_* /
|
|
47
|
-
*
|
|
47
|
+
* user_message deltas are emitted by the SessionState mutators
|
|
48
48
|
* and not all enumerated here; the snapshot below carries the full state. */
|
|
49
49
|
export type ServerMessage = PromptMessage | PromptResolvedMessage | WidgetMessage | NotifyMessage | ViewerMessage | ContextMessage | ResetMessage | import('./session-state.js').SnapshotMessage;
|
|
50
50
|
/** Browser → server messages. */
|
package/dist/remote/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { createServer } from 'node:http';
|
|
2
2
|
import { networkInterfaces } from 'node:os';
|
|
3
3
|
import { WebSocketServer } from 'ws';
|
|
4
|
-
import { addClient, removeClient,
|
|
4
|
+
import { addClient, removeClient, sendTo } from './broadcast.js';
|
|
5
5
|
import { answerPrompt } from './bridge.js';
|
|
6
6
|
import { getState, snapshot } from './session-state.js';
|
|
7
7
|
import { isClientMessage } from './protocol.js';
|
|
@@ -121,7 +121,6 @@ export async function startServer(onMessage, getHtml) {
|
|
|
121
121
|
handle.onFirstConnect = null;
|
|
122
122
|
// One authoritative snapshot — the client replaces its whole view with it.
|
|
123
123
|
sendTo(ws, snapshot());
|
|
124
|
-
broadcast({ type: 'client_count', count: clientCount() });
|
|
125
124
|
ws.on('message', data => {
|
|
126
125
|
let msg;
|
|
127
126
|
try {
|
|
@@ -144,7 +143,6 @@ export async function startServer(onMessage, getHtml) {
|
|
|
144
143
|
});
|
|
145
144
|
ws.on('close', () => {
|
|
146
145
|
removeClient(ws);
|
|
147
|
-
broadcast({ type: 'client_count', count: clientCount() });
|
|
148
146
|
});
|
|
149
147
|
});
|
|
150
148
|
await new Promise(resolve => httpServer.listen(port, '0.0.0.0', resolve));
|
package/dist/remote/ui.js
CHANGED
|
@@ -57,10 +57,6 @@ export function html(wsUrl) {
|
|
|
57
57
|
96% { text-shadow: 1px 0 var(--teal), -1px 0 var(--red); transform: translate(-1px, 0); }
|
|
58
58
|
}
|
|
59
59
|
@media (prefers-reduced-motion: reduce) { #header .title { animation: none; } }
|
|
60
|
-
#header .status { color: var(--subtext0); font-size: 11px; display: inline-flex; align-items: center; gap: 5px; }
|
|
61
|
-
#header .cdot { color: var(--yellow); }
|
|
62
|
-
#header .cdot.up { color: var(--green); }
|
|
63
|
-
#header .cdot.down { color: var(--red); }
|
|
64
60
|
#header .hgroup { display: flex; align-items: center; gap: 10px; }
|
|
65
61
|
#bell {
|
|
66
62
|
background: none; border: none; color: var(--subtext1); cursor: pointer;
|
|
@@ -233,7 +229,6 @@ export function html(wsUrl) {
|
|
|
233
229
|
<div id="header">
|
|
234
230
|
<span class="title">pi-task remote</span>
|
|
235
231
|
<div class="hgroup">
|
|
236
|
-
<span class="status" id="client-status"><span class="cdot" id="conn-dot">○</span></span>
|
|
237
232
|
<button id="bell" aria-label="Toggle notifications" title="Notifications">◯</button>
|
|
238
233
|
</div>
|
|
239
234
|
</div>
|
|
@@ -272,12 +267,6 @@ export function html(wsUrl) {
|
|
|
272
267
|
function setContextBar(usage) {
|
|
273
268
|
if (usage && usage.percent != null) contextFill.style.width = usage.percent + '%';
|
|
274
269
|
}
|
|
275
|
-
const connDot = document.getElementById('conn-dot');
|
|
276
|
-
// state: 'connecting' (○ yellow) | 'up' (● green) | 'down' (● red)
|
|
277
|
-
function setConn(state) {
|
|
278
|
-
connDot.textContent = state === 'connecting' ? '\\u25CB' : '\\u25CF';
|
|
279
|
-
connDot.className = 'cdot' + (state === 'up' ? ' up' : state === 'down' ? ' down' : '');
|
|
280
|
-
}
|
|
281
270
|
const reconnectOverlay = document.getElementById('reconnect-overlay');
|
|
282
271
|
const reconnectMsg = document.getElementById('reconnect-msg');
|
|
283
272
|
const cmdSuggestions = document.getElementById('cmd-suggestions');
|
|
@@ -941,9 +930,6 @@ export function html(wsUrl) {
|
|
|
941
930
|
// Seeds the bar for a client that joined mid-session.
|
|
942
931
|
setContextBar(msg.contextUsage);
|
|
943
932
|
break;
|
|
944
|
-
case 'client_count':
|
|
945
|
-
setConn('up');
|
|
946
|
-
break;
|
|
947
933
|
case 'prompt':
|
|
948
934
|
showPrompt(msg);
|
|
949
935
|
break;
|
|
@@ -983,9 +969,9 @@ export function html(wsUrl) {
|
|
|
983
969
|
cmdActive = []; cmdIndex = -1; renderSuggestions();
|
|
984
970
|
// Slash commands are handled server-side and produce no chat turn.
|
|
985
971
|
if (text.startsWith('/')) return;
|
|
986
|
-
//
|
|
987
|
-
//
|
|
988
|
-
|
|
972
|
+
// The server records the message via addUserTurn and broadcasts a
|
|
973
|
+
// user_message back to every client (us included), which renders the
|
|
974
|
+
// bubble. Don't render it here too, or the sender sees it twice.
|
|
989
975
|
setEnabled(false);
|
|
990
976
|
showThinking();
|
|
991
977
|
}
|
|
@@ -1013,7 +999,6 @@ export function html(wsUrl) {
|
|
|
1013
999
|
if (reconnectAnim) { clearInterval(reconnectAnim); reconnectAnim = null; }
|
|
1014
1000
|
reconnectOverlay.classList.remove('visible');
|
|
1015
1001
|
reconnectDelay = 1000;
|
|
1016
|
-
setConn('up');
|
|
1017
1002
|
setEnabled(true);
|
|
1018
1003
|
});
|
|
1019
1004
|
ws.addEventListener('message', (e) => {
|
|
@@ -1021,7 +1006,6 @@ export function html(wsUrl) {
|
|
|
1021
1006
|
});
|
|
1022
1007
|
ws.addEventListener('close', () => {
|
|
1023
1008
|
setEnabled(false);
|
|
1024
|
-
setConn('down');
|
|
1025
1009
|
reconnectOverlay.classList.add('visible');
|
|
1026
1010
|
// Animate the same braille spinner used elsewhere, with a live countdown.
|
|
1027
1011
|
const until = Date.now() + reconnectDelay;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect tool calls that leaked into a child's assistant *text* instead of
|
|
3
|
+
* being executed.
|
|
4
|
+
*
|
|
5
|
+
* Background: every child pi runs under `--mode json`; pi-task only ever treats
|
|
6
|
+
* a structured `tool_execution_start` event as a tool call (see
|
|
7
|
+
* shared/child-process.ts). When a local model emits a call in a markup dialect
|
|
8
|
+
* pi's harness doesn't recognise — e.g.
|
|
9
|
+
*
|
|
10
|
+
* <tool_call>
|
|
11
|
+
* <function=bash>
|
|
12
|
+
* <parameter=command>grep …</parameter>
|
|
13
|
+
* </function>
|
|
14
|
+
* </tool_call>
|
|
15
|
+
*
|
|
16
|
+
* pi passes the raw markup through as ordinary assistant text. The command never
|
|
17
|
+
* runs, no event fires, and pi-task's guards (loop detector, widget) never see
|
|
18
|
+
* it. The phase then "passes" on its only gates (non-empty text + exit 0) and
|
|
19
|
+
* the unexecuted call flows downstream — a silently skipped beat.
|
|
20
|
+
*
|
|
21
|
+
* This is fundamentally an upstream mismatch (model output format ↔ pi's parser)
|
|
22
|
+
* that pi-task cannot fix. What it CAN do is notice the leaked markup and refuse
|
|
23
|
+
* to accept the turn, so the skip becomes visible instead of silent.
|
|
24
|
+
*/
|
|
25
|
+
export declare const MAX_LEAK_RETRIES = 2;
|
|
26
|
+
/**
|
|
27
|
+
* Return the offending marker string if `text` contains a leaked tool call, or
|
|
28
|
+
* null if it looks clean. The marker is suitable for logging and for naming the
|
|
29
|
+
* problem back to the model in a re-prompt hint.
|
|
30
|
+
*/
|
|
31
|
+
export declare function detectLeakedToolCall(text: string): string | null;
|
|
32
|
+
/**
|
|
33
|
+
* A correction hint to prepend to a re-spawn after a leak, naming the offending
|
|
34
|
+
* markup so the model stops repeating that exact mistake.
|
|
35
|
+
*/
|
|
36
|
+
export declare function leakedToolCallHint(marker: string): string;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect tool calls that leaked into a child's assistant *text* instead of
|
|
3
|
+
* being executed.
|
|
4
|
+
*
|
|
5
|
+
* Background: every child pi runs under `--mode json`; pi-task only ever treats
|
|
6
|
+
* a structured `tool_execution_start` event as a tool call (see
|
|
7
|
+
* shared/child-process.ts). When a local model emits a call in a markup dialect
|
|
8
|
+
* pi's harness doesn't recognise — e.g.
|
|
9
|
+
*
|
|
10
|
+
* <tool_call>
|
|
11
|
+
* <function=bash>
|
|
12
|
+
* <parameter=command>grep …</parameter>
|
|
13
|
+
* </function>
|
|
14
|
+
* </tool_call>
|
|
15
|
+
*
|
|
16
|
+
* pi passes the raw markup through as ordinary assistant text. The command never
|
|
17
|
+
* runs, no event fires, and pi-task's guards (loop detector, widget) never see
|
|
18
|
+
* it. The phase then "passes" on its only gates (non-empty text + exit 0) and
|
|
19
|
+
* the unexecuted call flows downstream — a silently skipped beat.
|
|
20
|
+
*
|
|
21
|
+
* This is fundamentally an upstream mismatch (model output format ↔ pi's parser)
|
|
22
|
+
* that pi-task cannot fix. What it CAN do is notice the leaked markup and refuse
|
|
23
|
+
* to accept the turn, so the skip becomes visible instead of silent.
|
|
24
|
+
*/
|
|
25
|
+
// A child that wrote a tool call as plain text (wrong dialect, never executed)
// gets re-prompted with a correction hint up to this many times before the
// caller gives up. Mirrors MAX_LOOP_RESTARTS: 3 attempts total.
export const MAX_LEAK_RETRIES = 2;
// The Hermes-style wrapper a leaked call is most often wrapped in. pi-task never
// legitimately emits this tag, so its presence alone is a confident signal.
const TOOL_CALL_WRAPPER = /<tool_call\b[^>]*>/i;
// The "XML function call" dialect: <function=name> … <parameter=key>. Either tag
// alone is too weak (a stray "<function=x>" can appear in prose or source), so we
// require the structural pair before flagging it.
const FUNCTION_TAG = /<function=[\w.-]+\s*>/i;
const PARAMETER_TAG = /<parameter=[\w.-]+\s*>/i;
// Upper bound on the marker handed back to callers. `[^>]*` in the wrapper
// pattern crosses newlines and has no length limit, so a malformed opening tag
// can swallow a large slice of the answer; the marker is only a label for logs
// and re-prompt hints, so it is kept short and on a single line.
const MAX_MARKER_LENGTH = 80;
/**
 * Collapse whitespace runs in a matched tag to single spaces and cap its
 * length, appending an ellipsis when it had to be cut.
 */
function normalizeMarker(raw) {
    const oneLine = raw.replace(/\s+/g, ' ').trim();
    if (oneLine.length <= MAX_MARKER_LENGTH) {
        return oneLine;
    }
    return `${oneLine.slice(0, MAX_MARKER_LENGTH - 1)}…`;
}
/**
 * Return the offending marker string if `text` contains a leaked tool call, or
 * null if it looks clean. The marker is suitable for logging and for naming the
 * problem back to the model in a re-prompt hint.
 *
 * Detection rules:
 *  - any `<tool_call …>` opening tag is flagged on its own;
 *  - a `<function=name>` tag is flagged only when a `<parameter=key>` tag is
 *    also present somewhere in `text`.
 *
 * The returned marker is the matched opening tag, normalised to a single line
 * of at most MAX_MARKER_LENGTH characters. Ordinary markers such as
 * `<tool_call>` or `<function=bash>` are returned unchanged.
 */
export function detectLeakedToolCall(text) {
    const wrapper = TOOL_CALL_WRAPPER.exec(text);
    if (wrapper) {
        return normalizeMarker(wrapper[0]);
    }
    const fn = FUNCTION_TAG.exec(text);
    if (fn && PARAMETER_TAG.test(text)) {
        return normalizeMarker(fn[0]);
    }
    return null;
}
|
|
51
|
+
/**
 * Build the correction note that is prepended to the prompt when a child is
 * re-spawned after leaking a tool call as text. The note quotes `marker` (the
 * offending markup) so the model can see exactly what it must not repeat.
 */
export function leakedToolCallHint(marker) {
    const fragments = [
        `[SYSTEM NOTE: Your previous turn wrote a tool call as plain text (\`${marker}\`) `,
        `instead of invoking the tool — so it never ran and you proceeded without its result. `,
        `Invoke tools through the native tool-calling mechanism; never type `,
        `<tool_call>/<function=…>/<parameter=…> markup into your answer.]`
    ];
    return fragments.join('');
}
|
|
@@ -14,6 +14,8 @@ export interface PhaseRunResult {
|
|
|
14
14
|
exitCode: number;
|
|
15
15
|
stderr: string;
|
|
16
16
|
loopHit?: LoopHit;
|
|
17
|
+
/** Set when the assistant text contains an unexecuted, leaked tool call. */
|
|
18
|
+
leakedToolCall?: string;
|
|
17
19
|
}
|
|
18
20
|
export declare function childArgs(tools: string, prompt: string): string[];
|
|
19
21
|
export declare const USER_CANCELLED = "__user_cancelled__";
|
|
@@ -38,7 +40,13 @@ interface PhaseDeps {
|
|
|
38
40
|
spawn?: SpawnFn;
|
|
39
41
|
}
|
|
40
42
|
export type { PhaseDeps };
|
|
41
|
-
/**
|
|
43
|
+
/**
|
|
44
|
+
* Run a child pi and return its assistant text. Throws if exit code != 0.
|
|
45
|
+
*
|
|
46
|
+
* If the child leaks a tool call as plain text (wrong dialect — never executed),
|
|
47
|
+
* re-prompt with a correction hint up to MAX_LEAK_RETRIES times; if it keeps
|
|
48
|
+
* leaking, throw LeakedToolCallError rather than returning the unexecuted call.
|
|
49
|
+
*/
|
|
42
50
|
export declare function runPhaseChild(deps: PhaseDeps, name: string, tools: string, prompt: string): Promise<string>;
|
|
43
51
|
export declare function prependHint(hint: string | null, prompt: string): string;
|
|
44
52
|
/**
|
|
@@ -64,3 +72,13 @@ export declare class LoopExhaustedError extends Error {
|
|
|
64
72
|
readonly history: LoopHit[];
|
|
65
73
|
constructor(phase: string, history: LoopHit[]);
|
|
66
74
|
}
|
|
75
|
+
/**
|
|
76
|
+
* Thrown when a phase child repeatedly wrote a tool call as plain text (a markup
|
|
77
|
+
* dialect pi's harness didn't parse) instead of invoking it. The call never ran,
|
|
78
|
+
* so the phase output is untrustworthy — fail loudly rather than check it off.
|
|
79
|
+
*/
|
|
80
|
+
export declare class LeakedToolCallError extends Error {
|
|
81
|
+
readonly phase: string;
|
|
82
|
+
readonly marker: string;
|
|
83
|
+
constructor(phase: string, marker: string);
|
|
84
|
+
}
|
|
@@ -9,6 +9,7 @@ import { spawn } from 'node:child_process';
|
|
|
9
9
|
import { getPiInvocation } from '../shared/pi-invocation.js';
|
|
10
10
|
import { runChild as runChildUnified, CHILD_BASE_ARGS } from '../shared/child-process.js';
|
|
11
11
|
import { LoopDetector } from './loop-detector.js';
|
|
12
|
+
import { detectLeakedToolCall, leakedToolCallHint, MAX_LEAK_RETRIES } from '../shared/leaked-tool-call.js';
|
|
12
13
|
import { readSection, setTaskSection } from './task-file.js';
|
|
13
14
|
// ─── Loop detection constants ────────────────────────────────────────────────
|
|
14
15
|
// Defined here (not in phases.ts) to avoid a circular dependency:
|
|
@@ -57,28 +58,52 @@ export async function runChild(cwd, tools, prompt, signal, onLine, onContextUsag
|
|
|
57
58
|
return hit; // propagate to unified runner so it can kill
|
|
58
59
|
}
|
|
59
60
|
});
|
|
61
|
+
// Use `||` (not `??`) so an empty string from json-events mode falls
|
|
62
|
+
// back to raw stdout. Without this, a child that exits 0 but emits no
|
|
63
|
+
// assistant text (e.g. model API error swallowed in json mode) always
|
|
64
|
+
// fails with the unhelpful "X child produced no output" — the raw
|
|
65
|
+
// stdout/stderr that might contain the real error is discarded.
|
|
66
|
+
const text = result.text || result.stdout.trim();
|
|
60
67
|
return {
|
|
61
|
-
|
|
62
|
-
// back to raw stdout. Without this, a child that exits 0 but emits no
|
|
63
|
-
// assistant text (e.g. model API error swallowed in json mode) always
|
|
64
|
-
// fails with the unhelpful "X child produced no output" — the raw
|
|
65
|
-
// stdout/stderr that might contain the real error is discarded.
|
|
66
|
-
text: result.text || result.stdout.trim(),
|
|
68
|
+
text,
|
|
67
69
|
exitCode: result.exitCode,
|
|
68
70
|
stderr: result.stderr.trim(),
|
|
69
|
-
loopHit
|
|
71
|
+
loopHit,
|
|
72
|
+
// A tool call the model wrote as text (wrong dialect) never executed and
|
|
73
|
+
// sailed past the structured-event guards above; flag it so the wrappers
|
|
74
|
+
// can re-prompt instead of accepting the unexecuted call. Only meaningful
|
|
75
|
+
// when the run otherwise succeeded — a loop kill truncates text mid-stream.
|
|
76
|
+
leakedToolCall: loopHit ? undefined : (detectLeakedToolCall(text) ?? undefined)
|
|
70
77
|
};
|
|
71
78
|
}
|
|
72
|
-
/**
|
|
79
|
+
/**
 * Spawn a child pi for phase `name` and resolve with its assistant text.
 *
 * Failure modes, checked in this order after each run:
 *  - non-zero exit code → Error carrying the child's stderr;
 *  - empty (whitespace-only) text → Error, with stderr appended when present;
 *  - a leaked tool call (markup typed as text, never executed) → the child is
 *    run again with a correction hint prepended to the prompt, up to
 *    MAX_LEAK_RETRIES extra times; a leak on the final attempt throws
 *    LeakedToolCallError instead of returning the unexecuted call.
 */
export async function runPhaseChild(deps, name, tools, prompt) {
    let correction = null;
    let attempt = 0;
    while (attempt <= MAX_LEAK_RETRIES) {
        const outcome = await runChild(deps.cwd, tools, prependHint(correction, prompt), deps.signal, deps.onChildOutput, deps.onContextUsage, undefined, deps.spawn);
        if (outcome.exitCode !== 0) {
            throw new Error(`${name} child failed: ${outcome.stderr || '(no stderr)'}`);
        }
        if (outcome.text.trim().length === 0) {
            throw new Error(`${name} child produced no output${outcome.stderr ? ' — stderr: ' + outcome.stderr : ''}`);
        }
        const leak = outcome.leakedToolCall;
        if (!leak) {
            return outcome.text;
        }
        // Out of retries: surface the leak rather than accept the turn.
        if (attempt === MAX_LEAK_RETRIES) {
            throw new LeakedToolCallError(name, leak);
        }
        correction = leakedToolCallHint(leak);
        attempt += 1;
    }
    // Unreachable: every iteration above returns, throws, or retries, and the
    // final permitted attempt always returns or throws.
    throw new LeakedToolCallError(name, '(unknown)');
}
|
|
83
108
|
function formatLoopHint(hit) {
|
|
84
109
|
const argsStr = JSON.stringify(hit.call.args);
|
|
@@ -106,12 +131,13 @@ async function appendLoopEvent(cwd, taskId, phase, hit, strike, outcome) {
|
|
|
106
131
|
*/
|
|
107
132
|
export async function runPhaseWithLoopGuard(deps, name, tools, buildPrompt) {
|
|
108
133
|
const loopHistory = [];
|
|
134
|
+
// Carries the correction hint (loop OR leaked-tool-call) into the next strike.
|
|
135
|
+
let nextHint = null;
|
|
109
136
|
for (let strike = 0; strike <= MAX_LOOP_RESTARTS; strike++) {
|
|
110
137
|
if (deps.signal.aborted)
|
|
111
138
|
throw new Error(USER_CANCELLED);
|
|
112
139
|
const detector = new LoopDetector(LOOP_WINDOW, LOOP_THRESHOLD);
|
|
113
|
-
const
|
|
114
|
-
const prompt = buildPrompt(hint);
|
|
140
|
+
const prompt = buildPrompt(nextHint);
|
|
115
141
|
const r = await runChild(deps.cwd, tools, prompt, deps.signal, deps.onChildOutput, deps.onContextUsage, call => detector.record(call), deps.spawn);
|
|
116
142
|
if (deps.signal.aborted)
|
|
117
143
|
throw new Error(USER_CANCELLED);
|
|
@@ -121,6 +147,7 @@ export async function runPhaseWithLoopGuard(deps, name, tools, buildPrompt) {
|
|
|
121
147
|
await appendLoopEvent(deps.cwd, deps.taskId, name, r.loopHit, strike + 1, isLastStrike ? 'phase failed' : 'restarted with hint');
|
|
122
148
|
if (isLastStrike)
|
|
123
149
|
throw new LoopExhaustedError(name, loopHistory);
|
|
150
|
+
nextHint = formatLoopHint(r.loopHit);
|
|
124
151
|
continue;
|
|
125
152
|
}
|
|
126
153
|
if (r.exitCode !== 0) {
|
|
@@ -129,6 +156,13 @@ export async function runPhaseWithLoopGuard(deps, name, tools, buildPrompt) {
|
|
|
129
156
|
if (r.text.trim().length === 0) {
|
|
130
157
|
throw new Error(`${name} child produced no output${r.stderr ? ' — stderr: ' + r.stderr : ''}`);
|
|
131
158
|
}
|
|
159
|
+
if (r.leakedToolCall) {
|
|
160
|
+
if (strike === MAX_LOOP_RESTARTS) {
|
|
161
|
+
throw new LeakedToolCallError(name, r.leakedToolCall);
|
|
162
|
+
}
|
|
163
|
+
nextHint = leakedToolCallHint(r.leakedToolCall);
|
|
164
|
+
continue;
|
|
165
|
+
}
|
|
132
166
|
return r.text;
|
|
133
167
|
}
|
|
134
168
|
throw new LoopExhaustedError(name, loopHistory);
|
|
@@ -160,3 +194,20 @@ export class LoopExhaustedError extends Error {
|
|
|
160
194
|
this.name = 'LoopExhaustedError';
|
|
161
195
|
}
|
|
162
196
|
}
|
|
197
|
+
// ─── LeakedToolCallError ─────────────────────────────────────────────────────
|
|
198
|
+
/**
 * Raised when a phase child kept writing a tool call as plain text (markup the
 * harness did not parse) instead of invoking the tool. Because the call never
 * ran, the phase output cannot be trusted, so the phase fails loudly.
 *
 * `phase` is the phase name and `marker` the offending markup exactly as it was
 * passed in; the human-readable message embeds the trimmed marker.
 */
export class LeakedToolCallError extends Error {
    phase;
    marker;
    constructor(phase, marker) {
        super(`${phase} child wrote a tool call as text instead of invoking it (${marker.trim()}) — it never ran`);
        // Assigned in the order phase → marker → name.
        Object.assign(this, { phase, marker, name: 'LeakedToolCallError' });
    }
}
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { updateTaskFrontMatter } from './task-file.js';
|
|
6
6
|
import { flashTerminalWidget } from './widget.js';
|
|
7
|
-
import { LoopExhaustedError, USER_CANCELLED } from './child-runner.js';
|
|
7
|
+
import { LoopExhaustedError, LeakedToolCallError, USER_CANCELLED } from './child-runner.js';
|
|
8
8
|
// ─── Classifier ──────────────────────────────────────────────────────────────
|
|
9
9
|
export function classifyFailure(err, aborted) {
|
|
10
10
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -20,6 +20,15 @@ export function classifyFailure(err, aborted) {
|
|
|
20
20
|
level: 'error'
|
|
21
21
|
};
|
|
22
22
|
}
|
|
23
|
+
if (err instanceof LeakedToolCallError) {
|
|
24
|
+
return {
|
|
25
|
+
state: 'failed',
|
|
26
|
+
reason: `leaked tool call in ${err.phase}: ${err.marker.trim()}`,
|
|
27
|
+
flash: 'leaked_tool_call',
|
|
28
|
+
notify: `failed: ${err.phase} wrote a tool call as text instead of running it — it never executed. Resume to retry.`,
|
|
29
|
+
level: 'error'
|
|
30
|
+
};
|
|
31
|
+
}
|
|
23
32
|
if (msg === 'no_verify_block') {
|
|
24
33
|
return {
|
|
25
34
|
state: 'failed',
|
package/dist/task/phases.js
CHANGED
|
@@ -233,6 +233,10 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
233
233
|
if (result.text.trim().length === 0) {
|
|
234
234
|
throw new Error(`Research ${name} worker produced no output`);
|
|
235
235
|
}
|
|
236
|
+
if (result.leakedToolCall) {
|
|
237
|
+
throw new Error(`Research ${name} worker wrote a tool call as text instead of invoking it `
|
|
238
|
+
+ `(${result.leakedToolCall.trim()}) — it never ran`);
|
|
239
|
+
}
|
|
236
240
|
}
|
|
237
241
|
return `FILES\n${files.text}\n\nAPIS\n${apis.text}\n\nCONTEXT\n${context.text}\n\nTOOLING\n${tooling.text}`;
|
|
238
242
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { readFileSync } from 'node:fs';
|
|
2
2
|
import { fileURLToPath } from 'node:url';
|
|
3
3
|
import { dirname, join } from 'node:path';
|
|
4
|
-
import {
|
|
4
|
+
import { parseHTML } from 'linkedom';
|
|
5
5
|
import { Readability } from '@mozilla/readability';
|
|
6
6
|
import TurndownService from 'turndown';
|
|
7
7
|
const turndown = new TurndownService({
|
|
@@ -10,22 +10,22 @@ const turndown = new TurndownService({
|
|
|
10
10
|
bulletListMarker: '-'
|
|
11
11
|
});
|
|
12
12
|
export function cleanHtml(html, baseUrl) {
|
|
13
|
-
const
|
|
14
|
-
const reader = new Readability(
|
|
13
|
+
const { document } = parseHTML(html);
|
|
14
|
+
const reader = new Readability(document);
|
|
15
15
|
const parsed = reader.parse();
|
|
16
16
|
if (parsed && parsed.content) {
|
|
17
17
|
return {
|
|
18
|
-
title: parsed.title ||
|
|
18
|
+
title: parsed.title || document.title || new URL(baseUrl).hostname,
|
|
19
19
|
markdown: turndown.turndown(parsed.content).trim(),
|
|
20
20
|
finalUrl: baseUrl
|
|
21
21
|
};
|
|
22
22
|
}
|
|
23
23
|
// Fallback: turndown the body
|
|
24
|
-
const body =
|
|
24
|
+
const body = document.body;
|
|
25
25
|
const bodyHtml = body ? body.innerHTML : '';
|
|
26
26
|
const markdown = turndown.turndown(bodyHtml).trim();
|
|
27
27
|
return {
|
|
28
|
-
title:
|
|
28
|
+
title: document.title || new URL(baseUrl).hostname,
|
|
29
29
|
markdown,
|
|
30
30
|
finalUrl: baseUrl
|
|
31
31
|
};
|
|
@@ -75,7 +75,12 @@ function decoderFor(contentType) {
|
|
|
75
75
|
const charset = match?.[1]?.trim().replace(/^["']|["']$/g, '');
|
|
76
76
|
if (charset) {
|
|
77
77
|
try {
|
|
78
|
-
|
|
78
|
+
// The runtime accepts any charset label string; the type is narrowed
|
|
79
|
+
// to a known-encoding union by Bun/Node's lib (DOM's looser signature
|
|
80
|
+
// is no longer pulled in transitively). Cast to the actual param type.
|
|
81
|
+
return new TextDecoder(charset, {
|
|
82
|
+
fatal: false
|
|
83
|
+
});
|
|
79
84
|
}
|
|
80
85
|
catch {
|
|
81
86
|
// Unknown/unsupported label — fall through to UTF-8.
|
|
@@ -24,5 +24,11 @@ export interface RunWorkerResult {
|
|
|
24
24
|
* elapsed when the child never produced output.
|
|
25
25
|
*/
|
|
26
26
|
workMs: number;
|
|
27
|
+
/**
|
|
28
|
+
* Set when the worker exhausted its re-prompts still leaking a tool call as
|
|
29
|
+
* text (wrong dialect, never executed). The caller must treat this as a
|
|
30
|
+
* failure rather than trusting the returned text.
|
|
31
|
+
*/
|
|
32
|
+
leakedToolCall?: string;
|
|
27
33
|
}
|
|
28
34
|
export declare function runWorker(input: RunWorkerInput): Promise<RunWorkerResult>;
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { getPiInvocation } from '../shared/pi-invocation.js';
|
|
2
2
|
import { CHILD_BASE_ARGS, runChildDefault } from '../shared/child-process.js';
|
|
3
3
|
import { LoopDetector } from '../task/loop-detector.js';
|
|
4
|
+
import { detectLeakedToolCall, leakedToolCallHint, MAX_LEAK_RETRIES } from '../shared/leaked-tool-call.js';
|
|
4
5
|
// `--mode json` makes pi emit structured events as they happen instead of
|
|
5
6
|
// buffering the assistant text and flushing on exit. That matters for the
|
|
6
7
|
// wait/work timing split: in text mode the first stdout chunk only arrives at
|
|
@@ -11,25 +12,39 @@ import { LoopDetector } from '../task/loop-detector.js';
|
|
|
11
12
|
const DEFAULT_TOOLS = 'read,grep,find,ls';
|
|
12
13
|
/**
 * Run a worker child pi in `--mode json` and report its text, exit status and
 * timing split.
 *
 * If a run exits 0 without being aborted but its text contains a leaked tool
 * call (markup typed as text, never executed), the worker is re-run with a
 * correction hint placed before the original prompt, up to MAX_LEAK_RETRIES
 * extra times. When the last permitted run still leaks, the result is returned
 * with `leakedToolCall` set so the caller can treat it as a failure.
 *
 * `waitMs` / `workMs` describe the run whose result is returned: time until
 * the first byte, and time from the first byte to exit (0 when no byte was
 * ever seen, in which case `waitMs` is the whole elapsed time).
 */
export async function runWorker(input) {
    const toolList = input.tools ?? DEFAULT_TOOLS;
    const fixedArgs = [...CHILD_BASE_ARGS, '--mode', 'json', '--tools', toolList];
    let correction = null;
    let attempt = 0;
    while (true) {
        const prompt = correction === null ? input.prompt : `${correction}\n\n${input.prompt}`;
        const invocation = getPiInvocation([...fixedArgs, prompt]);
        const startedAt = Date.now();
        let firstByteAt = null;
        const detector = new LoopDetector(20, 5);
        const child = await runChildDefault(invocation, input.cwd, input.signal, {
            mode: 'json-events',
            onFirstByte: () => (firstByteAt = Date.now()),
            onToolCall: call => detector.record(call)
        }, input.spawn);
        const finishedAt = Date.now();
        const sawOutput = firstByteAt !== null;
        const waitMs = sawOutput ? firstByteAt - startedAt : finishedAt - startedAt;
        const workMs = sawOutput ? finishedAt - firstByteAt : 0;
        const text = child.text ?? '';
        // Leak detection only applies to a clean, complete run; a non-zero
        // exit or an abort yields partial text and is reported as-is.
        const cleanRun = child.exitCode === 0 && !child.aborted;
        const leak = cleanRun ? detectLeakedToolCall(text) : null;
        if (leak && attempt < MAX_LEAK_RETRIES) {
            correction = leakedToolCallHint(leak);
            attempt += 1;
            continue;
        }
        const outcome = {
            text,
            exitCode: child.exitCode,
            stderr: child.stderr.trim(),
            aborted: child.aborted,
            waitMs,
            workMs
        };
        if (leak) {
            outcome.leakedToolCall = leak;
        }
        return outcome;
    }
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mjasnikovs/pi-task",
|
|
3
|
-
"version": "0.7.2",
|
|
3
|
+
"version": "0.7.4",
|
|
4
4
|
"description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -23,13 +23,13 @@
|
|
|
23
23
|
"@earendil-works/pi-tui": "0.78.1"
|
|
24
24
|
},
|
|
25
25
|
"dependencies": {
|
|
26
|
-
"@mozilla/readability": "
|
|
26
|
+
"@mozilla/readability": "0.6.0",
|
|
27
27
|
"@sinclair/typebox": "0.34.49",
|
|
28
|
-
"
|
|
29
|
-
"qrcode": "
|
|
30
|
-
"turndown": "
|
|
31
|
-
"web-push": "
|
|
32
|
-
"ws": "
|
|
28
|
+
"linkedom": "0.18.12",
|
|
29
|
+
"qrcode": "1.5.4",
|
|
30
|
+
"turndown": "7.2.4",
|
|
31
|
+
"web-push": "3.6.7",
|
|
32
|
+
"ws": "8.21.0"
|
|
33
33
|
},
|
|
34
34
|
"devDependencies": {
|
|
35
35
|
"@earendil-works/pi-agent-core": "0.78.1",
|
|
@@ -38,11 +38,10 @@
|
|
|
38
38
|
"@eslint/js": "10.0.1",
|
|
39
39
|
"@sinclair/typebox": "0.34.49",
|
|
40
40
|
"@types/bun": "1.3.12",
|
|
41
|
-
"@types/
|
|
42
|
-
"@types/
|
|
43
|
-
"@types/
|
|
44
|
-
"@types/
|
|
45
|
-
"@types/ws": "^8.5.14",
|
|
41
|
+
"@types/qrcode": "1.5.6",
|
|
42
|
+
"@types/turndown": "5.0.6",
|
|
43
|
+
"@types/web-push": "3.6.4",
|
|
44
|
+
"@types/ws": "8.18.1",
|
|
46
45
|
"eslint": "10.2.1",
|
|
47
46
|
"globals": "17.5.0",
|
|
48
47
|
"prettier": "3.8.3",
|