@mjasnikovs/pi-task 0.13.10 → 0.13.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/shared/child-process.d.ts +15 -0
- package/dist/shared/child-process.js +29 -2
- package/dist/task/auto-orchestrator.js +10 -0
- package/dist/task/child-runner.d.ts +18 -0
- package/dist/task/child-runner.js +33 -0
- package/dist/task/failure-classifier.js +10 -1
- package/dist/task/orchestrator.d.ts +18 -0
- package/dist/task/orchestrator.js +49 -2
- package/dist/task/phases.js +1 -1
- package/dist/workers/docs-retrieve.js +2 -2
- package/package.json +1 -1
|
@@ -21,6 +21,14 @@ export interface ChildResult {
|
|
|
21
21
|
aborted: boolean;
|
|
22
22
|
/** Extracted assistant text (only populated in json-events mode). */
|
|
23
23
|
text?: string;
|
|
24
|
+
/**
|
|
25
|
+
* The model-failure cause, when the child's final turn carried
|
|
26
|
+
* stopReason "error" (provider/connection failure after pi exhausted its
|
|
27
|
+
* own retries). pi emits this as an agent_end whose assistant message has
|
|
28
|
+
* empty text, so without it the phase would mis-report "produced no output".
|
|
29
|
+
* Only populated in json-events mode.
|
|
30
|
+
*/
|
|
31
|
+
modelError?: string;
|
|
24
32
|
}
|
|
25
33
|
export interface ToolCall {
|
|
26
34
|
name: string;
|
|
@@ -74,6 +82,13 @@ export declare class JsonEventSink {
|
|
|
74
82
|
private readonly onLoopKill;
|
|
75
83
|
/** Final assistant text from the agent_end event, if one arrived. */
|
|
76
84
|
finalText: string;
|
|
85
|
+
/**
|
|
86
|
+
* Set when the final assistant turn carried stopReason "error" — i.e. the
|
|
87
|
+
* model/provider failed (disconnect, fetch failed, socket hang up, 5xx)
|
|
88
|
+
* after pi exhausted its internal retries. Holds the provider's errorMessage
|
|
89
|
+
* so callers can report the real cause instead of an empty completion.
|
|
90
|
+
*/
|
|
91
|
+
modelError: string | undefined;
|
|
77
92
|
private textDeltaAccum;
|
|
78
93
|
private buf;
|
|
79
94
|
constructor(opts: RunChildJsonEventsOptions,
|
|
@@ -24,6 +24,13 @@ export class JsonEventSink {
|
|
|
24
24
|
onLoopKill;
|
|
25
25
|
/** Final assistant text from the agent_end event, if one arrived. */
|
|
26
26
|
finalText = '';
|
|
27
|
+
/**
|
|
28
|
+
* Set when the final assistant turn carried stopReason "error" — i.e. the
|
|
29
|
+
* model/provider failed (disconnect, fetch failed, socket hang up, 5xx)
|
|
30
|
+
* after pi exhausted its internal retries. Holds the provider's errorMessage
|
|
31
|
+
* so callers can report the real cause instead of an empty completion.
|
|
32
|
+
*/
|
|
33
|
+
modelError = undefined;
|
|
27
34
|
textDeltaAccum = '';
|
|
28
35
|
// json-events lines can split across data chunks; this holds the trailing
|
|
29
36
|
// partial line between feeds so events spanning a boundary still parse. We
|
|
@@ -97,7 +104,20 @@ export class JsonEventSink {
|
|
|
97
104
|
if (t === 'agent_end' && Array.isArray(evt.messages)) {
|
|
98
105
|
for (let i = evt.messages.length - 1; i >= 0; i--) {
|
|
99
106
|
const m = evt.messages[i];
|
|
100
|
-
if (m && m.role === 'assistant' && Array.isArray(m.content)) {
|
|
107
|
+
if (!m || m.role !== 'assistant')
|
|
108
|
+
continue;
|
|
109
|
+
// A model failure (disconnect, fetch failed, socket hang up, 5xx
|
|
110
|
+
// after pi's own retries) arrives as an assistant message with
|
|
111
|
+
// stopReason "error" and the real cause in errorMessage — but
|
|
112
|
+
// EMPTY text content. Capture it so the phase reports the actual
|
|
113
|
+
// failure instead of the useless "produced no output".
|
|
114
|
+
if (m.stopReason === 'error'
|
|
115
|
+
&& typeof m.errorMessage === 'string'
|
|
116
|
+
&& m.errorMessage.length > 0
|
|
117
|
+
&& this.modelError === undefined) {
|
|
118
|
+
this.modelError = m.errorMessage;
|
|
119
|
+
}
|
|
120
|
+
if (Array.isArray(m.content)) {
|
|
101
121
|
const texts = [];
|
|
102
122
|
for (const c of m.content) {
|
|
103
123
|
if (c?.type === 'text' && typeof c.text === 'string') {
|
|
@@ -186,7 +206,14 @@ export function runChild(spawn, invocation, cwd, signal, opts) {
|
|
|
186
206
|
if (sink)
|
|
187
207
|
sink.flush();
|
|
188
208
|
const text = sink ? sink.text : undefined;
|
|
189
|
-
resolve({ stdout, stderr, exitCode: code ?? 0, aborted, text });
|
|
209
|
+
resolve({
|
|
210
|
+
stdout,
|
|
211
|
+
stderr,
|
|
212
|
+
exitCode: code ?? 0,
|
|
213
|
+
aborted,
|
|
214
|
+
text,
|
|
215
|
+
modelError: sink?.modelError
|
|
216
|
+
});
|
|
190
217
|
});
|
|
191
218
|
proc.on('error', () => {
|
|
192
219
|
resolve({ stdout, stderr, exitCode: 1, aborted });
|
|
@@ -280,6 +280,16 @@ export async function runAutoLoop(ctx, cwd, id, deps) {
|
|
|
280
280
|
active.ui.notify(`${id} paused — could not start a session. Run /task-auto-resume to retry.`, 'warning');
|
|
281
281
|
return;
|
|
282
282
|
}
|
|
283
|
+
if (res.interrupted) {
|
|
284
|
+
// The user interrupted implementation (ESC) and then declined to
|
|
285
|
+
// steer (empty steer prompt) — they want to stop here. Pause
|
|
286
|
+
// without checking the task off, so /task-auto-resume re-delivers
|
|
287
|
+
// this task's spec to finish it. (A plain ESC that the user
|
|
288
|
+
// follows with steering text never reaches here — that loops on
|
|
289
|
+
// the same task inside runSingleTask until a turn completes.)
|
|
290
|
+
active.ui.notify(`${id} paused at "${next.title}" — resume with /task-auto-resume.`, 'warning');
|
|
291
|
+
return;
|
|
292
|
+
}
|
|
283
293
|
if (!res.ok) {
|
|
284
294
|
await updateTaskFrontMatter(cwd, id, { state: 'failed' });
|
|
285
295
|
active.ui.notify(`${id} stopped at "${next.title}" — fix and run /task-auto-resume.`, 'error');
|
|
@@ -16,6 +16,8 @@ export interface PhaseRunResult {
|
|
|
16
16
|
loopHit?: LoopHit;
|
|
17
17
|
/** Set when the assistant text contains an unexecuted, leaked tool call. */
|
|
18
18
|
leakedToolCall?: string;
|
|
19
|
+
/** Set when the child's final turn failed with stopReason "error" (model/provider failure). */
|
|
20
|
+
modelError?: string;
|
|
19
21
|
}
|
|
20
22
|
export declare function childArgs(tools: string, prompt: string): string[];
|
|
21
23
|
export declare const USER_CANCELLED = "__user_cancelled__";
|
|
@@ -74,6 +76,22 @@ export declare class LoopExhaustedError extends Error {
|
|
|
74
76
|
readonly history: LoopHit[];
|
|
75
77
|
constructor(phase: string, history: LoopHit[]);
|
|
76
78
|
}
|
|
79
|
+
/**
|
|
80
|
+
* Thrown when a phase child's final turn failed with stopReason "error" — the
|
|
81
|
+
* model/provider died (local model disconnect, fetch failed, socket hang up,
|
|
82
|
+
* provider 5xx) after pi exhausted its own internal retries. pi reports this as
|
|
83
|
+
* an agent_end with empty assistant text, which would otherwise surface as the
|
|
84
|
+
* misleading "produced no output"; this names the real cause instead.
|
|
85
|
+
*
|
|
86
|
+
* Fail-fast: not retried at the pi-task layer. pi already retried the retryable
|
|
87
|
+
* cases; re-spawning a fresh child against the same dead endpoint only burns
|
|
88
|
+
* time and buries the real error. Restart the model/provider, then resume.
|
|
89
|
+
*/
|
|
90
|
+
export declare class ModelError extends Error {
|
|
91
|
+
readonly phase: string;
|
|
92
|
+
readonly cause: string;
|
|
93
|
+
constructor(phase: string, cause: string);
|
|
94
|
+
}
|
|
77
95
|
/**
|
|
78
96
|
* Thrown when a phase child repeatedly wrote a tool call as plain text (a markup
|
|
79
97
|
* dialect pi's harness didn't parse) instead of invoking it. The call never ran,
|
|
@@ -69,6 +69,7 @@ export async function runChild(cwd, tools, prompt, signal, onLine, onContextUsag
|
|
|
69
69
|
exitCode: result.exitCode,
|
|
70
70
|
stderr: result.stderr.trim(),
|
|
71
71
|
loopHit,
|
|
72
|
+
modelError: result.modelError,
|
|
72
73
|
// A tool call the model wrote as text (wrong dialect) never executed and
|
|
73
74
|
// sailed past the structured-event guards above; flag it so the wrappers
|
|
74
75
|
// can re-prompt instead of accepting the unexecuted call. Only meaningful
|
|
@@ -90,6 +91,11 @@ export async function runPhaseChild(deps, name, tools, prompt) {
|
|
|
90
91
|
if (r.exitCode !== 0) {
|
|
91
92
|
throw new Error(`${name} child failed: ${r.stderr || '(no stderr)'}`);
|
|
92
93
|
}
|
|
94
|
+
if (r.modelError) {
|
|
95
|
+
// The model/provider failed (pi exited 0 with a stopReason "error"
|
|
96
|
+
// turn). Surface the real cause and fail fast — pi already retried.
|
|
97
|
+
throw new ModelError(name, r.modelError);
|
|
98
|
+
}
|
|
93
99
|
if (r.text.trim().length === 0) {
|
|
94
100
|
// An empty completion (exit 0, no assistant text, no stderr) is almost
|
|
95
101
|
// always transient — a model/API error swallowed inside --mode json,
|
|
@@ -161,6 +167,11 @@ export async function runPhaseWithLoopGuard(deps, name, tools, buildPrompt) {
|
|
|
161
167
|
if (r.exitCode !== 0) {
|
|
162
168
|
throw new Error(`${name} child failed: ${r.stderr || '(no stderr)'}`);
|
|
163
169
|
}
|
|
170
|
+
if (r.modelError) {
|
|
171
|
+
// The model/provider failed (pi exited 0 with a stopReason "error"
|
|
172
|
+
// turn). Surface the real cause and fail fast — pi already retried.
|
|
173
|
+
throw new ModelError(name, r.modelError);
|
|
174
|
+
}
|
|
164
175
|
if (r.text.trim().length === 0) {
|
|
165
176
|
// An empty completion (exit 0, no assistant text, no stderr) is almost
|
|
166
177
|
// always transient — a model/API error swallowed inside --mode json,
|
|
@@ -210,6 +221,28 @@ export class LoopExhaustedError extends Error {
|
|
|
210
221
|
this.name = 'LoopExhaustedError';
|
|
211
222
|
}
|
|
212
223
|
}
|
|
224
|
+
// ─── ModelError ──────────────────────────────────────────────────────────────
|
|
225
|
+
/**
|
|
226
|
+
* Thrown when a phase child's final turn failed with stopReason "error" — the
|
|
227
|
+
* model/provider died (local model disconnect, fetch failed, socket hang up,
|
|
228
|
+
* provider 5xx) after pi exhausted its own internal retries. pi reports this as
|
|
229
|
+
* an agent_end with empty assistant text, which would otherwise surface as the
|
|
230
|
+
* misleading "produced no output"; this names the real cause instead.
|
|
231
|
+
*
|
|
232
|
+
* Fail-fast: not retried at the pi-task layer. pi already retried the retryable
|
|
233
|
+
* cases; re-spawning a fresh child against the same dead endpoint only burns
|
|
234
|
+
* time and buries the real error. Restart the model/provider, then resume.
|
|
235
|
+
*/
|
|
236
|
+
export class ModelError extends Error {
|
|
237
|
+
phase;
|
|
238
|
+
cause;
|
|
239
|
+
constructor(phase, cause) {
|
|
240
|
+
super(`${phase} child: model error — ${cause}`);
|
|
241
|
+
this.phase = phase;
|
|
242
|
+
this.cause = cause;
|
|
243
|
+
this.name = 'ModelError';
|
|
244
|
+
}
|
|
245
|
+
}
|
|
213
246
|
// ─── LeakedToolCallError ─────────────────────────────────────────────────────
|
|
214
247
|
/**
|
|
215
248
|
* Thrown when a phase child repeatedly wrote a tool call as plain text (a markup
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { updateTaskFrontMatter } from './task-io.js';
|
|
6
6
|
import { flashTerminalWidget } from './widget.js';
|
|
7
|
-
import { LoopExhaustedError, LeakedToolCallError, USER_CANCELLED } from './child-runner.js';
|
|
7
|
+
import { LoopExhaustedError, LeakedToolCallError, ModelError, USER_CANCELLED } from './child-runner.js';
|
|
8
8
|
// ─── Classifier ──────────────────────────────────────────────────────────────
|
|
9
9
|
export function classifyFailure(err, aborted) {
|
|
10
10
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -29,6 +29,15 @@ export function classifyFailure(err, aborted) {
|
|
|
29
29
|
level: 'error'
|
|
30
30
|
};
|
|
31
31
|
}
|
|
32
|
+
if (err instanceof ModelError) {
|
|
33
|
+
return {
|
|
34
|
+
state: 'failed',
|
|
35
|
+
reason: `model_error in ${err.phase}: ${err.cause.slice(0, 160)}`,
|
|
36
|
+
flash: 'model_error',
|
|
37
|
+
notify: `failed: ${err.phase} — model error: ${err.cause.slice(0, 120)}. Restart the model, then resume.`,
|
|
38
|
+
level: 'error'
|
|
39
|
+
};
|
|
40
|
+
}
|
|
32
41
|
if (msg === 'no_verify_block') {
|
|
33
42
|
return {
|
|
34
43
|
state: 'failed',
|
|
@@ -69,6 +69,14 @@ export interface RunSingleTaskOptions {
|
|
|
69
69
|
* work. Lets callers record the id (e.g. stamp the /task-auto entry) so an
|
|
70
70
|
* interrupted run can be resumed instead of restarted. */
|
|
71
71
|
onStart?: (taskId: string) => void | Promise<void>;
|
|
72
|
+
/**
|
|
73
|
+
* Ask the user for a steering message after they interrupt (ESC) the
|
|
74
|
+
* implementation turn. Return text to continue the same task as another turn,
|
|
75
|
+
* or undefined/empty to pause the run. Only consulted with
|
|
76
|
+
* waitForImplementation. Defaults to a ctx.ui.input prompt; injectable so the
|
|
77
|
+
* steer loop is testable without a real dialog.
|
|
78
|
+
*/
|
|
79
|
+
promptSteer?: (ctx: ExtensionCommandContext) => Promise<string | undefined>;
|
|
72
80
|
}
|
|
73
81
|
export interface RunSingleTaskResult {
|
|
74
82
|
taskId: string;
|
|
@@ -83,6 +91,16 @@ export interface RunSingleTaskResult {
|
|
|
83
91
|
* only so test fakes that don't model session replacement can omit it.
|
|
84
92
|
*/
|
|
85
93
|
ctx?: ExtensionCommandContext;
|
|
94
|
+
/**
|
|
95
|
+
* Set when the user interrupted the implementation (ESC) and then declined to
|
|
96
|
+
* steer (submitted an empty steer prompt) — i.e. they want the run to pause
|
|
97
|
+
* rather than continue. Only meaningful with waitForImplementation. The
|
|
98
|
+
* /task-auto loop reads this to pause (resumable) instead of checking the task
|
|
99
|
+
* off and advancing. A plain ESC that the user follows with steering text does
|
|
100
|
+
* NOT set this — that case loops on the same task until a turn finishes
|
|
101
|
+
* uninterrupted.
|
|
102
|
+
*/
|
|
103
|
+
interrupted?: boolean;
|
|
86
104
|
}
|
|
87
105
|
/**
|
|
88
106
|
* Run one prompt through the full single-task pipeline in a fresh session and
|
|
@@ -268,6 +268,50 @@ export class TaskRunner {
|
|
|
268
268
|
}
|
|
269
269
|
}
|
|
270
270
|
}
|
|
271
|
+
/** Dialog copy for the post-interrupt steering prompt. */
|
|
272
|
+
const STEER_TITLE = 'Paused — steer the model';
|
|
273
|
+
const STEER_PLACEHOLDER = 'Type guidance to continue this task, or leave empty to pause';
|
|
274
|
+
/**
|
|
275
|
+
* True when the most recent assistant turn ended because the user interrupted it
|
|
276
|
+
* (pressed ESC). pi records a user abort as stopReason "aborted" on the assistant
|
|
277
|
+
* message, distinct from a natural "stop". Read after the implementation wait so
|
|
278
|
+
* the /task-auto loop can tell "user wants to steer" apart from "task finished".
|
|
279
|
+
*/
|
|
280
|
+
function wasInterrupted(ctx) {
|
|
281
|
+
const entries = ctx.sessionManager.getEntries();
|
|
282
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
283
|
+
const e = entries[i];
|
|
284
|
+
if ('message' in e && 'role' in e.message && e.message.role === 'assistant') {
|
|
285
|
+
return e.message.stopReason === 'aborted';
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
return false;
|
|
289
|
+
}
|
|
290
|
+
/**
|
|
291
|
+
* After the implementation turn settles, honour a user ESC by letting them steer.
|
|
292
|
+
*
|
|
293
|
+
* `waitForIdle` resolves both on natural completion AND on an ESC (which aborts
|
|
294
|
+
* the turn → idle). When the last turn was aborted, the host's main input loop is
|
|
295
|
+
* blocked inside our command handler, so a message typed in the editor would only
|
|
296
|
+
* queue, never run (interactive-mode routes idle input through onInputCallback,
|
|
297
|
+
* which is unset while we hold the loop). We therefore solicit the steering text
|
|
298
|
+
* ourselves and feed it back as another turn via sendUserMessage — which runs to
|
|
299
|
+
* completion when the session is idle. Repeat until a turn finishes uninterrupted.
|
|
300
|
+
*
|
|
301
|
+
* Returns true when the user declined to steer (empty/cancelled) and the run
|
|
302
|
+
* should pause; false when the implementation completed (steered or not).
|
|
303
|
+
*/
|
|
304
|
+
async function steerUntilDone(ctx, promptSteer) {
|
|
305
|
+
const ask = promptSteer ?? (c => c.ui.input(STEER_TITLE, STEER_PLACEHOLDER));
|
|
306
|
+
while (wasInterrupted(ctx)) {
|
|
307
|
+
const steer = await ask(ctx);
|
|
308
|
+
if (steer === undefined || steer.trim().length === 0)
|
|
309
|
+
return true; // pause
|
|
310
|
+
await ctx.sendUserMessage(steer);
|
|
311
|
+
await ctx.waitForIdle();
|
|
312
|
+
}
|
|
313
|
+
return false;
|
|
314
|
+
}
|
|
271
315
|
/**
|
|
272
316
|
* Run one prompt through the full single-task pipeline in a fresh session and
|
|
273
317
|
* deliver its spec. With waitForImplementation, block until the agent finishes
|
|
@@ -280,14 +324,17 @@ export async function runSingleTask(ctx, cwd, rawPrompt, opts = {}) {
|
|
|
280
324
|
// UI after the original ctx is torn down. Defaults to the original for the
|
|
281
325
|
// cancellation path (where no replacement occurs).
|
|
282
326
|
let freshCtx = ctx;
|
|
327
|
+
let interrupted = false;
|
|
283
328
|
const result = await ctx.newSession({
|
|
284
329
|
withSession: async (newCtx) => {
|
|
285
330
|
freshCtx = newCtx;
|
|
286
331
|
getBridge().currentCtx = newCtx; // keep remote dispatch ctx fresh across session replacement
|
|
287
332
|
const runner = new TaskRunner(newCtx, cwd, rawPrompt, opts.resumeId, async (spec) => {
|
|
288
333
|
await newCtx.sendUserMessage(spec);
|
|
289
|
-
if (opts.waitForImplementation)
|
|
334
|
+
if (opts.waitForImplementation) {
|
|
290
335
|
await newCtx.waitForIdle();
|
|
336
|
+
interrupted = await steerUntilDone(newCtx, opts.promptSteer);
|
|
337
|
+
}
|
|
291
338
|
}, opts.spawnFn, opts.onStart);
|
|
292
339
|
await runner.run();
|
|
293
340
|
taskId = runner.taskId;
|
|
@@ -307,7 +354,7 @@ export async function runSingleTask(ctx, cwd, rawPrompt, opts = {}) {
|
|
|
307
354
|
ok = false;
|
|
308
355
|
}
|
|
309
356
|
}
|
|
310
|
-
return { taskId, ok, sessionCancelled: false, ctx: freshCtx };
|
|
357
|
+
return { taskId, ok, sessionCancelled: false, ctx: freshCtx, interrupted };
|
|
311
358
|
}
|
|
312
359
|
// ─── Command handlers ────────────────────────────────────────────────────────
|
|
313
360
|
async function handleTask(args, ctx) {
|
package/dist/task/phases.js
CHANGED
|
@@ -450,7 +450,7 @@ export async function critiqueWithFallback(d, p) {
|
|
|
450
450
|
const msg = err instanceof Error ? err.message : String(err);
|
|
451
451
|
if (msg !== 'no_verify_block')
|
|
452
452
|
throw err;
|
|
453
|
-
p.ctx.ui.notify(
|
|
453
|
+
p.ctx.ui.notify("Critique couldn't produce a VERIFY block — using compose draft. Edit the spec manually if needed.", 'warning');
|
|
454
454
|
return p.spec;
|
|
455
455
|
}
|
|
456
456
|
}
|
|
@@ -14,10 +14,10 @@ function buildFtsQuery(tokens) {
|
|
|
14
14
|
}
|
|
15
15
|
function fallbackChunks(cache, name, version) {
|
|
16
16
|
const dts = cache.db
|
|
17
|
-
.prepare(
|
|
17
|
+
.prepare("SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = 'dts' ORDER BY file_path, id LIMIT 1")
|
|
18
18
|
.all(name, version);
|
|
19
19
|
const readme = cache.db
|
|
20
|
-
.prepare(
|
|
20
|
+
.prepare("SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = 'readme' ORDER BY id LIMIT 1")
|
|
21
21
|
.all(name, version);
|
|
22
22
|
const out = [];
|
|
23
23
|
for (const r of dts) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mjasnikovs/pi-task",
|
|
3
|
-
"version": "0.13.10",
|
|
3
|
+
"version": "0.13.12",
|
|
4
4
|
"description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|