@mjasnikovs/pi-task 0.13.10 → 0.13.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,14 @@ export interface ChildResult {
21
21
  aborted: boolean;
22
22
  /** Extracted assistant text (only populated in json-events mode). */
23
23
  text?: string;
24
+ /**
25
+ * The model-failure cause, when the child's final turn carried
26
+ * stopReason "error" (provider/connection failure after pi exhausted its
27
+ * own retries). pi emits this as an agent_end whose assistant message has
28
+ * empty text, so without it the phase would mis-report "produced no output".
29
+ * Only populated in json-events mode.
30
+ */
31
+ modelError?: string;
24
32
  }
25
33
  export interface ToolCall {
26
34
  name: string;
@@ -74,6 +82,13 @@ export declare class JsonEventSink {
74
82
  private readonly onLoopKill;
75
83
  /** Final assistant text from the agent_end event, if one arrived. */
76
84
  finalText: string;
85
+ /**
86
+ * Set when the final assistant turn carried stopReason "error" — i.e. the
87
+ * model/provider failed (disconnect, fetch failed, socket hang up, 5xx)
88
+ * after pi exhausted its internal retries. Holds the provider's errorMessage
89
+ * so callers can report the real cause instead of an empty completion.
90
+ */
91
+ modelError: string | undefined;
77
92
  private textDeltaAccum;
78
93
  private buf;
79
94
  constructor(opts: RunChildJsonEventsOptions,
@@ -24,6 +24,13 @@ export class JsonEventSink {
24
24
  onLoopKill;
25
25
  /** Final assistant text from the agent_end event, if one arrived. */
26
26
  finalText = '';
27
+ /**
28
+ * Set when the final assistant turn carried stopReason "error" — i.e. the
29
+ * model/provider failed (disconnect, fetch failed, socket hang up, 5xx)
30
+ * after pi exhausted its internal retries. Holds the provider's errorMessage
31
+ * so callers can report the real cause instead of an empty completion.
32
+ */
33
+ modelError = undefined;
27
34
  textDeltaAccum = '';
28
35
  // json-events lines can split across data chunks; this holds the trailing
29
36
  // partial line between feeds so events spanning a boundary still parse. We
@@ -97,7 +104,20 @@ export class JsonEventSink {
97
104
  if (t === 'agent_end' && Array.isArray(evt.messages)) {
98
105
  for (let i = evt.messages.length - 1; i >= 0; i--) {
99
106
  const m = evt.messages[i];
100
- if (m && m.role === 'assistant' && Array.isArray(m.content)) {
107
+ if (!m || m.role !== 'assistant')
108
+ continue;
109
+ // A model failure (disconnect, fetch failed, socket hang up, 5xx
110
+ // after pi's own retries) arrives as an assistant message with
111
+ // stopReason "error" and the real cause in errorMessage — but
112
+ // EMPTY text content. Capture it so the phase reports the actual
113
+ // failure instead of the useless "produced no output".
114
+ if (m.stopReason === 'error'
115
+ && typeof m.errorMessage === 'string'
116
+ && m.errorMessage.length > 0
117
+ && this.modelError === undefined) {
118
+ this.modelError = m.errorMessage;
119
+ }
120
+ if (Array.isArray(m.content)) {
101
121
  const texts = [];
102
122
  for (const c of m.content) {
103
123
  if (c?.type === 'text' && typeof c.text === 'string') {
@@ -186,7 +206,14 @@ export function runChild(spawn, invocation, cwd, signal, opts) {
186
206
  if (sink)
187
207
  sink.flush();
188
208
  const text = sink ? sink.text : undefined;
189
- resolve({ stdout, stderr, exitCode: code ?? 0, aborted, text });
209
+ resolve({
210
+ stdout,
211
+ stderr,
212
+ exitCode: code ?? 0,
213
+ aborted,
214
+ text,
215
+ modelError: sink?.modelError
216
+ });
190
217
  });
191
218
  proc.on('error', () => {
192
219
  resolve({ stdout, stderr, exitCode: 1, aborted });
@@ -280,6 +280,16 @@ export async function runAutoLoop(ctx, cwd, id, deps) {
280
280
  active.ui.notify(`${id} paused — could not start a session. Run /task-auto-resume to retry.`, 'warning');
281
281
  return;
282
282
  }
283
+ if (res.interrupted) {
284
+ // The user interrupted implementation (ESC) and then declined to
285
+ // steer (empty steer prompt) — they want to stop here. Pause
286
+ // without checking the task off, so /task-auto-resume re-delivers
287
+ // this task's spec to finish it. (A plain ESC that the user
288
+ // follows with steering text never reaches here — that loops on
289
+ // the same task inside runSingleTask until a turn completes.)
290
+ active.ui.notify(`${id} paused at "${next.title}" — resume with /task-auto-resume.`, 'warning');
291
+ return;
292
+ }
283
293
  if (!res.ok) {
284
294
  await updateTaskFrontMatter(cwd, id, { state: 'failed' });
285
295
  active.ui.notify(`${id} stopped at "${next.title}" — fix and run /task-auto-resume.`, 'error');
@@ -16,6 +16,8 @@ export interface PhaseRunResult {
16
16
  loopHit?: LoopHit;
17
17
  /** Set when the assistant text contains an unexecuted, leaked tool call. */
18
18
  leakedToolCall?: string;
19
+ /** Set when the child's final turn failed with stopReason "error" (model/provider failure). */
20
+ modelError?: string;
19
21
  }
20
22
  export declare function childArgs(tools: string, prompt: string): string[];
21
23
  export declare const USER_CANCELLED = "__user_cancelled__";
@@ -74,6 +76,22 @@ export declare class LoopExhaustedError extends Error {
74
76
  readonly history: LoopHit[];
75
77
  constructor(phase: string, history: LoopHit[]);
76
78
  }
79
+ /**
80
+ * Thrown when a phase child's final turn failed with stopReason "error" — the
81
+ * model/provider died (local model disconnect, fetch failed, socket hang up,
82
+ * provider 5xx) after pi exhausted its own internal retries. pi reports this as
83
+ * an agent_end with empty assistant text, which would otherwise surface as the
84
+ * misleading "produced no output"; this names the real cause instead.
85
+ *
86
+ * Fail-fast: not retried at the pi-task layer. pi already retried the retryable
87
+ * cases; re-spawning a fresh child against the same dead endpoint only burns
88
+ * time and buries the real error. Restart the model/provider, then resume.
89
+ */
90
+ export declare class ModelError extends Error {
91
+ readonly phase: string;
92
+ readonly cause: string;
93
+ constructor(phase: string, cause: string);
94
+ }
77
95
  /**
78
96
  * Thrown when a phase child repeatedly wrote a tool call as plain text (a markup
79
97
  * dialect pi's harness didn't parse) instead of invoking it. The call never ran,
@@ -69,6 +69,7 @@ export async function runChild(cwd, tools, prompt, signal, onLine, onContextUsag
69
69
  exitCode: result.exitCode,
70
70
  stderr: result.stderr.trim(),
71
71
  loopHit,
72
+ modelError: result.modelError,
72
73
  // A tool call the model wrote as text (wrong dialect) never executed and
73
74
  // sailed past the structured-event guards above; flag it so the wrappers
74
75
  // can re-prompt instead of accepting the unexecuted call. Only meaningful
@@ -90,6 +91,11 @@ export async function runPhaseChild(deps, name, tools, prompt) {
90
91
  if (r.exitCode !== 0) {
91
92
  throw new Error(`${name} child failed: ${r.stderr || '(no stderr)'}`);
92
93
  }
94
+ if (r.modelError) {
95
+ // The model/provider failed (pi exited 0 with a stopReason "error"
96
+ // turn). Surface the real cause and fail fast — pi already retried.
97
+ throw new ModelError(name, r.modelError);
98
+ }
93
99
  if (r.text.trim().length === 0) {
94
100
  // An empty completion (exit 0, no assistant text, no stderr) is almost
95
101
  // always transient — a model/API error swallowed inside --mode json,
@@ -161,6 +167,11 @@ export async function runPhaseWithLoopGuard(deps, name, tools, buildPrompt) {
161
167
  if (r.exitCode !== 0) {
162
168
  throw new Error(`${name} child failed: ${r.stderr || '(no stderr)'}`);
163
169
  }
170
+ if (r.modelError) {
171
+ // The model/provider failed (pi exited 0 with a stopReason "error"
172
+ // turn). Surface the real cause and fail fast — pi already retried.
173
+ throw new ModelError(name, r.modelError);
174
+ }
164
175
  if (r.text.trim().length === 0) {
165
176
  // An empty completion (exit 0, no assistant text, no stderr) is almost
166
177
  // always transient — a model/API error swallowed inside --mode json,
@@ -210,6 +221,28 @@ export class LoopExhaustedError extends Error {
210
221
  this.name = 'LoopExhaustedError';
211
222
  }
212
223
  }
224
+ // ─── ModelError ──────────────────────────────────────────────────────────────
225
+ /**
226
+ * Thrown when a phase child's final turn failed with stopReason "error" — the
227
+ * model/provider died (local model disconnect, fetch failed, socket hang up,
228
+ * provider 5xx) after pi exhausted its own internal retries. pi reports this as
229
+ * an agent_end with empty assistant text, which would otherwise surface as the
230
+ * misleading "produced no output"; this names the real cause instead.
231
+ *
232
+ * Fail-fast: not retried at the pi-task layer. pi already retried the retryable
233
+ * cases; re-spawning a fresh child against the same dead endpoint only burns
234
+ * time and buries the real error. Restart the model/provider, then resume.
235
+ */
236
+ export class ModelError extends Error {
237
+ phase;
238
+ cause;
239
+ constructor(phase, cause) {
240
+ super(`${phase} child: model error — ${cause}`);
241
+ this.phase = phase;
242
+ this.cause = cause;
243
+ this.name = 'ModelError';
244
+ }
245
+ }
213
246
  // ─── LeakedToolCallError ─────────────────────────────────────────────────────
214
247
  /**
215
248
  * Thrown when a phase child repeatedly wrote a tool call as plain text (a markup
@@ -4,7 +4,7 @@
4
4
  */
5
5
  import { updateTaskFrontMatter } from './task-io.js';
6
6
  import { flashTerminalWidget } from './widget.js';
7
- import { LoopExhaustedError, LeakedToolCallError, USER_CANCELLED } from './child-runner.js';
7
+ import { LoopExhaustedError, LeakedToolCallError, ModelError, USER_CANCELLED } from './child-runner.js';
8
8
  // ─── Classifier ──────────────────────────────────────────────────────────────
9
9
  export function classifyFailure(err, aborted) {
10
10
  const msg = err instanceof Error ? err.message : String(err);
@@ -29,6 +29,15 @@ export function classifyFailure(err, aborted) {
29
29
  level: 'error'
30
30
  };
31
31
  }
32
+ if (err instanceof ModelError) {
33
+ return {
34
+ state: 'failed',
35
+ reason: `model_error in ${err.phase}: ${err.cause.slice(0, 160)}`,
36
+ flash: 'model_error',
37
+ notify: `failed: ${err.phase} — model error: ${err.cause.slice(0, 120)}. Restart the model, then resume.`,
38
+ level: 'error'
39
+ };
40
+ }
32
41
  if (msg === 'no_verify_block') {
33
42
  return {
34
43
  state: 'failed',
@@ -69,6 +69,14 @@ export interface RunSingleTaskOptions {
69
69
  * work. Lets callers record the id (e.g. stamp the /task-auto entry) so an
70
70
  * interrupted run can be resumed instead of restarted. */
71
71
  onStart?: (taskId: string) => void | Promise<void>;
72
+ /**
73
+ * Ask the user for a steering message after they interrupt (ESC) the
74
+ * implementation turn. Return text to continue the same task as another turn,
75
+ * or undefined/empty to pause the run. Only consulted with
76
+ * waitForImplementation. Defaults to a ctx.ui.input prompt; injectable so the
77
+ * steer loop is testable without a real dialog.
78
+ */
79
+ promptSteer?: (ctx: ExtensionCommandContext) => Promise<string | undefined>;
72
80
  }
73
81
  export interface RunSingleTaskResult {
74
82
  taskId: string;
@@ -83,6 +91,16 @@ export interface RunSingleTaskResult {
83
91
  * only so test fakes that don't model session replacement can omit it.
84
92
  */
85
93
  ctx?: ExtensionCommandContext;
94
+ /**
95
+ * Set when the user interrupted the implementation (ESC) and then declined to
96
+ * steer (submitted an empty steer prompt) — i.e. they want the run to pause
97
+ * rather than continue. Only meaningful with waitForImplementation. The
98
+ * /task-auto loop reads this to pause (resumable) instead of checking the task
99
+ * off and advancing. A plain ESC that the user follows with steering text does
100
+ * NOT set this — that case loops on the same task until a turn finishes
101
+ * uninterrupted.
102
+ */
103
+ interrupted?: boolean;
86
104
  }
87
105
  /**
88
106
  * Run one prompt through the full single-task pipeline in a fresh session and
@@ -268,6 +268,50 @@ export class TaskRunner {
268
268
  }
269
269
  }
270
270
  }
271
+ /** Dialog copy for the post-interrupt steering prompt. */
272
+ const STEER_TITLE = 'Paused — steer the model';
273
+ const STEER_PLACEHOLDER = 'Type guidance to continue this task, or leave empty to pause';
274
+ /**
275
+ * True when the most recent assistant turn ended because the user interrupted it
276
+ * (pressed ESC). pi records a user abort as stopReason "aborted" on the assistant
277
+ * message, distinct from a natural "stop". Read after the implementation wait so
278
+ * the /task-auto loop can tell "user wants to steer" apart from "task finished".
279
+ */
280
+ function wasInterrupted(ctx) {
281
+ const entries = ctx.sessionManager.getEntries();
282
+ for (let i = entries.length - 1; i >= 0; i--) {
283
+ const e = entries[i];
284
+ if ('message' in e && 'role' in e.message && e.message.role === 'assistant') {
285
+ return e.message.stopReason === 'aborted';
286
+ }
287
+ }
288
+ return false;
289
+ }
290
+ /**
291
+ * After the implementation turn settles, honour a user ESC by letting them steer.
292
+ *
293
+ * `waitForIdle` resolves both on natural completion AND on an ESC (which aborts
294
+ * the turn → idle). When the last turn was aborted, the host's main input loop is
295
+ * blocked inside our command handler, so a message typed in the editor would only
296
+ * queue, never run (interactive-mode routes idle input through onInputCallback,
297
+ * which is unset while we hold the loop). We therefore solicit the steering text
298
+ * ourselves and feed it back as another turn via sendUserMessage — which runs to
299
+ * completion when the session is idle. Repeat until a turn finishes uninterrupted.
300
+ *
301
+ * Returns true when the user declined to steer (empty/cancelled) and the run
302
+ * should pause; false when the implementation completed (steered or not).
303
+ */
304
+ async function steerUntilDone(ctx, promptSteer) {
305
+ const ask = promptSteer ?? (c => c.ui.input(STEER_TITLE, STEER_PLACEHOLDER));
306
+ while (wasInterrupted(ctx)) {
307
+ const steer = await ask(ctx);
308
+ if (steer === undefined || steer.trim().length === 0)
309
+ return true; // pause
310
+ await ctx.sendUserMessage(steer);
311
+ await ctx.waitForIdle();
312
+ }
313
+ return false;
314
+ }
271
315
  /**
272
316
  * Run one prompt through the full single-task pipeline in a fresh session and
273
317
  * deliver its spec. With waitForImplementation, block until the agent finishes
@@ -280,14 +324,17 @@ export async function runSingleTask(ctx, cwd, rawPrompt, opts = {}) {
280
324
  // UI after the original ctx is torn down. Defaults to the original for the
281
325
  // cancellation path (where no replacement occurs).
282
326
  let freshCtx = ctx;
327
+ let interrupted = false;
283
328
  const result = await ctx.newSession({
284
329
  withSession: async (newCtx) => {
285
330
  freshCtx = newCtx;
286
331
  getBridge().currentCtx = newCtx; // keep remote dispatch ctx fresh across session replacement
287
332
  const runner = new TaskRunner(newCtx, cwd, rawPrompt, opts.resumeId, async (spec) => {
288
333
  await newCtx.sendUserMessage(spec);
289
- if (opts.waitForImplementation)
334
+ if (opts.waitForImplementation) {
290
335
  await newCtx.waitForIdle();
336
+ interrupted = await steerUntilDone(newCtx, opts.promptSteer);
337
+ }
291
338
  }, opts.spawnFn, opts.onStart);
292
339
  await runner.run();
293
340
  taskId = runner.taskId;
@@ -307,7 +354,7 @@ export async function runSingleTask(ctx, cwd, rawPrompt, opts = {}) {
307
354
  ok = false;
308
355
  }
309
356
  }
310
- return { taskId, ok, sessionCancelled: false, ctx: freshCtx };
357
+ return { taskId, ok, sessionCancelled: false, ctx: freshCtx, interrupted };
311
358
  }
312
359
  // ─── Command handlers ────────────────────────────────────────────────────────
313
360
  async function handleTask(args, ctx) {
@@ -450,7 +450,7 @@ export async function critiqueWithFallback(d, p) {
450
450
  const msg = err instanceof Error ? err.message : String(err);
451
451
  if (msg !== 'no_verify_block')
452
452
  throw err;
453
- p.ctx.ui.notify('Critique couldn\'t produce a VERIFY block — using compose draft. Edit the spec manually if needed.', 'warning');
453
+ p.ctx.ui.notify("Critique couldn't produce a VERIFY block — using compose draft. Edit the spec manually if needed.", 'warning');
454
454
  return p.spec;
455
455
  }
456
456
  }
@@ -14,10 +14,10 @@ function buildFtsQuery(tokens) {
14
14
  }
15
15
  function fallbackChunks(cache, name, version) {
16
16
  const dts = cache.db
17
- .prepare('SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = \'dts\' ORDER BY file_path, id LIMIT 1')
17
+ .prepare("SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = 'dts' ORDER BY file_path, id LIMIT 1")
18
18
  .all(name, version);
19
19
  const readme = cache.db
20
- .prepare('SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = \'readme\' ORDER BY id LIMIT 1')
20
+ .prepare("SELECT file_path, kind, content, 0 AS rank FROM chunks WHERE name = ? AND version = ? AND kind = 'readme' ORDER BY id LIMIT 1")
21
21
  .all(name, version);
22
22
  const out = [];
23
23
  for (const r of dts) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mjasnikovs/pi-task",
3
- "version": "0.13.10",
3
+ "version": "0.13.12",
4
4
  "description": "Deterministic spec-orchestration for local models, with a bundled real-time remote web view and web/docs/fetch/worker subagent tools.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",