@mjasnikovs/pi-task 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,318 @@
1
+ /**
2
+ * /task-auto — plans a feature into a resumable list of task titles, then runs
3
+ * each title through the existing single-task pipeline one at a time.
4
+ *
5
+ * This module holds both halves: the planning phase (expandFeatureMentions,
6
+ * planAuto) and the run loop, defaultDeps, and command handlers that drive it.
7
+ */
8
+ import * as fsp from 'node:fs/promises';
9
+ import * as path from 'node:path';
10
+ import { runSingleTask } from './orchestrator.js';
11
+ import { parseClarifyList, deriveTitle } from './parsers.js';
12
+ import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
13
+ import { AUTO_CLARIFY_PROMPT, AUTO_DECOMPOSE_PROMPT } from './auto-prompts.js';
14
+ import { allocateAutoId, buildAutoBody, parseDecomposeList, parseTaskList, checkOffTask, findResumableAuto } from './auto-io.js';
15
+ import { writeTaskFile, readTaskFile, updateTaskFrontMatter } from './task-io.js';
16
+ import { gitCommitAll } from './auto-commit.js';
17
+ import { runPhaseChild, USER_CANCELLED } from './child-runner.js';
18
+ import { startAutoLoader } from './widget.js';
19
// Matches pi's @-file completion token (a path after @, until whitespace).
const MENTION_RE = /(?:^|\s)@([^\s]+)/g;
// Sentence punctuation that commonly trails a mention written in prose
// ("see @spec.md." / "(read @notes.md)"). Only used as a fallback spelling.
const TRAILING_PUNCTUATION_RE = /[.,;:!?)\]}'"]+$/;
/**
 * Read a mentioned file as UTF-8.
 *
 * @param {string} absPath Absolute path to read.
 * @returns {Promise<string | undefined>} The file text, or undefined when the
 *          path cannot be read (missing, a directory, permission denied, …).
 */
async function readMentionedFile(absPath) {
    try {
        return await fsp.readFile(absPath, 'utf8');
    }
    catch {
        // Deliberate best-effort: an unreadable mention is simply not expanded.
        return undefined;
    }
}
/**
 * Expand any @file references in the feature text by appending each referenced
 * file's contents, so the planning children (clarify, decompose) always see the
 * real spec inline instead of relying on the model to open the file itself.
 * Without this, clarify on a one-line "Implement @spec.md" tends to bail with
 * NONE because, to the model, the request looks small and unambiguous.
 *
 * Each mention is tried verbatim first. If that path is unreadable and the
 * token ends in sentence punctuation, the punctuation-trimmed path is tried as
 * a fallback, so "Implement @spec.md." still inlines spec.md. Files are
 * de-duplicated by resolved path, so "@spec.md" and "@./spec.md" inline once.
 *
 * Unreadable mentions (typos, non-file @tokens) are left untouched; the feature
 * is returned verbatim when nothing readable (and non-empty) is referenced.
 *
 * @param {string} cwd     Directory that relative mentions are resolved against.
 * @param {string} feature Raw feature text, possibly containing @path tokens.
 * @returns {Promise<string>} The trimmed feature followed by one
 *          "--- contents of <path> ---" block per readable file, or the
 *          original feature string unchanged when there is nothing to append.
 */
export async function expandFeatureMentions(cwd, feature) {
    const seen = new Set();
    const blocks = [];
    for (const m of feature.matchAll(MENTION_RE)) {
        const token = m[1];
        const trimmed = token.replace(TRAILING_PUNCTUATION_RE, '');
        const candidates = trimmed.length > 0 && trimmed !== token ? [token, trimmed] : [token];
        for (const rel of candidates) {
            const abs = path.resolve(cwd, rel);
            if (seen.has(abs)) {
                // Already handled earlier in this text (inlined or known unreadable).
                break;
            }
            seen.add(abs);
            const body = await readMentionedFile(abs);
            if (body === undefined) {
                continue; // unreadable as written — try the trimmed spelling, if any
            }
            const text = body.trim();
            if (text.length > 0) {
                blocks.push(`--- contents of ${rel} ---\n${text}`);
            }
            break;
        }
    }
    return blocks.length === 0 ? feature : `${feature.trim()}\n\n${blocks.join('\n\n')}`;
}
50
/**
 * Plan phase of /task-auto: clarify → decompose → write the AUTO file.
 *
 * Clarification is sequential and adaptive: one question per child call, with
 * every earlier Q/A pair fed back into the next prompt so later questions can
 * react to earlier answers. Each question is shown with the model's
 * recommended default pre-filled (Enter to accept, type to override); nothing
 * is auto-answered. The loop ends when the parsed child output contains no
 * question, or when the user dismisses the input dialog.
 *
 * @param ctx     Command context; ctx.ui.theme, ctx.ui.input and ctx.ui.notify are used.
 * @param cwd     Working directory for @file expansion and for the AUTO file.
 * @param feature Raw feature text as typed by the user.
 * @param deps    Supplies runChild(name, tools, prompt) for the planning children.
 * @returns The id of the persisted AUTO file, or null when the user dismissed a
 *          question or decomposition produced no task titles.
 */
export async function planAuto(ctx, cwd, feature, deps) {
    const { theme } = ctx.ui;
    // Inline any @file spec so both children reason over the real content
    // rather than a one-line "Implement @file" that reads as trivial.
    const featureForModel = await expandFeatureMentions(cwd, feature);
    const transcript = [];
    // Open-ended: keep asking until the model has nothing left or the user dismisses.
    while (true) {
        const clarifyOutput = await deps.runChild('auto-clarify', 'read', AUTO_CLARIFY_PROMPT(featureForModel, transcript.join('\n')));
        const questions = parseClarifyList(clarifyOutput);
        if (questions.length === 0) {
            break; // NONE / nothing left to ask
        }
        const { question, suggested } = questions[0];
        // Styled text for the dialog title; plain text for the editable
        // default and for what ends up in the persisted file.
        const shownQ = renderInlineMarkdown(question, theme);
        const plainQ = stripInlineMarkdown(question);
        const plainSuggested = suggested === undefined ? undefined : stripInlineMarkdown(suggested);
        let dialogTitle;
        if (suggested) {
            dialogTitle = `${shownQ}\n${theme.fg('muted', 'Recommended:')}\n\n${renderInlineMarkdown(suggested, theme)}\n\n${theme.fg('muted', 'press Enter to accept')}`;
        }
        else {
            dialogTitle = `${shownQ}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
        }
        const reply = await ctx.ui.input(dialogTitle, plainSuggested);
        if (reply === undefined) {
            ctx.ui.notify('/task-auto cancelled.', 'warning');
            return null;
        }
        const typed = reply.trim();
        let answer = typed;
        if (typed.length === 0) {
            answer = plainSuggested ? `${plainSuggested} (accepted recommendation)` : '(skipped)';
        }
        const n = transcript.length + 1;
        transcript.push(`Q${n}: ${plainQ}\nA${n}: ${answer}`);
    }
    if (transcript.length === 0) {
        ctx.ui.notify('No clarifying questions needed — planning tasks…', 'info');
    }
    const clarifications = transcript.join('\n');
    // decompose
    const decomposeOutput = await deps.runChild('auto-decompose', 'read', AUTO_DECOMPOSE_PROMPT(featureForModel, clarifications));
    const titles = parseDecomposeList(decomposeOutput);
    if (titles.length === 0) {
        ctx.ui.notify('/task-auto: no tasks produced from the feature.', 'warning');
        return null;
    }
    // persist
    const id = await allocateAutoId(cwd);
    const timestamp = new Date().toISOString();
    const frontMatter = {
        id,
        state: 'in_progress',
        phase: 'done',
        created_at: timestamp,
        updated_at: timestamp,
        title: deriveTitle(feature)
    };
    await writeTaskFile(cwd, frontMatter, buildAutoBody(feature, clarifications, titles));
    return id;
}
120
/** Loader step label and position for each feature-level planning child. */
const AUTO_PLAN_STEPS = {
    'auto-clarify': { step: 'clarify', stepNum: 1 },
    'auto-decompose': { step: 'decompose', stepNum: 2 }
};
const AUTO_PLAN_STEP_TOTAL = 2;
/**
 * Build the production dependency bundle used by planAuto and runAutoLoop.
 *
 * @param ctx    Command context; ctx.model?.contextWindow is the fallback window size.
 * @param cwd    Working directory handed to the planning children.
 * @param signal Abort signal forwarded to the planning children and to commits.
 * @param title  Title shown by the loader while a planning child runs.
 * @returns An object with runChild (planning child wrapped in a loader),
 *          runTask (one title through runSingleTask, waiting for the
 *          implementation) and commit (gitCommitAll).
 */
function defaultDeps(ctx, cwd, signal, title) {
    // Written by the child callbacks below, read by the loader's state getter.
    let lastLine;
    let contextUsage;
    const parentContextWindow = ctx.model?.contextWindow ?? 0;
    const onChildOutput = (line) => {
        lastLine = line;
    };
    const onContextUsage = (snapshot) => {
        // Window size preference: this snapshot, else the previous usage
        // record, else the parent model's window.
        let windowSize;
        if (snapshot.contextWindow > 0) {
            windowSize = snapshot.contextWindow;
        }
        else {
            windowSize = contextUsage?.contextWindow || parentContextWindow;
        }
        let percent;
        if (windowSize > 0) {
            percent = Math.min(100, (snapshot.tokens / windowSize) * 100);
        }
        else {
            percent = snapshot.percent;
        }
        contextUsage = { tokens: snapshot.tokens, contextWindow: windowSize, percent };
    };
    const phaseDeps = { cwd, taskId: '', signal, onChildOutput, onContextUsage };
    const runChild = async (name, tools, prompt) => {
        // Planning children are slow LLM calls with no UI of their own, so a
        // loader is shown for their duration. Reset the mirrored state first.
        lastLine = undefined;
        contextUsage = undefined;
        const startedAt = Date.now();
        const { step, stepNum } = AUTO_PLAN_STEPS[name] ?? { step: name, stepNum: 1 };
        const readLoaderState = () => ({
            title,
            step,
            stepNum,
            stepTotal: AUTO_PLAN_STEP_TOTAL,
            startedAt,
            lastLine,
            contextUsage
        });
        const stopLoader = startAutoLoader(ctx, readLoaderState);
        try {
            return await runPhaseChild(phaseDeps, name, tools, prompt);
        }
        finally {
            // Always tear the loader down, including when the child throws.
            stopLoader();
        }
    };
    const runTask = (c, cwd2, t) => runSingleTask(c, cwd2, t, { waitForImplementation: true });
    const commit = (cwd2, message) => gitCommitAll(cwd2, message, signal);
    return { runChild, runTask, commit };
}
176
// ─── Loop ────────────────────────────────────────────────────────────────────
// Module-level run state shared by the loop and the command handlers.
let cancelRequested = false;
let autoRunning = false;
/** Ask the running loop to stop; the flag is checked before each task starts. */
export function requestAutoCancel() {
    cancelRequested = true;
}
/**
 * Run the unchecked tasks of AUTO file `id` one at a time until the list is
 * exhausted, a task fails, a session cannot be started, or a cancel is requested.
 *
 * Each task runs in its own fresh session (deps.runTask → ctx.newSession),
 * which leaves the ctx that was passed in stale. The replacement ctx handed
 * back by the runner is adopted for all later UI calls and for the next task.
 *
 * @param ctx  Command context used until the first task replaces the session.
 * @param cwd  Working directory containing the AUTO file.
 * @param id   Id of the AUTO file to run.
 * @param deps Supplies runTask(ctx, cwd, title) and commit(cwd, message).
 */
export async function runAutoLoop(ctx, cwd, id, deps) {
    cancelRequested = false;
    let active = ctx;
    // Always notifies through whichever ctx is current at call time.
    const say = (message, level) => active.ui.notify(message, level);
    try {
        while (!cancelRequested) {
            const { body } = await readTaskFile(cwd, id);
            const entries = parseTaskList(body);
            const next = entries.find(entry => !entry.done);
            if (!next) {
                await updateTaskFrontMatter(cwd, id, { state: 'completed' });
                say(`${id} complete — all ${entries.length} tasks done.`, 'info');
                return;
            }
            say(`${id}: task ${next.index + 1}/${entries.length} — ${next.title}`, 'info');
            const res = await deps.runTask(active, cwd, next.title);
            active = res.ctx ?? active;
            if (res.sessionCancelled) {
                say(`${id} paused — could not start a session. Run /task-auto-resume to retry.`, 'warning');
                return;
            }
            if (!res.ok) {
                await updateTaskFrontMatter(cwd, id, { state: 'failed' });
                say(`${id} stopped at "${next.title}" — fix and run /task-auto-resume.`, 'error');
                return;
            }
            // The task passed: tick it off in the AUTO file, then snapshot the
            // work together with that check-off in a single commit.
            await checkOffTask(cwd, id, next.index, res.taskId, next.title);
            // Best-effort commit: a failed or empty commit only warns, and the
            // run carries on with the next task.
            const outcome = await deps.commit(cwd, `task: ${next.title} (${res.taskId})`);
            if (outcome.committed) {
                say(`${id}: committed "${next.title}".`, 'info');
            }
            else {
                say(`${id}: not committed (${outcome.reason ?? 'unknown'}) — continuing.`, 'warning');
            }
        }
        // Only reachable when a cancel was requested between tasks.
        say(`${id} cancelled — resume with /task-auto-resume.`, 'warning');
    }
    finally {
        cancelRequested = false;
    }
}
236
+ // ─── Command handlers ────────────────────────────────────────────────────────
237
/**
 * /task-auto <feature>: plan the feature into an AUTO file, then run its tasks.
 *
 * With no argument, pre-fills the editor with the command and returns. While a
 * run is in flight `autoRunning` is true so /task-auto-cancel can target it.
 * Both run flags are reset in a `finally`, so neither a thrown error from the
 * loop nor an early exit from planning can leave stale state behind.
 *
 * @param args Raw text typed after the command.
 * @param ctx  Command context.
 */
async function handleTaskAuto(args, ctx) {
    await ctx.waitForIdle();
    const cwd = ctx.cwd;
    const raw = args.trim();
    if (raw.length === 0) {
        ctx.ui.setEditorText('/task-auto ');
        ctx.ui.notify('Describe the feature after /task-auto (use @ for file completion).', 'info');
        return;
    }
    autoRunning = true;
    // Start from a clean slate so a cancel left over from an earlier run (or
    // from a stray requestAutoCancel call) cannot abort this one.
    cancelRequested = false;
    try {
        const abort = new AbortController();
        const deps = defaultDeps(ctx, cwd, abort.signal, deriveTitle(raw));
        let id;
        try {
            id = await planAuto(ctx, cwd, raw, deps);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            if (msg === USER_CANCELLED) {
                ctx.ui.notify('/task-auto cancelled.', 'warning');
                return;
            }
            ctx.ui.notify(`/task-auto planning failed: ${msg}`, 'error');
            return;
        }
        if (!id) {
            return;
        }
        // Honour a cancel requested during planning here, because the loop
        // resets the flag as its first step.
        if (cancelRequested) {
            ctx.ui.notify('/task-auto cancelled.', 'warning');
            return;
        }
        await runAutoLoop(ctx, cwd, id, deps);
    }
    finally {
        // No ctx use here: the session may have been replaced during the run.
        autoRunning = false;
        cancelRequested = false;
    }
}
278
/**
 * /task-auto-resume: continue the resumable AUTO run, if there is one.
 *
 * Marks the AUTO file in_progress again and re-enters the loop. The running
 * flag is cleared in a `finally` so an error thrown by the loop cannot leave
 * /task-auto-cancel believing a run is still active.
 *
 * @param _args Unused.
 * @param ctx   Command context.
 */
async function handleTaskAutoResume(_args, ctx) {
    await ctx.waitForIdle();
    const cwd = ctx.cwd;
    const id = await findResumableAuto(cwd);
    if (!id) {
        ctx.ui.notify('No resumable /task-auto run.', 'info');
        return;
    }
    ctx.ui.notify(`Resuming ${id}…`, 'info');
    await updateTaskFrontMatter(cwd, id, { state: 'in_progress' });
    autoRunning = true;
    try {
        const abort = new AbortController();
        // The loop only uses runTask and commit from the deps; the title is
        // read solely by runChild (planning), so the id is passed as a
        // meaningful value in case planning is ever invoked from here.
        await runAutoLoop(ctx, cwd, id, defaultDeps(ctx, cwd, abort.signal, id));
    }
    finally {
        autoRunning = false;
    }
}
295
/**
 * /task-auto-cancel: request that the running loop stop. The request only sets
 * a flag, so the task currently executing is allowed to finish first.
 *
 * @param _args Unused.
 * @param ctx   Command context used for the notification.
 */
// eslint-disable-next-line @typescript-eslint/require-await
async function handleTaskAutoCancel(_args, ctx) {
    if (autoRunning) {
        requestAutoCancel();
        ctx.ui.notify('Stopping /task-auto after the current task…', 'warning');
        return;
    }
    ctx.ui.notify('No /task-auto loop is running.', 'info');
}
304
+ // ─── Registration ────────────────────────────────────────────────────────────
305
/**
 * Register the three /task-auto commands on the extension API.
 *
 * @param pi Extension API exposing registerCommand(name, { description, handler }).
 */
export function registerTaskAuto(pi) {
    const commands = [
        ['task-auto', 'Plan a feature into tasks and run them. Usage: /task-auto <feature>', handleTaskAuto],
        ['task-auto-resume', 'Resume the active /task-auto run.', handleTaskAutoResume],
        ['task-auto-cancel', 'Stop the running /task-auto loop after the current task.', handleTaskAutoCancel]
    ];
    for (const [name, description, handler] of commands) {
        pi.registerCommand(name, { description, handler });
    }
}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Prompts for /task-auto's two feature-level child calls. These produce a task
3
+ * LIST only; all research/spec depth is /task's job, run per-title later.
4
+ */
5
+ /**
6
+ * Clarify: asks ONE question at a time. Output MUST match parseClarifyList — a
7
+ * single numbered question followed by a "SUGGESTED: <default>" line, or the
8
+ * literal token NONE when no clarification remains. priorQA carries the
9
+ * questions already answered so each next question adapts to them.
10
+ */
11
+ export declare const AUTO_CLARIFY_PROMPT: (feature: string, priorQA: string) => string;
12
+ /**
13
+ * Decompose: output a markdown checkbox list of task titles (one line each).
14
+ */
15
+ export declare const AUTO_DECOMPOSE_PROMPT: (feature: string, clarifications: string) => string;
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Prompts for /task-auto's two feature-level child calls. These produce a task
3
+ * LIST only; all research/spec depth is /task's job, run per-title later.
4
+ */
5
+ /**
6
+ * Clarify: asks ONE question at a time. Output MUST match parseClarifyList — a
7
+ * single numbered question followed by a "SUGGESTED: <default>" line, or the
8
+ * literal token NONE when no clarification remains. priorQA carries the
9
+ * questions already answered so each next question adapts to them.
10
+ */
11
+ export const AUTO_CLARIFY_PROMPT = (feature, priorQA) => `You are planning how to split a feature into separate implementation tasks, one clarifying question at a time.
12
+
13
+ FEATURE REQUEST:
14
+ ${feature.trim()}
15
+
16
+ ANSWERS SO FAR:
17
+ ${priorQA.trim() || '(none yet)'}
18
+
19
+ You may use the read tool to inspect the repo and any referenced docs so your
20
+ question and recommendation are grounded in what already exists.
21
+
22
+ Output the SINGLE most important clarifying question that REMAINS — the one whose
23
+ answer would most change HOW this feature is split into tasks (scope boundaries,
24
+ which subsystems are in/out, ordering, the cross-cutting technical choices that
25
+ fork the breakdown). Account for the answers so far:
26
+ - Never re-ask something already answered above.
27
+ - If an answer introduced a new fork or contradicts an assumption (for example,
28
+ the user chose a framework or tool the request did not anticipate), ask about
29
+ the most important consequence of that choice next — how it is built, what
30
+ extra dependencies it pulls in, how it changes the other subsystems.
31
+ - When the feature spans multiple subsystems, work through its forks one at a
32
+ time (file/blob storage, client/rendering strategy, auth and session model,
33
+ real-time vs polling transport, search, deployment).
34
+ - Skip anything /task will naturally resolve per-task during its own research.
35
+
36
+ Also propose the single most sensible default answer for this question, inferred
37
+ from the repo, the referenced docs, and any stated philosophy or constraints —
38
+ concrete and decisive, shown to the user as a recommendation they can accept or
39
+ override.
40
+
41
+ OUTPUT FORMAT (exact):
42
+ - One clarifying question as a single numbered line: "1. ...".
43
+ - On the NEXT line (never inline), a line that begins with "SUGGESTED: <your recommended default>".
44
+ - Put the core question in **bold**, followed by a short one-line rationale in plain prose. Backticks around code/identifiers are fine. Avoid other markdown (headings, bullet lists, links).
45
+ - Only when the spec already pins down every choice that would change the task breakdown — nothing decision-changing is left to ask — output exactly:
46
+ NONE`;
47
+ /**
48
+ * Decompose: output a markdown checkbox list of task titles (one line each).
49
+ */
50
+ export const AUTO_DECOMPOSE_PROMPT = (feature, clarifications) => `Split this feature into an ordered list of implementation tasks. Each task
51
+ will be handed, by its title, to a separate pipeline that does its own research
52
+ and writes its own spec — so here you produce TITLES ONLY, not specs.
53
+
54
+ FEATURE REQUEST:
55
+ ${feature.trim()}
56
+
57
+ CLARIFICATIONS:
58
+ ${clarifications.trim() || '(none)'}
59
+
60
+ RULES:
61
+ - One task per line, as a markdown checkbox: "- [ ] <title>".
62
+ - Each title is a short imperative phrase; optionally add " — <one key detail>".
63
+ - Order tasks so earlier ones unblock later ones (foundations first).
64
+ - Each task should be independently implementable as a single /task run.
65
+ - Prefer a handful of substantial tasks over many trivial ones.
66
+ - Output the checkbox list and NOTHING else (no preamble, no numbering).`;
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Inline-markdown helpers for the clarify/grill question dialogs.
3
+ *
4
+ * The model often wraps the core question in **bold** (and code in backticks)
5
+ * because it makes the question easier to read at a glance. ctx.ui.input titles
6
+ * accept ANSI styling, so we RENDER those spans to terminal bold/code for the
7
+ * displayed prompt, and STRIP them to plain text for the editable input default
8
+ * and the persisted task file (which must stay ANSI-free).
9
+ */
10
+ /** Minimal theme surface we need; ExtensionCommandContext['ui'].theme satisfies it. */
11
+ export interface InlineMarkdownTheme {
12
+ bold(text: string): string;
13
+ fg(color: 'mdCode', text: string): string;
14
+ }
15
+ /** Render **bold** and `code` spans to themed terminal styling for display. */
16
+ export declare function renderInlineMarkdown(text: string, theme: InlineMarkdownTheme): string;
17
+ /** Strip **bold** and `code` markers to plain text (for defaults and storage). */
18
+ export declare function stripInlineMarkdown(text: string): string;
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Inline-markdown helpers for the clarify/grill question dialogs.
3
+ *
4
+ * The model often wraps the core question in **bold** (and code in backticks)
5
+ * because it makes the question easier to read at a glance. ctx.ui.input titles
6
+ * accept ANSI styling, so we RENDER those spans to terminal bold/code for the
7
+ * displayed prompt, and STRIP them to plain text for the editable input default
8
+ * and the persisted task file (which must stay ANSI-free).
9
+ */
10
// One left-to-right scan over both span kinds. Code is listed first and the
// leftmost match wins, so a `code` span is taken literally and any ** inside
// it (globs like src/**/*.ts, the ** operator) is never read as bold.
const INLINE_SPAN = /`([^`]+)`|\*\*(.+?)\*\*/g;
// Unbalanced markers left outside any recognised span.
const STRAY_MARKERS = /\*\*|`/g;
/**
 * Walk `text`, replacing each `code` span with onCode(content) and each
 * **bold** span with onBold(content), and dropping stray ** / ` markers from
 * the text between spans. Bold content is processed recursively so code inside
 * bold is still styled; code content is passed through untouched.
 *
 * @param {string} text
 * @param {(content: string) => string} onBold
 * @param {(content: string) => string} onCode
 * @returns {string}
 */
function mapInlineSpans(text, onBold, onCode) {
    let out = '';
    let cursor = 0;
    for (const m of text.matchAll(INLINE_SPAN)) {
        out += text.slice(cursor, m.index).replace(STRAY_MARKERS, '');
        out += m[1] !== undefined ? onCode(m[1]) : onBold(mapInlineSpans(m[2], onBold, onCode));
        cursor = m.index + m[0].length;
    }
    return out + text.slice(cursor).replace(STRAY_MARKERS, '');
}
/**
 * Render **bold** and `code` spans to themed terminal styling for display.
 *
 * @param {string} text  Model-written text with optional inline markdown.
 * @param theme          Object providing bold(text) and fg('mdCode', text).
 * @returns {string} The text with spans styled and stray markers removed.
 */
export function renderInlineMarkdown(text, theme) {
    return mapInlineSpans(text, bold => theme.bold(bold), code => theme.fg('mdCode', code));
}
/**
 * Strip **bold** and `code` markers to plain text (for defaults and storage).
 * The content of code spans is kept exactly as written.
 *
 * @param {string} text Model-written text with optional inline markdown.
 * @returns {string} Plain, trimmed text.
 */
export function stripInlineMarkdown(text) {
    return mapInlineSpans(text, bold => bold, code => code).trim();
}
@@ -51,4 +51,32 @@ export declare class TaskRunner {
51
51
  run(): Promise<void>;
52
52
  private _deliverSpec;
53
53
  }
54
+ export interface RunSingleTaskOptions {
55
+ /** Await the session going idle after the spec is delivered, so the caller
56
+ * blocks until the agent has implemented it. Default false. */
57
+ waitForImplementation?: boolean;
58
+ /** Test seam: spawn function forwarded to TaskRunner. */
59
+ spawnFn?: SpawnFn;
60
+ }
61
+ export interface RunSingleTaskResult {
62
+ taskId: string;
63
+ ok: boolean;
64
+ sessionCancelled: boolean;
65
+ /**
66
+ * The session context the caller must use for any work after this call. A
67
+ * successful run replaces the session via ctx.newSession(), which leaves the
68
+ * caller's original ctx stale — this is the fresh replacement ctx and callers
69
+ * MUST adopt it (using the original throws "stale ctx"). On cancellation no
70
+ * replacement happened, so this is the original, still-live ctx. Optional
71
+ * only so test fakes that don't model session replacement can omit it.
72
+ */
73
+ ctx?: ExtensionCommandContext;
74
+ }
75
+ /**
76
+ * Run one prompt through the full single-task pipeline in a fresh session and
77
+ * deliver its spec. With waitForImplementation, block until the agent finishes
78
+ * implementing the delivered spec. Success is read off the produced task file's
79
+ * front-matter state (TaskRunner.run never throws).
80
+ */
81
+ export declare function runSingleTask(ctx: ExtensionCommandContext, cwd: string, rawPrompt: string, opts?: RunSingleTaskOptions): Promise<RunSingleTaskResult>;
54
82
  export declare function registerTask(pi: ExtensionAPI): void;
@@ -256,6 +256,46 @@ export class TaskRunner {
256
256
  }
257
257
  }
258
258
  }
259
/**
 * Run one prompt through the single-task pipeline inside a fresh session and
 * deliver the resulting spec to that session.
 *
 * @param ctx       Context of the current session; replaced on success.
 * @param cwd       Working directory containing the task files.
 * @param rawPrompt Prompt handed to TaskRunner.
 * @param opts      waitForImplementation: after sending the spec, also wait for
 *                  the new session to go idle. spawnFn: forwarded to TaskRunner.
 * @returns `{ taskId, ok, sessionCancelled, ctx }`. `ok` is true only when the
 *          produced task file's front-matter state is 'completed'. `ctx` is
 *          the replacement session context, or the original one when the
 *          session could not be started.
 */
export async function runSingleTask(ctx, cwd, rawPrompt, opts = {}) {
    let taskId = '';
    // Starts as the original ctx, which remains the right answer when no
    // replacement session was created.
    let freshCtx = ctx;
    const withSession = async (newCtx) => {
        freshCtx = newCtx;
        const deliverSpec = async (spec) => {
            await newCtx.sendUserMessage(spec);
            if (opts.waitForImplementation) {
                await newCtx.waitForIdle();
            }
        };
        const runner = new TaskRunner(newCtx, cwd, rawPrompt, undefined, deliverSpec, opts.spawnFn);
        await runner.run();
        taskId = runner.taskId;
    };
    const result = await ctx.newSession({ withSession });
    if (result.cancelled) {
        // No replacement happened — the original ctx is still live.
        return { taskId, ok: false, sessionCancelled: true, ctx };
    }
    // Success is judged from the task file; an unreadable file counts as failure.
    const readCompleted = async () => {
        try {
            const { frontMatter } = await readTaskFile(cwd, taskId);
            return frontMatter.state === 'completed';
        }
        catch {
            return false;
        }
    };
    const ok = taskId ? await readCompleted() : false;
    return { taskId, ok, sessionCancelled: false, ctx: freshCtx };
}
259
299
  // ─── Command handlers ────────────────────────────────────────────────────────
260
300
  async function handleTask(args, ctx) {
261
301
  await ctx.waitForIdle();
@@ -266,15 +306,8 @@ async function handleTask(args, ctx) {
266
306
  ctx.ui.notify('Type your prompt after /task (use @ for file completion).', 'info');
267
307
  return;
268
308
  }
269
- const result = await ctx.newSession({
270
- withSession: async (newCtx) => {
271
- const runner = new TaskRunner(newCtx, cwd, raw, undefined, async (spec) => {
272
- await newCtx.sendUserMessage(spec);
273
- });
274
- await runner.run();
275
- }
276
- });
277
- if (result.cancelled) {
309
+ const { sessionCancelled } = await runSingleTask(ctx, cwd, raw);
310
+ if (sessionCancelled) {
278
311
  ctx.ui.notify('Could not start a fresh session for /task.', 'warning');
279
312
  }
280
313
  }
@@ -15,10 +15,17 @@ export type AutoAnswer = {
15
15
  suggested?: string;
16
16
  raw: string;
17
17
  };
18
+ /** One /task-auto clarify question with its model-recommended default answer. */
19
+ export interface ClarifyQuestion {
20
+ question: string;
21
+ suggested?: string;
22
+ }
18
23
  export declare const GRILL_LINE_RE: RegExp;
24
+ export declare const SUGGESTED_LINE_RE: RegExp;
19
25
  export declare const TITLE_MAX_CHARS = 120;
20
26
  export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
21
27
  export declare function parseGrillQuestions(raw: string): string[];
28
+ export declare function parseClarifyList(raw: string): ClarifyQuestion[];
22
29
  export declare function parseAutoAnswer(raw: string): AutoAnswer;
23
30
  export declare function parseVerifyToolingOutput(output: string): {
24
31
  verified: string[];
@@ -28,5 +35,14 @@ export declare function parseVerifyToolingOutput(output: string): {
28
35
  }>;
29
36
  };
30
37
  export declare function isCritiqueClean(text: string): boolean;
38
+ /**
39
+ * Drop any preamble the model emitted before the spec's GOAL header. The
40
+ * thinking model sometimes narrates ("Now I have all the context. Here's the
41
+ * rewritten spec:") before GOAL — the prompts forbid it, but the critique
42
+ * validator only checks for a VERIFY block, so it leaked into the delivered
43
+ * spec. We slice from the first line that begins a GOAL section so the spec
44
+ * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
45
+ */
46
+ export declare function stripSpecPreamble(spec: string): string;
31
47
  export declare function validateSpecShape(spec: string): string | null;
32
48
  export declare function deriveTitle(refined: string): string;
@@ -6,6 +6,7 @@
6
6
  import { MAX_GRILL_QUESTIONS } from './phases.js';
7
7
  // ─── Constants ───────────────────────────────────────────────────────────────
8
8
  export const GRILL_LINE_RE = /^\s*\d+[.)]\s+(.+)$/;
9
+ export const SUGGESTED_LINE_RE = /^\s*SUGGESTED:\s*(.*)$/i;
9
10
  export const TITLE_MAX_CHARS = 120;
10
11
  // ─── Verify block parser ─────────────────────────────────────────────────────
11
12
  export function parseVerifyBlock(spec) {
@@ -49,6 +50,53 @@ export function parseGrillQuestions(raw) {
49
50
  }
50
51
  return out;
51
52
  }
53
// ─── Clarify (/task-auto) parser ─────────────────────────────────────────────
// Matches a "SUGGESTED:" marker anywhere in a string (not just line-start), so
// we can recover a recommendation the model wrote inline on the question line
// (e.g. "1. ...so this must be resolved. SUGGESTED: use polling.") rather than
// on its own line.
const INLINE_SUGGESTED_RE = /\bSUGGESTED:\s*/i;
/**
 * Split a question line's text into the question and any inline SUGGESTED default.
 *
 * @param {string} text Question text with the leading number already removed.
 * @returns {{ question: string, suggested?: string }} `suggested` is present
 *          only when non-empty text follows the marker.
 */
function splitInlineSuggested(text) {
    const m = INLINE_SUGGESTED_RE.exec(text);
    if (!m)
        return { question: text.trim() };
    const question = text.slice(0, m.index).trim();
    const suggested = text.slice(m.index + m[0].length).trim();
    return suggested.length > 0 ? { question, suggested } : { question };
}
/**
 * Parse the /task-auto clarify output: a numbered question list where each
 * question carries a "SUGGESTED: <default>" recommendation — either on its own
 * line below the question, or inline at the end of the question line. The first
 * SUGGESTED for a question wins; later ones are ignored. The literal token NONE
 * (on its own line) means "no clarification needed" → [].
 *
 * Question/suggested text is returned VERBATIM (markdown intact); rendering or
 * stripping inline markdown is left to the caller.
 *
 * Lines are split on LF or CRLF. With a bare '\n' split, a trailing '\r' would
 * stop both line patterns from matching, because `.` does not match '\r' and
 * `$` only matches at the very end of the string.
 *
 * @param {string} raw Raw model output.
 * @returns {Array<{ question: string, suggested?: string }>} At most
 *          MAX_GRILL_QUESTIONS entries, in output order.
 */
export function parseClarifyList(raw) {
    if (/^\s*NONE\s*$/m.test(raw))
        return [];
    const out = [];
    for (const line of raw.split(/\r?\n/)) {
        const q = GRILL_LINE_RE.exec(line);
        if (q) {
            if (out.length >= MAX_GRILL_QUESTIONS)
                break;
            out.push(splitInlineSuggested(q[1].trim()));
            continue;
        }
        const s = SUGGESTED_LINE_RE.exec(line);
        if (s && out.length > 0) {
            const suggested = s[1].trim();
            const last = out[out.length - 1];
            // First recommendation wins, including one already found inline.
            if (suggested.length > 0 && last.suggested === undefined) {
                last.suggested = suggested;
            }
        }
    }
    return out;
}
52
100
  // ─── Auto-answer parser ──────────────────────────────────────────────────────
53
101
  export function parseAutoAnswer(raw) {
54
102
  const lines = raw
@@ -120,6 +168,28 @@ export function isCritiqueClean(text) {
120
168
  return /^CLEAN[.!]?$/i.test(firstLine);
121
169
  }
122
170
  // ─── Spec shape validator ────────────────────────────────────────────────────
171
/**
 * Remove narration the model wrote ahead of the spec's GOAL header, so the
 * delivered spec begins at GOAL.
 *
 * The spec is returned unchanged when:
 *  - no line starts with GOAL (case-insensitive, whole word),
 *  - GOAL is already the first line, or
 *  - any line before GOAL opens a markdown fence or a `cat <<` heredoc. Such a
 *    wrapper is a malformed spec and is deliberately left intact rather than
 *    being unwrapped into something that looks well-formed.
 *
 * @param {string} spec Raw spec text.
 * @returns {string} The spec from its first GOAL line onward, or `spec` itself.
 */
export function stripSpecPreamble(spec) {
    const rows = spec.split('\n');
    let goalAt = -1;
    for (let i = 0; i < rows.length; i += 1) {
        if (/^GOAL\b/i.test(rows[i])) {
            goalAt = i;
            break;
        }
    }
    if (goalAt < 1) {
        return spec; // no GOAL line, or nothing precedes it
    }
    const opensWrapper = (row) => /^\s*```/.test(row) || /^\s*cat\s*<</.test(row);
    for (let i = 0; i < goalAt; i += 1) {
        if (opensWrapper(rows[i])) {
            return spec;
        }
    }
    return rows.slice(goalAt).join('\n');
}
123
193
  export function validateSpecShape(spec) {
124
194
  const trimmed = spec.trim();
125
195
  if (trimmed.length === 0)