@mjasnikovs/pi-task 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,11 +6,12 @@ import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
6
6
  import { docsRaw, docsFocused } from '../workers/docs-core.js';
7
7
  import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
8
8
  import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
9
+ import { MAX_GRILL_QUESTIONS } from './prompts.js';
9
10
  import { type PhaseName } from './task-file.js';
10
11
  import { type WidgetState } from './widget.js';
11
12
  import { type AutoAnswer } from './parsers.js';
12
13
  import { type PhaseDeps } from './child-runner.js';
13
- export { MAX_GRILL_QUESTIONS } from './prompts.js';
14
+ export { MAX_GRILL_QUESTIONS };
14
15
  export interface PhaseContext {
15
16
  cwd: string;
16
17
  id: string;
@@ -10,12 +10,13 @@ import { search as defaultSearch } from '../workers/search-core.js';
10
10
  import { extractEnrichTargets } from './enrichment.js';
11
11
  import { getFileInventory } from './file-inventory.js';
12
12
  import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
13
- import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT } from './prompts.js';
13
+ import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
14
14
  import { setTaskSection, updateTaskFrontMatter } from './task-file.js';
15
- import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, deriveTitle, isCritiqueClean } from './parsers.js';
15
+ import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
16
+ import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, stripSpecPreamble, deriveTitle, isCritiqueClean } from './parsers.js';
16
17
  import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
17
18
  // ─── Re-export constants from their home modules ────────────────────────────
18
- export { MAX_GRILL_QUESTIONS } from './prompts.js';
19
+ export { MAX_GRILL_QUESTIONS };
19
20
  // ─── Tooling helpers ─────────────────────────────────────────────────────────
20
21
  /** Extract the TOOLING section commands from a research output string. */
21
22
  export function extractToolingCommands(research) {
@@ -51,7 +52,7 @@ export function replaceToolingWithVerified(research, verifiedCommands) {
51
52
  return replaced;
52
53
  }
53
54
  // ─── Phase functions ─────────────────────────────────────────────────────────
54
- export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, REFINE_PROMPT(raw)));
55
+ export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, appendNoThink(REFINE_PROMPT(raw))));
55
56
  export async function phaseVerifyTooling(deps, research) {
56
57
  const commands = extractToolingCommands(research);
57
58
  if (!commands || commands.length === 0) {
@@ -60,7 +61,7 @@ export async function phaseVerifyTooling(deps, research) {
60
61
  const toolingList = commands.join('\n');
61
62
  let verifyOutput;
62
63
  try {
63
- verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', VERIFY_TOOLING_PROMPT(toolingList));
64
+ verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', appendNoThink(VERIFY_TOOLING_PROMPT(toolingList)));
64
65
  }
65
66
  catch {
66
67
  return replaceToolingWithVerified(research, commands);
@@ -157,59 +158,67 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
157
158
  deps.onChildOutput(`research (${doneCount}/4 workers done)`);
158
159
  };
159
160
  // Per-worker timing split into wait (spawn → first byte) and work (first
160
- // byte → exit). When workers fan out concurrently and the upstream model
161
- // API caps concurrency, the queued workers spend most of their elapsed
162
- // time waiting for a slot — the wait/work split makes that visible
163
- // instead of the previous Promise.all-relative wall-clock that conflated
164
- // the two.
161
+ // byte → exit). The workers run sequentially below, so each split is a clean
162
+ // per-worker measurement — waitMs the worker's own cold-start, workMs its
163
+ // generation+tool-call cost — not a Promise.all-relative wall-clock that
164
+ // conflates the two.
165
165
  const recordWorker = (label, p) => p.then(r => {
166
166
  deps.recordSubStep?.(`${label} wait`, r.waitMs);
167
167
  deps.recordSubStep?.(`${label} work`, r.workMs);
168
168
  return r;
169
169
  });
170
- const [files, apis, context, tooling] = await Promise.all([
171
- recordWorker('worker:files', runWorker({
172
- prompt: promptHeader + RESEARCH_FILES_PROMPT(refined),
173
- cwd: deps.cwd,
174
- signal: deps.signal,
175
- spawn: deps.spawn
176
- }).then(r => {
177
- updateProgress();
178
- return r;
179
- })),
180
- recordWorker('worker:apis', runWorker({
181
- prompt: promptHeader + RESEARCH_APIS_PROMPT(refined),
182
- cwd: deps.cwd,
183
- signal: deps.signal,
184
- spawn: deps.spawn
185
- }).then(r => {
186
- updateProgress();
187
- return r;
188
- })),
189
- recordWorker('worker:context', runWorker({
190
- prompt: promptHeader + RESEARCH_CONTEXT_PROMPT(refined),
191
- cwd: deps.cwd,
192
- signal: deps.signal,
193
- spawn: deps.spawn,
194
- // Context owns architectural understanding, not path discovery
195
- // — FILES handles that. Dropping `find`/`ls` keeps the worker
196
- // from spawning long enumeration loops whose output then
197
- // inflates prefill on every subsequent round.
170
+ // Run the four workers ONE AT A TIME. Settled by an A/B on the local
171
+ // llama.cpp backend (single GPU, same task/model) — and the answer flips
172
+ // with thinking:
173
+ // - thinking ON → parallel wins: long decodes batch well, 4 concurrent
174
+ // finish in ~max(worker), not the sum.
175
+ // - /no_think → sequential wins: with short decodes the batching upside
176
+ // is gone, but 4 concurrent streams still split the one GPU and slow
177
+ // each other ~4x (context worker measured 27s solo vs 128s under load),
178
+ // so summed-but-fast (~100s) beats max-of-slowed (~130s).
179
+ // Every worker runs /no_think (below), so sequential is the faster regime.
180
+ // Do NOT switch this back to Promise.all without re-running that A/B.
181
+ //
182
+ // `/no_think` is the big win: these are agentic exploration loops, and on a
183
+ // reasoning model the child would otherwise emit a full <think> trace at
184
+ // every tool step ("let me read X next…") — the single largest decode sink
185
+ // in the pipeline. Stripping it cut each worker's decode 3-8x in the A/B.
186
+ // The worker still calls as many tools as it wants; it just stops narrating
187
+ // between them. See appendNoThink. Result order (files, apis, context,
188
+ // tooling) is preserved for assembly.
189
+ const workerSpecs = [
190
+ {
191
+ label: 'worker:files',
192
+ prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
193
+ },
194
+ { label: 'worker:apis', prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)) },
195
+ {
196
+ label: 'worker:context',
197
+ prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
198
+ // Context owns architectural understanding, not path discovery —
199
+ // FILES handles that. Dropping `find`/`ls` keeps the worker from
200
+ // spawning long enumeration loops whose output then inflates
201
+ // prefill on every subsequent round.
198
202
  tools: 'read,grep'
199
- }).then(r => {
200
- updateProgress();
201
- return r;
202
- })),
203
- recordWorker('worker:tooling', runWorker({
204
- prompt: promptHeader + RESEARCH_TOOLING_PROMPT(refined),
203
+ },
204
+ {
205
+ label: 'worker:tooling',
206
+ prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
207
+ }
208
+ ];
209
+ const workerResults = [];
210
+ for (const spec of workerSpecs) {
211
+ const r = await recordWorker(spec.label, runWorker({
212
+ prompt: spec.prompt,
205
213
  cwd: deps.cwd,
206
214
  signal: deps.signal,
207
- spawn: deps.spawn
208
- }).then(r => {
209
- updateProgress();
210
- return r;
211
- }))
212
- ]);
215
+ spawn: deps.spawn,
216
+ ...(spec.tools ? { tools: spec.tools } : {})
217
+ }));
218
+ updateProgress();
219
+ workerResults.push(r);
220
+ }
221
+ const [files, apis, context, tooling] = workerResults;
213
222
  const sections = [
214
223
  { name: 'FILES', result: files },
215
224
  { name: 'APIS', result: apis },
@@ -316,64 +325,75 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
316
325
  }
317
326
  }
318
327
  export async function phaseGrill(deps, ctx, widgetState, refined, research) {
319
- const tGenStart = Date.now();
320
- const raw = await runPhaseWithLoopGuard(deps, 'grill-gen', 'read', hint => prependHint(hint, GRILL_GEN_PROMPT(refined, research)));
321
- deps.recordSubStep?.('gen', Date.now() - tGenStart);
322
- const questions = parseGrillQuestions(raw);
323
- if (questions.length === 0)
324
- return '(no questions produced)';
325
- // Auto-answers are independent — generate them concurrently before the UI
326
- // loop. The user-input loop below still runs sequentially (the user can
327
- // only answer one prompt at a time), but the LLM-spawning work no longer
328
- // blocks each iteration. For N questions this turns ~N × cold-start time
329
- // into ~1 × cold-start time.
330
- const tAutoStart = Date.now();
331
- let doneCount = 0;
332
- widgetState.lastLine = `auto-answering 0/${questions.length} done…`;
333
- const autos = await Promise.all(questions.map((q, i) => phaseAutoAnswer(deps, refined, research, q).then(r => {
334
- doneCount++;
335
- widgetState.lastLine = `auto-answering ${doneCount}/${questions.length} done (Q${i + 1})`;
336
- return r;
337
- })));
338
- deps.recordSubStep?.('auto-answers', Date.now() - tAutoStart);
328
+ // Sequential & adaptive: ask one question at a time, feeding every answer
329
+ // back into the next grill-gen call so later questions react to earlier ones
330
+ // (drop resolved unknowns, surface forks an answer introduced). Each question
331
+ // still gets a research-backed auto-answer — answered cheaply (skip the user)
332
+ // or surfaced as a pre-filled recommendation. The model emits NONE when
333
+ // nothing ambiguous remains. Kept in sync with /task-auto's clarify dialog.
339
334
  const theme = ctx.ui.theme;
340
- const tInputStart = Date.now();
341
- const out = [];
342
- for (let i = 0; i < questions.length; i++) {
343
- const q = questions[i];
344
- const auto = autos[i];
345
- out.push(`Q${i + 1}: ${q}`);
335
+ const out = []; // human-facing Q&A transcript (with auto-worker debug lines)
336
+ const qa = []; // compact Q&A fed back into the next question
337
+ // Open-ended: keep asking until the model emits NONE or the user dismisses.
338
+ for (let n = 0;; n++) {
339
+ const tGenStart = Date.now();
340
+ const raw = await runPhaseWithLoopGuard(deps, 'grill-gen', 'read', hint => prependHint(hint, GRILL_GEN_PROMPT(refined, research, qa.join('\n'))));
341
+ deps.recordSubStep?.('gen', Date.now() - tGenStart);
342
+ const questions = parseGrillQuestions(raw);
343
+ if (questions.length === 0)
344
+ break; // NONE / nothing left to ask
345
+ const q = questions[0];
346
+ widgetState.lastLine = `auto-answering Q${n + 1}…`;
347
+ const tAutoStart = Date.now();
348
+ const auto = await phaseAutoAnswer(deps, refined, research, q);
349
+ deps.recordSubStep?.('auto-answer', Date.now() - tAutoStart);
350
+ // Render markdown (bold/code) for the displayed prompt; keep plain text
351
+ // for the editable default and the persisted file.
352
+ const shownQ = renderInlineMarkdown(q, theme);
353
+ const plainQ = stripInlineMarkdown(q);
354
+ out.push(`Q${n + 1}: ${plainQ}`);
346
355
  const rawTrim = auto.raw.trim();
347
356
  out.push(` (auto-worker raw: ${rawTrim.length === 0 ? '(empty)' : rawTrim.replace(/\n/g, ' ⏎ ')})`);
357
+ let answer;
348
358
  if (auto.kind === 'answered') {
349
- out.push(`A${i + 1}: ${auto.text} (auto)`);
350
- continue;
351
- }
352
- const title = auto.suggested ?
353
- `${q}\n${theme.fg('muted', 'Recommended:')}\n\n${theme.fg('text', auto.suggested)}\n\n${theme.fg('muted', 'press Enter to accept')}`
354
- : `${q}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
355
- widgetState.lastLine = `awaiting Q${i + 1}`;
356
- const a = await ctx.ui.input(title, auto.suggested);
357
- if (a === undefined)
358
- throw new Error(USER_CANCELLED);
359
- const typed = a.trim();
360
- if (typed.length === 0 && auto.suggested) {
361
- out.push(`A${i + 1}: ${auto.suggested} (accepted recommendation)`);
362
- }
363
- else if (typed.length === 0) {
364
- out.push(`A${i + 1}: (skipped)`);
359
+ answer = stripInlineMarkdown(auto.text);
360
+ out.push(`A${n + 1}: ${answer} (auto)`);
365
361
  }
366
362
  else {
367
- out.push(`A${i + 1}: ${typed}`);
363
+ const plainSuggested = auto.suggested === undefined ? undefined : stripInlineMarkdown(auto.suggested);
364
+ const title = auto.suggested ?
365
+ `${shownQ}\n${theme.fg('muted', 'Recommended:')}\n\n${renderInlineMarkdown(auto.suggested, theme)}\n\n${theme.fg('muted', 'press Enter to accept')}`
366
+ : `${shownQ}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
367
+ widgetState.lastLine = `awaiting Q${n + 1}`;
368
+ const a = await ctx.ui.input(title, plainSuggested);
369
+ if (a === undefined)
370
+ throw new Error(USER_CANCELLED);
371
+ const typed = a.trim();
372
+ if (typed.length === 0 && plainSuggested) {
373
+ answer = `${plainSuggested} (accepted recommendation)`;
374
+ }
375
+ else if (typed.length === 0) {
376
+ answer = '(skipped)';
377
+ }
378
+ else {
379
+ answer = typed;
380
+ }
381
+ out.push(`A${n + 1}: ${answer}`);
368
382
  }
383
+ qa.push(`Q${n + 1}: ${plainQ}\nA${n + 1}: ${answer}`);
369
384
  }
370
- deps.recordSubStep?.('user input', Date.now() - tInputStart);
385
+ if (out.length === 0)
386
+ return '(no questions produced)';
371
387
  return out.join('\n');
372
388
  }
373
389
  export async function phaseCompose(deps, refined, research, qa) {
374
390
  return runWithEmphasisRetry(deps, 'compose', 'read', problem => COMPOSE_PROMPT(refined, research, qa, problem), text => {
375
- const problem = validateSpecShape(text);
376
- return problem ? { ok: false, problem } : { ok: true, value: text };
391
+ // Trim any "here's the spec:" preamble before validating, so a
392
+ // strippable lead-in doesn't burn a full retry and the stored
393
+ // value starts at GOAL.
394
+ const stripped = stripSpecPreamble(text);
395
+ const problem = validateSpecShape(stripped);
396
+ return problem ? { ok: false, problem } : { ok: true, value: stripped };
377
397
  }, problem => new Error(`compose_invalid: ${problem}`));
378
398
  }
379
399
  export async function phaseCritique(deps, spec, refined, qa) {
@@ -397,7 +417,7 @@ export async function phaseCritique(deps, spec, refined, qa) {
397
417
  // Granting `read` here let it wander the repo to "verify" findings,
398
418
  // which made the supposedly-cheap pass cost as much as a rewrite
399
419
  // (observed ~133s). The judgement needs no file access.
400
- verdict = await runPhaseChild(deps, 'critique-triage', '', CRITIQUE_TRIAGE_PROMPT(spec, refined, qa));
420
+ verdict = await runPhaseChild(deps, 'critique-triage', '', appendNoThink(CRITIQUE_TRIAGE_PROMPT(spec, refined, qa)));
401
421
  }
402
422
  catch {
403
423
  verdict = null;
@@ -411,9 +431,15 @@ export async function phaseCritique(deps, spec, refined, qa) {
411
431
  }
412
432
  const tRewrite = Date.now();
413
433
  try {
414
- return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text => parseVerifyBlock(text) ?
415
- { ok: true, value: text }
416
- : { ok: false, problem: 'no_verify_block' }, () => new Error('no_verify_block'));
434
+ return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text => {
435
+ // The rewrite (thinking on) sometimes prepends narration before
436
+ // GOAL; the prompt forbids it but this validator only checks for
437
+ // a VERIFY block. Strip it so the delivered spec starts at GOAL.
438
+ const stripped = stripSpecPreamble(text);
439
+ return parseVerifyBlock(stripped) ?
440
+ { ok: true, value: stripped }
441
+ : { ok: false, problem: 'no_verify_block' };
442
+ }, () => new Error('no_verify_block'));
417
443
  }
418
444
  finally {
419
445
  deps.recordSubStep?.('rewrite', Date.now() - tRewrite);
@@ -5,13 +5,36 @@
5
5
  * effects, trivially testable.
6
6
  */
7
7
  export declare const MAX_GRILL_QUESTIONS = 10;
8
+ /**
9
+ * Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
10
+ * <think> reasoning trace for that turn and persists across the tool-call loop
11
+ * within the same child session.
12
+ *
13
+ * On a local reasoning model decode is the bottleneck (~50 t/s here) while
14
+ * prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
15
+ * think trace can be 10k+ tokens — minutes — even when the phase's real output
16
+ * is a short list or a one-word verdict (an observed triage spent ~384s
17
+ * thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
18
+ * exploration: it still calls every tool it wants and takes every step it
19
+ * needs — it just stops narrating between actions.
20
+ *
21
+ * We strip thinking from the mechanical / exploration phases (refine, the four
22
+ * research workers, verify-tooling, triage) and keep it ON for the judgment
23
+ * phases (compose, grill, critique rewrite) where the reasoning earns its
24
+ * decode cost. pi's `--thinking off` flag is a no-op for this provider
25
+ * (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
26
+ * switch is the reliable control.
27
+ */
28
+ export declare const NO_THINK = "/no_think";
29
+ /** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
30
+ export declare function appendNoThink(prompt: string): string;
8
31
  declare const REFINE_PROMPT: (raw: string) => string;
9
32
  declare const RESEARCH_READ_ONLY_CONSTRAINT = "IMPORTANT: You are ONLY allowed to READ. Do NOT create, modify, or delete any files. Use the read, grep, find, and ls tools to inspect the repo.";
10
33
  declare const RESEARCH_FILES_PROMPT: (refined: string) => string;
11
34
  declare const RESEARCH_APIS_PROMPT: (refined: string) => string;
12
35
  declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
13
36
  declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
14
- declare const GRILL_GEN_PROMPT: (refined: string, research: string) => string;
37
+ declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
15
38
  declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
16
39
  declare function composeRetryEmphasis(problem: string): string;
17
40
  declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;
@@ -5,6 +5,31 @@
5
5
  * effects, trivially testable.
6
6
  */
7
7
  export const MAX_GRILL_QUESTIONS = 10;
8
+ /**
9
+ * Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
10
+ * <think> reasoning trace for that turn and persists across the tool-call loop
11
+ * within the same child session.
12
+ *
13
+ * On a local reasoning model decode is the bottleneck (~50 t/s here) while
14
+ * prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
15
+ * think trace can be 10k+ tokens — minutes — even when the phase's real output
16
+ * is a short list or a one-word verdict (an observed triage spent ~384s
17
+ * thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
18
+ * exploration: it still calls every tool it wants and takes every step it
19
+ * needs — it just stops narrating between actions.
20
+ *
21
+ * We strip thinking from the mechanical / exploration phases (refine, the four
22
+ * research workers, verify-tooling, triage) and keep it ON for the judgment
23
+ * phases (compose, grill, critique rewrite) where the reasoning earns its
24
+ * decode cost. pi's `--thinking off` flag is a no-op for this provider
25
+ * (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
26
+ * switch is the reliable control.
27
+ */
28
+ export const NO_THINK = '/no_think';
29
+ /** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
30
+ export function appendNoThink(prompt) {
31
+ return `${prompt}\n\n${NO_THINK}`;
32
+ }
8
33
  const REFINE_PROMPT = (raw) => `You receive a user's task description for an AI coding agent. Rewrite it to be unambiguous and actionable.
9
34
 
10
35
  Output structure (four sections, exact headings, in this order):
@@ -118,25 +143,35 @@ No section header. No other sections. No preamble. May be empty if no verificati
118
143
 
119
144
  Task:
120
145
  ${refined}`;
121
- const GRILL_GEN_PROMPT = (refined, research) => `You are preparing clarifying questions for the user, based on a refined task description and the research that follows.
146
+ const GRILL_GEN_PROMPT = (refined, research, priorQA) => `You are preparing clarifying questions for the user, based on a refined task description, the research that follows, and the answers gathered so far. Ask ONE question at a time.
147
+
148
+ Output the SINGLE most important clarifying question that REMAINS — the one whose answer most changes the work — or NONE if no genuine ambiguity is left.
122
149
 
123
150
  Start from the KNOWN-UNKNOWNS bullets in the task. Add any new ambiguity surfaced by the research. Drop any unknowns the research already resolved.
124
151
 
152
+ ACCOUNT FOR THE ANSWERS SO FAR — read carefully:
153
+ - Never re-ask something already answered below.
154
+ - If an answer introduced a NEW fork or contradicts an assumption in the task/research (e.g. the user chose a tool or approach the task did not anticipate), ask about the most important consequence of that choice next.
155
+ - Drop questions the answers have made irrelevant.
156
+
125
157
  SCOPE RULES — read carefully:
126
158
  - Questions must clarify the EXISTING scope. Do NOT propose new deliverables, enhancements, modernizations, or "while I'm here" cleanups.
127
159
  - Forbidden patterns: "should I also…", "should we modernize…", "do you want me to update X while I'm at it…", "should I integrate Y…", "would you like guidance on Z…".
128
160
  - Allowed patterns: "by 'X' do you mean A or B?", "should failure mode Y be treated as Z?", "which of <files matching the task> applies here?".
129
- - If the refined task + research leave no genuine ambiguity, output zero questions. Zero questions is a valid and preferred outcome. Do not pad.
161
+ - If nothing genuinely ambiguous remains, output NONE. Zero questions is a valid and preferred outcome. Do not pad.
130
162
 
131
163
  Output format — read carefully:
132
- - If you have questions: emit them as a plain numbered list, one per line, at most ${MAX_GRILL_QUESTIONS}, no preamble.
133
- - If you have zero questions: emit the single literal token NONE on its own line. Do NOT emit empty output — an empty response is treated as a crash, not as "no questions". The NONE sentinel is the only way to signal an intentional empty list.
164
+ - One question as a single numbered line: "1. ...", and nothing else.
165
+ - If no question remains: emit the single literal token NONE on its own line. Do NOT emit empty output — an empty response is treated as a crash, not as "no questions". The NONE sentinel is the only way to signal an intentional empty list.
134
166
 
135
167
  Refined task:
136
168
  ${refined}
137
169
 
138
170
  Research:
139
- ${research}`;
171
+ ${research}
172
+
173
+ Answers so far:
174
+ ${priorQA.trim() || '(none yet)'}`;
140
175
  const GRILL_AUTO_ANSWER_PROMPT = (refined, research, question) => `You are pre-answering a clarifying question for an AI coding task. You have the refined task and the research notes. You can also use the read tool to open any file mentioned in the research (e.g. package.json) if it helps you answer.
141
176
 
142
177
  Your job is to produce a recommended default answer. If the default is one the user would almost certainly accept, you tag it ANSWER and we skip the user entirely. Otherwise you tag it UNKNOWN and we show the suggestion in the input box for the user to confirm or override.
@@ -21,6 +21,7 @@ export interface ContextSnapshot {
21
21
  }
22
22
  export type WidgetTheme = ExtensionCommandContext['ui']['theme'];
23
23
  export declare const WIDGET_KEY = "pi-tasks";
24
+ export declare const AUTO_WIDGET_KEY = "pi-task-auto";
24
25
  export declare const WIDGET_REFRESH_MS = 500;
25
26
  export declare const WIDGET_LAST_LINE_MAX = 120;
26
27
  export declare const NOTIFY_CLEAR_MS = 3000;
@@ -34,6 +35,24 @@ export declare function formatDuration(ms: number): string;
34
35
  export declare function formatContextTokens(count: number): string;
35
36
  export declare function contextProgressBar(percent: number): string;
36
37
  export declare function contextThresholdColor(theme: WidgetTheme, percent: number, text: string): string;
38
+ /** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
39
+ export declare function formatContextDetail(usage: ContextSnapshot, theme?: WidgetTheme): string | null;
37
40
  export declare function buildWidgetLines(s: WidgetState, theme?: WidgetTheme): string[];
38
41
  export declare function startWidget(ctx: ExtensionCommandContext, getState: () => WidgetState | null): () => void;
42
+ export interface AutoLoaderState {
43
+ title: string;
44
+ step: string;
45
+ stepNum: number;
46
+ stepTotal: number;
47
+ startedAt: number;
48
+ lastLine?: string;
49
+ contextUsage?: ContextSnapshot;
50
+ }
51
+ export declare function buildAutoLoaderLines(s: AutoLoaderState, theme?: WidgetTheme): string[];
52
+ /**
53
+ * Start the planning loader widget (same cadence/look as the phase widget).
54
+ * Returns a disposer that stops the refresh and clears the widget. No-op
55
+ * (returns a no-op disposer) when there's no UI.
56
+ */
57
+ export declare function startAutoLoader(ctx: ExtensionCommandContext, getState: () => AutoLoaderState | null): () => void;
39
58
  export declare function flashTerminalWidget(ctx: ExtensionCommandContext, state: Exclude<TaskState, 'pending' | 'in_progress' | 'completed'>, taskId: string, reason: string | undefined): void;
@@ -7,6 +7,7 @@
7
7
  import { PHASE_INDEX, PHASE_ORDER } from './task-file.js';
8
8
  // ─── Constants ───────────────────────────────────────────────────────────────
9
9
  export const WIDGET_KEY = 'pi-tasks';
10
+ export const AUTO_WIDGET_KEY = 'pi-task-auto';
10
11
  export const WIDGET_REFRESH_MS = 500;
11
12
  export const WIDGET_LAST_LINE_MAX = 120;
12
13
  export const NOTIFY_CLEAR_MS = 3000;
@@ -46,6 +47,27 @@ export function contextThresholdColor(theme, percent, text) {
46
47
  return theme.fg('warning', text);
47
48
  return text;
48
49
  }
50
+ /** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
51
+ export function formatContextDetail(usage, theme) {
52
+ const { tokens, contextWindow, percent } = usage;
53
+ if (contextWindow > 0) {
54
+ const text = `${formatContextTokens(tokens)}/${formatContextTokens(contextWindow)} ${contextProgressBar(percent)}`;
55
+ return theme ? contextThresholdColor(theme, percent, text) : text;
56
+ }
57
+ if (tokens > 0)
58
+ return formatContextTokens(tokens);
59
+ return null;
60
+ }
61
+ /** Render the muted `↳ lastLine` trailer (truncated), or null when there's no line. */
62
+ function lastLineTrailer(lastLine, theme) {
63
+ if (!lastLine)
64
+ return null;
65
+ const t = lastLine.length > WIDGET_LAST_LINE_MAX ?
66
+ lastLine.slice(0, WIDGET_LAST_LINE_MAX - 1) + '…'
67
+ : lastLine;
68
+ const raw = `↳ ${t}`;
69
+ return theme ? theme.fg('muted', raw) : raw;
70
+ }
49
71
  export function buildWidgetLines(s, theme) {
50
72
  const elapsed = formatDuration(Date.now() - s.startedAt);
51
73
  const head = `${s.taskId} · ${s.title}`;
@@ -54,23 +76,14 @@ export function buildWidgetLines(s, theme) {
54
76
  const stepNum = Math.min(idx + 1, total);
55
77
  let detail = `phase ${stepNum}/${total} ${s.phase} · ${elapsed}`;
56
78
  if (s.contextUsage) {
57
- const { tokens, contextWindow, percent } = s.contextUsage;
58
- if (contextWindow > 0) {
59
- const text = `${formatContextTokens(tokens)}/${formatContextTokens(contextWindow)} ${contextProgressBar(percent)}`;
60
- detail += ` · ${theme ? contextThresholdColor(theme, percent, text) : text}`;
61
- }
62
- else if (tokens > 0) {
63
- detail += ` · ${formatContextTokens(tokens)}`;
64
- }
79
+ const ctxDetail = formatContextDetail(s.contextUsage, theme);
80
+ if (ctxDetail)
81
+ detail += ` · ${ctxDetail}`;
65
82
  }
66
83
  const lines = [head, detail];
67
- if (s.lastLine) {
68
- const t = s.lastLine.length > WIDGET_LAST_LINE_MAX ?
69
- s.lastLine.slice(0, WIDGET_LAST_LINE_MAX - 1) + '…'
70
- : s.lastLine;
71
- const raw = `↳ ${t}`;
72
- lines.push(theme ? theme.fg('muted', raw) : raw);
73
- }
84
+ const trailer = lastLineTrailer(s.lastLine, theme);
85
+ if (trailer)
86
+ lines.push(trailer);
74
87
  return lines;
75
88
  }
76
89
  // ─── Widget lifecycle ────────────────────────────────────────────────────────
@@ -91,6 +104,51 @@ export function startWidget(ctx, getState) {
91
104
  timer.unref?.();
92
105
  return () => clearInterval(timer);
93
106
  }
107
+ export function buildAutoLoaderLines(s, theme) {
108
+ const elapsed = formatDuration(Date.now() - s.startedAt);
109
+ const head = `/task-auto · ${s.title}`;
110
+ let detail = `planning ${s.stepNum}/${s.stepTotal} ${s.step} · ${elapsed}`;
111
+ if (s.contextUsage) {
112
+ const ctxDetail = formatContextDetail(s.contextUsage, theme);
113
+ if (ctxDetail)
114
+ detail += ` · ${ctxDetail}`;
115
+ }
116
+ const lines = [head, detail];
117
+ const trailer = lastLineTrailer(s.lastLine, theme);
118
+ if (trailer)
119
+ lines.push(trailer);
120
+ return lines;
121
+ }
122
+ /**
123
+ * Start the planning loader widget (same cadence/look as the phase widget).
124
+ * Returns a disposer that stops the refresh and clears the widget. No-op
125
+ * (returns a no-op disposer) when there's no UI.
126
+ */
127
+ export function startAutoLoader(ctx, getState) {
128
+ if (!ctx.hasUI)
129
+ return () => { };
130
+ const render = () => {
131
+ const s = getState();
132
+ try {
133
+ ctx.ui.setWidget(AUTO_WIDGET_KEY, s ? buildAutoLoaderLines(s, ctx.ui.theme) : undefined);
134
+ }
135
+ catch {
136
+ /* stale ctx */
137
+ }
138
+ };
139
+ render();
140
+ const timer = setInterval(render, WIDGET_REFRESH_MS);
141
+ timer.unref?.();
142
+ return () => {
143
+ clearInterval(timer);
144
+ try {
145
+ ctx.ui.setWidget(AUTO_WIDGET_KEY, undefined);
146
+ }
147
+ catch {
148
+ /* stale ctx */
149
+ }
150
+ };
151
+ }
94
152
  export function flashTerminalWidget(ctx, state, taskId, reason) {
95
153
  if (!ctx.hasUI)
96
154
  return;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mjasnikovs/pi-task",
3
- "version": "0.2.1",
3
+ "version": "0.2.3",
4
4
  "description": "Deterministic spec-orchestration for local models, with bundled web/docs/fetch/worker subagent tools.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",