npm - @mjasnikovs/pi-task - Versions diffs - 0.2.1 → 0.2.3 - Mend

@mjasnikovs/pi-task 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +29 -0
package/dist/index.js +2 -0
package/dist/shared/child-process.js +25 -4
package/dist/task/auto-commit.d.ts +20 -0
package/dist/task/auto-commit.js +56 -0
package/dist/task/auto-io.d.ts +17 -0
package/dist/task/auto-io.js +124 -0
package/dist/task/auto-orchestrator.d.ts +28 -0
package/dist/task/auto-orchestrator.js +318 -0
package/dist/task/auto-prompts.d.ts +15 -0
package/dist/task/auto-prompts.js +66 -0
package/dist/task/inline-markdown.d.ts +18 -0
package/dist/task/inline-markdown.js +28 -0
package/dist/task/orchestrator.d.ts +28 -0
package/dist/task/orchestrator.js +42 -9
package/dist/task/parsers.d.ts +16 -0
package/dist/task/parsers.js +70 -0
package/dist/task/phases.d.ts +2 -1
package/dist/task/phases.js +126 -100
package/dist/task/prompts.d.ts +24 -1
package/dist/task/prompts.js +40 -5
package/dist/task/widget.d.ts +19 -0
package/dist/task/widget.js +73 -15
package/package.json +1 -1

package/dist/task/phases.d.ts CHANGED Viewed

@@ -6,11 +6,12 @@ import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
 import { docsRaw, docsFocused } from '../workers/docs-core.js';
 import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
 import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
+import { MAX_GRILL_QUESTIONS } from './prompts.js';
 import { type PhaseName } from './task-file.js';
 import { type WidgetState } from './widget.js';
 import { type AutoAnswer } from './parsers.js';
 import { type PhaseDeps } from './child-runner.js';
-export { MAX_GRILL_QUESTIONS } from './prompts.js';
+export { MAX_GRILL_QUESTIONS };
 export interface PhaseContext {
     cwd: string;
     id: string;

package/dist/task/phases.js CHANGED Viewed

@@ -10,12 +10,13 @@ import { search as defaultSearch } from '../workers/search-core.js';
 import { extractEnrichTargets } from './enrichment.js';
 import { getFileInventory } from './file-inventory.js';
 import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
-import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT } from './prompts.js';
+import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
 import { setTaskSection, updateTaskFrontMatter } from './task-file.js';
-import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, deriveTitle, isCritiqueClean } from './parsers.js';
+import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
+import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, stripSpecPreamble, deriveTitle, isCritiqueClean } from './parsers.js';
 import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
 // ─── Re-export constants from their home modules ────────────────────────────
-export { MAX_GRILL_QUESTIONS } from './prompts.js';
+export { MAX_GRILL_QUESTIONS };
 // ─── Tooling helpers ─────────────────────────────────────────────────────────
 /** Extract the TOOLING section commands from a research output string. */
 export function extractToolingCommands(research) {
@@ -51,7 +52,7 @@ export function replaceToolingWithVerified(research, verifiedCommands) {
     return replaced;
 }
 // ─── Phase functions ─────────────────────────────────────────────────────────
-export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, REFINE_PROMPT(raw)));
+export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, appendNoThink(REFINE_PROMPT(raw))));
 export async function phaseVerifyTooling(deps, research) {
     const commands = extractToolingCommands(research);
     if (!commands || commands.length === 0) {
@@ -60,7 +61,7 @@ export async function phaseVerifyTooling(deps, research) {
     const toolingList = commands.join('\n');
     let verifyOutput;
     try {
-        verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', VERIFY_TOOLING_PROMPT(toolingList));
+        verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', appendNoThink(VERIFY_TOOLING_PROMPT(toolingList)));
     }
     catch {
         return replaceToolingWithVerified(research, commands);
@@ -157,59 +158,67 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
             deps.onChildOutput(`research (${doneCount}/4 workers done)`);
     };
     // Per-worker timing split into wait (spawn → first byte) and work (first
-    // byte → exit). When workers fan out concurrently and the upstream model
-    // API caps concurrency, the queued workers spend most of their elapsed
-    // time waiting for a slot — the wait/work split makes that visible
-    // instead of the previous Promise.all-relative wall-clock that conflated
-    // the two.
+    // byte → exit). The workers run sequentially below, so each split is a clean
+    // per-worker measurement — waitMs the worker's own cold-start, workMs its
+    // generation+tool-call cost — not a Promise.all-relative wall-clock that
+    // conflates the two.
     const recordWorker = (label, p) => p.then(r => {
         deps.recordSubStep?.(`${label} wait`, r.waitMs);
         deps.recordSubStep?.(`${label} work`, r.workMs);
         return r;
     });
-    const [files, apis, context, tooling] = await Promise.all([
-        recordWorker('worker:files', runWorker({
-            prompt: promptHeader + RESEARCH_FILES_PROMPT(refined),
-            cwd: deps.cwd,
-            signal: deps.signal,
-            spawn: deps.spawn
-        }).then(r => {
-            updateProgress();
-            return r;
-        })),
-        recordWorker('worker:apis', runWorker({
-            prompt: promptHeader + RESEARCH_APIS_PROMPT(refined),
-            cwd: deps.cwd,
-            signal: deps.signal,
-            spawn: deps.spawn
-        }).then(r => {
-            updateProgress();
-            return r;
-        })),
-        recordWorker('worker:context', runWorker({
-            prompt: promptHeader + RESEARCH_CONTEXT_PROMPT(refined),
-            cwd: deps.cwd,
-            signal: deps.signal,
-            spawn: deps.spawn,
-            // Context owns architectural understanding, not path discovery
-            // — FILES handles that. Dropping `find`/`ls` keeps the worker
-            // from spawning long enumeration loops whose output then
-            // inflates prefill on every subsequent round.
+    // Run the four workers ONE AT A TIME. Settled by an A/B on the local
+    // llama.cpp backend (single GPU, same task/model) — and the answer flips
+    // with thinking:
+    //   - thinking ON → parallel wins: long decodes batch well, 4 concurrent
+    //     finish in ~max(worker), not the sum.
+    //   - /no_think   → sequential wins: with short decodes the batching upside
+    //     is gone, but 4 concurrent streams still split the one GPU and slow
+    //     each other ~4x (context worker measured 27s solo vs 128s under load),
+    //     so summed-but-fast (~100s) beats max-of-slowed (~130s).
+    // Every worker runs /no_think (below), so sequential is the faster regime.
+    // Do NOT switch this back to Promise.all without re-running that A/B.
+    //
+    // `/no_think` is the big win: these are agentic exploration loops, and on a
+    // reasoning model the child would otherwise emit a full <think> trace at
+    // every tool step ("let me read X next…") — the single largest decode sink
+    // in the pipeline. Stripping it cut each worker's decode 3-8x in the A/B.
+    // The worker still calls as many tools as it wants; it just stops narrating
+    // between them. See appendNoThink. Result order (files, apis, context,
+    // tooling) is preserved for assembly.
+    const workerSpecs = [
+        {
+            label: 'worker:files',
+            prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
+        },
+        { label: 'worker:apis', prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)) },
+        {
+            label: 'worker:context',
+            prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
+            // Context owns architectural understanding, not path discovery —
+            // FILES handles that. Dropping `find`/`ls` keeps the worker from
+            // spawning long enumeration loops whose output then inflates
+            // prefill on every subsequent round.
             tools: 'read,grep'
-        }).then(r => {
-            updateProgress();
-            return r;
-        })),
-        recordWorker('worker:tooling', runWorker({
-            prompt: promptHeader + RESEARCH_TOOLING_PROMPT(refined),
+        },
+        {
+            label: 'worker:tooling',
+            prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
+        }
+    ];
+    const workerResults = [];
+    for (const spec of workerSpecs) {
+        const r = await recordWorker(spec.label, runWorker({
+            prompt: spec.prompt,
             cwd: deps.cwd,
             signal: deps.signal,
-            spawn: deps.spawn
-        }).then(r => {
-            updateProgress();
-            return r;
-        }))
-    ]);
+            spawn: deps.spawn,
+            ...(spec.tools ? { tools: spec.tools } : {})
+        }));
+        updateProgress();
+        workerResults.push(r);
+    }
+    const [files, apis, context, tooling] = workerResults;
     const sections = [
         { name: 'FILES', result: files },
         { name: 'APIS', result: apis },
@@ -316,64 +325,75 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
     }
 }
 export async function phaseGrill(deps, ctx, widgetState, refined, research) {
-    const tGenStart = Date.now();
-    const raw = await runPhaseWithLoopGuard(deps, 'grill-gen', 'read', hint => prependHint(hint, GRILL_GEN_PROMPT(refined, research)));
-    deps.recordSubStep?.('gen', Date.now() - tGenStart);
-    const questions = parseGrillQuestions(raw);
-    if (questions.length === 0)
-        return '(no questions produced)';
-    // Auto-answers are independent — generate them concurrently before the UI
-    // loop. The user-input loop below still runs sequentially (the user can
-    // only answer one prompt at a time), but the LLM-spawning work no longer
-    // blocks each iteration. For N questions this turns ~N × cold-start time
-    // into ~1 × cold-start time.
-    const tAutoStart = Date.now();
-    let doneCount = 0;
-    widgetState.lastLine = `auto-answering 0/${questions.length} done…`;
-    const autos = await Promise.all(questions.map((q, i) => phaseAutoAnswer(deps, refined, research, q).then(r => {
-        doneCount++;
-        widgetState.lastLine = `auto-answering ${doneCount}/${questions.length} done (Q${i + 1})`;
-        return r;
-    })));
-    deps.recordSubStep?.('auto-answers', Date.now() - tAutoStart);
+    // Sequential & adaptive: ask one question at a time, feeding every answer
+    // back into the next grill-gen call so later questions react to earlier ones
+    // (drop resolved unknowns, surface forks an answer introduced). Each question
+    // still gets a research-backed auto-answer — answered cheaply (skip the user)
+    // or surfaced as a pre-filled recommendation. The model emits NONE when
+    // nothing ambiguous remains. Kept in sync with /task-auto's clarify dialog.
     const theme = ctx.ui.theme;
-    const tInputStart = Date.now();
-    const out = [];
-    for (let i = 0; i < questions.length; i++) {
-        const q = questions[i];
-        const auto = autos[i];
-        out.push(`Q${i + 1}: ${q}`);
+    const out = []; // human-facing Q&A transcript (with auto-worker debug lines)
+    const qa = []; // compact Q&A fed back into the next question
+    // Open-ended: keep asking until the model emits NONE or the user dismisses.
+    for (let n = 0;; n++) {
+        const tGenStart = Date.now();
+        const raw = await runPhaseWithLoopGuard(deps, 'grill-gen', 'read', hint => prependHint(hint, GRILL_GEN_PROMPT(refined, research, qa.join('\n'))));
+        deps.recordSubStep?.('gen', Date.now() - tGenStart);
+        const questions = parseGrillQuestions(raw);
+        if (questions.length === 0)
+            break; // NONE / nothing left to ask
+        const q = questions[0];
+        widgetState.lastLine = `auto-answering Q${n + 1}…`;
+        const tAutoStart = Date.now();
+        const auto = await phaseAutoAnswer(deps, refined, research, q);
+        deps.recordSubStep?.('auto-answer', Date.now() - tAutoStart);
+        // Render markdown (bold/code) for the displayed prompt; keep plain text
+        // for the editable default and the persisted file.
+        const shownQ = renderInlineMarkdown(q, theme);
+        const plainQ = stripInlineMarkdown(q);
+        out.push(`Q${n + 1}: ${plainQ}`);
         const rawTrim = auto.raw.trim();
         out.push(`  (auto-worker raw: ${rawTrim.length === 0 ? '(empty)' : rawTrim.replace(/\n/g, ' ⏎ ')})`);
+        let answer;
         if (auto.kind === 'answered') {
-            out.push(`A${i + 1}: ${auto.text} (auto)`);
-            continue;
-        }
-        const title = auto.suggested ?
-            `${q}\n${theme.fg('muted', 'Recommended:')}\n\n${theme.fg('text', auto.suggested)}\n\n${theme.fg('muted', 'press Enter to accept')}`
-            : `${q}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
-        widgetState.lastLine = `awaiting Q${i + 1}`;
-        const a = await ctx.ui.input(title, auto.suggested);
-        if (a === undefined)
-            throw new Error(USER_CANCELLED);
-        const typed = a.trim();
-        if (typed.length === 0 && auto.suggested) {
-            out.push(`A${i + 1}: ${auto.suggested} (accepted recommendation)`);
-        }
-        else if (typed.length === 0) {
-            out.push(`A${i + 1}: (skipped)`);
+            answer = stripInlineMarkdown(auto.text);
+            out.push(`A${n + 1}: ${answer} (auto)`);
         }
         else {
-            out.push(`A${i + 1}: ${typed}`);
+            const plainSuggested = auto.suggested === undefined ? undefined : stripInlineMarkdown(auto.suggested);
+            const title = auto.suggested ?
+                `${shownQ}\n${theme.fg('muted', 'Recommended:')}\n\n${renderInlineMarkdown(auto.suggested, theme)}\n\n${theme.fg('muted', 'press Enter to accept')}`
+                : `${shownQ}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
+            widgetState.lastLine = `awaiting Q${n + 1}`;
+            const a = await ctx.ui.input(title, plainSuggested);
+            if (a === undefined)
+                throw new Error(USER_CANCELLED);
+            const typed = a.trim();
+            if (typed.length === 0 && plainSuggested) {
+                answer = `${plainSuggested} (accepted recommendation)`;
+            }
+            else if (typed.length === 0) {
+                answer = '(skipped)';
+            }
+            else {
+                answer = typed;
+            }
+            out.push(`A${n + 1}: ${answer}`);
         }
+        qa.push(`Q${n + 1}: ${plainQ}\nA${n + 1}: ${answer}`);
     }
-    deps.recordSubStep?.('user input', Date.now() - tInputStart);
+    if (out.length === 0)
+        return '(no questions produced)';
     return out.join('\n');
 }
+/**
+ * Compose phase: produce the spec text from the refined task, research and
+ * Q&A transcript.
+ *
+ * Delegates to runWithEmphasisRetry for the 'compose' phase with the 'read'
+ * tool string (presumably the tools granted to the child). The three
+ * callbacks are:
+ *   1. prompt builder: COMPOSE_PROMPT(refined, research, qa, problem);
+ *   2. validator: reports `{ ok: false, problem }` when validateSpecShape
+ *      returns something truthy, otherwise `{ ok: true, value }`;
+ *   3. error factory: builds `compose_invalid: <problem>`. NOTE(review):
+ *      presumably used once retries are exhausted; the helper is not
+ *      visible here, so confirm against child-runner.
+ */
 export async function phaseCompose(deps, refined, research, qa) {
     return runWithEmphasisRetry(deps, 'compose', 'read', problem => COMPOSE_PROMPT(refined, research, qa, problem), text => {
-        const problem = validateSpecShape(text);
-        return problem ? { ok: false, problem } : { ok: true, value: text };
+        // Trim any "here's the spec:" preamble before validating, so a
+        // strippable lead-in doesn't burn a full retry — and the stored
+        // value starts at GOAL.
+        const stripped = stripSpecPreamble(text);
+        const problem = validateSpecShape(stripped);
+        return problem ? { ok: false, problem } : { ok: true, value: stripped };
     }, problem => new Error(`compose_invalid: ${problem}`));
 }
 export async function phaseCritique(deps, spec, refined, qa) {
@@ -397,7 +417,7 @@ export async function phaseCritique(deps, spec, refined, qa) {
             // Granting `read` here let it wander the repo to "verify" findings,
             // which made the supposedly-cheap pass cost as much as a rewrite
             // (observed ~133s). The judgement needs no file access.
-            verdict = await runPhaseChild(deps, 'critique-triage', '', CRITIQUE_TRIAGE_PROMPT(spec, refined, qa));
+            verdict = await runPhaseChild(deps, 'critique-triage', '', appendNoThink(CRITIQUE_TRIAGE_PROMPT(spec, refined, qa)));
         }
         catch {
             verdict = null;
@@ -411,9 +431,15 @@ export async function phaseCritique(deps, spec, refined, qa) {
     }
     const tRewrite = Date.now();
     try {
-        return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text => parseVerifyBlock(text) ?
-            { ok: true, value: text }
-            : { ok: false, problem: 'no_verify_block' }, () => new Error('no_verify_block'));
+        return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text => {
+            // The rewrite (thinking on) sometimes prepends narration before
+            // GOAL; the prompt forbids it but this validator only checks for
+            // a VERIFY block. Strip it so the delivered spec starts at GOAL.
+            const stripped = stripSpecPreamble(text);
+            return parseVerifyBlock(stripped) ?
+                { ok: true, value: stripped }
+                : { ok: false, problem: 'no_verify_block' };
+        }, () => new Error('no_verify_block'));
     }
     finally {
         deps.recordSubStep?.('rewrite', Date.now() - tRewrite);

package/dist/task/prompts.d.ts CHANGED Viewed

@@ -5,13 +5,36 @@
  * effects, trivially testable.
  */
 export declare const MAX_GRILL_QUESTIONS = 10;
+/**
+ * Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
+ * <think> reasoning trace for that turn and persists across the tool-call loop
+ * within the same child session.
+ *
+ * On a local reasoning model decode is the bottleneck (~50 t/s here) while
+ * prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
+ * think trace can be 10k+ tokens — minutes — even when the phase's real output
+ * is a short list or a one-word verdict (an observed triage spent ~384s
+ * thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
+ * exploration: it still calls every tool it wants and takes every step it
+ * needs — it just stops narrating between actions.
+ *
+ * We strip thinking from the mechanical / exploration phases (refine, the four
+ * research workers, verify-tooling, triage) and keep it ON for the judgment
+ * phases (compose, grill, critique rewrite) where the reasoning earns its
+ * decode cost. pi's `--thinking off` flag is a no-op for this provider
+ * (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
+ * switch is the reliable control.
+ */
+export declare const NO_THINK = "/no_think";
+/** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
+export declare function appendNoThink(prompt: string): string;
 declare const REFINE_PROMPT: (raw: string) => string;
 declare const RESEARCH_READ_ONLY_CONSTRAINT = "IMPORTANT: You are ONLY allowed to READ. Do NOT create, modify, or delete any files. Use the read, grep, find, and ls tools to inspect the repo.";
 declare const RESEARCH_FILES_PROMPT: (refined: string) => string;
 declare const RESEARCH_APIS_PROMPT: (refined: string) => string;
 declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
 declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
-declare const GRILL_GEN_PROMPT: (refined: string, research: string) => string;
+declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
 declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
 declare function composeRetryEmphasis(problem: string): string;
 declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;

package/dist/task/prompts.js CHANGED Viewed

@@ -5,6 +5,31 @@
  * effects, trivially testable.
  */
 export const MAX_GRILL_QUESTIONS = 10;
+/**
+ * Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
+ * <think> reasoning trace for that turn and persists across the tool-call loop
+ * within the same child session.
+ *
+ * On a local reasoning model decode is the bottleneck (~50 t/s here) while
+ * prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
+ * think trace can be 10k+ tokens — minutes — even when the phase's real output
+ * is a short list or a one-word verdict (an observed triage spent ~384s
+ * thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
+ * exploration: it still calls every tool it wants and takes every step it
+ * needs — it just stops narrating between actions.
+ *
+ * We strip thinking from the mechanical / exploration phases (refine, the four
+ * research workers, verify-tooling, triage) and keep it ON for the judgment
+ * phases (compose, grill, critique rewrite) where the reasoning earns its
+ * decode cost. pi's `--thinking off` flag is a no-op for this provider
+ * (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
+ * switch is the reliable control.
+ */
+export const NO_THINK = '/no_think';
+/** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
+export function appendNoThink(prompt) {
+    return `${prompt}\n\n${NO_THINK}`;
+}
 const REFINE_PROMPT = (raw) => `You receive a user's task description for an AI coding agent. Rewrite it to be unambiguous and actionable.
 Output structure (four sections, exact headings, in this order):
@@ -118,25 +143,35 @@ No section header. No other sections. No preamble. May be empty if no verificati
 Task:
 ${refined}`;
-const GRILL_GEN_PROMPT = (refined, research) => `You are preparing clarifying questions for the user, based on a refined task description and the research that follows.
+const GRILL_GEN_PROMPT = (refined, research, priorQA) => `You are preparing clarifying questions for the user, based on a refined task description, the research that follows, and the answers gathered so far. Ask ONE question at a time.
+Output the SINGLE most important clarifying question that REMAINS — the one whose answer most changes the work — or NONE if no genuine ambiguity is left.
 Start from the KNOWN-UNKNOWNS bullets in the task. Add any new ambiguity surfaced by the research. Drop any unknowns the research already resolved.
+ACCOUNT FOR THE ANSWERS SO FAR — read carefully:
+- Never re-ask something already answered below.
+- If an answer introduced a NEW fork or contradicts an assumption in the task/research (e.g. the user chose a tool or approach the task did not anticipate), ask about the most important consequence of that choice next.
+- Drop questions the answers have made irrelevant.
 SCOPE RULES — read carefully:
 - Questions must clarify the EXISTING scope. Do NOT propose new deliverables, enhancements, modernizations, or "while I'm here" cleanups.
 - Forbidden patterns: "should I also…", "should we modernize…", "do you want me to update X while I'm at it…", "should I integrate Y…", "would you like guidance on Z…".
 - Allowed patterns: "by 'X' do you mean A or B?", "should failure mode Y be treated as Z?", "which of <files matching the task> applies here?".
-- If the refined task + research leave no genuine ambiguity, output zero questions. Zero questions is a valid and preferred outcome. Do not pad.
+- If nothing genuinely ambiguous remains, output NONE. Zero questions is a valid and preferred outcome. Do not pad.
 Output format — read carefully:
-- If you have questions: emit them as a plain numbered list, one per line, at most ${MAX_GRILL_QUESTIONS}, no preamble.
-- If you have zero questions: emit the single literal token NONE on its own line. Do NOT emit empty output — an empty response is treated as a crash, not as "no questions". The NONE sentinel is the only way to signal an intentional empty list.
+- One question as a single numbered line: "1. ...", and nothing else.
+- If no question remains: emit the single literal token NONE on its own line. Do NOT emit empty output — an empty response is treated as a crash, not as "no questions". The NONE sentinel is the only way to signal an intentional empty list.
 Refined task:
 ${refined}
 Research:
-${research}`;
+${research}
+Answers so far:
+${priorQA.trim() || '(none yet)'}`;
 const GRILL_AUTO_ANSWER_PROMPT = (refined, research, question) => `You are pre-answering a clarifying question for an AI coding task. You have the refined task and the research notes. You can also use the read tool to open any file mentioned in the research (e.g. package.json) if it helps you answer.
 Your job is to produce a recommended default answer. If the default is one the user would almost certainly accept, you tag it ANSWER and we skip the user entirely. Otherwise you tag it UNKNOWN and we show the suggestion in the input box for the user to confirm or override.

package/dist/task/widget.d.ts CHANGED Viewed

@@ -21,6 +21,7 @@ export interface ContextSnapshot {
 }
 export type WidgetTheme = ExtensionCommandContext['ui']['theme'];
 export declare const WIDGET_KEY = "pi-tasks";
+export declare const AUTO_WIDGET_KEY = "pi-task-auto";
 export declare const WIDGET_REFRESH_MS = 500;
 export declare const WIDGET_LAST_LINE_MAX = 120;
 export declare const NOTIFY_CLEAR_MS = 3000;
@@ -34,6 +35,24 @@ export declare function formatDuration(ms: number): string;
 export declare function formatContextTokens(count: number): string;
 export declare function contextProgressBar(percent: number): string;
 export declare function contextThresholdColor(theme: WidgetTheme, percent: number, text: string): string;
+/** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
+export declare function formatContextDetail(usage: ContextSnapshot, theme?: WidgetTheme): string | null;
 export declare function buildWidgetLines(s: WidgetState, theme?: WidgetTheme): string[];
 export declare function startWidget(ctx: ExtensionCommandContext, getState: () => WidgetState | null): () => void;
+/** State snapshot rendered by buildAutoLoaderLines for the `/task-auto` planning loader. */
+export interface AutoLoaderState {
+    /** Shown on the header line after `/task-auto · `. */
+    title: string;
+    /** Name of the current planning step, shown after the `stepNum/stepTotal` counter. */
+    step: string;
+    /** Number of the current step, rendered as the left side of `stepNum/stepTotal`. */
+    stepNum: number;
+    /** Total number of steps, rendered as the right side of `stepNum/stepTotal`. */
+    stepTotal: number;
+    /** Start time as a `Date.now()`-style timestamp; elapsed time is computed as `Date.now() - startedAt`. */
+    startedAt: number;
+    /** Optional latest output line, rendered as the truncated `↳ …` trailer. */
+    lastLine?: string;
+    /** Optional context usage; when present it is appended to the detail line. */
+    contextUsage?: ContextSnapshot;
+}
+export declare function buildAutoLoaderLines(s: AutoLoaderState, theme?: WidgetTheme): string[];
+/**
+ * Start the planning loader widget (same cadence/look as the phase widget).
+ * Returns a disposer that stops the refresh and clears the widget. No-op
+ * (returns a no-op disposer) when there's no UI.
+ */
+export declare function startAutoLoader(ctx: ExtensionCommandContext, getState: () => AutoLoaderState | null): () => void;
 export declare function flashTerminalWidget(ctx: ExtensionCommandContext, state: Exclude<TaskState, 'pending' | 'in_progress' | 'completed'>, taskId: string, reason: string | undefined): void;

package/dist/task/widget.js CHANGED Viewed

@@ -7,6 +7,7 @@
 import { PHASE_INDEX, PHASE_ORDER } from './task-file.js';
 // ─── Constants ───────────────────────────────────────────────────────────────
 export const WIDGET_KEY = 'pi-tasks';
+export const AUTO_WIDGET_KEY = 'pi-task-auto';
 export const WIDGET_REFRESH_MS = 500;
 export const WIDGET_LAST_LINE_MAX = 120;
 export const NOTIFY_CLEAR_MS = 3000;
@@ -46,6 +47,27 @@ export function contextThresholdColor(theme, percent, text) {
         return theme.fg('warning', text);
     return text;
 }
+/** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
+export function formatContextDetail(usage, theme) {
+    const { tokens, contextWindow, percent } = usage;
+    if (contextWindow > 0) {
+        const text = `${formatContextTokens(tokens)}/${formatContextTokens(contextWindow)} ${contextProgressBar(percent)}`;
+        return theme ? contextThresholdColor(theme, percent, text) : text;
+    }
+    if (tokens > 0)
+        return formatContextTokens(tokens);
+    return null;
+}
+/** Render the muted `↳ lastLine` trailer (truncated), or null when there's no line. */
+function lastLineTrailer(lastLine, theme) {
+    if (!lastLine)
+        return null;
+    const t = lastLine.length > WIDGET_LAST_LINE_MAX ?
+        lastLine.slice(0, WIDGET_LAST_LINE_MAX - 1) + '…'
+        : lastLine;
+    const raw = `↳ ${t}`;
+    return theme ? theme.fg('muted', raw) : raw;
+}
 export function buildWidgetLines(s, theme) {
     const elapsed = formatDuration(Date.now() - s.startedAt);
     const head = `${s.taskId} · ${s.title}`;
@@ -54,23 +76,14 @@ export function buildWidgetLines(s, theme) {
     const stepNum = Math.min(idx + 1, total);
     let detail = `phase ${stepNum}/${total} ${s.phase} · ${elapsed}`;
     if (s.contextUsage) {
-        const { tokens, contextWindow, percent } = s.contextUsage;
-        if (contextWindow > 0) {
-            const text = `${formatContextTokens(tokens)}/${formatContextTokens(contextWindow)} ${contextProgressBar(percent)}`;
-            detail += ` · ${theme ? contextThresholdColor(theme, percent, text) : text}`;
-        }
-        else if (tokens > 0) {
-            detail += ` · ${formatContextTokens(tokens)}`;
-        }
+        const ctxDetail = formatContextDetail(s.contextUsage, theme);
+        if (ctxDetail)
+            detail += ` · ${ctxDetail}`;
     }
     const lines = [head, detail];
-    if (s.lastLine) {
-        const t = s.lastLine.length > WIDGET_LAST_LINE_MAX ?
-            s.lastLine.slice(0, WIDGET_LAST_LINE_MAX - 1) + '…'
-            : s.lastLine;
-        const raw = `↳ ${t}`;
-        lines.push(theme ? theme.fg('muted', raw) : raw);
-    }
+    const trailer = lastLineTrailer(s.lastLine, theme);
+    if (trailer)
+        lines.push(trailer);
     return lines;
 }
 // ─── Widget lifecycle ────────────────────────────────────────────────────────
@@ -91,6 +104,51 @@ export function startWidget(ctx, getState) {
     timer.unref?.();
     return () => clearInterval(timer);
 }
+export function buildAutoLoaderLines(s, theme) {
+    const elapsed = formatDuration(Date.now() - s.startedAt);
+    const head = `/task-auto · ${s.title}`;
+    let detail = `planning ${s.stepNum}/${s.stepTotal} ${s.step} · ${elapsed}`;
+    if (s.contextUsage) {
+        const ctxDetail = formatContextDetail(s.contextUsage, theme);
+        if (ctxDetail)
+            detail += ` · ${ctxDetail}`;
+    }
+    const lines = [head, detail];
+    const trailer = lastLineTrailer(s.lastLine, theme);
+    if (trailer)
+        lines.push(trailer);
+    return lines;
+}
+/**
+ * Start the planning loader widget (same cadence/look as the phase widget).
+ * Returns a disposer that stops the refresh and clears the widget. No-op
+ * (returns a no-op disposer) when there's no UI.
+ */
+export function startAutoLoader(ctx, getState) {
+    if (!ctx.hasUI)
+        return () => { };
+    const render = () => {
+        const s = getState();
+        try {
+            ctx.ui.setWidget(AUTO_WIDGET_KEY, s ? buildAutoLoaderLines(s, ctx.ui.theme) : undefined);
+        }
+        catch {
+            /* stale ctx */
+        }
+    };
+    render();
+    const timer = setInterval(render, WIDGET_REFRESH_MS);
+    timer.unref?.();
+    return () => {
+        clearInterval(timer);
+        try {
+            ctx.ui.setWidget(AUTO_WIDGET_KEY, undefined);
+        }
+        catch {
+            /* stale ctx */
+        }
+    };
+}
 export function flashTerminalWidget(ctx, state, taskId, reason) {
     if (!ctx.hasUI)
         return;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
     "name": "@mjasnikovs/pi-task",
-    "version": "0.2.1",
+    "version": "0.2.3",
     "description": "Deterministic spec-orchestration for local models, with bundled web/docs/fetch/worker subagent tools.",
     "type": "module",
     "main": "./dist/index.js",