npm - @mjasnikovs/pi-task - Versions diffs - 0.13.6 → 0.13.8 - Mend

@mjasnikovs/pi-task 0.13.6 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43)

package/dist/config/register.js +1 -1
package/dist/remote/push.d.ts +12 -3
package/dist/remote/push.js +63 -9
package/dist/remote/register.js +7 -3
package/dist/remote/server.d.ts +4 -2
package/dist/remote/server.js +7 -3
package/dist/remote/tailscale.d.ts +8 -2
package/dist/remote/tailscale.js +13 -6
package/dist/remote/ui-script.d.ts +3 -0
package/dist/remote/ui-script.js +804 -0
package/dist/remote/ui-styles.d.ts +1 -0
package/dist/remote/ui-styles.js +202 -0
package/dist/remote/ui.js +4 -1000
package/dist/shared/child-process.d.ts +27 -0
package/dist/shared/child-process.js +151 -139
package/dist/task/auto-orchestrator.js +43 -13
package/dist/task/auto-prompts.d.ts +4 -3
package/dist/task/auto-prompts.js +9 -6
package/dist/task/child-runner.js +1 -1
package/dist/task/context-usage.d.ts +16 -0
package/dist/task/context-usage.js +22 -0
package/dist/task/external-context.d.ts +27 -0
package/dist/task/external-context.js +93 -0
package/dist/task/failure-classifier.js +1 -1
package/dist/task/orchestrator.js +7 -13
package/dist/task/parsers.d.ts +4 -15
package/dist/task/parsers.js +48 -87
package/dist/task/phases.d.ts +5 -7
package/dist/task/phases.js +29 -84
package/dist/task/prompts.d.ts +1 -0
package/dist/task/prompts.js +9 -0
package/dist/task/spec-validation.d.ts +23 -0
package/dist/task/spec-validation.js +90 -0
package/dist/task/widget.d.ts +1 -1
package/dist/task/widget.js +1 -1
package/dist/workers/html-clean.js +7 -4
package/dist/workers/pi-worker-docs.js +69 -58
package/dist/workers/pi-worker-fetch.js +25 -21
package/dist/workers/pi-worker-search.js +7 -13
package/dist/workers/pi-worker.js +8 -14
package/dist/workers/shared.d.ts +40 -0
package/dist/workers/shared.js +31 -0
package/package.json +1 -1

package/dist/task/external-context.js ADDED Viewed

@@ -0,0 +1,93 @@
+/**
+ * External-context enrichment — extract packages / URLs / services from the
+ * refined spec, fan out to docs / fetch / search workers, and assemble the
+ * `EXTERNAL CONTEXT` block the research phase prepends to every worker prompt.
+ *
+ * Split out of phases.ts so the research phase reads as "gather context → run
+ * probes → assemble", and so this fan-out has its own test surface separate
+ * from the four research workers. `enrichment.ts` stays a pure parser; the I/O
+ * lives here.
+ */
+import { docsRaw } from '../workers/docs-core.js';
+import { fetchRaw } from '../workers/fetch-core.js';
+import { formatNpmVersionSection } from '../workers/npm-version.js';
+import { search as defaultSearch } from '../workers/search-core.js';
+import { extractEnrichTargets } from './enrichment.js';
+import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
+/**
+ * Resolves to the `EXTERNAL CONTEXT\n…\n\n` block for the refined spec, or `''` when there are no targets or no lookup
+ * yielded a section. Reads `deps.cwd`, `deps.signal`, optional `deps.recordSubStep`; `researchDeps` may override `docsRaw` / `fetchRaw` / `searchFn`.
+ */
+export async function gatherExternalContext(refined, deps, researchDeps = {}) {
+    const docsRawFn = researchDeps.docsRaw ?? docsRaw; // caller-supplied override, else the imported worker
+    const fetchRawFn = researchDeps.fetchRaw ?? fetchRaw;
+    const searchFn = researchDeps.searchFn ?? defaultSearch;
+    const enrichTargets = extractEnrichTargets(refined);
+    if (enrichTargets.packages.length === 0
+        && enrichTargets.urls.length === 0
+        && enrichTargets.services.length === 0) {
+        return ''; // no targets: skip the fan-out (and the 'enrichment' timing record)
+    }
+    const enrichSections = []; // rendered sections, in final output order
+    const tEnrichStart = Date.now(); // start of the span reported to recordSubStep below
+    const [docsResults, fetchResults, serviceResults] = await Promise.all([
+        Promise.all(enrichTargets.packages.map(pkg => docsRawFn({
+            pkg,
+            query: refined.split('\n').find(l => l.trim()) ?? refined, // first non-blank spec line, else the whole spec
+            cwd: deps.cwd,
+            signal: deps.signal
+        }).catch(() => null))), // a rejected lookup becomes null instead of failing the batch
+        Promise.all(enrichTargets.urls.map(url => fetchRawFn({ url, signal: deps.signal }).catch(() => null))),
+        Promise.all(enrichTargets.services.map(s => searchFn({
+            query: `${s.name} ${s.query}`,
+            count: 3,
+            signal: deps.signal
+        }).catch(() => null)))
+    ]);
+    // npm version blocks come from docsRaw's bundled lookup and lead the
+    // section so the model anchors on live version data before reading
+    // the docs body.
+    for (let i = 0; i < enrichTargets.packages.length; i++) {
+        const v = docsResults[i]?.npmVersion; // optional chaining covers the null (rejected) case
+        if (v)
+            enrichSections.push(formatNpmVersionSection(v));
+    }
+    for (let i = 0; i < enrichTargets.packages.length; i++) { // second pass: docs bodies follow all version sections
+        const r = docsResults[i];
+        if (r?.kind === 'ok' && r.chunks.length > 0) { // only 'ok' results with at least one chunk contribute
+            const body = r.chunks
+                .map(c => c.content)
+                .join('\n\n')
+                .slice(0, 4000); // cap the joined docs body at 4000 UTF-16 code units
+            enrichSections.push(`### docs: ${enrichTargets.packages[i]}\n${body}`);
+        }
+    }
+    for (let i = 0; i < enrichTargets.urls.length; i++) {
+        const r = fetchResults[i];
+        if (r) { // NOTE(review): no `kind` check here — assumes every resolved fetch result has a string `markdown`; confirm against fetchRaw
+            enrichSections.push(`### url: ${enrichTargets.urls[i]}\n${r.markdown.slice(0, 4000)}`);
+        }
+    }
+    const skipped = []; // names of services whose search came back 'no_key'
+    for (let i = 0; i < enrichTargets.services.length; i++) {
+        const s = enrichTargets.services[i];
+        const r = serviceResults[i];
+        if (r === null) // search rejected (mapped to null above): contributes nothing
+            continue;
+        if (r.kind === 'no_key') {
+            skipped.push(s.name);
+            continue;
+        }
+        if (r.kind === 'error') // error results are dropped without a section
+            continue;
+        // kind === 'ok'
+        enrichSections.push(formatServiceBlock(s.name, `${s.name} ${s.query}`, r.results));
+    }
+    if (skipped.length > 0) {
+        enrichSections.push(formatFreshnessSkippedBlock(skipped)); // one combined block for all 'no_key' services, placed last
+    }
+    deps.recordSubStep?.('enrichment', Date.now() - tEnrichStart); // elapsed ms; no-op when recordSubStep is absent
+    if (enrichSections.length === 0)
+        return '';
+    return `EXTERNAL CONTEXT\n${enrichSections.join('\n\n')}\n\n`;
+}

package/dist/task/failure-classifier.js CHANGED Viewed

@@ -2,7 +2,7 @@
  * Failure classification — map runtime errors to task state transitions,
  * widget flash messages, and user notifications.
  */
-import { updateTaskFrontMatter } from './task-file.js';
+import { updateTaskFrontMatter } from './task-io.js';
 import { flashTerminalWidget } from './widget.js';
 import { LoopExhaustedError, LeakedToolCallError, USER_CANCELLED } from './child-runner.js';
 // ─── Classifier ──────────────────────────────────────────────────────────────

package/dist/task/orchestrator.js CHANGED Viewed

@@ -19,11 +19,14 @@ import * as fsp from 'node:fs/promises';
 import * as path from 'node:path';
 import { PHASES, postCommitPhase } from './phases.js';
 import { handleFailure } from './failure-classifier.js';
-import { PHASE_INDEX, PHASE_ORDER, allocateTaskId, ensureTasksDir, normaliseTaskId, parseFrontMatter, readSection, readTaskFile, setTaskSection, taskFilePath, tasksDir, updateTaskFrontMatter, writeTaskFile, extractSection, RESUMABLE_STATES } from './task-file.js';
+import { PHASE_INDEX, PHASE_ORDER, RESUMABLE_STATES } from './task-types.js';
+import { normaliseTaskId, parseFrontMatter, extractSection } from './task-parsers.js';
+import { allocateTaskId, ensureTasksDir, readSection, readTaskFile, setTaskSection, taskFilePath, tasksDir, updateTaskFrontMatter, writeTaskFile } from './task-io.js';
 import { startWidget } from './widget.js';
 import { publishViewer, publishNotify, registerBridgeCommand, getBridge } from '../remote/bridge.js';
-import { parseVerifyBlock } from './parsers.js';
+import { parseVerifyBlock } from './spec-validation.js';
 import { formatTimings } from './timings.js';
+import { getParentContextWindow, resolveContextUsage } from './context-usage.js';
 // ─── Module-level state ──────────────────────────────────────────────────────
 let activeTask = null;
 /** Set the module-level active task (avoids `this` aliasing in TaskRunner.run). */
@@ -77,7 +80,7 @@ export class TaskRunner {
             phase: 'refine',
             startedAt: this._startedAt
         };
-        const parentContextWindow = ctx.model?.contextWindow ?? 0;
+        const parentContextWindow = getParentContextWindow(ctx);
         this._deps = {
             cwd,
             taskId: '',
@@ -87,16 +90,7 @@ export class TaskRunner {
                 this._widgetState.lastLine = line;
             },
             onContextUsage: snapshot => {
-                const prev = this._widgetState.contextUsage;
-                const cw = snapshot.contextWindow > 0 ?
-                    snapshot.contextWindow
-                    : prev?.contextWindow || parentContextWindow;
-                const percent = cw > 0 ? Math.min(100, (snapshot.tokens / cw) * 100) : snapshot.percent;
-                this._widgetState.contextUsage = {
-                    tokens: snapshot.tokens,
-                    contextWindow: cw,
-                    percent
-                };
+                this._widgetState.contextUsage = resolveContextUsage(snapshot, this._widgetState.contextUsage, parentContextWindow);
             },
             recordSubStep: (label, ms) => {
                 if (this._currentPhaseChildren) {

package/dist/task/parsers.d.ts CHANGED Viewed

@@ -3,9 +3,6 @@
  *
  * Pure functions that parse raw model output into structured data.
  */
-export interface VerifyCommand {
-    raw: string;
-}
 export type AutoAnswer = {
     kind: 'answered';
     text: string;
@@ -20,12 +17,15 @@ export type AutoAnswer = {
 export interface ClarifyQuestion {
     question: string;
     suggested?: string;
+    /** Secondary option for a binary "A or B?" fork; mirrors grill's ALT line. */
+    alt?: string;
 }
 export declare const GRILL_LINE_RE: RegExp;
 export declare const SUGGESTED_LINE_RE: RegExp;
-export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
+export declare const ALT_LINE_RE: RegExp;
 export declare function parseGrillQuestions(raw: string): string[];
 export declare function parseClarifyList(raw: string): ClarifyQuestion[];
+export declare function autoAnswerHasTag(raw: string): boolean;
 export declare function parseAutoAnswer(raw: string): AutoAnswer;
 export declare function parseVerifyToolingOutput(output: string): {
     verified: string[];
@@ -34,15 +34,4 @@ export declare function parseVerifyToolingOutput(output: string): {
         reason: string;
     }>;
 };
-export declare function isCritiqueClean(text: string): boolean;
-/**
- * Drop any preamble the model emitted before the spec's GOAL header. The
- * thinking model sometimes narrates ("Now I have all the context. Here's the
- * rewritten spec:") before GOAL — the prompts forbid it, but the critique
- * validator only checks for a VERIFY block, so it leaked into the delivered
- * spec. We slice from the first line that begins a GOAL section so the spec
- * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
- */
-export declare function stripSpecPreamble(spec: string): string;
-export declare function validateSpecShape(spec: string): string | null;
 export declare function deriveTitle(refined: string): string;

package/dist/task/parsers.js CHANGED Viewed

@@ -7,31 +7,7 @@ import { MAX_GRILL_QUESTIONS } from './phases.js';
 // ─── Constants ───────────────────────────────────────────────────────────────
 export const GRILL_LINE_RE = /^\s*\d+[.)]\s+(.+)$/;
 export const SUGGESTED_LINE_RE = /^\s*SUGGESTED:\s*(.*)$/i;
-// ─── Verify block parser ─────────────────────────────────────────────────────
-export function parseVerifyBlock(spec) {
-    const lines = spec.split('\n');
-    let i = 0;
-    while (i < lines.length && !/^VERIFY:\s*$/.test(lines[i]))
-        i++;
-    if (i >= lines.length)
-        return null;
-    i++;
-    while (i < lines.length && lines[i].trim() === '')
-        i++;
-    if (i >= lines.length)
-        return null;
-    if (!/^```(sh|bash)?\s*$/.test(lines[i]))
-        return null;
-    i++;
-    const cmds = [];
-    while (i < lines.length && !/^```\s*$/.test(lines[i])) {
-        const line = lines[i].trim();
-        if (line.length > 0 && !line.startsWith('#'))
-            cmds.push({ raw: line });
-        i++;
-    }
-    return cmds;
-}
+export const ALT_LINE_RE = /^\s*ALT:\s*(.*)$/i;
 // ─── Grill questions parser ──────────────────────────────────────────────────
 // The grill-gen prompt instructs the worker to emit the literal token `NONE`
 // when it has zero questions, so the runner's empty-output guard can still
@@ -55,14 +31,32 @@ export function parseGrillQuestions(raw) {
 // (e.g. "1. ...so this must be resolved. SUGGESTED: use polling.") rather than
 // on its own line.
 const INLINE_SUGGESTED_RE = /\bSUGGESTED:\s*/i;
-/** Split a question line's text into the question and any inline SUGGESTED default. */
+const INLINE_ALT_RE = /\bALT:\s*/i;
+/**
+ * Split a question line's text into the question, any inline SUGGESTED default,
+ * and any inline ALT secondary option (the model may write both on one line:
+ * "1. A or B? SUGGESTED: A ALT: B").
+ */
 function splitInlineSuggested(text) {
     const m = INLINE_SUGGESTED_RE.exec(text);
     if (!m)
         return { question: text.trim() };
     const question = text.slice(0, m.index).trim();
-    const suggested = text.slice(m.index + m[0].length).trim();
-    return suggested.length > 0 ? { question, suggested } : { question };
+    let rest = text.slice(m.index + m[0].length);
+    let alt;
+    const altM = INLINE_ALT_RE.exec(rest);
+    if (altM) {
+        const altText = rest.slice(altM.index + altM[0].length).trim();
+        if (altText.length > 0)
+            alt = altText;
+        rest = rest.slice(0, altM.index);
+    }
+    const suggested = rest.trim();
+    return {
+        question,
+        ...(suggested.length > 0 && { suggested }),
+        ...(alt !== undefined && { alt })
+    };
 }
 // Parses the /task-auto clarify output: a numbered question list where each
 // question carries a "SUGGESTED: <default>" recommendation — either on its own
@@ -92,11 +86,28 @@ export function parseClarifyList(raw) {
             if (suggested.length > 0 && last.suggested === undefined) {
                 last.suggested = suggested;
             }
+            continue;
+        }
+        const altLine = ALT_LINE_RE.exec(line);
+        if (altLine && out.length > 0) {
+            const alt = altLine[1].trim();
+            const last = out[out.length - 1];
+            if (alt.length > 0 && last.alt === undefined) {
+                last.alt = alt;
+            }
         }
     }
     return out;
 }
 // ─── Auto-answer parser ──────────────────────────────────────────────────────
+// Did the model use one of the required output tags (ANSWER/UNKNOWN/ALT,
+// tolerating the same ANSWER misspellings parseAutoAnswer accepts)? When no tag
+// is present the model ignored the format and wrote free-form prose, so the
+// caller reprompts instead of trusting parseAutoAnswer's lenient salvage.
+const AUTO_ANSWER_TAG_RE = /^\s*(AN[SW]{1,3}E?R|UNKNOWN|ALT)\s*:/im;
+export function autoAnswerHasTag(raw) {
+    return AUTO_ANSWER_TAG_RE.test(raw);
+}
 export function parseAutoAnswer(raw) {
     const lines = raw
         .split('\n')
@@ -140,8 +151,15 @@ export function parseAutoAnswer(raw) {
             raw
         };
     }
-    if (lines.length > 0)
-        return { kind: 'unknown', suggested: lines[0], raw };
+    // Last-resort salvage: the model emitted no tag at all. Take the first line
+    // that reads like an answer, NOT a preamble — a trailing colon marks a
+    // heading ("Here's the analysis:") that introduces prose rather than
+    // recommending anything. Surfacing such a line as the recommendation is the
+    // "wrong format" the user sees; better to offer no default and let them
+    // answer than to pre-fill a meaningless preamble.
+    const salvaged = lines.find(l => !l.endsWith(':'));
+    if (salvaged)
+        return { kind: 'unknown', suggested: salvaged, raw };
     return { kind: 'unknown', raw };
 }
 // ─── Verify tooling output parser ────────────────────────────────────────────
@@ -173,63 +191,6 @@ export function parseVerifyToolingOutput(output) {
     }
     return { verified, rejected };
 }
-// ─── Critique triage parser ──────────────────────────────────────────────────
-// The critique-triage prompt instructs the worker to emit the literal token
-// `CLEAN` on its own line when the compose draft has no substantive defects, so
-// we can skip the expensive full-rewrite pass. Anything else is treated as a
-// defect list that gets fed into the rewrite. Empty output is NOT clean — that
-// would be a silent crash, and treating it as clean would skip review entirely.
-export function isCritiqueClean(text) {
-    const firstLine = text
-        .split('\n')
-        .map(l => l.trim())
-        .find(l => l.length > 0);
-    if (!firstLine)
-        return false;
-    return /^CLEAN[.!]?$/i.test(firstLine);
-}
-// ─── Spec shape validator ────────────────────────────────────────────────────
-/**
- * Drop any preamble the model emitted before the spec's GOAL header. The
- * thinking model sometimes narrates ("Now I have all the context. Here's the
- * rewritten spec:") before GOAL — the prompts forbid it, but the critique
- * validator only checks for a VERIFY block, so it leaked into the delivered
- * spec. We slice from the first line that begins a GOAL section so the spec
- * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
- */
-export function stripSpecPreamble(spec) {
-    const lines = spec.split('\n');
-    const idx = lines.findIndex(l => /^GOAL\b/i.test(l));
-    if (idx <= 0)
-        return spec;
-    // Only strip plain narration. If the lead-in is a markdown fence or a
-    // cat-heredoc wrapper, leave it untouched — that's a malformation
-    // validateSpecShape must reject (and compose must retry on), not something
-    // to silently unwrap into a passing spec.
-    const preamble = lines.slice(0, idx);
-    if (preamble.some(l => /^\s*```/.test(l) || /^\s*cat\s*<</.test(l)))
-        return spec;
-    return lines.slice(idx).join('\n');
-}
-export function validateSpecShape(spec) {
-    const trimmed = spec.trim();
-    if (trimmed.length === 0)
-        return 'spec is empty';
-    const firstLine = trimmed.split('\n', 1)[0];
-    if (/^\s*```/.test(firstLine))
-        return 'spec starts with a markdown fence';
-    if (/^\s*cat\s*<<\s*['"]?[A-Za-z_][A-Za-z0-9_]*['"]?/.test(firstLine)) {
-        return 'spec is wrapped in a cat heredoc';
-    }
-    if (!/^GOAL\b/i.test(trimmed))
-        return 'spec does not start with GOAL';
-    for (const section of ['CONSTRAINTS', 'ACCEPTANCE', 'VERIFY']) {
-        if (!new RegExp(`^\\s*${section}\\b`, 'm').test(trimmed)) {
-            return `spec missing required section: ${section}`;
-        }
-    }
-    return null;
-}
 // ─── Title derivation ────────────────────────────────────────────────────────
 export function deriveTitle(refined) {
     const stripBold = (s) => s.replace(/^\*+|\*+$/g, '').trim();

package/dist/task/phases.d.ts CHANGED Viewed

@@ -3,11 +3,12 @@
  * critique) plus the config table that drives the orchestrator loop.
  */
 import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
-import { docsRaw, docsFocused } from '../workers/docs-core.js';
-import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
+import { docsFocused } from '../workers/docs-core.js';
+import { fetchFocused } from '../workers/fetch-core.js';
 import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
+import { type ExternalContextDeps } from './external-context.js';
 import { MAX_GRILL_QUESTIONS } from './prompts.js';
-import { type PhaseName } from './task-file.js';
+import { type PhaseName } from './task-types.js';
 import { type WidgetState } from './widget.js';
 import { type AutoAnswer } from './parsers.js';
 import { type PhaseDeps } from './child-runner.js';
@@ -36,11 +37,8 @@ export declare function extractToolingCommands(research: string): string[] | nul
 export declare function replaceToolingWithVerified(research: string, verifiedCommands: string[]): string;
 export declare const phaseRefine: (deps: PhaseDeps, raw: string) => Promise<string>;
 export declare function phaseVerifyTooling(deps: PhaseDeps, research: string): Promise<string>;
-export interface PhaseResearchDeps {
-    docsRaw?: typeof docsRaw;
-    fetchRaw?: typeof fetchRaw;
+export interface PhaseResearchDeps extends ExternalContextDeps {
     getFileInventory?: (cwd: string, signal?: AbortSignal) => Promise<string>;
-    searchFn?: (input: SearchCoreInput) => Promise<SearchCoreResult>;
 }
 export declare function phaseResearch(deps: PhaseDeps, refined: string, researchDeps?: PhaseResearchDeps): Promise<string>;
 export interface PhaseAutoAnswerDeps {

package/dist/task/phases.js CHANGED Viewed

@@ -2,18 +2,20 @@
  * Phase pipeline — the five phase functions (refine, research, grill, compose,
  * critique) plus the config table that drives the orchestrator loop.
  */
-import { docsRaw, docsFocused } from '../workers/docs-core.js';
-import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
+import { docsFocused } from '../workers/docs-core.js';
+import { fetchFocused } from '../workers/fetch-core.js';
 import { formatNpmVersionSection } from '../workers/npm-version.js';
 import { runWorker } from '../workers/pi-worker-core.js';
 import { search as defaultSearch } from '../workers/search-core.js';
 import { extractEnrichTargets } from './enrichment.js';
 import { getFileInventory } from './file-inventory.js';
 import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
-import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
-import { setTaskSection, updateTaskFrontMatter } from './task-file.js';
+import { gatherExternalContext } from './external-context.js';
+import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, GRILL_AUTO_FORMAT_HINT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
+import { setTaskSection, updateTaskFrontMatter } from './task-io.js';
 import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
-import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, stripSpecPreamble, deriveTitle, isCritiqueClean } from './parsers.js';
+import { parseGrillQuestions, parseAutoAnswer, autoAnswerHasTag, parseVerifyToolingOutput, deriveTitle } from './parsers.js';
+import { parseVerifyBlock, validateSpecShape, stripSpecPreamble, isCritiqueClean } from './spec-validation.js';
 import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
 import { SessionUI } from '../remote/bridge.js';
 // ─── Re-export constants from their home modules ────────────────────────────
@@ -76,75 +78,8 @@ export async function phaseVerifyTooling(deps, research) {
 }
 const DOCS_EXTENSION_PATH = new URL('../workers/docs-extension.js', import.meta.url).pathname;
 export async function phaseResearch(deps, refined, researchDeps = {}) {
-    const docsRawFn = researchDeps.docsRaw ?? docsRaw;
-    const fetchRawFn = researchDeps.fetchRaw ?? fetchRaw;
     const fileInventoryFn = researchDeps.getFileInventory ?? getFileInventory;
-    const searchFn = researchDeps.searchFn ?? defaultSearch;
-    const enrichTargets = extractEnrichTargets(refined);
-    const enrichSections = [];
-    if (enrichTargets.packages.length > 0
-        || enrichTargets.urls.length > 0
-        || enrichTargets.services.length > 0) {
-        const tEnrichStart = Date.now();
-        const [docsResults, fetchResults, serviceResults] = await Promise.all([
-            Promise.all(enrichTargets.packages.map(pkg => docsRawFn({
-                pkg,
-                query: refined.split('\n').find(l => l.trim()) ?? refined,
-                cwd: deps.cwd,
-                signal: deps.signal
-            }).catch(() => null))),
-            Promise.all(enrichTargets.urls.map(url => fetchRawFn({ url, signal: deps.signal }).catch(() => null))),
-            Promise.all(enrichTargets.services.map(s => searchFn({
-                query: `${s.name} ${s.query}`,
-                count: 3,
-                signal: deps.signal
-            }).catch(() => null)))
-        ]);
-        // npm version blocks come from docsRaw's bundled lookup and lead the
-        // section so the model anchors on live version data before reading
-        // the docs body.
-        for (let i = 0; i < enrichTargets.packages.length; i++) {
-            const v = docsResults[i]?.npmVersion;
-            if (v)
-                enrichSections.push(formatNpmVersionSection(v));
-        }
-        for (let i = 0; i < enrichTargets.packages.length; i++) {
-            const r = docsResults[i];
-            if (r?.kind === 'ok' && r.chunks.length > 0) {
-                const body = r.chunks
-                    .map(c => c.content)
-                    .join('\n\n')
-                    .slice(0, 4000);
-                enrichSections.push(`### docs: ${enrichTargets.packages[i]}\n${body}`);
-            }
-        }
-        for (let i = 0; i < enrichTargets.urls.length; i++) {
-            const r = fetchResults[i];
-            if (r) {
-                enrichSections.push(`### url: ${enrichTargets.urls[i]}\n${r.markdown.slice(0, 4000)}`);
-            }
-        }
-        const skipped = [];
-        for (let i = 0; i < enrichTargets.services.length; i++) {
-            const s = enrichTargets.services[i];
-            const r = serviceResults[i];
-            if (r === null)
-                continue;
-            if (r.kind === 'no_key') {
-                skipped.push(s.name);
-                continue;
-            }
-            if (r.kind === 'error')
-                continue;
-            // kind === 'ok'
-            enrichSections.push(formatServiceBlock(s.name, `${s.name} ${s.query}`, r.results));
-        }
-        if (skipped.length > 0) {
-            enrichSections.push(formatFreshnessSkippedBlock(skipped));
-        }
-        deps.recordSubStep?.('enrichment', Date.now() - tEnrichStart);
-    }
-    const externalContext = enrichSections.length > 0 ? `EXTERNAL CONTEXT\n${enrichSections.join('\n\n')}\n\n` : '';
+    const externalContext = await gatherExternalContext(refined, deps, researchDeps);
     // Pre-compute the project file inventory once and hand it to every worker.
     // Workers can then jump straight to targeted read/grep on known paths
     // instead of each spawning its own discovery loop (find/ls). A '' result
@@ -156,8 +91,9 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
     let doneCount = 0;
     const updateProgress = () => {
         doneCount++;
-        if (deps.onChildOutput)
-            deps.onChildOutput(`research (${doneCount}/4 workers done)`);
+        if (deps.onChildOutput) {
+            deps.onChildOutput(`research (${doneCount}/${workerSpecs.length} workers done)`);
+        }
     };
     // Per-worker timing split into wait (spawn → first byte) and work (first
     // byte → exit). The workers run sequentially below, so each split is a clean
@@ -190,16 +126,19 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
     // tooling) is preserved for assembly.
     const workerSpecs = [
         {
+            section: 'FILES',
             label: 'worker:files',
             prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
         },
         {
+            section: 'APIS',
             label: 'worker:apis',
             prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)),
             tools: 'read,grep,find,ls,pi-worker-docs',
             extensions: [DOCS_EXTENSION_PATH]
         },
         {
+            section: 'CONTEXT',
             label: 'worker:context',
             prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
             // Context owns architectural understanding, not path discovery —
@@ -209,6 +148,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
             tools: 'read,grep'
         },
         {
+            section: 'TOOLING',
             label: 'worker:tooling',
             prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
         }
@@ -234,13 +174,10 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
         updateProgress();
         workerResults.push(r);
     }
-    const [files, apis, context, tooling] = workerResults;
-    const sections = [
-        { name: 'FILES', result: files },
-        { name: 'APIS', result: apis },
-        { name: 'CONTEXT', result: context },
-        { name: 'TOOLING', result: tooling }
-    ];
+    // Validate + assemble by mapping spec.section over the results, so adding or
+    // reordering a worker is a single edit to workerSpecs (the result order
+    // mirrors workerSpecs order — the loop above pushes in sequence).
+    const sections = workerSpecs.map((spec, i) => ({ name: spec.section, result: workerResults[i] }));
     for (const { name, result } of sections) {
         if (result.exitCode !== 0) {
             throw new Error(`Research ${name} worker failed (exit ${result.exitCode}): ${result.stderr.slice(-500)}`);
@@ -253,7 +190,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
                 + `(${result.leakedToolCall.trim()}) — it never ran`);
         }
     }
-    return `FILES\n${files.text}\n\nAPIS\n${apis.text}\n\nCONTEXT\n${context.text}\n\nTOOLING\n${tooling.text}`;
+    return sections.map(({ name, result }) => `${name}\n${result.text}`).join('\n\n');
 }
 export async function phaseAutoAnswer(deps, refined, research, question, autoDeps = {}) {
     const docsFocusedFn = autoDeps.docsFocused ?? docsFocused;
@@ -336,7 +273,15 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
         const externalContext = contextSections.length > 0 ?
             `EXTERNAL CONTEXT\n${contextSections.join('\n\n')}\n\n`
             : '';
-        const text = await runPhaseChild(deps, 'grill-auto', 'read', externalContext + GRILL_AUTO_ANSWER_PROMPT(refined, research, question));
+        const basePrompt = externalContext + GRILL_AUTO_ANSWER_PROMPT(refined, research, question);
+        let text = await runPhaseChild(deps, 'grill-auto', 'read', basePrompt);
+        if (!autoAnswerHasTag(text)) {
+            // The model ignored the ANSWER/UNKNOWN/ALT format and wrote prose
+            // (typically an "analysis" preamble). Reprompt once, forcing the
+            // tagged form, before falling back to parseAutoAnswer's salvage —
+            // otherwise a preamble line leaks out as the recommended answer.
+            text = await runPhaseChild(deps, 'grill-auto', 'read', prependHint(GRILL_AUTO_FORMAT_HINT, basePrompt));
+        }
         return parseAutoAnswer(text);
     }
     catch (err) {

package/dist/task/prompts.d.ts CHANGED Viewed

@@ -36,6 +36,7 @@ declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
 declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
 declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
 declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
+export declare const GRILL_AUTO_FORMAT_HINT: string;
 declare function composeRetryEmphasis(problem: string): string;
 declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;
 declare const CRITIQUE_TRIAGE_PROMPT: (spec: string, refined: string, qa: string) => string;

package/dist/task/prompts.js CHANGED Viewed

@@ -215,6 +215,15 @@ Research:
 ${research}
 Question: ${question}`;
+// Reprompt prefix when grill-auto's first reply ignored the tagged output format
+// and wrote free-form prose (an "analysis" preamble). Forces the terse form so a
+// real recommendation reaches the user instead of a leaked preamble line.
+export const GRILL_AUTO_FORMAT_HINT = '[SYSTEM NOTE: Your previous reply did NOT follow the required format — it had no '
+    + 'ANSWER:, UNKNOWN:, or ALT: line and read as free-form prose. Do not explain or '
+    + 'analyse. Output ONLY the tagged lines and nothing else. For a binary "A or B?" '
+    + 'question emit two lines:\nUNKNOWN: <primary option>\nALT: <alternative>\n'
+    + 'For a safe default the user would accept without thinking, emit one ANSWER: line. '
+    + 'No preamble, no markdown.]';
 function composeRetryEmphasis(problem) {
     if (problem === 'spec does not start with GOAL'
         || problem === 'spec starts with a markdown fence'

package/dist/task/spec-validation.d.ts ADDED Viewed

@@ -0,0 +1,23 @@
+/**
+ * Spec gate — the guards that decide whether a composed spec is acceptable at
+ * handoff. Unlike the informational parsers in parsers.ts, these answer a
+ * yes/no (or "what's wrong") question the orchestrator and critique phase act
+ * on: is the VERIFY block runnable, is the shape well-formed, did critique come
+ * back CLEAN. Self-contained (no imports) so the gate doesn't drag in the phase
+ * pipeline.
+ */
+export interface VerifyCommand {
+    raw: string;
+}
+export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
+export declare function isCritiqueClean(text: string): boolean;
+/**
+ * Drop any preamble the model emitted before the spec's GOAL header. The
+ * thinking model sometimes narrates ("Now I have all the context. Here's the
+ * rewritten spec:") before GOAL — the prompts forbid it, but the critique
+ * validator only checks for a VERIFY block, so it leaked into the delivered
+ * spec. We slice from the first line that begins a GOAL section so the spec
+ * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
+ */
+export declare function stripSpecPreamble(spec: string): string;
+export declare function validateSpecShape(spec: string): string | null;