@mjasnikovs/pi-task 0.13.6 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/config/register.js +1 -1
  2. package/dist/remote/push.d.ts +12 -3
  3. package/dist/remote/push.js +63 -9
  4. package/dist/remote/register.js +7 -3
  5. package/dist/remote/server.d.ts +4 -2
  6. package/dist/remote/server.js +7 -3
  7. package/dist/remote/tailscale.d.ts +8 -2
  8. package/dist/remote/tailscale.js +13 -6
  9. package/dist/remote/ui-script.d.ts +3 -0
  10. package/dist/remote/ui-script.js +804 -0
  11. package/dist/remote/ui-styles.d.ts +1 -0
  12. package/dist/remote/ui-styles.js +202 -0
  13. package/dist/remote/ui.js +4 -1000
  14. package/dist/shared/child-process.d.ts +27 -0
  15. package/dist/shared/child-process.js +151 -139
  16. package/dist/task/auto-orchestrator.js +43 -13
  17. package/dist/task/auto-prompts.d.ts +4 -3
  18. package/dist/task/auto-prompts.js +9 -6
  19. package/dist/task/child-runner.js +1 -1
  20. package/dist/task/context-usage.d.ts +16 -0
  21. package/dist/task/context-usage.js +22 -0
  22. package/dist/task/external-context.d.ts +27 -0
  23. package/dist/task/external-context.js +93 -0
  24. package/dist/task/failure-classifier.js +1 -1
  25. package/dist/task/orchestrator.js +7 -13
  26. package/dist/task/parsers.d.ts +4 -15
  27. package/dist/task/parsers.js +48 -87
  28. package/dist/task/phases.d.ts +5 -7
  29. package/dist/task/phases.js +29 -84
  30. package/dist/task/prompts.d.ts +1 -0
  31. package/dist/task/prompts.js +9 -0
  32. package/dist/task/spec-validation.d.ts +23 -0
  33. package/dist/task/spec-validation.js +90 -0
  34. package/dist/task/widget.d.ts +1 -1
  35. package/dist/task/widget.js +1 -1
  36. package/dist/workers/html-clean.js +7 -4
  37. package/dist/workers/pi-worker-docs.js +69 -58
  38. package/dist/workers/pi-worker-fetch.js +25 -21
  39. package/dist/workers/pi-worker-search.js +7 -13
  40. package/dist/workers/pi-worker.js +8 -14
  41. package/dist/workers/shared.d.ts +40 -0
  42. package/dist/workers/shared.js +31 -0
  43. package/package.json +1 -1
@@ -0,0 +1,93 @@
1
+ /**
2
+ * External-context enrichment — extract packages / URLs / services from the
3
+ * refined spec, fan out to docs / fetch / search workers, and assemble the
4
+ * `EXTERNAL CONTEXT` block the research phase prepends to every worker prompt.
5
+ *
6
+ * Split out of phases.ts so the research phase reads as "gather context → run
7
+ * probes → assemble", and so this fan-out has its own test surface separate
8
+ * from the four research workers. `enrichment.ts` stays a pure parser; the I/O
9
+ * lives here.
10
+ */
11
+ import { docsRaw } from '../workers/docs-core.js';
12
+ import { fetchRaw } from '../workers/fetch-core.js';
13
+ import { formatNpmVersionSection } from '../workers/npm-version.js';
14
+ import { search as defaultSearch } from '../workers/search-core.js';
15
+ import { extractEnrichTargets } from './enrichment.js';
16
+ import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
17
+ /**
18
+ * Returns the `EXTERNAL CONTEXT\n…\n\n` block for the refined spec, or `''` when
19
+ * there is nothing to enrich (no targets, or every lookup failed).
20
+ */
21
+ export async function gatherExternalContext(refined, deps, researchDeps = {}) {
22
+ const docsRawFn = researchDeps.docsRaw ?? docsRaw;
23
+ const fetchRawFn = researchDeps.fetchRaw ?? fetchRaw;
24
+ const searchFn = researchDeps.searchFn ?? defaultSearch;
25
+ const enrichTargets = extractEnrichTargets(refined);
26
+ if (enrichTargets.packages.length === 0
27
+ && enrichTargets.urls.length === 0
28
+ && enrichTargets.services.length === 0) {
29
+ return '';
30
+ }
31
+ const enrichSections = [];
32
+ const tEnrichStart = Date.now();
33
+ const [docsResults, fetchResults, serviceResults] = await Promise.all([
34
+ Promise.all(enrichTargets.packages.map(pkg => docsRawFn({
35
+ pkg,
36
+ query: refined.split('\n').find(l => l.trim()) ?? refined,
37
+ cwd: deps.cwd,
38
+ signal: deps.signal
39
+ }).catch(() => null))),
40
+ Promise.all(enrichTargets.urls.map(url => fetchRawFn({ url, signal: deps.signal }).catch(() => null))),
41
+ Promise.all(enrichTargets.services.map(s => searchFn({
42
+ query: `${s.name} ${s.query}`,
43
+ count: 3,
44
+ signal: deps.signal
45
+ }).catch(() => null)))
46
+ ]);
47
+ // npm version blocks come from docsRaw's bundled lookup and lead the
48
+ // section so the model anchors on live version data before reading
49
+ // the docs body.
50
+ for (let i = 0; i < enrichTargets.packages.length; i++) {
51
+ const v = docsResults[i]?.npmVersion;
52
+ if (v)
53
+ enrichSections.push(formatNpmVersionSection(v));
54
+ }
55
+ for (let i = 0; i < enrichTargets.packages.length; i++) {
56
+ const r = docsResults[i];
57
+ if (r?.kind === 'ok' && r.chunks.length > 0) {
58
+ const body = r.chunks
59
+ .map(c => c.content)
60
+ .join('\n\n')
61
+ .slice(0, 4000);
62
+ enrichSections.push(`### docs: ${enrichTargets.packages[i]}\n${body}`);
63
+ }
64
+ }
65
+ for (let i = 0; i < enrichTargets.urls.length; i++) {
66
+ const r = fetchResults[i];
67
+ if (r) {
68
+ enrichSections.push(`### url: ${enrichTargets.urls[i]}\n${r.markdown.slice(0, 4000)}`);
69
+ }
70
+ }
71
+ const skipped = [];
72
+ for (let i = 0; i < enrichTargets.services.length; i++) {
73
+ const s = enrichTargets.services[i];
74
+ const r = serviceResults[i];
75
+ if (r === null)
76
+ continue;
77
+ if (r.kind === 'no_key') {
78
+ skipped.push(s.name);
79
+ continue;
80
+ }
81
+ if (r.kind === 'error')
82
+ continue;
83
+ // kind === 'ok'
84
+ enrichSections.push(formatServiceBlock(s.name, `${s.name} ${s.query}`, r.results));
85
+ }
86
+ if (skipped.length > 0) {
87
+ enrichSections.push(formatFreshnessSkippedBlock(skipped));
88
+ }
89
+ deps.recordSubStep?.('enrichment', Date.now() - tEnrichStart);
90
+ if (enrichSections.length === 0)
91
+ return '';
92
+ return `EXTERNAL CONTEXT\n${enrichSections.join('\n\n')}\n\n`;
93
+ }
@@ -2,7 +2,7 @@
2
2
  * Failure classification — map runtime errors to task state transitions,
3
3
  * widget flash messages, and user notifications.
4
4
  */
5
- import { updateTaskFrontMatter } from './task-file.js';
5
+ import { updateTaskFrontMatter } from './task-io.js';
6
6
  import { flashTerminalWidget } from './widget.js';
7
7
  import { LoopExhaustedError, LeakedToolCallError, USER_CANCELLED } from './child-runner.js';
8
8
  // ─── Classifier ──────────────────────────────────────────────────────────────
@@ -19,11 +19,14 @@ import * as fsp from 'node:fs/promises';
19
19
  import * as path from 'node:path';
20
20
  import { PHASES, postCommitPhase } from './phases.js';
21
21
  import { handleFailure } from './failure-classifier.js';
22
- import { PHASE_INDEX, PHASE_ORDER, allocateTaskId, ensureTasksDir, normaliseTaskId, parseFrontMatter, readSection, readTaskFile, setTaskSection, taskFilePath, tasksDir, updateTaskFrontMatter, writeTaskFile, extractSection, RESUMABLE_STATES } from './task-file.js';
22
+ import { PHASE_INDEX, PHASE_ORDER, RESUMABLE_STATES } from './task-types.js';
23
+ import { normaliseTaskId, parseFrontMatter, extractSection } from './task-parsers.js';
24
+ import { allocateTaskId, ensureTasksDir, readSection, readTaskFile, setTaskSection, taskFilePath, tasksDir, updateTaskFrontMatter, writeTaskFile } from './task-io.js';
23
25
  import { startWidget } from './widget.js';
24
26
  import { publishViewer, publishNotify, registerBridgeCommand, getBridge } from '../remote/bridge.js';
25
- import { parseVerifyBlock } from './parsers.js';
27
+ import { parseVerifyBlock } from './spec-validation.js';
26
28
  import { formatTimings } from './timings.js';
29
+ import { getParentContextWindow, resolveContextUsage } from './context-usage.js';
27
30
  // ─── Module-level state ──────────────────────────────────────────────────────
28
31
  let activeTask = null;
29
32
  /** Set the module-level active task (avoids `this` aliasing in TaskRunner.run). */
@@ -77,7 +80,7 @@ export class TaskRunner {
77
80
  phase: 'refine',
78
81
  startedAt: this._startedAt
79
82
  };
80
- const parentContextWindow = ctx.model?.contextWindow ?? 0;
83
+ const parentContextWindow = getParentContextWindow(ctx);
81
84
  this._deps = {
82
85
  cwd,
83
86
  taskId: '',
@@ -87,16 +90,7 @@ export class TaskRunner {
87
90
  this._widgetState.lastLine = line;
88
91
  },
89
92
  onContextUsage: snapshot => {
90
- const prev = this._widgetState.contextUsage;
91
- const cw = snapshot.contextWindow > 0 ?
92
- snapshot.contextWindow
93
- : prev?.contextWindow || parentContextWindow;
94
- const percent = cw > 0 ? Math.min(100, (snapshot.tokens / cw) * 100) : snapshot.percent;
95
- this._widgetState.contextUsage = {
96
- tokens: snapshot.tokens,
97
- contextWindow: cw,
98
- percent
99
- };
93
+ this._widgetState.contextUsage = resolveContextUsage(snapshot, this._widgetState.contextUsage, parentContextWindow);
100
94
  },
101
95
  recordSubStep: (label, ms) => {
102
96
  if (this._currentPhaseChildren) {
@@ -3,9 +3,6 @@
3
3
  *
4
4
  * Pure functions that parse raw model output into structured data.
5
5
  */
6
- export interface VerifyCommand {
7
- raw: string;
8
- }
9
6
  export type AutoAnswer = {
10
7
  kind: 'answered';
11
8
  text: string;
@@ -20,12 +17,15 @@ export type AutoAnswer = {
20
17
  export interface ClarifyQuestion {
21
18
  question: string;
22
19
  suggested?: string;
20
+ /** Secondary option for a binary "A or B?" fork; mirrors grill's ALT line. */
21
+ alt?: string;
23
22
  }
24
23
  export declare const GRILL_LINE_RE: RegExp;
25
24
  export declare const SUGGESTED_LINE_RE: RegExp;
26
- export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
25
+ export declare const ALT_LINE_RE: RegExp;
27
26
  export declare function parseGrillQuestions(raw: string): string[];
28
27
  export declare function parseClarifyList(raw: string): ClarifyQuestion[];
28
+ export declare function autoAnswerHasTag(raw: string): boolean;
29
29
  export declare function parseAutoAnswer(raw: string): AutoAnswer;
30
30
  export declare function parseVerifyToolingOutput(output: string): {
31
31
  verified: string[];
@@ -34,15 +34,4 @@ export declare function parseVerifyToolingOutput(output: string): {
34
34
  reason: string;
35
35
  }>;
36
36
  };
37
- export declare function isCritiqueClean(text: string): boolean;
38
- /**
39
- * Drop any preamble the model emitted before the spec's GOAL header. The
40
- * thinking model sometimes narrates ("Now I have all the context. Here's the
41
- * rewritten spec:") before GOAL — the prompts forbid it, but the critique
42
- * validator only checks for a VERIFY block, so it leaked into the delivered
43
- * spec. We slice from the first line that begins a GOAL section so the spec
44
- * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
45
- */
46
- export declare function stripSpecPreamble(spec: string): string;
47
- export declare function validateSpecShape(spec: string): string | null;
48
37
  export declare function deriveTitle(refined: string): string;
@@ -7,31 +7,7 @@ import { MAX_GRILL_QUESTIONS } from './phases.js';
7
7
  // ─── Constants ───────────────────────────────────────────────────────────────
8
8
  export const GRILL_LINE_RE = /^\s*\d+[.)]\s+(.+)$/;
9
9
  export const SUGGESTED_LINE_RE = /^\s*SUGGESTED:\s*(.*)$/i;
10
- // ─── Verify block parser ─────────────────────────────────────────────────────
11
- export function parseVerifyBlock(spec) {
12
- const lines = spec.split('\n');
13
- let i = 0;
14
- while (i < lines.length && !/^VERIFY:\s*$/.test(lines[i]))
15
- i++;
16
- if (i >= lines.length)
17
- return null;
18
- i++;
19
- while (i < lines.length && lines[i].trim() === '')
20
- i++;
21
- if (i >= lines.length)
22
- return null;
23
- if (!/^```(sh|bash)?\s*$/.test(lines[i]))
24
- return null;
25
- i++;
26
- const cmds = [];
27
- while (i < lines.length && !/^```\s*$/.test(lines[i])) {
28
- const line = lines[i].trim();
29
- if (line.length > 0 && !line.startsWith('#'))
30
- cmds.push({ raw: line });
31
- i++;
32
- }
33
- return cmds;
34
- }
10
+ export const ALT_LINE_RE = /^\s*ALT:\s*(.*)$/i;
35
11
  // ─── Grill questions parser ──────────────────────────────────────────────────
36
12
  // The grill-gen prompt instructs the worker to emit the literal token `NONE`
37
13
  // when it has zero questions, so the runner's empty-output guard can still
@@ -55,14 +31,32 @@ export function parseGrillQuestions(raw) {
55
31
  // (e.g. "1. ...so this must be resolved. SUGGESTED: use polling.") rather than
56
32
  // on its own line.
57
33
  const INLINE_SUGGESTED_RE = /\bSUGGESTED:\s*/i;
58
- /** Split a question line's text into the question and any inline SUGGESTED default. */
34
+ const INLINE_ALT_RE = /\bALT:\s*/i;
35
+ /**
36
+ * Split a question line's text into the question, any inline SUGGESTED default,
37
+ * and any inline ALT secondary option (the model may write both on one line:
38
+ * "1. A or B? SUGGESTED: A ALT: B").
39
+ */
59
40
  function splitInlineSuggested(text) {
60
41
  const m = INLINE_SUGGESTED_RE.exec(text);
61
42
  if (!m)
62
43
  return { question: text.trim() };
63
44
  const question = text.slice(0, m.index).trim();
64
- const suggested = text.slice(m.index + m[0].length).trim();
65
- return suggested.length > 0 ? { question, suggested } : { question };
45
+ let rest = text.slice(m.index + m[0].length);
46
+ let alt;
47
+ const altM = INLINE_ALT_RE.exec(rest);
48
+ if (altM) {
49
+ const altText = rest.slice(altM.index + altM[0].length).trim();
50
+ if (altText.length > 0)
51
+ alt = altText;
52
+ rest = rest.slice(0, altM.index);
53
+ }
54
+ const suggested = rest.trim();
55
+ return {
56
+ question,
57
+ ...(suggested.length > 0 && { suggested }),
58
+ ...(alt !== undefined && { alt })
59
+ };
66
60
  }
67
61
  // Parses the /task-auto clarify output: a numbered question list where each
68
62
  // question carries a "SUGGESTED: <default>" recommendation — either on its own
@@ -92,11 +86,28 @@ export function parseClarifyList(raw) {
92
86
  if (suggested.length > 0 && last.suggested === undefined) {
93
87
  last.suggested = suggested;
94
88
  }
89
+ continue;
90
+ }
91
+ const altLine = ALT_LINE_RE.exec(line);
92
+ if (altLine && out.length > 0) {
93
+ const alt = altLine[1].trim();
94
+ const last = out[out.length - 1];
95
+ if (alt.length > 0 && last.alt === undefined) {
96
+ last.alt = alt;
97
+ }
95
98
  }
96
99
  }
97
100
  return out;
98
101
  }
99
102
  // ─── Auto-answer parser ──────────────────────────────────────────────────────
103
+ // Did the model use one of the required output tags (ANSWER/UNKNOWN/ALT,
104
+ // tolerating the same ANSWER misspellings parseAutoAnswer accepts)? When no tag
105
+ // is present the model ignored the format and wrote free-form prose, so the
106
+ // caller reprompts instead of trusting parseAutoAnswer's lenient salvage.
107
+ const AUTO_ANSWER_TAG_RE = /^\s*(AN[SW]{1,3}E?R|UNKNOWN|ALT)\s*:/im;
108
+ export function autoAnswerHasTag(raw) {
109
+ return AUTO_ANSWER_TAG_RE.test(raw);
110
+ }
100
111
  export function parseAutoAnswer(raw) {
101
112
  const lines = raw
102
113
  .split('\n')
@@ -140,8 +151,15 @@ export function parseAutoAnswer(raw) {
140
151
  raw
141
152
  };
142
153
  }
143
- if (lines.length > 0)
144
- return { kind: 'unknown', suggested: lines[0], raw };
154
+ // Last-resort salvage: the model emitted no tag at all. Take the first line
155
+ // that reads like an answer, NOT a preamble — a trailing colon marks a
156
+ // heading ("Here's the analysis:") that introduces prose rather than
157
+ // recommending anything. Surfacing such a line as the recommendation is the
158
+ // "wrong format" the user sees; better to offer no default and let them
159
+ // answer than to pre-fill a meaningless preamble.
160
+ const salvaged = lines.find(l => !l.endsWith(':'));
161
+ if (salvaged)
162
+ return { kind: 'unknown', suggested: salvaged, raw };
145
163
  return { kind: 'unknown', raw };
146
164
  }
147
165
  // ─── Verify tooling output parser ────────────────────────────────────────────
@@ -173,63 +191,6 @@ export function parseVerifyToolingOutput(output) {
173
191
  }
174
192
  return { verified, rejected };
175
193
  }
176
- // ─── Critique triage parser ──────────────────────────────────────────────────
177
- // The critique-triage prompt instructs the worker to emit the literal token
178
- // `CLEAN` on its own line when the compose draft has no substantive defects, so
179
- // we can skip the expensive full-rewrite pass. Anything else is treated as a
180
- // defect list that gets fed into the rewrite. Empty output is NOT clean — that
181
- // would be a silent crash, and treating it as clean would skip review entirely.
182
- export function isCritiqueClean(text) {
183
- const firstLine = text
184
- .split('\n')
185
- .map(l => l.trim())
186
- .find(l => l.length > 0);
187
- if (!firstLine)
188
- return false;
189
- return /^CLEAN[.!]?$/i.test(firstLine);
190
- }
191
- // ─── Spec shape validator ────────────────────────────────────────────────────
192
- /**
193
- * Drop any preamble the model emitted before the spec's GOAL header. The
194
- * thinking model sometimes narrates ("Now I have all the context. Here's the
195
- * rewritten spec:") before GOAL — the prompts forbid it, but the critique
196
- * validator only checks for a VERIFY block, so it leaked into the delivered
197
- * spec. We slice from the first line that begins a GOAL section so the spec
198
- * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
199
- */
200
- export function stripSpecPreamble(spec) {
201
- const lines = spec.split('\n');
202
- const idx = lines.findIndex(l => /^GOAL\b/i.test(l));
203
- if (idx <= 0)
204
- return spec;
205
- // Only strip plain narration. If the lead-in is a markdown fence or a
206
- // cat-heredoc wrapper, leave it untouched — that's a malformation
207
- // validateSpecShape must reject (and compose must retry on), not something
208
- // to silently unwrap into a passing spec.
209
- const preamble = lines.slice(0, idx);
210
- if (preamble.some(l => /^\s*```/.test(l) || /^\s*cat\s*<</.test(l)))
211
- return spec;
212
- return lines.slice(idx).join('\n');
213
- }
214
- export function validateSpecShape(spec) {
215
- const trimmed = spec.trim();
216
- if (trimmed.length === 0)
217
- return 'spec is empty';
218
- const firstLine = trimmed.split('\n', 1)[0];
219
- if (/^\s*```/.test(firstLine))
220
- return 'spec starts with a markdown fence';
221
- if (/^\s*cat\s*<<\s*['"]?[A-Za-z_][A-Za-z0-9_]*['"]?/.test(firstLine)) {
222
- return 'spec is wrapped in a cat heredoc';
223
- }
224
- if (!/^GOAL\b/i.test(trimmed))
225
- return 'spec does not start with GOAL';
226
- for (const section of ['CONSTRAINTS', 'ACCEPTANCE', 'VERIFY']) {
227
- if (!new RegExp(`^\\s*${section}\\b`, 'm').test(trimmed)) {
228
- return `spec missing required section: ${section}`;
229
- }
230
- }
231
- return null;
232
- }
233
194
  // ─── Title derivation ────────────────────────────────────────────────────────
234
195
  export function deriveTitle(refined) {
235
196
  const stripBold = (s) => s.replace(/^\*+|\*+$/g, '').trim();
@@ -3,11 +3,12 @@
3
3
  * critique) plus the config table that drives the orchestrator loop.
4
4
  */
5
5
  import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
6
- import { docsRaw, docsFocused } from '../workers/docs-core.js';
7
- import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
6
+ import { docsFocused } from '../workers/docs-core.js';
7
+ import { fetchFocused } from '../workers/fetch-core.js';
8
8
  import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
9
+ import { type ExternalContextDeps } from './external-context.js';
9
10
  import { MAX_GRILL_QUESTIONS } from './prompts.js';
10
- import { type PhaseName } from './task-file.js';
11
+ import { type PhaseName } from './task-types.js';
11
12
  import { type WidgetState } from './widget.js';
12
13
  import { type AutoAnswer } from './parsers.js';
13
14
  import { type PhaseDeps } from './child-runner.js';
@@ -36,11 +37,8 @@ export declare function extractToolingCommands(research: string): string[] | nul
36
37
  export declare function replaceToolingWithVerified(research: string, verifiedCommands: string[]): string;
37
38
  export declare const phaseRefine: (deps: PhaseDeps, raw: string) => Promise<string>;
38
39
  export declare function phaseVerifyTooling(deps: PhaseDeps, research: string): Promise<string>;
39
- export interface PhaseResearchDeps {
40
- docsRaw?: typeof docsRaw;
41
- fetchRaw?: typeof fetchRaw;
40
+ export interface PhaseResearchDeps extends ExternalContextDeps {
42
41
  getFileInventory?: (cwd: string, signal?: AbortSignal) => Promise<string>;
43
- searchFn?: (input: SearchCoreInput) => Promise<SearchCoreResult>;
44
42
  }
45
43
  export declare function phaseResearch(deps: PhaseDeps, refined: string, researchDeps?: PhaseResearchDeps): Promise<string>;
46
44
  export interface PhaseAutoAnswerDeps {
@@ -2,18 +2,20 @@
2
2
  * Phase pipeline — the five phase functions (refine, research, grill, compose,
3
3
  * critique) plus the config table that drives the orchestrator loop.
4
4
  */
5
- import { docsRaw, docsFocused } from '../workers/docs-core.js';
6
- import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
5
+ import { docsFocused } from '../workers/docs-core.js';
6
+ import { fetchFocused } from '../workers/fetch-core.js';
7
7
  import { formatNpmVersionSection } from '../workers/npm-version.js';
8
8
  import { runWorker } from '../workers/pi-worker-core.js';
9
9
  import { search as defaultSearch } from '../workers/search-core.js';
10
10
  import { extractEnrichTargets } from './enrichment.js';
11
11
  import { getFileInventory } from './file-inventory.js';
12
12
  import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
13
- import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
14
- import { setTaskSection, updateTaskFrontMatter } from './task-file.js';
13
+ import { gatherExternalContext } from './external-context.js';
14
+ import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, GRILL_AUTO_FORMAT_HINT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
15
+ import { setTaskSection, updateTaskFrontMatter } from './task-io.js';
15
16
  import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
16
- import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, stripSpecPreamble, deriveTitle, isCritiqueClean } from './parsers.js';
17
+ import { parseGrillQuestions, parseAutoAnswer, autoAnswerHasTag, parseVerifyToolingOutput, deriveTitle } from './parsers.js';
18
+ import { parseVerifyBlock, validateSpecShape, stripSpecPreamble, isCritiqueClean } from './spec-validation.js';
17
19
  import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
18
20
  import { SessionUI } from '../remote/bridge.js';
19
21
  // ─── Re-export constants from their home modules ────────────────────────────
@@ -76,75 +78,8 @@ export async function phaseVerifyTooling(deps, research) {
76
78
  }
77
79
  const DOCS_EXTENSION_PATH = new URL('../workers/docs-extension.js', import.meta.url).pathname;
78
80
  export async function phaseResearch(deps, refined, researchDeps = {}) {
79
- const docsRawFn = researchDeps.docsRaw ?? docsRaw;
80
- const fetchRawFn = researchDeps.fetchRaw ?? fetchRaw;
81
81
  const fileInventoryFn = researchDeps.getFileInventory ?? getFileInventory;
82
- const searchFn = researchDeps.searchFn ?? defaultSearch;
83
- const enrichTargets = extractEnrichTargets(refined);
84
- const enrichSections = [];
85
- if (enrichTargets.packages.length > 0
86
- || enrichTargets.urls.length > 0
87
- || enrichTargets.services.length > 0) {
88
- const tEnrichStart = Date.now();
89
- const [docsResults, fetchResults, serviceResults] = await Promise.all([
90
- Promise.all(enrichTargets.packages.map(pkg => docsRawFn({
91
- pkg,
92
- query: refined.split('\n').find(l => l.trim()) ?? refined,
93
- cwd: deps.cwd,
94
- signal: deps.signal
95
- }).catch(() => null))),
96
- Promise.all(enrichTargets.urls.map(url => fetchRawFn({ url, signal: deps.signal }).catch(() => null))),
97
- Promise.all(enrichTargets.services.map(s => searchFn({
98
- query: `${s.name} ${s.query}`,
99
- count: 3,
100
- signal: deps.signal
101
- }).catch(() => null)))
102
- ]);
103
- // npm version blocks come from docsRaw's bundled lookup and lead the
104
- // section so the model anchors on live version data before reading
105
- // the docs body.
106
- for (let i = 0; i < enrichTargets.packages.length; i++) {
107
- const v = docsResults[i]?.npmVersion;
108
- if (v)
109
- enrichSections.push(formatNpmVersionSection(v));
110
- }
111
- for (let i = 0; i < enrichTargets.packages.length; i++) {
112
- const r = docsResults[i];
113
- if (r?.kind === 'ok' && r.chunks.length > 0) {
114
- const body = r.chunks
115
- .map(c => c.content)
116
- .join('\n\n')
117
- .slice(0, 4000);
118
- enrichSections.push(`### docs: ${enrichTargets.packages[i]}\n${body}`);
119
- }
120
- }
121
- for (let i = 0; i < enrichTargets.urls.length; i++) {
122
- const r = fetchResults[i];
123
- if (r) {
124
- enrichSections.push(`### url: ${enrichTargets.urls[i]}\n${r.markdown.slice(0, 4000)}`);
125
- }
126
- }
127
- const skipped = [];
128
- for (let i = 0; i < enrichTargets.services.length; i++) {
129
- const s = enrichTargets.services[i];
130
- const r = serviceResults[i];
131
- if (r === null)
132
- continue;
133
- if (r.kind === 'no_key') {
134
- skipped.push(s.name);
135
- continue;
136
- }
137
- if (r.kind === 'error')
138
- continue;
139
- // kind === 'ok'
140
- enrichSections.push(formatServiceBlock(s.name, `${s.name} ${s.query}`, r.results));
141
- }
142
- if (skipped.length > 0) {
143
- enrichSections.push(formatFreshnessSkippedBlock(skipped));
144
- }
145
- deps.recordSubStep?.('enrichment', Date.now() - tEnrichStart);
146
- }
147
- const externalContext = enrichSections.length > 0 ? `EXTERNAL CONTEXT\n${enrichSections.join('\n\n')}\n\n` : '';
82
+ const externalContext = await gatherExternalContext(refined, deps, researchDeps);
148
83
  // Pre-compute the project file inventory once and hand it to every worker.
149
84
  // Workers can then jump straight to targeted read/grep on known paths
150
85
  // instead of each spawning its own discovery loop (find/ls). A '' result
@@ -156,8 +91,9 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
156
91
  let doneCount = 0;
157
92
  const updateProgress = () => {
158
93
  doneCount++;
159
- if (deps.onChildOutput)
160
- deps.onChildOutput(`research (${doneCount}/4 workers done)`);
94
+ if (deps.onChildOutput) {
95
+ deps.onChildOutput(`research (${doneCount}/${workerSpecs.length} workers done)`);
96
+ }
161
97
  };
162
98
  // Per-worker timing split into wait (spawn → first byte) and work (first
163
99
  // byte → exit). The workers run sequentially below, so each split is a clean
@@ -190,16 +126,19 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
190
126
  // tooling) is preserved for assembly.
191
127
  const workerSpecs = [
192
128
  {
129
+ section: 'FILES',
193
130
  label: 'worker:files',
194
131
  prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
195
132
  },
196
133
  {
134
+ section: 'APIS',
197
135
  label: 'worker:apis',
198
136
  prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)),
199
137
  tools: 'read,grep,find,ls,pi-worker-docs',
200
138
  extensions: [DOCS_EXTENSION_PATH]
201
139
  },
202
140
  {
141
+ section: 'CONTEXT',
203
142
  label: 'worker:context',
204
143
  prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
205
144
  // Context owns architectural understanding, not path discovery —
@@ -209,6 +148,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
209
148
  tools: 'read,grep'
210
149
  },
211
150
  {
151
+ section: 'TOOLING',
212
152
  label: 'worker:tooling',
213
153
  prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
214
154
  }
@@ -234,13 +174,10 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
234
174
  updateProgress();
235
175
  workerResults.push(r);
236
176
  }
237
- const [files, apis, context, tooling] = workerResults;
238
- const sections = [
239
- { name: 'FILES', result: files },
240
- { name: 'APIS', result: apis },
241
- { name: 'CONTEXT', result: context },
242
- { name: 'TOOLING', result: tooling }
243
- ];
177
+ // Validate + assemble by mapping spec.section over the results, so adding or
178
+ // reordering a worker is a single edit to workerSpecs (the result order
179
+ // mirrors workerSpecs order the loop above pushes in sequence).
180
+ const sections = workerSpecs.map((spec, i) => ({ name: spec.section, result: workerResults[i] }));
244
181
  for (const { name, result } of sections) {
245
182
  if (result.exitCode !== 0) {
246
183
  throw new Error(`Research ${name} worker failed (exit ${result.exitCode}): ${result.stderr.slice(-500)}`);
@@ -253,7 +190,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
253
190
  + `(${result.leakedToolCall.trim()}) — it never ran`);
254
191
  }
255
192
  }
256
- return `FILES\n${files.text}\n\nAPIS\n${apis.text}\n\nCONTEXT\n${context.text}\n\nTOOLING\n${tooling.text}`;
193
+ return sections.map(({ name, result }) => `${name}\n${result.text}`).join('\n\n');
257
194
  }
258
195
  export async function phaseAutoAnswer(deps, refined, research, question, autoDeps = {}) {
259
196
  const docsFocusedFn = autoDeps.docsFocused ?? docsFocused;
@@ -336,7 +273,15 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
336
273
  const externalContext = contextSections.length > 0 ?
337
274
  `EXTERNAL CONTEXT\n${contextSections.join('\n\n')}\n\n`
338
275
  : '';
339
- const text = await runPhaseChild(deps, 'grill-auto', 'read', externalContext + GRILL_AUTO_ANSWER_PROMPT(refined, research, question));
276
+ const basePrompt = externalContext + GRILL_AUTO_ANSWER_PROMPT(refined, research, question);
277
+ let text = await runPhaseChild(deps, 'grill-auto', 'read', basePrompt);
278
+ if (!autoAnswerHasTag(text)) {
279
+ // The model ignored the ANSWER/UNKNOWN/ALT format and wrote prose
280
+ // (typically an "analysis" preamble). Reprompt once, forcing the
281
+ // tagged form, before falling back to parseAutoAnswer's salvage —
282
+ // otherwise a preamble line leaks out as the recommended answer.
283
+ text = await runPhaseChild(deps, 'grill-auto', 'read', prependHint(GRILL_AUTO_FORMAT_HINT, basePrompt));
284
+ }
340
285
  return parseAutoAnswer(text);
341
286
  }
342
287
  catch (err) {
@@ -36,6 +36,7 @@ declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
36
36
  declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
37
37
  declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
38
38
  declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
39
+ export declare const GRILL_AUTO_FORMAT_HINT: string;
39
40
  declare function composeRetryEmphasis(problem: string): string;
40
41
  declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;
41
42
  declare const CRITIQUE_TRIAGE_PROMPT: (spec: string, refined: string, qa: string) => string;
@@ -215,6 +215,15 @@ Research:
215
215
  ${research}
216
216
 
217
217
  Question: ${question}`;
218
+ // Reprompt prefix when grill-auto's first reply ignored the tagged output format
219
+ // and wrote free-form prose (an "analysis" preamble). Forces the terse form so a
220
+ // real recommendation reaches the user instead of a leaked preamble line.
221
+ export const GRILL_AUTO_FORMAT_HINT = '[SYSTEM NOTE: Your previous reply did NOT follow the required format — it had no '
222
+ + 'ANSWER:, UNKNOWN:, or ALT: line and read as free-form prose. Do not explain or '
223
+ + 'analyse. Output ONLY the tagged lines and nothing else. For a binary "A or B?" '
224
+ + 'question emit two lines:\nUNKNOWN: <primary option>\nALT: <alternative>\n'
225
+ + 'For a safe default the user would accept without thinking, emit one ANSWER: line. '
226
+ + 'No preamble, no markdown.]';
218
227
  function composeRetryEmphasis(problem) {
219
228
  if (problem === 'spec does not start with GOAL'
220
229
  || problem === 'spec starts with a markdown fence'
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Spec gate — the guards that decide whether a composed spec is acceptable at
3
+ * handoff. Unlike the informational parsers in parsers.ts, these answer a
4
+ * yes/no (or "what's wrong") question the orchestrator and critique phase act
5
+ * on: is the VERIFY block runnable, is the shape well-formed, did critique come
6
+ * back CLEAN. Self-contained (no imports) so the gate doesn't drag in the phase
7
+ * pipeline.
8
+ */
9
+ export interface VerifyCommand {
10
+ raw: string;
11
+ }
12
+ export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
13
+ export declare function isCritiqueClean(text: string): boolean;
14
+ /**
15
+ * Drop any preamble the model emitted before the spec's GOAL header. The
16
+ * thinking model sometimes narrates ("Now I have all the context. Here's the
17
+ * rewritten spec:") before GOAL — the prompts forbid it, but the critique
18
+ * validator only checks for a VERIFY block, so it leaked into the delivered
19
+ * spec. We slice from the first line that begins a GOAL section so the spec
20
+ * starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
21
+ */
22
+ export declare function stripSpecPreamble(spec: string): string;
23
+ export declare function validateSpecShape(spec: string): string | null;