@mjasnikovs/pi-task 0.13.6 → 0.13.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/remote/push.d.ts +12 -3
- package/dist/remote/push.js +63 -9
- package/dist/remote/ui-script.d.ts +3 -0
- package/dist/remote/ui-script.js +804 -0
- package/dist/remote/ui-styles.d.ts +1 -0
- package/dist/remote/ui-styles.js +202 -0
- package/dist/remote/ui.js +4 -1000
- package/dist/shared/child-process.d.ts +27 -0
- package/dist/shared/child-process.js +151 -139
- package/dist/task/auto-orchestrator.js +3 -6
- package/dist/task/child-runner.js +1 -1
- package/dist/task/context-usage.d.ts +16 -0
- package/dist/task/context-usage.js +22 -0
- package/dist/task/external-context.d.ts +27 -0
- package/dist/task/external-context.js +93 -0
- package/dist/task/failure-classifier.js +1 -1
- package/dist/task/orchestrator.js +7 -13
- package/dist/task/parsers.d.ts +1 -15
- package/dist/task/parsers.js +17 -84
- package/dist/task/phases.d.ts +5 -7
- package/dist/task/phases.js +29 -84
- package/dist/task/prompts.d.ts +1 -0
- package/dist/task/prompts.js +9 -0
- package/dist/task/spec-validation.d.ts +23 -0
- package/dist/task/spec-validation.js +90 -0
- package/dist/task/widget.d.ts +1 -1
- package/dist/task/widget.js +1 -1
- package/dist/workers/pi-worker-docs.js +69 -58
- package/dist/workers/pi-worker-fetch.js +25 -21
- package/dist/workers/pi-worker-search.js +7 -13
- package/dist/workers/pi-worker.js +8 -14
- package/dist/workers/shared.d.ts +40 -0
- package/dist/workers/shared.js +31 -0
- package/package.json +1 -1
package/dist/task/parsers.js
CHANGED
|
@@ -7,31 +7,6 @@ import { MAX_GRILL_QUESTIONS } from './phases.js';
|
|
|
7
7
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
8
8
|
export const GRILL_LINE_RE = /^\s*\d+[.)]\s+(.+)$/;
|
|
9
9
|
export const SUGGESTED_LINE_RE = /^\s*SUGGESTED:\s*(.*)$/i;
|
|
10
|
-
// ─── Verify block parser ─────────────────────────────────────────────────────
|
|
11
|
-
export function parseVerifyBlock(spec) {
|
|
12
|
-
const lines = spec.split('\n');
|
|
13
|
-
let i = 0;
|
|
14
|
-
while (i < lines.length && !/^VERIFY:\s*$/.test(lines[i]))
|
|
15
|
-
i++;
|
|
16
|
-
if (i >= lines.length)
|
|
17
|
-
return null;
|
|
18
|
-
i++;
|
|
19
|
-
while (i < lines.length && lines[i].trim() === '')
|
|
20
|
-
i++;
|
|
21
|
-
if (i >= lines.length)
|
|
22
|
-
return null;
|
|
23
|
-
if (!/^```(sh|bash)?\s*$/.test(lines[i]))
|
|
24
|
-
return null;
|
|
25
|
-
i++;
|
|
26
|
-
const cmds = [];
|
|
27
|
-
while (i < lines.length && !/^```\s*$/.test(lines[i])) {
|
|
28
|
-
const line = lines[i].trim();
|
|
29
|
-
if (line.length > 0 && !line.startsWith('#'))
|
|
30
|
-
cmds.push({ raw: line });
|
|
31
|
-
i++;
|
|
32
|
-
}
|
|
33
|
-
return cmds;
|
|
34
|
-
}
|
|
35
10
|
// ─── Grill questions parser ──────────────────────────────────────────────────
|
|
36
11
|
// The grill-gen prompt instructs the worker to emit the literal token `NONE`
|
|
37
12
|
// when it has zero questions, so the runner's empty-output guard can still
|
|
@@ -97,6 +72,14 @@ export function parseClarifyList(raw) {
|
|
|
97
72
|
return out;
|
|
98
73
|
}
|
|
99
74
|
// ─── Auto-answer parser ──────────────────────────────────────────────────────
|
|
75
|
+
// Did the model use one of the required output tags (ANSWER/UNKNOWN/ALT,
|
|
76
|
+
// tolerating the same ANSWER misspellings parseAutoAnswer accepts)? When no tag
|
|
77
|
+
// is present the model ignored the format and wrote free-form prose, so the
|
|
78
|
+
// caller reprompts instead of trusting parseAutoAnswer's lenient salvage.
|
|
79
|
+
const AUTO_ANSWER_TAG_RE = /^\s*(AN[SW]{1,3}E?R|UNKNOWN|ALT)\s*:/im;
|
|
80
|
+
export function autoAnswerHasTag(raw) {
|
|
81
|
+
return AUTO_ANSWER_TAG_RE.test(raw);
|
|
82
|
+
}
|
|
100
83
|
export function parseAutoAnswer(raw) {
|
|
101
84
|
const lines = raw
|
|
102
85
|
.split('\n')
|
|
@@ -140,8 +123,15 @@ export function parseAutoAnswer(raw) {
|
|
|
140
123
|
raw
|
|
141
124
|
};
|
|
142
125
|
}
|
|
143
|
-
|
|
144
|
-
|
|
126
|
+
// Last-resort salvage: the model emitted no tag at all. Take the first line
|
|
127
|
+
// that reads like an answer, NOT a preamble — a trailing colon marks a
|
|
128
|
+
// heading ("Here's the analysis:") that introduces prose rather than
|
|
129
|
+
// recommending anything. Surfacing such a line as the recommendation is the
|
|
130
|
+
// "wrong format" the user sees; better to offer no default and let them
|
|
131
|
+
// answer than to pre-fill a meaningless preamble.
|
|
132
|
+
const salvaged = lines.find(l => !l.endsWith(':'));
|
|
133
|
+
if (salvaged)
|
|
134
|
+
return { kind: 'unknown', suggested: salvaged, raw };
|
|
145
135
|
return { kind: 'unknown', raw };
|
|
146
136
|
}
|
|
147
137
|
// ─── Verify tooling output parser ────────────────────────────────────────────
|
|
@@ -173,63 +163,6 @@ export function parseVerifyToolingOutput(output) {
|
|
|
173
163
|
}
|
|
174
164
|
return { verified, rejected };
|
|
175
165
|
}
|
|
176
|
-
// ─── Critique triage parser ──────────────────────────────────────────────────
|
|
177
|
-
// The critique-triage prompt instructs the worker to emit the literal token
|
|
178
|
-
// `CLEAN` on its own line when the compose draft has no substantive defects, so
|
|
179
|
-
// we can skip the expensive full-rewrite pass. Anything else is treated as a
|
|
180
|
-
// defect list that gets fed into the rewrite. Empty output is NOT clean — that
|
|
181
|
-
// would be a silent crash, and treating it as clean would skip review entirely.
|
|
182
|
-
export function isCritiqueClean(text) {
|
|
183
|
-
const firstLine = text
|
|
184
|
-
.split('\n')
|
|
185
|
-
.map(l => l.trim())
|
|
186
|
-
.find(l => l.length > 0);
|
|
187
|
-
if (!firstLine)
|
|
188
|
-
return false;
|
|
189
|
-
return /^CLEAN[.!]?$/i.test(firstLine);
|
|
190
|
-
}
|
|
191
|
-
// ─── Spec shape validator ────────────────────────────────────────────────────
|
|
192
|
-
/**
|
|
193
|
-
* Drop any preamble the model emitted before the spec's GOAL header. The
|
|
194
|
-
* thinking model sometimes narrates ("Now I have all the context. Here's the
|
|
195
|
-
* rewritten spec:") before GOAL — the prompts forbid it, but the critique
|
|
196
|
-
* validator only checks for a VERIFY block, so it leaked into the delivered
|
|
197
|
-
* spec. We slice from the first line that begins a GOAL section so the spec
|
|
198
|
-
* starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
|
|
199
|
-
*/
|
|
200
|
-
export function stripSpecPreamble(spec) {
|
|
201
|
-
const lines = spec.split('\n');
|
|
202
|
-
const idx = lines.findIndex(l => /^GOAL\b/i.test(l));
|
|
203
|
-
if (idx <= 0)
|
|
204
|
-
return spec;
|
|
205
|
-
// Only strip plain narration. If the lead-in is a markdown fence or a
|
|
206
|
-
// cat-heredoc wrapper, leave it untouched — that's a malformation
|
|
207
|
-
// validateSpecShape must reject (and compose must retry on), not something
|
|
208
|
-
// to silently unwrap into a passing spec.
|
|
209
|
-
const preamble = lines.slice(0, idx);
|
|
210
|
-
if (preamble.some(l => /^\s*```/.test(l) || /^\s*cat\s*<</.test(l)))
|
|
211
|
-
return spec;
|
|
212
|
-
return lines.slice(idx).join('\n');
|
|
213
|
-
}
|
|
214
|
-
export function validateSpecShape(spec) {
|
|
215
|
-
const trimmed = spec.trim();
|
|
216
|
-
if (trimmed.length === 0)
|
|
217
|
-
return 'spec is empty';
|
|
218
|
-
const firstLine = trimmed.split('\n', 1)[0];
|
|
219
|
-
if (/^\s*```/.test(firstLine))
|
|
220
|
-
return 'spec starts with a markdown fence';
|
|
221
|
-
if (/^\s*cat\s*<<\s*['"]?[A-Za-z_][A-Za-z0-9_]*['"]?/.test(firstLine)) {
|
|
222
|
-
return 'spec is wrapped in a cat heredoc';
|
|
223
|
-
}
|
|
224
|
-
if (!/^GOAL\b/i.test(trimmed))
|
|
225
|
-
return 'spec does not start with GOAL';
|
|
226
|
-
for (const section of ['CONSTRAINTS', 'ACCEPTANCE', 'VERIFY']) {
|
|
227
|
-
if (!new RegExp(`^\\s*${section}\\b`, 'm').test(trimmed)) {
|
|
228
|
-
return `spec missing required section: ${section}`;
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
return null;
|
|
232
|
-
}
|
|
233
166
|
// ─── Title derivation ────────────────────────────────────────────────────────
|
|
234
167
|
export function deriveTitle(refined) {
|
|
235
168
|
const stripBold = (s) => s.replace(/^\*+|\*+$/g, '').trim();
|
package/dist/task/phases.d.ts
CHANGED
|
@@ -3,11 +3,12 @@
|
|
|
3
3
|
* critique) plus the config table that drives the orchestrator loop.
|
|
4
4
|
*/
|
|
5
5
|
import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
6
|
+
import { docsFocused } from '../workers/docs-core.js';
|
|
7
|
+
import { fetchFocused } from '../workers/fetch-core.js';
|
|
8
8
|
import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
|
|
9
|
+
import { type ExternalContextDeps } from './external-context.js';
|
|
9
10
|
import { MAX_GRILL_QUESTIONS } from './prompts.js';
|
|
10
|
-
import { type PhaseName } from './task-
|
|
11
|
+
import { type PhaseName } from './task-types.js';
|
|
11
12
|
import { type WidgetState } from './widget.js';
|
|
12
13
|
import { type AutoAnswer } from './parsers.js';
|
|
13
14
|
import { type PhaseDeps } from './child-runner.js';
|
|
@@ -36,11 +37,8 @@ export declare function extractToolingCommands(research: string): string[] | nul
|
|
|
36
37
|
export declare function replaceToolingWithVerified(research: string, verifiedCommands: string[]): string;
|
|
37
38
|
export declare const phaseRefine: (deps: PhaseDeps, raw: string) => Promise<string>;
|
|
38
39
|
export declare function phaseVerifyTooling(deps: PhaseDeps, research: string): Promise<string>;
|
|
39
|
-
export interface PhaseResearchDeps {
|
|
40
|
-
docsRaw?: typeof docsRaw;
|
|
41
|
-
fetchRaw?: typeof fetchRaw;
|
|
40
|
+
export interface PhaseResearchDeps extends ExternalContextDeps {
|
|
42
41
|
getFileInventory?: (cwd: string, signal?: AbortSignal) => Promise<string>;
|
|
43
|
-
searchFn?: (input: SearchCoreInput) => Promise<SearchCoreResult>;
|
|
44
42
|
}
|
|
45
43
|
export declare function phaseResearch(deps: PhaseDeps, refined: string, researchDeps?: PhaseResearchDeps): Promise<string>;
|
|
46
44
|
export interface PhaseAutoAnswerDeps {
|
package/dist/task/phases.js
CHANGED
|
@@ -2,18 +2,20 @@
|
|
|
2
2
|
* Phase pipeline — the five phase functions (refine, research, grill, compose,
|
|
3
3
|
* critique) plus the config table that drives the orchestrator loop.
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
5
|
+
import { docsFocused } from '../workers/docs-core.js';
|
|
6
|
+
import { fetchFocused } from '../workers/fetch-core.js';
|
|
7
7
|
import { formatNpmVersionSection } from '../workers/npm-version.js';
|
|
8
8
|
import { runWorker } from '../workers/pi-worker-core.js';
|
|
9
9
|
import { search as defaultSearch } from '../workers/search-core.js';
|
|
10
10
|
import { extractEnrichTargets } from './enrichment.js';
|
|
11
11
|
import { getFileInventory } from './file-inventory.js';
|
|
12
12
|
import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
|
|
13
|
-
import {
|
|
14
|
-
import {
|
|
13
|
+
import { gatherExternalContext } from './external-context.js';
|
|
14
|
+
import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, GRILL_AUTO_FORMAT_HINT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
|
|
15
|
+
import { setTaskSection, updateTaskFrontMatter } from './task-io.js';
|
|
15
16
|
import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
|
|
16
|
-
import {
|
|
17
|
+
import { parseGrillQuestions, parseAutoAnswer, autoAnswerHasTag, parseVerifyToolingOutput, deriveTitle } from './parsers.js';
|
|
18
|
+
import { parseVerifyBlock, validateSpecShape, stripSpecPreamble, isCritiqueClean } from './spec-validation.js';
|
|
17
19
|
import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
|
|
18
20
|
import { SessionUI } from '../remote/bridge.js';
|
|
19
21
|
// ─── Re-export constants from their home modules ────────────────────────────
|
|
@@ -76,75 +78,8 @@ export async function phaseVerifyTooling(deps, research) {
|
|
|
76
78
|
}
|
|
77
79
|
const DOCS_EXTENSION_PATH = new URL('../workers/docs-extension.js', import.meta.url).pathname;
|
|
78
80
|
export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
79
|
-
const docsRawFn = researchDeps.docsRaw ?? docsRaw;
|
|
80
|
-
const fetchRawFn = researchDeps.fetchRaw ?? fetchRaw;
|
|
81
81
|
const fileInventoryFn = researchDeps.getFileInventory ?? getFileInventory;
|
|
82
|
-
const
|
|
83
|
-
const enrichTargets = extractEnrichTargets(refined);
|
|
84
|
-
const enrichSections = [];
|
|
85
|
-
if (enrichTargets.packages.length > 0
|
|
86
|
-
|| enrichTargets.urls.length > 0
|
|
87
|
-
|| enrichTargets.services.length > 0) {
|
|
88
|
-
const tEnrichStart = Date.now();
|
|
89
|
-
const [docsResults, fetchResults, serviceResults] = await Promise.all([
|
|
90
|
-
Promise.all(enrichTargets.packages.map(pkg => docsRawFn({
|
|
91
|
-
pkg,
|
|
92
|
-
query: refined.split('\n').find(l => l.trim()) ?? refined,
|
|
93
|
-
cwd: deps.cwd,
|
|
94
|
-
signal: deps.signal
|
|
95
|
-
}).catch(() => null))),
|
|
96
|
-
Promise.all(enrichTargets.urls.map(url => fetchRawFn({ url, signal: deps.signal }).catch(() => null))),
|
|
97
|
-
Promise.all(enrichTargets.services.map(s => searchFn({
|
|
98
|
-
query: `${s.name} ${s.query}`,
|
|
99
|
-
count: 3,
|
|
100
|
-
signal: deps.signal
|
|
101
|
-
}).catch(() => null)))
|
|
102
|
-
]);
|
|
103
|
-
// npm version blocks come from docsRaw's bundled lookup and lead the
|
|
104
|
-
// section so the model anchors on live version data before reading
|
|
105
|
-
// the docs body.
|
|
106
|
-
for (let i = 0; i < enrichTargets.packages.length; i++) {
|
|
107
|
-
const v = docsResults[i]?.npmVersion;
|
|
108
|
-
if (v)
|
|
109
|
-
enrichSections.push(formatNpmVersionSection(v));
|
|
110
|
-
}
|
|
111
|
-
for (let i = 0; i < enrichTargets.packages.length; i++) {
|
|
112
|
-
const r = docsResults[i];
|
|
113
|
-
if (r?.kind === 'ok' && r.chunks.length > 0) {
|
|
114
|
-
const body = r.chunks
|
|
115
|
-
.map(c => c.content)
|
|
116
|
-
.join('\n\n')
|
|
117
|
-
.slice(0, 4000);
|
|
118
|
-
enrichSections.push(`### docs: ${enrichTargets.packages[i]}\n${body}`);
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
for (let i = 0; i < enrichTargets.urls.length; i++) {
|
|
122
|
-
const r = fetchResults[i];
|
|
123
|
-
if (r) {
|
|
124
|
-
enrichSections.push(`### url: ${enrichTargets.urls[i]}\n${r.markdown.slice(0, 4000)}`);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
const skipped = [];
|
|
128
|
-
for (let i = 0; i < enrichTargets.services.length; i++) {
|
|
129
|
-
const s = enrichTargets.services[i];
|
|
130
|
-
const r = serviceResults[i];
|
|
131
|
-
if (r === null)
|
|
132
|
-
continue;
|
|
133
|
-
if (r.kind === 'no_key') {
|
|
134
|
-
skipped.push(s.name);
|
|
135
|
-
continue;
|
|
136
|
-
}
|
|
137
|
-
if (r.kind === 'error')
|
|
138
|
-
continue;
|
|
139
|
-
// kind === 'ok'
|
|
140
|
-
enrichSections.push(formatServiceBlock(s.name, `${s.name} ${s.query}`, r.results));
|
|
141
|
-
}
|
|
142
|
-
if (skipped.length > 0) {
|
|
143
|
-
enrichSections.push(formatFreshnessSkippedBlock(skipped));
|
|
144
|
-
}
|
|
145
|
-
deps.recordSubStep?.('enrichment', Date.now() - tEnrichStart);
|
|
146
|
-
}
|
|
147
|
-
const externalContext = enrichSections.length > 0 ? `EXTERNAL CONTEXT\n${enrichSections.join('\n\n')}\n\n` : '';
|
|
82
|
+
const externalContext = await gatherExternalContext(refined, deps, researchDeps);
|
|
148
83
|
// Pre-compute the project file inventory once and hand it to every worker.
|
|
149
84
|
// Workers can then jump straight to targeted read/grep on known paths
|
|
150
85
|
// instead of each spawning its own discovery loop (find/ls). A '' result
|
|
@@ -156,8 +91,9 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
156
91
|
let doneCount = 0;
|
|
157
92
|
const updateProgress = () => {
|
|
158
93
|
doneCount++;
|
|
159
|
-
if (deps.onChildOutput)
|
|
160
|
-
deps.onChildOutput(`research (${doneCount}
|
|
94
|
+
if (deps.onChildOutput) {
|
|
95
|
+
deps.onChildOutput(`research (${doneCount}/${workerSpecs.length} workers done)`);
|
|
96
|
+
}
|
|
161
97
|
};
|
|
162
98
|
// Per-worker timing split into wait (spawn → first byte) and work (first
|
|
163
99
|
// byte → exit). The workers run sequentially below, so each split is a clean
|
|
@@ -190,16 +126,19 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
190
126
|
// tooling) is preserved for assembly.
|
|
191
127
|
const workerSpecs = [
|
|
192
128
|
{
|
|
129
|
+
section: 'FILES',
|
|
193
130
|
label: 'worker:files',
|
|
194
131
|
prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
|
|
195
132
|
},
|
|
196
133
|
{
|
|
134
|
+
section: 'APIS',
|
|
197
135
|
label: 'worker:apis',
|
|
198
136
|
prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)),
|
|
199
137
|
tools: 'read,grep,find,ls,pi-worker-docs',
|
|
200
138
|
extensions: [DOCS_EXTENSION_PATH]
|
|
201
139
|
},
|
|
202
140
|
{
|
|
141
|
+
section: 'CONTEXT',
|
|
203
142
|
label: 'worker:context',
|
|
204
143
|
prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
|
|
205
144
|
// Context owns architectural understanding, not path discovery —
|
|
@@ -209,6 +148,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
209
148
|
tools: 'read,grep'
|
|
210
149
|
},
|
|
211
150
|
{
|
|
151
|
+
section: 'TOOLING',
|
|
212
152
|
label: 'worker:tooling',
|
|
213
153
|
prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
|
|
214
154
|
}
|
|
@@ -234,13 +174,10 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
234
174
|
updateProgress();
|
|
235
175
|
workerResults.push(r);
|
|
236
176
|
}
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
{ name: 'CONTEXT', result: context },
|
|
242
|
-
{ name: 'TOOLING', result: tooling }
|
|
243
|
-
];
|
|
177
|
+
// Validate + assemble by mapping spec.section over the results, so adding or
|
|
178
|
+
// reordering a worker is a single edit to workerSpecs (the result order
|
|
179
|
+
// mirrors workerSpecs order — the loop above pushes in sequence).
|
|
180
|
+
const sections = workerSpecs.map((spec, i) => ({ name: spec.section, result: workerResults[i] }));
|
|
244
181
|
for (const { name, result } of sections) {
|
|
245
182
|
if (result.exitCode !== 0) {
|
|
246
183
|
throw new Error(`Research ${name} worker failed (exit ${result.exitCode}): ${result.stderr.slice(-500)}`);
|
|
@@ -253,7 +190,7 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
253
190
|
+ `(${result.leakedToolCall.trim()}) — it never ran`);
|
|
254
191
|
}
|
|
255
192
|
}
|
|
256
|
-
return
|
|
193
|
+
return sections.map(({ name, result }) => `${name}\n${result.text}`).join('\n\n');
|
|
257
194
|
}
|
|
258
195
|
export async function phaseAutoAnswer(deps, refined, research, question, autoDeps = {}) {
|
|
259
196
|
const docsFocusedFn = autoDeps.docsFocused ?? docsFocused;
|
|
@@ -336,7 +273,15 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
|
|
|
336
273
|
const externalContext = contextSections.length > 0 ?
|
|
337
274
|
`EXTERNAL CONTEXT\n${contextSections.join('\n\n')}\n\n`
|
|
338
275
|
: '';
|
|
339
|
-
const
|
|
276
|
+
const basePrompt = externalContext + GRILL_AUTO_ANSWER_PROMPT(refined, research, question);
|
|
277
|
+
let text = await runPhaseChild(deps, 'grill-auto', 'read', basePrompt);
|
|
278
|
+
if (!autoAnswerHasTag(text)) {
|
|
279
|
+
// The model ignored the ANSWER/UNKNOWN/ALT format and wrote prose
|
|
280
|
+
// (typically an "analysis" preamble). Reprompt once, forcing the
|
|
281
|
+
// tagged form, before falling back to parseAutoAnswer's salvage —
|
|
282
|
+
// otherwise a preamble line leaks out as the recommended answer.
|
|
283
|
+
text = await runPhaseChild(deps, 'grill-auto', 'read', prependHint(GRILL_AUTO_FORMAT_HINT, basePrompt));
|
|
284
|
+
}
|
|
340
285
|
return parseAutoAnswer(text);
|
|
341
286
|
}
|
|
342
287
|
catch (err) {
|
package/dist/task/prompts.d.ts
CHANGED
|
@@ -36,6 +36,7 @@ declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
|
|
|
36
36
|
declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
|
|
37
37
|
declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
|
|
38
38
|
declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
|
|
39
|
+
export declare const GRILL_AUTO_FORMAT_HINT: string;
|
|
39
40
|
declare function composeRetryEmphasis(problem: string): string;
|
|
40
41
|
declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;
|
|
41
42
|
declare const CRITIQUE_TRIAGE_PROMPT: (spec: string, refined: string, qa: string) => string;
|
package/dist/task/prompts.js
CHANGED
|
@@ -215,6 +215,15 @@ Research:
|
|
|
215
215
|
${research}
|
|
216
216
|
|
|
217
217
|
Question: ${question}`;
|
|
218
|
+
// Reprompt prefix when grill-auto's first reply ignored the tagged output format
|
|
219
|
+
// and wrote free-form prose (an "analysis" preamble). Forces the terse form so a
|
|
220
|
+
// real recommendation reaches the user instead of a leaked preamble line.
|
|
221
|
+
export const GRILL_AUTO_FORMAT_HINT = '[SYSTEM NOTE: Your previous reply did NOT follow the required format — it had no '
|
|
222
|
+
+ 'ANSWER:, UNKNOWN:, or ALT: line and read as free-form prose. Do not explain or '
|
|
223
|
+
+ 'analyse. Output ONLY the tagged lines and nothing else. For a binary "A or B?" '
|
|
224
|
+
+ 'question emit two lines:\nUNKNOWN: <primary option>\nALT: <alternative>\n'
|
|
225
|
+
+ 'For a safe default the user would accept without thinking, emit one ANSWER: line. '
|
|
226
|
+
+ 'No preamble, no markdown.]';
|
|
218
227
|
function composeRetryEmphasis(problem) {
|
|
219
228
|
if (problem === 'spec does not start with GOAL'
|
|
220
229
|
|| problem === 'spec starts with a markdown fence'
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spec gate — the guards that decide whether a composed spec is acceptable at
|
|
3
|
+
* handoff. Unlike the informational parsers in parsers.ts, these answer a
|
|
4
|
+
* yes/no (or "what's wrong") question the orchestrator and critique phase act
|
|
5
|
+
* on: is the VERIFY block runnable, is the shape well-formed, did critique come
|
|
6
|
+
* back CLEAN. Self-contained (no imports) so the gate doesn't drag in the phase
|
|
7
|
+
* pipeline.
|
|
8
|
+
*/
|
|
9
|
+
export interface VerifyCommand {
|
|
10
|
+
raw: string;
|
|
11
|
+
}
|
|
12
|
+
export declare function parseVerifyBlock(spec: string): VerifyCommand[] | null;
|
|
13
|
+
export declare function isCritiqueClean(text: string): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Drop any preamble the model emitted before the spec's GOAL header. The
|
|
16
|
+
* thinking model sometimes narrates ("Now I have all the context. Here's the
|
|
17
|
+
* rewritten spec:") before GOAL — the prompts forbid it, but the critique
|
|
18
|
+
* validator only checks for a VERIFY block, so it leaked into the delivered
|
|
19
|
+
* spec. We slice from the first line that begins a GOAL section so the spec
|
|
20
|
+
* starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
|
|
21
|
+
*/
|
|
22
|
+
export declare function stripSpecPreamble(spec: string): string;
|
|
23
|
+
export declare function validateSpecShape(spec: string): string | null;
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Spec gate — the guards that decide whether a composed spec is acceptable at
|
|
3
|
+
* handoff. Unlike the informational parsers in parsers.ts, these answer a
|
|
4
|
+
* yes/no (or "what's wrong") question the orchestrator and critique phase act
|
|
5
|
+
* on: is the VERIFY block runnable, is the shape well-formed, did critique come
|
|
6
|
+
* back CLEAN. Self-contained (no imports) so the gate doesn't drag in the phase
|
|
7
|
+
* pipeline.
|
|
8
|
+
*/
|
|
9
|
+
// ─── Verify block parser ─────────────────────────────────────────────────────
|
|
10
|
+
export function parseVerifyBlock(spec) {
|
|
11
|
+
const lines = spec.split('\n');
|
|
12
|
+
let i = 0;
|
|
13
|
+
while (i < lines.length && !/^VERIFY:\s*$/.test(lines[i]))
|
|
14
|
+
i++;
|
|
15
|
+
if (i >= lines.length)
|
|
16
|
+
return null;
|
|
17
|
+
i++;
|
|
18
|
+
while (i < lines.length && lines[i].trim() === '')
|
|
19
|
+
i++;
|
|
20
|
+
if (i >= lines.length)
|
|
21
|
+
return null;
|
|
22
|
+
if (!/^```(sh|bash)?\s*$/.test(lines[i]))
|
|
23
|
+
return null;
|
|
24
|
+
i++;
|
|
25
|
+
const cmds = [];
|
|
26
|
+
while (i < lines.length && !/^```\s*$/.test(lines[i])) {
|
|
27
|
+
const line = lines[i].trim();
|
|
28
|
+
if (line.length > 0 && !line.startsWith('#'))
|
|
29
|
+
cmds.push({ raw: line });
|
|
30
|
+
i++;
|
|
31
|
+
}
|
|
32
|
+
return cmds;
|
|
33
|
+
}
|
|
34
|
+
// ─── Critique triage gate ────────────────────────────────────────────────────
|
|
35
|
+
// The critique-triage prompt instructs the worker to emit the literal token
|
|
36
|
+
// `CLEAN` on its own line when the compose draft has no substantive defects, so
|
|
37
|
+
// we can skip the expensive full-rewrite pass. Anything else is treated as a
|
|
38
|
+
// defect list that gets fed into the rewrite. Empty output is NOT clean — that
|
|
39
|
+
// would be a silent crash, and treating it as clean would skip review entirely.
|
|
40
|
+
export function isCritiqueClean(text) {
|
|
41
|
+
const firstLine = text
|
|
42
|
+
.split('\n')
|
|
43
|
+
.map(l => l.trim())
|
|
44
|
+
.find(l => l.length > 0);
|
|
45
|
+
if (!firstLine)
|
|
46
|
+
return false;
|
|
47
|
+
return /^CLEAN[.!]?$/i.test(firstLine);
|
|
48
|
+
}
|
|
49
|
+
// ─── Spec shape gate ─────────────────────────────────────────────────────────
|
|
50
|
+
/**
|
|
51
|
+
* Drop any preamble the model emitted before the spec's GOAL header. The
|
|
52
|
+
* thinking model sometimes narrates ("Now I have all the context. Here's the
|
|
53
|
+
* rewritten spec:") before GOAL — the prompts forbid it, but the critique
|
|
54
|
+
* validator only checks for a VERIFY block, so it leaked into the delivered
|
|
55
|
+
* spec. We slice from the first line that begins a GOAL section so the spec
|
|
56
|
+
* starts at GOAL. No GOAL line → returned unchanged (validation then flags it).
|
|
57
|
+
*/
|
|
58
|
+
export function stripSpecPreamble(spec) {
|
|
59
|
+
const lines = spec.split('\n');
|
|
60
|
+
const idx = lines.findIndex(l => /^GOAL\b/i.test(l));
|
|
61
|
+
if (idx <= 0)
|
|
62
|
+
return spec;
|
|
63
|
+
// Only strip plain narration. If the lead-in is a markdown fence or a
|
|
64
|
+
// cat-heredoc wrapper, leave it untouched — that's a malformation
|
|
65
|
+
// validateSpecShape must reject (and compose must retry on), not something
|
|
66
|
+
// to silently unwrap into a passing spec.
|
|
67
|
+
const preamble = lines.slice(0, idx);
|
|
68
|
+
if (preamble.some(l => /^\s*```/.test(l) || /^\s*cat\s*<</.test(l)))
|
|
69
|
+
return spec;
|
|
70
|
+
return lines.slice(idx).join('\n');
|
|
71
|
+
}
|
|
72
|
+
export function validateSpecShape(spec) {
|
|
73
|
+
const trimmed = spec.trim();
|
|
74
|
+
if (trimmed.length === 0)
|
|
75
|
+
return 'spec is empty';
|
|
76
|
+
const firstLine = trimmed.split('\n', 1)[0];
|
|
77
|
+
if (/^\s*```/.test(firstLine))
|
|
78
|
+
return 'spec starts with a markdown fence';
|
|
79
|
+
if (/^\s*cat\s*<<\s*['"]?[A-Za-z_][A-Za-z0-9_]*['"]?/.test(firstLine)) {
|
|
80
|
+
return 'spec is wrapped in a cat heredoc';
|
|
81
|
+
}
|
|
82
|
+
if (!/^GOAL\b/i.test(trimmed))
|
|
83
|
+
return 'spec does not start with GOAL';
|
|
84
|
+
for (const section of ['CONSTRAINTS', 'ACCEPTANCE', 'VERIFY']) {
|
|
85
|
+
if (!new RegExp(`^\\s*${section}\\b`, 'm').test(trimmed)) {
|
|
86
|
+
return `spec missing required section: ${section}`;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return null;
|
|
90
|
+
}
|
package/dist/task/widget.d.ts
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* context usage, and the latest child-process line.
|
|
6
6
|
*/
|
|
7
7
|
import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
|
|
8
|
-
import { type PhaseName, type TaskState } from './task-
|
|
8
|
+
import { type PhaseName, type TaskState } from './task-types.js';
|
|
9
9
|
export interface WidgetState {
|
|
10
10
|
taskId: string;
|
|
11
11
|
title: string;
|
package/dist/task/widget.js
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* Renders a live-updating status block showing task id, phase, elapsed time,
|
|
5
5
|
* context usage, and the latest child-process line.
|
|
6
6
|
*/
|
|
7
|
-
import { PHASE_INDEX, PHASE_ORDER } from './task-
|
|
7
|
+
import { PHASE_INDEX, PHASE_ORDER } from './task-types.js';
|
|
8
8
|
import { setTaskWidget } from '../remote/session-state.js';
|
|
9
9
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
10
10
|
export const WIDGET_KEY = 'pi-tasks';
|