@mjasnikovs/pi-task 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -0
- package/dist/index.js +2 -0
- package/dist/shared/child-process.js +25 -4
- package/dist/task/auto-commit.d.ts +20 -0
- package/dist/task/auto-commit.js +56 -0
- package/dist/task/auto-io.d.ts +17 -0
- package/dist/task/auto-io.js +124 -0
- package/dist/task/auto-orchestrator.d.ts +28 -0
- package/dist/task/auto-orchestrator.js +318 -0
- package/dist/task/auto-prompts.d.ts +15 -0
- package/dist/task/auto-prompts.js +66 -0
- package/dist/task/inline-markdown.d.ts +18 -0
- package/dist/task/inline-markdown.js +28 -0
- package/dist/task/orchestrator.d.ts +28 -0
- package/dist/task/orchestrator.js +42 -9
- package/dist/task/parsers.d.ts +16 -0
- package/dist/task/parsers.js +70 -0
- package/dist/task/phases.d.ts +2 -1
- package/dist/task/phases.js +126 -100
- package/dist/task/prompts.d.ts +24 -1
- package/dist/task/prompts.js +40 -5
- package/dist/task/widget.d.ts +19 -0
- package/dist/task/widget.js +73 -15
- package/package.json +1 -1
package/dist/task/phases.d.ts
CHANGED
|
@@ -6,11 +6,12 @@ import type { ExtensionCommandContext } from '@earendil-works/pi-coding-agent';
|
|
|
6
6
|
import { docsRaw, docsFocused } from '../workers/docs-core.js';
|
|
7
7
|
import { fetchRaw, fetchFocused } from '../workers/fetch-core.js';
|
|
8
8
|
import type { SearchCoreInput, SearchCoreResult } from '../workers/search-core.js';
|
|
9
|
+
import { MAX_GRILL_QUESTIONS } from './prompts.js';
|
|
9
10
|
import { type PhaseName } from './task-file.js';
|
|
10
11
|
import { type WidgetState } from './widget.js';
|
|
11
12
|
import { type AutoAnswer } from './parsers.js';
|
|
12
13
|
import { type PhaseDeps } from './child-runner.js';
|
|
13
|
-
export { MAX_GRILL_QUESTIONS }
|
|
14
|
+
export { MAX_GRILL_QUESTIONS };
|
|
14
15
|
export interface PhaseContext {
|
|
15
16
|
cwd: string;
|
|
16
17
|
id: string;
|
package/dist/task/phases.js
CHANGED
|
@@ -10,12 +10,13 @@ import { search as defaultSearch } from '../workers/search-core.js';
|
|
|
10
10
|
import { extractEnrichTargets } from './enrichment.js';
|
|
11
11
|
import { getFileInventory } from './file-inventory.js';
|
|
12
12
|
import { formatServiceBlock, formatFreshnessSkippedBlock } from './service-blocks.js';
|
|
13
|
-
import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT } from './prompts.js';
|
|
13
|
+
import { REFINE_PROMPT, RESEARCH_FILES_PROMPT, RESEARCH_APIS_PROMPT, RESEARCH_CONTEXT_PROMPT, RESEARCH_TOOLING_PROMPT, GRILL_GEN_PROMPT, GRILL_AUTO_ANSWER_PROMPT, COMPOSE_PROMPT, CRITIQUE_PROMPT, CRITIQUE_TRIAGE_PROMPT, VERIFY_TOOLING_PROMPT, MAX_GRILL_QUESTIONS, appendNoThink } from './prompts.js';
|
|
14
14
|
import { setTaskSection, updateTaskFrontMatter } from './task-file.js';
|
|
15
|
-
import {
|
|
15
|
+
import { renderInlineMarkdown, stripInlineMarkdown } from './inline-markdown.js';
|
|
16
|
+
import { parseVerifyBlock, parseGrillQuestions, parseAutoAnswer, parseVerifyToolingOutput, validateSpecShape, stripSpecPreamble, deriveTitle, isCritiqueClean } from './parsers.js';
|
|
16
17
|
import { runPhaseChild, runPhaseWithLoopGuard, runWithEmphasisRetry, prependHint, USER_CANCELLED } from './child-runner.js';
|
|
17
18
|
// ─── Re-export constants from their home modules ────────────────────────────
|
|
18
|
-
export { MAX_GRILL_QUESTIONS }
|
|
19
|
+
export { MAX_GRILL_QUESTIONS };
|
|
19
20
|
// ─── Tooling helpers ─────────────────────────────────────────────────────────
|
|
20
21
|
/** Extract the TOOLING section commands from a research output string. */
|
|
21
22
|
export function extractToolingCommands(research) {
|
|
@@ -51,7 +52,7 @@ export function replaceToolingWithVerified(research, verifiedCommands) {
|
|
|
51
52
|
return replaced;
|
|
52
53
|
}
|
|
53
54
|
// ─── Phase functions ─────────────────────────────────────────────────────────
|
|
54
|
-
export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, REFINE_PROMPT(raw)));
|
|
55
|
+
export const phaseRefine = (deps, raw) => runPhaseWithLoopGuard(deps, 'refine', 'read', hint => prependHint(hint, appendNoThink(REFINE_PROMPT(raw))));
|
|
55
56
|
export async function phaseVerifyTooling(deps, research) {
|
|
56
57
|
const commands = extractToolingCommands(research);
|
|
57
58
|
if (!commands || commands.length === 0) {
|
|
@@ -60,7 +61,7 @@ export async function phaseVerifyTooling(deps, research) {
|
|
|
60
61
|
const toolingList = commands.join('\n');
|
|
61
62
|
let verifyOutput;
|
|
62
63
|
try {
|
|
63
|
-
verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', VERIFY_TOOLING_PROMPT(toolingList));
|
|
64
|
+
verifyOutput = await runPhaseChild(deps, 'verify-tooling', 'read,bash', appendNoThink(VERIFY_TOOLING_PROMPT(toolingList)));
|
|
64
65
|
}
|
|
65
66
|
catch {
|
|
66
67
|
return replaceToolingWithVerified(research, commands);
|
|
@@ -157,59 +158,67 @@ export async function phaseResearch(deps, refined, researchDeps = {}) {
|
|
|
157
158
|
deps.onChildOutput(`research (${doneCount}/4 workers done)`);
|
|
158
159
|
};
|
|
159
160
|
// Per-worker timing split into wait (spawn → first byte) and work (first
|
|
160
|
-
// byte → exit).
|
|
161
|
-
//
|
|
162
|
-
//
|
|
163
|
-
//
|
|
164
|
-
// the two.
|
|
161
|
+
// byte → exit). The workers run sequentially below, so each split is a clean
|
|
162
|
+
// per-worker measurement — waitMs the worker's own cold-start, workMs its
|
|
163
|
+
// generation+tool-call cost — not a Promise.all-relative wall-clock that
|
|
164
|
+
// conflates the two.
|
|
165
165
|
const recordWorker = (label, p) => p.then(r => {
|
|
166
166
|
deps.recordSubStep?.(`${label} wait`, r.waitMs);
|
|
167
167
|
deps.recordSubStep?.(`${label} work`, r.workMs);
|
|
168
168
|
return r;
|
|
169
169
|
});
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
170
|
+
// Run the four workers ONE AT A TIME. Settled by an A/B on the local
|
|
171
|
+
// llama.cpp backend (single GPU, same task/model) — and the answer flips
|
|
172
|
+
// with thinking:
|
|
173
|
+
// - thinking ON → parallel wins: long decodes batch well, 4 concurrent
|
|
174
|
+
// finish in ~max(worker), not the sum.
|
|
175
|
+
// - /no_think → sequential wins: with short decodes the batching upside
|
|
176
|
+
// is gone, but 4 concurrent streams still split the one GPU and slow
|
|
177
|
+
// each other ~4x (context worker measured 27s solo vs 128s under load),
|
|
178
|
+
// so summed-but-fast (~100s) beats max-of-slowed (~130s).
|
|
179
|
+
// Every worker runs /no_think (below), so sequential is the faster regime.
|
|
180
|
+
// Do NOT switch this back to Promise.all without re-running that A/B.
|
|
181
|
+
//
|
|
182
|
+
// `/no_think` is the big win: these are agentic exploration loops, and on a
|
|
183
|
+
// reasoning model the child would otherwise emit a full <think> trace at
|
|
184
|
+
// every tool step ("let me read X next…") — the single largest decode sink
|
|
185
|
+
// in the pipeline. Stripping it cut each worker's decode 3-8x in the A/B.
|
|
186
|
+
// The worker still calls as many tools as it wants; it just stops narrating
|
|
187
|
+
// between them. See appendNoThink. Result order (files, apis, context,
|
|
188
|
+
// tooling) is preserved for assembly.
|
|
189
|
+
const workerSpecs = [
|
|
190
|
+
{
|
|
191
|
+
label: 'worker:files',
|
|
192
|
+
prompt: appendNoThink(promptHeader + RESEARCH_FILES_PROMPT(refined))
|
|
193
|
+
},
|
|
194
|
+
{ label: 'worker:apis', prompt: appendNoThink(promptHeader + RESEARCH_APIS_PROMPT(refined)) },
|
|
195
|
+
{
|
|
196
|
+
label: 'worker:context',
|
|
197
|
+
prompt: appendNoThink(promptHeader + RESEARCH_CONTEXT_PROMPT(refined)),
|
|
198
|
+
// Context owns architectural understanding, not path discovery —
|
|
199
|
+
// FILES handles that. Dropping `find`/`ls` keeps the worker from
|
|
200
|
+
// spawning long enumeration loops whose output then inflates
|
|
201
|
+
// prefill on every subsequent round.
|
|
198
202
|
tools: 'read,grep'
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
203
|
+
},
|
|
204
|
+
{
|
|
205
|
+
label: 'worker:tooling',
|
|
206
|
+
prompt: appendNoThink(promptHeader + RESEARCH_TOOLING_PROMPT(refined))
|
|
207
|
+
}
|
|
208
|
+
];
|
|
209
|
+
const workerResults = [];
|
|
210
|
+
for (const spec of workerSpecs) {
|
|
211
|
+
const r = await recordWorker(spec.label, runWorker({
|
|
212
|
+
prompt: spec.prompt,
|
|
205
213
|
cwd: deps.cwd,
|
|
206
214
|
signal: deps.signal,
|
|
207
|
-
spawn: deps.spawn
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
215
|
+
spawn: deps.spawn,
|
|
216
|
+
...(spec.tools ? { tools: spec.tools } : {})
|
|
217
|
+
}));
|
|
218
|
+
updateProgress();
|
|
219
|
+
workerResults.push(r);
|
|
220
|
+
}
|
|
221
|
+
const [files, apis, context, tooling] = workerResults;
|
|
213
222
|
const sections = [
|
|
214
223
|
{ name: 'FILES', result: files },
|
|
215
224
|
{ name: 'APIS', result: apis },
|
|
@@ -316,64 +325,75 @@ export async function phaseAutoAnswer(deps, refined, research, question, autoDep
|
|
|
316
325
|
}
|
|
317
326
|
}
|
|
318
327
|
export async function phaseGrill(deps, ctx, widgetState, refined, research) {
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
// Auto-answers are independent — generate them concurrently before the UI
|
|
326
|
-
// loop. The user-input loop below still runs sequentially (the user can
|
|
327
|
-
// only answer one prompt at a time), but the LLM-spawning work no longer
|
|
328
|
-
// blocks each iteration. For N questions this turns ~N × cold-start time
|
|
329
|
-
// into ~1 × cold-start time.
|
|
330
|
-
const tAutoStart = Date.now();
|
|
331
|
-
let doneCount = 0;
|
|
332
|
-
widgetState.lastLine = `auto-answering 0/${questions.length} done…`;
|
|
333
|
-
const autos = await Promise.all(questions.map((q, i) => phaseAutoAnswer(deps, refined, research, q).then(r => {
|
|
334
|
-
doneCount++;
|
|
335
|
-
widgetState.lastLine = `auto-answering ${doneCount}/${questions.length} done (Q${i + 1})`;
|
|
336
|
-
return r;
|
|
337
|
-
})));
|
|
338
|
-
deps.recordSubStep?.('auto-answers', Date.now() - tAutoStart);
|
|
328
|
+
// Sequential & adaptive: ask one question at a time, feeding every answer
|
|
329
|
+
// back into the next grill-gen call so later questions react to earlier ones
|
|
330
|
+
// (drop resolved unknowns, surface forks an answer introduced). Each question
|
|
331
|
+
// still gets a research-backed auto-answer — answered cheaply (skip the user)
|
|
332
|
+
// or surfaced as a pre-filled recommendation. The model emits NONE when
|
|
333
|
+
// nothing ambiguous remains. Kept in sync with /task-auto's clarify dialog.
|
|
339
334
|
const theme = ctx.ui.theme;
|
|
340
|
-
const
|
|
341
|
-
const
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
const
|
|
345
|
-
|
|
335
|
+
const out = []; // human-facing Q&A transcript (with auto-worker debug lines)
|
|
336
|
+
const qa = []; // compact Q&A fed back into the next question
|
|
337
|
+
// Open-ended: keep asking until the model emits NONE or the user dismisses.
|
|
338
|
+
for (let n = 0;; n++) {
|
|
339
|
+
const tGenStart = Date.now();
|
|
340
|
+
const raw = await runPhaseWithLoopGuard(deps, 'grill-gen', 'read', hint => prependHint(hint, GRILL_GEN_PROMPT(refined, research, qa.join('\n'))));
|
|
341
|
+
deps.recordSubStep?.('gen', Date.now() - tGenStart);
|
|
342
|
+
const questions = parseGrillQuestions(raw);
|
|
343
|
+
if (questions.length === 0)
|
|
344
|
+
break; // NONE / nothing left to ask
|
|
345
|
+
const q = questions[0];
|
|
346
|
+
widgetState.lastLine = `auto-answering Q${n + 1}…`;
|
|
347
|
+
const tAutoStart = Date.now();
|
|
348
|
+
const auto = await phaseAutoAnswer(deps, refined, research, q);
|
|
349
|
+
deps.recordSubStep?.('auto-answer', Date.now() - tAutoStart);
|
|
350
|
+
// Render markdown (bold/code) for the displayed prompt; keep plain text
|
|
351
|
+
// for the editable default and the persisted file.
|
|
352
|
+
const shownQ = renderInlineMarkdown(q, theme);
|
|
353
|
+
const plainQ = stripInlineMarkdown(q);
|
|
354
|
+
out.push(`Q${n + 1}: ${plainQ}`);
|
|
346
355
|
const rawTrim = auto.raw.trim();
|
|
347
356
|
out.push(` (auto-worker raw: ${rawTrim.length === 0 ? '(empty)' : rawTrim.replace(/\n/g, ' ⏎ ')})`);
|
|
357
|
+
let answer;
|
|
348
358
|
if (auto.kind === 'answered') {
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
}
|
|
352
|
-
const title = auto.suggested ?
|
|
353
|
-
`${q}\n${theme.fg('muted', 'Recommended:')}\n\n${theme.fg('text', auto.suggested)}\n\n${theme.fg('muted', 'press Enter to accept')}`
|
|
354
|
-
: `${q}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
|
|
355
|
-
widgetState.lastLine = `awaiting Q${i + 1}`;
|
|
356
|
-
const a = await ctx.ui.input(title, auto.suggested);
|
|
357
|
-
if (a === undefined)
|
|
358
|
-
throw new Error(USER_CANCELLED);
|
|
359
|
-
const typed = a.trim();
|
|
360
|
-
if (typed.length === 0 && auto.suggested) {
|
|
361
|
-
out.push(`A${i + 1}: ${auto.suggested} (accepted recommendation)`);
|
|
362
|
-
}
|
|
363
|
-
else if (typed.length === 0) {
|
|
364
|
-
out.push(`A${i + 1}: (skipped)`);
|
|
359
|
+
answer = stripInlineMarkdown(auto.text);
|
|
360
|
+
out.push(`A${n + 1}: ${answer} (auto)`);
|
|
365
361
|
}
|
|
366
362
|
else {
|
|
367
|
-
|
|
363
|
+
const plainSuggested = auto.suggested === undefined ? undefined : stripInlineMarkdown(auto.suggested);
|
|
364
|
+
const title = auto.suggested ?
|
|
365
|
+
`${shownQ}\n${theme.fg('muted', 'Recommended:')}\n\n${renderInlineMarkdown(auto.suggested, theme)}\n\n${theme.fg('muted', 'press Enter to accept')}`
|
|
366
|
+
: `${shownQ}\n${theme.fg('muted', '(no recommendation — please answer)')}`;
|
|
367
|
+
widgetState.lastLine = `awaiting Q${n + 1}`;
|
|
368
|
+
const a = await ctx.ui.input(title, plainSuggested);
|
|
369
|
+
if (a === undefined)
|
|
370
|
+
throw new Error(USER_CANCELLED);
|
|
371
|
+
const typed = a.trim();
|
|
372
|
+
if (typed.length === 0 && plainSuggested) {
|
|
373
|
+
answer = `${plainSuggested} (accepted recommendation)`;
|
|
374
|
+
}
|
|
375
|
+
else if (typed.length === 0) {
|
|
376
|
+
answer = '(skipped)';
|
|
377
|
+
}
|
|
378
|
+
else {
|
|
379
|
+
answer = typed;
|
|
380
|
+
}
|
|
381
|
+
out.push(`A${n + 1}: ${answer}`);
|
|
368
382
|
}
|
|
383
|
+
qa.push(`Q${n + 1}: ${plainQ}\nA${n + 1}: ${answer}`);
|
|
369
384
|
}
|
|
370
|
-
|
|
385
|
+
if (out.length === 0)
|
|
386
|
+
return '(no questions produced)';
|
|
371
387
|
return out.join('\n');
|
|
372
388
|
}
|
|
373
389
|
export async function phaseCompose(deps, refined, research, qa) {
|
|
374
390
|
return runWithEmphasisRetry(deps, 'compose', 'read', problem => COMPOSE_PROMPT(refined, research, qa, problem), text => {
|
|
375
|
-
|
|
376
|
-
|
|
391
|
+
// Trim any "here's the spec:" preamble before validating, so a
|
|
392
|
+
// strippable lead-in doesn't burn a full retry — and the stored
|
|
393
|
+
// value starts at GOAL.
|
|
394
|
+
const stripped = stripSpecPreamble(text);
|
|
395
|
+
const problem = validateSpecShape(stripped);
|
|
396
|
+
return problem ? { ok: false, problem } : { ok: true, value: stripped };
|
|
377
397
|
}, problem => new Error(`compose_invalid: ${problem}`));
|
|
378
398
|
}
|
|
379
399
|
export async function phaseCritique(deps, spec, refined, qa) {
|
|
@@ -397,7 +417,7 @@ export async function phaseCritique(deps, spec, refined, qa) {
|
|
|
397
417
|
// Granting `read` here let it wander the repo to "verify" findings,
|
|
398
418
|
// which made the supposedly-cheap pass cost as much as a rewrite
|
|
399
419
|
// (observed ~133s). The judgement needs no file access.
|
|
400
|
-
verdict = await runPhaseChild(deps, 'critique-triage', '', CRITIQUE_TRIAGE_PROMPT(spec, refined, qa));
|
|
420
|
+
verdict = await runPhaseChild(deps, 'critique-triage', '', appendNoThink(CRITIQUE_TRIAGE_PROMPT(spec, refined, qa)));
|
|
401
421
|
}
|
|
402
422
|
catch {
|
|
403
423
|
verdict = null;
|
|
@@ -411,9 +431,15 @@ export async function phaseCritique(deps, spec, refined, qa) {
|
|
|
411
431
|
}
|
|
412
432
|
const tRewrite = Date.now();
|
|
413
433
|
try {
|
|
414
|
-
return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text =>
|
|
415
|
-
|
|
416
|
-
|
|
434
|
+
return await runWithEmphasisRetry(deps, 'critique', 'read', problem => CRITIQUE_PROMPT(spec, refined, qa, problem !== null, triageDefects), text => {
|
|
435
|
+
// The rewrite (thinking on) sometimes prepends narration before
|
|
436
|
+
// GOAL; the prompt forbids it but this validator only checks for
|
|
437
|
+
// a VERIFY block. Strip it so the delivered spec starts at GOAL.
|
|
438
|
+
const stripped = stripSpecPreamble(text);
|
|
439
|
+
return parseVerifyBlock(stripped) ?
|
|
440
|
+
{ ok: true, value: stripped }
|
|
441
|
+
: { ok: false, problem: 'no_verify_block' };
|
|
442
|
+
}, () => new Error('no_verify_block'));
|
|
417
443
|
}
|
|
418
444
|
finally {
|
|
419
445
|
deps.recordSubStep?.('rewrite', Date.now() - tRewrite);
|
package/dist/task/prompts.d.ts
CHANGED
|
@@ -5,13 +5,36 @@
|
|
|
5
5
|
* effects, trivially testable.
|
|
6
6
|
*/
|
|
7
7
|
export declare const MAX_GRILL_QUESTIONS = 10;
|
|
8
|
+
/**
|
|
9
|
+
* Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
|
|
10
|
+
* <think> reasoning trace for that turn and persists across the tool-call loop
|
|
11
|
+
* within the same child session.
|
|
12
|
+
*
|
|
13
|
+
* On a local reasoning model decode is the bottleneck (~50 t/s here) while
|
|
14
|
+
* prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
|
|
15
|
+
* think trace can be 10k+ tokens — minutes — even when the phase's real output
|
|
16
|
+
* is a short list or a one-word verdict (an observed triage spent ~384s
|
|
17
|
+
* thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
|
|
18
|
+
* exploration: it still calls every tool it wants and takes every step it
|
|
19
|
+
* needs — it just stops narrating between actions.
|
|
20
|
+
*
|
|
21
|
+
* We strip thinking from the mechanical / exploration phases (refine, the four
|
|
22
|
+
* research workers, verify-tooling, triage) and keep it ON for the judgment
|
|
23
|
+
* phases (compose, grill, critique rewrite) where the reasoning earns its
|
|
24
|
+
* decode cost. pi's `--thinking off` flag is a no-op for this provider
|
|
25
|
+
* (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
|
|
26
|
+
* switch is the reliable control.
|
|
27
|
+
*/
|
|
28
|
+
export declare const NO_THINK = "/no_think";
|
|
29
|
+
/** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
|
|
30
|
+
export declare function appendNoThink(prompt: string): string;
|
|
8
31
|
declare const REFINE_PROMPT: (raw: string) => string;
|
|
9
32
|
declare const RESEARCH_READ_ONLY_CONSTRAINT = "IMPORTANT: You are ONLY allowed to READ. Do NOT create, modify, or delete any files. Use the read, grep, find, and ls tools to inspect the repo.";
|
|
10
33
|
declare const RESEARCH_FILES_PROMPT: (refined: string) => string;
|
|
11
34
|
declare const RESEARCH_APIS_PROMPT: (refined: string) => string;
|
|
12
35
|
declare const RESEARCH_CONTEXT_PROMPT: (refined: string) => string;
|
|
13
36
|
declare const RESEARCH_TOOLING_PROMPT: (refined: string) => string;
|
|
14
|
-
declare const GRILL_GEN_PROMPT: (refined: string, research: string) => string;
|
|
37
|
+
declare const GRILL_GEN_PROMPT: (refined: string, research: string, priorQA: string) => string;
|
|
15
38
|
declare const GRILL_AUTO_ANSWER_PROMPT: (refined: string, research: string, question: string) => string;
|
|
16
39
|
declare function composeRetryEmphasis(problem: string): string;
|
|
17
40
|
declare const COMPOSE_PROMPT: (refined: string, research: string, qa: string, retryProblem: string | null) => string;
|
package/dist/task/prompts.js
CHANGED
|
@@ -5,6 +5,31 @@
|
|
|
5
5
|
* effects, trivially testable.
|
|
6
6
|
*/
|
|
7
7
|
export const MAX_GRILL_QUESTIONS = 10;
|
|
8
|
+
/**
|
|
9
|
+
* Qwen3 "soft switch": placing `/no_think` in the prompt disables the model's
|
|
10
|
+
* <think> reasoning trace for that turn and persists across the tool-call loop
|
|
11
|
+
* within the same child session.
|
|
12
|
+
*
|
|
13
|
+
* On a local reasoning model decode is the bottleneck (~50 t/s here) while
|
|
14
|
+
* prefill is ~10x faster, so cost is dominated by *generated* tokens. A runaway
|
|
15
|
+
* think trace can be 10k+ tokens — minutes — even when the phase's real output
|
|
16
|
+
* is a short list or a one-word verdict (an observed triage spent ~384s
|
|
17
|
+
* thinking to emit "CLEAN"). Stripping the monologue does NOT limit the model's
|
|
18
|
+
* exploration: it still calls every tool it wants and takes every step it
|
|
19
|
+
* needs — it just stops narrating between actions.
|
|
20
|
+
*
|
|
21
|
+
* We strip thinking from the mechanical / exploration phases (refine, the four
|
|
22
|
+
* research workers, verify-tooling, triage) and keep it ON for the judgment
|
|
23
|
+
* phases (compose, grill, critique rewrite) where the reasoning earns its
|
|
24
|
+
* decode cost. pi's `--thinking off` flag is a no-op for this provider
|
|
25
|
+
* (`supportsReasoningEffort: false` in models.json), so the in-prompt soft
|
|
26
|
+
* switch is the reliable control.
|
|
27
|
+
*/
|
|
28
|
+
export const NO_THINK = '/no_think';
|
|
29
|
+
/** Append the Qwen3 `/no_think` soft switch to a prompt. See {@link NO_THINK}. */
|
|
30
|
+
export function appendNoThink(prompt) {
|
|
31
|
+
return `${prompt}\n\n${NO_THINK}`;
|
|
32
|
+
}
|
|
8
33
|
const REFINE_PROMPT = (raw) => `You receive a user's task description for an AI coding agent. Rewrite it to be unambiguous and actionable.
|
|
9
34
|
|
|
10
35
|
Output structure (four sections, exact headings, in this order):
|
|
@@ -118,25 +143,35 @@ No section header. No other sections. No preamble. May be empty if no verificati
|
|
|
118
143
|
|
|
119
144
|
Task:
|
|
120
145
|
${refined}`;
|
|
121
|
-
const GRILL_GEN_PROMPT = (refined, research) => `You are preparing clarifying questions for the user, based on a refined task description
|
|
146
|
+
const GRILL_GEN_PROMPT = (refined, research, priorQA) => `You are preparing clarifying questions for the user, based on a refined task description, the research that follows, and the answers gathered so far. Ask ONE question at a time.
|
|
147
|
+
|
|
148
|
+
Output the SINGLE most important clarifying question that REMAINS — the one whose answer most changes the work — or NONE if no genuine ambiguity is left.
|
|
122
149
|
|
|
123
150
|
Start from the KNOWN-UNKNOWNS bullets in the task. Add any new ambiguity surfaced by the research. Drop any unknowns the research already resolved.
|
|
124
151
|
|
|
152
|
+
ACCOUNT FOR THE ANSWERS SO FAR — read carefully:
|
|
153
|
+
- Never re-ask something already answered below.
|
|
154
|
+
- If an answer introduced a NEW fork or contradicts an assumption in the task/research (e.g. the user chose a tool or approach the task did not anticipate), ask about the most important consequence of that choice next.
|
|
155
|
+
- Drop questions the answers have made irrelevant.
|
|
156
|
+
|
|
125
157
|
SCOPE RULES — read carefully:
|
|
126
158
|
- Questions must clarify the EXISTING scope. Do NOT propose new deliverables, enhancements, modernizations, or "while I'm here" cleanups.
|
|
127
159
|
- Forbidden patterns: "should I also…", "should we modernize…", "do you want me to update X while I'm at it…", "should I integrate Y…", "would you like guidance on Z…".
|
|
128
160
|
- Allowed patterns: "by 'X' do you mean A or B?", "should failure mode Y be treated as Z?", "which of <files matching the task> applies here?".
|
|
129
|
-
- If
|
|
161
|
+
- If nothing genuinely ambiguous remains, output NONE. Zero questions is a valid and preferred outcome. Do not pad.
|
|
130
162
|
|
|
131
163
|
Output format — read carefully:
|
|
132
|
-
-
|
|
133
|
-
- If
|
|
164
|
+
- One question as a single numbered line: "1. ...", and nothing else.
|
|
165
|
+
- If no question remains: emit the single literal token NONE on its own line. Do NOT emit empty output — an empty response is treated as a crash, not as "no questions". The NONE sentinel is the only way to signal an intentional empty list.
|
|
134
166
|
|
|
135
167
|
Refined task:
|
|
136
168
|
${refined}
|
|
137
169
|
|
|
138
170
|
Research:
|
|
139
|
-
${research}
|
|
171
|
+
${research}
|
|
172
|
+
|
|
173
|
+
Answers so far:
|
|
174
|
+
${priorQA.trim() || '(none yet)'}`;
|
|
140
175
|
const GRILL_AUTO_ANSWER_PROMPT = (refined, research, question) => `You are pre-answering a clarifying question for an AI coding task. You have the refined task and the research notes. You can also use the read tool to open any file mentioned in the research (e.g. package.json) if it helps you answer.
|
|
141
176
|
|
|
142
177
|
Your job is to produce a recommended default answer. If the default is one the user would almost certainly accept, you tag it ANSWER and we skip the user entirely. Otherwise you tag it UNKNOWN and we show the suggestion in the input box for the user to confirm or override.
|
package/dist/task/widget.d.ts
CHANGED
|
@@ -21,6 +21,7 @@ export interface ContextSnapshot {
|
|
|
21
21
|
}
|
|
22
22
|
export type WidgetTheme = ExtensionCommandContext['ui']['theme'];
|
|
23
23
|
export declare const WIDGET_KEY = "pi-tasks";
|
|
24
|
+
export declare const AUTO_WIDGET_KEY = "pi-task-auto";
|
|
24
25
|
export declare const WIDGET_REFRESH_MS = 500;
|
|
25
26
|
export declare const WIDGET_LAST_LINE_MAX = 120;
|
|
26
27
|
export declare const NOTIFY_CLEAR_MS = 3000;
|
|
@@ -34,6 +35,24 @@ export declare function formatDuration(ms: number): string;
|
|
|
34
35
|
export declare function formatContextTokens(count: number): string;
|
|
35
36
|
export declare function contextProgressBar(percent: number): string;
|
|
36
37
|
export declare function contextThresholdColor(theme: WidgetTheme, percent: number, text: string): string;
|
|
38
|
+
/** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
|
|
39
|
+
export declare function formatContextDetail(usage: ContextSnapshot, theme?: WidgetTheme): string | null;
|
|
37
40
|
export declare function buildWidgetLines(s: WidgetState, theme?: WidgetTheme): string[];
|
|
38
41
|
export declare function startWidget(ctx: ExtensionCommandContext, getState: () => WidgetState | null): () => void;
|
|
42
|
+
export interface AutoLoaderState {
|
|
43
|
+
title: string;
|
|
44
|
+
step: string;
|
|
45
|
+
stepNum: number;
|
|
46
|
+
stepTotal: number;
|
|
47
|
+
startedAt: number;
|
|
48
|
+
lastLine?: string;
|
|
49
|
+
contextUsage?: ContextSnapshot;
|
|
50
|
+
}
|
|
51
|
+
export declare function buildAutoLoaderLines(s: AutoLoaderState, theme?: WidgetTheme): string[];
|
|
52
|
+
/**
|
|
53
|
+
* Start the planning loader widget (same cadence/look as the phase widget).
|
|
54
|
+
* Returns a disposer that stops the refresh and clears the widget. No-op
|
|
55
|
+
* (returns a no-op disposer) when there's no UI.
|
|
56
|
+
*/
|
|
57
|
+
export declare function startAutoLoader(ctx: ExtensionCommandContext, getState: () => AutoLoaderState | null): () => void;
|
|
39
58
|
export declare function flashTerminalWidget(ctx: ExtensionCommandContext, state: Exclude<TaskState, 'pending' | 'in_progress' | 'completed'>, taskId: string, reason: string | undefined): void;
|
package/dist/task/widget.js
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
import { PHASE_INDEX, PHASE_ORDER } from './task-file.js';
|
|
8
8
|
// ─── Constants ───────────────────────────────────────────────────────────────
|
|
9
9
|
export const WIDGET_KEY = 'pi-tasks';
|
|
10
|
+
export const AUTO_WIDGET_KEY = 'pi-task-auto';
|
|
10
11
|
export const WIDGET_REFRESH_MS = 500;
|
|
11
12
|
export const WIDGET_LAST_LINE_MAX = 120;
|
|
12
13
|
export const NOTIFY_CLEAR_MS = 3000;
|
|
@@ -46,6 +47,27 @@ export function contextThresholdColor(theme, percent, text) {
|
|
|
46
47
|
return theme.fg('warning', text);
|
|
47
48
|
return text;
|
|
48
49
|
}
|
|
50
|
+
/** Render the `tokens/window [bar]` context suffix, or null when there's nothing to show. */
|
|
51
|
+
export function formatContextDetail(usage, theme) {
|
|
52
|
+
const { tokens, contextWindow, percent } = usage;
|
|
53
|
+
if (contextWindow > 0) {
|
|
54
|
+
const text = `${formatContextTokens(tokens)}/${formatContextTokens(contextWindow)} ${contextProgressBar(percent)}`;
|
|
55
|
+
return theme ? contextThresholdColor(theme, percent, text) : text;
|
|
56
|
+
}
|
|
57
|
+
if (tokens > 0)
|
|
58
|
+
return formatContextTokens(tokens);
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
/** Render the muted `↳ lastLine` trailer (truncated), or null when there's no line. */
|
|
62
|
+
function lastLineTrailer(lastLine, theme) {
|
|
63
|
+
if (!lastLine)
|
|
64
|
+
return null;
|
|
65
|
+
const t = lastLine.length > WIDGET_LAST_LINE_MAX ?
|
|
66
|
+
lastLine.slice(0, WIDGET_LAST_LINE_MAX - 1) + '…'
|
|
67
|
+
: lastLine;
|
|
68
|
+
const raw = `↳ ${t}`;
|
|
69
|
+
return theme ? theme.fg('muted', raw) : raw;
|
|
70
|
+
}
|
|
49
71
|
export function buildWidgetLines(s, theme) {
|
|
50
72
|
const elapsed = formatDuration(Date.now() - s.startedAt);
|
|
51
73
|
const head = `${s.taskId} · ${s.title}`;
|
|
@@ -54,23 +76,14 @@ export function buildWidgetLines(s, theme) {
|
|
|
54
76
|
const stepNum = Math.min(idx + 1, total);
|
|
55
77
|
let detail = `phase ${stepNum}/${total} ${s.phase} · ${elapsed}`;
|
|
56
78
|
if (s.contextUsage) {
|
|
57
|
-
const
|
|
58
|
-
if (
|
|
59
|
-
|
|
60
|
-
detail += ` · ${theme ? contextThresholdColor(theme, percent, text) : text}`;
|
|
61
|
-
}
|
|
62
|
-
else if (tokens > 0) {
|
|
63
|
-
detail += ` · ${formatContextTokens(tokens)}`;
|
|
64
|
-
}
|
|
79
|
+
const ctxDetail = formatContextDetail(s.contextUsage, theme);
|
|
80
|
+
if (ctxDetail)
|
|
81
|
+
detail += ` · ${ctxDetail}`;
|
|
65
82
|
}
|
|
66
83
|
const lines = [head, detail];
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
: s.lastLine;
|
|
71
|
-
const raw = `↳ ${t}`;
|
|
72
|
-
lines.push(theme ? theme.fg('muted', raw) : raw);
|
|
73
|
-
}
|
|
84
|
+
const trailer = lastLineTrailer(s.lastLine, theme);
|
|
85
|
+
if (trailer)
|
|
86
|
+
lines.push(trailer);
|
|
74
87
|
return lines;
|
|
75
88
|
}
|
|
76
89
|
// ─── Widget lifecycle ────────────────────────────────────────────────────────
|
|
@@ -91,6 +104,51 @@ export function startWidget(ctx, getState) {
|
|
|
91
104
|
timer.unref?.();
|
|
92
105
|
return () => clearInterval(timer);
|
|
93
106
|
}
|
|
107
|
+
export function buildAutoLoaderLines(s, theme) {
|
|
108
|
+
const elapsed = formatDuration(Date.now() - s.startedAt);
|
|
109
|
+
const head = `/task-auto · ${s.title}`;
|
|
110
|
+
let detail = `planning ${s.stepNum}/${s.stepTotal} ${s.step} · ${elapsed}`;
|
|
111
|
+
if (s.contextUsage) {
|
|
112
|
+
const ctxDetail = formatContextDetail(s.contextUsage, theme);
|
|
113
|
+
if (ctxDetail)
|
|
114
|
+
detail += ` · ${ctxDetail}`;
|
|
115
|
+
}
|
|
116
|
+
const lines = [head, detail];
|
|
117
|
+
const trailer = lastLineTrailer(s.lastLine, theme);
|
|
118
|
+
if (trailer)
|
|
119
|
+
lines.push(trailer);
|
|
120
|
+
return lines;
|
|
121
|
+
}
|
|
122
|
+
/**
|
|
123
|
+
* Start the planning loader widget (same cadence/look as the phase widget).
|
|
124
|
+
* Returns a disposer that stops the refresh and clears the widget. No-op
|
|
125
|
+
* (returns a no-op disposer) when there's no UI.
|
|
126
|
+
*/
|
|
127
|
+
export function startAutoLoader(ctx, getState) {
|
|
128
|
+
if (!ctx.hasUI)
|
|
129
|
+
return () => { };
|
|
130
|
+
const render = () => {
|
|
131
|
+
const s = getState();
|
|
132
|
+
try {
|
|
133
|
+
ctx.ui.setWidget(AUTO_WIDGET_KEY, s ? buildAutoLoaderLines(s, ctx.ui.theme) : undefined);
|
|
134
|
+
}
|
|
135
|
+
catch {
|
|
136
|
+
/* stale ctx */
|
|
137
|
+
}
|
|
138
|
+
};
|
|
139
|
+
render();
|
|
140
|
+
const timer = setInterval(render, WIDGET_REFRESH_MS);
|
|
141
|
+
timer.unref?.();
|
|
142
|
+
return () => {
|
|
143
|
+
clearInterval(timer);
|
|
144
|
+
try {
|
|
145
|
+
ctx.ui.setWidget(AUTO_WIDGET_KEY, undefined);
|
|
146
|
+
}
|
|
147
|
+
catch {
|
|
148
|
+
/* stale ctx */
|
|
149
|
+
}
|
|
150
|
+
};
|
|
151
|
+
}
|
|
94
152
|
export function flashTerminalWidget(ctx, state, taskId, reason) {
|
|
95
153
|
if (!ctx.hasUI)
|
|
96
154
|
return;
|
package/package.json
CHANGED