@quintinshaw/pi-dynamic-workflows 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +14 -1
- package/dist/adversarial-review.d.ts +7 -2
- package/dist/adversarial-review.js +46 -38
- package/dist/agent.d.ts +2 -0
- package/dist/agent.js +6 -2
- package/dist/builtin-commands.d.ts +8 -0
- package/dist/builtin-commands.js +77 -0
- package/dist/deep-research.d.ts +10 -10
- package/dist/deep-research.js +45 -45
- package/dist/index.d.ts +5 -1
- package/dist/index.js +3 -0
- package/dist/web-tools.d.ts +15 -0
- package/dist/web-tools.js +119 -0
- package/dist/workflow.d.ts +1 -0
- package/dist/workflow.js +18 -2
- package/dist/worktree.d.ts +25 -0
- package/dist/worktree.js +61 -0
- package/extensions/workflow.ts +8 -1
- package/package.json +1 -1
- package/src/adversarial-review.ts +46 -43
- package/src/agent.ts +8 -2
- package/src/builtin-commands.ts +77 -0
- package/src/deep-research.ts +51 -59
- package/src/index.ts +5 -0
- package/src/web-tools.ts +123 -0
- package/src/workflow.ts +17 -3
- package/src/worktree.ts +76 -0
package/dist/workflow.js
CHANGED
|
@@ -6,6 +6,7 @@ import { DEFAULT_AGENT_TIMEOUT_MS, MAX_AGENTS_PER_RUN, MAX_CONCURRENCY } from ".
|
|
|
6
6
|
import { WorkflowError, WorkflowErrorCode, wrapError } from "./errors.js";
|
|
7
7
|
import { createWorkflowLogger } from "./logger.js";
|
|
8
8
|
import { parseModelRoutingFromMeta, resolveModelForPhase } from "./model-routing.js";
|
|
9
|
+
import { createWorktree, removeWorktree } from "./worktree.js";
|
|
9
10
|
const DETERMINISM_BLOCKLIST = /\bDate\s*\.\s*now\b|\bMath\s*\.\s*random\b|\bnew\s+Date\s*\(\s*\)/;
|
|
10
11
|
export async function runWorkflow(script, options = {}) {
|
|
11
12
|
const started = Date.now();
|
|
@@ -15,6 +16,7 @@ export async function runWorkflow(script, options = {}) {
|
|
|
15
16
|
const maxAgents = options.maxAgents ?? MAX_AGENTS_PER_RUN;
|
|
16
17
|
const agentTimeoutMs = options.agentTimeoutMs ?? DEFAULT_AGENT_TIMEOUT_MS;
|
|
17
18
|
const runId = options.runId ?? `run-${started.toString(36)}`;
|
|
19
|
+
const baseCwd = options.cwd ?? process.cwd();
|
|
18
20
|
// Initialize logger
|
|
19
21
|
const logger = createWorkflowLogger({
|
|
20
22
|
runId,
|
|
@@ -88,6 +90,14 @@ export async function runWorkflow(script, options = {}) {
|
|
|
88
90
|
const label = requestedLabel || defaultAgentLabel(assignedPhase, state.agentCount);
|
|
89
91
|
const timeout = agentOptions.timeoutMs ?? agentTimeoutMs;
|
|
90
92
|
options.onAgentStart?.({ label, phase: assignedPhase, prompt, model: modelSpec });
|
|
93
|
+
// Optional per-agent worktree isolation (deterministic name -> stable resume keys).
|
|
94
|
+
let worktree;
|
|
95
|
+
if (agentOptions.isolation === "worktree") {
|
|
96
|
+
worktree = await createWorktree(baseCwd, `${runId}-${callIndex}-${label}`);
|
|
97
|
+
if (!worktree.isolated)
|
|
98
|
+
log(`isolation ignored for "${label}" (${worktree.reason})`);
|
|
99
|
+
}
|
|
100
|
+
const runCwd = worktree?.isolated ? worktree.cwd : undefined;
|
|
91
101
|
// Captured from the subagent's real session usage; falls back to an
|
|
92
102
|
// estimate when the provider reports no usage (total === 0).
|
|
93
103
|
let usage;
|
|
@@ -111,6 +121,7 @@ export async function runWorkflow(script, options = {}) {
|
|
|
111
121
|
signal: options.signal,
|
|
112
122
|
instructions: buildAgentInstructions(assignedPhase, agentOptions),
|
|
113
123
|
model: modelSpec,
|
|
124
|
+
cwd: runCwd,
|
|
114
125
|
onUsage: (u) => {
|
|
115
126
|
usage = u;
|
|
116
127
|
},
|
|
@@ -118,7 +129,7 @@ export async function runWorkflow(script, options = {}) {
|
|
|
118
129
|
throwIfAborted();
|
|
119
130
|
const tokens = recordTokens(result);
|
|
120
131
|
options.onAgentJournal?.({ index: callIndex, hash: callHash, result });
|
|
121
|
-
options.onAgentEnd?.({ label, phase: assignedPhase, result, tokens });
|
|
132
|
+
options.onAgentEnd?.({ label, phase: assignedPhase, result, tokens, worktree: runCwd });
|
|
122
133
|
return result;
|
|
123
134
|
}
|
|
124
135
|
catch (error) {
|
|
@@ -127,13 +138,18 @@ export async function runWorkflow(script, options = {}) {
|
|
|
127
138
|
const workflowError = wrapError(error, { agentLabel: label });
|
|
128
139
|
logger.error(`agent ${label} failed: ${workflowError.message}`);
|
|
129
140
|
const tokens = recordTokens(null);
|
|
130
|
-
options.onAgentEnd?.({ label, phase: assignedPhase, result: null, tokens });
|
|
141
|
+
options.onAgentEnd?.({ label, phase: assignedPhase, result: null, tokens, worktree: runCwd });
|
|
131
142
|
// Return null for recoverable errors
|
|
132
143
|
if (workflowError.recoverable) {
|
|
133
144
|
return null;
|
|
134
145
|
}
|
|
135
146
|
throw workflowError;
|
|
136
147
|
}
|
|
148
|
+
finally {
|
|
149
|
+
// Always tear down the worktree, even on timeout/abort.
|
|
150
|
+
if (worktree?.isolated)
|
|
151
|
+
await removeWorktree(worktree);
|
|
152
|
+
}
|
|
137
153
|
});
|
|
138
154
|
};
|
|
139
155
|
const parallel = async (thunks) => {
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-agent git worktree isolation. When an agent requests `isolation: "worktree"`,
|
|
3
|
+
* it runs in a throwaway worktree on its own branch so parallel agents can edit the
|
|
4
|
+
* same files without conflict. Results are NOT auto-merged — the path is surfaced for
|
|
5
|
+
* the caller to inspect. Falls back to a logged no-op when isolation isn't possible.
|
|
6
|
+
*/
|
|
7
|
+
/** Outcome of a worktree-isolation attempt, as returned by `createWorktree`. */
export interface Worktree {
    /** True when a real worktree was created; false means "ran in the shared tree". */
    isolated: boolean;
    /** cwd the agent should run in (worktree path when isolated, else the base cwd). */
    cwd: string;
    /** Branch created for the worktree (`pi/wf/<name>`); only set when isolated. */
    branch?: string;
    /** Repo root the worktree was added to (for teardown). */
    repoRoot?: string;
    /** Why isolation was skipped, when isolated === false. */
    reason?: string;
}
|
|
18
|
+
/**
|
|
19
|
+
* Create an isolated worktree under `<repoRoot>/.pi/worktrees/<name>` on branch
|
|
20
|
+
* `pi/wf/<name>`. The `name` must be deterministic (derived from runId + call index,
|
|
21
|
+
* never wall-clock) so resume keys stay stable. Returns a no-op Worktree on any failure.
|
|
22
|
+
*/
|
|
23
|
+
export declare function createWorktree(baseCwd: string, name: string): Promise<Worktree>;
|
|
24
|
+
/** Remove a worktree and its branch. Best-effort; safe to call on a no-op Worktree. */
|
|
25
|
+
export declare function removeWorktree(wt: Worktree): Promise<void>;
|
package/dist/worktree.js
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-agent git worktree isolation. When an agent requests `isolation: "worktree"`,
|
|
3
|
+
* it runs in a throwaway worktree on its own branch so parallel agents can edit the
|
|
4
|
+
* same files without conflict. Results are NOT auto-merged — the path is surfaced for
|
|
5
|
+
* the caller to inspect. Falls back to a logged no-op when isolation isn't possible.
|
|
6
|
+
*/
|
|
7
|
+
import { execFile } from "node:child_process";
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { promisify } from "node:util";
|
|
10
|
+
const exec = promisify(execFile);
|
|
11
|
+
/**
 * Turn an arbitrary name into a short identifier usable as a directory name
 * and as the tail of a git branch name.
 *
 * Lower-cases the input, collapses every run of characters outside `[a-z0-9]`
 * into a single `-`, strips leading/trailing dashes and caps the result at
 * 32 characters. Falls back to `"agent"` when nothing usable remains.
 *
 * @param name Raw name (e.g. `<runId>-<callIndex>-<label>`).
 * @returns A non-empty slug of at most 32 characters that never starts or ends with `-`.
 */
function slug(name) {
    const normalized = name
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "-")
        .replace(/^-+|-+$/g, "");
    // Truncating can cut right after a separator, so strip trailing dashes
    // again; otherwise the result could end in "-" despite the trim above.
    const truncated = normalized.slice(0, 32).replace(/-+$/, "");
    return truncated || "agent";
}
|
|
18
|
+
/**
 * Try to give an agent its own git worktree.
 *
 * The worktree lives at `<repoRoot>/.pi/worktrees/<slug>` on a new branch
 * `pi/wf/<slug>` started from `HEAD`, where `<slug>` is derived from `name`.
 * Callers are expected to pass a deterministic `name` (run id + call index,
 * never wall-clock) so resume keys stay stable.
 *
 * This function does not throw on git failures: when `baseCwd` is not inside a
 * git repository, or `git worktree add` fails, it resolves to a no-op Worktree
 * (`isolated: false`, `cwd: baseCwd`) carrying the reason.
 */
export async function createWorktree(baseCwd, name) {
    // Shared shape for every "could not isolate" outcome.
    const skipped = (reason) => ({ isolated: false, cwd: baseCwd, reason });
    const id = slug(name);
    let repoRoot;
    try {
        const toplevel = await exec("git", ["-C", baseCwd, "rev-parse", "--show-toplevel"]);
        repoRoot = toplevel.stdout.trim();
    }
    catch {
        return skipped("not a git repository");
    }
    const branch = `pi/wf/${id}`;
    const path = join(repoRoot, ".pi", "worktrees", id);
    try {
        await exec("git", ["-C", repoRoot, "worktree", "add", "-b", branch, path, "HEAD"]);
    }
    catch (error) {
        return skipped(error instanceof Error ? error.message : String(error));
    }
    return { isolated: true, cwd: path, branch, repoRoot };
}
|
|
43
|
+
/**
 * Tear down a worktree produced by `createWorktree`, then delete its branch.
 *
 * Best-effort: failures of either git command are ignored (the worktree may be
 * already gone or locked, the branch already deleted). A no-op Worktree
 * (not isolated, or without a repo root) is left alone.
 */
export async function removeWorktree(wt) {
    if (!(wt.isolated && wt.repoRoot))
        return;
    // Run one git subcommand against the owning repo, swallowing any failure.
    const git = async (...args) => {
        try {
            await exec("git", ["-C", wt.repoRoot, ...args]);
        }
        catch {
            // best-effort teardown: nothing to do if git refuses
        }
    };
    await git("worktree", "remove", "--force", wt.cwd);
    if (wt.branch)
        await git("branch", "-D", wt.branch);
}
|
package/extensions/workflow.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
createWorkflowStorage,
|
|
4
|
+
createWorkflowTool,
|
|
5
|
+
registerBuiltinWorkflows,
|
|
6
|
+
registerWorkflowCommands,
|
|
7
|
+
WorkflowManager,
|
|
8
|
+
} from "../src/index.js";
|
|
3
9
|
|
|
4
10
|
export default function extension(pi: ExtensionAPI) {
|
|
5
11
|
// Single manager/storage shared by the workflow tool and the /workflows command,
|
|
@@ -11,6 +17,7 @@ export default function extension(pi: ExtensionAPI) {
|
|
|
11
17
|
const workflowTool = createWorkflowTool({ cwd, manager, storage });
|
|
12
18
|
pi.registerTool(workflowTool);
|
|
13
19
|
registerWorkflowCommands(pi, manager);
|
|
20
|
+
registerBuiltinWorkflows(pi, { cwd });
|
|
14
21
|
|
|
15
22
|
pi.on("session_start", () => {
|
|
16
23
|
const active = pi.getActiveTools();
|
package/package.json
CHANGED
|
@@ -12,61 +12,64 @@ export interface AdversarialReviewConfig {
|
|
|
12
12
|
agreementThreshold: number;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
const DEFAULT_CONFIG: AdversarialReviewConfig = {
|
|
16
|
-
reviewerCount: 2,
|
|
17
|
-
filterContested: true,
|
|
18
|
-
agreementThreshold: 0.5,
|
|
19
|
-
};
|
|
20
|
-
|
|
21
15
|
/**
 * Generate an adversarial-review workflow. The script is static and reads its
 * inputs from `args` (task/reviewers/threshold) — no string interpolation.
 *
 * Each finding is judged independently by N reviewers who are told to REFUTE it;
 * a finding survives only when the share of reviewers calling it real meets the
 * agreement threshold.
 *
 * The generated script tolerates a failed investigation: `agent()` can resolve
 * to `null` when a sub-agent fails recoverably, in which case the run proceeds
 * with zero findings instead of throwing on a property access.
 *
 * @returns Workflow script source exporting `meta` and returning
 *   `{ total, survivors, report }`.
 */
export function generateAdversarialReviewWorkflow(): string {
  return `export const meta = {
  name: 'adversarial_review',
  description: 'Adversarial review: findings cross-checked by independent skeptics',
  phases: [
    { title: 'Investigate' },
    { title: 'Refute' },
    { title: 'Consensus' },
  ],
}

const task = (args && args.task) || ''
const reviewers = (args && args.reviewers) || 2
const threshold = (args && args.threshold) || 0.5

phase('Investigate')
const investigation = await agent(
  'Investigate the following and list concrete, individually-checkable findings:\\n' + task,
  { label: 'investigate', schema: { type: 'object', properties: { findings: { type: 'array', items: { type: 'string' } } }, required: ['findings'] } }
)
const findings = (investigation && investigation.findings) || []

phase('Refute')
const judged = await parallel(findings.map((f, i) => () =>
  parallel(Array.from({ length: reviewers }, (_, r) => () =>
    agent(
      'You are a skeptical reviewer. Try to REFUTE this finding for the task below. ' +
      'Default to real=false when uncertain. Investigate with the available tools if needed.\\n\\n' +
      'TASK: ' + task + '\\nFINDING: ' + f,
      { label: 'refute ' + (i + 1) + '.' + (r + 1), schema: { type: 'object', properties: { real: { type: 'boolean' }, reason: { type: 'string' } }, required: ['real'] } }
    )
  )).then((votes) => {
    const valid = votes.filter(Boolean)
    const realCount = valid.filter((v) => v && v.real).length
    const ratio = valid.length ? realCount / valid.length : 0
    return { finding: f, realVotes: realCount, totalVotes: valid.length, survives: ratio >= threshold }
  })
))

const survivors = judged.filter((j) => j && j.survives)

phase('Consensus')
const report = await agent(
  'Write a final review report. Include ONLY the findings that survived adversarial review (listed below), ' +
  'each with a short justification. Note how many were discarded.\\n\\n' +
  'SURVIVING FINDINGS JSON:\\n' + JSON.stringify(survivors),
  { label: 'consensus' }
)

return { total: findings.length, survivors, report }`;
}
|
|
71
74
|
|
|
72
75
|
/**
|
package/src/agent.ts
CHANGED
|
@@ -54,6 +54,8 @@ export interface AgentRunOptions<TSchemaDef extends TSchema | undefined = undefi
|
|
|
54
54
|
model?: string;
|
|
55
55
|
/** Called with the resolved model id once known (for display/telemetry). */
|
|
56
56
|
onModelResolved?: (modelId: string) => void;
|
|
57
|
+
/** Run this agent in a different working directory (e.g. an isolated worktree). */
|
|
58
|
+
cwd?: string;
|
|
57
59
|
}
|
|
58
60
|
|
|
59
61
|
export type AgentRunResult<TSchemaDef extends TSchema | undefined> = TSchemaDef extends TSchema
|
|
@@ -105,7 +107,11 @@ export class WorkflowAgent {
|
|
|
105
107
|
options: AgentRunOptions<TSchemaDef> = {},
|
|
106
108
|
): Promise<AgentRunResult<TSchemaDef>> {
|
|
107
109
|
const capture: StructuredOutputCapture<any> = { called: false, value: undefined };
|
|
108
|
-
|
|
110
|
+
// Per-call cwd (e.g. a worktree) needs coding tools bound to that directory,
|
|
111
|
+
// since tools capture their cwd at construction and can't be relocated.
|
|
112
|
+
const runCwd = options.cwd ?? this.cwd;
|
|
113
|
+
const baseTools = runCwd === this.cwd ? this.baseTools : createCodingTools(runCwd);
|
|
114
|
+
const customTools: ToolDefinition[] = [...baseTools, ...(options.tools ?? [])];
|
|
109
115
|
|
|
110
116
|
if (options.schema) {
|
|
111
117
|
customTools.push(createStructuredOutputTool({ schema: options.schema, capture }) as unknown as ToolDefinition);
|
|
@@ -125,7 +131,7 @@ export class WorkflowAgent {
|
|
|
125
131
|
|
|
126
132
|
const agentDir = getAgentDir();
|
|
127
133
|
const { session } = await createAgentSession({
|
|
128
|
-
cwd:
|
|
134
|
+
cwd: runCwd,
|
|
129
135
|
agentDir,
|
|
130
136
|
sessionManager: SessionManager.inMemory(),
|
|
131
137
|
// Use real SettingsManager to inherit user's default provider/model settings.
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled workflow commands: `/deep-research` and `/adversarial-review`.
|
|
3
|
+
* They run a generated workflow script and print the final report.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createCodingTools, type ExtensionAPI, type ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import { generateAdversarialReviewWorkflow } from "./adversarial-review.js";
|
|
8
|
+
import { generateDeepResearchWorkflow } from "./deep-research.js";
|
|
9
|
+
import { createWebTools } from "./web-tools.js";
|
|
10
|
+
import { runWorkflow, type WorkflowRunResult } from "./workflow.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Report whether a command called `name` is already known to the host.
 *
 * `getCommands` is treated as optional; if it is missing the command is
 * considered unregistered, and any error raised while listing or inspecting
 * commands is likewise answered with `false`.
 */
function alreadyRegistered(pi: ExtensionAPI, name: string): boolean {
  let found = false;
  try {
    const registered = pi.getCommands?.() ?? [];
    found = registered.some((command: { name: string }) => command.name === name);
  } catch {
    found = false;
  }
  return found;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Choose the text shown to the user for a finished workflow run.
 *
 * Prefers the workflow's own `report` field when it is a non-blank string;
 * otherwise falls back to the pretty-printed JSON of the whole result.
 *
 * `JSON.stringify` yields `undefined` (not a string) for values such as
 * `undefined` or a function, so a fixed placeholder is returned in that case
 * to honour the `string` return type.
 *
 * @param result Completed workflow run.
 * @returns Always a string suitable for display.
 */
function reportText(result: WorkflowRunResult): string {
  const r = result.result as { report?: unknown } | undefined;
  if (r && typeof r.report === "string" && r.report.trim()) return r.report;
  const serialized: string | undefined = JSON.stringify(result.result, null, 2);
  return serialized ?? "(workflow returned no result)";
}
|
|
25
|
+
|
|
26
|
+
/** Everything that differs between the bundled workflow commands. */
interface BuiltinWorkflowSpec {
  /** Command name; also used as the status key and the message `customType`. */
  name: string;
  description: string;
  /** Shown as a warning when the command is invoked without an argument. */
  usage: string;
  /** Shown as an info notice right before the workflow starts. */
  startNotice: string;
  /** Prefix of the status line updated on every phase change. */
  statusPrefix: string;
  /** Key under which the trimmed command argument is passed in `args`. */
  argName: string;
  /** Produces the workflow script source. */
  generate: () => string;
  /** Produces the tool set for the run; called once per invocation. */
  tools: () => ReturnType<typeof createCodingTools>;
}

/**
 * Register one bundled workflow command unless a command of that name exists.
 * The handler runs the generated workflow in `cwd`, mirrors phase changes to
 * the status line, and posts the final report (or an error notice).
 */
function registerBuiltinCommand(pi: ExtensionAPI, cwd: string, spec: BuiltinWorkflowSpec): void {
  const { name } = spec;
  if (alreadyRegistered(pi, name)) return;

  pi.registerCommand(name, {
    description: spec.description,
    async handler(args: string, ctx: ExtensionCommandContext) {
      const input = args.trim();
      if (!input) return ctx.ui.notify(spec.usage, "warning");
      ctx.ui.notify(spec.startNotice, "info");
      try {
        const result = await runWorkflow(spec.generate(), {
          cwd,
          args: { [spec.argName]: input } as Record<string, string>,
          tools: spec.tools(),
          onPhase: (title) => ctx.ui.setStatus(name, `${spec.statusPrefix}: ${title}`),
        });
        // Clear the status before posting so it never outlives the run.
        ctx.ui.setStatus(name, undefined);
        await pi.sendMessage({ customType: name, content: reportText(result), display: true });
      } catch (error) {
        ctx.ui.setStatus(name, undefined);
        ctx.ui.notify(`${name} failed: ${error instanceof Error ? error.message : error}`, "error");
      }
    },
  });
}

/**
 * Register the bundled `/deep-research` and `/adversarial-review` commands.
 * A command is skipped when one with the same name is already registered.
 *
 * @param pi Extension host API used to register commands and send messages.
 * @param opts.cwd Working directory the workflows (and their coding tools) run in.
 */
export function registerBuiltinWorkflows(pi: ExtensionAPI, opts: { cwd: string }): void {
  const cwd = opts.cwd;

  registerBuiltinCommand(pi, cwd, {
    name: "deep-research",
    description: "Research a question across the web with cross-checked sources",
    usage: "Usage: /deep-research <question>",
    startNotice: "Researching — running web searches across several angles…",
    statusPrefix: "research",
    argName: "question",
    generate: generateDeepResearchWorkflow,
    // Research agents need real web access on top of the coding tools.
    tools: () => [...createCodingTools(cwd), ...createWebTools()],
  });

  registerBuiltinCommand(pi, cwd, {
    name: "adversarial-review",
    description: "Investigate a task, then cross-check each finding with skeptical reviewers",
    usage: "Usage: /adversarial-review <task or question>",
    startNotice: "Reviewing — investigating then refuting each finding…",
    statusPrefix: "review",
    argName: "task",
    generate: generateAdversarialReviewWorkflow,
    tools: () => createCodingTools(cwd),
  });
}
|
package/src/deep-research.ts
CHANGED
|
@@ -4,80 +4,72 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
export interface DeepResearchConfig {
|
|
7
|
-
/** Number of search angles to explore. */
|
|
8
|
-
|
|
9
|
-
/**
|
|
10
|
-
|
|
11
|
-
/** Whether to cross-check claims across sources. */
|
|
12
|
-
crossCheck: boolean;
|
|
13
|
-
/** Maximum number of agents to use. */
|
|
14
|
-
maxAgents: number;
|
|
7
|
+
/** Number of distinct search angles/queries to explore. */
|
|
8
|
+
angles: number;
|
|
9
|
+
/** Minimum distinct sources required for a claim to survive cross-checking. */
|
|
10
|
+
minSupport: number;
|
|
15
11
|
}
|
|
16
12
|
|
|
17
|
-
const DEFAULT_CONFIG: DeepResearchConfig = {
|
|
18
|
-
searchAngles: 4,
|
|
19
|
-
sourcesPerAngle: 3,
|
|
20
|
-
crossCheck: true,
|
|
21
|
-
maxAgents: 20,
|
|
22
|
-
};
|
|
23
|
-
|
|
24
13
|
/**
 * Generate a deep-research workflow that uses the real web_search/web_fetch tools.
 *
 * The script is static and reads its inputs from `args` (question/angles/minSupport),
 * so the question is never string-interpolated into source — no escaping hazards.
 * Inject the web tools at run time via the agent's `tools` option.
 *
 * The generated script tolerates a failed planning step: `agent()` can resolve
 * to `null` when a sub-agent fails recoverably, in which case the run proceeds
 * with zero queries instead of throwing on a property access.
 *
 * @returns Workflow script source exporting `meta` and returning
 *   `{ question, queries, supported, report }`.
 */
export function generateDeepResearchWorkflow(): string {
  return `export const meta = {
  name: 'deep_research',
  description: 'Deep research with real web search and cross-checked claims',
  phases: [
    { title: 'Queries' },
    { title: 'Gather' },
    { title: 'Verify' },
    { title: 'Report' },
  ],
}

const question = (args && args.question) || ''
const angles = (args && args.angles) || 4
const minSupport = (args && args.minSupport) || 2

phase('Queries')
const plan = await agent(
  'You are planning web research for this question:\\n' + question +
  '\\n\\nProduce ' + angles + ' diverse, specific search queries that together cover the question from different angles.',
  { label: 'plan queries', schema: { type: 'object', properties: { queries: { type: 'array', items: { type: 'string' } } }, required: ['queries'] } }
)
const queries = ((plan && plan.queries) || []).slice(0, angles)

phase('Gather')
const gathered = await parallel(queries.map((q, i) => () =>
  agent(
    'Research this query using the web_search and web_fetch tools.\\nQuery: ' + q +
    '\\n\\nSteps: (1) call web_search with the query; (2) web_fetch the 2 most relevant result URLs; ' +
    '(3) extract concrete, verifiable factual claims, each tagged with the exact source URL it came from. ' +
    'Do NOT invent sources or claims — report only what the fetched pages actually say.',
    { label: 'research ' + (i + 1), schema: { type: 'object', properties: { sources: { type: 'array', items: { type: 'object', properties: { url: { type: 'string' }, claims: { type: 'array', items: { type: 'string' } } }, required: ['url', 'claims'] } } }, required: ['sources'] } }
  )
))
const allSources = gathered.filter(Boolean).flatMap((g) => (g && g.sources) || [])

phase('Verify')
const verdict = await agent(
  'Cross-check these research sources. Group claims that assert the same fact across different source URLs. ' +
  'Keep a claim only if it is supported by at least ' + minSupport + ' distinct source URLs OR by one clearly authoritative source. ' +
  'Discard claims found in a single weak source or that conflict with others.\\n\\nSOURCES JSON:\\n' + JSON.stringify(allSources),
  { label: 'cross-check', schema: { type: 'object', properties: { supported: { type: 'array', items: { type: 'object', properties: { claim: { type: 'string' }, sources: { type: 'array', items: { type: 'string' } } }, required: ['claim', 'sources'] } }, discarded: { type: 'array', items: { type: 'string' } } }, required: ['supported'] } }
)

phase('Report')
const report = await agent(
  'Write a concise, well-structured research report that answers the question using ONLY the supported claims below. ' +
  'Cite source URLs inline next to each claim. If the evidence is thin, say so explicitly.\\n\\n' +
  'QUESTION: ' + question + '\\n\\nSUPPORTED CLAIMS JSON:\\n' + JSON.stringify((verdict && verdict.supported) || []),
  { label: 'write report' }
)

return { question, queries, supported: (verdict && verdict.supported) || [], report }`;
}
|
|
82
74
|
|
|
83
75
|
/**
|
package/src/index.ts
CHANGED
|
@@ -4,6 +4,7 @@ export type { AgentRunOptions, AgentRunResult, WorkflowAgentOptions } from "./ag
|
|
|
4
4
|
export { WorkflowAgent } from "./agent.js";
|
|
5
5
|
export type { AutoWorkflowConfig } from "./auto-workflow.js";
|
|
6
6
|
export { shouldUseWorkflow, suggestWorkflowScript } from "./auto-workflow.js";
|
|
7
|
+
export { registerBuiltinWorkflows } from "./builtin-commands.js";
|
|
7
8
|
export * from "./config.js";
|
|
8
9
|
export type { DeepResearchConfig } from "./deep-research.js";
|
|
9
10
|
export { generateCodebaseAuditWorkflow, generateDeepResearchWorkflow } from "./deep-research.js";
|
|
@@ -39,8 +40,10 @@ export type { PersistedRunState, RunPersistence, RunStatus } from "./run-persist
|
|
|
39
40
|
export { createRunPersistence, generateRunId } from "./run-persistence.js";
|
|
40
41
|
export type { StructuredOutputCapture, StructuredOutputToolOptions } from "./structured-output.js";
|
|
41
42
|
export { createStructuredOutputTool } from "./structured-output.js";
|
|
43
|
+
export { createWebFetchTool, createWebSearchTool, createWebTools } from "./web-tools.js";
|
|
42
44
|
export type {
|
|
43
45
|
AgentOptions,
|
|
46
|
+
JournalEntry,
|
|
44
47
|
WorkflowMeta,
|
|
45
48
|
WorkflowMetaPhase,
|
|
46
49
|
WorkflowRunOptions,
|
|
@@ -54,3 +57,5 @@ export type { SavedWorkflow, WorkflowStorage } from "./workflow-saved.js";
|
|
|
54
57
|
export { createWorkflowStorage } from "./workflow-saved.js";
|
|
55
58
|
export type { WorkflowToolInput, WorkflowToolOptions } from "./workflow-tool.js";
|
|
56
59
|
export { createWorkflowTool } from "./workflow-tool.js";
|
|
60
|
+
export type { Worktree } from "./worktree.js";
|
|
61
|
+
export { createWorktree, removeWorktree } from "./worktree.js";
|