@quintinshaw/pi-dynamic-workflows 1.5.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -1
- package/dist/adversarial-review.d.ts +7 -2
- package/dist/adversarial-review.js +46 -38
- package/dist/builtin-commands.d.ts +8 -0
- package/dist/builtin-commands.js +77 -0
- package/dist/deep-research.d.ts +10 -10
- package/dist/deep-research.js +45 -45
- package/dist/index.d.ts +2 -0
- package/dist/index.js +2 -0
- package/dist/web-tools.d.ts +15 -0
- package/dist/web-tools.js +119 -0
- package/extensions/workflow.ts +8 -1
- package/package.json +1 -1
- package/src/adversarial-review.ts +46 -43
- package/src/builtin-commands.ts +77 -0
- package/src/deep-research.ts +51 -59
- package/src/index.ts +2 -0
- package/src/web-tools.ts +123 -0
package/README.md
CHANGED
|
@@ -55,9 +55,19 @@ Ask for a background workflow (the model passes `background: true`) and it runs
|
|
|
55
55
|
/workflows status <id> # show a run's progress
|
|
56
56
|
/workflows stop <id> # abort a running run
|
|
57
57
|
/workflows pause <id> # pause a running run
|
|
58
|
+
/workflows resume <id> # resume an interrupted run (replays cached results)
|
|
58
59
|
/workflows rm <id> # remove a run from the list
|
|
59
60
|
```
|
|
60
61
|
|
|
62
|
+
### Bundled workflows
|
|
63
|
+
|
|
64
|
+
```text
|
|
65
|
+
/deep-research <question> # web-researched, source-cross-checked report
|
|
66
|
+
/adversarial-review <task> # findings cross-checked by skeptical reviewers
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
`/deep-research` fans out web searches across several angles, fetches the top sources with the real `web_search` / `web_fetch` tools, keeps only claims supported by multiple distinct sources (or by one clearly authoritative source), and writes a cited report.
|
|
70
|
+
|
|
61
71
|
## Workflow script shape
|
|
62
72
|
|
|
63
73
|
A workflow is plain JavaScript. The first statement must export literal metadata:
|
|
@@ -135,6 +145,7 @@ Scripts run inside a Node `vm` sandbox. Intentionally unavailable: `Date.now()`,
|
|
|
135
145
|
- **Real token & cost accounting** — read from each subagent's SDK session (input / output / total / cost), with a character estimate only as fallback when a provider reports no usage; `budget` gates on the real total
|
|
136
146
|
- **Real per-agent / per-phase model routing** — `opts.model` and `meta.phases[].model` actually select the model (resolved against your authed model registry), with graceful fallback
|
|
137
147
|
- **`/workflows` command** — list, inspect, stop, pause, **resume**, and remove background runs; runs started with `background: true` are reachable from the command
|
|
148
|
+
- **Bundled `/deep-research` & `/adversarial-review`** — `/deep-research` runs real web searches (via built-in `web_search` / `web_fetch` tools), extracts claims, cross-checks them across sources, and reports only what survived; `/adversarial-review` investigates a task then has independent skeptics try to refute each finding, keeping only those that clear an agreement threshold
|
|
138
149
|
- **Resume** — each agent result is journaled by a deterministic call index; resuming replays the unchanged prefix from cache (no re-run, no tokens) and runs only new or edited calls live
|
|
139
150
|
- **Worktree isolation** — `isolation: "worktree"` runs an agent in its own git worktree on a throwaway branch, so parallel agents can edit the same files without conflict; the worktree is torn down after (results are not auto-merged), and it falls back to a logged no-op outside a git repo
|
|
140
151
|
- **Safety limits** — 1000-agent cap (`maxAgents`), per-agent timeout (`agentTimeoutMs`), recoverable-vs-fatal error classification
|
|
@@ -145,7 +156,6 @@ Scripts run inside a Node `vm` sandbox. Intentionally unavailable: `Date.now()`,
|
|
|
145
156
|
|
|
146
157
|
Tracked toward closer parity with Claude Code dynamic workflows:
|
|
147
158
|
|
|
148
|
-
- **Bundled `/deep-research`** and `/adversarial-review` workflows
|
|
149
159
|
- **Saved workflows** as `/<name>` slash commands
|
|
150
160
|
- **Nested `workflow()`** to compose saved workflows inline
|
|
151
161
|
|
|
@@ -11,9 +11,14 @@ export interface AdversarialReviewConfig {
|
|
|
11
11
|
agreementThreshold: number;
|
|
12
12
|
}
|
|
13
13
|
/**
|
|
14
|
-
* Generate an adversarial
|
|
14
|
+
* Generate an adversarial-review workflow. The script is static and reads its
|
|
15
|
+
* inputs from `args` (task/reviewers/threshold) — no string interpolation.
|
|
16
|
+
*
|
|
17
|
+
* Each finding is judged independently by N reviewers who are told to REFUTE it;
|
|
18
|
+
* a finding survives only when the share of reviewers calling it real meets the
|
|
19
|
+
* agreement threshold.
|
|
15
20
|
*/
|
|
16
|
-
export declare function generateAdversarialReviewWorkflow(
|
|
21
|
+
export declare function generateAdversarialReviewWorkflow(): string;
|
|
17
22
|
/**
|
|
18
23
|
* Generate a multi-perspective analysis workflow.
|
|
19
24
|
*/
|
|
@@ -2,56 +2,64 @@
|
|
|
2
2
|
* Adversarial review mode for workflows.
|
|
3
3
|
* Agents cross-check each other's findings for higher quality results.
|
|
4
4
|
*/
|
|
5
|
-
const DEFAULT_CONFIG = {
|
|
6
|
-
reviewerCount: 2,
|
|
7
|
-
filterContested: true,
|
|
8
|
-
agreementThreshold: 0.5,
|
|
9
|
-
};
|
|
10
5
|
/**
|
|
11
|
-
* Generate an adversarial
|
|
6
|
+
* Generate an adversarial-review workflow. The script is static and reads its
|
|
7
|
+
* inputs from `args` (task/reviewers/threshold) — no string interpolation.
|
|
8
|
+
*
|
|
9
|
+
* Each finding is judged independently by N reviewers who are told to REFUTE it;
|
|
10
|
+
* a finding survives only when the share of reviewers calling it real meets the
|
|
11
|
+
* agreement threshold.
|
|
12
12
|
*/
|
|
13
|
-
export function generateAdversarialReviewWorkflow(
|
|
14
|
-
const cfg = { ...DEFAULT_CONFIG, ...config };
|
|
13
|
+
export function generateAdversarialReviewWorkflow() {
|
|
15
14
|
return `export const meta = {
|
|
16
15
|
name: 'adversarial_review',
|
|
17
|
-
description: 'Adversarial review
|
|
16
|
+
description: 'Adversarial review: findings cross-checked by independent skeptics',
|
|
18
17
|
phases: [
|
|
19
|
-
{ title: '
|
|
20
|
-
{ title: '
|
|
21
|
-
{ title: 'Cross-Check' },
|
|
18
|
+
{ title: 'Investigate' },
|
|
19
|
+
{ title: 'Refute' },
|
|
22
20
|
{ title: 'Consensus' },
|
|
23
21
|
],
|
|
24
|
-
}
|
|
22
|
+
}
|
|
25
23
|
|
|
26
|
-
|
|
27
|
-
const
|
|
28
|
-
|
|
29
|
-
{ label: 'investigator' }
|
|
30
|
-
);
|
|
24
|
+
const task = (args && args.task) || ''
|
|
25
|
+
const reviewers = (args && args.reviewers) || 2
|
|
26
|
+
const threshold = (args && args.threshold) || 0.5
|
|
31
27
|
|
|
32
|
-
phase('
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
));
|
|
28
|
+
phase('Investigate')
|
|
29
|
+
const investigation = await agent(
|
|
30
|
+
'Investigate the following and list concrete, individually-checkable findings:\\n' + task,
|
|
31
|
+
{ label: 'investigate', schema: { type: 'object', properties: { findings: { type: 'array', items: { type: 'string' } } }, required: ['findings'] } }
|
|
32
|
+
)
|
|
33
|
+
const findings = investigation.findings || []
|
|
39
34
|
|
|
40
|
-
phase('
|
|
41
|
-
const
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
35
|
+
phase('Refute')
|
|
36
|
+
const judged = await parallel(findings.map((f, i) => () =>
|
|
37
|
+
parallel(Array.from({ length: reviewers }, (_, r) => () =>
|
|
38
|
+
agent(
|
|
39
|
+
'You are a skeptical reviewer. Try to REFUTE this finding for the task below. ' +
|
|
40
|
+
'Default to real=false when uncertain. Investigate with the available tools if needed.\\n\\n' +
|
|
41
|
+
'TASK: ' + task + '\\nFINDING: ' + f,
|
|
42
|
+
{ label: 'refute ' + (i + 1) + '.' + (r + 1), schema: { type: 'object', properties: { real: { type: 'boolean' }, reason: { type: 'string' } }, required: ['real'] } }
|
|
43
|
+
)
|
|
44
|
+
)).then((votes) => {
|
|
45
|
+
const valid = votes.filter(Boolean)
|
|
46
|
+
const realCount = valid.filter((v) => v && v.real).length
|
|
47
|
+
const ratio = valid.length ? realCount / valid.length : 0
|
|
48
|
+
return { finding: f, realVotes: realCount, totalVotes: valid.length, survives: ratio >= threshold }
|
|
49
|
+
})
|
|
50
|
+
))
|
|
47
51
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
)
|
|
52
|
+
const survivors = judged.filter((j) => j && j.survives)
|
|
53
|
+
|
|
54
|
+
phase('Consensus')
|
|
55
|
+
const report = await agent(
|
|
56
|
+
'Write a final review report. Include ONLY the findings that survived adversarial review (listed below), ' +
|
|
57
|
+
'each with a short justification. Note how many were discarded.\\n\\n' +
|
|
58
|
+
'SURVIVING FINDINGS JSON:\\n' + JSON.stringify(survivors),
|
|
59
|
+
{ label: 'consensus' }
|
|
60
|
+
)
|
|
53
61
|
|
|
54
|
-
return { findings,
|
|
62
|
+
return { total: findings.length, survivors, report }`;
|
|
55
63
|
}
|
|
56
64
|
/**
|
|
57
65
|
* Generate a multi-perspective analysis workflow.
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled workflow commands: `/deep-research` and `/adversarial-review`.
|
|
3
|
+
* They run a generated workflow script and print the final report.
|
|
4
|
+
*/
|
|
5
|
+
import { type ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
|
+
export declare function registerBuiltinWorkflows(pi: ExtensionAPI, opts: {
|
|
7
|
+
cwd: string;
|
|
8
|
+
}): void;
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled workflow commands: `/deep-research` and `/adversarial-review`.
|
|
3
|
+
* They run a generated workflow script and print the final report.
|
|
4
|
+
*/
|
|
5
|
+
import { createCodingTools } from "@earendil-works/pi-coding-agent";
|
|
6
|
+
import { generateAdversarialReviewWorkflow } from "./adversarial-review.js";
|
|
7
|
+
import { generateDeepResearchWorkflow } from "./deep-research.js";
|
|
8
|
+
import { createWebTools } from "./web-tools.js";
|
|
9
|
+
import { runWorkflow } from "./workflow.js";
|
|
10
|
+
/**
 * Tell whether `pi` already exposes a command called `name`.
 *
 * `getCommands` is treated as optional: when it is absent the command list is
 * taken to be empty, and any error raised while listing or scanning commands
 * is answered with `false` so that registration can still proceed.
 */
function alreadyRegistered(pi, name) {
    try {
        const known = pi.getCommands?.() ?? [];
        const hasSameName = (command) => command.name === name;
        return known.some(hasSameName);
    }
    catch {
        // Best effort: failing to list commands is treated as "not registered".
        return false;
    }
}
|
|
18
|
+
/**
 * Pick the text to display for a finished workflow run.
 *
 * @param result The run result envelope; the workflow's own return value is
 *   read from `result.result`.
 * @returns The workflow's non-blank string `report` when it has one, otherwise
 *   the return value pretty-printed as JSON. Always returns a string.
 */
function reportText(result) {
    // Optional chaining so a missing envelope degrades to the fallback text
    // instead of throwing a TypeError.
    const payload = result?.result;
    if (typeof payload?.report === "string" && payload.report.trim())
        return payload.report;
    // JSON.stringify yields `undefined` (not a string) for `undefined`,
    // functions and symbols; never hand that to the message content.
    return JSON.stringify(payload, null, 2) ?? "(workflow returned no result)";
}
|
|
24
|
+
/**
 * Register the bundled `/deep-research` and `/adversarial-review` commands.
 *
 * A command is skipped when `alreadyRegistered` reports that its name is
 * taken. Each handler trims its argument string, warns with a usage line when
 * it is empty, runs the generated workflow script with the input passed via
 * `args`, mirrors phase titles into a status slot keyed by the command name,
 * and finally sends the report text as a displayed message. Failures clear the
 * status slot and surface as an error notification.
 *
 * @param pi Extension API used to register commands and send messages.
 * @param opts Options; `opts.cwd` is forwarded to the workflow and its tools.
 */
export function registerBuiltinWorkflows(pi, opts) {
    const cwd = opts.cwd;
    // One entry per bundled command; everything that differs between the two
    // commands lives here so the handler below can be shared.
    const bundled = [
        {
            name: "deep-research",
            description: "Research a question across the web with cross-checked sources",
            usage: "Usage: /deep-research <question>",
            starting: "Researching — running web searches across several angles…",
            statusPrefix: "research",
            argKey: "question",
            script: generateDeepResearchWorkflow,
            // Research agents need real web access on top of the coding tools.
            tools: () => [...createCodingTools(cwd), ...createWebTools()],
        },
        {
            name: "adversarial-review",
            description: "Investigate a task, then cross-check each finding with skeptical reviewers",
            usage: "Usage: /adversarial-review <task or question>",
            starting: "Reviewing — investigating then refuting each finding…",
            statusPrefix: "review",
            argKey: "task",
            script: generateAdversarialReviewWorkflow,
            tools: () => createCodingTools(cwd),
        },
    ];
    for (const spec of bundled) {
        if (alreadyRegistered(pi, spec.name))
            continue;
        pi.registerCommand(spec.name, {
            description: spec.description,
            async handler(args, ctx) {
                const input = args.trim();
                if (!input)
                    return ctx.ui.notify(spec.usage, "warning");
                ctx.ui.notify(spec.starting, "info");
                try {
                    const result = await runWorkflow(spec.script(), {
                        cwd,
                        args: { [spec.argKey]: input },
                        tools: spec.tools(),
                        onPhase: (title) => ctx.ui.setStatus(spec.name, `${spec.statusPrefix}: ${title}`),
                    });
                    // Clear the status slot before the report is posted.
                    ctx.ui.setStatus(spec.name, undefined);
                    await pi.sendMessage({ customType: spec.name, content: reportText(result), display: true });
                }
                catch (error) {
                    ctx.ui.setStatus(spec.name, undefined);
                    ctx.ui.notify(`${spec.name} failed: ${error instanceof Error ? error.message : error}`, "error");
                }
            },
        });
    }
}
|
package/dist/deep-research.d.ts
CHANGED
|
@@ -3,19 +3,19 @@
|
|
|
3
3
|
* Built-in workflow for comprehensive research across multiple sources.
|
|
4
4
|
*/
|
|
5
5
|
export interface DeepResearchConfig {
|
|
6
|
-
/** Number of search angles to explore. */
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
|
|
10
|
-
/** Whether to cross-check claims across sources. */
|
|
11
|
-
crossCheck: boolean;
|
|
12
|
-
/** Maximum number of agents to use. */
|
|
13
|
-
maxAgents: number;
|
|
6
|
+
/** Number of distinct search angles/queries to explore. */
|
|
7
|
+
angles: number;
|
|
8
|
+
/** Minimum distinct sources required for a claim to survive cross-checking. */
|
|
9
|
+
minSupport: number;
|
|
14
10
|
}
|
|
15
11
|
/**
|
|
16
|
-
* Generate a deep
|
|
12
|
+
* Generate a deep-research workflow that uses the real web_search/web_fetch tools.
|
|
13
|
+
*
|
|
14
|
+
* The script is static and reads its inputs from `args` (question/angles/minSupport),
|
|
15
|
+
* so the question is never string-interpolated into source — no escaping hazards.
|
|
16
|
+
* Inject the web tools at run time via the agent's `tools` option.
|
|
17
17
|
*/
|
|
18
|
-
export declare function generateDeepResearchWorkflow(
|
|
18
|
+
export declare function generateDeepResearchWorkflow(): string;
|
|
19
19
|
/**
|
|
20
20
|
* Generate a codebase audit workflow.
|
|
21
21
|
*/
|
package/dist/deep-research.js
CHANGED
|
@@ -2,66 +2,66 @@
|
|
|
2
2
|
* Deep research workflow.
|
|
3
3
|
* Built-in workflow for comprehensive research across multiple sources.
|
|
4
4
|
*/
|
|
5
|
-
const DEFAULT_CONFIG = {
|
|
6
|
-
searchAngles: 4,
|
|
7
|
-
sourcesPerAngle: 3,
|
|
8
|
-
crossCheck: true,
|
|
9
|
-
maxAgents: 20,
|
|
10
|
-
};
|
|
11
5
|
/**
|
|
12
|
-
* Generate a deep
|
|
6
|
+
* Generate a deep-research workflow that uses the real web_search/web_fetch tools.
|
|
7
|
+
*
|
|
8
|
+
* The script is static and reads its inputs from `args` (question/angles/minSupport),
|
|
9
|
+
* so the question is never string-interpolated into source — no escaping hazards.
|
|
10
|
+
* Inject the web tools at run time via the agent's `tools` option.
|
|
13
11
|
*/
|
|
14
|
-
export function generateDeepResearchWorkflow(
|
|
15
|
-
const cfg = { ...DEFAULT_CONFIG, ...config };
|
|
16
|
-
const escapedQuestion = question.replace(/'/g, "\\'").slice(0, 80);
|
|
17
|
-
const crossCheckPhase = cfg.crossCheck
|
|
18
|
-
? `phase('Cross-Check');
|
|
19
|
-
const crossCheck = await agent(
|
|
20
|
-
'Cross-check these research findings. Identify claims that are supported by multiple sources vs. claims that appear in only one source:\\n' +
|
|
21
|
-
'Sources: ' + JSON.stringify(sources),
|
|
22
|
-
{ label: 'cross-checker' }
|
|
23
|
-
);`
|
|
24
|
-
: "";
|
|
25
|
-
const crossCheckRef = cfg.crossCheck ? "'Cross-check: ' + crossCheck + '\\n' + " : "";
|
|
26
|
-
const crossCheckReturn = cfg.crossCheck ? "crossCheck, " : "";
|
|
12
|
+
export function generateDeepResearchWorkflow() {
|
|
27
13
|
return `export const meta = {
|
|
28
14
|
name: 'deep_research',
|
|
29
|
-
description: 'Deep research
|
|
15
|
+
description: 'Deep research with real web search and cross-checked claims',
|
|
30
16
|
phases: [
|
|
31
|
-
{ title: '
|
|
32
|
-
{ title: '
|
|
33
|
-
{ title: '
|
|
17
|
+
{ title: 'Queries' },
|
|
18
|
+
{ title: 'Gather' },
|
|
19
|
+
{ title: 'Verify' },
|
|
34
20
|
{ title: 'Report' },
|
|
35
21
|
],
|
|
36
|
-
}
|
|
22
|
+
}
|
|
37
23
|
|
|
38
|
-
|
|
39
|
-
const
|
|
40
|
-
const
|
|
41
|
-
'Plan ${cfg.searchAngles} different search angles to research this question comprehensively: ' + question,
|
|
42
|
-
{ label: 'search-planner' }
|
|
43
|
-
);
|
|
24
|
+
const question = (args && args.question) || ''
|
|
25
|
+
const angles = (args && args.angles) || 4
|
|
26
|
+
const minSupport = (args && args.minSupport) || 2
|
|
44
27
|
|
|
45
|
-
phase('
|
|
46
|
-
const
|
|
28
|
+
phase('Queries')
|
|
29
|
+
const plan = await agent(
|
|
30
|
+
'You are planning web research for this question:\\n' + question +
|
|
31
|
+
'\\n\\nProduce ' + angles + ' diverse, specific search queries that together cover the question from different angles.',
|
|
32
|
+
{ label: 'plan queries', schema: { type: 'object', properties: { queries: { type: 'array', items: { type: 'string' } } }, required: ['queries'] } }
|
|
33
|
+
)
|
|
34
|
+
const queries = (plan.queries || []).slice(0, angles)
|
|
35
|
+
|
|
36
|
+
phase('Gather')
|
|
37
|
+
const gathered = await parallel(queries.map((q, i) => () =>
|
|
47
38
|
agent(
|
|
48
|
-
'Research
|
|
49
|
-
|
|
39
|
+
'Research this query using the web_search and web_fetch tools.\\nQuery: ' + q +
|
|
40
|
+
'\\n\\nSteps: (1) call web_search with the query; (2) web_fetch the 2 most relevant result URLs; ' +
|
|
41
|
+
'(3) extract concrete, verifiable factual claims, each tagged with the exact source URL it came from. ' +
|
|
42
|
+
'Do NOT invent sources or claims — report only what the fetched pages actually say.',
|
|
43
|
+
{ label: 'research ' + (i + 1), schema: { type: 'object', properties: { sources: { type: 'array', items: { type: 'object', properties: { url: { type: 'string' }, claims: { type: 'array', items: { type: 'string' } } }, required: ['url', 'claims'] } } }, required: ['sources'] } }
|
|
50
44
|
)
|
|
51
|
-
))
|
|
45
|
+
))
|
|
46
|
+
const allSources = gathered.filter(Boolean).flatMap((g) => (g && g.sources) || [])
|
|
52
47
|
|
|
53
|
-
|
|
48
|
+
phase('Verify')
|
|
49
|
+
const verdict = await agent(
|
|
50
|
+
'Cross-check these research sources. Group claims that assert the same fact across different source URLs. ' +
|
|
51
|
+
'Keep a claim only if it is supported by at least ' + minSupport + ' distinct source URLs OR by one clearly authoritative source. ' +
|
|
52
|
+
'Discard claims found in a single weak source or that conflict with others.\\n\\nSOURCES JSON:\\n' + JSON.stringify(allSources),
|
|
53
|
+
{ label: 'cross-check', schema: { type: 'object', properties: { supported: { type: 'array', items: { type: 'object', properties: { claim: { type: 'string' }, sources: { type: 'array', items: { type: 'string' } } }, required: ['claim', 'sources'] } }, discarded: { type: 'array', items: { type: 'string' } } }, required: ['supported'] } }
|
|
54
|
+
)
|
|
54
55
|
|
|
55
|
-
phase('Report')
|
|
56
|
+
phase('Report')
|
|
56
57
|
const report = await agent(
|
|
57
|
-
'
|
|
58
|
-
'
|
|
59
|
-
'
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
);
|
|
58
|
+
'Write a concise, well-structured research report that answers the question using ONLY the supported claims below. ' +
|
|
59
|
+
'Cite source URLs inline next to each claim. If the evidence is thin, say so explicitly.\\n\\n' +
|
|
60
|
+
'QUESTION: ' + question + '\\n\\nSUPPORTED CLAIMS JSON:\\n' + JSON.stringify((verdict && verdict.supported) || []),
|
|
61
|
+
{ label: 'write report' }
|
|
62
|
+
)
|
|
63
63
|
|
|
64
|
-
return {
|
|
64
|
+
return { question, queries, supported: (verdict && verdict.supported) || [], report }`;
|
|
65
65
|
}
|
|
66
66
|
/**
|
|
67
67
|
* Generate a codebase audit workflow.
|
package/dist/index.d.ts
CHANGED
|
@@ -4,6 +4,7 @@ export type { AgentRunOptions, AgentRunResult, WorkflowAgentOptions } from "./ag
|
|
|
4
4
|
export { WorkflowAgent } from "./agent.js";
|
|
5
5
|
export type { AutoWorkflowConfig } from "./auto-workflow.js";
|
|
6
6
|
export { shouldUseWorkflow, suggestWorkflowScript } from "./auto-workflow.js";
|
|
7
|
+
export { registerBuiltinWorkflows } from "./builtin-commands.js";
|
|
7
8
|
export * from "./config.js";
|
|
8
9
|
export type { DeepResearchConfig } from "./deep-research.js";
|
|
9
10
|
export { generateCodebaseAuditWorkflow, generateDeepResearchWorkflow } from "./deep-research.js";
|
|
@@ -18,6 +19,7 @@ export type { PersistedRunState, RunPersistence, RunStatus } from "./run-persist
|
|
|
18
19
|
export { createRunPersistence, generateRunId } from "./run-persistence.js";
|
|
19
20
|
export type { StructuredOutputCapture, StructuredOutputToolOptions } from "./structured-output.js";
|
|
20
21
|
export { createStructuredOutputTool } from "./structured-output.js";
|
|
22
|
+
export { createWebFetchTool, createWebSearchTool, createWebTools } from "./web-tools.js";
|
|
21
23
|
export type { AgentOptions, JournalEntry, WorkflowMeta, WorkflowMetaPhase, WorkflowRunOptions, WorkflowRunResult, } from "./workflow.js";
|
|
22
24
|
export { parseWorkflowScript, runWorkflow } from "./workflow.js";
|
|
23
25
|
export { registerWorkflowCommands } from "./workflow-commands.js";
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
export { generateAdversarialReviewWorkflow, generateMultiPerspectiveWorkflow } from "./adversarial-review.js";
|
|
2
2
|
export { WorkflowAgent } from "./agent.js";
|
|
3
3
|
export { shouldUseWorkflow, suggestWorkflowScript } from "./auto-workflow.js";
|
|
4
|
+
export { registerBuiltinWorkflows } from "./builtin-commands.js";
|
|
4
5
|
export * from "./config.js";
|
|
5
6
|
export { generateCodebaseAuditWorkflow, generateDeepResearchWorkflow } from "./deep-research.js";
|
|
6
7
|
export { createToolUpdateWorkflowDisplay, createWidgetWorkflowDisplay, createWorkflowSnapshot, preview, recomputeWorkflowSnapshot, renderWorkflowLines, renderWorkflowText, } from "./display.js";
|
|
@@ -9,6 +10,7 @@ export { createWorkflowLogger } from "./logger.js";
|
|
|
9
10
|
export { buildModelRoutingInstructions, parseModelRoutingFromMeta, resolveModelForPhase } from "./model-routing.js";
|
|
10
11
|
export { createRunPersistence, generateRunId } from "./run-persistence.js";
|
|
11
12
|
export { createStructuredOutputTool } from "./structured-output.js";
|
|
13
|
+
export { createWebFetchTool, createWebSearchTool, createWebTools } from "./web-tools.js";
|
|
12
14
|
export { parseWorkflowScript, runWorkflow } from "./workflow.js";
|
|
13
15
|
export { registerWorkflowCommands } from "./workflow-commands.js";
|
|
14
16
|
export { WorkflowManager } from "./workflow-manager.js";
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real web tools for research workflows. These execute in the extension host
|
|
3
|
+
* process (which has network access), not in a subagent sandbox, so they perform
|
|
4
|
+
* genuine HTTP requests via Node's fetch.
|
|
5
|
+
*
|
|
6
|
+
* - web_search: best-effort Bing HTML scrape -> result {url, title}
|
|
7
|
+
* - web_fetch: fetch a URL and return readable text (HTML stripped, truncated)
|
|
8
|
+
*/
|
|
9
|
+
import { type ToolDefinition } from "@earendil-works/pi-coding-agent";
|
|
10
|
+
/** A tool that searches the web (best-effort) and returns result URLs + titles. */
|
|
11
|
+
export declare function createWebSearchTool(): ToolDefinition;
|
|
12
|
+
/** A tool that fetches a URL and returns readable text. */
|
|
13
|
+
export declare function createWebFetchTool(maxChars?: number): ToolDefinition;
|
|
14
|
+
/** Both web tools, for injecting into a research workflow's agents. */
|
|
15
|
+
export declare function createWebTools(): ToolDefinition[];
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real web tools for research workflows. These execute in the extension host
|
|
3
|
+
* process (which has network access), not in a subagent sandbox, so they perform
|
|
4
|
+
* genuine HTTP requests via Node's fetch.
|
|
5
|
+
*
|
|
6
|
+
* - web_search: best-effort Bing HTML scrape -> result {url, title}
|
|
7
|
+
* - web_fetch: fetch a URL and return readable text (HTML stripped, truncated)
|
|
8
|
+
*/
|
|
9
|
+
import { defineTool } from "@earendil-works/pi-coding-agent";
|
|
10
|
+
import { Type } from "typebox";
|
|
11
|
+
const UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36";
|
|
12
|
+
/**
 * GET `url` and read the whole response body as text.
 *
 * The request sends the module's browser-like user agent, follows redirects,
 * and is aborted if it (including reading the body) takes longer than
 * `timeoutMs` milliseconds.
 *
 * @param url Address to request.
 * @param timeoutMs Abort deadline in milliseconds (default 15000).
 * @returns `{ status, body }` — the HTTP status code and the body text.
 */
async function fetchText(url, timeoutMs = 15000) {
    const abort = new AbortController();
    const deadline = setTimeout(() => {
        abort.abort();
    }, timeoutMs);
    try {
        const response = await fetch(url, {
            headers: { "user-agent": UA },
            signal: abort.signal,
            redirect: "follow",
        });
        const status = response.status;
        const body = await response.text();
        return { status, body };
    }
    finally {
        // Runs on success, failure and abort alike so the timer never lingers.
        clearTimeout(deadline);
    }
}
|
|
23
|
+
/**
 * Reduce an HTML document to rough plain text.
 *
 * Steps, in order: drop `<script>` and `<style>` elements with their content,
 * turn `<br>` and the closing tags of common block elements into newlines,
 * replace every remaining tag with a space, decode a small set of named and
 * numeric character references, then collapse runs of spaces/tabs and of three
 * or more newlines.
 *
 * @param html Raw HTML markup.
 * @returns Trimmed text with tags removed and basic entities decoded.
 */
function htmlToText(html) {
    return html
        .replace(/<script[\s\S]*?<\/script>/gi, " ")
        .replace(/<style[\s\S]*?<\/style>/gi, " ")
        // `<br>` is a void element and has no closing tag, so match it directly.
        .replace(/<br\s*\/?>/gi, "\n")
        .replace(/<\/(p|div|li|h[1-6]|tr|br)>/gi, "\n")
        .replace(/<[^>]+>/g, " ")
        .replace(/&nbsp;/g, " ")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&#39;|&apos;/g, "'")
        .replace(/&quot;/g, '"')
        // `&amp;` is decoded last so that `&amp;lt;` becomes the literal text
        // `&lt;` rather than being decoded twice into `<`.
        .replace(/&amp;/g, "&")
        .replace(/[ \t]+/g, " ")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
|
|
39
|
+
/** Decode the few character references that commonly appear in result links and titles. */
function decodeBasicEntities(text) {
    return text
        .replace(/&nbsp;/g, " ")
        .replace(/&lt;/g, "<")
        .replace(/&gt;/g, ">")
        .replace(/&#39;|&apos;/g, "'")
        .replace(/&quot;/g, '"')
        // Last, so an escaped ampersand is not decoded twice.
        .replace(/&amp;/g, "&");
}
/**
 * Extract organic results from a Bing results page (best effort).
 *
 * Every `<h2>` whose first child is an `<a href="http(s)://…">` is treated as
 * a result. Links whose host is Bing itself or `go.microsoft.com` are skipped,
 * as are duplicates and links that do not parse as URLs.
 *
 * @param html Markup of the results page.
 * @param limit Stop once this many results have been collected.
 * @returns `{ url, title }` objects in page order; both fields have HTML
 *   character references decoded and the title has inline tags removed.
 */
function parseBingResults(html, limit) {
    const out = [];
    const seen = new Set();
    for (const m of html.matchAll(/<h2[^>]*>\s*<a[^>]+href="(https?:\/\/[^"]+)"[^>]*>([\s\S]*?)<\/a>/g)) {
        // Attribute values are HTML-escaped; undo that so `&amp;` in a query
        // string becomes a real `&` and the URL can actually be fetched.
        const url = decodeBasicEntities(m[1]);
        let host;
        try {
            host = new URL(url).hostname.toLowerCase();
        }
        catch {
            // Not a usable URL; skip it.
            continue;
        }
        // Judge by host only, so a result that merely mentions bing.com in its
        // path or query is not discarded.
        const isEngineLink = host === "bing.com" || host.endsWith(".bing.com") || host === "go.microsoft.com";
        if (isEngineLink || seen.has(url))
            continue;
        seen.add(url);
        // Strip inline markup first, then decode, so an escaped `&lt;b&gt;`
        // in a title survives as literal text.
        out.push({ url, title: decodeBasicEntities(m[2].replace(/<[^>]+>/g, "")).trim() });
        if (out.length >= limit)
            break;
    }
    return out;
}
|
|
53
|
+
/**
 * Build the `web_search` tool.
 *
 * The tool requests a Bing results page for the query, parses result links
 * out of the HTML, and answers with a numbered "title / URL" listing. The
 * requested `count` defaults to 6 and is clamped to the range 1–10. Errors are
 * reported in the tool's text content rather than thrown.
 */
export function createWebSearchTool() {
    return defineTool({
        name: "web_search",
        label: "Web Search",
        description: "Search the web and return a list of result URLs and titles. Use before web_fetch to find sources.",
        promptSnippet: "Search the web for sources",
        parameters: Type.Object({
            query: Type.String({ description: "The search query." }),
            count: Type.Optional(Type.Number({ description: "Max results (default 6)." })),
        }),
        async execute(_id, params) {
            const requested = params.count ?? 6;
            const limit = Math.min(Math.max(requested, 1), 10);
            try {
                const searchUrl = `https://www.bing.com/search?q=${encodeURIComponent(params.query)}`;
                const page = await fetchText(searchUrl);
                const results = parseBingResults(page.body, limit);
                let text;
                if (results.length) {
                    const lines = results.map((hit, index) => `${index + 1}. ${hit.title}\n ${hit.url}`);
                    text = lines.join("\n");
                }
                else {
                    // Include the HTTP status so a blocked or failed scrape is
                    // distinguishable from a query with no hits.
                    text = `No results parsed (HTTP ${page.status}). Try a different query or fetch a known URL directly.`;
                }
                return { content: [{ type: "text", text }], details: { results } };
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : error;
                return {
                    content: [{ type: "text", text: `web_search failed: ${reason}` }],
                    details: { results: [] },
                };
            }
        },
    });
}
|
|
83
|
+
/**
 * Build the `web_fetch` tool.
 *
 * The tool downloads the given URL, converts the body to plain text, and keeps
 * at most `maxChars` characters. The answer is prefixed with the HTTP status
 * and the URL. Errors are reported in the tool's text content (with
 * `status: 0` in the details) rather than thrown.
 *
 * NOTE(review): the target URL is passed to the fetch helper as given; confirm
 * whether loopback or private-network addresses should be rejected here.
 *
 * @param maxChars Maximum number of text characters returned (default 6000).
 */
export function createWebFetchTool(maxChars = 6000) {
    return defineTool({
        name: "web_fetch",
        label: "Web Fetch",
        description: "Fetch a URL and return its readable text content (HTML stripped, truncated).",
        promptSnippet: "Fetch a URL's text",
        parameters: Type.Object({
            url: Type.String({ description: "The absolute URL to fetch." }),
        }),
        async execute(_id, params) {
            const target = params.url;
            try {
                const page = await fetchText(target);
                const readable = htmlToText(page.body);
                const excerpt = readable.slice(0, maxChars);
                return {
                    content: [{ type: "text", text: `HTTP ${page.status} ${target}\n\n${excerpt}` }],
                    details: { status: page.status, url: target },
                };
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : error;
                return {
                    content: [
                        {
                            type: "text",
                            text: `web_fetch failed for ${target}: ${reason}`,
                        },
                    ],
                    details: { status: 0, url: target },
                };
            }
        },
    });
}
|
|
116
|
+
/**
 * Create a fresh pair of web tools — search first, then fetch — ready to be
 * passed as extra tools to a research workflow's agents.
 */
export function createWebTools() {
    const search = createWebSearchTool();
    const fetcher = createWebFetchTool();
    return [search, fetcher];
}
|
package/extensions/workflow.ts
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
2
|
-
import {
|
|
2
|
+
import {
|
|
3
|
+
createWorkflowStorage,
|
|
4
|
+
createWorkflowTool,
|
|
5
|
+
registerBuiltinWorkflows,
|
|
6
|
+
registerWorkflowCommands,
|
|
7
|
+
WorkflowManager,
|
|
8
|
+
} from "../src/index.js";
|
|
3
9
|
|
|
4
10
|
export default function extension(pi: ExtensionAPI) {
|
|
5
11
|
// Single manager/storage shared by the workflow tool and the /workflows command,
|
|
@@ -11,6 +17,7 @@ export default function extension(pi: ExtensionAPI) {
|
|
|
11
17
|
const workflowTool = createWorkflowTool({ cwd, manager, storage });
|
|
12
18
|
pi.registerTool(workflowTool);
|
|
13
19
|
registerWorkflowCommands(pi, manager);
|
|
20
|
+
registerBuiltinWorkflows(pi, { cwd });
|
|
14
21
|
|
|
15
22
|
pi.on("session_start", () => {
|
|
16
23
|
const active = pi.getActiveTools();
|
package/package.json
CHANGED
|
@@ -12,61 +12,64 @@ export interface AdversarialReviewConfig {
|
|
|
12
12
|
agreementThreshold: number;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
-
const DEFAULT_CONFIG: AdversarialReviewConfig = {
|
|
16
|
-
reviewerCount: 2,
|
|
17
|
-
filterContested: true,
|
|
18
|
-
agreementThreshold: 0.5,
|
|
19
|
-
};
|
|
20
|
-
|
|
21
15
|
/**
 * Generate the adversarial-review workflow script.
 *
 * The returned source is static: it reads its inputs from the workflow `args`
 * object at run time (`task`, `reviewers`, `threshold`), so no caller-supplied
 * text is ever interpolated into the script.
 *
 * Script outline:
 *  1. Investigate - one agent lists individually checkable findings for the task.
 *  2. Refute      - every finding is judged by `reviewers` independent agents that
 *                   are told to refute it; missing (falsy) votes are ignored.
 *  3. Consensus   - a finding survives when realVotes / validVotes >= `threshold`;
 *                   a final agent writes the report from the survivors only.
 *
 * Defaults: `reviewers` = 2, `threshold` = 0.5. An explicit numeric threshold of 0
 * is honoured (every judged finding survives). A missing investigator result is
 * treated as "no findings" instead of throwing.
 *
 * @returns The workflow script source; it resolves to `{ total, survivors, report }`.
 */
export function generateAdversarialReviewWorkflow(): string {
  return `export const meta = {
  name: 'adversarial_review',
  description: 'Adversarial review: findings cross-checked by independent skeptics',
  phases: [
    { title: 'Investigate' },
    { title: 'Refute' },
    { title: 'Consensus' },
  ],
}

const task = (args && args.task) || ''
const reviewers = (args && args.reviewers) || 2
// Only fall back to 0.5 when no usable number was supplied, so threshold 0 stays 0.
const thresholdArg = args ? args.threshold : undefined
const threshold = typeof thresholdArg === 'number' && isFinite(thresholdArg) ? thresholdArg : 0.5

phase('Investigate')
const investigation = await agent(
  'Investigate the following and list concrete, individually-checkable findings:\\n' + task,
  { label: 'investigate', schema: { type: 'object', properties: { findings: { type: 'array', items: { type: 'string' } } }, required: ['findings'] } }
)
// The investigator may yield nothing; treat that as an empty finding list.
const findings = (investigation && investigation.findings) || []

phase('Refute')
const judged = await parallel(findings.map((f, i) => () =>
  parallel(Array.from({ length: reviewers }, (_, r) => () =>
    agent(
      'You are a skeptical reviewer. Try to REFUTE this finding for the task below. ' +
      'Default to real=false when uncertain. Investigate with the available tools if needed.\\n\\n' +
      'TASK: ' + task + '\\nFINDING: ' + f,
      { label: 'refute ' + (i + 1) + '.' + (r + 1), schema: { type: 'object', properties: { real: { type: 'boolean' }, reason: { type: 'string' } }, required: ['real'] } }
    )
  )).then((votes) => {
    const valid = votes.filter(Boolean)
    const realCount = valid.filter((v) => v && v.real).length
    const ratio = valid.length ? realCount / valid.length : 0
    return { finding: f, realVotes: realCount, totalVotes: valid.length, survives: ratio >= threshold }
  })
))

const survivors = judged.filter((j) => j && j.survives)

phase('Consensus')
const report = await agent(
  'Write a final review report. Include ONLY the findings that survived adversarial review (listed below), ' +
  'each with a short justification. Note how many were discarded.\\n\\n' +
  'SURVIVING FINDINGS JSON:\\n' + JSON.stringify(survivors),
  { label: 'consensus' }
)

return { total: findings.length, survivors, report }`;
}
|
|
71
74
|
|
|
72
75
|
/**
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bundled workflow commands: `/deep-research` and `/adversarial-review`.
|
|
3
|
+
* They run a generated workflow script and print the final report.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { createCodingTools, type ExtensionAPI, type ExtensionCommandContext } from "@earendil-works/pi-coding-agent";
|
|
7
|
+
import { generateAdversarialReviewWorkflow } from "./adversarial-review.js";
|
|
8
|
+
import { generateDeepResearchWorkflow } from "./deep-research.js";
|
|
9
|
+
import { createWebTools } from "./web-tools.js";
|
|
10
|
+
import { runWorkflow, type WorkflowRunResult } from "./workflow.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Report whether the host already lists a command called `name`.
 *
 * Best-effort: `getCommands` is optional on the API, and any error raised while
 * querying or scanning the list is treated as "not registered".
 */
function alreadyRegistered(pi: ExtensionAPI, name: string): boolean {
  try {
    const known = pi.getCommands?.() ?? [];
    const matches = known.filter((c: { name: string }) => c.name === name);
    return matches.length > 0;
  } catch {
    return false;
  }
}
|
|
19
|
+
|
|
20
|
+
/**
 * Turn a finished workflow run into the text shown to the user.
 *
 * Prefers the workflow's own `report` field when it is a non-blank string;
 * otherwise falls back to pretty-printed JSON of the whole result.
 *
 * @param result - The completed run; only its `result` property is read.
 * @returns Always a string. `JSON.stringify` yields `undefined` for values such as
 *   `undefined` or a function, so that case is replaced by a fixed placeholder.
 */
function reportText(result: WorkflowRunResult): string {
  const r = result.result as { report?: unknown } | undefined;
  if (r && typeof r.report === "string" && r.report.trim()) return r.report;
  const serialized: string | undefined = JSON.stringify(result.result, null, 2);
  return serialized ?? "(workflow returned no result)";
}
|
|
25
|
+
|
|
26
|
+
/** Everything that differs between the bundled report-producing commands. */
interface BuiltinWorkflowCommand {
  /** Command name; also used as the status key and the message `customType`. */
  name: string;
  /** One-line description shown for the command. */
  description: string;
  /** Warning shown when the command is invoked without an argument. */
  usage: string;
  /** Info notice shown right before the workflow starts. */
  startNotice: string;
  /** Starts the workflow for the trimmed user input. */
  run: (input: string, ctx: ExtensionCommandContext) => ReturnType<typeof runWorkflow>;
}

/**
 * Register one bundled command unless a command with that name already exists.
 *
 * The handler trims the argument, warns on empty input, runs the workflow, clears
 * the status entry as soon as the run settles (success or failure), and then posts
 * the report. Any failure is reported through a single error notification.
 */
function registerReportCommand(pi: ExtensionAPI, spec: BuiltinWorkflowCommand): void {
  if (alreadyRegistered(pi, spec.name)) return;

  pi.registerCommand(spec.name, {
    description: spec.description,
    async handler(args: string, ctx: ExtensionCommandContext) {
      const input = args.trim();
      if (!input) return ctx.ui.notify(spec.usage, "warning");
      ctx.ui.notify(spec.startNotice, "info");
      try {
        let result: WorkflowRunResult;
        try {
          result = await spec.run(input, ctx);
        } finally {
          // Clear the status before the report is posted, whether the run succeeded or threw.
          ctx.ui.setStatus(spec.name, undefined);
        }
        await pi.sendMessage({ customType: spec.name, content: reportText(result), display: true });
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        ctx.ui.notify(`${spec.name} failed: ${reason}`, "error");
      }
    },
  });
}

/**
 * Register the bundled `/deep-research` and `/adversarial-review` commands.
 *
 * Each command runs its generated workflow script with the user's text passed as a
 * workflow argument, mirrors phase changes into a status entry, and posts the
 * final report as a displayed message. A command whose name is already registered
 * is left untouched.
 *
 * @param pi - Extension API used to register commands and send messages.
 * @param opts - `cwd` is the working directory handed to the workflow and the coding tools.
 */
export function registerBuiltinWorkflows(pi: ExtensionAPI, opts: { cwd: string }): void {
  const cwd = opts.cwd;

  registerReportCommand(pi, {
    name: "deep-research",
    description: "Research a question across the web with cross-checked sources",
    usage: "Usage: /deep-research <question>",
    startNotice: "Researching — running web searches across several angles…",
    run: (question, ctx) =>
      runWorkflow(generateDeepResearchWorkflow(), {
        cwd,
        args: { question },
        // Research agents need real web access on top of the coding tools.
        tools: [...createCodingTools(cwd), ...createWebTools()],
        onPhase: (title) => ctx.ui.setStatus("deep-research", `research: ${title}`),
      }),
  });

  registerReportCommand(pi, {
    name: "adversarial-review",
    description: "Investigate a task, then cross-check each finding with skeptical reviewers",
    usage: "Usage: /adversarial-review <task or question>",
    startNotice: "Reviewing — investigating then refuting each finding…",
    run: (task, ctx) =>
      runWorkflow(generateAdversarialReviewWorkflow(), {
        cwd,
        args: { task },
        tools: createCodingTools(cwd),
        onPhase: (title) => ctx.ui.setStatus("adversarial-review", `review: ${title}`),
      }),
  });
}
|
package/src/deep-research.ts
CHANGED
|
@@ -4,80 +4,72 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
export interface DeepResearchConfig {
  /** How many separate search angles (queries) the research should cover. */
  angles: number;
  /** Fewest distinct sources that must back a claim for it to pass cross-checking. */
  minSupport: number;
}
|
|
16
12
|
|
|
17
|
-
const DEFAULT_CONFIG: DeepResearchConfig = {
|
|
18
|
-
searchAngles: 4,
|
|
19
|
-
sourcesPerAngle: 3,
|
|
20
|
-
crossCheck: true,
|
|
21
|
-
maxAgents: 20,
|
|
22
|
-
};
|
|
23
|
-
|
|
24
13
|
/**
 * Generate the deep-research workflow script, which relies on the `web_search`
 * and `web_fetch` tools being injected into its agents at run time.
 *
 * The returned source is static: it reads `question`, `angles` (default 4) and
 * `minSupport` (default 2) from the workflow `args` object, so the question is
 * never string-interpolated into source and needs no escaping.
 *
 * Script outline:
 *  1. Queries - one agent plans up to `angles` search queries.
 *  2. Gather  - one agent per query searches, fetches pages and extracts claims
 *               tagged with their source URL; missing results are skipped.
 *  3. Verify  - one agent keeps claims backed by at least `minSupport` distinct
 *               URLs (or by one clearly authoritative source).
 *  4. Report  - one agent writes the cited report from the supported claims.
 *
 * A missing planner result is treated as "no queries" instead of throwing, in
 * line with how missing gather and verify results are already handled.
 *
 * @returns The workflow script source; it resolves to
 *   `{ question, queries, supported, report }`.
 */
export function generateDeepResearchWorkflow(): string {
  return `export const meta = {
  name: 'deep_research',
  description: 'Deep research with real web search and cross-checked claims',
  phases: [
    { title: 'Queries' },
    { title: 'Gather' },
    { title: 'Verify' },
    { title: 'Report' },
  ],
}

const question = (args && args.question) || ''
const angles = (args && args.angles) || 4
const minSupport = (args && args.minSupport) || 2

phase('Queries')
const plan = await agent(
  'You are planning web research for this question:\\n' + question +
  '\\n\\nProduce ' + angles + ' diverse, specific search queries that together cover the question from different angles.',
  { label: 'plan queries', schema: { type: 'object', properties: { queries: { type: 'array', items: { type: 'string' } } }, required: ['queries'] } }
)
// The planner may yield nothing; treat that as an empty query list.
const queries = ((plan && plan.queries) || []).slice(0, angles)

phase('Gather')
const gathered = await parallel(queries.map((q, i) => () =>
  agent(
    'Research this query using the web_search and web_fetch tools.\\nQuery: ' + q +
    '\\n\\nSteps: (1) call web_search with the query; (2) web_fetch the 2 most relevant result URLs; ' +
    '(3) extract concrete, verifiable factual claims, each tagged with the exact source URL it came from. ' +
    'Do NOT invent sources or claims — report only what the fetched pages actually say.',
    { label: 'research ' + (i + 1), schema: { type: 'object', properties: { sources: { type: 'array', items: { type: 'object', properties: { url: { type: 'string' }, claims: { type: 'array', items: { type: 'string' } } }, required: ['url', 'claims'] } } }, required: ['sources'] } }
  )
))
const allSources = gathered.filter(Boolean).flatMap((g) => (g && g.sources) || [])

phase('Verify')
const verdict = await agent(
  'Cross-check these research sources. Group claims that assert the same fact across different source URLs. ' +
  'Keep a claim only if it is supported by at least ' + minSupport + ' distinct source URLs OR by one clearly authoritative source. ' +
  'Discard claims found in a single weak source or that conflict with others.\\n\\nSOURCES JSON:\\n' + JSON.stringify(allSources),
  { label: 'cross-check', schema: { type: 'object', properties: { supported: { type: 'array', items: { type: 'object', properties: { claim: { type: 'string' }, sources: { type: 'array', items: { type: 'string' } } }, required: ['claim', 'sources'] } }, discarded: { type: 'array', items: { type: 'string' } } }, required: ['supported'] } }
)
const supported = (verdict && verdict.supported) || []

phase('Report')
const report = await agent(
  'Write a concise, well-structured research report that answers the question using ONLY the supported claims below. ' +
  'Cite source URLs inline next to each claim. If the evidence is thin, say so explicitly.\\n\\n' +
  'QUESTION: ' + question + '\\n\\nSUPPORTED CLAIMS JSON:\\n' + JSON.stringify(supported),
  { label: 'write report' }
)

return { question, queries, supported, report }`;
}
|
|
82
74
|
|
|
83
75
|
/**
|
package/src/index.ts
CHANGED
|
@@ -4,6 +4,7 @@ export type { AgentRunOptions, AgentRunResult, WorkflowAgentOptions } from "./ag
|
|
|
4
4
|
export { WorkflowAgent } from "./agent.js";
|
|
5
5
|
export type { AutoWorkflowConfig } from "./auto-workflow.js";
|
|
6
6
|
export { shouldUseWorkflow, suggestWorkflowScript } from "./auto-workflow.js";
|
|
7
|
+
export { registerBuiltinWorkflows } from "./builtin-commands.js";
|
|
7
8
|
export * from "./config.js";
|
|
8
9
|
export type { DeepResearchConfig } from "./deep-research.js";
|
|
9
10
|
export { generateCodebaseAuditWorkflow, generateDeepResearchWorkflow } from "./deep-research.js";
|
|
@@ -39,6 +40,7 @@ export type { PersistedRunState, RunPersistence, RunStatus } from "./run-persist
|
|
|
39
40
|
export { createRunPersistence, generateRunId } from "./run-persistence.js";
|
|
40
41
|
export type { StructuredOutputCapture, StructuredOutputToolOptions } from "./structured-output.js";
|
|
41
42
|
export { createStructuredOutputTool } from "./structured-output.js";
|
|
43
|
+
export { createWebFetchTool, createWebSearchTool, createWebTools } from "./web-tools.js";
|
|
42
44
|
export type {
|
|
43
45
|
AgentOptions,
|
|
44
46
|
JournalEntry,
|
package/src/web-tools.ts
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real web tools for research workflows. These execute in the extension host
|
|
3
|
+
* process (which has network access), not in a subagent sandbox, so they perform
|
|
4
|
+
* genuine HTTP requests via Node's fetch.
|
|
5
|
+
*
|
|
6
|
+
* - web_search: best-effort Bing HTML scrape -> result {url, title}
|
|
7
|
+
* - web_fetch: fetch a URL and return readable text (HTML stripped, truncated)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { defineTool, type ToolDefinition } from "@earendil-works/pi-coding-agent";
|
|
11
|
+
import { Type } from "typebox";
|
|
12
|
+
|
|
13
|
+
const UA =
|
|
14
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Safari/537.36";
|
|
15
|
+
|
|
16
|
+
/**
 * Request `url` with the module's browser-like User-Agent and return the HTTP
 * status together with the response body decoded as text. Redirects are followed.
 *
 * A single timer bounds the whole exchange, including reading the body. When it
 * fires the request is aborted and the resulting rejection is replaced by an
 * explicit "timed out" error, so callers that surface `error.message` say what
 * actually happened instead of a generic abort message.
 *
 * @param url - URL to request.
 * @param timeoutMs - Overall time budget in milliseconds (default 15000).
 * @returns The status code and body text. Non-2xx statuses are returned, not thrown.
 * @throws Error when the request fails or the time budget is exceeded.
 */
async function fetchText(url: string, timeoutMs = 15000): Promise<{ status: number; body: string }> {
  const controller = new AbortController();
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, timeoutMs);
  try {
    const res = await fetch(url, { headers: { "user-agent": UA }, signal: controller.signal, redirect: "follow" });
    return { status: res.status, body: await res.text() };
  } catch (error) {
    // Only rewrite the error when our own timer caused the abort.
    if (timedOut) throw new Error(`request timed out after ${timeoutMs} ms`);
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
|
|
26
|
+
|
|
27
|
+
/** Named character references decoded by `htmlToText`; others are left as written. */
const HTML_NAMED_ENTITIES: Record<string, string> = {
  nbsp: " ",
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/**
 * Reduce an HTML document to readable plain text.
 *
 * Steps, in order:
 *  1. drop `<script>` and `<style>` elements including their content;
 *  2. turn `<br>` tags and the closing tags of common block elements into newlines;
 *  3. replace every remaining tag with a space;
 *  4. decode character references in ONE pass (named ones from the table above
 *     plus decimal/hex numeric ones), so text such as `&amp;lt;` becomes `&lt;`
 *     and is not unescaped a second time;
 *  5. collapse runs of spaces/tabs, strip spaces around newlines, limit blank
 *     lines to one, and trim.
 *
 * @param html - Raw HTML (plain text passes through with whitespace normalised).
 * @returns The extracted text.
 */
function htmlToText(html: string): string {
  const decodeReference = (whole: string, ref: string): string => {
    if (ref.charAt(0) !== "#") return HTML_NAMED_ENTITIES[ref.toLowerCase()] ?? whole;
    const isHex = ref.charAt(1).toLowerCase() === "x";
    const code = isHex ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
    // Leave invalid or out-of-range references untouched rather than throwing.
    return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  };

  return html
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<br\b[^>]*>/gi, "\n")
    .replace(/<\/(p|div|li|h[1-6]|tr|br)>/gi, "\n")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, decodeReference)
    .replace(/\u00a0/g, " ") // numeric no-break spaces behave like the named one
    .replace(/[ \t]+/g, " ")
    .replace(/ ?\n ?/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
43
|
+
|
|
44
|
+
/**
 * Extract organic results from a Bing results page (best-effort HTML scrape).
 *
 * A result is an `<h2>` whose first child is a link with an absolute http(s)
 * `href`. Character references in the `href` and the title are decoded, so a
 * query string written as `?a=1&amp;b=2` in the markup comes back as a usable URL.
 * Links whose host is Bing itself or `go.microsoft.com` are skipped, as are
 * duplicates and hrefs that do not parse as URLs.
 *
 * @param html - Raw HTML of the results page.
 * @param limit - Maximum number of results; NaN or a value below 1 yields none.
 * @returns Results in page order, each with the decoded URL and tag-free title.
 */
function parseBingResults(html: string, limit: number): Array<{ url: string; title: string }> {
  const out: Array<{ url: string; title: string }> = [];
  if (Number.isNaN(limit) || limit < 1) return out;

  const named: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };
  // Single-pass decode so "&amp;lt;" becomes "&lt;" and is not unescaped twice.
  const decode = (text: string): string =>
    text.replace(/&(#x[0-9a-f]+|#\d+|[a-z][a-z0-9]*);/gi, (whole: string, ref: string) => {
      if (ref.charAt(0) !== "#") return named[ref.toLowerCase()] ?? whole;
      const isHex = ref.charAt(1).toLowerCase() === "x";
      const code = isHex ? Number.parseInt(ref.slice(2), 16) : Number.parseInt(ref.slice(1), 10);
      return code > 0 && code <= 0x10ffff ? String.fromCodePoint(code) : whole;
    });

  const seen = new Set<string>();
  for (const m of html.matchAll(/<h2[^>]*>\s*<a[^>]+href="(https?:\/\/[^"]+)"[^>]*>([\s\S]*?)<\/a>/g)) {
    const url = decode(m[1]);

    // Filter on the real host: a mention of bing.com elsewhere in the URL is fine.
    let host: string;
    try {
      host = new URL(url).hostname.toLowerCase();
    } catch {
      continue;
    }
    if (host === "bing.com" || host.endsWith(".bing.com") || host === "go.microsoft.com") continue;

    if (seen.has(url)) continue;
    seen.add(url);
    out.push({ url, title: decode(m[2].replace(/<[^>]+>/g, "")).trim() });
    if (out.length >= limit) break;
  }
  return out;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Build the `web_search` tool: a best-effort search that fetches a Bing results
 * page and returns the parsed result URLs and titles.
 *
 * The tool never throws. A request failure, or a page from which nothing could be
 * parsed, is reported as text content with an empty `details.results`.
 *
 * `count` comes from the model, so it is sanitised before use: a missing or
 * non-finite value falls back to 6, fractions are truncated, and the result is
 * clamped to the range 1..10.
 */
export function createWebSearchTool(): ToolDefinition {
  return defineTool({
    name: "web_search",
    label: "Web Search",
    description: "Search the web and return a list of result URLs and titles. Use before web_fetch to find sources.",
    promptSnippet: "Search the web for sources",
    parameters: Type.Object({
      query: Type.String({ description: "The search query." }),
      count: Type.Optional(Type.Number({ description: "Max results (default 6)." })),
    }),
    async execute(_id, params: { query: string; count?: number }) {
      // NaN would otherwise survive Math.min/Math.max and disable the cap entirely.
      const requested =
        typeof params.count === "number" && Number.isFinite(params.count) ? Math.trunc(params.count) : 6;
      const limit = Math.min(Math.max(requested, 1), 10);
      try {
        const { status, body } = await fetchText(`https://www.bing.com/search?q=${encodeURIComponent(params.query)}`);
        const results = parseBingResults(body, limit);
        const text = results.length
          ? results.map((r, i) => `${i + 1}. ${r.title}\n   ${r.url}`).join("\n")
          : `No results parsed (HTTP ${status}). Try a different query or fetch a known URL directly.`;
        return { content: [{ type: "text", text }], details: { results } };
      } catch (error) {
        return {
          content: [{ type: "text", text: `web_search failed: ${error instanceof Error ? error.message : error}` }],
          details: { results: [] as Array<{ url: string; title: string }> },
        };
      }
    },
  }) as unknown as ToolDefinition;
}
|
|
86
|
+
|
|
87
|
+
/**
 * Build the `web_fetch` tool: fetch a URL and return its readable text.
 *
 * The URL is supplied by the model, so it is validated first: it must parse as an
 * absolute URL with the `http:` or `https:` scheme. The body is converted with
 * `htmlToText`; when the text exceeds `maxChars` it is cut and a trailing marker
 * states how much was kept, so the reader knows the page continues.
 *
 * The tool never throws. Validation and request failures are reported as text
 * content with `details.status` set to 0.
 *
 * @param maxChars - Maximum number of text characters returned (default 6000).
 */
export function createWebFetchTool(maxChars = 6000): ToolDefinition {
  return defineTool({
    name: "web_fetch",
    label: "Web Fetch",
    description: "Fetch a URL and return its readable text content (HTML stripped, truncated).",
    promptSnippet: "Fetch a URL's text",
    parameters: Type.Object({
      url: Type.String({ description: "The absolute URL to fetch." }),
    }),
    async execute(_id, params: { url: string }) {
      try {
        // `new URL` throws on relative or malformed input; the catch below reports it.
        const scheme = new URL(params.url).protocol;
        if (scheme !== "http:" && scheme !== "https:") {
          throw new Error(`unsupported URL scheme "${scheme}" (only http and https are allowed)`);
        }

        const { status, body } = await fetchText(params.url);
        const full = htmlToText(body);
        const text =
          full.length > maxChars
            ? `${full.slice(0, maxChars)}\n\n[truncated: first ${maxChars} of ${full.length} characters shown]`
            : full;
        return {
          content: [{ type: "text", text: `HTTP ${status} ${params.url}\n\n${text}` }],
          details: { status, url: params.url },
        };
      } catch (error) {
        return {
          content: [
            {
              type: "text",
              text: `web_fetch failed for ${params.url}: ${error instanceof Error ? error.message : error}`,
            },
          ],
          details: { status: 0, url: params.url },
        };
      }
    },
  }) as unknown as ToolDefinition;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Convenience bundle for research workflows: a fresh `web_search` tool followed
 * by a fresh `web_fetch` tool (default settings), ready to pass to an agent.
 */
export function createWebTools(): ToolDefinition[] {
  const searchTool = createWebSearchTool();
  const fetchTool = createWebFetchTool();
  return [searchTool, fetchTool];
}
|