ultimate-pi 0.19.0 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/SYSTEM.md +30 -12
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/stack-researcher.md +5 -1
- package/.pi/agents/harness/running/executor.md +42 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/harness-anchored-edit.ts +141 -0
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +689 -51
- package/.pi/harness/agents.manifest.json +30 -6
- package/.pi/harness/agents.policy.yaml +37 -4
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/adrs/0051-hash-anchored-executor-edits.md +41 -0
- package/.pi/harness/docs/adrs/README.md +2 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/docs/practice-map.md +11 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.d.mts +4 -0
- package/.pi/lib/agents-policy.mjs +49 -1
- package/.pi/lib/agents-policy.ts +1 -0
- package/.pi/lib/harness-anchored-edit/.hash_anchors +1721 -0
- package/.pi/lib/harness-anchored-edit/anchor-state.ts +320 -0
- package/.pi/lib/harness-anchored-edit/apply-anchored-edits.ts +161 -0
- package/.pi/lib/harness-anchored-edit/edit-executor.ts +146 -0
- package/.pi/lib/harness-anchored-edit/index.ts +9 -0
- package/.pi/lib/harness-anchored-edit/line-protocol.ts +38 -0
- package/.pi/lib/harness-anchored-edit/settings.ts +1 -0
- package/.pi/lib/harness-anchored-edit/task-id.ts +8 -0
- package/.pi/lib/harness-anchored-edit/types.ts +19 -0
- package/.pi/lib/harness-lens/clients/anchored-edit-autopatch.ts +158 -0
- package/.pi/lib/harness-lens/index.ts +24 -7
- package/.pi/lib/harness-subagent-auth.ts +39 -9
- package/.pi/lib/harness-subagents-bridge.ts +24 -1
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/lib/harness-web/run-cli.ts +42 -2
- package/.pi/prompts/harness-plan.md +1 -0
- package/.pi/prompts/harness-setup.md +3 -1
- package/.pi/prompts/harness-steer.md +1 -1
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/harness-anchored-edit-smoke.mjs +45 -0
- package/.pi/scripts/harness-cli-verify.sh +5 -0
- package/.pi/scripts/harness-verify.mjs +145 -0
- package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/run-tests.mjs +64 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +12 -0
- package/THIRD_PARTY_NOTICES.md +7 -0
- package/package.json +7 -4
- package/vendor/pi-subagents/src/agents.ts +5 -0
- package/vendor/pi-subagents/src/subagents.ts +22 -3
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
- package/.pi/scripts/release.sh +0 -338
|
@@ -1,14 +1,34 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* harness-web-tools — web_search
|
|
2
|
+
* harness-web-tools — WRS web_search, web_fetch, web_find_similar, web_contents.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
6
|
+
import { dirname, resolve } from "node:path";
|
|
5
7
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
8
|
import { Type } from "@sinclair/typebox";
|
|
7
9
|
import { claimHarnessGovernanceLoad } from "../lib/extension-load-guard.js";
|
|
10
|
+
import {
|
|
11
|
+
rememberSessionWebArtifactDir,
|
|
12
|
+
resolveWebOutputPath,
|
|
13
|
+
webArtifactScopeHint,
|
|
14
|
+
type WebArtifactScope,
|
|
15
|
+
} from "../lib/harness-web/artifacts.js";
|
|
16
|
+
import {
|
|
17
|
+
fingerprintFile,
|
|
18
|
+
formatCacheAge,
|
|
19
|
+
lookupFetchCache,
|
|
20
|
+
lookupSearchCache,
|
|
21
|
+
publishWorkspaceAlias,
|
|
22
|
+
writeFetchCacheEntry,
|
|
23
|
+
writeSearchCacheEntry,
|
|
24
|
+
type FetchCacheContext,
|
|
25
|
+
type SearchCacheContext,
|
|
26
|
+
} from "../lib/harness-web/cache.js";
|
|
8
27
|
import {
|
|
9
28
|
harnessWebContextLine,
|
|
10
29
|
readTextExcerpt,
|
|
11
30
|
runHarnessWeb,
|
|
31
|
+
summarizeDeepSearchJson,
|
|
12
32
|
summarizeSearchJson,
|
|
13
33
|
} from "../lib/harness-web/run-cli.js";
|
|
14
34
|
|
|
@@ -16,24 +36,106 @@ import {
|
|
|
16
36
|
const MODULE_URL = import.meta.url;
|
|
17
37
|
|
|
18
38
|
const WEB_SEARCH_GUIDELINES = [
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
39
|
+
"DEFAULT tier=deep for landscape, prior art, comparisons, planning research, or any multi-source question.",
|
|
40
|
+
"Before deep (research): spawn harness/web-retrieval/web-query-expander → <artifactDir>/angles.yaml → anglesFile on web_search.",
|
|
41
|
+
"Latency: tier=instant|standard with NO expander; or web-query-expander-fast (2–3 angles); or expandHeuristic:true (no LLM).",
|
|
42
|
+
"tier=standard ONLY for one narrow fact or after search-deep.json exists.",
|
|
43
|
+
"tier=instant ONLY when latency-critical and the question is closed-form.",
|
|
44
|
+
"Set HARNESS_WEB_FAST_MODEL / HARNESS_WEB_EXPANDER_MODEL / HARNESS_WEB_QUALITY_MODEL env (provider/model-id) for web subagents (web-retrieval skill).",
|
|
45
|
+
"Never run 3+ web_search calls with different queries; use one deep search instead.",
|
|
46
|
+
"After deep: read <artifactDir>/search-deep.json; web_fetch with highlights:true before full scrape.",
|
|
47
|
+
"bulk:true only when you need immediate markdown for top N URLs.",
|
|
48
|
+
"Library docs: context7 only, not web_search.",
|
|
49
|
+
"Never preflight UP_PKG, ls harness-web.py, or python3 -c import scrapling before searching.",
|
|
23
50
|
];
|
|
24
51
|
|
|
25
52
|
const WEB_FETCH_GUIDELINES = [
|
|
53
|
+
"Prefer highlights:true + highlightQuery after deep search before full page markdown.",
|
|
26
54
|
"Use web_fetch for page markdown or same-host link maps — never curl/wget the URL.",
|
|
27
55
|
"Never use raw scrapling CLI for fetch; harness-web handles Scrapling bootstrap.",
|
|
28
56
|
"Library API documentation → context7 only, not web_fetch.",
|
|
29
57
|
"Set fast:true for static docs (example.com, raw HTML docs, localhost).",
|
|
30
58
|
];
|
|
31
59
|
|
|
60
|
+
const WEB_FIND_SIMILAR_GUIDELINES = [
|
|
61
|
+
"Use when you have a good seed URL and want more pages like it (Exa findSimilar analog).",
|
|
62
|
+
"Prefer over manually re-phrasing the same intent in multiple web_search calls.",
|
|
63
|
+
"Output is search-deep.json shape; follow with web_fetch highlights on top hits.",
|
|
64
|
+
];
|
|
65
|
+
|
|
66
|
+
const WEB_CONTENTS_GUIDELINES = [
|
|
67
|
+
"Batch-fetch URLs after deep search — pass fromSearch pointing at search-deep.json.",
|
|
68
|
+
"Use after web_search(tier=deep), not instead of deep search.",
|
|
69
|
+
"Set highlights:true when building an evidence bundle for web-answerer.",
|
|
70
|
+
];
|
|
71
|
+
|
|
72
|
+
const WebScopeSchema = Type.Optional(
|
|
73
|
+
Type.String({
|
|
74
|
+
description:
|
|
75
|
+
"WRS workspace directory (default .web/; set HARNESS_WEB_ISOLATE=1 for per-run/session dirs)",
|
|
76
|
+
}),
|
|
77
|
+
);
|
|
78
|
+
|
|
79
|
+
const WebCacheControlSchema = {
|
|
80
|
+
refreshCache: Type.Optional(
|
|
81
|
+
Type.Boolean({
|
|
82
|
+
description: "Bypass pooled .web/cache and refetch from the network",
|
|
83
|
+
default: false,
|
|
84
|
+
}),
|
|
85
|
+
),
|
|
86
|
+
cacheMaxAge: Type.Optional(
|
|
87
|
+
Type.Number({
|
|
88
|
+
description: "Reuse cache entry only if younger than this many seconds",
|
|
89
|
+
minimum: 60,
|
|
90
|
+
}),
|
|
91
|
+
),
|
|
92
|
+
};
|
|
93
|
+
|
|
32
94
|
const WebSearchSchema = Type.Object({
|
|
33
|
-
query: Type.String({ description: "Search query" }),
|
|
95
|
+
query: Type.String({ description: "Search query or research intent" }),
|
|
96
|
+
webScope: WebScopeSchema,
|
|
97
|
+
tier: Type.Optional(
|
|
98
|
+
Type.Union(
|
|
99
|
+
[
|
|
100
|
+
Type.Literal("instant"),
|
|
101
|
+
Type.Literal("standard"),
|
|
102
|
+
Type.Literal("deep"),
|
|
103
|
+
Type.Literal("research"),
|
|
104
|
+
],
|
|
105
|
+
{
|
|
106
|
+
description:
|
|
107
|
+
"WRS tier: deep (default for research), standard (narrow follow-up), instant (fast fact)",
|
|
108
|
+
default: "deep",
|
|
109
|
+
},
|
|
110
|
+
),
|
|
111
|
+
),
|
|
112
|
+
anglesFile: Type.Optional(
|
|
113
|
+
Type.String({
|
|
114
|
+
description:
|
|
115
|
+
"Path to angles YAML from web-query-expander (required for tier=deep unless angles provided)",
|
|
116
|
+
}),
|
|
117
|
+
),
|
|
118
|
+
angles: Type.Optional(
|
|
119
|
+
Type.Array(Type.String(), {
|
|
120
|
+
description: "Inline search queries (one per angle); skips anglesFile",
|
|
121
|
+
minItems: 2,
|
|
122
|
+
maxItems: 8,
|
|
123
|
+
}),
|
|
124
|
+
),
|
|
125
|
+
category: Type.Optional(
|
|
126
|
+
Type.String({
|
|
127
|
+
description: "Expander hint: code|company|people|paper|news",
|
|
128
|
+
}),
|
|
129
|
+
),
|
|
130
|
+
expandHeuristic: Type.Optional(
|
|
131
|
+
Type.Boolean({
|
|
132
|
+
description: "Emergency angle templates without expander (fallback only)",
|
|
133
|
+
default: false,
|
|
134
|
+
}),
|
|
135
|
+
),
|
|
34
136
|
limit: Type.Optional(
|
|
35
137
|
Type.Number({
|
|
36
|
-
description: "Max results (
|
|
138
|
+
description: "Max results (tier defaults: instant 5, standard 10, deep 10)",
|
|
37
139
|
minimum: 1,
|
|
38
140
|
maximum: 20,
|
|
39
141
|
}),
|
|
@@ -41,7 +143,7 @@ const WebSearchSchema = Type.Object({
|
|
|
41
143
|
output: Type.Optional(
|
|
42
144
|
Type.String({
|
|
43
145
|
description:
|
|
44
|
-
"Output path (default .web/search.json
|
|
146
|
+
"Output path (default .web/search-deep.json for deep, .web/search.json otherwise)",
|
|
45
147
|
}),
|
|
46
148
|
),
|
|
47
149
|
bulk: Type.Optional(
|
|
@@ -51,10 +153,12 @@ const WebSearchSchema = Type.Object({
|
|
|
51
153
|
default: false,
|
|
52
154
|
}),
|
|
53
155
|
),
|
|
156
|
+
...WebCacheControlSchema,
|
|
54
157
|
});
|
|
55
158
|
|
|
56
159
|
const WebFetchSchema = Type.Object({
|
|
57
160
|
url: Type.String({ description: "URL to fetch" }),
|
|
161
|
+
webScope: WebScopeSchema,
|
|
58
162
|
mode: Type.Optional(
|
|
59
163
|
Type.Union([Type.Literal("scrape"), Type.Literal("map")], {
|
|
60
164
|
description: "scrape (markdown) or map (same-host links JSON)",
|
|
@@ -70,6 +174,18 @@ const WebFetchSchema = Type.Object({
|
|
|
70
174
|
default: false,
|
|
71
175
|
}),
|
|
72
176
|
),
|
|
177
|
+
highlights: Type.Optional(
|
|
178
|
+
Type.Boolean({
|
|
179
|
+
description: "Extract query-aligned excerpts to highlights JSON",
|
|
180
|
+
default: false,
|
|
181
|
+
}),
|
|
182
|
+
),
|
|
183
|
+
highlightQuery: Type.Optional(
|
|
184
|
+
Type.String({ description: "Query for highlight scoring (required if highlights)" }),
|
|
185
|
+
),
|
|
186
|
+
highlightsOutput: Type.Optional(
|
|
187
|
+
Type.String({ description: "Highlights JSON path (default .web/highlights.json)" }),
|
|
188
|
+
),
|
|
73
189
|
limit: Type.Optional(
|
|
74
190
|
Type.Number({
|
|
75
191
|
description: "For map mode: max links (default 100)",
|
|
@@ -77,6 +193,50 @@ const WebFetchSchema = Type.Object({
|
|
|
77
193
|
maximum: 500,
|
|
78
194
|
}),
|
|
79
195
|
),
|
|
196
|
+
...WebCacheControlSchema,
|
|
197
|
+
});
|
|
198
|
+
|
|
199
|
+
const WebFindSimilarSchema = Type.Object({
|
|
200
|
+
url: Type.String({ description: "Seed URL to find similar pages for" }),
|
|
201
|
+
webScope: WebScopeSchema,
|
|
202
|
+
limit: Type.Optional(
|
|
203
|
+
Type.Number({ description: "Max fused results", minimum: 1, maximum: 20 }),
|
|
204
|
+
),
|
|
205
|
+
output: Type.Optional(
|
|
206
|
+
Type.String({ description: "Output JSON (default .web/search-deep.json)" }),
|
|
207
|
+
),
|
|
208
|
+
fast: Type.Optional(
|
|
209
|
+
Type.Boolean({
|
|
210
|
+
description: "Fast HTTP for seed page fetch",
|
|
211
|
+
default: true,
|
|
212
|
+
}),
|
|
213
|
+
),
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
const WebContentsSchema = Type.Object({
|
|
217
|
+
webScope: WebScopeSchema,
|
|
218
|
+
urls: Type.Optional(
|
|
219
|
+
Type.Array(Type.String(), { description: "URLs to fetch (or use fromSearch)" }),
|
|
220
|
+
),
|
|
221
|
+
fromSearch: Type.Optional(
|
|
222
|
+
Type.String({
|
|
223
|
+
description: "search.json or search-deep.json to read URLs from",
|
|
224
|
+
}),
|
|
225
|
+
),
|
|
226
|
+
outputDir: Type.Optional(
|
|
227
|
+
Type.String({ description: "Output directory (default .web/contents)" }),
|
|
228
|
+
),
|
|
229
|
+
limit: Type.Optional(
|
|
230
|
+
Type.Number({ description: "Max URLs to fetch", minimum: 1, maximum: 10 }),
|
|
231
|
+
),
|
|
232
|
+
highlights: Type.Optional(Type.Boolean({ default: false })),
|
|
233
|
+
highlightQuery: Type.Optional(Type.String()),
|
|
234
|
+
evidenceBundle: Type.Optional(
|
|
235
|
+
Type.String({
|
|
236
|
+
description: "Write evidence-bundle.json (requires fromSearch)",
|
|
237
|
+
}),
|
|
238
|
+
),
|
|
239
|
+
fast: Type.Optional(Type.Boolean({ default: false })),
|
|
80
240
|
});
|
|
81
241
|
|
|
82
242
|
function failResult(text: string) {
|
|
@@ -93,15 +253,81 @@ function okResult(text: string, details: Record<string, unknown> = {}) {
|
|
|
93
253
|
};
|
|
94
254
|
}
|
|
95
255
|
|
|
96
|
-
|
|
256
|
+
type WebToolCtx = {
|
|
257
|
+
cwd?: string;
|
|
258
|
+
sessionManager?: { getSessionId(): string };
|
|
259
|
+
};
|
|
260
|
+
|
|
261
|
+
function sessionCwd(ctx: WebToolCtx): string {
|
|
97
262
|
return ctx.cwd ?? process.cwd();
|
|
98
263
|
}
|
|
99
264
|
|
|
265
|
+
function piSessionId(ctx: WebToolCtx): string {
|
|
266
|
+
return ctx.sessionManager?.getSessionId?.() ?? "default";
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
function resolveScopedOutput(
|
|
270
|
+
ctx: WebToolCtx,
|
|
271
|
+
basename: string,
|
|
272
|
+
explicitOutput?: string,
|
|
273
|
+
webScope?: string,
|
|
274
|
+
): { output: string; artifactDir: string; scope: WebArtifactScope } {
|
|
275
|
+
const cwd = sessionCwd(ctx);
|
|
276
|
+
const sessionId = piSessionId(ctx);
|
|
277
|
+
const resolved = resolveWebOutputPath({
|
|
278
|
+
projectRoot: cwd,
|
|
279
|
+
piSessionId: sessionId,
|
|
280
|
+
basename,
|
|
281
|
+
explicitOutput,
|
|
282
|
+
webScope,
|
|
283
|
+
});
|
|
284
|
+
rememberSessionWebArtifactDir(sessionId, resolved.artifactDir);
|
|
285
|
+
return {
|
|
286
|
+
output: resolved.path,
|
|
287
|
+
artifactDir: resolved.artifactDir,
|
|
288
|
+
scope: resolved.scope,
|
|
289
|
+
};
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
function ensureParentDir(cwd: string, filePath: string): void {
|
|
293
|
+
mkdirSync(dirname(resolve(cwd, filePath)), { recursive: true });
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
function searchEngineId(): string {
|
|
297
|
+
return process.env.HARNESS_WEB_SEARCH_ENGINE?.trim() || "ddg_html";
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
function cacheControlFromParams(params: {
|
|
301
|
+
refreshCache?: boolean;
|
|
302
|
+
cacheMaxAge?: number;
|
|
303
|
+
}): { refresh: boolean; maxAgeSec?: number } {
|
|
304
|
+
return {
|
|
305
|
+
refresh: params.refreshCache === true,
|
|
306
|
+
maxAgeSec:
|
|
307
|
+
typeof params.cacheMaxAge === "number" ? params.cacheMaxAge : undefined,
|
|
308
|
+
};
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
function resolveTier(params: { tier?: string; bulk?: boolean }): string {
|
|
312
|
+
if (params.bulk) return "standard";
|
|
313
|
+
const t = String(params.tier ?? "deep").trim();
|
|
314
|
+
if (["instant", "standard", "deep", "research"].includes(t)) return t;
|
|
315
|
+
return "deep";
|
|
316
|
+
}
|
|
317
|
+
|
|
100
318
|
export default function harnessWebTools(pi: ExtensionAPI) {
|
|
101
319
|
if (!claimHarnessGovernanceLoad("harness-web-tools", MODULE_URL)) return;
|
|
102
|
-
pi.on("before_agent_start", async (event) => {
|
|
320
|
+
pi.on("before_agent_start", async (event, ctx) => {
|
|
321
|
+
const cwd = sessionCwd(ctx);
|
|
322
|
+
const sessionId = piSessionId(ctx);
|
|
323
|
+
const scope = resolveWebOutputPath({
|
|
324
|
+
projectRoot: cwd,
|
|
325
|
+
piSessionId: sessionId,
|
|
326
|
+
basename: "angles.yaml",
|
|
327
|
+
}).scope;
|
|
328
|
+
rememberSessionWebArtifactDir(sessionId, scope.artifactDir);
|
|
103
329
|
return {
|
|
104
|
-
systemPrompt: `${event.systemPrompt}\n\n${harnessWebContextLine()}`,
|
|
330
|
+
systemPrompt: `${event.systemPrompt}\n\n${harnessWebContextLine()}\n${webArtifactScopeHint(scope)}`,
|
|
105
331
|
};
|
|
106
332
|
});
|
|
107
333
|
|
|
@@ -109,51 +335,216 @@ export default function harnessWebTools(pi: ExtensionAPI) {
|
|
|
109
335
|
name: "web_search",
|
|
110
336
|
label: "Web Search",
|
|
111
337
|
description:
|
|
112
|
-
"
|
|
113
|
-
|
|
338
|
+
"Multi-tier web retrieval (WRS). Default tier=deep for research: parallel angle queries, RRF fusion. " +
|
|
339
|
+
"Use tier=standard only for narrow follow-ups. Requires anglesFile from web-query-expander for deep.",
|
|
340
|
+
promptSnippet: "tier=deep + anglesFile; not bare SERP",
|
|
114
341
|
promptGuidelines: WEB_SEARCH_GUIDELINES,
|
|
115
342
|
parameters: WebSearchSchema,
|
|
116
343
|
|
|
117
344
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
118
345
|
const cwd = sessionCwd(ctx);
|
|
346
|
+
const webScope = String(params.webScope ?? "").trim() || undefined;
|
|
119
347
|
const query = String(params.query ?? "").trim();
|
|
120
348
|
if (!query) return failResult("web_search: query is required.");
|
|
121
349
|
|
|
122
|
-
const
|
|
350
|
+
const tier = resolveTier(params);
|
|
123
351
|
const bulk = params.bulk === true;
|
|
124
|
-
const
|
|
125
|
-
|
|
352
|
+
const limit = typeof params.limit === "number" ? params.limit : undefined;
|
|
353
|
+
|
|
354
|
+
if (bulk) {
|
|
355
|
+
const bulkScoped = resolveScopedOutput(
|
|
356
|
+
ctx,
|
|
357
|
+
"bulk",
|
|
358
|
+
params.output ? `${params.output}` : undefined,
|
|
359
|
+
webScope,
|
|
360
|
+
);
|
|
361
|
+
const output = bulkScoped.output.endsWith("/bulk")
|
|
362
|
+
? bulkScoped.output
|
|
363
|
+
: `${bulkScoped.artifactDir}/bulk`;
|
|
364
|
+
ensureParentDir(cwd, output);
|
|
365
|
+
const lim = limit ?? 3;
|
|
366
|
+
const argv = ["bulk-scrape", query, "-o", output, "--limit", String(lim)];
|
|
367
|
+
const run = runHarnessWeb(MODULE_URL, argv, cwd);
|
|
368
|
+
if (!run.ok) {
|
|
369
|
+
return failResult(
|
|
370
|
+
`web_search bulk failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}`,
|
|
371
|
+
);
|
|
372
|
+
}
|
|
373
|
+
return okResult(
|
|
374
|
+
`${run.stdout}\n\noutput: ${output}\nartifactDir: ${bulkScoped.artifactDir}`,
|
|
375
|
+
{ output, artifactDir: bulkScoped.artifactDir, query, bulk: true },
|
|
376
|
+
);
|
|
377
|
+
}
|
|
378
|
+
|
|
379
|
+
const basename =
|
|
380
|
+
tier === "deep" || tier === "research" ? "search-deep.json" : "search.json";
|
|
381
|
+
const scoped = resolveScopedOutput(
|
|
382
|
+
ctx,
|
|
383
|
+
basename,
|
|
384
|
+
params.output ? String(params.output) : undefined,
|
|
385
|
+
webScope,
|
|
126
386
|
);
|
|
387
|
+
const output = scoped.output;
|
|
388
|
+
ensureParentDir(cwd, output);
|
|
389
|
+
const { refresh: refreshCache, maxAgeSec } = cacheControlFromParams(params);
|
|
390
|
+
const engine = searchEngineId();
|
|
391
|
+
const resultLimit = limit ?? 10;
|
|
392
|
+
const category = params.category ? String(params.category) : undefined;
|
|
127
393
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
394
|
+
let anglesFile = String(params.anglesFile ?? "").trim();
|
|
395
|
+
if (anglesFile && !anglesFile.startsWith("/") && !anglesFile.includes("..")) {
|
|
396
|
+
anglesFile = resolveScopedOutput(ctx, "angles.yaml", anglesFile, webScope).output;
|
|
397
|
+
}
|
|
398
|
+
if (params.angles?.length && !anglesFile) {
|
|
399
|
+
const inline = resolveScopedOutput(ctx, "angles-inline.yaml", undefined, webScope);
|
|
400
|
+
const tmp = resolve(cwd, inline.output);
|
|
401
|
+
ensureParentDir(cwd, inline.output);
|
|
402
|
+
const yaml =
|
|
403
|
+
`intent: ${JSON.stringify(query)}\nangles:\n` +
|
|
404
|
+
params.angles
|
|
405
|
+
.map(
|
|
406
|
+
(q, i) =>
|
|
407
|
+
` - id: angle_${i + 1}\n query: ${JSON.stringify(q)}`,
|
|
408
|
+
)
|
|
409
|
+
.join("\n") +
|
|
410
|
+
"\n";
|
|
411
|
+
writeFileSync(tmp, yaml, "utf-8");
|
|
412
|
+
anglesFile = inline.output;
|
|
413
|
+
}
|
|
414
|
+
|
|
415
|
+
if (
|
|
416
|
+
(tier === "deep" || tier === "research") &&
|
|
417
|
+
!anglesFile &&
|
|
418
|
+
params.expandHeuristic !== true &&
|
|
419
|
+
!params.angles?.length
|
|
420
|
+
) {
|
|
421
|
+
return failResult(
|
|
422
|
+
"web_search tier=deep requires anglesFile (.web/angles.yaml from harness/web-retrieval/web-query-expander) " +
|
|
423
|
+
"or expandHeuristic:true. Invoke web-retrieval skill first.",
|
|
424
|
+
);
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
const anglesFingerprint = anglesFile
|
|
428
|
+
? fingerprintFile(cwd, anglesFile)
|
|
429
|
+
: undefined;
|
|
430
|
+
|
|
431
|
+
const searchCtx: SearchCacheContext = {
|
|
432
|
+
query,
|
|
433
|
+
tier,
|
|
434
|
+
engine,
|
|
435
|
+
limit: resultLimit,
|
|
436
|
+
category,
|
|
437
|
+
expandHeuristic: params.expandHeuristic === true,
|
|
438
|
+
anglesFingerprint,
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
if (!refreshCache) {
|
|
442
|
+
const cached = lookupSearchCache(cwd, searchCtx, { maxAgeSec });
|
|
443
|
+
if (cached.hit && !cached.stale) {
|
|
444
|
+
const workspaceOutput = publishWorkspaceAlias(
|
|
445
|
+
cwd,
|
|
446
|
+
cached.artifactPath,
|
|
447
|
+
basename,
|
|
448
|
+
);
|
|
449
|
+
const parts = [
|
|
450
|
+
`[cache hit] age ${formatCacheAge(cached.ageMs)} · key ${cached.cacheKey}`,
|
|
451
|
+
`cache: ${cached.entryDir}`,
|
|
452
|
+
];
|
|
453
|
+
const summary =
|
|
454
|
+
tier === "deep" || tier === "research"
|
|
455
|
+
? summarizeDeepSearchJson(workspaceOutput, cwd)
|
|
456
|
+
: summarizeSearchJson(workspaceOutput, cwd);
|
|
457
|
+
if (summary) parts.push("", summary);
|
|
458
|
+
parts.push(
|
|
459
|
+
"",
|
|
460
|
+
`output: ${workspaceOutput}`,
|
|
461
|
+
`artifactDir: ${scoped.artifactDir}`,
|
|
462
|
+
`tier: ${tier}`,
|
|
463
|
+
);
|
|
464
|
+
parts.push("Read output JSON; web_fetch top URLs with highlights:true.");
|
|
465
|
+
return okResult(parts.join("\n"), {
|
|
466
|
+
output: workspaceOutput,
|
|
467
|
+
artifactDir: scoped.artifactDir,
|
|
468
|
+
query,
|
|
469
|
+
tier,
|
|
470
|
+
engine,
|
|
471
|
+
cacheHit: true,
|
|
472
|
+
cacheKey: cached.cacheKey,
|
|
473
|
+
cachePath: cached.artifactPath,
|
|
474
|
+
cacheAgeMs: cached.ageMs,
|
|
475
|
+
});
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
let argv: string[];
|
|
480
|
+
if (tier === "deep" || tier === "research") {
|
|
481
|
+
argv = [
|
|
482
|
+
"search-deep",
|
|
483
|
+
query,
|
|
484
|
+
"-o",
|
|
485
|
+
output,
|
|
486
|
+
"--limit",
|
|
487
|
+
String(resultLimit),
|
|
488
|
+
];
|
|
489
|
+
if (anglesFile) {
|
|
490
|
+
argv.push("--angles-file", anglesFile);
|
|
491
|
+
} else if (params.expandHeuristic === true) {
|
|
492
|
+
argv.push("--expand-heuristic");
|
|
493
|
+
}
|
|
494
|
+
if (category) {
|
|
495
|
+
argv.push("--category", category);
|
|
496
|
+
}
|
|
497
|
+
} else {
|
|
498
|
+
argv = [
|
|
499
|
+
"search",
|
|
500
|
+
query,
|
|
501
|
+
"-o",
|
|
502
|
+
output,
|
|
503
|
+
"--tier",
|
|
504
|
+
tier,
|
|
505
|
+
...(limit != null ? ["--limit", String(limit)] : []),
|
|
506
|
+
];
|
|
507
|
+
}
|
|
131
508
|
|
|
132
509
|
const run = runHarnessWeb(MODULE_URL, argv, cwd);
|
|
133
510
|
if (!run.ok) {
|
|
134
511
|
const hint =
|
|
135
512
|
"\n\nHints: run /harness-setup; for searxng set HARNESS_WEB_SEARXNG_URL; " +
|
|
136
|
-
"enable json in SearXNG search.formats.";
|
|
513
|
+
"enable json in SearXNG search.formats; for deep spawn web-query-expander first.";
|
|
137
514
|
return failResult(
|
|
138
515
|
`web_search failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}${hint}`,
|
|
139
516
|
);
|
|
140
517
|
}
|
|
141
518
|
|
|
519
|
+
const cacheWrite = writeSearchCacheEntry(cwd, searchCtx, output, {
|
|
520
|
+
anglesPath: anglesFile,
|
|
521
|
+
});
|
|
522
|
+
publishWorkspaceAlias(cwd, `${cacheWrite.entryDir}/${basename}`, basename);
|
|
523
|
+
|
|
142
524
|
const parts = [run.stdout];
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
525
|
+
const summary =
|
|
526
|
+
tier === "deep" || tier === "research"
|
|
527
|
+
? summarizeDeepSearchJson(output, cwd)
|
|
528
|
+
: summarizeSearchJson(output, cwd);
|
|
529
|
+
if (summary) parts.push("", summary);
|
|
530
|
+
parts.push(
|
|
531
|
+
"",
|
|
532
|
+
`output: ${output}`,
|
|
533
|
+
`artifactDir: ${scoped.artifactDir}`,
|
|
534
|
+
`tier: ${tier}`,
|
|
535
|
+
`cache: ${cacheWrite.entryDir}`,
|
|
536
|
+
);
|
|
537
|
+
parts.push("Read output JSON; web_fetch top URLs with highlights:true.");
|
|
151
538
|
|
|
152
539
|
return okResult(parts.join("\n"), {
|
|
153
540
|
output,
|
|
541
|
+
artifactDir: scoped.artifactDir,
|
|
154
542
|
query,
|
|
155
|
-
|
|
156
|
-
engine
|
|
543
|
+
tier,
|
|
544
|
+
engine,
|
|
545
|
+
cacheHit: false,
|
|
546
|
+
cacheKey: cacheWrite.cacheKey,
|
|
547
|
+
cachePath: `${cacheWrite.entryDir}/${basename}`,
|
|
157
548
|
});
|
|
158
549
|
},
|
|
159
550
|
});
|
|
@@ -162,34 +553,110 @@ export default function harnessWebTools(pi: ExtensionAPI) {
|
|
|
162
553
|
name: "web_fetch",
|
|
163
554
|
label: "Web Fetch",
|
|
164
555
|
description:
|
|
165
|
-
"Fetch
|
|
166
|
-
promptSnippet: "Scrape/map
|
|
556
|
+
"Fetch URL content via Scrapling. Prefer highlights:true after deep search before full markdown.",
|
|
557
|
+
promptSnippet: "Scrape/map; highlights first after deep",
|
|
167
558
|
promptGuidelines: WEB_FETCH_GUIDELINES,
|
|
168
559
|
parameters: WebFetchSchema,
|
|
169
560
|
|
|
170
561
|
async execute(_id, params, _signal, _onUpdate, ctx) {
|
|
171
562
|
const cwd = sessionCwd(ctx);
|
|
563
|
+
const webScope = String(params.webScope ?? "").trim() || undefined;
|
|
172
564
|
const url = String(params.url ?? "").trim();
|
|
173
565
|
if (!url) return failResult("web_fetch: url is required.");
|
|
174
566
|
|
|
175
567
|
const mode = params.mode === "map" ? "map" : "scrape";
|
|
176
568
|
const fast = params.fast === true;
|
|
177
569
|
const limit = typeof params.limit === "number" ? params.limit : 100;
|
|
178
|
-
const
|
|
179
|
-
const
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
570
|
+
const basename = mode === "map" ? "map.json" : "page.md";
|
|
571
|
+
const scoped = resolveScopedOutput(
|
|
572
|
+
ctx,
|
|
573
|
+
basename,
|
|
574
|
+
params.output ? String(params.output) : undefined,
|
|
575
|
+
webScope,
|
|
576
|
+
);
|
|
577
|
+
const output = scoped.output;
|
|
578
|
+
ensureParentDir(cwd, output);
|
|
579
|
+
const highlights = params.highlights === true;
|
|
580
|
+
const hlQuery = String(params.highlightQuery ?? "").trim();
|
|
581
|
+
const { refresh: refreshCache, maxAgeSec } = cacheControlFromParams(params);
|
|
582
|
+
|
|
583
|
+
const hlScoped =
|
|
584
|
+
highlights && !params.highlightsOutput
|
|
585
|
+
? resolveScopedOutput(ctx, "highlights.json", undefined, webScope)
|
|
586
|
+
: highlights
|
|
587
|
+
? resolveScopedOutput(
|
|
588
|
+
ctx,
|
|
589
|
+
"highlights.json",
|
|
590
|
+
String(params.highlightsOutput),
|
|
591
|
+
webScope,
|
|
592
|
+
)
|
|
593
|
+
: undefined;
|
|
594
|
+
if (hlScoped) ensureParentDir(cwd, hlScoped.output);
|
|
595
|
+
|
|
596
|
+
const fetchCtx: FetchCacheContext = {
|
|
597
|
+
url,
|
|
598
|
+
mode,
|
|
599
|
+
fast,
|
|
600
|
+
highlightQuery: hlQuery || undefined,
|
|
601
|
+
highlights,
|
|
602
|
+
};
|
|
603
|
+
|
|
604
|
+
if (!refreshCache) {
|
|
605
|
+
const cached = lookupFetchCache(cwd, fetchCtx, { maxAgeSec });
|
|
606
|
+
if (cached.hit && !cached.stale) {
|
|
607
|
+
const workspaceBasename = highlights
|
|
608
|
+
? "highlights.json"
|
|
609
|
+
: mode === "map"
|
|
610
|
+
? "map.json"
|
|
611
|
+
: "page.md";
|
|
612
|
+
const workspaceOutput = publishWorkspaceAlias(
|
|
613
|
+
cwd,
|
|
614
|
+
cached.artifactPath,
|
|
615
|
+
workspaceBasename,
|
|
616
|
+
);
|
|
617
|
+
const parts = [
|
|
618
|
+
`[cache hit] age ${formatCacheAge(cached.ageMs)} · key ${cached.cacheKey}`,
|
|
619
|
+
`cache: ${cached.entryDir}`,
|
|
620
|
+
"",
|
|
621
|
+
`output: ${workspaceOutput}`,
|
|
622
|
+
`artifactDir: ${scoped.artifactDir}`,
|
|
623
|
+
];
|
|
624
|
+
const excerpt = readTextExcerpt(workspaceOutput, cwd);
|
|
625
|
+
if (excerpt) parts.push("", "--- excerpt ---", excerpt);
|
|
626
|
+
return okResult(parts.join("\n"), {
|
|
627
|
+
output: workspaceOutput,
|
|
628
|
+
artifactDir: scoped.artifactDir,
|
|
629
|
+
url,
|
|
630
|
+
mode,
|
|
631
|
+
highlights,
|
|
632
|
+
cacheHit: true,
|
|
633
|
+
cacheKey: cached.cacheKey,
|
|
634
|
+
cachePath: cached.artifactPath,
|
|
635
|
+
});
|
|
636
|
+
}
|
|
637
|
+
}
|
|
638
|
+
|
|
639
|
+
let argv: string[];
|
|
640
|
+
if (mode === "map") {
|
|
641
|
+
argv = [
|
|
642
|
+
"map",
|
|
643
|
+
url,
|
|
644
|
+
"-o",
|
|
645
|
+
output,
|
|
646
|
+
"--limit",
|
|
647
|
+
String(limit),
|
|
648
|
+
...(fast ? ["--fast"] : []),
|
|
649
|
+
];
|
|
650
|
+
} else {
|
|
651
|
+
argv = ["scrape", url, "-o", output, ...(fast ? ["--fast"] : [])];
|
|
652
|
+
if (highlights) {
|
|
653
|
+
if (!hlQuery) {
|
|
654
|
+
return failResult("web_fetch: highlightQuery required when highlights=true");
|
|
655
|
+
}
|
|
656
|
+
argv.push("--highlights", "--highlight-query", hlQuery);
|
|
657
|
+
if (hlScoped) argv.push("--highlights-output", hlScoped.output);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
193
660
|
|
|
194
661
|
const run = runHarnessWeb(MODULE_URL, argv, cwd);
|
|
195
662
|
if (!run.ok) {
|
|
@@ -199,13 +666,184 @@ export default function harnessWebTools(pi: ExtensionAPI) {
|
|
|
199
666
|
);
|
|
200
667
|
}
|
|
201
668
|
|
|
202
|
-
const
|
|
669
|
+
const cacheArtifact = highlights && hlScoped ? hlScoped.output : output;
|
|
670
|
+
const cacheWrite = writeFetchCacheEntry(cwd, fetchCtx, cacheArtifact, {
|
|
671
|
+
highlightsPath:
|
|
672
|
+
highlights && hlScoped && hlScoped.output !== cacheArtifact
|
|
673
|
+
? hlScoped.output
|
|
674
|
+
: undefined,
|
|
675
|
+
});
|
|
676
|
+
const workspaceBasename = highlights
|
|
677
|
+
? "highlights.json"
|
|
678
|
+
: mode === "map"
|
|
679
|
+
? "map.json"
|
|
680
|
+
: "page.md";
|
|
681
|
+
publishWorkspaceAlias(cwd, `${cacheWrite.entryDir}/${workspaceBasename}`, workspaceBasename);
|
|
682
|
+
|
|
683
|
+
const parts = [
|
|
684
|
+
run.stdout,
|
|
685
|
+
"",
|
|
686
|
+
`output: ${output}`,
|
|
687
|
+
`artifactDir: ${scoped.artifactDir}`,
|
|
688
|
+
`cache: ${cacheWrite.entryDir}`,
|
|
689
|
+
];
|
|
203
690
|
const excerpt = readTextExcerpt(output, cwd);
|
|
204
|
-
if (excerpt)
|
|
205
|
-
|
|
691
|
+
if (excerpt) parts.push("", "--- excerpt ---", excerpt);
|
|
692
|
+
|
|
693
|
+
return okResult(parts.join("\n"), {
|
|
694
|
+
output,
|
|
695
|
+
artifactDir: scoped.artifactDir,
|
|
696
|
+
url,
|
|
697
|
+
mode,
|
|
698
|
+
highlights,
|
|
699
|
+
cacheHit: false,
|
|
700
|
+
cacheKey: cacheWrite.cacheKey,
|
|
701
|
+
cachePath: `${cacheWrite.entryDir}/${workspaceBasename}`,
|
|
702
|
+
});
|
|
703
|
+
},
|
|
704
|
+
});
|
|
705
|
+
|
|
706
|
+
pi.registerTool({
	name: "web_find_similar",
	label: "Web Find Similar",
	description:
		"Find pages similar to a seed URL (Exa findSimilar analog). Outputs fused search-deep.json.",
	promptSnippet: "Similar pages from seed URL",
	promptGuidelines: WEB_FIND_SIMILAR_GUIDELINES,
	parameters: WebFindSimilarSchema,

	/**
	 * Runs the `find-similar` harness-web command for a seed URL.
	 *
	 * Reads `params.url` (required), plus optional `params.output`,
	 * `params.webScope`, `params.limit` (falls back to 10 when it is not a
	 * number) and `params.fast` (`--fast` is passed unless it is exactly
	 * `false`).
	 *
	 * Returns `failResult(...)` when the URL is blank or the command reports
	 * `ok === false`; otherwise `okResult(...)` carrying the command stdout, an
	 * optional summary from `summarizeDeepSearchJson`, and the output paths.
	 */
	async execute(_id, params, _signal, _onUpdate, ctx) {
		const cwd = sessionCwd(ctx);
		const webScope = String(params.webScope ?? "").trim() || undefined;

		// Guard clause: nothing to do without a seed URL.
		const url = String(params.url ?? "").trim();
		if (url === "") {
			return failResult("web_find_similar: url is required.");
		}

		const requestedOutput = params.output ? String(params.output) : undefined;
		const { output, artifactDir } = resolveScopedOutput(
			ctx,
			"search-deep.json",
			requestedOutput,
			webScope,
		);
		ensureParentDir(cwd, output);

		const limit = typeof params.limit === "number" ? params.limit : 10;
		const argv = ["find-similar", url, "-o", output, "--limit", String(limit)];
		// Fast mode is opt-out here: only an explicit `false` disables it.
		if (params.fast !== false) {
			argv.push("--fast");
		}

		const run = runHarnessWeb(MODULE_URL, argv, cwd);
		if (!run.ok) {
			const detail = run.stderr || run.stdout;
			return failResult(`web_find_similar failed (exit ${run.exitCode}).\n${detail}`);
		}

		// Report layout: stdout, optional summary, then the artifact locations,
		// each section separated by a blank line.
		const summary = summarizeDeepSearchJson(output, cwd);
		const report = [
			run.stdout,
			...(summary ? ["", summary] : []),
			"",
			`output: ${output}`,
			`artifactDir: ${artifactDir}`,
		];

		return okResult(report.join("\n"), { output, artifactDir, url });
	},
});
|
|
759
|
+
|
|
760
|
+
pi.registerTool({
	name: "web_contents",
	label: "Web Contents Batch",
	description:
		"Batch-fetch URLs from search-deep.json into markdown (+ optional highlights). Builds evidence bundle.",
	promptSnippet: "Batch fetch after deep search",
	promptGuidelines: WEB_CONTENTS_GUIDELINES,
	parameters: WebContentsSchema,

	/**
	 * Runs the `contents-batch` harness-web command.
	 *
	 * Inputs come from `params.urls` and/or `params.fromSearch`; at least one
	 * must be provided, otherwise a `failResult` is returned. Note that the
	 * output directory is created before that check runs.
	 *
	 * - The output directory is the scoped output when it ends in "/contents",
	 *   else `<artifactDir>/contents`.
	 * - `fromSearch` / `evidenceBundle` values that start with "/" or contain
	 *   ".." are used as given; anything else goes through
	 *   `resolveScopedOutput`.
	 * - `--highlights` is only passed when `params.highlights` is truthy AND a
	 *   non-blank `highlightQuery` is present.
	 * - `--evidence-bundle` is only passed when `fromSearch` is set.
	 */
	async execute(_id, params, _signal, _onUpdate, ctx) {
		const cwd = sessionCwd(ctx);
		const webScope = String(params.webScope ?? "").trim() || undefined;

		const requestedDir = params.outputDir ? String(params.outputDir) : undefined;
		const dirScoped = resolveScopedOutput(ctx, "contents", requestedDir, webScope);
		let outputDir = `${dirScoped.artifactDir}/contents`;
		if (dirScoped.output.endsWith("/contents")) {
			outputDir = dirScoped.output;
		}
		mkdirSync(resolve(cwd, outputDir), { recursive: true });

		// Relative, traversal-free search paths are mapped into the scoped
		// artifact area; everything else is forwarded untouched.
		const rawFromSearch = String(params.fromSearch ?? "").trim();
		const scopeFromSearch =
			rawFromSearch !== "" &&
			!rawFromSearch.startsWith("/") &&
			!rawFromSearch.includes("..");
		const fromSearch = scopeFromSearch
			? resolveScopedOutput(ctx, "search-deep.json", rawFromSearch, webScope).output
			: rawFromSearch;

		const urls: string[] = [];
		for (const candidate of params.urls ?? []) {
			const trimmed = String(candidate).trim();
			if (trimmed) urls.push(trimmed);
		}
		const limit = typeof params.limit === "number" ? params.limit : 5;
		const hlQuery = String(params.highlightQuery ?? "").trim();

		// Assemble argv directly in its final order:
		// contents-batch [--from-search P] -o DIR --limit N [--fast]
		//   [--highlights --highlight-query Q] ...urls [--evidence-bundle P]
		const argv: string[] = ["contents-batch"];
		if (fromSearch) {
			argv.push("--from-search", fromSearch);
		}
		argv.push("-o", outputDir, "--limit", String(limit));
		if (params.fast) {
			argv.push("--fast");
		}
		if (params.highlights && hlQuery) {
			argv.push("--highlights", "--highlight-query", hlQuery);
		}
		argv.push(...urls);

		let evidencePath: string | undefined;
		if (params.evidenceBundle && fromSearch) {
			const bundleArg = String(params.evidenceBundle);
			const useAsGiven = bundleArg.startsWith("/") || bundleArg.includes("..");
			evidencePath = useAsGiven
				? bundleArg
				: resolveScopedOutput(ctx, "evidence-bundle.json", bundleArg, webScope).output;
			ensureParentDir(cwd, evidencePath);
			argv.push("--evidence-bundle", evidencePath);
		}

		if (fromSearch === "" && urls.length === 0) {
			return failResult("web_contents: provide urls or fromSearch");
		}

		const run = runHarnessWeb(MODULE_URL, argv, cwd);
		if (!run.ok) {
			const detail = run.stderr || run.stdout;
			return failResult(`web_contents failed (exit ${run.exitCode}).\n${detail}`);
		}

		let message = `${run.stdout}\n\noutputDir: ${outputDir}\nartifactDir: ${dirScoped.artifactDir}`;
		if (evidencePath) {
			message += `\nevidence: ${evidencePath}`;
		}

		return okResult(message, {
			outputDir,
			artifactDir: dirScoped.artifactDir,
			fromSearch,
			evidenceBundle: evidencePath,
		});
	},
});
|
|
211
849
|
}
|