ultimate-pi 0.19.0 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/SYSTEM.md +30 -12
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/stack-researcher.md +5 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +689 -51
- package/.pi/harness/agents.manifest.json +29 -5
- package/.pi/harness/agents.policy.yaml +34 -0
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.mjs +6 -0
- package/.pi/lib/harness-subagent-auth.ts +39 -9
- package/.pi/lib/harness-subagents-bridge.ts +21 -0
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/lib/harness-web/run-cli.ts +42 -2
- package/.pi/prompts/harness-plan.md +1 -0
- package/.pi/prompts/harness-setup.md +3 -1
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/harness-cli-verify.sh +5 -0
- package/.pi/scripts/harness-verify.mjs +78 -0
- package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +6 -0
- package/package.json +5 -3
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WRS local cache — pooled `.web/cache/` entries with freshness + search context.
|
|
3
|
+
* Workspace aliases (`.web/search-deep.json`, …) are copies/links for agent ergonomics.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
copyFileSync,
|
|
8
|
+
existsSync,
|
|
9
|
+
mkdirSync,
|
|
10
|
+
readFileSync,
|
|
11
|
+
statSync,
|
|
12
|
+
writeFileSync,
|
|
13
|
+
} from "node:fs";
|
|
14
|
+
import { createHash } from "node:crypto";
|
|
15
|
+
import { dirname, resolve } from "node:path";
|
|
16
|
+
|
|
17
|
+
/** Project-relative directory that holds every web-retrieval file. */
export const WEB_ROOT = ".web";

/** Project-relative directory under which pooled cache entries are stored. */
export const WEB_CACHE_ROOT = WEB_ROOT + "/cache";
|
|
19
|
+
|
|
20
|
+
/** Category of a pooled cache entry (two search flavours, three fetch flavours). */
export type WebCacheKind = "search" | "search-deep" | "fetch-page" | "fetch-map" | "fetch-highlights";
|
|
26
|
+
|
|
27
|
+
/** Shape of the `meta.json` document stored next to each cached artifact. */
export interface WebCacheMeta {
  /** Schema version; only the literal `1` is defined. */
  version: 1;
  /** Category of the cached entry. */
  kind: WebCacheKind;
  /** Hash key that identifies the entry. */
  cacheKey: string;
  /** Creation timestamp as a date string. */
  createdAt: string;
  /** Expiry timestamp as a date string. */
  expiresAt: string;
  /** Time-to-live, in seconds, that was applied when the entry was written. */
  ttlSeconds: number;
  /** Free-form record of the request parameters that produced the entry. */
  context: Record<string, unknown>;
  /** Name of the artifact associated with the entry. */
  artifact: string;
  /** Hit counter for the entry. */
  hitCount: number;
}
|
|
38
|
+
|
|
39
|
+
/** Request parameters that identify a cached search. */
export interface SearchCacheContext {
  /** Search query text. */
  query: string;
  /** Search tier name. */
  tier: string;
  /** Search engine identifier. */
  engine: string;
  /** Requested result limit. */
  limit: number;
  /** Optional search category. */
  category?: string;
  /** Optional flag for heuristic expansion. */
  expandHeuristic?: boolean;
  /** Optional fingerprint of the angles in use. */
  anglesFingerprint?: string;
}
|
|
48
|
+
|
|
49
|
+
/** Request parameters that identify a cached fetch. */
export interface FetchCacheContext {
  /** Target URL. */
  url: string;
  /** Fetch mode: a single-page scrape or a map. */
  mode: "scrape" | "map";
  /** Fast-mode flag. */
  fast: boolean;
  /** Optional query used for highlights. */
  highlightQuery?: string;
  /** Whether this is a highlights request. */
  highlights: boolean;
}
|
|
56
|
+
|
|
57
|
+
/** Outcome of a cache lookup. */
export interface CacheLookupResult {
  /** Whether the lookup was a hit. */
  hit: boolean;
  /** Whether the entry is stale. */
  stale: boolean;
  /** Hash key that was looked up. */
  cacheKey: string;
  /** Directory of the cache entry. */
  entryDir: string;
  /** Path of the cached artifact. */
  artifactPath: string;
  /** Path of the entry's metadata file. */
  metaPath: string;
  /** Entry metadata, when available. */
  meta?: WebCacheMeta;
  /** Age of the entry in milliseconds, when known. */
  ageMs?: number;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Derive a short, stable key from an arbitrary JSON-serialisable payload.
 *
 * @param payload Value to serialise with `JSON.stringify` and hash.
 * @returns The first 20 hex characters of the SHA-256 digest of the JSON text.
 */
function shaKey(payload: unknown): string {
  const serialized = JSON.stringify(payload);
  const hasher = createHash("sha256");
  hasher.update(serialized);
  const hex = hasher.digest("hex");
  return hex.slice(0, 20);
}
|
|
74
|
+
|
|
75
|
+
/**
 * Report whether the pooled cache is switched on.
 *
 * The cache is on unless the `HARNESS_WEB_CACHE` environment variable
 * (whitespace-trimmed) is exactly `0` or is `false` in any letter case.
 *
 * @returns `false` only for the explicit opt-out values, `true` otherwise.
 */
export function cacheEnabled(): boolean {
  const setting = process.env.HARNESS_WEB_CACHE?.trim();
  if (setting === undefined) return true;
  const optedOut = setting === "0" || setting.toLowerCase() === "false";
  return !optedOut;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Resolve the default cache time-to-live in seconds.
 *
 * Reads `HARNESS_WEB_CACHE_TTL_SEC`, parses it as a base-10 integer, and uses
 * it when the result is a positive finite number. Any other value (unset,
 * empty, non-numeric, zero, negative) yields the built-in default of 24 hours.
 *
 * @returns The TTL in seconds.
 */
export function defaultCacheTtlSeconds(): number {
  const fallbackSeconds = 86_400; // 24h
  const configured = process.env.HARNESS_WEB_CACHE_TTL_SEC?.trim();
  if (!configured) return fallbackSeconds;
  const seconds = Number.parseInt(configured, 10);
  return Number.isFinite(seconds) && seconds > 0 ? seconds : fallbackSeconds;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Compute a short content fingerprint for a file.
 *
 * @param projectRoot Base directory that `relPath` is resolved against.
 * @param relPath Path of the file to fingerprint.
 * @returns The first 16 hex characters of the SHA-256 digest of the file's
 *   UTF-8 text, or `undefined` when the path does not exist.
 */
export function fingerprintFile(projectRoot: string, relPath: string): string | undefined {
  const absolutePath = resolve(projectRoot, relPath);
  if (existsSync(absolutePath)) {
    const hasher = createHash("sha256");
    hasher.update(readFileSync(absolutePath, "utf-8"));
    return hasher.digest("hex").slice(0, 16);
  }
  return undefined;
}
|
|
96
|
+
|
|
97
|
+
/**
 * Build the cache key for a search request.
 *
 * The query and category are trimmed and lower-cased before hashing, so
 * requests that differ only in case or surrounding whitespace share a key.
 * The tiers `deep` and `research` are keyed as kind `search-deep`; every
 * other tier as `search`.
 *
 * @param ctx Search parameters to hash.
 * @returns The key produced by `shaKey` for the normalised parameters.
 */
export function searchCacheKey(ctx: SearchCacheContext): string {
  const isDeepTier = ctx.tier === "deep" || ctx.tier === "research";
  const normalizedQuery = ctx.query.trim().toLowerCase();
  const normalizedCategory = ctx.category?.trim().toLowerCase() || null;
  // Property order is part of the key: JSON.stringify preserves insertion order.
  const keyPayload = {
    v: 1,
    kind: isDeepTier ? "search-deep" : "search",
    query: normalizedQuery,
    tier: ctx.tier,
    engine: ctx.engine,
    limit: ctx.limit,
    category: normalizedCategory,
    expandHeuristic: Boolean(ctx.expandHeuristic),
    angles: ctx.anglesFingerprint || null,
  };
  return shaKey(keyPayload);
}
|
|
110
|
+
|
|
111
|
+
/**
 * Build the cache key for a fetch request.
 *
 * A highlights request is keyed as kind `fetch-highlights` regardless of
 * mode; otherwise mode `map` is keyed as `fetch-map` and anything else as
 * `fetch-page`. The URL and highlight query are trimmed before hashing.
 *
 * @param ctx Fetch parameters to hash.
 * @returns The key produced by `shaKey` for the normalised parameters.
 */
export function fetchCacheKey(ctx: FetchCacheContext): string {
  let kind = "fetch-page";
  if (ctx.highlights) {
    kind = "fetch-highlights";
  } else if (ctx.mode === "map") {
    kind = "fetch-map";
  }
  // Property order is part of the key: JSON.stringify preserves insertion order.
  const keyPayload = {
    v: 1,
    kind,
    url: ctx.url.trim(),
    mode: ctx.mode,
    fast: ctx.fast,
    highlightQuery: ctx.highlightQuery?.trim() || null,
  };
  return shaKey(keyPayload);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Map a search tier name to the cache kind it is stored under.
 *
 * @param tier Search tier name.
 * @returns `search-deep` for `deep` and `research`, `search` for anything else.
 */
function cacheKindFromTier(tier: string): WebCacheKind {
  switch (tier) {
    case "deep":
    case "research":
      return "search-deep";
    default:
      return "search";
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Build the directory path of a cache entry, relative to the project root.
 *
 * @param projectRoot Not used by the computation; the result stays relative.
 * @param kind Cache kind, used as the first path segment below the cache root.
 * @param cacheKey Entry key, used as the final path segment.
 * @returns `<cache root>/<kind>/<cacheKey>` joined with forward slashes.
 */
function entryDir(projectRoot: string, kind: WebCacheKind, cacheKey: string): string {
  return [WEB_CACHE_ROOT, kind, cacheKey].join("/");
}
|
|
129
|
+
|
|
130
|
+
/**
 * Load and minimally validate a cache entry's metadata file.
 *
 * @param metaPath Path of the `meta.json` file to read.
 * @returns The parsed metadata when the file exists, parses as JSON and has
 *   `version === 1`; `undefined` in every other case (missing file, read or
 *   parse failure, other version). No further field validation is done.
 */
function readMeta(metaPath: string): WebCacheMeta | undefined {
  if (!existsSync(metaPath)) return undefined;
  let parsed: WebCacheMeta | null;
  try {
    parsed = JSON.parse(readFileSync(metaPath, "utf-8")) as WebCacheMeta | null;
  } catch {
    // Unreadable or malformed metadata is treated the same as no metadata.
    return undefined;
  }
  return parsed?.version === 1 ? parsed : undefined;
}
|
|
140
|
+
|
|
141
|
+
/**
 * Decide whether a cache entry may still be served.
 *
 * An entry is fresh when the current time is not past `meta.expiresAt` and,
 * if a positive `maxAgeSec` is supplied, the entry is not older than that.
 * A timestamp that cannot be parsed makes the entry stale: metadata whose
 * expiry is unknown must not be served indefinitely.
 *
 * @param meta Metadata of the entry under test.
 * @param options.maxAgeSec Optional caller-imposed age limit in seconds;
 *   ignored when absent, zero or negative.
 * @param options.nowMs Optional clock override in epoch milliseconds
 *   (defaults to `Date.now()`).
 * @returns `true` when the entry is fresh, `false` when stale.
 */
export function isCacheFresh(
  meta: WebCacheMeta,
  options?: { maxAgeSec?: number; nowMs?: number },
): boolean {
  const now = options?.nowMs ?? Date.now();

  // Date.parse yields NaN for missing or garbled values; fail safe to "stale".
  const expiresAt = Date.parse(meta.expiresAt);
  if (!Number.isFinite(expiresAt) || now > expiresAt) return false;

  const maxAgeSec = options?.maxAgeSec;
  if (maxAgeSec != null && maxAgeSec > 0) {
    const createdAt = Date.parse(meta.createdAt);
    // The age limit cannot be honoured without a usable creation time.
    if (!Number.isFinite(createdAt) || now - createdAt > maxAgeSec * 1000) {
      return false;
    }
  }
  return true;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Persist an incremented hit counter for a cache entry.
 *
 * The counter is bookkeeping only, so a failed write (read-only checkout,
 * entry removed concurrently, full disk) is ignored rather than allowed to
 * fail the cache lookup that triggered it.
 *
 * @param metaPath Path of the `meta.json` file to rewrite.
 * @param meta Metadata as currently loaded; it is not mutated.
 */
function bumpHitCount(metaPath: string, meta: WebCacheMeta): void {
  const next: WebCacheMeta = { ...meta, hitCount: (meta.hitCount ?? 0) + 1 };
  try {
    writeFileSync(metaPath, `${JSON.stringify(next, null, 2)}\n`, "utf-8");
  } catch {
    // Best effort: the cached artifact is still valid without an updated counter.
  }
}
|
|
161
|
+
|
|
162
|
+
/**
 * Look up a pooled search result.
 *
 * Outcomes:
 * - cache disabled or artifact file absent: `hit: false`;
 * - artifact present but metadata missing or unusable: `hit: true, stale: true`;
 * - otherwise: `hit: true` with `stale` set from `isCacheFresh`, plus the
 *   metadata and the entry's age. A fresh hit also bumps the hit counter.
 *
 * @param projectRoot Directory against which the relative entry paths are resolved.
 * @param ctx Search parameters identifying the entry.
 * @param options.maxAgeSec Optional age limit forwarded to `isCacheFresh`.
 * @param options.ttlSeconds Accepted but not read by this function.
 * @returns The lookup result; all paths in it are relative to `projectRoot`.
 */
export function lookupSearchCache(
  projectRoot: string,
  ctx: SearchCacheContext,
  options?: { maxAgeSec?: number; ttlSeconds?: number },
): CacheLookupResult {
  const cacheKey = searchCacheKey(ctx);
  const kind = cacheKindFromTier(ctx.tier);
  const artifactName = kind !== "search-deep" ? "search.json" : "search-deep.json";

  const relDir = entryDir(projectRoot, kind, cacheKey);
  const absDir = resolve(projectRoot, relDir);
  const absArtifact = resolve(absDir, artifactName);
  const absMeta = resolve(absDir, "meta.json");

  const miss: CacheLookupResult = {
    hit: false,
    stale: false,
    cacheKey,
    entryDir: relDir,
    artifactPath: `${relDir}/${artifactName}`,
    metaPath: `${relDir}/meta.json`,
  };

  if (!(cacheEnabled() && existsSync(absArtifact))) return miss;

  const meta = readMeta(absMeta);
  if (meta === undefined) {
    // An artifact without usable metadata cannot be proven fresh.
    return { ...miss, hit: true, stale: true };
  }

  const fresh = isCacheFresh(meta, { maxAgeSec: options?.maxAgeSec });
  const createdMs = Date.parse(meta.createdAt);
  let ageMs: number | undefined;
  if (Number.isFinite(createdMs)) ageMs = Date.now() - createdMs;

  if (fresh) bumpHitCount(absMeta, meta);
  return { ...miss, hit: true, stale: !fresh, meta, ageMs };
}
|
|
199
|
+
|
|
200
|
+
/**
 * Look up a pooled fetch result (page, map or highlights).
 *
 * Outcomes:
 * - cache disabled or artifact file absent: `hit: false`;
 * - artifact present but metadata missing or unusable: `hit: true, stale: true`;
 * - otherwise: `hit: true` with `stale` set from `isCacheFresh`, plus the
 *   metadata and the entry's age. A fresh hit also bumps the hit counter.
 *
 * @param projectRoot Directory against which the relative entry paths are resolved.
 * @param ctx Fetch parameters identifying the entry.
 * @param options.maxAgeSec Optional age limit forwarded to `isCacheFresh`.
 * @returns The lookup result; all paths in it are relative to `projectRoot`.
 */
export function lookupFetchCache(
  projectRoot: string,
  ctx: FetchCacheContext,
  options?: { maxAgeSec?: number },
): CacheLookupResult {
  const cacheKey = fetchCacheKey(ctx);

  // Highlights take precedence over the fetch mode for both kind and file name.
  let kind: WebCacheKind;
  let artifactName: string;
  if (ctx.highlights) {
    kind = "fetch-highlights";
    artifactName = "highlights.json";
  } else if (ctx.mode === "map") {
    kind = "fetch-map";
    artifactName = "map.json";
  } else {
    kind = "fetch-page";
    artifactName = "page.md";
  }

  const relDir = entryDir(projectRoot, kind, cacheKey);
  const absDir = resolve(projectRoot, relDir);
  const absArtifact = resolve(absDir, artifactName);
  const absMeta = resolve(absDir, "meta.json");

  const miss: CacheLookupResult = {
    hit: false,
    stale: false,
    cacheKey,
    entryDir: relDir,
    artifactPath: `${relDir}/${artifactName}`,
    metaPath: `${relDir}/meta.json`,
  };

  if (!(cacheEnabled() && existsSync(absArtifact))) return miss;

  const meta = readMeta(absMeta);
  if (meta === undefined) {
    // An artifact without usable metadata cannot be proven fresh.
    return { ...miss, hit: true, stale: true };
  }

  const fresh = isCacheFresh(meta, { maxAgeSec: options?.maxAgeSec });
  const createdMs = Date.parse(meta.createdAt);
  let ageMs: number | undefined;
  if (Number.isFinite(createdMs)) ageMs = Date.now() - createdMs;

  if (fresh) bumpHitCount(absMeta, meta);
  return { ...miss, hit: true, stale: !fresh, meta, ageMs };
}
|
|
243
|
+
|
|
244
|
+
/**
 * Store a search artifact in the pooled cache.
 *
 * Creates the entry directory, copies the source artifact into it (as
 * `search-deep.json` or `search.json` depending on the tier), optionally
 * copies an angles file alongside as `angles.yaml`, and writes `meta.json`
 * with a zero hit counter.
 *
 * @param projectRoot Directory against which all relative paths are resolved.
 * @param ctx Search parameters; they determine the key and are recorded in the metadata.
 * @param sourceArtifactPath Path of the artifact to copy into the cache.
 * @param options.anglesPath Optional angles file; copied only when it exists.
 * @param options.ttlSeconds Optional TTL override; defaults to `defaultCacheTtlSeconds()`.
 * @returns The key plus the project-relative entry directory and metadata path.
 */
export function writeSearchCacheEntry(
  projectRoot: string,
  ctx: SearchCacheContext,
  sourceArtifactPath: string,
  options?: { anglesPath?: string; ttlSeconds?: number },
): { cacheKey: string; entryDir: string; metaPath: string } {
  const cacheKey = searchCacheKey(ctx);
  const kind = cacheKindFromTier(ctx.tier);
  const relDir = entryDir(projectRoot, kind, cacheKey);
  const absDir = resolve(projectRoot, relDir);
  mkdirSync(absDir, { recursive: true });

  const artifactName = kind !== "search-deep" ? "search.json" : "search-deep.json";
  copyFileSync(resolve(projectRoot, sourceArtifactPath), resolve(absDir, artifactName));

  const anglesPath = options?.anglesPath;
  if (anglesPath && existsSync(resolve(projectRoot, anglesPath))) {
    copyFileSync(resolve(projectRoot, anglesPath), resolve(absDir, "angles.yaml"));
  }

  const ttl = options?.ttlSeconds ?? defaultCacheTtlSeconds();
  const createdMs = new Date().getTime();
  const meta: WebCacheMeta = {
    version: 1,
    kind,
    cacheKey,
    createdAt: new Date(createdMs).toISOString(),
    expiresAt: new Date(createdMs + ttl * 1000).toISOString(),
    ttlSeconds: ttl,
    context: {
      query: ctx.query,
      tier: ctx.tier,
      engine: ctx.engine,
      limit: ctx.limit,
      category: ctx.category ?? null,
      expandHeuristic: Boolean(ctx.expandHeuristic),
      anglesFingerprint: ctx.anglesFingerprint ?? null,
    },
    artifact: artifactName,
    hitCount: 0,
  };
  writeFileSync(resolve(absDir, "meta.json"), JSON.stringify(meta, null, 2) + "\n", "utf-8");

  return { cacheKey, entryDir: relDir, metaPath: `${relDir}/meta.json` };
}
|
|
287
|
+
|
|
288
|
+
/**
 * Store a fetch artifact (page, map or highlights) in the pooled cache.
 *
 * Creates the entry directory, copies the source artifact into it under the
 * name matching the request (`highlights.json`, `map.json` or `page.md`),
 * optionally copies a separate highlights file as `highlights.json`, and
 * writes `meta.json` with a zero hit counter.
 *
 * @param projectRoot Directory against which all relative paths are resolved.
 * @param ctx Fetch parameters; they determine the key and are recorded in the metadata.
 * @param sourceArtifactPath Path of the artifact to copy into the cache.
 * @param extra.highlightsPath Optional highlights file; copied only when it exists.
 * @param extra.ttlSeconds Optional TTL override, mirroring `writeSearchCacheEntry`;
 *   defaults to `defaultCacheTtlSeconds()`.
 * @returns The key and the project-relative entry directory.
 */
export function writeFetchCacheEntry(
  projectRoot: string,
  ctx: FetchCacheContext,
  sourceArtifactPath: string,
  extra?: { highlightsPath?: string; ttlSeconds?: number },
): { cacheKey: string; entryDir: string } {
  const cacheKey = fetchCacheKey(ctx);
  const kind: WebCacheKind = ctx.highlights
    ? "fetch-highlights"
    : ctx.mode === "map"
      ? "fetch-map"
      : "fetch-page";
  const relDir = entryDir(projectRoot, kind, cacheKey);
  const absDir = resolve(projectRoot, relDir);
  mkdirSync(absDir, { recursive: true });

  const artifactName = ctx.highlights
    ? "highlights.json"
    : ctx.mode === "map"
      ? "map.json"
      : "page.md";
  copyFileSync(resolve(projectRoot, sourceArtifactPath), resolve(absDir, artifactName));

  // A separately supplied highlights file is stored next to the main artifact.
  if (extra?.highlightsPath && existsSync(resolve(projectRoot, extra.highlightsPath))) {
    copyFileSync(
      resolve(projectRoot, extra.highlightsPath),
      resolve(absDir, "highlights.json"),
    );
  }

  const ttl = extra?.ttlSeconds ?? defaultCacheTtlSeconds();
  const now = new Date();
  const expires = new Date(now.getTime() + ttl * 1000);
  const meta: WebCacheMeta = {
    version: 1,
    kind,
    cacheKey,
    createdAt: now.toISOString(),
    expiresAt: expires.toISOString(),
    ttlSeconds: ttl,
    context: {
      url: ctx.url,
      mode: ctx.mode,
      fast: ctx.fast,
      highlightQuery: ctx.highlightQuery ?? null,
      highlights: ctx.highlights,
    },
    artifact: artifactName,
    hitCount: 0,
  };
  writeFileSync(
    resolve(absDir, "meta.json"),
    `${JSON.stringify(meta, null, 2)}\n`,
    "utf-8",
  );
  return { cacheKey, entryDir: relDir };
}
|
|
342
|
+
|
|
343
|
+
/**
 * Copy a cached artifact to a stable workspace path for agents
 * (`.web/search-deep.json`, …).
 *
 * @param projectRoot Directory against which both paths are resolved.
 * @param cacheArtifactPath Path of the cached artifact to copy from.
 * @param workspaceBasename File name to publish under the web root.
 * @returns The project-relative path of the published copy.
 */
export function publishWorkspaceAlias(
  projectRoot: string,
  cacheArtifactPath: string,
  workspaceBasename: string,
): string {
  const aliasRelPath = [WEB_ROOT, workspaceBasename].join("/");
  const aliasAbsPath = resolve(projectRoot, aliasRelPath);
  // Make sure the destination directory exists before copying into it.
  mkdirSync(dirname(aliasAbsPath), { recursive: true });
  copyFileSync(resolve(projectRoot, cacheArtifactPath), aliasAbsPath);
  return aliasRelPath;
}
|
|
355
|
+
|
|
356
|
+
/**
 * Render a cache age as a short human-readable string.
 *
 * Ages are shown in whole seconds below one minute, whole minutes below one
 * hour, and hours with one decimal beyond that. The unit is chosen after
 * rounding, so a value just under a boundary rolls over to the next unit
 * (59 999 ms becomes `1m`, never `60s`).
 *
 * @param ageMs Age in milliseconds.
 * @returns `unknown` for a missing, negative or non-finite age; otherwise
 *   a string such as `42s`, `17m` or `3.5h`.
 */
export function formatCacheAge(ageMs: number | undefined): string {
  if (ageMs == null || !Number.isFinite(ageMs) || ageMs < 0) return "unknown";

  const seconds = Math.round(ageMs / 1000);
  if (seconds < 60) return `${seconds}s`;

  const minutes = Math.round(ageMs / 60_000);
  if (minutes < 60) return `${minutes}m`;

  return `${(ageMs / 3_600_000).toFixed(1)}h`;
}
|
|
362
|
+
|
|
363
|
+
/**
 * Build the one-line hint that explains the pooled cache to agents.
 *
 * @returns A single string naming the cache root, the TTL environment
 *   variable, the workspace alias paths and the bypass option.
 */
export function webCacheHint(): string {
  const sentences = [
    `[WRS cache] Pooled under ${WEB_CACHE_ROOT}/ with TTL (HARNESS_WEB_CACHE_TTL_SEC, default 24h). `,
    `Workspace aliases: ${WEB_ROOT}/search-deep.json, ${WEB_ROOT}/angles.yaml, ${WEB_ROOT}/page.md. `,
    `Use refreshCache:true to bypass. Same query+angles reuses SERP without network.`,
  ];
  return sentences.join("");
}
|
|
@@ -46,6 +46,8 @@ export interface SearchHit {
|
|
|
46
46
|
url: string;
|
|
47
47
|
title: string;
|
|
48
48
|
description: string;
|
|
49
|
+
score?: number;
|
|
50
|
+
angle_ids?: string[];
|
|
49
51
|
}
|
|
50
52
|
|
|
51
53
|
export function summarizeSearchJson(filePath: string, cwd: string): string {
|
|
@@ -55,11 +57,15 @@ export function summarizeSearchJson(filePath: string, cwd: string): string {
|
|
|
55
57
|
const data = JSON.parse(readFileSync(full, "utf-8")) as {
|
|
56
58
|
query?: string;
|
|
57
59
|
engine?: string;
|
|
60
|
+
tier?: string;
|
|
61
|
+
mode?: string;
|
|
58
62
|
data?: { web?: SearchHit[] };
|
|
59
63
|
};
|
|
60
64
|
const hits = data.data?.web ?? [];
|
|
65
|
+
const tier = data.tier ?? data.mode ?? "standard";
|
|
61
66
|
const lines = [
|
|
62
67
|
`engine: ${data.engine ?? "unknown"}`,
|
|
68
|
+
`tier: ${tier}`,
|
|
63
69
|
`query: ${data.query ?? ""}`,
|
|
64
70
|
`results: ${hits.length}`,
|
|
65
71
|
"",
|
|
@@ -67,6 +73,12 @@ export function summarizeSearchJson(filePath: string, cwd: string): string {
|
|
|
67
73
|
for (const [i, hit] of hits.entries()) {
|
|
68
74
|
lines.push(`${i + 1}. ${hit.title || "(no title)"}`);
|
|
69
75
|
lines.push(` ${hit.url}`);
|
|
76
|
+
if (hit.score != null) {
|
|
77
|
+
lines.push(` score: ${hit.score}`);
|
|
78
|
+
}
|
|
79
|
+
if (hit.angle_ids?.length) {
|
|
80
|
+
lines.push(` angles: ${hit.angle_ids.join(", ")}`);
|
|
81
|
+
}
|
|
70
82
|
if (hit.description) {
|
|
71
83
|
const snip =
|
|
72
84
|
hit.description.length > 120
|
|
@@ -81,12 +93,40 @@ export function summarizeSearchJson(filePath: string, cwd: string): string {
|
|
|
81
93
|
}
|
|
82
94
|
}
|
|
83
95
|
|
|
96
|
+
/**
 * Summarise a deep-search result file: the regular search summary followed
 * by the list of query angles and a short usage tip.
 *
 * @param filePath Path of the JSON result file, resolved against `cwd`.
 * @param cwd Base directory for `filePath`.
 * @returns An empty string when the file does not exist; the extended
 *   summary when it can be parsed; otherwise whatever `summarizeSearchJson`
 *   produces for the same file.
 */
export function summarizeDeepSearchJson(filePath: string, cwd: string): string {
  const absolutePath = resolve(cwd, filePath);
  if (!existsSync(absolutePath)) return "";
  try {
    const parsed = JSON.parse(readFileSync(absolutePath, "utf-8")) as {
      query?: string;
      angles?: Array<{ id: string; query: string }>;
      data?: { web?: SearchHit[] };
    };
    const out: string[] = [];
    out.push(summarizeSearchJson(filePath, cwd));
    out.push("");
    out.push(`angles: ${parsed.angles?.length ?? 0}`);
    for (const angle of parsed.angles ?? []) {
      out.push(` - ${angle.id}: ${angle.query}`);
    }
    out.push("");
    out.push("Prefer URLs with multiple angle_ids. Use web_fetch highlights on top 3.");
    return out.join("\n");
  } catch {
    // Any read/parse/shape problem degrades to the plain search summary.
    return summarizeSearchJson(filePath, cwd);
  }
}
|
|
120
|
+
|
|
84
121
|
export function harnessWebContextLine(): string {
|
|
85
122
|
const engine = process.env.HARNESS_WEB_SEARCH_ENGINE?.trim() || "ddg_html";
|
|
86
123
|
const searx = process.env.HARNESS_WEB_SEARXNG_URL?.trim();
|
|
87
124
|
const searxPart = searx ? ` searxng_url=${searx}` : "";
|
|
88
125
|
return (
|
|
89
|
-
`[HarnessWeb]
|
|
90
|
-
"
|
|
126
|
+
`[HarnessWeb] engine=${engine}${searxPart} | research: tier=deep + web-query-expander | ` +
|
|
127
|
+
"latency: tier=instant|standard or web-query-expander-fast | " +
|
|
128
|
+
"artifacts: .web/runs/<run_id>/ or .web/sessions/<session_id>/ (not flat .web/answer.md) | " +
|
|
129
|
+
"models: HARNESS_WEB_*_MODEL env (provider/model-id) | " +
|
|
130
|
+
"skill: web-retrieval"
|
|
91
131
|
);
|
|
92
132
|
}
|
|
@@ -120,6 +120,7 @@ Gate: `harness_artifact_ready({ paths: ["artifacts/hypothesis.yaml"] })`.
|
|
|
120
120
|
- Merge both into `research-brief.yaml` (`implementation:` + `stack:`) via parent `write_harness_yaml`.
|
|
121
121
|
- **Partial failure:** if one lane fails, re-spawn that lane once; if still failing write `artifacts/plan-phase-status.yaml` with `plan_status: partial` and `ask_user`. Do not proceed to Phase 4b without both research artifacts unless `artifacts/plan-phase-waiver.yaml` records an explicit waiver.
|
|
122
122
|
- **Web dedup:** implementation owns patterns/repos; stack owns libraries/versions — no overlapping queries.
|
|
123
|
+
- **WRS bundle (parent pre-research):** When running web inline before spawn, use `web-retrieval` deep path: `.web/angles.yaml`, `.web/search-deep.json`, highlight fetches — attach paths in research task context so debate can cite fused SERP scores.
|
|
123
124
|
|
|
124
125
|
Document `human_required` waiver in the run trace only when research is genuinely blocked.
|
|
125
126
|
|
|
@@ -140,6 +140,8 @@ python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "https://example.com" -o .we
|
|
|
140
140
|
|
|
141
141
|
After pi loads extensions, agents should smoke **`web_search`** once (not `UP_PKG` / `import scrapling` preflight). Example intent: query `ultimate-pi harness`, `limit` 2.
|
|
142
142
|
|
|
143
|
+
**WRS subagent models (optional):** set env vars to any Pi `provider/model-id` — `HARNESS_WEB_FAST_MODEL`, `HARNESS_WEB_EXPANDER_MODEL`, `HARNESS_WEB_QUALITY_MODEL` (see **web-retrieval** skill). Add to `.env` via `harness-sync-env.mjs` or export in shell.
|
|
144
|
+
|
|
143
145
|
- **`--skip-tools`:** skip Step 2 (includes Scrapling verify).
|
|
144
146
|
- On Linux/WSL, if stealth scrape fails, install browser libs from `harness-cli-verify.sh` output or use `--fast` for static targets.
|
|
145
147
|
|
|
@@ -188,7 +190,7 @@ python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json
|
|
|
188
190
|
python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "https://example.com" -o .web/page.md --fast
|
|
189
191
|
```
|
|
190
192
|
|
|
191
|
-
See `.agents/skills/
|
|
193
|
+
See `.agents/skills/web-retrieval/SKILL.md` (install + env).
|
|
192
194
|
|
|
193
195
|
### 2.2 — ctx7 (Context7 Library Docs + Skills Management)
|
|
194
196
|
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/** Regenerate .pi/harness/web-heuristic-angles.json from shipped Python defaults. */
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
4
|
+
import { writeFileSync } from "node:fs";
|
|
5
|
+
import { dirname, join } from "node:path";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
|
|
8
|
+
// Directory containing this script; both the output file and the Python
// source of truth are located relative to it.
const scriptsDir = dirname(fileURLToPath(import.meta.url));
const root = join(scriptsDir, "..", "harness");
const out = join(root, "web-heuristic-angles.json");
const py = join(scriptsDir, "harness_web", "heuristic_angles_shipped.py");

// Load the Python module by file path and dump its SHIPPED_HEURISTIC_ANGLES
// value as indented JSON on stdout. JSON.stringify(py) yields a quoted,
// escaped string literal for the path inside the Python snippet.
const json = execFileSync(
  "python3",
  [
    "-c",
    `import json, importlib.util
spec = importlib.util.spec_from_file_location("shipped", ${JSON.stringify(py)})
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)
print(json.dumps(mod.SHIPPED_HEURISTIC_ANGLES, indent=2))`,
  ],
  { encoding: "utf-8" },
);

// Python's print() already terminates stdout with a newline; strip it so the
// file ends with exactly one newline instead of a blank trailing line.
writeFileSync(out, `${json.trimEnd()}\n`, "utf-8");
console.log(`wrote ${out}`);
|
|
@@ -214,6 +214,11 @@ verify_scrapling() {
|
|
|
214
214
|
else
|
|
215
215
|
fail "harness-web search smoke failed (ddg_html)"
|
|
216
216
|
fi
|
|
217
|
+
if python3 "$_hw" search-deep "ultimate-pi harness" --expand-heuristic -o .web/verify-search-deep.json --limit 3 2>/dev/null | grep -q wrote; then
|
|
218
|
+
pass "harness-web search-deep smoke (heuristic angles)"
|
|
219
|
+
else
|
|
220
|
+
fail "harness-web search-deep smoke failed"
|
|
221
|
+
fi
|
|
217
222
|
if python3 "$_hw" scrape "https://example.com" -o .web/verify-page.md --fast 2>/dev/null | grep -q wrote; then
|
|
218
223
|
pass "harness-web scrape --fast smoke"
|
|
219
224
|
else
|
|
@@ -424,9 +424,87 @@ async function main() {
|
|
|
424
424
|
}
|
|
425
425
|
ok("agents.manifest.json in sync");
|
|
426
426
|
|
|
427
|
+
await checkWrsContracts();
|
|
428
|
+
|
|
427
429
|
console.log("\nharness:verify PASS");
|
|
428
430
|
}
|
|
429
431
|
|
|
432
|
+
/**
 * Verify that the files and text markers of the web-retrieval stack (WRS)
 * are present in the package.
 *
 * Checks run in a fixed order: required files exist, then SYSTEM.md,
 * harness-web-tools.ts and run-cli.ts contain the expected markers, then the
 * supporting library, config, Python and agent files exist. Each violation
 * is reported through `fail`; `ok` is called once at the end.
 */
async function checkWrsContracts() {
  const piDir = join(ROOT, ".pi");
  const webLibDir = join(piDir, "lib", "harness-web");
  const webPyDir = join(piDir, "scripts", "harness_web");

  // Report `message` through fail() when `path` does not exist.
  const requireFile = async (path, message) => {
    if (!(await fileExists(path))) fail(message);
  };
  // Report `message` through fail() when `satisfied` is false.
  const requireMarker = (satisfied, message) => {
    if (!satisfied) fail(message);
  };

  const systemMd = join(piDir, "SYSTEM.md");
  const toolsTs = join(piDir, "extensions", "harness-web-tools.ts");
  const runCli = join(webLibDir, "run-cli.ts");
  const webRetrievalSkill = join(ROOT, ".agents", "skills", "web-retrieval", "SKILL.md");
  const adr = join(piDir, "harness", "docs", "adrs", "0050-agentic-web-retrieval-stack.md");

  for (const p of [systemMd, toolsTs, runCli, webRetrievalSkill, adr]) {
    await requireFile(p, `WRS contract missing file: ${p}`);
  }

  const sys = await readFile(systemMd, "utf-8");
  requireMarker(
    sys.includes("tier=deep") || sys.includes('tier: "deep"'),
    "SYSTEM.md must document deep tier default for WRS",
  );
  requireMarker(sys.includes("web-retrieval"), "SYSTEM.md must reference web-retrieval skill");
  requireMarker(
    sys.includes(".web/cache") || sys.includes("HARNESS_WEB_CACHE"),
    "SYSTEM.md must document pooled WRS cache under .web/cache/",
  );

  const tools = await readFile(toolsTs, "utf-8");
  requireMarker(
    tools.includes('Literal("deep")'),
    "harness-web-tools.ts must define tier enum including deep",
  );
  requireMarker(
    tools.includes("anglesFile"),
    "harness-web-tools.ts must expose anglesFile on web_search",
  );

  const cli = await readFile(runCli, "utf-8");
  requireMarker(
    cli.includes("tier=deep"),
    "run-cli.ts harnessWebContextLine must mention tier=deep",
  );

  await requireFile(
    join(webLibDir, "artifacts.ts"),
    "missing harness-web/artifacts.ts for scoped .web paths",
  );
  await requireFile(
    join(webLibDir, "cache.ts"),
    "missing harness-web/cache.ts for pooled .web/cache/",
  );
  requireMarker(
    tools.includes("refreshCache") && tools.includes("lookupSearchCache"),
    "harness-web-tools.ts must implement pooled cache (refreshCache, lookupSearchCache)",
  );
  await requireFile(
    join(piDir, "harness", "web-heuristic-angles.yaml"),
    "missing .pi/harness/web-heuristic-angles.yaml",
  );
  await requireFile(
    join(webPyDir, "heuristic_config.py"),
    "missing harness_web/heuristic_config.py",
  );

  const pythonModules = [join(webPyDir, "rank.py"), join(webPyDir, "deep_search.py")];
  for (const p of pythonModules) {
    await requireFile(p, `WRS python module missing: ${p}`);
  }

  await requireFile(
    join(piDir, "agents", "harness", "web-retrieval", "web-query-expander.md"),
    "missing web-query-expander agent",
  );

  ok("WRS contracts (SYSTEM.md, tools, modules, web-retrieval skill, ADR)");
}
|
|
507
|
+
|
|
430
508
|
main().catch((err) => {
|
|
431
509
|
console.error(err);
|
|
432
510
|
process.exit(1);
|
|
@@ -18,7 +18,7 @@ const ALLOWED_FILES = new Set([
|
|
|
18
18
|
".pi/scripts/harness-web.py",
|
|
19
19
|
".pi/scripts/harness-web-search.md",
|
|
20
20
|
".pi/scripts/harness-web-policy-guard.mjs",
|
|
21
|
-
".agents/skills/
|
|
21
|
+
".agents/skills/web-retrieval/SKILL.md",
|
|
22
22
|
".pi/scripts/harness-cli-verify.sh",
|
|
23
23
|
".pi/scripts/harness_web/output.py",
|
|
24
24
|
"AGENTS.md",
|