ultimate-pi 0.19.0 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.agents/skills/web-retrieval/SKILL.md +163 -0
  2. package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
  3. package/.pi/SYSTEM.md +30 -12
  4. package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
  5. package/.pi/agents/harness/planning/stack-researcher.md +5 -1
  6. package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
  7. package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
  8. package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
  9. package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
  10. package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
  11. package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
  12. package/.pi/extensions/harness-web-guard.ts +2 -1
  13. package/.pi/extensions/harness-web-tools.ts +689 -51
  14. package/.pi/harness/agents.manifest.json +29 -5
  15. package/.pi/harness/agents.policy.yaml +34 -0
  16. package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
  17. package/.pi/harness/docs/harness-web-search.md +97 -0
  18. package/.pi/harness/env.harness.template +9 -1
  19. package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
  20. package/.pi/harness/web-heuristic-angles.json +278 -0
  21. package/.pi/harness/web-heuristic-angles.yaml +182 -0
  22. package/.pi/lib/agents-policy.mjs +6 -0
  23. package/.pi/lib/harness-subagent-auth.ts +39 -9
  24. package/.pi/lib/harness-subagents-bridge.ts +21 -0
  25. package/.pi/lib/harness-web/artifacts.ts +200 -0
  26. package/.pi/lib/harness-web/cache.ts +369 -0
  27. package/.pi/lib/harness-web/run-cli.ts +42 -2
  28. package/.pi/prompts/harness-plan.md +1 -0
  29. package/.pi/prompts/harness-setup.md +3 -1
  30. package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
  31. package/.pi/scripts/harness-cli-verify.sh +5 -0
  32. package/.pi/scripts/harness-verify.mjs +78 -0
  33. package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
  34. package/.pi/scripts/harness-web.py +218 -15
  35. package/.pi/scripts/harness_web/deep_search.py +55 -0
  36. package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
  37. package/.pi/scripts/harness_web/find_similar.py +88 -0
  38. package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
  39. package/.pi/scripts/harness_web/heuristic_config.py +251 -0
  40. package/.pi/scripts/harness_web/highlights.py +47 -0
  41. package/.pi/scripts/harness_web/multi_search.py +59 -0
  42. package/.pi/scripts/harness_web/output.py +24 -0
  43. package/.pi/scripts/harness_web/query_angles.py +116 -0
  44. package/.pi/scripts/harness_web/rank.py +163 -0
  45. package/.pi/scripts/harness_web/scrape.py +30 -0
  46. package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
  47. package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
  48. package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
  49. package/AGENTS.md +2 -2
  50. package/CHANGELOG.md +6 -0
  51. package/package.json +5 -3
  52. package/.agents/skills/scrapling-web/SKILL.md +0 -98
  53. package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
  54. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  56. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  57. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  58. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  59. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  60. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
@@ -0,0 +1,163 @@
1
+ ---
2
+ name: web-retrieval
3
+ description: |
4
+ Agentic Web Retrieval Stack (WRS) — tiered web_search/web_fetch/web_contents (harness-web.py +
5
+ Scrapling). Default tier=deep with web-query-expander angles. Use for search, scrape, landscape,
6
+ prior art, comparisons, planning pre-research, cited answers, similar URLs, websets scoring.
7
+ Triggers on non-API web research, scrape URL, web_search, web_fetch, .web/ artifacts.
8
+ Library API docs → context7 only. Install: /harness-setup or harness-cli-verify.
9
+ allowed-tools: Read Write web_search web_fetch web_find_similar web_contents
10
+ ---
11
+
12
+ # web-retrieval (WRS)
13
+
14
+ Maps user intent → **tier** + pipeline. Pi tools: `web_search`, `web_fetch`, `web_find_similar`, `web_contents` (wrap `harness-web.py`). **Pooled cache** under `.web/cache/`; workspace aliases under `.web/`.
15
+
16
+ **Never before search/fetch:** `UP_PKG` resolution, `ls harness-web.py`, `python3 -c "import scrapling"`, Firecrawl, curl/wget, or scrapling CLI for SERP/fetch.
17
+
18
+ ## Workspace + cache (default)
19
+
20
+ | Layer | Path | Role |
21
+ |-------|------|------|
22
+ | **Cache** | `.web/cache/<kind>/<cacheKey>/` | Pooled SERP + fetch payloads with `meta.json` (query, tier, angles fingerprint, TTL) |
23
+ | **Workspace** | `.web/` (`angles.yaml`, `search-deep.json`, `page.md`, …) | Stable paths agents read/write; refreshed from cache on hit |
24
+
25
+ `web_search` / `web_fetch` return **`cacheHit`**, **`cacheKey`**, **`cachePath`** when reusing pooled results. Same intent + angles skips network until TTL expires.
26
+
27
+ | Control | Meaning |
28
+ |---------|---------|
29
+ | `HARNESS_WEB_CACHE_TTL_SEC` | Default freshness (86400 = 24h) |
30
+ | `HARNESS_WEB_CACHE=0` | Disable pooling |
31
+ | `refreshCache: true` on tool | Force network |
32
+ | `cacheMaxAge` on tool | Stricter max age (seconds) |
33
+
34
+ **Optional isolation** (parallel mutable synthesis): `HARNESS_WEB_ISOLATE=1` → `.web/runs/<run_id>/` or `.web/sessions/<pi_session_id>/` for `answer.md` / evidence when sessions must not share workspace files.
35
+
36
+ `web_search` responses include **`artifactDir`** (usually `.web`). Spawn web-retrieval subagents with `HARNESS_WEB_ARTIFACT_DIR` or `artifactDir` in the task.
37
+
38
+ ## Tier table
39
+
40
+ | User intent | Tier | Steps |
41
+ |-------------|------|-------|
42
+ | One narrow fact (scoped) | `instant` or `standard` | `web_search({ query, tier })` — **no subagent** |
43
+ | Fast open-web with angles | `standard` or `deep` + heuristic | `web-query-expander-fast` **or** `expandHeuristic:true` |
44
+ | "What is X?" needs sources | `deep` | `web-query-expander` → deep → highlights |
45
+ | Landscape / how / compare | `deep` | `web-query-expander` → deep → `web_fetch` highlights top 3 |
46
+ | Answer with citations | `research` | deep → `web_contents` → `web-answerer` |
47
+ | Qualified list (Websets analog) | `research` + verifier | deep → `web-criteria-verifier` → CSV |
48
+ | More like this URL | — | `web_find_similar` |
49
+ | Library API docs | — | **context7** (not WRS) |
50
+
51
+ ## Latency vs quality routing
52
+
53
+ | Priority | Search / subagent | Model / thinking |
54
+ |----------|-------------------|------------------|
55
+ | **Latency** | No expander: `tier=instant` or `standard` only | Parent session (pick a fast model in UI) |
56
+ | **Latency + angles** | `harness/web-retrieval/web-query-expander-fast` → `tier=deep` | `HARNESS_WEB_FAST_MODEL` (optional) |
57
+ | **Emergency angles (no LLM)** | `web_search({ tier: "deep", expandHeuristic: true, category? })` | Templates from `.pi/harness/web-heuristic-angles.yaml` (project file merges on top) |
58
+ | **Default research** | `harness/web-retrieval/web-query-expander` → `tier=deep` | `HARNESS_WEB_EXPANDER_MODEL` or parent |
59
+ | **Gap fill** | `harness/web-retrieval/web-gap-analyzer` | `HARNESS_WEB_FAST_MODEL` or parent |
60
+ | **Cited answer** | `harness/web-retrieval/web-answerer` | `HARNESS_WEB_QUALITY_MODEL` or parent |
61
+ | **Page digest** | `harness/web-retrieval/web-summarizer` | `HARNESS_WEB_FAST_MODEL` or parent |
62
+ | **Criteria scoring** | `harness/web-retrieval/web-criteria-verifier` | `HARNESS_WEB_QUALITY_MODEL` or parent |
63
+
64
+ ## Configuring models (env vars)
65
+
66
+ WRS subagents need a **concrete** `provider/model-id` (same format as Pi — any provider your install supports). Set in shell, `.env`, or harness-synced project env:
67
+
68
+ | Variable | Applies to |
69
+ |----------|------------|
70
+ | `HARNESS_WEB_FAST_MODEL` | `web-query-expander-fast`, `web-summarizer`, `web-gap-analyzer` |
71
+ | `HARNESS_WEB_EXPANDER_MODEL` | `web-query-expander` |
72
+ | `HARNESS_WEB_QUALITY_MODEL` | `web-answerer`, `web-criteria-verifier` |
73
+
74
+ Example (use **your** provider/model ids):
75
+
76
+ ```bash
77
+ export HARNESS_WEB_FAST_MODEL=your-provider/cheap-model
78
+ export HARNESS_WEB_EXPANDER_MODEL=your-provider/balanced-model
79
+ export HARNESS_WEB_QUALITY_MODEL=your-provider/strong-model
80
+ ```
81
+
82
+ If unset for an agent, the subagent inherits the **parent session model** (pick a fast model in Pi when latency matters).
83
+
84
+ Optional per-agent override: `model:` in agent `.md` frontmatter or project `.pi/agents.policy.yaml` (before parent fallback).
85
+
86
+ ## Deep pipeline (default for research)
87
+
88
+ 1. `subagent` **`harness/web-retrieval/web-query-expander`** → parent saves `.web/angles.yaml`
89
+ 2. `web_search({ query: "<intent>", tier: "deep", anglesFile: ".web/angles.yaml" })` — repeats hit cache when context unchanged
90
+ 3. `read` `.web/search-deep.json` — prefer URLs with multiple `angle_ids`
91
+ 4. `web_fetch({ url, highlights: true, highlightQuery: "<intent>" })` on top 3–5
92
+ 5. Optional gap pass: `harness/web-retrieval/web-gap-analyzer` → new angles → second `web_search` deep
93
+
94
+ **Anti-patterns:** bare `web_search({ query })` for open questions; 3+ manual SERP loops; `bulk: true` without need.
95
+
96
+ ## Research profile (`tier=research`)
97
+
98
+ After deep + highlight fetches:
99
+
100
+ ```bash
101
+ python3 "$UP_PKG/.pi/scripts/harness-web.py" contents-batch \
102
+ --from-search "$ARTIFACT_DIR/search-deep.json" \
103
+ --evidence-bundle "$ARTIFACT_DIR/evidence-bundle.json" --limit 5
104
+ ```
105
+
106
+ Spawn **`harness/web-retrieval/web-answerer`** with `artifactDir` / `answerPath: $ARTIFACT_DIR/answer.md`.
107
+
108
+ ## Websets profile
109
+
110
+ Spawn **`harness/web-retrieval/web-criteria-verifier`** with NL criteria + `search-deep.json` candidates → `.web/webset-reasoning.yaml` / CSV manifest.
111
+
112
+ ## Bash fallback (no pi tools)
113
+
114
+ ```bash
115
+ python3 "$UP_PKG/.pi/scripts/harness-web.py" search-deep "query" \
116
+ --expand-heuristic -o .web/search-deep.json --limit 10
117
+ ```
118
+
119
+ ## Subagents
120
+
121
+ | Agent | Role |
122
+ |-------|------|
123
+ | `harness/web-retrieval/web-query-expander` | Angles YAML (research / recall) |
124
+ | `harness/web-retrieval/web-query-expander-fast` | 2–3 angles (latency) |
125
+ | `harness/web-retrieval/web-gap-analyzer` | Follow-up angles |
126
+ | `harness/web-retrieval/web-answerer` | Cited answer |
127
+ | `harness/web-retrieval/web-summarizer` | Single-page digest |
128
+ | `harness/web-retrieval/web-criteria-verifier` | Criteria scoring |
129
+
130
+ ## Heuristic angle templates (no LLM expander)
131
+
132
+ Package defaults: `.pi/harness/web-heuristic-angles.yaml` (`max_angles: 8`). Categories include targeted `site:` angles — e.g. **code**: GitHub, Stack Overflow, Stack Exchange, Read the Docs, MDN, npm/PyPI/Go/Rust registries, Microsoft Learn, HN; **paper**: arXiv, Semantic Scholar, Papers with Code, OpenReview; **security**: NVD, OWASP, CWE; plus **news**, **company**, **people**, **default**.
133
+
134
+ `web_search({ tier: "deep", expandHeuristic: true, category: "code" })`
135
+
136
+ **External projects:** copy `.pi/harness/examples/web-heuristic-angles.project.yaml` → `<project>/.pi/harness/web-heuristic-angles.yaml` and add or override angle ids. Use `{query}` in templates.
137
+
138
+ ## Install (setup / humans only)
139
+
140
+ ```bash
141
+ command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
142
+ uv tool install "scrapling[fetchers]"
143
+ scrapling install # browser binaries for stealth scrape
144
+ bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"
145
+ ```
146
+
147
+ Diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON).
148
+
149
+ ## Env
150
+
151
+ | Variable | Default | Purpose |
152
+ |----------|---------|---------|
153
+ | `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
154
+ | `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` (+ `HARNESS_WEB_SEARXNG_URL`) |
155
+ | `HARNESS_WEB_CACHE_TTL_SEC` | `86400` | Pooled cache TTL |
156
+ | `HARNESS_WEB_CACHE` | on | Set `0` to disable cache |
157
+ | `HARNESS_WEB_ISOLATE` | off | Set `1` for per-run/session workspace dirs |
158
+ | `HARNESS_WEB_RERANK` | — | `off` \| `lexical` |
159
+ | `HARNESS_WEB_DEEP_CONCURRENCY` | `4` | Parallel angle SERP |
160
+ | `HARNESS_WEB_HEURISTIC_ANGLES_FILE` | — | Extra heuristic angles YAML |
161
+ | `HARNESS_WEB_FAST_MODEL` / `HARNESS_WEB_EXPANDER_MODEL` / `HARNESS_WEB_QUALITY_MODEL` | — | Web subagent models |
162
+
163
+ See `.pi/harness/docs/harness-web-search.md` for internals.
@@ -127,16 +127,16 @@ When the user invokes the trigger with NO topic after it, ask:
127
127
  ```
128
128
  Input: topic (from Topic Selection, above)
129
129
 
130
- Round 1. Broad search
131
- 1. Decompose topic into 3-5 distinct search angles
132
- 2. For each angle: run 2-3 `web_search` queries
133
- 3. For top 2-3 results per angle: `web_fetch` each URL (or `read` `.web/` artifacts)
130
+ Round 1. Broad search (WRS deep — do not loop manual SERP)
131
+ 1. Invoke **web-retrieval** skill: `harness/web-retrieval/web-query-expander` → `.web/angles.yaml`
132
+ 2. One `web_search({ query: topic, tier: "deep", anglesFile: ".web/angles.yaml" })` → `.web/search-deep.json`
133
+ 3. For top 2-3 fused URLs: `web_fetch` with `highlights: true` (or `read` `.web/` artifacts)
134
134
  4. Save each fetched page to ./raw/ as a markdown file
135
135
  5. Extract from each: key claims, entities, concepts, open questions
136
136
 
137
137
  Round 2. Gap fill
138
- 6. Identify what's missing or contradicted from Round 1
139
- 7. Run targeted searches for each gap (max 5 queries)
138
+ 6. Identify what's missing or contradicted from Round 1 (`read` `search-deep.json`)
139
+ 7. Optional: `harness/web-retrieval/web-gap-analyzer` → second `web_search` deep with new angles (max one extra deep pass)
140
140
  8. Fetch top results for each gap, save to ./raw/
141
141
  9. Run `graphify extract ./raw --out .` to incorporate new sources
142
142
  (NOTE: `graphify update` only works for code files. Research sources are docs
package/.pi/SYSTEM.md CHANGED
@@ -30,27 +30,45 @@ Scope: this file is the reusable harness-level instruction set. It must work whe
30
30
  ## Web Policy (Mandatory)
31
31
 
32
32
  > [!warning] No raw HTTP
33
- > Route **all** web through [[context7]] for API/library docs or **`web_search` / `web_fetch`** via [[scrapling-web]] for non-API web. Do not use `curl`, `wget`, Firecrawl, or scrapling CLI preflight.
33
+ > Route **all** web through [[context7]] for API/library docs or the **Agentic Web Retrieval Stack (WRS)** — `web_search` / `web_fetch` / `web_find_similar` / `web_contents` via [[web-retrieval]]. Do not use `curl`, `wget`, Firecrawl, or scrapling CLI preflight.
34
34
 
35
35
  ### API / Library Docs — context7 ONLY
36
36
  - `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`.
37
37
  - context7 owns function signatures, class APIs, config options, stdlib, and framework specs.
38
38
  - Never use quality-sites or web_fetch for API docs.
39
39
 
40
- ### Non-API Web — web_search + web_fetch
41
- Use the harness web-search/fetch tools and the `scrapling-web` skill when available. No preflight: never probe package paths, list harness scripts, or import Scrapling before searching.
40
+ ### Non-API Web — WRS (tiered)
41
+ Invoke the **`web-retrieval`** skill before non-trivial open-web work (landscape, prior art, comparisons, planning research). WRS uses a **pooled cache** (`.web/cache/`, TTL via `HARNESS_WEB_CACHE_TTL_SEC`) and **workspace aliases** under `.web/` (`angles.yaml`, `search-deep.json`, `answer.md`). Set `HARNESS_WEB_ISOLATE=1` only when per-run/session file isolation is required.
42
+
43
+ | Tier | When | Pattern |
44
+ |------|------|---------|
45
+ | **`deep`** | **Default** for landscape, prior art, how/why, comparisons, stack/implementation research, multi-source questions | 1) `subagent` `harness/web-retrieval/web-query-expander` → `.web/angles.yaml` 2) `web_search({ query, tier: "deep", anglesFile: ".web/angles.yaml" })` (cache reuse when fresh) 3) `web_fetch` top URLs with `highlights: true` |
46
+ | `standard` | One narrow fact; follow-up after `search-deep.json`; verify one claim | `web_search({ query, tier: "standard", limit: 5 })` |
47
+ | `instant` | Closed-form fact, latency-critical | `web_search({ query, tier: "instant", limit: 5 })` |
48
+ | `research` | Cited answer/report; harness-plan external research | `web-retrieval` `research` profile → deep → contents → `web-answerer` |
42
49
 
43
50
  | Task | Tool |
44
51
  |------|------|
45
- | Search (SERP) | `web_search` (`query`, optional `limit`, `bulk`) |
46
- | Scrape page | `web_fetch` (`url`, optional `fast: true`) |
47
- | Map links | `web_fetch` (`url`, `mode: map`) |
48
-
49
- - Artifacts default under the active project's `.web/`; use `read` for full JSON/markdown artifacts.
50
- - If tools are unavailable, use the installed harness web fallback documented by the `scrapling-web` skill.
51
- - Run setup diagnostics only when troubleshooting web tooling.
52
- - Check local quality-site guidance when present before citing non-API sources. Prefer Tier 1 sources; exclude AI content farms, mirrors, and stale packages.
53
- - For deep research, use `/wiki-autoresearch <topic>` when available and store outputs in the active project's configured research/wiki/graph locations.
52
+ | Multi-angle SERP | `web_search` with `tier: "deep"` + `anglesFile` |
53
+ | Narrow SERP | `web_search` with `tier: "standard"` or `"instant"` |
54
+ | Scrape / highlights | `web_fetch` (`highlights: true` after deep search) |
55
+ | Batch excerpts | `web_contents` |
56
+ | Similar pages | `web_find_similar` |
57
+ | Map links | `web_fetch` (`mode: map`) |
58
+
59
+ **Anti-patterns**
60
+ - Open-ended question with omitted `tier` (weak single-query SERP).
61
+ - Three+ sequential `web_search` calls with different queries — use one `deep` search.
62
+ - `bulk: true` unless you need markdown bodies of top N immediately.
63
+ - Full `web_fetch` when SERP snippets + highlights suffice.
64
+ - `web_search` / `web_fetch` for library APIs — **context7 only**.
65
+
66
+ **After deep search:** `read` `<artifactDir>/search-deep.json`; prefer URLs listed under multiple `angle_ids`.
67
+
68
+ **Latency:** use `tier=instant|standard` without expander when possible; else `harness/web-retrieval/web-query-expander-fast` or `expandHeuristic:true`. **Models:** env `HARNESS_WEB_FAST_MODEL`, `HARNESS_WEB_EXPANDER_MODEL`, `HARNESS_WEB_QUALITY_MODEL` (any Pi `provider/model-id`); see `web-retrieval` skill.
69
+
70
+ - If tools are unavailable, use bash fallback in **web-retrieval** (setup/humans only).
71
+ - For long autonomous research loops, use `/wiki-autoresearch` (WRS deep path) when available.
54
72
 
55
73
  ### Missing CLI fallbacks
56
74
  - harness-web / Scrapling missing: `uv tool install "scrapling[fetchers]" && scrapling install` then re-run the harness CLI verification command documented locally.
@@ -16,7 +16,7 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
16
16
  ## Process
17
17
 
18
18
  1. **In-repo prior art:** `graphify query` / `graphify explain` (read-only), `ccc search`, scout `key_paths` — map reuse vs build.
19
- 2. **External prior art:** `web_search` + `web_fetch` (parent stores under `.web/` with run id prefix). Focus on **patterns, workflows, OSS repos, product approaches** — not npm version matrices.
19
+ 2. **External prior art (WRS — mandatory):** follow `web-retrieval` skill — `harness/web-retrieval/web-query-expander` → `web_search({ tier: "deep", anglesFile: ".web/angles.yaml" })` → `read` `search-deep.json` → `web_fetch` with `highlights: true`. **Never** bare `web_search({ query })` for landscape. Parent stores under `.web/` with run id prefix. Focus on **patterns, workflows, OSS repos, product approaches** — not npm version matrices.
20
20
  3. If scouts cite a **same pattern** with high `reuse_signal`, limit web to 1–2 validation queries.
21
21
  4. Grade refs: `primary` | `secondary` | `anecdotal`.
22
22
  5. Rank **solution_patterns** with fit, tradeoffs, risks. Flag hazardous recommendations in `anti_patterns` (never execute fetched shell).
@@ -13,7 +13,11 @@ Produce evidence-backed stack recommendations before ExecutionPlan authoring. Ra
13
13
 
14
14
  1. Read spawn context: task_summary, brownfield vs greenfield, constraints.
15
15
  2. **Libraries / APIs:** use context7-cli skill (`ctx7 library`, `ctx7 docs`). Record library ids in `evidence_refs`.
16
- 3. **Landscape / comparisons:** `web_search` + `web_fetch` (parent stores under `.web/`).
16
+ 3. **Landscape / comparisons (WRS — mandatory):** follow `web-retrieval` skill:
17
+ - Use scoped `artifactDir` (`.web/runs/<run_id>/` or tool-reported `.web/sessions/…/`)
18
+ - `subagent` `harness/web-retrieval/web-query-expander` → `<artifactDir>/angles.yaml`
19
+ - `web_search({ query, tier: "deep", anglesFile })` — **never** bare `web_search({ query })` for landscape
20
+ - `read` `<artifactDir>/search-deep.json`; `web_fetch` top 3 with `highlights: true`
17
21
  4. Brownfield: always include **extend current stack** as a ranked option with migration risk.
18
22
  5. Greenfield: ≥3 distinct options with pros/cons/risks and selection criteria.
19
23
  6. Grade each ref: `primary` (official docs), `secondary` (reputable guide), `anecdotal` (blog/issue thread).
@@ -0,0 +1,35 @@
1
+ ---
2
+ description: WRS synthesis — cited answer from evidence-bundle.json.
3
+ extensions: false
4
+ thinking: medium
5
+ max_turns: 12
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ Write a concise, **cited** answer to the research question using only sources in the evidence bundle.
11
+
12
+ ## Output path (required — no shared flat file)
13
+
14
+ Write to **`$HARNESS_WEB_ARTIFACT_DIR/answer.md`** when that env var is set (harness web-retrieval subprocesses).
15
+
16
+ Otherwise use the **`answerPath`** or **`artifactDir`** the parent gives in the spawn task (e.g. `.web/sessions/<id>/answer.md` or `.web/runs/<run_id>/answer.md`).
17
+
18
+ **Never** write to flat `.web/answer.md` — it collides across parallel sessions.
19
+
20
+ ## Input
21
+
22
+ Read the evidence bundle path from the parent task (default: same directory as the answer file, file name `evidence-bundle.json`). Each source has url, title, description, optional highlights.
23
+
24
+ ## Output format
25
+
26
+ Write markdown to the resolved answer path via parent tooling or include full content in final message:
27
+
28
+ - Lead with a direct answer (2–4 sentences).
29
+ - Supporting bullets with inline citations `[title](url)`.
30
+ - "Sources" section listing URLs used.
31
+ - Flag uncertainty where evidence is thin.
32
+
33
+ Do **not** invent URLs. Do **not** call web_search.
34
+
35
+ Bus label: `WebAnswerer`.
@@ -0,0 +1,28 @@
1
+ ---
2
+ description: WRS Websets analog — score candidates against NL criteria (YAML/CSV output).
3
+ extensions: false
4
+ thinking: medium
5
+ max_turns: 14
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ Given NL **criteria** and a list of candidate URLs/titles/snippets (from search-deep.json), score each candidate and explain match quality.
11
+
12
+ ## Output
13
+
14
+ Fenced YAML:
15
+
16
+ ```yaml
17
+ criteria: "<restated criteria>"
18
+ results:
19
+ - url: "..."
20
+ title: "..."
21
+ match: true|false
22
+ score: 0.0-1.0
23
+ reason: "<one sentence>"
24
+ ```
25
+
26
+ Parent may convert to `.web/webset-manifest.csv`. Do **not** call web_search.
27
+
28
+ Bus label: `WebCriteriaVerifier`.
@@ -0,0 +1,31 @@
1
+ ---
2
+ description: WRS gap-fill — read search-deep.json, propose follow-up angles for missing coverage.
3
+ extensions: false
4
+ thinking: low
5
+ max_turns: 10
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ After a deep search, identify **gaps** (missing facets, contradictions, stale angles) and output **1–3 new search angles** only.
11
+
12
+ ## Input
13
+
14
+ Parent provides paths to `.web/search-deep.json` and research intent. Use `read` on those artifacts.
15
+
16
+ ## Output (only)
17
+
18
+ Fenced YAML:
19
+
20
+ ```yaml
21
+ gaps:
22
+ - "<what is missing>"
23
+ angles:
24
+ - id: gap_1
25
+ query: "..."
26
+ rationale: "..."
27
+ ```
28
+
29
+ Do **not** call web tools. Parent runs `web_search(tier=deep, anglesFile=...)`.
30
+
31
+ Bus label: `WebGapAnalyzer`.
@@ -0,0 +1,34 @@
1
+ ---
2
+ description: WRS fast query planner — 2–3 angles only for latency-sensitive search (YAML only).
3
+ extensions: false
4
+ thinking: off
5
+ max_turns: 5
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ Same as `web-query-expander`, but **optimized for speed**: produce **2–3** angles only (not 4–5). No web tools.
11
+
12
+ ## When to use (parent)
13
+
14
+ - User asked for **fast** / **quick** / **low latency** open-web lookup
15
+ - `web_search` with `tier: "instant"` or `tier: "standard"` where angles still help (optional)
16
+ - **Not** for landscape, prior art, comparisons, or harness-plan research — use `harness/web-retrieval/web-query-expander` instead
17
+
18
+ ## Output (only)
19
+
20
+ ```yaml
21
+ intent: "<one sentence>"
22
+ category: null
23
+ angles:
24
+ - id: core
25
+ query: "<short query>"
26
+ rationale: "..."
27
+ - id: official
28
+ query: "..."
29
+ rationale: "..."
30
+ ```
31
+
32
+ Keep queries ≤10 words. Do not call `web_search` or `web_fetch`.
33
+
34
+ Bus label: `WebQueryExpanderFast`.
@@ -0,0 +1,60 @@
1
+ ---
2
+ description: WRS query planner — NL intent to 4-5 SearXNG-optimized search angles (YAML only).
3
+ extensions: false
4
+ thinking: low
5
+ max_turns: 8
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ Convert a research intent into **4–5 distinct search angles** optimized for DuckDuckGo / SearXNG keyword search. You do **not** search the web yourself.
11
+
12
+ ## When parent should spawn you (not `web-query-expander-fast`)
13
+
14
+ - Landscape, prior art, comparisons, stack/implementation research, harness-plan external research
15
+ - Any question where **recall** matters more than latency
16
+
17
+ For **fast / narrow** paths, parent should spawn `harness/web-retrieval/web-query-expander-fast` or skip expander and use `tier=instant|standard` with `expandHeuristic:true`.
18
+
19
+ ## Output (only)
20
+
21
+ Respond with a single fenced YAML block and nothing else:
22
+
23
+ ```yaml
24
+ intent: "<restated intent in one sentence>"
25
+ category: null # or code|company|people|paper|news
26
+ angles:
27
+ - id: official
28
+ query: "<short keyword-dense query>"
29
+ rationale: "<why this angle>"
30
+ - id: technical
31
+ query: "..."
32
+ rationale: "..."
33
+ # 4-5 angles total
34
+ ```
35
+
36
+ ## Angle design rules
37
+
38
+ - Each `query` must be **short** (≤12 words unless `site:` operator needed).
39
+ - Angles must be **distinct** (definitional, official docs, technical depth, criticism/limitations, recent news, implementations/repos).
40
+ - Use operators when helpful: `site:github.com`, `site:arxiv.org`, `filetype:pdf`, quoted phrases.
41
+ - Do **not** duplicate the same phrasing across angles.
42
+ - Do **not** call `web_search` or `web_fetch`.
43
+
44
+ ## Category packs (when spawn context includes category)
45
+
46
+ Subagent output is LLM-crafted. For **heuristic** fallback (`expandHeuristic:true`), category packs come from YAML:
47
+
48
+ - Package: `.pi/harness/web-heuristic-angles.yaml`
49
+ - Project override: `<project>/.pi/harness/web-heuristic-angles.yaml` (see `examples/web-heuristic-angles.project.yaml`)
50
+
51
+ | category | Default heuristic angles (configurable) |
52
+ |----------|----------------------------------------|
53
+ | code | github, stackoverflow, … |
54
+ | company | official site, news, … |
55
+ | people | linkedin, biography |
56
+ | paper | arxiv, scholar |
57
+ | news | recent year in query |
58
+ | *(custom)* | Add your own category key in project YAML |
59
+
60
+ Bus label: `WebQueryExpander`.
@@ -0,0 +1,18 @@
1
+ ---
2
+ description: WRS page digest — summarize a fetched markdown excerpt.
3
+ extensions: false
4
+ thinking: low
5
+ max_turns: 6
6
+ ---
7
+
8
+ ## Your task
9
+
10
+ Produce a 5–8 bullet summary of a single page excerpt for the parent agent. Read the provided `.web/*.md` or excerpt path only.
11
+
12
+ ## Rules
13
+
14
+ - Bullets only; no preamble.
15
+ - Preserve factual claims; note if page is marketing-heavy.
16
+ - Do not call web tools.
17
+
18
+ Bus label: `WebSummarizer`.
@@ -5,7 +5,8 @@
5
5
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
6
6
 
7
7
  const BLOCK_REASON =
8
- "harness-web-guard: use web_search (SERP) or web_fetch (page content) instead of raw curl/wget/firecrawl/scrapling fetch. " +
8
+ "harness-web-guard: use web_search (tier=deep for research), web_fetch, web_find_similar, or web_contents " +
9
+ "not raw curl/wget/firecrawl/scrapling fetch. See web-retrieval skill. " +
9
10
  "Setup may use harness-web.py status directly.";
10
11
 
11
12
  const ALLOW_PATTERNS = [