ultimate-pi 0.19.0 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/SYSTEM.md +30 -12
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/stack-researcher.md +5 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +689 -51
- package/.pi/harness/agents.manifest.json +29 -5
- package/.pi/harness/agents.policy.yaml +34 -0
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.mjs +6 -0
- package/.pi/lib/harness-subagent-auth.ts +39 -9
- package/.pi/lib/harness-subagents-bridge.ts +21 -0
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/lib/harness-web/run-cli.ts +42 -2
- package/.pi/prompts/harness-plan.md +1 -0
- package/.pi/prompts/harness-setup.md +3 -1
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/harness-cli-verify.sh +5 -0
- package/.pi/scripts/harness-verify.mjs +78 -0
- package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +6 -0
- package/package.json +5 -3
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: web-retrieval
|
|
3
|
+
description: |
|
|
4
|
+
Agentic Web Retrieval Stack (WRS) — tiered web_search/web_fetch/web_contents (harness-web.py +
|
|
5
|
+
Scrapling). Default tier=deep with web-query-expander angles. Use for search, scrape, landscape,
|
|
6
|
+
prior art, comparisons, planning pre-research, cited answers, similar URLs, websets scoring.
|
|
7
|
+
Triggers on non-API web research, scrape URL, web_search, web_fetch, .web/ artifacts.
|
|
8
|
+
Library API docs → context7 only. Install: /harness-setup or harness-cli-verify.
|
|
9
|
+
allowed-tools: Read Write web_search web_fetch web_find_similar web_contents
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
# web-retrieval (WRS)
|
|
13
|
+
|
|
14
|
+
Maps user intent → **tier** + pipeline. Pi tools: `web_search`, `web_fetch`, `web_find_similar`, `web_contents` (wrap `harness-web.py`). **Pooled cache** under `.web/cache/`; workspace aliases under `.web/`.
|
|
15
|
+
|
|
16
|
+
**Never before search/fetch:** `UP_PKG` resolution, `ls harness-web.py`, `python3 -c "import scrapling"`, Firecrawl, curl/wget, or scrapling CLI for SERP/fetch.
|
|
17
|
+
|
|
18
|
+
## Workspace + cache (default)
|
|
19
|
+
|
|
20
|
+
| Layer | Path | Role |
|
|
21
|
+
|-------|------|------|
|
|
22
|
+
| **Cache** | `.web/cache/<kind>/<cacheKey>/` | Pooled SERP + fetch payloads with `meta.json` (query, tier, angles fingerprint, TTL) |
|
|
23
|
+
| **Workspace** | `.web/` (`angles.yaml`, `search-deep.json`, `page.md`, …) | Stable paths agents read/write; refreshed from cache on hit |
|
|
24
|
+
|
|
25
|
+
`web_search` / `web_fetch` return **`cacheHit`**, **`cacheKey`**, **`cachePath`** when reusing pooled results. Same intent + angles skips network until TTL expires.
|
|
26
|
+
|
|
27
|
+
| Control | Meaning |
|
|
28
|
+
|---------|---------|
|
|
29
|
+
| `HARNESS_WEB_CACHE_TTL_SEC` | Default freshness (86400 = 24h) |
|
|
30
|
+
| `HARNESS_WEB_CACHE=0` | Disable pooling |
|
|
31
|
+
| `refreshCache: true` on tool | Force network |
|
|
32
|
+
| `cacheMaxAge` on tool | Stricter max age (seconds) |
|
|
33
|
+
|
|
34
|
+
**Optional isolation** (parallel mutable synthesis): `HARNESS_WEB_ISOLATE=1` → `.web/runs/<run_id>/` or `.web/sessions/<pi_session_id>/` for `answer.md` / evidence when sessions must not share workspace files.
|
|
35
|
+
|
|
36
|
+
`web_search` responses include **`artifactDir`** (usually `.web`). Spawn web-retrieval subagents with `HARNESS_WEB_ARTIFACT_DIR` or `artifactDir` in the task.
|
|
37
|
+
|
|
38
|
+
## Tier table
|
|
39
|
+
|
|
40
|
+
| User intent | Tier | Steps |
|
|
41
|
+
|-------------|------|-------|
|
|
42
|
+
| One narrow fact (scoped) | `instant` or `standard` | `web_search({ query, tier })` — **no subagent** |
|
|
43
|
+
| Fast open-web with angles | `standard` or `deep` + heuristic | `web-query-expander-fast` **or** `expandHeuristic:true` |
|
|
44
|
+
| "What is X?" needs sources | `deep` | `web-query-expander` → deep → highlights |
|
|
45
|
+
| Landscape / how / compare | `deep` | `web-query-expander` → deep → `web_fetch` highlights top 3 |
|
|
46
|
+
| Answer with citations | `research` | deep → `web_contents` → `web-answerer` |
|
|
47
|
+
| Qualified list (Websets analog) | `research` + verifier | deep → `web-criteria-verifier` → CSV |
|
|
48
|
+
| More like this URL | — | `web_find_similar` |
|
|
49
|
+
| Library API docs | — | **context7** (not WRS) |
|
|
50
|
+
|
|
51
|
+
## Latency vs quality routing
|
|
52
|
+
|
|
53
|
+
| Priority | Search / subagent | Model / thinking |
|
|
54
|
+
|----------|-------------------|------------------|
|
|
55
|
+
| **Latency** | No expander: `tier=instant` or `standard` only | Parent session (pick a fast model in UI) |
|
|
56
|
+
| **Latency + angles** | `harness/web-retrieval/web-query-expander-fast` → `tier=deep` | `HARNESS_WEB_FAST_MODEL` (optional) |
|
|
57
|
+
| **Emergency angles (no LLM)** | `web_search({ tier: "deep", expandHeuristic: true, category? })` | Templates from `.pi/harness/web-heuristic-angles.yaml` (project file merges on top) |
|
|
58
|
+
| **Default research** | `harness/web-retrieval/web-query-expander` → `tier=deep` | `HARNESS_WEB_EXPANDER_MODEL` or parent |
|
|
59
|
+
| **Gap fill** | `harness/web-retrieval/web-gap-analyzer` | `HARNESS_WEB_FAST_MODEL` or parent |
|
|
60
|
+
| **Cited answer** | `harness/web-retrieval/web-answerer` | `HARNESS_WEB_QUALITY_MODEL` or parent |
|
|
61
|
+
| **Page digest** | `harness/web-retrieval/web-summarizer` | `HARNESS_WEB_FAST_MODEL` or parent |
|
|
62
|
+
| **Criteria scoring** | `harness/web-retrieval/web-criteria-verifier` | `HARNESS_WEB_QUALITY_MODEL` or parent |
|
|
63
|
+
|
|
64
|
+
## Configuring models (env vars)
|
|
65
|
+
|
|
66
|
+
WRS subagents need a **concrete** `provider/model-id` (same format as Pi — any provider your install supports). Set in shell, `.env`, or harness-synced project env:
|
|
67
|
+
|
|
68
|
+
| Variable | Applies to |
|
|
69
|
+
|----------|------------|
|
|
70
|
+
| `HARNESS_WEB_FAST_MODEL` | `web-query-expander-fast`, `web-summarizer`, `web-gap-analyzer` |
|
|
71
|
+
| `HARNESS_WEB_EXPANDER_MODEL` | `web-query-expander` |
|
|
72
|
+
| `HARNESS_WEB_QUALITY_MODEL` | `web-answerer`, `web-criteria-verifier` |
|
|
73
|
+
|
|
74
|
+
Example (use **your** provider/model ids):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
export HARNESS_WEB_FAST_MODEL=your-provider/cheap-model
|
|
78
|
+
export HARNESS_WEB_EXPANDER_MODEL=your-provider/balanced-model
|
|
79
|
+
export HARNESS_WEB_QUALITY_MODEL=your-provider/strong-model
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
If unset for an agent, the subagent inherits the **parent session model** (pick a fast model in Pi when latency matters).
|
|
83
|
+
|
|
84
|
+
Optional per-agent override: `model:` in agent `.md` frontmatter or project `.pi/agents.policy.yaml` (before parent fallback).
|
|
85
|
+
|
|
86
|
+
## Deep pipeline (default for research)
|
|
87
|
+
|
|
88
|
+
1. `subagent` **`harness/web-retrieval/web-query-expander`** → parent saves `.web/angles.yaml`
|
|
89
|
+
2. `web_search({ query: "<intent>", tier: "deep", anglesFile: ".web/angles.yaml" })` — repeats hit cache when context unchanged
|
|
90
|
+
3. `read` `.web/search-deep.json` — prefer URLs with multiple `angle_ids`
|
|
91
|
+
4. `web_fetch({ url, highlights: true, highlightQuery: "<intent>" })` on top 3–5
|
|
92
|
+
5. Optional gap pass: `harness/web-retrieval/web-gap-analyzer` → new angles → second `web_search` deep
|
|
93
|
+
|
|
94
|
+
**Anti-patterns:** bare `web_search({ query })` for open questions; 3+ manual SERP loops; `bulk: true` without need.
|
|
95
|
+
|
|
96
|
+
## Research profile (`tier=research`)
|
|
97
|
+
|
|
98
|
+
After deep + highlight fetches:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
python3 "$UP_PKG/.pi/scripts/harness-web.py" contents-batch \
|
|
102
|
+
--from-search "$ARTIFACT_DIR/search-deep.json" \
|
|
103
|
+
--evidence-bundle "$ARTIFACT_DIR/evidence-bundle.json" --limit 5
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Spawn **`harness/web-retrieval/web-answerer`** with `artifactDir` / `answerPath: $ARTIFACT_DIR/answer.md`.
|
|
107
|
+
|
|
108
|
+
## Websets profile
|
|
109
|
+
|
|
110
|
+
Spawn **`harness/web-retrieval/web-criteria-verifier`** with NL criteria + `search-deep.json` candidates → `.web/webset-reasoning.yaml` / CSV manifest.
|
|
111
|
+
|
|
112
|
+
## Bash fallback (no pi tools)
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
python3 "$UP_PKG/.pi/scripts/harness-web.py" search-deep "query" \
|
|
116
|
+
--expand-heuristic -o .web/search-deep.json --limit 10
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Subagents
|
|
120
|
+
|
|
121
|
+
| Agent | Role |
|
|
122
|
+
|-------|------|
|
|
123
|
+
| `harness/web-retrieval/web-query-expander` | Angles YAML (research / recall) |
|
|
124
|
+
| `harness/web-retrieval/web-query-expander-fast` | 2–3 angles (latency) |
|
|
125
|
+
| `harness/web-retrieval/web-gap-analyzer` | Follow-up angles |
|
|
126
|
+
| `harness/web-retrieval/web-answerer` | Cited answer |
|
|
127
|
+
| `harness/web-retrieval/web-summarizer` | Single-page digest |
|
|
128
|
+
| `harness/web-retrieval/web-criteria-verifier` | Criteria scoring |
|
|
129
|
+
|
|
130
|
+
## Heuristic angle templates (no LLM expander)
|
|
131
|
+
|
|
132
|
+
Package defaults: `.pi/harness/web-heuristic-angles.yaml` (`max_angles: 8`). Categories include targeted `site:` angles — e.g. **code**: GitHub, Stack Overflow, Stack Exchange, Read the Docs, MDN, npm/PyPI/Go/Rust registries, Microsoft Learn, HN; **paper**: arXiv, Semantic Scholar, Papers with Code, OpenReview; **security**: NVD, OWASP, CWE; plus **news**, **company**, **people**, **default**.
|
|
133
|
+
|
|
134
|
+
`web_search({ tier: "deep", expandHeuristic: true, category: "code" })`
|
|
135
|
+
|
|
136
|
+
**External projects:** copy `.pi/harness/examples/web-heuristic-angles.project.yaml` → `<project>/.pi/harness/web-heuristic-angles.yaml` and add or override angle ids. Use `{query}` in templates.
|
|
137
|
+
|
|
138
|
+
## Install (setup / humans only)
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
142
|
+
uv tool install "scrapling[fetchers]"
|
|
143
|
+
scrapling install # browser binaries for stealth scrape
|
|
144
|
+
bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON).
|
|
148
|
+
|
|
149
|
+
## Env
|
|
150
|
+
|
|
151
|
+
| Variable | Default | Purpose |
|
|
152
|
+
|----------|---------|---------|
|
|
153
|
+
| `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
|
|
154
|
+
| `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` (+ `HARNESS_WEB_SEARXNG_URL`) |
|
|
155
|
+
| `HARNESS_WEB_CACHE_TTL_SEC` | `86400` | Pooled cache TTL |
|
|
156
|
+
| `HARNESS_WEB_CACHE` | on | Set `0` to disable cache |
|
|
157
|
+
| `HARNESS_WEB_ISOLATE` | off | Set `1` for per-run/session workspace dirs |
|
|
158
|
+
| `HARNESS_WEB_RERANK` | — | `off` \| `lexical` |
|
|
159
|
+
| `HARNESS_WEB_DEEP_CONCURRENCY` | `4` | Parallel angle SERP |
|
|
160
|
+
| `HARNESS_WEB_HEURISTIC_ANGLES_FILE` | — | Extra heuristic angles YAML |
|
|
161
|
+
| `HARNESS_WEB_FAST_MODEL` / `HARNESS_WEB_EXPANDER_MODEL` / `HARNESS_WEB_QUALITY_MODEL` | — | Web subagent models |
|
|
162
|
+
|
|
163
|
+
See `.pi/harness/docs/harness-web-search.md` for internals.
|
|
@@ -127,16 +127,16 @@ When the user invokes the trigger with NO topic after it, ask:
|
|
|
127
127
|
```
|
|
128
128
|
Input: topic (from Topic Selection, above)
|
|
129
129
|
|
|
130
|
-
Round 1. Broad search
|
|
131
|
-
1.
|
|
132
|
-
2.
|
|
133
|
-
3. For top 2-3
|
|
130
|
+
Round 1. Broad search (WRS deep — do not loop manual SERP)
|
|
131
|
+
1. Invoke **web-retrieval** skill: `harness/web-retrieval/web-query-expander` → `.web/angles.yaml`
|
|
132
|
+
2. One `web_search({ query: topic, tier: "deep", anglesFile: ".web/angles.yaml" })` → `.web/search-deep.json`
|
|
133
|
+
3. For top 2-3 fused URLs: `web_fetch` with `highlights: true` (or `read` `.web/` artifacts)
|
|
134
134
|
4. Save each fetched page to ./raw/ as a markdown file
|
|
135
135
|
5. Extract from each: key claims, entities, concepts, open questions
|
|
136
136
|
|
|
137
137
|
Round 2. Gap fill
|
|
138
|
-
6. Identify what's missing or contradicted from Round 1
|
|
139
|
-
7.
|
|
138
|
+
6. Identify what's missing or contradicted from Round 1 (`read` `search-deep.json`)
|
|
139
|
+
7. Optional: `harness/web-retrieval/web-gap-analyzer` → second `web_search` deep with new angles (max one extra deep pass)
|
|
140
140
|
8. Fetch top results for each gap, save to ./raw/
|
|
141
141
|
9. Run `graphify extract ./raw --out .` to incorporate new sources
|
|
142
142
|
(NOTE: `graphify update` only works for code files. Research sources are docs
|
package/.pi/SYSTEM.md
CHANGED
|
@@ -30,27 +30,45 @@ Scope: this file is the reusable harness-level instruction set. It must work whe
|
|
|
30
30
|
## Web Policy (Mandatory)
|
|
31
31
|
|
|
32
32
|
> [!warning] No raw HTTP
|
|
33
|
-
> Route **all** web through [[context7]] for API/library docs or
|
|
33
|
+
> Route **all** web through [[context7]] for API/library docs or the **Agentic Web Retrieval Stack (WRS)** — `web_search` / `web_fetch` / `web_find_similar` / `web_contents` via [[web-retrieval]]. Do not use `curl`, `wget`, Firecrawl, or scrapling CLI preflight.
|
|
34
34
|
|
|
35
35
|
### API / Library Docs — context7 ONLY
|
|
36
36
|
- `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`.
|
|
37
37
|
- context7 owns function signatures, class APIs, config options, stdlib, and framework specs.
|
|
38
38
|
- Never use quality-sites or web_fetch for API docs.
|
|
39
39
|
|
|
40
|
-
### Non-API Web —
|
|
41
|
-
|
|
40
|
+
### Non-API Web — WRS (tiered)
|
|
41
|
+
Invoke the **`web-retrieval`** skill before non-trivial open-web work (landscape, prior art, comparisons, planning research). WRS uses a **pooled cache** (`.web/cache/`, TTL via `HARNESS_WEB_CACHE_TTL_SEC`) and **workspace aliases** under `.web/` (`angles.yaml`, `search-deep.json`, `answer.md`). Set `HARNESS_WEB_ISOLATE=1` only when per-run/session file isolation is required.
|
|
42
|
+
|
|
43
|
+
| Tier | When | Pattern |
|
|
44
|
+
|------|------|---------|
|
|
45
|
+
| **`deep`** | **Default** for landscape, prior art, how/why, comparisons, stack/implementation research, multi-source questions | 1) `subagent` `harness/web-retrieval/web-query-expander` → `.web/angles.yaml` 2) `web_search({ query, tier: "deep", anglesFile: ".web/angles.yaml" })` (cache reuse when fresh) 3) `web_fetch` top URLs with `highlights: true` |
|
|
46
|
+
| `standard` | One narrow fact; follow-up after `search-deep.json`; verify one claim | `web_search({ query, tier: "standard", limit: 5 })` |
|
|
47
|
+
| `instant` | Closed-form fact, latency-critical | `web_search({ query, tier: "instant", limit: 5 })` |
|
|
48
|
+
| `research` | Cited answer/report; harness-plan external research | `web-retrieval` `research` profile → deep → contents → `web-answerer` |
|
|
42
49
|
|
|
43
50
|
| Task | Tool |
|
|
44
51
|
|------|------|
|
|
45
|
-
|
|
|
46
|
-
|
|
|
47
|
-
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
-
|
|
53
|
-
-
|
|
52
|
+
| Multi-angle SERP | `web_search` with `tier: "deep"` + `anglesFile` |
|
|
53
|
+
| Narrow SERP | `web_search` with `tier: "standard"` or `"instant"` |
|
|
54
|
+
| Scrape / highlights | `web_fetch` (`highlights: true` after deep search) |
|
|
55
|
+
| Batch excerpts | `web_contents` |
|
|
56
|
+
| Similar pages | `web_find_similar` |
|
|
57
|
+
| Map links | `web_fetch` (`mode: map`) |
|
|
58
|
+
|
|
59
|
+
**Anti-patterns**
|
|
60
|
+
- Open-ended question with omitted `tier` (weak single-query SERP).
|
|
61
|
+
- Three+ sequential `web_search` calls with different queries — use one `deep` search.
|
|
62
|
+
- `bulk: true` unless you need markdown bodies of top N immediately.
|
|
63
|
+
- Full `web_fetch` when SERP snippets + highlights suffice.
|
|
64
|
+
- `web_search` / `web_fetch` for library APIs — **context7 only**.
|
|
65
|
+
|
|
66
|
+
**After deep search:** `read` `<artifactDir>/search-deep.json`; prefer URLs listed under multiple `angle_ids`.
|
|
67
|
+
|
|
68
|
+
**Latency:** use `tier=instant|standard` without expander when possible; else `harness/web-retrieval/web-query-expander-fast` or `expandHeuristic:true`. **Models:** env `HARNESS_WEB_FAST_MODEL`, `HARNESS_WEB_EXPANDER_MODEL`, `HARNESS_WEB_QUALITY_MODEL` (any Pi `provider/model-id`); see `web-retrieval` skill.
|
|
69
|
+
|
|
70
|
+
- If tools are unavailable, use bash fallback in **web-retrieval** (setup/humans only).
|
|
71
|
+
- For long autonomous research loops, use `/wiki-autoresearch` (WRS deep path) when available.
|
|
54
72
|
|
|
55
73
|
### Missing CLI fallbacks
|
|
56
74
|
- harness-web / Scrapling missing: `uv tool install "scrapling[fetchers]" && scrapling install` then re-run the harness CLI verification command documented locally.
|
|
@@ -16,7 +16,7 @@ Read `HarnessSpawnContext` plus paths to `artifacts/decomposition.yaml`, `artifa
|
|
|
16
16
|
## Process
|
|
17
17
|
|
|
18
18
|
1. **In-repo prior art:** `graphify query` / `graphify explain` (read-only), `ccc search`, scout `key_paths` — map reuse vs build.
|
|
19
|
-
2. **External prior art:** `web_search`
|
|
19
|
+
2. **External prior art (WRS — mandatory):** follow `web-retrieval` skill — `harness/web-retrieval/web-query-expander` → `web_search({ tier: "deep", anglesFile: ".web/angles.yaml" })` → `read` `search-deep.json` → `web_fetch` with `highlights: true`. **Never** bare `web_search({ query })` for landscape. Parent stores under `.web/` with run id prefix. Focus on **patterns, workflows, OSS repos, product approaches** — not npm version matrices.
|
|
20
20
|
3. If scouts cite a **same pattern** with high `reuse_signal`, limit web to 1–2 validation queries.
|
|
21
21
|
4. Grade refs: `primary` | `secondary` | `anecdotal`.
|
|
22
22
|
5. Rank **solution_patterns** with fit, tradeoffs, risks. Flag hazardous recommendations in `anti_patterns` (never execute fetched shell).
|
|
@@ -13,7 +13,11 @@ Produce evidence-backed stack recommendations before ExecutionPlan authoring. Ra
|
|
|
13
13
|
|
|
14
14
|
1. Read spawn context: task_summary, brownfield vs greenfield, constraints.
|
|
15
15
|
2. **Libraries / APIs:** use context7-cli skill (`ctx7 library`, `ctx7 docs`). Record library ids in `evidence_refs`.
|
|
16
|
-
3. **Landscape / comparisons:**
|
|
16
|
+
3. **Landscape / comparisons (WRS — mandatory):** follow `web-retrieval` skill:
|
|
17
|
+
- Use scoped `artifactDir` (`.web/runs/<run_id>/` or tool-reported `.web/sessions/…/`)
|
|
18
|
+
- `subagent` `harness/web-retrieval/web-query-expander` → `<artifactDir>/angles.yaml`
|
|
19
|
+
- `web_search({ query, tier: "deep", anglesFile })` — **never** bare `web_search({ query })` for landscape
|
|
20
|
+
- `read` `<artifactDir>/search-deep.json`; `web_fetch` top 3 with `highlights: true`
|
|
17
21
|
4. Brownfield: always include **extend current stack** as a ranked option with migration risk.
|
|
18
22
|
5. Greenfield: ≥3 distinct options with pros/cons/risks and selection criteria.
|
|
19
23
|
6. Grade each ref: `primary` (official docs), `secondary` (reputable guide), `anecdotal` (blog/issue thread).
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS synthesis — cited answer from evidence-bundle.json.
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: medium
|
|
5
|
+
max_turns: 12
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
Write a concise, **cited** answer to the research question using only sources in the evidence bundle.
|
|
11
|
+
|
|
12
|
+
## Output path (required — no shared flat file)
|
|
13
|
+
|
|
14
|
+
Write to **`$HARNESS_WEB_ARTIFACT_DIR/answer.md`** when that env var is set (harness web-retrieval subprocesses).
|
|
15
|
+
|
|
16
|
+
Otherwise use the **`answerPath`** or **`artifactDir`** the parent gives in the spawn task (e.g. `.web/sessions/<id>/answer.md` or `.web/runs/<run_id>/answer.md`).
|
|
17
|
+
|
|
18
|
+
**Never** write to flat `.web/answer.md` — it collides across parallel sessions.
|
|
19
|
+
|
|
20
|
+
## Input
|
|
21
|
+
|
|
22
|
+
Read the evidence bundle path from the parent task (default: same directory as the answer file, file name `evidence-bundle.json`). Each source has url, title, description, optional highlights.
|
|
23
|
+
|
|
24
|
+
## Output format
|
|
25
|
+
|
|
26
|
+
Write markdown to the resolved answer path (via parent tooling), or include the full content in your final message:
|
|
27
|
+
|
|
28
|
+
- Lead with a direct answer (2–4 sentences).
|
|
29
|
+
- Supporting bullets with inline citations `[title](url)`.
|
|
30
|
+
- "Sources" section listing URLs used.
|
|
31
|
+
- Flag uncertainty where evidence is thin.
|
|
32
|
+
|
|
33
|
+
Do **not** invent URLs. Do **not** call web_search.
|
|
34
|
+
|
|
35
|
+
Bus label: `WebAnswerer`.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS Websets analog — score candidates against NL criteria (YAML/CSV output).
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: medium
|
|
5
|
+
max_turns: 14
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
Given NL **criteria** and a list of candidate URLs/titles/snippets (from search-deep.json), score each candidate and explain match quality.
|
|
11
|
+
|
|
12
|
+
## Output
|
|
13
|
+
|
|
14
|
+
Fenced YAML:
|
|
15
|
+
|
|
16
|
+
```yaml
|
|
17
|
+
criteria: "<restated criteria>"
|
|
18
|
+
results:
|
|
19
|
+
- url: "..."
|
|
20
|
+
title: "..."
|
|
21
|
+
match: true|false
|
|
22
|
+
score: 0.0-1.0
|
|
23
|
+
reason: "<one sentence>"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Parent may convert to `.web/webset-manifest.csv`. Do **not** call web_search.
|
|
27
|
+
|
|
28
|
+
Bus label: `WebCriteriaVerifier`.
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS gap-fill — read search-deep.json, propose follow-up angles for missing coverage.
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: low
|
|
5
|
+
max_turns: 10
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
After a deep search, identify **gaps** (missing facets, contradictions, stale angles) and output **1–3 new search angles** only.
|
|
11
|
+
|
|
12
|
+
## Input
|
|
13
|
+
|
|
14
|
+
Parent provides the path to `.web/search-deep.json` and the research intent. Use `read` on that artifact.
|
|
15
|
+
|
|
16
|
+
## Output (only)
|
|
17
|
+
|
|
18
|
+
Fenced YAML:
|
|
19
|
+
|
|
20
|
+
```yaml
|
|
21
|
+
gaps:
|
|
22
|
+
- "<what is missing>"
|
|
23
|
+
angles:
|
|
24
|
+
- id: gap_1
|
|
25
|
+
query: "..."
|
|
26
|
+
rationale: "..."
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Do **not** call web tools. Parent runs `web_search(tier=deep, anglesFile=...)`.
|
|
30
|
+
|
|
31
|
+
Bus label: `WebGapAnalyzer`.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS fast query planner — 2–3 angles only for latency-sensitive search (YAML only).
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: off
|
|
5
|
+
max_turns: 5
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
Same as `web-query-expander`, but **optimized for speed**: produce **2–3** angles only (not 4–5). No web tools.
|
|
11
|
+
|
|
12
|
+
## When to use (parent)
|
|
13
|
+
|
|
14
|
+
- User asked for **fast** / **quick** / **low latency** open-web lookup
|
|
15
|
+
- `web_search` with `tier: "instant"` or `tier: "standard"` where angles still help (optional)
|
|
16
|
+
- **Not** for landscape, prior art, comparisons, or harness-plan research — use `harness/web-retrieval/web-query-expander` instead
|
|
17
|
+
|
|
18
|
+
## Output (only)
|
|
19
|
+
|
|
20
|
+
```yaml
|
|
21
|
+
intent: "<one sentence>"
|
|
22
|
+
category: null
|
|
23
|
+
angles:
|
|
24
|
+
- id: core
|
|
25
|
+
query: "<short query>"
|
|
26
|
+
rationale: "..."
|
|
27
|
+
- id: official
|
|
28
|
+
query: "..."
|
|
29
|
+
rationale: "..."
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Keep queries ≤10 words. Do not call `web_search` or `web_fetch`.
|
|
33
|
+
|
|
34
|
+
Bus label: `WebQueryExpanderFast`.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS query planner — NL intent to 4-5 SearXNG-optimized search angles (YAML only).
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: low
|
|
5
|
+
max_turns: 8
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
Convert a research intent into **4–5 distinct search angles** optimized for DuckDuckGo / SearXNG keyword search. You do **not** search the web yourself.
|
|
11
|
+
|
|
12
|
+
## When parent should spawn you (not `web-query-expander-fast`)
|
|
13
|
+
|
|
14
|
+
- Landscape, prior art, comparisons, stack/implementation research, harness-plan external research
|
|
15
|
+
- Any question where **recall** matters more than latency
|
|
16
|
+
|
|
17
|
+
For **fast / narrow** paths, parent should spawn `harness/web-retrieval/web-query-expander-fast` or skip expander and use `tier=instant|standard` with `expandHeuristic:true`.
|
|
18
|
+
|
|
19
|
+
## Output (only)
|
|
20
|
+
|
|
21
|
+
Respond with a single fenced YAML block and nothing else:
|
|
22
|
+
|
|
23
|
+
```yaml
|
|
24
|
+
intent: "<restated intent in one sentence>"
|
|
25
|
+
category: null # or code|company|people|paper|news
|
|
26
|
+
angles:
|
|
27
|
+
- id: official
|
|
28
|
+
query: "<short keyword-dense query>"
|
|
29
|
+
rationale: "<why this angle>"
|
|
30
|
+
- id: technical
|
|
31
|
+
query: "..."
|
|
32
|
+
rationale: "..."
|
|
33
|
+
# 4-5 angles total
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Angle design rules
|
|
37
|
+
|
|
38
|
+
- Each `query` must be **short** (≤12 words unless `site:` operator needed).
|
|
39
|
+
- Angles must be **distinct** (definitional, official docs, technical depth, criticism/limitations, recent news, implementations/repos).
|
|
40
|
+
- Use operators when helpful: `site:github.com`, `site:arxiv.org`, `filetype:pdf`, quoted phrases.
|
|
41
|
+
- Do **not** duplicate the same phrasing across angles.
|
|
42
|
+
- Do **not** call `web_search` or `web_fetch`.
|
|
43
|
+
|
|
44
|
+
## Category packs (when spawn context includes category)
|
|
45
|
+
|
|
46
|
+
Subagent output is LLM-crafted. For **heuristic** fallback (`expandHeuristic:true`), category packs come from YAML:
|
|
47
|
+
|
|
48
|
+
- Package: `.pi/harness/web-heuristic-angles.yaml`
|
|
49
|
+
- Project override: `<project>/.pi/harness/web-heuristic-angles.yaml` (see `examples/web-heuristic-angles.project.yaml`)
|
|
50
|
+
|
|
51
|
+
| category | Default heuristic angles (configurable) |
|
|
52
|
+
|----------|----------------------------------------|
|
|
53
|
+
| code | github, stackoverflow, … |
|
|
54
|
+
| company | official site, news, … |
|
|
55
|
+
| people | linkedin, biography |
|
|
56
|
+
| paper | arxiv, scholar |
|
|
57
|
+
| news | recent year in query |
|
|
58
|
+
| *(custom)* | Add your own category key in project YAML |
|
|
59
|
+
|
|
60
|
+
Bus label: `WebQueryExpander`.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: WRS page digest — summarize a fetched markdown excerpt.
|
|
3
|
+
extensions: false
|
|
4
|
+
thinking: low
|
|
5
|
+
max_turns: 6
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
## Your task
|
|
9
|
+
|
|
10
|
+
Produce a 5–8 bullet summary of a single page excerpt for the parent agent. Read the provided `.web/*.md` or excerpt path only.
|
|
11
|
+
|
|
12
|
+
## Rules
|
|
13
|
+
|
|
14
|
+
- Bullets only; no preamble.
|
|
15
|
+
- Preserve factual claims; note if page is marketing-heavy.
|
|
16
|
+
- Do not call web tools.
|
|
17
|
+
|
|
18
|
+
Bus label: `WebSummarizer`.
|
|
@@ -5,7 +5,8 @@
|
|
|
5
5
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
6
|
|
|
7
7
|
const BLOCK_REASON =
|
|
8
|
-
"harness-web-guard: use web_search (
|
|
8
|
+
"harness-web-guard: use web_search (tier=deep for research), web_fetch, web_find_similar, or web_contents — " +
|
|
9
|
+
"not raw curl/wget/firecrawl/scrapling fetch. See web-retrieval skill. " +
|
|
9
10
|
"Setup may use harness-web.py status directly.";
|
|
10
11
|
|
|
11
12
|
const ALLOW_PATTERNS = [
|