ultimate-pi 0.4.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +15 -0
- package/.agents/skills/harness-sentrux-setup/SKILL.md +1 -1
- package/.agents/skills/scrapling-web/SKILL.md +45 -40
- package/.agents/skills/sentrux/SKILL.md +99 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +3 -3
- package/.pi/SYSTEM.md +12 -13
- package/.pi/agents/pi-pi/agent-expert.md +3 -3
- package/.pi/extensions/harness-web-guard.ts +95 -0
- package/.pi/extensions/harness-web-tools.ts +209 -0
- package/.pi/extensions/lib/harness-web/run-cli.ts +92 -0
- package/.pi/harness/env.harness.template +3 -1
- package/.pi/prompts/harness-setup.md +66 -21
- package/.pi/scripts/harness-cli-verify.sh +12 -3
- package/.pi/scripts/harness-searxng-bootstrap.mjs +270 -0
- package/.pi/scripts/harness-web-search.md +24 -5
- package/.pi/scripts/harness-web.py +24 -7
- package/.pi/scripts/harness_web/config.py +37 -3
- package/.pi/scripts/harness_web/output.py +8 -2
- package/.pi/scripts/harness_web/search.py +22 -0
- package/.pi/scripts/harness_web/search_ddg.py +1 -5
- package/.pi/scripts/harness_web/search_searxng.py +100 -0
- package/CHANGELOG.md +26 -0
- package/package.json +2 -3
- package/.pi/mcp.json +0 -11
|
@@ -19,6 +19,21 @@ description: Structured user decisions via ask_user for harness setup, planning,
|
|
|
19
19
|
3. If the user **cancels** (Esc), stop with `needs_clarification` / `human_required` — do not assume defaults.
|
|
20
20
|
4. **CI / automation only:** pass `--non-interactive` to `/harness-setup` to skip prompts and use documented defaults.
|
|
21
21
|
|
|
22
|
+
## Example (harness-setup — search engine)
|
|
23
|
+
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"question": "Which harness-web search backend should this project use?",
|
|
27
|
+
"context": "Scrapling handles scrape/map/bulk. Search: DDG HTML needs no Docker. SearXNG must be self-hosted — public instances often block JSON and rate-limit API to ~4/hour per IP.",
|
|
28
|
+
"options": [
|
|
29
|
+
{ "title": "DuckDuckGo HTML (default)", "description": "HARNESS_WEB_SEARCH_ENGINE=ddg_html" },
|
|
30
|
+
{ "title": "Self-host SearXNG here (Docker)", "description": "node harness-searxng-bootstrap.mjs" },
|
|
31
|
+
{ "title": "Use existing SearXNG instance", "description": "Freeform base URL → HARNESS_WEB_SEARXNG_URL" }
|
|
32
|
+
],
|
|
33
|
+
"allowFreeform": true
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
22
37
|
## Example (plan — scope)
|
|
23
38
|
|
|
24
39
|
```json
|
|
@@ -40,7 +40,7 @@ Custom TOML **outside** `# --- harness:managed:start/end ---` is preserved on ev
|
|
|
40
40
|
node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"
|
|
41
41
|
```
|
|
42
42
|
3. Optional: `sentrux plugin add-standard` (language plugins; harness-setup Step 2.8).
|
|
43
|
-
4.
|
|
43
|
+
4. Symlink **sentrux** skill into `.pi/skills/` if missing (see harness-setup Step 4.2).
|
|
44
44
|
5. `sentrux check .` — fix violations or tune manifest `max_cc` / layers.
|
|
45
45
|
6. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
|
|
46
46
|
|
|
@@ -1,22 +1,33 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: scrapling-web
|
|
3
3
|
description: |
|
|
4
|
-
Harness web search and scrape via
|
|
5
|
-
non-API web task: search, scrape URLs, map site links, bulk research fetches.
|
|
4
|
+
Harness web search and scrape via pi tools web_search and web_fetch (harness-web.py).
|
|
5
|
+
Use for any non-API web task: search, scrape URLs, map site links, bulk research fetches.
|
|
6
6
|
Replaces Firecrawl in ultimate-pi harness agents. Triggers on: search the web,
|
|
7
|
-
scrape URL, fetch page, research online,
|
|
8
|
-
allowed-tools:
|
|
9
|
-
- Bash(python3 *harness-web.py *)
|
|
10
|
-
- Bash(python3 .pi/scripts/harness-web.py *)
|
|
11
|
-
- Bash(scrapling *)
|
|
7
|
+
scrape URL, fetch page, research online, web_search, web_fetch, .web/ artifacts.
|
|
12
8
|
---
|
|
13
9
|
|
|
14
10
|
# scrapling-web (harness-web)
|
|
15
11
|
|
|
16
|
-
Local web layer for harness agents — **no API keys
|
|
17
|
-
|
|
12
|
+
Local web layer for harness agents — **no API keys** for default search/scrape.
|
|
13
|
+
Pi registers **`web_search`** and **`web_fetch`** (wrap `harness-web.py` with Scrapling bootstrap).
|
|
14
|
+
Optional **self-hosted SearXNG** — see `/harness-setup` Step 4.0b.
|
|
18
15
|
|
|
19
|
-
##
|
|
16
|
+
## Agent tools (preferred)
|
|
17
|
+
|
|
18
|
+
| Task | Tool |
|
|
19
|
+
|------|------|
|
|
20
|
+
| Search (SERP) | `web_search` with `query` |
|
|
21
|
+
| Search + multi-scrape | `web_search` with `bulk: true` |
|
|
22
|
+
| Scrape URL | `web_fetch` with `url` (default mode `scrape`) |
|
|
23
|
+
| Map same-host links | `web_fetch` with `mode: map` |
|
|
24
|
+
| Static / simple page | `web_fetch` with `fast: true` |
|
|
25
|
+
|
|
26
|
+
**Never preflight before search/fetch:** do not resolve `UP_PKG`, run `ls harness-web.py` or `python3 -c "import scrapling"`, or call Firecrawl/curl/wget/scrapling CLI for SERP or page fetch.
|
|
27
|
+
|
|
28
|
+
Full JSON/markdown lives under **`.web/`** (gitignored). Use `read` on `output` paths after tool calls.
|
|
29
|
+
|
|
30
|
+
## Install (once per machine — setup/humans only)
|
|
20
31
|
|
|
21
32
|
```bash
|
|
22
33
|
command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
@@ -24,25 +35,23 @@ uv tool install "scrapling[fetchers]"
|
|
|
24
35
|
scrapling install # browser binaries for default stealth scrape
|
|
25
36
|
```
|
|
26
37
|
|
|
27
|
-
Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
|
|
28
|
-
|
|
29
|
-
## Output directory
|
|
38
|
+
Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
|
|
39
|
+
Config diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON; setup only)
|
|
30
40
|
|
|
31
|
-
|
|
41
|
+
## Bash fallback (if pi tools unavailable)
|
|
32
42
|
|
|
33
43
|
| Task | Command |
|
|
34
44
|
|------|---------|
|
|
35
45
|
| Search | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
|
|
36
|
-
| Scrape
|
|
37
|
-
| Fast/static
|
|
38
|
-
| Map
|
|
39
|
-
| Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk
|
|
46
|
+
| Scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
|
|
47
|
+
| Fast/static | add `--fast` |
|
|
48
|
+
| Map | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
|
|
49
|
+
| Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
|
|
40
50
|
|
|
41
51
|
## Search JSON shape (Firecrawl-compatible)
|
|
42
52
|
|
|
43
53
|
```bash
|
|
44
54
|
jq -r '.data.web[].url' .web/search.json
|
|
45
|
-
jq -r '.data.web[] | "\(.title): \(.url)"' .web/search.json
|
|
46
55
|
```
|
|
47
56
|
|
|
48
57
|
Each entry: `url`, `title`, `description`.
|
|
@@ -51,43 +60,39 @@ Each entry: `url`, `title`, `description`.
|
|
|
51
60
|
|
|
52
61
|
| Mode | When |
|
|
53
62
|
|------|------|
|
|
54
|
-
| **stealth** (default
|
|
55
|
-
| **fast** (
|
|
63
|
+
| **stealth** (default) | Arbitrary URLs, JS-heavy sites |
|
|
64
|
+
| **fast** (`fast: true` / `--fast`) | Static docs, example.com, localhost |
|
|
56
65
|
| **auto** (`HARNESS_WEB_FETCH_MODE=auto`) | fast for known-static hosts, else stealth |
|
|
57
66
|
|
|
58
|
-
Search
|
|
67
|
+
| Search backend | Behavior |
|
|
68
|
+
|--------------|----------|
|
|
69
|
+
| `ddg_html` (default) | DuckDuckGo HTML SERP |
|
|
70
|
+
| `searxng` | JSON at `HARNESS_WEB_SEARXNG_URL` — bootstrap via `harness-searxng-bootstrap.mjs` |
|
|
59
71
|
|
|
60
72
|
## Environment
|
|
61
73
|
|
|
62
74
|
| Variable | Default | Purpose |
|
|
63
75
|
|----------|---------|---------|
|
|
64
76
|
| `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
|
|
65
|
-
| `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` |
|
|
66
|
-
| `
|
|
67
|
-
| `HARNESS_WEB_RATE_LIMIT_MS` | `2000` | Delay between bulk scrapes |
|
|
68
|
-
| `HARNESS_WEB_TIMEOUT_MS` | `30000` | Per-request timeout |
|
|
77
|
+
| `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` |
|
|
78
|
+
| `HARNESS_WEB_SEARXNG_URL` | (unset) | Required when `SEARCH_ENGINE=searxng` |
|
|
69
79
|
|
|
70
80
|
## Escalation
|
|
71
81
|
|
|
72
|
-
1. `
|
|
73
|
-
2. `
|
|
74
|
-
3. `
|
|
75
|
-
4.
|
|
82
|
+
1. `web_search` / `web_fetch`
|
|
83
|
+
2. `web_fetch` with `fast: true` for static hosts
|
|
84
|
+
3. `web_fetch` with `mode: map` then targeted fetches
|
|
85
|
+
4. Site-specific Scrapling only when tools are insufficient (not for routine SERP/fetch)
|
|
76
86
|
|
|
77
|
-
## Gaps vs
|
|
87
|
+
## Gaps vs Firecrawl
|
|
78
88
|
|
|
79
89
|
| Firecrawl | Harness path |
|
|
80
90
|
|-----------|----------------|
|
|
81
|
-
| `interact` |
|
|
82
|
-
| `agent`
|
|
83
|
-
| `parse` (
|
|
84
|
-
| `crawl`
|
|
91
|
+
| `interact` | gstack browse or manual browser |
|
|
92
|
+
| `agent` | Agent reasoning + graphify |
|
|
93
|
+
| `parse` (PDF) | pypdf, markitdown |
|
|
94
|
+
| `crawl` | `web_search` bulk or map + multiple `web_fetch` |
|
|
85
95
|
|
|
86
96
|
## Ethics
|
|
87
97
|
|
|
88
98
|
Respect site terms and rate limits. SERP scraping is for dev research, not high-volume harvesting.
|
|
89
|
-
See [Scrapling ethical considerations](https://scrapling.readthedocs.io/en/latest/cli/extract-commands.html#legal-and-ethical-considerations).
|
|
90
|
-
|
|
91
|
-
## Drawbacks of default stealth scrape
|
|
92
|
-
|
|
93
|
-
Higher latency and RAM (Chromium per session). Use `--fast` for static docs; reuse one `bulk-scrape` run (single `StealthySession`) instead of many cold starts.
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: sentrux
|
|
3
|
+
description: |
|
|
4
|
+
Architectural quality sensor for AI-assisted code — rules, modularity, session baselines,
|
|
5
|
+
and degradation detection via the Sentrux CLI (not MCP in Pi sessions).
|
|
6
|
+
Use when the user mentions sentrux, quality signal, architectural health, gate, check rules,
|
|
7
|
+
modularity, session baseline, degradation, structural drift, or before/after agent work on
|
|
8
|
+
a harness project. Triggers on: sentrux check, sentrux gate, rules.toml, quality gate.
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# sentrux (CLI + harness)
|
|
12
|
+
|
|
13
|
+
[Sentrux](https://github.com/sentrux/sentrux) scans project structure, enforces `.sentrux/rules.toml`, and compares sessions with **gate** baselines. In **Pi**, use the **`sentrux` binary and bash** — Pi does **not** load `.pi/mcp.json`, so Sentrux MCP tools are unavailable in Pi agent sessions.
|
|
14
|
+
|
|
15
|
+
## Install (once per machine)
|
|
16
|
+
|
|
17
|
+
| Platform | Command |
|
|
18
|
+
|----------|---------|
|
|
19
|
+
| macOS | `brew install sentrux/tap/sentrux` |
|
|
20
|
+
| Linux | `curl -fsSL https://raw.githubusercontent.com/sentrux/sentrux/main/install.sh \| sh` |
|
|
21
|
+
|
|
22
|
+
Verify:
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
command -v sentrux && sentrux --version
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Harness setup also checks via `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"` (resolve `UP_PKG` in `.pi/scripts/README.md`).
|
|
29
|
+
|
|
30
|
+
Optional language plugins (52 tree-sitter parsers):
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
sentrux plugin add-standard
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Core workflows (project root)
|
|
37
|
+
|
|
38
|
+
Run from the **target repo root** (where `.sentrux/rules.toml` lives).
|
|
39
|
+
|
|
40
|
+
| When | Command | Notes |
|
|
41
|
+
|------|---------|-------|
|
|
42
|
+
| CI / pre-commit | `sentrux check .` | Exit 0 = pass, 1 = violations |
|
|
43
|
+
| Before agent work | `sentrux gate --save .` | Save session baseline |
|
|
44
|
+
| After agent work | `sentrux gate .` | Detect degradation vs baseline |
|
|
45
|
+
| Explore structure | `sentrux` or `sentrux .` | GUI treemap (optional) |
|
|
46
|
+
|
|
47
|
+
Typical agent loop:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
sentrux gate --save .
|
|
51
|
+
# … agent edits …
|
|
52
|
+
sentrux check . # rules still pass?
|
|
53
|
+
sentrux gate . # structural regression?
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
If `check` fails, fix violations or tune manifest constraints (see **Rules** below). If `gate` reports degradation, inspect changed modules before merging.
|
|
57
|
+
|
|
58
|
+
## Rules (`.sentrux/rules.toml`)
|
|
59
|
+
|
|
60
|
+
Committed rules file at repo root. Harness projects sync it from `.pi/harness/sentrux/architecture.manifest.json`.
|
|
61
|
+
|
|
62
|
+
| Task | Skill / command |
|
|
63
|
+
|------|-----------------|
|
|
64
|
+
| First bootstrap, manifest → rules | **harness-sentrux-setup** — `node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"` |
|
|
65
|
+
| Manifest edited | bootstrap `--force` or `/harness-sentrux-sync` |
|
|
66
|
+
| CI drift only | `node "$UP_PKG/.pi/scripts/sentrux-rules-sync.mjs" --check` |
|
|
67
|
+
|
|
68
|
+
Custom TOML outside `# --- harness:managed:start/end ---` is preserved on sync. Do **not** hand-edit managed blocks without updating the manifest.
|
|
69
|
+
|
|
70
|
+
## Harness integration
|
|
71
|
+
|
|
72
|
+
| Piece | Role |
|
|
73
|
+
|-------|------|
|
|
74
|
+
| `sentrux-rules-sync` extension | Session start: warns if `rules.toml` drifts; auto-sync after plan/merge phases |
|
|
75
|
+
| `/harness-sentrux-sync` | Force-regenerate rules from manifest (pi command) |
|
|
76
|
+
| `harness-verify.mjs` | Runs `sentrux check .` when rules present |
|
|
77
|
+
| **observation-bus** | Maps `harness-sentrux-signal` custom entries → evaluator observations |
|
|
78
|
+
| **harness-eval** | Evaluate phase may require a Sentrux quality signal (stub or future MCP) per ADR 0006 |
|
|
79
|
+
|
|
80
|
+
High level: **execute** uses CLI gate/check around edits; **evaluate** consumes observation-bus quality signals (`harness-sentrux-signal`) alongside tests and policy. Record CLI outcomes in session notes when no bus entry exists yet.
|
|
81
|
+
|
|
82
|
+
## Related skills
|
|
83
|
+
|
|
84
|
+
- **harness-sentrux-setup** — manifest seeding, rules bootstrap, sync semantics (do not duplicate here)
|
|
85
|
+
- **harness-eval** — verdict + Sentrux signal requirements
|
|
86
|
+
- **harness-governor** — when to re-sync after architecture changes
|
|
87
|
+
|
|
88
|
+
## Do not
|
|
89
|
+
|
|
90
|
+
- Assume Sentrux **MCP** tools (`scan`, `session_start`, `health`, etc.) exist in **Pi** — they do not; use CLI only
|
|
91
|
+
- Edit or rely on `.pi/mcp.json` for Pi sessions
|
|
92
|
+
- Duplicate bootstrap/sync steps from **harness-sentrux-setup**
|
|
93
|
+
- Skip `sentrux check .` after large refactors when `.sentrux/rules.toml` exists
|
|
94
|
+
|
|
95
|
+
## References
|
|
96
|
+
|
|
97
|
+
- ADR 0006 — `.pi/harness/docs/adrs/0006-sentrux-dual-layer.md`
|
|
98
|
+
- ADR 0009 — `.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md`
|
|
99
|
+
- `CONTRIBUTING.md` — Sentrux quick start
|
|
@@ -8,7 +8,7 @@ description: >
|
|
|
8
8
|
Triggers on: "/wiki-autoresearch", "/autoresearch", "wiki-autoresearch", "autoresearch",
|
|
9
9
|
"research [topic]", "deep dive into [topic]", "investigate [topic]",
|
|
10
10
|
"find everything about [topic]", "research and file", "go research", "build a wiki on".
|
|
11
|
-
allowed-tools: Read Write Edit Glob Grep
|
|
11
|
+
allowed-tools: Read Write Edit Glob Grep web_search web_fetch Bash
|
|
12
12
|
---
|
|
13
13
|
|
|
14
14
|
# wiki-autoresearch: Autonomous Research Loop with Graphify
|
|
@@ -129,8 +129,8 @@ Input: topic (from Topic Selection, above)
|
|
|
129
129
|
|
|
130
130
|
Round 1. Broad search
|
|
131
131
|
1. Decompose topic into 3-5 distinct search angles
|
|
132
|
-
2. For each angle: run 2-3
|
|
133
|
-
3. For top 2-3 results per angle:
|
|
132
|
+
2. For each angle: run 2-3 `web_search` queries
|
|
133
|
+
3. For top 2-3 results per angle: `web_fetch` each URL (or `read` `.web/` artifacts)
|
|
134
134
|
4. Save each fetched page to ./raw/ as a markdown file
|
|
135
135
|
5. Extract from each: key claims, entities, concepts, open questions
|
|
136
136
|
|
package/.pi/SYSTEM.md
CHANGED
|
@@ -23,26 +23,25 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
|
|
|
23
23
|
## Web Policy (Mandatory)
|
|
24
24
|
|
|
25
25
|
> [!warning] No raw HTTP
|
|
26
|
-
> Route **all** web
|
|
26
|
+
> Route **all** web access through [[context7]] (API/library docs) or **`web_search` / `web_fetch`** ([[scrapling-web]]). No `curl`, `wget`, Firecrawl, or scrapling CLI preflight.
|
|
27
27
|
|
|
28
28
|
### API / Library Docs — context7 ONLY
|
|
29
29
|
- `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`
|
|
30
30
|
- context7 owns: function signatures, class APIs, config options, stdlib, framework specs.
|
|
31
|
-
- **Never** use quality-sites for API docs.
|
|
31
|
+
- **Never** use quality-sites or web_fetch for API docs.
|
|
32
32
|
|
|
33
|
-
### All Non-API Web
|
|
34
|
-
See `.agents/skills/scrapling-web/SKILL.md`
|
|
33
|
+
### All Non-API Web — web_search + web_fetch
|
|
34
|
+
See `.agents/skills/scrapling-web/SKILL.md`. **No preflight:** never resolve `UP_PKG`, `ls harness-web.py`, or `python3 -c "import scrapling"` before searching.
|
|
35
35
|
|
|
36
|
-
| Task |
|
|
37
|
-
|
|
38
|
-
| Search (
|
|
39
|
-
| Scrape
|
|
40
|
-
|
|
|
41
|
-
| Map same-host links | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
|
|
42
|
-
| Bulk search + scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
|
|
36
|
+
| Task | Tool |
|
|
37
|
+
|------|------|
|
|
38
|
+
| Search (SERP) | `web_search` (`query`, optional `limit`, `bulk`) |
|
|
39
|
+
| Scrape page | `web_fetch` (`url`, optional `fast: true`) |
|
|
40
|
+
| Map links | `web_fetch` (`url`, `mode: map`) |
|
|
43
41
|
|
|
44
|
-
- **Artifacts:**
|
|
45
|
-
- **
|
|
42
|
+
- **Artifacts:** default under `.web/`; use `read` for full JSON/markdown.
|
|
43
|
+
- **Fallback** (tools unavailable): `python3 "$UP_PKG/.pi/scripts/harness-web.py" …` per scrapling-web skill.
|
|
44
|
+
- **Setup diagnostics only:** `harness-web.py status` (JSON config).
|
|
46
45
|
- **Quality sites:** check `.agents/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
|
|
47
46
|
- **Research:** use `/wiki-autoresearch <topic>` for deep research. Results are graphified into `graphify-out/`.
|
|
48
47
|
|
|
@@ -187,9 +187,9 @@ Before answering ANY question, search the local codebase for existing agent defi
|
|
|
187
187
|
find .pi/agents -name "*.md" -type f 2>/dev/null
|
|
188
188
|
```
|
|
189
189
|
|
|
190
|
-
Fetch the latest pi-subagents docs:
|
|
191
|
-
```
|
|
192
|
-
|
|
190
|
+
Fetch the latest pi-subagents docs (use `web_fetch` with `fast: true` for raw GitHub):
|
|
191
|
+
```
|
|
192
|
+
web_fetch url="https://raw.githubusercontent.com/tintinweb/pi-subagents/refs/heads/master/README.md" fast=true output=.web/pi-subagents-readme.md
|
|
193
193
|
```
|
|
194
194
|
|
|
195
195
|
## How to Respond
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* harness-web-guard — block bash that bypasses web_search / web_fetch tools.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
6
|
+
|
|
7
|
+
/** Explanation returned to the agent whenever a bash command is blocked. */
const BLOCK_REASON =
  "harness-web-guard: use web_search (SERP) or web_fetch (page content) instead of raw curl/wget/firecrawl/scrapling fetch. " +
  "Setup may use harness-web.py status directly.";

/**
 * Allow-list: a bash command matching ANY of these patterns is never blocked,
 * even if it also matches a block pattern.
 *
 * NOTE(review): the patterns are tested against the whole command string, so
 * an allow-listed token anywhere in a compound command exempts all of it.
 */
const ALLOW_PATTERNS = [
  /harness-web\.py\b/i,
  /harness-cli-verify\.sh\b/i,
  /\bgraphify\b/i,
  /\bctx7\b/i,
  /\bcontext7\b/i,
  /\bgit\b/i,
  /harness-searxng-bootstrap/i,
];

/**
 * Block-list: each rule pairs a pattern with a short note that is echoed back
 * in the block reason so the agent can see which rule fired.
 */
const BLOCK_PATTERNS: Array<{ re: RegExp; note: string }> = [
  { re: /\bfirecrawl\b/i, note: "firecrawl" },
  {
    // curl/wget followed, within the same simple command (no newline, pipe,
    // `;` or `&` in between), by an http(s) URL. The URL may be preceded by
    // whitespace, an opening quote, or `=` so that quoted URLs
    // (`curl "https://…"`) and `--url=https://…` are caught as well.
    re: /\b(?:curl|wget)\b[^\n|;&]*[\s"'=]https?:\/\//i,
    note: "curl/wget http(s)",
  },
  {
    re: /\bscrapling\s+(?:fetch|extract)\b/i,
    note: "scrapling fetch/extract",
  },
];
|
|
32
|
+
|
|
33
|
+
/**
 * Reports whether a user prompt mentions harness setup / bootstrap.
 *
 * The comparison is case-insensitive and purely substring based.
 *
 * @param prompt Raw text of the user prompt.
 * @returns `true` when any bootstrap marker occurs in the prompt.
 */
function isBootstrapPrompt(prompt: string): boolean {
  const lowered = prompt.toLowerCase();
  const markers = [
    "/harness-setup",
    "harness-setup",
    "full harness bootstrap",
  ];
  return markers.some((marker) => lowered.includes(marker));
}
|
|
41
|
+
|
|
42
|
+
/**
 * Returns the text of the most recent user message in the session.
 *
 * Entries are scanned from newest to oldest. String content is returned as-is;
 * array content is flattened by taking each part's `text` (parts without a
 * `text` property contribute an empty string) and joining with newlines. A user
 * entry whose content is neither a string nor an array is skipped and the scan
 * continues with older entries.
 *
 * @param ctx Object exposing `sessionManager.getEntries()`.
 * @returns The prompt text, or `""` when no usable user message exists.
 */
function latestUserPrompt(ctx: {
  sessionManager: { getEntries(): unknown[] };
}): string {
  type SessionEntry = {
    type?: string;
    message?: { role?: string; content?: unknown };
  };
  const history = ctx.sessionManager.getEntries() as SessionEntry[];

  let idx = history.length;
  while (idx-- > 0) {
    const message = history[idx]?.message;
    if (message?.role !== "user") continue;

    const body = message.content;
    if (typeof body === "string") return body;
    if (!Array.isArray(body)) continue;

    const pieces: string[] = [];
    for (const part of body) {
      const hasText = typeof part === "object" && part && "text" in part;
      pieces.push(
        hasText ? String((part as { text?: string }).text ?? "") : "",
      );
    }
    return pieces.join("\n");
  }
  return "";
}
|
|
66
|
+
|
|
67
|
+
/**
 * Reports whether a bash command matches at least one allow-list pattern.
 *
 * @param command Full bash command string.
 * @returns `true` on the first matching entry of `ALLOW_PATTERNS`.
 */
function isAllowedBash(command: string): boolean {
  for (const pattern of ALLOW_PATTERNS) {
    if (pattern.test(command)) return true;
  }
  return false;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Decides whether a bash command should be blocked as a raw web access.
 *
 * The allow-list takes precedence: it is checked against the whole command
 * before any block pattern is consulted.
 *
 * @param command Full bash command string.
 * @returns The `note` of the first matching block rule, or `null` when the
 *   command is allow-listed or matches no rule.
 */
function blockedWebBash(command: string): string | null {
  if (isAllowedBash(command)) return null;

  const rule = BLOCK_PATTERNS.find(({ re }) => re.test(command));
  return rule ? rule.note : null;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Pi extension entry point: registers a `tool_call` hook that vetoes bash
 * commands bypassing the `web_search` / `web_fetch` tools.
 *
 * The hook ignores non-bash tools and stands down entirely when the latest
 * user prompt is a harness bootstrap/setup prompt.
 *
 * @param pi Extension API used to register the hook.
 */
export default function harnessWebGuard(pi: ExtensionAPI) {
  pi.on("tool_call", async (event, ctx) => {
    const isBash = event.toolName === "bash";
    // Short-circuit keeps the session lookup limited to bash calls.
    if (!isBash || isBootstrapPrompt(latestUserPrompt(ctx))) return undefined;

    const { command } = event.input as { command?: string };
    const matched = blockedWebBash(String(command ?? ""));

    return matched
      ? { block: true, reason: `${BLOCK_REASON} (matched: ${matched})` }
      : undefined;
  });
}
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* harness-web-tools — web_search + web_fetch pi tools wrapping harness-web.py.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
6
|
+
import { Type } from "@sinclair/typebox";
|
|
7
|
+
import {
|
|
8
|
+
harnessWebContextLine,
|
|
9
|
+
readTextExcerpt,
|
|
10
|
+
runHarnessWeb,
|
|
11
|
+
summarizeSearchJson,
|
|
12
|
+
} from "./lib/harness-web/run-cli.js";
|
|
13
|
+
|
|
14
|
+
// URL of this module; handed to runHarnessWeb by both tools.
// @ts-expect-error pi extensions run as ESM
const MODULE_URL = import.meta.url;

/** Prompt guidelines attached to the `web_search` tool registration. */
const WEB_SEARCH_GUIDELINES = [
  "Use web_search for open-web SERP — never preflight UP_PKG, ls harness-web.py, or python3 -c import scrapling.",
  "Never use Firecrawl, curl/wget for search, or scrapling CLI for SERP.",
  "After search, use web_fetch on URLs or read the output JSON under .web/.",
  "Use bulk:true only when you need search plus multi-page scrape in one step.",
];

/** Prompt guidelines attached to the `web_fetch` tool registration. */
const WEB_FETCH_GUIDELINES = [
  "Use web_fetch for page markdown or same-host link maps — never curl/wget the URL.",
  "Never use raw scrapling CLI for fetch; harness-web handles Scrapling bootstrap.",
  "Library API documentation → context7 only, not web_fetch.",
  "Set fast:true for static docs (example.com, raw HTML docs, localhost).",
];
|
|
30
|
+
|
|
31
|
+
/**
 * Parameter schema for the `web_search` tool.
 *
 * Only `query` is required; `limit`, `output` and `bulk` are optional.
 */
const WebSearchSchema = Type.Object({
  // Required search terms.
  query: Type.String({ description: "Search query" }),

  // Result cap, bounded to 1..20 by the schema.
  limit: Type.Optional(
    Type.Number({ description: "Max results (default 5)", minimum: 1, maximum: 20 }),
  ),

  // Where the CLI writes its result (a file, or a directory in bulk mode).
  output: Type.Optional(
    Type.String({
      description: "Output path (default .web/search.json or .web/bulk for bulk)",
    }),
  ),

  // Switches the tool from plain search to search-then-scrape.
  bulk: Type.Optional(
    Type.Boolean({
      description:
        "If true, run bulk-scrape (search then scrape top URLs to output directory)",
      default: false,
    }),
  ),
});
|
|
54
|
+
|
|
55
|
+
/**
 * Parameter schema for the `web_fetch` tool.
 *
 * Only `url` is required; `mode`, `output`, `fast` and `limit` are optional.
 */
const WebFetchSchema = Type.Object({
  // Required target URL.
  url: Type.String({ description: "URL to fetch" }),

  // Either "scrape" or "map".
  mode: Type.Optional(
    Type.Union([Type.Literal("scrape"), Type.Literal("map")], {
      description: "scrape (markdown) or map (same-host links JSON)",
      default: "scrape",
    }),
  ),

  // Destination file for the fetched content.
  output: Type.Optional(
    Type.String({ description: "Output file path under .web/" }),
  ),

  // Opt-in fast HTTP scrape.
  fast: Type.Optional(
    Type.Boolean({
      description: "Use fast HTTP scrape (static/simple pages)",
      default: false,
    }),
  ),

  // Link cap for map mode, bounded to 1..500 by the schema.
  limit: Type.Optional(
    Type.Number({
      description: "For map mode: max links (default 100)",
      minimum: 1,
      maximum: 500,
    }),
  ),
});
|
|
80
|
+
|
|
81
|
+
/**
 * Builds a failed tool result.
 *
 * @param text Message shown to the agent.
 * @returns A result with one text content item and `details.ok === false`.
 */
function failResult(text: string) {
  const content = [{ type: "text" as const, text }];
  const details = { ok: false };
  return { content, details };
}
|
|
87
|
+
|
|
88
|
+
/**
 * Builds a successful tool result.
 *
 * @param text Message shown to the agent.
 * @param details Extra fields merged into `details` after `ok: true`, so a
 *   caller-supplied `ok` key would take precedence.
 * @returns A result with one text content item and the merged details.
 */
function okResult(text: string, details: Record<string, unknown> = {}) {
  const merged = { ok: true, ...details };
  const content = [{ type: "text" as const, text }];
  return { content, details: merged };
}
|
|
94
|
+
|
|
95
|
+
/**
 * Resolves the working directory for a tool invocation.
 *
 * @param ctx Tool context; its optional `cwd` wins when it is not nullish.
 * @returns `ctx.cwd`, or the process working directory as a fallback.
 */
function sessionCwd(ctx: { cwd?: string }): string {
  const { cwd } = ctx;
  return cwd ?? process.cwd();
}
|
|
98
|
+
|
|
99
|
+
/**
 * Pi extension entry point: appends the harness-web context line to the system
 * prompt and registers the `web_search` and `web_fetch` tools, both of which
 * delegate to the harness-web CLI through `runHarnessWeb`.
 *
 * @param pi Extension API used to register the hook and the tools.
 */
export default function harnessWebTools(pi: ExtensionAPI) {
  // Extend the system prompt with the harness-web context line.
  pi.on("before_agent_start", async (event) => ({
    systemPrompt: `${event.systemPrompt}\n\n${harnessWebContextLine()}`,
  }));

  pi.registerTool({
    name: "web_search",
    label: "Web Search",
    description:
      "Search the web via harness-web (DuckDuckGo HTML or self-hosted SearXNG from .env). Returns result summaries and output path.",
    promptSnippet: "SERP via configured engine (ddg_html or searxng from .env)",
    promptGuidelines: WEB_SEARCH_GUIDELINES,
    parameters: WebSearchSchema,

    /** Runs `search` (or `bulk-scrape` when `bulk` is true) and reports the output path. */
    async execute(_id, params, _signal, _onUpdate, ctx) {
      const cwd = sessionCwd(ctx);
      const query = String(params.query ?? "").trim();
      if (query === "") return failResult("web_search: query is required.");

      const bulk = params.bulk === true;
      const limit = typeof params.limit === "number" ? params.limit : 5;
      const fallbackOutput = bulk ? ".web/bulk" : ".web/search.json";
      const output = String(params.output ?? fallbackOutput);

      // Both subcommands take the same argument list.
      const subcommand = bulk ? "bulk-scrape" : "search";
      const argv = [subcommand, query, "-o", output, "--limit", String(limit)];

      const run = runHarnessWeb(MODULE_URL, argv, cwd);
      if (!run.ok) {
        return failResult(
          `web_search failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}` +
            "\n\nHints: run /harness-setup; for searxng set HARNESS_WEB_SEARXNG_URL; " +
            "enable json in SearXNG search.formats.",
        );
      }

      // A summary is only attempted for plain (non-bulk) searches.
      const summary = bulk ? null : summarizeSearchJson(output, cwd);
      const lines = [
        run.stdout,
        ...(summary ? ["", summary] : []),
        "",
        `output: ${output}`,
        "Use read tool for full JSON, or web_fetch on result URLs.",
      ];

      return okResult(lines.join("\n"), {
        output,
        query,
        bulk,
        engine: process.env.HARNESS_WEB_SEARCH_ENGINE,
      });
    },
  });

  pi.registerTool({
    name: "web_fetch",
    label: "Web Fetch",
    description:
      "Fetch a URL via harness-web/Scrapling (scrape to markdown or map same-host links).",
    promptSnippet: "Scrape/map URL via Scrapling (harness-web)",
    promptGuidelines: WEB_FETCH_GUIDELINES,
    parameters: WebFetchSchema,

    /** Runs `scrape` or `map` for one URL and returns an excerpt of the output when available. */
    async execute(_id, params, _signal, _onUpdate, ctx) {
      const cwd = sessionCwd(ctx);
      const url = String(params.url ?? "").trim();
      if (url === "") return failResult("web_fetch: url is required.");

      // Anything other than "map" falls back to "scrape".
      const isMap = params.mode === "map";
      const mode = isMap ? "map" : "scrape";
      const fastFlag = params.fast === true ? ["--fast"] : [];
      const limit = typeof params.limit === "number" ? params.limit : 100;
      const output = String(
        params.output ?? (isMap ? ".web/map.json" : ".web/page.md"),
      );

      // Only map mode forwards --limit.
      const argv = isMap
        ? ["map", url, "-o", output, "--limit", String(limit), ...fastFlag]
        : ["scrape", url, "-o", output, ...fastFlag];

      const run = runHarnessWeb(MODULE_URL, argv, cwd);
      if (!run.ok) {
        return failResult(
          `web_fetch failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}\n` +
            "Try fast:true for static pages, or run harness-cli-verify for Scrapling install.",
        );
      }

      const excerpt = readTextExcerpt(output, cwd);
      const lines = [
        run.stdout,
        "",
        `output: ${output}`,
        ...(excerpt ? ["", "--- excerpt ---", excerpt] : []),
      ];

      return okResult(lines.join("\n"), { output, url, mode });
    },
  });
}
|