ultimate-pi 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. package/.agents/skills/harness-decisions/SKILL.md +15 -0
  2. package/.agents/skills/scrapling-web/SKILL.md +45 -40
  3. package/.agents/skills/wiki-autoresearch/SKILL.md +3 -3
  4. package/.pi/PACKAGING.md +3 -2
  5. package/.pi/SYSTEM.md +12 -13
  6. package/.pi/agents/pi-pi/agent-expert.md +3 -3
  7. package/.pi/extensions/harness-web-guard.ts +95 -0
  8. package/.pi/extensions/harness-web-tools.ts +209 -0
  9. package/.pi/extensions/lib/harness-vcc-settings.ts +50 -0
  10. package/.pi/extensions/lib/harness-web/run-cli.ts +92 -0
  11. package/.pi/extensions/ultimate-pi-vcc.ts +17 -0
  12. package/.pi/harness/docs/adrs/0030-inhouse-vcc-compaction.md +40 -0
  13. package/.pi/harness/docs/adrs/README.md +1 -0
  14. package/.pi/harness/env.harness.template +3 -1
  15. package/.pi/prompts/harness-setup.md +48 -2
  16. package/.pi/scripts/harness-cli-verify.sh +12 -3
  17. package/.pi/scripts/harness-searxng-bootstrap.mjs +270 -0
  18. package/.pi/scripts/harness-web-search.md +24 -5
  19. package/.pi/scripts/harness-web.py +24 -7
  20. package/.pi/scripts/harness_web/config.py +37 -3
  21. package/.pi/scripts/harness_web/output.py +8 -2
  22. package/.pi/scripts/harness_web/search.py +22 -0
  23. package/.pi/scripts/harness_web/search_ddg.py +1 -5
  24. package/.pi/scripts/harness_web/search_searxng.py +100 -0
  25. package/.pi/scripts/vendor-pi-vcc-settings.stub.ts +8 -0
  26. package/.pi/scripts/vendor-sync-pi-vcc.sh +40 -0
  27. package/.pi/settings.example.json +1 -6
  28. package/CHANGELOG.md +20 -6
  29. package/THIRD_PARTY_NOTICES.md +8 -22
  30. package/package.json +7 -6
  31. package/vendor/pi-vcc/README.md +215 -0
  32. package/vendor/pi-vcc/UPSTREAM_PIN.md +12 -0
  33. package/vendor/pi-vcc/demo.gif +0 -0
  34. package/vendor/pi-vcc/index.ts +12 -0
  35. package/vendor/pi-vcc/package.json +26 -0
  36. package/vendor/pi-vcc/scripts/audit-sessions.ts +88 -0
  37. package/vendor/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
  38. package/vendor/pi-vcc/scripts/compare-before-after.ts +36 -0
  39. package/vendor/pi-vcc/scripts/dump-branch-output.ts +20 -0
  40. package/vendor/pi-vcc/src/commands/pi-vcc.ts +36 -0
  41. package/vendor/pi-vcc/src/commands/vcc-recall.ts +65 -0
  42. package/vendor/pi-vcc/src/core/brief.ts +381 -0
  43. package/vendor/pi-vcc/src/core/build-sections.ts +79 -0
  44. package/vendor/pi-vcc/src/core/content.ts +60 -0
  45. package/vendor/pi-vcc/src/core/filter-noise.ts +42 -0
  46. package/vendor/pi-vcc/src/core/format-recall.ts +27 -0
  47. package/vendor/pi-vcc/src/core/format.ts +49 -0
  48. package/vendor/pi-vcc/src/core/lineage.ts +26 -0
  49. package/vendor/pi-vcc/src/core/load-messages.ts +41 -0
  50. package/vendor/pi-vcc/src/core/normalize.ts +66 -0
  51. package/vendor/pi-vcc/src/core/recall-scope.ts +14 -0
  52. package/vendor/pi-vcc/src/core/render-entries.ts +55 -0
  53. package/vendor/pi-vcc/src/core/report.ts +237 -0
  54. package/vendor/pi-vcc/src/core/sanitize.ts +5 -0
  55. package/vendor/pi-vcc/src/core/search-entries.ts +221 -0
  56. package/vendor/pi-vcc/src/core/settings.ts +8 -0
  57. package/vendor/pi-vcc/src/core/skill-collapse.ts +35 -0
  58. package/vendor/pi-vcc/src/core/summarize.ts +157 -0
  59. package/vendor/pi-vcc/src/core/tool-args.ts +14 -0
  60. package/vendor/pi-vcc/src/details.ts +7 -0
  61. package/vendor/pi-vcc/src/extract/commits.ts +69 -0
  62. package/vendor/pi-vcc/src/extract/files.ts +80 -0
  63. package/vendor/pi-vcc/src/extract/goals.ts +79 -0
  64. package/vendor/pi-vcc/src/extract/preferences.ts +55 -0
  65. package/vendor/pi-vcc/src/hooks/before-compact.ts +314 -0
  66. package/vendor/pi-vcc/src/sections.ts +12 -0
  67. package/vendor/pi-vcc/src/tools/recall.ts +109 -0
  68. package/vendor/pi-vcc/src/types.ts +14 -0
  69. package/vendor/pi-vcc/tests/before-compact-hook.test.ts +204 -0
  70. package/vendor/pi-vcc/tests/before-compact.test.ts +145 -0
  71. package/vendor/pi-vcc/tests/brief.test.ts +206 -0
  72. package/vendor/pi-vcc/tests/build-sections.test.ts +59 -0
  73. package/vendor/pi-vcc/tests/compile.test.ts +80 -0
  74. package/vendor/pi-vcc/tests/content.test.ts +31 -0
  75. package/vendor/pi-vcc/tests/extract-goals.test.ts +86 -0
  76. package/vendor/pi-vcc/tests/extract-preferences.test.ts +30 -0
  77. package/vendor/pi-vcc/tests/filter-noise.test.ts +61 -0
  78. package/vendor/pi-vcc/tests/fixtures.ts +61 -0
  79. package/vendor/pi-vcc/tests/format-recall.test.ts +30 -0
  80. package/vendor/pi-vcc/tests/format.test.ts +62 -0
  81. package/vendor/pi-vcc/tests/lineage.test.ts +33 -0
  82. package/vendor/pi-vcc/tests/load-messages.test.ts +51 -0
  83. package/vendor/pi-vcc/tests/normalize.test.ts +97 -0
  84. package/vendor/pi-vcc/tests/real-sessions.test.ts +38 -0
  85. package/vendor/pi-vcc/tests/recall-expand.test.ts +15 -0
  86. package/vendor/pi-vcc/tests/recall-scope.test.ts +32 -0
  87. package/vendor/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
  88. package/vendor/pi-vcc/tests/render-entries.test.ts +62 -0
  89. package/vendor/pi-vcc/tests/report.test.ts +44 -0
  90. package/vendor/pi-vcc/tests/sanitize.test.ts +24 -0
  91. package/vendor/pi-vcc/tests/search-entries.test.ts +144 -0
  92. package/vendor/pi-vcc/tests/support/load-session.ts +23 -0
  93. package/vendor/pi-vcc/tests/support/real-sessions.ts +51 -0
  94. package/.pi/pi-vcc-config.json +0 -4
@@ -19,6 +19,21 @@ description: Structured user decisions via ask_user for harness setup, planning,
19
19
  3. If the user **cancels** (Esc), stop with `needs_clarification` / `human_required` — do not assume defaults.
20
20
  4. **CI / automation only:** pass `--non-interactive` to `/harness-setup` to skip prompts and use documented defaults.
21
21
 
22
+ ## Example (harness-setup — search engine)
23
+
24
+ ```json
25
+ {
26
+ "question": "Which harness-web search backend should this project use?",
27
+ "context": "Scrapling handles scrape/map/bulk. Search: DDG HTML needs no Docker. SearXNG must be self-hosted — public instances often block JSON and rate-limit API to ~4/hour per IP.",
28
+ "options": [
29
+ { "title": "DuckDuckGo HTML (default)", "description": "HARNESS_WEB_SEARCH_ENGINE=ddg_html" },
30
+ { "title": "Self-host SearXNG here (Docker)", "description": "node harness-searxng-bootstrap.mjs" },
31
+ { "title": "Use existing SearXNG instance", "description": "Freeform base URL → HARNESS_WEB_SEARXNG_URL" }
32
+ ],
33
+ "allowFreeform": true
34
+ }
35
+ ```
36
+
22
37
  ## Example (plan — scope)
23
38
 
24
39
  ```json
@@ -1,22 +1,33 @@
1
1
  ---
2
2
  name: scrapling-web
3
3
  description: |
4
- Harness web search and scrape via the local harness-web CLI (Scrapling). Use for any
5
- non-API web task: search, scrape URLs, map site links, bulk research fetches.
4
+ Harness web search and scrape via pi tools web_search and web_fetch (harness-web.py).
5
+ Use for any non-API web task: search, scrape URLs, map site links, bulk research fetches.
6
6
  Replaces Firecrawl in ultimate-pi harness agents. Triggers on: search the web,
7
- scrape URL, fetch page, research online, harness-web, .web/ artifacts.
8
- allowed-tools:
9
- - Bash(python3 *harness-web.py *)
10
- - Bash(python3 .pi/scripts/harness-web.py *)
11
- - Bash(scrapling *)
7
+ scrape URL, fetch page, research online, web_search, web_fetch, .web/ artifacts.
12
8
  ---
13
9
 
14
10
  # scrapling-web (harness-web)
15
11
 
16
- Local web layer for harness agents — **no API keys**, no Docker compose stack.
17
- Uses [Scrapling](https://scrapling.readthedocs.io/) under `node $UP_PKG/.pi/scripts/harness-web.py`.
12
+ Local web layer for harness agents — **no API keys** for default search/scrape.
13
+ Pi registers **`web_search`** and **`web_fetch`** (wrap `harness-web.py` with Scrapling bootstrap).
14
+ Optional **self-hosted SearXNG** — see `/harness-setup` Step 4.0b.
18
15
 
19
- ## Install (once per machine)
16
+ ## Agent tools (preferred)
17
+
18
+ | Task | Tool |
19
+ |------|------|
20
+ | Search (SERP) | `web_search` with `query` |
21
+ | Search + multi-scrape | `web_search` with `bulk: true` |
22
+ | Scrape URL | `web_fetch` with `url` (default mode `scrape`) |
23
+ | Map same-host links | `web_fetch` with `mode: map` |
24
+ | Static / simple page | `web_fetch` with `fast: true` |
25
+
26
+ **Never before search/fetch:** resolve `UP_PKG`, `ls harness-web.py`, `python3 -c "import scrapling"`, or Firecrawl/curl/wget/scrapling CLI for SERP or page fetch.
27
+
28
+ Full JSON/markdown lives under **`.web/`** (gitignored). Use `read` on `output` paths after tool calls.
29
+
30
+ ## Install (once per machine — setup/humans only)
20
31
 
21
32
  ```bash
22
33
  command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
@@ -24,25 +35,23 @@ uv tool install "scrapling[fetchers]"
24
35
  scrapling install # browser binaries for default stealth scrape
25
36
  ```
26
37
 
27
- Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
28
-
29
- ## Output directory
38
+ Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
39
+ Config diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON; setup only)
30
40
 
31
- Write artifacts under **`.web/`** (gitignored), not `.firecrawl/`:
41
+ ## Bash fallback (if pi tools unavailable)
32
42
 
33
43
  | Task | Command |
34
44
  |------|---------|
35
45
  | Search | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
36
- | Scrape URL | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
37
- | Fast/static scrape | add `--fast` (example.com, raw docs, localhost) |
38
- | Map same-host links | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json --limit 50` |
39
- | Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/ --limit 3` |
46
+ | Scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
47
+ | Fast/static | add `--fast` |
48
+ | Map | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
49
+ | Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
40
50
 
41
51
  ## Search JSON shape (Firecrawl-compatible)
42
52
 
43
53
  ```bash
44
54
  jq -r '.data.web[].url' .web/search.json
45
- jq -r '.data.web[] | "\(.title): \(.url)"' .web/search.json
46
55
  ```
47
56
 
48
57
  Each entry: `url`, `title`, `description`.
@@ -51,43 +60,39 @@ Each entry: `url`, `title`, `description`.
51
60
 
52
61
  | Mode | When |
53
62
  |------|------|
54
- | **stealth** (default scrape) | Arbitrary URLs, JS-heavy sites |
55
- | **fast** (`--fast` or `HARNESS_WEB_FETCH_MODE=fast`) | Static docs, example.com, localhost |
63
+ | **stealth** (default) | Arbitrary URLs, JS-heavy sites |
64
+ | **fast** (`fast: true` / `--fast`) | Static docs, example.com, localhost |
56
65
  | **auto** (`HARNESS_WEB_FETCH_MODE=auto`) | fast for known-static hosts, else stealth |
57
66
 
58
- Search always uses lightweight HTTP to `html.duckduckgo.com/html/`; on 403/challenge, **one** stealth retry then fail clearly.
67
+ | Search backend | Behavior |
68
+ |--------------|----------|
69
+ | `ddg_html` (default) | DuckDuckGo HTML SERP |
70
+ | `searxng` | JSON at `HARNESS_WEB_SEARXNG_URL` — bootstrap via `harness-searxng-bootstrap.mjs` |
59
71
 
60
72
  ## Environment
61
73
 
62
74
  | Variable | Default | Purpose |
63
75
  |----------|---------|---------|
64
76
  | `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
65
- | `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | SERP backend |
66
- | `HARNESS_WEB_PROXY` | (unset) | Proxy URL for fetch/search |
67
- | `HARNESS_WEB_RATE_LIMIT_MS` | `2000` | Delay between bulk scrapes |
68
- | `HARNESS_WEB_TIMEOUT_MS` | `30000` | Per-request timeout |
77
+ | `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` |
78
+ | `HARNESS_WEB_SEARXNG_URL` | (unset) | Required when `SEARCH_ENGINE=searxng` |
69
79
 
70
80
  ## Escalation
71
81
 
72
- 1. `harness-web search` (HTTP SERP)
73
- 2. `harness-web scrape` (stealth default)
74
- 3. `harness-web scrape --fast` when the target is known static
75
- 4. `scrapling extract …` only when harness-web flags are insufficient
82
+ 1. `web_search` / `web_fetch`
83
+ 2. `web_fetch` with `fast: true` for static hosts
84
+ 3. `web_fetch` with `mode: map` then targeted fetches
85
+ 4. Site-specific Scrapling only when tools are insufficient (not for routine SERP/fetch)
76
86
 
77
- ## Gaps vs old Firecrawl
87
+ ## Gaps vs Firecrawl
78
88
 
79
89
  | Firecrawl | Harness path |
80
90
  |-----------|----------------|
81
- | `interact` | No 1:1 — rare flows use gstack browse or Scrapling MCP session |
82
- | `agent` (structured extract) | Agent reasoning + graphify, or site-specific selectors |
83
- | `parse` (local PDF) | Dedicated doc tools (pypdf, markitdown) |
84
- | `crawl` (site-wide) | `map` + `bulk-scrape` or future Spiders integration |
91
+ | `interact` | gstack browse or manual browser |
92
+ | `agent` | Agent reasoning + graphify |
93
+ | `parse` (PDF) | pypdf, markitdown |
94
+ | `crawl` | `web_search` bulk or map + multiple `web_fetch` |
85
95
 
86
96
  ## Ethics
87
97
 
88
98
  Respect site terms and rate limits. SERP scraping is for dev research, not high-volume harvesting.
89
- See [Scrapling ethical considerations](https://scrapling.readthedocs.io/en/latest/cli/extract-commands.html#legal-and-ethical-considerations).
90
-
91
- ## Drawbacks of default stealth scrape
92
-
93
- Higher latency and RAM (Chromium per session). Use `--fast` for static docs; reuse one `bulk-scrape` run (single `StealthySession`) instead of many cold starts.
@@ -8,7 +8,7 @@ description: >
8
8
  Triggers on: "/wiki-autoresearch", "/autoresearch", "wiki-autoresearch", "autoresearch",
9
9
  "research [topic]", "deep dive into [topic]", "investigate [topic]",
10
10
  "find everything about [topic]", "research and file", "go research", "build a wiki on".
11
- allowed-tools: Read Write Edit Glob Grep WebFetch WebSearch Bash
11
+ allowed-tools: Read Write Edit Glob Grep web_search web_fetch Bash
12
12
  ---
13
13
 
14
14
  # wiki-autoresearch: Autonomous Research Loop with Graphify
@@ -129,8 +129,8 @@ Input: topic (from Topic Selection, above)
129
129
 
130
130
  Round 1. Broad search
131
131
  1. Decompose topic into 3-5 distinct search angles
132
- 2. For each angle: run 2-3 WebSearch queries
133
- 3. For top 2-3 results per angle: WebFetch the page
132
+ 2. For each angle: run 2-3 `web_search` queries
133
+ 3. For top 2-3 results per angle: `web_fetch` each URL (or `read` `.web/` artifacts)
134
134
  4. Save each fetched page to ./raw/ as a markdown file
135
135
  5. Extract from each: key claims, entities, concepts, open questions
136
136
 
package/.pi/PACKAGING.md CHANGED
@@ -13,16 +13,17 @@ Aligned with [pi packages](https://github.com/badlogic/pi-mono/blob/main/package
13
13
  Pi does **not** define `scripts`, `agents`, or `providers` in the manifest.
14
14
 
15
15
  - **Harness scripts** → `.pi/scripts/` — run via `node` / `bash` and `$UP_PKG` (see `.pi/scripts/README.md`); do not require npm script aliases in consumer `package.json`
16
- - **Subagent agents** → `.pi/agents/**/*.md` on the installed package (`harness/planner`, `pi-pi/agent-expert`, …) via `harness-subagents.ts`; optional **project overrides** at the same relative path under `.pi/agents/`. Version drift: `.pi/harness/agents.manifest.json` (regenerate with `harness-agents-manifest.mjs --write`)
16
+ - **Subagent agents** → `.pi/agents/**/*.md` (loaded by `@tintinweb/pi-subagents` from the **project** `.pi/agents/`; `/harness-setup` seeds them from the installed package)
17
17
  - **Providers** → install via `bundledDependencies` + user settings, not a separate manifest directory
18
18
 
19
19
  ## npm `files` allowlist
20
20
 
21
21
  We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts never ship:
22
22
 
23
- - No `.pi/harness/runs/`, local `model-router.json`, or `.web/` scrape artifacts
23
+ - No `.pi/harness/runs/`, local `model-router.json`, or `firecrawl/.env`
24
24
  - Ship `.pi/settings.example.json`, not `.pi/settings.json` (dev checkout uses `".."` local package)
25
25
  - Include **`vendor/pi-model-router/`** ([`pi-model-router`](https://github.com/yeliu84/pi-model-router), MIT) — see repo [`THIRD_PARTY_NOTICES.md`](../THIRD_PARTY_NOTICES.md); refresh with `npm run vendor:sync-router`
26
+ - Include **`vendor/pi-vcc/`** ([`pi-vcc`](https://github.com/sting8k/pi-vcc), MIT; inspired by [lllyasviel/VCC](https://github.com/lllyasviel/VCC)) — loaded via `.pi/extensions/ultimate-pi-vcc.ts`; refresh with `npm run vendor:sync-vcc`
26
27
 
27
28
  ## Settings
28
29
 
package/.pi/SYSTEM.md CHANGED
@@ -23,26 +23,25 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
23
23
  ## Web Policy (Mandatory)
24
24
 
25
25
  > [!warning] No raw HTTP
26
- > Route **all** web fetches through [[context7]] (API/library docs) or **harness-web** / [[scrapling-web]] (all other). No `curl`, `wget`, or raw bash HTTP.
26
+ > Route **all** web through [[context7]] (API/library docs) or **`web_search` / `web_fetch`** ([[scrapling-web]]). No `curl`, `wget`, Firecrawl, or scrapling CLI preflight.
27
27
 
28
28
  ### API / Library Docs — context7 ONLY
29
29
  - `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`
30
30
  - context7 owns: function signatures, class APIs, config options, stdlib, framework specs.
31
- - **Never** use quality-sites for API docs.
31
+ - **Never** use quality-sites or web_fetch for API docs.
32
32
 
33
- ### All Non-API Web Fetch harness-web (Scrapling)
34
- See `.agents/skills/scrapling-web/SKILL.md` for workflow escalation.
33
+ ### All Non-API Web — web_search + web_fetch
34
+ See `.agents/skills/scrapling-web/SKILL.md`. **No preflight:** never resolve `UP_PKG`, `ls harness-web.py`, or `python3 -c "import scrapling"` before searching.
35
35
 
36
- | Task | Command |
37
- |------|---------|
38
- | Search (no URL) | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
39
- | Scrape (have URL) | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
40
- | Static / known-simple | add `--fast` to scrape |
41
- | Map same-host links | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
42
- | Bulk search + scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
36
+ | Task | Tool |
37
+ |------|------|
38
+ | Search (SERP) | `web_search` (`query`, optional `limit`, `bulk`) |
39
+ | Scrape page | `web_fetch` (`url`, optional `fast: true`) |
40
+ | Map links | `web_fetch` (`url`, `mode: map`) |
43
41
 
44
- - **Artifacts:** always write under `.web/` with `-o` (token discipline).
45
- - **Default scrape:** stealth browser; opt out with `--fast` or `HARNESS_WEB_FETCH_MODE=fast`.
42
+ - **Artifacts:** default under `.web/`; use `read` for full JSON/markdown.
43
+ - **Fallback** (tools unavailable): `python3 "$UP_PKG/.pi/scripts/harness-web.py" …` per scrapling-web skill.
44
+ - **Setup diagnostics only:** `harness-web.py status` (JSON config).
46
45
  - **Quality sites:** check `.agents/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
47
46
  - **Research:** use `/wiki-autoresearch <topic>` for deep research. Results are graphified into `graphify-out/`.
48
47
 
@@ -187,9 +187,9 @@ Before answering ANY question, search the local codebase for existing agent defi
187
187
  find .pi/agents -name "*.md" -type f 2>/dev/null
188
188
  ```
189
189
 
190
- Fetch the latest pi-subagents docs:
191
- ```bash
192
- firecrawl scrape "https://raw.githubusercontent.com/tintinweb/pi-subagents/refs/heads/master/README.md" -o .firecrawl/pi-subagents-readme.md --only-main-content
190
+ Fetch the latest pi-subagents docs (use `web_fetch` with `fast: true` for raw GitHub):
191
+ ```
192
+ web_fetch url="https://raw.githubusercontent.com/tintinweb/pi-subagents/refs/heads/master/README.md" fast=true output=.web/pi-subagents-readme.md
193
193
  ```
194
194
 
195
195
  ## How to Respond
@@ -0,0 +1,95 @@
1
+ /**
2
+ * harness-web-guard — block bash that bypasses web_search / web_fetch tools.
3
+ */
4
+
5
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+
7
/** Message returned to the agent whenever this guard blocks a bash command. */
const BLOCK_REASON =
  "harness-web-guard: use web_search (SERP) or web_fetch (page content) instead of raw curl/wget/firecrawl/scrapling fetch. " +
  "Setup may use harness-web.py status directly.";

/**
 * Commands that mention any of these tools are exempt from blocking.
 * Each pattern is tested against the entire command string.
 */
const ALLOW_PATTERNS = [
  /harness-web\.py\b/i,
  /harness-cli-verify\.sh\b/i,
  /\bgraphify\b/i,
  /\bctx7\b/i,
  /\bcontext7\b/i,
  /\bgit\b/i,
  /harness-searxng-bootstrap/i,
];

/**
 * Bash patterns that bypass the web_search / web_fetch tools.
 * `note` is the short label reported back when the pattern matches.
 */
const BLOCK_PATTERNS: Array<{ re: RegExp; note: string }> = [
  { re: /\bfirecrawl\b/i, note: "firecrawl" },
  {
    // A word boundary before the scheme (rather than mandatory whitespace) also
    // catches quoted URLs (`curl "https://…"`) and `--url=https://…` forms.
    // `[^\n|;&]*` keeps the match inside a single simple command.
    re: /\b(?:curl|wget)\b[^\n|;&]*\bhttps?:\/\//i,
    note: "curl/wget http(s)",
  },
  {
    re: /\bscrapling\s+(?:fetch|extract)\b/i,
    note: "scrapling fetch/extract",
  },
];
32
+
33
/**
 * Reports whether a user prompt looks like a harness setup/bootstrap request.
 *
 * Matching is a case-insensitive substring check against a fixed marker list.
 *
 * @param prompt - Raw text of the user prompt.
 * @returns `true` when any marker occurs in the prompt.
 */
function isBootstrapPrompt(prompt: string): boolean {
  const lowered = prompt.toLowerCase();
  const markers = [
    "/harness-setup",
    "harness-setup",
    "full harness bootstrap",
  ];
  return markers.some((marker) => lowered.includes(marker));
}
41
+
42
/**
 * Returns the text of the most recent user message in the session.
 *
 * Entries are scanned newest-first. A string `content` is returned as-is; an
 * array `content` is flattened by joining each part's `text` (parts without a
 * `text` property contribute an empty string) with newlines. User entries
 * whose content is neither a string nor an array are skipped.
 *
 * @param ctx - Object exposing `sessionManager.getEntries()`.
 * @returns The prompt text, or `""` when no usable user message exists.
 */
function latestUserPrompt(ctx: {
  sessionManager: { getEntries(): unknown[] };
}): string {
  type SessionEntry = {
    type?: string;
    message?: { role?: string; content?: unknown };
  };

  const partText = (part: unknown): string => {
    if (typeof part === "object" && part && "text" in part) {
      return String((part as { text?: string }).text ?? "");
    }
    return "";
  };

  const history = ctx.sessionManager.getEntries() as SessionEntry[];
  for (const entry of [...history].reverse()) {
    const message = entry?.message;
    if (message?.role !== "user") continue;

    const body = message.content;
    if (typeof body === "string") return body;
    if (Array.isArray(body)) {
      return body.map((part) => partText(part)).join("\n");
    }
    // Any other content shape: keep looking at older entries.
  }
  return "";
}
66
+
67
/**
 * Reports whether a bash command is exempt from the web guard.
 *
 * NOTE(review): every allow pattern is tested against the whole command
 * string, so a compound command that merely mentions an allowed tool is
 * exempt as a whole.
 *
 * @param command - Full bash command line.
 * @returns `true` when at least one allow pattern matches.
 */
function isAllowedBash(command: string): boolean {
  for (const pattern of ALLOW_PATTERNS) {
    if (pattern.test(command)) return true;
  }
  return false;
}
70
+
71
/**
 * Classifies a bash command against the block list.
 *
 * Allow-listed commands are never blocked. Otherwise the label of the first
 * matching block pattern is returned.
 *
 * @param command - Full bash command line.
 * @returns The matched pattern's note, or `null` when the command may run.
 */
function blockedWebBash(command: string): string | null {
  if (isAllowedBash(command)) return null;
  const hit = BLOCK_PATTERNS.find(({ re }) => re.test(command));
  return hit ? hit.note : null;
}
78
+
79
/**
 * Extension entry point: registers a `tool_call` handler that blocks bash
 * commands which bypass the web_search / web_fetch tools.
 *
 * The handler returns `undefined` (no decision) for non-bash tools, when the
 * latest user prompt is a setup/bootstrap request, and when the command does
 * not match a block pattern. Otherwise it returns a block result with the
 * reason and the matched pattern label.
 *
 * @param pi - Extension API used to subscribe to tool calls.
 */
export default function harnessWebGuard(pi: ExtensionAPI) {
  pi.on("tool_call", async (event, ctx) => {
    // Only bash invocations are inspected.
    if (event.toolName !== "bash") return undefined;

    // Setup/bootstrap prompts are exempt from the guard.
    if (isBootstrapPrompt(latestUserPrompt(ctx))) return undefined;

    const { command } = event.input as { command?: string };
    const matched = blockedWebBash(String(command ?? ""));
    if (!matched) return undefined;

    return {
      block: true,
      reason: `${BLOCK_REASON} (matched: ${matched})`,
    };
  });
}
@@ -0,0 +1,209 @@
1
+ /**
2
+ * harness-web-tools — web_search + web_fetch pi tools wrapping harness-web.py.
3
+ */
4
+
5
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
6
+ import { Type } from "@sinclair/typebox";
7
+ import {
8
+ harnessWebContextLine,
9
+ readTextExcerpt,
10
+ runHarnessWeb,
11
+ summarizeSearchJson,
12
+ } from "./lib/harness-web/run-cli.js";
13
+
14
// URL of this extension module; passed to runHarnessWeb on every CLI call.
// @ts-expect-error pi extensions run as ESM
const MODULE_URL = import.meta.url;
16
+
17
/** Prompt guidelines attached to the `web_search` tool registration. */
const WEB_SEARCH_GUIDELINES: string[] = [
  "Use web_search for open-web SERP — never preflight UP_PKG, ls harness-web.py, or python3 -c import scrapling.",
  "Never use Firecrawl, curl/wget for search, or scrapling CLI for SERP.",
  "After search, use web_fetch on URLs or read the output JSON under .web/.",
  "Use bulk:true only when you need search plus multi-page scrape in one step.",
];

/** Prompt guidelines attached to the `web_fetch` tool registration. */
const WEB_FETCH_GUIDELINES: string[] = [
  "Use web_fetch for page markdown or same-host link maps — never curl/wget the URL.",
  "Never use raw scrapling CLI for fetch; harness-web handles Scrapling bootstrap.",
  "Library API documentation → context7 only, not web_fetch.",
  "Set fast:true for static docs (example.com, raw HTML docs, localhost).",
];
30
+
31
/**
 * Parameter schema for the `web_search` tool.
 *
 * - `query`  — required search string.
 * - `limit`  — optional result cap, 1–20.
 * - `output` — optional output path.
 * - `bulk`   — optional; when true the tool runs bulk-scrape instead of search.
 */
const WebSearchSchema = Type.Object({
  query: Type.String({ description: "Search query" }),
  limit: Type.Optional(
    Type.Number({
      description: "Max results (default 5)",
      minimum: 1,
      maximum: 20,
    }),
  ),
  output: Type.Optional(
    Type.String({
      description:
        "Output path (default .web/search.json or .web/bulk for bulk)",
    }),
  ),
  bulk: Type.Optional(
    Type.Boolean({
      description:
        "If true, run bulk-scrape (search then scrape top URLs to output directory)",
      default: false,
    }),
  ),
});

/**
 * Parameter schema for the `web_fetch` tool.
 *
 * - `url`    — required target URL.
 * - `mode`   — optional; `scrape` or `map`.
 * - `output` — optional output file path.
 * - `fast`   — optional; selects the fast HTTP scrape.
 * - `limit`  — optional link cap for map mode, 1–500.
 */
const WebFetchSchema = Type.Object({
  url: Type.String({ description: "URL to fetch" }),
  mode: Type.Optional(
    Type.Union([Type.Literal("scrape"), Type.Literal("map")], {
      description: "scrape (markdown) or map (same-host links JSON)",
      default: "scrape",
    }),
  ),
  output: Type.Optional(
    Type.String({ description: "Output file path under .web/" }),
  ),
  fast: Type.Optional(
    Type.Boolean({
      description: "Use fast HTTP scrape (static/simple pages)",
      default: false,
    }),
  ),
  limit: Type.Optional(
    Type.Number({
      description: "For map mode: max links (default 100)",
      minimum: 1,
      maximum: 500,
    }),
  ),
});
80
+
81
/**
 * Builds a failed tool result.
 *
 * @param text - Message shown as the single text content part.
 * @returns Result object whose `details.ok` is `false`.
 */
function failResult(text: string) {
  const content = [{ type: "text" as const, text }];
  return { content, details: { ok: false } };
}
87
+
88
/**
 * Builds a successful tool result.
 *
 * @param text - Message shown as the single text content part.
 * @param details - Extra fields merged into `details` after `ok: true`
 *   (so a caller-supplied `ok` key takes precedence).
 * @returns Result object carrying the text and the merged details.
 */
function okResult(text: string, details: Record<string, unknown> = {}) {
  const merged = { ok: true, ...details };
  return {
    content: [{ type: "text" as const, text }],
    details: merged,
  };
}
94
+
95
/**
 * Resolves the working directory for a tool invocation.
 *
 * @param ctx - Execution context that may carry a `cwd`.
 * @returns `ctx.cwd` when it is set, otherwise the process working directory.
 */
function sessionCwd(ctx: { cwd?: string }): string {
  if (ctx.cwd != null) return ctx.cwd;
  return process.cwd();
}
98
+
99
/**
 * Extension entry point: registers the `web_search` and `web_fetch` tools and
 * appends the harness-web context line to the system prompt.
 *
 * Both tools build an argv for the harness-web CLI, run it through
 * `runHarnessWeb`, and return either a failure result (exit code plus
 * stderr/stdout and a hint) or a success result that names the output path.
 *
 * @param pi - Extension API used for event subscription and tool registration.
 */
export default function harnessWebTools(pi: ExtensionAPI) {
  // Extend the system prompt with the harness-web context line.
  pi.on("before_agent_start", async (event) => ({
    systemPrompt: `${event.systemPrompt}\n\n${harnessWebContextLine()}`,
  }));

  pi.registerTool({
    name: "web_search",
    label: "Web Search",
    description:
      "Search the web via harness-web (DuckDuckGo HTML or self-hosted SearXNG from .env). Returns result summaries and output path.",
    promptSnippet: "SERP via configured engine (ddg_html or searxng from .env)",
    promptGuidelines: WEB_SEARCH_GUIDELINES,
    parameters: WebSearchSchema,

    async execute(_id, params, _signal, _onUpdate, ctx) {
      const cwd = sessionCwd(ctx);
      const query = String(params.query ?? "").trim();
      if (query === "") return failResult("web_search: query is required.");

      const limit = typeof params.limit === "number" ? params.limit : 5;
      const bulk = params.bulk === true;
      const fallbackOutput = bulk ? ".web/bulk" : ".web/search.json";
      const output = String(params.output ?? fallbackOutput);

      // Bulk mode only swaps the subcommand; the remaining arguments are shared.
      const subcommand = bulk ? "bulk-scrape" : "search";
      const argv = [subcommand, query, "-o", output, "--limit", String(limit)];

      const run = runHarnessWeb(MODULE_URL, argv, cwd);
      if (!run.ok) {
        const hint =
          "\n\nHints: run /harness-setup; for searxng set HARNESS_WEB_SEARXNG_URL; " +
          "enable json in SearXNG search.formats.";
        return failResult(
          `web_search failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}${hint}`,
        );
      }

      const lines = [run.stdout];
      // A result summary is only produced for plain (non-bulk) searches.
      const summary = bulk ? null : summarizeSearchJson(output, cwd);
      if (summary) {
        lines.push("", summary);
      }
      lines.push(
        "",
        `output: ${output}`,
        "Use read tool for full JSON, or web_fetch on result URLs.",
      );

      return okResult(lines.join("\n"), {
        output,
        query,
        bulk,
        engine: process.env.HARNESS_WEB_SEARCH_ENGINE,
      });
    },
  });

  pi.registerTool({
    name: "web_fetch",
    label: "Web Fetch",
    description:
      "Fetch a URL via harness-web/Scrapling (scrape to markdown or map same-host links).",
    promptSnippet: "Scrape/map URL via Scrapling (harness-web)",
    promptGuidelines: WEB_FETCH_GUIDELINES,
    parameters: WebFetchSchema,

    async execute(_id, params, _signal, _onUpdate, ctx) {
      const cwd = sessionCwd(ctx);
      const url = String(params.url ?? "").trim();
      if (url === "") return failResult("web_fetch: url is required.");

      // Anything other than an explicit "map" is treated as a scrape.
      const isMap = params.mode === "map";
      const mode = isMap ? "map" : "scrape";
      const fast = params.fast === true;
      const limit = typeof params.limit === "number" ? params.limit : 100;
      const defaultOut = isMap ? ".web/map.json" : ".web/page.md";
      const output = String(params.output ?? defaultOut);

      const argv: string[] = [mode, url, "-o", output];
      if (isMap) argv.push("--limit", String(limit)); // limit applies to map mode only
      if (fast) argv.push("--fast");

      const run = runHarnessWeb(MODULE_URL, argv, cwd);
      if (!run.ok) {
        return failResult(
          `web_fetch failed (exit ${run.exitCode}).\n${run.stderr || run.stdout}\n` +
            "Try fast:true for static pages, or run harness-cli-verify for Scrapling install.",
        );
      }

      const lines = [run.stdout, "", `output: ${output}`];
      const excerpt = readTextExcerpt(output, cwd);
      if (excerpt) {
        lines.push("", "--- excerpt ---", excerpt);
      }

      return okResult(lines.join("\n"), { output, url, mode });
    },
  });
}
@@ -0,0 +1,50 @@
1
+ /**
2
+ * ultimate-pi VCC configuration — env only (no config files).
3
+ *
4
+ * @see https://github.com/sting8k/pi-vcc (vendored algorithm)
5
+ * @see https://github.com/lllyasviel/VCC
6
+ */
7
+
8
/** Resolved VCC settings consumed by the harness. */
export interface PiVccSettings {
  /** When true, VCC handles /compact, auto-threshold, and overflow compaction. */
  overrideDefaultCompaction: boolean;
  /** When true, a debug snapshot is written to /tmp/pi-vcc-debug.json on each compaction. */
  debug: boolean;
}
14
+
15
/** Lower-cased spellings recognised as boolean false. */
const FALSE_VALUES = new Set(["false", "0", "off", "no"]);
/** Lower-cased spellings recognised as boolean true. */
const TRUE_VALUES = new Set(["true", "1", "on", "yes"]);

/**
 * Reads a boolean flag from an environment variable.
 *
 * The value is trimmed and lower-cased before lookup. An unset, empty, or
 * unrecognised value yields `defaultValue`.
 *
 * @param envName - Name of the environment variable.
 * @param defaultValue - Fallback when the variable gives no usable answer.
 * @returns The parsed flag or the fallback.
 */
function parseHarnessBool(envName: string, defaultValue: boolean): boolean {
  const normalized = process.env[envName]?.trim().toLowerCase();
  if (normalized && FALSE_VALUES.has(normalized)) return false;
  if (normalized && TRUE_VALUES.has(normalized)) return true;
  return defaultValue;
}
31
+
32
/**
 * Whether VCC overrides Pi built-in LLM compaction.
 *
 * Controlled by `HARNESS_VCC_COMPACTION`; enabled when the variable is unset
 * or unrecognised.
 */
export function resolveOverrideDefaultCompaction(): boolean {
  const enabledByDefault = true;
  return parseHarnessBool("HARNESS_VCC_COMPACTION", enabledByDefault);
}
36
+
37
/**
 * Whether compaction debug snapshots are enabled.
 *
 * Controlled by `HARNESS_VCC_DEBUG`; disabled when the variable is unset or
 * unrecognised.
 */
export function resolveVccDebug(): boolean {
  const enabledByDefault = false;
  return parseHarnessBool("HARNESS_VCC_DEBUG", enabledByDefault);
}
41
+
42
/**
 * Resolves the VCC settings from the environment.
 *
 * @returns A fresh settings object built from the two env-backed resolvers.
 */
export function loadSettings(): PiVccSettings {
  const overrideDefaultCompaction = resolveOverrideDefaultCompaction();
  const debug = resolveVccDebug();
  return { overrideDefaultCompaction, debug };
}
48
+
49
/**
 * No-op — harness VCC does not scaffold or read JSON config files.
 */
export function scaffoldSettings(): void {
  // Intentionally empty.
}