ultimate-pi 0.19.0 → 0.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. package/.agents/skills/web-retrieval/SKILL.md +163 -0
  2. package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
  3. package/.pi/SYSTEM.md +30 -12
  4. package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
  5. package/.pi/agents/harness/planning/stack-researcher.md +5 -1
  6. package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
  7. package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
  8. package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
  9. package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
  10. package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
  11. package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
  12. package/.pi/extensions/harness-web-guard.ts +2 -1
  13. package/.pi/extensions/harness-web-tools.ts +689 -51
  14. package/.pi/harness/agents.manifest.json +29 -5
  15. package/.pi/harness/agents.policy.yaml +34 -0
  16. package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
  17. package/.pi/harness/docs/harness-web-search.md +97 -0
  18. package/.pi/harness/env.harness.template +9 -1
  19. package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
  20. package/.pi/harness/web-heuristic-angles.json +278 -0
  21. package/.pi/harness/web-heuristic-angles.yaml +182 -0
  22. package/.pi/lib/agents-policy.mjs +6 -0
  23. package/.pi/lib/harness-subagent-auth.ts +39 -9
  24. package/.pi/lib/harness-subagents-bridge.ts +21 -0
  25. package/.pi/lib/harness-web/artifacts.ts +200 -0
  26. package/.pi/lib/harness-web/cache.ts +369 -0
  27. package/.pi/lib/harness-web/run-cli.ts +42 -2
  28. package/.pi/prompts/harness-plan.md +1 -0
  29. package/.pi/prompts/harness-setup.md +3 -1
  30. package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
  31. package/.pi/scripts/harness-cli-verify.sh +5 -0
  32. package/.pi/scripts/harness-verify.mjs +78 -0
  33. package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
  34. package/.pi/scripts/harness-web.py +218 -15
  35. package/.pi/scripts/harness_web/deep_search.py +55 -0
  36. package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
  37. package/.pi/scripts/harness_web/find_similar.py +88 -0
  38. package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
  39. package/.pi/scripts/harness_web/heuristic_config.py +251 -0
  40. package/.pi/scripts/harness_web/highlights.py +47 -0
  41. package/.pi/scripts/harness_web/multi_search.py +59 -0
  42. package/.pi/scripts/harness_web/output.py +24 -0
  43. package/.pi/scripts/harness_web/query_angles.py +116 -0
  44. package/.pi/scripts/harness_web/rank.py +163 -0
  45. package/.pi/scripts/harness_web/scrape.py +30 -0
  46. package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
  47. package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
  48. package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
  49. package/AGENTS.md +2 -2
  50. package/CHANGELOG.md +6 -0
  51. package/package.json +5 -3
  52. package/.agents/skills/scrapling-web/SKILL.md +0 -98
  53. package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
  54. package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
  56. package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
  57. package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
  58. package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
  59. package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
  60. package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
@@ -41,6 +41,36 @@ def scrape_url(
41
41
  write_page_markdown(Path(output), page, main_content_only=True)
42
42
 
43
43
 
44
def scrape_url_with_highlights(
    url: str,
    markdown_output: str,
    highlights_output: str | None,
    *,
    config: HarnessWebConfig,
    fast: bool,
    wait_ms: int | None,
    highlight_query: str,
) -> None:
    """Fetch ``url``, save it as markdown, and optionally save query highlights.

    The page is always written to ``markdown_output`` (main content only).
    A JSON highlights file is produced only when ``highlights_output`` is
    truthy and ``highlight_query`` is non-blank after stripping; otherwise
    the function returns once the markdown has been written.

    Args:
        url: Page to fetch.
        markdown_output: Destination path for the page markdown.
        highlights_output: Destination path for the highlights JSON, or
            ``None`` to skip highlight extraction.
        config: Passed through to ``fetch_page``.
        fast: Passed through to ``fetch_page``.
        wait_ms: Passed through to ``fetch_page``.
        highlight_query: Query handed to ``extract_highlights``.
    """
    import json
    from pathlib import Path

    from .highlights import extract_highlights

    fetched = fetch_page(url, config=config, fast=fast, wait_ms=wait_ms)
    markdown_path = Path(markdown_output)
    write_page_markdown(markdown_path, fetched, main_content_only=True)

    # Nothing more to do unless both a destination and a usable query exist.
    if not (highlights_output and highlight_query.strip()):
        return

    # Highlights are computed from the markdown exactly as it was written to disk.
    markdown_text = markdown_path.read_text(encoding="utf-8")
    found = extract_highlights(markdown_text, highlight_query)

    target = Path(highlights_output)
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = {"url": url, "query": highlight_query, "highlights": found}
    target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
72
+
73
+
44
74
  def map_url(
45
75
  url: str,
46
76
  output: str,
@@ -0,0 +1,132 @@
1
+ """Unit tests for harness_web.heuristic_config."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import tempfile
8
+ import unittest
9
+ from pathlib import Path
10
+
11
+ from harness_web.heuristic_config import (
12
+ _embedded_builtin_dict,
13
+ _merge_config_dict,
14
+ build_heuristic_angles,
15
+ clear_heuristic_config_cache,
16
+ heuristic_config_from_merged,
17
+ load_heuristic_angles_config_cached,
18
+ )
19
+
20
+
21
class TestHeuristicConfig(unittest.TestCase):
    """Checks for built-in, shipped, and merged heuristic-angle configurations."""

    def tearDown(self) -> None:
        # Drop any cached configuration so tests do not see each other's state.
        clear_heuristic_config_cache()

    @staticmethod
    def _ids(angles):
        """Collect the ``id`` of every angle into a set."""
        return {angle.id for angle in angles}

    def _load_shipped_config(self):
        """Load the YAML shipped beside the package; skip the test if it is absent."""
        shipped = Path(__file__).resolve().parents[2] / "harness" / "web-heuristic-angles.yaml"
        if not shipped.is_file():
            self.skipTest("package yaml missing")
        clear_heuristic_config_cache()
        return load_heuristic_angles_config_cached((str(shipped),))

    def test_builtin_code_includes_stackoverflow(self) -> None:
        config = heuristic_config_from_merged(_embedded_builtin_dict())
        angles = build_heuristic_angles("rust async", category="code", config=config)
        found = self._ids(angles)
        self.assertIn("stackoverflow", found)
        self.assertIn("github", found)
        self.assertTrue(any("site:stackoverflow.com" in angle.query for angle in angles))

    def test_shipped_code_includes_mdn_and_registries(self) -> None:
        config = self._load_shipped_config()
        angles = build_heuristic_angles("websocket api", category="code", config=config)
        found = self._ids(angles)
        self.assertIn("mdn", found)
        self.assertIn("package_registries", found)
        self.assertLessEqual(len(angles), config.max_angles)

    def test_shipped_security_category(self) -> None:
        config = self._load_shipped_config()
        angles = build_heuristic_angles("jwt validation", category="security", config=config)
        found = self._ids(angles)
        self.assertIn("owasp", found)
        self.assertIn("cve_nvd", found)

    def test_merge_extends_code_category(self) -> None:
        docs_rs_angle = {
            "id": "docs_rs",
            "query": "{query} site:docs.rs",
            "rationale": "Rust docs",
        }
        override = {"max_angles": 12, "categories": {"code": [docs_rs_angle]}}
        merged = _merge_config_dict(_embedded_builtin_dict(), override)
        config = heuristic_config_from_merged(merged)

        # The override entry must be present in the merged "code" list ...
        merged_ids = [entry["id"] for entry in merged["categories"]["code"]]
        self.assertIn("docs_rs", merged_ids)

        # ... and the built-in entries must still be offered for that category.
        angles = build_heuristic_angles("tokio", category="code", config=config)
        found = self._ids(angles)
        self.assertIn("stackoverflow", found)
        self.assertIn("github", found)

    def test_merge_adds_new_category(self) -> None:
        security_angles = [
            {"id": "cve", "query": "{query} CVE", "rationale": "vulns"},
            {
                "id": "owasp",
                "query": "{query} site:owasp.org",
                "rationale": "guidance",
            },
        ]
        merged = _merge_config_dict(
            _embedded_builtin_dict(),
            {"categories": {"security": security_angles}},
        )
        config = heuristic_config_from_merged(merged)
        angles = build_heuristic_angles("jwt auth", category="security", config=config)
        found = self._ids(angles)
        self.assertIn("cve", found)
        self.assertIn("owasp", found)

    def test_json_project_file_merges(self) -> None:
        project_payload = {
            "categories": {
                "code": [
                    {
                        "id": "crates_io",
                        "query": "{query} site:crates.io",
                        "rationale": "crates",
                    },
                ],
            },
        }
        with tempfile.TemporaryDirectory() as tmp:
            harness_dir = Path(tmp) / ".pi" / "harness"
            harness_dir.mkdir(parents=True)
            project_file = harness_dir / "web-heuristic-angles.json"
            project_file.write_text(json.dumps(project_payload), encoding="utf-8")

            # Load while the temporary directory still exists.
            clear_heuristic_config_cache()
            config = load_heuristic_angles_config_cached((str(project_file),))
            angles = build_heuristic_angles("serde", category="code", config=config)
            self.assertIn("crates_io", self._ids(angles))
129
+
130
+
131
+ if __name__ == "__main__":
132
+ unittest.main()
@@ -0,0 +1,45 @@
1
+ """Unit tests for harness_web.query_angles (no network)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import tempfile
7
+ import unittest
8
+ from pathlib import Path
9
+
10
+ from harness_web.query_angles import AnglesPlan, load_angles_file, resolve_angles
11
+
12
+
13
class TestResolveAngles(unittest.TestCase):
    """Checks ``resolve_angles`` with heuristic expansion enabled."""

    def test_heuristic_code_category(self) -> None:
        plan = resolve_angles("rust async", expand_heuristic=True, category="code")
        self.assertIn("github", {angle.id for angle in plan.angles})

        # The plan must contain between two and five angles, inclusive.
        angle_count = len(plan.angles)
        self.assertGreaterEqual(angle_count, 2)
        self.assertLessEqual(angle_count, 5)
20
+
21
+
22
class TestLoadFile(unittest.TestCase):
    """Checks that ``load_angles_file`` reads a JSON angles plan from disk."""

    def test_load_json_file(self) -> None:
        payload = {
            "intent": "load test",
            "angles": [
                {"id": "a", "query": "first angle query"},
                {"id": "b", "query": "second angle query"},
            ],
        }
        with tempfile.TemporaryDirectory() as tmp:
            angles_path = Path(tmp) / "angles.json"
            angles_path.write_text(json.dumps(payload), encoding="utf-8")
            # Load while the temporary directory still exists.
            plan = load_angles_file(angles_path)
            self.assertIsInstance(plan, AnglesPlan)
            self.assertEqual(plan.intent, "load test")
            self.assertEqual(len(plan.angles), 2)
42
+
43
+
44
+ if __name__ == "__main__":
45
+ unittest.main()
@@ -0,0 +1,56 @@
1
+ """Unit tests for harness_web.rank (no network)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import unittest
6
+
7
+ from harness_web.rank import RankedHit, fuse_angle_results, lexical_rerank, normalize_url, tokenize
8
+
9
+
10
class TestNormalizeUrl(unittest.TestCase):
    """Checks that ``normalize_url`` maps equivalent URLs to the same value."""

    def test_strips_tracking(self) -> None:
        # Differs from the clean URL only by host casing and a utm_source parameter.
        with_tracking = normalize_url("https://Example.com/path?utm_source=x&id=1")
        clean = normalize_url("https://example.com/path?id=1")
        self.assertEqual(with_tracking, clean)

    def test_trailing_slash(self) -> None:
        with_slash = normalize_url("https://example.com/foo/")
        without_slash = normalize_url("https://example.com/foo")
        self.assertEqual(with_slash, without_slash)
21
+
22
+
23
class TestRrfFusion(unittest.TestCase):
    """Checks how ``fuse_angle_results`` combines per-angle result lists."""

    def test_merges_duplicate_urls(self) -> None:
        # https://x.com/1 is returned by both angles; https://x.com/2 by angle "a" only.
        shared_from_a = {"url": "https://x.com/1", "title": "T1", "description": "d1"}
        only_in_a = {"url": "https://x.com/2", "title": "T2", "description": "d2"}
        shared_from_b = {"url": "https://x.com/1", "title": "T1b", "description": "d1b"}
        angle_results = {
            "a": [shared_from_a, only_in_a],
            "b": [shared_from_b],
        }

        fused = fuse_angle_results(angle_results, final_limit=5)

        self.assertEqual(len(fused), 2)
        best, runner_up = fused[0], fused[1]
        self.assertEqual(best.url, "https://x.com/1")
        self.assertIn("a", best.angle_ids)
        self.assertIn("b", best.angle_ids)
        self.assertGreater(best.score, runner_up.score)
41
+
42
+
43
class TestLexicalRerank(unittest.TestCase):
    """Checks that ``lexical_rerank`` reorders hits by overlap with the intent."""

    def test_boosts_intent_overlap(self) -> None:
        # The off-topic hit starts with the higher score (0.52 vs 0.50).
        off_topic = RankedHit("https://a", "unrelated", "noise", 0.52, ["a"])
        on_topic = RankedHit(
            "https://b",
            "kubernetes architecture",
            "how kubernetes works",
            0.50,
            ["b"],
        )

        reranked = lexical_rerank([off_topic, on_topic], "kubernetes architecture")

        first, second = reranked[0], reranked[1]
        self.assertEqual(first.url, "https://b")
        self.assertGreater(first.score, second.score)
52
+
53
+
54
class TestTokenize(unittest.TestCase):
    """Checks the output of ``tokenize``."""

    def test_min_length(self) -> None:
        tokens = tokenize("hello hi")
        self.assertIn("hello", tokens)
package/AGENTS.md CHANGED
@@ -16,7 +16,7 @@ Created: 2026-05-14
16
16
  - docs/adr/ → Repo-level Architectural Decision Records
17
17
  - .pi/harness/docs/adrs/ → Harness ADRs (team-shared; [index](.pi/harness/docs/adrs/README.md))
18
18
  - .pi/harness/docs/practice-map.md → Phase → practice → agent spawn topology for `/harness-plan`, `/harness-run`, `/harness-review`
19
- - .pi/skills/ → Agent skills
19
+ - .pi/skills/ → Agent skills (harness skills symlink to `.agents/skills/`, e.g. `web-retrieval`)
20
20
  - .pi/agents/ → Specialized agents
21
21
 
22
22
  ## Graphify-First Workflow
@@ -36,7 +36,7 @@ Created: 2026-05-14
36
36
  - Harness context: **context-mode only** — never lean-ctx on harness paths (see harness-context skill)
37
37
  - `graphify update .` after significant code changes
38
38
  - ast-grep (`sg`) is the default code search tool — use `sg -p 'pattern'` for structural search, never grep for code
39
- - Web fetch/search via `python3 "$UP_PKG/.pi/scripts/harness-web.py"` (Scrapling; see scrapling-web skill)
39
+ - Non-API web: invoke **`web-retrieval`** skill (WRS tiers; default `tier=deep` with `web-query-expander` → `anglesFile`). CLI: `python3 "$UP_PKG/.pi/scripts/harness-web.py"`
40
40
 
41
41
  ## graphify
42
42
 
package/CHANGELOG.md CHANGED
@@ -9,6 +9,12 @@ All notable changes to this project are documented in this file.
9
9
  - **Harness lens:** Integrate selected pi-lens capabilities through a harness-owned extension, store lens state under `.pi/harness/.lens`, and route lens findings through harness PostHog telemetry instead of standalone lens health/telemetry surfaces.
10
10
  - **Graphify KB updater:** Productize conservative daily discovery/promotion with explicit repo/release taxonomy, allowlist source-class gates, operator review queue reporting, scheduler smoke validation, and safe Graphify refresh controls.
11
11
 
12
+ ## [v0.19.1] — 2026-05-26
13
+
14
+ ### 🔧 Chores
15
+
16
+ - Prepare web retrieval and harness updates for release.
17
+
12
18
  ## [v0.19.0] — 2026-05-24
13
19
 
14
20
  ### ✨ Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ultimate-pi",
3
- "version": "0.19.0",
3
+ "version": "0.19.1",
4
4
  "description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
5
5
  "keywords": [
6
6
  "pi-package",
@@ -56,6 +56,8 @@
56
56
  ".pi/harness/agents.manifest.json",
57
57
  ".pi/harness/agents.policy.yaml",
58
58
  ".pi/harness/examples",
59
+ ".pi/harness/web-heuristic-angles.yaml",
60
+ ".pi/harness/web-heuristic-angles.json",
59
61
  ".pi/lib/harness-lens",
60
62
  ".pi/harness/README.md",
61
63
  ".pi/npm/package.json",
@@ -76,7 +78,7 @@
76
78
  "@earendil-works/pi-coding-agent": "*"
77
79
  },
78
80
  "scripts": {
79
- "check:ts": "tsc --noEmit --target ES2023 --lib ES2023 --moduleResolution nodenext --module nodenext --skipLibCheck .pi/lib/agt/config.ts .pi/lib/agt/policy-engine.ts .pi/lib/agt/build-evaluation-context.ts .pi/lib/agt/evaluate-policy.ts .pi/lib/agt/legacy-evaluate.ts .pi/lib/agt/identity-registry.ts .pi/lib/agt/delegation.ts .pi/lib/agt/trust-run-store.ts .pi/lib/agt/audit-run-sink.ts .pi/lib/agt/rings.ts .pi/lib/agt/workflow-history.ts .pi/lib/agt/sre-hooks.ts .pi/lib/agt/kill-switch-state.ts .pi/lib/agt/index.ts .pi/extensions/agt-prompt-guard.ts .pi/extensions/agt-kill-switch.ts .pi/extensions/harness-subagent-governance.ts .pi/lib/harness-agt-tool-guard.ts .pi/lib/harness-subagent-submit-register.ts .pi/extensions/00-harness-project-control.ts .pi/extensions/custom-system-prompt.ts .pi/lib/harness-run-context.ts .pi/lib/harness-spawn-policy.ts .pi/lib/harness-context-mode-policy.ts .pi/lib/harness-ui-state.ts .pi/extensions/harness-run-context.ts .pi/lib/harness-vcc-settings.ts .pi/extensions/dotenv-loader.ts .pi/extensions/00-posthog-network-bootstrap.ts .pi/lib/posthog-client.ts .pi/lib/posthog-node.d.ts .pi/lib/harness-posthog.ts .pi/lib/harness-paths.ts .pi/extensions/provider-payload-sanitize.ts .pi/extensions/harness-telemetry.ts .pi/extensions/harness-ask-user.ts .pi/extensions/harness-plan-approval.ts .pi/lib/ask-user/schema.ts .pi/lib/ask-user/types.ts .pi/lib/ask-user/validate.ts .pi/lib/ask-user/dialog.ts .pi/lib/ask-user/fallback.ts .pi/lib/ask-user/render.ts .pi/lib/plan-approval/types.ts .pi/lib/plan-approval/schema.ts .pi/lib/plan-approval/validate.ts .pi/lib/plan-approval/format-plan.ts .pi/lib/plan-approval/dialog.ts .pi/lib/plan-approval/render.ts .pi/lib/plan-approval/create-plan.ts .pi/extensions/harness-subagents.ts .pi/lib/harness-subagents-bridge.ts .pi/lib/harness-cocoindex-refresh.ts .pi/lib/harness-subagent-auth.ts .pi/lib/agents-policy.ts .pi/lib/agt-governance-active.ts .pi/extensions/subagent-governance.ts 
.pi/lib/agt-tool-guard.ts .pi/lib/harness-subagent-precheck.ts .pi/lib/harness-spawn-budget.ts vendor/pi-subagents/src/agents.ts vendor/pi-subagents/src/subagents.ts .pi/extensions/review-integrity.ts .pi/extensions/trace-recorder.ts .pi/extensions/observation-bus.ts .pi/extensions/drift-monitor.ts .pi/extensions/policy-gate.ts .pi/extensions/budget-guard.ts .pi/extensions/debate-orchestrator.ts .pi/extensions/harness-debate-tools.ts .pi/lib/debate-bus-core.ts .pi/lib/debate-bus-state.ts .pi/lib/plan-debate-gate.ts .pi/lib/plan-debate-id.ts .pi/lib/plan-messenger.ts .pi/lib/plan-debate-envelope.ts .pi/lib/plan-review-integrator-rules.ts .pi/lib/plan-scope-guard.ts .pi/lib/plan-debate-write-guard.ts .pi/lib/plan-debate-lane.ts .pi/lib/plan-debate-round-status.ts .pi/extensions/harness-live-widget.ts .pi/extensions/sentrux-rules-sync.ts .pi/extensions/custom-header.ts .pi/extensions/harness-web-tools.ts .pi/extensions/harness-web-guard.ts .pi/lib/harness-web/run-cli.ts",
81
+ "check:ts": "tsc -p tsconfig.check.json",
80
82
  "vendor:sync-vcc": "bash .pi/scripts/vendor-sync-pi-vcc.sh",
81
83
  "vendor:sync-subagents": "bash .pi/scripts/vendor-sync-pi-subagents.sh",
82
84
  "release": "bash .pi/scripts/release.sh",
@@ -85,7 +87,7 @@
85
87
  "format": "biome format --write",
86
88
  "format:check": "biome format",
87
89
  "prepare": "lefthook install",
88
- "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/posthog-client.test.mjs test/harness-agt-policy-load.test.mjs test/harness-agt-policy-matrix.test.mjs test/harness-agt-policy-parity.test.mjs test/harness-agt-packaging.test.mjs test/harness-tool-call-hook-chain.test.mjs test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-project-toggle-tui.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/harness-project-agents-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs test/review-integrity-revise-handoff.test.mjs test/harness-plan-revise-reset.test.mjs",
90
+ "test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-web-cache.test.mjs test/harness-web-artifacts.test.mjs test/harness-subagent-auth.test.mjs test/posthog-client.test.mjs test/harness-agt-policy-load.test.mjs test/harness-agt-policy-matrix.test.mjs test/harness-agt-policy-parity.test.mjs test/harness-agt-packaging.test.mjs test/harness-tool-call-hook-chain.test.mjs test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-project-toggle-tui.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/harness-project-agents-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs test/review-integrity-revise-handoff.test.mjs test/harness-plan-revise-reset.test.mjs",
89
91
  "test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
90
92
  "harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
91
93
  "harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
@@ -1,98 +0,0 @@
1
- ---
2
- name: scrapling-web
3
- description: |
4
- Harness web search and scrape via pi tools web_search and web_fetch (harness-web.py).
5
- Use for any non-API web task: search, scrape URLs, map site links, bulk research fetches.
6
- Replaces Firecrawl in ultimate-pi harness agents. Triggers on: search the web,
7
- scrape URL, fetch page, research online, web_search, web_fetch, .web/ artifacts.
8
- ---
9
-
10
- # scrapling-web (harness-web)
11
-
12
- Local web layer for harness agents — **no API keys** for default search/scrape.
13
- Pi registers **`web_search`** and **`web_fetch`** (wrap `harness-web.py` with Scrapling bootstrap).
14
- Optional **self-hosted SearXNG** — see `/harness-setup` Step 4.0b.
15
-
16
- ## Agent tools (preferred)
17
-
18
- | Task | Tool |
19
- |------|------|
20
- | Search (SERP) | `web_search` with `query` |
21
- | Search + multi-scrape | `web_search` with `bulk: true` |
22
- | Scrape URL | `web_fetch` with `url` (default mode `scrape`) |
23
- | Map same-host links | `web_fetch` with `mode: map` |
24
- | Static / simple page | `web_fetch` with `fast: true` |
25
-
26
- **Never before search/fetch:** resolve `UP_PKG`, `ls harness-web.py`, `python3 -c "import scrapling"`, or Firecrawl/curl/wget/scrapling CLI for SERP or page fetch.
27
-
28
- Full JSON/markdown lives under **`.web/`** (gitignored). Use `read` on `output` paths after tool calls.
29
-
30
- ## Install (once per machine — setup/humans only)
31
-
32
- ```bash
33
- command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
34
- uv tool install "scrapling[fetchers]"
35
- scrapling install # browser binaries for default stealth scrape
36
- ```
37
-
38
- Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
39
- Config diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON; setup only)
40
-
41
- ## Bash fallback (if pi tools unavailable)
42
-
43
- | Task | Command |
44
- |------|---------|
45
- | Search | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
46
- | Scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
47
- | Fast/static | add `--fast` |
48
- | Map | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
49
- | Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
50
-
51
- ## Search JSON shape (Firecrawl-compatible)
52
-
53
- ```bash
54
- jq -r '.data.web[].url' .web/search.json
55
- ```
56
-
57
- Each entry: `url`, `title`, `description`.
58
-
59
- ## Fetch modes
60
-
61
- | Mode | When |
62
- |------|------|
63
- | **stealth** (default) | Arbitrary URLs, JS-heavy sites |
64
- | **fast** (`fast: true` / `--fast`) | Static docs, example.com, localhost |
65
- | **auto** (`HARNESS_WEB_FETCH_MODE=auto`) | fast for known-static hosts, else stealth |
66
-
67
- | Search backend | Behavior |
68
- |--------------|----------|
69
- | `ddg_html` (default) | DuckDuckGo HTML SERP |
70
- | `searxng` | JSON at `HARNESS_WEB_SEARXNG_URL` — bootstrap via `harness-searxng-bootstrap.mjs` |
71
-
72
- ## Environment
73
-
74
- | Variable | Default | Purpose |
75
- |----------|---------|---------|
76
- | `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
77
- | `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` |
78
- | `HARNESS_WEB_SEARXNG_URL` | (unset) | Required when `SEARCH_ENGINE=searxng` |
79
-
80
- ## Escalation
81
-
82
- 1. `web_search` / `web_fetch`
83
- 2. `web_fetch` with `fast: true` for static hosts
84
- 3. `web_fetch` with `mode: map` then targeted fetches
85
- 4. Site-specific Scrapling only when tools are insufficient (not for routine SERP/fetch)
86
-
87
- ## Gaps vs Firecrawl
88
-
89
- | Firecrawl | Harness path |
90
- |-----------|----------------|
91
- | `interact` | gstack browse or manual browser |
92
- | `agent` | Agent reasoning + graphify |
93
- | `parse` (PDF) | pypdf, markitdown |
94
- | `crawl` | `web_search` bulk or map + multiple `web_fetch` |
95
-
96
- ## Ethics
97
-
98
- Respect site terms and rate limits. SERP scraping is for dev research, not high-volume harvesting.
@@ -1,11 +0,0 @@
1
- /**
2
- * Load before other extensions: IPv4-first fetch for *.posthog.com (@posthog/pi uses global fetch).
3
- */
4
-
5
- import { installPostHogFetchPatch } from "../lib/posthog-client.js";
6
-
7
- installPostHogFetchPatch();
8
-
9
- export default function posthogNetworkBootstrap() {
10
- // Side effects run at module load; no hooks required.
11
- }