ultimate-pi 0.19.0 → 0.19.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/web-retrieval/SKILL.md +163 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +6 -6
- package/.pi/SYSTEM.md +30 -12
- package/.pi/agents/harness/planning/implementation-researcher.md +1 -1
- package/.pi/agents/harness/planning/stack-researcher.md +5 -1
- package/.pi/agents/harness/web-retrieval/web-answerer.md +35 -0
- package/.pi/agents/harness/web-retrieval/web-criteria-verifier.md +28 -0
- package/.pi/agents/harness/web-retrieval/web-gap-analyzer.md +31 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander-fast.md +34 -0
- package/.pi/agents/harness/web-retrieval/web-query-expander.md +60 -0
- package/.pi/agents/harness/web-retrieval/web-summarizer.md +18 -0
- package/.pi/extensions/harness-web-guard.ts +2 -1
- package/.pi/extensions/harness-web-tools.ts +689 -51
- package/.pi/harness/agents.manifest.json +29 -5
- package/.pi/harness/agents.policy.yaml +34 -0
- package/.pi/harness/docs/adrs/0050-agentic-web-retrieval-stack.md +46 -0
- package/.pi/harness/docs/harness-web-search.md +97 -0
- package/.pi/harness/env.harness.template +9 -1
- package/.pi/harness/examples/web-heuristic-angles.project.yaml +22 -0
- package/.pi/harness/web-heuristic-angles.json +278 -0
- package/.pi/harness/web-heuristic-angles.yaml +182 -0
- package/.pi/lib/agents-policy.mjs +6 -0
- package/.pi/lib/harness-subagent-auth.ts +39 -9
- package/.pi/lib/harness-subagents-bridge.ts +21 -0
- package/.pi/lib/harness-web/artifacts.ts +200 -0
- package/.pi/lib/harness-web/cache.ts +369 -0
- package/.pi/lib/harness-web/run-cli.ts +42 -2
- package/.pi/prompts/harness-plan.md +1 -0
- package/.pi/prompts/harness-setup.md +3 -1
- package/.pi/scripts/gen-web-heuristic-angles-json.mjs +24 -0
- package/.pi/scripts/harness-cli-verify.sh +5 -0
- package/.pi/scripts/harness-verify.mjs +78 -0
- package/.pi/scripts/harness-web-policy-guard.mjs +1 -1
- package/.pi/scripts/harness-web.py +218 -15
- package/.pi/scripts/harness_web/deep_search.py +55 -0
- package/.pi/scripts/harness_web/evidence_bundle.py +47 -0
- package/.pi/scripts/harness_web/find_similar.py +88 -0
- package/.pi/scripts/harness_web/heuristic_angles_shipped.py +85 -0
- package/.pi/scripts/harness_web/heuristic_config.py +251 -0
- package/.pi/scripts/harness_web/highlights.py +47 -0
- package/.pi/scripts/harness_web/multi_search.py +59 -0
- package/.pi/scripts/harness_web/output.py +24 -0
- package/.pi/scripts/harness_web/query_angles.py +116 -0
- package/.pi/scripts/harness_web/rank.py +163 -0
- package/.pi/scripts/harness_web/scrape.py +30 -0
- package/.pi/scripts/tests/test_harness_web_heuristic_config.py +132 -0
- package/.pi/scripts/tests/test_harness_web_query_angles.py +45 -0
- package/.pi/scripts/tests/test_harness_web_rank.py +56 -0
- package/AGENTS.md +2 -2
- package/CHANGELOG.md +6 -0
- package/package.json +5 -3
- package/.agents/skills/scrapling-web/SKILL.md +0 -98
- package/.pi/extensions/00-posthog-network-bootstrap.ts +0 -11
- package/.pi/scripts/harness_web/__pycache__/__init__.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/config.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/output.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/scrape.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_ddg.cpython-314.pyc +0 -0
- package/.pi/scripts/harness_web/__pycache__/search_searxng.cpython-314.pyc +0 -0
|
@@ -41,6 +41,36 @@ def scrape_url(
|
|
|
41
41
|
write_page_markdown(Path(output), page, main_content_only=True)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
def scrape_url_with_highlights(
|
|
45
|
+
url: str,
|
|
46
|
+
markdown_output: str,
|
|
47
|
+
highlights_output: str | None,
|
|
48
|
+
*,
|
|
49
|
+
config: HarnessWebConfig,
|
|
50
|
+
fast: bool,
|
|
51
|
+
wait_ms: int | None,
|
|
52
|
+
highlight_query: str,
|
|
53
|
+
) -> None:
|
|
54
|
+
import json
|
|
55
|
+
from pathlib import Path
|
|
56
|
+
|
|
57
|
+
from .highlights import extract_highlights
|
|
58
|
+
|
|
59
|
+
page = fetch_page(url, config=config, fast=fast, wait_ms=wait_ms)
|
|
60
|
+
md_path = Path(markdown_output)
|
|
61
|
+
write_page_markdown(md_path, page, main_content_only=True)
|
|
62
|
+
if highlights_output and highlight_query.strip():
|
|
63
|
+
text = md_path.read_text(encoding="utf-8")
|
|
64
|
+
spans = extract_highlights(text, highlight_query)
|
|
65
|
+
hp = Path(highlights_output)
|
|
66
|
+
hp.parent.mkdir(parents=True, exist_ok=True)
|
|
67
|
+
hp.write_text(
|
|
68
|
+
json.dumps({"url": url, "query": highlight_query, "highlights": spans}, indent=2)
|
|
69
|
+
+ "\n",
|
|
70
|
+
encoding="utf-8",
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
|
|
44
74
|
def map_url(
|
|
45
75
|
url: str,
|
|
46
76
|
output: str,
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Unit tests for harness_web.heuristic_config."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
import unittest
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from harness_web.heuristic_config import (
|
|
12
|
+
_embedded_builtin_dict,
|
|
13
|
+
_merge_config_dict,
|
|
14
|
+
build_heuristic_angles,
|
|
15
|
+
clear_heuristic_config_cache,
|
|
16
|
+
heuristic_config_from_merged,
|
|
17
|
+
load_heuristic_angles_config_cached,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestHeuristicConfig(unittest.TestCase):
|
|
22
|
+
def tearDown(self) -> None:
|
|
23
|
+
clear_heuristic_config_cache()
|
|
24
|
+
|
|
25
|
+
def test_builtin_code_includes_stackoverflow(self) -> None:
|
|
26
|
+
cfg = heuristic_config_from_merged(_embedded_builtin_dict())
|
|
27
|
+
angles = build_heuristic_angles("rust async", category="code", config=cfg)
|
|
28
|
+
ids = {a.id for a in angles}
|
|
29
|
+
self.assertIn("stackoverflow", ids)
|
|
30
|
+
self.assertIn("github", ids)
|
|
31
|
+
self.assertTrue(any("site:stackoverflow.com" in a.query for a in angles))
|
|
32
|
+
|
|
33
|
+
def test_shipped_code_includes_mdn_and_registries(self) -> None:
|
|
34
|
+
pkg = Path(__file__).resolve().parents[2] / "harness" / "web-heuristic-angles.yaml"
|
|
35
|
+
if not pkg.is_file():
|
|
36
|
+
self.skipTest("package yaml missing")
|
|
37
|
+
clear_heuristic_config_cache()
|
|
38
|
+
cfg = load_heuristic_angles_config_cached((str(pkg),))
|
|
39
|
+
angles = build_heuristic_angles("websocket api", category="code", config=cfg)
|
|
40
|
+
ids = {a.id for a in angles}
|
|
41
|
+
self.assertIn("mdn", ids)
|
|
42
|
+
self.assertIn("package_registries", ids)
|
|
43
|
+
self.assertLessEqual(len(angles), cfg.max_angles)
|
|
44
|
+
|
|
45
|
+
def test_shipped_security_category(self) -> None:
|
|
46
|
+
pkg = Path(__file__).resolve().parents[2] / "harness" / "web-heuristic-angles.yaml"
|
|
47
|
+
if not pkg.is_file():
|
|
48
|
+
self.skipTest("package yaml missing")
|
|
49
|
+
clear_heuristic_config_cache()
|
|
50
|
+
cfg = load_heuristic_angles_config_cached((str(pkg),))
|
|
51
|
+
angles = build_heuristic_angles("jwt validation", category="security", config=cfg)
|
|
52
|
+
ids = {a.id for a in angles}
|
|
53
|
+
self.assertIn("owasp", ids)
|
|
54
|
+
self.assertIn("cve_nvd", ids)
|
|
55
|
+
|
|
56
|
+
def test_merge_extends_code_category(self) -> None:
|
|
57
|
+
merged = _merge_config_dict(
|
|
58
|
+
_embedded_builtin_dict(),
|
|
59
|
+
{
|
|
60
|
+
"max_angles": 12,
|
|
61
|
+
"categories": {
|
|
62
|
+
"code": [
|
|
63
|
+
{
|
|
64
|
+
"id": "docs_rs",
|
|
65
|
+
"query": "{query} site:docs.rs",
|
|
66
|
+
"rationale": "Rust docs",
|
|
67
|
+
},
|
|
68
|
+
],
|
|
69
|
+
},
|
|
70
|
+
},
|
|
71
|
+
)
|
|
72
|
+
cfg = heuristic_config_from_merged(merged)
|
|
73
|
+
merged_ids = [a["id"] for a in merged["categories"]["code"]]
|
|
74
|
+
self.assertIn("docs_rs", merged_ids)
|
|
75
|
+
angles = build_heuristic_angles("tokio", category="code", config=cfg)
|
|
76
|
+
ids = {a.id for a in angles}
|
|
77
|
+
self.assertIn("stackoverflow", ids)
|
|
78
|
+
self.assertIn("github", ids)
|
|
79
|
+
|
|
80
|
+
def test_merge_adds_new_category(self) -> None:
|
|
81
|
+
merged = _merge_config_dict(
|
|
82
|
+
_embedded_builtin_dict(),
|
|
83
|
+
{
|
|
84
|
+
"categories": {
|
|
85
|
+
"security": [
|
|
86
|
+
{"id": "cve", "query": "{query} CVE", "rationale": "vulns"},
|
|
87
|
+
{
|
|
88
|
+
"id": "owasp",
|
|
89
|
+
"query": "{query} site:owasp.org",
|
|
90
|
+
"rationale": "guidance",
|
|
91
|
+
},
|
|
92
|
+
],
|
|
93
|
+
},
|
|
94
|
+
},
|
|
95
|
+
)
|
|
96
|
+
cfg = heuristic_config_from_merged(merged)
|
|
97
|
+
angles = build_heuristic_angles("jwt auth", category="security", config=cfg)
|
|
98
|
+
ids = {a.id for a in angles}
|
|
99
|
+
self.assertIn("cve", ids)
|
|
100
|
+
self.assertIn("owasp", ids)
|
|
101
|
+
|
|
102
|
+
def test_json_project_file_merges(self) -> None:
|
|
103
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
104
|
+
proj = Path(tmp)
|
|
105
|
+
harness_dir = proj / ".pi" / "harness"
|
|
106
|
+
harness_dir.mkdir(parents=True)
|
|
107
|
+
proj_file = harness_dir / "web-heuristic-angles.json"
|
|
108
|
+
proj_file.write_text(
|
|
109
|
+
json.dumps(
|
|
110
|
+
{
|
|
111
|
+
"categories": {
|
|
112
|
+
"code": [
|
|
113
|
+
{
|
|
114
|
+
"id": "crates_io",
|
|
115
|
+
"query": "{query} site:crates.io",
|
|
116
|
+
"rationale": "crates",
|
|
117
|
+
},
|
|
118
|
+
],
|
|
119
|
+
},
|
|
120
|
+
}
|
|
121
|
+
),
|
|
122
|
+
encoding="utf-8",
|
|
123
|
+
)
|
|
124
|
+
clear_heuristic_config_cache()
|
|
125
|
+
cfg = load_heuristic_angles_config_cached((str(proj_file),))
|
|
126
|
+
angles = build_heuristic_angles("serde", category="code", config=cfg)
|
|
127
|
+
ids = {a.id for a in angles}
|
|
128
|
+
self.assertIn("crates_io", ids)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
if __name__ == "__main__":
|
|
132
|
+
unittest.main()
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Unit tests for harness_web.query_angles (no network)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import tempfile
|
|
7
|
+
import unittest
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from harness_web.query_angles import AnglesPlan, load_angles_file, resolve_angles
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TestResolveAngles(unittest.TestCase):
|
|
14
|
+
def test_heuristic_code_category(self) -> None:
|
|
15
|
+
plan = resolve_angles("rust async", expand_heuristic=True, category="code")
|
|
16
|
+
ids = {a.id for a in plan.angles}
|
|
17
|
+
self.assertIn("github", ids)
|
|
18
|
+
self.assertGreaterEqual(len(plan.angles), 2)
|
|
19
|
+
self.assertLessEqual(len(plan.angles), 5)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TestLoadFile(unittest.TestCase):
|
|
23
|
+
def test_load_json_file(self) -> None:
|
|
24
|
+
with tempfile.TemporaryDirectory() as tmp:
|
|
25
|
+
p = Path(tmp) / "angles.json"
|
|
26
|
+
p.write_text(
|
|
27
|
+
json.dumps(
|
|
28
|
+
{
|
|
29
|
+
"intent": "load test",
|
|
30
|
+
"angles": [
|
|
31
|
+
{"id": "a", "query": "first angle query"},
|
|
32
|
+
{"id": "b", "query": "second angle query"},
|
|
33
|
+
],
|
|
34
|
+
}
|
|
35
|
+
),
|
|
36
|
+
encoding="utf-8",
|
|
37
|
+
)
|
|
38
|
+
plan = load_angles_file(p)
|
|
39
|
+
self.assertIsInstance(plan, AnglesPlan)
|
|
40
|
+
self.assertEqual(plan.intent, "load test")
|
|
41
|
+
self.assertEqual(len(plan.angles), 2)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
if __name__ == "__main__":
|
|
45
|
+
unittest.main()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Unit tests for harness_web.rank (no network)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from harness_web.rank import RankedHit, fuse_angle_results, lexical_rerank, normalize_url, tokenize
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestNormalizeUrl(unittest.TestCase):
|
|
11
|
+
def test_strips_tracking(self) -> None:
|
|
12
|
+
a = normalize_url("https://Example.com/path?utm_source=x&id=1")
|
|
13
|
+
b = normalize_url("https://example.com/path?id=1")
|
|
14
|
+
self.assertEqual(a, b)
|
|
15
|
+
|
|
16
|
+
def test_trailing_slash(self) -> None:
|
|
17
|
+
self.assertEqual(
|
|
18
|
+
normalize_url("https://example.com/foo/"),
|
|
19
|
+
normalize_url("https://example.com/foo"),
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TestRrfFusion(unittest.TestCase):
|
|
24
|
+
def test_merges_duplicate_urls(self) -> None:
|
|
25
|
+
angle_results = {
|
|
26
|
+
"a": [
|
|
27
|
+
{"url": "https://x.com/1", "title": "T1", "description": "d1"},
|
|
28
|
+
{"url": "https://x.com/2", "title": "T2", "description": "d2"},
|
|
29
|
+
],
|
|
30
|
+
"b": [
|
|
31
|
+
{"url": "https://x.com/1", "title": "T1b", "description": "d1b"},
|
|
32
|
+
],
|
|
33
|
+
}
|
|
34
|
+
fused = fuse_angle_results(angle_results, final_limit=5)
|
|
35
|
+
self.assertEqual(len(fused), 2)
|
|
36
|
+
top = fused[0]
|
|
37
|
+
self.assertEqual(top.url, "https://x.com/1")
|
|
38
|
+
self.assertIn("a", top.angle_ids)
|
|
39
|
+
self.assertIn("b", top.angle_ids)
|
|
40
|
+
self.assertGreater(top.score, fused[1].score)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class TestLexicalRerank(unittest.TestCase):
|
|
44
|
+
def test_boosts_intent_overlap(self) -> None:
|
|
45
|
+
hits = [
|
|
46
|
+
RankedHit("https://a", "unrelated", "noise", 0.52, ["a"]),
|
|
47
|
+
RankedHit("https://b", "kubernetes architecture", "how kubernetes works", 0.50, ["b"]),
|
|
48
|
+
]
|
|
49
|
+
reranked = lexical_rerank(hits, "kubernetes architecture")
|
|
50
|
+
self.assertEqual(reranked[0].url, "https://b")
|
|
51
|
+
self.assertGreater(reranked[0].score, reranked[1].score)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class TestTokenize(unittest.TestCase):
|
|
55
|
+
def test_min_length(self) -> None:
|
|
56
|
+
self.assertIn("hello", tokenize("hello hi"))
|
package/AGENTS.md
CHANGED
|
@@ -16,7 +16,7 @@ Created: 2026-05-14
|
|
|
16
16
|
- docs/adr/ → Repo-level Architectural Decision Records
|
|
17
17
|
- .pi/harness/docs/adrs/ → Harness ADRs (team-shared; [index](.pi/harness/docs/adrs/README.md))
|
|
18
18
|
- .pi/harness/docs/practice-map.md → Phase → practice → agent spawn topology for `/harness-plan`, `/harness-run`, `/harness-review`
|
|
19
|
-
- .pi/skills/ → Agent skills
|
|
19
|
+
- .pi/skills/ → Agent skills (harness skills symlink to `.agents/skills/`, e.g. `web-retrieval`)
|
|
20
20
|
- .pi/agents/ → Specialized agents
|
|
21
21
|
|
|
22
22
|
## Graphify-First Workflow
|
|
@@ -36,7 +36,7 @@ Created: 2026-05-14
|
|
|
36
36
|
- Harness context: **context-mode only** — never lean-ctx on harness paths (see harness-context skill)
|
|
37
37
|
- `graphify update .` after significant code changes
|
|
38
38
|
- ast-grep (`sg`) is the default code search tool — use `sg -p 'pattern'` for structural search, never grep for code
|
|
39
|
-
-
|
|
39
|
+
- Non-API web: invoke **`web-retrieval`** skill (WRS tiers; default `tier=deep` with `web-query-expander` → `anglesFile`). CLI: `python3 "$UP_PKG/.pi/scripts/harness-web.py"`
|
|
40
40
|
|
|
41
41
|
## graphify
|
|
42
42
|
|
package/CHANGELOG.md
CHANGED
|
@@ -9,6 +9,12 @@ All notable changes to this project are documented in this file.
|
|
|
9
9
|
- **Harness lens:** Integrate selected pi-lens capabilities through a harness-owned extension, store lens state under `.pi/harness/.lens`, and route lens findings through harness PostHog telemetry instead of standalone lens health/telemetry surfaces.
|
|
10
10
|
- **Graphify KB updater:** Productize conservative daily discovery/promotion with explicit repo/release taxonomy, allowlist source-class gates, operator review queue reporting, scheduler smoke validation, and safe Graphify refresh controls.
|
|
11
11
|
|
|
12
|
+
## [v0.19.1] — 2026-05-26
|
|
13
|
+
|
|
14
|
+
### 🔧 Chores
|
|
15
|
+
|
|
16
|
+
- Prepare web retrieval and harness updates for release.
|
|
17
|
+
|
|
12
18
|
## [v0.19.0] — 2026-05-24
|
|
13
19
|
|
|
14
20
|
### ✨ Features
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.19.0",
|
|
3
|
+
"version": "0.19.1",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -56,6 +56,8 @@
|
|
|
56
56
|
".pi/harness/agents.manifest.json",
|
|
57
57
|
".pi/harness/agents.policy.yaml",
|
|
58
58
|
".pi/harness/examples",
|
|
59
|
+
".pi/harness/web-heuristic-angles.yaml",
|
|
60
|
+
".pi/harness/web-heuristic-angles.json",
|
|
59
61
|
".pi/lib/harness-lens",
|
|
60
62
|
".pi/harness/README.md",
|
|
61
63
|
".pi/npm/package.json",
|
|
@@ -76,7 +78,7 @@
|
|
|
76
78
|
"@earendil-works/pi-coding-agent": "*"
|
|
77
79
|
},
|
|
78
80
|
"scripts": {
|
|
79
|
-
"check:ts": "tsc
|
|
81
|
+
"check:ts": "tsc -p tsconfig.check.json",
|
|
80
82
|
"vendor:sync-vcc": "bash .pi/scripts/vendor-sync-pi-vcc.sh",
|
|
81
83
|
"vendor:sync-subagents": "bash .pi/scripts/vendor-sync-pi-subagents.sh",
|
|
82
84
|
"release": "bash .pi/scripts/release.sh",
|
|
@@ -85,7 +87,7 @@
|
|
|
85
87
|
"format": "biome format --write",
|
|
86
88
|
"format:check": "biome format",
|
|
87
89
|
"prepare": "lefthook install",
|
|
88
|
-
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/posthog-client.test.mjs test/harness-agt-policy-load.test.mjs test/harness-agt-policy-matrix.test.mjs test/harness-agt-policy-parity.test.mjs test/harness-agt-packaging.test.mjs test/harness-tool-call-hook-chain.test.mjs test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-project-toggle-tui.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/harness-project-agents-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs test/review-integrity-revise-handoff.test.mjs test/harness-plan-revise-reset.test.mjs",
|
|
90
|
+
"test": "node --test test/harness-verify.test.mjs test/harness-ask-user.test.mjs test/harness-subagents-loader.test.mjs test/harness-subagent-precheck.test.mjs test/sentrux-rules-sync.test.mjs test/harness-budget-guard.test.mjs && node .pi/harness/evals/smoke/smoke-harness-plan.mjs --fixture && npx -y tsx --test test/harness-web-cache.test.mjs test/harness-web-artifacts.test.mjs test/harness-subagent-auth.test.mjs test/posthog-client.test.mjs test/harness-agt-policy-load.test.mjs test/harness-agt-policy-matrix.test.mjs test/harness-agt-policy-parity.test.mjs test/harness-agt-packaging.test.mjs test/harness-tool-call-hook-chain.test.mjs test/harness-vcc-settings.test.ts test/harness-run-context-postrun.test.mjs test/harness-tool-payload.test.mjs test/harness-live-widget-status.test.ts test/harness-project-toggle-tui.test.ts test/harness-plan-phase-policy.test.mjs test/harness-context-mode-policy.test.mjs test/harness-subprocess-bootstrap.test.mjs test/harness-subagent-policy.test.mjs test/harness-subagent-precheck-topology.test.mjs test/plan-approval-readiness.test.mjs test/harness-spawn-budget.test.mjs test/harness-spawn-parse.test.mjs test/harness-schema-validate.test.mjs test/harness-turn-routing.test.mjs test/harness-budget-enforce.test.mjs test/harness-submit-policy.test.mjs test/harness-project-agents-policy.test.mjs test/plan-approval-format.test.mjs test/plan-approval-dialog.test.mjs test/plan-approval-sync.test.mjs test/plan-create-plan.test.mjs test/plan-review-format.test.mjs test/debate-plan-phase.test.mjs test/plan-debate-eligibility.test.mjs test/plan-messenger-gate.test.mjs test/plan-debate-lane-apply.test.mjs test/review-integrity-revise-handoff.test.mjs test/harness-plan-revise-reset.test.mjs",
|
|
89
91
|
"test:vcc": "npx -y tsx --test vendor/pi-vcc/tests/*.test.ts",
|
|
90
92
|
"harness:sentrux-bootstrap": "node .pi/scripts/harness-sentrux-bootstrap.mjs",
|
|
91
93
|
"harness:sentrux-sync": "node .pi/scripts/sentrux-rules-sync.mjs --force",
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: scrapling-web
|
|
3
|
-
description: |
|
|
4
|
-
Harness web search and scrape via pi tools web_search and web_fetch (harness-web.py).
|
|
5
|
-
Use for any non-API web task: search, scrape URLs, map site links, bulk research fetches.
|
|
6
|
-
Replaces Firecrawl in ultimate-pi harness agents. Triggers on: search the web,
|
|
7
|
-
scrape URL, fetch page, research online, web_search, web_fetch, .web/ artifacts.
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
# scrapling-web (harness-web)
|
|
11
|
-
|
|
12
|
-
Local web layer for harness agents — **no API keys** for default search/scrape.
|
|
13
|
-
Pi registers **`web_search`** and **`web_fetch`** (wrap `harness-web.py` with Scrapling bootstrap).
|
|
14
|
-
Optional **self-hosted SearXNG** — see `/harness-setup` Step 4.0b.
|
|
15
|
-
|
|
16
|
-
## Agent tools (preferred)
|
|
17
|
-
|
|
18
|
-
| Task | Tool |
|
|
19
|
-
|------|------|
|
|
20
|
-
| Search (SERP) | `web_search` with `query` |
|
|
21
|
-
| Search + multi-scrape | `web_search` with `bulk: true` |
|
|
22
|
-
| Scrape URL | `web_fetch` with `url` (default mode `scrape`) |
|
|
23
|
-
| Map same-host links | `web_fetch` with `mode: map` |
|
|
24
|
-
| Static / simple page | `web_fetch` with `fast: true` |
|
|
25
|
-
|
|
26
|
-
**Never before search/fetch:** resolve `UP_PKG`, `ls harness-web.py`, `python3 -c "import scrapling"`, or Firecrawl/curl/wget/scrapling CLI for SERP or page fetch.
|
|
27
|
-
|
|
28
|
-
Full JSON/markdown lives under **`.web/`** (gitignored). Use `read` on `output` paths after tool calls.
|
|
29
|
-
|
|
30
|
-
## Install (once per machine — setup/humans only)
|
|
31
|
-
|
|
32
|
-
```bash
|
|
33
|
-
command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
34
|
-
uv tool install "scrapling[fetchers]"
|
|
35
|
-
scrapling install # browser binaries for default stealth scrape
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
|
|
39
|
-
Config diagnostics: `python3 "$UP_PKG/.pi/scripts/harness-web.py" status` (JSON; setup only)
|
|
40
|
-
|
|
41
|
-
## Bash fallback (if pi tools unavailable)
|
|
42
|
-
|
|
43
|
-
| Task | Command |
|
|
44
|
-
|------|---------|
|
|
45
|
-
| Search | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
|
|
46
|
-
| Scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
|
|
47
|
-
| Fast/static | add `--fast` |
|
|
48
|
-
| Map | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
|
|
49
|
-
| Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
|
|
50
|
-
|
|
51
|
-
## Search JSON shape (Firecrawl-compatible)
|
|
52
|
-
|
|
53
|
-
```bash
|
|
54
|
-
jq -r '.data.web[].url' .web/search.json
|
|
55
|
-
```
|
|
56
|
-
|
|
57
|
-
Each entry: `url`, `title`, `description`.
|
|
58
|
-
|
|
59
|
-
## Fetch modes
|
|
60
|
-
|
|
61
|
-
| Mode | When |
|
|
62
|
-
|------|------|
|
|
63
|
-
| **stealth** (default) | Arbitrary URLs, JS-heavy sites |
|
|
64
|
-
| **fast** (`fast: true` / `--fast`) | Static docs, example.com, localhost |
|
|
65
|
-
| **auto** (`HARNESS_WEB_FETCH_MODE=auto`) | fast for known-static hosts, else stealth |
|
|
66
|
-
|
|
67
|
-
| Search backend | Behavior |
|
|
68
|
-
|--------------|----------|
|
|
69
|
-
| `ddg_html` (default) | DuckDuckGo HTML SERP |
|
|
70
|
-
| `searxng` | JSON at `HARNESS_WEB_SEARXNG_URL` — bootstrap via `harness-searxng-bootstrap.mjs` |
|
|
71
|
-
|
|
72
|
-
## Environment
|
|
73
|
-
|
|
74
|
-
| Variable | Default | Purpose |
|
|
75
|
-
|----------|---------|---------|
|
|
76
|
-
| `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
|
|
77
|
-
| `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | `ddg_html` \| `searxng` |
|
|
78
|
-
| `HARNESS_WEB_SEARXNG_URL` | (unset) | Required when `SEARCH_ENGINE=searxng` |
|
|
79
|
-
|
|
80
|
-
## Escalation
|
|
81
|
-
|
|
82
|
-
1. `web_search` / `web_fetch`
|
|
83
|
-
2. `web_fetch` with `fast: true` for static hosts
|
|
84
|
-
3. `web_fetch` with `mode: map` then targeted fetches
|
|
85
|
-
4. Site-specific Scrapling only when tools are insufficient (not for routine SERP/fetch)
|
|
86
|
-
|
|
87
|
-
## Gaps vs Firecrawl
|
|
88
|
-
|
|
89
|
-
| Firecrawl | Harness path |
|
|
90
|
-
|-----------|----------------|
|
|
91
|
-
| `interact` | gstack browse or manual browser |
|
|
92
|
-
| `agent` | Agent reasoning + graphify |
|
|
93
|
-
| `parse` (PDF) | pypdf, markitdown |
|
|
94
|
-
| `crawl` | `web_search` bulk or map + multiple `web_fetch` |
|
|
95
|
-
|
|
96
|
-
## Ethics
|
|
97
|
-
|
|
98
|
-
Respect site terms and rate limits. SERP scraping is for dev research, not high-volume harvesting.
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Load before other extensions: IPv4-first fetch for *.posthog.com (@posthog/pi uses global fetch).
|
|
3
|
-
*/
|
|
4
|
-
|
|
5
|
-
import { installPostHogFetchPatch } from "../lib/posthog-client.js";
|
|
6
|
-
|
|
7
|
-
installPostHogFetchPatch();
|
|
8
|
-
|
|
9
|
-
export default function posthogNetworkBootstrap() {
|
|
10
|
-
// Side effects run at module load; no hooks required.
|
|
11
|
-
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|