ultimate-pi 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/harness-decisions/SKILL.md +37 -0
- package/.agents/skills/harness-governor/SKILL.md +1 -1
- package/.agents/skills/harness-orchestration/SKILL.md +54 -0
- package/.agents/skills/harness-plan/SKILL.md +4 -3
- package/.agents/skills/harness-sentrux-setup/SKILL.md +57 -0
- package/.agents/skills/scrapling-web/SKILL.md +93 -0
- package/.pi/PACKAGING.md +2 -2
- package/.pi/SYSTEM.md +13 -15
- package/.pi/agents/harness/adversary.md +3 -0
- package/.pi/agents/harness/evaluator.md +3 -0
- package/.pi/agents/harness/executor.md +4 -1
- package/.pi/agents/harness/meta-optimizer.md +2 -1
- package/.pi/agents/harness/planner.md +22 -1
- package/.pi/agents/harness/sentrux-bootstrap.md +42 -0
- package/.pi/agents/harness/tie-breaker.md +2 -0
- package/.pi/extensions/harness-ask-user.ts +74 -0
- package/.pi/extensions/harness-subagents.ts +9 -0
- package/.pi/extensions/lib/ask-user/dialog.ts +260 -0
- package/.pi/extensions/lib/ask-user/fallback.ts +78 -0
- package/.pi/extensions/lib/ask-user/render.ts +66 -0
- package/.pi/extensions/lib/ask-user/schema.ts +69 -0
- package/.pi/extensions/lib/ask-user/types.ts +41 -0
- package/.pi/extensions/lib/ask-user/validate-core.mjs +79 -0
- package/.pi/extensions/lib/ask-user/validate.ts +92 -0
- package/.pi/extensions/lib/harness-subagents/agent-loader.ts +126 -0
- package/.pi/extensions/lib/harness-subagents/agent-manifest.ts +119 -0
- package/.pi/extensions/lib/harness-subagents/agent-parser.ts +87 -0
- package/.pi/extensions/lib/harness-subagents/blackboard-tool.ts +118 -0
- package/.pi/extensions/lib/harness-subagents/blackboard.ts +175 -0
- package/.pi/extensions/lib/harness-subagents/spawn-policy.ts +27 -0
- package/.pi/extensions/lib/harness-subagents/types-blackboard.ts +27 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-manager.ts +553 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-runner.ts +637 -0
- package/.pi/extensions/lib/harness-subagents/vendored/agent-types.ts +175 -0
- package/.pi/extensions/lib/harness-subagents/vendored/context.ts +59 -0
- package/.pi/extensions/lib/harness-subagents/vendored/cross-extension-rpc.ts +134 -0
- package/.pi/extensions/lib/harness-subagents/vendored/custom-agents.ts +5 -0
- package/.pi/extensions/lib/harness-subagents/vendored/default-agents.ts +123 -0
- package/.pi/extensions/lib/harness-subagents/vendored/env.ts +43 -0
- package/.pi/extensions/lib/harness-subagents/vendored/group-join.ts +144 -0
- package/.pi/extensions/lib/harness-subagents/vendored/index.ts +2447 -0
- package/.pi/extensions/lib/harness-subagents/vendored/invocation-config.ts +52 -0
- package/.pi/extensions/lib/harness-subagents/vendored/memory.ts +182 -0
- package/.pi/extensions/lib/harness-subagents/vendored/model-resolver.ts +92 -0
- package/.pi/extensions/lib/harness-subagents/vendored/output-file.ts +115 -0
- package/.pi/extensions/lib/harness-subagents/vendored/prompts.ts +103 -0
- package/.pi/extensions/lib/harness-subagents/vendored/schedule-store.ts +177 -0
- package/.pi/extensions/lib/harness-subagents/vendored/schedule.ts +416 -0
- package/.pi/extensions/lib/harness-subagents/vendored/settings.ts +210 -0
- package/.pi/extensions/lib/harness-subagents/vendored/skill-loader.ts +108 -0
- package/.pi/extensions/lib/harness-subagents/vendored/types.ts +187 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/agent-widget.ts +637 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/conversation-viewer.ts +324 -0
- package/.pi/extensions/lib/harness-subagents/vendored/ui/schedule-menu.ts +110 -0
- package/.pi/extensions/lib/harness-subagents/vendored/usage.ts +71 -0
- package/.pi/extensions/lib/harness-subagents/vendored/worktree.ts +195 -0
- package/.pi/harness/README.md +2 -1
- package/.pi/harness/agents.manifest.json +80 -0
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +9 -5
- package/.pi/harness/env.harness.template +28 -0
- package/.pi/harness/sentrux/architecture.manifest.json +6 -1
- package/.pi/prompts/harness-auto.md +2 -2
- package/.pi/prompts/harness-plan.md +2 -2
- package/.pi/prompts/harness-router-tune.md +2 -2
- package/.pi/prompts/harness-run.md +1 -0
- package/.pi/prompts/harness-setup.md +178 -339
- package/.pi/scripts/README.md +6 -1
- package/.pi/scripts/harness-agents-manifest.mjs +123 -0
- package/.pi/scripts/harness-cli-verify.sh +60 -11
- package/.pi/scripts/harness-generate-model-router.mjs +242 -0
- package/.pi/scripts/harness-graphify-bootstrap.sh +1 -6
- package/.pi/scripts/harness-resolve-up-pkg.mjs +71 -0
- package/.pi/scripts/harness-seed-project-contracts.mjs +33 -1
- package/.pi/scripts/harness-sentrux-bootstrap.mjs +146 -0
- package/.pi/scripts/harness-sync-env.mjs +148 -0
- package/.pi/scripts/harness-verify.mjs +19 -0
- package/.pi/scripts/harness-web-search.md +33 -0
- package/.pi/scripts/harness-web.py +177 -0
- package/.pi/scripts/harness_web/__init__.py +1 -0
- package/.pi/scripts/harness_web/config.py +80 -0
- package/.pi/scripts/harness_web/output.py +55 -0
- package/.pi/scripts/harness_web/scrape.py +120 -0
- package/.pi/scripts/harness_web/search_ddg.py +106 -0
- package/.pi/scripts/release.sh +338 -0
- package/.pi/scripts/sentrux-rules-sync.mjs +29 -7
- package/.pi/settings.example.json +0 -1
- package/.sentrux/rules.toml +1 -1
- package/AGENTS.md +1 -1
- package/CHANGELOG.md +12 -0
- package/THIRD_PARTY_NOTICES.md +22 -0
- package/package.json +12 -9
- package/.agents/skills/firecrawl/SKILL.md +0 -150
- package/.agents/skills/firecrawl/rules/install.md +0 -82
- package/.agents/skills/firecrawl/rules/security.md +0 -26
- package/.agents/skills/firecrawl-agent/SKILL.md +0 -57
- package/.agents/skills/firecrawl-build-interact/SKILL.md +0 -67
- package/.agents/skills/firecrawl-build-onboarding/SKILL.md +0 -102
- package/.agents/skills/firecrawl-build-onboarding/references/auth-flow.md +0 -39
- package/.agents/skills/firecrawl-build-onboarding/references/project-setup.md +0 -20
- package/.agents/skills/firecrawl-build-onboarding/references/sdk-installation.md +0 -17
- package/.agents/skills/firecrawl-build-scrape/SKILL.md +0 -68
- package/.agents/skills/firecrawl-build-search/SKILL.md +0 -68
- package/.agents/skills/firecrawl-crawl/SKILL.md +0 -58
- package/.agents/skills/firecrawl-download/SKILL.md +0 -69
- package/.agents/skills/firecrawl-interact/SKILL.md +0 -83
- package/.agents/skills/firecrawl-map/SKILL.md +0 -50
- package/.agents/skills/firecrawl-parse/SKILL.md +0 -61
- package/.agents/skills/firecrawl-scrape/SKILL.md +0 -68
- package/.agents/skills/firecrawl-search/SKILL.md +0 -59
- package/firecrawl/.env.template +0 -62
- package/firecrawl/README.md +0 -49
- package/firecrawl/docker-compose.yaml +0 -201
- package/firecrawl/searxng/searxng.env +0 -3
- package/firecrawl/searxng/settings.yml +0 -85
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-decisions
|
|
3
|
+
description: Structured user decisions via ask_user for harness setup, planning, and governance forks. Use with /harness-setup, /harness-plan, harness-auto plan phase, and when agents emit human_required.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# harness-decisions
|
|
7
|
+
|
|
8
|
+
## When to use
|
|
9
|
+
|
|
10
|
+
- `/harness-setup` — missing project `.env`, other bootstrap forks
|
|
11
|
+
- `/harness-plan` or harness-auto **plan** phase — scope, risk, acceptance ambiguity
|
|
12
|
+
- Orchestrator receives `human_required` from evaluator, adversary, tie-breaker, or meta-optimizer
|
|
13
|
+
- `/harness-router-tune` — approve / reject / edit a router proposal before apply
|
|
14
|
+
|
|
15
|
+
## Decision handshake
|
|
16
|
+
|
|
17
|
+
1. **One focused `ask_user` call** per blocking fork (2–4 options with short descriptions).
|
|
18
|
+
2. **Never guess** on `.env` creation, risk level, scope boundaries, or merge policy.
|
|
19
|
+
3. If the user **cancels** (Esc), stop with `needs_clarification` / `human_required` — do not assume defaults.
|
|
20
|
+
4. **CI / automation only:** pass `--non-interactive` to `/harness-setup` to skip prompts and use documented defaults.
|
|
21
|
+
|
|
22
|
+
## Example (plan — scope)
|
|
23
|
+
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"question": "What should be in scope for this plan?",
|
|
27
|
+
"options": [
|
|
28
|
+
{ "title": "Backend API only", "description": "No UI or infra changes" },
|
|
29
|
+
{ "title": "Full stack including UI", "description": "API + frontend + tests" }
|
|
30
|
+
],
|
|
31
|
+
"allowFreeform": true
|
|
32
|
+
}
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Who must NOT call ask_user
|
|
36
|
+
|
|
37
|
+
- `harness/evaluator` and `harness/adversary` — emit `human_required` in structured verdicts; the **parent orchestrator** calls `ask_user`.
|
|
@@ -25,7 +25,7 @@ When refining plans from noisy requirements:
|
|
|
25
25
|
|
|
26
26
|
1. Distill user intent into acceptance criteria and non-goals (bullet list).
|
|
27
27
|
2. Map criteria to `plan-packet` fields and testable checks.
|
|
28
|
-
3.
|
|
28
|
+
3. When gates return `human_required` or promotion is blocked, the orchestrator calls `ask_user` — do not guess scope.
|
|
29
29
|
4. Reference graphify wiki or `graphify query` for architecture constraints before execute.
|
|
30
30
|
|
|
31
31
|
## Rules
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-orchestration
|
|
3
|
+
description: >-
|
|
4
|
+
Orchestrate ultimate-pi harness phases with Agent spawns, blackboard handoffs,
|
|
5
|
+
and observation-bus artifacts. Use for plan/execute/evaluate pipelines, L4
|
|
6
|
+
verification, parallel scouts, and debate prep.
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Harness orchestration
|
|
10
|
+
|
|
11
|
+
## Agent IDs (namespaced)
|
|
12
|
+
|
|
13
|
+
Spawn with the `Agent` tool using **path ids** from the installed package:
|
|
14
|
+
|
|
15
|
+
| Phase | `subagent_type` | Policy |
|
|
16
|
+
|-------|-----------------|--------|
|
|
17
|
+
| Plan | `harness/planner` | May use `ask_user` |
|
|
18
|
+
| Execute | `harness/executor` | `ask_user` for in-scope forks only |
|
|
19
|
+
| Verify | `harness/evaluator`, `harness/adversary`, `harness/tie-breaker` | `disallowed_tools: ask_user` on L4 agents |
|
|
20
|
+
| Meta | `harness/meta-optimizer`, `harness/trace-librarian` | Parent calls `ask_user` for approvals |
|
|
21
|
+
|
|
22
|
+
Pi-pi experts: `pi-pi/agent-expert`, `pi-pi/cli-expert`, etc.
|
|
23
|
+
|
|
24
|
+
Project override: `.pi/agents/harness/planner.md` replaces package `harness/planner` only.
|
|
25
|
+
|
|
26
|
+
## Tools
|
|
27
|
+
|
|
28
|
+
- `Agent` — spawn (prefer `run_in_background: true` for parallel work)
|
|
29
|
+
- `get_subagent_result` / `steer_subagent` — background agents
|
|
30
|
+
- `blackboard` — orchestrator handoffs (`list`, `read`, `query`, `wait`, `delete`)
|
|
31
|
+
- `ask_user` — **parent orchestrator only** on L4 paths
|
|
32
|
+
|
|
33
|
+
Subagents cannot spawn sub-subagents (`Agent`, `blackboard`, `ask_user` blocked).
|
|
34
|
+
|
|
35
|
+
## Blackboard + bus
|
|
36
|
+
|
|
37
|
+
1. Scouts/workers post findings to `blackboard` (namespaced keys).
|
|
38
|
+
2. Spawn with `context: { keys: ["scout:*"] }` or `{ agent_name: "…" }` (~8k cap).
|
|
39
|
+
3. On completion, `harness-subagents` appends `harness-observation` entries for `observation-bus`.
|
|
40
|
+
4. Durable artifacts (PlanPacket, EvalVerdict, debate envelopes) still go to trace/run files per harness specs.
|
|
41
|
+
|
|
42
|
+
## Pipeline rules (V2-aligned)
|
|
43
|
+
|
|
44
|
+
- **Plan gate first** — no implementation without an approved `PlanPacket`.
|
|
45
|
+
- **L4 external verification** — evaluator ≠ executor; use `harness/adversary` when policy requires.
|
|
46
|
+
- **Turn budgets** — set `max_turns` on spawn or rely on agent frontmatter defaults.
|
|
47
|
+
- **Parallelism** — parallelize by file/module with explicit ownership in the plan.
|
|
48
|
+
- **Debate** — use `debate-orchestrator` commands; parent handles `human_required` via `ask_user`.
|
|
49
|
+
|
|
50
|
+
## References
|
|
51
|
+
|
|
52
|
+
- Package agents: `$UP_PKG/.pi/agents/`
|
|
53
|
+
- Manifest drift: `node "$UP_PKG/.pi/scripts/harness-agents-manifest.mjs" --check`
|
|
54
|
+
- Reference playbook: `raw/references/subagents/AGENTS.md` (design only)
|
|
@@ -14,9 +14,10 @@ description: Produce PlanPacket-aligned harness plans before execute phase. Use
|
|
|
14
14
|
## Workflow
|
|
15
15
|
|
|
16
16
|
1. Read `.pi/harness/specs/plan-packet.schema.json`.
|
|
17
|
-
2.
|
|
18
|
-
3.
|
|
19
|
-
4.
|
|
17
|
+
2. When scope, risk, or acceptance is ambiguous, call `ask_user` (see harness-decisions skill) before finalizing the packet.
|
|
18
|
+
3. Capture scope, risks, acceptance criteria, and explicit `plan_id`.
|
|
19
|
+
4. Persist plan reference in prompt (`plan_id=...`) so policy-gate sets `approvedPlan`.
|
|
20
|
+
5. Do not mutate production files in plan phase unless user explicitly requests draft-only outputs.
|
|
20
21
|
|
|
21
22
|
## Output
|
|
22
23
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: harness-sentrux-setup
|
|
3
|
+
description: Bootstrap Sentrux architectural rules for harness projects — seed architecture.manifest.json, generate merge-safe .sentrux/rules.toml, and document bootstrap vs --force sync. Use during /harness-setup, when adding Sentrux to a repo, or when rules.toml is missing or out of date.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# harness-sentrux-setup
|
|
7
|
+
|
|
8
|
+
## When to use
|
|
9
|
+
|
|
10
|
+
- `/harness-setup` Step 4.4 (Sentrux rules bootstrap)
|
|
11
|
+
- Target repo has no `.sentrux/rules.toml` or `harness-verify` reports rules out of date
|
|
12
|
+
- User edited `.pi/harness/sentrux/architecture.manifest.json` (layers, boundaries, constraints)
|
|
13
|
+
|
|
14
|
+
## Canonical layout
|
|
15
|
+
|
|
16
|
+
| Path | Role |
|
|
17
|
+
|------|------|
|
|
18
|
+
| `.pi/harness/sentrux/architecture.manifest.json` | Source of truth (layers, boundaries, constraints) |
|
|
19
|
+
| `.sentrux/rules.toml` | Generated Sentrux rules (commit to git) |
|
|
20
|
+
| `.sentrux/.harness-rules-meta.json` | Sync metadata (gitignored) |
|
|
21
|
+
|
|
22
|
+
Custom TOML **outside** `# --- harness:managed:start/end ---` is preserved on every sync.
|
|
23
|
+
|
|
24
|
+
## Commands (resolve `UP_PKG` via `.pi/scripts/README.md`)
|
|
25
|
+
|
|
26
|
+
| Situation | Command |
|
|
27
|
+
|-----------|---------|
|
|
28
|
+
| First-time / harness-setup (idempotent) | `node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"` |
|
|
29
|
+
| After manifest edits | `node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs" --force` |
|
|
30
|
+
| CI / verify only | `node "$UP_PKG/.pi/scripts/sentrux-rules-sync.mjs" --check` |
|
|
31
|
+
| In pi session | `/harness-sentrux-sync` (extension; uses `--force`) |
|
|
32
|
+
|
|
33
|
+
**Bootstrap vs `--force`:** Default bootstrap/sync skips rewriting `rules.toml` when the manifest hash is unchanged. Use `--force` (or `/harness-sentrux-sync`) after changing `architecture.manifest.json` or when verify reports drift.
|
|
34
|
+
|
|
35
|
+
## Workflow
|
|
36
|
+
|
|
37
|
+
1. Ensure Sentrux CLI is installed (`harness-setup` Step 2.8 or `harness-cli-verify.sh`).
|
|
38
|
+
2. Run bootstrap from **project root** (not `UP_PKG`):
|
|
39
|
+
```bash
|
|
40
|
+
node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"
|
|
41
|
+
```
|
|
42
|
+
3. Optional: `sentrux plugin add-standard` (language plugins; harness-setup Step 2.8).
|
|
43
|
+
4. Merge sentrux MCP into `.pi/mcp.json` if missing (harness-setup Step 4.2).
|
|
44
|
+
5. `sentrux check .` — fix violations or tune manifest `max_cc` / layers.
|
|
45
|
+
6. Commit `.sentrux/rules.toml` and project-specific `architecture.manifest.json`.
|
|
46
|
+
|
|
47
|
+
## External repos
|
|
48
|
+
|
|
49
|
+
`harness-seed-project-contracts.mjs` (Step 0.5) copies JSON schemas; bootstrap seeds the Sentrux manifest template when absent and sets `project` from `package.json`.
|
|
50
|
+
|
|
51
|
+
Do **not** copy ultimate-pi's layer paths blindly into unrelated layouts — edit manifest layers/boundaries for the target repo, then `--force` sync.
|
|
52
|
+
|
|
53
|
+
## References
|
|
54
|
+
|
|
55
|
+
- ADR 0009 — `.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md`
|
|
56
|
+
- Scripts — `.pi/scripts/sentrux-rules-sync.mjs`, `harness-sentrux-bootstrap.mjs`
|
|
57
|
+
- Agent — `harness/sentrux-bootstrap` (optional delegate for setup-only runs)
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: scrapling-web
|
|
3
|
+
description: |
|
|
4
|
+
Harness web search and scrape via the local harness-web CLI (Scrapling). Use for any
|
|
5
|
+
non-API web task: search, scrape URLs, map site links, bulk research fetches.
|
|
6
|
+
Replaces Firecrawl in ultimate-pi harness agents. Triggers on: search the web,
|
|
7
|
+
scrape URL, fetch page, research online, harness-web, .web/ artifacts.
|
|
8
|
+
allowed-tools:
|
|
9
|
+
- Bash(python3 *harness-web.py *)
|
|
10
|
+
- Bash(python3 .pi/scripts/harness-web.py *)
|
|
11
|
+
- Bash(scrapling *)
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
# scrapling-web (harness-web)
|
|
15
|
+
|
|
16
|
+
Local web layer for harness agents — **no API keys**, no Docker compose stack.
|
|
17
|
+
Uses [Scrapling](https://scrapling.readthedocs.io/) via `python3 "$UP_PKG/.pi/scripts/harness-web.py"`.
|
|
18
|
+
|
|
19
|
+
## Install (once per machine)
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
command -v uv &>/dev/null || curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
23
|
+
uv tool install "scrapling[fetchers]"
|
|
24
|
+
scrapling install # browser binaries for default stealth scrape
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Verify: `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
|
|
28
|
+
|
|
29
|
+
## Output directory
|
|
30
|
+
|
|
31
|
+
Write artifacts under **`.web/`** (gitignored), not `.firecrawl/`:
|
|
32
|
+
|
|
33
|
+
| Task | Command |
|
|
34
|
+
|------|---------|
|
|
35
|
+
| Search | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
|
|
36
|
+
| Scrape URL | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
|
|
37
|
+
| Fast/static scrape | add `--fast` (example.com, raw docs, localhost) |
|
|
38
|
+
| Map same-host links | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json --limit 50` |
|
|
39
|
+
| Bulk | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/ --limit 3` |
|
|
40
|
+
|
|
41
|
+
## Search JSON shape (Firecrawl-compatible)
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
jq -r '.data.web[].url' .web/search.json
|
|
45
|
+
jq -r '.data.web[] | "\(.title): \(.url)"' .web/search.json
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Each entry: `url`, `title`, `description`.
|
|
49
|
+
|
|
50
|
+
## Fetch modes
|
|
51
|
+
|
|
52
|
+
| Mode | When |
|
|
53
|
+
|------|------|
|
|
54
|
+
| **stealth** (default scrape) | Arbitrary URLs, JS-heavy sites |
|
|
55
|
+
| **fast** (`--fast` or `HARNESS_WEB_FETCH_MODE=fast`) | Static docs, example.com, localhost |
|
|
56
|
+
| **auto** (`HARNESS_WEB_FETCH_MODE=auto`) | fast for known-static hosts, else stealth |
|
|
57
|
+
|
|
58
|
+
Search always uses lightweight HTTP to `html.duckduckgo.com/html/`; on a 403 or challenge page it makes **one** stealth retry, then fails with a clear error.
|
|
59
|
+
|
|
60
|
+
## Environment
|
|
61
|
+
|
|
62
|
+
| Variable | Default | Purpose |
|
|
63
|
+
|----------|---------|---------|
|
|
64
|
+
| `HARNESS_WEB_FETCH_MODE` | `stealth` | `stealth` \| `fast` \| `auto` |
|
|
65
|
+
| `HARNESS_WEB_SEARCH_ENGINE` | `ddg_html` | SERP backend |
|
|
66
|
+
| `HARNESS_WEB_PROXY` | (unset) | Proxy URL for fetch/search |
|
|
67
|
+
| `HARNESS_WEB_RATE_LIMIT_MS` | `2000` | Delay between bulk scrapes |
|
|
68
|
+
| `HARNESS_WEB_TIMEOUT_MS` | `30000` | Per-request timeout |
|
|
69
|
+
|
|
70
|
+
## Escalation
|
|
71
|
+
|
|
72
|
+
1. `harness-web search` (HTTP SERP)
|
|
73
|
+
2. `harness-web scrape` (stealth default)
|
|
74
|
+
3. `harness-web scrape --fast` when the target is known static
|
|
75
|
+
4. `scrapling extract …` only when harness-web flags are insufficient
|
|
76
|
+
|
|
77
|
+
## Gaps vs old Firecrawl
|
|
78
|
+
|
|
79
|
+
| Firecrawl | Harness path |
|
|
80
|
+
|-----------|----------------|
|
|
81
|
+
| `interact` | No 1:1 equivalent — rare flows use gstack browse or a Scrapling MCP session |
|
|
82
|
+
| `agent` (structured extract) | Agent reasoning + graphify, or site-specific selectors |
|
|
83
|
+
| `parse` (local PDF) | Dedicated doc tools (pypdf, markitdown) |
|
|
84
|
+
| `crawl` (site-wide) | `map` + `bulk-scrape` or future Spiders integration |
|
|
85
|
+
|
|
86
|
+
## Ethics
|
|
87
|
+
|
|
88
|
+
Respect site terms and rate limits. SERP scraping is for dev research, not high-volume harvesting.
|
|
89
|
+
See [Scrapling ethical considerations](https://scrapling.readthedocs.io/en/latest/cli/extract-commands.html#legal-and-ethical-considerations).
|
|
90
|
+
|
|
91
|
+
## Drawbacks of default stealth scrape
|
|
92
|
+
|
|
93
|
+
Higher latency and RAM (Chromium per session). Use `--fast` for static docs; reuse one `bulk-scrape` run (single `StealthySession`) instead of many cold starts.
|
package/.pi/PACKAGING.md
CHANGED
|
@@ -13,14 +13,14 @@ Aligned with [pi packages](https://github.com/badlogic/pi-mono/blob/main/package
|
|
|
13
13
|
Pi does **not** define `scripts`, `agents`, or `providers` in the manifest.
|
|
14
14
|
|
|
15
15
|
- **Harness scripts** → `.pi/scripts/` — run via `node` / `bash` and `$UP_PKG` (see `.pi/scripts/README.md`); do not require npm script aliases in consumer `package.json`
|
|
16
|
-
- **Subagent agents** → `.pi/agents/**/*.md`
|
|
16
|
+
- **Subagent agents** → `.pi/agents/**/*.md` on the installed package (`harness/planner`, `pi-pi/agent-expert`, …) via `harness-subagents.ts`; optional **project overrides** at the same relative path under `.pi/agents/`. Version drift: `.pi/harness/agents.manifest.json` (regenerate with `harness-agents-manifest.mjs --write`)
|
|
17
17
|
- **Providers** → install via `bundledDependencies` + user settings, not a separate manifest directory
|
|
18
18
|
|
|
19
19
|
## npm `files` allowlist
|
|
20
20
|
|
|
21
21
|
We use an explicit allowlist (not the whole `.pi/` tree) so dev-only artifacts never ship:
|
|
22
22
|
|
|
23
|
-
- No `.pi/harness/runs/`, local `model-router.json`, or
|
|
23
|
+
- No `.pi/harness/runs/`, local `model-router.json`, or `.web/` scrape artifacts
|
|
24
24
|
- Ship `.pi/settings.example.json`, not `.pi/settings.json` (dev checkout uses `".."` local package)
|
|
25
25
|
- Include **`vendor/pi-model-router/`** ([`pi-model-router`](https://github.com/yeliu84/pi-model-router), MIT) — see repo [`THIRD_PARTY_NOTICES.md`](../THIRD_PARTY_NOTICES.md); refresh with `npm run vendor:sync-router`
|
|
26
26
|
|
package/.pi/SYSTEM.md
CHANGED
|
@@ -23,33 +23,31 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
|
|
|
23
23
|
## Web Policy (Mandatory)
|
|
24
24
|
|
|
25
25
|
> [!warning] No raw HTTP
|
|
26
|
-
> Route **all** web fetches through [[context7]] (API/library docs) or [[
|
|
26
|
+
> Route **all** web fetches through [[context7]] (API/library docs) or **harness-web** / [[scrapling-web]] (all other). No `curl`, `wget`, or raw bash HTTP.
|
|
27
27
|
|
|
28
28
|
### API / Library Docs — context7 ONLY
|
|
29
29
|
- `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`
|
|
30
30
|
- context7 owns: function signatures, class APIs, config options, stdlib, framework specs.
|
|
31
31
|
- **Never** use quality-sites for API docs.
|
|
32
32
|
|
|
33
|
-
### All Non-API Web Fetch —
|
|
34
|
-
See `.agents/skills/
|
|
33
|
+
### All Non-API Web Fetch — harness-web (Scrapling)
|
|
34
|
+
See `.agents/skills/scrapling-web/SKILL.md` for workflow escalation.
|
|
35
35
|
|
|
36
36
|
| Task | Command |
|
|
37
37
|
|------|---------|
|
|
38
|
-
| Search (no URL) | `
|
|
39
|
-
| Scrape (have URL) | `
|
|
40
|
-
|
|
|
41
|
-
|
|
|
42
|
-
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
- **Search:** firecrawl search only (no DuckDuckGo).
|
|
47
|
-
- **Post-clean (optional):** `firecrawl parse <file> -o .firecrawl/parsed.md` if output has boilerplate.
|
|
38
|
+
| Search (no URL) | `python3 "$UP_PKG/.pi/scripts/harness-web.py" search "query" -o .web/search.json --limit 5` |
|
|
39
|
+
| Scrape (have URL) | `python3 "$UP_PKG/.pi/scripts/harness-web.py" scrape "<url>" -o .web/page.md` |
|
|
40
|
+
| Static / known-simple | add `--fast` to scrape |
|
|
41
|
+
| Map same-host links | `python3 "$UP_PKG/.pi/scripts/harness-web.py" map "<url>" -o .web/map.json` |
|
|
42
|
+
| Bulk search + scrape | `python3 "$UP_PKG/.pi/scripts/harness-web.py" bulk-scrape "query" -o .web/bulk/` |
|
|
43
|
+
|
|
44
|
+
- **Artifacts:** always write under `.web/` with `-o` (token discipline).
|
|
45
|
+
- **Default scrape:** stealth browser; opt out with `--fast` or `HARNESS_WEB_FETCH_MODE=fast`.
|
|
48
46
|
- **Quality sites:** check `.agents/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
|
|
49
47
|
- **Research:** use `/wiki-autoresearch <topic>` for deep research. Results are graphified into `graphify-out/`.
|
|
50
48
|
|
|
51
49
|
### Missing CLI fallbacks
|
|
52
|
-
-
|
|
50
|
+
- harness-web / Scrapling missing: `uv tool install "scrapling[fetchers]" && scrapling install` then re-run `bash "$UP_PKG/.pi/scripts/harness-cli-verify.sh"`
|
|
53
51
|
- Context7 missing: `npm install -g ctx7@latest`
|
|
54
52
|
|
|
55
53
|
---
|
|
@@ -133,7 +131,7 @@ for conceptual code search before falling back to `ck`:
|
|
|
133
131
|
## Prompt-Engineering Execution Rules
|
|
134
132
|
1. Restate objective + constraints before major changes.
|
|
135
133
|
2. Make an explicit plan for multi-step tasks.
|
|
136
|
-
3.
|
|
134
|
+
3. For blocking harness forks, call `ask_user` (never silently default on `.env` creation, scope, risk, or merge policy).
|
|
137
135
|
4. Prefer deterministic commands and pinned paths.
|
|
138
136
|
5. Validate outcomes with targeted checks/tests.
|
|
139
137
|
6. Report: changed files, why, verification, risks/next steps.
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
|
|
3
3
|
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
5
|
+
disallowed_tools: ask_user
|
|
4
6
|
thinking: high
|
|
5
7
|
max_turns: 20
|
|
6
8
|
---
|
|
@@ -25,6 +27,7 @@ Pressure test the candidate with adversarial reasoning and reproducible attacks.
|
|
|
25
27
|
- Only assess risks relevant to the candidate and gate criteria; do not widen scope.
|
|
26
28
|
- Never speculate about defects without evidence and a reproducible path.
|
|
27
29
|
- Severity ordering must be evidence-backed.
|
|
30
|
+
- **Never** call `ask_user`. Emit findings only; parent orchestrator resolves `human_required` via `ask_user`.
|
|
28
31
|
|
|
29
32
|
## Output
|
|
30
33
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Independent harness evaluator producing structured pass/fail verdicts.
|
|
3
3
|
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
5
|
+
disallowed_tools: ask_user
|
|
4
6
|
thinking: high
|
|
5
7
|
max_turns: 20
|
|
6
8
|
---
|
|
@@ -25,6 +27,7 @@ Independently validate execution outcomes and emit structured verdicts.
|
|
|
25
27
|
- Only evaluate the candidate and gates requested; do not propose unrelated refactors.
|
|
26
28
|
- Never speculate about checks you did not run or artifacts you did not read.
|
|
27
29
|
- Prefer reproducible findings over subjective opinions.
|
|
30
|
+
- **Never** call `ask_user` — review isolation. Set `human_required` in `EvalVerdict`; the parent orchestrator calls `ask_user`.
|
|
28
31
|
|
|
29
32
|
## Output
|
|
30
33
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness executor that implements only within approved PlanPacket scope.
|
|
3
3
|
tools: read, write, edit, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
4
5
|
thinking: medium
|
|
5
6
|
max_turns: 30
|
|
6
7
|
---
|
|
@@ -17,7 +18,9 @@ Implement the approved plan with surgical diffs and strict scope control.
|
|
|
17
18
|
2. Implement only the approved scope with minimal, reversible diffs.
|
|
18
19
|
3. Run focused validations that map to plan acceptance checks.
|
|
19
20
|
4. Prepare rollback artifacts in all required forms.
|
|
20
|
-
5.
|
|
21
|
+
5. For **implementation forks** inside approved scope (library choice, flag, rollback tactic), call `ask_user` with 2–4 options — do not guess.
|
|
22
|
+
6. For **plan-level ambiguity** (wrong scope, missing acceptance), stop and recommend `/harness-plan` — do not widen scope.
|
|
23
|
+
7. Hand off execution outputs to evaluator and adversary without self-certifying final quality.
|
|
21
24
|
|
|
22
25
|
## Guardrails
|
|
23
26
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
|
|
3
3
|
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
4
5
|
thinking: high
|
|
5
6
|
max_turns: 25
|
|
6
7
|
---
|
|
@@ -16,7 +17,7 @@ Generate conservative, evidence-backed optimization proposals for harness qualit
|
|
|
16
17
|
1. Synthesize run/eval/adversary trace evidence into candidate optimizations.
|
|
17
18
|
2. Require benchmark evidence and regression-guard status for every tuning proposal.
|
|
18
19
|
3. Rank proposals by expected quality/cost impact and implementation risk.
|
|
19
|
-
4. Route router edits through proposal artifacts and explicit human approval only.
|
|
20
|
+
4. Route router edits through proposal artifacts and explicit human approval only — use `ask_user` to approve / reject / defer ranked proposals before any apply.
|
|
20
21
|
5. Prefer reversible, minimal changes with explicit risk notes.
|
|
21
22
|
|
|
22
23
|
## Guardrails
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Harness planner that compiles strict PlanPacket contracts before execution.
|
|
3
3
|
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
4
5
|
thinking: medium
|
|
5
6
|
max_turns: 20
|
|
6
7
|
---
|
|
@@ -14,7 +15,7 @@ Compile a strict, machine-readable `PlanPacket` before any implementation happen
|
|
|
14
15
|
## Process
|
|
15
16
|
|
|
16
17
|
1. Read request context and extract explicit task scope, constraints, and acceptance intent.
|
|
17
|
-
2. If scope is ambiguous or contradictory,
|
|
18
|
+
2. If scope is ambiguous or contradictory, **call `ask_user`** with 2–4 clear options (see harness-decisions skill). Do not emit an executable `PlanPacket` until answered or the user cancels.
|
|
18
19
|
3. Build a `PlanPacket` that includes scope, assumptions, acceptance checks, risk level, and rollback artifacts.
|
|
19
20
|
4. Validate that the output matches `.pi/harness/specs/plan-packet.schema.json`.
|
|
20
21
|
5. Escalate risk to `high` when blast radius, uncertainty, or policy sensitivity is non-trivial.
|
|
@@ -26,6 +27,26 @@ Compile a strict, machine-readable `PlanPacket` before any implementation happen
|
|
|
26
27
|
- Never speculate about repository state you have not read.
|
|
27
28
|
- Do not mutate files.
|
|
28
29
|
- Do not hand off an executable path if plan ambiguity remains unresolved.
|
|
30
|
+
- Use `ask_user` for blocking forks; never guess risk level or scope boundaries.
|
|
31
|
+
|
|
32
|
+
## ask_user example
|
|
33
|
+
|
|
34
|
+
When risk or scope is unclear:
|
|
35
|
+
|
|
36
|
+
```json
|
|
37
|
+
{
|
|
38
|
+
"question": "What risk level fits this change?",
|
|
39
|
+
"context": "High risk triggers extra gates and rollback requirements.",
|
|
40
|
+
"options": [
|
|
41
|
+
{ "title": "low", "description": "Localized change, easy revert" },
|
|
42
|
+
{ "title": "med", "description": "Multiple files or moderate blast radius" },
|
|
43
|
+
{ "title": "high", "description": "Auth, data, infra, or uncertain impact" }
|
|
44
|
+
],
|
|
45
|
+
"allowFreeform": false
|
|
46
|
+
}
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
If `ask_user` returns cancelled, stop with `needs_clarification` and no `PlanPacket`.
|
|
29
50
|
|
|
30
51
|
## Output
|
|
31
52
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Bootstrap Sentrux rules for a harness project — seed architecture manifest, sync merge-safe rules.toml, verify sentrux check.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
5
|
+
thinking: low
|
|
6
|
+
max_turns: 12
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
You are the Harness Sentrux Bootstrap agent.
|
|
10
|
+
|
|
11
|
+
## Mission
|
|
12
|
+
|
|
13
|
+
Configure initial Sentrux architectural rules for the current project without destroying user customizations.
|
|
14
|
+
|
|
15
|
+
## Process
|
|
16
|
+
|
|
17
|
+
1. Resolve `UP_PKG` (the ultimate-pi package root): use the directory containing `require.resolve('ultimate-pi/package.json')`, or, when `UP_PKG` is already set, run `node "$UP_PKG/.pi/scripts/harness-resolve-up-pkg.mjs"`.
|
|
18
|
+
2. Read **harness-sentrux-setup** skill (package `.agents/skills/harness-sentrux-setup/SKILL.md`).
|
|
19
|
+
3. From **project root** (cwd), run:
|
|
20
|
+
```bash
|
|
21
|
+
node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs"
|
|
22
|
+
```
|
|
23
|
+
4. If `sentrux` is on PATH, run `sentrux check .` and summarize pass/fail.
|
|
24
|
+
5. Report the paths of the manifest and `rules.toml`, and state whether bootstrap seeded them or skipped because they were already up to date.
|
|
25
|
+
|
|
26
|
+
## When to use `--force`
|
|
27
|
+
|
|
28
|
+
- User edited `.pi/harness/sentrux/architecture.manifest.json`
|
|
29
|
+
- `sentrux-rules-sync --check` or harness-verify reports drift
|
|
30
|
+
|
|
31
|
+
Then:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
node "$UP_PKG/.pi/scripts/harness-sentrux-bootstrap.mjs" --force
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Guardrails
|
|
38
|
+
|
|
39
|
+
- Never delete custom TOML outside `harness:managed` markers.
|
|
40
|
+
- Do not overwrite an existing `architecture.manifest.json` — only seed when missing.
|
|
41
|
+
- Do not run `graphify codex install` or unrelated harness-setup steps unless asked.
|
|
42
|
+
- Prefer bundled scripts over hand-editing `rules.toml`.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
|
|
3
3
|
tools: read, bash, grep, find, ls
|
|
4
|
+
extensions: true
|
|
4
5
|
thinking: high
|
|
5
6
|
max_turns: 15
|
|
6
7
|
---
|
|
@@ -21,6 +22,7 @@ Resolve unresolved debate outcomes when evaluator and adversary cannot converge
|
|
|
21
22
|
- agreement=0.40
|
|
22
23
|
4. Respect aggressive debate caps and budget exhaustion rules.
|
|
23
24
|
5. Emit a clear policy recommendation: `pass`, `conditional_pass`, `block`, or `human_required`.
|
|
25
|
+
6. When recommendation is `human_required`, call `ask_user` with structured options (`pass`, `conditional_pass`, `block`, `defer`) instead of free-text-only escalation.
|
|
24
26
|
|
|
25
27
|
## Guardrails
|
|
26
28
|
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* harness-ask-user — structured user decisions for harness planning and setup.
|
|
3
|
+
* Design references: pi-ask-user, @pi-unipi/ask-user, rpiv-ask-user-question (not vendored).
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
7
|
+
import { runAskDialog } from "./lib/ask-user/dialog.js";
|
|
8
|
+
import { runAskFallback } from "./lib/ask-user/fallback.js";
|
|
9
|
+
import { renderAskCall, renderAskResult } from "./lib/ask-user/render.js";
|
|
10
|
+
import {
|
|
11
|
+
AskUserParamsSchema,
|
|
12
|
+
PROMPT_GUIDELINES,
|
|
13
|
+
PROMPT_SNIPPET,
|
|
14
|
+
} from "./lib/ask-user/schema.js";
|
|
15
|
+
import type { AskUserParams, DialogResult } from "./lib/ask-user/types.js";
|
|
16
|
+
import {
|
|
17
|
+
formatResultText,
|
|
18
|
+
toToolDetails,
|
|
19
|
+
validateAskParams,
|
|
20
|
+
} from "./lib/ask-user/validate.js";
|
|
21
|
+
|
|
22
|
+
export default function harnessAskUser(pi: ExtensionAPI) {
|
|
23
|
+
pi.registerTool({
|
|
24
|
+
name: "ask_user",
|
|
25
|
+
label: "Ask User",
|
|
26
|
+
description:
|
|
27
|
+
"Ask the user a structured question with options. Use for ambiguous or high-impact harness decisions instead of guessing.",
|
|
28
|
+
promptSnippet: PROMPT_SNIPPET,
|
|
29
|
+
promptGuidelines: PROMPT_GUIDELINES,
|
|
30
|
+
parameters: AskUserParamsSchema,
|
|
31
|
+
|
|
32
|
+
async execute(_toolCallId, params, _signal, _onUpdate, ctx) {
|
|
33
|
+
const validated = validateAskParams(params as AskUserParams);
|
|
34
|
+
if (typeof validated === "string") {
|
|
35
|
+
return {
|
|
36
|
+
content: [{ type: "text", text: validated }],
|
|
37
|
+
details: {
|
|
38
|
+
question: params.question ?? "",
|
|
39
|
+
options: [],
|
|
40
|
+
response: null,
|
|
41
|
+
cancelled: true,
|
|
42
|
+
},
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
let outcome: DialogResult;
|
|
47
|
+
if (ctx.hasUI) {
|
|
48
|
+
outcome = await runAskDialog(ctx.ui, validated);
|
|
49
|
+
} else {
|
|
50
|
+
outcome = await runAskFallback(ctx.ui, validated);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
const details = toToolDetails(
|
|
54
|
+
validated,
|
|
55
|
+
outcome.response,
|
|
56
|
+
outcome.cancelled,
|
|
57
|
+
);
|
|
58
|
+
const text = formatResultText(outcome.response, outcome.cancelled);
|
|
59
|
+
|
|
60
|
+
return {
|
|
61
|
+
content: [{ type: "text", text }],
|
|
62
|
+
details,
|
|
63
|
+
};
|
|
64
|
+
},
|
|
65
|
+
|
|
66
|
+
renderCall(args, theme) {
|
|
67
|
+
return renderAskCall(args, theme);
|
|
68
|
+
},
|
|
69
|
+
|
|
70
|
+
renderResult(result, options, theme) {
|
|
71
|
+
return renderAskResult(result, options, theme);
|
|
72
|
+
},
|
|
73
|
+
});
|
|
74
|
+
}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* harness-subagents — package-resolved agents, blackboard, observation-bus handoffs.
|
|
3
|
+
*/
|
|
4
|
+
import { getHarnessPackageRoot } from "./lib/harness-paths.js";
|
|
5
|
+
import { createHarnessSubagentsExtension } from "./lib/harness-subagents/vendored/index.js";
|
|
6
|
+
|
|
7
|
+
export default createHarnessSubagentsExtension(
|
|
8
|
+
getHarnessPackageRoot(import.meta.url),
|
|
9
|
+
);
|