ultimate-pi 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.pi/extensions/lib/harness-paths.ts +8 -0
- package/.pi/extensions/sentrux-rules-sync.ts +2 -8
- package/.pi/harness/browser.json +5 -1
- package/.pi/harness/debates/README.md +9 -0
- package/.pi/harness/docs/adrs/0006-sentrux-dual-layer.md +1 -1
- package/.pi/harness/docs/adrs/0009-sentrux-rules-lifecycle.md +2 -2
- package/.pi/harness/incidents/README.md +6 -0
- package/.pi/harness/release-readiness-report.md +128 -0
- package/.pi/harness/router/proposals/canary-proposal.json +96 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/events.jsonl +2 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/trace.json +17 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/events.jsonl +2 -0
- package/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/trace.json +17 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/events.jsonl +6 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/trace.json +42 -0
- package/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774136101/events.jsonl +1 -0
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/events.jsonl +2 -0
- package/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/trace.json +17 -0
- package/.pi/harness/runs/README.md +6 -0
- package/.pi/harness/runs/budget-events.jsonl +4 -0
- package/.pi/harness/runs/canary-candidate-router.json +72 -0
- package/.pi/harness/runs/canary-evidence.json +9 -0
- package/.pi/harness/runs/index.jsonl +4 -0
- package/.pi/harness/sentrux/architecture.manifest.json +3 -3
- package/.pi/model-router.json +95 -0
- package/.pi/prompts/harness-setup.md +13 -14
- package/.pi/prompts/release.md +225 -0
- package/.pi/scripts/README.md +17 -0
- package/{scripts → .pi/scripts}/harness-verify.mjs +3 -3
- package/{scripts → .pi/scripts}/sentrux-rules-sync.mjs +2 -2
- package/.sentrux/.harness-rules-meta.json +2 -2
- package/.sentrux/rules.toml +3 -3
- package/CHANGELOG.md +8 -0
- package/firecrawl/.env +53 -0
- package/package.json +15 -5
- package/.ckignore +0 -41
- package/.codex/hooks.json +0 -15
- package/.env.example +0 -21
- package/.gitattributes +0 -1
- package/.github/banner-v2.png +0 -0
- package/.github/workflows/lint.yml +0 -33
- package/.github/workflows/publish-github-packages.yml +0 -35
- package/.github/workflows/publish-npm.yml +0 -32
- package/CONTRIBUTING.md +0 -166
- package/lefthook.yml +0 -9
- package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
- package/scripts/index_youtube_urls.py +0 -376
- package/scripts/merge_graphify_corpora.py +0 -398
- package/scripts/regen_graphify_html.py +0 -46
- package/test/harness-verify.test.mjs +0 -33
- /package/{scripts → .pi/scripts}/harness-cli-verify.sh +0 -0
- /package/{scripts → .pi/scripts}/harness-graphify-bootstrap.sh +0 -0
|
@@ -45,3 +45,11 @@ export function resolveHarnessAsset(
|
|
|
45
45
|
): string {
|
|
46
46
|
return join(getHarnessPackageRoot(moduleUrl), ...segments);
|
|
47
47
|
}
|
|
48
|
+
|
|
49
|
+
/** Harness CLI scripts shipped under `.pi/scripts/` in the npm package. */
|
|
50
|
+
export function resolveHarnessScript(
|
|
51
|
+
moduleUrl: string,
|
|
52
|
+
scriptName: string,
|
|
53
|
+
): string {
|
|
54
|
+
return resolveHarnessAsset(moduleUrl, ".pi", "scripts", scriptName);
|
|
55
|
+
}
|
|
@@ -4,21 +4,15 @@
|
|
|
4
4
|
|
|
5
5
|
import { spawn } from "node:child_process";
|
|
6
6
|
import { existsSync } from "node:fs";
|
|
7
|
-
import { join } from "node:path";
|
|
8
7
|
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
9
|
-
import {
|
|
8
|
+
import { resolveHarnessScript } from "./lib/harness-paths.js";
|
|
10
9
|
|
|
11
10
|
function resolveSyncScript(): string {
|
|
12
|
-
|
|
11
|
+
return resolveHarnessScript(
|
|
13
12
|
// @ts-expect-error pi extensions run as ESM
|
|
14
13
|
import.meta.url,
|
|
15
|
-
"scripts",
|
|
16
14
|
"sentrux-rules-sync.mjs",
|
|
17
15
|
);
|
|
18
|
-
if (existsSync(packaged)) {
|
|
19
|
-
return packaged;
|
|
20
|
-
}
|
|
21
|
-
return join(process.cwd(), "scripts", "sentrux-rules-sync.mjs");
|
|
22
16
|
}
|
|
23
17
|
|
|
24
18
|
function runSync(args: string[]): Promise<{ code: number; output: string }> {
|
package/.pi/harness/browser.json
CHANGED
|
@@ -11,7 +11,7 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
|
|
|
11
11
|
|
|
12
12
|
1. **Canonical source:** [`.pi/harness/sentrux/architecture.manifest.json`](../../sentrux/architecture.manifest.json) — layers, boundaries, global constraints.
|
|
13
13
|
2. **Generated artifact:** `.sentrux/rules.toml` — committed to git; managed block between `harness:managed:start/end` markers.
|
|
14
|
-
3. **Sync command:** `npm run harness:sentrux-sync` (
|
|
14
|
+
3. **Sync command:** `npm run harness:sentrux-sync` (`.pi/scripts/sentrux-rules-sync.mjs`).
|
|
15
15
|
4. **Pi command:** `/harness-sentrux-sync` via `sentrux-rules-sync.ts` extension.
|
|
16
16
|
5. **When to sync:**
|
|
17
17
|
- `/harness-setup` Step 2.8 (after sentrux install)
|
|
@@ -34,5 +34,5 @@ Sentrux enforces architecture via [`.sentrux/rules.toml`](https://sentrux.dev/do
|
|
|
34
34
|
## References
|
|
35
35
|
|
|
36
36
|
- ADR 0006 (Sentrux dual layer)
|
|
37
|
-
-
|
|
37
|
+
- `.pi/scripts/sentrux-rules-sync.mjs`
|
|
38
38
|
- `.pi/extensions/sentrux-rules-sync.ts`
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Release Readiness Report
|
|
2
|
+
|
|
3
|
+
Date: 2026-05-14
|
|
4
|
+
Repo root used: `/home/aryaniyaps/ai-projects/ultimate-pi` (active workspace root, treated as canonical)
|
|
5
|
+
|
|
6
|
+
## Requested remaining work
|
|
7
|
+
|
|
8
|
+
- `run-adversarial-canary-and-release`
|
|
9
|
+
- `final-prompt-expert-feature-sweep`
|
|
10
|
+
|
|
11
|
+
Plan file was not modified.
|
|
12
|
+
|
|
13
|
+
## Final integration checks
|
|
14
|
+
|
|
15
|
+
### 1) TypeScript compile check
|
|
16
|
+
|
|
17
|
+
- Command: `npm run check:ts`
|
|
18
|
+
- Result: PASS
|
|
19
|
+
|
|
20
|
+
### 2) Full lint/format/test gate
|
|
21
|
+
|
|
22
|
+
- Command: `npm run check:ts && npm run lint && npm run format:check && npm test`
|
|
23
|
+
- Result: FAIL (expected in current tree state)
|
|
24
|
+
- Notes:
|
|
25
|
+
- `biome check` reports existing lint/format issues (including `.pi/extensions/custom-footer.ts` and multiple `.pi/harness/specs/*.json` files).
|
|
26
|
+
- `npm test` fails before test execution due to a Node runtime flag incompatibility:
|
|
27
|
+
- `node: bad option: --experimental-strip-types`
|
|
28
|
+
|
|
29
|
+
### 3) Release preflight checks
|
|
30
|
+
|
|
31
|
+
- Command: `git rev-parse --is-inside-work-tree && git remote -v && git symbolic-ref -q HEAD && (git diff --quiet && git diff --cached --quiet && echo CLEAN || echo DIRTY)`
|
|
32
|
+
- Result:
|
|
33
|
+
- inside git repo: yes
|
|
34
|
+
- branch: `refs/heads/main`
|
|
35
|
+
- remote `origin`: configured
|
|
36
|
+
- tree cleanliness: `DIRTY` (release/tag push should stay blocked until clean)
|
|
37
|
+
|
|
38
|
+
## Targeted canary validations
|
|
39
|
+
|
|
40
|
+
### 1) Prompt and policy canary assertions
|
|
41
|
+
|
|
42
|
+
- Static canary suite executed against:
|
|
43
|
+
- harness prompt templates
|
|
44
|
+
- `policy-gate`
|
|
45
|
+
- `test-diff-integrity`
|
|
46
|
+
- `debate-orchestrator`
|
|
47
|
+
- Result: PASS after prompt sweep updates
|
|
48
|
+
- locked clauses in `harness-auto` preserved
|
|
49
|
+
- prompt argument parsing + usage surfaces present across harness prompts
|
|
50
|
+
- completion behavior sections present for operator-facing harness prompts
|
|
51
|
+
- policy/test/debate lock signals present in extension code
|
|
52
|
+
|
|
53
|
+
### 2) Router tuning canary (proposal-only)
|
|
54
|
+
|
|
55
|
+
- Created synthetic canary evidence:
|
|
56
|
+
- `.pi/harness/runs/canary-evidence.json`
|
|
57
|
+
- Candidate router for dry proposal:
|
|
58
|
+
- `.pi/harness/runs/canary-candidate-router.json`
|
|
59
|
+
- Command:
|
|
60
|
+
- `node .pi/harness/router/propose-router-tuning.mjs --evidence ... --candidate ... --proposal-out .pi/harness/router/proposals/canary-proposal.json`
|
|
61
|
+
- Result: PASS (proposal created, no live router write)
|
|
62
|
+
|
|
63
|
+
### 3) Harness schema parse check
|
|
64
|
+
|
|
65
|
+
- Command: Node JSON parse validation across `.pi/harness/specs/*.json`
|
|
66
|
+
- Result: PASS (all 9 schema files parse successfully)
|
|
67
|
+
|
|
68
|
+
## Lightweight adversarial drills
|
|
69
|
+
|
|
70
|
+
### 1) Negative apply drill (guardrail validation)
|
|
71
|
+
|
|
72
|
+
- Command:
|
|
73
|
+
- `node .pi/harness/router/apply-router-proposal.mjs --proposal ... --approve-by ... --justification ...`
|
|
74
|
+
- intentionally omitted `--write`
|
|
75
|
+
- Result: PASS (guard correctly blocked apply)
|
|
76
|
+
- Expected error:
|
|
77
|
+
- `missing --write (blind writes and implicit applies are disallowed)`
|
|
78
|
+
|
|
79
|
+
### 2) Adversarial lock retention
|
|
80
|
+
|
|
81
|
+
- Verified locked governance semantics remain stated in `harness-auto`:
|
|
82
|
+
- adversarial review always required
|
|
83
|
+
- severity-policy-engine remains merge-block authority
|
|
84
|
+
- strict pre-PR gates mandatory
|
|
85
|
+
- never auto-merge
|
|
86
|
+
|
|
87
|
+
## Prompt expert feature sweep
|
|
88
|
+
|
|
89
|
+
Using guidance from `.pi/agents/pi-pi/prompt-expert.md`, harness prompt templates were refined for:
|
|
90
|
+
|
|
91
|
+
1. Argument handling:
|
|
92
|
+
- explicit `$ARGUMENTS` parse sections
|
|
93
|
+
- required/optional argument normalization
|
|
94
|
+
- deterministic usage fallback lines
|
|
95
|
+
2. Completion behavior:
|
|
96
|
+
- explicit terminal output contracts for predictable downstream handoff
|
|
97
|
+
3. UX consistency:
|
|
98
|
+
- harmonized command usage patterns and closure blocks across harness prompts
|
|
99
|
+
4. Policy integrity:
|
|
100
|
+
- locked policy constraints intentionally kept intact
|
|
101
|
+
|
|
102
|
+
## Files updated in this sweep
|
|
103
|
+
|
|
104
|
+
- `.pi/prompts/harness-auto.md`
|
|
105
|
+
- `.pi/prompts/harness-plan.md`
|
|
106
|
+
- `.pi/prompts/harness-run.md`
|
|
107
|
+
- `.pi/prompts/harness-review.md`
|
|
108
|
+
- `.pi/prompts/harness-critic.md`
|
|
109
|
+
- `.pi/prompts/harness-eval.md`
|
|
110
|
+
- `.pi/prompts/harness-trace.md`
|
|
111
|
+
- `.pi/prompts/harness-incident.md`
|
|
112
|
+
- `.pi/prompts/harness-router-tune.md`
|
|
113
|
+
- `.pi/prompts/harness-setup.md`
|
|
114
|
+
- `.pi/harness/release-readiness-report.md` (this report)
|
|
115
|
+
|
|
116
|
+
## New canary artifacts
|
|
117
|
+
|
|
118
|
+
- `.pi/harness/runs/canary-evidence.json`
|
|
119
|
+
- `.pi/harness/runs/canary-candidate-router.json`
|
|
120
|
+
- `.pi/harness/router/proposals/canary-proposal.json`
|
|
121
|
+
|
|
122
|
+
## Residual risks
|
|
123
|
+
|
|
124
|
+
1. Full repo lint/format gate currently fails due to pre-existing issues unrelated to this sweep.
|
|
125
|
+
2. `npm test` is currently not runnable in this environment because the configured Node flag is unsupported.
|
|
126
|
+
3. Release flow should remain blocked until working tree is clean and CI-equivalent checks pass.
|
|
127
|
+
4. Router apply path was intentionally not executed with `--write` during this run (safety-preserving drill).
|
|
128
|
+
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"proposal_id": "router-tune-2026-05-14T15-44-44-399Z",
|
|
4
|
+
"created_at": "2026-05-14T15:44:44.399Z",
|
|
5
|
+
"router_path": ".pi/model-router.json",
|
|
6
|
+
"base_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
7
|
+
"candidate_router_sha256": "2a96fba517cc5b5147f37428d7ed62961b1968c0e83c0e69f02524265449856b",
|
|
8
|
+
"evidence": {
|
|
9
|
+
"sample_count": 24,
|
|
10
|
+
"min_sample_count": 12,
|
|
11
|
+
"success_rate_delta": 0.08,
|
|
12
|
+
"cost_per_task_delta": -0.04,
|
|
13
|
+
"regression_guard_passed": true,
|
|
14
|
+
"trace_refs": ["run-canary-001", "run-canary-002"],
|
|
15
|
+
"notes": "canary validation synthetic evidence"
|
|
16
|
+
},
|
|
17
|
+
"status": "proposed",
|
|
18
|
+
"approval": {
|
|
19
|
+
"required": true,
|
|
20
|
+
"approved_by": null,
|
|
21
|
+
"approved_at": null,
|
|
22
|
+
"justification": null
|
|
23
|
+
},
|
|
24
|
+
"candidate_router": {
|
|
25
|
+
"defaultProfile": "auto",
|
|
26
|
+
"debug": false,
|
|
27
|
+
"classifierModel": "opencode-go/qwen3.6-plus",
|
|
28
|
+
"phaseBias": 0.5,
|
|
29
|
+
"maxSessionBudget": 1,
|
|
30
|
+
"largeContextThreshold": 100000,
|
|
31
|
+
"rules": [
|
|
32
|
+
{
|
|
33
|
+
"matches": ["deploy", "production", "release"],
|
|
34
|
+
"tier": "high",
|
|
35
|
+
"reason": "Safety check for production tasks"
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"matches": "changelog",
|
|
39
|
+
"tier": "low"
|
|
40
|
+
}
|
|
41
|
+
],
|
|
42
|
+
"profiles": {
|
|
43
|
+
"auto": {
|
|
44
|
+
"high": {
|
|
45
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
46
|
+
"thinking": "high",
|
|
47
|
+
"fallbacks": ["opencode-go/qwen3.6-plus", "opencode-go/kimi-k2.6"]
|
|
48
|
+
},
|
|
49
|
+
"medium": {
|
|
50
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
51
|
+
"thinking": "medium",
|
|
52
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
53
|
+
},
|
|
54
|
+
"low": {
|
|
55
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
56
|
+
"thinking": "low",
|
|
57
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
58
|
+
}
|
|
59
|
+
},
|
|
60
|
+
"cheap": {
|
|
61
|
+
"high": {
|
|
62
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
63
|
+
"thinking": "low",
|
|
64
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
65
|
+
},
|
|
66
|
+
"medium": {
|
|
67
|
+
"model": "opencode-go/qwen3.5-plus",
|
|
68
|
+
"thinking": "off",
|
|
69
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
70
|
+
},
|
|
71
|
+
"low": {
|
|
72
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
73
|
+
"thinking": "off",
|
|
74
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"deep": {
|
|
78
|
+
"high": {
|
|
79
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
80
|
+
"thinking": "xhigh",
|
|
81
|
+
"fallbacks": ["opencode-go/kimi-k2.6"]
|
|
82
|
+
},
|
|
83
|
+
"medium": {
|
|
84
|
+
"model": "opencode-go/kimi-k2.6",
|
|
85
|
+
"thinking": "medium",
|
|
86
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
87
|
+
},
|
|
88
|
+
"low": {
|
|
89
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
90
|
+
"thinking": "low",
|
|
91
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:51:31.965Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:51:38.346Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773891854",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e272f-3eef-7107-9712-ce281de55707",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [],
|
|
11
|
+
"artifact_refs": [],
|
|
12
|
+
"cost": {
|
|
13
|
+
"input_tokens": 15381,
|
|
14
|
+
"output_tokens": 33,
|
|
15
|
+
"total_tokens": 15414
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:51:52.062Z","type":"run_start","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:52:14.313Z","type":"run_end","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e272f-3eef-7107-9712-ce281de55707-1778773912057",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e272f-3eef-7107-9712-ce281de55707",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [],
|
|
11
|
+
"artifact_refs": [],
|
|
12
|
+
"cost": {
|
|
13
|
+
"input_tokens": 31337,
|
|
14
|
+
"output_tokens": 528,
|
|
15
|
+
"total_tokens": 31865
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:54:46.136Z","type":"run_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:54:59.110Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_7UHDcydTHJHVR2dT5xpb0903","tool_name":"bash"}
|
|
3
|
+
{"timestamp":"2026-05-14T15:54:59.137Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_01_aNsry1whTl5hRf5Ew91t3142","tool_name":"bash"}
|
|
4
|
+
{"timestamp":"2026-05-14T15:54:59.139Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_02_N2e56Q6vKr6cAYzd4Z9q7953","tool_name":"bash"}
|
|
5
|
+
{"timestamp":"2026-05-14T15:55:11.546Z","type":"tool_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","tool_call_id":"call_00_wG71Rv3SKrf6R9K03EeS0264","tool_name":"ctx_batch_execute"}
|
|
6
|
+
{"timestamp":"2026-05-14T15:55:25.167Z","type":"run_end","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","phase":"plan","tool_span_count":4,"artifact_ref_count":0}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e2732-8651-74e5-9f5d-4d06c3105f25",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [
|
|
11
|
+
{
|
|
12
|
+
"tool_call_id": "call_00_7UHDcydTHJHVR2dT5xpb0903",
|
|
13
|
+
"tool_name": "bash",
|
|
14
|
+
"started_at": "2026-05-14T15:54:59.108Z",
|
|
15
|
+
"ended_at": "2026-05-14T15:54:59.108Z"
|
|
16
|
+
},
|
|
17
|
+
{
|
|
18
|
+
"tool_call_id": "call_01_aNsry1whTl5hRf5Ew91t3142",
|
|
19
|
+
"tool_name": "bash",
|
|
20
|
+
"started_at": "2026-05-14T15:54:59.136Z",
|
|
21
|
+
"ended_at": "2026-05-14T15:54:59.136Z"
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"tool_call_id": "call_02_N2e56Q6vKr6cAYzd4Z9q7953",
|
|
25
|
+
"tool_name": "bash",
|
|
26
|
+
"started_at": "2026-05-14T15:54:59.139Z",
|
|
27
|
+
"ended_at": "2026-05-14T15:54:59.139Z"
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"tool_call_id": "call_00_wG71Rv3SKrf6R9K03EeS0264",
|
|
31
|
+
"tool_name": "ctx_batch_execute",
|
|
32
|
+
"started_at": "2026-05-14T15:55:11.541Z",
|
|
33
|
+
"ended_at": "2026-05-14T15:55:11.541Z"
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"artifact_refs": [],
|
|
37
|
+
"cost": {
|
|
38
|
+
"input_tokens": 16951,
|
|
39
|
+
"output_tokens": 1020,
|
|
40
|
+
"total_tokens": 17971
|
|
41
|
+
}
|
|
42
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:55:36.107Z","type":"run_start","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774136101","plan_id":"plan-unknown","phase":"plan"}
|
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T16:36:40.660Z","type":"run_start","run_id":"019e2758-b332-771b-ad6f-54d0d8478768-1778776600591","plan_id":"plan-unknown","phase":"plan"}
|
|
2
|
+
{"timestamp":"2026-05-14T16:36:47.570Z","type":"run_end","run_id":"019e2758-b332-771b-ad6f-54d0d8478768-1778776600591","phase":"plan","tool_span_count":0,"artifact_ref_count":0}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0.0",
|
|
3
|
+
"contract_version": "1.0.0",
|
|
4
|
+
"run_id": "019e2758-b332-771b-ad6f-54d0d8478768-1778776600591",
|
|
5
|
+
"plan_id": "plan-unknown",
|
|
6
|
+
"agent_id": "019e2758-b332-771b-ad6f-54d0d8478768",
|
|
7
|
+
"phase": "plan",
|
|
8
|
+
"model": "auto",
|
|
9
|
+
"thinking_level": "off",
|
|
10
|
+
"tool_spans": [],
|
|
11
|
+
"artifact_refs": [],
|
|
12
|
+
"cost": {
|
|
13
|
+
"input_tokens": 21,
|
|
14
|
+
"output_tokens": 32,
|
|
15
|
+
"total_tokens": 53
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:54:59.134Z","schema_version":"1.0.0","contract_version":"1.0.0","event_type":"budget_exhausted","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25","debate_id":"plan-budget-guard","round_count":1,"budget_used":16593,"exhaustion_reason":"debate_global_cap_exceeded","caps":{"max_rounds":6,"round_token_cap":2500,"debate_global_cap":35000},"minimum_evidence_confidence":0.6,"default_policy_outcome":"block","human_override_allowed":true}
|
|
2
|
+
{"timestamp":"2026-05-14T15:54:59.138Z","schema_version":"1.0.0","contract_version":"1.0.0","event_type":"budget_exhausted","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25","debate_id":"plan-budget-guard","round_count":1,"budget_used":16593,"exhaustion_reason":"debate_global_cap_exceeded","caps":{"max_rounds":6,"round_token_cap":2500,"debate_global_cap":35000},"minimum_evidence_confidence":0.6,"default_policy_outcome":"block","human_override_allowed":true}
|
|
3
|
+
{"timestamp":"2026-05-14T15:54:59.140Z","schema_version":"1.0.0","contract_version":"1.0.0","event_type":"budget_exhausted","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25","debate_id":"plan-budget-guard","round_count":1,"budget_used":16593,"exhaustion_reason":"debate_global_cap_exceeded","caps":{"max_rounds":6,"round_token_cap":2500,"debate_global_cap":35000},"minimum_evidence_confidence":0.6,"default_policy_outcome":"block","human_override_allowed":true}
|
|
4
|
+
{"timestamp":"2026-05-14T15:55:11.581Z","schema_version":"1.0.0","contract_version":"1.0.0","event_type":"budget_exhausted","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25","debate_id":"plan-budget-guard","round_count":1,"budget_used":17161,"exhaustion_reason":"debate_global_cap_exceeded","caps":{"max_rounds":6,"round_token_cap":2500,"debate_global_cap":35000},"minimum_evidence_confidence":0.6,"default_policy_outcome":"block","human_override_allowed":true}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
{
|
|
2
|
+
"defaultProfile": "auto",
|
|
3
|
+
"debug": false,
|
|
4
|
+
"classifierModel": "opencode-go/qwen3.6-plus",
|
|
5
|
+
"phaseBias": 0.5,
|
|
6
|
+
"maxSessionBudget": 1.0,
|
|
7
|
+
"largeContextThreshold": 100000,
|
|
8
|
+
"rules": [
|
|
9
|
+
{
|
|
10
|
+
"matches": ["deploy", "production", "release"],
|
|
11
|
+
"tier": "high",
|
|
12
|
+
"reason": "Safety check for production tasks"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"matches": "changelog",
|
|
16
|
+
"tier": "low"
|
|
17
|
+
}
|
|
18
|
+
],
|
|
19
|
+
"profiles": {
|
|
20
|
+
"auto": {
|
|
21
|
+
"high": {
|
|
22
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
23
|
+
"thinking": "high",
|
|
24
|
+
"fallbacks": ["opencode-go/qwen3.6-plus", "opencode-go/kimi-k2.6"]
|
|
25
|
+
},
|
|
26
|
+
"medium": {
|
|
27
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
28
|
+
"thinking": "medium",
|
|
29
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
30
|
+
},
|
|
31
|
+
"low": {
|
|
32
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
33
|
+
"thinking": "low",
|
|
34
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
35
|
+
}
|
|
36
|
+
},
|
|
37
|
+
"cheap": {
|
|
38
|
+
"high": {
|
|
39
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
40
|
+
"thinking": "low",
|
|
41
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
42
|
+
},
|
|
43
|
+
"medium": {
|
|
44
|
+
"model": "opencode-go/qwen3.5-plus",
|
|
45
|
+
"thinking": "off",
|
|
46
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
47
|
+
},
|
|
48
|
+
"low": {
|
|
49
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
50
|
+
"thinking": "off",
|
|
51
|
+
"fallbacks": ["opencode-go/qwen3.5-plus"]
|
|
52
|
+
}
|
|
53
|
+
},
|
|
54
|
+
"deep": {
|
|
55
|
+
"high": {
|
|
56
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
57
|
+
"thinking": "xhigh",
|
|
58
|
+
"fallbacks": ["opencode-go/kimi-k2.6"]
|
|
59
|
+
},
|
|
60
|
+
"medium": {
|
|
61
|
+
"model": "opencode-go/kimi-k2.6",
|
|
62
|
+
"thinking": "medium",
|
|
63
|
+
"fallbacks": ["opencode-go/deepseek-v4-pro"]
|
|
64
|
+
},
|
|
65
|
+
"low": {
|
|
66
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
67
|
+
"thinking": "low",
|
|
68
|
+
"fallbacks": ["opencode-go/deepseek-v4-flash"]
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
{"timestamp":"2026-05-14T15:51:38.345Z","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773891854","plan_id":"plan-unknown","phase":"plan","trace_file":"/home/aryaniyaps/ai-projects/ultimate-pi/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773891854/trace.json"}
|
|
2
|
+
{"timestamp":"2026-05-14T15:52:14.312Z","run_id":"019e272f-3eef-7107-9712-ce281de55707-1778773912057","plan_id":"plan-unknown","phase":"plan","trace_file":"/home/aryaniyaps/ai-projects/ultimate-pi/.pi/harness/runs/019e272f-3eef-7107-9712-ce281de55707-1778773912057/trace.json"}
|
|
3
|
+
{"timestamp":"2026-05-14T15:55:25.166Z","run_id":"019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096","plan_id":"plan-unknown","phase":"plan","trace_file":"/home/aryaniyaps/ai-projects/ultimate-pi/.pi/harness/runs/019e2732-8651-74e5-9f5d-4d06c3105f25-1778774086096/trace.json"}
|
|
4
|
+
{"timestamp":"2026-05-14T16:36:47.569Z","run_id":"019e2758-b332-771b-ad6f-54d0d8478768-1778776600591","plan_id":"plan-unknown","phase":"plan","trace_file":"/home/aryaniyaps/ai-projects/ultimate-pi/.pi/harness/runs/019e2758-b332-771b-ad6f-54d0d8478768-1778776600591/trace.json"}
|
|
@@ -34,9 +34,9 @@
|
|
|
34
34
|
},
|
|
35
35
|
{
|
|
36
36
|
"name": "tooling",
|
|
37
|
-
"paths": ["scripts/*", "test/*"],
|
|
37
|
+
"paths": [".pi/scripts/*", "test/*"],
|
|
38
38
|
"order": 4,
|
|
39
|
-
"description": "
|
|
39
|
+
"description": "Harness CLI scripts and tests"
|
|
40
40
|
}
|
|
41
41
|
],
|
|
42
42
|
"boundaries": [
|
|
@@ -61,7 +61,7 @@
|
|
|
61
61
|
"reason": "Contracts are data-only JSON schemas; extensions implement behavior"
|
|
62
62
|
},
|
|
63
63
|
{
|
|
64
|
-
"from": "scripts/*",
|
|
64
|
+
"from": ".pi/scripts/*",
|
|
65
65
|
"to": ".agents/skills/*",
|
|
66
66
|
"reason": "CLI scripts stay independent of skill markdown"
|
|
67
67
|
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
{
|
|
2
|
+
"defaultProfile": "auto",
|
|
3
|
+
"debug": false,
|
|
4
|
+
"classifierModel": "opencode-go/qwen3.6-plus",
|
|
5
|
+
"phaseBias": 0.5,
|
|
6
|
+
"maxSessionBudget": 1.0,
|
|
7
|
+
"largeContextThreshold": 100000,
|
|
8
|
+
"rules": [
|
|
9
|
+
{
|
|
10
|
+
"matches": [
|
|
11
|
+
"deploy",
|
|
12
|
+
"production",
|
|
13
|
+
"release"
|
|
14
|
+
],
|
|
15
|
+
"tier": "high",
|
|
16
|
+
"reason": "Safety check for production tasks"
|
|
17
|
+
},
|
|
18
|
+
{
|
|
19
|
+
"matches": "changelog",
|
|
20
|
+
"tier": "low"
|
|
21
|
+
}
|
|
22
|
+
],
|
|
23
|
+
"profiles": {
|
|
24
|
+
"auto": {
|
|
25
|
+
"high": {
|
|
26
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
27
|
+
"thinking": "high",
|
|
28
|
+
"fallbacks": [
|
|
29
|
+
"opencode-go/qwen3.6-plus",
|
|
30
|
+
"opencode-go/kimi-k2.6"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"medium": {
|
|
34
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
35
|
+
"thinking": "medium",
|
|
36
|
+
"fallbacks": [
|
|
37
|
+
"opencode-go/deepseek-v4-pro"
|
|
38
|
+
]
|
|
39
|
+
},
|
|
40
|
+
"low": {
|
|
41
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
42
|
+
"thinking": "low",
|
|
43
|
+
"fallbacks": [
|
|
44
|
+
"opencode-go/qwen3.5-plus"
|
|
45
|
+
]
|
|
46
|
+
}
|
|
47
|
+
},
|
|
48
|
+
"cheap": {
|
|
49
|
+
"high": {
|
|
50
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
51
|
+
"thinking": "low",
|
|
52
|
+
"fallbacks": [
|
|
53
|
+
"opencode-go/qwen3.5-plus"
|
|
54
|
+
]
|
|
55
|
+
},
|
|
56
|
+
"medium": {
|
|
57
|
+
"model": "opencode-go/qwen3.5-plus",
|
|
58
|
+
"thinking": "off",
|
|
59
|
+
"fallbacks": [
|
|
60
|
+
"opencode-go/deepseek-v4-flash"
|
|
61
|
+
]
|
|
62
|
+
},
|
|
63
|
+
"low": {
|
|
64
|
+
"model": "opencode-go/deepseek-v4-flash",
|
|
65
|
+
"thinking": "off",
|
|
66
|
+
"fallbacks": [
|
|
67
|
+
"opencode-go/qwen3.5-plus"
|
|
68
|
+
]
|
|
69
|
+
}
|
|
70
|
+
},
|
|
71
|
+
"deep": {
|
|
72
|
+
"high": {
|
|
73
|
+
"model": "opencode-go/deepseek-v4-pro",
|
|
74
|
+
"thinking": "xhigh",
|
|
75
|
+
"fallbacks": [
|
|
76
|
+
"opencode-go/kimi-k2.6"
|
|
77
|
+
]
|
|
78
|
+
},
|
|
79
|
+
"medium": {
|
|
80
|
+
"model": "opencode-go/kimi-k2.6",
|
|
81
|
+
"thinking": "medium",
|
|
82
|
+
"fallbacks": [
|
|
83
|
+
"opencode-go/deepseek-v4-pro"
|
|
84
|
+
]
|
|
85
|
+
},
|
|
86
|
+
"low": {
|
|
87
|
+
"model": "opencode-go/qwen3.6-plus",
|
|
88
|
+
"thinking": "low",
|
|
89
|
+
"fallbacks": [
|
|
90
|
+
"opencode-go/deepseek-v4-flash"
|
|
91
|
+
]
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|