ultimate-pi 0.1.7 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/graphify/.graphify_version +1 -0
- package/.agents/skills/graphify/SKILL.md +1204 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +225 -97
- package/.agents/skills/wiki-autoresearch/references/program.md +28 -62
- package/.agents/skills/wiki-autoresearch/references/quality-sites.md +32 -0
- package/.env.example +5 -1
- package/.gitattributes +1 -0
- package/.github/workflows/publish-github-packages.yml +1 -1
- package/.pi/SYSTEM.md +72 -18
- package/.pi/agents/harness/adversary.md +32 -0
- package/.pi/agents/harness/evaluator.md +32 -0
- package/.pi/agents/harness/executor.md +34 -0
- package/.pi/agents/harness/meta-optimizer.md +33 -0
- package/.pi/agents/harness/planner.md +33 -0
- package/.pi/agents/harness/tie-breaker.md +35 -0
- package/.pi/agents/harness/trace-librarian.md +32 -0
- package/.pi/extensions/banner.png +0 -0
- package/.pi/extensions/budget-guard.ts +265 -0
- package/.pi/extensions/custom-footer.ts +194 -22
- package/.pi/extensions/custom-header.ts +47 -9
- package/.pi/extensions/debate-orchestrator.ts +479 -0
- package/.pi/extensions/harness-live-widget.ts +438 -0
- package/.pi/extensions/policy-gate.ts +349 -0
- package/.pi/extensions/review-integrity.ts +198 -0
- package/.pi/extensions/test-diff-integrity.ts +240 -0
- package/.pi/extensions/trace-recorder.ts +315 -0
- package/.pi/harness/README.md +23 -0
- package/.pi/harness/router/README.md +35 -0
- package/.pi/harness/router/apply-router-proposal.mjs +153 -0
- package/.pi/harness/router/propose-router-tuning.mjs +149 -0
- package/.pi/harness/specs/README.md +37 -0
- package/.pi/harness/specs/adversary-report.schema.json +53 -0
- package/.pi/harness/specs/budget-exhausted-event.schema.json +93 -0
- package/.pi/harness/specs/consensus-packet.schema.json +175 -0
- package/.pi/harness/specs/eval-verdict.schema.json +59 -0
- package/.pi/harness/specs/incident-record.schema.json +84 -0
- package/.pi/harness/specs/plan-packet.schema.json +90 -0
- package/.pi/harness/specs/round-result.schema.json +126 -0
- package/.pi/harness/specs/router-tuning-proposal.schema.json +114 -0
- package/.pi/harness/specs/run-trace.schema.json +107 -0
- package/.pi/lib/harness-ui-state.ts +311 -0
- package/.pi/mcp.json +4 -0
- package/.pi/model-router.json +93 -93
- package/.pi/prompts/graphify.md +23 -0
- package/.pi/prompts/harness-abort.md +41 -0
- package/.pi/prompts/harness-auto.md +83 -0
- package/.pi/prompts/harness-critic.md +52 -0
- package/.pi/prompts/harness-eval.md +51 -0
- package/.pi/prompts/harness-incident.md +51 -0
- package/.pi/prompts/harness-plan.md +64 -0
- package/.pi/prompts/harness-review.md +52 -0
- package/.pi/prompts/harness-router-tune.md +74 -0
- package/.pi/prompts/harness-run.md +59 -0
- package/.pi/prompts/harness-setup.md +316 -216
- package/.pi/prompts/harness-trace.md +51 -0
- package/.pi/prompts/wiki-autoresearch.md +9 -7
- package/.pi/prompts/wiki-save.md +20 -0
- package/.pi/skills/agent-router/SKILL.md +2 -4
- package/.pi/skills/ast-grep/SKILL.md +354 -0
- package/.pi/sounds/project-sounds.json +18 -24
- package/AGENTS.md +30 -0
- package/CHANGELOG.md +89 -0
- package/CONTRIBUTING.md +51 -1
- package/README.md +264 -20
- package/biome.json +8 -2
- package/lefthook.yml +3 -2
- package/node_modules/@sting8k/pi-vcc/README.md +200 -0
- package/node_modules/@sting8k/pi-vcc/index.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/package.json +26 -0
- package/node_modules/@sting8k/pi-vcc/scripts/audit-sessions.ts +88 -0
- package/node_modules/@sting8k/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
- package/node_modules/@sting8k/pi-vcc/scripts/compare-before-after.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/scripts/dump-branch-output.ts +20 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/pi-vcc.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/vcc-recall.ts +65 -0
- package/node_modules/@sting8k/pi-vcc/src/core/brief.ts +381 -0
- package/node_modules/@sting8k/pi-vcc/src/core/build-sections.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/core/content.ts +60 -0
- package/node_modules/@sting8k/pi-vcc/src/core/filter-noise.ts +42 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format-recall.ts +27 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format.ts +49 -0
- package/node_modules/@sting8k/pi-vcc/src/core/lineage.ts +26 -0
- package/node_modules/@sting8k/pi-vcc/src/core/load-messages.ts +41 -0
- package/node_modules/@sting8k/pi-vcc/src/core/normalize.ts +66 -0
- package/node_modules/@sting8k/pi-vcc/src/core/recall-scope.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/core/render-entries.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/core/report.ts +237 -0
- package/node_modules/@sting8k/pi-vcc/src/core/sanitize.ts +5 -0
- package/node_modules/@sting8k/pi-vcc/src/core/search-entries.ts +221 -0
- package/node_modules/@sting8k/pi-vcc/src/core/settings.ts +77 -0
- package/node_modules/@sting8k/pi-vcc/src/core/skill-collapse.ts +35 -0
- package/node_modules/@sting8k/pi-vcc/src/core/summarize.ts +157 -0
- package/node_modules/@sting8k/pi-vcc/src/core/tool-args.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/details.ts +7 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/commits.ts +69 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/files.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/goals.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/preferences.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/hooks/before-compact.ts +322 -0
- package/node_modules/@sting8k/pi-vcc/src/sections.ts +12 -0
- package/node_modules/@sting8k/pi-vcc/src/tools/recall.ts +109 -0
- package/node_modules/@sting8k/pi-vcc/src/types.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact-hook.test.ts +181 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact.test.ts +140 -0
- package/node_modules/@sting8k/pi-vcc/tests/brief.test.ts +206 -0
- package/node_modules/@sting8k/pi-vcc/tests/build-sections.test.ts +59 -0
- package/node_modules/@sting8k/pi-vcc/tests/compile.test.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/tests/content.test.ts +31 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-goals.test.ts +86 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-preferences.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/filter-noise.test.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/fixtures.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/format-recall.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/format.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/lineage.test.ts +33 -0
- package/node_modules/@sting8k/pi-vcc/tests/load-messages.test.ts +51 -0
- package/node_modules/@sting8k/pi-vcc/tests/normalize.test.ts +97 -0
- package/node_modules/@sting8k/pi-vcc/tests/real-sessions.test.ts +38 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-expand.test.ts +15 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-scope.test.ts +32 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
- package/node_modules/@sting8k/pi-vcc/tests/render-entries.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/report.test.ts +44 -0
- package/node_modules/@sting8k/pi-vcc/tests/sanitize.test.ts +24 -0
- package/node_modules/@sting8k/pi-vcc/tests/search-entries.test.ts +144 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/load-session.ts +23 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/real-sessions.ts +51 -0
- package/package.json +15 -4
- package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
- package/scripts/index_youtube_urls.py +376 -0
- package/scripts/merge_graphify_corpora.py +398 -0
- package/scripts/regen_graphify_html.py +46 -0
- package/.agents/skills/defuddle/SKILL.md +0 -90
- package/.agents/skills/wiki/SKILL.md +0 -215
- package/.agents/skills/wiki/references/css-snippets.md +0 -122
- package/.agents/skills/wiki/references/frontmatter.md +0 -107
- package/.agents/skills/wiki/references/git-setup.md +0 -58
- package/.agents/skills/wiki/references/mcp-setup.md +0 -149
- package/.agents/skills/wiki/references/modes.md +0 -259
- package/.agents/skills/wiki/references/plugins.md +0 -96
- package/.agents/skills/wiki/references/rest-api.md +0 -124
- package/.agents/skills/wiki-fold/SKILL.md +0 -204
- package/.agents/skills/wiki-fold/references/fold-template.md +0 -133
- package/.agents/skills/wiki-ingest/SKILL.md +0 -288
- package/.agents/skills/wiki-lint/SKILL.md +0 -183
- package/.agents/skills/wiki-query/SKILL.md +0 -176
- package/.pi/agents/rethink.md +0 -140
- package/.pi/agents/wiki-ingest.md +0 -67
- package/.pi/agents/wiki-lint.md +0 -75
- package/.pi/internal/cursor-sdk-transcript-parser.ts +0 -59
- package/.pi/prompts/save.md +0 -16
- package/.pi/prompts/wiki.md +0 -23
- package/.pi/providers/cursor-sdk-provider.test.mjs +0 -476
- package/.pi/providers/cursor-sdk-provider.ts +0 -1085
- package/vault/AGENTS.md +0 -37
- package/vault/wiki/_templates/comparison.md +0 -39
- package/vault/wiki/_templates/concept.md +0 -40
- package/vault/wiki/_templates/decision.md +0 -21
- package/vault/wiki/_templates/entity.md +0 -32
- package/vault/wiki/_templates/flow.md +0 -14
- package/vault/wiki/_templates/module.md +0 -18
- package/vault/wiki/_templates/question.md +0 -31
- package/vault/wiki/_templates/source.md +0 -39
- package/vault/wiki/concepts/AST-Aware Code Chunking.md +0 -44
- package/vault/wiki/concepts/Build-Time Prompt Compilation.md +0 -107
- package/vault/wiki/concepts/Context Engine (AI Coding).md +0 -47
- package/vault/wiki/concepts/Context-Aware System Reminders.md +0 -61
- package/vault/wiki/concepts/Contextualized Text Embedding.md +0 -42
- package/vault/wiki/concepts/Contractor vs Employee AI Model.md +0 -55
- package/vault/wiki/concepts/Dual-Model Agent Architecture.md +0 -65
- package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +0 -43
- package/vault/wiki/concepts/Majority Vote Ensembling.md +0 -68
- package/vault/wiki/concepts/Meta-Harness.md +0 -16
- package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +0 -75
- package/vault/wiki/concepts/Prompt Enhancement.md +0 -90
- package/vault/wiki/concepts/Prompt Renderer.md +0 -89
- package/vault/wiki/concepts/Semantic Codebase Indexing.md +0 -67
- package/vault/wiki/concepts/additive-config-hierarchy.md +0 -16
- package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +0 -71
- package/vault/wiki/concepts/agent-browser-browser-automation.md +0 -99
- package/vault/wiki/concepts/agent-codebase-interface.md +0 -43
- package/vault/wiki/concepts/agent-harness-architecture.md +0 -67
- package/vault/wiki/concepts/agent-loop-detection-patterns.md +0 -133
- package/vault/wiki/concepts/agent-search-enforcement.md +0 -126
- package/vault/wiki/concepts/agent-skills-ecosystem.md +0 -74
- package/vault/wiki/concepts/agent-skills-pattern.md +0 -68
- package/vault/wiki/concepts/agentic-harness-context-enforcement.md +0 -91
- package/vault/wiki/concepts/agentic-harness.md +0 -34
- package/vault/wiki/concepts/agentic-orchestration-pipeline.md +0 -56
- package/vault/wiki/concepts/agentic-search-no-embeddings.md +0 -18
- package/vault/wiki/concepts/anthropic-context-engineering.md +0 -13
- package/vault/wiki/concepts/antigravity-agent-first-architecture.md +0 -61
- package/vault/wiki/concepts/ast-compression.md +0 -19
- package/vault/wiki/concepts/ast-truncation.md +0 -66
- package/vault/wiki/concepts/barrel-files.md +0 -37
- package/vault/wiki/concepts/browser-harness-agent.md +0 -41
- package/vault/wiki/concepts/browser-subagent-visual-verification.md +0 -82
- package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +0 -192
- package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +0 -161
- package/vault/wiki/concepts/codebase-to-context-ingestion.md +0 -46
- package/vault/wiki/concepts/codex-harness-innovations.md +0 -147
- package/vault/wiki/concepts/consensus-debate-flow.md +0 -17
- package/vault/wiki/concepts/consensus-debate.md +0 -206
- package/vault/wiki/concepts/content-addressed-spec-identity.md +0 -166
- package/vault/wiki/concepts/context-anxiety.md +0 -57
- package/vault/wiki/concepts/context-compression-techniques.md +0 -19
- package/vault/wiki/concepts/context-continuity.md +0 -22
- package/vault/wiki/concepts/context-drift-in-agents.md +0 -106
- package/vault/wiki/concepts/context-engineering.md +0 -62
- package/vault/wiki/concepts/context-folding.md +0 -67
- package/vault/wiki/concepts/context-mode.md +0 -38
- package/vault/wiki/concepts/cursor-harness-innovations.md +0 -107
- package/vault/wiki/concepts/deterministic-session-compaction.md +0 -79
- package/vault/wiki/concepts/drift-detection-unified.md +0 -296
- package/vault/wiki/concepts/execution-feedback-loop.md +0 -46
- package/vault/wiki/concepts/feedforward-feedback-harness.md +0 -60
- package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +0 -40
- package/vault/wiki/concepts/fork-safe-spec-storage.md +0 -89
- package/vault/wiki/concepts/fts5-sandbox.md +0 -19
- package/vault/wiki/concepts/fuzzy-edit-matching.md +0 -71
- package/vault/wiki/concepts/gemini-cli-architecture.md +0 -104
- package/vault/wiki/concepts/generator-evaluator-architecture.md +0 -64
- package/vault/wiki/concepts/guardian-agent-pattern.md +0 -67
- package/vault/wiki/concepts/harness-configuration-layers.md +0 -89
- package/vault/wiki/concepts/harness-control-frameworks.md +0 -155
- package/vault/wiki/concepts/harness-engineering-first-principles.md +0 -90
- package/vault/wiki/concepts/harness-h-formalism.md +0 -53
- package/vault/wiki/concepts/hybrid-code-search.md +0 -61
- package/vault/wiki/concepts/inline-post-edit-validation.md +0 -112
- package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +0 -110
- package/vault/wiki/concepts/lifecycle-hooks.md +0 -94
- package/vault/wiki/concepts/mcp-tool-routing.md +0 -102
- package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +0 -47
- package/vault/wiki/concepts/meta-agent-context-pruning.md +0 -151
- package/vault/wiki/concepts/model-adaptive-harness.md +0 -122
- package/vault/wiki/concepts/model-routing-agents.md +0 -101
- package/vault/wiki/concepts/monorepo-architecture.md +0 -45
- package/vault/wiki/concepts/multi-agent-specialization.md +0 -61
- package/vault/wiki/concepts/permission-subsystem.md +0 -16
- package/vault/wiki/concepts/pi-messenger-analysis.md +0 -243
- package/vault/wiki/concepts/pi-vscode-extension-landscape.md +0 -37
- package/vault/wiki/concepts/policy-engine-pattern.md +0 -78
- package/vault/wiki/concepts/progressive-disclosure-agents.md +0 -53
- package/vault/wiki/concepts/progressive-skill-disclosure.md +0 -17
- package/vault/wiki/concepts/provider-native-prompting.md +0 -203
- package/vault/wiki/concepts/quality-signal-sentrux.md +0 -37
- package/vault/wiki/concepts/repo-map-ranking.md +0 -42
- package/vault/wiki/concepts/result-monad-error-handling.md +0 -47
- package/vault/wiki/concepts/safety-defense-in-depth.md +0 -83
- package/vault/wiki/concepts/sandbox-os-enforcement.md +0 -18
- package/vault/wiki/concepts/selective-debate-routing.md +0 -70
- package/vault/wiki/concepts/self-evolving-harness.md +0 -60
- package/vault/wiki/concepts/sentrux-mcp-integration.md +0 -36
- package/vault/wiki/concepts/sentrux-rules-engine.md +0 -49
- package/vault/wiki/concepts/shell-pattern-compression.md +0 -24
- package/vault/wiki/concepts/skill-first-architecture.md +0 -166
- package/vault/wiki/concepts/structured-compaction.md +0 -78
- package/vault/wiki/concepts/subagent-orchestration.md +0 -17
- package/vault/wiki/concepts/subagent-worktree-isolation.md +0 -68
- package/vault/wiki/concepts/superpowers-methodology.md +0 -78
- package/vault/wiki/concepts/think-in-code.md +0 -73
- package/vault/wiki/concepts/ts-execution-layer.md +0 -100
- package/vault/wiki/concepts/typescript-strict-mode.md +0 -37
- package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +0 -53
- package/vault/wiki/concepts/verification-drift-detection.md +0 -19
- package/vault/wiki/consensus/consensus-records.md +0 -58
- package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +0 -122
- package/vault/wiki/decisions/2026-05-07-replace-lean-ctx-with-context-mode.md +0 -59
- package/vault/wiki/decisions/adr-008.md +0 -40
- package/vault/wiki/decisions/adr-009.md +0 -46
- package/vault/wiki/decisions/adr-010.md +0 -55
- package/vault/wiki/decisions/adr-011.md +0 -165
- package/vault/wiki/decisions/adr-012.md +0 -102
- package/vault/wiki/decisions/adr-013.md +0 -59
- package/vault/wiki/decisions/adr-014.md +0 -73
- package/vault/wiki/decisions/adr-015.md +0 -81
- package/vault/wiki/decisions/adr-016.md +0 -91
- package/vault/wiki/decisions/adr-017.md +0 -79
- package/vault/wiki/decisions/adr-018.md +0 -100
- package/vault/wiki/decisions/adr-019.md +0 -75
- package/vault/wiki/decisions/adr-020.md +0 -106
- package/vault/wiki/decisions/adr-021.md +0 -86
- package/vault/wiki/decisions/adr-022.md +0 -113
- package/vault/wiki/decisions/adr-023.md +0 -113
- package/vault/wiki/decisions/adr-024.md +0 -73
- package/vault/wiki/decisions/adr-025.md +0 -130
- package/vault/wiki/decisions/adr-026.md +0 -56
- package/vault/wiki/decisions/adr-027.md +0 -94
- package/vault/wiki/decisions/colocate-wiki.md +0 -34
- package/vault/wiki/entities/Anders Hejlsberg.md +0 -29
- package/vault/wiki/entities/Anthropic.md +0 -17
- package/vault/wiki/entities/Augment Code.md +0 -49
- package/vault/wiki/entities/Bjarne Stroustrup.md +0 -26
- package/vault/wiki/entities/Bolt.new (StackBlitz).md +0 -39
- package/vault/wiki/entities/Boris Cherny.md +0 -11
- package/vault/wiki/entities/Claude Code.md +0 -19
- package/vault/wiki/entities/Dennis Ritchie.md +0 -26
- package/vault/wiki/entities/Emergent Labs.md +0 -32
- package/vault/wiki/entities/Google Cloud.md +0 -16
- package/vault/wiki/entities/Guido van Rossum.md +0 -28
- package/vault/wiki/entities/Ken Thompson.md +0 -28
- package/vault/wiki/entities/Lee et al.md +0 -16
- package/vault/wiki/entities/Linus Torvalds.md +0 -28
- package/vault/wiki/entities/Lovable (company).md +0 -40
- package/vault/wiki/entities/Martin Fowler.md +0 -16
- package/vault/wiki/entities/Meng et al.md +0 -16
- package/vault/wiki/entities/OpenAI.md +0 -16
- package/vault/wiki/entities/Rocket.new.md +0 -38
- package/vault/wiki/entities/VILA-Lab.md +0 -15
- package/vault/wiki/entities/autodev-codebase.md +0 -18
- package/vault/wiki/entities/ck-tool.md +0 -59
- package/vault/wiki/entities/codesearch.md +0 -18
- package/vault/wiki/entities/disler-indydevdan.md +0 -33
- package/vault/wiki/entities/gsd-get-shit-done.md +0 -56
- package/vault/wiki/entities/javascript-runtimes.md +0 -48
- package/vault/wiki/entities/jesse-vincent.md +0 -38
- package/vault/wiki/entities/lean-ctx.md +0 -32
- package/vault/wiki/entities/opendev.md +0 -41
- package/vault/wiki/entities/ops-codegraph-tool.md +0 -18
- package/vault/wiki/entities/pi-coding-agent.md +0 -53
- package/vault/wiki/entities/sentrux.md +0 -54
- package/vault/wiki/entities/vgrep-tool.md +0 -57
- package/vault/wiki/entities/vitest.md +0 -41
- package/vault/wiki/flows/harness-wiki-pipeline.md +0 -204
- package/vault/wiki/hot.md +0 -932
- package/vault/wiki/index.md +0 -437
- package/vault/wiki/log.md +0 -422
- package/vault/wiki/meta/dashboard.md +0 -30
- package/vault/wiki/meta/lint-report-2026-04-30.md +0 -86
- package/vault/wiki/meta/lint-report-2026-05-02.md +0 -251
- package/vault/wiki/meta/overview.canvas +0 -43
- package/vault/wiki/modules/adversarial-verification.md +0 -57
- package/vault/wiki/modules/automated-observability.md +0 -54
- package/vault/wiki/modules/bench.md +0 -20
- package/vault/wiki/modules/extensions.md +0 -23
- package/vault/wiki/modules/grounding-checkpoints.md +0 -62
- package/vault/wiki/modules/harness-implementation-plan.md +0 -345
- package/vault/wiki/modules/harness-wiki-skill-mapping.md +0 -135
- package/vault/wiki/modules/harness.md +0 -86
- package/vault/wiki/modules/persistent-memory.md +0 -85
- package/vault/wiki/modules/schema-orchestration.md +0 -68
- package/vault/wiki/modules/skills.md +0 -27
- package/vault/wiki/modules/spec-hardening.md +0 -58
- package/vault/wiki/modules/structured-planning.md +0 -53
- package/vault/wiki/modules/think-in-code-enforcement.md +0 -153
- package/vault/wiki/modules/wiki-query-interface.md +0 -64
- package/vault/wiki/overview.md +0 -51
- package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +0 -87
- package/vault/wiki/questions/Research-sentrux-dev.md +0 -123
- package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +0 -164
- package/vault/wiki/questions/Research: Augment Code Context Engine.md +0 -244
- package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +0 -112
- package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +0 -209
- package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +0 -99
- package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +0 -107
- package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +0 -72
- package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +0 -166
- package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +0 -188
- package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +0 -120
- package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +0 -236
- package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +0 -95
- package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +0 -165
- package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +0 -216
- package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +0 -91
- package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +0 -88
- package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +0 -81
- package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +0 -71
- package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +0 -80
- package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +0 -72
- package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +0 -92
- package/vault/wiki/questions/Research: executor.sh Harness Integration.md +0 -170
- package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +0 -97
- package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +0 -80
- package/vault/wiki/questions/Research: pi-vcc.md +0 -113
- package/vault/wiki/questions/Research: semantic code search tools.md +0 -69
- package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +0 -73
- package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +0 -111
- package/vault/wiki/questions/mvp-implementation-blueprint.md +0 -552
- package/vault/wiki/questions/research-agent-first-codebase-exploration.md +0 -199
- package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +0 -142
- package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +0 -100
- package/vault/wiki/questions/research-wozcode-token-reduction.md +0 -67
- package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +0 -95
- package/vault/wiki/questions/resolved-context-window-economics.md +0 -167
- package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +0 -126
- package/vault/wiki/questions/resolved-mcp-tool-preference.md +0 -112
- package/vault/wiki/questions/resolved-small-model-meta-agents.md +0 -107
- package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +0 -95
- package/vault/wiki/sources/Auggie Context MCP Server.md +0 -63
- package/vault/wiki/sources/Augment Code Codacy AI Giants.md +0 -61
- package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +0 -49
- package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +0 -55
- package/vault/wiki/sources/Augment Context Engine Official.md +0 -71
- package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +0 -74
- package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +0 -58
- package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +0 -75
- package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +0 -85
- package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +0 -58
- package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +0 -100
- package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +0 -75
- package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +0 -107
- package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +0 -70
- package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +0 -88
- package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +0 -57
- package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +0 -53
- package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +0 -65
- package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +0 -83
- package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +0 -70
- package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +0 -58
- package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +0 -101
- package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +0 -100
- package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +0 -53
- package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +0 -70
- package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +0 -71
- package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +0 -89
- package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +0 -155
- package/vault/wiki/sources/Source: browser-harness CDP Harness.md +0 -126
- package/vault/wiki/sources/agent-drift-academic-paper.md +0 -79
- package/vault/wiki/sources/aider-repomap-tree-sitter.md +0 -42
- package/vault/wiki/sources/anthropic-compaction-api.md +0 -58
- package/vault/wiki/sources/anthropic-effective-harnesses.md +0 -42
- package/vault/wiki/sources/anthropic-prompt-best-practices.md +0 -100
- package/vault/wiki/sources/anthropic2026-harness-design.md +0 -63
- package/vault/wiki/sources/barrel-files-tkdodo.md +0 -38
- package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +0 -57
- package/vault/wiki/sources/bockeler2026-harness-engineering.md +0 -69
- package/vault/wiki/sources/cast-code-chunking-paper.md +0 -50
- package/vault/wiki/sources/ck-semantic-search.md +0 -78
- package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +0 -71
- package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +0 -50
- package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +0 -64
- package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +0 -70
- package/vault/wiki/sources/claude-context-editing-docs.md +0 -13
- package/vault/wiki/sources/cloudflare-codemode.md +0 -63
- package/vault/wiki/sources/code-chunk-library-supermemory.md +0 -63
- package/vault/wiki/sources/codeact-apple-2024.md +0 -62
- package/vault/wiki/sources/codex-dsc-rfc-8573.md +0 -41
- package/vault/wiki/sources/codex-open-source-agent-2026.md +0 -110
- package/vault/wiki/sources/coir-code-retrieval-benchmark.md +0 -51
- package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +0 -48
- package/vault/wiki/sources/context-folding-paper.md +0 -61
- package/vault/wiki/sources/context-mode-website.md +0 -63
- package/vault/wiki/sources/cursor-agent-best-practices-2026.md +0 -62
- package/vault/wiki/sources/cursor-fork-29b-2025.md +0 -50
- package/vault/wiki/sources/cursor-harness-april-2026.md +0 -76
- package/vault/wiki/sources/cursor-instant-apply-2024.md +0 -45
- package/vault/wiki/sources/cursor-shadow-workspace-2024.md +0 -52
- package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +0 -53
- package/vault/wiki/sources/cursor-vs-antigravity-2026.md +0 -51
- package/vault/wiki/sources/disler-pi-vs-claude-code.md +0 -69
- package/vault/wiki/sources/distill-deterministic-context-compression.md +0 -53
- package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +0 -48
- package/vault/wiki/sources/executor-rhyssullivan.md +0 -122
- package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +0 -125
- package/vault/wiki/sources/fan2025-imad.md +0 -60
- package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +0 -63
- package/vault/wiki/sources/gemini-3-prompting-guide.md +0 -78
- package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +0 -50
- package/vault/wiki/sources/gh-sub-issue-extension.md +0 -72
- package/vault/wiki/sources/github-fork-issues-discussion.md +0 -44
- package/vault/wiki/sources/github-issue-dependencies-docs.md +0 -49
- package/vault/wiki/sources/github-sub-issues-docs.md +0 -51
- package/vault/wiki/sources/gitingest.md +0 -91
- package/vault/wiki/sources/gitreverse.md +0 -63
- package/vault/wiki/sources/google-antigravity-official-blog.md +0 -47
- package/vault/wiki/sources/google-antigravity-wikipedia.md +0 -53
- package/vault/wiki/sources/gsd-codecentric-deep-dive.md +0 -57
- package/vault/wiki/sources/gsd-github-repo.md +0 -51
- package/vault/wiki/sources/gsd-hn-discussion.md +0 -59
- package/vault/wiki/sources/guido-python-design-philosophy.md +0 -56
- package/vault/wiki/sources/hejlsberg-7-learnings.md +0 -48
- package/vault/wiki/sources/ironclaw-drift-monitor.md +0 -80
- package/vault/wiki/sources/langsight-loop-detection.md +0 -80
- package/vault/wiki/sources/leanctx-website.md +0 -69
- package/vault/wiki/sources/lee2026-meta-harness.md +0 -59
- package/vault/wiki/sources/linux-kernel-coding-workflow.md +0 -50
- package/vault/wiki/sources/lou2026-autoharness.md +0 -53
- package/vault/wiki/sources/martin-fowler-harness-engineering.md +0 -73
- package/vault/wiki/sources/mcp-architecture-docs.md +0 -13
- package/vault/wiki/sources/meng2026-agent-harness-survey.md +0 -79
- package/vault/wiki/sources/mindstudio-four-agent-types.md +0 -68
- package/vault/wiki/sources/ms-chat-history-management.md +0 -13
- package/vault/wiki/sources/openai-prompt-guidance.md +0 -104
- package/vault/wiki/sources/openclaw-session-pruning.md +0 -13
- package/vault/wiki/sources/opencode-dcp.md +0 -13
- package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +0 -79
- package/vault/wiki/sources/openhands-platform.md +0 -39
- package/vault/wiki/sources/oss-guide-codebase-exploration.md +0 -53
- package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +0 -102
- package/vault/wiki/sources/pi-context-prune-github-repo.md +0 -38
- package/vault/wiki/sources/pi-mono-compaction-docs.md +0 -38
- package/vault/wiki/sources/pi-omni-compact-github-repo.md +0 -50
- package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +0 -45
- package/vault/wiki/sources/pi-vcc-github-repo.md +0 -69
- package/vault/wiki/sources/pi-vscode-marketplace.md +0 -41
- package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +0 -39
- package/vault/wiki/sources/py-tree-sitter.md +0 -13
- package/vault/wiki/sources/sentrux-dev-landing.md +0 -40
- package/vault/wiki/sources/sentrux-docs-pro-architecture.md +0 -75
- package/vault/wiki/sources/sentrux-docs-quality-signal.md +0 -46
- package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +0 -57
- package/vault/wiki/sources/sentrux-docs-rules-engine.md +0 -58
- package/vault/wiki/sources/sentrux-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-release-blog.md +0 -54
- package/vault/wiki/sources/superpowers-termdock-analysis.md +0 -45
- package/vault/wiki/sources/swe-agent-aci.md +0 -42
- package/vault/wiki/sources/swe-bench.md +0 -45
- package/vault/wiki/sources/swe-pruner-context-pruning.md +0 -13
- package/vault/wiki/sources/think-in-code-blog.md +0 -48
- package/vault/wiki/sources/tree-sitter-docs.md +0 -13
- package/vault/wiki/sources/ts-best-practices-2025-devto.md +0 -42
- package/vault/wiki/sources/ts-folder-structure-mingyang.md +0 -58
- package/vault/wiki/sources/ts-monorepo-koerselman.md +0 -44
- package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +0 -52
- package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +0 -42
- package/vault/wiki/sources/ts-strict-mode-rishikc.md +0 -43
- package/vault/wiki/sources/unix-philosophy.md +0 -48
- package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +0 -39
- package/vault/wiki/sources/vectara-guardian-agents.md +0 -79
- package/vault/wiki/sources/vgrep-semantic-search.md +0 -76
- package/vault/wiki/sources/vitest-official.md +0 -41
- package/vault/wiki/sources/vscode-pi-community-extension.md +0 -40
- package/vault/wiki/sources/wozcode.md +0 -79
package/.pi/SYSTEM.md
CHANGED
|
@@ -4,8 +4,8 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
## Voice
|
|
7
|
-
-
|
|
8
|
-
-
|
|
7
|
+
- Default to concise, direct language.
|
|
8
|
+
- Use caveman mode only when the user explicitly asks for it.
|
|
9
9
|
- Keep commands, paths, code, logs exact.
|
|
10
10
|
|
|
11
11
|
## Primary Goal
|
|
@@ -28,10 +28,10 @@ You are an enterprise coding agent. Optimize for correctness, minimal diffs, and
|
|
|
28
28
|
### API / Library Docs — context7 ONLY
|
|
29
29
|
- `ctx7 library <name> <query>` then `ctx7 docs <id> <query>`
|
|
30
30
|
- context7 owns: function signatures, class APIs, config options, stdlib, framework specs.
|
|
31
|
-
- **Never** use
|
|
31
|
+
- **Never** use quality-sites for API docs.
|
|
32
32
|
|
|
33
33
|
### All Non-API Web Fetch — Firecrawl CLI
|
|
34
|
-
See `.
|
|
34
|
+
See `.agents/skills/firecrawl/SKILL.md` for workflow escalation.
|
|
35
35
|
|
|
36
36
|
| Task | Command |
|
|
37
37
|
|------|---------|
|
|
@@ -44,29 +44,83 @@ See `.pi/skills/firecrawl` for workflow escalation.
|
|
|
44
44
|
| Parse local docs | `firecrawl parse <file> -o .firecrawl/parsed.md` |
|
|
45
45
|
|
|
46
46
|
- **Search:** firecrawl search only (no DuckDuckGo).
|
|
47
|
-
- **Post-clean (optional):** `
|
|
48
|
-
- **Quality sites:** check `.
|
|
47
|
+
- **Post-clean (optional):** `firecrawl parse <file> -o .firecrawl/parsed.md` if output has boilerplate.
|
|
48
|
+
- **Quality sites:** check `.agents/skills/wiki-autoresearch/references/quality-sites.md` before citing non-API sources. Prefer Tier 1 (StackOverflow, GitHub issues, engineering blogs, arxiv). Exclude AI content farms, mirrors, stale packages.
|
|
49
|
+
- **Research:** use `/wiki-autoresearch <topic>` for deep research. Results are graphified into `graphify-out/`.
|
|
49
50
|
|
|
50
51
|
### Missing CLI fallbacks
|
|
51
52
|
- Firecrawl missing: `npx firecrawl --help || npm install -g firecrawl-cli@latest`
|
|
52
|
-
- Defuddle missing: `npm install -g defuddle-cli`
|
|
53
53
|
- Context7 missing: `npm install -g ctx7@latest`
|
|
54
54
|
|
|
55
55
|
---
|
|
56
|
-
##
|
|
57
|
-
|
|
58
|
-
> [!
|
|
59
|
-
> **
|
|
56
|
+
## Graphify-First Workflow (Mandatory)
|
|
57
|
+
|
|
58
|
+
> [!tip] Graph before grep
|
|
59
|
+
> **Always** build or consult the Graphify knowledge graph before codebase exploration.
|
|
60
|
+
> The graph reveals structure, god nodes, and surprising connections that raw
|
|
61
|
+
> search cannot. 71.5× token reduction on mixed corpora.
|
|
62
|
+
|
|
63
|
+
### Graphify Knowledge Graph
|
|
64
|
+
|
|
65
|
+
Graphify builds a queryable knowledge graph from code, docs, papers, and diagrams.
|
|
66
|
+
It identifies core concepts (god nodes), community structure, and cross-domain
|
|
67
|
+
connections via tree-sitter AST analysis + LLM semantic extraction.
|
|
68
|
+
|
|
69
|
+
| Step | Command | When |
|
|
70
|
+
|------|---------|------|
|
|
71
|
+
| Build graph | `graphify .` | First session, or after major code changes |
|
|
72
|
+
| Update graph | `graphify . --update` | After a few file changes (incremental) |
|
|
73
|
+
| Query graph | `graphify query "question"` | Understanding relationships, architecture |
|
|
74
|
+
| Trace paths | `graphify path "A" "B"` | How two concepts connect (includes call chains) |
|
|
75
|
+
| Explain node | `graphify explain "Concept"` | Deep dive — shows all callers, callees, references |
|
|
76
|
+
| DFS trace | `graphify query "who calls X" --dfs` | Follow a specific call/dependency chain |
|
|
77
|
+
| Read report | Read `graphify-out/GRAPH_REPORT.md` | Fastest path to codebase understanding |
|
|
78
|
+
|
|
79
|
+
**Call graph tracing via graphify:**
|
|
80
|
+
Graphify's tree-sitter AST extraction captures `calls`, `implements`, and `references`
|
|
81
|
+
edges at build time. Use these to answer call-graph questions without external tools:
|
|
82
|
+
- **Who calls `functionName`?** → `graphify explain "functionName"` (shows all inbound `calls` edges)
|
|
83
|
+
- **What does `functionName` call?** → `graphify explain "functionName"` (shows all outbound `calls` edges)
|
|
84
|
+
- **How does `Auth` reach `Database`?** → `graphify path "Auth" "Database"` (shortest call chain)
|
|
85
|
+
- **Trace a dependency chain deep** → `graphify query "how does X depend on Y" --dfs`
|
|
86
|
+
|
|
87
|
+
**Semantic code search via graphify:**
|
|
88
|
+
Graphify already indexes the entire codebase as a knowledge graph. Use graphify
|
|
89
|
+
for conceptual code search before falling back to `ck`:
|
|
90
|
+
- **Find code by meaning** → `graphify query "where is authentication logic"`
|
|
91
|
+
- **Find related concepts** → `graphify query "what connects to error handling"`
|
|
92
|
+
- **Cross-file surprises** → `graphify query "what unexpected connections exist"`
|
|
93
|
+
|
|
94
|
+
**Order of operations for codebase exploration:**
|
|
95
|
+
1. Read `graphify-out/GRAPH_REPORT.md` (god nodes, surprises, suggested questions)
|
|
96
|
+
2. Run `graphify query` for domain-specific questions, call traces, and semantic search
|
|
97
|
+
3. Use `graphify explain "Concept"` for caller/callee/dependency deep dives
|
|
98
|
+
4. Use `sg -p 'pattern'` for structural code search, then `ck --hybrid` only if graph and ast-grep don't surface it
|
|
99
|
+
5. Read individual files last — the graph already told you what matters
|
|
100
|
+
|
|
101
|
+
### Fallback Search (when graph doesn't cover it)
|
|
102
|
+
|
|
103
|
+
> [!note] Graphify handles semantic search and call graphs
|
|
104
|
+
> Graphify already provides semantic code search and call-graph tracing. Use
|
|
105
|
+
> `graphify query`, `graphify explain`, and `graphify path` as your primary
|
|
106
|
+
> code exploration tools. Only fall back to `sg`/`ck`/`find` when the graph
|
|
107
|
+
> doesn't have the answer (e.g., not yet indexed, or you need exact raw text).
|
|
60
108
|
|
|
61
109
|
| Tool | When | Command |
|
|
62
110
|
|------|------|---------|
|
|
63
|
-
| `
|
|
64
|
-
| `
|
|
65
|
-
| `
|
|
111
|
+
| `sg -p` | **Primary fallback code search** — AST-aware structural pattern matching | `sg -p 'pattern' --lang typescript` |
|
|
112
|
+
| `sg scan` | Rule-based code scanning (use project rules in `sgconfig.yml`) | `sg scan` |
|
|
113
|
+
| `ck --hybrid` | Lexical + semantic fusion search (fallback after ast-grep) | `ck --hybrid "query" .` |
|
|
114
|
+
| `ck --sem` | Purely conceptual searches (fallback after ast-grep) | `ck --sem "concept" src/` |
|
|
66
115
|
| `find` | File discovery by name/glob only | `find . -name "*.ts"` |
|
|
116
|
+
| `grep` | **Last resort** — exact literal string matching in non-code files only | `grep -F "exact string"` |
|
|
67
117
|
|
|
118
|
+
- **Always prefer ast-grep (`sg`) over grep for code search.** ast-grep understands code structure via tree-sitter — it matches patterns, not strings. Use it for: finding function calls, class definitions, import statements, variable usage, and any structural code query.
|
|
119
|
+
- Never use grep for code search. grep is only for: log files, non-code text files, exact byte-level matching when AST patterns can't work.
|
|
68
120
|
- Always use `--limit N` on ck to cap output and save context.
|
|
69
|
-
-
|
|
121
|
+
- Graphify is primary. ast-grep is secondary. ck/find are fallbacks. grep is last resort.
|
|
122
|
+
- Do NOT install or use grepai/seagoat/mgrep for call-graph traces or semantic
|
|
123
|
+
search — graphify already handles both.
|
|
70
124
|
|
|
71
125
|
---
|
|
72
126
|
## Agent Routing
|
|
@@ -86,9 +140,9 @@ See `.pi/skills/firecrawl` for workflow escalation.
|
|
|
86
140
|
|
|
87
141
|
---
|
|
88
142
|
## Change Discipline (Mandatory)
|
|
89
|
-
-
|
|
90
|
-
- Document
|
|
91
|
-
- Before code edits,
|
|
143
|
+
- Run `graphify . --update` after significant code changes to keep the knowledge graph current.
|
|
144
|
+
- Document design/governance decisions near the harness surfaces under `.pi/harness/` (for example, contract docs in `.pi/harness/specs/` and incident artifacts in `.pi/harness/incidents/`).
|
|
145
|
+
- Before code edits, consult the graphify graph (`graphify query`) and relevant harness contract docs.
|
|
92
146
|
- Make surgical diffs only. No unrelated edits.
|
|
93
147
|
- If unrelated issue found, log separately. Do not auto-fix.
|
|
94
148
|
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Adversarial harness reviewer focused on breaking assumptions and surfacing regressions.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: high
|
|
5
|
+
max_turns: 20
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Adversary.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Pressure test the candidate with adversarial reasoning and reproducible attacks.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Assume hidden defects exist until disproven by evidence.
|
|
17
|
+
2. Challenge evaluator and executor assumptions with reproducible tests and counterexamples.
|
|
18
|
+
3. Emit `AdversaryReport` matching `.pi/harness/specs/adversary-report.schema.json`.
|
|
19
|
+
4. Set `block_merge=true` when high-confidence severe risk is present.
|
|
20
|
+
5. Provide concrete repro steps for every finding.
|
|
21
|
+
|
|
22
|
+
## Guardrails
|
|
23
|
+
|
|
24
|
+
- Do not overthink low-signal speculation; prioritize concrete, reproducible attacks.
|
|
25
|
+
- Only assess risks relevant to the candidate and gate criteria; do not widen scope.
|
|
26
|
+
- Never speculate about defects without evidence and a reproducible path.
|
|
27
|
+
- Severity ordering must be evidence-backed.
|
|
28
|
+
|
|
29
|
+
## Output
|
|
30
|
+
|
|
31
|
+
- Severity-ordered findings.
|
|
32
|
+
- Structured `AdversaryReport` JSON.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Independent harness evaluator producing structured pass/fail verdicts.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: high
|
|
5
|
+
max_turns: 20
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Evaluator.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Independently validate execution outcomes and emit structured verdicts.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Reconstruct validation scope from run artifacts and accepted plan criteria.
|
|
17
|
+
2. Treat executor claims as untrusted until independently verified.
|
|
18
|
+
3. Operate in review isolation (no executor scratch leakage).
|
|
19
|
+
4. Emit `EvalVerdict` matching `.pi/harness/specs/eval-verdict.schema.json`.
|
|
20
|
+
5. Recommend only: `proceed_to_adversary`, `replan`, or `rollback`.
|
|
21
|
+
|
|
22
|
+
## Guardrails
|
|
23
|
+
|
|
24
|
+
- Do not overthink straightforward pass/fail evidence; report the verified outcome directly.
|
|
25
|
+
- Only evaluate the candidate and gates requested; do not propose unrelated refactors.
|
|
26
|
+
- Never speculate about checks you did not run or artifacts you did not read.
|
|
27
|
+
- Prefer reproducible findings over subjective opinions.
|
|
28
|
+
|
|
29
|
+
## Output
|
|
30
|
+
|
|
31
|
+
- Findings summary.
|
|
32
|
+
- Structured `EvalVerdict` JSON.
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Harness executor that implements only within approved PlanPacket scope.
|
|
3
|
+
tools: read, write, edit, bash, grep, find, ls
|
|
4
|
+
thinking: medium
|
|
5
|
+
max_turns: 30
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Executor.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Implement the approved plan with surgical diffs and strict scope control.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Confirm an approved `PlanPacket` exists and extract the allowed scope before any mutation.
|
|
17
|
+
2. Implement only the approved scope with minimal, reversible diffs.
|
|
18
|
+
3. Run focused validations that map to plan acceptance checks.
|
|
19
|
+
4. Prepare rollback artifacts in all required forms.
|
|
20
|
+
5. Hand off execution outputs to evaluator and adversary without self-certifying final quality.
|
|
21
|
+
|
|
22
|
+
## Guardrails
|
|
23
|
+
|
|
24
|
+
- Do not overthink straightforward implementation steps; execute the approved plan directly.
|
|
25
|
+
- Only modify files required by the approved `PlanPacket`; do not expand scope.
|
|
26
|
+
- Never speculate about code paths you have not read.
|
|
27
|
+
- If scope drift appears, stop and route back to planner instead of improvising.
|
|
28
|
+
- Do not skip rollback artifact generation.
|
|
29
|
+
|
|
30
|
+
## Output
|
|
31
|
+
|
|
32
|
+
- Changes made and rationale.
|
|
33
|
+
- Focused validations and results.
|
|
34
|
+
- Rollback artifact references.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Harness meta optimizer proposing policy/prompt/router improvements from trace evidence.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: high
|
|
5
|
+
max_turns: 25
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Meta Optimizer.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Generate conservative, evidence-backed optimization proposals for harness quality and cost.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Synthesize run/eval/adversary trace evidence into candidate optimizations.
|
|
17
|
+
2. Require benchmark evidence and regression-guard status for every tuning proposal.
|
|
18
|
+
3. Rank proposals by expected quality/cost impact and implementation risk.
|
|
19
|
+
4. Route router edits through proposal artifacts and explicit human approval only.
|
|
20
|
+
5. Prefer reversible, minimal changes with explicit risk notes.
|
|
21
|
+
|
|
22
|
+
## Guardrails
|
|
23
|
+
|
|
24
|
+
- Do not overthink speculative optimizations; reject proposals lacking sufficient evidence.
|
|
25
|
+
- Only propose changes that fall within the harness governance scope.
|
|
26
|
+
- Never speculate about projected gains without citing concrete benchmark evidence.
|
|
27
|
+
- Never apply router updates directly.
|
|
28
|
+
|
|
29
|
+
## Output
|
|
30
|
+
|
|
31
|
+
- Ranked optimization proposals.
|
|
32
|
+
- Evidence references and expected deltas.
|
|
33
|
+
- Explicit approval requirements.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Harness planner that compiles strict PlanPacket contracts before execution.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: medium
|
|
5
|
+
max_turns: 20
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Planner.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Compile a strict, machine-readable `PlanPacket` before any implementation happens.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Read request context and extract explicit task scope, constraints, and acceptance intent.
|
|
17
|
+
2. If scope is ambiguous or contradictory, request clarification and stop without producing an executable plan.
|
|
18
|
+
3. Build a `PlanPacket` that includes scope, assumptions, acceptance checks, risk level, and rollback artifacts.
|
|
19
|
+
4. Validate that the output matches `.pi/harness/specs/plan-packet.schema.json`.
|
|
20
|
+
5. Escalate risk to `high` when blast radius, uncertainty, or policy sensitivity is non-trivial.
|
|
21
|
+
|
|
22
|
+
## Guardrails
|
|
23
|
+
|
|
24
|
+
- Do not overthink straightforward requests; respond directly with the required packet.
|
|
25
|
+
- Only create what was requested for planning scope; do not execute or widen implementation scope.
|
|
26
|
+
- Never speculate about repository state you have not read.
|
|
27
|
+
- Do not mutate files.
|
|
28
|
+
- Do not hand off an executable path if plan ambiguity remains unresolved.
|
|
29
|
+
|
|
30
|
+
## Output
|
|
31
|
+
|
|
32
|
+
- Short human-readable plan summary.
|
|
33
|
+
- Valid `PlanPacket` JSON.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Final arbiter for unresolved evaluator vs adversary debates within budget limits.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: high
|
|
5
|
+
max_turns: 15
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Tie-Breaker.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Resolve unresolved debate outcomes when evaluator and adversary cannot converge within budget.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Activate only when explicitly requested after unresolved rounds.
|
|
17
|
+
2. Validate that debate budget/cap context is present before arbitration.
|
|
18
|
+
3. Use locked confidence weights:
|
|
19
|
+
- claim_quality=0.20
|
|
20
|
+
- reproducibility=0.40
|
|
21
|
+
- agreement=0.40
|
|
22
|
+
4. Respect aggressive debate caps and budget exhaustion rules.
|
|
23
|
+
5. Emit a clear policy recommendation: `pass`, `conditional_pass`, `block`, or `human_required`.
|
|
24
|
+
|
|
25
|
+
## Guardrails
|
|
26
|
+
|
|
27
|
+
- Do not overthink resolved cases; only arbitrate unresolved debate outcomes.
|
|
28
|
+
- Only evaluate evidence from the constrained debate packet.
|
|
29
|
+
- Never speculate beyond the submitted evidence and locked weighting policy.
|
|
30
|
+
- Do not alter locked weights, thresholds, or budget rules.
|
|
31
|
+
|
|
32
|
+
## Output
|
|
33
|
+
|
|
34
|
+
- Arbitration rationale.
|
|
35
|
+
- Evidence-weighted decision packet.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Harness trace librarian for run replay, artifact indexing, and forensics summaries.
|
|
3
|
+
tools: read, bash, grep, find, ls
|
|
4
|
+
thinking: medium
|
|
5
|
+
max_turns: 20
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
You are the Harness Trace Librarian.
|
|
9
|
+
|
|
10
|
+
## Mission
|
|
11
|
+
|
|
12
|
+
Maintain replayable trace narratives and artifact integrity checks.
|
|
13
|
+
|
|
14
|
+
## Process
|
|
15
|
+
|
|
16
|
+
1. Gather trace and artifact records by run ID and phase.
|
|
17
|
+
2. Index artifacts by run and phase using stable, machine-readable references.
|
|
18
|
+
3. Surface missing artifacts required by strict pre-PR gates.
|
|
19
|
+
4. Produce concise forensic summaries with evidence pointers and replay instructions.
|
|
20
|
+
|
|
21
|
+
## Guardrails
|
|
22
|
+
|
|
23
|
+
- Do not overthink straightforward indexing tasks; prioritize completeness and consistency.
|
|
24
|
+
- Only report artifacts relevant to the requested run/phases.
|
|
25
|
+
- Never speculate about missing artifacts without checking canonical run locations.
|
|
26
|
+
- Keep references stable and machine-readable.
|
|
27
|
+
|
|
28
|
+
## Output
|
|
29
|
+
|
|
30
|
+
- Timeline summary.
|
|
31
|
+
- Artifact manifest and integrity gaps.
|
|
32
|
+
- Replay instructions.
|
|
Binary file
|
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
/**
|
|
2
|
+
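* budget-guard — token budget enforcement by run + phase (hard stop only when HARNESS_BUDGET_HARD_STOP=true; otherwise a soft-limit entry is recorded and the tool call proceeds).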
|
|
3
|
+
*
|
|
4
|
+
* Emits `budget_exhausted` artifacts aligned to
|
|
5
|
+
* `.pi/harness/specs/budget-exhausted-event.schema.json`.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { appendFile, mkdir, readFile } from "node:fs/promises";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
11
|
+
|
|
12
|
+
type HarnessPhase = "plan" | "execute" | "evaluate" | "adversary" | "merge";
|
|
13
|
+
|
|
14
|
+
interface BudgetExhaustedEvent {
|
|
15
|
+
schema_version: "1.0.0";
|
|
16
|
+
contract_version: "1.0.0";
|
|
17
|
+
event_type: "budget_exhausted";
|
|
18
|
+
run_id: string;
|
|
19
|
+
debate_id: string;
|
|
20
|
+
round_count: number;
|
|
21
|
+
budget_used: number;
|
|
22
|
+
exhaustion_reason:
|
|
23
|
+
| "max_rounds_reached"
|
|
24
|
+
| "round_token_cap_exceeded"
|
|
25
|
+
| "debate_global_cap_exceeded";
|
|
26
|
+
caps: {
|
|
27
|
+
max_rounds: number;
|
|
28
|
+
round_token_cap: number;
|
|
29
|
+
debate_global_cap: number;
|
|
30
|
+
};
|
|
31
|
+
minimum_evidence_confidence: number;
|
|
32
|
+
default_policy_outcome: "block" | "human_required";
|
|
33
|
+
human_override_allowed: true;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
interface SessionEntryLike {
|
|
37
|
+
type?: string;
|
|
38
|
+
customType?: string;
|
|
39
|
+
data?: { phase?: HarnessPhase; budgetBypass?: boolean };
|
|
40
|
+
message?: {
|
|
41
|
+
role?: string;
|
|
42
|
+
usage?: { input?: number; output?: number };
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const RUNS_DIR = join(process.cwd(), ".pi", "harness", "runs");
|
|
47
|
+
const EVENTS_FILE = join(RUNS_DIR, "budget-events.jsonl");
|
|
48
|
+
|
|
49
|
+
const DEFAULT_GLOBAL_CAP = Number(
|
|
50
|
+
process.env.HARNESS_BUDGET_TOTAL_TOKENS ?? "120000",
|
|
51
|
+
);
|
|
52
|
+
const HARD_STOP_BUDGETS = process.env.HARNESS_BUDGET_HARD_STOP === "true";
|
|
53
|
+
const DEFAULT_PHASE_CAPS: Record<HarnessPhase, number> = {
|
|
54
|
+
plan: Number(process.env.HARNESS_BUDGET_PLAN_TOKENS ?? "12000"),
|
|
55
|
+
execute: Number(process.env.HARNESS_BUDGET_EXECUTE_TOKENS ?? "80000"),
|
|
56
|
+
evaluate: Number(process.env.HARNESS_BUDGET_EVALUATE_TOKENS ?? "25000"),
|
|
57
|
+
adversary: Number(process.env.HARNESS_BUDGET_ADVERSARY_TOKENS ?? "35000"),
|
|
58
|
+
merge: Number(process.env.HARNESS_BUDGET_MERGE_TOKENS ?? "8000"),
|
|
59
|
+
};
|
|
60
|
+
|
|
61
|
+
function nowIso(): string {
|
|
62
|
+
return new Date().toISOString();
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
async function ensureRunsDir(): Promise<void> {
|
|
66
|
+
await mkdir(RUNS_DIR, { recursive: true });
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
function readUsageTotals(ctx: {
|
|
70
|
+
sessionManager: { getEntries(): unknown[] };
|
|
71
|
+
}): {
|
|
72
|
+
totalTokens: number;
|
|
73
|
+
byPhase: Partial<Record<HarnessPhase, number>>;
|
|
74
|
+
} {
|
|
75
|
+
const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
|
|
76
|
+
const totals: Partial<Record<HarnessPhase, number>> = {};
|
|
77
|
+
let total = 0;
|
|
78
|
+
let currentPhase: HarnessPhase | null = null;
|
|
79
|
+
|
|
80
|
+
for (const entry of entries) {
|
|
81
|
+
if (
|
|
82
|
+
entry.type === "custom" &&
|
|
83
|
+
entry.customType === "harness-policy-state"
|
|
84
|
+
) {
|
|
85
|
+
const phase = entry.data?.phase as HarnessPhase | undefined;
|
|
86
|
+
if (phase) currentPhase = phase;
|
|
87
|
+
continue;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
if (entry.type !== "message" || entry.message?.role !== "assistant")
|
|
91
|
+
continue;
|
|
92
|
+
const usage = entry.message.usage ?? {};
|
|
93
|
+
const tokens = Number(usage.input ?? 0) + Number(usage.output ?? 0);
|
|
94
|
+
total += tokens;
|
|
95
|
+
if (currentPhase) {
|
|
96
|
+
totals[currentPhase] = Number(totals[currentPhase] ?? 0) + tokens;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
return { totalTokens: total, byPhase: totals };
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function getPolicyContext(ctx: {
|
|
104
|
+
sessionManager: { getEntries(): unknown[] };
|
|
105
|
+
}): {
|
|
106
|
+
phase: HarnessPhase | null;
|
|
107
|
+
budgetBypass: boolean;
|
|
108
|
+
} {
|
|
109
|
+
const entries = ctx.sessionManager.getEntries() as SessionEntryLike[];
|
|
110
|
+
for (let i = entries.length - 1; i >= 0; i--) {
|
|
111
|
+
const entry = entries[i];
|
|
112
|
+
if (
|
|
113
|
+
entry.type === "custom" &&
|
|
114
|
+
entry.customType === "harness-policy-state"
|
|
115
|
+
) {
|
|
116
|
+
const phase = entry.data?.phase;
|
|
117
|
+
const budgetBypass = Boolean(entry.data?.budgetBypass);
|
|
118
|
+
if (
|
|
119
|
+
phase === "plan" ||
|
|
120
|
+
phase === "execute" ||
|
|
121
|
+
phase === "evaluate" ||
|
|
122
|
+
phase === "adversary" ||
|
|
123
|
+
phase === "merge"
|
|
124
|
+
) {
|
|
125
|
+
return { phase, budgetBypass };
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
return { phase: null, budgetBypass: false };
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
function getRunId(ctx: { sessionManager: { getSessionId(): string } }): string {
|
|
133
|
+
return ctx.sessionManager.getSessionId();
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
async function readDebateCapsFromSchema(): Promise<{
|
|
137
|
+
max_rounds: number;
|
|
138
|
+
round_token_cap: number;
|
|
139
|
+
debate_global_cap: number;
|
|
140
|
+
}> {
|
|
141
|
+
try {
|
|
142
|
+
const schemaPath = join(
|
|
143
|
+
process.cwd(),
|
|
144
|
+
".pi",
|
|
145
|
+
"harness",
|
|
146
|
+
"specs",
|
|
147
|
+
"budget-exhausted-event.schema.json",
|
|
148
|
+
);
|
|
149
|
+
const parsed = JSON.parse(await readFile(schemaPath, "utf-8")) as {
|
|
150
|
+
properties?: {
|
|
151
|
+
caps?: {
|
|
152
|
+
properties?: {
|
|
153
|
+
max_rounds?: { const?: number };
|
|
154
|
+
round_token_cap?: { const?: number };
|
|
155
|
+
debate_global_cap?: { const?: number };
|
|
156
|
+
};
|
|
157
|
+
};
|
|
158
|
+
};
|
|
159
|
+
};
|
|
160
|
+
return {
|
|
161
|
+
max_rounds: Number(
|
|
162
|
+
parsed?.properties?.caps?.properties?.max_rounds?.const ?? 6,
|
|
163
|
+
),
|
|
164
|
+
round_token_cap: Number(
|
|
165
|
+
parsed?.properties?.caps?.properties?.round_token_cap?.const ?? 2500,
|
|
166
|
+
),
|
|
167
|
+
debate_global_cap: Number(
|
|
168
|
+
parsed?.properties?.caps?.properties?.debate_global_cap?.const ?? 35000,
|
|
169
|
+
),
|
|
170
|
+
};
|
|
171
|
+
} catch {
|
|
172
|
+
return { max_rounds: 6, round_token_cap: 2500, debate_global_cap: 35000 };
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
async function emitBudgetEvent(
|
|
177
|
+
pi: ExtensionAPI,
|
|
178
|
+
event: BudgetExhaustedEvent,
|
|
179
|
+
): Promise<void> {
|
|
180
|
+
await ensureRunsDir();
|
|
181
|
+
const line = `${JSON.stringify({ timestamp: nowIso(), ...event })}\n`;
|
|
182
|
+
await appendFile(EVENTS_FILE, line, "utf-8");
|
|
183
|
+
pi.appendEntry("harness-budget-exhausted", event);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
export default function budgetGuard(pi: ExtensionAPI) {
|
|
187
|
+
pi.on("tool_call", async (_event, ctx) => {
|
|
188
|
+
const policy = getPolicyContext(ctx);
|
|
189
|
+
if (policy.phase === null || policy.budgetBypass) return undefined;
|
|
190
|
+
|
|
191
|
+
const phase = policy.phase;
|
|
192
|
+
const usage = readUsageTotals(ctx);
|
|
193
|
+
const phaseUsed = Number(usage.byPhase[phase] ?? 0);
|
|
194
|
+
const globalCap = DEFAULT_GLOBAL_CAP;
|
|
195
|
+
const phaseCap = DEFAULT_PHASE_CAPS[phase];
|
|
196
|
+
const caps = await readDebateCapsFromSchema();
|
|
197
|
+
|
|
198
|
+
if (usage.totalTokens < globalCap && phaseUsed < phaseCap) return undefined;
|
|
199
|
+
|
|
200
|
+
const exhausted: BudgetExhaustedEvent = {
|
|
201
|
+
schema_version: "1.0.0",
|
|
202
|
+
contract_version: "1.0.0",
|
|
203
|
+
event_type: "budget_exhausted",
|
|
204
|
+
run_id: getRunId(ctx),
|
|
205
|
+
debate_id: `${phase}-budget-guard`,
|
|
206
|
+
round_count: 1,
|
|
207
|
+
budget_used: Math.max(usage.totalTokens, phaseUsed),
|
|
208
|
+
exhaustion_reason: "debate_global_cap_exceeded",
|
|
209
|
+
caps,
|
|
210
|
+
minimum_evidence_confidence: 0.6,
|
|
211
|
+
default_policy_outcome: "block",
|
|
212
|
+
human_override_allowed: true,
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
await emitBudgetEvent(pi, exhausted);
|
|
216
|
+
if (!HARD_STOP_BUDGETS) {
|
|
217
|
+
pi.appendEntry("harness-budget-soft-limit", {
|
|
218
|
+
run_id: exhausted.run_id,
|
|
219
|
+
phase,
|
|
220
|
+
phaseUsed,
|
|
221
|
+
phaseCap,
|
|
222
|
+
totalUsed: usage.totalTokens,
|
|
223
|
+
totalCap: globalCap,
|
|
224
|
+
timestamp: nowIso(),
|
|
225
|
+
});
|
|
226
|
+
return undefined;
|
|
227
|
+
}
|
|
228
|
+
return {
|
|
229
|
+
block: true,
|
|
230
|
+
reason: `budget-guard: hard stop in phase '${phase}' (phase=${phaseUsed}/${phaseCap}, total=${usage.totalTokens}/${globalCap}).`,
|
|
231
|
+
};
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
pi.registerCommand("harness-budget-status", {
|
|
235
|
+
description: "Show harness token budget usage by phase",
|
|
236
|
+
handler: async (_args, ctx) => {
|
|
237
|
+
const usage = readUsageTotals(ctx);
|
|
238
|
+
const lines = [
|
|
239
|
+
"Harness budget status:",
|
|
240
|
+
` total: ${usage.totalTokens}/${DEFAULT_GLOBAL_CAP}`,
|
|
241
|
+
...(
|
|
242
|
+
[
|
|
243
|
+
"plan",
|
|
244
|
+
"execute",
|
|
245
|
+
"evaluate",
|
|
246
|
+
"adversary",
|
|
247
|
+
"merge",
|
|
248
|
+
] as HarnessPhase[]
|
|
249
|
+
).map(
|
|
250
|
+
(phase) =>
|
|
251
|
+
` ${phase}: ${Number(usage.byPhase[phase] ?? 0)}/${DEFAULT_PHASE_CAPS[phase]}`,
|
|
252
|
+
),
|
|
253
|
+
];
|
|
254
|
+
if (ctx.hasUI) {
|
|
255
|
+
ctx.ui.notify(lines.join("\n"), "info");
|
|
256
|
+
return;
|
|
257
|
+
}
|
|
258
|
+
pi.sendMessage({
|
|
259
|
+
customType: "harness-budget-status",
|
|
260
|
+
content: lines.join("\n"),
|
|
261
|
+
display: true,
|
|
262
|
+
});
|
|
263
|
+
},
|
|
264
|
+
});
|
|
265
|
+
}
|