create-ccc-tutor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/bin/cli.js +76 -0
- package/package.json +28 -0
- package/template/.claude/commands/abandon.md +7 -0
- package/template/.claude/commands/add-anti-flag.md +7 -0
- package/template/.claude/commands/add-constitution-clause.md +7 -0
- package/template/.claude/commands/audit-spec.md +7 -0
- package/template/.claude/commands/commit.md +7 -0
- package/template/.claude/commands/constitution-edit.md +7 -0
- package/template/.claude/commands/db-schema.md +7 -0
- package/template/.claude/commands/exam.md +66 -0
- package/template/.claude/commands/execution-plan.md +7 -0
- package/template/.claude/commands/feature-draft.md +7 -0
- package/template/.claude/commands/handoff.md +7 -0
- package/template/.claude/commands/implement.md +7 -0
- package/template/.claude/commands/init.md +7 -0
- package/template/.claude/commands/next.md +7 -0
- package/template/.claude/commands/offload.md +7 -0
- package/template/.claude/commands/pickup.md +7 -0
- package/template/.claude/commands/recall.md +7 -0
- package/template/.claude/commands/remember.md +7 -0
- package/template/.claude/commands/slide.md +87 -0
- package/template/.claude/commands/spec-finalize.md +7 -0
- package/template/.claude/commands/test-fix.md +7 -0
- package/template/.claude/commands/uninstall.md +7 -0
- package/template/.claude/settings.json +161 -0
- package/template/.claude-plugin/plugin.json +41 -0
- package/template/.codex/config.toml +24 -0
- package/template/.codex/hooks.json +4 -0
- package/template/.codex/install-skills.sh +18 -0
- package/template/.codex/skills/exam/SKILL.md +61 -0
- package/template/.codex/skills/slide/SKILL.md +69 -0
- package/template/.harness/agents/README.md +70 -0
- package/template/.harness/agents/_template/junior-agent-template.md +116 -0
- package/template/.harness/agents/backend-reviewer.md +153 -0
- package/template/.harness/agents/frontend-reviewer.md +158 -0
- package/template/.harness/agents/security-reviewer.md +148 -0
- package/template/.harness/agents/test-fixer.md +147 -0
- package/template/.harness/docs/doc-sync.md +29 -0
- package/template/.harness/docs/git-hygiene.md +56 -0
- package/template/.harness/docs/spec-model.md +47 -0
- package/template/.harness/docs/tool-map.md +120 -0
- package/template/.harness/docs/workflow.md +59 -0
- package/template/.harness/scripts/README.md +70 -0
- package/template/.harness/scripts/auditor-gate.sh +388 -0
- package/template/.harness/scripts/bootstrap-check.sh +103 -0
- package/template/.harness/scripts/budget-monitor.sh +223 -0
- package/template/.harness/scripts/check-prereqs.sh +165 -0
- package/template/.harness/scripts/checkpoint-recall.sh +136 -0
- package/template/.harness/scripts/checkpoint-write.sh +281 -0
- package/template/.harness/scripts/decision-log-append.sh +90 -0
- package/template/.harness/scripts/env-check.sh +286 -0
- package/template/.harness/scripts/format-edit.sh +80 -0
- package/template/.harness/scripts/lint-bans.sh +110 -0
- package/template/.harness/scripts/memory-archive.sh +129 -0
- package/template/.harness/scripts/memory-recall.sh +197 -0
- package/template/.harness/scripts/memory-snapshot.sh +124 -0
- package/template/.harness/scripts/post-migration.sh +58 -0
- package/template/.harness/scripts/precommit-cycles.sh +74 -0
- package/template/.harness/scripts/precommit-typecheck.sh +69 -0
- package/template/.harness/scripts/scratchpad-recall.sh +83 -0
- package/template/.harness/scripts/scratchpad-update.sh +39 -0
- package/template/.harness/scripts/standalone-bootstrap.md +443 -0
- package/template/.harness/skills/abandon/SKILL.md +157 -0
- package/template/.harness/skills/add-anti-flag/SKILL.md +205 -0
- package/template/.harness/skills/add-constitution-clause/SKILL.md +244 -0
- package/template/.harness/skills/audit-spec/SKILL.md +395 -0
- package/template/.harness/skills/commit/SKILL.md +270 -0
- package/template/.harness/skills/constitution-edit/SKILL.md +292 -0
- package/template/.harness/skills/db-schema/SKILL.md +145 -0
- package/template/.harness/skills/db-schema/references/methodology.md +202 -0
- package/template/.harness/skills/execution-plan/SKILL.md +346 -0
- package/template/.harness/skills/feature-draft/SKILL.md +426 -0
- package/template/.harness/skills/handoff/SKILL.md +211 -0
- package/template/.harness/skills/implement/SKILL.md +355 -0
- package/template/.harness/skills/init/SKILL.md +805 -0
- package/template/.harness/skills/next/SKILL.md +245 -0
- package/template/.harness/skills/offload/SKILL.md +134 -0
- package/template/.harness/skills/pickup/SKILL.md +213 -0
- package/template/.harness/skills/recall/SKILL.md +159 -0
- package/template/.harness/skills/remember/SKILL.md +205 -0
- package/template/.harness/skills/spec-finalize/SKILL.md +196 -0
- package/template/.harness/skills/test-fix/SKILL.md +363 -0
- package/template/.harness/skills/uninstall/SKILL.md +370 -0
- package/template/.harness/state/install.json +83 -0
- package/template/AGENTS.md +262 -0
- package/template/CCC_MAGI_LICENSE +201 -0
- package/template/CCC_MAGI_README.md +986 -0
- package/template/CLAUDE.md +658 -0
- package/template/codex.md +39 -0
- package/template/constitution.md +164 -0
- package/template/course/README.md +15 -0
- package/template/course/course_code(example)/exam/README.md +2 -0
- package/template/course/course_code(example)/slide/slide_example-1.pdf +40 -0
- package/template/course/course_code(example)/slide/slide_example-2.pdf +40 -0
- package/template/docs/features/slide-query-implementation.md +79 -0
- package/template/docs/features/slide-query.md +211 -0
- package/template/docs-harness/README.md +42 -0
- package/template/docs-harness/adoption-playbook.md +373 -0
- package/template/docs-harness/ccc-step1-driver-template.md +288 -0
- package/template/docs-harness/cli-configs-README.md +78 -0
- package/template/docs-harness/context-architecture-v2.md +249 -0
- package/template/docs-harness/design-spec.md +437 -0
- package/template/docs-harness/memory-layer.md +135 -0
- package/template/docs-harness/retrospective-notes.md +204 -0
- package/template/gitignore +106 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Tool map — detail
|
|
2
|
+
|
|
3
|
+
> **Reference for `CLAUDE.md § Tool map`.** Loaded on demand when AI needs per-skill purpose, full subagent role inventory, hook trigger conditions, or memory mechanism details. The compact list in CLAUDE.md is the load-bearing index; this file is the elaboration.
|
|
4
|
+
|
|
5
|
+
## Bootstrap (not a skill; see top of CLAUDE.md)
|
|
6
|
+
|
|
7
|
+
Before any skill runs, the Bootstrap Status Check at the top of CLAUDE.md decides whether harness is configured. If not:
|
|
8
|
+
- **CCC mode**: CCC's bundled Step 1 driver runs (detects existing harness + 3-option menu + git clone)
|
|
9
|
+
- **Standalone mode**: `.harness/scripts/standalone-bootstrap.md` runs (same logic minus git clone, since user already cloned manually)
|
|
10
|
+
|
|
11
|
+
Both bootstrap paths converge on invoking `/init` to fill project-specific values.
|
|
12
|
+
|
|
13
|
+
## Slash commands & skills (`.harness/skills/`)
|
|
14
|
+
|
|
15
|
+
Each skill lives at `.harness/skills/<name>/SKILL.md`. Skills with a `description` are auto-discoverable and create the `/<name>` invocation.
|
|
16
|
+
|
|
17
|
+
Skills are invokable two ways:
|
|
18
|
+
|
|
19
|
+
- **Slash syntax**: `/<skill-name> <args>` (e.g., `/remember 这事很重要`). Forwarded via `.claude/commands/` shims to the actual skill at `.harness/skills/<name>/SKILL.md`.
|
|
20
|
+
- **Natural language**: phrases listed in each skill's `description` field will trigger the same skill (e.g., "记一下: 这事很重要" triggers /remember). See individual SKILL.md `description` for accepted phrases.
|
|
21
|
+
|
|
22
|
+
### Per-skill detail
|
|
23
|
+
|
|
24
|
+
- `/init` — **Step 2** of harness setup: fills L0/L1 slots interactively, writes `.harness/state/install.json` as the canonical "configured" marker. Re-runnable for re-configuration with `--force`. Does NOT run detection — bootstrap handles that before /init is invoked.
|
|
25
|
+
- `/next` — workflow state inspector: detects current feature progress and suggests next command. Doesn't auto-invoke; pure wayfinder. Use when unsure which skill to run.
|
|
26
|
+
- `/pickup` — session resume: reads `.harness/state/workflow-checkpoints/<feature>.json` and restores stage / artifact / progress state. Auto-surfaced at SessionStart if a checkpoint matches the current git branch. Use after multi-day breaks, cross-device work, or context-compaction loss.
|
|
27
|
+
- `/abandon` — mark a feature dead: moves checkpoint to `_archived/`, logs reason to decision-log. Does NOT touch git or source code (CEO's job). Use when CEO rejects a feature post-spec or when cleaning dormant features from `/pickup --list`.
|
|
28
|
+
- `/uninstall` — cleanly remove CCC-MAGI from the project. Detects whether a prior harness archive exists (`old_version_harness/` from bootstrap option 1); if so, offers to restore it. Preserves source code, `docs/features/*.md` specs, git history. Constitutional basis: § 3 (CEO Final Authority).
|
|
29
|
+
- `/feature-draft <name>` — stage 1, **new-feature mode**
|
|
30
|
+
- `/audit-spec <name>` — stage 1, **audit mode**
|
|
31
|
+
- `/spec-finalize <name>` — stage 2
|
|
32
|
+
- `/db-schema <name>` — stage 3 (skip if no backend)
|
|
33
|
+
- `/execution-plan <name>` — stage 4
|
|
34
|
+
- `/implement <name>` — stage 5
|
|
35
|
+
- `/test-fix` — stage 6 (skip if `test_required = false`)
|
|
36
|
+
- `/commit` — stage 8
|
|
37
|
+
- `/constitution-edit` — edit Section 2 / Section 3 / slot registry of constitution.md. Cannot modify Section 1 (Universal Core — harness-guaranteed invariants). Generates a versioned Sync Impact Report at the top of constitution.md (Spec-Kit-pattern audit trail).
|
|
38
|
+
- `/add-constitution-clause` — append to Section 3 of constitution (new project-specific red line)
|
|
39
|
+
- `/add-anti-flag` — grow the L2 anti-flag rules over time (in AGENTS.md)
|
|
40
|
+
- `/remember` — user-curated entry into Tier 2 memory (and into the shared, committed `decisions.jsonl` for high-signal calls)
|
|
41
|
+
- `/recall <id|feature|tag>` / `/recall --deep <query>` — JIT body fetch from memory tiers
|
|
42
|
+
- `/handoff` — user-invoked at 95% context. Generates a rich 5-slot snapshot entry into Tier 2.
|
|
43
|
+
- `/offload <task>` — spawn fresh-context subagent for a sub-task at ~75% budget.
|
|
44
|
+
|
|
45
|
+
## Constitution versioning
|
|
46
|
+
|
|
47
|
+
`constitution.md` follows semver. Edits via `/constitution-edit` prepend a Sync Impact Report HTML comment at the top of the file documenting:
|
|
48
|
+
- Version bump (MAJOR / MINOR / PATCH)
|
|
49
|
+
- What changed in which section
|
|
50
|
+
- Downstream templates that may need review
|
|
51
|
+
|
|
52
|
+
Ad-hoc edits (raw `vim constitution.md`) skip the report. Use `/constitution-edit` for material changes — the audit trail is worth it.
|
|
53
|
+
|
|
54
|
+
Semver rules:
|
|
55
|
+
- **MAJOR** — removes / substantively changes an existing principle or slot
|
|
56
|
+
- **MINOR** — adds a new principle or slot
|
|
57
|
+
- **PATCH** — typo / clarification / non-semantic rewording
|
|
58
|
+
|
|
59
|
+
Section 1 (Universal Core) is harness-guaranteed and cannot be modified by `/constitution-edit`.
|
|
60
|
+
|
|
61
|
+
## Subagents (`.harness/agents/`)
|
|
62
|
+
|
|
63
|
+
Subagents enforce **mechanical rules only** — they do not exercise judgment, propose new patterns, or evaluate business logic. Judgment is MAGI Verdict's job; pattern proposals belong to MAGI Core; intent decisions are CEO's. A subagent finding always cites the rule source (a `CLAUDE.md` or rule file); if it can't, that's not a finding to report.
|
|
64
|
+
|
|
65
|
+
**Core MAGI positions (built-in):**
|
|
66
|
+
- **MAGI Planner** — Stage 1 + 4. Played by MAGI Core: turns CEO intent into a plain-language spec, then a per-file execution plan.
|
|
67
|
+
- **MAGI Programmer** — Stage 5. Played by MAGI Core: implements per the plan.
|
|
68
|
+
- **MAGI Tester** — Stage 6. Played by `test-fixer` subagent (fresh context, so it doesn't inherit Programmer's rationalizations).
|
|
69
|
+
- **MAGI Verdict** — Stages 2-6 + commit gate. Cross-model judgment auditor (default `{{auditor_model}}`). Single-engine fallback (fresh-context same-model) when no second model available.
|
|
70
|
+
- **MAGI Archivist** — Hook-triggered (SessionStart / PreCompaction). Memory layer service.
|
|
71
|
+
|
|
72
|
+
**MAGI Reviewer plugins** (`{{junior_reviewers}}` — user picks at /init):
|
|
73
|
+
<!-- ⟦L1⟧ Filled per project. Examples shipped: frontend-reviewer,
|
|
74
|
+
backend-reviewer, security-reviewer, infra-reviewer. User selects
|
|
75
|
+
which plugins to enable based on tech stack. -->
|
|
76
|
+
|
|
77
|
+
**Test programmer:**
|
|
78
|
+
- `test-fixer` — junior **programmer** (not reviewer): writes/edits test code from a fresh context. Spawned by `/test-fix`; does not exercise judgment about whether the test is right — that's the auditor's job in the post-fix audit.
|
|
79
|
+
|
|
80
|
+
## Hooks (`.claude/settings.json`, `.codex/hooks.json`)
|
|
81
|
+
|
|
82
|
+
Hooks are deterministic checks that run automatically.
|
|
83
|
+
|
|
84
|
+
- **Pre-commit typecheck** — blocks commit if static type/syntax check fails. Script: `scripts/precommit-typecheck.sh`.
|
|
85
|
+
- **Pre-commit lint bans** — blocks commit if anti-flag patterns are found. Script: `scripts/lint-bans.sh`.
|
|
86
|
+
- **Pre-commit cycles** — blocks commit if a dependency cycle is detected (enabled only if `dependency_flow` is non-empty). Script: `scripts/precommit-cycles.sh`.
|
|
87
|
+
- **Post-edit format** — runs the project's formatter on edited files. Script: `scripts/format-edit.sh`.
|
|
88
|
+
- **Budget pressure monitor** — `.harness/scripts/budget-monitor.sh` (UserPromptSubmit). Monitors transcript token usage (parses Anthropic `usage` field). **Auto-detects context budget from model** (v0.10.3+): `[1m]` suffix → 1M, standard `claude-*` → 200K, `gpt-4*` → 128K, others → 200K safe default. Override with `CCC_CONTEXT_BUDGET` env var. Emits `additionalContext` at 50% / 75% / 90% / 95% with detected model shown in each message; 95% surfaces a `/compact` / `/handoff` / continue menu. Advisory-only; can't force model switch (Claude Code doesn't expose runtime model switching to hooks). Silent under 50%.
|
|
89
|
+
|
|
90
|
+
> **Install-time registry**: `.harness/state/shipped-hashes.json` records SHA-256 of every file the installer shipped, so re-installs can content-hash-detect "user-modified" vs "unmodified" files and safely deliver harness updates without clobbering local changes.
|
|
91
|
+
|
|
92
|
+
## Memory layer (`.harness/memory/` + `.harness/state/scratchpad.md`) — v2 3-tier
|
|
93
|
+
|
|
94
|
+
> Full architectural rationale: `docs-harness/context-architecture-v2.md`.
|
|
95
|
+
|
|
96
|
+
Cross-session persistence in 3 tiers (Letta pattern):
|
|
97
|
+
|
|
98
|
+
| Tier | Location | Purpose | In-context at SessionStart? |
|
|
99
|
+
|---|---|---|---|
|
|
100
|
+
| **1 — Working** | `.harness/state/scratchpad.md` | Current objective + last/next step + blockers; rewritten every turn (Stop hook) | ✅ Always (~500 tokens) |
|
|
101
|
+
| **2 — Recall** | `.harness/memory/sessions/recall/*.jsonl` (`observations` + `snapshots`) | Last 30 days of decisions/failures/snapshots | ✅ Manifest only (~500-1000 tokens), bodies on demand |
|
|
102
|
+
| **3 — Archive** | `.harness/memory/sessions/archive/<YYYY-MM>.jsonl` | Older entries, cold storage | ❌ Never — only via `/recall --deep <query>` |
|
|
103
|
+
|
|
104
|
+
Shared (team, committed): `conventions.md` (long-form rules) + `decisions.jsonl` (`/remember` writes here).
|
|
105
|
+
|
|
106
|
+
Mechanisms:
|
|
107
|
+
|
|
108
|
+
- **`memory-archive.sh`** (SessionStart) — migrates Tier 2 entries >30 days into Tier 3. Back-fills `id` on legacy entries. Idempotent.
|
|
109
|
+
- **`memory-recall.sh`** (SessionStart) — emits a **manifest** of one-line index entries from Tier 2 (`[<id>] feature=<f> kind=<k> date=<YYYY-MM-DD> focus="<≤80 chars>"`). **Does NOT load entry bodies** — that requires `/recall <id>`.
|
|
110
|
+
- **`scratchpad-recall.sh`** (SessionStart) — reads `scratchpad.md`, injects as additionalContext.
|
|
111
|
+
- **`scratchpad-update.sh`** (Stop hook) — instructs AI to rewrite scratchpad at end of each turn.
|
|
112
|
+
- **`memory-snapshot.sh`** (PreCompaction) — deterministically harvests scratchpad + checkpoint + git status into a snapshot entry. **No LLM call** (the v1 approach of instructing the model to summarize the session is deprecated).
|
|
113
|
+
- **`/remember`** — user-curated entry into Tier 2 (and into the shared, committed `decisions.jsonl` for high-signal calls).
|
|
114
|
+
- **`/handoff`** — user-invoked at 95% context. Generates a rich 5-slot snapshot entry into Tier 2.
|
|
115
|
+
- **`/recall <id|feature|tag>`** / **`/recall --deep <query>`** — JIT body fetch.
|
|
116
|
+
|
|
117
|
+
Token economics (v2):
|
|
118
|
+
- SessionStart cost: ~1-1.5K tokens regardless of project age (Tier 1 + Tier 2 manifest, bounded). v1's eager-injection often hit 2-5K.
|
|
119
|
+
- Per fetch: ~1-2K tokens (body load). Hard cap: ≤3 recall + ≤1 archive search per session.
|
|
120
|
+
- Net: same-or-cheaper than v1 in common cases; only more expensive in extreme-history-mining sessions, where the cost is justified.
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# Workflow detail
|
|
2
|
+
|
|
3
|
+
> **Reference for `CLAUDE.md § Workflow`.** Loaded on demand when AI needs full stage internals, mode-vs-lane distinction, or cross-model audit operationalization. The compact summary in CLAUDE.md is the load-bearing version; this file is the elaboration.
|
|
4
|
+
|
|
5
|
+
## Two sides, three lanes (full picture)
|
|
6
|
+
|
|
7
|
+
The **CEO (you, human)** sets intent. The **MAGI System** (the AI team) implements + reviews — see `AGENTS.md § MAGI System` for the 7 positions. Concretely:
|
|
8
|
+
|
|
9
|
+
- **MAGI Core** (your primary CLI, e.g. Claude Code) — orchestrator + workflow manager. Talks to you. Spawns subagents.
|
|
10
|
+
- **MAGI Verdict** (default `{{auditor_model}}`, e.g. Codex) — cross-model auditor. **Judgment authority. Not under MAGI Core's chain of command** — independent reviewer per Universal Core.
|
|
11
|
+
- **MAGI Planner / Programmer** — played by MAGI Core during the matching stage (mode switch, not separate processes). **MAGI Tester** — played by the `test-fixer` subagent in a fresh context (Stage 6).
|
|
12
|
+
- **MAGI Reviewer** — `{{junior_reviewers}}` rule-enforcement plugins (backend / frontend / security). Mechanical. Cite rule source; never invent.
|
|
13
|
+
- **MAGI Archivist** — `memory-recall.sh` / `memory-snapshot.sh` hook services.
|
|
14
|
+
|
|
15
|
+
Judgment is MAGI Verdict's; rule enforcement is MAGI Reviewer's; orchestration is MAGI Core's; intent is yours.
|
|
16
|
+
|
|
17
|
+
## Two modes (Stage 1 branches)
|
|
18
|
+
|
|
19
|
+
The workflow runs in two **modes** that share Stages 2–9. Stage 1 differs by mode:
|
|
20
|
+
|
|
21
|
+
- **New-feature mode** — for shipping new features. Stage 1 paraphrases CEO intent, runs an 8-category edge-case round, then writes a plain-language spec.
|
|
22
|
+
- **Audit mode** — for verifying existing features. Stage 1 runs the same intent rounds, then a fresh general-purpose subagent scans the codebase for an as-built read; the auditor independently reviews; CEO decides each delta; output is the same two-file model.
|
|
23
|
+
|
|
24
|
+
Stage-specific tools are in `.harness/` — see `CLAUDE.md § Tool map`.
|
|
25
|
+
|
|
26
|
+
## Full 9-stage description
|
|
27
|
+
|
|
28
|
+
1. **Draft / as-built spec** — `/feature-draft <name>` (new-feature mode) **or** `/audit-spec <name>` (audit mode, fresh-context subagent + auditor review)
|
|
29
|
+
2. **Finalize spec** — `/spec-finalize <name>` (auditor final cross-check)
|
|
30
|
+
3. **Design schema** (when data model changes; **skip if project has no backend**) — `/db-schema <name>`
|
|
31
|
+
4. **Write execution plan** — `/execution-plan <name>` (per-file checklist + auditor judgment audit)
|
|
32
|
+
5. **Implement per plan** — `/implement <name>` (mechanical reviewer chain + auditor judgment)
|
|
33
|
+
6. **Auto tests** — `/test-fix` (test-fixer subagent + auditor audit). **Skipped if `test_required = false`.**
|
|
34
|
+
7. **User smoke test** — CEO runs the application manually against the spec's smoke-test procedures (`{{spec_dir}}<name>.md` only — implementation file not consulted). *Mandated by Constitution § 4.*
|
|
35
|
+
8. **Commit & push** — `/commit` using Conventional Commits, with affected scenario IDs in the message body. Plan file is deleted in this commit. Pushed to GitHub only after **both** the CEO smoke test (Stage 7) **and** the auditor audit have passed.
|
|
36
|
+
9. **Watch after release** — for any change shipped, check `{{error_tracker}}` within 24h for new error groups or a drop in error-free rate. If anything spiked, hotfix or roll back before moving on.
|
|
37
|
+
|
|
38
|
+
Do not reorder stages. Do not advance to the next stage until the current stage's artifact exists or the user has approved skipping. Apart from the conditional skips noted on Stages 3 and 6, stages may only be skipped via one of the two reduced lanes below (stability-fix or trivial-change).
|
|
39
|
+
|
|
40
|
+
## Cross-model audit (operationalizing Constitution § 1)
|
|
41
|
+
|
|
42
|
+
The constitutional invariant is in `./constitution.md § 1`. Below is how it is operationalized stage-by-stage:
|
|
43
|
+
|
|
44
|
+
- Audit strength scales with change size: full review on the standard lanes, BLOCKING-only on the trivial lane.
|
|
45
|
+
- The auditor is invoked at stages 2, 3, 4, 5, 6 (post-fix), and on every commit gate.
|
|
46
|
+
- The auditor emits JSON per `AGENTS.md § Verdict output`.
|
|
47
|
+
- `FAIL` halts the flow; `CONCERNS` advances with a logged warning (see `.harness/audits/concerns-*.json`); `PASS with advisory_items` advances silently; `WAIVED` is a CEO override and is rejected by the gate if any blocking item is `category: "universal-core"`.
|
|
48
|
+
|
|
49
|
+
## Lanes (full description)
|
|
50
|
+
|
|
51
|
+
A change picks one of three lanes; the lane is inferred by MAGI Core (acting as tech lead) and confirmed by the CEO — it is never silently changed mid-flow.
|
|
52
|
+
|
|
53
|
+
**Full workflow.** New feature, intent change (audit delta), schema change, or new external dependency. All 9 stages.
|
|
54
|
+
|
|
55
|
+
**Stability-fix lane.** Bug fix or hotfix where intent is unchanged, no new feature surface, no schema change, no new dependency. Skip stages 1–3. **Failing test is mandatory** (if `test_required = true`) — write it before the fix, confirm it fails on the broken code, then fix and watch it go green. Path-based reviewer auto-fire on the diff (Stage 5) plus auditor audit on the fix correctness + test legitimacy (Stage 6).
|
|
56
|
+
|
|
57
|
+
**Trivial-change lane.** < 20 LOC, no new feature surface, no schema change, no new dependency, no intent change (typo, copy tweak, single-line bug fix, dependency bump). Skip stages 1–3. Stages 4–5 collapse into applying the change with path-based reviewer auto-fire; Stage 6 confirms existing tests still pass. Auditor runs in **Quick mode (BLOCKING-only)** — security, data loss, and outright defects only. Stage 7 (smoke) is skipped only for pure copy/text/translation changes; spot-check any logic change. If the auditor's Quick audit surfaces non-trivial concerns, the lane is wrong — surface to CEO and re-classify.
|
|
58
|
+
|
|
59
|
+
Knowing the lane in advance lets you triage a bug correctly: "panic-fix in 30min" vs. "plan for 48h with a workaround in the meantime."
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# `.harness/scripts/` — Starter Templates + Bootstrap Driver
|
|
2
|
+
|
|
3
|
+
This directory holds:
|
|
4
|
+
|
|
5
|
+
1. **Shell-script templates** that get copied into the user's project at `/init` time (target: `.harness/scripts/`). Each script is a **starter** — the user customizes the parts marked `# CUSTOMIZE:` based on their stack.
|
|
6
|
+
|
|
7
|
+
2. **Standalone bootstrap driver** (`standalone-bootstrap.md`) — an AI-instruction file that runs when the user has cloned CCC-MAGI from GitHub directly (no CCC), the first time they open a CLI in the project.
|
|
8
|
+
|
|
9
|
+
## File inventory
|
|
10
|
+
|
|
11
|
+
| File | Type | Used by | What it does | Required? |
|
|
12
|
+
|------|------|---------|--------------|-----------|
|
|
13
|
+
| `bootstrap-check.sh` | Shell hook | Claude/Codex `UserPromptSubmit` event | Fires on every user prompt. If `.harness/state/install.json` is missing AND `.harness/` is present, injects a directive telling Claude to read `standalone-bootstrap.md` and run the bootstrap flow BEFORE responding. Exits silently if install.json exists. | **Yes — load-bearing** |
|
|
14
|
+
| `standalone-bootstrap.md` | AI driver | CLAUDE.md Bootstrap Status Check + `bootstrap-check.sh` hook | Detects existing harness configs (using AI semantic judgment), presents 3-option menu, archives/deletes other configs, then invokes /init | **Yes — standalone path** |
|
|
15
|
+
| `auditor-gate.sh` | Shell | every audit-gated skill | Invokes the auditor CLI ({{auditor_model}}), parses JSON verdict, returns exit code 0 (PASS / CONCERNS / WAIVED — all advance) / 2 (FAIL — halt) / 1 (script error, Universal Core WAIVED rejected, missing waiver_reason, or legacy verdict). CONCERNS verdicts are logged to `.harness/audits/concerns-*.json`; WAIVED verdicts to `.harness/audits/waivers-*.json`. | **Yes — core** |
|
|
16
|
+
| `precommit-typecheck.sh` | Shell | Claude/Codex hooks (PreToolUse `git commit`) | Runs the project's typecheck before commit | Yes, customize per stack |
|
|
17
|
+
| `lint-bans.sh` | Shell | Claude/Codex hooks (PreToolUse `git commit`) | Greps staged diff for anti-flag patterns | Yes if `{{anti_flag_rules}}` non-empty |
|
|
18
|
+
| `precommit-cycles.sh` | Shell | Claude/Codex hooks (PreToolUse `git commit`) | Runs dependency-cycle check | Optional — only if `{{dependency_flow}}` is non-empty |
|
|
19
|
+
| `format-edit.sh` | Shell | Claude/Codex hooks (PostToolUse Edit\|Write) | Runs the project's formatter on the edited file | Yes, customize per stack |
|
|
20
|
+
| `post-migration.sh` | Shell | `/db-schema` skill (manual invocation) | Backend cache refresh + typed-bindings regeneration | Only if `{{backend_db_type}}` configured |
|
|
21
|
+
| `memory-recall.sh` | Shell hook | Claude/Codex `SessionStart` event | Reads `.harness/memory/observations.jsonl`, scores entries by relevance to the current git branch's feature, injects top-N entries into Claude's `additionalContext`. Silent no-op when memory file is missing or empty. | Yes if memory layer in use |
|
|
22
|
+
| `memory-snapshot.sh` | Shell hook | Claude/Codex `PreCompaction` event | Injects an instruction telling Claude to summarize the session's key decisions into `.harness/memory/observations.jsonl` BEFORE context compaction proceeds. Creates the memory directory/file on first run. | Yes if memory layer in use |
|
|
23
|
+
| `budget-monitor.sh` | Shell hook | Claude/Codex `UserPromptSubmit` event | Reads `transcript_path` from hook input, parses Anthropic-reported `usage` from the most recent assistant turn (falls back to byte/4 estimate). **Auto-detects context budget from transcript's `model` field** (v0.10.3+): `[1m]` suffix → 1M, standard `claude-*` → 200K, `gpt-4*` → 128K, others → 200K safe default. Override via `CCC_CONTEXT_BUDGET` env var. Emits advisory `additionalContext` at 50% / 75% / 90% / 95% with the detected model shown in each message. 95% triggers a deferred end-of-turn 3-option `/compact` / `/handoff` / continue menu. Silent under 50%. Advisory-only — Claude Code doesn't expose runtime model switching to hooks. | Yes (P1.6) |
|
|
24
|
+
|
|
25
|
+
## Why no harness-detect.sh anymore
|
|
26
|
+
|
|
27
|
+
Earlier versions had a `harness-detect.sh` shell script for detecting existing harness installations. It's been **removed** in favor of AI-driven detection inside `standalone-bootstrap.md` (and a parallel CCC-bundled driver on the CCC side).
|
|
28
|
+
|
|
29
|
+
Reasons:
|
|
30
|
+
- Shell-based detection can only match canonical markers (e.g., `.bmad-core/`, `.cursorrules`). Real-world projects often have ad-hoc AI config files (`agent.md`, `agent/harness.md`, etc.) that no static rule catches.
|
|
31
|
+
- AI can read file contents and make semantic judgments about what's harness-related.
|
|
32
|
+
- One uniform mechanism (AI judgment + user confirmation) is simpler than maintaining two layers (shell strict-match + AI fallback).
|
|
33
|
+
- See `CCC_harness_flow.md` § decision 1 for the architectural rationale.
|
|
34
|
+
|
|
35
|
+
## Bash / POSIX only (for shell scripts)
|
|
36
|
+
|
|
37
|
+
All shell scripts target **bash on macOS and Linux**. The harness-detect.sh removal also removed the last `declare -A` (bash 4+) dependency, so the remaining scripts are bash 3.2 compatible (macOS default).
|
|
38
|
+
|
|
39
|
+
## Customization pattern
|
|
40
|
+
|
|
41
|
+
Each shell script has a `# CUSTOMIZE:` block near the top. Edit that block, leave the rest alone:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
# CUSTOMIZE: pick your stack's typecheck command
|
|
45
|
+
# Examples:
|
|
46
|
+
# TypeScript: COMMAND=(npx tsc --noEmit)
|
|
47
|
+
# Python+mypy: COMMAND=(mypy .)
|
|
48
|
+
# Go: COMMAND=(go vet ./...)
|
|
49
|
+
COMMAND=(npx tsc --noEmit)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
`/init` may pre-fill some `CUSTOMIZE` blocks based on detected `tech_stack` — but you're always free to override.
|
|
53
|
+
|
|
54
|
+
## Permissions
|
|
55
|
+
|
|
56
|
+
After copying to your project, make sure shell scripts are executable:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
chmod +x .harness/scripts/*.sh
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
`/init` does this automatically.
|
|
63
|
+
|
|
64
|
+
## Failure mode
|
|
65
|
+
|
|
66
|
+
If a shell script fails (non-zero exit), the calling hook / skill halts. This is by design — a hook that fails loudly is far better than a check that is silently skipped.
|
|
67
|
+
|
|
68
|
+
If you want to temporarily disable a hook, edit `.claude/settings.json` (or `.codex/hooks.json`) to remove the entry, OR make the script `exit 0` early. **Do not delete the script** — other skills may reference it.
|
|
69
|
+
|
|
70
|
+
For `standalone-bootstrap.md`: this file is read by AI, not executed. If you want to disable standalone-bootstrap behavior, edit the Bootstrap Status Check block at the top of `CLAUDE.md` to remove the "read standalone-bootstrap.md" instruction. (Not recommended — without it, new users get no guidance on existing-harness handling.)
|
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# auditor-gate.sh — invoke the auditor CLI on a target artifact, parse the structured
|
|
3
|
+
# verdict, persist the result, and exit with the gate's exit code.
|
|
4
|
+
#
|
|
5
|
+
# Constitution § 1 (cross-model audit is mandatory) — every audit-gated skill
|
|
6
|
+
# invokes this script. It is the load-bearing primitive for the harness.
|
|
7
|
+
#
|
|
8
|
+
# USAGE
|
|
9
|
+
# auditor-gate.sh review <feature> <stage> <focus-text> [target-file]
|
|
10
|
+
# auditor-gate.sh diagnostic <feature> <focus-text> [attempts-file]
|
|
11
|
+
#
|
|
12
|
+
# ENVIRONMENT
|
|
13
|
+
#   AUDITOR_CLI                — "codex" (default) | "claude" | "none"  ("gemini" is not implemented; any other value exits 1)
|
|
14
|
+
#                                "claude" = single-engine fallback (fresh-context same-model); "none" = no audit, auto-PASS with an advisory note
|
|
15
|
+
#   AUDITOR_MODEL_ID           — model version string passed to `codex exec --model` (default: gpt-5.5; only the codex path uses it)
|
|
16
|
+
# AUDITOR_GATE_PRESET — optional preset name (loaded from
|
|
17
|
+
# .harness/scripts/auditor-prompts/<preset>.md if exists)
|
|
18
|
+
# AUDITOR_GATE_TARGET_LABEL — optional human-readable label for the target
|
|
19
|
+
# AUDITOR_GATE_TARGET_MODE — "full" (default) | "diff" | "diff-against:<rev>"
|
|
20
|
+
# full = embed entire target file (legacy behavior)
|
|
21
|
+
# diff = embed `git diff HEAD -- <target>` (working-tree change)
|
|
22
|
+
# diff-against:<rev>= embed `git diff <rev> -- <target>`
|
|
23
|
+
# When diff is empty (untracked / no change), falls back to full.
|
|
24
|
+
# Use diff for code-change audits (Stage 5 implement) to cut input
|
|
25
|
+
# tokens 60-80%. Use full for artifact audits (specs / plans / schemas).
|
|
26
|
+
#
|
|
27
|
+
# PROMPT-CACHE NOTE
|
|
28
|
+
# The prompt is assembled as [PRESET_PREFIX → FOCUS → TARGET]. OpenAI / Anthropic both
|
|
29
|
+
# apply automatic prefix caching when consecutive calls share the same opening tokens.
|
|
30
|
+
# DO NOT reorder these three parts — putting the variable TARGET last keeps the
|
|
31
|
+
# stable prefix cacheable across calls within the 5-min TTL window.
|
|
32
|
+
#
|
|
33
|
+
# EXIT CODES
|
|
34
|
+
# 0 — PASS, CONCERNS, or WAIVED (all advance; caller reads JSON for nuance)
|
|
35
|
+
# 1 — script error (CLI not found, malformed output, IO failure, JSON validation,
|
|
36
|
+
# Universal Core WAIVED attempt, missing waiver_reason, legacy verdict, etc.)
|
|
37
|
+
# 2 — FAIL (halt)
|
|
38
|
+
#
|
|
39
|
+
# OUTPUT FILE
|
|
40
|
+
# review: .harness/state/auditor-approvals/<feature>-stage<N>.json
|
|
41
|
+
#   diagnostic: .harness/state/auditor-approvals/<feature>-stage6-diagnostic.json
|
|
42
|
+
|
|
43
|
+
set -euo pipefail
|
|
44
|
+
|
|
45
|
+
# Ensure brew-installed tools (jq, etc.) are on PATH even in non-interactive
|
|
46
|
+
# shells where ~/.zprofile isn't loaded. macOS Apple Silicon path comes first.
|
|
47
|
+
export PATH="/opt/homebrew/bin:/usr/local/bin:$PATH"
|
|
48
|
+
|
|
49
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
50
|
+
# Args
|
|
51
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
52
|
+
MODE="${1:-}"
|
|
53
|
+
case "$MODE" in
|
|
54
|
+
review)
|
|
55
|
+
FEATURE="${2:?feature name required}"
|
|
56
|
+
STAGE="${3:?stage required}"
|
|
57
|
+
FOCUS="${4:?focus text required}"
|
|
58
|
+
TARGET="${5:-}"
|
|
59
|
+
OUTPUT_SUFFIX="stage${STAGE}"
|
|
60
|
+
;;
|
|
61
|
+
diagnostic)
|
|
62
|
+
FEATURE="${2:?feature name required}"
|
|
63
|
+
FOCUS="${3:?focus text required}"
|
|
64
|
+
TARGET="${4:-}"
|
|
65
|
+
STAGE="6" # diagnostic mode is always Stage 6 escalation
|
|
66
|
+
OUTPUT_SUFFIX="stage6-diagnostic"
|
|
67
|
+
;;
|
|
68
|
+
*)
|
|
69
|
+
echo "usage: $0 review <feature> <stage> <focus> [target]" >&2
|
|
70
|
+
echo " $0 diagnostic <feature> <focus> [attempts-file]" >&2
|
|
71
|
+
exit 1
|
|
72
|
+
;;
|
|
73
|
+
esac
|
|
74
|
+
|
|
75
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
76
|
+
# Resolve auditor + paths
|
|
77
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
78
|
+
AUDITOR_CLI="${AUDITOR_CLI:-codex}"
|
|
79
|
+
AUDITOR_MODEL_ID="${AUDITOR_MODEL_ID:-gpt-5.5}"
|
|
80
|
+
STATE_DIR=".harness/state/auditor-approvals"
|
|
81
|
+
mkdir -p "$STATE_DIR"
|
|
82
|
+
OUTPUT_FILE="$STATE_DIR/${FEATURE}-${OUTPUT_SUFFIX}.json"
|
|
83
|
+
LABEL="${AUDITOR_GATE_TARGET_LABEL:-${FEATURE} stage${STAGE}}"
|
|
84
|
+
|
|
85
|
+
# Load preset focus prefix if specified
|
|
86
|
+
PRESET_PREFIX=""
|
|
87
|
+
if [ -n "${AUDITOR_GATE_PRESET:-}" ]; then
|
|
88
|
+
PRESET_FILE=".harness/scripts/auditor-prompts/${AUDITOR_GATE_PRESET}.md"
|
|
89
|
+
if [ -f "$PRESET_FILE" ]; then
|
|
90
|
+
PRESET_PREFIX="$(cat "$PRESET_FILE")"$'\n\n'
|
|
91
|
+
else
|
|
92
|
+
echo "warning: preset file not found: $PRESET_FILE" >&2
|
|
93
|
+
fi
|
|
94
|
+
fi
|
|
95
|
+
|
|
96
|
+
FULL_PROMPT="${PRESET_PREFIX}${FOCUS}"
|
|
97
|
+
|
|
98
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
99
|
+
# Resolve TARGET content (full file / diff / diff-against:<rev>)
|
|
100
|
+
# Computed once here so both invoke_codex and invoke_claude_fresh share
|
|
101
|
+
# identical TARGET_BLOCK — keeps logic in one place + prompt cache stable.
|
|
102
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
103
|
+
TARGET_MODE="${AUDITOR_GATE_TARGET_MODE:-full}"
|
|
104
|
+
TARGET_BLOCK=""
|
|
105
|
+
|
|
106
|
+
#######################################
# Helper for resolve_target_block: embed the whole TARGET file.
# Globals:   TARGET (read), TARGET_BLOCK (written)
# Returns:   always 0. A missing / non-regular TARGET leaves TARGET_BLOCK
#            untouched instead of returning non-zero, so the caller is never
#            aborted by `set -e` just because there is nothing to embed.
#######################################
_embed_full_target() {
  if [ -f "$TARGET" ]; then
    TARGET_BLOCK=$'\n\n=== TARGET ===\n'"$(cat "$TARGET")"
  fi
  return 0
}

#######################################
# Build the "=== TARGET ===" section that is appended to the auditor prompt.
#
# Modes (TARGET_MODE):
#   full                 embed the whole file
#   diff                 embed `git diff HEAD -- <target>`
#   diff-against:<rev>   embed `git diff <rev> -- <target>`
#   anything else        warn and behave like "full"
# An empty diff (or git missing from PATH) falls back to the full file.
#
# Globals:   TARGET (read), TARGET_MODE (read), TARGET_BLOCK (written)
# Outputs:   warnings / info notes on stderr
# Returns:   always 0; when nothing can be embedded TARGET_BLOCK keeps its
#            previous value.
#######################################
resolve_target_block() {
  # No target supplied: nothing to embed.
  [ -z "${TARGET:-}" ] && return 0

  local mode="${TARGET_MODE:-full}"
  local rev mode_name diff_header empty_note diff_text

  case "$mode" in
    full)
      _embed_full_target
      return 0
      ;;
    diff)
      rev="HEAD"
      mode_name="diff"
      diff_header=$'\n\n=== TARGET (diff) ===\n'
      empty_note="info: AUDITOR_GATE_TARGET_MODE=diff returned empty for $TARGET — falling back to full file"
      ;;
    diff-against:*)
      rev="${mode#diff-against:}"
      mode_name="diff-against"
      # Every literal piece uses $'...' so that \n becomes a real newline
      # (a plain '...' segment would embed a literal backslash-n).
      diff_header=$'\n\n=== TARGET (diff vs '"$rev"$') ===\n'
      empty_note="info: AUDITOR_GATE_TARGET_MODE=diff-against:$rev returned empty — falling back to full file"
      ;;
    *)
      echo "warning: unknown AUDITOR_GATE_TARGET_MODE=$mode — falling back to full" >&2
      _embed_full_target
      return 0
      ;;
  esac

  if ! command -v git >/dev/null 2>&1; then
    echo "warning: TARGET_MODE=$mode_name requested but git not on PATH — falling back to full" >&2
    _embed_full_target
    return 0
  fi

  # git errors (not a repository, unknown revision, ...) are deliberately
  # swallowed: they yield an empty diff, which triggers the full-file fallback.
  diff_text="$(git diff "$rev" -- "$TARGET" 2>/dev/null || true)"

  if [ -n "$diff_text" ]; then
    TARGET_BLOCK="${diff_header}${diff_text}"
  elif [ -f "$TARGET" ]; then
    echo "$empty_note" >&2
    _embed_full_target
  fi
  return 0
}
|
|
154
|
+
|
|
155
|
+
resolve_target_block
|
|
156
|
+
|
|
157
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
158
|
+
# JSON output schema
|
|
159
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
160
|
+
read -r -d '' REVIEW_SCHEMA <<'JSON' || true
|
|
161
|
+
{
|
|
162
|
+
"type": "object",
|
|
163
|
+
"additionalProperties": false,
|
|
164
|
+
"required": ["verdict", "risk_score", "waiver_reason", "blocking_items", "advisory_items"],
|
|
165
|
+
"properties": {
|
|
166
|
+
"verdict": {"type": "string", "enum": ["PASS", "CONCERNS", "FAIL", "WAIVED"]},
|
|
167
|
+
"risk_score": {"type": "integer", "minimum": 0, "maximum": 10},
|
|
168
|
+
"waiver_reason": {"type": "string"},
|
|
169
|
+
"blocking_items": {
|
|
170
|
+
"type": "array",
|
|
171
|
+
"items": {
|
|
172
|
+
"type": "object",
|
|
173
|
+
"additionalProperties": false,
|
|
174
|
+
"required": ["category", "rule_source", "finding"],
|
|
175
|
+
"properties": {
|
|
176
|
+
"category": {"type": "string", "enum": ["universal-core", "strong", "advisory"]},
|
|
177
|
+
"rule_source": {"type": "string"},
|
|
178
|
+
"finding": {"type": "string"}
|
|
179
|
+
}
|
|
180
|
+
}
|
|
181
|
+
},
|
|
182
|
+
"advisory_items": {
|
|
183
|
+
"type": "array",
|
|
184
|
+
"items": {
|
|
185
|
+
"type": "object",
|
|
186
|
+
"additionalProperties": false,
|
|
187
|
+
"required": ["rule_source", "finding"],
|
|
188
|
+
"properties": {
|
|
189
|
+
"rule_source": {"type": "string"},
|
|
190
|
+
"finding": {"type": "string"}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
JSON
|
|
197
|
+
|
|
198
|
+
read -r -d '' DIAGNOSTIC_SCHEMA <<'JSON' || true
|
|
199
|
+
{
|
|
200
|
+
"type": "object",
|
|
201
|
+
"additionalProperties": false,
|
|
202
|
+
"required": ["hypotheses"],
|
|
203
|
+
"properties": {
|
|
204
|
+
"hypotheses": {
|
|
205
|
+
"type": "array",
|
|
206
|
+
"items": {
|
|
207
|
+
"type": "object",
|
|
208
|
+
"additionalProperties": false,
|
|
209
|
+
"required": ["summary", "evidence", "next_step"],
|
|
210
|
+
"properties": {
|
|
211
|
+
"summary": {"type": "string"},
|
|
212
|
+
"evidence": {"type": "string"},
|
|
213
|
+
"next_step": {"type": "string"}
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
JSON
|
|
220
|
+
|
|
221
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
222
|
+
# Invoke the auditor CLI
|
|
223
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
224
|
+
# Note: mktemp template without .json suffix — macOS BSD mktemp doesn't
|
|
225
|
+
# substitute X chars when a dot extension follows. The output file is JSON
|
|
226
|
+
# regardless; the extension doesn't matter for jq parsing.
|
|
227
|
+
TEMP_OUTPUT="$(mktemp /tmp/auditor-gate.XXXXXX)"
|
|
228
|
+
|
|
229
|
+
#######################################
# Run the Codex CLI as auditor and capture its stdout in TEMP_OUTPUT.
#
# Codex CLI 0.130.0+ removed `--file <path>` support, so the TARGET content
# is embedded in the prompt body (TARGET_BLOCK, resolved earlier by
# resolve_target_block according to AUDITOR_GATE_TARGET_MODE).
#
# Globals:   MODE, REVIEW_SCHEMA, DIAGNOSTIC_SCHEMA, AUDITOR_MODEL_ID,
#            FULL_PROMPT, TARGET_BLOCK, TEMP_OUTPUT (all read)
# Outputs:   writes the CLI's stdout to $TEMP_OUTPUT
# Returns:   the exit status of `codex exec` (non-zero if mktemp fails)
#######################################
invoke_codex() {
  # Declared separately from the assignment so a mktemp failure is not
  # masked by `local`'s own (always zero) exit status.
  local schema_file prompt_with_target rc=0
  schema_file="$(mktemp /tmp/schema.XXXXXX)" || return 1

  if [ "$MODE" = "review" ]; then
    printf '%s\n' "$REVIEW_SCHEMA" > "$schema_file"
  else
    printf '%s\n' "$DIAGNOSTIC_SCHEMA" > "$schema_file"
  fi

  # Order matters for prompt caching: stable prefix first, variable target last.
  prompt_with_target="${FULL_PROMPT}${TARGET_BLOCK}"

  # Capture the status instead of letting `set -e` abort here, so the schema
  # temp file is removed on the failure path too.
  codex exec \
    --model "$AUDITOR_MODEL_ID" \
    --output-schema "$schema_file" \
    -- "$prompt_with_target" > "$TEMP_OUTPUT" || rc=$?

  rm -f -- "$schema_file"
  return "$rc"
}
|
|
250
|
+
|
|
251
|
+
#######################################
# Single-engine fallback: ask the Claude CLI for a JSON answer and store its
# stdout in TEMP_OUTPUT. The request is assembled as
# FULL_PROMPT + response-format hint + TARGET_BLOCK (TARGET_BLOCK was
# resolved earlier by resolve_target_block).
#
# Globals:   MODE, FULL_PROMPT, TARGET_BLOCK, TEMP_OUTPUT (all read)
# Outputs:   writes the CLI's stdout to $TEMP_OUTPUT
# Returns:   exit status of the `claude` invocation
#######################################
invoke_claude_fresh() {
  local format_hint

  # Pick the response-shape hint that matches the gate mode.
  case "$MODE" in
    review)
      format_hint=$'\n\nRespond with JSON only matching schema: {"verdict": "PASS" | "CONCERNS" | "FAIL" | "WAIVED", "risk_score": 0-10, "waiver_reason": "string (required if WAIVED)", "blocking_items": [{"category": "universal-core" | "strong" | "advisory", "rule_source": "...", "finding": "..."}], "advisory_items": [{"rule_source": "...", "finding": "..."}]}'
      ;;
    *)
      format_hint=$'\n\nRespond with JSON only matching schema: {"hypotheses": [{"summary": "...", "evidence": "...", "next_step": "..."}, ...]}'
      ;;
  esac

  local request="${FULL_PROMPT}${format_hint}${TARGET_BLOCK}"

  # NOTE(review): confirm the installed CLI accepts these flags and prints the
  # bare verdict JSON on stdout — the parsing step reads .verdict at top level.
  claude --output-format json --no-session -- "$request" > "$TEMP_OUTPUT"
}
|
|
264
|
+
|
|
265
|
+
#######################################
# AUDITOR_CLI=none: no auditor is run. Writes a synthetic PASS verdict
# (risk_score 0, one advisory item recording that the audit was skipped) so
# the normal parse/persist/exit path can proceed; the caller decides what to
# make of the advisory.
#
# The payload carries every key REVIEW_SCHEMA lists as required, including an
# empty waiver_reason, so it has the same shape as a real auditor response.
#
# Globals:   TEMP_OUTPUT (read)
# Outputs:   warning on stderr; JSON verdict written to $TEMP_OUTPUT
#######################################
invoke_none() {
  echo "warning: AUDITOR_CLI=none — emitting auto-PASS without verification" >&2
  # Quoted delimiter: the payload is literal, nothing in it is expanded.
  cat > "$TEMP_OUTPUT" <<'JSON'
{
  "verdict": "PASS",
  "risk_score": 0,
  "waiver_reason": "",
  "blocking_items": [],
  "advisory_items": [
    {
      "rule_source": "constitution.md § 1",
      "finding": "auditor skipped (AUDITOR_CLI=none); single-engine fallback not engaged either — config error?"
    }
  ]
}
JSON
}
|
|
282
|
+
|
|
283
|
+
case "$AUDITOR_CLI" in
|
|
284
|
+
codex) invoke_codex ;;
|
|
285
|
+
claude) invoke_claude_fresh ;;
|
|
286
|
+
none) invoke_none ;;
|
|
287
|
+
*)
|
|
288
|
+
echo "unknown AUDITOR_CLI: $AUDITOR_CLI" >&2
|
|
289
|
+
exit 1
|
|
290
|
+
;;
|
|
291
|
+
esac
|
|
292
|
+
|
|
293
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
294
|
+
# Parse + persist
|
|
295
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
296
|
+
if ! command -v jq >/dev/null 2>&1; then
|
|
297
|
+
echo "auditor-gate.sh requires jq. Install with: brew install jq" >&2
|
|
298
|
+
cp "$TEMP_OUTPUT" "$OUTPUT_FILE"
|
|
299
|
+
exit 1
|
|
300
|
+
fi
|
|
301
|
+
|
|
302
|
+
# Validate JSON
|
|
303
|
+
if ! jq empty "$TEMP_OUTPUT" 2>/dev/null; then
|
|
304
|
+
echo "auditor returned non-JSON output:" >&2
|
|
305
|
+
cat "$TEMP_OUTPUT" >&2
|
|
306
|
+
cp "$TEMP_OUTPUT" "$OUTPUT_FILE"
|
|
307
|
+
exit 1
|
|
308
|
+
fi
|
|
309
|
+
|
|
310
|
+
# Persist
|
|
311
|
+
mv "$TEMP_OUTPUT" "$OUTPUT_FILE"
|
|
312
|
+
|
|
313
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
314
|
+
# Exit on verdict
|
|
315
|
+
# ─────────────────────────────────────────────────────────────────────
|
|
316
|
+
if [ "$MODE" = "diagnostic" ]; then
|
|
317
|
+
# Diagnostic mode doesn't have a verdict — just exit 0 if hypotheses present.
|
|
318
|
+
HYPOTHESIS_COUNT=$(jq '.hypotheses | length' "$OUTPUT_FILE")
|
|
319
|
+
if [ "$HYPOTHESIS_COUNT" -gt 0 ]; then
|
|
320
|
+
echo "✓ Diagnostic complete: $HYPOTHESIS_COUNT hypothesis/hypotheses written to $OUTPUT_FILE"
|
|
321
|
+
exit 0
|
|
322
|
+
else
|
|
323
|
+
echo "✗ Diagnostic returned no hypotheses" >&2
|
|
324
|
+
exit 1
|
|
325
|
+
fi
|
|
326
|
+
fi
|
|
327
|
+
|
|
328
|
+
# Review mode: translate the persisted verdict into the gate's exit code.
#   PASS / CONCERNS / WAIVED -> exit 0 (advance)
#   FAIL                     -> exit 2 (halt)
#   rejected waiver, legacy or unknown verdict -> exit 1 (script error)
# Reads OUTPUT_FILE, LABEL, FEATURE and STAGE set earlier in the script.
VERDICT=$(jq -r '.verdict' "$OUTPUT_FILE")
RISK_SCORE=$(jq -r '.risk_score // 0' "$OUTPUT_FILE")
BLOCKING_COUNT=$(jq '.blocking_items // [] | length' "$OUTPUT_FILE")
ADVISORY_COUNT=$(jq '.advisory_items // [] | length' "$OUTPUT_FILE")

case "$VERDICT" in
  PASS)
    echo "✓ ${LABEL}: PASS (risk_score=${RISK_SCORE})"
    if [ "$ADVISORY_COUNT" -gt 0 ]; then
      echo "  (note: $ADVISORY_COUNT advisory item(s) in $OUTPUT_FILE)"
    fi
    exit 0
    ;;
  CONCERNS)
    # Log to .harness/audits/ for CEO review
    AUDIT_DIR=".harness/audits"
    mkdir -p "$AUDIT_DIR"
    TIMESTAMP=$(date +%Y%m%d-%H%M%S)
    CONCERNS_FILE="$AUDIT_DIR/concerns-${FEATURE}-stage${STAGE}-${TIMESTAMP}.json"
    cp "$OUTPUT_FILE" "$CONCERNS_FILE"
    echo "⚠ ${LABEL}: CONCERNS (risk_score=${RISK_SCORE}, $BLOCKING_COUNT item(s); advancing)"
    echo "  Logged to: $CONCERNS_FILE"
    echo "  CEO should review before commit."
    exit 0
    ;;
  FAIL)
    echo "✗ ${LABEL}: FAIL (risk_score=${RISK_SCORE}, $BLOCKING_COUNT blocking item(s))"
    echo "  See: $OUTPUT_FILE"
    exit 2
    ;;
  WAIVED)
    # Verify no Universal Core items are being waived
    UNIVERSAL_CORE_COUNT=$(jq '[.blocking_items[]? | select(.category == "universal-core")] | length' "$OUTPUT_FILE")
    if [ "$UNIVERSAL_CORE_COUNT" -gt 0 ]; then
      echo "✗ ${LABEL}: WAIVED rejected — $UNIVERSAL_CORE_COUNT Universal Core item(s) cannot be waived (constitution.md § 3 — CEO has final authority EXCEPT on Universal Core)" >&2
      echo "  See: $OUTPUT_FILE" >&2
      exit 1
    fi
    WAIVER_REASON=$(jq -r '.waiver_reason // "(no reason given)"' "$OUTPUT_FILE")
    # The output schema makes waiver_reason a required string, so an auditor
    # can satisfy it with "" — treat an empty or whitespace-only reason as
    # missing, alongside the absent/null cases.
    if [ -z "${WAIVER_REASON//[[:space:]]/}" ] \
      || [ "$WAIVER_REASON" = "(no reason given)" ] \
      || [ "$WAIVER_REASON" = "null" ]; then
      echo "✗ ${LABEL}: WAIVED rejected — waiver_reason is required" >&2
      exit 1
    fi
    AUDIT_DIR=".harness/audits"
    mkdir -p "$AUDIT_DIR"
    TIMESTAMP=$(date +%Y%m%d-%H%M%S)
    WAIVER_FILE="$AUDIT_DIR/waivers-${FEATURE}-stage${STAGE}-${TIMESTAMP}.json"
    cp "$OUTPUT_FILE" "$WAIVER_FILE"
    echo "⚠ ${LABEL}: WAIVED (advancing; reason: $WAIVER_REASON)"
    echo "  Logged to: $WAIVER_FILE"
    exit 0
    ;;
  APPROVE|"REQUEST CHANGES")
    echo "auditor returned legacy verdict '$VERDICT' — schema migration incomplete. Update the auditor's prompt to use PASS/CONCERNS/FAIL/WAIVED." >&2
    exit 1
    ;;
  *)
    echo "auditor returned unexpected verdict: $VERDICT" >&2
    exit 1
    ;;
esac
|