@hallucination-studio/harness-engine 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/LICENSE +21 -0
  2. package/README.md +185 -27
  3. package/bin/install.js +29 -17
  4. package/package.json +10 -4
  5. package/skills/harness-engine/SKILL.md +97 -0
  6. package/skills/harness-engine/agents/openai.yaml +4 -0
  7. package/skills/harness-engine/evals/cases.json +94 -0
  8. package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
  9. package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
  10. package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
  11. package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
  12. package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
  13. package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
  14. package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
  15. package/skills/harness-engine/evals/run_evals.py +14 -0
  16. package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
  17. package/skills/harness-engine/references/evidence-first-evals.md +187 -0
  18. package/skills/harness-engine/references/exec-plans.md +59 -0
  19. package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
  20. package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
  21. package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
  22. package/skills/harness-engine/references/template-policy.md +17 -0
  23. package/skills/harness-engine/references/workflow.md +62 -0
  24. package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
  25. package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
  26. package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
  27. package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
  28. package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
  29. package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
  30. package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
  31. package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
  32. package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
  33. package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
  34. package/skills/harness-engine/scripts/manage_harness.py +14 -0
  35. package/skills/harness-repo-bootstrap/SKILL.md +0 -68
  36. package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
  37. package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
  38. package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
  39. package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
  40. package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
  41. package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
  42. package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
  43. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
  44. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
  45. /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
@@ -0,0 +1,124 @@
1
+ from .common import *
2
+
3
def make_default_answers(analysis):
    """Build the placeholder answers used when no human answer is supplied.

    Args:
        analysis: Mapping describing the repository. The keys
            ``project_name``, ``frameworks`` and ``has_frontend`` are
            required; ``frontend_style_files`` is optional.

    Returns:
        dict: Default answer text keyed by template placeholder name.
    """
    project = analysis["project_name"]
    framework_list = ", ".join(analysis["frameworks"]) or "Unknown"
    detected_style_files = analysis.get("frontend_style_files") or []
    if detected_style_files:
        style_notes = ", ".join(detected_style_files)
    else:
        style_notes = "No shared style, theme, token, or component style files detected yet."

    # Every frontend-dependent answer is chosen in one place so the two
    # variants of the wording can be compared side by side.
    if analysis["has_frontend"]:
        scope = "User-facing or operator-facing frontend work is expected."
        validation_loop = (
            "- Run local UI changes in a browser.\n"
            "- Check desktop and mobile layouts when relevant.\n"
            "- Verify key flows, empty states, and failure states.\n"
            "- Record reusable UI findings in `docs/design-docs/`."
        )
        stack_notes = (
            f"Detected frameworks: {framework_list}. Describe UX expectations, supported environments, and review rules."
        )
        style_direction = (
            "Describe the concrete visual direction before major UI work: reference point, mood, density, palette, typography, component shape, and hard don'ts."
        )
    else:
        scope = "No clear frontend surface was detected yet. Update this if a UI emerges."
        validation_loop = (
            "- Validate interface changes in the relevant local runtime.\n"
            "- Verify key flows, empty states, failure states, and cleanup behavior where applicable.\n"
            "- Record reusable interface findings in `docs/design-docs/`."
        )
        stack_notes = "No frontend detected. Replace this if the repo includes UI work."
        style_direction = "No frontend detected."

    return {
        "project_name": project,
        "project_summary": f"Summarize the main outcome that {project} should deliver.",
        "primary_users": "Describe the primary users, operators, or internal teams.",
        "deployment_targets": "Describe the main runtime or deployment targets.",
        "product_domain": "Describe the product domain in one line.",
        "reliability_targets": "Describe uptime, failure tolerance, recovery expectations, and required validation loops.",
        "security_constraints": "Describe auth, secrets, compliance, sensitive data, and review constraints.",
        "frontend_stack_notes": stack_notes,
        "design_style_direction": style_direction,
        "existing_frontend_style_notes": style_notes,
        "quality_focus": "List the product areas and architectural layers that deserve the strictest quality bar.",
        "frontend_scope": scope,
        "frontend_validation_loop": validation_loop,
    }
48
+
49
+
50
def fill_template(template, answers, analysis):
    """Render ``template`` with defaults, supplied answers and analysis facts.

    Values are layered in increasing priority: the defaults from
    ``make_default_answers``, then ``answers``, then the four keys set here
    (``marker``, ``languages``, ``package_managers``, ``frameworks``), which
    therefore cannot be overridden by ``answers``.

    Args:
        template: ``str.format``-style template text.
        answers: Mapping of placeholder name to replacement text.
        analysis: Repository analysis mapping; ``languages``,
            ``package_managers`` and ``frameworks`` are read here.

    Returns:
        str: The formatted template.
    """
    languages = ", ".join(analysis["languages"]) or "Unknown"
    package_managers = ", ".join(analysis["package_managers"]) or "Unknown"
    frameworks = ", ".join(analysis["frameworks"]) or "Unknown"

    values = dict(make_default_answers(analysis))
    values.update(answers)
    # Analysis-derived keys are assigned last so they always win.
    values["marker"] = MANAGED_MARKER
    values["languages"] = languages
    values["package_managers"] = package_managers
    values["frameworks"] = frameworks
    return template.format(**values)
63
+
64
+
65
def ensure_parent(path):
    """Create the directory that will contain ``path``, including ancestors.

    An already existing directory is not an error.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
67
+
68
+
69
def is_managed_text(text):
    """Return True when ``text`` carries the current managed marker.

    Two forms are recognised: text that begins with ``MANAGED_MARKER``, or
    text that begins with ``---`` and has a
    ``source: harness-engine-template`` line within its first 500 characters.
    """
    if text.startswith(MANAGED_MARKER):
        return True
    if not text.startswith("---"):
        return False
    # Only the head of the text is searched for the source line.
    head = text[:500]
    return "\nsource: harness-engine-template\n" in head
73
+
74
+
75
def is_obsolete_managed_text(text):
    """Return True when ``text`` begins with any entry of ``OBSOLETE_MANAGED_MARKERS``."""
    for obsolete_marker in OBSOLETE_MANAGED_MARKERS:
        if text.startswith(obsolete_marker):
            return True
    return False
77
+
78
+
79
def is_harness_owned_text(text):
    """Return True when ``text`` has either a current or an obsolete managed marker."""
    if is_managed_text(text):
        return True
    return is_obsolete_managed_text(text)
81
+
82
+
83
def should_write(path, refresh_managed, force):
    """Decide whether the scaffold may write to ``path``.

    Args:
        path: Target file path.
        refresh_managed: When true, existing harness-owned files may be
            rewritten.
        force: When true, any existing file may be rewritten.

    Returns:
        bool: True for a missing file, for any file when ``force`` is set,
        and for a harness-owned file when ``refresh_managed`` is set.
        False otherwise, including when the existing file cannot be decoded
        as text.
    """
    if not path.exists() or force:
        return True
    try:
        harness_owned = is_harness_owned_text(path.read_text())
    except UnicodeDecodeError:
        # Undecodable content is never overwritten implicitly.
        return False
    return bool(refresh_managed and harness_owned)
95
+
96
+
97
def write_scaffold(repo, analysis, answers, refresh_managed=False, force=False):
    """Render every applicable template into ``repo``.

    The template set is ``ROOT_FILES`` plus ``DOC_FILES``, extended with
    ``FRONTEND_DOC_FILES`` when ``analysis["has_frontend"]`` is truthy.
    Whether each target may be written is decided by ``should_write``.

    Args:
        repo: Repository root; template keys are joined onto it with ``/``.
        analysis: Repository analysis mapping.
        answers: Mapping of placeholder name to replacement text.
        refresh_managed: Passed through to ``should_write``.
        force: Passed through to ``should_write``.

    Returns:
        tuple: ``(written, skipped, created, refreshed)`` lists of relative
        paths. Each written path also appears in ``created`` (target was
        absent) or ``refreshed`` (target already existed).
    """
    templates = dict(ROOT_FILES)
    templates.update(DOC_FILES)
    if analysis["has_frontend"]:
        templates.update(FRONTEND_DOC_FILES)

    written, skipped, created, refreshed = [], [], [], []
    for relative_path, template in templates.items():
        target = repo / relative_path
        # Sampled before writing so new files can be told apart from refreshes.
        already_present = target.exists()
        if not should_write(target, refresh_managed, force):
            skipped.append(relative_path)
            continue
        ensure_parent(target)
        target.write_text(fill_template(template, answers, analysis))
        written.append(relative_path)
        bucket = refreshed if already_present else created
        bucket.append(relative_path)
    return written, skipped, created, refreshed
123
+
124
+
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env python3
2
+
3
+ from pathlib import Path
4
+ import sys
5
+
6
# Directory that contains this script.
SCRIPT_DIR = Path(__file__).resolve().parent
# Put the script directory at the front of sys.path (only once) so the
# harness_engine import below resolves regardless of the caller's
# working directory.
if str(SCRIPT_DIR) not in sys.path:
    sys.path.insert(0, str(SCRIPT_DIR))

# Must stay below the sys.path setup above.
from harness_engine.cli import main


if __name__ == "__main__":
    main()
@@ -1,68 +0,0 @@
1
- ---
2
- name: harness-repo-bootstrap
3
- description: Bootstrap or refresh an advanced harness-engineering repository shape for Codex-driven projects. Use when Codex needs to analyze a repository, ask the human to confirm high-impact product and architecture facts, and then create or update AGENTS.md, architecture docs, policy docs, plan folders, reference folders, and SOP-backed starter files for the repository.
4
- ---
5
-
6
- # Harness Repo Bootstrap
7
-
8
- Run the packaged script to inspect the target repository before editing files. Use the generated analysis to decide what to ask the human, what durable knowledge is missing from the repo, and which execution-plan and SOP files must be created or updated.
9
-
10
- ## Workflow
11
-
12
- 1. Run `python3 scripts/manage_harness.py analyze --repo <target-repo> --output <analysis.json>`.
13
- 2. Read `analysis.json`.
14
- 3. Ask the human only the unresolved, high-impact questions from `human_confirmations`.
15
- 4. Run `python3 scripts/manage_harness.py sample-answers --analysis <analysis.json> --output <answers.json>`.
16
- 5. Fill the placeholders in `answers.json` from the repository and the human's confirmed answers.
17
- 6. Run one of:
18
- - `python3 scripts/manage_harness.py init --repo <target-repo> --answers <answers.json>`
19
- - `python3 scripts/manage_harness.py update --repo <target-repo> --answers <answers.json>`
20
- 7. If the task is multi-step, run `python3 scripts/manage_harness.py plan-start --repo <target-repo> --slug <task-name> --goal "<goal>"`.
21
- 8. If you learn durable facts during the work, run `python3 scripts/manage_harness.py knowledge-log --repo <target-repo> --plan <plan-file> --fact "<fact>" --destination <durable-doc>` and keep the returned `id`.
22
- 9. Before closing the task, write those facts into their durable docs.
23
- 10. Run `python3 scripts/manage_harness.py knowledge-mark-written --repo <target-repo> --plan <plan-file> --id <knowledge-id> --evidence "<text already in durable doc>"`; use `--append` only when the exact fact should be appended mechanically.
24
- 11. Close the plan with `python3 scripts/manage_harness.py plan-close --repo <target-repo> --plan <plan-file> --summary "<summary>"`.
25
- 12. Before handoff, run `python3 .codex/skills/harness-repo-bootstrap/scripts/manage_harness.py check --repo <target-repo>` from an installed target repository.
26
- 13. After changing this skill, run `python3 evals/run_evals.py` and iterate until it passes.
27
-
28
- ## Reading Order
29
-
30
- - Read [references/workflow.md](references/workflow.md) first for the operating model and question policy.
31
- - Read [references/file-map.md](references/file-map.md) when deciding which generated file to update.
32
- - Read [references/question-catalog.md](references/question-catalog.md) when the analysis surfaces ambiguous product, security, reliability, or frontend facts.
33
- - Read [references/knowledge-capture.md](references/knowledge-capture.md) when you discover facts that should survive chat history.
34
- - Read [references/exec-plans.md](references/exec-plans.md) before planning or updating any multi-step work.
35
- - Read [references/sop-index.md](references/sop-index.md) to choose the right SOP for architecture, UI validation, observability, or knowledge capture work.
36
- - Read [references/template-policy.md](references/template-policy.md) before overwriting existing files.
37
- - Read [references/evaluation-loop.md](references/evaluation-loop.md) before changing the skill, templates, scripts, or policy references.
38
-
39
- ## Command Rules
40
-
41
- - Prefer `analyze` before `init` or `update`.
42
- - Prefer the draft, test, evaluate, iterate loop for changes to this skill.
43
- - Prefer `init` when the target repo has none of the managed files.
44
- - Prefer `update` when the repo already contains any managed file or a partial harness layout.
45
- - Do not overwrite existing files unless the human asked for it or you pass `--force`.
46
- - Treat the generated files as starting points. After generation, tighten them with repository-specific details instead of leaving placeholders behind.
47
- - Treat `docs/exec-plans/` as required state for multi-step work, not optional notes.
48
- - Treat `docs/sops/` as mechanical operating procedures, not background reading.
49
- - When you answer a question using facts that are not yet in the repo but should be reusable, write them into a durable doc before finishing.
50
- - Prefer `knowledge-mark-written --id ... --evidence ...` so durable docs can use natural wording instead of duplicated exact fact strings.
51
- - Use `plan-close` as the final guardrail so plan state and durable docs stay synchronized.
52
- - Use `check` as the local handoff guardrail for user repositories.
53
- - Run `python3 evals/run_evals.py` after skill changes and treat failures as iteration input.
54
- - Do not add CI to user repositories unless the human explicitly asks for it.
55
-
56
- ## Output Rules
57
-
58
- - Keep `AGENTS.md` short and routing-oriented.
59
- - Keep durable knowledge in repo docs, not in chat-only explanations.
60
- - Keep plans under `docs/exec-plans/active/` and move finished plans to `docs/exec-plans/completed/`.
61
- - Keep generated material under `docs/generated/`.
62
- - Keep external, model-friendly references under `docs/references/`.
63
- - Keep SOPs explicit and task-triggered so the next agent can follow the same path mechanically.
64
-
65
- ## Assets
66
-
67
- - Scaffold templates live under [assets/repo-template](assets/repo-template).
68
- - SOP starter docs live under [assets/sops](assets/sops).
@@ -1,4 +0,0 @@
1
- interface:
2
- display_name: "Harness Repo Bootstrap"
3
- short_description: "Scaffold advanced Codex harness docs"
4
- default_prompt: "Use $harness-repo-bootstrap to analyze this repository and scaffold or refresh its advanced harness documentation."
@@ -1,18 +0,0 @@
1
- [
2
- {
3
- "id": "empty-repo-init",
4
- "description": "Empty repositories should receive the full advanced harness scaffold."
5
- },
6
- {
7
- "id": "frontend-analysis",
8
- "description": "Frontend repositories should trigger frontend-specific confirmation and policy output."
9
- },
10
- {
11
- "id": "closed-loop-plan",
12
- "description": "Execution plans should refuse to close until durable knowledge is written back."
13
- },
14
- {
15
- "id": "preserve-unmanaged-docs",
16
- "description": "Existing user-owned harness files should be skipped unless explicitly forced."
17
- }
18
- ]
@@ -1,337 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- import json
4
- import subprocess
5
- import sys
6
- import tempfile
7
- from pathlib import Path
8
-
9
# Skill root: the parent of the directory that holds this script.
SKILL_DIR = Path(__file__).resolve().parents[1]
# Manager script that the evals run as a subprocess.
MANAGER = SKILL_DIR / "scripts" / "manage_harness.py"
11
-
12
-
13
def run_manager(*args, expect_success=True):
    """Run the manager script with ``args`` and return its parsed JSON output.

    Args:
        *args: Command-line arguments appended after the script path.
        expect_success: When true a non-zero exit code is a failure; when
            false a zero exit code is a failure.

    Returns:
        The JSON-decoded stdout, or ``{}`` when stdout is blank.

    Raises:
        AssertionError: When the exit code contradicts ``expect_success``.
    """
    completed = subprocess.run(
        [sys.executable, str(MANAGER), *args],
        text=True,
        capture_output=True,
        check=False,
    )
    succeeded = completed.returncode == 0
    if expect_success and not succeeded:
        raise AssertionError(completed.stderr or completed.stdout)
    if not expect_success and succeeded:
        raise AssertionError("Command succeeded unexpectedly")
    output = completed.stdout
    return json.loads(output) if output.strip() else {}
27
-
28
-
29
def write_answers(path, project_name="demo"):
    """Write a fixed set of sample answers to ``path`` as indented JSON.

    Args:
        path: Destination file path.
        project_name: Value stored under the ``project_name`` key.
    """
    answers = dict(
        project_name=project_name,
        project_summary="A developer tooling project used to install and maintain Codex harness docs.",
        primary_users="Codex users and maintainers",
        deployment_targets="npm package and local repositories",
        product_domain="developer tooling",
        reliability_targets="Repeatable local commands and safe update behavior",
        security_constraints="Do not write secrets or overwrite user-owned docs without consent",
        frontend_stack_notes="Frontend changes require browser validation when a UI is detected",
        quality_focus="installer behavior, generated docs, plan closure, and knowledge capture",
        frontend_scope="No frontend unless one is detected by analysis",
    )
    serialized = json.dumps(answers, indent=2)
    path.write_text(f"{serialized}\n")
43
-
44
-
45
def assert_exists(repo, relative_path):
    """Raise AssertionError unless ``relative_path`` exists under ``repo``."""
    if not (repo / relative_path).exists():
        raise AssertionError(f"Expected {relative_path} to exist")
49
-
50
-
51
def assert_contains(repo, relative_path, needle):
    """Raise AssertionError unless the file's text contains ``needle``."""
    file_text = (repo / relative_path).read_text()
    if needle in file_text:
        return
    raise AssertionError(f"Expected {relative_path} to contain {needle!r}")
55
-
56
-
57
def test_empty_repo_init(tmp_root):
    """Check that an empty repository is analysed as ``init`` and scaffolded."""
    repo = tmp_root / "empty-repo"
    repo.mkdir()
    answers = tmp_root / "answers.json"
    write_answers(answers)
    repo_arg = str(repo)

    analysis = run_manager("analyze", "--repo", repo_arg)
    if analysis["recommended_action"] != "init":
        raise AssertionError("Empty repo should recommend init")
    if not analysis["missing_exec_plan_state"]:
        raise AssertionError("Analysis should report missing exec-plan state")
    if not analysis["missing_sops"]:
        raise AssertionError("Analysis should report missing SOPs")

    run_manager("init", "--repo", repo_arg, "--answers", str(answers))
    expected_files = (
        "AGENTS.md",
        "ARCHITECTURE.md",
        "docs/PLANS.md",
        "docs/QUALITY_SCORE.md",
        "docs/exec-plans/active/_template.md",
        "docs/exec-plans/completed/README.md",
        "docs/sops/encode-unseen-knowledge.md",
    )
    for expected in expected_files:
        assert_exists(repo, expected)

    # The generated router must point at plans, SOPs and the check command.
    router_needles = (
        "docs/exec-plans/active/",
        "docs/sops/",
        ".codex/skills/harness-repo-bootstrap/scripts/manage_harness.py check",
    )
    for needle in router_needles:
        assert_contains(repo, "AGENTS.md", needle)
85
-
86
-
87
def test_frontend_analysis(tmp_root):
    """Check that a React/Vite repository is analysed as having a frontend."""
    repo = tmp_root / "frontend-repo"
    repo.mkdir()
    manifest = {
        "dependencies": {
            "react": "^19.0.0",
            "vite": "^6.0.0",
        }
    }
    (repo / "package.json").write_text(json.dumps(manifest, indent=2) + "\n")
    source_dir = repo / "src"
    source_dir.mkdir()
    (source_dir / "App.tsx").write_text("export default function App() { return null; }\n")

    analysis = run_manager("analyze", "--repo", str(repo))
    question_ids = {question["id"] for question in analysis["human_confirmations"]}
    if not analysis["has_frontend"]:
        raise AssertionError("Frontend repo should be detected")
    if "frontend_stack_notes" not in question_ids:
        raise AssertionError("Frontend repo should ask frontend confirmation questions")
    if "React" not in analysis["frameworks"]:
        raise AssertionError("React should be detected")
113
-
114
-
115
def test_closed_loop_plan(tmp_root):
    """Exercise the plan lifecycle: start, log knowledge, mark written, close.

    Three flows run against one repository: a fact closed via ``--append``,
    a fact in a hand-formatted plan, and a fact closed via ``--id`` and
    ``--evidence``.
    """
    repo = tmp_root / "loop-repo"
    repo.mkdir()
    (repo / "snake.sh").write_text("#!/usr/bin/env bash\nprintf 'snake\\n'\n")
    skill_scripts = repo / ".codex" / "skills" / "demo" / "scripts"
    skill_scripts.mkdir(parents=True)
    (skill_scripts / "tool.py").write_text("print('ignore me')\n")
    answers = tmp_root / "loop-answers.json"
    write_answers(answers, project_name="loop-demo")
    repo_args = ("--repo", str(repo))

    analysis = run_manager("analyze", *repo_args)
    if "Shell" not in analysis["languages"]:
        raise AssertionError("Shell should be detected from target project files")
    if "Python" in analysis["languages"]:
        raise AssertionError(".codex skill files should not affect target project language detection")
    run_manager("init", *repo_args, "--answers", str(answers))

    # Flow 1: exact-fact closure via --append.
    plan_result = run_manager(
        "plan-start", *repo_args, "--slug", "knowledge-loop", "--goal", "Validate durable knowledge closure"
    )
    plan_path = Path(plan_result["plan"])
    relative_plan = str(plan_path.resolve().relative_to(repo.resolve()))
    fact = "Install mode must distinguish local and global skill destinations"
    product_doc = "docs/PRODUCT_SENSE.md"
    run_manager("knowledge-log", *repo_args, "--plan", relative_plan, "--fact", fact, "--destination", product_doc)
    # Closing must fail while the logged fact is still unwritten.
    run_manager("plan-close", *repo_args, "--plan", relative_plan, "--summary", "done", expect_success=False)
    # Marking as written must fail because the fact is not in the doc yet.
    run_manager(
        "knowledge-mark-written",
        *repo_args,
        "--plan",
        relative_plan,
        "--fact",
        fact,
        "--destination",
        product_doc,
        expect_success=False,
    )
    run_manager(
        "knowledge-mark-written",
        *repo_args,
        "--plan",
        relative_plan,
        "--fact",
        fact,
        "--destination",
        product_doc,
        "--append",
    )
    assert_contains(repo, product_doc, fact)
    close_result = run_manager(
        "plan-close", *repo_args, "--plan", relative_plan, "--summary", "Closed after writing durable knowledge."
    )
    if close_result["status"] != "closed":
        raise AssertionError("Plan should close after knowledge is marked written")
    if plan_path.exists():
        raise AssertionError("Active plan should be moved after close")
    assert_exists(repo, "docs/exec-plans/completed/" + plan_path.name)
    check_result = run_manager("check", *repo_args)
    if check_result["status"] != "pass":
        raise AssertionError("Harness check should pass after plan closure")

    # Flow 2: the plan and the doc use backticks, the --fact argument does not.
    formatted_plan = create_formatted_plan(repo)
    formatted_relative_plan = str(formatted_plan.resolve().relative_to(repo.resolve()))
    formatted_fact = "snake.sh is the single runtime entrypoint and owns terminal control directly with stty and tput"
    architecture_doc = repo / "ARCHITECTURE.md"
    with architecture_doc.open("a") as handle:
        handle.write("\n`snake.sh` is the single runtime entrypoint and owns terminal control directly with `stty` and `tput`.\n")
    run_manager(
        "knowledge-mark-written",
        *repo_args,
        "--plan",
        formatted_relative_plan,
        "--fact",
        formatted_fact,
        "--destination",
        "ARCHITECTURE.md",
    )

    # Flow 3: closure by knowledge id with evidence text.
    id_plan_result = run_manager(
        "plan-start", *repo_args, "--slug", "id-knowledge-loop", "--goal", "Validate id-based durable knowledge closure"
    )
    id_plan_path = Path(id_plan_result["plan"])
    id_relative_plan = str(id_plan_path.resolve().relative_to(repo.resolve()))
    id_fact = "Runtime input is owned by the terminal runner and core game logic remains independent of terminal packages"
    log_result = run_manager(
        "knowledge-log", *repo_args, "--plan", id_relative_plan, "--fact", id_fact, "--destination", "ARCHITECTURE.md"
    )
    with architecture_doc.open("a") as handle:
        handle.write(
            "\nThe `main` package owns keyboard input and rendering, while `game` contains pure state transitions.\n"
        )
    run_manager(
        "knowledge-mark-written",
        *repo_args,
        "--plan",
        id_relative_plan,
        "--id",
        log_result["id"],
        "--evidence",
        "main package owns keyboard input and rendering",
    )
    plan_text = id_plan_path.read_text()
    if id_fact in architecture_doc.read_text():
        raise AssertionError("Id/evidence closure should not require appending the exact fact to the destination")
    if "| evidence: main package owns keyboard input and rendering" not in plan_text:
        raise AssertionError("Closed knowledge item should record the verification evidence")
    run_manager("plan-close", *repo_args, "--plan", id_relative_plan, "--summary", "Closed with id-based evidence.")
275
-
276
-
277
def create_formatted_plan(repo):
    """Write a hand-formatted active plan with one open knowledge item.

    The ``docs/exec-plans/active`` directory must already exist.

    Returns:
        The path of the plan file that was written.
    """
    plan_lines = [
        "# Execution Plan: Formatted Plan",
        "",
        "## Durable Knowledge To Capture",
        "",
        "- [ ] `snake.sh` is the single runtime entrypoint and owns terminal control directly with `stty` and `tput`. -> `ARCHITECTURE.md`",
    ]
    plan_path = repo / "docs" / "exec-plans" / "active" / "formatted-plan.md"
    plan_path.write_text("\n".join(plan_lines) + "\n")
    return plan_path
288
-
289
-
290
def test_preserve_unmanaged_docs(tmp_root):
    """Check that ``init`` skips a pre-existing AGENTS.md without a marker."""
    repo = tmp_root / "partial-repo"
    repo.mkdir()
    router = repo / "AGENTS.md"
    router.write_text("# Existing user router\n\nKeep this custom content.\n")
    answers = tmp_root / "partial-answers.json"
    write_answers(answers)

    init_result = run_manager("init", "--repo", str(repo), "--answers", str(answers))
    if "AGENTS.md" not in init_result["skipped"]:
        raise AssertionError("Unmanaged AGENTS.md should be skipped")
    assert_contains(repo, "AGENTS.md", "Keep this custom content.")
    # The rest of the scaffold is still created around the skipped file.
    assert_exists(repo, "docs/PLANS.md")
302
-
303
-
304
# Ordered (eval id, test function) pairs that main() runs in sequence.
EVALS = [
    ("empty-repo-init", test_empty_repo_init),
    ("frontend-analysis", test_frontend_analysis),
    ("closed-loop-plan", test_closed_loop_plan),
    ("preserve-unmanaged-docs", test_preserve_unmanaged_docs),
]
310
-
311
-
312
def main():
    """Run every entry in ``EVALS`` and print a JSON score report.

    All evals share one temporary directory. An exception raised by an eval
    is recorded as a failure instead of aborting the run. The process exits
    with status 1 when any eval fails.
    """
    outcomes = []
    with tempfile.TemporaryDirectory() as tmp:
        tmp_root = Path(tmp)
        for eval_id, test_func in EVALS:
            try:
                test_func(tmp_root)
            except Exception as error:
                outcomes.append({"id": eval_id, "status": "fail", "error": str(error)})
            else:
                outcomes.append({"id": eval_id, "status": "pass"})

    total = len(outcomes)
    passed = len([outcome for outcome in outcomes if outcome["status"] == "pass"])
    report = {
        "score": round((passed / total) * 100),
        "passed": passed,
        "total": total,
        "results": outcomes,
    }
    print(json.dumps(report, indent=2) + "\n")
    if passed != total:
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -1,39 +0,0 @@
1
- # Execution Plans
2
-
3
- Execution plans are required for multi-step work, risky changes, or tasks that need coordination across files.
4
-
5
- ## When To Create One
6
-
7
- - more than one implementation step is required
8
- - validation is non-trivial
9
- - architecture, product, reliability, or security decisions are involved
10
- - work will span enough time that another agent may resume it later
11
-
12
- ## Location
13
-
14
- - Active: `docs/exec-plans/active/`
15
- - Completed: `docs/exec-plans/completed/`
16
-
17
- ## Minimum Sections
18
-
19
- - goal
20
- - scope
21
- - constraints
22
- - steps
23
- - validation
24
- - durable knowledge to capture
25
- - completion notes
26
-
27
- ## Operating Rule
28
-
29
- Update the active plan during the work. When the work is done, move it to `completed` and leave behind any durable facts in the right permanent docs.
30
-
31
- ## Closed Loop
32
-
33
- Use the script, not ad hoc manual edits, for the lifecycle:
34
-
35
- - `plan-start`: create a new active execution plan
36
- - `knowledge-log`: append a durable fact that still needs to be written into permanent docs and return its stable id
37
- - `knowledge-mark-written`: verify and mark a logged fact as written into its permanent doc; prefer `--id <knowledge-id> --evidence "<doc text>"`, and use `--append` only to append the exact fact first
38
- - `plan-close`: refuse to close cleanly until the listed knowledge items are marked as written to durable docs
39
- - `check`: run a local handoff check without requiring target-repo CI
@@ -1,12 +0,0 @@
1
- # Template Policy
2
-
3
- Every generated file starts with a managed marker:
4
-
5
- `<!-- harness-repo-bootstrap:managed -->`
6
-
7
- Update behavior:
8
-
9
- - `init`: create missing files and skip existing files unless `--force`
10
- - `update`: create missing files, skip existing unmanaged files, and refresh managed files only when `--refresh-managed` or `--force` is passed
11
-
12
- If a file exists without the managed marker, treat it as user-owned unless the human explicitly asks to replace it.
@@ -1,47 +0,0 @@
1
- # Workflow
2
-
3
- Use this skill in two passes.
4
-
5
- ## Pass 1: Analyze and Confirm
6
-
7
- Run `analyze` before editing repository docs.
8
-
9
- Ask the human only about facts that cannot be derived safely from the repo, especially:
10
-
11
- - product domain and top-level outcomes
12
- - intended users or operators
13
- - production reliability expectations
14
- - security or compliance constraints
15
- - frontend experience bar
16
- - canonical external references worth pinning inside `docs/references/`
17
-
18
- Do not ask for facts that can be inferred from source layout, dependency manifests, or existing docs.
19
-
20
- Also inspect the analysis for:
21
-
22
- - missing durable knowledge that should be written during the task
23
- - missing execution-plan state
24
- - which SOPs should be referenced in the generated router docs
25
-
26
- ## Pass 2: Scaffold or Refresh
27
-
28
- Run `sample-answers`, fill the answers, then run `init` or `update`.
29
-
30
- Use `init` for first-time adoption.
31
- Use `update` to add missing managed files or refresh managed files when `--refresh-managed` is passed.
32
-
33
- After the script runs, read the generated docs once and tighten weak generic phrases before handing off.
34
-
35
- ## Ongoing Use
36
-
37
- After the scaffold exists:
38
-
39
- - create an execution plan before multi-step work
40
- - use `plan-start` instead of creating plan files manually when possible
41
- - log durable facts during execution instead of waiting until the end
42
- - follow the matching SOP for architecture, UI, observability, or knowledge capture work
43
- - encode durable knowledge back into the repository before closing the task
44
- - mark logged knowledge items as written after updating the permanent docs
45
- - use `plan-close` to verify no durable knowledge is left stranded in the active plan
46
- - run `.codex/skills/harness-repo-bootstrap/scripts/manage_harness.py check --repo <target-repo>` before handoff
47
- - do not add CI to the target repository unless the human explicitly asks for it