@hallucination-studio/harness-engine 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +185 -27
  3. package/bin/install.js +29 -17
  4. package/package.json +10 -4
  5. package/skills/harness-engine/SKILL.md +97 -0
  6. package/skills/harness-engine/agents/openai.yaml +4 -0
  7. package/skills/harness-engine/evals/cases.json +94 -0
  8. package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
  9. package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
  10. package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
  11. package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
  12. package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
  13. package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
  14. package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
  15. package/skills/harness-engine/evals/run_evals.py +14 -0
  16. package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
  17. package/skills/harness-engine/references/evidence-first-evals.md +187 -0
  18. package/skills/harness-engine/references/exec-plans.md +59 -0
  19. package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
  20. package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
  21. package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
  22. package/skills/harness-engine/references/template-policy.md +17 -0
  23. package/skills/harness-engine/references/workflow.md +62 -0
  24. package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
  25. package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
  26. package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
  27. package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
  28. package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
  29. package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
  30. package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
  31. package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
  32. package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
  33. package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
  34. package/skills/harness-engine/scripts/manage_harness.py +14 -0
  35. package/skills/harness-repo-bootstrap/SKILL.md +0 -68
  36. package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
  37. package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
  38. package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
  39. package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
  40. package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
  41. package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
  42. package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
  43. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
  44. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
  45. /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
@@ -0,0 +1,630 @@
1
+ from .common import *
2
+ from .templates import DEFAULT_DEFECT_PLACEHOLDER, DEFAULT_KNOWLEDGE_PLACEHOLDER, PLAN_TEMPLATE, ensure_parent
3
+
4
def slugify(value):
    """Turn free-form text into a lowercase, hyphen-separated slug.

    Every run of characters other than ``a-z`` and ``0-9`` collapses into a
    single hyphen and edge hyphens are removed. Returns ``"task"`` when
    nothing usable is left.
    """
    lowered = value.strip().lower()
    slug = re.sub(r"[^a-z0-9]+", "-", lowered).strip("-")
    if not slug:
        return "task"
    return slug
7
+
8
+
9
+ def utc_now_iso():
10
+ return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
11
+
12
+
13
def find_section(lines, heading):
    """Return the index of the first line matching ``heading``, else ``None``.

    The comparison ignores surrounding whitespace and letter case on both
    sides.
    """
    wanted = heading.strip().lower()
    matches = (
        position
        for position, candidate in enumerate(lines)
        if candidate.strip().lower() == wanted
    )
    return next(matches, None)
19
+
20
+
21
def sidecar_path_for_plan(plan_path):
    """Return the plan path with its final suffix swapped for ``.json``."""
    sidecar_suffix = ".json"
    return plan_path.with_suffix(sidecar_suffix)
23
+
24
+
25
def plan_id_for_path(plan_path):
    """Derive a plan identifier from the plan's file name.

    The id is ``plan-`` plus the first ten hex characters of the SHA-1 digest
    of the file name; the directory part of the path does not contribute.
    """
    file_name = str(plan_path.name)
    hex_digest = hashlib.sha1(file_name.encode()).hexdigest()
    return "plan-" + hex_digest[:10]
28
+
29
+
30
def empty_acceptance_criteria():
    """Return a new dict mapping each quality-dimension key to ``""``."""
    blank = {}
    for key, _label in QUALITY_DIMENSIONS:
        blank[key] = ""
    return blank
32
+
33
+
34
def criteria_fingerprint(criteria):
    """Return a short, stable hash of the acceptance criteria.

    Each quality dimension's text is stripped and has whitespace runs
    collapsed to one space (missing or falsy values become ``""``). The
    result is serialised as compact, key-sorted JSON and hashed with SHA-256;
    the first 16 hex characters are returned.
    """
    collapsed = {}
    for key, _label in QUALITY_DIMENSIONS:
        raw = (criteria.get(key) or "").strip()
        collapsed[key] = re.sub(r"\s+", " ", raw)
    serialized = json.dumps(collapsed, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256(serialized.encode()).hexdigest()
    return digest[:16]
41
+
42
+
43
def new_plan_state(plan_path, goal):
    """Build the initial sidecar state for a freshly created plan.

    The acceptance contract starts as ``draft`` with empty criteria, the
    quality result starts as ``pending`` with a minimum score of 8.0, and the
    creation/update timestamps share one value.
    """
    timestamp = utc_now_iso()
    acceptance_contract = {
        "status": "draft",
        "criteria": empty_acceptance_criteria(),
        "fingerprint": None,
    }
    quality_result = {
        "status": "pending",
        "minimum": 8.0,
        "average": None,
        "scored_at": None,
        "criteria_fingerprint": None,
        "dimensions": {},
    }
    # Key order is kept as-is because the dict is serialised to the sidecar.
    state = {
        "schema_version": SIDECAR_VERSION,
        "plan_id": plan_id_for_path(plan_path),
        "goal": goal,
        "created_at": timestamp,
        "updated_at": timestamp,
        "acceptance_contract": acceptance_contract,
        "quality_result": quality_result,
        "defects": [],
        "knowledge_items": [],
        "implementation_dirty_after_score": False,
        "dirty_reasons": [],
        "markdown_path": str(plan_path),
    }
    return state
70
+
71
+
72
def load_plan_state(plan_path):
    """Return the parsed JSON sidecar for ``plan_path``, or ``None`` if absent."""
    sidecar_file = sidecar_path_for_plan(plan_path)
    if sidecar_file.exists():
        return json.loads(sidecar_file.read_text())
    return None
77
+
78
+
79
def save_plan_state(plan_path, state):
    """Write ``state`` to the plan's JSON sidecar.

    Mutates ``state`` first: ``updated_at`` is set to the current UTC time
    and ``markdown_path`` to ``str(plan_path)``. The file is written as
    2-space-indented JSON followed by a newline.
    """
    state.update(updated_at=utc_now_iso(), markdown_path=str(plan_path))
    target = sidecar_path_for_plan(plan_path)
    ensure_parent(target)
    serialized = json.dumps(state, indent=2, ensure_ascii=False)
    target.write_text(serialized + "\n")
85
+
86
+
87
def require_plan_state(plan_path):
    """Return the plan's sidecar state.

    Raises:
        RuntimeError: if no sidecar file exists for ``plan_path``.
    """
    loaded = load_plan_state(plan_path)
    if loaded is not None:
        return loaded
    missing_sidecar = sidecar_path_for_plan(plan_path)
    raise RuntimeError(
        f"Plan is missing structured metadata sidecar: {missing_sidecar}. "
        "Run migration or recreate the plan with `plan-start`."
    )
95
+
96
+
97
# Record that the plan changed after it was scored: when the stored quality
# status is "pass" or "fail", set implementation_dirty_after_score, remember
# `reason` (without duplicates) and reset the status to "pending".
# Does nothing when the plan has no sidecar.
# NOTE(review): this rendering drops leading indentation, so it is not visible
# whether the `quality[...]` assignment and the final save_plan_state call sit
# inside the pass/fail `if` or after it - confirm against the real file.
+ def mark_state_dirty(plan_path, reason):
97
+ state = load_plan_state(plan_path)
98
+ if state is None:
99
+ return
100
# Only a plan that already has a pass/fail result can become stale.
+ if state.get("quality_result", {}).get("status") in {"pass", "fail"}:
101
+ state["implementation_dirty_after_score"] = True
102
+ reasons = state.setdefault("dirty_reasons", [])
103
+ if reason not in reasons:
104
+ reasons.append(reason)
105
+ quality = state.setdefault("quality_result", {})
106
+ quality["status"] = "pending"
107
# save_plan_state also refreshes updated_at and markdown_path on the state.
+ save_plan_state(plan_path, state)
109
+
110
+
111
def markdown_escape_cell(value):
    """Make ``value`` safe for a single Markdown table cell.

    Newlines become spaces, pipes are backslash-escaped, and the result is
    stripped. Falsy input yields ``""``.
    """
    text = value or ""
    single_line = text.replace("\n", " ")
    escaped = single_line.replace("|", "\\|")
    return escaped.strip()
113
+
114
+
115
def render_acceptance_contract(state):
    """Render the ``Acceptance Contract`` section body from sidecar state.

    Emits status and fingerprint lines, a reminder to run ``acceptance-set``
    while the contract is not ``ready``, and a table with one row per quality
    dimension (``pending`` where a criterion is missing).
    """
    contract = state.get("acceptance_contract", {})
    criteria = contract.get("criteria", {})
    status = contract.get("status", "draft")
    fingerprint_text = contract.get("fingerprint") or "pending"

    rendered = [f"Status: {status}", f"Fingerprint: {fingerprint_text}", ""]
    if status != "ready":
        rendered += [
            "Run `acceptance-set` before implementation to define specific product, UX, architecture, reliability, and security acceptance criteria.",
            "",
        ]
    rendered += ["| Dimension | Criteria |", "| --- | --- |"]
    rendered += [
        f"| {label} | {markdown_escape_cell(criteria.get(key) or 'pending')} |"
        for key, label in QUALITY_DIMENSIONS
    ]
    return "\n".join(rendered)
134
+
135
+
136
def render_quality_result(state):
    """Render the ``Quality Result`` section body from sidecar state.

    Emits status, minimum, average, timestamp and fingerprint lines. When
    per-dimension scores exist, a score/evidence table follows; otherwise a
    reminder to run ``quality-score``. A trailing notice is added when the
    state is flagged ``implementation_dirty_after_score``.

    A missing or ``null`` minimum is rendered as 8.0 so that a sidecar
    carrying an explicit ``null`` does not crash rendering.
    """
    quality = state.get("quality_result", {})
    status = quality.get("status", "pending")
    average = quality.get("average")
    average_text = f"{average:.1f}" if isinstance(average, (int, float)) else "pending"
    # `.get(key, default)` only covers an absent key; a stored null would
    # reach float() and raise TypeError, so handle None explicitly.
    minimum = quality.get("minimum")
    if minimum is None:
        minimum = 8.0
    lines = [
        f"Status: {status}",
        f"Minimum score: {float(minimum):.1f}",
        f"Average score: {average_text}",
        f"Last scored: {quality.get('scored_at') or 'pending'}",
        f"Criteria fingerprint: {quality.get('criteria_fingerprint') or 'pending'}",
        "",
    ]
    dimensions = quality.get("dimensions") or {}
    if dimensions:
        lines.extend(["| Dimension | Score | Evidence |", "| --- | ---: | --- |"])
        for key, label in QUALITY_DIMENSIONS:
            item = dimensions.get(key, {})
            score = item.get("score")
            score_text = f"{score:.1f}" if isinstance(score, (int, float)) else "pending"
            evidence = item.get("evidence") or "pending"
            lines.append(f"| {label} | {score_text} | {markdown_escape_cell(evidence)} |")
    else:
        lines.append("Run `quality-score` after implementation and validation. Scores must cite evidence for the ready acceptance contract.")
    if state.get("implementation_dirty_after_score"):
        lines.extend(["", "Result invalidated by later plan state changes. Re-run `quality-score`."])
    return "\n".join(lines)
163
+
164
+
165
def sync_plan_markdown_from_state(plan_path, state):
    """Rewrite the plan's contract and quality sections from ``state``.

    Reads the plan markdown, replaces the ``Acceptance Contract`` section and
    then the ``Quality Result`` section with freshly rendered bodies, and
    writes the file back.
    """
    markdown = plan_path.read_text()
    section_renderers = (
        ("Acceptance Contract", render_acceptance_contract),
        ("Quality Result", render_quality_result),
    )
    for heading, renderer in section_renderers:
        markdown = replace_section(markdown, heading, renderer(state))
    plan_path.write_text(markdown)
170
+
171
+
172
def sync_state_from_markdown(plan_path, state):
    """Refresh ``defects`` and ``knowledge_items`` in ``state`` from the plan.

    Parses the defect and knowledge items found in the plan markdown (the
    default knowledge placeholder is skipped, as are items that fail to
    parse). The sidecar is saved only when either list differs from what
    ``state`` already holds. Returns ``state``.
    """
    from .knowledge import extract_defect_items, extract_knowledge_items, parse_defect_item, parse_knowledge_item

    markdown = plan_path.read_text()
    parsed_defects = [
        parsed
        for parsed in map(parse_defect_item, extract_defect_items(markdown))
        if parsed
    ]
    real_knowledge = (
        raw for raw in extract_knowledge_items(markdown) if raw != DEFAULT_KNOWLEDGE_PLACEHOLDER
    )
    parsed_knowledge = [
        parsed for parsed in map(parse_knowledge_item, real_knowledge) if parsed
    ]
    unchanged = (
        state.get("defects") == parsed_defects
        and state.get("knowledge_items") == parsed_knowledge
    )
    if not unchanged:
        state["defects"] = parsed_defects
        state["knowledge_items"] = parsed_knowledge
        save_plan_state(plan_path, state)
    return state
192
+
193
+
194
def specific_acceptance_issues(criteria):
    """List the acceptance criteria that are not concrete enough.

    For each quality dimension at most one issue is reported, as a dict with
    ``dimension`` (label), ``argument`` (the CLI flag) and ``message``:

    * the text is a bare placeholder (``pending``, ``todo``, ``tbd``,
      ``n/a``, ``none``);
    * the text has fewer than six alphanumeric words (including empty);
    * the text contains a known generic template phrase.

    Returns an empty list when every criterion passes.
    """
    placeholder_values = {"pending", "todo", "tbd", "n/a", "none"}
    issues = []
    for key, label in QUALITY_DIMENSIONS:
        value = (criteria.get(key) or "").strip()
        lower = value.lower()
        # The placeholder check must come before the length check: every
        # placeholder is shorter than six words, so testing length first
        # would make this branch unreachable.
        if lower in placeholder_values:
            message = "Acceptance criterion is not specific."
        elif len(re.findall(r"[A-Za-z0-9]+", value)) < 6:
            message = "Acceptance criterion is too short or empty."
        elif any(phrase in lower for phrase in GENERIC_ACCEPTANCE_PHRASES):
            message = "Acceptance criterion is a generic template phrase."
        else:
            continue
        issues.append({"dimension": label, "argument": "--" + ACCEPTANCE_ARGS[key], "message": message})
    return issues
209
+
210
+
211
def ensure_acceptance_ready(plan_path):
    """Return the plan state if its acceptance contract can be scored against.

    The contract must have status ``ready``, have no specificity issues, and
    carry a fingerprint equal to the one recomputed from its criteria.

    Raises:
        RuntimeError: if the sidecar is missing or any of the above fails.
    """
    state = require_plan_state(plan_path)
    contract = state.get("acceptance_contract", {})
    criteria = contract.get("criteria") or {}
    issues = specific_acceptance_issues(criteria)
    fingerprint = criteria_fingerprint(criteria)
    is_ready = (
        contract.get("status") == "ready"
        and not issues
        and contract.get("fingerprint") == fingerprint
    )
    if is_ready:
        return state
    raise RuntimeError(
        "Cannot score before the Acceptance Contract is ready and specific. "
        "Run `acceptance-set` with concrete criteria for all dimensions."
    )
223
+
224
+
225
def replace_completion_notes(text, summary):
    """Replace the body of ``## Completion Notes`` with ``summary``.

    If the heading is absent, the section is appended to the end of ``text``.
    Otherwise everything between the heading and the next ``## `` heading (or
    end of file) is replaced by a blank line and ``summary``. The result
    always ends with exactly one newline.
    """
    lines = text.splitlines()
    heading_at = find_section(lines, "## Completion Notes")
    if heading_at is None:
        return text.rstrip() + "\n\n## Completion Notes\n\n" + summary + "\n"
    body_start = heading_at + 1
    next_heading = next(
        (
            position
            for position, candidate in enumerate(lines[body_start:], start=body_start)
            if candidate.startswith("## ")
        ),
        len(lines),
    )
    rebuilt = [*lines[:body_start], "", summary, *lines[next_heading:]]
    return "\n".join(rebuilt).rstrip() + "\n"
237
+
238
+
239
def replace_section(text, heading, body):
    """Replace the body of the ``## {heading}`` section with ``body``.

    If the heading is absent, the section is appended to the end of ``text``.
    Otherwise everything between the heading and the next ``## `` heading (or
    end of file) is replaced by a blank line and ``body`` with trailing
    whitespace removed. The result always ends with exactly one newline.
    """
    lines = text.splitlines()
    trimmed_body = body.rstrip()
    heading_at = find_section(lines, f"## {heading}")
    if heading_at is None:
        return text.rstrip() + f"\n\n## {heading}\n\n{trimmed_body}\n"
    body_start = heading_at + 1
    next_heading = next(
        (
            position
            for position, candidate in enumerate(lines[body_start:], start=body_start)
            if candidate.startswith("## ")
        ),
        len(lines),
    )
    rebuilt = [*lines[:body_start], "", trimmed_body, *lines[next_heading:]]
    return "\n".join(rebuilt).rstrip() + "\n"
251
+
252
+
253
def quality_result_for_plan(text):
    """Parse the ``## Quality Result`` section of plan markdown.

    Returns a dict with ``status`` (lowercased, ``"missing"`` when the
    section or status line is absent), ``minimum`` and ``average`` (floats or
    ``None``), ``scores`` (dimension label -> float for table rows with a
    numeric score) and ``criteria_fingerprint`` (``None`` when absent or
    ``pending``).
    """
    lines = text.splitlines()
    heading_at = find_section(lines, "## Quality Result")
    if heading_at is None:
        return {"status": "missing", "minimum": None, "average": None, "scores": {}, "criteria_fingerprint": None}

    # Collect the section body: everything up to the next level-2 heading.
    body = []
    for candidate in lines[heading_at + 1 :]:
        if candidate.startswith("## "):
            break
        body.append(candidate)
    section_text = "\n".join(body)

    def search(pattern):
        return re.search(pattern, section_text, flags=re.MULTILINE)

    status_match = search(r"^Status:\s*(?P<status>\w+)")
    minimum_match = search(r"^Minimum score:\s*(?P<score>[0-9]+(?:\.[0-9]+)?)")
    average_match = search(r"^Average score:\s*(?P<score>[0-9]+(?:\.[0-9]+)?)")
    fingerprint_match = search(r"^Criteria fingerprint:\s*(?P<fingerprint>[A-Fa-f0-9]+|pending)")

    scores = {}
    for _key, label in QUALITY_DIMENSIONS:
        row_match = search(rf"^\|\s*{re.escape(label)}\s*\|\s*(?P<score>[0-9]+(?:\.[0-9]+)?)\s*\|")
        if row_match:
            scores[label] = float(row_match.group("score"))

    recorded_fingerprint = fingerprint_match.group("fingerprint") if fingerprint_match else None
    if recorded_fingerprint == "pending":
        recorded_fingerprint = None

    return {
        "status": status_match.group("status").lower() if status_match else "missing",
        "minimum": float(minimum_match.group("score")) if minimum_match else None,
        "average": float(average_match.group("score")) if average_match else None,
        "scores": scores,
        "criteria_fingerprint": recorded_fingerprint,
    }
288
+
289
+
290
def quality_gate_for_plan(text):
    """Alias that forwards to ``quality_result_for_plan``."""
    parsed_result = quality_result_for_plan(text)
    return parsed_result
292
+
293
+
294
def section_key_values(text, heading):
    """Parse ``key: value`` lines from the ``## {heading}`` section.

    Returns ``None`` when the section is missing. Keys are stripped,
    lowercased and have spaces replaced by underscores; values are stripped.
    Lines without a colon are ignored, and parsing stops at the next ``## ``
    heading.
    """
    lines = text.splitlines()
    heading_at = find_section(lines, f"## {heading}")
    if heading_at is None:
        return None
    pairs = {}
    for candidate in lines[heading_at + 1 :]:
        if candidate.startswith("## "):
            break
        raw_key, separator, raw_value = candidate.partition(":")
        if not separator:
            continue
        pairs[raw_key.strip().lower().replace(" ", "_")] = raw_value.strip()
    return pairs
309
+
310
+
311
def phase_number_from_text(value):
    """Return the digits following the word ``phase`` in ``value``, or ``None``.

    Matching is case-insensitive and allows hyphens, underscores or
    whitespace between ``phase`` and the number. The number is returned as a
    string.
    """
    found = re.search(r"\bphase[-_\s]*(?P<number>\d+)\b", value, flags=re.IGNORECASE)
    return found.group("number") if found else None
316
+
317
+
318
def plan_title(text):
    """Return the title from the first ``# Execution Plan:`` line, else ``""``."""
    prefix = "# Execution Plan:"
    title_lines = (
        candidate for candidate in text.splitlines() if candidate.startswith(prefix)
    )
    first = next(title_lines, None)
    if first is None:
        return ""
    # The prefix ends at the line's first colon, so slicing it off leaves
    # exactly what follows that colon.
    return first[len(prefix):].strip()
323
+
324
+
325
def open_defects_for_plan(text):
    """Return the parsed defect items in ``text`` whose status is ``open``."""
    from .knowledge import extract_defect_items, parse_defect_item

    parsed_items = (parse_defect_item(raw) for raw in extract_defect_items(text))
    return [parsed for parsed in parsed_items if parsed and parsed["status"] == "open"]
333
+
334
+
335
def render_quality_gate(scores, notes, minimum, open_defects=None):
    """Evaluate scores against ``minimum`` and render a quality-gate table.

    The gate passes only when the average of all scores is at least
    ``minimum``, no dimension scores below ``minimum``, and there are no open
    defects.

    Returns:
        ``(markdown, passed, average, low_dimensions)`` where
        ``low_dimensions`` holds the labels of dimensions below ``minimum``.
    """
    open_defects = open_defects or []
    average = sum(scores.values()) / len(scores)
    low_dimensions = []
    for key, label in QUALITY_DIMENSIONS:
        if scores[key] < minimum:
            low_dimensions.append(label)
    passed = average >= minimum and not low_dimensions and not open_defects

    scored_at = datetime.now(UTC).strftime('%Y-%m-%dT%H:%M:%SZ')
    header = [
        f"Status: {'pass' if passed else 'fail'}",
        f"Minimum score: {minimum:.1f}",
        f"Average score: {average:.1f}",
        f"Last scored: {scored_at}",
        "",
        "| Dimension | Score | Notes |",
        "| --- | ---: | --- |",
    ]
    rows = []
    for key, label in QUALITY_DIMENSIONS:
        note = notes.get(key) or "No note provided."
        # Keep the note on one table row and stop pipes from splitting cells.
        safe_note = note.replace("\n", " ").replace("|", "\\|").strip()
        rows.append(f"| {label} | {scores[key]:.1f} | {safe_note} |")
    return "\n".join(header + rows), passed, average, low_dimensions
357
+
358
+
359
def render_rework_section(passed, average, minimum, low_dimensions, notes, open_defects=None):
    """Render the ``Rework Required`` section body.

    Returns a fixed "nothing to do" sentence when the gate passed. Otherwise
    returns a bullet list: a general rework line, one bullet per open defect,
    and one bullet per dimension whose label is in ``low_dimensions``.
    """
    if passed:
        return "None. Quality Result passed."
    bullets = [
        f"- Rework implementation until every quality dimension is at least {minimum:.1f}; current average is {average:.1f}.",
    ]
    for defect in open_defects or []:
        evidence = f" Evidence: {defect['evidence']}." if defect.get("evidence") else ""
        bullets.append(
            f"- Resolve {defect['id']} ({defect['severity']}): {defect['summary']}.{evidence}"
        )
    bullets.extend(
        f"- Improve {label}: {notes.get(key) or 'No note provided.'}"
        for key, label in QUALITY_DIMENSIONS
        if label in low_dimensions
    )
    return "\n".join(bullets)
376
+
377
+
378
def update_quality_gate(plan_path, scores, notes, minimum):
    """Score the plan and persist the result to the sidecar and markdown.

    Requires a ready acceptance contract, re-syncs defects from the markdown,
    evaluates the gate (open defects force a fail), stores the new
    ``quality_result`` while clearing the dirty flags, and rewrites the
    ``Quality Result`` and ``Rework Required`` sections of the plan.

    Returns:
        A summary dict with ``status``, ``minimum``, ``average`` (rounded to
        one decimal), ``low_dimensions``, ``open_defects`` (ids) and
        ``criteria_fingerprint``.

    Raises:
        RuntimeError: if the acceptance contract is not ready.
    """
    state = sync_state_from_markdown(plan_path, ensure_acceptance_ready(plan_path))
    open_defects = [
        defect for defect in state.get("defects", []) if defect.get("status") == "open"
    ]
    _rendered, passed, average, low_dimensions = render_quality_gate(scores, notes, minimum, open_defects)
    verdict = "pass" if passed else "fail"
    rounded_average = round(average, 1)
    fingerprint = state["acceptance_contract"]["fingerprint"]

    dimensions = {}
    for key, _label in QUALITY_DIMENSIONS:
        dimensions[key] = {"score": scores[key], "evidence": notes.get(key) or ""}
    state["quality_result"] = {
        "status": verdict,
        "minimum": minimum,
        "average": rounded_average,
        "scored_at": utc_now_iso(),
        "criteria_fingerprint": fingerprint,
        "dimensions": dimensions,
    }
    # A fresh score supersedes any earlier "changed after scoring" markers.
    state["implementation_dirty_after_score"] = False
    state["dirty_reasons"] = []
    save_plan_state(plan_path, state)

    markdown = plan_path.read_text()
    markdown = replace_section(markdown, "Quality Result", render_quality_result(state))
    rework_body = render_rework_section(passed, average, minimum, low_dimensions, notes, open_defects)
    markdown = replace_section(markdown, "Rework Required", rework_body)
    plan_path.write_text(markdown)

    return {
        "status": verdict,
        "minimum": minimum,
        "average": rounded_average,
        "low_dimensions": low_dimensions,
        "open_defects": [defect["id"] for defect in open_defects],
        "criteria_fingerprint": fingerprint,
    }
414
+
415
+
416
def missing_quality_notes(notes):
    """Return one issue dict per quality dimension whose note is empty.

    A note counts as empty when it is missing, falsy, or only whitespace.
    """
    return [
        {
            "dimension": label,
            "argument": "--" + QUALITY_NOTE_ARGS[key],
            "message": f"Provide evidence for {label}.",
        }
        for key, label in QUALITY_DIMENSIONS
        if not (notes.get(key) or "").strip()
    ]
428
+
429
+
430
def weak_quality_notes(notes):
    """Return one issue dict per quality dimension whose note looks too thin.

    Empty notes are skipped here. A non-empty note is weak when it has fewer
    than four tokens (runs of letters, digits and ``./:-``) or contains none
    of the known evidence hints.
    """
    flagged = []
    for key, label in QUALITY_DIMENSIONS:
        note = (notes.get(key) or "").strip()
        if not note:
            continue
        lower = note.lower()
        token_count = len(re.findall(r"[A-Za-z0-9./:-]+", note))
        if token_count >= 4 and any(hint in lower for hint in EVIDENCE_HINTS):
            continue
        flagged.append(
            {
                "dimension": label,
                "argument": "--" + QUALITY_NOTE_ARGS[key],
                "message": f"Provide concrete verification evidence for {label}, such as a command, browser check, log, code path, or review finding.",
            }
        )
    return flagged
446
+
447
+
448
def assert_quality_gate_passed(plan_path, plan_text):
    """Verify that the plan may be closed from a quality standpoint.

    Checks, in order: the acceptance contract is ``ready``; its stored
    fingerprint matches the recomputed one; no defects are open; the state
    was not modified after the last score; the quality status is ``pass``;
    and the score was taken against the current fingerprint.

    ``plan_text`` is accepted for interface compatibility and is not read.

    Returns:
        The ``quality_result`` dict from the sidecar state.

    Raises:
        RuntimeError: if the sidecar is missing.
        PlanCloseError: for the first check that fails.
    """
    state = sync_state_from_markdown(plan_path, require_plan_state(plan_path))
    contract = state.get("acceptance_contract", {})
    if contract.get("status") != "ready":
        raise PlanCloseError("acceptance-contract-not-ready", "Cannot close plan until the Acceptance Contract is ready.")

    recorded_fingerprint = contract.get("fingerprint")
    current_fingerprint = criteria_fingerprint(contract.get("criteria") or {})
    if recorded_fingerprint != current_fingerprint:
        raise PlanCloseError(
            "acceptance-fingerprint-stale",
            "Cannot close plan because the Acceptance Contract fingerprint is stale. Re-run `acceptance-set`.",
            {"current_fingerprint": current_fingerprint, "recorded_fingerprint": recorded_fingerprint},
        )

    open_defects = [
        defect for defect in state.get("defects", []) if defect.get("status") == "open"
    ]
    if open_defects:
        defect_lines = [
            f"- {defect['id']} ({defect['severity']}): {defect['summary']}" for defect in open_defects
        ]
        raise PlanCloseError(
            "open-defects",
            "Cannot close plan with unresolved defects. Run `defect-resolve`, re-run validation, and score again.",
            {"open_defects": open_defects, "defects_text": "\n".join(defect_lines)},
        )

    if state.get("implementation_dirty_after_score"):
        raise PlanCloseError(
            "quality-result-stale",
            "Cannot close plan because plan state changed after the last quality score. Re-run `quality-score`.",
            {"dirty_reasons": state.get("dirty_reasons", [])},
        )

    quality = state.get("quality_result", {})
    quality_status = quality.get("status")
    if quality_status != "pass":
        raise PlanCloseError(
            "quality-result-not-passing",
            "Cannot close plan until the quality result passes. "
            "Run `quality-score`, fix any `## Rework Required` items, then score again.",
            {"quality_status": quality_status},
        )

    scored_fingerprint = quality.get("criteria_fingerprint")
    if scored_fingerprint != current_fingerprint:
        raise PlanCloseError(
            "quality-fingerprint-stale",
            "Cannot close plan because the quality result was scored against a stale Acceptance Contract fingerprint.",
            {"quality_fingerprint": scored_fingerprint, "current_fingerprint": current_fingerprint},
        )
    return quality
492
+
493
+
494
def plan_placeholder_issues(plan_text):
    """Return the starter placeholders that still occur in ``plan_text``."""
    return [placeholder for placeholder in PLAN_PLACEHOLDERS if placeholder in plan_text]
500
+
501
+
502
def assert_plan_placeholders_resolved(plan_text):
    """Raise ``PlanCloseError`` if starter placeholders remain in the plan.

    The error message lists every remaining placeholder as a bullet.
    """
    placeholders = plan_placeholder_issues(plan_text)
    if not placeholders:
        return
    bullet_list = "\n".join(f"- {placeholder}" for placeholder in placeholders)
    raise PlanCloseError(
        "plan-placeholders-unresolved",
        "Cannot close plan with unresolved starter placeholders:\n"
        + bullet_list
        + "\nReplace generic Scope, Constraints, Steps, and Validation text with task-specific content before closing.",
        {"placeholders": placeholders},
    )
512
+
513
+
514
def active_plan_dir(repo):
    """Return ``<repo>/docs/exec-plans/active``."""
    return repo.joinpath("docs", "exec-plans", "active")
516
+
517
+
518
def completed_plan_dir(repo):
    """Return ``<repo>/docs/exec-plans/completed``."""
    return repo.joinpath("docs", "exec-plans", "completed")
520
+
521
+
522
def plan_path_from_arg(repo, plan_arg):
    """Resolve a plan argument to an existing file inside ``repo``.

    ``plan_arg`` may be absolute or relative to ``repo``.

    Returns:
        ``(plan_path, relative_plan)``: the resolved path and its string form
        relative to the resolved repo root.

    Raises:
        ValueError: if the resolved path lies outside the repo.
        FileNotFoundError: if the resolved path does not exist.
    """
    candidate = Path(plan_arg)
    anchored = candidate if candidate.is_absolute() else repo / candidate
    plan_path = anchored.resolve()

    try:
        relative_plan = str(plan_path.relative_to(repo.resolve()))
    except ValueError as error:
        raise ValueError(f"Plan must be inside repo: {plan_arg}") from error

    if plan_path.exists():
        return plan_path, relative_plan
    raise FileNotFoundError(f"Plan not found: {plan_path}")
538
+
539
+
540
def create_plan(repo, slug, goal):
    """Create a new active plan file plus its JSON sidecar.

    The file is named ``<UTC date>-<slugified slug>.md`` inside the active
    plan directory, which is created if needed. The markdown is produced from
    ``PLAN_TEMPLATE`` and then has its contract/quality sections rendered
    from the fresh state.

    Returns:
        The path of the created plan.

    Raises:
        FileExistsError: if a plan with the same name already exists.
    """
    plan_dir = active_plan_dir(repo)
    plan_dir.mkdir(parents=True, exist_ok=True)
    date_prefix = datetime.now(UTC).strftime('%Y-%m-%d')
    plan_path = plan_dir / f"{date_prefix}-{slugify(slug)}.md"
    if plan_path.exists():
        raise FileExistsError(f"Plan already exists: {plan_path}")

    title = slug.replace("-", " ").strip() or "task"
    template_fields = {
        "title": title.title(),
        "goal": goal,
        "defect_section": DEFAULT_DEFECT_PLACEHOLDER,
        # NOTE(review): presumably identical to DEFAULT_KNOWLEDGE_PLACEHOLDER,
        # which other functions compare against - confirm in templates.py.
        "knowledge_section": "- [ ] Add durable facts here as they emerge -> <destination-doc>",
    }
    plan_path.write_text(PLAN_TEMPLATE.format(**template_fields))

    state = new_plan_state(plan_path, goal)
    save_plan_state(plan_path, state)
    sync_plan_markdown_from_state(plan_path, state)
    return plan_path
559
+
560
+
561
def set_acceptance_contract(plan_path, criteria):
    """Store a ready acceptance contract for the plan.

    Each dimension's text is cleaned with ``clean_fact_text``. If any
    criterion is not specific enough, nothing is written and a ``fail`` dict
    listing the issues is returned. Otherwise the contract is saved as
    ``ready`` with its fingerprint; a plan that already had a pass/fail score
    is flagged dirty and its quality status reset to ``pending``. The plan
    markdown is then re-rendered from the state.

    Returns:
        ``{"status": "ready", "criteria_fingerprint": ...}`` on success, or
        the ``fail`` dict described above.

    Raises:
        RuntimeError: if the plan has no sidecar.
    """
    from .knowledge import clean_fact_text

    state = require_plan_state(plan_path)
    normalized = {}
    for key, _label in QUALITY_DIMENSIONS:
        normalized[key] = clean_fact_text(criteria.get(key) or "")

    issues = specific_acceptance_issues(normalized)
    if issues:
        return {
            "status": "fail",
            "reason": "acceptance-criteria-not-specific",
            "message": "acceptance-set requires concrete, task-specific criteria for every dimension.",
            "issues": issues,
        }

    fingerprint = criteria_fingerprint(normalized)
    state["acceptance_contract"] = {
        "status": "ready",
        "criteria": normalized,
        "fingerprint": fingerprint,
    }
    already_scored = state.get("quality_result", {}).get("status") in {"pass", "fail"}
    if already_scored:
        # The existing score was taken against the previous contract.
        state["implementation_dirty_after_score"] = True
        reasons = state.setdefault("dirty_reasons", [])
        if "acceptance-contract-changed" not in reasons:
            reasons.append("acceptance-contract-changed")
        state["quality_result"]["status"] = "pending"
    save_plan_state(plan_path, state)
    sync_plan_markdown_from_state(plan_path, state)
    return {"status": "ready", "criteria_fingerprint": fingerprint}
588
+
589
+
590
# Close an active plan: run the close checks unless `force` is set, write the
# finalized markdown under the completed-plans directory, carry the JSON
# sidecar along, delete the active markdown file and update workstreams.
# Returns (destination path, knowledge items that were still unchecked).
# Raises PlanCloseError when unchecked knowledge items remain and `force` is
# falsy; plan_path_from_arg raises ValueError / FileNotFoundError for a bad path.
# NOTE(review): this rendering drops leading indentation, so which statements
# belong to each `if` below is inferred, not visible - confirm against the
# real file before relying on these notes.
+ def close_plan(repo, plan_relative_path, summary, force):
591
+ from .continuation import assert_phase_continuity_closed, update_workstreams_after_plan_close
592
+ from .knowledge import extract_knowledge_items, mark_knowledge_items_closed
593
+ plan_path, active_relative_path = plan_path_from_arg(repo, plan_relative_path)
594
+ text = plan_path.read_text()
595
# Presumably all three assertions below are skipped together when forced.
+ if not force:
596
+ assert_plan_placeholders_resolved(text)
597
+ assert_quality_gate_passed(plan_path, text)
598
+ assert_phase_continuity_closed(repo, plan_path, text)
599
# Unchecked ("- [ ]") knowledge items, ignoring the default placeholder line.
+ open_items = [
600
+ item
601
+ for item in extract_knowledge_items(text)
602
+ if item.startswith("- [ ]") and item != DEFAULT_KNOWLEDGE_PLACEHOLDER
603
+ ]
604
+ if open_items and not force:
605
+ raise PlanCloseError(
606
+ "open-durable-knowledge",
607
+ "Cannot close plan with unresolved durable knowledge items.",
608
+ {"open_items": open_items},
609
+ )
610
+ updated_text = replace_completion_notes(mark_knowledge_items_closed(text), summary)
611
+ state = load_plan_state(plan_path)
612
# sync_state_from_markdown re-reads the file at plan_path, which still holds
# the original markdown at this point (updated_text has not been written yet).
+ if state is not None:
613
+ state = sync_state_from_markdown(plan_path, state)
614
+ completed_dir = completed_plan_dir(repo)
615
+ completed_dir.mkdir(parents=True, exist_ok=True)
616
+ destination = completed_dir / plan_path.name
617
# No existence check is made here: a same-named completed plan is overwritten.
+ destination.write_text(updated_text)
618
+ sidecar = sidecar_path_for_plan(plan_path)
619
+ destination_sidecar = sidecar_path_for_plan(destination)
620
+ if sidecar.exists():
621
+ if state is not None:
622
# Point the sidecar at the completed location before it is moved.
+ state["markdown_path"] = str(destination)
623
+ state["updated_at"] = utc_now_iso()
624
+ sidecar.write_text(json.dumps(state, indent=2, ensure_ascii=False) + "\n")
625
+ shutil.move(str(sidecar), str(destination_sidecar))
626
+ plan_path.unlink()
627
# `destination` is built from the unresolved `repo`, whereas
# active_relative_path was computed against repo.resolve().
+ completed_relative_path = str(destination.relative_to(repo))
628
+ update_workstreams_after_plan_close(repo, active_relative_path, completed_relative_path)
629
+ return destination, open_items
630
+