@hallucination-studio/harness-engine 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -27
- package/bin/install.js +29 -17
- package/package.json +10 -4
- package/skills/harness-engine/SKILL.md +97 -0
- package/skills/harness-engine/agents/openai.yaml +4 -0
- package/skills/harness-engine/evals/cases.json +94 -0
- package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
- package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
- package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
- package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
- package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
- package/skills/harness-engine/evals/run_evals.py +14 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
- package/skills/harness-engine/references/evidence-first-evals.md +187 -0
- package/skills/harness-engine/references/exec-plans.md +59 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
- package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
- package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
- package/skills/harness-engine/references/template-policy.md +17 -0
- package/skills/harness-engine/references/workflow.md +62 -0
- package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
- package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
- package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
- package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
- package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
- package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
- package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
- package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
- package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
- package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
- package/skills/harness-engine/scripts/manage_harness.py +14 -0
- package/skills/harness-repo-bootstrap/SKILL.md +0 -68
- package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
- package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
- package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
- package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
- package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
- package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
- package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
|
@@ -0,0 +1,630 @@
|
|
|
1
|
+
from .common import *
|
|
2
|
+
from .templates import DEFAULT_DEFECT_PLACEHOLDER, DEFAULT_KNOWLEDGE_PLACEHOLDER, PLAN_TEMPLATE, ensure_parent
|
|
3
|
+
|
|
4
|
+
def slugify(value):
    """Turn free-form text into a lowercase, hyphen-separated ASCII slug.

    Accented letters are folded to their base letter first (``"Café"`` becomes
    ``"cafe"``) so they are not mangled into separators. Every remaining run of
    characters outside ``a-z0-9`` collapses into a single hyphen, and leading or
    trailing hyphens are removed.

    Args:
        value: Text to convert.

    Returns:
        The slug, or ``"task"`` when nothing usable is left.
    """
    import unicodedata

    # NFKD splits "é" into "e" plus a combining accent; dropping the combining
    # marks keeps the base letter instead of losing the whole character.
    decomposed = unicodedata.normalize("NFKD", value)
    folded = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    normalized = re.sub(r"[^a-z0-9]+", "-", folded.strip().lower()).strip("-")
    return normalized or "task"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def utc_now_iso():
|
|
10
|
+
return datetime.now(UTC).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def find_section(lines, heading):
    """Locate a heading line, ignoring case and surrounding whitespace.

    Args:
        lines: Sequence of text lines to scan.
        heading: Full heading text to look for (for example ``"## Goal"``).

    Returns:
        Index of the first matching line, or ``None`` when there is no match.
    """
    wanted = heading.strip().lower()
    return next(
        (position for position, candidate in enumerate(lines) if candidate.strip().lower() == wanted),
        None,
    )
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def sidecar_path_for_plan(plan_path):
    """Return the JSON sidecar path that sits next to a plan file.

    The sidecar has the same location and stem as the plan; only the final
    suffix is swapped for ``.json``.
    """
    sidecar_suffix = ".json"
    return plan_path.with_suffix(sidecar_suffix)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def plan_id_for_path(plan_path):
    """Derive a plan id from the plan's file name.

    Only the file name (not its directory) is hashed, so the id is the same
    wherever the file is located. The id is ``"plan-"`` followed by the first
    ten hex characters of the SHA-1 digest of the name.
    """
    file_name = str(plan_path.name)
    hex_digest = hashlib.sha1(file_name.encode()).hexdigest()
    return "plan-" + hex_digest[:10]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def empty_acceptance_criteria():
    """Return a fresh criteria mapping with an empty string for every quality dimension key."""
    dimension_keys = (key for key, _ in QUALITY_DIMENSIONS)
    return dict.fromkeys(dimension_keys, "")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def criteria_fingerprint(criteria):
    """Compute a short, stable fingerprint of the acceptance criteria.

    Only the keys listed in ``QUALITY_DIMENSIONS`` contribute. Each value is
    trimmed and has internal whitespace runs collapsed to one space, so purely
    cosmetic whitespace edits do not change the fingerprint.

    Args:
        criteria: Mapping from dimension key to criterion text; missing or
            falsy values count as an empty string.

    Returns:
        The first 16 hex characters of the SHA-256 digest of the canonical
        JSON encoding of the normalized criteria.
    """
    whitespace_run = re.compile(r"\s+")
    normalized = {}
    for key, _ in QUALITY_DIMENSIONS:
        raw = criteria.get(key) or ""
        normalized[key] = whitespace_run.sub(" ", raw.strip())
    # Sorted keys and compact separators keep the encoding canonical.
    payload = json.dumps(normalized, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    digest = hashlib.sha256(payload.encode()).hexdigest()
    return digest[:16]
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def new_plan_state(plan_path, goal):
    """Build the initial sidecar state for a newly created plan.

    Args:
        plan_path: Path of the plan markdown file.
        goal: Goal text recorded in the state.

    Returns:
        A dict with a draft acceptance contract, a pending quality result
        (minimum score 8.0), empty defect and knowledge lists, and matching
        ``created_at`` / ``updated_at`` timestamps.
    """
    created = utc_now_iso()
    draft_contract = {
        "status": "draft",
        "criteria": empty_acceptance_criteria(),
        "fingerprint": None,
    }
    pending_quality = {
        "status": "pending",
        "minimum": 8.0,
        "average": None,
        "scored_at": None,
        "criteria_fingerprint": None,
        "dimensions": {},
    }
    # Key order is kept stable because it is the order written to the JSON sidecar.
    return {
        "schema_version": SIDECAR_VERSION,
        "plan_id": plan_id_for_path(plan_path),
        "goal": goal,
        "created_at": created,
        "updated_at": created,
        "acceptance_contract": draft_contract,
        "quality_result": pending_quality,
        "defects": [],
        "knowledge_items": [],
        "implementation_dirty_after_score": False,
        "dirty_reasons": [],
        "markdown_path": str(plan_path),
    }
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def load_plan_state(plan_path):
    """Read the plan's JSON sidecar.

    Returns:
        The decoded sidecar content, or ``None`` when the sidecar file does
        not exist.
    """
    sidecar_file = sidecar_path_for_plan(plan_path)
    if sidecar_file.exists():
        return json.loads(sidecar_file.read_text())
    return None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def save_plan_state(plan_path, state):
    """Write ``state`` to the plan's JSON sidecar.

    ``state`` is mutated in place: ``updated_at`` is refreshed and
    ``markdown_path`` is set to ``plan_path`` before writing. The file is
    written as 2-space-indented JSON with a trailing newline.
    """
    state.update(updated_at=utc_now_iso(), markdown_path=str(plan_path))
    target = sidecar_path_for_plan(plan_path)
    # Presumably creates the sidecar's parent directory when missing; confirm in templates.ensure_parent.
    ensure_parent(target)
    serialized = json.dumps(state, indent=2, ensure_ascii=False)
    target.write_text(serialized + "\n")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def require_plan_state(plan_path):
    """Load the plan's sidecar state, failing loudly when it is absent.

    Returns:
        The decoded sidecar state.

    Raises:
        RuntimeError: If the plan has no sidecar file.
    """
    state = load_plan_state(plan_path)
    if state is not None:
        return state
    raise RuntimeError(
        f"Plan is missing structured metadata sidecar: {sidecar_path_for_plan(plan_path)}. "
        "Run migration or recreate the plan with `plan-start`."
    )
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def mark_state_dirty(plan_path, reason):
    """Invalidate an existing quality score after the plan changed.

    Does nothing when the plan has no sidecar. When the stored quality result
    is ``pass`` or ``fail``, the state is flagged as dirty, ``reason`` is
    recorded once in ``dirty_reasons``, the quality status is reset to
    ``pending``, and the sidecar is saved.

    Args:
        plan_path: Path of the plan markdown file.
        reason: Short identifier describing why the score is stale.
    """
    state = load_plan_state(plan_path)
    if state is None:
        return
    # NOTE(review): assumes the reset-to-pending and the save only happen for
    # an already scored plan — confirm against the callers.
    already_scored = state.get("quality_result", {}).get("status") in {"pass", "fail"}
    if not already_scored:
        return
    state["implementation_dirty_after_score"] = True
    dirty_reasons = state.setdefault("dirty_reasons", [])
    if reason not in dirty_reasons:
        dirty_reasons.append(reason)
    state.setdefault("quality_result", {})["status"] = "pending"
    save_plan_state(plan_path, state)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def markdown_escape_cell(value):
    """Make text safe to place inside a single Markdown table cell.

    Every line ending (``\\r\\n``, ``\\r`` or ``\\n``) becomes a space so the
    row stays on one line, pipes are backslash-escaped so they are not read as
    column separators, and surrounding whitespace is trimmed.

    Args:
        value: Cell text; ``None`` or an empty value yields ``""``.

    Returns:
        The escaped single-line text.
    """
    text = value or ""
    # "\r\n" goes first so a Windows line ending becomes one space, not two.
    for line_ending in ("\r\n", "\r", "\n"):
        text = text.replace(line_ending, " ")
    return text.replace("|", "\\|").strip()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def render_acceptance_contract(state):
    """Render the body of the ``Acceptance Contract`` section as Markdown.

    The output starts with the contract status and fingerprint, adds a hint to
    run ``acceptance-set`` while the contract is not ``ready``, and ends with a
    table holding one criterion row per quality dimension. Missing values are
    shown as ``pending``.

    Args:
        state: Plan sidecar state.

    Returns:
        The section body as a newline-joined string.
    """
    contract = state.get("acceptance_contract", {})
    criteria = contract.get("criteria", {})
    status_text = contract.get("status", "draft")
    fingerprint_text = contract.get("fingerprint") or "pending"
    rendered = [f"Status: {status_text}", f"Fingerprint: {fingerprint_text}", ""]
    if contract.get("status") != "ready":
        rendered += [
            "Run `acceptance-set` before implementation to define specific product, UX, architecture, reliability, and security acceptance criteria.",
            "",
        ]
    rendered += ["| Dimension | Criteria |", "| --- | --- |"]
    rendered += [
        f"| {label} | {markdown_escape_cell(criteria.get(key) or 'pending')} |"
        for key, label in QUALITY_DIMENSIONS
    ]
    return "\n".join(rendered)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def render_quality_result(state):
    """Render the body of the ``Quality Result`` section as Markdown.

    The output lists the status, minimum and average score, scoring time and
    criteria fingerprint. When per-dimension results exist they follow as a
    score/evidence table; otherwise a hint to run ``quality-score`` is shown.
    A trailing notice is added when the state is flagged as changed after the
    last score.

    Args:
        state: Plan sidecar state.

    Returns:
        The section body as a newline-joined string.
    """

    def _one_decimal(number):
        # Anything that is not a number (for example None) is shown as pending.
        return f"{number:.1f}" if isinstance(number, (int, float)) else "pending"

    quality = state.get("quality_result", {})
    output = [
        f"Status: {quality.get('status', 'pending')}",
        f"Minimum score: {float(quality.get('minimum', 8.0)):.1f}",
        f"Average score: {_one_decimal(quality.get('average'))}",
        f"Last scored: {quality.get('scored_at') or 'pending'}",
        f"Criteria fingerprint: {quality.get('criteria_fingerprint') or 'pending'}",
        "",
    ]
    dimensions = quality.get("dimensions") or {}
    if not dimensions:
        output.append("Run `quality-score` after implementation and validation. Scores must cite evidence for the ready acceptance contract.")
    else:
        output.extend(["| Dimension | Score | Evidence |", "| --- | ---: | --- |"])
        for key, label in QUALITY_DIMENSIONS:
            entry = dimensions.get(key, {})
            score_text = _one_decimal(entry.get("score"))
            evidence_text = markdown_escape_cell(entry.get("evidence") or "pending")
            output.append(f"| {label} | {score_text} | {evidence_text} |")
    if state.get("implementation_dirty_after_score"):
        output += ["", "Result invalidated by later plan state changes. Re-run `quality-score`."]
    return "\n".join(output)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def sync_plan_markdown_from_state(plan_path, state):
    """Rewrite the state-driven sections of the plan markdown file.

    The ``Acceptance Contract`` and ``Quality Result`` sections are
    regenerated from ``state`` and the file is written back in place.
    """
    section_renderers = (
        ("Acceptance Contract", render_acceptance_contract),
        ("Quality Result", render_quality_result),
    )
    document = plan_path.read_text()
    for heading, render in section_renderers:
        document = replace_section(document, heading, render(state))
    plan_path.write_text(document)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def sync_state_from_markdown(plan_path, state):
    """Refresh the defect and knowledge lists in ``state`` from the plan file.

    Defect and knowledge items are extracted from the markdown and parsed;
    items that do not parse are dropped, and the default knowledge placeholder
    is skipped. The sidecar is saved only when either list differs from what
    ``state`` already holds.

    Args:
        plan_path: Path of the plan markdown file.
        state: Plan sidecar state; updated in place when the lists changed.

    Returns:
        The same ``state`` object.
    """
    from .knowledge import extract_defect_items, extract_knowledge_items, parse_defect_item, parse_knowledge_item

    markdown = plan_path.read_text()
    parsed_defects = [
        parsed
        for raw in extract_defect_items(markdown)
        if (parsed := parse_defect_item(raw))
    ]
    parsed_knowledge = [
        parsed
        for raw in extract_knowledge_items(markdown)
        if raw != DEFAULT_KNOWLEDGE_PLACEHOLDER and (parsed := parse_knowledge_item(raw))
    ]
    unchanged = state.get("defects") == parsed_defects and state.get("knowledge_items") == parsed_knowledge
    if not unchanged:
        state["defects"] = parsed_defects
        state["knowledge_items"] = parsed_knowledge
        save_plan_state(plan_path, state)
    return state
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def specific_acceptance_issues(criteria):
    """List the acceptance criteria that are not specific enough.

    Each dimension in ``QUALITY_DIMENSIONS`` is checked in this order, and the
    first failing check decides the message:

    1. the value is a bare placeholder (``pending``, ``todo``, ``tbd``,
       ``n/a``, ``none``);
    2. the value has fewer than six alphanumeric words;
    3. the value contains one of ``GENERIC_ACCEPTANCE_PHRASES``.

    Args:
        criteria: Mapping from dimension key to criterion text; missing or
            falsy values count as an empty string.

    Returns:
        A list of ``{"dimension", "argument", "message"}`` dicts, one per
        failing dimension; empty when every criterion passes.
    """
    placeholder_values = {"pending", "todo", "tbd", "n/a", "none"}
    issues = []
    for key, label in QUALITY_DIMENSIONS:
        value = (criteria.get(key) or "").strip()
        lower = value.lower()
        # The placeholder check must run before the word count: every
        # placeholder is shorter than six words, so it would otherwise never
        # be reached and its message could never be reported.
        if lower in placeholder_values:
            message = "Acceptance criterion is not specific."
        elif len(re.findall(r"[A-Za-z0-9]+", value)) < 6:
            message = "Acceptance criterion is too short or empty."
        elif any(phrase in lower for phrase in GENERIC_ACCEPTANCE_PHRASES):
            message = "Acceptance criterion is a generic template phrase."
        else:
            continue
        issues.append({"dimension": label, "argument": "--" + ACCEPTANCE_ARGS[key], "message": message})
    return issues
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def ensure_acceptance_ready(plan_path):
    """Return the plan state only when its acceptance contract can be scored.

    The contract must have status ``ready``, its criteria must raise no
    specificity issues, and the stored fingerprint must match the fingerprint
    recomputed from the current criteria.

    Returns:
        The plan sidecar state.

    Raises:
        RuntimeError: If the sidecar is missing or any of the conditions
            above fails.
    """
    state = require_plan_state(plan_path)
    contract = state.get("acceptance_contract", {})
    criteria = contract.get("criteria") or {}
    issues = specific_acceptance_issues(criteria)
    expected_fingerprint = criteria_fingerprint(criteria)
    is_ready = contract.get("status") == "ready"
    fingerprint_current = contract.get("fingerprint") == expected_fingerprint
    if is_ready and not issues and fingerprint_current:
        return state
    raise RuntimeError(
        "Cannot score before the Acceptance Contract is ready and specific. "
        "Run `acceptance-set` with concrete criteria for all dimensions."
    )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def replace_completion_notes(text, summary):
    """Set the body of the ``## Completion Notes`` section to ``summary``.

    When the section is missing it is appended to the end of the document.
    Otherwise everything between its heading and the next ``## `` heading (or
    the end of the document) is replaced.

    Returns:
        The updated document, ending with exactly one newline.
    """
    lines = text.splitlines()
    start = find_section(lines, "## Completion Notes")
    if start is None:
        return text.rstrip() + "\n\n## Completion Notes\n\n" + summary + "\n"
    # The section ends at the next level-2 heading, or at the end of the file.
    stop = next(
        (position for position in range(start + 1, len(lines)) if lines[position].startswith("## ")),
        len(lines),
    )
    rebuilt = [*lines[: start + 1], "", summary, *lines[stop:]]
    return "\n".join(rebuilt).rstrip() + "\n"
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def replace_section(text, heading, body):
    """Set the body of the ``## <heading>`` section to ``body``.

    When the section is missing it is appended to the end of the document.
    Otherwise everything between its heading and the next ``## `` heading (or
    the end of the document) is replaced. Trailing whitespace of ``body`` is
    dropped.

    Args:
        text: Full markdown document.
        heading: Heading text without the leading ``## ``.
        body: New section content.

    Returns:
        The updated document, ending with exactly one newline.
    """
    trimmed_body = body.rstrip()
    lines = text.splitlines()
    start = find_section(lines, f"## {heading}")
    if start is None:
        return text.rstrip() + f"\n\n## {heading}\n\n{trimmed_body}\n"
    # The section ends at the next level-2 heading, or at the end of the file.
    stop = next(
        (position for position in range(start + 1, len(lines)) if lines[position].startswith("## ")),
        len(lines),
    )
    rebuilt = [*lines[: start + 1], "", trimmed_body, *lines[stop:]]
    return "\n".join(rebuilt).rstrip() + "\n"
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def quality_result_for_plan(text):
    """Parse the ``## Quality Result`` section of a plan document.

    Args:
        text: Full markdown document.

    Returns:
        A dict with ``status`` (lowercased, ``"missing"`` when absent),
        ``minimum`` and ``average`` (floats or ``None``), ``scores`` (mapping
        from dimension label to float for every table row that carries a
        numeric score) and ``criteria_fingerprint`` (``None`` when absent or
        ``pending``).
    """
    lines = text.splitlines()
    start = find_section(lines, "## Quality Result")
    if start is None:
        return {"status": "missing", "minimum": None, "average": None, "scores": {}, "criteria_fingerprint": None}

    body = []
    for line in lines[start + 1 :]:
        if line.startswith("## "):
            break
        body.append(line)
    section_text = "\n".join(body)

    def _search(pattern):
        return re.search(pattern, section_text, flags=re.MULTILINE)

    def _score_or_none(match):
        return float(match.group("score")) if match else None

    status_match = _search(r"^Status:\s*(?P<status>\w+)")
    minimum_match = _search(r"^Minimum score:\s*(?P<score>[0-9]+(?:\.[0-9]+)?)")
    average_match = _search(r"^Average score:\s*(?P<score>[0-9]+(?:\.[0-9]+)?)")
    fingerprint_match = _search(r"^Criteria fingerprint:\s*(?P<fingerprint>[A-Fa-f0-9]+|pending)")

    scores = {}
    for _, label in QUALITY_DIMENSIONS:
        row_match = _search(rf"^\|\s*{re.escape(label)}\s*\|\s*(?P<score>[0-9]+(?:\.[0-9]+)?)\s*\|")
        if row_match:
            scores[label] = float(row_match.group("score"))

    fingerprint = fingerprint_match.group("fingerprint") if fingerprint_match else None
    if fingerprint == "pending":
        fingerprint = None

    return {
        "status": status_match.group("status").lower() if status_match else "missing",
        "minimum": _score_or_none(minimum_match),
        "average": _score_or_none(average_match),
        "scores": scores,
        "criteria_fingerprint": fingerprint,
    }
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def quality_gate_for_plan(text):
    """Alias of ``quality_result_for_plan``; returns its result unchanged."""
    parsed_result = quality_result_for_plan(text)
    return parsed_result
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def section_key_values(text, heading):
    """Collect ``key: value`` lines from one ``## <heading>`` section.

    Keys are trimmed, lowercased and have spaces replaced by underscores;
    values are trimmed. Lines without a colon are ignored, and a repeated key
    keeps its last value.

    Returns:
        The collected mapping, or ``None`` when the section does not exist.
    """
    lines = text.splitlines()
    start = find_section(lines, f"## {heading}")
    if start is None:
        return None
    collected = {}
    for line in lines[start + 1 :]:
        if line.startswith("## "):
            break
        raw_key, separator, raw_value = line.partition(":")
        if not separator:
            continue
        collected[raw_key.strip().lower().replace(" ", "_")] = raw_value.strip()
    return collected
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def phase_number_from_text(value):
    """Extract the digits of the first ``phase <n>`` mention in ``value``.

    The word ``phase`` is matched case-insensitively as a whole word and may
    be separated from the number by hyphens, underscores or whitespace.

    Returns:
        The number as a string exactly as written (leading zeros kept), or
        ``None`` when there is no such mention.
    """
    found = re.search(r"\bphase[-_\s]*(?P<number>\d+)\b", value, flags=re.IGNORECASE)
    return found.group("number") if found else None
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def plan_title(text):
    """Return the title from the first ``# Execution Plan:`` line.

    Returns:
        The trimmed text after the first colon of that line, or ``""`` when
        the document has no such line.
    """
    title_prefix = "# Execution Plan:"
    title_line = next(
        (line for line in text.splitlines() if line.startswith(title_prefix)),
        None,
    )
    if title_line is None:
        return ""
    return title_line.partition(":")[2].strip()
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def open_defects_for_plan(text):
    """Return the parsed defect items of a plan whose status is ``open``.

    Items that do not parse are ignored.
    """
    from .knowledge import extract_defect_items, parse_defect_item

    return [
        parsed
        for raw in extract_defect_items(text)
        if (parsed := parse_defect_item(raw)) and parsed["status"] == "open"
    ]
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def render_quality_gate(scores, notes, minimum, open_defects=None):
    """Evaluate the quality gate and render it as a Markdown block.

    The gate passes only when the average score reaches ``minimum``, no single
    dimension is below ``minimum``, and there are no open defects.

    Args:
        scores: Mapping from dimension key to numeric score; must contain
            every key in ``QUALITY_DIMENSIONS``.
        notes: Mapping from dimension key to note text.
        minimum: Minimum acceptable score.
        open_defects: Optional list of open defects; ``None`` means none.

    Returns:
        A tuple ``(markdown, passed, average, low_dimensions)`` where
        ``low_dimensions`` lists the labels of dimensions scored below
        ``minimum``.
    """
    unresolved = open_defects or []
    average = sum(scores.values()) / len(scores)
    low_dimensions = []
    for key, label in QUALITY_DIMENSIONS:
        if scores[key] < minimum:
            low_dimensions.append(label)
    passed = average >= minimum and not low_dimensions and not unresolved
    header = [
        f"Status: {'pass' if passed else 'fail'}",
        f"Minimum score: {minimum:.1f}",
        f"Average score: {average:.1f}",
        f"Last scored: {utc_now_iso()}",
        "",
        "| Dimension | Score | Notes |",
        "| --- | ---: | --- |",
    ]
    rows = [
        f"| {label} | {scores[key]:.1f} | {markdown_escape_cell(notes.get(key) or 'No note provided.')} |"
        for key, label in QUALITY_DIMENSIONS
    ]
    return "\n".join(header + rows), passed, average, low_dimensions
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def render_rework_section(passed, average, minimum, low_dimensions, notes, open_defects=None):
    """Render the body of the ``Rework Required`` section.

    Args:
        passed: Whether the quality gate passed.
        average: Average score across dimensions.
        minimum: Minimum acceptable score.
        low_dimensions: Labels of dimensions scored below ``minimum``.
        notes: Mapping from dimension key to note text.
        open_defects: Optional list of open defect dicts with ``id``,
            ``severity`` and ``summary`` keys and an optional ``evidence`` key.

    Returns:
        A fixed sentence when the gate passed; otherwise a bullet list with
        the overall rework instruction, one bullet per open defect and one
        bullet per low dimension.
    """
    if passed:
        return "None. Quality Result passed."
    bullets = [
        f"- Rework implementation until every quality dimension is at least {minimum:.1f}; current average is {average:.1f}.",
    ]
    for defect in open_defects or []:
        evidence_suffix = ""
        if defect.get("evidence"):
            evidence_suffix = f" Evidence: {defect['evidence']}."
        bullets.append(
            f"- Resolve {defect['id']} ({defect['severity']}): {defect['summary']}.{evidence_suffix}"
        )
    bullets.extend(
        f"- Improve {label}: {notes.get(key) or 'No note provided.'}"
        for key, label in QUALITY_DIMENSIONS
        if label in low_dimensions
    )
    return "\n".join(bullets)
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def update_quality_gate(plan_path, scores, notes, minimum):
    """Score the plan and persist the outcome to the sidecar and the markdown.

    The acceptance contract must be ready. Defects are re-read from the
    markdown first so that open defects fail the gate. On completion the
    stored quality result is replaced, the dirty flag and reasons are cleared,
    and the ``Quality Result`` and ``Rework Required`` sections are rewritten.

    Args:
        plan_path: Path of the plan markdown file.
        scores: Mapping from dimension key to numeric score.
        notes: Mapping from dimension key to evidence text.
        minimum: Minimum acceptable score.

    Returns:
        A summary dict with ``status``, ``minimum``, ``average`` (rounded to
        one decimal), ``low_dimensions``, ``open_defects`` (ids) and
        ``criteria_fingerprint``.

    Raises:
        RuntimeError: If the acceptance contract is not ready and specific.
    """
    state = sync_state_from_markdown(plan_path, ensure_acceptance_ready(plan_path))
    open_defects = [entry for entry in state.get("defects", []) if entry.get("status") == "open"]
    # Only the verdict is needed here; the markdown is rendered from the state below.
    _, passed, average, low_dimensions = render_quality_gate(scores, notes, minimum, open_defects)
    outcome = "pass" if passed else "fail"
    rounded_average = round(average, 1)
    fingerprint = state["acceptance_contract"]["fingerprint"]

    state["quality_result"] = {
        "status": outcome,
        "minimum": minimum,
        "average": rounded_average,
        "scored_at": utc_now_iso(),
        "criteria_fingerprint": fingerprint,
        "dimensions": {
            key: {"score": scores[key], "evidence": notes.get(key) or ""}
            for key, _ in QUALITY_DIMENSIONS
        },
    }
    state["implementation_dirty_after_score"] = False
    state["dirty_reasons"] = []
    save_plan_state(plan_path, state)

    document = plan_path.read_text()
    document = replace_section(document, "Quality Result", render_quality_result(state))
    rework_body = render_rework_section(passed, average, minimum, low_dimensions, notes, open_defects)
    document = replace_section(document, "Rework Required", rework_body)
    plan_path.write_text(document)

    return {
        "status": outcome,
        "minimum": minimum,
        "average": rounded_average,
        "low_dimensions": low_dimensions,
        "open_defects": [entry["id"] for entry in open_defects],
        "criteria_fingerprint": fingerprint,
    }
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
def missing_quality_notes(notes):
    """List the quality dimensions that have no evidence note.

    A note counts as missing when it is absent, falsy, or whitespace only.

    Returns:
        A list of ``{"dimension", "argument", "message"}`` dicts, one per
        dimension without a note.
    """
    return [
        {
            "dimension": label,
            "argument": "--" + QUALITY_NOTE_ARGS[key],
            "message": f"Provide evidence for {label}.",
        }
        for key, label in QUALITY_DIMENSIONS
        if not (notes.get(key) or "").strip()
    ]
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
def weak_quality_notes(notes):
    """List the quality dimensions whose evidence note is too weak.

    Empty notes are skipped. A non-empty note is weak when it has fewer than
    four tokens or contains none of the ``EVIDENCE_HINTS`` substrings
    (compared in lowercase).

    Returns:
        A list of ``{"dimension", "argument", "message"}`` dicts, one per
        dimension with a weak note.
    """
    flagged = []
    for key, label in QUALITY_DIMENSIONS:
        note = (notes.get(key) or "").strip()
        if not note:
            continue
        lowered = note.lower()
        token_count = len(re.findall(r"[A-Za-z0-9./:-]+", note))
        has_hint = any(hint in lowered for hint in EVIDENCE_HINTS)
        if token_count >= 4 and has_hint:
            continue
        flagged.append(
            {
                "dimension": label,
                "argument": "--" + QUALITY_NOTE_ARGS[key],
                "message": f"Provide concrete verification evidence for {label}, such as a command, browser check, log, code path, or review finding.",
            }
        )
    return flagged
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
def assert_quality_gate_passed(plan_path, plan_text):
    """Verify that a plan may be closed from a quality point of view.

    The checks run in this order and the first failure raises: acceptance
    contract is ``ready``; its fingerprint matches the current criteria; no
    defect is ``open``; the state is not flagged as changed after scoring;
    the quality status is ``pass``; the score was made against the current
    criteria fingerprint.

    Args:
        plan_path: Path of the plan markdown file.
        plan_text: Unused by this function; kept in the signature.

    Returns:
        The ``quality_result`` dict from the plan state.

    Raises:
        RuntimeError: If the plan has no sidecar.
        PlanCloseError: If any of the checks above fails.
    """
    state = sync_state_from_markdown(plan_path, require_plan_state(plan_path))
    contract = state.get("acceptance_contract", {})
    if contract.get("status") != "ready":
        raise PlanCloseError("acceptance-contract-not-ready", "Cannot close plan until the Acceptance Contract is ready.")

    recorded_fingerprint = contract.get("fingerprint")
    current_fingerprint = criteria_fingerprint(contract.get("criteria") or {})
    if recorded_fingerprint != current_fingerprint:
        raise PlanCloseError(
            "acceptance-fingerprint-stale",
            "Cannot close plan because the Acceptance Contract fingerprint is stale. Re-run `acceptance-set`.",
            {"current_fingerprint": current_fingerprint, "recorded_fingerprint": recorded_fingerprint},
        )

    open_defects = [entry for entry in state.get("defects", []) if entry.get("status") == "open"]
    if open_defects:
        defect_lines = [
            f"- {entry['id']} ({entry['severity']}): {entry['summary']}" for entry in open_defects
        ]
        raise PlanCloseError(
            "open-defects",
            "Cannot close plan with unresolved defects. Run `defect-resolve`, re-run validation, and score again.",
            {"open_defects": open_defects, "defects_text": "\n".join(defect_lines)},
        )

    quality = state.get("quality_result", {})
    if state.get("implementation_dirty_after_score"):
        raise PlanCloseError(
            "quality-result-stale",
            "Cannot close plan because plan state changed after the last quality score. Re-run `quality-score`.",
            {"dirty_reasons": state.get("dirty_reasons", [])},
        )

    quality_status = quality.get("status")
    if quality_status != "pass":
        raise PlanCloseError(
            "quality-result-not-passing",
            "Cannot close plan until the quality result passes. "
            "Run `quality-score`, fix any `## Rework Required` items, then score again.",
            {"quality_status": quality_status},
        )

    scored_fingerprint = quality.get("criteria_fingerprint")
    if scored_fingerprint != current_fingerprint:
        raise PlanCloseError(
            "quality-fingerprint-stale",
            "Cannot close plan because the quality result was scored against a stale Acceptance Contract fingerprint.",
            {"quality_fingerprint": scored_fingerprint, "current_fingerprint": current_fingerprint},
        )
    return quality
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
def plan_placeholder_issues(plan_text):
    """Return every entry of ``PLAN_PLACEHOLDERS`` that still occurs in ``plan_text``.

    The result keeps the order of ``PLAN_PLACEHOLDERS``.
    """
    return [placeholder for placeholder in PLAN_PLACEHOLDERS if placeholder in plan_text]
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def assert_plan_placeholders_resolved(plan_text):
    """Refuse to close a plan that still contains starter placeholder text.

    Raises:
        PlanCloseError: With code ``plan-placeholders-unresolved`` and the
            list of remaining placeholders when any is found.
    """
    placeholders = plan_placeholder_issues(plan_text)
    if not placeholders:
        return
    bullet_list = "\n".join(f"- {placeholder}" for placeholder in placeholders)
    raise PlanCloseError(
        "plan-placeholders-unresolved",
        "Cannot close plan with unresolved starter placeholders:\n"
        + bullet_list
        + "\nReplace generic Scope, Constraints, Steps, and Validation text with task-specific content before closing.",
        {"placeholders": placeholders},
    )
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def active_plan_dir(repo):
    """Return ``<repo>/docs/exec-plans/active``."""
    exec_plans_root = repo / "docs" / "exec-plans"
    return exec_plans_root / "active"
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def completed_plan_dir(repo):
    """Return ``<repo>/docs/exec-plans/completed``."""
    exec_plans_root = repo / "docs" / "exec-plans"
    return exec_plans_root / "completed"
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
def plan_path_from_arg(repo, plan_arg):
    """Resolve a plan argument to an existing file inside the repository.

    Args:
        repo: Repository root path.
        plan_arg: Absolute path, or path relative to ``repo``.

    Returns:
        A tuple ``(plan_path, relative_plan)``: the resolved absolute path and
        its path relative to the resolved repository root, as a string.

    Raises:
        ValueError: If the resolved path lies outside the repository.
        FileNotFoundError: If the resolved path does not exist.
    """
    candidate = Path(plan_arg)
    if not candidate.is_absolute():
        candidate = repo / candidate
    plan_path = candidate.resolve()

    # The containment check comes before the existence check, so a path
    # outside the repository is always reported as such.
    try:
        relative_plan = str(plan_path.relative_to(repo.resolve()))
    except ValueError as error:
        raise ValueError(f"Plan must be inside repo: {plan_arg}") from error

    if plan_path.exists():
        return plan_path, relative_plan
    raise FileNotFoundError(f"Plan not found: {plan_path}")
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def create_plan(repo, slug, goal):
    """Create a new active plan file together with its JSON sidecar.

    The file is named ``<UTC date>-<slugified slug>.md`` inside the active
    plan directory, which is created when missing. After the template is
    written, the sidecar is saved and the state-driven sections of the
    markdown are rendered from it.

    Args:
        repo: Repository root path.
        slug: Short name for the plan; also used to derive the title.
        goal: Goal text inserted into the template and the state.

    Returns:
        Path of the created plan file.

    Raises:
        FileExistsError: If a plan with the same file name already exists.
    """
    plan_dir = active_plan_dir(repo)
    plan_dir.mkdir(parents=True, exist_ok=True)
    today = datetime.now(UTC).strftime("%Y-%m-%d")
    plan_path = plan_dir / f"{today}-{slugify(slug)}.md"
    if plan_path.exists():
        raise FileExistsError(f"Plan already exists: {plan_path}")

    readable_title = slug.replace("-", " ").strip() or "task"
    # NOTE(review): this literal is presumably the same text as
    # DEFAULT_KNOWLEDGE_PLACEHOLDER, which other functions compare against — confirm.
    rendered = PLAN_TEMPLATE.format(
        title=readable_title.title(),
        goal=goal,
        defect_section=DEFAULT_DEFECT_PLACEHOLDER,
        knowledge_section="- [ ] Add durable facts here as they emerge -> <destination-doc>",
    )
    plan_path.write_text(rendered)

    initial_state = new_plan_state(plan_path, goal)
    save_plan_state(plan_path, initial_state)
    sync_plan_markdown_from_state(plan_path, initial_state)
    return plan_path
|
|
559
|
+
|
|
560
|
+
|
|
561
|
+
def set_acceptance_contract(plan_path, criteria):
    """Validate and store the acceptance criteria of a plan.

    Each criterion is cleaned with ``clean_fact_text`` and checked for
    specificity. On failure nothing is written. On success the contract
    becomes ``ready`` with a fresh fingerprint; if the plan already had a
    ``pass``/``fail`` score, that score is invalidated (dirty flag set, reason
    ``acceptance-contract-changed`` recorded, status reset to ``pending``).
    The sidecar and the markdown are then updated.

    Args:
        plan_path: Path of the plan markdown file.
        criteria: Mapping from dimension key to criterion text.

    Returns:
        ``{"status": "ready", "criteria_fingerprint": ...}`` on success, or a
        ``{"status": "fail", ...}`` dict carrying the list of issues.

    Raises:
        RuntimeError: If the plan has no sidecar.
    """
    from .knowledge import clean_fact_text

    state = require_plan_state(plan_path)
    normalized = {}
    for key, _ in QUALITY_DIMENSIONS:
        normalized[key] = clean_fact_text(criteria.get(key) or "")

    issues = specific_acceptance_issues(normalized)
    if issues:
        return {
            "status": "fail",
            "reason": "acceptance-criteria-not-specific",
            "message": "acceptance-set requires concrete, task-specific criteria for every dimension.",
            "issues": issues,
        }

    fingerprint = criteria_fingerprint(normalized)
    state["acceptance_contract"] = {
        "status": "ready",
        "criteria": normalized,
        "fingerprint": fingerprint,
    }

    previously_scored = state.get("quality_result", {}).get("status") in {"pass", "fail"}
    if previously_scored:
        # An existing score was made against the old criteria.
        state["implementation_dirty_after_score"] = True
        dirty_reasons = state.setdefault("dirty_reasons", [])
        if "acceptance-contract-changed" not in dirty_reasons:
            dirty_reasons.append("acceptance-contract-changed")
        state["quality_result"]["status"] = "pending"

    save_plan_state(plan_path, state)
    sync_plan_markdown_from_state(plan_path, state)
    return {"status": "ready", "criteria_fingerprint": fingerprint}
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def close_plan(repo, plan_relative_path, summary, force):
    """Close an active plan and move it to the completed directory.

    Unless ``force`` is set, the plan must have no starter placeholders, must
    pass the quality gate and the phase continuity check, and must have no
    unchecked durable knowledge items. The closed markdown (knowledge items
    marked closed, completion notes set to ``summary``) is written to the
    completed directory, the sidecar is updated and moved alongside it, the
    active file is deleted, and the workstreams are updated.

    Args:
        repo: Repository root path.
        plan_relative_path: Plan path, absolute or relative to ``repo``.
        summary: Text stored in the ``Completion Notes`` section.
        force: When true, skip the closing checks.

    Returns:
        A tuple ``(destination, open_items)``: the path of the completed plan
        and the knowledge items that were still unchecked when closing.

    Raises:
        ValueError: If the plan lies outside the repository.
        FileNotFoundError: If the plan does not exist.
        PlanCloseError: If a closing check fails and ``force`` is false.
    """
    from .continuation import assert_phase_continuity_closed, update_workstreams_after_plan_close
    from .knowledge import extract_knowledge_items, mark_knowledge_items_closed

    plan_path, active_relative_path = plan_path_from_arg(repo, plan_relative_path)
    original_text = plan_path.read_text()
    enforce_checks = not force
    if enforce_checks:
        assert_plan_placeholders_resolved(original_text)
        assert_quality_gate_passed(plan_path, original_text)
        assert_phase_continuity_closed(repo, plan_path, original_text)

    open_items = []
    for item in extract_knowledge_items(original_text):
        if item.startswith("- [ ]") and item != DEFAULT_KNOWLEDGE_PLACEHOLDER:
            open_items.append(item)
    if open_items and enforce_checks:
        raise PlanCloseError(
            "open-durable-knowledge",
            "Cannot close plan with unresolved durable knowledge items.",
            {"open_items": open_items},
        )

    closed_text = replace_completion_notes(mark_knowledge_items_closed(original_text), summary)

    # The state is refreshed from the markdown still on disk, that is, the
    # text as it was before the closing edits above.
    state = load_plan_state(plan_path)
    if state is not None:
        state = sync_state_from_markdown(plan_path, state)

    completed_dir = completed_plan_dir(repo)
    completed_dir.mkdir(parents=True, exist_ok=True)
    destination = completed_dir / plan_path.name
    destination.write_text(closed_text)

    active_sidecar = sidecar_path_for_plan(plan_path)
    completed_sidecar = sidecar_path_for_plan(destination)
    if active_sidecar.exists():
        if state is not None:
            # Point the sidecar at the new markdown location before moving it.
            state["markdown_path"] = str(destination)
            state["updated_at"] = utc_now_iso()
            active_sidecar.write_text(json.dumps(state, indent=2, ensure_ascii=False) + "\n")
        shutil.move(str(active_sidecar), str(completed_sidecar))

    plan_path.unlink()
    completed_relative_path = str(destination.relative_to(repo))
    update_workstreams_after_plan_close(repo, active_relative_path, completed_relative_path)
    return destination, open_items
|
|
630
|
+
|