@hallucination-studio/harness-engine 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +185 -27
  3. package/bin/install.js +29 -17
  4. package/package.json +10 -4
  5. package/skills/harness-engine/SKILL.md +97 -0
  6. package/skills/harness-engine/agents/openai.yaml +4 -0
  7. package/skills/harness-engine/evals/cases.json +94 -0
  8. package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
  9. package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
  10. package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
  11. package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
  12. package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
  13. package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
  14. package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
  15. package/skills/harness-engine/evals/run_evals.py +14 -0
  16. package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
  17. package/skills/harness-engine/references/evidence-first-evals.md +187 -0
  18. package/skills/harness-engine/references/exec-plans.md +59 -0
  19. package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
  20. package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
  21. package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
  22. package/skills/harness-engine/references/template-policy.md +17 -0
  23. package/skills/harness-engine/references/workflow.md +62 -0
  24. package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
  25. package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
  26. package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
  27. package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
  28. package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
  29. package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
  30. package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
  31. package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
  32. package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
  33. package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
  34. package/skills/harness-engine/scripts/manage_harness.py +14 -0
  35. package/skills/harness-repo-bootstrap/SKILL.md +0 -68
  36. package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
  37. package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
  38. package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
  39. package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
  40. package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
  41. package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
  42. package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
  43. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
  44. /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
  45. /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
@@ -0,0 +1,329 @@
1
+ from .common import *
2
+ from .plans import find_section, mark_state_dirty, open_defects_for_plan, replace_section, utc_now_iso
3
+ from .templates import ensure_parent
4
+
5
def extract_knowledge_items(text):
    """Collect the checklist entries of the durable-knowledge section.

    Args:
        text: Full plan text.

    Returns:
        The stripped lines that start with ``- [`` and sit between the
        ``## Durable Knowledge To Capture`` heading and the next line starting
        with ``## `` (or the end of the text). An empty list is returned when
        ``find_section`` reports no such heading.
    """
    all_lines = text.splitlines()
    heading_at = find_section(all_lines, "## Durable Knowledge To Capture")
    if heading_at is None:
        return []

    collected = []
    cursor = heading_at + 1
    # Walk the section body; the next level-2 heading ends it.
    while cursor < len(all_lines) and not all_lines[cursor].startswith("## "):
        entry = all_lines[cursor].strip()
        if entry.startswith("- ["):
            collected.append(entry)
        cursor += 1
    return collected
18
+
19
+
20
def extract_defect_items(text):
    """Collect the checklist entries of the defects section.

    Args:
        text: Full plan text.

    Returns:
        The stripped lines that start with ``- [`` and sit between the
        ``## Defects To Resolve`` heading and the next line starting with
        ``## `` (or the end of the text). An empty list is returned when
        ``find_section`` reports no such heading.
    """
    all_lines = text.splitlines()
    heading_at = find_section(all_lines, "## Defects To Resolve")
    if heading_at is None:
        return []

    collected = []
    cursor = heading_at + 1
    # Walk the section body; the next level-2 heading ends it.
    while cursor < len(all_lines) and not all_lines[cursor].startswith("## "):
        entry = all_lines[cursor].strip()
        if entry.startswith("- ["):
            collected.append(entry)
        cursor += 1
    return collected
33
+
34
+
35
def knowledge_id_for(fact, destination):
    """Derive a deterministic identifier for a knowledge item.

    The cleaned destination and the cleaned fact are joined with a NUL
    character, hashed with SHA-1, and the first ten hex digits are prefixed
    with ``hk-``.

    Args:
        fact: Fact text; normalised with ``clean_fact_text`` before hashing.
        destination: Destination text; normalised with
            ``clean_destination_text`` before hashing.

    Returns:
        A string of the form ``hk-<10 hex chars>``.
    """
    key = "\0".join((clean_destination_text(destination), clean_fact_text(fact)))
    fingerprint = hashlib.sha1(key.encode()).hexdigest()
    return "hk-" + fingerprint[:10]
38
+
39
+
40
def defect_id_for(summary):
    """Derive a deterministic identifier for a defect.

    Args:
        summary: Defect summary; normalised with ``clean_fact_text`` before
            hashing.

    Returns:
        ``bug-`` followed by the first ten hex digits of the SHA-1 of the
        cleaned summary.
    """
    normalized = clean_fact_text(summary)
    fingerprint = hashlib.sha1(normalized.encode()).hexdigest()
    return "bug-" + fingerprint[:10]
43
+
44
+
45
def parse_knowledge_item(item):
    """Parse one knowledge checklist line into its fields.

    The expected shape is
    ``- [ ] [id:<id>] <fact> -> <destination> | evidence: <text>`` where the
    ``[id:...]`` tag (also accepted as ``[kid:...]``) and the evidence suffix
    are optional and the checkbox may hold a space, ``x`` or ``X``.

    Args:
        item: The checklist line; surrounding whitespace is ignored for
            matching.

    Returns:
        ``None`` when the line does not match. Otherwise a dict with keys
        ``status`` (``"closed"`` for a ticked box, else ``"open"``), ``id``
        (``None`` when no tag is present), ``fact``, ``destination``,
        ``evidence`` (``None`` when absent) and ``raw`` (the argument as given).
    """
    pattern = (
        r"- \[(?P<status>[ xX])\]\s+"
        r"(?:\[(?:id|kid):(?P<id>[A-Za-z0-9_.:-]+)\]\s+)?"
        r"(?P<fact>.*?)\s+->\s+"
        r"(?P<destination>[^|]+?)"
        r"(?:\s+\|\s+evidence:\s+(?P<evidence>.+))?$"
    )
    found = re.match(pattern, item.strip())
    if found is None:
        return None

    fields = found.groupdict()
    evidence = fields["evidence"]
    return {
        "status": "closed" if fields["status"] in ("x", "X") else "open",
        "id": fields["id"],
        "fact": clean_fact_text(fields["fact"]),
        "destination": clean_destination_text(fields["destination"]),
        "evidence": clean_fact_text(evidence) if evidence else None,
        "raw": item,
    }
64
+
65
+
66
def parse_defect_item(item):
    """Parse one defect checklist line into its fields.

    The expected shape is
    ``- [ ] [bug:<id>] [P0-P3] <summary> | evidence: <text> | fix: <text>``
    where the ``[bug:...]`` tag (also accepted as ``[id:...]``), the evidence
    part and the fix part are all optional.

    Args:
        item: The checklist line; surrounding whitespace is ignored for
            matching.

    Returns:
        ``None`` when the line does not match. Otherwise a dict with keys
        ``status`` (``"closed"`` for a ticked box, else ``"open"``), ``id``
        (the explicit tag, or ``defect_id_for`` of the matched summary when
        no tag is present), ``severity``, ``summary``, ``evidence``, ``fix``
        (each ``None`` when absent or empty) and ``raw`` (the argument as
        given).
    """
    pattern = (
        r"- \[(?P<status>[ xX])\]\s+"
        r"(?:\[(?:id|bug):(?P<id>[A-Za-z0-9_.:-]+)\]\s+)?"
        r"\[(?P<severity>P[0-3])\]\s+"
        r"(?P<summary>.*?)"
        r"(?:\s+\|\s+evidence:\s+(?P<evidence>.*?))?"
        r"(?:\s+\|\s+fix:\s+(?P<fix>.+))?$"
    )
    found = re.match(pattern, item.strip())
    if found is None:
        return None

    fields = found.groupdict()
    evidence = fields["evidence"]
    fix = fields["fix"]
    return {
        "status": "closed" if fields["status"] in ("x", "X") else "open",
        "id": fields["id"] or defect_id_for(fields["summary"]),
        "severity": fields["severity"],
        "summary": clean_fact_text(fields["summary"]),
        "evidence": clean_fact_text(evidence) if evidence else None,
        "fix": clean_fact_text(fix) if fix else None,
        "raw": item,
    }
87
+
88
+
89
def clean_fact_text(value):
    """Normalise free text to a single backtick-free line.

    Backticks are removed, every run of whitespace (including newlines)
    becomes one space, and leading/trailing whitespace is dropped.

    Args:
        value: Text to normalise.

    Returns:
        The normalised string.
    """
    without_ticks = value.strip().replace("`", "")
    single_spaced = re.sub(r"\s+", " ", without_ticks)
    # Removing backticks can expose whitespace at the edges, so trim again.
    return single_spaced.strip()
94
+
95
+
96
def clean_destination_text(value):
    """Trim a destination string.

    Surrounding whitespace is removed first, then any backticks at either
    end. Backticks and whitespace inside the value are left untouched.

    Args:
        value: Destination text.

    Returns:
        The trimmed string.
    """
    trimmed = value.strip()
    return trimmed.strip("`")
98
+
99
+
100
def append_knowledge_item(plan_path, fact, destination):
    """Append an open knowledge checklist entry to a plan file.

    The entry ``- [ ] [id:<id>] <fact> -> <destination>`` is inserted at the
    end of the ``## Durable Knowledge To Capture`` section. Every line equal
    to ``DEFAULT_KNOWLEDGE_PLACEHOLDER`` (after stripping) is dropped from the
    file. After writing, ``mark_state_dirty`` is called with
    ``"knowledge-item-logged"``.

    Args:
        plan_path: Path-like plan file, read via ``read_text()`` and rewritten
            via ``write_text()``.
        fact: Fact text, written verbatim into the entry.
        destination: Destination text, written verbatim into the entry.

    Returns:
        A ``(item, item_id)`` tuple: the checklist line that was written and
        the id produced by ``knowledge_id_for``.

    Raises:
        ValueError: If the plan has no ``## Durable Knowledge To Capture``
            section.
    """
    lines = plan_path.read_text().splitlines()
    filtered_lines = [line for line in lines if line.strip() != DEFAULT_KNOWLEDGE_PLACEHOLDER]
    # Locate the heading in the *filtered* list: an index taken from the
    # unfiltered list goes stale when a placeholder line sits above the heading.
    section_index = find_section(filtered_lines, "## Durable Knowledge To Capture")
    if section_index is None:
        raise ValueError("Plan is missing '## Durable Knowledge To Capture'")

    # Advance to the end of the section (next level-2 heading or end of file).
    insert_index = section_index + 1
    while insert_index < len(filtered_lines) and not filtered_lines[insert_index].startswith("## "):
        insert_index += 1

    item_id = knowledge_id_for(fact, destination)
    item = f"- [ ] [id:{item_id}] {fact} -> {destination}"
    updated_lines = filtered_lines[:insert_index] + [item] + filtered_lines[insert_index:]
    plan_path.write_text("\n".join(updated_lines).rstrip() + "\n")
    mark_state_dirty(plan_path, "knowledge-item-logged")
    return item, item_id
116
+
117
+
118
def render_open_defect_rework(open_defects):
    """Render a bullet list describing the rework needed for open defects.

    Args:
        open_defects: Iterable of dicts with ``id``, ``severity`` and
            ``summary`` keys and an optional ``evidence`` key.

    Returns:
        A newline-joined string: one fixed introductory bullet followed by one
        bullet per defect. A defect's evidence is appended to its bullet only
        when it is present and truthy.
    """
    bullets = ["- Resolve all open defects, then re-run validation and `quality-score`."]
    for entry in open_defects:
        suffix = ""
        if entry.get("evidence"):
            suffix = f" Evidence: {entry['evidence']}."
        bullets.append(f"- Resolve {entry['id']} ({entry['severity']}): {entry['summary']}.{suffix}")
    return "\n".join(bullets)
124
+
125
+
126
def mark_quality_gate_blocked_by_defects(text):
    """Update a plan's quality gate and rework notes while defects are open.

    When ``open_defects_for_plan`` returns nothing, the text is returned
    unchanged. Otherwise:

    * with no ``## Quality Result`` section, one is written through
      ``replace_section`` with ``Status: fail`` and a "blocked" notice;
    * with an existing section, its ``Status:`` line is rewritten to
      ``Status: pending`` (inserted at the top of the section when missing)
      and every ``Last scored:`` line gets the current ``utc_now_iso()``
      value;
    * in both cases the ``Rework Required`` section is replaced with the
      rendering of the open defects.

    NOTE(review): a freshly created section is marked ``fail`` while an
    existing one is reset to ``pending`` — confirm this asymmetry is intended.

    Args:
        text: Full plan text.

    Returns:
        The updated plan text.
    """
    pending_defects = open_defects_for_plan(text)
    if not pending_defects:
        return text

    plan_lines = text.splitlines()
    heading_at = find_section(plan_lines, "## Quality Result")
    if heading_at is None:
        fresh_gate = [
            "Status: fail",
            "Minimum score: 8.0",
            "Average score: pending",
            f"Last scored: {utc_now_iso()}",
            "Criteria fingerprint: pending",
            "",
            "Blocked by unresolved defects. Run `defect-resolve`, re-run validation, then run `quality-score`.",
        ]
        text = replace_section(text, "Quality Result", "\n".join(fresh_gate))
    else:
        body_start = heading_at + 1
        # The section body runs to the next level-2 heading or the end of the plan.
        body_end = next(
            (pos for pos in range(body_start, len(plan_lines)) if plan_lines[pos].startswith("## ")),
            len(plan_lines),
        )
        rewritten = []
        saw_status = False
        for entry in plan_lines[body_start:body_end]:
            if entry.startswith("Status:"):
                saw_status = True
                entry = "Status: pending"
            elif entry.startswith("Last scored:"):
                entry = f"Last scored: {utc_now_iso()}"
            rewritten.append(entry)
        if not saw_status:
            rewritten = ["Status: pending"] + rewritten
        plan_lines = plan_lines[:body_start] + rewritten + plan_lines[body_end:]
        text = "\n".join(plan_lines).rstrip() + "\n"

    return replace_section(text, "Rework Required", render_open_defect_rework(pending_defects))
167
+
168
+
169
def append_defect_item(plan_path, severity, summary, evidence=None):
    """Append an open defect checklist entry to a plan file.

    If the plan has no ``## Defects To Resolve`` section, one is first created
    through ``replace_section``. The entry
    ``- [ ] [bug:<id>] [<severity>] <summary>`` (plus ``| evidence: <text>``
    when evidence is given) is inserted at the end of that section, and every
    line equal to ``DEFAULT_DEFECT_PLACEHOLDER`` (after stripping) is dropped.
    The result is passed through ``mark_quality_gate_blocked_by_defects``
    before being written, and ``mark_state_dirty`` is then called with
    ``"defect-logged"``.

    Args:
        plan_path: Path-like plan file, read via ``read_text()`` and rewritten
            via ``write_text()``.
        severity: Severity label written inside square brackets.
            ``parse_defect_item`` only recognises ``P0``..``P3``.
        summary: Defect summary; normalised with ``clean_fact_text``.
        evidence: Optional evidence text; normalised with ``clean_fact_text``.

    Returns:
        A ``(item, item_id)`` tuple: the checklist line that was written and
        the id produced by ``defect_id_for``.

    Raises:
        ValueError: If the section is still missing after the attempt to
            create it.
    """
    text = plan_path.read_text()
    if find_section(text.splitlines(), "## Defects To Resolve") is None:
        text = replace_section(text, "Defects To Resolve", DEFAULT_DEFECT_PLACEHOLDER)

    lines = text.splitlines()
    filtered_lines = [line for line in lines if line.strip() != DEFAULT_DEFECT_PLACEHOLDER]
    # Locate the heading in the *filtered* list: an index taken from the
    # unfiltered list goes stale when a placeholder line sits above the heading.
    section_index = find_section(filtered_lines, "## Defects To Resolve")
    if section_index is None:
        raise ValueError("Plan is missing '## Defects To Resolve'")

    # Advance to the end of the section (next level-2 heading or end of file).
    insert_index = section_index + 1
    while insert_index < len(filtered_lines) and not filtered_lines[insert_index].startswith("## "):
        insert_index += 1

    item_id = defect_id_for(summary)
    safe_summary = clean_fact_text(summary)
    safe_evidence = clean_fact_text(evidence) if evidence else None
    item = f"- [ ] [bug:{item_id}] [{severity}] {safe_summary}"
    if safe_evidence:
        item = f"{item} | evidence: {safe_evidence}"

    updated_lines = filtered_lines[:insert_index] + [item] + filtered_lines[insert_index:]
    plan_path.write_text(mark_quality_gate_blocked_by_defects("\n".join(updated_lines).rstrip() + "\n"))
    mark_state_dirty(plan_path, "defect-logged")
    return item, item_id
191
+
192
+
193
def close_defect_line(line, fix_evidence):
    """Tick a defect checklist line and record how it was fixed.

    Args:
        line: The checklist line; only its first ``- [ ]`` is changed to
            ``- [x]``.
        fix_evidence: Text appended as `` | fix: <text>``.

    Returns:
        The ticked line. The fix suffix is appended only when the line does
        not already contain ``| fix:``.
    """
    checked = line.replace("- [ ]", "- [x]", 1)
    if "| fix:" in checked:
        return checked
    return f"{checked} | fix: {fix_evidence}"
198
+
199
+
200
def mark_defect_resolved(plan_path, defect_id, fix_evidence):
    """Close the first open defect with the given id in a plan file.

    The matching line is ticked and annotated through ``close_defect_line``.
    The ``Rework Required`` section is then replaced: with the rendering of
    the remaining open defects, or with a "defects resolved" reminder when
    none remain. After writing, ``mark_state_dirty`` is called with
    ``"defect-resolved"``.

    Args:
        plan_path: Path-like plan file, read via ``read_text()`` and rewritten
            via ``write_text()``.
        defect_id: Id of the defect to close; must be truthy.
        fix_evidence: Description of the fix; must be truthy. It is normalised
            with ``clean_fact_text`` before being written.

    Raises:
        ValueError: If ``defect_id`` or ``fix_evidence`` is missing, or no
            open defect with that id exists in the plan.
    """
    if not defect_id:
        raise ValueError("Provide --id to resolve a defect")
    if not fix_evidence:
        raise ValueError("Provide --fix-evidence or --fix-evidence-file to resolve a defect")

    plan_lines = plan_path.read_text().splitlines()
    safe_fix = clean_fact_text(fix_evidence)

    rewritten = []
    closed_one = False
    for raw_line in plan_lines:
        if not closed_one:
            info = parse_defect_item(raw_line.strip())
            if info and info["status"] == "open" and info["id"] == defect_id:
                # Only the first matching open defect is closed.
                rewritten.append(close_defect_line(raw_line, safe_fix))
                closed_one = True
                continue
        rewritten.append(raw_line)

    if not closed_one:
        raise ValueError(f"Open defect not found for id: {defect_id}")

    text = "\n".join(rewritten).rstrip() + "\n"
    remaining = open_defects_for_plan(text)
    if remaining:
        rework = render_open_defect_rework(remaining)
    else:
        rework = "Defects resolved. Re-run validation and `quality-score` before closing."
    text = replace_section(text, "Rework Required", rework)
    plan_path.write_text(text)
    mark_state_dirty(plan_path, "defect-resolved")
231
+
232
+
233
def mark_knowledge_items_closed(text):
    """Tick every open checklist entry in the durable-knowledge section.

    The section is recognised by a line starting with ``## `` whose stripped,
    lower-cased form equals ``## durable knowledge to capture``; any later
    line starting with ``## `` ends it. Lines equal to
    ``DEFAULT_KNOWLEDGE_PLACEHOLDER`` (after stripping) are left untouched.

    Args:
        text: Full plan text.

    Returns:
        The updated text, with trailing whitespace removed and exactly one
        terminating newline.
    """
    heading = "## durable knowledge to capture"
    result = []
    inside = False
    for raw_line in text.splitlines():
        if raw_line.startswith("## "):
            # Each level-2 heading switches the "inside the section" flag.
            inside = raw_line.strip().lower() == heading
        trimmed = raw_line.strip()
        is_open_entry = inside and trimmed.startswith("- [ ]") and trimmed != DEFAULT_KNOWLEDGE_PLACEHOLDER
        if is_open_entry:
            raw_line = raw_line.replace("- [ ]", "- [x]", 1)
        result.append(raw_line)
    return "\n".join(result).rstrip() + "\n"
245
+
246
+
247
def destination_contains_fact(repo, destination, fact):
    """Report whether a destination file already contains a fact.

    Both the fact and the file content are passed through
    ``normalize_fact_for_match`` and compared by substring containment.

    Args:
        repo: Base path; ``repo / destination`` is the file inspected.
        destination: Path of the file relative to ``repo``.
        fact: Text to look for.

    Returns:
        ``True`` when the normalised fact occurs in the normalised file
        content. ``False`` when the target does not exist, is not a regular
        file, or cannot be decoded as text.
    """
    candidate = repo / destination
    if not (candidate.exists() and candidate.is_file()):
        return False
    try:
        needle = normalize_fact_for_match(fact)
        haystack = normalize_fact_for_match(candidate.read_text())
    except UnicodeDecodeError:
        # Content that does not decode as text cannot contain the fact.
        return False
    return needle in haystack
255
+
256
+
257
def normalize_fact_for_match(value):
    """Normalise text for loose comparison.

    Backticks are removed, whitespace runs collapse to a single space, the
    ends are trimmed, and finally any run of ``.`` or ``。`` at the very end
    is dropped. Whitespace exposed by that last step is not trimmed again.

    Args:
        value: Text to normalise.

    Returns:
        The normalised string.
    """
    without_ticks = value.replace("`", "")
    single_spaced = re.sub(r"\s+", " ", without_ticks).strip()
    return re.sub(r"[.。]+$", "", single_spaced)
263
+
264
+
265
def append_fact_to_destination(repo, destination, fact):
    """Append a fact as a new paragraph at the end of a destination file.

    ``ensure_parent`` is called on the target before it is written. Existing
    content is kept and separated from the new fact by one blank line. A
    missing or empty file receives just the fact, with no leading blank line.

    Args:
        repo: Base path; ``repo / destination`` is the file written.
        destination: Path of the file relative to ``repo``.
        fact: Text to append; a trailing newline is added.
    """
    target = repo / destination
    ensure_parent(target)
    existing = target.read_text() if target.exists() else ""
    if not existing:
        # Nothing to separate from: avoid starting the file with a blank line.
        separator = ""
    elif existing.endswith("\n"):
        separator = "\n"
    else:
        separator = "\n\n"
    target.write_text(existing + separator + fact + "\n")
273
+
274
+
275
def close_knowledge_line(line, evidence=None):
    """Tick a knowledge checklist line, optionally recording evidence.

    Args:
        line: The checklist line; only its first ``- [ ]`` is changed to
            ``- [x]``.
        evidence: Optional text appended as `` | evidence: <text>``.

    Returns:
        The ticked line. The evidence suffix is appended only when evidence
        is truthy and the line does not already contain ``| evidence:``.
    """
    checked = line.replace("- [ ]", "- [x]", 1)
    if not evidence or "| evidence:" in checked:
        return checked
    return f"{checked} | evidence: {evidence}"
280
+
281
+
282
def mark_single_knowledge_item_written(
    repo,
    plan_path,
    fact_text=None,
    destination=None,
    append=False,
    knowledge_id=None,
    evidence=None,
):
    """Close one open knowledge item after verifying it reached its destination.

    The first open (``- [ ]``) knowledge line whose id equals ``knowledge_id``
    or whose fact matches ``fact_text`` (compared through
    ``normalize_fact_for_match``) is selected; when ``destination`` is given
    the item's destination must equal it as well. The verification text is
    the cleaned evidence if given, otherwise the cleaned ``fact_text``,
    otherwise the item's own fact. It must already be present in the item's
    destination file, unless ``append`` is set, in which case it is appended
    there. The line is then ticked through ``close_knowledge_line``, the plan
    is rewritten, and ``mark_state_dirty`` is called with
    ``"knowledge-item-written"``.

    Args:
        repo: Base path used to resolve the item's destination.
        plan_path: Path-like plan file, read via ``read_text()`` and rewritten
            via ``write_text()``.
        fact_text: Fact to match; optional when ``knowledge_id`` is given.
        destination: Optional destination the item must have.
        append: When true, missing verification text is appended to the
            destination instead of raising.
        knowledge_id: Item id to match; optional when ``fact_text`` is given.
        evidence: Optional evidence text; used for verification and recorded
            on the closed line.

    Raises:
        ValueError: If neither ``fact_text`` nor ``knowledge_id`` is given, the
            selected item has an empty destination, the verification text is
            missing from the destination while ``append`` is false, or no
            matching open item exists.
    """
    if not (fact_text or knowledge_id):
        raise ValueError("Provide either --id or --fact to mark knowledge as written")

    plan_lines = plan_path.read_text().splitlines()
    wanted_fact = clean_fact_text(fact_text) if fact_text else None
    wanted_destination = clean_destination_text(destination) if destination else None
    wanted_evidence = clean_fact_text(evidence) if evidence else None

    rewritten = []
    closed_one = False
    for raw_line in plan_lines:
        trimmed = raw_line.strip()
        entry = parse_knowledge_item(trimmed)
        if not entry:
            rewritten.append(raw_line)
            continue

        same_destination = wanted_destination is None or entry["destination"] == wanted_destination
        same_fact = wanted_fact is not None and normalize_fact_for_match(wanted_fact) == normalize_fact_for_match(
            entry["fact"]
        )
        same_id = knowledge_id is not None and entry["id"] == knowledge_id
        is_target = trimmed.startswith("- [ ]") and (same_id or same_fact) and same_destination and not closed_one
        if not is_target:
            rewritten.append(raw_line)
            continue

        entry_destination = entry["destination"]
        if not entry_destination:
            raise ValueError("Destination is required to verify durable knowledge")

        # Prefer explicit evidence, then the caller's fact, then the plan's own wording.
        verification_text = wanted_evidence or wanted_fact or entry["fact"]
        if not destination_contains_fact(repo, entry_destination, verification_text):
            if not append:
                raise ValueError(
                    f"Destination {entry_destination} does not contain verification text: {verification_text}. "
                    "Write it there first, pass --evidence with text present in the doc, or re-run with --append."
                )
            append_fact_to_destination(repo, entry_destination, verification_text)

        rewritten.append(close_knowledge_line(raw_line, evidence=wanted_evidence))
        closed_one = True

    if not closed_one:
        target_description = f"id: {knowledge_id}" if knowledge_id else f"fact: {fact_text}"
        raise ValueError(f"Open knowledge item not found for {target_description}")

    plan_path.write_text("\n".join(rewritten).rstrip() + "\n")
    mark_state_dirty(plan_path, "knowledge-item-written")