@hallucination-studio/harness-engine 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -27
- package/bin/install.js +29 -17
- package/package.json +10 -4
- package/skills/harness-engine/SKILL.md +97 -0
- package/skills/harness-engine/agents/openai.yaml +4 -0
- package/skills/harness-engine/evals/cases.json +94 -0
- package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
- package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
- package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
- package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
- package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
- package/skills/harness-engine/evals/run_evals.py +14 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
- package/skills/harness-engine/references/evidence-first-evals.md +187 -0
- package/skills/harness-engine/references/exec-plans.md +59 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
- package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
- package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
- package/skills/harness-engine/references/template-policy.md +17 -0
- package/skills/harness-engine/references/workflow.md +62 -0
- package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
- package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
- package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
- package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
- package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
- package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
- package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
- package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
- package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
- package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
- package/skills/harness-engine/scripts/manage_harness.py +14 -0
- package/skills/harness-repo-bootstrap/SKILL.md +0 -68
- package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
- package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
- package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
- package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
- package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
- package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
- package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
from .common import *
|
|
2
|
+
from .plans import find_section, mark_state_dirty, open_defects_for_plan, replace_section, utc_now_iso
|
|
3
|
+
from .templates import ensure_parent
|
|
4
|
+
|
|
5
|
+
def extract_knowledge_items(text):
    """Return the checklist entries under '## Durable Knowledge To Capture'.

    The section is located with ``find_section`` (presumably it returns the
    index of the heading line, or ``None`` when the heading is absent --
    confirm against ``plans.find_section``). Scanning stops at the next line
    that starts with ``"## "``. Every line whose stripped form begins with
    ``"- ["`` is returned, stripped, in document order.

    Returns an empty list when the section is missing.
    """
    all_lines = text.splitlines()
    heading_at = find_section(all_lines, "## Durable Knowledge To Capture")
    if heading_at is None:
        return []
    collected = []
    cursor = heading_at + 1
    # Walk forward until the document ends or the next level-2 heading starts.
    while cursor < len(all_lines) and not all_lines[cursor].startswith("## "):
        entry = all_lines[cursor].strip()
        if entry.startswith("- ["):
            collected.append(entry)
        cursor += 1
    return collected
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def extract_defect_items(text):
    """Return the checklist entries under '## Defects To Resolve'.

    Only the lines between the heading and the next line starting with
    ``"## "`` are considered. Entries are the stripped lines that begin with
    ``"- ["``, in document order. An empty list is returned when
    ``find_section`` reports the heading as missing (``None``).
    """
    rows = text.splitlines()
    start = find_section(rows, "## Defects To Resolve")
    if start is None:
        return []
    body = rows[start + 1 :]
    # The section ends at the first following level-2 heading, if there is one.
    stop = next(
        (offset for offset, row in enumerate(body) if row.startswith("## ")),
        len(body),
    )
    return [row.strip() for row in body[:stop] if row.strip().startswith("- [")]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def knowledge_id_for(fact, destination):
    """Derive a stable ``hk-``-prefixed id for a (fact, destination) pair.

    The cleaned destination and the cleaned fact are joined with a NUL
    character, hashed with SHA-1, and the first ten hex digits are kept.
    """
    key = "\0".join((clean_destination_text(destination), clean_fact_text(fact)))
    fingerprint = hashlib.sha1(key.encode()).hexdigest()
    return "hk-" + fingerprint[:10]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def defect_id_for(summary):
    """Derive a stable ``bug-``-prefixed id from a defect summary.

    The summary is normalised with ``clean_fact_text`` and hashed with SHA-1.
    The id is the first ten hex digits of that hash.
    """
    normalized = clean_fact_text(summary)
    fingerprint = hashlib.sha1(normalized.encode()).hexdigest()
    return "bug-" + fingerprint[:10]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def parse_knowledge_item(item):
    """Parse one durable-knowledge checklist line into a dict.

    Expected shape (surrounding whitespace is ignored)::

        - [ ] [id:<id>] <fact> -> <destination> | evidence: <text>

    The ``[id:...]`` tag (also accepted as ``[kid:...]``) and the
    ``| evidence:`` suffix are optional.

    Returns ``None`` when the line does not have this shape. Otherwise returns
    a dict with keys ``status`` ("open" or "closed"), ``id`` (``None`` when no
    tag is present), ``fact``, ``destination``, ``evidence`` (``None`` when
    absent) and ``raw`` (the ``item`` argument exactly as passed in).
    """
    pattern = (
        r"- \[(?P<status>[ xX])\]\s+"
        r"(?:\[(?:id|kid):(?P<id>[A-Za-z0-9_.:-]+)\]\s+)?"
        r"(?P<fact>.*?)\s+->\s+"
        r"(?P<destination>[^|]+?)"
        r"(?:\s+\|\s+evidence:\s+(?P<evidence>.+))?$"
    )
    found = re.match(pattern, item.strip())
    if found is None:
        return None
    fields = found.groupdict()
    is_checked = fields["status"].lower() == "x"
    raw_evidence = fields["evidence"]
    return {
        "status": "closed" if is_checked else "open",
        "id": fields["id"],
        "fact": clean_fact_text(fields["fact"]),
        "destination": clean_destination_text(fields["destination"]),
        "evidence": clean_fact_text(raw_evidence) if raw_evidence else None,
        "raw": item,
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def parse_defect_item(item):
    """Parse one defect checklist line into a dict.

    Expected shape (surrounding whitespace is ignored)::

        - [ ] [bug:<id>] [P0-P3] <summary> | evidence: <text> | fix: <text>

    The id tag (``[bug:...]`` or ``[id:...]``), the evidence suffix and the
    fix suffix are optional. The severity tag is required.

    Returns ``None`` when the line does not have this shape. Otherwise returns
    a dict with keys ``status`` ("open" or "closed"), ``id`` (derived with
    ``defect_id_for`` from the summary when no tag is present), ``severity``,
    ``summary``, ``evidence``, ``fix`` (each ``None`` when absent) and ``raw``
    (the ``item`` argument exactly as passed in).
    """
    pattern = (
        r"- \[(?P<status>[ xX])\]\s+"
        r"(?:\[(?:id|bug):(?P<id>[A-Za-z0-9_.:-]+)\]\s+)?"
        r"\[(?P<severity>P[0-3])\]\s+"
        r"(?P<summary>.*?)"
        r"(?:\s+\|\s+evidence:\s+(?P<evidence>.*?))?"
        r"(?:\s+\|\s+fix:\s+(?P<fix>.+))?$"
    )
    found = re.match(pattern, item.strip())
    if found is None:
        return None
    fields = found.groupdict()
    is_checked = fields["status"].lower() == "x"
    raw_summary = fields["summary"]
    raw_evidence = fields["evidence"]
    raw_fix = fields["fix"]
    return {
        "status": "closed" if is_checked else "open",
        # Fall back to a summary-derived id so untagged lines stay addressable.
        "id": fields["id"] or defect_id_for(raw_summary),
        "severity": fields["severity"],
        "summary": clean_fact_text(raw_summary),
        "evidence": clean_fact_text(raw_evidence) if raw_evidence else None,
        "fix": clean_fact_text(raw_fix) if raw_fix else None,
        "raw": item,
    }
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def clean_fact_text(value):
    """Normalise free text for storage on a single checklist line.

    Removes every backtick, collapses each run of whitespace (including
    newlines) into one space, and trims both ends.
    """
    without_ticks = value.strip().replace("`", "")
    return re.sub(r"\s+", " ", without_ticks).strip()
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def clean_destination_text(value):
    """Trim surrounding whitespace, then any surrounding backticks.

    Backticks and whitespace inside the value are left untouched.
    """
    trimmed = value.strip()
    return trimmed.strip("`")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def append_knowledge_item(plan_path, fact, destination):
    """Append an open knowledge item to the plan's knowledge section.

    Reads the plan, drops every line equal to ``DEFAULT_KNOWLEDGE_PLACEHOLDER``
    and inserts ``- [ ] [id:<id>] <fact> -> <destination>`` at the end of the
    '## Durable Knowledge To Capture' section (just before the next ``"## "``
    heading, or at the end of the file). The plan is rewritten with exactly
    one trailing newline and ``mark_state_dirty`` is called afterwards.

    Returns:
        ``(item, item_id)`` -- the inserted line and its generated id.

    Raises:
        ValueError: if the plan has no '## Durable Knowledge To Capture' heading.
    """
    plan_lines = plan_path.read_text().splitlines()
    heading_at = find_section(plan_lines, "## Durable Knowledge To Capture")
    if heading_at is None:
        raise ValueError("Plan is missing '## Durable Knowledge To Capture'")
    kept = [row for row in plan_lines if row.strip() != DEFAULT_KNOWLEDGE_PLACEHOLDER]
    # Insert before the next heading after the section, or at the end.
    slot = next(
        (pos for pos in range(heading_at + 1, len(kept)) if kept[pos].startswith("## ")),
        len(kept),
    )
    item_id = knowledge_id_for(fact, destination)
    entry = f"- [ ] [id:{item_id}] {fact} -> {destination}"
    kept[slot:slot] = [entry]
    plan_path.write_text("\n".join(kept).rstrip() + "\n")
    mark_state_dirty(plan_path, "knowledge-item-logged")
    return entry, item_id
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def render_open_defect_rework(open_defects):
    """Render the 'Rework Required' bullet list for the given open defects.

    Args:
        open_defects: iterable of dicts with ``id``, ``severity`` and
            ``summary`` keys, plus an optional ``evidence`` key.

    Returns:
        A newline-joined string: one generic instruction bullet followed by
        one bullet per defect. Evidence is appended when it is truthy.
    """

    def bullet(entry):
        suffix = ""
        if entry.get("evidence"):
            suffix = f" Evidence: {entry['evidence']}."
        return f"- Resolve {entry['id']} ({entry['severity']}): {entry['summary']}.{suffix}"

    header = "- Resolve all open defects, then re-run validation and `quality-score`."
    return "\n".join([header, *(bullet(entry) for entry in open_defects)])
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def mark_quality_gate_blocked_by_defects(text):
    """Mark the plan's quality gate as blocked while defects remain open.

    Args:
        text: full plan text.

    Returns:
        ``text`` unchanged when ``open_defects_for_plan`` reports no open
        defects. Otherwise a new plan text in which the 'Quality Result'
        section has been created or updated and the 'Rework Required' section
        has been replaced with the bullets from ``render_open_defect_rework``.
    """
    open_defects = open_defects_for_plan(text)
    if not open_defects:
        return text
    lines = text.splitlines()
    section_index = find_section(lines, "## Quality Result")
    if section_index is None:
        # No quality section yet: write a fresh one with placeholder scores.
        # Presumably replace_section creates the section when it is absent --
        # confirm against plans.replace_section.
        gate_text = "\n".join(
            [
                "Status: fail",
                "Minimum score: 8.0",
                "Average score: pending",
                f"Last scored: {utc_now_iso()}",
                "Criteria fingerprint: pending",
                "",
                "Blocked by unresolved defects. Run `defect-resolve`, re-run validation, then run `quality-score`.",
            ]
        )
        text = replace_section(text, "Quality Result", gate_text)
    else:
        # The section exists: find where it ends (next "## " heading or EOF).
        end_index = len(lines)
        for index in range(section_index + 1, len(lines)):
            if lines[index].startswith("## "):
                end_index = index
                break
        section_lines = lines[section_index + 1 : end_index]
        has_status = False
        updated_section = []
        for line in section_lines:
            if line.startswith("Status:"):
                # NOTE(review): an existing section is reset to "pending",
                # while a freshly created one (above) starts at "fail" --
                # confirm this asymmetry is intended.
                updated_section.append("Status: pending")
                has_status = True
            elif line.startswith("Last scored:"):
                updated_section.append(f"Last scored: {utc_now_iso()}")
            else:
                # All other lines of the section are kept as they are.
                updated_section.append(line)
        if not has_status:
            # Guarantee the section carries a status line.
            updated_section.insert(0, "Status: pending")
        lines = lines[: section_index + 1] + updated_section + lines[end_index:]
        text = "\n".join(lines).rstrip() + "\n"
    return replace_section(text, "Rework Required", render_open_defect_rework(open_defects))
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def append_defect_item(plan_path, severity, summary, evidence=None):
    """Append an open defect to the plan and block its quality gate.

    If the plan has no '## Defects To Resolve' heading, ``replace_section`` is
    asked to add one containing ``DEFAULT_DEFECT_PLACEHOLDER``. Every line
    equal to the placeholder is then dropped and a new
    ``- [ ] [bug:<id>] [<severity>] <summary>`` line (with an optional
    ``| evidence: ...`` suffix) is inserted at the end of the section. The
    result goes through ``mark_quality_gate_blocked_by_defects`` before being
    written back, and ``mark_state_dirty`` is called afterwards.

    Returns:
        ``(item, item_id)`` -- the inserted line and its generated id.

    Raises:
        ValueError: if the defects heading still cannot be found.
    """
    heading = "## Defects To Resolve"
    plan_text = plan_path.read_text()
    if find_section(plan_text.splitlines(), heading) is None:
        plan_text = replace_section(plan_text, "Defects To Resolve", DEFAULT_DEFECT_PLACEHOLDER)
    rows = plan_text.splitlines()
    heading_at = find_section(rows, heading)
    if heading_at is None:
        raise ValueError("Plan is missing '## Defects To Resolve'")
    kept = [row for row in rows if row.strip() != DEFAULT_DEFECT_PLACEHOLDER]
    # Insert before the next heading after the section, or at the end.
    slot = next(
        (pos for pos in range(heading_at + 1, len(kept)) if kept[pos].startswith("## ")),
        len(kept),
    )
    item_id = defect_id_for(summary)
    pieces = [f"- [ ] [bug:{item_id}] [{severity}] {clean_fact_text(summary)}"]
    cleaned_evidence = clean_fact_text(evidence) if evidence else None
    if cleaned_evidence:
        pieces.append(f"evidence: {cleaned_evidence}")
    entry = " | ".join(pieces)
    kept[slot:slot] = [entry]
    rendered = "\n".join(kept).rstrip() + "\n"
    plan_path.write_text(mark_quality_gate_blocked_by_defects(rendered))
    mark_state_dirty(plan_path, "defect-logged")
    return entry, item_id
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def close_defect_line(line, fix_evidence):
    """Tick a defect line's checkbox and record how it was fixed.

    The first ``- [ ]`` in the line becomes ``- [x]``. A ``| fix: ...`` suffix
    is appended unless the line already contains ``| fix:``.
    """
    ticked = line.replace("- [ ]", "- [x]", 1)
    if "| fix:" in ticked:
        return ticked
    return f"{ticked} | fix: {fix_evidence}"
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def mark_defect_resolved(plan_path, defect_id, fix_evidence):
    """Close the first open defect with the given id and refresh rework notes.

    The matching line is ticked and annotated via ``close_defect_line``. The
    'Rework Required' section is then replaced with either the remaining open
    defects or a "defects resolved" reminder. The plan is written back and
    ``mark_state_dirty`` is called.

    Raises:
        ValueError: if ``defect_id`` or ``fix_evidence`` is empty, or if no
            open defect with that id exists in the plan.
    """
    if not defect_id:
        raise ValueError("Provide --id to resolve a defect")
    if not fix_evidence:
        raise ValueError("Provide --fix-evidence or --fix-evidence-file to resolve a defect")
    plan_lines = plan_path.read_text().splitlines()
    fix_note = clean_fact_text(fix_evidence)

    # Locate the first open defect carrying the requested id.
    hit = None
    for position, raw_line in enumerate(plan_lines):
        candidate = parse_defect_item(raw_line.strip())
        if candidate and candidate["status"] == "open" and candidate["id"] == defect_id:
            hit = position
            break
    if hit is None:
        raise ValueError(f"Open defect not found for id: {defect_id}")
    plan_lines[hit] = close_defect_line(plan_lines[hit], fix_note)

    text = "\n".join(plan_lines).rstrip() + "\n"
    still_open = open_defects_for_plan(text)
    if still_open:
        rework = render_open_defect_rework(still_open)
    else:
        rework = "Defects resolved. Re-run validation and `quality-score` before closing."
    text = replace_section(text, "Rework Required", rework)
    plan_path.write_text(text)
    mark_state_dirty(plan_path, "defect-resolved")
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def mark_knowledge_items_closed(text):
    """Tick every open checklist item in the durable-knowledge section.

    Section membership is tracked line by line: any line starting with
    ``"## "`` switches the flag on only if it is (case-insensitively) the
    '## Durable Knowledge To Capture' heading. Inside the section, lines whose
    stripped form starts with ``- [ ]`` and is not
    ``DEFAULT_KNOWLEDGE_PLACEHOLDER`` get their first ``- [ ]`` replaced by
    ``- [x]``. The result always ends with exactly one newline.
    """
    result = []
    inside = False
    for row in text.splitlines():
        if row.startswith("## "):
            inside = row.strip().lower() == "## durable knowledge to capture"
        trimmed = row.strip()
        is_open_item = (
            inside
            and trimmed.startswith("- [ ]")
            and trimmed != DEFAULT_KNOWLEDGE_PLACEHOLDER
        )
        result.append(row.replace("- [ ]", "- [x]", 1) if is_open_item else row)
    return "\n".join(result).rstrip() + "\n"
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def destination_contains_fact(repo, destination, fact):
    """Report whether the destination file already contains the given fact.

    Both the fact and the file content are passed through
    ``normalize_fact_for_match`` before a substring test.

    Args:
        repo: repository root; joined with ``destination`` using ``/``.
        destination: path of the target document relative to ``repo``.
        fact: text expected to appear in the document.

    Returns:
        ``True`` only when the target is a regular file, decodes as text, and
        contains the normalized fact. ``False`` when the target is missing, is
        not a file, cannot be decoded, or when the fact normalizes to an empty
        string.
    """
    target = repo / destination
    # is_file() is already False for a missing path, so no exists() check.
    if not target.is_file():
        return False
    needle = normalize_fact_for_match(fact)
    if not needle:
        # "" is a substring of every string; without this guard an empty fact
        # would count as verified against any existing file.
        return False
    try:
        haystack = target.read_text()
    except UnicodeDecodeError:
        return False
    return needle in normalize_fact_for_match(haystack)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def normalize_fact_for_match(value):
    """Normalise text for loose fact comparison.

    Removes backticks, collapses whitespace runs into single spaces, trims
    both ends, then removes any trailing run of ASCII or ideographic full
    stops (``.`` / ``。``).
    """
    collapsed = re.sub(r"\s+", " ", value.replace("`", "")).strip()
    return re.sub(r"[.。]+$", "", collapsed)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def append_fact_to_destination(repo, destination, fact):
    """Append ``fact`` as a new line at the end of the destination document.

    ``ensure_parent`` is called on the target first (presumably it creates
    missing parent directories -- confirm against ``templates.ensure_parent``).
    The existing content is preserved and the fact is followed by a newline.

    Separator rules:
      * new or empty file -> nothing, so the file does not start with a
        blank line;
      * content ending in a newline -> one extra newline (blank line before
        the fact);
      * content without a trailing newline -> two newlines (same blank-line
        gap).
    """
    target = repo / destination
    ensure_parent(target)
    existing = target.read_text() if target.exists() else ""
    if not existing:
        separator = ""
    elif existing.endswith("\n"):
        separator = "\n"
    else:
        separator = "\n\n"
    target.write_text(existing + separator + fact + "\n")
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
def close_knowledge_line(line, evidence=None):
    """Tick a knowledge line's checkbox and optionally attach evidence.

    The first ``- [ ]`` in the line becomes ``- [x]``. When ``evidence`` is
    truthy and the line has no ``| evidence:`` marker yet, the suffix
    ``| evidence: <evidence>`` is appended.
    """
    ticked = line.replace("- [ ]", "- [x]", 1)
    if not evidence or "| evidence:" in ticked:
        return ticked
    return f"{ticked} | evidence: {evidence}"
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def mark_single_knowledge_item_written(
    repo,
    plan_path,
    fact_text=None,
    destination=None,
    append=False,
    knowledge_id=None,
    evidence=None,
):
    """Close one open knowledge item after verifying it in its destination.

    The first open item in the plan that matches by id or by normalized fact
    text (and by destination, when one is given) is selected. Before it is
    ticked, the destination recorded on that plan line must contain the
    verification text; with ``append=True`` the text is appended to the
    destination instead of failing.

    Args:
        repo: repository root used to resolve the item's destination.
        plan_path: path object of the plan file; read and rewritten in place.
        fact_text: fact to match against plan items (optional if an id is given).
        destination: when given, only items with this destination can match.
        append: append the verification text to the destination when missing.
        knowledge_id: id to match against the item's ``[id:...]`` tag.
        evidence: text to verify instead of the fact; also recorded on the line.

    Raises:
        ValueError: if neither ``fact_text`` nor ``knowledge_id`` is given, if
            the matched item has an empty destination, if verification fails
            and ``append`` is false, or if no open item matches.
    """
    if not fact_text and not knowledge_id:
        raise ValueError("Provide either --id or --fact to mark knowledge as written")
    lines = plan_path.read_text().splitlines()
    # Clean the caller's inputs the same way parsed plan fields are cleaned.
    target = clean_fact_text(fact_text) if fact_text else None
    target_destination = clean_destination_text(destination) if destination else None
    target_evidence = clean_fact_text(evidence) if evidence else None
    replaced = False
    updated = []
    for line in lines:
        stripped = line.strip()
        parsed = parse_knowledge_item(stripped)
        if not parsed:
            # Not a knowledge checklist line: keep it untouched.
            updated.append(line)
            continue
        destination_matches = target_destination is None or parsed["destination"] == target_destination
        fact_matches = target is not None and normalize_fact_for_match(target) == normalize_fact_for_match(parsed["fact"])
        id_matches = knowledge_id is not None and parsed["id"] == knowledge_id
        # Only the first open matching item is closed; later matches are kept as-is.
        if stripped.startswith("- [ ]") and (id_matches or fact_matches) and destination_matches and not replaced:
            parsed_destination = parsed["destination"]
            if not parsed_destination:
                raise ValueError("Destination is required to verify durable knowledge")
            # Preference order: explicit evidence, then the caller's fact,
            # then the fact recorded on the plan line.
            verification_text = target_evidence or target or parsed["fact"]
            if not destination_contains_fact(repo, parsed_destination, verification_text):
                if append:
                    # NOTE(review): when evidence is supplied, the evidence
                    # text (not the fact) is what gets appended -- confirm
                    # this is intended.
                    append_fact_to_destination(repo, parsed_destination, verification_text)
                else:
                    raise ValueError(
                        f"Destination {parsed_destination} does not contain verification text: {verification_text}. "
                        "Write it there first, pass --evidence with text present in the doc, or re-run with --append."
                    )
            updated.append(close_knowledge_line(line, evidence=target_evidence))
            replaced = True
        else:
            updated.append(line)
    if not replaced:
        target_description = f"id: {knowledge_id}" if knowledge_id else f"fact: {fact_text}"
        raise ValueError(f"Open knowledge item not found for {target_description}")
    # Rewrite the plan with exactly one trailing newline, then flag the state.
    plan_path.write_text("\n".join(updated).rstrip() + "\n")
    mark_state_dirty(plan_path, "knowledge-item-written")
|