@hallucination-studio/harness-engine 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +185 -27
- package/bin/install.js +29 -17
- package/package.json +10 -4
- package/skills/harness-engine/SKILL.md +97 -0
- package/skills/harness-engine/agents/openai.yaml +4 -0
- package/skills/harness-engine/evals/cases.json +94 -0
- package/skills/harness-engine/evals/harness_engine_evals/__init__.py +1 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_frontend.py +211 -0
- package/skills/harness-engine/evals/harness_engine_evals/cases_lifecycle.py +1616 -0
- package/skills/harness-engine/evals/harness_engine_evals/helpers.py +155 -0
- package/skills/harness-engine/evals/harness_engine_evals/registry.py +55 -0
- package/skills/harness-engine/evals/harness_engine_evals/report.py +36 -0
- package/skills/harness-engine/evals/harness_engine_evals/runner.py +53 -0
- package/skills/harness-engine/evals/run_evals.py +14 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/evaluation-loop.md +8 -2
- package/skills/harness-engine/references/evidence-first-evals.md +187 -0
- package/skills/harness-engine/references/exec-plans.md +59 -0
- package/skills/{harness-repo-bootstrap → harness-engine}/references/file-map.md +3 -3
- package/skills/{harness-repo-bootstrap → harness-engine}/references/knowledge-capture.md +2 -2
- package/skills/{harness-repo-bootstrap → harness-engine}/references/sop-index.md +3 -0
- package/skills/harness-engine/references/template-policy.md +17 -0
- package/skills/harness-engine/references/workflow.md +62 -0
- package/skills/harness-engine/scripts/harness_engine/__init__.py +1 -0
- package/skills/harness-engine/scripts/harness_engine/analysis.py +240 -0
- package/skills/harness-engine/scripts/harness_engine/checks.py +287 -0
- package/skills/harness-engine/scripts/harness_engine/cli.py +656 -0
- package/skills/harness-engine/scripts/harness_engine/common.py +977 -0
- package/skills/harness-engine/scripts/harness_engine/continuation.py +520 -0
- package/skills/harness-engine/scripts/harness_engine/git_ops.py +88 -0
- package/skills/harness-engine/scripts/harness_engine/knowledge.py +329 -0
- package/skills/harness-engine/scripts/harness_engine/plans.py +630 -0
- package/skills/harness-engine/scripts/harness_engine/templates.py +124 -0
- package/skills/harness-engine/scripts/manage_harness.py +14 -0
- package/skills/harness-repo-bootstrap/SKILL.md +0 -68
- package/skills/harness-repo-bootstrap/agents/openai.yaml +0 -4
- package/skills/harness-repo-bootstrap/evals/cases.json +0 -18
- package/skills/harness-repo-bootstrap/evals/run_evals.py +0 -337
- package/skills/harness-repo-bootstrap/references/exec-plans.md +0 -39
- package/skills/harness-repo-bootstrap/references/template-policy.md +0 -12
- package/skills/harness-repo-bootstrap/references/workflow.md +0 -47
- package/skills/harness-repo-bootstrap/scripts/manage_harness.py +0 -1181
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/repo-template/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/assets/sops/.keep +0 -0
- /package/skills/{harness-repo-bootstrap → harness-engine}/references/question-catalog.md +0 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Workflow
|
|
2
|
+
|
|
3
|
+
Use this skill in two passes.
|
|
4
|
+
|
|
5
|
+
## Pass 1: Analyze and Confirm
|
|
6
|
+
|
|
7
|
+
Run `analyze` before editing repository docs.
|
|
8
|
+
|
|
9
|
+
Ask the human only about facts that cannot be derived safely from the repo, especially:
|
|
10
|
+
|
|
11
|
+
- product domain and top-level outcomes
|
|
12
|
+
- intended users or operators
|
|
13
|
+
- production reliability expectations
|
|
14
|
+
- security or compliance constraints
|
|
15
|
+
- frontend experience bar
|
|
16
|
+
- canonical external references worth pinning inside `docs/references/`
|
|
17
|
+
|
|
18
|
+
Do not ask for facts that can be inferred from source layout, dependency manifests, or existing docs.
|
|
19
|
+
|
|
20
|
+
Also inspect the analysis for:
|
|
21
|
+
|
|
22
|
+
- missing durable knowledge that should be written during the task
|
|
23
|
+
- missing execution-plan state
|
|
24
|
+
- which SOPs should be referenced in the generated router docs
|
|
25
|
+
|
|
26
|
+
## Pass 2: Init
|
|
27
|
+
|
|
28
|
+
Run `sample-answers`, fill the answers, then run `init`.
|
|
29
|
+
|
|
30
|
+
Use `init` for both first-time adoption and managed-harness reconciliation. It creates a new harness when none exists, and refreshes managed harness files plus backfills newly introduced managed files when an existing managed harness is detected. Unmanaged user files are preserved unless `--force` is explicitly used.
|
|
31
|
+
|
|
32
|
+
After the script runs, read the generated docs once and tighten weak generic phrases before handing off.
|
|
33
|
+
|
|
34
|
+
## Ongoing Use
|
|
35
|
+
|
|
36
|
+
After the scaffold exists:
|
|
37
|
+
|
|
38
|
+
- treat harness commands as Codex's execution interface, not as steps the user must manually run
|
|
39
|
+
- translate user intent like "complete this", "continue this later", "pause until X", "stop this", or "defer this" into the matching continuation decision yourself
|
|
40
|
+
- read `docs/exec-plans/workstreams.md` before resuming interrupted or long-running work
|
|
41
|
+
- create or reuse an execution plan before any repository-mutating work, including code, docs, configuration, tests, dependencies, build/release scripts, generated templates, runtime behavior, migrations, cleanup, or review fixes
|
|
42
|
+
- use `plan-start` instead of creating plan files manually when possible
|
|
43
|
+
- use `acceptance-set` before implementation so the active plan has a ready, task-specific Acceptance Contract
|
|
44
|
+
- log durable facts during execution instead of waiting until the end
|
|
45
|
+
- follow the matching SOP for architecture, UI, observability, or knowledge capture work
|
|
46
|
+
- route repository-mutating requests through Harness Task Intake in `AGENTS.md`; route product, frontend, backend, architecture, data/state, security, performance, and reliability issues through the Issue Workflows branch when the request is a bug or regression
|
|
47
|
+
- encode durable knowledge back into the repository before closing the task
|
|
48
|
+
- mark logged knowledge items as written after updating the permanent docs; the `knowledge-mark-written` evidence must be exact text already present in the destination doc, not a paraphrase
|
|
49
|
+
- log every defect found by tests, evals, browser validation, or code review with `defect-log`
|
|
50
|
+
- resolve logged defects only after fixing the implementation and citing passing validation with `defect-resolve`
|
|
51
|
+
- run `quality-score` after implementation and validation, with evidence notes for every dimension tied to the ready Acceptance Contract
|
|
52
|
+
- if `quality-score` fails, implement the `## Rework Required` items and score again
|
|
53
|
+
- use `continuation-set` before every `plan-close`; the `continue` and `pause` decisions update the workstream ledger automatically once their required fields validate
|
|
54
|
+
- do not ask the user to invoke `continuation-set`, `plan-close`, or `check`; run them and summarize blocked reasons or successful state changes
|
|
55
|
+
- use `clean` when local skill installs or generated evidence need cleanup or were already committed; review dry-run output first, then apply, commit, and push the staged removals
|
|
56
|
+
- use `plan-close` to verify no durable knowledge is left stranded in the active plan
|
|
57
|
+
- before `plan-close`, replace generic plan placeholders with task-specific scope, constraints, steps, validation, and completion notes; delete unused ad hoc durable-knowledge TODOs
|
|
58
|
+
- run the installed manager `check` command before handoff; active plans require ready Acceptance Contracts, while completed plans require passing Quality Results
|
|
59
|
+
- preview stale generated evidence with `evidence-prune` when `docs/generated/` contains old screenshots, DOM dumps, layout summaries, or smoke outputs; review the dry-run output before using `--apply`
|
|
60
|
+
- do not add CI to the target repository unless the human explicitly asks for it
|
|
61
|
+
|
|
62
|
+
No-plan exceptions are limited to pure question answering, read-only investigation, showing command output, or status reporting with no file changes. If files will change, enter the plan lifecycle first.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Harness Engine Python runtime package."""
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from .common import *
|
|
2
|
+
from .templates import is_harness_owned_text
|
|
3
|
+
|
|
4
|
+
def detect_languages(files):
    """Return the programming languages implied by the suffixes in *files*.

    Every path's suffix is lower-cased and looked up in a fixed
    suffix-to-language table. Each language appears at most once, and the
    result follows the table's order rather than the order of *files*.
    """
    language_by_suffix = {
        ".js": "JavaScript",
        ".jsx": "JavaScript",
        ".ts": "TypeScript",
        ".tsx": "TypeScript",
        ".sh": "Shell",
        ".bash": "Shell",
        ".zsh": "Shell",
        ".py": "Python",
        ".rb": "Ruby",
        ".go": "Go",
        ".rs": "Rust",
        ".java": "Java",
        ".kt": "Kotlin",
        ".swift": "Swift",
    }
    # Paths without a suffix (empty string) never contribute a language.
    seen_suffixes = {Path(name).suffix.lower() for name in files} - {""}
    detected = []
    for suffix, language in language_by_suffix.items():
        if suffix not in seen_suffixes or language in detected:
            continue
        detected.append(language)
    return detected
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def read_json_if_exists(path):
    """Return the parsed JSON document at *path*, or ``None`` if unusable.

    ``None`` is returned when the file does not exist, is not valid UTF-8,
    or does not contain valid JSON.

    The file is decoded as UTF-8 explicitly (tolerating a leading BOM) so the
    result does not depend on the platform's locale encoding.
    """
    if not path.exists():
        return None
    try:
        return json.loads(path.read_text(encoding="utf-8-sig"))
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A malformed or mis-encoded manifest is treated like a missing one.
        return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def detect_frameworks(repo):
    """Return framework labels detected from the repo's dependency manifests.

    ``package.json`` is inspected through its ``dependencies`` and
    ``devDependencies`` package names. ``pyproject.toml`` is scanned as
    lower-cased text for Python framework names.

    Args:
        repo: Path-like repository root supporting the ``/`` operator.

    Returns:
        A list of human-readable framework labels.
    """
    frameworks = []
    package_json = read_json_if_exists(repo / "package.json")
    # package.json can parse to a non-object, and its dependency sections can
    # be null; both are treated as "no dependencies" rather than crashing.
    if isinstance(package_json, dict):
        dep_names = set()
        for section in ("dependencies", "devDependencies"):
            entries = package_json.get(section)
            if isinstance(entries, dict):
                dep_names.update(entries)
        known = {
            "next": "Next.js",
            "react": "React",
            "vue": "Vue",
            "svelte": "Svelte",
            "vite": "Vite",
            "express": "Express",
            "nestjs": "NestJS",
            # NestJS is published under the @nestjs scope; the bare "nestjs"
            # name above is kept only for backward compatibility.
            "@nestjs/core": "NestJS",
        }
        for key, label in known.items():
            if key in dep_names and label not in frameworks:
                frameworks.append(label)
    pyproject = repo / "pyproject.toml"
    if pyproject.exists():
        # Decode as UTF-8 and replace undecodable bytes: only a substring scan
        # is needed, so a stray byte should not abort the analysis.
        text = pyproject.read_text(encoding="utf-8", errors="replace").lower()
        for needle, label in (("fastapi", "FastAPI"), ("django", "Django"), ("flask", "Flask")):
            if needle in text:
                frameworks.append(label)
    return frameworks
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def detect_package_managers(repo):
    """Return package-manager labels for marker files found in the repo root.

    Args:
        repo: Path-like repository root supporting the ``/`` operator.

    Returns:
        A list of labels in marker-table order, each listed at most once.
    """
    markers = {
        "package-lock.json": "npm",
        "pnpm-lock.yaml": "pnpm",
        "yarn.lock": "yarn",
        "bun.lockb": "bun",
        # Newer Bun releases write a text lockfile instead of bun.lockb.
        "bun.lock": "bun",
        "pyproject.toml": "uv/pip",
        "requirements.txt": "pip",
        "go.mod": "go",
        "Cargo.toml": "cargo",
    }
    package_managers = []
    for marker, label in markers.items():
        # Two markers can map to the same manager; report it only once.
        if label not in package_managers and (repo / marker).exists():
            package_managers.append(label)
    return package_managers
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def detect_frontend_style_files(files):
    """Return up to 20 paths that look like frontend style or UI source files.

    A path qualifies when it has one of the accepted source suffixes and
    either ends with a known style marker (stylesheet suffix or a
    theme/tokens/tailwind/postcss file name) or lies under one of the
    frontend-looking directories. Matching is case-insensitive.

    Backslash separators are treated like ``/`` so that paths produced on
    Windows match the directory keywords too.

    Args:
        files: Iterable of repository-relative path strings.

    Returns:
        The matching original path strings, de-duplicated, sorted, and
        truncated to the first 20.
    """
    style_markers = (
        ".css",
        ".scss",
        ".sass",
        ".less",
        "tailwind.config.js",
        "tailwind.config.ts",
        "postcss.config.js",
        "postcss.config.ts",
        "theme.js",
        "theme.ts",
        "tokens.js",
        "tokens.ts",
        "tokens.json",
    )
    path_keywords = (
        "/styles/",
        "/style/",
        "/theme/",
        "/themes/",
        "/tokens/",
        "/components/",
        "/ui/",
        "/app/",
        "/src/",
    )
    accepted_suffixes = (".js", ".jsx", ".ts", ".tsx", ".css", ".scss", ".sass", ".less", ".json")
    style_files = set()
    for file_path in files:
        lower = file_path.replace("\\", "/").lower()
        if not lower.endswith(accepted_suffixes):
            continue
        # The leading slash lets a keyword match a top-level directory too.
        rooted = f"/{lower}"
        if lower.endswith(style_markers) or any(keyword in rooted for keyword in path_keywords):
            style_files.add(file_path)
    return sorted(style_files)[:20]
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def list_repo_files(repo):
    """Return the sorted repo-relative paths of all files under *repo*.

    Directories named like VCS metadata, dependency folders, or build output
    are pruned during the walk, so nothing beneath them is listed.
    """
    skipped_dirs = {".git", ".codex", "node_modules", ".next", "dist", "build", "__pycache__"}
    collected = []
    for current_dir, subdirs, filenames in os.walk(repo):
        # Prune in place so os.walk does not descend into skipped directories.
        subdirs[:] = [name for name in subdirs if name not in skipped_dirs]
        collected.extend(
            str(Path(current_dir, name).relative_to(repo)) for name in filenames
        )
    collected.sort()
    return collected
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def detect_existing_managed_files(repo):
    """Return the known harness paths in *repo* whose text is harness-owned.

    The candidates are the keys of ``ROOT_FILES``, ``DOC_FILES`` and
    ``FRONTEND_DOC_FILES``, in that order. A candidate is reported when it
    exists and ``is_harness_owned_text`` accepts its contents; files that
    cannot be decoded as text are skipped.
    """
    owned_paths = []
    for relative_path in [*ROOT_FILES.keys(), *DOC_FILES.keys(), *FRONTEND_DOC_FILES.keys()]:
        candidate = repo / relative_path
        if not candidate.exists():
            continue
        try:
            harness_owned = is_harness_owned_text(candidate.read_text())
        except UnicodeDecodeError:
            continue
        if harness_owned:
            owned_paths.append(relative_path)
    return owned_paths
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def analyze_repo(repo):
    """Build the analysis dictionary describing *repo* for the harness.

    The result combines detected languages, frameworks, package managers and
    frontend signals with the harness files that already exist, the
    execution-plan and SOP files that are missing, the durable-knowledge
    destinations, and the catalog questions to confirm with a human.
    Frontend-only questions are dropped when no frontend is detected.
    """
    files = list_repo_files(repo)
    languages = detect_languages(files)
    frameworks = detect_frameworks(repo)
    package_managers = detect_package_managers(repo)

    # A frontend is assumed from either a known UI framework or UI-ish files.
    frontend_frameworks = ["Next.js", "React", "Vue", "Svelte", "Vite"]
    frontend_suffixes = (".tsx", ".jsx", ".css", ".scss")
    uses_frontend_framework = any(name in frameworks for name in frontend_frameworks)
    has_frontend = uses_frontend_framework or any(
        file.endswith(frontend_suffixes) for file in files
    )

    if has_frontend:
        frontend_style_files = detect_frontend_style_files(files)
    else:
        frontend_style_files = []
    existing_managed = detect_existing_managed_files(repo)

    harness_markers = ["AGENTS.md", "ARCHITECTURE.md", "docs/PLANS.md", "docs/SECURITY.md"]
    existing_harness = [file for file in harness_markers if (repo / file).exists()]

    exec_plan_files = [
        "docs/exec-plans/active/README.md",
        "docs/exec-plans/active/_template.md",
        "docs/exec-plans/completed/README.md",
    ]
    missing_exec_plan_state = [path for path in exec_plan_files if not (repo / path).exists()]

    sop_files = [
        "docs/sops/layered-domain-architecture-setup.md",
        "docs/sops/encode-unseen-knowledge.md",
        "docs/sops/local-observability-feedback-loop.md",
        "docs/sops/chrome-devtools-ui-validation-loop.md",
        "docs/sops/evidence-first-eval-loop.md",
    ]
    missing_sops = [path for path in sop_files if not (repo / path).exists()]

    # Design docs slot in third, and only when a frontend was detected.
    durable_knowledge_targets = ["ARCHITECTURE.md", "docs/product-specs/"]
    if has_frontend:
        durable_knowledge_targets.append("docs/design-docs/")
    durable_knowledge_targets.extend(
        ["docs/RELIABILITY.md", "docs/SECURITY.md", "docs/references/"]
    )

    if has_frontend:
        frontend_scope = "A frontend surface likely exists."
    else:
        frontend_scope = "No obvious frontend surface detected from the repository."
    inferred_answers = {
        "project_name": repo.name,
        "languages": languages,
        "frameworks": frameworks,
        "package_managers": package_managers,
        "frontend_scope": frontend_scope,
        "frontend_style_files": frontend_style_files,
    }

    frontend_only_questions = {"frontend_stack_notes", "design_style_direction"}
    human_confirmations = [
        question
        for question in QUESTION_CATALOG
        if question["id"] not in frontend_only_questions or has_frontend
    ]

    harness_state = "existing" if existing_harness or existing_managed else "new"
    notes = [
        "Ask the human only the confirmations that the repository cannot answer safely.",
        "If unmanaged harness files already exist, preserve them unless the human explicitly requests replacement.",
        "Create execution-plan state before expecting agents to keep repository-mutating work synchronized.",
        "Use SOPs to turn recurring architecture, UI, observability, and knowledge-capture work into mechanical loops.",
        "Write durable facts into permanent docs instead of leaving them trapped inside plans or chat history.",
    ]
    return {
        "project_name": repo.name,
        "repo_path": str(repo.resolve()),
        "languages": languages,
        "frameworks": frameworks,
        "package_managers": package_managers,
        "has_frontend": has_frontend,
        "frontend_style_files": frontend_style_files,
        "inferred_answers": inferred_answers,
        "existing_harness_files": existing_harness,
        "existing_managed_files": existing_managed,
        "missing_exec_plan_state": missing_exec_plan_state,
        "missing_sops": missing_sops,
        "durable_knowledge_targets": durable_knowledge_targets,
        "human_confirmations": human_confirmations,
        "harness_state": harness_state,
        "recommended_action": "init",
        "notes": notes,
    }
|
|
239
|
+
|
|
240
|
+
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
from .common import *
|
|
2
|
+
from .templates import DEFAULT_KNOWLEDGE_PLACEHOLDER, is_managed_text
|
|
3
|
+
from .plans import active_plan_dir, completed_plan_dir, load_plan_state, sync_state_from_markdown, specific_acceptance_issues, criteria_fingerprint, find_section
|
|
4
|
+
from .knowledge import extract_knowledge_items, parse_knowledge_item, destination_contains_fact
|
|
5
|
+
from .plans import open_defects_for_plan
|
|
6
|
+
from .continuation import phase_continuity_issues, continuation_decision_issues, workstreams_path
|
|
7
|
+
|
|
8
|
+
def check_harness(repo):
    """Validate the harness files and plan state in *repo* and report issues.

    Four groups of checks run in order: required harness files, active
    execution plans, completed execution plans, and the workstream ledger.
    Every finding is a dict with at least ``severity``, ``code``, ``path``
    and ``message`` keys; all findings here use severity ``"error"``.

    Args:
        repo: Path-like repository root supporting the ``/`` operator.

    Returns:
        A dict with ``repo`` (string form of *repo*), ``status`` (``"pass"``
        when no issues were found, otherwise ``"fail"``), ``issue_count`` and
        the ``issues`` list.
    """
    # NOTE(review): this block was recovered from a flattened diff view with
    # indentation stripped; the nesting below is the most plausible reading
    # and should be confirmed against the published file.
    required_files = [
        "AGENTS.md",
        "ARCHITECTURE.md",
        "docs/PLANS.md",
        "docs/QUALITY_SCORE.md",
        "docs/RELIABILITY.md",
        "docs/SECURITY.md",
        "docs/exec-plans/workstreams.md",
        "docs/exec-plans/active/README.md",
        "docs/exec-plans/active/_template.md",
        "docs/exec-plans/completed/README.md",
        "docs/sops/encode-unseen-knowledge.md",
    ]
    issues = []
    # 1. Every required harness file must exist.
    for relative_path in required_files:
        if not (repo / relative_path).exists():
            issues.append(
                {
                    "severity": "error",
                    "code": "missing-required-file",
                    "path": relative_path,
                    "message": f"Required harness file is missing: {relative_path}",
                }
            )

    # 2. Active plans: sidecar state, acceptance contract, required sections,
    #    open defects, phase continuity, and durable-knowledge items.
    active_dir = active_plan_dir(repo)
    if active_dir.exists():
        for plan_path in sorted(active_dir.glob("*.md")):
            # The README and the template are scaffolding, not real plans.
            if plan_path.name in {"README.md", "_template.md"}:
                continue
            relative_plan = str(plan_path.relative_to(repo))
            plan_text = plan_path.read_text()
            state = load_plan_state(plan_path)
            if state is None:
                issues.append(
                    {
                        "severity": "error",
                        "code": "missing-plan-sidecar",
                        "path": relative_plan,
                        "message": "Active plan is missing structured JSON sidecar metadata. Run migration or recreate it with plan-start.",
                    }
                )
            else:
                state = sync_state_from_markdown(plan_path, state)
                contract = state.get("acceptance_contract", {})
                criteria = contract.get("criteria") or {}
                acceptance_issues = specific_acceptance_issues(criteria)
                current_fingerprint = criteria_fingerprint(criteria)
                # A not-ready contract takes precedence; the fingerprint is
                # only compared once the contract itself is acceptable.
                if contract.get("status") != "ready" or acceptance_issues:
                    issues.append(
                        {
                            "severity": "error",
                            "code": "acceptance-contract-not-ready",
                            "path": relative_plan,
                            "message": "Active plan must have a ready, task-specific Acceptance Contract before implementation.",
                            "acceptance_issues": acceptance_issues,
                        }
                    )
                elif contract.get("fingerprint") != current_fingerprint:
                    issues.append(
                        {
                            "severity": "error",
                            "code": "acceptance-fingerprint-stale",
                            "path": relative_plan,
                            "message": "Active plan Acceptance Contract fingerprint does not match current criteria.",
                        }
                    )
            # NOTE(review): the section checks below read only plan_text, so
            # they are placed at loop level (run with or without a sidecar) —
            # confirm against the published file.
            if find_section(plan_text.splitlines(), "## Acceptance Contract") is None:
                issues.append(
                    {
                        "severity": "error",
                        "code": "missing-acceptance-contract",
                        "path": relative_plan,
                        "message": "Active plan is missing an Acceptance Contract section.",
                    }
                )
            if find_section(plan_text.splitlines(), "## Quality Result") is None:
                issues.append(
                    {
                        "severity": "error",
                        "code": "missing-quality-result",
                        "path": relative_plan,
                        "message": "Active plan is missing a Quality Result section.",
                    }
                )
            # Each unresolved defect in the plan is reported individually.
            for defect in open_defects_for_plan(plan_text):
                issues.append(
                    {
                        "severity": "error",
                        "code": "open-defect",
                        "path": relative_plan,
                        "id": defect["id"],
                        "defect_severity": defect["severity"],
                        "message": f"Active plan has an unresolved defect: {defect['summary']}",
                    }
                )
            issues.extend(phase_continuity_issues(repo, plan_path, plan_text))
            for item in extract_knowledge_items(plan_text):
                # The untouched template placeholder is not a real item.
                if item == DEFAULT_KNOWLEDGE_PLACEHOLDER:
                    continue
                parsed = parse_knowledge_item(item)
                if not parsed:
                    issues.append(
                        {
                            "severity": "error",
                            "code": "unparseable-knowledge-item",
                            "path": relative_plan,
                            "message": f"Knowledge item is not parseable: {item}",
                        }
                    )
                    continue
                if parsed["status"] == "open":
                    issues.append(
                        {
                            "severity": "error",
                            "code": "open-durable-knowledge",
                            "path": relative_plan,
                            "destination": parsed["destination"],
                            "message": f"Durable knowledge is still open: {parsed['fact']}",
                        }
                    )
                else:
                    # Non-open items must be backed by the destination doc:
                    # prefer the recorded evidence, else the fact text itself.
                    verification_text = parsed["evidence"] or parsed["fact"]
                    if destination_contains_fact(repo, parsed["destination"], verification_text):
                        continue
                    issues.append(
                        {
                            "severity": "error",
                            "code": "missing-written-knowledge",
                            "path": relative_plan,
                            "destination": parsed["destination"],
                            "message": f"Marked knowledge evidence is missing from destination: {verification_text}",
                        }
                    )

    # 3. Completed plans: sidecar state, ready contract, passing quality
    #    result scored against the current criteria, continuation decision.
    completed_dir = completed_plan_dir(repo)
    if completed_dir.exists():
        for plan_path in sorted(completed_dir.glob("*.md")):
            if plan_path.name == "README.md":
                continue
            relative_plan = str(plan_path.relative_to(repo))
            state = load_plan_state(plan_path)
            if state is None:
                issues.append(
                    {
                        "severity": "error",
                        "code": "missing-plan-sidecar",
                        "path": relative_plan,
                        "message": "Completed plan is missing structured JSON sidecar metadata. Run migration or recreate structured plan history.",
                    }
                )
                # Without a sidecar none of the remaining checks can run.
                continue
            state = sync_state_from_markdown(plan_path, state)
            contract = state.get("acceptance_contract", {})
            criteria = contract.get("criteria") or {}
            current_fingerprint = criteria_fingerprint(criteria)
            quality = state.get("quality_result", {})
            if contract.get("status") != "ready":
                issues.append(
                    {
                        "severity": "error",
                        "code": "completed-acceptance-contract-not-ready",
                        "path": relative_plan,
                        "message": "Completed plan must have a ready Acceptance Contract.",
                    }
                )
            if quality.get("status") != "pass":
                issues.append(
                    {
                        "severity": "error",
                        "code": "completed-quality-result-not-passing",
                        "path": relative_plan,
                        "message": "Completed plan must have a passing Quality Result.",
                    }
                )
            if quality.get("criteria_fingerprint") != current_fingerprint:
                issues.append(
                    {
                        "severity": "error",
                        "code": "completed-quality-fingerprint-stale",
                        "path": relative_plan,
                        "message": "Completed plan Quality Result was not scored against the current Acceptance Contract.",
                    }
                )
            issues.extend(continuation_decision_issues(repo, plan_path, plan_path.read_text()))

    # 4. Workstream ledger: every referenced plan path must exist.
    ledger = workstreams_path(repo)
    if ledger.exists():
        for index, line in enumerate(ledger.read_text().splitlines(), start=1):
            stripped = line.strip()
            # Only table data rows matter; skip prose, the separator row and
            # the header row.
            if not stripped.startswith("|") or stripped.startswith("| ---") or stripped.startswith("| ID |"):
                continue
            cells = [cell.strip() for cell in stripped.strip("|").split("|")]
            # Rows that do not have exactly six cells are ignored silently.
            if len(cells) != 6:
                continue
            workstream_id, _, current_plan, last_completed_plan, _, _ = cells
            for label, plan_value in [
                ("current plan", current_plan),
                ("last completed plan", last_completed_plan),
            ]:
                # Empty-style markers mean "no plan" rather than a path.
                if plan_value in {"", "none", "n/a", "-"}:
                    continue
                if not (repo / plan_value).exists():
                    issues.append(
                        {
                            "severity": "error",
                            "code": "missing-workstream-plan-reference",
                            "path": str(ledger.relative_to(repo)),
                            "line": index,
                            "workstream": workstream_id,
                            "message": f"Workstream {workstream_id} references missing {label}: {plan_value}",
                        }
                    )

    return {
        "repo": str(repo),
        "status": "pass" if not issues else "fail",
        "issue_count": len(issues),
        "issues": issues,
    }
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def docs_text_for_reference_scan(repo):
    """Return the combined text of the repo's harness docs, newline-joined.

    The sources are ``AGENTS.md``, ``ARCHITECTURE.md`` and every file found
    recursively under ``docs/``. Missing paths and files that cannot be
    decoded as text are skipped.
    """
    sources = [repo / "AGENTS.md", repo / "ARCHITECTURE.md"]
    docs_dir = repo / "docs"
    if docs_dir.exists():
        sources += [entry for entry in docs_dir.rglob("*") if entry.is_file()]

    texts = []
    for source in sources:
        # is_file() is already False for a path that does not exist.
        if not source.is_file():
            continue
        try:
            text = source.read_text()
        except UnicodeDecodeError:
            continue
        texts.append(text)
    return "\n".join(texts)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def evidence_prune_candidates(repo, root="docs/generated", older_than_days=14):
    """List stale, unreferenced files under the evidence root.

    A file is a candidate when ``is_managed_text`` rejects its contents, its
    modification time is at least *older_than_days* old, and neither its
    repo-relative path nor its bare file name appears in the text returned
    by ``docs_text_for_reference_scan``. Nothing is deleted here.

    Args:
        repo: Path-like repository root; it may be relative or a symlink.
        root: Evidence directory, relative to *repo*.
        older_than_days: Minimum age in days for a file to be listed.

    Returns:
        A list of dicts with ``path`` (repo-relative), ``age_days`` and
        ``reason``, ordered by path. Empty when the evidence root is missing.

    Raises:
        ValueError: If the evidence root resolves outside the repository.
    """
    # Resolve the repo once. Files found below live under the *resolved*
    # evidence root, so they must be made relative to the resolved repo;
    # using the raw `repo` raises ValueError for relative or symlinked roots.
    repo_root = repo.resolve()
    evidence_root = (repo / root).resolve()
    if not evidence_root.exists():
        return []
    try:
        evidence_root.relative_to(repo_root)
    except ValueError as error:
        raise ValueError(f"Evidence root must be inside repo: {root}") from error

    now = time.time()
    seconds_per_day = 24 * 60 * 60
    max_age_seconds = older_than_days * seconds_per_day
    docs_text = docs_text_for_reference_scan(repo)
    candidates = []
    for path in sorted(evidence_root.rglob("*")):
        if not path.is_file():
            continue
        relative_path = str(path.relative_to(repo_root))
        try:
            content = path.read_text()
        except UnicodeDecodeError:
            # Undecodable (binary) files are checked as if they were empty.
            content = ""
        if is_managed_text(content):
            continue
        age_seconds = now - path.stat().st_mtime
        if age_seconds < max_age_seconds:
            continue
        # Substring match: any mention of the path or file name in the docs
        # counts as a reference and protects the file.
        if relative_path in docs_text or path.name in docs_text:
            continue
        candidates.append(
            {
                "path": relative_path,
                "age_days": round(age_seconds / seconds_per_day, 1),
                "reason": (
                    f"unreferenced file under {root} older than {older_than_days} days "
                    "and not a managed starter"
                ),
            }
        )
    return candidates
|
|
286
|
+
|
|
287
|
+
|