prizmkit 1.1.57 → 1.1.60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. package/bin/create-prizmkit.js +8 -6
  2. package/bundled/VERSION.json +3 -3
  3. package/bundled/adapters/codex/agent-adapter.js +38 -0
  4. package/bundled/adapters/codex/paths.js +27 -0
  5. package/bundled/adapters/codex/rules-adapter.js +30 -0
  6. package/bundled/adapters/codex/settings-adapter.js +27 -0
  7. package/bundled/adapters/codex/skill-adapter.js +65 -0
  8. package/bundled/adapters/codex/team-adapter.js +37 -0
  9. package/bundled/dev-pipeline/.env.example +2 -1
  10. package/bundled/dev-pipeline/README.md +10 -7
  11. package/bundled/dev-pipeline/lib/common.sh +278 -37
  12. package/bundled/dev-pipeline/run-bugfix.sh +10 -61
  13. package/bundled/dev-pipeline/run-feature.sh +10 -78
  14. package/bundled/dev-pipeline/run-recovery.sh +10 -46
  15. package/bundled/dev-pipeline/run-refactor.sh +10 -61
  16. package/bundled/dev-pipeline/scripts/generate-bootstrap-prompt.py +17 -7
  17. package/bundled/dev-pipeline/scripts/generate-bugfix-prompt.py +9 -3
  18. package/bundled/dev-pipeline/scripts/generate-refactor-prompt.py +9 -3
  19. package/bundled/dev-pipeline/scripts/utils.py +6 -4
  20. package/bundled/dev-pipeline-windows/.env.example +28 -0
  21. package/bundled/dev-pipeline-windows/README.md +30 -0
  22. package/bundled/dev-pipeline-windows/SCHEMA_ANALYSIS.md +525 -0
  23. package/bundled/dev-pipeline-windows/assets/feature-list-example.json +146 -0
  24. package/bundled/dev-pipeline-windows/assets/prizm-dev-team-integration.md +138 -0
  25. package/bundled/dev-pipeline-windows/launch-bugfix-daemon.ps1 +9 -0
  26. package/bundled/dev-pipeline-windows/launch-feature-daemon.ps1 +9 -0
  27. package/bundled/dev-pipeline-windows/launch-refactor-daemon.ps1 +9 -0
  28. package/bundled/dev-pipeline-windows/lib/common.ps1 +432 -0
  29. package/bundled/dev-pipeline-windows/lib/daemon.ps1 +140 -0
  30. package/bundled/dev-pipeline-windows/lib/pipeline.ps1 +446 -0
  31. package/bundled/dev-pipeline-windows/lib/reset.ps1 +87 -0
  32. package/bundled/dev-pipeline-windows/reset-bug.ps1 +9 -0
  33. package/bundled/dev-pipeline-windows/reset-feature.ps1 +9 -0
  34. package/bundled/dev-pipeline-windows/reset-refactor.ps1 +9 -0
  35. package/bundled/dev-pipeline-windows/run-bugfix.ps1 +9 -0
  36. package/bundled/dev-pipeline-windows/run-feature.ps1 +9 -0
  37. package/bundled/dev-pipeline-windows/run-recovery.ps1 +76 -0
  38. package/bundled/dev-pipeline-windows/run-refactor.ps1 +9 -0
  39. package/bundled/dev-pipeline-windows/scripts/check-session-status.py +228 -0
  40. package/bundled/dev-pipeline-windows/scripts/cleanup-logs.py +192 -0
  41. package/bundled/dev-pipeline-windows/scripts/detect-stuck.py +530 -0
  42. package/bundled/dev-pipeline-windows/scripts/generate-bootstrap-prompt.py +1737 -0
  43. package/bundled/dev-pipeline-windows/scripts/generate-bugfix-prompt.py +685 -0
  44. package/bundled/dev-pipeline-windows/scripts/generate-recovery-prompt.py +805 -0
  45. package/bundled/dev-pipeline-windows/scripts/generate-refactor-prompt.py +763 -0
  46. package/bundled/dev-pipeline-windows/scripts/init-bugfix-pipeline.py +316 -0
  47. package/bundled/dev-pipeline-windows/scripts/init-dev-team.py +134 -0
  48. package/bundled/dev-pipeline-windows/scripts/init-pipeline.py +380 -0
  49. package/bundled/dev-pipeline-windows/scripts/init-refactor-pipeline.py +399 -0
  50. package/bundled/dev-pipeline-windows/scripts/parse-stream-progress.py +388 -0
  51. package/bundled/dev-pipeline-windows/scripts/patch-completion-notes.py +191 -0
  52. package/bundled/dev-pipeline-windows/scripts/update-bug-status.py +864 -0
  53. package/bundled/dev-pipeline-windows/scripts/update-checkpoint.py +173 -0
  54. package/bundled/dev-pipeline-windows/scripts/update-feature-status.py +1501 -0
  55. package/bundled/dev-pipeline-windows/scripts/update-refactor-status.py +1073 -0
  56. package/bundled/dev-pipeline-windows/scripts/utils.py +542 -0
  57. package/bundled/dev-pipeline-windows/templates/agent-prompts/critic-plan-challenge.md +7 -0
  58. package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-fix.md +7 -0
  59. package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-implement.md +30 -0
  60. package/bundled/dev-pipeline-windows/templates/agent-prompts/dev-resume.md +5 -0
  61. package/bundled/dev-pipeline-windows/templates/agent-prompts/reviewer-review.md +7 -0
  62. package/bundled/dev-pipeline-windows/templates/bootstrap-prompt.md +46 -0
  63. package/bundled/dev-pipeline-windows/templates/bootstrap-tier1.md +43 -0
  64. package/bundled/dev-pipeline-windows/templates/bootstrap-tier2.md +43 -0
  65. package/bundled/dev-pipeline-windows/templates/bootstrap-tier3.md +43 -0
  66. package/bundled/dev-pipeline-windows/templates/bug-fix-list-schema.json +263 -0
  67. package/bundled/dev-pipeline-windows/templates/bugfix-bootstrap-prompt.md +320 -0
  68. package/bundled/dev-pipeline-windows/templates/feature-list-schema.json +237 -0
  69. package/bundled/dev-pipeline-windows/templates/refactor-bootstrap-prompt.md +331 -0
  70. package/bundled/dev-pipeline-windows/templates/refactor-list-schema.json +270 -0
  71. package/bundled/dev-pipeline-windows/templates/sections/ac-verification-checklist.md +13 -0
  72. package/bundled/dev-pipeline-windows/templates/sections/checkpoint-system.md +91 -0
  73. package/bundled/dev-pipeline-windows/templates/sections/context-budget-rules.md +33 -0
  74. package/bundled/dev-pipeline-windows/templates/sections/critical-paths-agent.md +10 -0
  75. package/bundled/dev-pipeline-windows/templates/sections/critical-paths-full.md +12 -0
  76. package/bundled/dev-pipeline-windows/templates/sections/critical-paths-lite.md +7 -0
  77. package/bundled/dev-pipeline-windows/templates/sections/directory-convention-agent.md +8 -0
  78. package/bundled/dev-pipeline-windows/templates/sections/directory-convention-full.md +9 -0
  79. package/bundled/dev-pipeline-windows/templates/sections/directory-convention-lite.md +6 -0
  80. package/bundled/dev-pipeline-windows/templates/sections/failure-capture.md +21 -0
  81. package/bundled/dev-pipeline-windows/templates/sections/feature-context.md +31 -0
  82. package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification-auto.md +72 -0
  83. package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification-opencli.md +63 -0
  84. package/bundled/dev-pipeline-windows/templates/sections/phase-browser-verification.md +62 -0
  85. package/bundled/dev-pipeline-windows/templates/sections/phase-commit-full.md +71 -0
  86. package/bundled/dev-pipeline-windows/templates/sections/phase-commit.md +64 -0
  87. package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-agent-suffix.md +23 -0
  88. package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-base.md +24 -0
  89. package/bundled/dev-pipeline-windows/templates/sections/phase-context-snapshot-lite-suffix.md +12 -0
  90. package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan-full.md +53 -0
  91. package/bundled/dev-pipeline-windows/templates/sections/phase-critic-plan.md +32 -0
  92. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-agent.md +37 -0
  93. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-full.md +50 -0
  94. package/bundled/dev-pipeline-windows/templates/sections/phase-implement-lite.md +52 -0
  95. package/bundled/dev-pipeline-windows/templates/sections/phase-plan-agent.md +27 -0
  96. package/bundled/dev-pipeline-windows/templates/sections/phase-plan-lite.md +27 -0
  97. package/bundled/dev-pipeline-windows/templates/sections/phase-review-agent.md +27 -0
  98. package/bundled/dev-pipeline-windows/templates/sections/phase-review-full.md +29 -0
  99. package/bundled/dev-pipeline-windows/templates/sections/phase-specify-plan-full.md +77 -0
  100. package/bundled/dev-pipeline-windows/templates/sections/phase0-init.md +13 -0
  101. package/bundled/dev-pipeline-windows/templates/sections/phase0-test-baseline.md +23 -0
  102. package/bundled/dev-pipeline-windows/templates/sections/session-context.md +5 -0
  103. package/bundled/dev-pipeline-windows/templates/sections/subagent-timeout-recovery.md +6 -0
  104. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-agent.md +67 -0
  105. package/bundled/dev-pipeline-windows/templates/sections/test-failure-recovery-lite.md +58 -0
  106. package/bundled/dev-pipeline-windows/templates/session-status-schema.json +83 -0
  107. package/bundled/skills/_metadata.json +1 -1
  108. package/bundled/skills/app-planner/SKILL.md +26 -18
  109. package/bundled/skills/app-planner/references/architecture-decisions.md +9 -5
  110. package/bundled/skills/app-planner/references/frontend-design-guide.md +1 -1
  111. package/bundled/skills/feature-planner/SKILL.md +9 -2
  112. package/bundled/skills/prizmkit-init/SKILL.md +7 -6
  113. package/bundled/skills/recovery-workflow/scripts/detect-recovery-state.py +2 -0
  114. package/bundled/skills-windows/app-planner/SKILL.md +639 -0
  115. package/bundled/skills-windows/app-planner/assets/app-design-guide.md +101 -0
  116. package/bundled/skills-windows/app-planner/references/architecture-decisions.md +52 -0
  117. package/bundled/skills-windows/app-planner/references/brainstorm-guide.md +101 -0
  118. package/bundled/skills-windows/app-planner/references/frontend-design-guide.md +71 -0
  119. package/bundled/skills-windows/app-planner/references/project-brief-guide.md +82 -0
  120. package/bundled/skills-windows/app-planner/references/red-team-checklist.md +40 -0
  121. package/bundled/skills-windows/app-planner/references/rules/backend/derivation-rules.md +609 -0
  122. package/bundled/skills-windows/app-planner/references/rules/backend/fixed-rules.md +285 -0
  123. package/bundled/skills-windows/app-planner/references/rules/backend/question-bank.md +249 -0
  124. package/bundled/skills-windows/app-planner/references/rules/backend/template.md +173 -0
  125. package/bundled/skills-windows/app-planner/references/rules/database/derivation-rules.md +373 -0
  126. package/bundled/skills-windows/app-planner/references/rules/database/fixed-rules.md +211 -0
  127. package/bundled/skills-windows/app-planner/references/rules/database/question-bank.md +184 -0
  128. package/bundled/skills-windows/app-planner/references/rules/database/template.md +158 -0
  129. package/bundled/skills-windows/app-planner/references/rules/frontend/derivation-rules.md +810 -0
  130. package/bundled/skills-windows/app-planner/references/rules/frontend/fixed-rules.md +188 -0
  131. package/bundled/skills-windows/app-planner/references/rules/frontend/question-bank.md +302 -0
  132. package/bundled/skills-windows/app-planner/references/rules/frontend/template.md +320 -0
  133. package/bundled/skills-windows/app-planner/references/rules/mobile/derivation-rules.md +639 -0
  134. package/bundled/skills-windows/app-planner/references/rules/mobile/fixed-rules.md +290 -0
  135. package/bundled/skills-windows/app-planner/references/rules/mobile/question-bank.md +232 -0
  136. package/bundled/skills-windows/app-planner/references/rules/mobile/template.md +175 -0
  137. package/bundled/skills-windows/bug-fix-workflow/SKILL.md +415 -0
  138. package/bundled/skills-windows/bug-planner/SKILL.md +395 -0
  139. package/bundled/skills-windows/bug-planner/assets/bug-confirmation-template.md +43 -0
  140. package/bundled/skills-windows/bug-planner/references/critic-and-verification.md +44 -0
  141. package/bundled/skills-windows/bug-planner/references/error-recovery.md +73 -0
  142. package/bundled/skills-windows/bug-planner/references/input-formats.md +53 -0
  143. package/bundled/skills-windows/bug-planner/references/schema-validation.md +25 -0
  144. package/bundled/skills-windows/bug-planner/references/severity-rules.md +16 -0
  145. package/bundled/skills-windows/bug-planner/scripts/validate-bug-list.py +322 -0
  146. package/bundled/skills-windows/bugfix-pipeline-launcher/SKILL.md +380 -0
  147. package/bundled/skills-windows/feature-pipeline-launcher/SKILL.md +441 -0
  148. package/bundled/skills-windows/feature-pipeline-launcher/scripts/preflight-check.py +462 -0
  149. package/bundled/skills-windows/feature-planner/SKILL.md +401 -0
  150. package/bundled/skills-windows/feature-planner/assets/evaluation-guide.md +64 -0
  151. package/bundled/skills-windows/feature-planner/assets/planning-guide.md +214 -0
  152. package/bundled/skills-windows/feature-planner/references/browser-interaction.md +59 -0
  153. package/bundled/skills-windows/feature-planner/references/completeness-review.md +57 -0
  154. package/bundled/skills-windows/feature-planner/references/decomposition-patterns.md +75 -0
  155. package/bundled/skills-windows/feature-planner/references/error-recovery.md +90 -0
  156. package/bundled/skills-windows/feature-planner/references/incremental-feature-planning.md +112 -0
  157. package/bundled/skills-windows/feature-planner/references/new-project-planning.md +85 -0
  158. package/bundled/skills-windows/feature-planner/scripts/validate-and-generate.py +1029 -0
  159. package/bundled/skills-windows/feature-workflow/SKILL.md +531 -0
  160. package/bundled/skills-windows/prizmkit-init/SKILL.md +356 -0
  161. package/bundled/skills-windows/prizmkit-init/assets/project-brief-template.md +82 -0
  162. package/bundled/skills-windows/prizmkit-init/references/config-schema.md +68 -0
  163. package/bundled/skills-windows/prizmkit-init/references/rules/layer-detection.md +41 -0
  164. package/bundled/skills-windows/prizmkit-init/references/tech-stack-catalog.md +13 -0
  165. package/bundled/skills-windows/prizmkit-init/references/update-supplement.md +9 -0
  166. package/bundled/skills-windows/recovery-workflow/SKILL.md +456 -0
  167. package/bundled/skills-windows/recovery-workflow/evals/evals.json +46 -0
  168. package/bundled/skills-windows/recovery-workflow/scripts/detect-recovery-state.py +544 -0
  169. package/bundled/skills-windows/refactor-pipeline-launcher/SKILL.md +406 -0
  170. package/bundled/skills-windows/refactor-planner/SKILL.md +540 -0
  171. package/bundled/skills-windows/refactor-planner/assets/planning-guide.md +292 -0
  172. package/bundled/skills-windows/refactor-planner/references/behavior-preservation.md +301 -0
  173. package/bundled/skills-windows/refactor-planner/references/refactor-scoping-guide.md +221 -0
  174. package/bundled/skills-windows/refactor-planner/scripts/validate-and-generate-refactor.py +858 -0
  175. package/bundled/skills-windows/refactor-workflow/SKILL.md +503 -0
  176. package/package.json +3 -2
  177. package/src/clean.js +73 -2
  178. package/src/config.js +159 -50
  179. package/src/detect-platform.js +16 -8
  180. package/src/external-skills.js +26 -19
  181. package/src/index.js +31 -9
  182. package/src/manifest.js +6 -2
  183. package/src/metadata.js +43 -5
  184. package/src/platforms.js +36 -0
  185. package/src/prompts.js +31 -6
  186. package/src/runtimes.js +20 -0
  187. package/src/scaffold.js +314 -110
  188. package/src/upgrade.js +81 -41
@@ -0,0 +1,1029 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ validate-and-generate.py - Validate and generate feature-list.json files
4
+ for the dev-pipeline system.
5
+
6
+ Commands:
7
+ validate Validate an existing .prizmkit/plans/feature-list.json
8
+ template Generate a blank template .prizmkit/plans/feature-list.json
9
+ generate Validate a draft JSON and generate final feature-list.json with defaults
10
+ summary Print a summary table of features from a .prizmkit/plans/feature-list.json
11
+ grade Generate grading results from eval runs (for npm run skill:review)
12
+
13
+ Usage:
14
+ python3 validate-and-generate.py validate --input .prizmkit/plans/feature-list.json [--output validated.json] [--mode new|incremental]
15
+ python3 validate-and-generate.py template --output .prizmkit/plans/feature-list.json
16
+ python3 validate-and-generate.py generate --input draft.json --output .prizmkit/plans/feature-list.json [--mode new|incremental]
17
+ python3 validate-and-generate.py summary --input .prizmkit/plans/feature-list.json [--format markdown|json]
18
+ python3 validate-and-generate.py grade --workspace /.codebuddy/skill-evals/feature-planner-workspace --iteration iteration-1
19
+
20
+ Python 3.6+ required. No external dependencies.
21
+ """
22
+
23
+ import argparse
24
+ import collections
25
+ import json
26
+ import os
27
+ import re
28
+ import sys
29
+ from datetime import datetime, timezone
30
+
31
+ # ---------------------------------------------------------------------------
32
+ # Constants
33
+ # ---------------------------------------------------------------------------
34
+
35
# Identifier every feature-list document must carry in its "$schema" field.
SCHEMA_VERSION = "dev-pipeline-feature-list-v1"

# Closed vocabularies accepted for the enumerated feature fields.
VALID_STATUSES = {
    "pending",
    "in_progress",
    "completed",
    "failed",
    "skipped",
    "split",
    "auto_skipped",
}
VALID_COMPLEXITIES = {
    "low",
    "medium",
    "high",
    "critical",
}
VALID_PRIORITIES = {
    "critical",
    "high",
    "medium",
    "low",
}
VALID_GRANULARITIES = {
    "feature",
    "sub_feature",
    "auto",
}
VALID_PLANNING_MODES = {
    "new",
    "incremental",
}

# Feature IDs look like "F-001"; the optional "-A" suffix marks a sub-feature.
FEATURE_ID_RE = re.compile(r"^F-\d{3}(-[A-Z])?$")
# Sub-feature IDs must always carry the single-letter suffix.
SUB_FEATURE_ID_RE = re.compile(r"^F-\d{3}-[A-Z]$")

# Whole-word, case-insensitive keywords in acceptance criteria that indicate
# UI/browser interaction behavior.
UI_KEYWORDS_RE = re.compile(
    r"\b("
    r"click|button|modal|page|form|display|navigate|tab|input|opens|shows|"
    r"renders|visible|redirect|download|upload|preview|select|toggle|dropdown|"
    r"popup|toast|menu"
    r")\b",
    re.IGNORECASE,
)
53
+
54
+ # ---------------------------------------------------------------------------
55
+ # Helpers
56
+ # ---------------------------------------------------------------------------
57
+
58
+
59
def _err(msg):
    """Emit *msg* on stderr as a single line prefixed with ``ERROR: ``."""
    sys.stderr.write("ERROR: {}\n".format(msg))
62
+
63
+
64
def _warn(msg):
    """Emit *msg* on stderr as a single line prefixed with ``WARNING: ``."""
    sys.stderr.write("WARNING: {}\n".format(msg))
67
+
68
+
69
def _info(msg):
    """Emit *msg* on stderr as a single line prefixed with ``INFO: ``."""
    sys.stderr.write("INFO: {}\n".format(msg))
72
+
73
+
74
def _load_json(path):
    """Load and return parsed JSON from *path*.

    The file is decoded as UTF-8. A leading byte-order mark, which Windows
    editors and PowerShell commonly prepend, is tolerated: ``json.load``
    rejects a BOM in decoded text, so the ``utf-8-sig`` codec is used to
    strip it. Files without a BOM decode exactly as with plain ``utf-8``.

    Returns:
        A ``(data, error_message)`` tuple. On success ``error_message`` is
        ``None``; on failure ``data`` is ``None`` and ``error_message``
        describes the problem. This function does not raise for missing
        files, unreadable files, or malformed JSON.
    """
    if not os.path.isfile(path):
        return None, "File not found: {}".format(path)
    try:
        with open(path, "r", encoding="utf-8-sig") as fh:
            data = json.load(fh)
        return data, None
    except json.JSONDecodeError as exc:
        return None, "JSON parse error in {}: {}".format(path, exc)
    except Exception as exc:
        # Deliberately broad: callers expect every read/decode failure to be
        # reported through the error tuple rather than as an exception.
        return None, "Failed to read {}: {}".format(path, exc)
89
+
90
+
91
def _write_json(path, data):
    """Serialize *data* to *path* as 2-space-indented UTF-8 JSON.

    Missing parent directories are created first. Non-ASCII characters are
    written as-is (not escaped) and the file ends with a single newline.
    """
    target_dir = os.path.dirname(path)
    if target_dir:
        # exist_ok makes this a no-op when the directory is already there.
        os.makedirs(target_dir, exist_ok=True)

    with open(path, "w", encoding="utf-8") as out:
        json.dump(data, out, indent=2, ensure_ascii=False)
        out.write("\n")
99
+
100
+
101
+ # ---------------------------------------------------------------------------
102
+ # Cycle detection (Kahn's algorithm)
103
+ # ---------------------------------------------------------------------------
104
+
105
+
106
+
107
def _detect_cycles(features):
    """Return ``(has_cycles, max_depth)`` using Kahn's topological sort.

    *features* is the list of feature entries. A graph is built from each
    entry's ``id`` and ``dependencies`` fields, with an edge from every
    dependency to the feature that depends on it.

    Malformed entries are tolerated rather than raising, because this
    function runs on input that is still being validated:

    * entries that are not dicts, or whose ``id`` is not a string, are
      ignored;
    * a ``dependencies`` value that is not a list is treated as empty;
    * dependencies that are not strings, or that name an unknown feature,
      contribute no edge.

    Returns:
        A tuple ``(has_cycles, max_depth)``. ``has_cycles`` is True when not
        every node could be topologically sorted. ``max_depth`` is the number
        of edges on the longest dependency chain (0 for an empty graph or one
        with no edges). When cycles exist, ``max_depth`` only reflects depths
        propagated from the nodes that could be sorted and should not be
        relied upon.
    """
    nodes = [
        feat for feat in features
        if isinstance(feat, dict) and isinstance(feat.get("id"), str)
    ]
    id_set = {feat["id"] for feat in nodes}

    # Build adjacency list (dependency -> [dependents]) and in-degree map.
    adj = {fid: [] for fid in id_set}
    in_degree = {fid: 0 for fid in id_set}

    for feat in nodes:
        fid = feat["id"]
        deps = feat.get("dependencies")
        if not isinstance(deps, list):
            continue
        for dep in deps:
            # The isinstance guard also keeps unhashable values (lists,
            # dicts) away from the set membership test.
            if isinstance(dep, str) and dep in id_set:
                adj[dep].append(fid)
                in_degree[fid] += 1

    # Kahn's algorithm: start from every node without prerequisites.
    queue = collections.deque(
        fid for fid, deg in in_degree.items() if deg == 0
    )
    sorted_count = 0
    # Longest chain of dependencies ending at each node.
    depth = {fid: 0 for fid in id_set}

    while queue:
        node = queue.popleft()
        sorted_count += 1
        for neighbour in adj[node]:
            in_degree[neighbour] -= 1
            depth[neighbour] = max(depth[neighbour], depth[node] + 1)
            if in_degree[neighbour] == 0:
                queue.append(neighbour)

    # Nodes on a cycle never reach in-degree 0, so they are never dequeued.
    has_cycles = sorted_count != len(id_set)
    max_depth = max(depth.values()) if depth else 0
    return has_cycles, max_depth
152
+
153
+
154
+ # ---------------------------------------------------------------------------
155
+ # Validation
156
+ # ---------------------------------------------------------------------------
157
+
158
+
159
def validate_feature_list(data, planning_mode="new"):
    """Validate a parsed feature-list data structure.

    Malformed input is reported through the ``errors`` list rather than by
    raising: a non-object root, non-object features, and unexpected value
    types (for example a list where a string is expected) all yield error
    entries.

    Args:
        data: The parsed feature-list document (normally a dict).
        planning_mode: ``"new"`` or ``"incremental"``. Any other value falls
            back to ``"new"``. In ``"new"`` mode a feature whose status is
            not ``"pending"`` produces a warning.

    Returns:
        A dict with keys ``valid`` (bool), ``errors`` (list of str),
        ``warnings`` (list of str) and ``stats`` (dict with
        ``total_features``, ``total_sub_features``,
        ``complexity_distribution``, ``max_dependency_depth`` and
        ``has_cycles``).
    """
    # A non-object root (e.g. a top-level JSON array) cannot be inspected
    # with .get(); report it instead of raising AttributeError.
    if not isinstance(data, dict):
        return {
            "valid": False,
            "errors": [
                "feature list must be a JSON object, got {}".format(
                    type(data).__name__
                )
            ],
            "warnings": [],
            "stats": {
                "total_features": 0,
                "total_sub_features": 0,
                "complexity_distribution": {},
                "max_dependency_depth": 0,
                "has_cycles": False,
            },
        }

    def _hashable(value):
        """Return True when *value* can be stored in a set."""
        try:
            hash(value)
        except TypeError:
            return False
        return True

    if planning_mode not in VALID_PLANNING_MODES:
        planning_mode = "new"

    errors = []
    warnings = []

    # ------------------------------------------------------------------
    # 1. Top-level schema validation
    # ------------------------------------------------------------------
    schema = data.get("$schema")
    if schema != SCHEMA_VERSION:
        errors.append(
            "$schema must be '{}', got '{}'".format(SCHEMA_VERSION, schema)
        )

    # Support both project_name (canonical) and app_name (legacy)
    app_name = data.get("project_name", data.get("app_name"))
    if not isinstance(app_name, str) or not app_name.strip():
        errors.append("project_name must be a non-empty string")

    features = data.get("features")
    if not isinstance(features, list) or len(features) == 0:
        errors.append("features must be a non-empty array")
        # Early-out: nothing else to validate if features are missing.
        return {
            "valid": False,
            "errors": errors,
            "warnings": warnings,
            "stats": {
                "total_features": 0,
                "total_sub_features": 0,
                "complexity_distribution": {},
                "max_dependency_depth": 0,
                "has_cycles": False,
            },
        }

    # ------------------------------------------------------------------
    # 2. Per-feature validation
    # ------------------------------------------------------------------
    required_keys = {
        "id", "title", "description", "priority",
        "dependencies", "acceptance_criteria", "status",
    }
    # Recommended minimum description length per complexity level.
    min_words_by_complexity = {
        "low": 30, "medium": 50, "high": 80, "critical": 100,
    }

    # Whether the project declares a frontend is the same for every feature,
    # so it is computed once. A missing or non-object global_context simply
    # means "no frontend".
    global_context = data.get("global_context")
    has_frontend = isinstance(global_context, dict) and bool(
        global_context.get("frontend_framework")
    )

    seen_ids = set()
    priorities = []
    complexity_dist = {"low": 0, "medium": 0, "high": 0, "critical": 0}
    total_sub_features = 0

    for idx, feat in enumerate(features):
        label = "features[{}]".format(idx)

        # -- Required keys --
        if not isinstance(feat, dict):
            errors.append("{} is not an object".format(label))
            continue

        missing = required_keys - set(feat.keys())
        if missing:
            errors.append("{} missing required keys: {}".format(
                label, ", ".join(sorted(missing))
            ))

        # -- ID format & uniqueness --
        fid = feat.get("id", "")
        if not FEATURE_ID_RE.match(str(fid)):
            errors.append(
                "{}: id '{}' does not match pattern F-NNN or F-NNN-X".format(label, fid)
            )
        # Unhashable ids (lists/objects) were already rejected by the
        # pattern check above and cannot be tracked in a set.
        if _hashable(fid):
            if fid in seen_ids:
                errors.append("{}: duplicate id '{}'".format(label, fid))
            seen_ids.add(fid)

        # -- Title / description --
        for key in ("title", "description"):
            val = feat.get(key)
            if not isinstance(val, str) or not val.strip():
                errors.append("{}: {} must be a non-empty string".format(label, key))

        # -- Description depth check --
        desc = feat.get("description", "")
        if isinstance(desc, str) and desc.strip():
            word_count = len(desc.split())
            complexity = feat.get("estimated_complexity", "medium")
            if isinstance(complexity, str):
                min_words = min_words_by_complexity.get(complexity, 50)
            else:
                min_words = 50
            if word_count < 15:
                errors.append(
                    "{}: description too short ({} words, minimum 15). "
                    "Include: what to build, key behaviors, integration points, "
                    "and data model overview.".format(label, word_count)
                )
            elif word_count < min_words:
                warnings.append(
                    "{}: description only {} words (recommend {}+ for {} complexity). "
                    "Richer descriptions produce better pipeline results.".format(
                        label, word_count, min_words, complexity
                    )
                )

        # -- Priority --
        priority = feat.get("priority")
        if isinstance(priority, str) and priority in VALID_PRIORITIES:
            priorities.append(priority)
        else:
            # Derive the allowed values from VALID_PRIORITIES so the message
            # cannot drift from what is actually accepted.
            errors.append("{}: priority must be one of: {}, got {}".format(
                label, ", ".join(sorted(VALID_PRIORITIES)), repr(priority)
            ))

        # -- Dependencies (list of strings) --
        deps = feat.get("dependencies")
        if not isinstance(deps, list):
            errors.append("{}: dependencies must be an array".format(label))

        # -- Acceptance criteria --
        criteria = feat.get("acceptance_criteria")
        if isinstance(criteria, list):
            if len(criteria) < 1:
                errors.append("{}: must have at least 1 acceptance criterion".format(label))
            elif len(criteria) < 3:
                warnings.append(
                    "{}: only {} acceptance criteria (recommend at least 3)".format(
                        label, len(criteria)
                    )
                )
        else:
            errors.append("{}: acceptance_criteria must be an array".format(label))

        # -- Status --
        status = feat.get("status")
        if not (isinstance(status, str) and status in VALID_STATUSES):
            errors.append(
                "{}: status '{}' invalid, must be one of: {}".format(
                    label, status, ", ".join(sorted(VALID_STATUSES))
                )
            )
        if planning_mode == "new" and status and status != "pending":
            warnings.append(
                "{}: status is '{}' (expected 'pending' for new plans)".format(label, status)
            )

        # -- Complexity (optional but validated if present) --
        complexity = feat.get("estimated_complexity")
        if complexity is not None:
            if isinstance(complexity, str) and complexity in VALID_COMPLEXITIES:
                complexity_dist[complexity] += 1
            else:
                errors.append(
                    "{}: estimated_complexity '{}' invalid, must be one of: {}".format(
                        label, complexity, ", ".join(sorted(VALID_COMPLEXITIES))
                    )
                )

        # -- Granularity (optional but validated if present) --
        subs = feat.get("sub_features")
        granularity = feat.get("session_granularity")
        if granularity is not None:
            if not (isinstance(granularity, str) and granularity in VALID_GRANULARITIES):
                errors.append(
                    "{}: session_granularity '{}' invalid, must be one of: {}".format(
                        label, granularity, ", ".join(sorted(VALID_GRANULARITIES))
                    )
                )
            if granularity == "auto":
                if not isinstance(subs, list) or len(subs) == 0:
                    warnings.append(
                        "{}: granularity is 'auto' but no sub_features defined".format(label)
                    )

        # -- Critic fields (optional but validated if present) --
        critic = feat.get("critic")
        if critic is not None and not isinstance(critic, bool):
            errors.append(
                "{}: 'critic' must be a boolean, got {}".format(label, type(critic).__name__)
            )
        critic_count = feat.get("critic_count")
        # bool is a subclass of int and True == 1, so exclude it explicitly.
        if critic_count is not None and (
            isinstance(critic_count, bool) or critic_count not in (1, 3)
        ):
            errors.append(
                "{}: 'critic_count' must be 1 or 3, got {}".format(label, critic_count)
            )

        # -- Browser interaction check (warning for frontend features) --
        if has_frontend and feat.get("status") != "completed":
            ui_criteria = feat.get("acceptance_criteria", [])
            # A non-list value was already reported above; skip it here.
            if isinstance(ui_criteria, list):
                criteria_text = " ".join(
                    c for c in ui_criteria if isinstance(c, str)
                )
                if UI_KEYWORDS_RE.search(criteria_text):
                    if feat.get("browser_interaction") is None:
                        warnings.append(
                            "{}: has UI acceptance criteria but no browser_interaction "
                            "field. Consider adding browser verification.".format(label)
                        )

        # -- Sub-features --
        if isinstance(subs, list):
            for sidx, sub in enumerate(subs):
                sub_label = "{}->sub_features[{}]".format(label, sidx)
                if not isinstance(sub, dict):
                    errors.append("{} is not an object".format(sub_label))
                    continue

                sub_missing = {"id", "title", "description"} - set(sub.keys())
                if sub_missing:
                    errors.append("{} missing required keys: {}".format(
                        sub_label, ", ".join(sorted(sub_missing))
                    ))

                sub_id = sub.get("id", "")
                if not SUB_FEATURE_ID_RE.match(str(sub_id)):
                    errors.append(
                        "{}: id '{}' must be F-NNN-X format".format(sub_label, sub_id)
                    )

                # Sub-feature ID should share parent prefix
                parent_prefix = str(fid).rstrip("ABCDEFGHIJKLMNOPQRSTUVWXYZ").rstrip("-")
                sub_prefix = str(sub_id)[:5]  # e.g. "F-001"
                if parent_prefix and sub_prefix != parent_prefix:
                    warnings.append(
                        "{}: sub-feature '{}' does not share parent prefix '{}'".format(
                            sub_label, sub_id, parent_prefix
                        )
                    )

                if _hashable(sub_id):
                    if sub_id in seen_ids:
                        errors.append("{}: duplicate id '{}'".format(sub_label, sub_id))
                    seen_ids.add(sub_id)
                total_sub_features += 1

    # ------------------------------------------------------------------
    # 3. Dependency validation
    # ------------------------------------------------------------------
    # Only string ids of object features can be dependency targets; anything
    # else has already been reported in section 2.
    all_ids = {
        f["id"] for f in features
        if isinstance(f, dict) and isinstance(f.get("id"), str)
    }
    for idx, feat in enumerate(features):
        if not isinstance(feat, dict):
            continue
        label = "features[{}]".format(idx)
        deps = feat.get("dependencies", [])
        if isinstance(deps, list):
            for dep in deps:
                if not isinstance(dep, str) or dep not in all_ids:
                    errors.append(
                        "{}: dependency '{}' does not exist in feature list".format(label, dep)
                    )

    # -- Cycle detection --
    # Hand the cycle detector a sanitized view of the graph so malformed
    # entries (already reported above) cannot make it raise.
    graph_nodes = []
    for feat in features:
        if not isinstance(feat, dict) or not isinstance(feat.get("id"), str):
            continue
        deps = feat.get("dependencies")
        graph_nodes.append({
            "id": feat["id"],
            "dependencies": (
                [d for d in deps if isinstance(d, str)]
                if isinstance(deps, list) else []
            ),
        })
    has_cycles, max_depth = _detect_cycles(graph_nodes)
    if has_cycles:
        errors.append("Dependency graph contains cycles (not a valid DAG)")

    # ------------------------------------------------------------------
    # 4. Build result
    # ------------------------------------------------------------------
    is_valid = len(errors) == 0

    return {
        "valid": is_valid,
        "errors": errors,
        "warnings": warnings,
        "stats": {
            "total_features": len(features),
            "total_sub_features": total_sub_features,
            "complexity_distribution": complexity_dist,
            "max_dependency_depth": max_depth,
            "has_cycles": has_cycles,
        },
    }
436
+
437
+
438
+ # ---------------------------------------------------------------------------
439
+ # Template generation
440
+ # ---------------------------------------------------------------------------
441
+
442
+
443
def generate_template():
    """Return a template feature-list dict with placeholder values.

    The template contains one sample feature (``F-001``) and empty
    ``global_context`` fields. ``created_at`` is the current UTC time
    formatted as ``YYYY-MM-DDTHH:MM:SSZ``.

    The sample description is written to be at least 50 words long so that
    the generated template satisfies the description-length checks applied
    by ``validate_feature_list`` (15-word minimum, 50 words recommended for
    medium complexity).
    """
    now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    return {
        "$schema": SCHEMA_VERSION,
        "project_name": "YOUR_PROJECT_NAME",
        "project_description": "YOUR_PROJECT_DESCRIPTION",
        "created_at": now,
        "created_by": "feature-planner",
        "features": [
            {
                "id": "F-001",
                "title": "Project Infrastructure Setup",
                "description": (
                    "Initialize the project structure and configure the build "
                    "tooling so that every later feature starts from a working "
                    "baseline. Set up the development environment, including "
                    "the package manifest, dependency installation, a local "
                    "development server, and linting and formatting rules. "
                    "Document the directory layout and the commands used to "
                    "build, run, and test the project, so later features can "
                    "integrate consistently."
                ),
                "priority": "high",
                "estimated_complexity": "medium",
                "dependencies": [],
                "acceptance_criteria": [
                    "Project builds successfully",
                    "Development server starts",
                    "Linting and formatting configured",
                ],
                "status": "pending",
                "session_granularity": "feature",
                "sub_features": [],
            }
        ],
        "global_context": {
            "tech_stack": "",
            "design_system": "",
            "testing_strategy": "",
        },
    }
480
+
481
+
482
+ # ---------------------------------------------------------------------------
483
+ # Summary
484
+ # ---------------------------------------------------------------------------
485
+
486
+
487
+ def _build_dependency_graph_text(features):
488
+ """Build a human-readable text representation of the dependency graph.
489
+
490
+ Produces an arrow-chain format that shows all dependency paths,
491
+ including convergent edges (where multiple paths lead to the same node).
492
+
493
+ Returns a list of lines.
494
+ """
495
+ all_ids = [f["id"] for f in features]
496
+
497
+ # Build adjacency: dependency -> list of dependents (forward edges)
498
+ dependents = {fid: [] for fid in all_ids}
499
+ has_parent = set()
500
+ for feat in features:
501
+ for dep in feat.get("dependencies", []):
502
+ if dep in dependents:
503
+ dependents[dep].append(feat["id"])
504
+ has_parent.add(feat["id"])
505
+
506
+ # Sort children for deterministic output
507
+ for fid in dependents:
508
+ dependents[fid] = sorted(set(dependents[fid]))
509
+
510
+ # Roots: features with no incoming dependencies
511
+ roots = [fid for fid in all_ids if fid not in has_parent]
512
+ if not roots:
513
+ return ["(cycle detected - no root nodes)"]
514
+ if not any(dependents[r] for r in all_ids):
515
+ # No dependencies at all
516
+ return ["(no dependencies)"]
517
+
518
+ result_lines = []
519
+
520
+ def _render(node, prefix, is_continuation):
521
+ """Render a node and its dependents recursively.
522
+
523
+ *prefix*: whitespace to print before " -> node" on branch lines.
524
+ *is_continuation*: True if this node is appended on the same line
525
+ as its parent (first child).
526
+ """
527
+ children = dependents.get(node, [])
528
+ if not children:
529
+ return
530
+
531
+ for i, child in enumerate(children):
532
+ if i == 0:
533
+ # First child: continue on the same line
534
+ result_lines[-1] += " -> {}".format(child)
535
+ _render(child, prefix + " " * (len(node) + 4), True)
536
+ else:
537
+ # Subsequent children: new line, indented under the arrow
538
+ line = "{}-> {}".format(prefix, child)
539
+ result_lines.append(line)
540
+ child_prefix = prefix + " " * (len(child) + 4)
541
+ _render(child, child_prefix, True)
542
+
543
+ for root in sorted(roots):
544
+ result_lines.append(root)
545
+ _render(root, " " * len(root), False)
546
+
547
+ return result_lines
548
+
549
+
550
def generate_summary_markdown(data):
    """Generate a markdown summary of the feature list.

    The summary has three parts: a table with one row per feature, a
    "Dependency Graph" section rendered by ``_build_dependency_graph_text``,
    and a "Statistics" section.

    Args:
        data: Parsed feature-list document (a dict). The project name is
            read from ``project_name``, falling back to ``app_name`` and
            then ``"Unknown"``. A missing or null ``features`` value is
            treated as an empty list.

    Returns:
        str: the markdown text, lines joined with ``"\\n"``.
    """

    def _cell(value):
        # A literal "|" would end the table cell early, so escape it.
        return str(value).replace("|", "\\|")

    app_name = data.get("project_name", data.get("app_name", "Unknown"))
    # "features": null must behave like a missing key.
    features = data.get("features") or []

    lines = []
    lines.append("# Feature Summary: {}".format(app_name))
    lines.append("")

    # Table header
    lines.append("| ID | Title | Complexity | Priority | Dependencies | Criteria | Granularity |")
    lines.append("|----|-------|------------|----------|--------------|----------|-------------|")

    for feat in features:
        deps = feat.get("dependencies")
        # str() each id so a non-string entry cannot make join() raise.
        if isinstance(deps, list) and deps:
            deps_str = ", ".join(str(dep) for dep in deps)
        else:
            deps_str = "-"

        # This runs on unvalidated input, so the field may be null.
        criteria = feat.get("acceptance_criteria")
        criteria_count = len(criteria) if isinstance(criteria, list) else 0

        row = (
            feat.get("id", "?"),
            feat.get("title", "?"),
            feat.get("estimated_complexity", "-"),
            feat.get("priority", "?"),
            deps_str,
            criteria_count,
            feat.get("session_granularity", "-"),
        )
        lines.append("| {} | {} | {} | {} | {} | {} | {} |".format(
            *[_cell(value) for value in row]
        ))

    lines.append("")

    # Dependency graph
    lines.append("## Dependency Graph")
    lines.extend(_build_dependency_graph_text(features))
    lines.append("")

    # Statistics
    complexity_dist = {"low": 0, "medium": 0, "high": 0, "critical": 0}
    total_sub = 0
    for feat in features:
        c = feat.get("estimated_complexity")
        if c in complexity_dist:
            complexity_dist[c] += 1
        subs = feat.get("sub_features")
        if isinstance(subs, list):
            total_sub += len(subs)

    # Only the depth (second element) of the helper's result is needed here.
    _, max_depth = _detect_cycles(features)

    lines.append("## Statistics")
    lines.append("- Total features: {}".format(len(features)))
    if total_sub > 0:
        lines.append("- Total sub-features: {}".format(total_sub))
    lines.append("- Complexity: {} low, {} medium, {} high, {} critical".format(
        complexity_dist["low"], complexity_dist["medium"],
        complexity_dist["high"], complexity_dist["critical"]
    ))
    lines.append("- Max dependency depth: {}".format(max_depth))

    return "\n".join(lines)
610
+
611
+
612
def generate_summary_json(data):
    """Generate a JSON-serialisable summary of the feature list.

    Args:
        data: Parsed feature-list document (a dict). A missing or null
            ``features`` value is treated as an empty list.

    Returns:
        dict with three keys:

        * ``project_name`` - ``project_name``, else ``app_name``, else ``""``;
        * ``features`` - one condensed dict per feature (id, title, priority,
          complexity, dependencies, acceptance-criteria count, session
          granularity, status);
        * ``stats`` - feature and sub-feature totals, complexity
          distribution, and the ``(has_cycles, max_depth)`` pair returned by
          ``_detect_cycles``.
    """
    # "features": null must behave like a missing key.
    features = data.get("features") or []

    complexity_dist = {"low": 0, "medium": 0, "high": 0, "critical": 0}
    total_sub = 0
    for feat in features:
        c = feat.get("estimated_complexity")
        if c in complexity_dist:
            complexity_dist[c] += 1
        subs = feat.get("sub_features")
        if isinstance(subs, list):
            total_sub += len(subs)

    has_cycles, max_depth = _detect_cycles(features)

    feature_summaries = []
    for feat in features:
        # This runs on unvalidated input, so the field may be null.
        criteria = feat.get("acceptance_criteria")
        criteria_count = len(criteria) if isinstance(criteria, list) else 0
        feature_summaries.append({
            "id": feat.get("id"),
            "title": feat.get("title"),
            "priority": feat.get("priority"),
            "estimated_complexity": feat.get("estimated_complexity"),
            "dependencies": feat.get("dependencies", []),
            "acceptance_criteria_count": criteria_count,
            "session_granularity": feat.get("session_granularity"),
            "status": feat.get("status"),
        })

    return {
        "project_name": data.get("project_name", data.get("app_name", "")),
        "features": feature_summaries,
        "stats": {
            "total_features": len(features),
            "total_sub_features": total_sub,
            "complexity_distribution": complexity_dist,
            "max_dependency_depth": max_depth,
            "has_cycles": has_cycles,
        },
    }
652
+
653
+
654
+ # ---------------------------------------------------------------------------
655
+ # CLI
656
+ # ---------------------------------------------------------------------------
657
+
658
+
659
def cmd_validate(args):
    """Run the 'validate' command and return its exit code.

    The validation report is printed to stdout as JSON; a human-readable
    recap goes through the ``_info`` / ``_err`` / ``_warn`` helpers. When
    ``--output`` is given and the document is valid, the loaded data is
    written there.

    Returns:
        0 when the document is valid, 1 when validation reports errors,
        2 when ``--input`` is missing or the file cannot be loaded.
    """
    source_path = args.input
    if not source_path:
        _err("--input is required for the validate command")
        return 2

    data, load_err = _load_json(source_path)
    if load_err:
        _err(load_err)
        # Emit a report of the usual shape so consumers can still parse it.
        empty_stats = {
            "total_features": 0,
            "total_sub_features": 0,
            "complexity_distribution": {},
            "max_dependency_depth": 0,
            "has_cycles": False,
        }
        failure_report = {
            "valid": False,
            "errors": [load_err],
            "warnings": [],
            "stats": empty_stats,
        }
        print(json.dumps(failure_report, indent=2, ensure_ascii=False))
        return 2

    report = validate_feature_list(data, planning_mode=args.mode)

    # Machine-readable result on stdout
    print(json.dumps(report, indent=2, ensure_ascii=False))

    # Human-readable recap
    passed = report["valid"]
    warning_count = len(report["warnings"])
    if not passed:
        _err("Validation failed with {} error(s) and {} warning(s)".format(
            len(report["errors"]), warning_count
        ))
    else:
        _info("Validation passed with {} warning(s)".format(warning_count))

    for message in report["errors"]:
        _err(" " + message)
    for message in report["warnings"]:
        _warn(" " + message)

    # The optional output file is written only for a valid document.
    destination = args.output
    if destination and passed:
        _write_json(destination, data)
        _info("Validated output written to {}".format(destination))

    if passed:
        return 0
    return 1
707
+
708
+
709
def cmd_template(args):
    """Run the 'template' command: write the placeholder template file.

    Returns:
        0 after writing the template to ``--output``, 2 when ``--output``
        is missing.
    """
    destination = args.output
    if destination:
        _write_json(destination, generate_template())
        _info("Template written to {}".format(destination))
        return 0

    _err("--output is required for the template command")
    return 2
719
+
720
+
721
def cmd_generate(args):
    """Handle the 'generate' command.

    Loads a draft JSON (produced by AI), fills in defaults, validates,
    and writes the final feature-list.json.

    Defaults are filled only where the draft has the expected shape (object
    root, list of object features). A malformed draft is passed to the
    validator unchanged instead of raising ``AttributeError`` here.

    Returns:
        0 when the draft validates and the output file is written,
        1 when validation reports errors (nothing is written),
        2 when ``--input`` / ``--output`` is missing or the draft cannot be
        loaded or parsed.
    """
    if not args.input:
        _err("--input is required for the generate command")
        return 2
    if not args.output:
        _err("--output is required for the generate command")
        return 2

    # Load draft (supports stdin via '-')
    if args.input == "-":
        try:
            data = json.load(sys.stdin)
        except json.JSONDecodeError as exc:
            _err("Invalid JSON from stdin: {}".format(exc))
            return 2
    else:
        data, load_err = _load_json(args.input)
        if load_err:
            _err(load_err)
            return 2

    # Fill in defaults. Valid JSON may still have a non-object root (a list,
    # string, number, ...), which has no setdefault()/get().
    if isinstance(data, dict):
        now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        data.setdefault("$schema", SCHEMA_VERSION)
        data.setdefault("created_at", now)
        data.setdefault("created_by", "feature-planner")

        # Ensure project_name from app_name if needed
        if "project_name" not in data and "app_name" in data:
            data["project_name"] = data["app_name"]

        # Set default status for features without one; skip entries (or a
        # "features" value) of the wrong type rather than crashing on them.
        features = data.get("features")
        if isinstance(features, list):
            for feature in features:
                if isinstance(feature, dict):
                    feature.setdefault("status", "pending")

    # Validate
    mode = getattr(args, "mode", "new") or "new"
    result = validate_feature_list(data, planning_mode=mode)

    # Output validation result
    print(json.dumps(result, indent=2, ensure_ascii=False))

    if result["valid"]:
        _write_json(args.output, data)
        _info("Generated feature-list written to {}".format(args.output))
        return 0

    _err("Validation failed with {} error(s)".format(len(result["errors"])))
    for e in result["errors"]:
        _err(" " + e)
    for w in result.get("warnings", []):
        _warn(" " + w)
    return 1
779
+
780
+
781
def cmd_summary(args):
    """Run the 'summary' command: print a summary of a feature list.

    The summary is printed to stdout, as indented JSON when the requested
    format is "json" and as markdown otherwise.

    Returns:
        0 on success, 2 when ``--input`` is missing or cannot be loaded.
    """
    source_path = args.input
    if not source_path:
        _err("--input is required for the summary command")
        return 2

    data, load_err = _load_json(source_path)
    if load_err:
        _err(load_err)
        return 2

    # A missing or empty format falls back to markdown.
    chosen_format = getattr(args, "format", "markdown") or "markdown"

    if chosen_format != "json":
        print(generate_summary_markdown(data))
        return 0

    print(json.dumps(generate_summary_json(data), indent=2, ensure_ascii=False))
    return 0
802
+
803
+
804
def cmd_grade(args):
    """Handle the 'grade' command for evaluation framework integration.

    Collects validation results from eval runs and generates grading data.
    Used by npm run skill:review for automated evaluation of feature-planner.

    For every ``eval-*`` directory under ``<workspace>/<iteration>`` that
    contains ``outputs/feature-list.json``, the file is validated in "new"
    planning mode and a ``grading.json`` is written into that directory.
    An aggregated ``benchmark.json`` is then written into the iteration
    directory.

    Returns:
        0 once the benchmark file is written (even when no run was graded),
        2 when an argument is missing or a directory is missing/unreadable.
    """
    workspace = getattr(args, "workspace", None)
    iteration = getattr(args, "iteration", None)

    if not workspace or not iteration:
        _err("--workspace and --iteration are required for the grade command")
        return 2

    workspace_path = os.path.expanduser(workspace)

    if not os.path.isdir(workspace_path):
        _err("Workspace directory not found: {}".format(workspace_path))
        return 2

    # Collect run outputs from iteration subdirectory
    iteration_dir = os.path.join(workspace_path, iteration)
    if not os.path.isdir(iteration_dir):
        _err("Iteration directory not found: {}".format(iteration_dir))
        return 2

    # Find all eval-* subdirectories. Only the listing itself can fail, so
    # only OSError is caught; programming errors are not hidden.
    try:
        entries = os.listdir(iteration_dir)
    except OSError as exc:
        _err("Failed to list iteration directory: {}".format(exc))
        return 2

    eval_dirs = []
    for item in entries:
        item_path = os.path.join(iteration_dir, item)
        if os.path.isdir(item_path) and item.startswith("eval-"):
            eval_dirs.append((item, item_path))

    if not eval_dirs:
        _warn("No eval-* directories found in {}".format(iteration_dir))

    grades = []

    for eval_name, eval_path in sorted(eval_dirs):
        output_json = os.path.join(eval_path, "outputs", "feature-list.json")

        if not os.path.isfile(output_json):
            # The message names the path that is actually checked.
            _info("Skipping {}: no outputs/feature-list.json found".format(eval_name))
            continue

        # Load and validate the output
        data, load_err = _load_json(output_json)
        if load_err:
            _warn("Failed to load {}: {}".format(output_json, load_err))
            continue

        # Run validation
        result = validate_feature_list(data, planning_mode="new")
        stats = result["stats"]
        has_cycles = stats.get("has_cycles", False)
        feature_count = stats.get("total_features", 0)
        error_count = len(result.get("errors", []))

        # Create grading entry
        grade_entry = {
            "test_name": eval_name,
            "passed": result["valid"],
            "assertions": [
                {
                    "name": "Feature list valid schema",
                    "passed": result["valid"],
                    "evidence": "valid={}".format(result["valid"]),
                },
                {
                    "name": "No cycles in DAG",
                    "passed": not has_cycles,
                    "evidence": "cycles={}".format(has_cycles),
                },
                {
                    "name": "Features generated",
                    "passed": feature_count > 0,
                    "evidence": "count={}".format(feature_count),
                },
                {
                    "name": "No validation errors",
                    "passed": error_count == 0,
                    "evidence": "error_count={}".format(error_count),
                },
            ],
        }

        grades.append(grade_entry)

        # Write grading.json to eval run directory
        grading_file = os.path.join(eval_path, "grading.json")
        _write_json(grading_file, grade_entry)
        _info("Wrote grading to {}".format(grading_file))

    # Write aggregated results. The pass count is computed once and reused
    # for the rate; the rate is 0 when nothing was graded.
    passed_runs = sum(1 for g in grades if g["passed"])
    aggregated = {
        "iteration": iteration,
        "total_runs": len(grades),
        "passed_runs": passed_runs,
        "pass_rate": passed_runs / len(grades) if grades else 0,
        "grades": grades,
    }

    benchmark_file = os.path.join(iteration_dir, "benchmark.json")
    _write_json(benchmark_file, aggregated)
    _info("Wrote aggregated benchmark to {}".format(benchmark_file))

    return 0
910
+
911
+
912
def main():
    """Parse the command line, run the selected sub-command, return its exit code.

    Sub-commands: validate, template, generate, summary, grade. When no
    sub-command is given, the help text is printed to stderr and 2 is
    returned.
    """
    usage_examples = "".join([
        "Examples:\n",
        " %(prog)s validate --input .prizmkit/plans/feature-list.json\n",
        " %(prog)s validate --input .prizmkit/plans/feature-list.json --mode incremental\n",
        " %(prog)s validate --input .prizmkit/plans/feature-list.json --output validated.json\n",
        " %(prog)s template --output .prizmkit/plans/feature-list.json\n",
        " %(prog)s generate --input draft.json --output .prizmkit/plans/feature-list.json\n",
        " %(prog)s summary --input .prizmkit/plans/feature-list.json\n",
        " %(prog)s summary --input .prizmkit/plans/feature-list.json --format json\n",
    ])

    parser = argparse.ArgumentParser(
        description="Validate and generate .prizmkit/plans/feature-list.json files for the dev-pipeline system.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    commands = parser.add_subparsers(dest="command", help="Command to execute")

    def _add_mode_option(sub):
        # validate and generate share an identical --mode option.
        sub.add_argument(
            "--mode",
            choices=["new", "incremental"],
            default="new",
            help="Validation mode (default: new)",
        )

    # validate
    validate_cmd = commands.add_parser(
        "validate",
        help="Validate an existing .prizmkit/plans/feature-list.json",
    )
    validate_cmd.add_argument(
        "--input", required=True, help="Path to input .prizmkit/plans/feature-list.json"
    )
    validate_cmd.add_argument(
        "--output", help="Path to write validated output (optional)"
    )
    _add_mode_option(validate_cmd)

    # template
    template_cmd = commands.add_parser(
        "template",
        help="Generate a blank template .prizmkit/plans/feature-list.json",
    )
    template_cmd.add_argument(
        "--output", required=True, help="Path to write template file"
    )

    # generate
    generate_cmd = commands.add_parser(
        "generate",
        help="Validate a draft and generate final feature-list.json with defaults filled in",
    )
    generate_cmd.add_argument(
        "--input", required=True, help="Path to draft JSON (or '-' for stdin)"
    )
    generate_cmd.add_argument(
        "--output", required=True, help="Path to write final feature-list.json"
    )
    _add_mode_option(generate_cmd)

    # summary
    summary_cmd = commands.add_parser(
        "summary",
        help="Print a summary table of features from a .prizmkit/plans/feature-list.json",
    )
    summary_cmd.add_argument(
        "--input", required=True, help="Path to input .prizmkit/plans/feature-list.json"
    )
    summary_cmd.add_argument(
        "--format",
        choices=["json", "markdown"],
        default="markdown",
        help="Output format (default: markdown)",
    )

    # grade
    grade_cmd = commands.add_parser(
        "grade",
        help="Generate grading results from eval runs (for npm run skill:review)",
    )
    grade_cmd.add_argument(
        "--workspace",
        required=True,
        help="Path to eval workspace (e.g., /.codebuddy/skill-evals/feature-planner-workspace)",
    )
    grade_cmd.add_argument(
        "--iteration",
        required=True,
        help="Iteration ID (e.g., iteration-1)",
    )

    args = parser.parse_args()
    chosen = args.command

    if not chosen:
        parser.print_help(sys.stderr)
        return 2

    handlers = {
        "validate": cmd_validate,
        "template": cmd_template,
        "generate": cmd_generate,
        "summary": cmd_summary,
        "grade": cmd_grade,
    }

    if chosen not in handlers:
        _err("Unknown command: {}".format(chosen))
        return 2

    return handlers[chosen](args)
1026
+
1027
+
1028
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())