@wazir-dev/cli 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/CHANGELOG.md +74 -10
  2. package/README.md +15 -15
  3. package/assets/demo.cast +47 -0
  4. package/assets/demo.gif +0 -0
  5. package/docs/anti-patterns/AP-23-skipping-enabled-workflows.md +28 -0
  6. package/docs/anti-patterns/AP-24-clarifier-deciding-scope.md +34 -0
  7. package/docs/concepts/architecture.md +1 -1
  8. package/docs/concepts/roles-and-workflows.md +2 -0
  9. package/docs/concepts/why-wazir.md +59 -0
  10. package/docs/decisions/2026-03-19-deferred-items.md +564 -0
  11. package/docs/decisions/2026-03-19-enhancement-decisions.md +300 -0
  12. package/docs/readmes/INDEX.md +21 -5
  13. package/docs/readmes/features/expertise/README.md +2 -2
  14. package/docs/readmes/features/exports/README.md +2 -2
  15. package/docs/readmes/features/hooks/pre-compact-summary.md +1 -1
  16. package/docs/readmes/features/schemas/README.md +3 -0
  17. package/docs/readmes/features/skills/README.md +17 -0
  18. package/docs/readmes/features/skills/clarifier.md +5 -0
  19. package/docs/readmes/features/skills/claude-cli.md +5 -0
  20. package/docs/readmes/features/skills/codex-cli.md +5 -0
  21. package/docs/readmes/features/skills/dispatching-parallel-agents.md +5 -0
  22. package/docs/readmes/features/skills/executing-plans.md +5 -0
  23. package/docs/readmes/features/skills/executor.md +5 -0
  24. package/docs/readmes/features/skills/finishing-a-development-branch.md +5 -0
  25. package/docs/readmes/features/skills/gemini-cli.md +5 -0
  26. package/docs/readmes/features/skills/humanize.md +5 -0
  27. package/docs/readmes/features/skills/init-pipeline.md +5 -0
  28. package/docs/readmes/features/skills/receiving-code-review.md +5 -0
  29. package/docs/readmes/features/skills/requesting-code-review.md +5 -0
  30. package/docs/readmes/features/skills/reviewer.md +5 -0
  31. package/docs/readmes/features/skills/subagent-driven-development.md +5 -0
  32. package/docs/readmes/features/skills/using-git-worktrees.md +5 -0
  33. package/docs/readmes/features/skills/wazir.md +5 -0
  34. package/docs/readmes/features/skills/writing-skills.md +5 -0
  35. package/docs/readmes/features/workflows/prepare-next.md +1 -1
  36. package/docs/reference/configuration-reference.md +47 -6
  37. package/docs/reference/hooks.md +1 -0
  38. package/docs/reference/launch-checklist.md +4 -4
  39. package/docs/reference/review-loop-pattern.md +119 -9
  40. package/docs/reference/roles-reference.md +1 -0
  41. package/docs/reference/skill-tiers.md +147 -0
  42. package/docs/reference/tooling-cli.md +3 -1
  43. package/docs/truth-claims.yaml +12 -0
  44. package/expertise/antipatterns/process/ai-coding-antipatterns.md +214 -1
  45. package/exports/hosts/claude/.claude/commands/plan-review.md +3 -1
  46. package/exports/hosts/claude/.claude/commands/verify.md +30 -1
  47. package/exports/hosts/claude/.claude/settings.json +9 -0
  48. package/exports/hosts/claude/CLAUDE.md +1 -1
  49. package/exports/hosts/claude/export.manifest.json +6 -4
  50. package/exports/hosts/claude/host-package.json +3 -1
  51. package/exports/hosts/codex/AGENTS.md +1 -1
  52. package/exports/hosts/codex/export.manifest.json +6 -4
  53. package/exports/hosts/codex/host-package.json +3 -1
  54. package/exports/hosts/cursor/.cursor/hooks.json +4 -0
  55. package/exports/hosts/cursor/.cursor/rules/wazir-core.mdc +1 -1
  56. package/exports/hosts/cursor/export.manifest.json +6 -4
  57. package/exports/hosts/cursor/host-package.json +3 -1
  58. package/exports/hosts/gemini/GEMINI.md +1 -1
  59. package/exports/hosts/gemini/export.manifest.json +6 -4
  60. package/exports/hosts/gemini/host-package.json +3 -1
  61. package/hooks/context-mode-router +191 -0
  62. package/hooks/definitions/context_mode_router.yaml +19 -0
  63. package/hooks/hooks.json +31 -6
  64. package/hooks/protected-path-write-guard +8 -0
  65. package/hooks/routing-matrix.json +45 -0
  66. package/hooks/session-start +62 -1
  67. package/llms-full.txt +937 -134
  68. package/package.json +2 -4
  69. package/schemas/hook.schema.json +2 -1
  70. package/schemas/phase-report.schema.json +89 -0
  71. package/schemas/usage.schema.json +25 -1
  72. package/schemas/wazir-manifest.schema.json +19 -0
  73. package/skills/brainstorming/SKILL.md +32 -157
  74. package/skills/clarifier/SKILL.md +289 -111
  75. package/skills/claude-cli/SKILL.md +320 -0
  76. package/skills/codex-cli/SKILL.md +260 -0
  77. package/skills/debugging/SKILL.md +13 -0
  78. package/skills/design/SKILL.md +13 -0
  79. package/skills/dispatching-parallel-agents/SKILL.md +13 -0
  80. package/skills/executing-plans/SKILL.md +13 -0
  81. package/skills/executor/SKILL.md +139 -19
  82. package/skills/finishing-a-development-branch/SKILL.md +13 -0
  83. package/skills/gemini-cli/SKILL.md +260 -0
  84. package/skills/humanize/SKILL.md +13 -0
  85. package/skills/init-pipeline/SKILL.md +72 -164
  86. package/skills/prepare-next/SKILL.md +81 -10
  87. package/skills/receiving-code-review/SKILL.md +13 -0
  88. package/skills/requesting-code-review/SKILL.md +13 -0
  89. package/skills/reviewer/SKILL.md +369 -24
  90. package/skills/run-audit/SKILL.md +13 -0
  91. package/skills/scan-project/SKILL.md +13 -0
  92. package/skills/self-audit/SKILL.md +217 -16
  93. package/skills/skill-research/SKILL.md +188 -0
  94. package/skills/subagent-driven-development/SKILL.md +13 -0
  95. package/skills/subagent-driven-development/code-quality-reviewer-prompt.md +2 -0
  96. package/skills/subagent-driven-development/implementer-prompt.md +8 -0
  97. package/skills/subagent-driven-development/spec-reviewer-prompt.md +7 -0
  98. package/skills/tdd/SKILL.md +13 -0
  99. package/skills/using-git-worktrees/SKILL.md +13 -0
  100. package/skills/using-skills/SKILL.md +13 -0
  101. package/skills/verification/SKILL.md +54 -3
  102. package/skills/wazir/SKILL.md +464 -381
  103. package/skills/writing-plans/SKILL.md +14 -1
  104. package/skills/writing-skills/SKILL.md +13 -0
  105. package/templates/artifacts/implementation-plan.md +3 -0
  106. package/templates/artifacts/tasks-template.md +133 -0
  107. package/templates/examples/phase-report.example.json +48 -0
  108. package/tooling/src/adapters/composition-engine.js +256 -0
  109. package/tooling/src/adapters/model-router.js +84 -0
  110. package/tooling/src/capture/command.js +41 -2
  111. package/tooling/src/capture/run-config.js +3 -1
  112. package/tooling/src/capture/store.js +56 -0
  113. package/tooling/src/capture/usage.js +106 -0
  114. package/tooling/src/capture/user-input.js +66 -0
  115. package/tooling/src/checks/ac-matrix.js +256 -0
  116. package/tooling/src/checks/command-registry.js +12 -0
  117. package/tooling/src/checks/docs-truth.js +1 -1
  118. package/tooling/src/checks/security-sensitivity.js +69 -0
  119. package/tooling/src/checks/skills.js +111 -0
  120. package/tooling/src/cli.js +31 -20
  121. package/tooling/src/commands/stats.js +161 -0
  122. package/tooling/src/commands/validate.js +5 -1
  123. package/tooling/src/export/compiler.js +33 -37
  124. package/tooling/src/gating/agent.js +145 -0
  125. package/tooling/src/guards/phase-prerequisite-guard.js +185 -0
  126. package/tooling/src/hooks/routing-logic.js +69 -0
  127. package/tooling/src/init/auto-detect.js +258 -0
  128. package/tooling/src/init/command.js +38 -170
  129. package/tooling/src/input/scanner.js +46 -0
  130. package/tooling/src/reports/command.js +103 -0
  131. package/tooling/src/reports/phase-report.js +323 -0
  132. package/tooling/src/state/command.js +160 -0
  133. package/tooling/src/state/db.js +287 -0
  134. package/tooling/src/status/command.js +58 -1
  135. package/tooling/src/verify/proof-collector.js +299 -0
  136. package/wazir.manifest.yaml +26 -14
  137. package/workflows/plan-review.md +3 -1
  138. package/workflows/verify.md +30 -1
@@ -0,0 +1,287 @@
1
+ import crypto from 'node:crypto';
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import { DatabaseSync } from 'node:sqlite';
5
+
6
/**
 * Build the path of the SQLite state database beneath a state root.
 *
 * @param {string} stateRoot - Directory that contains the `state` folder.
 * @returns {string} `<stateRoot>/state/state.sqlite`
 */
function getStateDatabasePath(stateRoot) {
  const segments = [stateRoot, 'state', 'state.sqlite'];
  return path.join(...segments);
}
9
+
10
/**
 * Compute the SHA-256 digest of a finding description.
 *
 * @param {string} description - Text to fingerprint.
 * @returns {string} Lower-case hex digest.
 */
function hashDescription(description) {
  const hasher = crypto.createHash('sha256');
  hasher.update(description);
  return hasher.digest('hex');
}
13
+
14
/**
 * Create the state tables and their indexes when they do not exist yet.
 *
 * Every statement uses `IF NOT EXISTS`, and all of them are sent to the
 * database in a single `exec` call.
 *
 * @param {{ exec(sql: string): unknown }} db - Open database handle.
 */
function ensureStateSchema(db) {
  const tableStatements = [
    `CREATE TABLE IF NOT EXISTS learnings (
      id TEXT PRIMARY KEY,
      source_run TEXT NOT NULL,
      category TEXT NOT NULL,
      scope_roles TEXT DEFAULT '',
      scope_stacks TEXT DEFAULT '',
      scope_concerns TEXT DEFAULT '',
      confidence TEXT DEFAULT 'medium' CHECK(confidence IN ('low','medium','high')),
      recurrence_count INTEGER DEFAULT 1,
      content TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now')),
      last_applied TEXT,
      expires_at TEXT
    );`,
    `CREATE TABLE IF NOT EXISTS findings (
      id TEXT PRIMARY KEY,
      run_id TEXT NOT NULL,
      phase TEXT NOT NULL,
      source TEXT NOT NULL CHECK(source IN ('internal','codex','self-audit','gemini')),
      severity TEXT NOT NULL CHECK(severity IN ('critical','high','medium','low')),
      description TEXT NOT NULL,
      resolved INTEGER DEFAULT 0,
      finding_hash TEXT NOT NULL,
      created_at TEXT NOT NULL DEFAULT (datetime('now'))
    );`,
    `CREATE TABLE IF NOT EXISTS audit_history (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      run_id TEXT NOT NULL,
      date TEXT NOT NULL DEFAULT (date('now')),
      finding_count INTEGER DEFAULT 0,
      fix_count INTEGER DEFAULT 0,
      manual_count INTEGER DEFAULT 0,
      quality_score_before REAL,
      quality_score_after REAL
    );`,
    `CREATE TABLE IF NOT EXISTS usage_aggregate (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      run_id TEXT NOT NULL,
      date TEXT NOT NULL DEFAULT (date('now')),
      tokens_saved INTEGER DEFAULT 0,
      bytes_avoided INTEGER DEFAULT 0,
      savings_ratio REAL DEFAULT 0.0,
      index_queries INTEGER DEFAULT 0,
      routing_decisions INTEGER DEFAULT 0
    );`,
  ];

  const indexStatements = [
    'CREATE INDEX IF NOT EXISTS idx_learnings_category ON learnings(category);',
    'CREATE INDEX IF NOT EXISTS idx_findings_run_id ON findings(run_id);',
    'CREATE INDEX IF NOT EXISTS idx_findings_finding_hash ON findings(finding_hash);',
    'CREATE INDEX IF NOT EXISTS idx_audit_history_run_id ON audit_history(run_id);',
    'CREATE INDEX IF NOT EXISTS idx_usage_aggregate_run_id ON usage_aggregate(run_id);',
  ];

  // Tables first, then indexes, all in one exec call.
  db.exec([...tableStatements, ...indexStatements].join('\n\n'));
}
72
+
73
+ // ---------------------------------------------------------------------------
74
+ // Database lifecycle
75
+ // ---------------------------------------------------------------------------
76
+
77
/**
 * Open (creating if necessary) the SQLite state database under `stateRoot`
 * and make sure its schema exists.
 *
 * @param {string} stateRoot - Directory that contains the `state` folder.
 * @returns {DatabaseSync} Open database handle; release it with `closeStateDb`.
 * @throws Whatever directory creation, opening the database, or schema
 *   creation throws. The handle is closed before a schema error propagates.
 */
export function openStateDb(stateRoot) {
  const databasePath = getStateDatabasePath(stateRoot);
  fs.mkdirSync(path.dirname(databasePath), { recursive: true });

  const db = new DatabaseSync(databasePath, { timeout: 5000 });

  try {
    ensureStateSchema(db);
  } catch (error) {
    // Do not leak the open handle (and its file lock) when the schema
    // cannot be created; the caller never receives `db` to close it.
    db.close();
    throw error;
  }

  return db;
}
84
+
85
/**
 * Close a state database handle.
 *
 * @param {{ close(): void }} db - Handle to close.
 * @returns {void}
 */
export function closeStateDb(db) {
  db.close();
}
88
+
89
+ // ---------------------------------------------------------------------------
90
+ // Learnings CRUD
91
+ // ---------------------------------------------------------------------------
92
+
93
/**
 * Insert a learning row and return its generated id.
 *
 * The three scope fields default to `''` and `confidence` defaults to
 * `'medium'` when they are null or undefined. `created_at` is set to the
 * current time as an ISO-8601 string.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {{ source_run: string, category: string, content: string,
 *   scope_roles?: string, scope_stacks?: string, scope_concerns?: string,
 *   confidence?: string }} record - Values for the new row.
 * @returns {string} UUID of the inserted row.
 */
export function insertLearning(db, record) {
  const learningId = crypto.randomUUID();
  const timestamp = new Date().toISOString();

  const insert = db.prepare(`
    INSERT INTO learnings (id, source_run, category, scope_roles, scope_stacks, scope_concerns, confidence, content, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const values = [
    learningId,
    record.source_run,
    record.category,
    record.scope_roles ?? '',
    record.scope_stacks ?? '',
    record.scope_concerns ?? '',
    record.confidence ?? 'medium',
    record.content,
    timestamp,
  ];
  insert.run(...values);

  return learningId;
}
114
+
115
/**
 * Query learnings, optionally narrowed by scope, confidence and row limit.
 *
 * Scope filters (`roles`, `stacks`, `concerns`) are substring matches against
 * the corresponding `scope_*` column. The filter text is matched literally:
 * `%`, `_` and `\` are escaped so they cannot act as LIKE wildcards.
 * Falsy filters are ignored. Results are ordered newest first.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...)`.
 * @param {{ roles?: string, stacks?: string, concerns?: string,
 *   confidence?: string, limit?: number | string }} [filters]
 * @returns {object[]} Matching rows.
 * @throws {TypeError} When `limit` is given but is not an integer.
 */
export function getLearningsByScope(db, filters = {}) {
  const conditions = [];
  const params = [];

  // Escape LIKE metacharacters so user text only ever matches itself.
  const addContains = (column, value) => {
    if (!value) return;
    conditions.push(`${column} LIKE ? ESCAPE '\\'`);
    params.push(`%${String(value).replace(/[\\%_]/g, '\\$&')}%`);
  };

  addContains('scope_roles', filters.roles);
  addContains('scope_stacks', filters.stacks);
  addContains('scope_concerns', filters.concerns);

  if (filters.confidence) {
    conditions.push('confidence = ?');
    params.push(filters.confidence);
  }

  let limit = '';
  if (filters.limit) {
    // Validate before interpolating: Number('abc') would otherwise produce
    // the invalid SQL `LIMIT NaN`.
    const rowLimit = Number(filters.limit);
    if (!Number.isSafeInteger(rowLimit)) {
      throw new TypeError(`limit must be an integer, received: ${String(filters.limit)}`);
    }
    limit = `LIMIT ${rowLimit}`;
  }

  const where = conditions.length > 0 ? `WHERE ${conditions.join(' AND ')}` : '';

  return db.prepare(`
    SELECT * FROM learnings ${where} ORDER BY created_at DESC ${limit}
  `).all(...params);
}
146
+
147
/**
 * Increment a learning's `recurrence_count` and set `last_applied` to the
 * current time (ISO-8601).
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {string} id - Id of the learning row to update.
 * @returns {void}
 */
export function updateLearningRecurrence(db, id) {
  const appliedAt = new Date().toISOString();

  const update = db.prepare(`
    UPDATE learnings
    SET recurrence_count = recurrence_count + 1,
        last_applied = ?
    WHERE id = ?
  `);

  update.run(appliedAt, id);
}
157
+
158
/**
 * List learnings whose `recurrence_count` is at least `minCount`, most
 * frequently recurring first.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...)`.
 * @param {number} minCount - Inclusive lower bound for `recurrence_count`.
 * @returns {object[]} Matching rows.
 */
export function getRecurringLearnings(db, minCount) {
  const query = db.prepare(`
    SELECT * FROM learnings
    WHERE recurrence_count >= ?
    ORDER BY recurrence_count DESC
  `);

  return query.all(minCount);
}
165
+
166
+ // ---------------------------------------------------------------------------
167
+ // Findings CRUD
168
+ // ---------------------------------------------------------------------------
169
+
170
/**
 * Insert a finding row and return its generated id.
 *
 * When `record.finding_hash` is null or undefined, the hash is derived from
 * `record.description` via `hashDescription`. `created_at` is set to the
 * current time as an ISO-8601 string.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {{ run_id: string, phase: string, source: string, severity: string,
 *   description: string, finding_hash?: string }} record - Values for the row.
 * @returns {string} UUID of the inserted row.
 */
export function insertFinding(db, record) {
  const findingId = crypto.randomUUID();
  const fingerprint = record.finding_hash ?? hashDescription(record.description);
  const timestamp = new Date().toISOString();

  const insert = db.prepare(`
    INSERT INTO findings (id, run_id, phase, source, severity, description, finding_hash, created_at)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?)
  `);

  const values = [
    findingId,
    record.run_id,
    record.phase,
    record.source,
    record.severity,
    record.description,
    fingerprint,
    timestamp,
  ];
  insert.run(...values);

  return findingId;
}
191
+
192
/**
 * Fetch every finding recorded for a run, oldest first.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...)`.
 * @param {string} runId - Value matched against `run_id`.
 * @returns {object[]} Matching rows.
 */
export function getFindingsByRun(db, runId) {
  const query = db.prepare(`
    SELECT * FROM findings
    WHERE run_id = ?
    ORDER BY created_at ASC
  `);

  return query.all(runId);
}
199
+
200
/**
 * Group findings by `finding_hash` and return the hashes that occur at least
 * `minOccurrences` times, most frequent first.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...)`.
 * @param {number} minOccurrences - Inclusive lower bound on occurrences.
 * @returns {{ finding_hash: string, count: number }[]} One row per hash.
 */
export function getRecurringFindingHashes(db, minOccurrences) {
  const query = db.prepare(`
    SELECT finding_hash, COUNT(*) AS count
    FROM findings
    GROUP BY finding_hash
    HAVING COUNT(*) >= ?
    ORDER BY count DESC
  `);

  return query.all(minOccurrences);
}
209
+
210
/**
 * Mark a finding as resolved by setting its `resolved` column to 1.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {string} id - Id of the finding row.
 * @returns {void}
 */
export function resolveFinding(db, id) {
  const update = db.prepare(`
    UPDATE findings SET resolved = 1 WHERE id = ?
  `);

  update.run(id);
}
215
+
216
+ // ---------------------------------------------------------------------------
217
+ // Audit history
218
+ // ---------------------------------------------------------------------------
219
+
220
/**
 * Append a row to `audit_history`.
 *
 * The three count fields default to 0 and the two quality scores default to
 * `null` when they are null or undefined on `record`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {{ run_id: string, finding_count?: number, fix_count?: number,
 *   manual_count?: number, quality_score_before?: number | null,
 *   quality_score_after?: number | null }} record - Values for the row.
 * @returns {void}
 */
export function insertAuditRecord(db, record) {
  const insert = db.prepare(`
    INSERT INTO audit_history (run_id, finding_count, fix_count, manual_count, quality_score_before, quality_score_after)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  const values = [
    record.run_id,
    record.finding_count ?? 0,
    record.fix_count ?? 0,
    record.manual_count ?? 0,
    record.quality_score_before ?? null,
    record.quality_score_after ?? null,
  ];

  insert.run(...values);
}
233
+
234
/**
 * Return audit history rows, newest first (by `date`, then `id`).
 *
 * @param {object} db - Database handle exposing `prepare(sql).all()`.
 * @param {number | string} [limit] - Maximum number of rows. A falsy value
 *   (including 0) means no limit, as before.
 * @returns {object[]} Audit rows.
 * @throws {TypeError} When `limit` is given but is not an integer.
 */
export function getAuditTrend(db, limit) {
  let limitClause = '';

  if (limit) {
    // Validate before interpolating: Number('abc') would otherwise produce
    // the invalid SQL `LIMIT NaN`.
    const rowLimit = Number(limit);
    if (!Number.isSafeInteger(rowLimit)) {
      throw new TypeError(`limit must be an integer, received: ${String(limit)}`);
    }
    limitClause = `LIMIT ${rowLimit}`;
  }

  return db.prepare(`
    SELECT * FROM audit_history
    ORDER BY date DESC, id DESC
    ${limitClause}
  `).all();
}
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // Usage
246
+ // ---------------------------------------------------------------------------
247
+
248
/**
 * Append a row to `usage_aggregate`.
 *
 * Every numeric field defaults to 0 (`savings_ratio` to 0.0) when it is null
 * or undefined on `record`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).run(...)`.
 * @param {{ run_id: string, tokens_saved?: number, bytes_avoided?: number,
 *   savings_ratio?: number, index_queries?: number,
 *   routing_decisions?: number }} record - Values for the row.
 * @returns {void}
 */
export function insertUsageRecord(db, record) {
  const insert = db.prepare(`
    INSERT INTO usage_aggregate (run_id, tokens_saved, bytes_avoided, savings_ratio, index_queries, routing_decisions)
    VALUES (?, ?, ?, ?, ?, ?)
  `);

  const values = [
    record.run_id,
    record.tokens_saved ?? 0,
    record.bytes_avoided ?? 0,
    record.savings_ratio ?? 0.0,
    record.index_queries ?? 0,
    record.routing_decisions ?? 0,
  ];

  insert.run(...values);
}
261
+
262
/**
 * Aggregate all `usage_aggregate` rows into a single summary row.
 *
 * The query returns totals for tokens saved, bytes avoided and index
 * queries (0 when there are no rows), the average savings ratio (0.0 when
 * there are no rows) and the number of rows as `run_count`.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {{ total_tokens_saved: number, total_bytes_avoided: number,
 *   avg_savings_ratio: number, total_index_queries: number,
 *   run_count: number }} The summary row.
 */
export function getUsageSummary(db) {
  const summaryQuery = db.prepare(`
    SELECT
      COALESCE(SUM(tokens_saved), 0) AS total_tokens_saved,
      COALESCE(SUM(bytes_avoided), 0) AS total_bytes_avoided,
      CASE WHEN COUNT(*) > 0 THEN AVG(savings_ratio) ELSE 0.0 END AS avg_savings_ratio,
      COALESCE(SUM(index_queries), 0) AS total_index_queries,
      COUNT(*) AS run_count
    FROM usage_aggregate
  `);

  return summaryQuery.get();
}
275
+
276
+ // ---------------------------------------------------------------------------
277
+ // Stats (for CLI)
278
+ // ---------------------------------------------------------------------------
279
+
280
/**
 * Count the rows in each of the four state tables.
 *
 * @param {object} db - Database handle exposing `prepare(sql).get()`.
 * @returns {{ learning_count: number, finding_count: number,
 *   audit_count: number, usage_count: number }} Row counts per table.
 */
export function getStateCounts(db) {
  // Table names are fixed literals, so interpolating them is safe.
  const countRows = (table) =>
    db.prepare(`SELECT COUNT(*) AS count FROM ${table}`).get().count;

  return {
    learning_count: countRows('learnings'),
    finding_count: countRows('findings'),
    audit_count: countRows('audit_history'),
    usage_count: countRows('usage_aggregate'),
  };
}
@@ -5,6 +5,46 @@ import { parseCommandOptions } from '../command-options.js';
5
5
  import { readYamlFile } from '../loaders.js';
6
6
  import { findProjectRoot } from '../project-root.js';
7
7
  import { resolveStateRoot } from '../state-root.js';
8
+ import { estimateTokens } from '../capture/usage.js';
9
+
10
/**
 * Build a one-line "Context savings" summary from a run's `usage.json`.
 *
 * Reads `<stateRoot>/runs/<runId>/usage.json` and adds up the token savings
 * recorded under `savings.capture_routing`, `savings.context_mode`,
 * `savings.compaction` and `savings.index_queries`.
 *
 * NOTE(review): the percentage denominator does not include an index-query
 * component, so index-query-only savings report "0% reduction" — confirm
 * this is intended.
 *
 * @param {string} stateRoot - State root directory.
 * @param {string} runId - Run whose usage file is read.
 * @returns {string | null} Summary line, or `null` when the file is missing,
 *   the total saved is exactly 0, or anything throws while reading it.
 */
function readUsageSavingsSummary(stateRoot, runId) {
  const usageFile = path.join(stateRoot, 'runs', runId, 'usage.json');
  if (!fs.existsSync(usageFile)) return null;

  try {
    const { savings } = JSON.parse(fs.readFileSync(usageFile, 'utf8'));
    const captureRouting = savings?.capture_routing ?? {};
    const contextMode = savings?.context_mode ?? {};
    const compaction = savings?.compaction ?? {};
    const indexQueries = savings?.index_queries ?? {};

    // Kilobyte figures are converted to bytes before token estimation.
    const kbToTokens = (kb) => estimateTokens(Math.round((kb ?? 0) * 1024));

    const routingSaved = captureRouting.estimated_tokens_avoided ?? 0;
    const contextRaw = kbToTokens(contextMode.raw_kb);
    const contextAfter = kbToTokens(contextMode.context_kb);
    const preCompaction = compaction.pre_compaction_tokens_est ?? 0;
    const postCompaction = compaction.post_compaction_tokens_est ?? 0;
    const indexSaved = indexQueries.estimated_tokens_saved ?? 0;

    const totalSaved =
      routingSaved + (contextRaw - contextAfter) + (preCompaction - postCompaction) + indexSaved;
    if (totalSaved === 0) return null;

    const routingRaw = routingSaved + estimateTokens(captureRouting.summary_bytes ?? 0);
    const baseline = routingRaw + contextRaw + preCompaction;

    let pct = '0%';
    if (baseline > 0) {
      pct = `${((totalSaved / baseline) * 100).toFixed(0)}%`;
    }

    return `Context savings: ~${totalSaved.toLocaleString('en-US')} tokens saved (${pct} reduction)`;
  } catch {
    return null;
  }
}
8
48
 
9
49
  function success(payload, options = {}) {
10
50
  if (options.json) {
@@ -14,9 +54,20 @@ function success(payload, options = {}) {
14
54
  };
15
55
  }
16
56
 
57
+ const parentPhase = payload.parent_phase ?? payload.phase;
58
+ const workflow = payload.workflow;
59
+ const phaseLabel = workflow
60
+ ? `Phase: ${parentPhase} > Workflow: ${workflow}`
61
+ : `Phase: ${parentPhase}`;
62
+ let output = `${payload.run_id} ${phaseLabel} ${payload.status}\n`;
63
+
64
+ if (payload.savings_summary) {
65
+ output += `${payload.savings_summary}\n`;
66
+ }
67
+
17
68
  return {
18
69
  exitCode: 0,
19
- stdout: `${payload.run_id} ${payload.phase} ${payload.status}\n`,
70
+ stdout: output,
20
71
  };
21
72
  }
22
73
 
@@ -61,6 +112,12 @@ export function runStatusCommand(parsed, context = {}) {
61
112
  status_path: statusPath,
62
113
  };
63
114
 
115
+ const savingsSummary = readUsageSavingsSummary(stateRoot, options.run);
116
+
117
+ if (savingsSummary) {
118
+ payload.savings_summary = savingsSummary;
119
+ }
120
+
64
121
  return success(payload, { json: options.json });
65
122
  } catch (error) {
66
123
  return {
@@ -0,0 +1,299 @@
1
+ import fs from 'node:fs';
2
+ import path from 'node:path';
3
+ import { execFileSync } from 'node:child_process';
4
+
5
const WEB_FRAMEWORKS = ['next', 'vite', 'react-scripts', '@angular/cli', 'nuxt', 'astro', 'gatsby'];
const API_FRAMEWORKS = ['express', 'fastify', 'hono', 'koa', '@nestjs/core', '@hapi/hapi'];

/**
 * Detect whether a project produces runnable output and what type.
 *
 * Classification is based on `<projectRoot>/package.json`, checked in this
 * order: a known web framework in `dependencies`/`devDependencies` → 'web';
 * a known API framework → 'api'; a `bin` field → 'cli'; otherwise 'library'.
 * A missing, unreadable, unparsable or non-object package.json is treated as
 * 'library'.
 *
 * @param {string} projectRoot
 * @returns {'web' | 'api' | 'cli' | 'library'}
 */
export function detectRunnableType(projectRoot) {
  const pkgPath = path.join(projectRoot, 'package.json');

  let pkg;
  try {
    // Reading directly (rather than existsSync first) also covers a file
    // that disappears or is unreadable between the check and the read.
    pkg = JSON.parse(fs.readFileSync(pkgPath, 'utf8'));
  } catch {
    return 'library';
  }

  // `null`, numbers and strings are valid JSON but not a package manifest;
  // dereferencing `null.dependencies` would throw.
  if (pkg === null || typeof pkg !== 'object') return 'library';

  const allDeps = { ...pkg.dependencies, ...pkg.devDependencies };
  // Own-property check: `in` would also match inherited names.
  const dependsOnAny = (frameworks) => frameworks.some((fw) => Object.hasOwn(allDeps, fw));

  if (dependsOnAny(WEB_FRAMEWORKS)) return 'web';
  if (dependsOnAny(API_FRAMEWORKS)) return 'api';
  if (pkg.bin) return 'cli';

  return 'library';
}
33
+
34
/**
 * Run a command safely using execFileSync (no shell, so no shell injection).
 *
 * Never throws: a failure is reported through the returned object. When the
 * process exits with a status, that status and its captured output are
 * returned. When there is no exit status (the executable could not be
 * spawned, or the run was killed, e.g. by the 60 s timeout) `exit_code` is 1
 * and, if nothing was captured on stderr, `stderr` carries the error message
 * so the cause is not lost.
 *
 * @param {string} cmd - The executable
 * @param {string[]} args - Arguments array
 * @param {string} cwd
 * @returns {{ exit_code: number, stdout: string, stderr: string }}
 */
function runCommand(cmd, args, cwd) {
  try {
    const stdout = execFileSync(cmd, args, {
      cwd,
      encoding: 'utf8',
      timeout: 60000,
      // The default 1 MiB cap makes verbose-but-successful runs (large test
      // suites, builds) fail with ENOBUFS; allow considerably more output.
      maxBuffer: 16 * 1024 * 1024,
      stdio: ['pipe', 'pipe', 'pipe'],
    });
    return { exit_code: 0, stdout: stdout.trim(), stderr: '' };
  } catch (err) {
    const capturedStderr = String(err.stderr ?? '').trim();
    const exitedWithStatus = typeof err.status === 'number';

    return {
      exit_code: err.status ?? 1,
      stdout: String(err.stdout ?? '').trim(),
      // Only substitute the error message when the process never produced an
      // exit status; a normal non-zero exit keeps its real (maybe empty) stderr.
      stderr: capturedStderr || (exitedWithStatus ? '' : String(err.message ?? err)),
    };
  }
}
59
+
60
/**
 * Condense command output into a short string.
 *
 * Output of five lines or fewer is kept as is. Longer output is reduced to
 * its first three lines, a `... (N lines total)` marker and its last two
 * lines. The result is always cut to at most `maxLen` characters.
 *
 * @param {string} stdout
 * @param {number} maxLen
 * @returns {string}
 */
function summarize(stdout, maxLen = 200) {
  if (!stdout) return '';

  const lines = stdout.split('\n');
  let text = stdout;

  if (lines.length > 5) {
    const head = lines.slice(0, 3).join('\n');
    const tail = lines.slice(-2).join('\n');
    text = `${head}\n... (${lines.length} lines total)\n${tail}`;
  }

  return text.slice(0, maxLen);
}
75
+
76
/**
 * Tell whether `<projectRoot>/package.json` defines a non-empty script with
 * the given name. Any read or parse failure counts as "no".
 *
 * @param {string} projectRoot
 * @param {string} scriptName
 * @returns {boolean}
 */
function hasScript(projectRoot, scriptName) {
  const manifestPath = path.join(projectRoot, 'package.json');

  try {
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    return Boolean(manifest.scripts?.[scriptName]);
  } catch {
    return false;
  }
}
91
+
92
/**
 * Tell whether at least one of the candidate file names exists directly
 * under `projectRoot`.
 *
 * @param {string} projectRoot
 * @param {string[]} candidates
 * @returns {boolean}
 */
function hasConfigFile(projectRoot, candidates) {
  for (const fileName of candidates) {
    if (fs.existsSync(path.join(projectRoot, fileName))) {
      return true;
    }
  }
  return false;
}
102
+
103
/**
 * Collect library-type proof: tests, type-check, lint, format.
 *
 * Checks run in a fixed order and each one is optional:
 *  - `npm test` when package.json has a `test` script;
 *  - type check, lint and format check when the matching npm script
 *    (`typecheck`, `lint`, `format:check`) or one of the tool's config files
 *    exists. The npm script is preferred; otherwise the tool is run via `npx`.
 *
 * For `npm test`, the summary is taken from stdout, falling back to stderr
 * when stdout is empty (consistent with the other checks' failure output).
 *
 * @param {string} projectRoot
 * @returns {{ tool: string, command: string, exit_code: number, stdout_summary: string, passed: boolean }[]}
 */
function collectLibraryEvidence(projectRoot) {
  const evidence = [];

  // Run one command and append its evidence entry.
  const record = (tool, [cmd, args], pickSummary) => {
    const result = runCommand(cmd, args, projectRoot);
    evidence.push({
      tool,
      command: [cmd, ...args].join(' '),
      exit_code: result.exit_code,
      stdout_summary: summarize(pickSummary(result)),
      passed: result.exit_code === 0,
    });
  };

  const outputOf = (result) => result.stdout || result.stderr;

  // npm test
  if (hasScript(projectRoot, 'test')) {
    record('npm test', ['npm', ['test']], outputOf);
  }

  // Tool checks share one shape: prefer the project's npm script, otherwise
  // fall back to invoking the tool through npx when its config is present.
  const toolChecks = [
    {
      tool: 'tsc',
      script: 'typecheck',
      configs: ['tsconfig.json'],
      fallback: ['npx', ['tsc', '--noEmit']],
      okMessage: 'No type errors',
    },
    {
      tool: 'eslint',
      script: 'lint',
      configs: ['.eslintrc', '.eslintrc.js', '.eslintrc.json', '.eslintrc.yml', 'eslint.config.js', 'eslint.config.mjs'],
      fallback: ['npx', ['eslint', '.']],
      okMessage: 'No lint errors',
    },
    {
      tool: 'prettier',
      script: 'format:check',
      configs: ['.prettierrc', '.prettierrc.js', '.prettierrc.json', '.prettierrc.yml', 'prettier.config.js', 'prettier.config.mjs'],
      fallback: ['npx', ['prettier', '--check', '.']],
      okMessage: 'All files formatted',
    },
  ];

  for (const check of toolChecks) {
    // Read package.json once per check instead of once per use.
    const scripted = hasScript(projectRoot, check.script);
    if (!scripted && !hasConfigFile(projectRoot, check.configs)) continue;

    const invocation = scripted ? ['npm', ['run', check.script]] : check.fallback;
    record(check.tool, invocation, (result) =>
      (result.exit_code === 0 ? check.okMessage : outputOf(result)));
  }

  return evidence;
}
180
+
181
/**
 * Collect web-type proof: `npm run build` (when a `build` script exists)
 * followed by every library check.
 *
 * @param {string} projectRoot
 * @returns {{ tool: string, command: string, exit_code: number, stdout_summary: string, passed: boolean }[]}
 */
function collectWebEvidence(projectRoot) {
  const buildEvidence = [];

  if (hasScript(projectRoot, 'build')) {
    const { exit_code: exitCode, stdout } = runCommand('npm', ['run', 'build'], projectRoot);
    buildEvidence.push({
      tool: 'build',
      command: 'npm run build',
      exit_code: exitCode,
      stdout_summary: summarize(stdout),
      passed: exitCode === 0,
    });
  }

  // Build result first, then tests, lint, etc.
  return [...buildEvidence, ...collectLibraryEvidence(projectRoot)];
}
207
+
208
/**
 * Collect API-type proof. Currently identical to the library checks; server
 * start/stop is complex and deferred to manual verification.
 *
 * @param {string} projectRoot
 * @returns {{ tool: string, command: string, exit_code: number, stdout_summary: string, passed: boolean }[]}
 */
function collectApiEvidence(projectRoot) {
  const libraryEvidence = collectLibraryEvidence(projectRoot);
  return libraryEvidence;
}
217
+
218
/**
 * Collect CLI-type proof: the `--help` output of the package's bin entry
 * (when that file exists) followed by every library check.
 *
 * The bin entry is `pkg.bin` when it is a string, otherwise the first value
 * of the `pkg.bin` object. Any error while probing `--help` is ignored and
 * only the library checks are reported.
 *
 * @param {string} projectRoot
 * @returns {{ tool: string, command: string, exit_code: number, stdout_summary: string, passed: boolean }[]}
 */
function collectCliEvidence(projectRoot) {
  // Returns zero or one evidence entries for `node <bin> --help`.
  const probeHelp = () => {
    const manifestPath = path.join(projectRoot, 'package.json');
    const pkg = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));

    const binEntry = typeof pkg.bin === 'string'
      ? pkg.bin
      : Object.values(pkg.bin || {})[0];
    if (!binEntry) return [];

    const binPath = path.join(projectRoot, binEntry);
    if (!fs.existsSync(binPath)) return [];

    const { exit_code: exitCode, stdout } = runCommand('node', [binPath, '--help'], projectRoot);
    return [{
      tool: 'cli --help',
      command: `node ${binEntry} --help`,
      exit_code: exitCode,
      stdout_summary: summarize(stdout),
      passed: exitCode === 0,
    }];
  };

  let helpEvidence = [];
  try {
    helpEvidence = probeHelp();
  } catch { /* ignore */ }

  return [...helpEvidence, ...collectLibraryEvidence(projectRoot)];
}
249
+
250
/**
 * Collect proof of implementation for a task.
 *
 * Detects the project's runnable type, runs the matching evidence collector
 * and reports whether every collected check passed. A run with no applicable
 * checks (empty evidence) is reported as 'pass'.
 *
 * When both `runConfig.runId` and `runConfig.stateRoot` are set and the
 * directory `<stateRoot>/runs/<runId>/artifacts` already exists, the result
 * is also written there as `proof-<task id>.json`. Path separators in the
 * task id are replaced with `_` so the file always lands inside that
 * directory. The directory is not created when missing.
 *
 * @param {{ id: string, title: string }} taskSpec
 * @param {{ projectRoot: string, runId?: string, stateRoot?: string }} runConfig
 * @returns {Promise<{ task_id: string, type: string, timestamp: string, evidence: object[], status: string, all_passed: boolean }>}
 */
export async function collectProof(taskSpec, runConfig) {
  const { projectRoot } = runConfig;
  const type = detectRunnableType(projectRoot);

  let evidence;
  switch (type) {
    case 'web':
      evidence = collectWebEvidence(projectRoot);
      break;
    case 'api':
      evidence = collectApiEvidence(projectRoot);
      break;
    case 'cli':
      evidence = collectCliEvidence(projectRoot);
      break;
    default:
      evidence = collectLibraryEvidence(projectRoot);
  }

  // `every` is already true for an empty array, so no length check is needed.
  const allPassed = evidence.every((e) => e.passed);

  const result = {
    task_id: taskSpec.id,
    type,
    timestamp: new Date().toISOString(),
    evidence,
    status: allPassed ? 'pass' : 'fail',
    all_passed: allPassed,
  };

  // Save to artifacts if runId provided
  if (runConfig.runId && runConfig.stateRoot) {
    const artifactDir = path.join(runConfig.stateRoot, 'runs', runConfig.runId, 'artifacts');
    if (fs.existsSync(artifactDir)) {
      // Keep the file inside artifactDir even if the id contains `/` or `\`
      // (e.g. "../x"), which would otherwise be resolved as path segments.
      const safeTaskId = String(taskSpec.id).replace(/[\\/]/g, '_');
      fs.writeFileSync(
        path.join(artifactDir, `proof-${safeTaskId}.json`),
        JSON.stringify(result, null, 2) + '\n',
      );
    }
  }

  return result;
}