@mainahq/core 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +31 -0
  2. package/package.json +37 -0
  3. package/src/ai/__tests__/ai.test.ts +207 -0
  4. package/src/ai/__tests__/design-approaches.test.ts +192 -0
  5. package/src/ai/__tests__/spec-questions.test.ts +191 -0
  6. package/src/ai/__tests__/tiers.test.ts +110 -0
  7. package/src/ai/commit-msg.ts +28 -0
  8. package/src/ai/design-approaches.ts +76 -0
  9. package/src/ai/index.ts +205 -0
  10. package/src/ai/pr-summary.ts +60 -0
  11. package/src/ai/spec-questions.ts +74 -0
  12. package/src/ai/tiers.ts +52 -0
  13. package/src/ai/try-generate.ts +89 -0
  14. package/src/ai/validate.ts +66 -0
  15. package/src/benchmark/__tests__/reporter.test.ts +525 -0
  16. package/src/benchmark/__tests__/runner.test.ts +113 -0
  17. package/src/benchmark/__tests__/story-loader.test.ts +152 -0
  18. package/src/benchmark/reporter.ts +332 -0
  19. package/src/benchmark/runner.ts +91 -0
  20. package/src/benchmark/story-loader.ts +88 -0
  21. package/src/benchmark/types.ts +95 -0
  22. package/src/cache/__tests__/keys.test.ts +97 -0
  23. package/src/cache/__tests__/manager.test.ts +312 -0
  24. package/src/cache/__tests__/ttl.test.ts +94 -0
  25. package/src/cache/keys.ts +44 -0
  26. package/src/cache/manager.ts +231 -0
  27. package/src/cache/ttl.ts +77 -0
  28. package/src/config/__tests__/config.test.ts +376 -0
  29. package/src/config/index.ts +198 -0
  30. package/src/context/__tests__/budget.test.ts +179 -0
  31. package/src/context/__tests__/engine.test.ts +163 -0
  32. package/src/context/__tests__/episodic.test.ts +291 -0
  33. package/src/context/__tests__/relevance.test.ts +323 -0
  34. package/src/context/__tests__/retrieval.test.ts +143 -0
  35. package/src/context/__tests__/selector.test.ts +174 -0
  36. package/src/context/__tests__/semantic.test.ts +252 -0
  37. package/src/context/__tests__/treesitter.test.ts +229 -0
  38. package/src/context/__tests__/working.test.ts +236 -0
  39. package/src/context/budget.ts +130 -0
  40. package/src/context/engine.ts +394 -0
  41. package/src/context/episodic.ts +251 -0
  42. package/src/context/relevance.ts +325 -0
  43. package/src/context/retrieval.ts +325 -0
  44. package/src/context/selector.ts +93 -0
  45. package/src/context/semantic.ts +331 -0
  46. package/src/context/treesitter.ts +216 -0
  47. package/src/context/working.ts +192 -0
  48. package/src/db/__tests__/db.test.ts +151 -0
  49. package/src/db/index.ts +211 -0
  50. package/src/db/schema.ts +84 -0
  51. package/src/design/__tests__/design.test.ts +310 -0
  52. package/src/design/__tests__/generate-hld-lld.test.ts +109 -0
  53. package/src/design/__tests__/review.test.ts +561 -0
  54. package/src/design/index.ts +297 -0
  55. package/src/design/review.ts +327 -0
  56. package/src/explain/__tests__/explain.test.ts +173 -0
  57. package/src/explain/index.ts +181 -0
  58. package/src/features/__tests__/analyzer.test.ts +358 -0
  59. package/src/features/__tests__/checklist.test.ts +454 -0
  60. package/src/features/__tests__/numbering.test.ts +319 -0
  61. package/src/features/__tests__/quality.test.ts +295 -0
  62. package/src/features/__tests__/traceability.test.ts +147 -0
  63. package/src/features/analyzer.ts +445 -0
  64. package/src/features/checklist.ts +366 -0
  65. package/src/features/index.ts +18 -0
  66. package/src/features/numbering.ts +404 -0
  67. package/src/features/quality.ts +349 -0
  68. package/src/features/test-stubs.ts +157 -0
  69. package/src/features/traceability.ts +260 -0
  70. package/src/feedback/__tests__/async-feedback.test.ts +52 -0
  71. package/src/feedback/__tests__/collector.test.ts +219 -0
  72. package/src/feedback/__tests__/compress.test.ts +150 -0
  73. package/src/feedback/__tests__/preferences.test.ts +169 -0
  74. package/src/feedback/collector.ts +135 -0
  75. package/src/feedback/compress.ts +92 -0
  76. package/src/feedback/preferences.ts +108 -0
  77. package/src/git/__tests__/git.test.ts +62 -0
  78. package/src/git/index.ts +110 -0
  79. package/src/hooks/__tests__/runner.test.ts +266 -0
  80. package/src/hooks/index.ts +8 -0
  81. package/src/hooks/runner.ts +130 -0
  82. package/src/index.ts +356 -0
  83. package/src/init/__tests__/init.test.ts +228 -0
  84. package/src/init/index.ts +364 -0
  85. package/src/language/__tests__/detect.test.ts +77 -0
  86. package/src/language/__tests__/profile.test.ts +51 -0
  87. package/src/language/detect.ts +70 -0
  88. package/src/language/profile.ts +110 -0
  89. package/src/prompts/__tests__/defaults.test.ts +52 -0
  90. package/src/prompts/__tests__/engine.test.ts +183 -0
  91. package/src/prompts/__tests__/evolution-resolve.test.ts +169 -0
  92. package/src/prompts/__tests__/evolution.test.ts +187 -0
  93. package/src/prompts/__tests__/loader.test.ts +105 -0
  94. package/src/prompts/candidates/review-v2.md +55 -0
  95. package/src/prompts/defaults/ai-review.md +49 -0
  96. package/src/prompts/defaults/commit.md +30 -0
  97. package/src/prompts/defaults/context.md +26 -0
  98. package/src/prompts/defaults/design-approaches.md +57 -0
  99. package/src/prompts/defaults/design-hld-lld.md +55 -0
  100. package/src/prompts/defaults/design.md +53 -0
  101. package/src/prompts/defaults/explain.md +31 -0
  102. package/src/prompts/defaults/fix.md +32 -0
  103. package/src/prompts/defaults/index.ts +38 -0
  104. package/src/prompts/defaults/review.md +41 -0
  105. package/src/prompts/defaults/spec-questions.md +59 -0
  106. package/src/prompts/defaults/tests.md +72 -0
  107. package/src/prompts/engine.ts +137 -0
  108. package/src/prompts/evolution.ts +409 -0
  109. package/src/prompts/loader.ts +71 -0
  110. package/src/review/__tests__/review.test.ts +288 -0
  111. package/src/review/comprehensive.ts +362 -0
  112. package/src/review/index.ts +417 -0
  113. package/src/stats/__tests__/tracker.test.ts +323 -0
  114. package/src/stats/index.ts +11 -0
  115. package/src/stats/tracker.ts +492 -0
  116. package/src/ticket/__tests__/ticket.test.ts +273 -0
  117. package/src/ticket/index.ts +185 -0
  118. package/src/utils.ts +87 -0
  119. package/src/verify/__tests__/ai-review.test.ts +242 -0
  120. package/src/verify/__tests__/coverage.test.ts +83 -0
  121. package/src/verify/__tests__/detect.test.ts +175 -0
  122. package/src/verify/__tests__/diff-filter.test.ts +338 -0
  123. package/src/verify/__tests__/fix.test.ts +478 -0
  124. package/src/verify/__tests__/linters/clippy.test.ts +45 -0
  125. package/src/verify/__tests__/linters/go-vet.test.ts +27 -0
  126. package/src/verify/__tests__/linters/ruff.test.ts +64 -0
  127. package/src/verify/__tests__/mutation.test.ts +141 -0
  128. package/src/verify/__tests__/pipeline.test.ts +553 -0
  129. package/src/verify/__tests__/proof.test.ts +97 -0
  130. package/src/verify/__tests__/secretlint.test.ts +190 -0
  131. package/src/verify/__tests__/semgrep.test.ts +217 -0
  132. package/src/verify/__tests__/slop.test.ts +366 -0
  133. package/src/verify/__tests__/sonar.test.ts +113 -0
  134. package/src/verify/__tests__/syntax-guard.test.ts +227 -0
  135. package/src/verify/__tests__/trivy.test.ts +191 -0
  136. package/src/verify/__tests__/visual.test.ts +139 -0
  137. package/src/verify/ai-review.ts +276 -0
  138. package/src/verify/coverage.ts +134 -0
  139. package/src/verify/detect.ts +171 -0
  140. package/src/verify/diff-filter.ts +183 -0
  141. package/src/verify/fix.ts +317 -0
  142. package/src/verify/linters/clippy.ts +52 -0
  143. package/src/verify/linters/go-vet.ts +32 -0
  144. package/src/verify/linters/ruff.ts +47 -0
  145. package/src/verify/mutation.ts +143 -0
  146. package/src/verify/pipeline.ts +328 -0
  147. package/src/verify/proof.ts +277 -0
  148. package/src/verify/secretlint.ts +168 -0
  149. package/src/verify/semgrep.ts +170 -0
  150. package/src/verify/slop.ts +493 -0
  151. package/src/verify/sonar.ts +146 -0
  152. package/src/verify/syntax-guard.ts +251 -0
  153. package/src/verify/trivy.ts +161 -0
  154. package/src/verify/visual.ts +460 -0
  155. package/src/workflow/__tests__/context.test.ts +110 -0
  156. package/src/workflow/context.ts +81 -0
@@ -0,0 +1,328 @@
1
+ /**
2
+ * Verify Pipeline Orchestrator — ties together all verification tools.
3
+ *
4
+ * Pipeline flow:
5
+ * 1. Get files to check (staged files, or provided list)
6
+ * 2. Run syntax guard FIRST — abort immediately if it fails
7
+ * 3. Auto-detect available tools
8
+ * 4. Run all available tools in PARALLEL (slop, semgrep, trivy, secretlint, sonarqube, stryker, diff-cover)
9
+ * 5. Collect all findings
10
+ * 6. Apply diff-only filter (unless diffOnly === false)
11
+ * 7. Determine pass/fail: passed = no error-severity findings
12
+ * 8. Return unified PipelineResult
13
+ */
14
+
15
+ import { createCacheManager } from "../cache/manager";
16
+ import { getNoisyRules } from "../feedback/preferences";
17
+ import { getDiff, getStagedFiles } from "../git/index";
18
+ import { detectLanguages } from "../language/detect";
19
+ import type { LanguageId } from "../language/profile";
20
+ import { getProfile } from "../language/profile";
21
+ import { type AIReviewResult, runAIReview } from "./ai-review";
22
+ import { runCoverage } from "./coverage";
23
+ import type { DetectedTool } from "./detect";
24
+ import { detectTools } from "./detect";
25
+ import type { Finding } from "./diff-filter";
26
+ import { filterByDiff } from "./diff-filter";
27
+ import { runMutation } from "./mutation";
28
+ import { runSecretlint } from "./secretlint";
29
+ import { runSemgrep } from "./semgrep";
30
+ import { detectSlop } from "./slop";
31
+ import { runSonar } from "./sonar";
32
+ import type { SyntaxDiagnostic } from "./syntax-guard";
33
+ import { syntaxGuard } from "./syntax-guard";
34
+ import { runTrivy } from "./trivy";
35
+
36
+ // ─── Types ────────────────────────────────────────────────────────────────
37
+
/** Outcome of one verification tool invocation. */
export interface ToolReport {
  /** Name of the tool this report belongs to. */
  tool: string;
  /** Findings the tool produced. */
  findings: Finding[];
  /** Whether the tool was skipped. */
  skipped: boolean;
  /** Elapsed time in milliseconds. */
  duration: number;
}
44
+
/** Unified result returned by the verification pipeline. */
export interface PipelineResult {
  /** True if no errors. */
  passed: boolean;
  /** Syntax guard result. */
  syntaxPassed: boolean;
  /** Diagnostics from the syntax guard, set when it fails. */
  syntaxErrors?: SyntaxDiagnostic[];
  /** Per-tool results. */
  tools: ToolReport[];
  /** All shown findings (after diff filter). */
  findings: Finding[];
  /** Number of pre-existing findings hidden. */
  hiddenCount: number;
  /** What was found on PATH. */
  detectedTools: DetectedTool[];
  /** Total elapsed time in milliseconds. */
  duration: number;
  /** Cache L1+L2 hits during this run. */
  cacheHits: number;
  /** Cache misses during this run. */
  cacheMisses: number;
}
57
+
/** Optional overrides accepted by the verification pipeline. */
export interface PipelineOptions {
  /** Specific files to check (default: staged files). */
  files?: string[];
  /** Base branch for the diff filter (default: "main"). */
  baseBranch?: string;
  /** Restrict findings to the diff (default: true). */
  diffOnly?: boolean;
  /** Triggers standard-tier AI review. */
  deep?: boolean;
  /** Working directory (default: process.cwd()). */
  cwd?: string;
  /** Maina data directory (default: ".maina"). */
  mainaDir?: string;
  /** Override language detection. */
  languages?: string[];
}
67
+
68
+ // ─── Tool Runner Helpers ──────────────────────────────────────────────────
69
+
/**
 * Time a single tool invocation and package its outcome as a ToolReport.
 *
 * @param toolName - Label stored in the report's `tool` field.
 * @param fn - Thunk that runs the tool and resolves to its findings and
 *   skipped flag. A rejection from `fn` propagates to the caller.
 * @returns The tool's findings/skipped flag plus the elapsed time in whole
 *   milliseconds.
 */
async function runToolWithTiming(
  toolName: string,
  fn: () => Promise<{ findings: Finding[]; skipped: boolean }>,
): Promise<ToolReport> {
  const startedAt = performance.now();
  const { findings, skipped } = await fn();
  const elapsedMs = performance.now() - startedAt;

  return { tool: toolName, findings, skipped, duration: Math.round(elapsedMs) };
}
88
+
89
+ // ─── Pipeline ─────────────────────────────────────────────────────────────
90
+
/**
 * Run the full verification pipeline.
 *
 * Orchestrates: file resolution -> syntax guard -> tool detection ->
 * parallel tool execution -> diff-only filtering -> noisy-rule suppression
 * -> AI review -> unified result.
 *
 * @param options - Optional overrides. Defaults: staged files, base branch
 *   "main", diff-only filtering on, `deep` off, cwd = process.cwd(),
 *   mainaDir = ".maina".
 * @returns A PipelineResult. `passed` is false when the syntax guard fails
 *   or when any shown finding has severity "error". When there are no files
 *   or the syntax guard fails, no tools run and `tools`/`findings` are empty.
 */
export async function runPipeline(
  options?: PipelineOptions,
): Promise<PipelineResult> {
  const start = performance.now();
  const cwd = options?.cwd ?? process.cwd();
  const diffOnly = options?.diffOnly !== false; // default: true
  const baseBranch = options?.baseBranch ?? "main";
  const mainaDir = options?.mainaDir ?? ".maina";

  // ── Step 1: Get files to check ────────────────────────────────────────
  const files = options?.files ?? (await getStagedFiles(cwd));

  // Empty file list → nothing to verify
  if (files.length === 0) {
    return {
      passed: true,
      syntaxPassed: true,
      tools: [],
      findings: [],
      hiddenCount: 0,
      detectedTools: [],
      duration: Math.round(performance.now() - start),
      cacheHits: 0,
      cacheMisses: 0,
    };
  }

  // ── Step 2: Syntax guard (MUST run first) ─────────────────────────────
  // Detect languages or use provided override; the first one selects the profile.
  const languages = options?.languages ?? detectLanguages(cwd);
  const primaryLang = (languages[0] ?? "typescript") as LanguageId;
  const profile = getProfile(primaryLang);
  const syntaxResult = await syntaxGuard(files, cwd, profile);

  if (!syntaxResult.ok) {
    return {
      passed: false,
      syntaxPassed: false,
      syntaxErrors: syntaxResult.error,
      tools: [],
      findings: [],
      hiddenCount: 0,
      detectedTools: [],
      duration: Math.round(performance.now() - start),
      cacheHits: 0,
      cacheMisses: 0,
    };
  }

  // ── Step 3: Auto-detect tools ─────────────────────────────────────────
  const detectedTools = await detectTools();

  // ── Step 4: Run all available tools in PARALLEL ───────────────────────
  // Build a lookup from detection results to avoid redundant subprocess spawns
  const toolAvailability = new Map<string, boolean>();
  for (const t of detectedTools) {
    toolAvailability.set(t.name, t.available);
  }
  const isAvailable = (name: string): boolean =>
    toolAvailability.get(name) ?? false;

  // Slop detector always runs (no external tool dependency), cache-aware
  const slopCache = createCacheManager(mainaDir);

  const toolPromises: Promise<ToolReport>[] = [
    runToolWithTiming("slop", async () => {
      const result = await detectSlop(files, { cwd, cache: slopCache });
      return { findings: result.findings, skipped: false };
    }),
    // External tools — each receives its pre-resolved availability
    runToolWithTiming("semgrep", () =>
      runSemgrep({ files, cwd, available: isAvailable("semgrep") }),
    ),
    runToolWithTiming("trivy", () =>
      runTrivy({ cwd, available: isAvailable("trivy") }),
    ),
    runToolWithTiming("secretlint", () =>
      runSecretlint({ files, cwd, available: isAvailable("secretlint") }),
    ),
    runToolWithTiming("sonarqube", () =>
      runSonar({ cwd, available: isAvailable("sonarqube") }),
    ),
    runToolWithTiming("stryker", () =>
      runMutation({ cwd, available: isAvailable("stryker") }),
    ),
    runToolWithTiming("diff-cover", () =>
      runCoverage({ cwd, available: isAvailable("diff-cover") }),
    ),
  ];

  const toolReports = await Promise.all(toolPromises);

  // ── Step 4b: Detect whether all external tools were skipped ──────────
  const externalTools = toolReports.filter((r) => r.tool !== "slop");
  const allExternalSkipped =
    externalTools.length > 0 && externalTools.every((r) => r.skipped);

  // ── Step 5: Collect all findings ──────────────────────────────────────
  const allFindings: Finding[] = toolReports.flatMap((r) => r.findings);

  // ── Step 6: Apply diff-only filter ────────────────────────────────────
  let shownFindings: Finding[];
  let hiddenCount: number;

  if (diffOnly) {
    const filtered = await filterByDiff(allFindings, baseBranch, cwd);
    // Copy so later additions never mutate the array owned by filterByDiff.
    shownFindings = [...filtered.shown];
    hiddenCount = filtered.hidden;
  } else {
    shownFindings = allFindings;
    hiddenCount = 0;
  }

  // The pipeline-level warning has no file/line, so it is added AFTER the
  // diff filter: it describes this run, not a pre-existing code issue, and
  // must not be hidden or counted in hiddenCount.
  // NOTE(review): assumes filterByDiff may hide findings without a changed
  // location — confirm against diff-filter.ts.
  if (allExternalSkipped) {
    const skippedNames = externalTools.map((r) => r.tool).join(", ");
    shownFindings.push({
      tool: "pipeline",
      file: "",
      line: 0,
      message: `No external verification tools ran (${skippedNames} skipped). Run \`maina doctor\` to check tool health or \`maina init\` to configure.`,
      severity: "warning",
    });
  }

  // ── Step 6b: Skip or downgrade noisy rules based on preferences ─────
  try {
    const noisy = getNoisyRules(mainaDir);
    const noisyMap = new Map(noisy.map((r) => [r.ruleId, r]));
    const kept: Finding[] = [];

    for (const finding of shownFindings) {
      const rule = finding.ruleId ? noisyMap.get(finding.ruleId) : undefined;
      if (!rule) {
        kept.push(finding);
        continue;
      }
      // Skip entirely if FP rate > 50% — these erode trust
      if (rule.falsePositiveRate > 0.5) continue;

      // Downgrade one level if borderline (>30%)
      let severity = finding.severity;
      if (rule.falsePositiveRate > 0.3) {
        if (severity === "error") severity = "warning";
        else if (severity === "warning") severity = "info";
      }
      // Downgrade on a copy so the per-tool reports keep the tool's own severity.
      kept.push(
        severity === finding.severity ? finding : { ...finding, severity },
      );
    }

    shownFindings = kept;
  } catch {
    // Preference loading failure should never block verification
  }

  // ── Step 7: AI review (mechanical always, standard if --deep) ────────
  const deep = options?.deep ?? false;
  let diffText = "";
  try {
    diffText = diffOnly ? await getDiff(baseBranch, undefined, cwd) : "";
  } catch {
    // getDiff failure should not block pipeline
  }

  const aiReviewResult: AIReviewResult = await runAIReview({
    diff: diffText,
    entities: [], // Entities require tree-sitter + file body reads; wired when semantic index is hydrated
    deep,
    mainaDir,
  });

  toolReports.push({
    tool: "ai-review",
    findings: aiReviewResult.findings,
    skipped: aiReviewResult.skipped,
    duration: aiReviewResult.duration,
  });

  // Merge AI findings into shown findings (they are not diff-filtered)
  shownFindings = [...shownFindings, ...aiReviewResult.findings];

  // ── Step 8: Determine pass/fail ───────────────────────────────────────
  const passed = !shownFindings.some((f) => f.severity === "error");

  // ── Step 9: Return unified result ─────────────────────────────────────
  const cacheStats = slopCache.stats();
  return {
    passed,
    syntaxPassed: true,
    tools: toolReports,
    findings: shownFindings,
    hiddenCount,
    detectedTools,
    duration: Math.round(performance.now() - start),
    cacheHits: cacheStats.l1Hits + cacheStats.l2Hits,
    cacheMisses: cacheStats.misses,
  };
}
@@ -0,0 +1,277 @@
1
+ /**
2
+ * Verification Proof — gathers and formats verification evidence for PRs.
3
+ *
4
+ * Collects pipeline results, test count, review results, slop check,
5
+ * and visual verification into a formatted markdown section.
6
+ */
7
+
8
+ import { existsSync } from "node:fs";
9
+ import { join } from "node:path";
10
+ import { createCacheManager } from "../cache/manager";
11
+ import { loadWorkflowContext } from "../workflow/context";
12
+ import type { PipelineResult } from "./pipeline";
13
+ import { runPipeline } from "./pipeline";
14
+ import { detectSlop } from "./slop";
15
+ import { runVisualVerification } from "./visual";
16
+
17
+ // ─── Types ────────────────────────────────────────────────────────────────
18
+
/** Per-tool summary row included in the verification proof. */
export interface ToolProof {
  /** Name of the tool. */
  tool: string;
  /** Number of findings the tool reported. */
  findings: number;
  /** Duration of the tool run, as reported by the pipeline. */
  duration: number;
  /** Whether the tool was skipped. */
  skipped: boolean;
}
25
+
/** Collected verification evidence; sections that were not gathered are null. */
export interface VerificationProof {
  /** One summary entry per pipeline tool. */
  pipeline: ToolProof[];
  /** Overall pipeline pass/fail. */
  pipelinePassed: boolean;
  /** Total pipeline duration. */
  pipelineDuration: number;
  /** Test counts, or null when unavailable. */
  tests: { passed: number; failed: number; files: number } | null;
  /** Two-stage review outcome, or null when no review result was supplied. */
  review: {
    stage1Passed: boolean;
    stage1Findings: number;
    stage2Passed: boolean;
    stage2Findings: number;
  } | null;
  /** Slop finding count, or null when unavailable. */
  slop: { findings: number } | null;
  /** Visual comparison counts, or null when unavailable. */
  visual: { pages: number; regressions: number } | null;
  /** Workflow context text, or null when there is none. */
  workflowSummary: string | null;
}
41
+
/** Options controlling how verification proof is gathered. */
export interface ProofOptions {
  /** Working directory. */
  cwd?: string;
  /** Maina data directory. */
  mainaDir?: string;
  /** Base branch used for the pipeline run. */
  baseBranch?: string;
  /** When set, the test run is skipped. */
  skipTests?: boolean;
  /** When set, visual verification is skipped. */
  skipVisual?: boolean;
  /** Pipeline result supplied by the caller. */
  pipelineResult?: PipelineResult;
  /** Review result supplied by the caller. */
  reviewResult?: {
    passed: boolean;
    stage1: { passed: boolean; findings: unknown[] };
    stage2?: { passed: boolean; findings: unknown[] } | null;
  };
}
55
+
56
+ // ─── Gather ───────────────────────────────────────────────────────────────
57
+
/**
 * Run `bun test` in `cwd` and parse its output for pass/fail/file counts.
 *
 * Both stdout and stderr are piped, so both are drained concurrently: an
 * unread pipe can fill up and stall the child process, and the summary is
 * not guaranteed to be written to stdout.
 *
 * @param cwd - Directory in which to run the test command.
 * @returns The parsed counts, or null when no summary could be found in the
 *   output or the process could not be run.
 */
async function runTests(
  cwd: string,
): Promise<{ passed: number; failed: number; files: number } | null> {
  try {
    const proc = Bun.spawn(["bun", "test"], {
      cwd,
      stdout: "pipe",
      stderr: "pipe",
    });

    const [stdout, stderr] = await Promise.all([
      new Response(proc.stdout).text(),
      new Response(proc.stderr).text(),
    ]);
    await proc.exited;

    // Parse "980 pass, 0 fail across 87 files." — also accepted when the
    // counts are spread over several lines ([\s\S] crosses newlines).
    const output = `${stdout}\n${stderr}`;
    const match = output.match(
      /(\d+)\s+pass,?\s+(\d+)\s+fail[\s\S]*?(\d+)\s+file/,
    );
    if (match) {
      return {
        passed: Number.parseInt(match[1] ?? "0", 10),
        failed: Number.parseInt(match[2] ?? "0", 10),
        files: Number.parseInt(match[3] ?? "0", 10),
      };
    }

    // No recognizable summary: report "unknown" rather than guessing counts.
    return null;
  } catch {
    // Spawn or read failure: test evidence is simply unavailable.
    return null;
  }
}
90
+
/**
 * Gather all verification proof.
 *
 * Collects the pipeline result (running the pipeline when the caller did not
 * supply one), test counts, the caller-provided review result, the slop
 * finding count, visual verification results and the workflow context.
 *
 * @param options - See ProofOptions. Defaults: cwd = process.cwd(),
 *   mainaDir = `<cwd>/.maina`, baseBranch = "main".
 * @returns The assembled VerificationProof. `tests` is null when skipTests
 *   is set or no counts could be obtained; `review` is null when no
 *   reviewResult was supplied; `visual` is null unless a
 *   `visual-baselines` directory exists under mainaDir and the visual run
 *   was not skipped.
 */
export async function gatherVerificationProof(
  options: ProofOptions = {},
): Promise<VerificationProof> {
  const cwd = options.cwd ?? process.cwd();
  const mainaDir = options.mainaDir ?? join(cwd, ".maina");
  const baseBranch = options.baseBranch ?? "main";

  // Pipeline: reuse the caller's result when supplied, otherwise run it now.
  const pipelineResult =
    options.pipelineResult ??
    (await runPipeline({
      baseBranch,
      diffOnly: true,
      cwd,
      mainaDir,
    }));

  const pipeline: ToolProof[] = pipelineResult.tools.map((t) => ({
    tool: t.tool,
    findings: t.findings.length,
    duration: t.duration,
    skipped: t.skipped,
  }));

  // Tests
  const tests = options.skipTests ? null : await runTests(cwd);

  // Review (passed from caller if available); a missing stage 2 counts as passed.
  const review = options.reviewResult
    ? {
        stage1Passed: options.reviewResult.stage1.passed,
        stage1Findings: options.reviewResult.stage1.findings.length,
        stage2Passed: options.reviewResult.stage2?.passed ?? true,
        stage2Findings: options.reviewResult.stage2?.findings.length ?? 0,
      }
    : null;

  // Slop: prefer the pipeline's own "slop" report, which was produced from
  // the files actually being verified. Re-running the detector over an empty
  // file list would not reflect those files.
  let slop: { findings: number } | null = null;
  const slopReport = pipelineResult.tools.find(
    (t) => t.tool === "slop" && !t.skipped,
  );
  if (slopReport) {
    slop = { findings: slopReport.findings.length };
  } else {
    // Fallback when the pipeline result carries no slop report.
    // NOTE(review): detectSlop is called with an empty file list here, as
    // before — confirm what it scans in that case.
    try {
      const cache = createCacheManager(mainaDir);
      const slopResult = await detectSlop([], { cwd, cache });
      slop = { findings: slopResult.findings.length };
    } catch {
      slop = null;
    }
  }

  // Visual
  let visual: { pages: number; regressions: number } | null = null;
  if (!options.skipVisual) {
    const baselineDir = join(mainaDir, "visual-baselines");
    if (existsSync(baselineDir)) {
      try {
        const visualResult = await runVisualVerification(mainaDir);
        if (!visualResult.skipped) {
          const regressions = visualResult.findings.filter(
            (f) => f.ruleId === "visual/regression",
          ).length;
          visual = { pages: visualResult.comparisons, regressions };
        }
      } catch {
        // Visual verification failure shouldn't block PR
      }
    }
  }

  // Workflow context
  const workflowSummary = loadWorkflowContext(mainaDir);

  return {
    pipeline,
    pipelinePassed: pipelineResult.passed,
    pipelineDuration: pipelineResult.duration,
    tests,
    review,
    slop,
    visual,
    workflowSummary,
  };
}
175
+
176
+ // ─── Format ───────────────────────────────────────────────────────────────
177
+
/**
 * Render a VerificationProof as a markdown "Verification Proof" section.
 *
 * Every populated part of the proof becomes a collapsible <details> block;
 * parts that are null (or an empty workflow summary) are left out. The
 * pipeline block is always present and contains one table row per tool.
 *
 * @param proof - The gathered verification evidence.
 * @returns The markdown text, with lines joined by "\n".
 */
export function formatVerificationProof(proof: VerificationProof): string {
  const out: string[] = ["\n## Verification Proof\n"];

  // Start a collapsible block with the given summary text.
  const open = (summary: string): void => {
    out.push("<details>", `<summary>${summary}</summary>\n`);
  };
  // End a collapsible block (with a trailing blank line).
  const close = (): void => {
    out.push("\n</details>\n");
  };

  const { tests, review, slop, visual, workflowSummary } = proof;

  // Pipeline
  let findingsTotal = 0;
  for (const entry of proof.pipeline) {
    findingsTotal += entry.findings;
  }
  const seconds = (proof.pipelineDuration / 1000).toFixed(1);
  const pipelineMark = proof.pipelinePassed ? "✅" : "❌";
  open(
    `${pipelineMark} Pipeline: ${proof.pipeline.length} tools, ${findingsTotal} findings, ${seconds}s`,
  );
  out.push(
    "| Tool | Findings | Duration | Status |",
    "|------|----------|----------|--------|",
  );
  for (const { tool, findings, duration, skipped } of proof.pipeline) {
    if (skipped) {
      // Skipped tools have no meaningful counts or timing.
      out.push(`| ${tool} | - | - | skipped |`);
      continue;
    }
    const status = findings > 0 ? `${findings} found` : "✅";
    out.push(`| ${tool} | ${findings} | ${duration}ms | ${status} |`);
  }
  close();

  // Tests
  if (tests) {
    const testMark = tests.failed === 0 ? "✅" : "❌";
    open(`${testMark} Tests: ${tests.passed} pass, ${tests.failed} fail`);
    out.push(
      `${tests.passed} pass, ${tests.failed} fail across ${tests.files} files.`,
    );
    close();
  }

  // Code Review
  if (review) {
    const bothPassed = review.stage1Passed && review.stage2Passed;
    open(`${bothPassed ? "✅" : "⚠️"} Code Review`);
    out.push(
      `- Stage 1 (spec compliance): ${review.stage1Passed ? "passed" : "failed"}, ${review.stage1Findings} finding(s)`,
      `- Stage 2 (code quality): ${review.stage2Passed ? "passed" : "failed"}, ${review.stage2Findings} finding(s)`,
    );
    close();
  }

  // Slop
  if (slop) {
    const clean = slop.findings === 0;
    open(
      `${clean ? "✅" : "⚠️"} Slop: ${clean ? "clean" : `${slop.findings} patterns`}`,
    );
    out.push(`${slop.findings} slop pattern(s) detected.`);
    close();
  }

  // Visual
  if (visual) {
    const visualMark = visual.regressions === 0 ? "✅" : "⚠️";
    open(
      `${visualMark} Visual: ${visual.pages} page(s), ${visual.regressions} regression(s)`,
    );
    out.push(
      `Compared ${visual.pages} page(s) against baselines. ${visual.regressions} regression(s) found.`,
    );
    close();
  }

  // Workflow
  if (workflowSummary) {
    // Keep only the last 500 chars so the PR body stays reasonable.
    const tail =
      workflowSummary.length > 500
        ? `...${workflowSummary.slice(-500)}`
        : workflowSummary;
    open("📋 Workflow Context");
    // The final block closes without the trailing blank line.
    out.push("```", tail, "```", "\n</details>");
  }

  return out.join("\n");
}