@ktpartners/dgs-platform 2.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +38 -0
- package/README.md +851 -0
- package/agents/dgs-codebase-cross-analyzer.md +183 -0
- package/agents/dgs-codebase-mapper.md +782 -0
- package/agents/dgs-codebase-synthesizer.md +156 -0
- package/agents/dgs-debugger.md +1256 -0
- package/agents/dgs-executor.md +550 -0
- package/agents/dgs-integration-checker.md +481 -0
- package/agents/dgs-nyquist-auditor.md +178 -0
- package/agents/dgs-phase-researcher.md +563 -0
- package/agents/dgs-phase-verifier.md +450 -0
- package/agents/dgs-plan-checker.md +708 -0
- package/agents/dgs-planner.md +1324 -0
- package/agents/dgs-project-researcher.md +631 -0
- package/agents/dgs-research-synthesizer.md +249 -0
- package/agents/dgs-roadmapper.md +652 -0
- package/agents/dgs-verifier.md +607 -0
- package/bin/install.js +2073 -0
- package/commands/dgs/add-doc.md +45 -0
- package/commands/dgs/add-idea.md +38 -0
- package/commands/dgs/add-phase.md +43 -0
- package/commands/dgs/add-repo.md +54 -0
- package/commands/dgs/add-tests.md +41 -0
- package/commands/dgs/add-todo.md +47 -0
- package/commands/dgs/approve-spec.md +38 -0
- package/commands/dgs/audit-milestone.md +36 -0
- package/commands/dgs/audit-phase.md +37 -0
- package/commands/dgs/cancel-job.md +23 -0
- package/commands/dgs/capture-principle.md +143 -0
- package/commands/dgs/check-todos.md +45 -0
- package/commands/dgs/cleanup.md +18 -0
- package/commands/dgs/complete-milestone.md +136 -0
- package/commands/dgs/complete-project.md +70 -0
- package/commands/dgs/consolidate-ideas.md +50 -0
- package/commands/dgs/create-milestone-job.md +37 -0
- package/commands/dgs/debug.md +164 -0
- package/commands/dgs/develop-idea.md +53 -0
- package/commands/dgs/discuss-idea.md +41 -0
- package/commands/dgs/discuss-phase.md +83 -0
- package/commands/dgs/execute-phase.md +41 -0
- package/commands/dgs/fast.md +38 -0
- package/commands/dgs/find-related-ideas.md +43 -0
- package/commands/dgs/health.md +28 -0
- package/commands/dgs/help.md +22 -0
- package/commands/dgs/import-spec.md +36 -0
- package/commands/dgs/init-product.md +28 -0
- package/commands/dgs/insert-phase.md +32 -0
- package/commands/dgs/join-discord.md +18 -0
- package/commands/dgs/list-docs.md +40 -0
- package/commands/dgs/list-ideas.md +42 -0
- package/commands/dgs/list-jobs.md +22 -0
- package/commands/dgs/list-phase-assumptions.md +46 -0
- package/commands/dgs/list-projects.md +57 -0
- package/commands/dgs/list-specs.md +40 -0
- package/commands/dgs/map-codebase.md +92 -0
- package/commands/dgs/new-milestone.md +44 -0
- package/commands/dgs/new-project.md +42 -0
- package/commands/dgs/node-repair.md +26 -0
- package/commands/dgs/overlap-check.md +20 -0
- package/commands/dgs/pause-work.md +38 -0
- package/commands/dgs/plan-milestone-gaps.md +34 -0
- package/commands/dgs/plan-phase.md +44 -0
- package/commands/dgs/progress.md +24 -0
- package/commands/dgs/quick.md +41 -0
- package/commands/dgs/reactivate-project.md +70 -0
- package/commands/dgs/reapply-patches.md +110 -0
- package/commands/dgs/refine-spec.md +38 -0
- package/commands/dgs/reject-idea.md +43 -0
- package/commands/dgs/remove-doc.md +44 -0
- package/commands/dgs/remove-phase.md +31 -0
- package/commands/dgs/remove-repo.md +69 -0
- package/commands/dgs/research-idea.md +43 -0
- package/commands/dgs/research-phase.md +189 -0
- package/commands/dgs/restore-idea.md +45 -0
- package/commands/dgs/resume-work.md +40 -0
- package/commands/dgs/rollback-job.md +24 -0
- package/commands/dgs/run-job.md +35 -0
- package/commands/dgs/search.md +40 -0
- package/commands/dgs/set-profile.md +34 -0
- package/commands/dgs/settings.md +38 -0
- package/commands/dgs/switch-project.md +58 -0
- package/commands/dgs/undo-consolidation.md +42 -0
- package/commands/dgs/update-idea.md +44 -0
- package/commands/dgs/update.md +37 -0
- package/commands/dgs/validate-phase.md +35 -0
- package/commands/dgs/verify-work.md +39 -0
- package/commands/dgs/write-spec.md +49 -0
- package/deliver-great-systems/.planning/phases/09-backend-wiring-and-error-handling/09-01-SUMMARY.md +84 -0
- package/deliver-great-systems/.planning/phases/09-backend-wiring-and-error-handling/09-02-SUMMARY.md +86 -0
- package/deliver-great-systems/.planning/phases/10-v1-to-v2-migration-flow/10-01-SUMMARY.md +85 -0
- package/deliver-great-systems/bin/dgs-tools.cjs +1444 -0
- package/deliver-great-systems/bin/lib/auto-test.cjs +1365 -0
- package/deliver-great-systems/bin/lib/commands.cjs +570 -0
- package/deliver-great-systems/bin/lib/config.cjs +417 -0
- package/deliver-great-systems/bin/lib/conflict-agent.cjs +1063 -0
- package/deliver-great-systems/bin/lib/conflict-agent.test.cjs +554 -0
- package/deliver-great-systems/bin/lib/context.cjs +929 -0
- package/deliver-great-systems/bin/lib/context.test.cjs +693 -0
- package/deliver-great-systems/bin/lib/core.cjs +744 -0
- package/deliver-great-systems/bin/lib/core.test.cjs +822 -0
- package/deliver-great-systems/bin/lib/docs.cjs +919 -0
- package/deliver-great-systems/bin/lib/docs.test.cjs +211 -0
- package/deliver-great-systems/bin/lib/execution.cjs +705 -0
- package/deliver-great-systems/bin/lib/execution.test.cjs +1472 -0
- package/deliver-great-systems/bin/lib/frontmatter.cjs +324 -0
- package/deliver-great-systems/bin/lib/ideas.cjs +1406 -0
- package/deliver-great-systems/bin/lib/ideas.test.cjs +1417 -0
- package/deliver-great-systems/bin/lib/identity.cjs +125 -0
- package/deliver-great-systems/bin/lib/init.cjs +1114 -0
- package/deliver-great-systems/bin/lib/init.test.cjs +1271 -0
- package/deliver-great-systems/bin/lib/jobs.cjs +2015 -0
- package/deliver-great-systems/bin/lib/jobs.test.cjs +2619 -0
- package/deliver-great-systems/bin/lib/merge-conflicts.cjs +654 -0
- package/deliver-great-systems/bin/lib/merge-conflicts.test.cjs +370 -0
- package/deliver-great-systems/bin/lib/migration.cjs +352 -0
- package/deliver-great-systems/bin/lib/migration.test.cjs +582 -0
- package/deliver-great-systems/bin/lib/milestone.cjs +243 -0
- package/deliver-great-systems/bin/lib/overlap.cjs +437 -0
- package/deliver-great-systems/bin/lib/overlap.test.cjs +747 -0
- package/deliver-great-systems/bin/lib/path-audit.test.cjs +384 -0
- package/deliver-great-systems/bin/lib/paths.cjs +144 -0
- package/deliver-great-systems/bin/lib/paths.test.cjs +486 -0
- package/deliver-great-systems/bin/lib/phase.cjs +910 -0
- package/deliver-great-systems/bin/lib/projects.cjs +691 -0
- package/deliver-great-systems/bin/lib/projects.test.cjs +871 -0
- package/deliver-great-systems/bin/lib/repos.cjs +1432 -0
- package/deliver-great-systems/bin/lib/repos.test.cjs +1882 -0
- package/deliver-great-systems/bin/lib/roadmap.cjs +305 -0
- package/deliver-great-systems/bin/lib/search.cjs +570 -0
- package/deliver-great-systems/bin/lib/specs.cjs +1303 -0
- package/deliver-great-systems/bin/lib/state.cjs +893 -0
- package/deliver-great-systems/bin/lib/template.cjs +228 -0
- package/deliver-great-systems/bin/lib/test-helpers.cjs +291 -0
- package/deliver-great-systems/bin/lib/verify.cjs +796 -0
- package/deliver-great-systems/references/checkpoints.md +776 -0
- package/deliver-great-systems/references/conflict-resolution.md +66 -0
- package/deliver-great-systems/references/context-tiers.md +166 -0
- package/deliver-great-systems/references/continuation-format.md +249 -0
- package/deliver-great-systems/references/decimal-phase-calculation.md +67 -0
- package/deliver-great-systems/references/git-integration.md +250 -0
- package/deliver-great-systems/references/git-planning-commit.md +40 -0
- package/deliver-great-systems/references/model-profile-resolution.md +36 -0
- package/deliver-great-systems/references/model-profiles.md +95 -0
- package/deliver-great-systems/references/phase-argument-parsing.md +61 -0
- package/deliver-great-systems/references/planning-config.md +224 -0
- package/deliver-great-systems/references/questioning.md +162 -0
- package/deliver-great-systems/references/spec-review-loop.md +177 -0
- package/deliver-great-systems/references/tdd.md +265 -0
- package/deliver-great-systems/references/ui-brand.md +160 -0
- package/deliver-great-systems/references/verification-patterns.md +612 -0
- package/deliver-great-systems/templates/DEBUG.md +166 -0
- package/deliver-great-systems/templates/UAT.md +251 -0
- package/deliver-great-systems/templates/VALIDATION.md +95 -0
- package/deliver-great-systems/templates/claude-md.md +74 -0
- package/deliver-great-systems/templates/codebase/architecture.md +257 -0
- package/deliver-great-systems/templates/codebase/concerns.md +312 -0
- package/deliver-great-systems/templates/codebase/conventions.md +309 -0
- package/deliver-great-systems/templates/codebase/integrations.md +282 -0
- package/deliver-great-systems/templates/codebase/stack.md +188 -0
- package/deliver-great-systems/templates/codebase/structure.md +287 -0
- package/deliver-great-systems/templates/codebase/testing.md +482 -0
- package/deliver-great-systems/templates/config.json +38 -0
- package/deliver-great-systems/templates/context.md +354 -0
- package/deliver-great-systems/templates/continue-here.md +80 -0
- package/deliver-great-systems/templates/debug-subagent-prompt.md +93 -0
- package/deliver-great-systems/templates/discovery.md +148 -0
- package/deliver-great-systems/templates/milestone-archive.md +125 -0
- package/deliver-great-systems/templates/milestone.md +117 -0
- package/deliver-great-systems/templates/phase-prompt.md +615 -0
- package/deliver-great-systems/templates/planner-subagent-prompt.md +119 -0
- package/deliver-great-systems/templates/project.md +186 -0
- package/deliver-great-systems/templates/requirements.md +233 -0
- package/deliver-great-systems/templates/research-project/ARCHITECTURE.md +206 -0
- package/deliver-great-systems/templates/research-project/FEATURES.md +149 -0
- package/deliver-great-systems/templates/research-project/PITFALLS.md +202 -0
- package/deliver-great-systems/templates/research-project/STACK.md +122 -0
- package/deliver-great-systems/templates/research-project/SUMMARY.md +172 -0
- package/deliver-great-systems/templates/research.md +554 -0
- package/deliver-great-systems/templates/retrospective.md +54 -0
- package/deliver-great-systems/templates/roadmap.md +204 -0
- package/deliver-great-systems/templates/state.md +178 -0
- package/deliver-great-systems/templates/summary-complex.md +59 -0
- package/deliver-great-systems/templates/summary-minimal.md +41 -0
- package/deliver-great-systems/templates/summary-standard.md +48 -0
- package/deliver-great-systems/templates/summary.md +253 -0
- package/deliver-great-systems/templates/user-setup.md +313 -0
- package/deliver-great-systems/templates/verification-report.md +324 -0
- package/deliver-great-systems/workflows/add-doc.md +151 -0
- package/deliver-great-systems/workflows/add-idea.md +96 -0
- package/deliver-great-systems/workflows/add-phase.md +120 -0
- package/deliver-great-systems/workflows/add-tests.md +359 -0
- package/deliver-great-systems/workflows/add-todo.md +162 -0
- package/deliver-great-systems/workflows/approve-spec.md +194 -0
- package/deliver-great-systems/workflows/audit-milestone.md +364 -0
- package/deliver-great-systems/workflows/audit-phase.md +462 -0
- package/deliver-great-systems/workflows/cancel-job.md +108 -0
- package/deliver-great-systems/workflows/check-todos.md +181 -0
- package/deliver-great-systems/workflows/cleanup.md +247 -0
- package/deliver-great-systems/workflows/codereview.md +526 -0
- package/deliver-great-systems/workflows/complete-milestone.md +1298 -0
- package/deliver-great-systems/workflows/consolidate-ideas.md +365 -0
- package/deliver-great-systems/workflows/create-milestone-job.md +177 -0
- package/deliver-great-systems/workflows/develop-idea.md +544 -0
- package/deliver-great-systems/workflows/diagnose-issues.md +231 -0
- package/deliver-great-systems/workflows/discovery-phase.md +301 -0
- package/deliver-great-systems/workflows/discuss-idea.md +263 -0
- package/deliver-great-systems/workflows/discuss-phase.md +733 -0
- package/deliver-great-systems/workflows/execute-phase.md +571 -0
- package/deliver-great-systems/workflows/execute-plan.md +592 -0
- package/deliver-great-systems/workflows/find-related-ideas.md +271 -0
- package/deliver-great-systems/workflows/health.md +173 -0
- package/deliver-great-systems/workflows/help.md +997 -0
- package/deliver-great-systems/workflows/import-spec.md +381 -0
- package/deliver-great-systems/workflows/init-product.md +767 -0
- package/deliver-great-systems/workflows/insert-phase.md +138 -0
- package/deliver-great-systems/workflows/list-docs.md +119 -0
- package/deliver-great-systems/workflows/list-ideas.md +154 -0
- package/deliver-great-systems/workflows/list-jobs.md +89 -0
- package/deliver-great-systems/workflows/list-phase-assumptions.md +192 -0
- package/deliver-great-systems/workflows/list-specs.md +101 -0
- package/deliver-great-systems/workflows/map-codebase.md +621 -0
- package/deliver-great-systems/workflows/new-milestone.md +591 -0
- package/deliver-great-systems/workflows/new-project.md +1113 -0
- package/deliver-great-systems/workflows/node-repair.md +94 -0
- package/deliver-great-systems/workflows/overlap-check.md +86 -0
- package/deliver-great-systems/workflows/pause-work.md +134 -0
- package/deliver-great-systems/workflows/plan-milestone-gaps.md +306 -0
- package/deliver-great-systems/workflows/plan-phase.md +698 -0
- package/deliver-great-systems/workflows/progress.md +386 -0
- package/deliver-great-systems/workflows/quick.md +845 -0
- package/deliver-great-systems/workflows/refine-spec.md +275 -0
- package/deliver-great-systems/workflows/reject-idea.md +109 -0
- package/deliver-great-systems/workflows/remove-doc.md +117 -0
- package/deliver-great-systems/workflows/remove-phase.md +163 -0
- package/deliver-great-systems/workflows/research-idea.md +325 -0
- package/deliver-great-systems/workflows/research-phase.md +81 -0
- package/deliver-great-systems/workflows/restore-idea.md +101 -0
- package/deliver-great-systems/workflows/resume-project.md +311 -0
- package/deliver-great-systems/workflows/rollback-job.md +130 -0
- package/deliver-great-systems/workflows/run-job.md +498 -0
- package/deliver-great-systems/workflows/search.md +130 -0
- package/deliver-great-systems/workflows/set-profile.md +83 -0
- package/deliver-great-systems/workflows/settings.md +470 -0
- package/deliver-great-systems/workflows/transition.md +563 -0
- package/deliver-great-systems/workflows/undo-consolidation.md +155 -0
- package/deliver-great-systems/workflows/update-idea.md +157 -0
- package/deliver-great-systems/workflows/update.md +242 -0
- package/deliver-great-systems/workflows/validate-phase.md +177 -0
- package/deliver-great-systems/workflows/verify-phase.md +253 -0
- package/deliver-great-systems/workflows/verify-work.md +671 -0
- package/deliver-great-systems/workflows/write-spec.md +450 -0
- package/hooks/dist/dgs-check-update.js +62 -0
- package/hooks/dist/dgs-context-monitor.js +141 -0
- package/hooks/dist/dgs-statusline.js +115 -0
- package/package.json +60 -0
- package/scripts/build-hooks.js +43 -0
|
@@ -0,0 +1,1365 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-Test — Test command collection and execution engine
|
|
3
|
+
*
|
|
4
|
+
* Collection: Parses two source formats:
|
|
5
|
+
* - VALIDATION.md: ```auto-test:suite/quick/task fenced code blocks
|
|
6
|
+
* - PLAN.md: <verify><automated> XML blocks within <task> elements
|
|
7
|
+
* Deduplicates across sources (VALIDATION.md wins on exact match).
|
|
8
|
+
*
|
|
9
|
+
* Execution: Runs collected commands with timeout protection, false positive
|
|
10
|
+
* detection, failure classification (infrastructure vs code), and orchestrated
|
|
11
|
+
* suite execution with abort-on-timeout semantics.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
const fs = require('fs');
|
|
15
|
+
const path = require('path');
|
|
16
|
+
const { spawnSync } = require('child_process');
|
|
17
|
+
const { safeReadFile, findPhaseInternal, output, error } = require('./core.cjs');
|
|
18
|
+
const { extractFrontmatter } = require('./frontmatter.cjs');
|
|
19
|
+
|
|
20
|
+
// ─── Parsers ─────────────────────────────────────────────────────────────────
|
|
21
|
+
|
|
22
|
+
/**
 * Parse VALIDATION.md content for auto-test:* fenced code blocks.
 *
 * A block is opened by a fence whose info string is `auto-test:suite`,
 * `auto-test:quick` or `auto-test:task` and is closed by the next triple
 * backtick. Inside a block every line is trimmed; blank lines and lines that
 * start with `#` are ignored, and each remaining line becomes one command.
 *
 * @param {string|null} content - Markdown content
 * @returns {Array<{command: string, source: string, tier: string, label: string}>}
 *   One entry per command line, in document order. Empty when content is falsy
 *   or contains no auto-test fences.
 */
function parseValidationMd(content) {
  if (!content) return [];

  const tierLabels = {
    suite: 'full suite',
    quick: 'quick run',
    task: 'task verify',
  };

  // The opening fence may carry trailing spaces/tabs and may end in CRLF.
  // Requiring a bare "\n" here silently dropped every block in files saved
  // with Windows line endings.
  const blockPattern = /```auto-test:(suite|quick|task)[ \t]*\r?\n([\s\S]*?)```/g;
  const commands = [];
  let match;

  while ((match = blockPattern.exec(content)) !== null) {
    const tier = match[1];

    for (const line of match[2].split('\n')) {
      // trim() also removes the "\r" left behind by CRLF line endings.
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;

      commands.push({
        command: trimmed,
        source: 'VALIDATION.md',
        tier,
        label: tierLabels[tier],
      });
    }
  }

  return commands;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Extract automated verification commands from PLAN.md task elements.
 *
 * Every `<task ...>...</task>` span is inspected in document order. The first
 * `<automated>...</automated>` element found inside a span supplies the
 * command; the first `<name>...</name>` element supplies the label, falling
 * back to `task N` (N = 1-based position of the span) when no name is present.
 * Spans whose command is empty or begins with `MISSING` contribute nothing.
 *
 * @param {string|null} content - PLAN.md content
 * @param {string} planId - Plan identifier (e.g., "71-01"), used as label prefix
 * @returns {Array<{command: string, source: string, tier: string, label: string}>}
 */
function parsePlanVerifyBlocks(content, planId) {
  if (!content) return [];

  const taskPattern = /<task[^>]*>([\s\S]*?)<\/task>/g;
  const collected = [];
  let taskIndex = 0;

  for (let hit = taskPattern.exec(content); hit !== null; hit = taskPattern.exec(content)) {
    taskIndex += 1;
    const body = hit[1];

    const automated = body.match(/<automated>([\s\S]*?)<\/automated>/);
    if (!automated) continue;

    const cmdStr = automated[1].trim();
    // Nothing to run: empty element or an explicit MISSING placeholder.
    if (cmdStr === '' || cmdStr.startsWith('MISSING')) continue;

    const named = body.match(/<name>([\s\S]*?)<\/name>/);
    const taskName = named ? named[1].trim() : `task ${taskIndex}`;

    collected.push({
      command: cmdStr,
      source: 'PLAN.md',
      tier: 'task',
      label: `${planId} ${taskName}`,
    });
  }

  return collected;
}
|
|
100
|
+
|
|
101
|
+
// ─── Deduplication ───────────────────────────────────────────────────────────
|
|
102
|
+
|
|
103
|
+
/**
 * Merge the two command lists, dropping PLAN.md entries that repeat an
 * already-seen command string.
 *
 * All VALIDATION.md entries are kept, in order, and are never compared with
 * each other. PLAN.md entries follow; one is dropped (and counted) when its
 * `command` string exactly equals that of any entry kept before it.
 *
 * @param {Array} validationCmds - Commands from VALIDATION.md
 * @param {Array} planCmds - Commands from PLAN.md files
 * @returns {{commands: Array, duplicatesRemoved: number}}
 */
function deduplicateCommands(validationCmds, planCmds) {
  const merged = Array.from(validationCmds);
  const knownCommands = new Set(merged.map((entry) => entry.command));
  let duplicatesRemoved = 0;

  for (const entry of planCmds) {
    if (knownCommands.has(entry.command)) {
      duplicatesRemoved += 1;
    } else {
      knownCommands.add(entry.command);
      merged.push(entry);
    }
  }

  return { commands: merged, duplicatesRemoved };
}
|
|
133
|
+
|
|
134
|
+
// ─── Collection ──────────────────────────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
/**
 * Collect all test commands from a phase directory.
 *
 * The directory is listed once. From that listing:
 *   - the first entry that ends in `-VALIDATION.md` (case-insensitive) or is
 *     named exactly `VALIDATION.md` is parsed with parseValidationMd;
 *   - every entry ending in `-PLAN.md` is parsed with parsePlanVerifyBlocks,
 *     in sorted filename order, using the filename minus that suffix as the
 *     plan id.
 * The two lists are then merged with deduplicateCommands.
 *
 * A missing or unreadable directory is treated as "no sources" and yields an
 * empty result rather than an error.
 *
 * @param {string} cwd - Working directory
 * @param {string} phaseDir - Phase directory relative to cwd
 * @returns {{commands: Array, summary: Object, empty: boolean}}
 */
function collectTestCommands(cwd, phaseDir) {
  const phaseDirFull = path.join(cwd, phaseDir);
  const planSuffix = '-PLAN.md';

  // One listing serves both lookups, so VALIDATION and PLAN discovery always
  // work from the same snapshot of the directory.
  let entries = [];
  try {
    entries = fs.readdirSync(phaseDirFull);
  } catch {
    /* directory may not exist -- fall through with no sources */
  }

  // NOTE(review): safeReadFile is assumed to return null (not throw) for an
  // unreadable file; both parsers treat null as "no commands". Confirm against
  // core.cjs.
  let validationCmds = [];
  const validationFile = entries.find(
    (f) => /-VALIDATION\.md$/i.test(f) || f === 'VALIDATION.md'
  );
  if (validationFile) {
    validationCmds = parseValidationMd(safeReadFile(path.join(phaseDirFull, validationFile)));
  }

  const planCmds = entries
    .filter((f) => f.endsWith(planSuffix))
    .sort()
    .flatMap((planFile) => {
      // Strip the suffix itself rather than the first "-PLAN.md" occurrence
      // anywhere in the name.
      const planId = planFile.slice(0, -planSuffix.length);
      return parsePlanVerifyBlocks(safeReadFile(path.join(phaseDirFull, planFile)), planId);
    });

  const { commands, duplicatesRemoved } = deduplicateCommands(validationCmds, planCmds);

  return {
    commands,
    summary: {
      total: commands.length,
      from_validation: validationCmds.length,
      // deduplicateCommands keeps every VALIDATION.md entry, so the remainder
      // is exactly the number of PLAN.md entries that survived.
      from_plans: commands.length - validationCmds.length,
      duplicates_removed: duplicatesRemoved,
    },
    empty: commands.length === 0,
  };
}
|
|
183
|
+
|
|
184
|
+
// ─── CLI Command ─────────────────────────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
/**
 * CLI entry point for test command collection.
 *
 * Reports `phase required` through error() when no phase is given, resolves
 * the phase with findPhaseInternal, and writes either the collection result or
 * a `{ error: 'Phase not found', phase }` object through output().
 *
 * NOTE(review): error() is presumably terminal; if it returns, execution
 * continues into the phase lookup with the falsy phase value.
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode, forwarded to output()
 */
function cmdCollectTestCommands(cwd, phase, raw) {
  if (!phase) error('phase required');

  const phaseInfo = findPhaseInternal(cwd, phase);
  const located = Boolean(phaseInfo && phaseInfo.found);

  if (located) {
    output(collectTestCommands(cwd, phaseInfo.directory), raw);
  } else {
    output({ error: 'Phase not found', phase }, raw);
  }
}
|
|
206
|
+
|
|
207
|
+
// --- Execution Engine --------------------------------------------------------
|
|
208
|
+
|
|
209
|
+
/**
 * Run a single shell command and capture structured results.
 *
 * The command is executed synchronously as `sh -c <command>` with piped stdio,
 * UTF-8 decoding and a 10 MiB output buffer limit.
 *
 * @param {string} command - Shell command to execute
 * @param {Object} [options] - Execution options
 * @param {number} [options.timeout_ms=120000] - Timeout in milliseconds
 * @param {string} [options.cwd=process.cwd()] - Working directory
 * @returns {{exit_code: number, stdout: string, stderr: string, timed_out: boolean, duration_ms: number}}
 *   exit_code is the child's exit status, or 1 when there is none (killed by a
 *   signal, failed to spawn, timed out). timed_out is true only when spawnSync
 *   itself reported a timeout.
 */
function runCommand(command, options = {}) {
  const timeout_ms = options.timeout_ms ?? 120000;
  const cwd = options.cwd ?? process.cwd();

  const start = Date.now();
  const result = spawnSync('sh', ['-c', command], {
    timeout: timeout_ms,
    cwd,
    encoding: 'utf-8',
    stdio: ['pipe', 'pipe', 'pipe'],
    maxBuffer: 10 * 1024 * 1024,
  });
  const duration_ms = Date.now() - start;

  // spawnSync reports an expired timeout through result.error (ETIMEDOUT).
  // The terminating signal is not used as evidence: SIGTERM is also what the
  // child receives on a maxBuffer overflow or from an unrelated kill, and
  // treating those as timeouts would wrongly abort the rest of a suite.
  const spawnError = result.error;
  const timed_out = Boolean(
    spawnError &&
      (spawnError.code === 'ETIMEDOUT' ||
        /ETIMEDOUT|TIMEOUT|timed out/i.test(spawnError.message))
  );

  return {
    // status is null when the child did not exit normally; report a generic failure.
    exit_code: result.status ?? 1,
    stdout: result.stdout || '',
    stderr: result.stderr || '',
    timed_out,
    duration_ms,
  };
}
|
|
239
|
+
|
|
240
|
+
/**
 * Check if command stdout indicates a false positive (exit 0 but no tests actually ran).
 *
 * Matches, case-insensitively, the phrases "0 tests ran" and "no tests found".
 * The zero must not be the tail of a larger number, so "10 tests ran" or
 * "120 tests ran" are not treated as empty runs.
 *
 * @param {string|null} stdout - Command stdout
 * @returns {boolean} true if false positive detected
 */
function checkFalsePositive(stdout) {
  if (!stdout) return false;
  // (?<!\d) keeps the "0" from matching the last digit of 10, 20, 100, ...
  return /(?:(?<!\d)0 tests ran|no tests found)/i.test(stdout);
}
|
|
250
|
+
|
|
251
|
+
/**
 * Decide whether an exit-0 result looks like it did not really run any tests.
 *
 * A result is suspect when its exit code is 0 and either
 *   1. stdout is empty after trimming, or
 *   2. stdout contains none of the assertion markers listed below
 *      (pass/ok/fail words, test counts, framework symbols, "assert").
 * Non-zero exit codes are never suspect. Suspect is separate from the
 * false-positive check, which looks for explicit "0 tests ran" style output;
 * a suspect result is meant for human review.
 *
 * @param {{exit_code: number, stdout: string}} result - runCommand result
 * @returns {boolean} true if result is suspect
 */
function checkSuspect(result) {
  if (result.exit_code !== 0) return false;

  const text = (result.stdout || '').trim();
  if (text === '') return true;

  const evidenceMarkers = [
    /\bpass(?:ed|ing)?\b/i,
    /\bok\b/i,
    /\bfail(?:ed|ing|ure)?\b/i,
    /\btests?\s+\d+/i,
    /\d+\s+(?:passing|failing|pending|skipped)/i,
    /# tests \d+/, // Node test runner
    /Tests:\s+\d+/, // Jest
    /[✓✗✕●]/, // Framework symbols
    /\bassert/i,
  ];

  // Any single marker is enough to accept the output as real test evidence.
  for (const marker of evidenceMarkers) {
    if (marker.test(text)) return false;
  }
  return true;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Decide whether a task-tier command can be skipped because a passing suite
 * already ran the same file.
 *
 * Returns true only when every condition holds:
 *   - at least one suite result exists and all of them passed;
 *   - the task command has a target: the first whitespace-free token after
 *     `--test`, `jest`, `vitest` or `mocha`;
 *   - some suite command has such a target containing `*`, and the text before
 *     its first `*` is a non-empty prefix of the task target.
 * Anything else returns false, i.e. the task command should be run.
 *
 * @param {string} taskCmd - The task-tier command string
 * @param {string[]} suiteCmds - Array of suite-tier command strings
 * @param {Array<{passed: boolean}>} suiteResults - Results of suite commands (parallel arrays with suiteCmds)
 * @returns {boolean} true if the task command is covered and can be skipped
 */
function isCommandCovered(taskCmd, suiteCmds, suiteResults) {
  const runnerTarget = /(?:--test|jest|vitest|mocha)\s+(\S+)/;
  const targetOf = (cmd) => {
    const found = cmd.match(runnerTarget);
    return found ? found[1] : null;
  };

  // A single missing or failing suite result disables skipping entirely.
  if (!suiteResults.length || suiteResults.some((r) => !r.passed)) return false;

  const taskFile = targetOf(taskCmd);
  if (taskFile === null) return false;

  return suiteCmds.some((suiteCmd) => {
    const suiteGlob = targetOf(suiteCmd);
    if (suiteGlob === null || !suiteGlob.includes('*')) return false;

    // Literal part of the glob ahead of the first wildcard, e.g. "tests/".
    const suiteDir = suiteGlob.split('*')[0];
    return suiteDir !== '' && taskFile.startsWith(suiteDir);
  });
}
|
|
332
|
+
|
|
333
|
+
/**
 * Label a failed command as an environment problem or a code problem.
 *
 * 'infrastructure' is returned when the command timed out, exited with code
 * 127, or its stderr contains (case-insensitively) any of the substrings
 * listed below. Every other failure is labelled 'code'.
 *
 * @param {{exit_code: number, stderr: string, timed_out: boolean}} result - runCommand result
 * @returns {'infrastructure'|'code'} Failure classification
 */
function classifyFailure(result) {
  // Timeouts and "command not found" (127) need no stderr inspection.
  if (result.timed_out || result.exit_code === 127) return 'infrastructure';

  const haystack = (result.stderr || '').toLowerCase();
  const infraSignals = [
    'permission denied', 'eacces',
    'econnrefused', 'connection refused',
    'enoent', 'file not found', 'no such file',
    'cannot find module',
    'out of memory', 'enomem',
    'enospc', 'disk full', 'no space left',
    'docker', 'container',
    'npm err! missing', 'npm err! code e404',
  ];

  return infraSignals.some((signal) => haystack.includes(signal))
    ? 'infrastructure'
    : 'code';
}
|
|
369
|
+
|
|
370
|
+
/**
 * Run collected test commands one after another and annotate every outcome.
 *
 * Ordering: suite tier first, then quick, then task. A tier that is not one of
 * those three is ordered alongside task. Commands that share a tier keep their
 * incoming order (this relies on Array.prototype.sort being stable).
 *
 * Short-circuits:
 *  - once a command reports `timed_out`, every later command is recorded as
 *    aborted and is not run;
 *  - a task-tier command for which isCommandCovered() returns true is recorded
 *    as passed with `skipped_covered: true` and is not run.
 *
 * A command that is run counts as passed only when it exits 0 and is flagged
 * neither as a false positive nor as suspect.
 *
 * @param {Array<{command: string, source: string, tier: string, label: string}>} commands
 * @param {Object} [options] - Execution options
 * @param {number} [options.timeout_ms=120000] - Timeout in milliseconds
 * @param {string} [options.cwd=process.cwd()] - Working directory
 * @returns {{results: Array, summary: {total: number, passed: number, failed: number, timed_out: number, aborted: number, false_positives: number, suspects: number, skipped_covered: number}}}
 */
function executeTestSuite(commands, options = {}) {
  const timeout_ms = options.timeout_ms ?? 120000;
  const cwd = options.cwd ?? process.cwd();

  // Work on a copy so the caller's array order is left alone.
  const tierRank = { suite: 0, quick: 1, task: 2 };
  const rankOf = (entry) => tierRank[entry.tier] ?? 2;
  const ordered = commands.slice().sort((left, right) => rankOf(left) - rankOf(right));

  // Inputs for the covered-duplicate check: all suite command strings up
  // front, plus the pass/fail of each suite command as it completes.
  const suiteCommands = ordered
    .filter((entry) => entry.tier === 'suite')
    .map((entry) => entry.command);
  const suiteResults = [];

  // Shared shape for commands that are recorded without being executed.
  const notRun = { exit_code: null, stdout: '', stderr: '', timed_out: false, duration_ms: 0 };

  const results = [];
  let sawTimeout = false;

  for (const entry of ordered) {
    if (sawTimeout) {
      results.push({
        command: entry.command,
        ...notRun,
        passed: false,
        failure_type: 'infrastructure',
        false_positive: false,
        suspect: false,
        skipped_covered: false,
        aborted: true,
        aborted_reason: 'previous command timed out',
      });
      continue;
    }

    // Only task-tier commands are ever considered for the covered shortcut.
    const coveredBySuite =
      entry.tier === 'task' && isCommandCovered(entry.command, suiteCommands, suiteResults);
    if (coveredBySuite) {
      results.push({
        command: entry.command,
        ...notRun,
        passed: true,
        failure_type: null,
        false_positive: false,
        suspect: false,
        skipped_covered: true,
        skipped_reason: 'covered by passing suite command',
      });
      continue;
    }

    const raw = runCommand(entry.command, { timeout_ms, cwd });

    // The false-positive check only applies to a zero exit code; the suspect
    // check is skipped when the run was already flagged as a false positive.
    const false_positive = raw.exit_code === 0 && checkFalsePositive(raw.stdout);
    const suspect = !false_positive && checkSuspect(raw);
    const passed = raw.exit_code === 0 && !false_positive && !suspect;
    const failure_type = passed ? null : classifyFailure(raw);

    results.push({
      command: entry.command,
      exit_code: raw.exit_code,
      stdout: raw.stdout,
      stderr: raw.stderr,
      timed_out: raw.timed_out,
      duration_ms: raw.duration_ms,
      passed,
      failure_type,
      false_positive,
      suspect,
      skipped_covered: false,
      framework_output: parseFrameworkOutput(raw.stdout),
    });

    if (entry.tier === 'suite') {
      suiteResults.push({ passed });
    }

    if (raw.timed_out) {
      sawTimeout = true;
    }
  }

  // `failed` is simply "not passed", so aborted and suspect entries count too.
  const tally = (predicate) => results.filter(predicate).length;
  const summary = {
    total: results.length,
    passed: tally((r) => r.passed),
    failed: tally((r) => !r.passed),
    timed_out: tally((r) => r.timed_out),
    aborted: tally((r) => r.aborted),
    false_positives: tally((r) => r.false_positive),
    suspects: tally((r) => r.suspect),
    skipped_covered: tally((r) => r.skipped_covered),
  };

  return { results, summary };
}
|
|
489
|
+
|
|
490
|
+
// --- Framework Output Parsing ------------------------------------------------
|
|
491
|
+
|
|
492
|
+
/**
 * Parse test framework stdout to extract granular pass/fail/skip counts.
 *
 * Formats are tried in this order and the first match wins:
 *   1. Jest    - a line starting with "Tests:" (e.g. "Tests: 1 failed, 3 passed, 4 total")
 *   2. Vitest  - a "Tests ... (N)" line (e.g. " Tests 2 failed | 3 passed (5)")
 *   3. Mocha   - an "N passing" line, plus optional "N failing" / "N pending"
 *   4. Node.js test runner - "# tests N", "# pass N" and "# fail N" lines,
 *      plus an optional "# skipped N" (or legacy "# skip N") line
 *
 * For a recognized format, a category that is absent from the output is
 * reported as 0. Empty or unrecognized output yields all-null fields.
 *
 * @param {string|null} stdout - Command stdout
 * @returns {{framework: string|null, passed: number|null, failed: number|null, skipped: number|null, total: number|null}}
 */
function parseFrameworkOutput(stdout) {
  const nullResult = { framework: null, passed: null, failed: null, skipped: null, total: null };
  if (!stdout) return nullResult;

  // Pull "<N> <label>" out of a one-line summary; a missing label counts as 0.
  const countOf = (text, label) => {
    const found = text.match(new RegExp(`(\\d+) ${label}`));
    return found ? parseInt(found[1], 10) : 0;
  };

  // 1. Jest: categories may appear in any order on the "Tests:" line.
  const jestLine = stdout.match(/^Tests:\s+(.+)$/m);
  if (jestLine) {
    const tally = jestLine[1];
    return {
      framework: 'jest',
      passed: countOf(tally, 'passed'),
      failed: countOf(tally, 'failed'),
      skipped: countOf(tally, 'skipped'),
      total: countOf(tally, 'total'),
    };
  }

  // 2. Vitest: the total is the parenthesized number closing the line.
  const vitestLine = stdout.match(/^\s*Tests\s+(.+)\((\d+)\)\s*$/m);
  if (vitestLine) {
    const tally = vitestLine[1];
    return {
      framework: 'vitest',
      passed: countOf(tally, 'passed'),
      failed: countOf(tally, 'failed'),
      skipped: countOf(tally, 'skipped'),
      total: parseInt(vitestLine[2], 10),
    };
  }

  // 3. Mocha: detection keys off the "passing" line; no total is printed, so
  // it is the sum of the three categories ("pending" is reported as skipped).
  const mochaPassMatch = stdout.match(/^\s*(\d+) passing/m);
  if (mochaPassMatch) {
    const failMatch = stdout.match(/^\s*(\d+) failing/m);
    const pendMatch = stdout.match(/^\s*(\d+) pending/m);
    const passed = parseInt(mochaPassMatch[1], 10);
    const failed = failMatch ? parseInt(failMatch[1], 10) : 0;
    const skipped = pendMatch ? parseInt(pendMatch[1], 10) : 0;
    return { framework: 'mocha', passed, failed, skipped, total: passed + failed + skipped };
  }

  // 4. Node.js test runner: all three of tests/pass/fail must be present.
  const nodeTestsMatch = stdout.match(/^# tests (\d+)/m);
  const nodePassMatch = stdout.match(/^# pass (\d+)/m);
  const nodeFailMatch = stdout.match(/^# fail (\d+)/m);
  if (nodeTestsMatch && nodePassMatch && nodeFailMatch) {
    // The runner's summary line is "# skipped N". The shorter "# skip N"
    // spelling is still accepted so output matched by the previous pattern
    // keeps parsing the same way.
    const nodeSkipMatch = stdout.match(/^# skip(?:ped)? (\d+)/m);
    return {
      framework: 'node',
      passed: parseInt(nodePassMatch[1], 10),
      failed: parseInt(nodeFailMatch[1], 10),
      skipped: nodeSkipMatch ? parseInt(nodeSkipMatch[1], 10) : 0,
      total: parseInt(nodeTestsMatch[1], 10),
    };
  }

  // 5. Fallback: unrecognized
  return nullResult;
}
|
|
555
|
+
|
|
556
|
+
// --- CLI: Run Test Suite -----------------------------------------------------
|
|
557
|
+
|
|
558
|
+
/**
 * CLI wrapper for executeTestSuite.
 *
 * Resolves the phase, collects its test commands, runs them, and prints the
 * result through output(). The collection summary is attached to the printed
 * object as `collection`.
 *
 * When nothing was collected, an all-zero summary is printed instead; it
 * carries the same keys as the summary executeTestSuite builds, so consumers
 * see one shape in both cases.
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode
 * @param {Object} [options] - Execution options
 * @param {number} [options.timeout_ms] - Per-command timeout in milliseconds
 * @param {string} [options.testCwd] - Directory to run the commands in; falls back to `cwd`
 */
function cmdRunTestSuite(cwd, phase, raw, options = {}) {
  if (!phase) {
    // NOTE(review): error() is defined elsewhere; presumably it does not return.
    error('phase required');
  }

  const phaseInfo = findPhaseInternal(cwd, phase);
  if (!phaseInfo || !phaseInfo.found) {
    output({ error: 'Phase not found', phase }, raw);
    return;
  }

  const collected = collectTestCommands(cwd, phaseInfo.directory);
  if (collected.empty) {
    output({
      results: [],
      summary: {
        total: 0,
        passed: 0,
        failed: 0,
        timed_out: 0,
        aborted: 0,
        false_positives: 0,
        suspects: 0,
        skipped_covered: 0,
      },
      collection: collected.summary,
    }, raw);
    return;
  }

  // Only forward a timeout when one was given, so executeTestSuite's own
  // default applies otherwise.
  const execOptions = { cwd: options.testCwd || cwd };
  if (options.timeout_ms) execOptions.timeout_ms = options.timeout_ms;

  const result = executeTestSuite(collected.commands, execOptions);
  result.collection = collected.summary;
  output(result, raw);
}
|
|
588
|
+
|
|
589
|
+
// --- Result Mapping Engine ----------------------------------------------------
|
|
590
|
+
|
|
591
|
+
/**
 * Decide how severe a failed result is, based on its flags and stderr text.
 *
 * Checks, in order (first hit wins):
 *   1. failure_type is 'infrastructure', or timed_out is exactly true -> blocker
 *   2. stderr matches a compile/syntax/runtime-error pattern          -> blocker
 *   3. stderr has a warning pattern and no assertion pattern          -> minor
 *   4. anything else (assertion pattern present or not)               -> major
 *
 * All pattern checks are case-insensitive and look at stderr only.
 *
 * @param {{exit_code: number, stdout: string, stderr: string, failure_type: string|null, false_positive: boolean, timed_out: boolean}} result
 * @returns {'blocker'|'major'|'minor'}
 */
function inferSeverity(result) {
  if (result.failure_type === 'infrastructure' || result.timed_out === true) {
    return 'blocker';
  }

  const errText = result.stderr || '';

  // Errors that indicate the code could not be loaded, compiled, or built.
  const fatalSignals = [
    /SyntaxError/i,
    /TypeError/i,
    /ReferenceError/i,
    /error TS/i,
    /cannot find module/i,
    /compilation failed/i,
    /Build failed/i,
  ];
  if (fatalSignals.some((signal) => signal.test(errText))) {
    return 'blocker';
  }

  // Warnings only downgrade the result when no assertion failure is mixed in.
  const mentionsWarning = /warning:|DeprecationWarning/i.test(errText);
  const mentionsAssertion = /AssertionError|Expected.*but received|assert\.|FAIL |✕|✗/i.test(errText);

  return mentionsWarning && !mentionsAssertion ? 'minor' : 'major';
}
|
|
640
|
+
|
|
641
|
+
/**
 * Parse manual verification items from VALIDATION.md and PLAN.md files.
 *
 * From each plan: for every <task>...</task> element, the first
 * <manual>...</manual> block found anywhere inside that task becomes one item.
 * The item is labeled with the plan id and the task's <name> (or "task N",
 * counting tasks from 1, when the task has no <name>).
 *
 * From VALIDATION.md: every data row of the table under the
 * "## Manual-Only Verifications" heading becomes one item. The first two
 * table lines are treated as header and separator. Columns are read by
 * position (2nd = behavior, 4th = test instructions), so an empty cell does
 * not shift the columns that follow it. Rows with fewer than four columns,
 * or with both behavior and instructions empty, are skipped.
 *
 * Plan items come first, then validation items.
 *
 * @param {string|null} validationContent - VALIDATION.md content
 * @param {Array<{content: string|null, planId: string}>} planContents - Array of plan content objects
 * @returns {Array<{description: string, source: string, label: string}>}
 */
function collectManualVerifications(validationContent, planContents) {
  const items = [];

  // --- PLAN.md: <manual> blocks inside <task> elements ---
  if (Array.isArray(planContents)) {
    for (const plan of planContents) {
      // Unreadable plans arrive with null content; skip them (and null entries).
      if (!plan || !plan.content) continue;

      let taskIndex = 0;
      for (const taskMatch of plan.content.matchAll(/<task[^>]*>([\s\S]*?)<\/task>/g)) {
        taskIndex++;
        const taskContent = taskMatch[1];

        const nameMatch = taskContent.match(/<name>([\s\S]*?)<\/name>/);
        const taskName = nameMatch ? nameMatch[1].trim() : `task ${taskIndex}`;

        // Only the first <manual> block of a task is used.
        const manualMatch = taskContent.match(/<manual>([\s\S]*?)<\/manual>/);
        if (manualMatch) {
          items.push({
            description: manualMatch[1].trim(),
            source: 'PLAN.md',
            label: `${plan.planId} ${taskName}`,
          });
        }
      }
    }
  }

  // --- VALIDATION.md: "Manual-Only Verifications" table ---
  if (validationContent) {
    // The section runs until the next "## " heading or the end of the file.
    const sectionMatch = validationContent.match(/## Manual-Only Verifications\s*\n([\s\S]*?)(?=\n## |\n$|$)/);
    if (sectionMatch) {
      const tableLines = sectionMatch[1]
        .split('\n')
        .map((line) => line.trim())
        .filter((line) => line.startsWith('|'));

      // Skip the header row and the |---| separator row.
      for (const row of tableLines.slice(2)) {
        // Data row: | # | Behavior | Context | Test Instructions |
        // Drop only the outer pipes and keep empty cells in place, so a blank
        // cell cannot shift later columns to the left.
        const cells = row
          .replace(/^\|/, '')
          .replace(/\|$/, '')
          .split('|')
          .map((cell) => cell.trim());
        if (cells.length < 4) continue;

        const behavior = cells[1];
        const instructions = cells[3];
        if (!behavior && !instructions) continue;

        items.push({
          description: `${behavior}: ${instructions}`,
          source: 'VALIDATION.md',
          label: 'manual-only',
        });
      }
    }
  }

  return items;
}
|
|
724
|
+
|
|
725
|
+
/**
 * Generate full UAT file content string from execution results and manual verifications.
 *
 * Layout of the returned markdown:
 *   - frontmatter (status, phase, mode, ai_verified, optional source, started, updated)
 *   - "## Current Test"
 *   - "## Tests": one "### N." entry per automated result, then one per manual item
 *   - "## Summary": counters
 *   - "## Gaps": one "- truth:" entry per actual failure
 *
 * An "actual failure" is a result that did not pass and is neither suspect nor
 * skipped as covered. `status` is 'gaps_found' when at least one exists,
 * otherwise 'complete'. Suspect results are listed as `result: suspect` and do
 * not produce gap entries.
 *
 * Text placed inside double-quoted fields (reported, truth, reason, the gap
 * command and stderr) has backslashes, double quotes and line breaks escaped,
 * so each field stays on one line. stderr is cut to 500 characters before
 * escaping. The unquoted `command:` line of each test entry is written verbatim.
 *
 * @param {{results: Array, summary: Object}} executionResult - From executeTestSuite
 * @param {Array<{description: string, source: string, label: string}>} manualVerifications - From collectManualVerifications
 * @param {{phase: string, phaseDir: string, sourceFiles: string[]}} phaseInfo - Phase metadata
 * @returns {string} UAT markdown file content
 */
function mapResultsToUat(executionResult, manualVerifications, phaseInfo) {
  const results = executionResult.results || [];
  const manual = manualVerifications || [];

  // Make arbitrary text safe inside a one-line "..." field. Backslashes go
  // first so the escapes added for quotes and newlines are not doubled.
  const quoteSafe = (text) => String(text)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r\n|\r|\n/g, '\\n');
  const shorten = (text) => (text.length > 60 ? text.substring(0, 60) + '...' : text);
  const stderrExcerpt = (r) => quoteSafe((r.stderr || '').substring(0, 500));
  const isActualFailure = (r) => !r.passed && !r.suspect && !r.skipped_covered;

  const passedCount = results.filter((r) => r.passed).length;
  const actualFailedCount = results.filter(isActualFailure).length;
  const suspectCount = results.filter((r) => r.suspect).length;
  const skippedCount = results.filter((r) => r.skipped_covered).length;
  const humanCount = manual.length;
  const totalCount = results.length + humanCount;
  const status = actualFailedCount === 0 ? 'complete' : 'gaps_found';
  const now = new Date().toISOString();

  const lines = [];

  // --- Frontmatter ---
  lines.push('---');
  lines.push(`status: ${status}`);
  lines.push(`phase: ${phaseInfo.phase}`);
  lines.push('mode: auto-test');
  lines.push('ai_verified: true');
  if (phaseInfo.sourceFiles && phaseInfo.sourceFiles.length > 0) {
    lines.push(`source: ${phaseInfo.sourceFiles.join(', ')}`);
  }
  lines.push(`started: ${now}`);
  lines.push(`updated: ${now}`);
  lines.push('---');
  lines.push('');

  // --- Current Test ---
  lines.push('## Current Test');
  lines.push('');
  lines.push('[testing complete]');
  lines.push('');

  // --- Tests Section ---
  lines.push('## Tests');
  lines.push('');

  let testNum = 0;

  // Automated results
  for (const r of results) {
    testNum++;
    lines.push(`### ${testNum}. ${shorten(r.command)}`);
    lines.push('expected: Command exits with code 0');

    if (r.passed && !r.skipped_covered) {
      lines.push('result: pass');
    } else if (r.skipped_covered) {
      lines.push('result: pass');
      lines.push('note: covered by passing suite command — skipped');
    } else if (r.suspect) {
      lines.push('result: suspect');
      lines.push('note: "Exit code 0 but output lacks test framework assertion markers — requires human review"');
      lines.push('severity: minor');
    } else {
      lines.push('result: issue');
      lines.push(`severity: ${inferSeverity(r)}`);
      lines.push(`reported: "Exit code ${r.exit_code}: ${stderrExcerpt(r)}"`);
    }
    // Written verbatim: the rerun pipeline reads this line back as the
    // command to execute.
    lines.push(`command: ${r.command}`);
    lines.push('');
  }

  // Manual verification entries
  for (const m of manual) {
    testNum++;
    lines.push(`### ${testNum}. ${shorten(m.description)}`);
    lines.push(`expected: ${m.description}`);
    lines.push('result: human_needed');
    lines.push(`source: ${m.source}`);
    lines.push(`label: ${m.label}`);
    lines.push('');
  }

  // --- Summary Section ---
  lines.push('## Summary');
  lines.push('');
  lines.push(`total: ${totalCount}`);
  lines.push(`passed: ${passedCount}`);
  lines.push(`issues: ${actualFailedCount}`);
  lines.push(`suspect: ${suspectCount}`);
  lines.push(`human_needed: ${humanCount}`);
  lines.push('pending: 0');
  lines.push(`skipped: ${skippedCount}`);
  lines.push('');

  // --- Gaps Section ---
  lines.push('## Gaps');
  lines.push('');

  // Only actual failures appear in Gaps (not suspect or skipped_covered).
  // `test` refers back to the entry's number in the Tests section.
  results.forEach((r, index) => {
    if (!isActualFailure(r)) return;

    const severity = inferSeverity(r);
    const gapType = r.failure_type === 'infrastructure' ? 'infrastructure_gap' : 'code_gap';
    const safeCommand = quoteSafe(r.command);
    const safeStderr = stderrExcerpt(r);

    // Continuation keys are indented two spaces so they line up under the
    // "truth" key of the list item.
    lines.push(`- truth: "Command '${safeCommand}' passes"`);
    lines.push('  status: failed');
    lines.push(`  reason: "Exit code ${r.exit_code}: ${safeStderr}"`);
    lines.push(`  severity: ${severity}`);
    lines.push(`  gap_type: ${gapType}`);
    lines.push(`  command: "${safeCommand}"`);
    lines.push(`  test: ${index + 1}`);
    lines.push(`  exit_code: ${r.exit_code}`);
    lines.push(`  stderr: "${safeStderr}"`);
    lines.push('  artifacts: []');
    lines.push('  missing: []');
  });

  return lines.join('\n');
}
|
|
856
|
+
|
|
857
|
+
// --- CLI: Map Results to UAT -------------------------------------------------
|
|
858
|
+
|
|
859
|
+
/**
 * CLI wrapper that orchestrates the full auto-test pipeline:
 * collect commands, execute tests, collect manual verifications,
 * generate UAT file, and write to disk.
 *
 * Two files are written into the phase directory, overwriting any existing
 * ones: "<phaseNum>-UAT.md" (from mapResultsToUat) and "<phaseNum>-UAT-LOG.md"
 * (from generateTestLog). <phaseNum> is the leading digits of the phase
 * directory name, or the `phase` argument when the name has none.
 *
 * The printed `status` follows the same rule as the UAT file's frontmatter:
 * 'gaps_found' only when some result did not pass and is neither suspect nor
 * skipped as covered. `summary.failed` may therefore be non-zero while status
 * is 'complete' (suspect results count as failed there).
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode
 * @param {Object} [options] - Execution options
 * @param {number} [options.timeout_ms] - Per-command timeout in milliseconds
 * @param {string} [options.testCwd] - Directory to run the commands in; falls back to `cwd`
 */
function cmdMapResults(cwd, phase, raw, options = {}) {
  if (!phase) {
    // NOTE(review): error() is defined elsewhere; presumably it does not return.
    error('phase required');
  }
  const phaseInfo = findPhaseInternal(cwd, phase);
  if (!phaseInfo || !phaseInfo.found) {
    output({ error: 'Phase not found', phase }, raw);
    return;
  }

  const phaseDir = phaseInfo.directory;
  const phaseDirFull = path.join(cwd, phaseDir);

  // 1. Collect and execute
  const collected = collectTestCommands(cwd, phaseDir);
  let executionResult;
  if (collected.empty) {
    // Same keys as the summary executeTestSuite produces.
    executionResult = {
      results: [],
      summary: {
        total: 0,
        passed: 0,
        failed: 0,
        timed_out: 0,
        aborted: 0,
        false_positives: 0,
        suspects: 0,
        skipped_covered: 0,
      },
    };
  } else {
    const execOptions = { cwd: options.testCwd || cwd };
    if (options.timeout_ms) execOptions.timeout_ms = options.timeout_ms;
    executionResult = executeTestSuite(collected.commands, execOptions);
  }

  // 2. Collect manual verifications from the phase's VALIDATION and PLAN files
  const files = fs.readdirSync(phaseDirFull);
  let validationContent = null;
  const validationFile = files.find(f => /-VALIDATION\.md$/i.test(f) || f === 'VALIDATION.md');
  if (validationFile) {
    validationContent = safeReadFile(path.join(phaseDirFull, validationFile));
  }
  const planContents = [];
  const planFiles = files.filter(f => f.endsWith('-PLAN.md')).sort();
  for (const planFile of planFiles) {
    const planId = planFile.replace('-PLAN.md', '');
    const content = safeReadFile(path.join(phaseDirFull, planFile));
    planContents.push({ content, planId });
  }
  const manualVerifications = collectManualVerifications(validationContent, planContents);

  // 3. Build phase info (SUMMARY files are listed as the UAT's sources)
  const sourceFiles = files.filter(f => f.endsWith('-SUMMARY.md'));
  const mapPhaseInfo = {
    phase: phaseInfo.name || phaseDir.split('/').pop(),
    phaseDir: phaseDir,
    sourceFiles: sourceFiles,
  };

  // 4. Generate UAT content and write
  const uatContent = mapResultsToUat(executionResult, manualVerifications, mapPhaseInfo);
  const phaseNum = phaseDir.split('/').pop().match(/^(\d+)/)?.[1] || phase;
  const uatPath = path.join(phaseDirFull, `${phaseNum}-UAT.md`);
  fs.writeFileSync(uatPath, uatContent, 'utf-8');

  // 4b. Generate and write test log
  const logContent = generateTestLog(executionResult);
  const logPath = path.join(phaseDirFull, `${phaseNum}-UAT-LOG.md`);
  fs.writeFileSync(logPath, logContent, 'utf-8');

  // 5. Output result. Status uses the UAT file's own definition of a gap so
  // the printed status and the file's frontmatter cannot disagree.
  const hasGaps = (executionResult.results || [])
    .some(r => !r.passed && !r.suspect && !r.skipped_covered);
  output({
    uat_path: path.relative(cwd, uatPath),
    log_path: path.relative(cwd, logPath),
    summary: executionResult.summary,
    manual_count: manualVerifications.length,
    status: hasGaps ? 'gaps_found' : 'complete',
  }, raw);
}
|
|
938
|
+
|
|
939
|
+
// --- Diagnosis Check ---------------------------------------------------------
|
|
940
|
+
|
|
941
|
+
/**
 * Tell whether a UAT file still needs a diagnosis pass.
 *
 * That is the case only when both hold:
 *   - some line reads `status: gaps_found`, and
 *   - the "## Gaps" section contains at least one "- truth:" entry.
 *
 * @param {string|null} uatContent - UAT file content
 * @returns {boolean} true if UAT has status: gaps_found and non-empty gaps
 */
function checkNeedsDiagnosis(uatContent) {
  if (!uatContent) return false;

  // The status line must read exactly "gaps_found"; any other status opts out.
  const flaggedWithGaps = /^status:\s*gaps_found\s*$/m.test(uatContent);
  if (!flaggedWithGaps) return false;

  // Body of the Gaps section: up to the next "## " heading or end of content.
  const section = /## Gaps\s*\n([\s\S]*?)(?=\n## |$)/.exec(uatContent);
  if (section === null) return false;

  // An empty section or a bare placeholder means there is nothing to diagnose.
  const body = section[1].trim();
  const placeholders = ['', '[none yet]', '[none]'];
  if (placeholders.includes(body)) return false;

  // Must have at least one "- truth:" entry
  return /- truth:/.test(body);
}
|
|
960
|
+
|
|
961
|
+
// --- Test Log Generation -----------------------------------------------------
|
|
962
|
+
|
|
963
|
+
/**
 * Build the plain-text test log: one delimited section per command holding
 * its stdout and stderr in full, with no truncation applied here.
 *
 * Each section opens with COMMAND / TIMESTAMP / EXIT CODE / DURATION marker
 * lines and closes with an END marker. The TIMESTAMP is taken when the log is
 * generated; results carry no time of their own. Empty stdout or stderr is
 * written as "(empty)".
 *
 * @param {{results: Array<{command: string, exit_code: number|null, stdout: string, stderr: string, timed_out: boolean, duration_ms: number}>}} executionResult
 * @returns {string} Delimited log file content
 */
function generateTestLog(executionResult) {
  const entries = executionResult.results || [];

  // Header
  const logLines = [
    '# Auto-Test Log',
    `# Generated: ${new Date().toISOString()}`,
    '',
  ];

  for (const entry of entries) {
    logLines.push(
      `=== COMMAND: ${entry.command} ===`,
      `=== TIMESTAMP: ${new Date().toISOString()} ===`,
      `=== EXIT CODE: ${entry.exit_code} ===`,
      `=== DURATION: ${entry.duration_ms}ms ===`,
      '',
      '--- stdout ---',
      entry.stdout || '(empty)',
      '--- stderr ---',
      entry.stderr || '(empty)',
      '',
      `=== END: ${entry.command} ===`,
      '',
    );
  }

  return logLines.join('\n');
}
|
|
999
|
+
|
|
1000
|
+
// --- Rerun Pipeline ----------------------------------------------------------
|
|
1001
|
+
|
|
1002
|
+
/**
 * Pull the commands of failed tests out of an auto-test UAT file.
 *
 * Nothing is returned unless the content has a `mode: auto-test` line. The
 * content is cut into sections at every "### N." heading; a section
 * contributes its first `command:` value only when it contains a
 * `result: issue` line. Sections with any other result, or with no
 * `command:` line, contribute nothing.
 *
 * @param {string|null} uatContent - UAT file content
 * @returns {string[]} Array of command strings from failed tests
 */
function parseUatForFailedCommands(uatContent) {
  const isAutoTest = Boolean(uatContent) && /^mode:\s*auto-test\s*$/m.test(uatContent);
  if (!isAutoTest) return [];

  return uatContent
    // The lookahead split keeps each "### N." heading with its own section.
    .split(/(?=^### \d+\.)/m)
    // Drop the preamble before the first heading, then keep failed tests only.
    .filter((section) => /^### \d+\./.test(section) && /^result:\s*issue\s*$/m.test(section))
    .map((section) => section.match(/^command:\s*(.+)$/m))
    .filter((found) => found !== null)
    .map((found) => found[1].trim());
}
|
|
1038
|
+
|
|
1039
|
+
/**
 * Collect the structural gap entries recorded in an auto-test UAT file.
 *
 * The content must have a `mode: auto-test` line. It is cut into sections at
 * every "### N." heading; a section is reported only when it has all of:
 *   - a `result: issue` line,
 *   - a `source: structural_verification` line,
 *   - a heading of the form "### N. [STRUCTURAL] <description>".
 * `reported` is taken from the section's `reported:` line with surrounding
 * double quotes dropped ('' when absent); `severity` defaults to 'major'.
 *
 * Used by --rerun-failed to pass the gap list to the verifier for targeted re-check.
 * @param {string} uatContent - Raw UAT file content
 * @returns {Array<{description: string, reported: string, severity: string}>}
 */
function parseUatForStructuralGaps(uatContent) {
  if (!uatContent) return [];
  if (!/^mode:\s*auto-test\s*$/m.test(uatContent)) return [];

  const gaps = [];

  for (const section of uatContent.split(/(?=^### \d+\.)/m)) {
    const isStructuralIssue =
      /^### \d+\./.test(section) &&
      /^result:\s*issue\s*$/m.test(section) &&
      /^source:\s*structural_verification\s*$/m.test(section);
    if (!isStructuralIssue) continue;

    // Without the [STRUCTURAL] tag in the heading the section is ignored.
    const title = section.match(/^### \d+\.\s*\[STRUCTURAL\]\s*(.+)$/m);
    if (!title) continue;

    const reportedLine = section.match(/^reported:\s*"?(.+?)"?\s*$/m);
    const severityLine = section.match(/^severity:\s*(\w+)\s*$/m);

    gaps.push({
      description: title[1].trim(),
      reported: reportedLine ? reportedLine[1].trim() : '',
      severity: severityLine ? severityLine[1].trim() : 'major',
    });
  }

  return gaps;
}
|
|
1073
|
+
|
|
1074
|
+
/**
 * Run a list of previously-failed test commands again, in the given order.
 *
 * Each command goes through runCommand; the outcome is then annotated with
 * checkFalsePositive (zero exit codes only), classifyFailure (failures only)
 * and parseFrameworkOutput. A rerun passes when it exits 0 and is not flagged
 * as a false positive. No suspect check and no timeout abort is applied here:
 * every command in the list is run.
 *
 * An empty list short-circuits to `all_passed: true` with no results.
 *
 * @param {string[]} failedCommands - Array of command strings to re-execute
 * @param {Object} [options] - Execution options
 * @param {string} [options.cwd=process.cwd()] - Working directory
 * @param {number} [options.timeout_ms=120000] - Timeout in milliseconds
 * @returns {{rerun_count: number, all_passed: boolean, results: Array}}
 */
function rerunFailedTests(failedCommands, options = {}) {
  if (failedCommands.length === 0) {
    return { rerun_count: 0, all_passed: true, results: [] };
  }

  const cwd = options.cwd ?? process.cwd();
  const timeout_ms = options.timeout_ms ?? 120000;

  const results = failedCommands.map((command) => {
    const raw = runCommand(command, { cwd, timeout_ms });

    // A zero exit code only counts when the output is not a false positive.
    const false_positive = raw.exit_code === 0 && checkFalsePositive(raw.stdout);
    const passed = raw.exit_code === 0 && !false_positive;

    return {
      command,
      exit_code: raw.exit_code,
      stdout: raw.stdout,
      stderr: raw.stderr,
      timed_out: raw.timed_out,
      duration_ms: raw.duration_ms,
      passed,
      failure_type: passed ? null : classifyFailure(raw),
      false_positive,
      framework_output: parseFrameworkOutput(raw.stdout),
    };
  });

  return {
    rerun_count: results.length,
    all_passed: results.every((outcome) => outcome.passed),
    results,
  };
}
|
|
1119
|
+
|
|
1120
|
+
/**
 * CLI entry point for the rerun-failed pipeline.
 *
 * Loads the phase's existing UAT file, extracts the commands recorded as
 * failed, runs them again, combines the fresh results with the commands
 * already recorded as passing, then rewrites the UAT file and writes a log
 * covering only the rerun. The outcome is reported through output().
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode
 * @param {Object} [options] - Optional settings
 * @param {string} [options.testCwd] - Directory in which the commands are
 *   re-executed; falls back to `cwd` when not set
 */
function cmdRerunFailed(cwd, phase, raw, options = {}) {
  if (!phase) {
    error('phase required');
  }

  const phaseInfo = findPhaseInternal(cwd, phase);
  if (!(phaseInfo && phaseInfo.found)) {
    output({ error: 'Phase not found', phase }, raw);
    return;
  }

  const phaseDir = phaseInfo.directory;
  const phaseDirFull = path.join(cwd, phaseDir);
  // NOTE(review): only the leading integer of the directory name is captured;
  // confirm this is intended for directory names that carry a decimal part.
  const phaseNum = phaseDir.split('/').pop().match(/^(\d+)/)?.[1] || phase;
  const uatPath = path.join(phaseDirFull, `${phaseNum}-UAT.md`);

  const uatContent = safeReadFile(uatPath);
  if (!uatContent) {
    output({ error: 'No UAT file found for phase', phase }, raw);
    return;
  }

  // Nothing recorded as failed: report and stop without touching any file.
  const failedCommands = parseUatForFailedCommands(uatContent);
  if (failedCommands.length === 0) {
    output({ rerun_count: 0, all_passed: true, status: 'complete', message: 'No failed tests to rerun' }, raw);
    return;
  }

  const rerunResult = rerunFailedTests(failedCommands, { cwd: options.testCwd || cwd });

  // Rebuild placeholder results for every "### N." section that was marked
  // as passing and carries a command line; their output is not re-captured.
  const originalPassResults = uatContent
    .split(/(?=^### \d+\.)/m)
    .filter(section => /^### \d+\./.test(section) && /^result:\s*pass\s*$/m.test(section))
    .map(section => section.match(/^command:\s*(.+)$/m))
    .filter(Boolean)
    .map(found => ({
      command: found[1].trim(),
      exit_code: 0,
      stdout: '',
      stderr: '',
      timed_out: false,
      duration_ms: 0,
      passed: true,
      failure_type: null,
      false_positive: false,
      framework_output: { framework: null, passed: null, failed: null, skipped: null, total: null },
    }));

  const tally = (list, predicate) => list.filter(predicate).length;

  // Original passes come first, followed by the rerun results.
  const mergedResults = [...originalPassResults, ...rerunResult.results];
  const mergedExecutionResult = {
    results: mergedResults,
    summary: {
      total: mergedResults.length,
      passed: tally(mergedResults, r => r.passed),
      failed: tally(mergedResults, r => !r.passed),
      timed_out: tally(mergedResults, r => r.timed_out),
      aborted: tally(mergedResults, r => r.aborted),
      false_positives: tally(mergedResults, r => r.false_positive),
    },
  };

  // Gather manual verifications from the validation file and the plan files.
  const files = fs.readdirSync(phaseDirFull);
  const validationFile = files.find(f => /-VALIDATION\.md$/i.test(f) || f === 'VALIDATION.md');
  const validationContent = validationFile
    ? safeReadFile(path.join(phaseDirFull, validationFile))
    : null;
  const planContents = files
    .filter(f => f.endsWith('-PLAN.md'))
    .sort()
    .map(planFile => ({
      content: safeReadFile(path.join(phaseDirFull, planFile)),
      planId: planFile.replace('-PLAN.md', ''),
    }));
  const manualVerifications = collectManualVerifications(validationContent, planContents);

  const mapPhaseInfo = {
    phase: phaseInfo.name || phaseDir.split('/').pop(),
    phaseDir,
    sourceFiles: files.filter(f => f.endsWith('-SUMMARY.md')),
  };

  // Overwrite the UAT file with a document built from the merged results.
  const newUatContent = mapResultsToUat(mergedExecutionResult, manualVerifications, mapPhaseInfo);
  fs.writeFileSync(uatPath, newUatContent, 'utf-8');

  // The log covers the rerun only, not the merged set.
  const passedRerun = tally(rerunResult.results, r => r.passed);
  const failedRerun = tally(rerunResult.results, r => !r.passed);
  const rerunExecutionResult = {
    results: rerunResult.results,
    summary: {
      total: rerunResult.results.length,
      passed: passedRerun,
      failed: failedRerun,
      timed_out: tally(rerunResult.results, r => r.timed_out),
      aborted: 0,
      false_positives: tally(rerunResult.results, r => r.false_positive),
    },
  };
  const logContent = generateTestLog(rerunExecutionResult);
  const logPath = path.join(phaseDirFull, `${phaseNum}-UAT-LOG.md`);
  fs.writeFileSync(logPath, logContent, 'utf-8');

  const allPassed = rerunResult.all_passed;

  // Structural gaps are taken from the UAT content as it was before the rewrite.
  const structuralGaps = parseUatForStructuralGaps(uatContent);

  output({
    uat_path: path.relative(cwd, uatPath),
    log_path: path.relative(cwd, logPath),
    rerun_count: rerunResult.rerun_count,
    all_passed: allPassed,
    status: allPassed ? 'complete' : 'gaps_found',
    summary: `${passedRerun} passed, ${failedRerun} failed (of ${rerunResult.rerun_count} rerun)`,
    structural_gaps: structuralGaps,
    structural_gap_count: structuralGaps.length,
  }, raw);
}
|
|
1262
|
+
|
|
1263
|
+
// --- CLI: Check Diagnosis ----------------------------------------------------
|
|
1264
|
+
|
|
1265
|
+
/**
 * CLI entry point: load a phase's UAT file and report whether diagnosis is needed.
 *
 * When the UAT file cannot be read, `needs_diagnosis: false` is emitted with a
 * reason. Otherwise the verdict comes from checkNeedsDiagnosis and is mirrored
 * in `status` ('gaps_found' when diagnosis is needed, 'complete' otherwise).
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode
 */
function cmdCheckDiagnosis(cwd, phase, raw) {
  if (!phase) {
    error('phase required');
  }

  const phaseInfo = findPhaseInternal(cwd, phase);
  if (!(phaseInfo && phaseInfo.found)) {
    output({ error: 'Phase not found', phase }, raw);
    return;
  }

  const phaseDirFull = path.join(cwd, phaseInfo.directory);
  // Leading digits of the directory name, or the caller's identifier if none.
  const dirName = phaseInfo.directory.split('/').pop();
  const phaseNum = dirName.match(/^(\d+)/)?.[1] || phase;
  const uatPath = path.join(phaseDirFull, `${phaseNum}-UAT.md`);

  const uatContent = safeReadFile(uatPath);
  if (!uatContent) {
    output({ needs_diagnosis: false, reason: 'No UAT file found', uat_path: null }, raw);
    return;
  }

  const needsDiag = checkNeedsDiagnosis(uatContent);
  const status = needsDiag ? 'gaps_found' : 'complete';
  output({
    needs_diagnosis: needsDiag,
    uat_path: path.relative(cwd, uatPath),
    status,
  }, raw);
}
|
|
1296
|
+
|
|
1297
|
+
/**
 * CLI entry point: read a phase's UAT file and emit its status as structured JSON,
 * so the job orchestrator can check phase audit status programmatically.
 *
 * `status` and `mode` come from the file's frontmatter; the summary counters
 * are taken from the first `<field>: <digits>` line found for each field and
 * default to 0 when no such line exists.
 *
 * @param {string} cwd - Working directory
 * @param {string} phase - Phase number/identifier
 * @param {boolean} raw - Raw output mode
 */
function cmdPhaseUatStatus(cwd, phase, raw) {
  if (!phase) {
    error('phase required');
  }

  const phaseInfo = findPhaseInternal(cwd, phase);
  if (!(phaseInfo && phaseInfo.found)) {
    output({ error: 'Phase not found', phase }, raw);
    return;
  }

  const phaseDirFull = path.join(cwd, phaseInfo.directory);
  // Leading digits of the directory name, or the caller's identifier if none.
  const dirName = phaseInfo.directory.split('/').pop();
  const phaseNum = dirName.match(/^(\d+)/)?.[1] || phase;
  const uatPath = path.join(phaseDirFull, `${phaseNum}-UAT.md`);

  const uatContent = safeReadFile(uatPath);
  if (!uatContent) {
    output({ status: null, uat_path: null, phase: phaseNum, reason: 'no_uat_file' }, raw);
    return;
  }

  const fm = extractFrontmatter(uatContent);

  const countFields = ['total', 'passed', 'issues', 'human_needed', 'pending', 'skipped'];
  const summary = Object.fromEntries(
    countFields.map(field => {
      const hit = uatContent.match(new RegExp(`^${field}:\\s*(\\d+)`, 'm'));
      return [field, hit ? parseInt(hit[1], 10) : 0];
    })
  );

  // Frontmatter may carry the flag as a boolean or as the string 'true'.
  const aiVerified = fm.ai_verified === true || fm.ai_verified === 'true';

  output({
    status: fm.status || null,
    phase: phaseNum,
    uat_path: path.relative(cwd, uatPath),
    mode: fm.mode || null,
    ai_verified: aiVerified,
    summary,
  }, raw);
}
|
|
1338
|
+
|
|
1339
|
+
module.exports = {
|
|
1340
|
+
parseValidationMd,
|
|
1341
|
+
parsePlanVerifyBlocks,
|
|
1342
|
+
deduplicateCommands,
|
|
1343
|
+
collectTestCommands,
|
|
1344
|
+
cmdCollectTestCommands,
|
|
1345
|
+
runCommand,
|
|
1346
|
+
checkFalsePositive,
|
|
1347
|
+
checkSuspect,
|
|
1348
|
+
isCommandCovered,
|
|
1349
|
+
classifyFailure,
|
|
1350
|
+
executeTestSuite,
|
|
1351
|
+
parseFrameworkOutput,
|
|
1352
|
+
cmdRunTestSuite,
|
|
1353
|
+
cmdMapResults,
|
|
1354
|
+
inferSeverity,
|
|
1355
|
+
collectManualVerifications,
|
|
1356
|
+
mapResultsToUat,
|
|
1357
|
+
checkNeedsDiagnosis,
|
|
1358
|
+
cmdCheckDiagnosis,
|
|
1359
|
+
cmdPhaseUatStatus,
|
|
1360
|
+
generateTestLog,
|
|
1361
|
+
parseUatForFailedCommands,
|
|
1362
|
+
parseUatForStructuralGaps,
|
|
1363
|
+
rerunFailedTests,
|
|
1364
|
+
cmdRerunFailed,
|
|
1365
|
+
};
|