codeharness 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +186 -33
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -919,6 +919,14 @@ function storyVerificationPatch() {
|
|
|
919
919
|
- [ ] All acceptance criteria verified with real-world evidence
|
|
920
920
|
- [ ] Test coverage meets target (100%)
|
|
921
921
|
|
|
922
|
+
### Verification Tags
|
|
923
|
+
|
|
924
|
+
For each AC, append a verification tag to indicate how it can be verified:
|
|
925
|
+
- \`<!-- verification: cli-verifiable -->\` \u2014 AC can be verified by running CLI commands in a subprocess
|
|
926
|
+
- \`<!-- verification: integration-required -->\` \u2014 AC requires integration testing, multi-system interaction, or manual verification
|
|
927
|
+
|
|
928
|
+
ACs referencing workflows, sprint planning, user sessions, or external system interactions should be tagged as \`integration-required\`. If no tag is present, a heuristic classifier will attempt to determine verifiability at runtime.
|
|
929
|
+
|
|
922
930
|
## Documentation Requirements
|
|
923
931
|
|
|
924
932
|
- [ ] Relevant AGENTS.md files updated (list modules touched)
|
|
@@ -1340,7 +1348,7 @@ function importStoriesToBeads(stories, opts, beadsFns) {
|
|
|
1340
1348
|
}
|
|
1341
1349
|
|
|
1342
1350
|
// src/commands/init.ts
|
|
1343
|
-
var HARNESS_VERSION = true ? "0.
|
|
1351
|
+
// Harness version string. Because the condition is the literal `true`, the
// first branch ("0.9.0") is always selected and "0.0.0-dev" is never used.
// NOTE(review): presumably a build-time flag inlined by the bundler — confirm.
var HARNESS_VERSION = true ? "0.9.0" : "0.0.0-dev";
|
|
1344
1352
|
function getStackLabel(stack) {
|
|
1345
1353
|
if (stack === "nodejs") return "Node.js (package.json)";
|
|
1346
1354
|
if (stack === "python") return "Python";
|
|
@@ -2573,6 +2581,29 @@ var DB_KEYWORDS = [
|
|
|
2573
2581
|
"sql",
|
|
2574
2582
|
"table"
|
|
2575
2583
|
];
|
|
2584
|
+
// Phrases that mark an acceptance criterion as needing integration-level
// verification. They are matched as plain substrings against the lower-cased
// description, so every entry here must itself be lower-case.
var INTEGRATION_KEYWORDS = [
  "sprint planning",
  "workflow",
  "run /command",
  "user session",
  "multi-step",
  "external system",
  "real infrastructure",
  "integration test",
  "manual verification"
];

/**
 * Heuristically decides how an acceptance criterion can be verified.
 *
 * @param {string} description - Free-text acceptance-criterion description.
 * @returns {"integration-required" | "cli-verifiable"} "integration-required"
 *   when the description contains any INTEGRATION_KEYWORDS phrase
 *   (case-insensitive substring match), otherwise "cli-verifiable".
 */
function classifyVerifiability(description) {
  const haystack = description.toLowerCase();
  const needsIntegration = INTEGRATION_KEYWORDS.some((phrase) => haystack.includes(phrase));
  return needsIntegration ? "integration-required" : "cli-verifiable";
}
|
|
2602
|
+
// Matches an HTML comment of the form `<!-- verification: <tag> -->` where
// <tag> is one of the two supported values; group 1 captures the tag.
// Not global, so each exec() call starts from the beginning of the input.
var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required)\s*-->/;

/**
 * Extracts the first explicit verification tag from a piece of text.
 *
 * @param {string} text - Text that may contain a verification tag comment.
 * @returns {string | null} The captured tag value ("cli-verifiable" or
 *   "integration-required"), or null when no tag comment is present.
 */
function parseVerificationTag(text) {
  const found = VERIFICATION_TAG_PATTERN.exec(text);
  if (found === null) {
    return null;
  }
  const [, tagValue] = found;
  return tagValue;
}
|
|
2576
2607
|
function classifyAC(description) {
|
|
2577
2608
|
const lower = description.toLowerCase();
|
|
2578
2609
|
for (const kw of UI_KEYWORDS) {
|
|
@@ -2622,10 +2653,13 @@ function parseStoryACs(storyFilePath) {
|
|
|
2622
2653
|
if (currentId !== null && currentDesc.length > 0) {
|
|
2623
2654
|
const description = currentDesc.join(" ").trim();
|
|
2624
2655
|
if (description) {
|
|
2656
|
+
const tag = parseVerificationTag(description);
|
|
2657
|
+
const verifiability = tag ?? classifyVerifiability(description);
|
|
2625
2658
|
acs.push({
|
|
2626
2659
|
id: currentId,
|
|
2627
2660
|
description,
|
|
2628
|
-
type: classifyAC(description)
|
|
2661
|
+
type: classifyAC(description),
|
|
2662
|
+
verifiability
|
|
2629
2663
|
});
|
|
2630
2664
|
} else {
|
|
2631
2665
|
warn(`Skipping malformed AC #${currentId}: empty description`);
|
|
@@ -2764,6 +2798,62 @@ function getNewestSourceMtime(dir) {
|
|
|
2764
2798
|
walk(dir);
|
|
2765
2799
|
return newest;
|
|
2766
2800
|
}
|
|
2801
|
+
/**
 * Recursively collects the names of non-test source files under a module
 * directory.
 *
 * Directories named node_modules, .git, __tests__, dist or coverage are never
 * descended into, and neither is any dot-directory other than the module root
 * itself. Directories that cannot be listed and entries that cannot be stat'ed
 * are skipped silently (best-effort scan).
 *
 * @param {string} modulePath - Directory to scan.
 * @returns {string[]} Entry names (basenames, not paths) of files whose
 *   extension is in SOURCE_EXTENSIONS and for which isTestFile() is false.
 *   The same name may appear more than once if it exists in several
 *   sub-directories.
 */
function getSourceFilesInModule(modulePath) {
  const files = [];
  const excludedDirNames = new Set(["node_modules", ".git", "__tests__", "dist", "coverage"]);

  // Split on both separators: paths are built with the platform join, so on
  // Windows they contain "\" and splitting on "/" alone would yield the whole
  // path, letting excluded directories slip through.
  function isExcludedDir(current) {
    const dirName = current.split(/[\\/]/).pop() ?? "";
    if (excludedDirNames.has(dirName)) return true;
    return dirName.startsWith(".") && current !== modulePath;
  }

  function walk(current) {
    // Decide before touching the filesystem so excluded trees are never listed.
    if (isExcludedDir(current)) return;
    let entries;
    try {
      entries = readdirSync2(current);
    } catch {
      // Unreadable or missing directory: nothing to collect here.
      return;
    }
    for (const entry of entries) {
      const fullPath = join9(current, entry);
      let stat;
      try {
        stat = statSync(fullPath);
      } catch {
        // Entry vanished or is inaccessible; ignore it and keep scanning.
        continue;
      }
      if (stat.isDirectory()) {
        walk(fullPath);
      } else if (stat.isFile()) {
        const ext = getExtension(entry);
        if (SOURCE_EXTENSIONS.has(ext) && !isTestFile(entry)) {
          files.push(entry);
        }
      }
    }
  }

  walk(modulePath);
  return files;
}
|
|
2833
|
+
/**
 * Lists the distinct source-file names referenced in an AGENTS.md document.
 *
 * A reference is any token ending in .ts, .js or .py, optionally preceded by
 * a path; only the part after the last "/" is kept. Names for which
 * isTestFile() is true are dropped.
 *
 * @param {string} agentsPath - Path of the AGENTS.md file to read.
 * @returns {string[]} Unique basenames in order of first appearance; an empty
 *   array when the file does not exist.
 */
function getMentionedFilesInAgentsMd(agentsPath) {
  if (!existsSync11(agentsPath)) {
    return [];
  }
  const text = readFileSync9(agentsPath, "utf-8");
  const names = new Set();
  for (const [reference] of text.matchAll(/[\w./-]*[\w-]+\.(?:ts|js|py)\b/g)) {
    const name = reference.split("/").pop();
    if (isTestFile(name)) {
      continue;
    }
    names.add(name);
  }
  return [...names];
}
|
|
2848
|
+
/**
 * Compares the source files found in a module against the file names
 * mentioned in an AGENTS.md document.
 *
 * @param {string} agentsPath - Path of the AGENTS.md file to check.
 * @param {string} modulePath - Directory whose source files must be mentioned.
 * @returns {{ complete: boolean, missing: string[] }} `missing` holds every
 *   source-file name not mentioned in the document (in scan order);
 *   `complete` is true exactly when `missing` is empty.
 */
function checkAgentsMdCompleteness(agentsPath, modulePath) {
  const onDisk = getSourceFilesInModule(modulePath);
  const documented = new Set(getMentionedFilesInAgentsMd(agentsPath));
  const missing = [];
  for (const fileName of onDisk) {
    if (!documented.has(fileName)) {
      missing.push(fileName);
    }
  }
  return { complete: missing.length === 0, missing };
}
|
|
2767
2857
|
function checkAgentsMdForModule(modulePath, dir) {
|
|
2768
2858
|
const root = dir ?? process.cwd();
|
|
2769
2859
|
const fullModulePath = join9(root, modulePath);
|
|
@@ -2782,13 +2872,15 @@ function checkAgentsMdForModule(modulePath, dir) {
|
|
|
2782
2872
|
}
|
|
2783
2873
|
const docMtime = statSync(agentsPath).mtime;
|
|
2784
2874
|
const codeMtime = getNewestSourceMtime(fullModulePath);
|
|
2785
|
-
|
|
2875
|
+
const { complete, missing } = checkAgentsMdCompleteness(agentsPath, fullModulePath);
|
|
2876
|
+
if (!complete) {
|
|
2877
|
+
const missingList = missing.join(", ");
|
|
2786
2878
|
return {
|
|
2787
2879
|
path: relative(root, agentsPath),
|
|
2788
2880
|
grade: "stale",
|
|
2789
2881
|
lastModified: docMtime,
|
|
2790
2882
|
codeLastModified: codeMtime,
|
|
2791
|
-
reason: `AGENTS.md stale for module: ${modulePath}`
|
|
2883
|
+
reason: `AGENTS.md stale for module: ${modulePath} \u2014 missing: ${missingList}`
|
|
2792
2884
|
};
|
|
2793
2885
|
}
|
|
2794
2886
|
return {
|
|
@@ -2818,22 +2910,30 @@ function scanDocHealth(dir) {
|
|
|
2818
2910
|
if (existsSync11(rootAgentsPath)) {
|
|
2819
2911
|
if (modules.length > 0) {
|
|
2820
2912
|
const docMtime = statSync(rootAgentsPath).mtime;
|
|
2821
|
-
let
|
|
2913
|
+
let allMissing = [];
|
|
2822
2914
|
let staleModule = "";
|
|
2915
|
+
let newestCode = null;
|
|
2823
2916
|
for (const mod of modules) {
|
|
2824
|
-
const
|
|
2917
|
+
const fullModPath = join9(root, mod);
|
|
2918
|
+
const modAgentsPath = join9(fullModPath, "AGENTS.md");
|
|
2919
|
+
if (existsSync11(modAgentsPath)) continue;
|
|
2920
|
+
const { missing } = checkAgentsMdCompleteness(rootAgentsPath, fullModPath);
|
|
2921
|
+
if (missing.length > 0 && staleModule === "") {
|
|
2922
|
+
staleModule = mod;
|
|
2923
|
+
allMissing = missing;
|
|
2924
|
+
}
|
|
2925
|
+
const modMtime = getNewestSourceMtime(fullModPath);
|
|
2825
2926
|
if (modMtime !== null && (newestCode === null || modMtime.getTime() > newestCode.getTime())) {
|
|
2826
2927
|
newestCode = modMtime;
|
|
2827
|
-
staleModule = mod;
|
|
2828
2928
|
}
|
|
2829
2929
|
}
|
|
2830
|
-
if (
|
|
2930
|
+
if (allMissing.length > 0) {
|
|
2831
2931
|
documents.push({
|
|
2832
2932
|
path: "AGENTS.md",
|
|
2833
2933
|
grade: "stale",
|
|
2834
2934
|
lastModified: docMtime,
|
|
2835
2935
|
codeLastModified: newestCode,
|
|
2836
|
-
reason: `AGENTS.md stale for module: ${staleModule}`
|
|
2936
|
+
reason: `AGENTS.md stale for module: ${staleModule} \u2014 missing: ${allMissing.join(", ")}`
|
|
2837
2937
|
});
|
|
2838
2938
|
} else {
|
|
2839
2939
|
documents.push({
|
|
@@ -3189,10 +3289,44 @@ function runShowboatVerify(proofPath) {
|
|
|
3189
3289
|
return { passed: false, output: stdout || stderr || message };
|
|
3190
3290
|
}
|
|
3191
3291
|
}
|
|
3192
|
-
function
|
|
3193
|
-
if (!existsSync12(proofPath))
|
|
3292
|
+
/**
 * Scores a proof document by counting its acceptance-criterion sections.
 *
 * The document is split at every line starting with "## AC <number>:"; each
 * section runs up to the next such header or to the end of the file. A
 * section is counted as:
 *   - escalated: it contains the literal "[ESCALATE]" (checked first);
 *   - verified:  it contains a showboat exec closing marker, a showboat image
 *                marker, or a bash/shell fenced block followed by an
 *                "output" fenced block;
 *   - pending:   anything else.
 *
 * @param {string} proofPath - Path of the proof markdown file.
 * @returns {{ verified: number, pending: number, escalated: number,
 *   total: number, passed: boolean }} All-zero counts with `passed: false`
 *   when the file is missing or has no AC headers.
 */
function validateProofQuality(proofPath) {
  const emptyResult = () => ({ verified: 0, pending: 0, escalated: 0, total: 0, passed: false });

  // Classifies one AC section; escalation takes precedence over evidence.
  const classifySection = (section) => {
    if (section.includes("[ESCALATE]")) {
      return "escalated";
    }
    if (section.includes("<!-- /showboat exec -->")) {
      return "verified";
    }
    if (section.includes("<!-- showboat image:")) {
      return "verified";
    }
    return /```(?:bash|shell)\n[\s\S]*?```\n+```output\n/m.test(section) ? "verified" : "pending";
  };

  if (!existsSync12(proofPath)) {
    return emptyResult();
  }
  const content = readFileSync10(proofPath, "utf-8");
  const headerOffsets = Array.from(content.matchAll(/^## AC \d+:/gm), (header) => header.index);
  if (headerOffsets.length === 0) {
    return emptyResult();
  }

  const tally = { verified: 0, pending: 0, escalated: 0 };
  headerOffsets.forEach((start, position) => {
    // The last section extends to the end of the document.
    const end = headerOffsets[position + 1] ?? content.length;
    tally[classifySection(content.slice(start, end))] += 1;
  });

  const { verified, pending, escalated } = tally;
  return {
    verified,
    pending,
    escalated,
    total: verified + pending + escalated,
    // A proof passes once nothing is pending and at least one AC is verified.
    // Escalated ACs do not block: they are explicitly flagged as unverifiable.
    passed: pending === 0 && verified > 0
  };
}
|
|
3197
3331
|
function updateVerificationState(storyId, result, dir) {
|
|
3198
3332
|
const { state, body } = readStateWithBody(dir);
|
|
@@ -3333,36 +3467,52 @@ function verifyStory(storyId, isJson, root) {
|
|
|
3333
3467
|
return;
|
|
3334
3468
|
}
|
|
3335
3469
|
const storyTitle = extractStoryTitle(storyFilePath);
|
|
3336
|
-
const
|
|
3337
|
-
|
|
3338
|
-
|
|
3339
|
-
|
|
3340
|
-
if (
|
|
3341
|
-
|
|
3342
|
-
|
|
3470
|
+
const expectedProofPath = join11(root, "verification", `${storyId}-proof.md`);
|
|
3471
|
+
const proofPath = existsSync13(expectedProofPath) ? expectedProofPath : createProofDocument(storyId, storyTitle, acs, root);
|
|
3472
|
+
const proofQuality = validateProofQuality(proofPath);
|
|
3473
|
+
if (!proofQuality.passed) {
|
|
3474
|
+
if (isJson) {
|
|
3475
|
+
jsonOutput({
|
|
3476
|
+
status: "fail",
|
|
3477
|
+
message: `Proof quality check failed: ${proofQuality.verified}/${proofQuality.total} ACs verified`,
|
|
3478
|
+
proofQuality: { verified: proofQuality.verified, pending: proofQuality.pending, escalated: proofQuality.escalated, total: proofQuality.total }
|
|
3479
|
+
});
|
|
3343
3480
|
} else {
|
|
3344
|
-
|
|
3345
|
-
|
|
3346
|
-
|
|
3347
|
-
|
|
3348
|
-
|
|
3349
|
-
|
|
3481
|
+
fail(`Proof quality check failed: ${proofQuality.verified}/${proofQuality.total} ACs verified`);
|
|
3482
|
+
}
|
|
3483
|
+
process.exitCode = 1;
|
|
3484
|
+
return;
|
|
3485
|
+
}
|
|
3486
|
+
if (proofQuality.escalated > 0) {
|
|
3487
|
+
warn(`Story ${storyId} has ${proofQuality.escalated} ACs requiring integration verification`);
|
|
3488
|
+
info("Run these ACs manually or in a dedicated verification session");
|
|
3489
|
+
}
|
|
3490
|
+
let showboatStatus = "skipped";
|
|
3491
|
+
const showboatResult = runShowboatVerify(proofPath);
|
|
3492
|
+
if (showboatResult.output === "showboat not available") {
|
|
3493
|
+
showboatStatus = "skipped";
|
|
3494
|
+
warn("Showboat not installed \u2014 skipping re-verification");
|
|
3495
|
+
} else {
|
|
3496
|
+
showboatStatus = showboatResult.passed ? "pass" : "fail";
|
|
3497
|
+
if (!showboatResult.passed) {
|
|
3498
|
+
fail(`Showboat verify failed: ${showboatResult.output}`, { json: isJson });
|
|
3499
|
+
process.exitCode = 1;
|
|
3500
|
+
return;
|
|
3350
3501
|
}
|
|
3351
3502
|
}
|
|
3352
|
-
const acsVerified = showboatStatus === "pass";
|
|
3353
|
-
const verifiedCount = acsVerified ? acs.length : 0;
|
|
3354
3503
|
const result = {
|
|
3355
3504
|
storyId,
|
|
3356
3505
|
success: true,
|
|
3357
|
-
totalACs:
|
|
3358
|
-
verifiedCount,
|
|
3359
|
-
failedCount:
|
|
3506
|
+
totalACs: proofQuality.total,
|
|
3507
|
+
verifiedCount: proofQuality.verified,
|
|
3508
|
+
failedCount: proofQuality.pending,
|
|
3509
|
+
escalatedCount: proofQuality.escalated,
|
|
3360
3510
|
proofPath: `verification/${storyId}-proof.md`,
|
|
3361
3511
|
showboatVerifyStatus: showboatStatus,
|
|
3362
3512
|
perAC: acs.map((ac) => ({
|
|
3363
3513
|
id: ac.id,
|
|
3364
3514
|
description: ac.description,
|
|
3365
|
-
verified:
|
|
3515
|
+
verified: true,
|
|
3366
3516
|
evidencePaths: []
|
|
3367
3517
|
}))
|
|
3368
3518
|
};
|
|
@@ -3394,7 +3544,10 @@ function verifyStory(storyId, isJson, root) {
|
|
|
3394
3544
|
warn(`Failed to complete exec-plan: ${message}`);
|
|
3395
3545
|
}
|
|
3396
3546
|
if (isJson) {
|
|
3397
|
-
jsonOutput(
|
|
3547
|
+
jsonOutput({
|
|
3548
|
+
...result,
|
|
3549
|
+
proofQuality: { verified: proofQuality.verified, pending: proofQuality.pending, escalated: proofQuality.escalated, total: proofQuality.total }
|
|
3550
|
+
});
|
|
3398
3551
|
} else {
|
|
3399
3552
|
ok(`Story ${storyId}: verified \u2014 proof at verification/${storyId}-proof.md`);
|
|
3400
3553
|
}
|
|
@@ -6630,7 +6783,7 @@ function registerGithubImportCommand(program) {
|
|
|
6630
6783
|
}
|
|
6631
6784
|
|
|
6632
6785
|
// src/index.ts
|
|
6633
|
-
var VERSION = true ? "0.
|
|
6786
|
+
// CLI version string passed to `.version()` in createProgram(). Because the
// condition is the literal `true`, "0.9.0" is always selected and "0.0.0-dev"
// is never used.
// NOTE(review): presumably a build-time flag inlined by the bundler — confirm.
var VERSION = true ? "0.9.0" : "0.0.0-dev";
|
|
6634
6787
|
function createProgram() {
|
|
6635
6788
|
const program = new Command();
|
|
6636
6789
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|