codeharness 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +186 -33
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -919,6 +919,14 @@ function storyVerificationPatch() {
919
919
  - [ ] All acceptance criteria verified with real-world evidence
920
920
  - [ ] Test coverage meets target (100%)
921
921
 
922
+ ### Verification Tags
923
+
924
+ For each AC, append a verification tag to indicate how it can be verified:
925
+ - \`<!-- verification: cli-verifiable -->\` \u2014 AC can be verified by running CLI commands in a subprocess
926
+ - \`<!-- verification: integration-required -->\` \u2014 AC requires integration testing, multi-system interaction, or manual verification
927
+
928
+ ACs referencing workflows, sprint planning, user sessions, or external system interactions should be tagged as \`integration-required\`. If no tag is present, a heuristic classifier will attempt to determine verifiability at runtime.
929
+
922
930
  ## Documentation Requirements
923
931
 
924
932
  - [ ] Relevant AGENTS.md files updated (list modules touched)
@@ -1340,7 +1348,7 @@ function importStoriesToBeads(stories, opts, beadsFns) {
1340
1348
  }
1341
1349
 
1342
1350
  // src/commands/init.ts
1343
- var HARNESS_VERSION = true ? "0.8.0" : "0.0.0-dev";
1351
+ var HARNESS_VERSION = true ? "0.9.0" : "0.0.0-dev";
1344
1352
  function getStackLabel(stack) {
1345
1353
  if (stack === "nodejs") return "Node.js (package.json)";
1346
1354
  if (stack === "python") return "Python";
@@ -2573,6 +2581,29 @@ var DB_KEYWORDS = [
2573
2581
  "sql",
2574
2582
  "table"
2575
2583
  ];
2584
+ var INTEGRATION_KEYWORDS = [
2585
+ "sprint planning",
2586
+ "workflow",
2587
+ "run /command",
2588
+ "user session",
2589
+ "multi-step",
2590
+ "external system",
2591
+ "real infrastructure",
2592
+ "integration test",
2593
+ "manual verification"
2594
+ ];
2595
+ function classifyVerifiability(description) {
2596
+ const lower = description.toLowerCase();
2597
+ for (const kw of INTEGRATION_KEYWORDS) {
2598
+ if (lower.includes(kw)) return "integration-required";
2599
+ }
2600
+ return "cli-verifiable";
2601
+ }
2602
+ var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required)\s*-->/;
2603
+ function parseVerificationTag(text) {
2604
+ const match = VERIFICATION_TAG_PATTERN.exec(text);
2605
+ return match ? match[1] : null;
2606
+ }
2576
2607
  function classifyAC(description) {
2577
2608
  const lower = description.toLowerCase();
2578
2609
  for (const kw of UI_KEYWORDS) {
@@ -2622,10 +2653,13 @@ function parseStoryACs(storyFilePath) {
2622
2653
  if (currentId !== null && currentDesc.length > 0) {
2623
2654
  const description = currentDesc.join(" ").trim();
2624
2655
  if (description) {
2656
+ const tag = parseVerificationTag(description);
2657
+ const verifiability = tag ?? classifyVerifiability(description);
2625
2658
  acs.push({
2626
2659
  id: currentId,
2627
2660
  description,
2628
- type: classifyAC(description)
2661
+ type: classifyAC(description),
2662
+ verifiability
2629
2663
  });
2630
2664
  } else {
2631
2665
  warn(`Skipping malformed AC #${currentId}: empty description`);
@@ -2764,6 +2798,62 @@ function getNewestSourceMtime(dir) {
2764
2798
  walk(dir);
2765
2799
  return newest;
2766
2800
  }
2801
+ function getSourceFilesInModule(modulePath) {
2802
+ const files = [];
2803
+ function walk(current) {
2804
+ let entries;
2805
+ try {
2806
+ entries = readdirSync2(current);
2807
+ } catch {
2808
+ return;
2809
+ }
2810
+ const dirName = current.split("/").pop() ?? "";
2811
+ if (dirName === "node_modules" || dirName === ".git" || dirName === "__tests__" || dirName === "dist" || dirName === "coverage" || dirName.startsWith(".") && current !== modulePath) return;
2812
+ for (const entry of entries) {
2813
+ const fullPath = join9(current, entry);
2814
+ let stat;
2815
+ try {
2816
+ stat = statSync(fullPath);
2817
+ } catch {
2818
+ continue;
2819
+ }
2820
+ if (stat.isDirectory()) {
2821
+ walk(fullPath);
2822
+ } else if (stat.isFile()) {
2823
+ const ext = getExtension(entry);
2824
+ if (SOURCE_EXTENSIONS.has(ext) && !isTestFile(entry)) {
2825
+ files.push(entry);
2826
+ }
2827
+ }
2828
+ }
2829
+ }
2830
+ walk(modulePath);
2831
+ return files;
2832
+ }
2833
+ function getMentionedFilesInAgentsMd(agentsPath) {
2834
+ if (!existsSync11(agentsPath)) return [];
2835
+ const content = readFileSync9(agentsPath, "utf-8");
2836
+ const mentioned = /* @__PURE__ */ new Set();
2837
+ const filenamePattern = /[\w./-]*[\w-]+\.(?:ts|js|py)\b/g;
2838
+ let match;
2839
+ while ((match = filenamePattern.exec(content)) !== null) {
2840
+ const fullMatch = match[0];
2841
+ const basename3 = fullMatch.split("/").pop();
2842
+ if (!isTestFile(basename3)) {
2843
+ mentioned.add(basename3);
2844
+ }
2845
+ }
2846
+ return Array.from(mentioned);
2847
+ }
2848
+ function checkAgentsMdCompleteness(agentsPath, modulePath) {
2849
+ const sourceFiles = getSourceFilesInModule(modulePath);
2850
+ const mentionedFiles = new Set(getMentionedFilesInAgentsMd(agentsPath));
2851
+ const missing = sourceFiles.filter((f) => !mentionedFiles.has(f));
2852
+ return {
2853
+ complete: missing.length === 0,
2854
+ missing
2855
+ };
2856
+ }
2767
2857
  function checkAgentsMdForModule(modulePath, dir) {
2768
2858
  const root = dir ?? process.cwd();
2769
2859
  const fullModulePath = join9(root, modulePath);
@@ -2782,13 +2872,15 @@ function checkAgentsMdForModule(modulePath, dir) {
2782
2872
  }
2783
2873
  const docMtime = statSync(agentsPath).mtime;
2784
2874
  const codeMtime = getNewestSourceMtime(fullModulePath);
2785
- if (codeMtime !== null && codeMtime.getTime() > docMtime.getTime()) {
2875
+ const { complete, missing } = checkAgentsMdCompleteness(agentsPath, fullModulePath);
2876
+ if (!complete) {
2877
+ const missingList = missing.join(", ");
2786
2878
  return {
2787
2879
  path: relative(root, agentsPath),
2788
2880
  grade: "stale",
2789
2881
  lastModified: docMtime,
2790
2882
  codeLastModified: codeMtime,
2791
- reason: `AGENTS.md stale for module: ${modulePath}`
2883
+ reason: `AGENTS.md stale for module: ${modulePath} \u2014 missing: ${missingList}`
2792
2884
  };
2793
2885
  }
2794
2886
  return {
@@ -2818,22 +2910,30 @@ function scanDocHealth(dir) {
2818
2910
  if (existsSync11(rootAgentsPath)) {
2819
2911
  if (modules.length > 0) {
2820
2912
  const docMtime = statSync(rootAgentsPath).mtime;
2821
- let newestCode = null;
2913
+ let allMissing = [];
2822
2914
  let staleModule = "";
2915
+ let newestCode = null;
2823
2916
  for (const mod of modules) {
2824
- const modMtime = getNewestSourceMtime(join9(root, mod));
2917
+ const fullModPath = join9(root, mod);
2918
+ const modAgentsPath = join9(fullModPath, "AGENTS.md");
2919
+ if (existsSync11(modAgentsPath)) continue;
2920
+ const { missing } = checkAgentsMdCompleteness(rootAgentsPath, fullModPath);
2921
+ if (missing.length > 0 && staleModule === "") {
2922
+ staleModule = mod;
2923
+ allMissing = missing;
2924
+ }
2925
+ const modMtime = getNewestSourceMtime(fullModPath);
2825
2926
  if (modMtime !== null && (newestCode === null || modMtime.getTime() > newestCode.getTime())) {
2826
2927
  newestCode = modMtime;
2827
- staleModule = mod;
2828
2928
  }
2829
2929
  }
2830
- if (newestCode !== null && newestCode.getTime() > docMtime.getTime()) {
2930
+ if (allMissing.length > 0) {
2831
2931
  documents.push({
2832
2932
  path: "AGENTS.md",
2833
2933
  grade: "stale",
2834
2934
  lastModified: docMtime,
2835
2935
  codeLastModified: newestCode,
2836
- reason: `AGENTS.md stale for module: ${staleModule}`
2936
+ reason: `AGENTS.md stale for module: ${staleModule} \u2014 missing: ${allMissing.join(", ")}`
2837
2937
  });
2838
2938
  } else {
2839
2939
  documents.push({
@@ -3189,10 +3289,44 @@ function runShowboatVerify(proofPath) {
3189
3289
  return { passed: false, output: stdout || stderr || message };
3190
3290
  }
3191
3291
  }
3192
- function proofHasContent(proofPath) {
3193
- if (!existsSync12(proofPath)) return false;
3292
+ function validateProofQuality(proofPath) {
3293
+ if (!existsSync12(proofPath)) {
3294
+ return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
3295
+ }
3194
3296
  const content = readFileSync10(proofPath, "utf-8");
3195
- return content.includes("<!-- /showboat exec -->") || content.includes("<!-- showboat image:");
3297
+ const acHeaderPattern = /^## AC \d+:/gm;
3298
+ const matches = [...content.matchAll(acHeaderPattern)];
3299
+ if (matches.length === 0) {
3300
+ return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
3301
+ }
3302
+ let verified = 0;
3303
+ let pending = 0;
3304
+ let escalated = 0;
3305
+ for (let i = 0; i < matches.length; i++) {
3306
+ const start = matches[i].index;
3307
+ const end = i + 1 < matches.length ? matches[i + 1].index : content.length;
3308
+ const section = content.slice(start, end);
3309
+ if (section.includes("[ESCALATE]")) {
3310
+ escalated++;
3311
+ continue;
3312
+ }
3313
+ const hasEvidence = section.includes("<!-- /showboat exec -->") || section.includes("<!-- showboat image:") || /```(?:bash|shell)\n[\s\S]*?```\n+```output\n/m.test(section);
3314
+ if (hasEvidence) {
3315
+ verified++;
3316
+ } else {
3317
+ pending++;
3318
+ }
3319
+ }
3320
+ const total = verified + pending + escalated;
3321
+ return {
3322
+ verified,
3323
+ pending,
3324
+ escalated,
3325
+ total,
3326
+ // Proof passes when no pending ACs remain and at least one is verified.
3327
+ // Escalated ACs are allowed — they are explicitly acknowledged as unverifiable.
3328
+ passed: pending === 0 && verified > 0
3329
+ };
3196
3330
  }
3197
3331
  function updateVerificationState(storyId, result, dir) {
3198
3332
  const { state, body } = readStateWithBody(dir);
@@ -3333,36 +3467,52 @@ function verifyStory(storyId, isJson, root) {
3333
3467
  return;
3334
3468
  }
3335
3469
  const storyTitle = extractStoryTitle(storyFilePath);
3336
- const proofPath = createProofDocument(storyId, storyTitle, acs, root);
3337
- let showboatStatus = "skipped";
3338
- if (proofHasContent(proofPath)) {
3339
- const showboatResult = runShowboatVerify(proofPath);
3340
- if (showboatResult.output === "showboat not available") {
3341
- showboatStatus = "skipped";
3342
- warn("Showboat not installed \u2014 skipping re-verification");
3470
+ const expectedProofPath = join11(root, "verification", `${storyId}-proof.md`);
3471
+ const proofPath = existsSync13(expectedProofPath) ? expectedProofPath : createProofDocument(storyId, storyTitle, acs, root);
3472
+ const proofQuality = validateProofQuality(proofPath);
3473
+ if (!proofQuality.passed) {
3474
+ if (isJson) {
3475
+ jsonOutput({
3476
+ status: "fail",
3477
+ message: `Proof quality check failed: ${proofQuality.verified}/${proofQuality.total} ACs verified`,
3478
+ proofQuality: { verified: proofQuality.verified, pending: proofQuality.pending, escalated: proofQuality.escalated, total: proofQuality.total }
3479
+ });
3343
3480
  } else {
3344
- showboatStatus = showboatResult.passed ? "pass" : "fail";
3345
- if (!showboatResult.passed) {
3346
- fail(`Showboat verify failed: ${showboatResult.output}`, { json: isJson });
3347
- process.exitCode = 1;
3348
- return;
3349
- }
3481
+ fail(`Proof quality check failed: ${proofQuality.verified}/${proofQuality.total} ACs verified`);
3482
+ }
3483
+ process.exitCode = 1;
3484
+ return;
3485
+ }
3486
+ if (proofQuality.escalated > 0) {
3487
+ warn(`Story ${storyId} has ${proofQuality.escalated} ACs requiring integration verification`);
3488
+ info("Run these ACs manually or in a dedicated verification session");
3489
+ }
3490
+ let showboatStatus = "skipped";
3491
+ const showboatResult = runShowboatVerify(proofPath);
3492
+ if (showboatResult.output === "showboat not available") {
3493
+ showboatStatus = "skipped";
3494
+ warn("Showboat not installed \u2014 skipping re-verification");
3495
+ } else {
3496
+ showboatStatus = showboatResult.passed ? "pass" : "fail";
3497
+ if (!showboatResult.passed) {
3498
+ fail(`Showboat verify failed: ${showboatResult.output}`, { json: isJson });
3499
+ process.exitCode = 1;
3500
+ return;
3350
3501
  }
3351
3502
  }
3352
- const acsVerified = showboatStatus === "pass";
3353
- const verifiedCount = acsVerified ? acs.length : 0;
3354
3503
  const result = {
3355
3504
  storyId,
3356
3505
  success: true,
3357
- totalACs: acs.length,
3358
- verifiedCount,
3359
- failedCount: acs.length - verifiedCount,
3506
+ totalACs: proofQuality.total,
3507
+ verifiedCount: proofQuality.verified,
3508
+ failedCount: proofQuality.pending,
3509
+ escalatedCount: proofQuality.escalated,
3360
3510
  proofPath: `verification/${storyId}-proof.md`,
3361
3511
  showboatVerifyStatus: showboatStatus,
3362
3512
  perAC: acs.map((ac) => ({
3363
3513
  id: ac.id,
3364
3514
  description: ac.description,
3365
- verified: acsVerified,
3515
+ verified: true,
3366
3516
  evidencePaths: []
3367
3517
  }))
3368
3518
  };
@@ -3394,7 +3544,10 @@ function verifyStory(storyId, isJson, root) {
3394
3544
  warn(`Failed to complete exec-plan: ${message}`);
3395
3545
  }
3396
3546
  if (isJson) {
3397
- jsonOutput(result);
3547
+ jsonOutput({
3548
+ ...result,
3549
+ proofQuality: { verified: proofQuality.verified, pending: proofQuality.pending, escalated: proofQuality.escalated, total: proofQuality.total }
3550
+ });
3398
3551
  } else {
3399
3552
  ok(`Story ${storyId}: verified \u2014 proof at verification/${storyId}-proof.md`);
3400
3553
  }
@@ -6630,7 +6783,7 @@ function registerGithubImportCommand(program) {
6630
6783
  }
6631
6784
 
6632
6785
  // src/index.ts
6633
- var VERSION = true ? "0.8.0" : "0.0.0-dev";
6786
+ var VERSION = true ? "0.9.0" : "0.0.0-dev";
6634
6787
  function createProgram() {
6635
6788
  const program = new Command();
6636
6789
  program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.8.0",
3
+ "version": "0.9.0",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {