codeharness 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1348,7 +1348,7 @@ function importStoriesToBeads(stories, opts, beadsFns) {
1348
1348
  }
1349
1349
 
1350
1350
  // src/commands/init.ts
1351
- var HARNESS_VERSION = true ? "0.10.0" : "0.0.0-dev";
1351
+ var HARNESS_VERSION = true ? "0.12.0" : "0.0.0-dev";
1352
1352
  function getStackLabel(stack) {
1353
1353
  if (stack === "nodejs") return "Node.js (package.json)";
1354
1354
  if (stack === "python") return "Python";
@@ -2405,10 +2405,13 @@ function buildSpawnArgs(opts) {
2405
2405
  if (opts.live) {
2406
2406
  args.push("--live");
2407
2407
  }
2408
+ if (opts.reset) {
2409
+ args.push("--reset");
2410
+ }
2408
2411
  return args;
2409
2412
  }
2410
2413
  function registerRunCommand(program) {
2411
- program.command("run").description("Execute the autonomous coding loop").option("--max-iterations <n>", "Maximum loop iterations", "50").option("--timeout <seconds>", "Total loop timeout in seconds", "14400").option("--iteration-timeout <minutes>", "Per-iteration timeout in minutes", "30").option("--live", "Show live output streaming", false).option("--calls <n>", "Max API calls per hour", "100").option("--max-story-retries <n>", "Max retries per story before flagging", "3").action(async (options, cmd) => {
2414
+ program.command("run").description("Execute the autonomous coding loop").option("--max-iterations <n>", "Maximum loop iterations", "50").option("--timeout <seconds>", "Total loop timeout in seconds", "14400").option("--iteration-timeout <minutes>", "Per-iteration timeout in minutes", "30").option("--live", "Show live output streaming", false).option("--calls <n>", "Max API calls per hour", "100").option("--max-story-retries <n>", "Max retries per story before flagging", "3").option("--reset", "Clear retry counters, flagged stories, and circuit breaker before starting", false).action(async (options, cmd) => {
2412
2415
  const globalOpts = cmd.optsWithGlobals();
2413
2416
  const isJson = !!globalOpts.json;
2414
2417
  const outputOpts = { json: isJson };
@@ -2477,7 +2480,8 @@ function registerRunCommand(program) {
2477
2480
  iterationTimeout,
2478
2481
  calls,
2479
2482
  live: options.live,
2480
- maxStoryRetries
2483
+ maxStoryRetries,
2484
+ reset: options.reset
2481
2485
  });
2482
2486
  const env = { ...process.env };
2483
2487
  if (isJson) {
@@ -2592,6 +2596,32 @@ var INTEGRATION_KEYWORDS = [
2592
2596
  "integration test",
2593
2597
  "manual verification"
2594
2598
  ];
2599
+ var DOCKER_SESSION_KEYWORDS = [
2600
+ "agent tool",
2601
+ "subagent",
2602
+ "via agent",
2603
+ "invoke",
2604
+ "/create-story",
2605
+ "/bmad-dev-story",
2606
+ "/bmad-code-review",
2607
+ "/harness-run",
2608
+ "/retrospective",
2609
+ "sprint execution",
2610
+ "fresh context",
2611
+ "spawns",
2612
+ "code review workflow",
2613
+ "dev-story workflow",
2614
+ "automatically proceeds",
2615
+ "retries the current story",
2616
+ "halts with status",
2617
+ "prints summary"
2618
+ ];
2619
+ var ESCALATE_KEYWORDS = [
2620
+ "physical hardware",
2621
+ "manual human",
2622
+ "visual inspection by human",
2623
+ "paid external service"
2624
+ ];
2595
2625
  function classifyVerifiability(description) {
2596
2626
  const lower = description.toLowerCase();
2597
2627
  for (const kw of INTEGRATION_KEYWORDS) {
@@ -2599,6 +2629,19 @@ function classifyVerifiability(description) {
2599
2629
  }
2600
2630
  return "cli-verifiable";
2601
2631
  }
2632
+ function classifyStrategy(description) {
2633
+ const lower = description.toLowerCase();
2634
+ for (const kw of ESCALATE_KEYWORDS) {
2635
+ if (lower.includes(kw)) return "escalate";
2636
+ }
2637
+ for (const kw of DOCKER_SESSION_KEYWORDS) {
2638
+ if (lower.includes(kw)) return "docker-session";
2639
+ }
2640
+ for (const kw of INTEGRATION_KEYWORDS) {
2641
+ if (lower.includes(kw)) return "docker-session";
2642
+ }
2643
+ return "cli-direct";
2644
+ }
2602
2645
  var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required)\s*-->/;
2603
2646
  function parseVerificationTag(text) {
2604
2647
  const match = VERIFICATION_TAG_PATTERN.exec(text);
@@ -2655,11 +2698,13 @@ function parseStoryACs(storyFilePath) {
2655
2698
  if (description) {
2656
2699
  const tag = parseVerificationTag(description);
2657
2700
  const verifiability = tag ?? classifyVerifiability(description);
2701
+ const strategy = classifyStrategy(description);
2658
2702
  acs.push({
2659
2703
  id: currentId,
2660
2704
  description,
2661
2705
  type: classifyAC(description),
2662
- verifiability
2706
+ verifiability,
2707
+ strategy
2663
2708
  });
2664
2709
  } else {
2665
2710
  warn(`Skipping malformed AC #${currentId}: empty description`);
@@ -3294,27 +3339,52 @@ function validateProofQuality(proofPath) {
3294
3339
  return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
3295
3340
  }
3296
3341
  const content = readFileSync10(proofPath, "utf-8");
3297
- const acHeaderPattern = /^## AC \d+:/gm;
3342
+ const acHeaderPattern = /^## AC ?(\d+):/gm;
3298
3343
  const matches = [...content.matchAll(acHeaderPattern)];
3299
- if (matches.length === 0) {
3300
- return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
3301
- }
3302
3344
  let verified = 0;
3303
3345
  let pending = 0;
3304
3346
  let escalated = 0;
3305
- for (let i = 0; i < matches.length; i++) {
3306
- const start = matches[i].index;
3307
- const end = i + 1 < matches.length ? matches[i + 1].index : content.length;
3308
- const section = content.slice(start, end);
3309
- if (section.includes("[ESCALATE]")) {
3310
- escalated++;
3311
- continue;
3347
+ if (matches.length > 0) {
3348
+ for (let i = 0; i < matches.length; i++) {
3349
+ const start = matches[i].index;
3350
+ const end = i + 1 < matches.length ? matches[i + 1].index : content.length;
3351
+ const section = content.slice(start, end);
3352
+ if (section.includes("[ESCALATE]")) {
3353
+ escalated++;
3354
+ continue;
3355
+ }
3356
+ const hasEvidence = section.includes("<!-- /showboat exec -->") || section.includes("<!-- showboat image:") || /```(?:bash|shell)\n[\s\S]*?```\n+```output\n/m.test(section);
3357
+ if (hasEvidence) {
3358
+ verified++;
3359
+ } else {
3360
+ pending++;
3361
+ }
3312
3362
  }
3313
- const hasEvidence = section.includes("<!-- /showboat exec -->") || section.includes("<!-- showboat image:") || /```(?:bash|shell)\n[\s\S]*?```\n+```output\n/m.test(section);
3314
- if (hasEvidence) {
3315
- verified++;
3316
- } else {
3317
- pending++;
3363
+ } else {
3364
+ const inlineAcPattern = /--- AC ?(\d+):/g;
3365
+ const inlineMatches = [...content.matchAll(inlineAcPattern)];
3366
+ const acNumbers = new Set(inlineMatches.map((m) => m[1]));
3367
+ if (acNumbers.size === 0) {
3368
+ return { verified: 0, pending: 0, escalated: 0, total: 0, passed: false };
3369
+ }
3370
+ for (const acNum of acNumbers) {
3371
+ const acPattern = new RegExp(`--- AC ?${acNum}:`, "g");
3372
+ const acIdx = content.search(acPattern);
3373
+ if (acIdx === -1) {
3374
+ pending++;
3375
+ continue;
3376
+ }
3377
+ const nextAcPattern = new RegExp(`--- AC ?(?!${acNum})\\d+:`, "g");
3378
+ nextAcPattern.lastIndex = acIdx + 1;
3379
+ const nextMatch = nextAcPattern.exec(content);
3380
+ const section = content.slice(acIdx, nextMatch ? nextMatch.index : content.length);
3381
+ if (section.includes("[ESCALATE]")) {
3382
+ escalated++;
3383
+ } else if (/```output\n/m.test(section)) {
3384
+ verified++;
3385
+ } else {
3386
+ pending++;
3387
+ }
3318
3388
  }
3319
3389
  }
3320
3390
  const total = verified + pending + escalated;
@@ -6783,7 +6853,7 @@ function registerGithubImportCommand(program) {
6783
6853
  }
6784
6854
 
6785
6855
  // src/index.ts
6786
- var VERSION = true ? "0.10.0" : "0.0.0-dev";
6856
+ var VERSION = true ? "0.12.0" : "0.0.0-dev";
6787
6857
  function createProgram() {
6788
6858
  const program = new Command();
6789
6859
  program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.10.0",
3
+ "version": "0.12.0",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
package/ralph/ralph.sh CHANGED
@@ -50,6 +50,9 @@ CLAUDE_OUTPUT_FORMAT="${CLAUDE_OUTPUT_FORMAT:-json}"
50
50
  CLAUDE_ALLOWED_TOOLS="${CLAUDE_ALLOWED_TOOLS:-}"
51
51
  CLAUDE_USE_CONTINUE="${CLAUDE_USE_CONTINUE:-false}" # Fresh context per iteration by default
52
52
 
53
+ # Reset retry state on start
54
+ RESET_RETRIES=false
55
+
53
56
  # Live output
54
57
  LIVE_OUTPUT=false
55
58
 
@@ -722,6 +725,7 @@ Options:
722
725
  --prompt FILE Prompt file for each iteration
723
726
  --progress FILE Progress file (tasks JSON)
724
727
  --live Show live output streaming
728
+ --reset Clear retry counters, flagged stories, and circuit breaker before starting
725
729
  --reset-circuit Reset circuit breaker and exit
726
730
  --status Show current status and exit
727
731
 
@@ -805,7 +809,20 @@ main() {
805
809
  fi
806
810
  fi
807
811
 
808
- # Preserve retry state across restarts (Task 5.3)
812
+ # Reset retry state if --reset flag was passed
813
+ if [[ "$RESET_RETRIES" == "true" ]]; then
814
+ if [[ -f "$STORY_RETRY_FILE" ]]; then
815
+ rm -f "$STORY_RETRY_FILE"
816
+ log_status "INFO" "Cleared story retry counters"
817
+ fi
818
+ if [[ -f "$FLAGGED_STORIES_FILE" ]]; then
819
+ rm -f "$FLAGGED_STORIES_FILE"
820
+ log_status "INFO" "Cleared flagged stories"
821
+ fi
822
+ reset_circuit_breaker "Reset via --reset flag"
823
+ log_status "INFO" "Circuit breaker reset to CLOSED"
824
+ fi
825
+
809
826
  # .story_retries and .flagged_stories are file-based — they persist automatically
810
827
 
811
828
  log_status "SUCCESS" "Ralph loop starting"
@@ -1056,6 +1073,10 @@ while [[ $# -gt 0 ]]; do
1056
1073
  LIVE_OUTPUT=true
1057
1074
  shift
1058
1075
  ;;
1076
+ --reset)
1077
+ RESET_RETRIES=true
1078
+ shift
1079
+ ;;
1059
1080
  --reset-circuit)
1060
1081
  # Derive state paths so circuit breaker uses the correct directory
1061
1082
  HARNESS_STATE_DIR="$(pwd)/.claude"