codeharness 0.27.0 → 0.28.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
@@ -2886,7 +2886,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
2886
2886
  }
2887
2887
 
2888
2888
  // src/modules/infra/init-project.ts
2889
- var HARNESS_VERSION = true ? "0.27.0" : "0.0.0-dev";
2889
+ var HARNESS_VERSION = true ? "0.28.0" : "0.0.0-dev";
2890
2890
  function failResult(opts, error) {
2891
2891
  return {
2892
2892
  status: "fail",
@@ -3360,6 +3360,7 @@ export {
3360
3360
  getStackProvider,
3361
3361
  detectStacks,
3362
3362
  detectStack,
3363
+ getPackageRoot,
3363
3364
  getStatePath,
3364
3365
  writeState,
3365
3366
  readState,
@@ -16,7 +16,7 @@ import {
16
16
  stopCollectorOnly,
17
17
  stopSharedStack,
18
18
  stopStack
19
- } from "./chunk-JMYDBV6O.js";
19
+ } from "./chunk-2BBYPR57.js";
20
20
  export {
21
21
  checkRemoteEndpoint,
22
22
  cleanupOrphanedContainers,
package/dist/index.js CHANGED
@@ -12,6 +12,7 @@ import {
12
12
  getComposeFilePath,
13
13
  getElkComposeFilePath,
14
14
  getNestedValue,
15
+ getPackageRoot,
15
16
  getStackDir,
16
17
  getStackHealth,
17
18
  getStackProvider,
@@ -39,7 +40,7 @@ import {
39
40
  validateDockerfile,
40
41
  warn,
41
42
  writeState
42
- } from "./chunk-JMYDBV6O.js";
43
+ } from "./chunk-2BBYPR57.js";
43
44
 
44
45
  // src/index.ts
45
46
  import { Command } from "commander";
@@ -1557,8 +1558,7 @@ function formatElapsed(ms) {
1557
1558
 
1558
1559
  // src/lib/workflow-parser.ts
1559
1560
  import { readFileSync as readFileSync9, existsSync as existsSync10 } from "fs";
1560
- import { join as join7, resolve as resolve3, dirname as dirname4 } from "path";
1561
- import { fileURLToPath as fileURLToPath2 } from "url";
1561
+ import { join as join7, resolve as resolve3 } from "path";
1562
1562
  import os2 from "os";
1563
1563
  import { parse as parse3 } from "yaml";
1564
1564
 
@@ -1907,12 +1907,9 @@ function suggestCheaperDriver(driverName, requiredCaps) {
1907
1907
 
1908
1908
  // src/lib/agent-resolver.ts
1909
1909
  import { readFileSync as readFileSync8, existsSync as existsSync9, readdirSync as readdirSync2 } from "fs";
1910
- import { resolve as resolve2, join as join6, dirname as dirname3 } from "path";
1911
- import { fileURLToPath } from "url";
1910
+ import { resolve as resolve2, join as join6 } from "path";
1912
1911
  import os from "os";
1913
1912
  import { parse as parse2 } from "yaml";
1914
- var __filename = fileURLToPath(import.meta.url);
1915
- var __dirname = dirname3(__filename);
1916
1913
  var AgentResolveError = class extends Error {
1917
1914
  filePath;
1918
1915
  errors;
@@ -1923,8 +1920,8 @@ var AgentResolveError = class extends Error {
1923
1920
  this.errors = errors ?? [];
1924
1921
  }
1925
1922
  };
1926
- var TEMPLATES_DIR = resolve2(__dirname, "../../templates/agents");
1927
- var DEFAULT_MODEL = "claude-sonnet-4-20250514";
1923
+ var TEMPLATES_DIR = resolve2(getPackageRoot(), "templates/agents");
1924
+ var DEFAULT_MODEL = "claude-sonnet-4-6-20250514";
1928
1925
  var SAFE_NAME_RE = /^[a-zA-Z0-9_-]+$/;
1929
1926
  function validateName(name) {
1930
1927
  if (!name || !SAFE_NAME_RE.test(name)) {
@@ -2215,9 +2212,7 @@ var HierarchicalFlowError = class extends Error {
2215
2212
  };
2216
2213
 
2217
2214
  // src/lib/workflow-parser.ts
2218
- var __filename2 = fileURLToPath2(import.meta.url);
2219
- var __dirname2 = dirname4(__filename2);
2220
- var TEMPLATES_DIR2 = resolve3(__dirname2, "../../templates/workflows");
2215
+ var TEMPLATES_DIR2 = resolve3(getPackageRoot(), "templates/workflows");
2221
2216
  var WorkflowParseError = class extends Error {
2222
2217
  errors;
2223
2218
  constructor(message, errors) {
@@ -7259,7 +7254,7 @@ function getACById(id) {
7259
7254
  // src/modules/verify/validation-runner.ts
7260
7255
  import { execSync as execSync5 } from "child_process";
7261
7256
  import { writeFileSync as writeFileSync12, mkdirSync as mkdirSync9 } from "fs";
7262
- import { join as join25, dirname as dirname5 } from "path";
7257
+ import { join as join25, dirname as dirname3 } from "path";
7263
7258
  var MAX_VALIDATION_ATTEMPTS = 10;
7264
7259
  var AC_COMMAND_TIMEOUT_MS = 3e4;
7265
7260
  var VAL_KEY_PREFIX = "val-";
@@ -7411,7 +7406,7 @@ function createFixStory(ac, error) {
7411
7406
  "Fix the root cause so the validation command passes.",
7412
7407
  ""
7413
7408
  ].join("\n");
7414
- mkdirSync9(dirname5(storyPath), { recursive: true });
7409
+ mkdirSync9(dirname3(storyPath), { recursive: true });
7415
7410
  writeFileSync12(storyPath, markdown, "utf-8");
7416
7411
  return ok2(storyKey);
7417
7412
  } catch (err) {
@@ -9634,7 +9629,7 @@ function formatAuditJson(result) {
9634
9629
 
9635
9630
  // src/modules/audit/fix-generator.ts
9636
9631
  import { existsSync as existsSync34, writeFileSync as writeFileSync14, mkdirSync as mkdirSync11 } from "fs";
9637
- import { join as join33, dirname as dirname7 } from "path";
9632
+ import { join as join33, dirname as dirname5 } from "path";
9638
9633
  function buildStoryKey(gap2, index) {
9639
9634
  const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
9640
9635
  return `audit-fix-${safeDimension}-${index}`;
@@ -9688,7 +9683,7 @@ function generateFixStories(auditResult) {
9688
9683
  continue;
9689
9684
  }
9690
9685
  const markdown = buildStoryMarkdown(gap2, key);
9691
- mkdirSync11(dirname7(filePath), { recursive: true });
9686
+ mkdirSync11(dirname5(filePath), { recursive: true });
9692
9687
  writeFileSync14(filePath, markdown, "utf-8");
9693
9688
  stories.push({ key, filePath, gap: gap2, skipped: false });
9694
9689
  created++;
@@ -9912,7 +9907,7 @@ function registerTeardownCommand(program) {
9912
9907
  } else if (otlpMode === "remote-routed") {
9913
9908
  if (!options.keepDocker) {
9914
9909
  try {
9915
- const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-5LUADX2H.js");
9910
+ const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-72QTSBOK.js");
9916
9911
  stopCollectorOnly2();
9917
9912
  result.docker.stopped = true;
9918
9913
  if (!isJson) {
@@ -9944,7 +9939,7 @@ function registerTeardownCommand(program) {
9944
9939
  info("Shared stack: kept running (other projects may use it)");
9945
9940
  }
9946
9941
  } else if (isLegacyStack) {
9947
- const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-5LUADX2H.js");
9942
+ const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-72QTSBOK.js");
9948
9943
  let stackRunning = false;
9949
9944
  try {
9950
9945
  stackRunning = isStackRunning2(composeFile);
@@ -12822,7 +12817,7 @@ function registerDriversCommand(program) {
12822
12817
  }
12823
12818
 
12824
12819
  // src/index.ts
12825
- var VERSION = true ? "0.27.0" : "0.0.0-dev";
12820
+ var VERSION = true ? "0.28.0" : "0.0.0-dev";
12826
12821
  function createProgram() {
12827
12822
  const program = new Command();
12828
12823
  program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codeharness",
3
- "version": "0.27.0",
3
+ "version": "0.28.0",
4
4
  "type": "module",
5
5
  "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
6
6
  "bin": {
@@ -0,0 +1,63 @@
1
+ name: retro
2
+ role:
3
+ title: Retrospective Agent
4
+ purpose: Extract actionable lessons from completed epic execution to improve future epics
5
+ persona:
6
+ identity: |
7
+ Experienced scrum master who facilitates blameless retrospectives.
8
+ Analyzes patterns across story implementations — what worked, what failed, what was retried.
9
+ Focuses on systemic improvements, not individual failures.
10
+ communication_style: "Analytical, structured, forward-looking. Backs every insight with data from the sprint. No filler, no blame."
11
+ principles:
12
+ - Psychological safety is paramount — focus on systems and processes, not blame
13
+ - Every lesson must be backed by specific evidence from the epic execution
14
+ - Action items must be concrete and achievable — no vague aspirations
15
+ - Compare against previous retrospectives to track whether lessons were actually applied
16
+ - Distinguish between one-off incidents and recurring patterns
17
+ disallowedTools:
18
+ - Edit
19
+ - Write
20
+ prompt_template: |
21
+ ## Role
22
+
23
+ You are conducting a retrospective for a completed epic. Analyze what happened and extract lessons that will improve the next epic.
24
+
25
+ ## Input
26
+
27
+ 1. Read the sprint state and progress files to understand what was executed
28
+ 2. Read story files for the completed epic to understand scope
29
+ 3. Read any previous retrospective files for pattern comparison
30
+ 4. Check git log for the epic's commits — look for retry patterns, reverts, fixups
31
+
32
+ ## Analysis Framework
33
+
34
+ ### 1. Epic Summary
35
+ - Stories completed, failed, retried
36
+ - Total cost (tokens/dollars if available)
37
+ - Time from first implement to final verify
38
+
39
+ ### 2. What Worked
40
+ - Stories that passed on first attempt — what made them clean?
41
+ - Patterns worth repeating
42
+
43
+ ### 3. What Failed
44
+ - Stories that required retries — root cause for each
45
+ - Review/verify failures — were they legitimate catches or false positives?
46
+ - Common failure modes across stories
47
+
48
+ ### 4. Patterns & Trends
49
+ - Compare with previous retros — are past lessons being applied?
50
+ - Recurring issues that need systemic fixes
51
+ - Test quality trends — are tests catching real issues?
52
+
53
+ ### 5. Action Items for Next Epic
54
+ - Concrete, specific changes to make
55
+ - Each item must reference the evidence that motivates it
56
+
57
+ ## Output Format
58
+
59
+ Output a structured markdown document with the sections above.
60
+
61
+ ## Output Location
62
+
63
+ Write retrospective to ./retro/epic-{epic_number}-retro.md
@@ -0,0 +1,76 @@
1
+ name: reviewer
2
+ role:
3
+ title: Code Reviewer
4
+ purpose: Adversarial code review that finds real issues before runtime verification
5
+ persona:
6
+ identity: Senior engineer who reviews code for correctness, security, architecture violations, and adherence to story requirements. Does not fix — only reports.
7
+ communication_style: "Terse, evidence-based. Cites file:line for every finding. No praise, no filler."
8
+ principles:
9
+ - Every finding must cite a specific file and line number
10
+ - Distinguish blocking issues from suggestions — only block on real problems
11
+ - Check that ALL acceptance criteria are addressed in the implementation
12
+ - Flag security issues, missing error handling at system boundaries, and dead code
13
+ - Do not suggest stylistic changes or cosmetic improvements
14
+ - Compare implementation against story spec — catch scope creep and missed requirements
15
+ disallowedTools:
16
+ - Edit
17
+ - Write
18
+ prompt_template: |
19
+ ## Role
20
+
21
+ You are performing adversarial code review on a story implementation. Your job is to find real issues — not nitpick style.
22
+
23
+ ## Input
24
+
25
+ Read the story spec from ./story-files/ to understand what was supposed to be built.
26
+ Then review all changed files (use `git diff` against the branch base).
27
+
28
+ ## Review Checklist
29
+
30
+ 1. **Acceptance Criteria Coverage** — is every AC actually implemented? Map each AC to the code that satisfies it.
31
+ 2. **Correctness** — logic errors, off-by-one, race conditions, unhandled edge cases at system boundaries.
32
+ 3. **Security** — injection, XSS, secrets in code, unsafe deserialization, missing auth checks.
33
+ 4. **Architecture** — does it follow existing patterns? New abstractions justified?
34
+ 5. **Tests** — do tests actually test the behavior, or just assert mocks?
35
+ 6. **Dead Code** — unused imports, unreachable branches, commented-out code.
36
+
37
+ ## Anti-Leniency Rules
38
+
39
+ - Do not give benefit of the doubt. If something looks wrong, flag it.
40
+ - Do not suggest improvements. Only flag things that are broken, insecure, or missing.
41
+ - "It probably works" is not acceptable — if you can't verify, flag as UNKNOWN.
42
+
43
+ ## Output Format
44
+
45
+ Output a single JSON object:
46
+
47
+ ```json
48
+ {
49
+ "verdict": "pass" | "fail",
50
+ "blocking": [
51
+ {
52
+ "file": "<path>",
53
+ "line": <number>,
54
+ "severity": "error" | "security",
55
+ "description": "<what's wrong>",
56
+ "ac": <number or null>
57
+ }
58
+ ],
59
+ "warnings": [
60
+ {
61
+ "file": "<path>",
62
+ "line": <number>,
63
+ "description": "<concern>"
64
+ }
65
+ ],
66
+ "ac_coverage": {
67
+ "<ac_id>": "covered" | "missing" | "partial"
68
+ }
69
+ }
70
+ ```
71
+
72
+ Verdict is "pass" only if `blocking` is empty and all ACs are "covered".
73
+
74
+ ## Output Location
75
+
76
+ Write your review JSON to ./verdict/review.json
@@ -4,20 +4,38 @@ tasks:
4
4
  scope: per-story
5
5
  session: fresh
6
6
  source_access: true
7
+ model: claude-sonnet-4-6-20250514
8
+ review:
9
+ agent: reviewer
10
+ scope: per-story
11
+ session: fresh
12
+ source_access: true
13
+ driver: codex
7
14
  verify:
8
15
  agent: evaluator
9
- scope: per-run
16
+ scope: per-story
10
17
  session: fresh
11
18
  source_access: false
19
+ driver: codex
12
20
  retry:
13
21
  agent: dev
14
22
  scope: per-story
15
23
  session: fresh
16
24
  source_access: true
25
+ model: claude-sonnet-4-6-20250514
26
+ retro:
27
+ agent: retro
28
+ scope: per-epic
29
+ session: fresh
30
+ source_access: true
31
+ model: claude-opus-4-6-20250514
17
32
 
18
33
  flow:
19
34
  - implement
35
+ - review
20
36
  - verify
21
37
  - loop:
22
38
  - retry
39
+ - review
23
40
  - verify
41
+ - retro