npm - codeharness - Versions diffs - 0.27.0 → 0.28.0 - Mend

codeharness 0.27.0 → 0.28.0

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (7)

package/dist/{chunk-JMYDBV6O.js → chunk-2BBYPR57.js} +2 -1
package/dist/{docker-5LUADX2H.js → docker-72QTSBOK.js} +1 -1
package/dist/index.js +14 -19
package/package.json +1 -1
package/templates/agents/retro.yaml +63 -0
package/templates/agents/reviewer.yaml +76 -0
package/templates/workflows/default.yaml +19 -1

package/dist/{chunk-JMYDBV6O.js → chunk-2BBYPR57.js} RENAMED Viewed

@@ -2886,7 +2886,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
 }
 // src/modules/infra/init-project.ts
-var HARNESS_VERSION = true ? "0.27.0" : "0.0.0-dev";
+var HARNESS_VERSION = true ? "0.28.0" : "0.0.0-dev";
 function failResult(opts, error) {
   return {
     status: "fail",
@@ -3360,6 +3360,7 @@ export {
   getStackProvider,
   detectStacks,
   detectStack,
+  getPackageRoot,
   getStatePath,
   writeState,
   readState,

package/dist/{docker-5LUADX2H.js → docker-72QTSBOK.js} RENAMED Viewed

@@ -16,7 +16,7 @@ import {
   stopCollectorOnly,
   stopSharedStack,
   stopStack
-} from "./chunk-JMYDBV6O.js";
+} from "./chunk-2BBYPR57.js";
 export {
   checkRemoteEndpoint,
   cleanupOrphanedContainers,

package/dist/index.js CHANGED Viewed

@@ -12,6 +12,7 @@ import {
   getComposeFilePath,
   getElkComposeFilePath,
   getNestedValue,
+  getPackageRoot,
   getStackDir,
   getStackHealth,
   getStackProvider,
@@ -39,7 +40,7 @@ import {
   validateDockerfile,
   warn,
   writeState
-} from "./chunk-JMYDBV6O.js";
+} from "./chunk-2BBYPR57.js";
 // src/index.ts
 import { Command } from "commander";
@@ -1557,8 +1558,7 @@ function formatElapsed(ms) {
 // src/lib/workflow-parser.ts
 import { readFileSync as readFileSync9, existsSync as existsSync10 } from "fs";
-import { join as join7, resolve as resolve3, dirname as dirname4 } from "path";
-import { fileURLToPath as fileURLToPath2 } from "url";
+import { join as join7, resolve as resolve3 } from "path";
 import os2 from "os";
 import { parse as parse3 } from "yaml";
@@ -1907,12 +1907,9 @@ function suggestCheaperDriver(driverName, requiredCaps) {
 // src/lib/agent-resolver.ts
 import { readFileSync as readFileSync8, existsSync as existsSync9, readdirSync as readdirSync2 } from "fs";
-import { resolve as resolve2, join as join6, dirname as dirname3 } from "path";
-import { fileURLToPath } from "url";
+import { resolve as resolve2, join as join6 } from "path";
 import os from "os";
 import { parse as parse2 } from "yaml";
-var __filename = fileURLToPath(import.meta.url);
-var __dirname = dirname3(__filename);
 var AgentResolveError = class extends Error {
   filePath;
   errors;
@@ -1923,8 +1920,8 @@ var AgentResolveError = class extends Error {
     this.errors = errors ?? [];
   }
 };
-var TEMPLATES_DIR = resolve2(__dirname, "../../templates/agents");
-var DEFAULT_MODEL = "claude-sonnet-4-20250514";
+var TEMPLATES_DIR = resolve2(getPackageRoot(), "templates/agents");
+var DEFAULT_MODEL = "claude-sonnet-4-6-20250514";
 var SAFE_NAME_RE = /^[a-zA-Z0-9_-]+$/;
 function validateName(name) {
   if (!name || !SAFE_NAME_RE.test(name)) {
@@ -2215,9 +2212,7 @@ var HierarchicalFlowError = class extends Error {
 };
 // src/lib/workflow-parser.ts
-var __filename2 = fileURLToPath2(import.meta.url);
-var __dirname2 = dirname4(__filename2);
-var TEMPLATES_DIR2 = resolve3(__dirname2, "../../templates/workflows");
+var TEMPLATES_DIR2 = resolve3(getPackageRoot(), "templates/workflows");
 var WorkflowParseError = class extends Error {
   errors;
   constructor(message, errors) {
@@ -7259,7 +7254,7 @@ function getACById(id) {
 // src/modules/verify/validation-runner.ts
 import { execSync as execSync5 } from "child_process";
 import { writeFileSync as writeFileSync12, mkdirSync as mkdirSync9 } from "fs";
-import { join as join25, dirname as dirname5 } from "path";
+import { join as join25, dirname as dirname3 } from "path";
 var MAX_VALIDATION_ATTEMPTS = 10;
 var AC_COMMAND_TIMEOUT_MS = 3e4;
 var VAL_KEY_PREFIX = "val-";
@@ -7411,7 +7406,7 @@ function createFixStory(ac, error) {
       "Fix the root cause so the validation command passes.",
       ""
     ].join("\n");
-    mkdirSync9(dirname5(storyPath), { recursive: true });
+    mkdirSync9(dirname3(storyPath), { recursive: true });
     writeFileSync12(storyPath, markdown, "utf-8");
     return ok2(storyKey);
   } catch (err) {
@@ -9634,7 +9629,7 @@ function formatAuditJson(result) {
 // src/modules/audit/fix-generator.ts
 import { existsSync as existsSync34, writeFileSync as writeFileSync14, mkdirSync as mkdirSync11 } from "fs";
-import { join as join33, dirname as dirname7 } from "path";
+import { join as join33, dirname as dirname5 } from "path";
 function buildStoryKey(gap2, index) {
   const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
   return `audit-fix-${safeDimension}-${index}`;
@@ -9688,7 +9683,7 @@ function generateFixStories(auditResult) {
           continue;
         }
         const markdown = buildStoryMarkdown(gap2, key);
-        mkdirSync11(dirname7(filePath), { recursive: true });
+        mkdirSync11(dirname5(filePath), { recursive: true });
         writeFileSync14(filePath, markdown, "utf-8");
         stories.push({ key, filePath, gap: gap2, skipped: false });
         created++;
@@ -9912,7 +9907,7 @@ function registerTeardownCommand(program) {
     } else if (otlpMode === "remote-routed") {
       if (!options.keepDocker) {
         try {
-          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-5LUADX2H.js");
+          const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-72QTSBOK.js");
           stopCollectorOnly2();
           result.docker.stopped = true;
           if (!isJson) {
@@ -9944,7 +9939,7 @@ function registerTeardownCommand(program) {
         info("Shared stack: kept running (other projects may use it)");
       }
     } else if (isLegacyStack) {
-      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-5LUADX2H.js");
+      const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-72QTSBOK.js");
       let stackRunning = false;
       try {
         stackRunning = isStackRunning2(composeFile);
@@ -12822,7 +12817,7 @@ function registerDriversCommand(program) {
 }
 // src/index.ts
-var VERSION = true ? "0.27.0" : "0.0.0-dev";
+var VERSION = true ? "0.28.0" : "0.0.0-dev";
 function createProgram() {
   const program = new Command();
   program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codeharness",
-  "version": "0.27.0",
+  "version": "0.28.0",
   "type": "module",
   "description": "CLI for codeharness — makes autonomous coding agents produce software that actually works",
   "bin": {

package/templates/agents/retro.yaml ADDED Viewed

@@ -0,0 +1,63 @@
+name: retro
+role:
+  title: Retrospective Agent
+  purpose: Extract actionable lessons from completed epic execution to improve future epics
+persona:
+  identity: |
+    Experienced scrum master who facilitates blameless retrospectives.
+    Analyzes patterns across story implementations — what worked, what failed, what was retried.
+    Focuses on systemic improvements, not individual failures.
+  communication_style: "Analytical, structured, forward-looking. Backs every insight with data from the sprint. No filler, no blame."
+  principles:
+    - Psychological safety is paramount — focus on systems and processes, not blame
+    - Every lesson must be backed by specific evidence from the epic execution
+    - Action items must be concrete and achievable — no vague aspirations
+    - Compare against previous retrospectives to track whether lessons were actually applied
+    - Distinguish between one-off incidents and recurring patterns
+disallowedTools:
+  - Edit
+  - Write
+prompt_template: |
+  ## Role
+  You are conducting a retrospective for a completed epic. Analyze what happened and extract lessons that will improve the next epic.
+  ## Input
+  1. Read the sprint state and progress files to understand what was executed
+  2. Read story files for the completed epic to understand scope
+  3. Read any previous retrospective files for pattern comparison
+  4. Check git log for the epic's commits — look for retry patterns, reverts, fixups
+  ## Analysis Framework
+  ### 1. Epic Summary
+  - Stories completed, failed, retried
+  - Total cost (tokens/dollars if available)
+  - Time from first implement to final verify
+  ### 2. What Worked
+  - Stories that passed on first attempt — what made them clean?
+  - Patterns worth repeating
+  ### 3. What Failed
+  - Stories that required retries — root cause for each
+  - Review/verify failures — were they legitimate catches or false positives?
+  - Common failure modes across stories
+  ### 4. Patterns & Trends
+  - Compare with previous retros — are past lessons being applied?
+  - Recurring issues that need systemic fixes
+  - Test quality trends — are tests catching real issues?
+  ### 5. Action Items for Next Epic
+  - Concrete, specific changes to make
+  - Each item must reference the evidence that motivates it
+  ## Output Format
+  Output a structured markdown document with the sections above.
+  ## Output Location
+  Write retrospective to ./retro/epic-{epic_number}-retro.md

package/templates/agents/reviewer.yaml ADDED Viewed

@@ -0,0 +1,76 @@
+name: reviewer
+role:
+  title: Code Reviewer
+  purpose: Adversarial code review that finds real issues before runtime verification
+persona:
+  identity: Senior engineer who reviews code for correctness, security, architecture violations, and adherence to story requirements. Does not fix — only reports.
+  communication_style: "Terse, evidence-based. Cites file:line for every finding. No praise, no filler."
+  principles:
+    - Every finding must cite a specific file and line number
+    - Distinguish blocking issues from suggestions — only block on real problems
+    - Check that ALL acceptance criteria are addressed in the implementation
+    - Flag security issues, missing error handling at system boundaries, and dead code
+    - Do not suggest stylistic changes or cosmetic improvements
+    - Compare implementation against story spec — catch scope creep and missed requirements
+disallowedTools:
+  - Edit
+  - Write
+prompt_template: |
+  ## Role
+  You are performing adversarial code review on a story implementation. Your job is to find real issues — not nitpick style.
+  ## Input
+  Read the story spec from ./story-files/ to understand what was supposed to be built.
+  Then review all changed files (use `git diff` against the branch base).
+  ## Review Checklist
+  1. **Acceptance Criteria Coverage** — is every AC actually implemented? Map each AC to the code that satisfies it.
+  2. **Correctness** — logic errors, off-by-one, race conditions, unhandled edge cases at system boundaries.
+  3. **Security** — injection, XSS, secrets in code, unsafe deserialization, missing auth checks.
+  4. **Architecture** — does it follow existing patterns? New abstractions justified?
+  5. **Tests** — do tests actually test the behavior, or just assert mocks?
+  6. **Dead Code** — unused imports, unreachable branches, commented-out code.
+  ## Anti-Leniency Rules
+  - Do not give benefit of the doubt. If something looks wrong, flag it.
+  - Do not suggest improvements. Only flag things that are broken, insecure, or missing.
+  - "It probably works" is not acceptable — if you can't verify, flag as UNKNOWN.
+  ## Output Format
+  Output a single JSON object:
+  ```json
+  {
+    "verdict": "pass" | "fail",
+    "blocking": [
+      {
+        "file": "<path>",
+        "line": <number>,
+        "severity": "error" | "security",
+        "description": "<what's wrong>",
+        "ac": <number or null>
+      }
+    ],
+    "warnings": [
+      {
+        "file": "<path>",
+        "line": <number>,
+        "description": "<concern>"
+      }
+    ],
+    "ac_coverage": {
+      "<ac_id>": "covered" | "missing" | "partial"
+    }
+  }
+  ```
+  Verdict is "pass" only if `blocking` is empty and all ACs are "covered".
+  ## Output Location
+  Write your review JSON to ./verdict/review.json

package/templates/workflows/default.yaml CHANGED Viewed

@@ -4,20 +4,38 @@ tasks:
     scope: per-story
     session: fresh
     source_access: true
+    model: claude-sonnet-4-6-20250514
+  review:
+    agent: reviewer
+    scope: per-story
+    session: fresh
+    source_access: true
+    driver: codex
   verify:
     agent: evaluator
-    scope: per-run
+    scope: per-story
     session: fresh
     source_access: false
+    driver: codex
   retry:
     agent: dev
     scope: per-story
     session: fresh
     source_access: true
+    model: claude-sonnet-4-6-20250514
+  retro:
+    agent: retro
+    scope: per-epic
+    session: fresh
+    source_access: true
+    model: claude-opus-4-6-20250514
 flow:
   - implement
+  - review
   - verify
   - loop:
       - retry
+      - review
       - verify
+  - retro