codeharness 0.27.0 → 0.28.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-JMYDBV6O.js → chunk-2BBYPR57.js} +2 -1
- package/dist/{docker-5LUADX2H.js → docker-72QTSBOK.js} +1 -1
- package/dist/index.js +14 -19
- package/package.json +1 -1
- package/templates/agents/retro.yaml +63 -0
- package/templates/agents/reviewer.yaml +76 -0
- package/templates/workflows/default.yaml +19 -1
|
@@ -2886,7 +2886,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
|
|
|
2886
2886
|
}
|
|
2887
2887
|
|
|
2888
2888
|
// src/modules/infra/init-project.ts
|
|
2889
|
-
var HARNESS_VERSION = true ? "0.27.0" : "0.0.0-dev";
|
|
2889
|
+
var HARNESS_VERSION = true ? "0.28.0" : "0.0.0-dev";
|
|
2890
2890
|
function failResult(opts, error) {
|
|
2891
2891
|
return {
|
|
2892
2892
|
status: "fail",
|
|
@@ -3360,6 +3360,7 @@ export {
|
|
|
3360
3360
|
getStackProvider,
|
|
3361
3361
|
detectStacks,
|
|
3362
3362
|
detectStack,
|
|
3363
|
+
getPackageRoot,
|
|
3363
3364
|
getStatePath,
|
|
3364
3365
|
writeState,
|
|
3365
3366
|
readState,
|
package/dist/index.js
CHANGED
|
@@ -12,6 +12,7 @@ import {
|
|
|
12
12
|
getComposeFilePath,
|
|
13
13
|
getElkComposeFilePath,
|
|
14
14
|
getNestedValue,
|
|
15
|
+
getPackageRoot,
|
|
15
16
|
getStackDir,
|
|
16
17
|
getStackHealth,
|
|
17
18
|
getStackProvider,
|
|
@@ -39,7 +40,7 @@ import {
|
|
|
39
40
|
validateDockerfile,
|
|
40
41
|
warn,
|
|
41
42
|
writeState
|
|
42
|
-
} from "./chunk-JMYDBV6O.js";
|
|
43
|
+
} from "./chunk-2BBYPR57.js";
|
|
43
44
|
|
|
44
45
|
// src/index.ts
|
|
45
46
|
import { Command } from "commander";
|
|
@@ -1557,8 +1558,7 @@ function formatElapsed(ms) {
|
|
|
1557
1558
|
|
|
1558
1559
|
// src/lib/workflow-parser.ts
|
|
1559
1560
|
import { readFileSync as readFileSync9, existsSync as existsSync10 } from "fs";
|
|
1560
|
-
import { join as join7, resolve as resolve3, dirname as dirname4 } from "path";
|
|
1561
|
-
import { fileURLToPath as fileURLToPath2 } from "url";
|
|
1561
|
+
import { join as join7, resolve as resolve3 } from "path";
|
|
1562
1562
|
import os2 from "os";
|
|
1563
1563
|
import { parse as parse3 } from "yaml";
|
|
1564
1564
|
|
|
@@ -1907,12 +1907,9 @@ function suggestCheaperDriver(driverName, requiredCaps) {
|
|
|
1907
1907
|
|
|
1908
1908
|
// src/lib/agent-resolver.ts
|
|
1909
1909
|
import { readFileSync as readFileSync8, existsSync as existsSync9, readdirSync as readdirSync2 } from "fs";
|
|
1910
|
-
import { resolve as resolve2, join as join6, dirname as dirname3 } from "path";
|
|
1911
|
-
import { fileURLToPath } from "url";
|
|
1910
|
+
import { resolve as resolve2, join as join6 } from "path";
|
|
1912
1911
|
import os from "os";
|
|
1913
1912
|
import { parse as parse2 } from "yaml";
|
|
1914
|
-
var __filename = fileURLToPath(import.meta.url);
|
|
1915
|
-
var __dirname = dirname3(__filename);
|
|
1916
1913
|
var AgentResolveError = class extends Error {
|
|
1917
1914
|
filePath;
|
|
1918
1915
|
errors;
|
|
@@ -1923,8 +1920,8 @@ var AgentResolveError = class extends Error {
|
|
|
1923
1920
|
this.errors = errors ?? [];
|
|
1924
1921
|
}
|
|
1925
1922
|
};
|
|
1926
|
-
var TEMPLATES_DIR = resolve2(__dirname, "../../templates/agents");
|
|
1927
|
-
var DEFAULT_MODEL = "claude-sonnet-4-20250514";
|
|
1923
|
+
var TEMPLATES_DIR = resolve2(getPackageRoot(), "templates/agents");
|
|
1924
|
+
var DEFAULT_MODEL = "claude-sonnet-4-6-20250514";
|
|
1928
1925
|
var SAFE_NAME_RE = /^[a-zA-Z0-9_-]+$/;
|
|
1929
1926
|
function validateName(name) {
|
|
1930
1927
|
if (!name || !SAFE_NAME_RE.test(name)) {
|
|
@@ -2215,9 +2212,7 @@ var HierarchicalFlowError = class extends Error {
|
|
|
2215
2212
|
};
|
|
2216
2213
|
|
|
2217
2214
|
// src/lib/workflow-parser.ts
|
|
2218
|
-
var __filename2 = fileURLToPath2(import.meta.url);
|
|
2219
|
-
var __dirname2 = dirname4(__filename2);
|
|
2220
|
-
var TEMPLATES_DIR2 = resolve3(__dirname2, "../../templates/workflows");
|
|
2215
|
+
var TEMPLATES_DIR2 = resolve3(getPackageRoot(), "templates/workflows");
|
|
2221
2216
|
var WorkflowParseError = class extends Error {
|
|
2222
2217
|
errors;
|
|
2223
2218
|
constructor(message, errors) {
|
|
@@ -7259,7 +7254,7 @@ function getACById(id) {
|
|
|
7259
7254
|
// src/modules/verify/validation-runner.ts
|
|
7260
7255
|
import { execSync as execSync5 } from "child_process";
|
|
7261
7256
|
import { writeFileSync as writeFileSync12, mkdirSync as mkdirSync9 } from "fs";
|
|
7262
|
-
import { join as join25, dirname as
|
|
7257
|
+
import { join as join25, dirname as dirname3 } from "path";
|
|
7263
7258
|
var MAX_VALIDATION_ATTEMPTS = 10;
|
|
7264
7259
|
var AC_COMMAND_TIMEOUT_MS = 3e4;
|
|
7265
7260
|
var VAL_KEY_PREFIX = "val-";
|
|
@@ -7411,7 +7406,7 @@ function createFixStory(ac, error) {
|
|
|
7411
7406
|
"Fix the root cause so the validation command passes.",
|
|
7412
7407
|
""
|
|
7413
7408
|
].join("\n");
|
|
7414
|
-
mkdirSync9(
|
|
7409
|
+
mkdirSync9(dirname3(storyPath), { recursive: true });
|
|
7415
7410
|
writeFileSync12(storyPath, markdown, "utf-8");
|
|
7416
7411
|
return ok2(storyKey);
|
|
7417
7412
|
} catch (err) {
|
|
@@ -9634,7 +9629,7 @@ function formatAuditJson(result) {
|
|
|
9634
9629
|
|
|
9635
9630
|
// src/modules/audit/fix-generator.ts
|
|
9636
9631
|
import { existsSync as existsSync34, writeFileSync as writeFileSync14, mkdirSync as mkdirSync11 } from "fs";
|
|
9637
|
-
import { join as join33, dirname as
|
|
9632
|
+
import { join as join33, dirname as dirname5 } from "path";
|
|
9638
9633
|
function buildStoryKey(gap2, index) {
|
|
9639
9634
|
const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
|
|
9640
9635
|
return `audit-fix-${safeDimension}-${index}`;
|
|
@@ -9688,7 +9683,7 @@ function generateFixStories(auditResult) {
|
|
|
9688
9683
|
continue;
|
|
9689
9684
|
}
|
|
9690
9685
|
const markdown = buildStoryMarkdown(gap2, key);
|
|
9691
|
-
mkdirSync11(
|
|
9686
|
+
mkdirSync11(dirname5(filePath), { recursive: true });
|
|
9692
9687
|
writeFileSync14(filePath, markdown, "utf-8");
|
|
9693
9688
|
stories.push({ key, filePath, gap: gap2, skipped: false });
|
|
9694
9689
|
created++;
|
|
@@ -9912,7 +9907,7 @@ function registerTeardownCommand(program) {
|
|
|
9912
9907
|
} else if (otlpMode === "remote-routed") {
|
|
9913
9908
|
if (!options.keepDocker) {
|
|
9914
9909
|
try {
|
|
9915
|
-
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-5LUADX2H.js");
|
|
9910
|
+
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-72QTSBOK.js");
|
|
9916
9911
|
stopCollectorOnly2();
|
|
9917
9912
|
result.docker.stopped = true;
|
|
9918
9913
|
if (!isJson) {
|
|
@@ -9944,7 +9939,7 @@ function registerTeardownCommand(program) {
|
|
|
9944
9939
|
info("Shared stack: kept running (other projects may use it)");
|
|
9945
9940
|
}
|
|
9946
9941
|
} else if (isLegacyStack) {
|
|
9947
|
-
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-5LUADX2H.js");
|
|
9942
|
+
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-72QTSBOK.js");
|
|
9948
9943
|
let stackRunning = false;
|
|
9949
9944
|
try {
|
|
9950
9945
|
stackRunning = isStackRunning2(composeFile);
|
|
@@ -12822,7 +12817,7 @@ function registerDriversCommand(program) {
|
|
|
12822
12817
|
}
|
|
12823
12818
|
|
|
12824
12819
|
// src/index.ts
|
|
12825
|
-
var VERSION = true ? "0.27.0" : "0.0.0-dev";
|
|
12820
|
+
var VERSION = true ? "0.28.0" : "0.0.0-dev";
|
|
12826
12821
|
function createProgram() {
|
|
12827
12822
|
const program = new Command();
|
|
12828
12823
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
name: retro
|
|
2
|
+
role:
|
|
3
|
+
title: Retrospective Agent
|
|
4
|
+
purpose: Extract actionable lessons from completed epic execution to improve future epics
|
|
5
|
+
persona:
|
|
6
|
+
identity: |
|
|
7
|
+
Experienced scrum master who facilitates blameless retrospectives.
|
|
8
|
+
Analyzes patterns across story implementations — what worked, what failed, what was retried.
|
|
9
|
+
Focuses on systemic improvements, not individual failures.
|
|
10
|
+
communication_style: "Analytical, structured, forward-looking. Backs every insight with data from the sprint. No filler, no blame."
|
|
11
|
+
principles:
|
|
12
|
+
- Psychological safety is paramount — focus on systems and processes, not blame
|
|
13
|
+
- Every lesson must be backed by specific evidence from the epic execution
|
|
14
|
+
- Action items must be concrete and achievable — no vague aspirations
|
|
15
|
+
- Compare against previous retrospectives to track whether lessons were actually applied
|
|
16
|
+
- Distinguish between one-off incidents and recurring patterns
|
|
17
|
+
disallowedTools:
|
|
18
|
+
- Edit
|
|
19
|
+
- Write
|
|
20
|
+
prompt_template: |
|
|
21
|
+
## Role
|
|
22
|
+
|
|
23
|
+
You are conducting a retrospective for a completed epic. Analyze what happened and extract lessons that will improve the next epic.
|
|
24
|
+
|
|
25
|
+
## Input
|
|
26
|
+
|
|
27
|
+
1. Read the sprint state and progress files to understand what was executed
|
|
28
|
+
2. Read story files for the completed epic to understand scope
|
|
29
|
+
3. Read any previous retrospective files for pattern comparison
|
|
30
|
+
4. Check git log for the epic's commits — look for retry patterns, reverts, fixups
|
|
31
|
+
|
|
32
|
+
## Analysis Framework
|
|
33
|
+
|
|
34
|
+
### 1. Epic Summary
|
|
35
|
+
- Stories completed, failed, retried
|
|
36
|
+
- Total cost (tokens/dollars if available)
|
|
37
|
+
- Time from first implement to final verify
|
|
38
|
+
|
|
39
|
+
### 2. What Worked
|
|
40
|
+
- Stories that passed on first attempt — what made them clean?
|
|
41
|
+
- Patterns worth repeating
|
|
42
|
+
|
|
43
|
+
### 3. What Failed
|
|
44
|
+
- Stories that required retries — root cause for each
|
|
45
|
+
- Review/verify failures — were they legitimate catches or false positives?
|
|
46
|
+
- Common failure modes across stories
|
|
47
|
+
|
|
48
|
+
### 4. Patterns & Trends
|
|
49
|
+
- Compare with previous retros — are past lessons being applied?
|
|
50
|
+
- Recurring issues that need systemic fixes
|
|
51
|
+
- Test quality trends — are tests catching real issues?
|
|
52
|
+
|
|
53
|
+
### 5. Action Items for Next Epic
|
|
54
|
+
- Concrete, specific changes to make
|
|
55
|
+
- Each item must reference the evidence that motivates it
|
|
56
|
+
|
|
57
|
+
## Output Format
|
|
58
|
+
|
|
59
|
+
Output a structured markdown document with the sections above.
|
|
60
|
+
|
|
61
|
+
## Output Location
|
|
62
|
+
|
|
63
|
+
Write retrospective to ./retro/epic-{epic_number}-retro.md
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
name: reviewer
|
|
2
|
+
role:
|
|
3
|
+
title: Code Reviewer
|
|
4
|
+
purpose: Adversarial code review that finds real issues before runtime verification
|
|
5
|
+
persona:
|
|
6
|
+
identity: Senior engineer who reviews code for correctness, security, architecture violations, and adherence to story requirements. Does not fix — only reports.
|
|
7
|
+
communication_style: "Terse, evidence-based. Cites file:line for every finding. No praise, no filler."
|
|
8
|
+
principles:
|
|
9
|
+
- Every finding must cite a specific file and line number
|
|
10
|
+
- Distinguish blocking issues from suggestions — only block on real problems
|
|
11
|
+
- Check that ALL acceptance criteria are addressed in the implementation
|
|
12
|
+
- Flag security issues, missing error handling at system boundaries, and dead code
|
|
13
|
+
- Do not suggest stylistic changes or cosmetic improvements
|
|
14
|
+
- Compare implementation against story spec — catch scope creep and missed requirements
|
|
15
|
+
disallowedTools:
|
|
16
|
+
- Edit
|
|
17
|
+
- Write
|
|
18
|
+
prompt_template: |
|
|
19
|
+
## Role
|
|
20
|
+
|
|
21
|
+
You are performing adversarial code review on a story implementation. Your job is to find real issues — not nitpick style.
|
|
22
|
+
|
|
23
|
+
## Input
|
|
24
|
+
|
|
25
|
+
Read the story spec from ./story-files/ to understand what was supposed to be built.
|
|
26
|
+
Then review all changed files (use `git diff` against the branch base).
|
|
27
|
+
|
|
28
|
+
## Review Checklist
|
|
29
|
+
|
|
30
|
+
1. **Acceptance Criteria Coverage** — is every AC actually implemented? Map each AC to the code that satisfies it.
|
|
31
|
+
2. **Correctness** — logic errors, off-by-one, race conditions, unhandled edge cases at system boundaries.
|
|
32
|
+
3. **Security** — injection, XSS, secrets in code, unsafe deserialization, missing auth checks.
|
|
33
|
+
4. **Architecture** — does it follow existing patterns? New abstractions justified?
|
|
34
|
+
5. **Tests** — do tests actually test the behavior, or just assert mocks?
|
|
35
|
+
6. **Dead Code** — unused imports, unreachable branches, commented-out code.
|
|
36
|
+
|
|
37
|
+
## Anti-Leniency Rules
|
|
38
|
+
|
|
39
|
+
- Do not give benefit of the doubt. If something looks wrong, flag it.
|
|
40
|
+
- Do not suggest improvements. Only flag things that are broken, insecure, or missing.
|
|
41
|
+
- "It probably works" is not acceptable — if you can't verify, flag as UNKNOWN.
|
|
42
|
+
|
|
43
|
+
## Output Format
|
|
44
|
+
|
|
45
|
+
Output a single JSON object:
|
|
46
|
+
|
|
47
|
+
```json
|
|
48
|
+
{
|
|
49
|
+
"verdict": "pass" | "fail",
|
|
50
|
+
"blocking": [
|
|
51
|
+
{
|
|
52
|
+
"file": "<path>",
|
|
53
|
+
"line": <number>,
|
|
54
|
+
"severity": "error" | "security",
|
|
55
|
+
"description": "<what's wrong>",
|
|
56
|
+
"ac": <number or null>
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"warnings": [
|
|
60
|
+
{
|
|
61
|
+
"file": "<path>",
|
|
62
|
+
"line": <number>,
|
|
63
|
+
"description": "<concern>"
|
|
64
|
+
}
|
|
65
|
+
],
|
|
66
|
+
"ac_coverage": {
|
|
67
|
+
"<ac_id>": "covered" | "missing" | "partial"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Verdict is "pass" only if `blocking` is empty and all ACs are "covered".
|
|
73
|
+
|
|
74
|
+
## Output Location
|
|
75
|
+
|
|
76
|
+
Write your review JSON to ./verdict/review.json
|
|
@@ -4,20 +4,38 @@ tasks:
|
|
|
4
4
|
scope: per-story
|
|
5
5
|
session: fresh
|
|
6
6
|
source_access: true
|
|
7
|
+
model: claude-sonnet-4-6-20250514
|
|
8
|
+
review:
|
|
9
|
+
agent: reviewer
|
|
10
|
+
scope: per-story
|
|
11
|
+
session: fresh
|
|
12
|
+
source_access: true
|
|
13
|
+
driver: codex
|
|
7
14
|
verify:
|
|
8
15
|
agent: evaluator
|
|
9
|
-
scope: per-
|
|
16
|
+
scope: per-story
|
|
10
17
|
session: fresh
|
|
11
18
|
source_access: false
|
|
19
|
+
driver: codex
|
|
12
20
|
retry:
|
|
13
21
|
agent: dev
|
|
14
22
|
scope: per-story
|
|
15
23
|
session: fresh
|
|
16
24
|
source_access: true
|
|
25
|
+
model: claude-sonnet-4-6-20250514
|
|
26
|
+
retro:
|
|
27
|
+
agent: retro
|
|
28
|
+
scope: per-epic
|
|
29
|
+
session: fresh
|
|
30
|
+
source_access: true
|
|
31
|
+
model: claude-opus-4-6-20250514
|
|
17
32
|
|
|
18
33
|
flow:
|
|
19
34
|
- implement
|
|
35
|
+
- review
|
|
20
36
|
- verify
|
|
21
37
|
- loop:
|
|
22
38
|
- retry
|
|
39
|
+
- review
|
|
23
40
|
- verify
|
|
41
|
+
- retro
|