codeharness 0.35.0 → 0.35.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
|
|
|
2895
2895
|
}
|
|
2896
2896
|
|
|
2897
2897
|
// src/modules/infra/init-project.ts
|
|
2898
|
-
var HARNESS_VERSION = true ? "0.35.0" : "0.0.0-dev";
|
|
2898
|
+
var HARNESS_VERSION = true ? "0.35.2" : "0.0.0-dev";
|
|
2899
2899
|
function failResult(opts, error) {
|
|
2900
2900
|
return {
|
|
2901
2901
|
status: "fail",
|
package/dist/index.js
CHANGED
|
@@ -40,7 +40,7 @@ import {
|
|
|
40
40
|
validateDockerfile,
|
|
41
41
|
warn,
|
|
42
42
|
writeState
|
|
43
|
-
} from "./chunk-
|
|
43
|
+
} from "./chunk-KJF2YA5T.js";
|
|
44
44
|
|
|
45
45
|
// src/index.ts
|
|
46
46
|
import { Command } from "commander";
|
|
@@ -3386,8 +3386,7 @@ async function dispatchTaskWithResult(task, taskName, storyKey, definition, stat
|
|
|
3386
3386
|
}
|
|
3387
3387
|
const isEpicSentinel = storyKey.startsWith("__epic_") || storyKey === PER_RUN_SENTINEL;
|
|
3388
3388
|
const TASK_PROMPTS = {
|
|
3389
|
-
"create-story": (key) => `Create
|
|
3390
|
-
"negotiate-acs": (key) => `Review the ACs in story ${key} for testability. Can each AC be verified by a blind QA agent with only Docker access and user documentation? Include <verdict>pass</verdict> or <verdict>fail</verdict> in your response. If fail, include <issues>...</issues> with specific feedback per AC.`,
|
|
3389
|
+
"create-story": (key) => `Create the story spec for ${key}. Read the epic definitions and architecture docs. Write a complete story file with acceptance criteria, tasks, and dev notes. CRITICAL: Every AC must be testable by a blind QA agent using ONLY a user guide + browser/API/CLI access. No AC should reference source code, internal data structures, or implementation details like O(1) complexity. Each AC must describe observable behavior that can be verified through UI interaction (agent-browser), API calls (curl), CLI commands (docker exec), or log inspection (docker logs). Wrap output in <story-spec>...</story-spec> tags.`,
|
|
3391
3390
|
"implement": (key) => `Implement story ${key}`,
|
|
3392
3391
|
"check": (key) => `Run automated checks for story ${key}. Execute the project's test suite, linter, and coverage tool. Include <verdict>pass</verdict> or <verdict>fail</verdict> in your response.`,
|
|
3393
3392
|
"review": (key) => `Review the implementation of story ${key}. Check for correctness, security issues, architecture violations, and AC coverage. Include <verdict>pass</verdict> or <verdict>fail</verdict> in your response. If fail, include <issues>...</issues>.`,
|
|
@@ -3610,6 +3609,14 @@ async function executeLoopBlock(loopBlock, state, config, workItems, initialCont
|
|
|
3610
3609
|
if (loopBlock.loop.length === 0) {
|
|
3611
3610
|
return { state: currentState, errors, tasksCompleted, halted: false, lastContract: lastOutputContract };
|
|
3612
3611
|
}
|
|
3612
|
+
const lastAgentTaskInLoop = (() => {
|
|
3613
|
+
for (let i = loopBlock.loop.length - 1; i >= 0; i--) {
|
|
3614
|
+
const tn = loopBlock.loop[i];
|
|
3615
|
+
const t = config.workflow.tasks[tn];
|
|
3616
|
+
if (t && t.agent !== null) return tn;
|
|
3617
|
+
}
|
|
3618
|
+
return loopBlock.loop[loopBlock.loop.length - 1];
|
|
3619
|
+
})();
|
|
3613
3620
|
while (true) {
|
|
3614
3621
|
const nextIteration = currentState.iteration + 1;
|
|
3615
3622
|
const allCurrentIterationDone = currentState.iteration > 0 && loopBlock.loop.every((tn) => {
|
|
@@ -3752,88 +3759,91 @@ async function executeLoopBlock(loopBlock, state, config, workItems, initialCont
|
|
|
3752
3759
|
propagateVerifyFlags(taskName, dispatchResult.contract, projectDir);
|
|
3753
3760
|
accumulatedCostUsd += dispatchResult.contract?.cost_usd ?? 0;
|
|
3754
3761
|
tasksCompleted++;
|
|
3755
|
-
|
|
3756
|
-
|
|
3757
|
-
verdict =
|
|
3758
|
-
|
|
3759
|
-
|
|
3760
|
-
|
|
3761
|
-
|
|
3762
|
-
|
|
3763
|
-
|
|
3764
|
-
|
|
3765
|
-
|
|
3766
|
-
|
|
3767
|
-
|
|
3768
|
-
|
|
3769
|
-
|
|
3770
|
-
|
|
3771
|
-
|
|
3772
|
-
|
|
3773
|
-
|
|
3774
|
-
|
|
3775
|
-
|
|
3776
|
-
|
|
3777
|
-
|
|
3778
|
-
|
|
3779
|
-
|
|
3780
|
-
|
|
3781
|
-
|
|
3762
|
+
const isLastTaskInLoop = taskName === lastAgentTaskInLoop;
|
|
3763
|
+
if (isLastTaskInLoop) {
|
|
3764
|
+
let verdict = null;
|
|
3765
|
+
try {
|
|
3766
|
+
verdict = parseVerdict(dispatchResult.output);
|
|
3767
|
+
} catch (parseErr) {
|
|
3768
|
+
if (parseErr instanceof VerdictParseError && parseErr.retryable) {
|
|
3769
|
+
warn(`workflow-engine: verdict parse failed, retrying evaluator for ${taskName}`);
|
|
3770
|
+
try {
|
|
3771
|
+
const retryResult = await dispatchTaskWithResult(
|
|
3772
|
+
task,
|
|
3773
|
+
taskName,
|
|
3774
|
+
PER_RUN_SENTINEL,
|
|
3775
|
+
definition,
|
|
3776
|
+
currentState,
|
|
3777
|
+
config,
|
|
3778
|
+
void 0,
|
|
3779
|
+
lastOutputContract ?? void 0
|
|
3780
|
+
);
|
|
3781
|
+
currentState = retryResult.updatedState;
|
|
3782
|
+
lastOutputContract = retryResult.contract;
|
|
3783
|
+
propagateVerifyFlags(taskName, retryResult.contract, projectDir);
|
|
3784
|
+
tasksCompleted++;
|
|
3785
|
+
verdict = parseVerdict(retryResult.output);
|
|
3786
|
+
} catch {
|
|
3787
|
+
verdict = buildAllUnknownVerdict(
|
|
3788
|
+
workItems,
|
|
3789
|
+
"Evaluator failed to produce valid JSON after retry"
|
|
3790
|
+
);
|
|
3791
|
+
}
|
|
3782
3792
|
}
|
|
3783
3793
|
}
|
|
3784
|
-
|
|
3785
|
-
|
|
3786
|
-
|
|
3787
|
-
|
|
3788
|
-
|
|
3789
|
-
|
|
3790
|
-
|
|
3791
|
-
|
|
3794
|
+
if (!verdict) {
|
|
3795
|
+
const tagged = parseVerdictTag(dispatchResult.output);
|
|
3796
|
+
if (tagged) {
|
|
3797
|
+
verdict = {
|
|
3798
|
+
verdict: tagged.verdict,
|
|
3799
|
+
score: { passed: tagged.verdict === "pass" ? 1 : 0, failed: tagged.verdict === "fail" ? 1 : 0, unknown: 0, total: 1 },
|
|
3800
|
+
findings: []
|
|
3801
|
+
};
|
|
3802
|
+
}
|
|
3803
|
+
}
|
|
3804
|
+
lastVerdict = verdict;
|
|
3805
|
+
if (verdict) {
|
|
3806
|
+
const score = {
|
|
3807
|
+
iteration: currentState.iteration,
|
|
3808
|
+
passed: verdict.score.passed,
|
|
3809
|
+
failed: verdict.score.failed,
|
|
3810
|
+
unknown: verdict.score.unknown,
|
|
3811
|
+
total: verdict.score.total,
|
|
3812
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
3813
|
+
};
|
|
3814
|
+
currentState = {
|
|
3815
|
+
...currentState,
|
|
3816
|
+
evaluator_scores: [...currentState.evaluator_scores, score]
|
|
3817
|
+
};
|
|
3818
|
+
} else {
|
|
3819
|
+
const totalItems = workItems.length;
|
|
3820
|
+
const score = {
|
|
3821
|
+
iteration: currentState.iteration,
|
|
3822
|
+
passed: 0,
|
|
3823
|
+
failed: 0,
|
|
3824
|
+
unknown: totalItems,
|
|
3825
|
+
total: totalItems,
|
|
3826
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
3827
|
+
};
|
|
3828
|
+
currentState = {
|
|
3829
|
+
...currentState,
|
|
3830
|
+
evaluator_scores: [...currentState.evaluator_scores, score]
|
|
3792
3831
|
};
|
|
3793
3832
|
}
|
|
3794
|
-
|
|
3795
|
-
|
|
3796
|
-
|
|
3797
|
-
|
|
3798
|
-
|
|
3799
|
-
|
|
3800
|
-
|
|
3801
|
-
|
|
3802
|
-
|
|
3803
|
-
|
|
3804
|
-
|
|
3805
|
-
|
|
3806
|
-
...currentState,
|
|
3807
|
-
evaluator_scores: [...currentState.evaluator_scores, score]
|
|
3808
|
-
};
|
|
3809
|
-
} else {
|
|
3810
|
-
const totalItems = workItems.length;
|
|
3811
|
-
const score = {
|
|
3812
|
-
iteration: currentState.iteration,
|
|
3813
|
-
passed: 0,
|
|
3814
|
-
failed: 0,
|
|
3815
|
-
unknown: totalItems,
|
|
3816
|
-
total: totalItems,
|
|
3817
|
-
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
3818
|
-
};
|
|
3819
|
-
currentState = {
|
|
3820
|
-
...currentState,
|
|
3821
|
-
evaluator_scores: [...currentState.evaluator_scores, score]
|
|
3822
|
-
};
|
|
3833
|
+
const cbDecision = evaluateProgress(currentState.evaluator_scores);
|
|
3834
|
+
if (cbDecision.halt) {
|
|
3835
|
+
currentState = {
|
|
3836
|
+
...currentState,
|
|
3837
|
+
circuit_breaker: {
|
|
3838
|
+
triggered: true,
|
|
3839
|
+
reason: cbDecision.reason,
|
|
3840
|
+
score_history: cbDecision.scoreHistory
|
|
3841
|
+
}
|
|
3842
|
+
};
|
|
3843
|
+
writeWorkflowState(currentState, projectDir);
|
|
3844
|
+
}
|
|
3823
3845
|
}
|
|
3824
3846
|
writeWorkflowState(currentState, projectDir);
|
|
3825
|
-
const cbDecision = evaluateProgress(currentState.evaluator_scores);
|
|
3826
|
-
if (cbDecision.halt) {
|
|
3827
|
-
currentState = {
|
|
3828
|
-
...currentState,
|
|
3829
|
-
circuit_breaker: {
|
|
3830
|
-
triggered: true,
|
|
3831
|
-
reason: cbDecision.reason,
|
|
3832
|
-
score_history: cbDecision.scoreHistory
|
|
3833
|
-
}
|
|
3834
|
-
};
|
|
3835
|
-
writeWorkflowState(currentState, projectDir);
|
|
3836
|
-
}
|
|
3837
3847
|
} catch (err) {
|
|
3838
3848
|
const engineError = handleDispatchError(err, taskName, PER_RUN_SENTINEL);
|
|
3839
3849
|
errors.push(engineError);
|
|
@@ -11189,7 +11199,7 @@ function registerTeardownCommand(program) {
|
|
|
11189
11199
|
} else if (otlpMode === "remote-routed") {
|
|
11190
11200
|
if (!options.keepDocker) {
|
|
11191
11201
|
try {
|
|
11192
|
-
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-
|
|
11202
|
+
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-EIWOFRFK.js");
|
|
11193
11203
|
stopCollectorOnly2();
|
|
11194
11204
|
result.docker.stopped = true;
|
|
11195
11205
|
if (!isJson) {
|
|
@@ -11221,7 +11231,7 @@ function registerTeardownCommand(program) {
|
|
|
11221
11231
|
info("Shared stack: kept running (other projects may use it)");
|
|
11222
11232
|
}
|
|
11223
11233
|
} else if (isLegacyStack) {
|
|
11224
|
-
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-
|
|
11234
|
+
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-EIWOFRFK.js");
|
|
11225
11235
|
let stackRunning = false;
|
|
11226
11236
|
try {
|
|
11227
11237
|
stackRunning = isStackRunning2(composeFile);
|
|
@@ -14208,7 +14218,7 @@ function registerDriversCommand(program) {
|
|
|
14208
14218
|
}
|
|
14209
14219
|
|
|
14210
14220
|
// src/index.ts
|
|
14211
|
-
var VERSION = true ? "0.35.0" : "0.0.0-dev";
|
|
14221
|
+
var VERSION = true ? "0.35.2" : "0.0.0-dev";
|
|
14212
14222
|
function createProgram() {
|
|
14213
14223
|
const program = new Command();
|
|
14214
14224
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
|
@@ -4,11 +4,6 @@ tasks:
|
|
|
4
4
|
session: fresh
|
|
5
5
|
source_access: true
|
|
6
6
|
model: claude-opus-4-6
|
|
7
|
-
negotiate-acs:
|
|
8
|
-
agent: negotiator
|
|
9
|
-
session: fresh
|
|
10
|
-
source_access: true
|
|
11
|
-
model: claude-sonnet-4-6
|
|
12
7
|
implement:
|
|
13
8
|
agent: dev
|
|
14
9
|
session: fresh
|
|
@@ -52,10 +47,6 @@ tasks:
|
|
|
52
47
|
|
|
53
48
|
story_flow:
|
|
54
49
|
- create-story
|
|
55
|
-
- negotiate-acs
|
|
56
|
-
- loop:
|
|
57
|
-
- create-story
|
|
58
|
-
- negotiate-acs
|
|
59
50
|
- implement
|
|
60
51
|
- check
|
|
61
52
|
- review
|