codeharness 0.32.3 → 0.33.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-N57BYUXA.js → chunk-537B2B6W.js} +1 -1
- package/dist/{docker-UY37PFPB.js → docker-ZMY7GX5P.js} +1 -1
- package/dist/index.js +137 -191
- package/package.json +1 -1
- package/patches/dev/enforcement.md +8 -17
- package/patches/retro/enforcement.md +1 -1
- package/patches/review/enforcement.md +4 -26
- package/patches/verify/story-verification.md +6 -30
- package/templates/agents/documenter.yaml +64 -0
- package/templates/agents/evaluator.yaml +16 -11
- package/templates/workflows/default.yaml +7 -0
|
@@ -2895,7 +2895,7 @@ function generateDockerfileTemplate(projectDir, stackOrDetections) {
|
|
|
2895
2895
|
}
|
|
2896
2896
|
|
|
2897
2897
|
// src/modules/infra/init-project.ts
|
|
2898
|
-
var HARNESS_VERSION = true ? "0.32.3" : "0.0.0-dev";
|
|
2898
|
+
var HARNESS_VERSION = true ? "0.33.1" : "0.0.0-dev";
|
|
2899
2899
|
function failResult(opts, error) {
|
|
2900
2900
|
return {
|
|
2901
2901
|
status: "fail",
|
package/dist/index.js
CHANGED
|
@@ -40,7 +40,7 @@ import {
|
|
|
40
40
|
validateDockerfile,
|
|
41
41
|
warn,
|
|
42
42
|
writeState
|
|
43
|
-
} from "./chunk-N57BYUXA.js";
|
|
43
|
+
} from "./chunk-537B2B6W.js";
|
|
44
44
|
|
|
45
45
|
// src/index.ts
|
|
46
46
|
import { Command } from "commander";
|
|
@@ -2507,7 +2507,7 @@ function resolveWorkflow(options) {
|
|
|
2507
2507
|
}
|
|
2508
2508
|
|
|
2509
2509
|
// src/lib/workflow-engine.ts
|
|
2510
|
-
import { readFileSync as readFileSync13, existsSync as existsSync15 } from "fs";
|
|
2510
|
+
import { readFileSync as readFileSync13, existsSync as existsSync15, writeFileSync as writeFileSync8, mkdirSync as mkdirSync6, rmSync as rmSync2 } from "fs";
|
|
2511
2511
|
import { join as join12 } from "path";
|
|
2512
2512
|
import { parse as parse5 } from "yaml";
|
|
2513
2513
|
|
|
@@ -3347,7 +3347,7 @@ async function executeNullTask(task, taskName, storyKey, state, config, previous
|
|
|
3347
3347
|
writeWorkflowState(updatedState, projectDir);
|
|
3348
3348
|
return { updatedState, output: result.output ?? "", contract };
|
|
3349
3349
|
}
|
|
3350
|
-
async function dispatchTaskWithResult(task, taskName, storyKey, definition, state, config, customPrompt, previousOutputContract) {
|
|
3350
|
+
async function dispatchTaskWithResult(task, taskName, storyKey, definition, state, config, customPrompt, previousOutputContract, storyFiles) {
|
|
3351
3351
|
const projectDir = config.projectDir ?? process.cwd();
|
|
3352
3352
|
const traceId = generateTraceId(config.runId, state.iteration, taskName);
|
|
3353
3353
|
const tracePrompt = formatTracePrompt(traceId);
|
|
@@ -3361,7 +3361,7 @@ async function dispatchTaskWithResult(task, taskName, storyKey, definition, stat
|
|
|
3361
3361
|
let workspace = null;
|
|
3362
3362
|
if (task.source_access === false) {
|
|
3363
3363
|
try {
|
|
3364
|
-
workspace = await createIsolatedWorkspace({ runId: config.runId, storyFiles: [] });
|
|
3364
|
+
workspace = await createIsolatedWorkspace({ runId: config.runId, storyFiles: storyFiles ?? [] });
|
|
3365
3365
|
cwd = workspace?.toDispatchOptions()?.cwd ?? projectDir;
|
|
3366
3366
|
} catch {
|
|
3367
3367
|
cwd = projectDir;
|
|
@@ -4030,8 +4030,27 @@ async function executeWorkflow(config) {
|
|
|
4030
4030
|
}
|
|
4031
4031
|
const epicSentinel = `__epic_${epicId}__`;
|
|
4032
4032
|
if (isTaskCompleted(state, taskName, epicSentinel)) continue;
|
|
4033
|
+
let guideFiles = [];
|
|
4034
|
+
if (task.source_access === false) {
|
|
4035
|
+
const guidesDir = join12(projectDir, ".codeharness", "verify-guides");
|
|
4036
|
+
try {
|
|
4037
|
+
mkdirSync6(guidesDir, { recursive: true });
|
|
4038
|
+
for (const item of epicItems) {
|
|
4039
|
+
const contractPath = join12(projectDir, ".codeharness", "contracts", `document-${item.key}.json`);
|
|
4040
|
+
if (existsSync15(contractPath)) {
|
|
4041
|
+
const contractData = JSON.parse(readFileSync13(contractPath, "utf-8"));
|
|
4042
|
+
if (contractData.output) {
|
|
4043
|
+
const guidePath = join12(guidesDir, `${item.key}-guide.md`);
|
|
4044
|
+
writeFileSync8(guidePath, contractData.output, "utf-8");
|
|
4045
|
+
guideFiles.push(guidePath);
|
|
4046
|
+
}
|
|
4047
|
+
}
|
|
4048
|
+
}
|
|
4049
|
+
} catch {
|
|
4050
|
+
}
|
|
4051
|
+
}
|
|
4033
4052
|
try {
|
|
4034
|
-
const dr = await dispatchTaskWithResult(task, taskName, epicSentinel, definition, state, config, void 0, lastOutputContract ?? void 0);
|
|
4053
|
+
const dr = await dispatchTaskWithResult(task, taskName, epicSentinel, definition, state, config, void 0, lastOutputContract ?? void 0, guideFiles);
|
|
4035
4054
|
state = dr.updatedState;
|
|
4036
4055
|
lastOutputContract = dr.contract;
|
|
4037
4056
|
propagateVerifyFlags(taskName, dr.contract, projectDir);
|
|
@@ -4050,6 +4069,14 @@ async function executeWorkflow(config) {
|
|
|
4050
4069
|
if (err instanceof DispatchError && HALT_ERROR_CODES.has(err.code)) {
|
|
4051
4070
|
halted = true;
|
|
4052
4071
|
}
|
|
4072
|
+
} finally {
|
|
4073
|
+
if (guideFiles.length > 0) {
|
|
4074
|
+
const guidesDir = join12(projectDir, ".codeharness", "verify-guides");
|
|
4075
|
+
try {
|
|
4076
|
+
rmSync2(guidesDir, { recursive: true, force: true });
|
|
4077
|
+
} catch {
|
|
4078
|
+
}
|
|
4079
|
+
}
|
|
4053
4080
|
}
|
|
4054
4081
|
}
|
|
4055
4082
|
if (!halted) {
|
|
@@ -4115,7 +4142,7 @@ import { join as join14 } from "path";
|
|
|
4115
4142
|
|
|
4116
4143
|
// src/lib/cross-worktree-validator.ts
|
|
4117
4144
|
import { exec } from "child_process";
|
|
4118
|
-
import { appendFileSync as appendFileSync2, mkdirSync as
|
|
4145
|
+
import { appendFileSync as appendFileSync2, mkdirSync as mkdirSync7 } from "fs";
|
|
4119
4146
|
import { join as join13 } from "path";
|
|
4120
4147
|
import { promisify } from "util";
|
|
4121
4148
|
var execAsync = promisify(exec);
|
|
@@ -4151,7 +4178,7 @@ function writeMergeTelemetry(opts, result) {
|
|
|
4151
4178
|
errors: result.valid ? [] : ["Test suite failed after merge"]
|
|
4152
4179
|
};
|
|
4153
4180
|
const dir = join13(opts.cwd, TELEMETRY_DIR2);
|
|
4154
|
-
|
|
4181
|
+
mkdirSync7(dir, { recursive: true });
|
|
4155
4182
|
appendFileSync2(join13(dir, TELEMETRY_FILE2), JSON.stringify(entry) + "\n");
|
|
4156
4183
|
} catch {
|
|
4157
4184
|
}
|
|
@@ -5435,8 +5462,8 @@ function App({ state, onCycleLane, onQuit }) {
|
|
|
5435
5462
|
] })
|
|
5436
5463
|
] }) : /* @__PURE__ */ jsxs7(Fragment, { children: [
|
|
5437
5464
|
/* @__PURE__ */ jsx7(Separator, {}),
|
|
5438
|
-
/* @__PURE__ */ jsx7(ProgressBar, { done: state.sprintInfo?.done ?? 0, total: state.sprintInfo?.total ?? 0 }),
|
|
5439
|
-
/* @__PURE__ */ jsx7(EpicInfo, { info: state.sprintInfo }),
|
|
5465
|
+
/* @__PURE__ */ jsx7(ProgressBar, { done: state.sprintInfo?.done ?? 0, total: state.sprintInfo?.total ?? 0, inProgress: state.stories.filter((s) => s.status === "in-progress").length }),
|
|
5466
|
+
/* @__PURE__ */ jsx7(EpicInfo, { info: state.sprintInfo, stories: state.stories }),
|
|
5440
5467
|
/* @__PURE__ */ jsx7(StoryContext, { entries: state.storyContext ?? [] }),
|
|
5441
5468
|
/* @__PURE__ */ jsx7(Separator, {}),
|
|
5442
5469
|
/* @__PURE__ */ jsx7(WorkflowGraph, { flow: state.workflowFlow, currentTask: state.currentTaskName, taskStates: state.taskStates }),
|
|
@@ -5472,22 +5499,36 @@ function Header({ info: info3, laneCount }) {
|
|
|
5472
5499
|
/* @__PURE__ */ jsx8(Text8, { dimColor: true, children: right })
|
|
5473
5500
|
] });
|
|
5474
5501
|
}
|
|
5475
|
-
function ProgressBar({ done, total }) {
|
|
5476
|
-
const width = Math.max(10, (process.stdout.columns || 80) -
|
|
5477
|
-
const
|
|
5478
|
-
const
|
|
5479
|
-
const
|
|
5480
|
-
const
|
|
5502
|
+
function ProgressBar({ done, total, inProgress }) {
|
|
5503
|
+
const width = Math.max(10, (process.stdout.columns || 80) - 40);
|
|
5504
|
+
const ip = inProgress ?? 0;
|
|
5505
|
+
const donePct = total > 0 ? done / total : 0;
|
|
5506
|
+
const ipPct = total > 0 ? ip / total : 0;
|
|
5507
|
+
const doneFilled = Math.round(width * donePct);
|
|
5508
|
+
const ipFilled = Math.round(width * ipPct);
|
|
5509
|
+
const empty = Math.max(0, width - doneFilled - ipFilled);
|
|
5510
|
+
const pctStr = total > 0 ? `${Math.round((done + ip) * 100 / total)}%` : "0%";
|
|
5511
|
+
const label = ip > 0 ? `${done} verified + ${ip} in progress / ${total} (${pctStr})` : `${done}/${total} stories (${pctStr})`;
|
|
5481
5512
|
return /* @__PURE__ */ jsxs8(Text8, { children: [
|
|
5482
5513
|
"Progress: ",
|
|
5483
|
-
/* @__PURE__ */ jsx8(Text8, { color: "green", children:
|
|
5484
|
-
|
|
5514
|
+
/* @__PURE__ */ jsx8(Text8, { color: "green", children: "\u2588".repeat(doneFilled) }),
|
|
5515
|
+
/* @__PURE__ */ jsx8(Text8, { color: "yellow", children: "\u2588".repeat(ipFilled) }),
|
|
5516
|
+
/* @__PURE__ */ jsx8(Text8, { children: "\u2591".repeat(empty) }),
|
|
5517
|
+
` ${label}`
|
|
5485
5518
|
] });
|
|
5486
5519
|
}
|
|
5487
|
-
function EpicInfo({ info: info3 }) {
|
|
5520
|
+
function EpicInfo({ info: info3, stories }) {
|
|
5488
5521
|
if (!info3?.epicId) return null;
|
|
5489
5522
|
const title = info3.epicTitle ?? `Epic ${info3.epicId}`;
|
|
5490
|
-
const
|
|
5523
|
+
const epicPrefix2 = `${info3.epicId}-`;
|
|
5524
|
+
const epicStories = stories?.filter((s) => s.key.startsWith(epicPrefix2)) ?? [];
|
|
5525
|
+
const ipCount = epicStories.filter((s) => s.status === "in-progress").length;
|
|
5526
|
+
const doneCount = info3.epicStoriesDone ?? 0;
|
|
5527
|
+
const totalCount = info3.epicStoriesTotal ?? epicStories.length;
|
|
5528
|
+
const progressParts = [];
|
|
5529
|
+
if (doneCount > 0) progressParts.push(`${doneCount} verified`);
|
|
5530
|
+
if (ipCount > 0) progressParts.push(`${ipCount} implemented`);
|
|
5531
|
+
const progress = totalCount > 0 ? ` \u2014 ${progressParts.join(", ")} / ${totalCount} stories` : "";
|
|
5491
5532
|
return /* @__PURE__ */ jsxs8(Text8, { children: [
|
|
5492
5533
|
/* @__PURE__ */ jsx8(Text8, { bold: true, children: `Epic ${info3.epicId}: ${title}` }),
|
|
5493
5534
|
/* @__PURE__ */ jsx8(Text8, { dimColor: true, children: progress })
|
|
@@ -5552,11 +5593,11 @@ function startRenderer(options) {
|
|
|
5552
5593
|
let lastStoryKey = state.sprintInfo?.storyKey ?? null;
|
|
5553
5594
|
const pendingStoryCosts = /* @__PURE__ */ new Map();
|
|
5554
5595
|
let cleaned = false;
|
|
5596
|
+
process.stdout.write("\x1B[2J\x1B[H");
|
|
5555
5597
|
const onQuit = options?.onQuit;
|
|
5556
5598
|
const inkInstance = inkRender(/* @__PURE__ */ jsx9(App, { state, onCycleLane: () => cycleLane(), onQuit: onQuit ? () => onQuit() : void 0 }), {
|
|
5557
5599
|
exitOnCtrlC: false,
|
|
5558
|
-
patchConsole:
|
|
5559
|
-
// Disable console patching to prevent flicker
|
|
5600
|
+
patchConsole: !options?._forceTTY,
|
|
5560
5601
|
maxFps: 10
|
|
5561
5602
|
});
|
|
5562
5603
|
function rerender() {
|
|
@@ -5911,18 +5952,28 @@ function startRenderer(options) {
|
|
|
5911
5952
|
const ctx = [];
|
|
5912
5953
|
const currentStory = currentKey ?? "";
|
|
5913
5954
|
const currentTask = state.currentTaskName ?? "";
|
|
5914
|
-
|
|
5915
|
-
|
|
5916
|
-
|
|
5917
|
-
|
|
5918
|
-
|
|
5919
|
-
|
|
5920
|
-
|
|
5921
|
-
|
|
5922
|
-
|
|
5923
|
-
|
|
5924
|
-
|
|
5925
|
-
|
|
5955
|
+
const epicMatch = currentStory.match(/^Epic (\d+)/);
|
|
5956
|
+
if (epicMatch) {
|
|
5957
|
+
const epicPrefix2 = `${epicMatch[1]}-`;
|
|
5958
|
+
const epicStories = updatedStories.filter((s) => s.key.startsWith(epicPrefix2));
|
|
5959
|
+
const lastDone = [...epicStories].reverse().find((s) => s.status === "done" || s.status === "in-progress");
|
|
5960
|
+
if (lastDone) ctx.push({ key: lastDone.key, role: "prev" });
|
|
5961
|
+
const ipStory = epicStories.find((s) => s.status === "in-progress");
|
|
5962
|
+
if (ipStory) ctx.push({ key: `${epicStories.length} stories in epic`, role: "current" });
|
|
5963
|
+
} else {
|
|
5964
|
+
let foundCurrent = false;
|
|
5965
|
+
let prevKey = null;
|
|
5966
|
+
for (const s of updatedStories) {
|
|
5967
|
+
if (s.key === currentStory) {
|
|
5968
|
+
if (prevKey) ctx.push({ key: prevKey, role: "prev" });
|
|
5969
|
+
ctx.push({ key: s.key, role: "current" });
|
|
5970
|
+
foundCurrent = true;
|
|
5971
|
+
} else if (foundCurrent && (s.status === "pending" || s.status === "in-progress")) {
|
|
5972
|
+
ctx.push({ key: s.key, role: "next" });
|
|
5973
|
+
break;
|
|
5974
|
+
} else if (s.status === "done" || s.status === "in-progress") {
|
|
5975
|
+
prevKey = s.key;
|
|
5976
|
+
}
|
|
5926
5977
|
}
|
|
5927
5978
|
}
|
|
5928
5979
|
state.storyContext = ctx;
|
|
@@ -6199,10 +6250,11 @@ function registerRunCommand(program) {
|
|
|
6199
6250
|
currentTaskName = event.taskName;
|
|
6200
6251
|
const inLoop = inEpicPhase && epicLoopTasks.has(event.taskName) && taskStates[event.taskName] === "done";
|
|
6201
6252
|
const stateKey = inLoop ? `loop:${event.taskName}` : event.taskName;
|
|
6202
|
-
const epicId = extractEpicId2(event.storyKey);
|
|
6253
|
+
const epicId = event.storyKey.startsWith("__epic_") ? event.storyKey.replace("__epic_", "").replace("__", "") : extractEpicId2(event.storyKey);
|
|
6254
|
+
const displayStoryKey = event.storyKey.startsWith("__epic_") ? `Epic ${epicId}` : event.storyKey;
|
|
6203
6255
|
const epic = epicData[epicId];
|
|
6204
6256
|
renderer.updateSprintState({
|
|
6205
|
-
storyKey:
|
|
6257
|
+
storyKey: displayStoryKey,
|
|
6206
6258
|
phase: event.taskName,
|
|
6207
6259
|
done: storiesDone,
|
|
6208
6260
|
total: counts.total,
|
|
@@ -6223,6 +6275,10 @@ function registerRunCommand(program) {
|
|
|
6223
6275
|
renderer.updateStories([...storyEntries]);
|
|
6224
6276
|
}
|
|
6225
6277
|
}
|
|
6278
|
+
if (isEpicTask) {
|
|
6279
|
+
const epicStories = storyEntries.filter((s) => s.key.startsWith(`${epicId}-`));
|
|
6280
|
+
renderer.updateStories([...storyEntries]);
|
|
6281
|
+
}
|
|
6226
6282
|
}
|
|
6227
6283
|
if (event.type === "dispatch-end") {
|
|
6228
6284
|
totalCostUsd += event.costUsd ?? 0;
|
|
@@ -6398,22 +6454,6 @@ import { readFileSync as readFileSync24 } from "fs";
|
|
|
6398
6454
|
|
|
6399
6455
|
// src/modules/verify/proof.ts
|
|
6400
6456
|
import { existsSync as existsSync18, readFileSync as readFileSync15 } from "fs";
|
|
6401
|
-
|
|
6402
|
-
// src/modules/verify/types.ts
|
|
6403
|
-
var TIER_HIERARCHY = [
|
|
6404
|
-
"test-provable",
|
|
6405
|
-
"runtime-provable",
|
|
6406
|
-
"environment-provable",
|
|
6407
|
-
"escalate"
|
|
6408
|
-
];
|
|
6409
|
-
var LEGACY_TIER_MAP = {
|
|
6410
|
-
"cli-verifiable": "test-provable",
|
|
6411
|
-
"integration-required": "environment-provable",
|
|
6412
|
-
"unit-testable": "test-provable",
|
|
6413
|
-
"black-box": "environment-provable"
|
|
6414
|
-
};
|
|
6415
|
-
|
|
6416
|
-
// src/modules/verify/proof.ts
|
|
6417
6457
|
function classifyEvidenceCommands(proofContent) {
|
|
6418
6458
|
const results = [];
|
|
6419
6459
|
const codeBlockPattern = /```(?:bash|shell)\n([\s\S]*?)```/g;
|
|
@@ -6503,15 +6543,7 @@ function validateProofQuality(proofPath) {
|
|
|
6503
6543
|
return emptyResult;
|
|
6504
6544
|
}
|
|
6505
6545
|
const content = readFileSync15(proofPath, "utf-8");
|
|
6506
|
-
const
|
|
6507
|
-
const uniqueTierNames = [...new Set(allTierNames)];
|
|
6508
|
-
const tierPattern = new RegExp(`\\*\\*Tier:\\*\\*\\s*(${uniqueTierNames.join("|")})`, "i");
|
|
6509
|
-
const bbTierMatch = tierPattern.exec(content);
|
|
6510
|
-
const rawTierValue = bbTierMatch ? bbTierMatch[1].toLowerCase() : null;
|
|
6511
|
-
const normalizedTier = rawTierValue ? LEGACY_TIER_MAP[rawTierValue] ?? (TIER_HIERARCHY.includes(rawTierValue) ? rawTierValue : null) : null;
|
|
6512
|
-
const skipDockerEnforcement = normalizedTier !== null && normalizedTier !== "environment-provable";
|
|
6513
|
-
const bbRawEnforcement = checkBlackBoxEnforcement(content);
|
|
6514
|
-
const bbEnforcement = skipDockerEnforcement ? { ...bbRawEnforcement, blackBoxPass: true } : bbRawEnforcement;
|
|
6546
|
+
const bbEnforcement = checkBlackBoxEnforcement(content);
|
|
6515
6547
|
function buildResult(base) {
|
|
6516
6548
|
const basePassed = base.pending === 0 && base.verified > 0;
|
|
6517
6549
|
return {
|
|
@@ -6651,7 +6683,7 @@ function validateProofQuality(proofPath) {
|
|
|
6651
6683
|
|
|
6652
6684
|
// src/modules/verify/orchestrator.ts
|
|
6653
6685
|
import { execFileSync } from "child_process";
|
|
6654
|
-
import { mkdirSync as
|
|
6686
|
+
import { mkdirSync as mkdirSync9, writeFileSync as writeFileSync10 } from "fs";
|
|
6655
6687
|
import { join as join20 } from "path";
|
|
6656
6688
|
|
|
6657
6689
|
// src/lib/doc-health/types.ts
|
|
@@ -7113,10 +7145,10 @@ function checkAgentsMdLineCount(filePath, docPath, documents) {
|
|
|
7113
7145
|
// src/lib/doc-health/report.ts
|
|
7114
7146
|
import {
|
|
7115
7147
|
existsSync as existsSync21,
|
|
7116
|
-
mkdirSync as
|
|
7148
|
+
mkdirSync as mkdirSync8,
|
|
7117
7149
|
readFileSync as readFileSync18,
|
|
7118
7150
|
unlinkSync as unlinkSync2,
|
|
7119
|
-
writeFileSync as
|
|
7151
|
+
writeFileSync as writeFileSync9
|
|
7120
7152
|
} from "fs";
|
|
7121
7153
|
import { join as join19 } from "path";
|
|
7122
7154
|
function printDocHealthOutput(report) {
|
|
@@ -7152,9 +7184,9 @@ function completeExecPlan(storyId, dir) {
|
|
|
7152
7184
|
Completed: ${timestamp}`
|
|
7153
7185
|
);
|
|
7154
7186
|
const completedDir = join19(root, "docs", "exec-plans", "completed");
|
|
7155
|
-
|
|
7187
|
+
mkdirSync8(completedDir, { recursive: true });
|
|
7156
7188
|
const completedPath = join19(completedDir, `${storyId}.md`);
|
|
7157
|
-
|
|
7189
|
+
writeFileSync9(completedPath, content, "utf-8");
|
|
7158
7190
|
try {
|
|
7159
7191
|
unlinkSync2(activePath);
|
|
7160
7192
|
} catch {
|
|
@@ -7196,9 +7228,9 @@ function checkPreconditions(dir, storyId) {
|
|
|
7196
7228
|
function createProofDocument(storyId, _storyTitle, _acs, dir) {
|
|
7197
7229
|
const root = dir ?? process.cwd();
|
|
7198
7230
|
const verificationDir = join20(root, "verification");
|
|
7199
|
-
|
|
7231
|
+
mkdirSync9(verificationDir, { recursive: true });
|
|
7200
7232
|
const proofPath = join20(verificationDir, `${storyId}-proof.md`);
|
|
7201
|
-
|
|
7233
|
+
writeFileSync10(proofPath, `# ${storyId} \u2014 Proof
|
|
7202
7234
|
|
|
7203
7235
|
Pending: blind evaluator (Epic 6)
|
|
7204
7236
|
`, "utf-8");
|
|
@@ -7261,87 +7293,8 @@ var DB_KEYWORDS = [
|
|
|
7261
7293
|
"sql",
|
|
7262
7294
|
"table"
|
|
7263
7295
|
];
|
|
7264
|
-
var INTEGRATION_KEYWORDS = [
|
|
7265
|
-
"external system",
|
|
7266
|
-
"real infrastructure",
|
|
7267
|
-
"manual verification"
|
|
7268
|
-
];
|
|
7269
|
-
var ESCALATE_KEYWORDS = [
|
|
7270
|
-
"physical hardware",
|
|
7271
|
-
"manual human",
|
|
7272
|
-
"visual inspection by human",
|
|
7273
|
-
"paid external service"
|
|
7274
|
-
];
|
|
7275
|
-
var RUNTIME_PROVABLE_KEYWORDS = [
|
|
7276
|
-
"cli command",
|
|
7277
|
-
"api endpoint",
|
|
7278
|
-
"http",
|
|
7279
|
-
"server",
|
|
7280
|
-
"output shows",
|
|
7281
|
-
"exit code",
|
|
7282
|
-
"binary",
|
|
7283
|
-
"runs and produces",
|
|
7284
|
-
"cli outputs",
|
|
7285
|
-
"when run"
|
|
7286
|
-
];
|
|
7287
|
-
var ENVIRONMENT_PROVABLE_KEYWORDS = [
|
|
7288
|
-
"docker",
|
|
7289
|
-
"container",
|
|
7290
|
-
"observability",
|
|
7291
|
-
"telemetry",
|
|
7292
|
-
"database",
|
|
7293
|
-
"queue",
|
|
7294
|
-
"distributed",
|
|
7295
|
-
"multi-service",
|
|
7296
|
-
"end-to-end",
|
|
7297
|
-
"victorialogs"
|
|
7298
|
-
];
|
|
7299
|
-
var ESCALATE_TIER_KEYWORDS = [
|
|
7300
|
-
"physical hardware",
|
|
7301
|
-
"human visual",
|
|
7302
|
-
"paid service",
|
|
7303
|
-
"gpu",
|
|
7304
|
-
"manual inspection",
|
|
7305
|
-
"physical display"
|
|
7306
|
-
];
|
|
7307
7296
|
|
|
7308
7297
|
// src/modules/verify/parser.ts
|
|
7309
|
-
function classifyVerifiability(description) {
|
|
7310
|
-
const lower = description.toLowerCase();
|
|
7311
|
-
for (const kw of INTEGRATION_KEYWORDS) {
|
|
7312
|
-
if (lower.includes(kw)) return "integration-required";
|
|
7313
|
-
}
|
|
7314
|
-
return "cli-verifiable";
|
|
7315
|
-
}
|
|
7316
|
-
function classifyStrategy(description) {
|
|
7317
|
-
const lower = description.toLowerCase();
|
|
7318
|
-
for (const kw of ESCALATE_KEYWORDS) {
|
|
7319
|
-
if (lower.includes(kw)) return "escalate";
|
|
7320
|
-
}
|
|
7321
|
-
return "docker";
|
|
7322
|
-
}
|
|
7323
|
-
function classifyTier(description) {
|
|
7324
|
-
const lower = description.toLowerCase();
|
|
7325
|
-
for (const kw of ESCALATE_TIER_KEYWORDS) {
|
|
7326
|
-
if (lower.includes(kw)) return "escalate";
|
|
7327
|
-
}
|
|
7328
|
-
for (const kw of ENVIRONMENT_PROVABLE_KEYWORDS) {
|
|
7329
|
-
if (lower.includes(kw)) return "environment-provable";
|
|
7330
|
-
}
|
|
7331
|
-
for (const kw of RUNTIME_PROVABLE_KEYWORDS) {
|
|
7332
|
-
if (lower.includes(kw)) return "runtime-provable";
|
|
7333
|
-
}
|
|
7334
|
-
return "test-provable";
|
|
7335
|
-
}
|
|
7336
|
-
var VERIFICATION_TAG_PATTERN = /<!--\s*verification:\s*(cli-verifiable|integration-required|unit-testable|black-box|test-provable|runtime-provable|environment-provable|escalate)\s*-->/;
|
|
7337
|
-
function parseVerificationTag(text) {
|
|
7338
|
-
const match = VERIFICATION_TAG_PATTERN.exec(text);
|
|
7339
|
-
if (!match) return null;
|
|
7340
|
-
const raw = match[1];
|
|
7341
|
-
const mapped = LEGACY_TIER_MAP[raw] ?? raw;
|
|
7342
|
-
if (!TIER_HIERARCHY.includes(mapped)) return null;
|
|
7343
|
-
return mapped;
|
|
7344
|
-
}
|
|
7345
7298
|
function classifyAC(description) {
|
|
7346
7299
|
const lower = description.toLowerCase();
|
|
7347
7300
|
for (const kw of UI_KEYWORDS) {
|
|
@@ -7391,17 +7344,10 @@ function parseStoryACs(storyFilePath) {
|
|
|
7391
7344
|
if (currentId !== null && currentDesc.length > 0) {
|
|
7392
7345
|
const description = currentDesc.join(" ").trim();
|
|
7393
7346
|
if (description) {
|
|
7394
|
-
const tag = parseVerificationTag(description);
|
|
7395
|
-
const tier = tag ?? classifyTier(description);
|
|
7396
|
-
const verifiability = classifyVerifiability(description);
|
|
7397
|
-
const strategy = classifyStrategy(description);
|
|
7398
7347
|
acs.push({
|
|
7399
7348
|
id: currentId,
|
|
7400
7349
|
description,
|
|
7401
|
-
type: classifyAC(description)
|
|
7402
|
-
verifiability,
|
|
7403
|
-
strategy,
|
|
7404
|
-
tier
|
|
7350
|
+
type: classifyAC(description)
|
|
7405
7351
|
});
|
|
7406
7352
|
} else {
|
|
7407
7353
|
warn(`Skipping malformed AC #${currentId}: empty description`);
|
|
@@ -7579,7 +7525,7 @@ function normalizeSeverity(severity) {
|
|
|
7579
7525
|
}
|
|
7580
7526
|
|
|
7581
7527
|
// src/modules/observability/coverage.ts
|
|
7582
|
-
import { readFileSync as readFileSync20, writeFileSync as
|
|
7528
|
+
import { readFileSync as readFileSync20, writeFileSync as writeFileSync11, renameSync as renameSync3, existsSync as existsSync24 } from "fs";
|
|
7583
7529
|
import { join as join22 } from "path";
|
|
7584
7530
|
var STATE_FILE2 = "sprint-state.json";
|
|
7585
7531
|
var DEFAULT_STATIC_TARGET = 80;
|
|
@@ -7668,7 +7614,7 @@ function parseGapArray(raw) {
|
|
|
7668
7614
|
}
|
|
7669
7615
|
|
|
7670
7616
|
// src/modules/observability/runtime-coverage.ts
|
|
7671
|
-
import { readFileSync as readFileSync21, writeFileSync as
|
|
7617
|
+
import { readFileSync as readFileSync21, writeFileSync as writeFileSync12, renameSync as renameSync4, existsSync as existsSync25 } from "fs";
|
|
7672
7618
|
import { join as join23 } from "path";
|
|
7673
7619
|
|
|
7674
7620
|
// src/modules/observability/coverage-gate.ts
|
|
@@ -8510,7 +8456,7 @@ function getACById(id) {
|
|
|
8510
8456
|
|
|
8511
8457
|
// src/modules/verify/validation-runner.ts
|
|
8512
8458
|
import { execSync as execSync5 } from "child_process";
|
|
8513
|
-
import { writeFileSync as
|
|
8459
|
+
import { writeFileSync as writeFileSync13, mkdirSync as mkdirSync10 } from "fs";
|
|
8514
8460
|
import { join as join25, dirname as dirname3 } from "path";
|
|
8515
8461
|
var MAX_VALIDATION_ATTEMPTS = 10;
|
|
8516
8462
|
var AC_COMMAND_TIMEOUT_MS = 3e4;
|
|
@@ -8663,8 +8609,8 @@ function createFixStory(ac, error) {
|
|
|
8663
8609
|
"Fix the root cause so the validation command passes.",
|
|
8664
8610
|
""
|
|
8665
8611
|
].join("\n");
|
|
8666
|
-
|
|
8667
|
-
|
|
8612
|
+
mkdirSync10(dirname3(storyPath), { recursive: true });
|
|
8613
|
+
writeFileSync13(storyPath, markdown, "utf-8");
|
|
8668
8614
|
return ok2(storyKey);
|
|
8669
8615
|
} catch (err) {
|
|
8670
8616
|
const msg = err instanceof Error ? err.message : String(err);
|
|
@@ -8990,7 +8936,7 @@ function runValidationCycle() {
|
|
|
8990
8936
|
|
|
8991
8937
|
// src/modules/verify/env.ts
|
|
8992
8938
|
import { execFileSync as execFileSync5 } from "child_process";
|
|
8993
|
-
import { existsSync as existsSync27, mkdirSync as
|
|
8939
|
+
import { existsSync as existsSync27, mkdirSync as mkdirSync11, readdirSync as readdirSync7, readFileSync as readFileSync23, writeFileSync as writeFileSync14, cpSync, rmSync as rmSync3, statSync as statSync6 } from "fs";
|
|
8994
8940
|
import { join as join27, basename as basename2 } from "path";
|
|
8995
8941
|
import { createHash } from "crypto";
|
|
8996
8942
|
|
|
@@ -9139,7 +9085,7 @@ function buildNodeImage(projectDir) {
|
|
|
9139
9085
|
const tarballName = basename2(lastLine);
|
|
9140
9086
|
const tarballPath = join27("/tmp", tarballName);
|
|
9141
9087
|
const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
|
|
9142
|
-
|
|
9088
|
+
mkdirSync11(buildContext, { recursive: true });
|
|
9143
9089
|
try {
|
|
9144
9090
|
cpSync(tarballPath, join27(buildContext, tarballName));
|
|
9145
9091
|
const dockerfile = generateVerifyDockerfile(projectDir) + `
|
|
@@ -9148,15 +9094,15 @@ ARG TARBALL=package.tgz
|
|
|
9148
9094
|
COPY \${TARBALL} /tmp/\${TARBALL}
|
|
9149
9095
|
RUN npm install -g /tmp/\${TARBALL} && rm /tmp/\${TARBALL}
|
|
9150
9096
|
`;
|
|
9151
|
-
|
|
9097
|
+
writeFileSync14(join27(buildContext, "Dockerfile"), dockerfile);
|
|
9152
9098
|
execFileSync5("docker", ["build", "-t", IMAGE_TAG, "--build-arg", `TARBALL=${tarballName}`, "."], {
|
|
9153
9099
|
cwd: buildContext,
|
|
9154
9100
|
stdio: "pipe",
|
|
9155
9101
|
timeout: 12e4
|
|
9156
9102
|
});
|
|
9157
9103
|
} finally {
|
|
9158
|
-
|
|
9159
|
-
|
|
9104
|
+
rmSync3(buildContext, { recursive: true, force: true });
|
|
9105
|
+
rmSync3(tarballPath, { force: true });
|
|
9160
9106
|
}
|
|
9161
9107
|
}
|
|
9162
9108
|
function buildPythonImage(projectDir) {
|
|
@@ -9167,7 +9113,7 @@ function buildPythonImage(projectDir) {
|
|
|
9167
9113
|
}
|
|
9168
9114
|
const distFile = distFiles.filter((f) => f.endsWith(".tar.gz"))[0] ?? distFiles[0];
|
|
9169
9115
|
const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
|
|
9170
|
-
|
|
9116
|
+
mkdirSync11(buildContext, { recursive: true });
|
|
9171
9117
|
try {
|
|
9172
9118
|
cpSync(join27(distDir, distFile), join27(buildContext, distFile));
|
|
9173
9119
|
const dockerfile = generateVerifyDockerfile(projectDir) + `
|
|
@@ -9175,14 +9121,14 @@ function buildPythonImage(projectDir) {
|
|
|
9175
9121
|
COPY ${distFile} /tmp/${distFile}
|
|
9176
9122
|
RUN pip install --break-system-packages /tmp/${distFile} && rm /tmp/${distFile}
|
|
9177
9123
|
`;
|
|
9178
|
-
|
|
9124
|
+
writeFileSync14(join27(buildContext, "Dockerfile"), dockerfile);
|
|
9179
9125
|
execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
|
|
9180
9126
|
cwd: buildContext,
|
|
9181
9127
|
stdio: "pipe",
|
|
9182
9128
|
timeout: 12e4
|
|
9183
9129
|
});
|
|
9184
9130
|
} finally {
|
|
9185
|
-
|
|
9131
|
+
rmSync3(buildContext, { recursive: true, force: true });
|
|
9186
9132
|
}
|
|
9187
9133
|
}
|
|
9188
9134
|
function prepareVerifyWorkspace(storyKey, projectDir) {
|
|
@@ -9193,8 +9139,8 @@ function prepareVerifyWorkspace(storyKey, projectDir) {
|
|
|
9193
9139
|
const storyFile = join27(root, STORY_DIR, `${storyKey}.md`);
|
|
9194
9140
|
if (!existsSync27(storyFile)) throw new Error(`Story file not found: ${storyFile}`);
|
|
9195
9141
|
const workspace = `${TEMP_PREFIX}${storyKey}`;
|
|
9196
|
-
if (existsSync27(workspace))
|
|
9197
|
-
|
|
9142
|
+
if (existsSync27(workspace)) rmSync3(workspace, { recursive: true, force: true });
|
|
9143
|
+
mkdirSync11(workspace, { recursive: true });
|
|
9198
9144
|
cpSync(storyFile, join27(workspace, "story.md"));
|
|
9199
9145
|
const readmePath = join27(root, "README.md");
|
|
9200
9146
|
if (existsSync27(readmePath)) cpSync(readmePath, join27(workspace, "README.md"));
|
|
@@ -9202,7 +9148,7 @@ function prepareVerifyWorkspace(storyKey, projectDir) {
|
|
|
9202
9148
|
if (existsSync27(docsDir) && statSync6(docsDir).isDirectory()) {
|
|
9203
9149
|
cpSync(docsDir, join27(workspace, "docs"), { recursive: true });
|
|
9204
9150
|
}
|
|
9205
|
-
|
|
9151
|
+
mkdirSync11(join27(workspace, "verification"), { recursive: true });
|
|
9206
9152
|
return workspace;
|
|
9207
9153
|
}
|
|
9208
9154
|
function checkVerifyEnv() {
|
|
@@ -9244,7 +9190,7 @@ function cleanupVerifyEnv(storyKey) {
|
|
|
9244
9190
|
}
|
|
9245
9191
|
const workspace = `${TEMP_PREFIX}${storyKey}`;
|
|
9246
9192
|
const containerName = `codeharness-verify-${storyKey}`;
|
|
9247
|
-
if (existsSync27(workspace))
|
|
9193
|
+
if (existsSync27(workspace)) rmSync3(workspace, { recursive: true, force: true });
|
|
9248
9194
|
try {
|
|
9249
9195
|
execFileSync5("docker", ["stop", containerName], { stdio: "pipe", timeout: 15e3 });
|
|
9250
9196
|
} catch {
|
|
@@ -9256,7 +9202,7 @@ function cleanupVerifyEnv(storyKey) {
|
|
|
9256
9202
|
}
|
|
9257
9203
|
function buildPluginImage(projectDir) {
|
|
9258
9204
|
const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
|
|
9259
|
-
|
|
9205
|
+
mkdirSync11(buildContext, { recursive: true });
|
|
9260
9206
|
try {
|
|
9261
9207
|
const pluginDir = join27(projectDir, ".claude-plugin");
|
|
9262
9208
|
cpSync(pluginDir, join27(buildContext, ".claude-plugin"), { recursive: true });
|
|
@@ -9266,28 +9212,28 @@ function buildPluginImage(projectDir) {
|
|
|
9266
9212
|
cpSync(src, join27(buildContext, dir), { recursive: true });
|
|
9267
9213
|
}
|
|
9268
9214
|
}
|
|
9269
|
-
|
|
9215
|
+
writeFileSync14(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
|
|
9270
9216
|
execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
|
|
9271
9217
|
cwd: buildContext,
|
|
9272
9218
|
stdio: "pipe",
|
|
9273
9219
|
timeout: 12e4
|
|
9274
9220
|
});
|
|
9275
9221
|
} finally {
|
|
9276
|
-
|
|
9222
|
+
rmSync3(buildContext, { recursive: true, force: true });
|
|
9277
9223
|
}
|
|
9278
9224
|
}
|
|
9279
9225
|
function buildSimpleImage(projectDir, timeout = 12e4) {
|
|
9280
9226
|
const buildContext = join27("/tmp", `codeharness-verify-build-${Date.now()}`);
|
|
9281
|
-
|
|
9227
|
+
mkdirSync11(buildContext, { recursive: true });
|
|
9282
9228
|
try {
|
|
9283
|
-
|
|
9229
|
+
writeFileSync14(join27(buildContext, "Dockerfile"), generateVerifyDockerfile(projectDir));
|
|
9284
9230
|
execFileSync5("docker", ["build", "-t", IMAGE_TAG, "."], {
|
|
9285
9231
|
cwd: buildContext,
|
|
9286
9232
|
stdio: "pipe",
|
|
9287
9233
|
timeout
|
|
9288
9234
|
});
|
|
9289
9235
|
} finally {
|
|
9290
|
-
|
|
9236
|
+
rmSync3(buildContext, { recursive: true, force: true });
|
|
9291
9237
|
}
|
|
9292
9238
|
}
|
|
9293
9239
|
function dockerImageExists(tag) {
|
|
@@ -10885,7 +10831,7 @@ function formatAuditJson(result) {
|
|
|
10885
10831
|
}
|
|
10886
10832
|
|
|
10887
10833
|
// src/modules/audit/fix-generator.ts
|
|
10888
|
-
import { existsSync as existsSync34, writeFileSync as
|
|
10834
|
+
import { existsSync as existsSync34, writeFileSync as writeFileSync15, mkdirSync as mkdirSync12 } from "fs";
|
|
10889
10835
|
import { join as join33, dirname as dirname5 } from "path";
|
|
10890
10836
|
function buildStoryKey(gap2, index) {
|
|
10891
10837
|
const safeDimension = gap2.dimension.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/(^-|-$)/g, "");
|
|
@@ -10940,8 +10886,8 @@ function generateFixStories(auditResult) {
|
|
|
10940
10886
|
continue;
|
|
10941
10887
|
}
|
|
10942
10888
|
const markdown = buildStoryMarkdown(gap2, key);
|
|
10943
|
-
|
|
10944
|
-
|
|
10889
|
+
mkdirSync12(dirname5(filePath), { recursive: true });
|
|
10890
|
+
writeFileSync15(filePath, markdown, "utf-8");
|
|
10945
10891
|
stories.push({ key, filePath, gap: gap2, skipped: false });
|
|
10946
10892
|
created++;
|
|
10947
10893
|
}
|
|
@@ -11117,7 +11063,7 @@ function registerOnboardCommand(program) {
|
|
|
11117
11063
|
}
|
|
11118
11064
|
|
|
11119
11065
|
// src/commands/teardown.ts
|
|
11120
|
-
import { existsSync as existsSync35, unlinkSync as unlinkSync3, readFileSync as readFileSync29, writeFileSync as
|
|
11066
|
+
import { existsSync as existsSync35, unlinkSync as unlinkSync3, readFileSync as readFileSync29, writeFileSync as writeFileSync16, rmSync as rmSync4 } from "fs";
|
|
11121
11067
|
import { join as join34 } from "path";
|
|
11122
11068
|
function buildDefaultResult() {
|
|
11123
11069
|
return {
|
|
@@ -11164,7 +11110,7 @@ function registerTeardownCommand(program) {
|
|
|
11164
11110
|
} else if (otlpMode === "remote-routed") {
|
|
11165
11111
|
if (!options.keepDocker) {
|
|
11166
11112
|
try {
|
|
11167
|
-
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-
|
|
11113
|
+
const { stopCollectorOnly: stopCollectorOnly2 } = await import("./docker-ZMY7GX5P.js");
|
|
11168
11114
|
stopCollectorOnly2();
|
|
11169
11115
|
result.docker.stopped = true;
|
|
11170
11116
|
if (!isJson) {
|
|
@@ -11196,7 +11142,7 @@ function registerTeardownCommand(program) {
|
|
|
11196
11142
|
info("Shared stack: kept running (other projects may use it)");
|
|
11197
11143
|
}
|
|
11198
11144
|
} else if (isLegacyStack) {
|
|
11199
|
-
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-
|
|
11145
|
+
const { isStackRunning: isStackRunning2, stopStack } = await import("./docker-ZMY7GX5P.js");
|
|
11200
11146
|
let stackRunning = false;
|
|
11201
11147
|
try {
|
|
11202
11148
|
stackRunning = isStackRunning2(composeFile);
|
|
@@ -11266,7 +11212,7 @@ function registerTeardownCommand(program) {
|
|
|
11266
11212
|
for (const key of keysToRemove) {
|
|
11267
11213
|
delete scripts[key];
|
|
11268
11214
|
}
|
|
11269
|
-
|
|
11215
|
+
writeFileSync16(pkgPath, JSON.stringify(pkg, null, 2) + "\n", "utf-8");
|
|
11270
11216
|
result.otlp_cleaned = true;
|
|
11271
11217
|
if (!isJson) {
|
|
11272
11218
|
ok("OTLP: removed instrumented scripts from package.json");
|
|
@@ -11294,7 +11240,7 @@ function registerTeardownCommand(program) {
|
|
|
11294
11240
|
}
|
|
11295
11241
|
const harnessDir = join34(projectDir, ".harness");
|
|
11296
11242
|
if (existsSync35(harnessDir)) {
|
|
11297
|
-
|
|
11243
|
+
rmSync4(harnessDir, { recursive: true, force: true });
|
|
11298
11244
|
result.removed.push(".harness/");
|
|
11299
11245
|
if (!isJson) {
|
|
11300
11246
|
ok("Removed: .harness/");
|
|
@@ -12100,7 +12046,7 @@ function isDuplicate(newItem, existingTitles, threshold = 0.8) {
|
|
|
12100
12046
|
}
|
|
12101
12047
|
|
|
12102
12048
|
// src/lib/issue-tracker.ts
|
|
12103
|
-
import { existsSync as existsSync36, readFileSync as readFileSync30, writeFileSync as
|
|
12049
|
+
import { existsSync as existsSync36, readFileSync as readFileSync30, writeFileSync as writeFileSync17, mkdirSync as mkdirSync13 } from "fs";
|
|
12104
12050
|
import { join as join35 } from "path";
|
|
12105
12051
|
import { parse as parse6, stringify as stringify3 } from "yaml";
|
|
12106
12052
|
var VALID_PRIORITIES = /* @__PURE__ */ new Set([
|
|
@@ -12129,9 +12075,9 @@ function writeIssues(data, dir = process.cwd()) {
|
|
|
12129
12075
|
const filePath = issuesPath(dir);
|
|
12130
12076
|
const dirPath = join35(dir, ".codeharness");
|
|
12131
12077
|
if (!existsSync36(dirPath)) {
|
|
12132
|
-
|
|
12078
|
+
mkdirSync13(dirPath, { recursive: true });
|
|
12133
12079
|
}
|
|
12134
|
-
|
|
12080
|
+
writeFileSync17(filePath, stringify3(data, { nullStr: "" }), "utf-8");
|
|
12135
12081
|
}
|
|
12136
12082
|
function nextIssueId(existing) {
|
|
12137
12083
|
let max = 0;
|
|
@@ -13117,7 +13063,7 @@ function registerAuditCommand(program) {
|
|
|
13117
13063
|
}
|
|
13118
13064
|
|
|
13119
13065
|
// src/commands/stats.ts
|
|
13120
|
-
import { existsSync as existsSync39, readdirSync as readdirSync10, readFileSync as readFileSync32, writeFileSync as
|
|
13066
|
+
import { existsSync as existsSync39, readdirSync as readdirSync10, readFileSync as readFileSync32, writeFileSync as writeFileSync18 } from "fs";
|
|
13121
13067
|
import { join as join38 } from "path";
|
|
13122
13068
|
var RATES = {
|
|
13123
13069
|
input: 15,
|
|
@@ -13327,7 +13273,7 @@ function registerStatsCommand(program) {
|
|
|
13327
13273
|
console.log(formatted);
|
|
13328
13274
|
if (options.save) {
|
|
13329
13275
|
const outPath = join38(projectDir, "_bmad-output", "implementation-artifacts", "cost-report.md");
|
|
13330
|
-
|
|
13276
|
+
writeFileSync18(outPath, formatted, "utf-8");
|
|
13331
13277
|
ok(`Report saved to ${outPath}`);
|
|
13332
13278
|
}
|
|
13333
13279
|
});
|
|
@@ -14183,7 +14129,7 @@ function registerDriversCommand(program) {
|
|
|
14183
14129
|
}
|
|
14184
14130
|
|
|
14185
14131
|
// src/index.ts
|
|
14186
|
-
var VERSION = true ? "0.
|
|
14132
|
+
var VERSION = true ? "0.33.1" : "0.0.0-dev";
|
|
14187
14133
|
function createProgram() {
|
|
14188
14134
|
const program = new Command();
|
|
14189
14135
|
program.name("codeharness").description("Makes autonomous coding agents produce software that actually works").version(VERSION).option("--json", "Output in machine-readable JSON format");
|
package/package.json
CHANGED
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
Dev agents repeatedly shipped code without reading module conventions (AGENTS.md),
|
|
4
4
|
skipped observability checks, and produced features that could not be verified
|
|
5
5
|
from outside the source tree. This patch enforces architecture awareness,
|
|
6
|
-
observability validation, documentation hygiene, test coverage gates
|
|
7
|
-
|
|
6
|
+
observability validation, documentation hygiene, and test coverage gates
|
|
7
|
+
— all operational failures observed in prior sprints.
|
|
8
8
|
(FR33, FR34, NFR20)
|
|
9
9
|
|
|
10
10
|
## Codeharness Development Enforcement
|
|
@@ -35,23 +35,14 @@ After running tests, verify telemetry is flowing:
|
|
|
35
35
|
- Coverage gate: 100% of new/changed code
|
|
36
36
|
- Run `npm test` / `pytest` and verify no regressions
|
|
37
37
|
|
|
38
|
-
### Verification
|
|
38
|
+
### Verification Readiness
|
|
39
39
|
|
|
40
|
-
Write code that can be verified
|
|
40
|
+
Write code that can be verified via Docker-based blind verification. Ask yourself:
|
|
41
|
+
- Are my functions testable and my outputs greppable?
|
|
42
|
+
- Can I run the CLI/server and verify output?
|
|
43
|
+
- Does `docker exec` work? Are logs flowing to the observability stack?
|
|
41
44
|
|
|
42
|
-
|
|
43
|
-
- **`runtime-provable`** — Code must be exercisable via CLI or local server. Ensure the binary/CLI produces verifiable stdout, exit codes, or HTTP responses without needing Docker.
|
|
44
|
-
- **`environment-provable`** — Code must work in a Docker verification environment. Ensure the Dockerfile is current, services start correctly, and `docker exec` can exercise the feature. Observability queries should return expected log/trace events.
|
|
45
|
-
- **`escalate`** — Reserved for ACs that genuinely cannot be automated (physical hardware, paid external APIs). This is rare — exhaust all automated approaches first.
|
|
46
|
-
|
|
47
|
-
Ask yourself:
|
|
48
|
-
- What tier is this story tagged with?
|
|
49
|
-
- Does my implementation produce the evidence that tier requires?
|
|
50
|
-
- If `test-provable`: are my functions testable and my outputs greppable?
|
|
51
|
-
- If `runtime-provable`: can I run the CLI/server and verify output locally?
|
|
52
|
-
- If `environment-provable`: does `docker exec` work? Are logs flowing to the observability stack?
|
|
53
|
-
|
|
54
|
-
If the answer is "no", the feature has a testability gap — fix the code to be verifiable at the appropriate tier.
|
|
45
|
+
If the answer is "no", the feature has a testability gap — fix the code to be verifiable.
|
|
55
46
|
|
|
56
47
|
### Dockerfile Maintenance
|
|
57
48
|
|
|
@@ -20,7 +20,7 @@ quality trends, and mandatory concrete action items with owners.
|
|
|
20
20
|
|
|
21
21
|
- Did the verifier hang on permissions? (check for `--allowedTools` issues)
|
|
22
22
|
- Did stories get stuck in verify→dev loops? (check `attempts` counter)
|
|
23
|
-
- Were stories assigned the wrong verification
|
|
23
|
+
- Were stories assigned the wrong verification method?
|
|
24
24
|
- Did the verify parser correctly detect `[FAIL]` verdicts?
|
|
25
25
|
|
|
26
26
|
### Documentation Health
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
## WHY
|
|
2
2
|
|
|
3
3
|
Review agents approved stories without verifying proof documents existed or
|
|
4
|
-
checking that evidence
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
4
|
+
checking that evidence was real. Stories passed review with fabricated output
|
|
5
|
+
and missing coverage data. This patch enforces proof existence, evidence
|
|
6
|
+
quality, and coverage delta reporting as hard gates before a story can leave
|
|
7
|
+
review.
|
|
8
8
|
(FR33, FR34, NFR20)
|
|
9
9
|
|
|
10
10
|
## Codeharness Review Gates
|
|
@@ -18,34 +18,12 @@ gates before a story can leave review.
|
|
|
18
18
|
|
|
19
19
|
### Proof Quality Checks
|
|
20
20
|
|
|
21
|
-
The proof must pass tier-appropriate evidence enforcement. The required evidence depends on the story's verification tier:
|
|
22
|
-
|
|
23
|
-
#### `test-provable` stories
|
|
24
|
-
- Evidence comes from build output, test results, and grep/read of code or generated artifacts
|
|
25
|
-
- `npm test` / `npm run build` output is the primary evidence
|
|
26
|
-
- Source-level assertions (grep against `src/`) are acceptable — this IS the verification method for this tier
|
|
27
|
-
- `docker exec` evidence is NOT required
|
|
28
|
-
- Each AC section must show actual test output or build results
|
|
29
|
-
|
|
30
|
-
#### `runtime-provable` stories
|
|
31
|
-
- Evidence comes from running the actual binary, CLI, or server
|
|
32
|
-
- Process execution output (stdout, stderr, exit codes) is the primary evidence
|
|
33
|
-
- HTTP responses from a locally running server are acceptable
|
|
34
|
-
- `docker exec` evidence is NOT required
|
|
35
|
-
- Each AC section must show actual command execution and output
|
|
36
|
-
|
|
37
|
-
#### `environment-provable` stories
|
|
38
21
|
- Commands run via `docker exec` (not direct host access)
|
|
39
22
|
- Less than 50% of evidence commands are `grep` against `src/`
|
|
40
23
|
- Each AC section has at least one `docker exec`, `docker ps/logs`, or observability query
|
|
41
24
|
- `[FAIL]` verdicts outside code blocks cause the proof to fail
|
|
42
25
|
- `[ESCALATE]` is acceptable only when all automated approaches are exhausted
|
|
43
26
|
|
|
44
|
-
#### `escalate` stories
|
|
45
|
-
- Human judgment is required — automated evidence may be partial or absent
|
|
46
|
-
- Proof document must explain why automation is not possible
|
|
47
|
-
- `[ESCALATE]` verdict is expected and acceptable
|
|
48
|
-
|
|
49
27
|
### Observability
|
|
50
28
|
|
|
51
29
|
Run `semgrep scan --config patches/observability/ --config patches/error-handling/ --json` against changed files and report gaps.
|
|
@@ -1,49 +1,25 @@
|
|
|
1
1
|
## WHY
|
|
2
2
|
|
|
3
3
|
Stories were marked "done" with no proof artifact, or with proofs that only
|
|
4
|
-
grepped source code instead of exercising the feature
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
from being hidden behind inadequate evidence.
|
|
4
|
+
grepped source code instead of exercising the feature. This patch mandates
|
|
5
|
+
proof documents with real evidence, and test coverage targets — preventing
|
|
6
|
+
regressions from being hidden behind inadequate evidence.
|
|
8
7
|
(FR33, FR36, NFR20)
|
|
9
8
|
|
|
10
9
|
## Verification Requirements
|
|
11
10
|
|
|
12
|
-
Every story must produce a **proof document** with evidence
|
|
11
|
+
Every story must produce a **proof document** with real evidence from Docker-based blind verification.
|
|
13
12
|
|
|
14
13
|
### Proof Standard
|
|
15
14
|
|
|
16
15
|
- Proof document at `verification/<story-key>-proof.md`
|
|
17
|
-
- Each AC gets a `## AC N:` section with
|
|
16
|
+
- Each AC gets a `## AC N:` section with evidence and captured output
|
|
18
17
|
- `[FAIL]` = AC failed with evidence showing what went wrong
|
|
19
18
|
- `[ESCALATE]` = AC genuinely cannot be automated (last resort — try everything first)
|
|
20
19
|
|
|
21
|
-
**Tier-dependent evidence rules:**
|
|
22
|
-
|
|
23
|
-
- **`test-provable`** — Evidence comes from build + test output + grep/read of code or artifacts. Run `npm test` or `npm run build`, capture results. Source-level assertions are the primary verification method. No running app or Docker required.
|
|
24
|
-
- **`runtime-provable`** — Evidence comes from running the actual binary/server and interacting with it. Start the process, make requests or run commands, capture stdout/stderr/exit codes. No Docker stack required.
|
|
25
|
-
- **`environment-provable`** — Evidence comes from `docker exec` commands and observability queries. Full Docker verification environment required. Each AC section needs at least one `docker exec`, `docker ps/logs`, or observability query. Evidence must come from running the installed CLI/tool in Docker, not from grepping source.
|
|
26
|
-
- **`escalate`** — Human judgment required. Document why automation is not possible. `[ESCALATE]` verdict is expected.
|
|
27
|
-
|
|
28
|
-
### Verification Tags
|
|
29
|
-
|
|
30
|
-
For each AC, append a tag indicating its verification tier:
|
|
31
|
-
- `<!-- verification: test-provable -->` — Can be verified by building and running tests. Evidence: build output, test results, grep/read of code. No running app needed.
|
|
32
|
-
- `<!-- verification: runtime-provable -->` — Requires running the actual binary/CLI/server. Evidence: process output, HTTP responses, exit codes. No Docker stack needed.
|
|
33
|
-
- `<!-- verification: environment-provable -->` — Requires full Docker environment with observability. Evidence: `docker exec` commands, VictoriaLogs queries, multi-service interaction.
|
|
34
|
-
- `<!-- verification: escalate -->` — Cannot be automated. Requires human judgment, physical hardware, or paid external services.
|
|
35
|
-
|
|
36
|
-
**Decision criteria:**
|
|
37
|
-
1. Can you prove it with `npm test` or `npm run build` alone? → `test-provable`
|
|
38
|
-
2. Do you need to run the actual binary/server locally? → `runtime-provable`
|
|
39
|
-
3. Do you need Docker, external services, or observability? → `environment-provable`
|
|
40
|
-
4. Have you exhausted all automated approaches? → `escalate`
|
|
41
|
-
|
|
42
|
-
**Do not over-tag.** Most stories are `test-provable` or `runtime-provable`. Only use `environment-provable` when Docker infrastructure is genuinely needed. Only use `escalate` as a last resort.
|
|
43
|
-
|
|
44
20
|
### Observability Evidence
|
|
45
21
|
|
|
46
|
-
After each `docker exec` command
|
|
22
|
+
After each `docker exec` command, query the observability backend for log events from the last 30 seconds.
|
|
47
23
|
Use the configured VictoriaLogs endpoint (default: `http://localhost:9428`):
|
|
48
24
|
|
|
49
25
|
```bash
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
name: documenter
|
|
2
|
+
role:
|
|
3
|
+
title: Verification Guide Writer
|
|
4
|
+
purpose: Read implementation and write Docker-executable verification guides for blind QA
|
|
5
|
+
persona:
|
|
6
|
+
identity: |
|
|
7
|
+
Technical writer who translates source code into executable verification steps.
|
|
8
|
+
Reads what was built, understands how it works, then writes guides that a blind
|
|
9
|
+
QA agent can follow using only Docker commands.
|
|
10
|
+
communication_style: "Precise, command-oriented. Every verification step is a copy-pasteable command with expected output."
|
|
11
|
+
principles:
|
|
12
|
+
- Every AC must map to a concrete docker exec or curl command
|
|
13
|
+
- Commands must be copy-pasteable — no pseudocode, no placeholders
|
|
14
|
+
- Include the Docker container name in every command
|
|
15
|
+
- 'Expected output must be specific — not "should work" but "prints PASS: hook registered"'
|
|
16
|
+
- Include a Prerequisites section with container name and required services
|
|
17
|
+
prompt_template: |
|
|
18
|
+
## Role
|
|
19
|
+
|
|
20
|
+
You are writing a verification guide for a blind QA evaluator. The evaluator CANNOT see source code — it can only run Docker commands and observe output.
|
|
21
|
+
|
|
22
|
+
## Process
|
|
23
|
+
|
|
24
|
+
1. Read the story spec to understand the acceptance criteria
|
|
25
|
+
2. Read the implementation source to understand what was built
|
|
26
|
+
3. Discover the Docker container name: run `docker ps` or read `docker-compose.yml`
|
|
27
|
+
4. For each AC, write an executable verification step
|
|
28
|
+
|
|
29
|
+
## Guide Format
|
|
30
|
+
|
|
31
|
+
Write a markdown document with this structure:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
# Verification Guide: [Story Title]
|
|
35
|
+
|
|
36
|
+
## Prerequisites
|
|
37
|
+
- Container: [container name from docker ps]
|
|
38
|
+
- Required services: [list any dependent services]
|
|
39
|
+
- Setup: [any one-time setup commands needed]
|
|
40
|
+
|
|
41
|
+
## AC 1: [AC description]
|
|
42
|
+
### Command
|
|
43
|
+
docker exec [container] python -c "from app.module import Class; obj = Class(); result = obj.method(args); assert result == expected; print('PASS: [description]')"
|
|
44
|
+
### Expected Output
|
|
45
|
+
PASS: [description]
|
|
46
|
+
### What This Proves
|
|
47
|
+
[One sentence: why this output satisfies the AC]
|
|
48
|
+
|
|
49
|
+
## AC 2: [AC description]
|
|
50
|
+
...
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Rules
|
|
54
|
+
|
|
55
|
+
- Every command must be copy-pasteable into a terminal
|
|
56
|
+
- No pseudocode — use real import paths, real class names, real method signatures
|
|
57
|
+
- For API features: use `curl http://localhost:PORT/endpoint` with expected response body
|
|
58
|
+
- For internal code: use `docker exec [container] python -c "..."` with assertion + print
|
|
59
|
+
- For CLI features: use `docker exec [container] command --args` with expected output
|
|
60
|
+
- If a feature cannot be verified via Docker (e.g., build-time only), state this explicitly with reason
|
|
61
|
+
|
|
62
|
+
## Output
|
|
63
|
+
|
|
64
|
+
Write the complete verification guide as your response. Do not write to files — the engine captures your output.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
name: evaluator
|
|
2
2
|
role:
|
|
3
3
|
title: Adversarial QA Evaluator
|
|
4
|
-
purpose: Exercise the built artifact and determine if it actually works
|
|
4
|
+
purpose: Exercise the built artifact via Docker and determine if it actually works
|
|
5
5
|
persona:
|
|
6
6
|
identity: Senior QA engineer who trusts nothing without evidence. Treats every claim as unverified until proven with concrete output. Assumes code is broken until demonstrated otherwise.
|
|
7
7
|
communication_style: "Blunt, evidence-first. States what was observed, not what was expected. No softening, no encouragement, no benefit of the doubt."
|
|
@@ -22,11 +22,23 @@ disallowedTools:
|
|
|
22
22
|
prompt_template: |
|
|
23
23
|
## Role
|
|
24
24
|
|
|
25
|
-
You are verifying acceptance criteria for
|
|
25
|
+
You are verifying acceptance criteria for an epic. Your job is to determine whether each AC actually passes by running commands and observing output.
|
|
26
26
|
|
|
27
27
|
## Input
|
|
28
28
|
|
|
29
|
-
Read
|
|
29
|
+
Read verification guides from ./story-files/. Each guide explains:
|
|
30
|
+
- What was built
|
|
31
|
+
- Docker container name and prerequisites
|
|
32
|
+
- For each AC: an exact command to run and expected output
|
|
33
|
+
|
|
34
|
+
## Verification Method
|
|
35
|
+
|
|
36
|
+
Use `docker exec`, `docker logs`, `curl`, and other Docker/HTTP commands as described in the guides. Every AC must be verified by:
|
|
37
|
+
1. Running the exact command from the guide
|
|
38
|
+
2. Capturing the actual output
|
|
39
|
+
3. Comparing to expected output
|
|
40
|
+
|
|
41
|
+
You do NOT have access to source code. You verify by exercising the running system via Docker only.
|
|
30
42
|
|
|
31
43
|
## Anti-Leniency Rules
|
|
32
44
|
|
|
@@ -35,14 +47,7 @@ prompt_template: |
|
|
|
35
47
|
- Every PASS requires commands_run evidence — if you cannot run a command to verify, score UNKNOWN.
|
|
36
48
|
- UNKNOWN if unable to verify — never guess at outcomes.
|
|
37
49
|
- Do not infer success from lack of errors. Silence is not evidence.
|
|
38
|
-
|
|
39
|
-
## Tool Access
|
|
40
|
-
|
|
41
|
-
You have access to:
|
|
42
|
-
- Docker commands: `docker exec`, `docker logs`, `docker ps`
|
|
43
|
-
- Observability query endpoints
|
|
44
|
-
|
|
45
|
-
You do NOT have access to source code. Do not attempt to read, edit, or write source files. Gather all evidence through runtime observation only.
|
|
50
|
+
- If Docker is not running or the app container is not available, report ALL ACs as UNKNOWN with reason "Docker not available".
|
|
46
51
|
|
|
47
52
|
## Evidence Requirements
|
|
48
53
|
|
|
@@ -19,6 +19,11 @@ tasks:
|
|
|
19
19
|
session: fresh
|
|
20
20
|
source_access: true
|
|
21
21
|
driver: codex
|
|
22
|
+
document:
|
|
23
|
+
agent: documenter
|
|
24
|
+
session: fresh
|
|
25
|
+
source_access: true
|
|
26
|
+
model: claude-opus-4-6
|
|
22
27
|
verify:
|
|
23
28
|
agent: evaluator
|
|
24
29
|
session: fresh
|
|
@@ -40,6 +45,7 @@ story_flow:
|
|
|
40
45
|
- implement
|
|
41
46
|
- check
|
|
42
47
|
- review
|
|
48
|
+
- document
|
|
43
49
|
|
|
44
50
|
epic_flow:
|
|
45
51
|
- story_flow
|
|
@@ -48,5 +54,6 @@ epic_flow:
|
|
|
48
54
|
- retry
|
|
49
55
|
- check
|
|
50
56
|
- review
|
|
57
|
+
- document
|
|
51
58
|
- verify
|
|
52
59
|
- retro
|