agents-harness 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/file-protocol.d.ts +4 -2
- package/dist/core/file-protocol.js +71 -1
- package/dist/core/file-protocol.js.map +1 -1
- package/dist/core/orchestrator.d.ts +1 -0
- package/dist/core/orchestrator.js +9 -2
- package/dist/core/orchestrator.js.map +1 -1
- package/dist/core/types.d.ts +20 -0
- package/dist/defaults/criteria.d.ts +3 -0
- package/dist/defaults/criteria.js +124 -0
- package/dist/defaults/criteria.js.map +1 -1
- package/dist/defaults/project-type.d.ts +2 -0
- package/dist/defaults/project-type.js +48 -0
- package/dist/defaults/project-type.js.map +1 -0
- package/dist/defaults/prompts.js +40 -18
- package/dist/defaults/prompts.js.map +1 -1
- package/dist/index.d.ts +3 -2
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Progress, EvalResult } from "./types.js";
|
|
1
|
+
import type { Progress, EvalResult, EvalDimension } from "./types.js";
|
|
2
2
|
export declare class FileProtocol {
|
|
3
3
|
private harnessDir;
|
|
4
4
|
private projectRoot;
|
|
@@ -8,7 +8,9 @@ export declare class FileProtocol {
|
|
|
8
8
|
readFile(name: string): string | null;
|
|
9
9
|
writeProgress(progress: Progress): void;
|
|
10
10
|
readProgress(): Progress | null;
|
|
11
|
-
parseEvaluation(): EvalResult;
|
|
11
|
+
parseEvaluation(knownDimensions?: EvalDimension[]): EvalResult;
|
|
12
|
+
private parseScoredEvaluation;
|
|
13
|
+
private parseLegacyEvaluation;
|
|
12
14
|
ensureGitignore(): void;
|
|
13
15
|
cleanEphemeral(): void;
|
|
14
16
|
}
|
|
@@ -44,7 +44,7 @@ export class FileProtocol {
|
|
|
44
44
|
}
|
|
45
45
|
return parseYaml(content);
|
|
46
46
|
}
|
|
47
|
-
parseEvaluation() {
|
|
47
|
+
parseEvaluation(knownDimensions) {
|
|
48
48
|
const content = this.readFile("evaluation.md");
|
|
49
49
|
if (content === null) {
|
|
50
50
|
return {
|
|
@@ -54,6 +54,76 @@ export class FileProtocol {
|
|
|
54
54
|
passedCriteria: [],
|
|
55
55
|
};
|
|
56
56
|
}
|
|
57
|
+
if (content.includes("## Dimensions")) {
|
|
58
|
+
return this.parseScoredEvaluation(content, knownDimensions);
|
|
59
|
+
}
|
|
60
|
+
return this.parseLegacyEvaluation(content);
|
|
61
|
+
}
|
|
62
|
+
parseScoredEvaluation(content, knownDimensions) {
|
|
63
|
+
const thresholdMap = new Map();
|
|
64
|
+
if (knownDimensions) {
|
|
65
|
+
for (const dim of knownDimensions) {
|
|
66
|
+
thresholdMap.set(dim.name.toLowerCase(), dim.threshold);
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
// Parse overall score from header — handles **bold** markdown
|
|
70
|
+
let overallScore = 0;
|
|
71
|
+
const scoreMatch = content.match(/^(?:\*\*)?Score:?\s*\*?\*?\s*([\d.]+)\s*\/\s*10/m);
|
|
72
|
+
if (scoreMatch) {
|
|
73
|
+
overallScore = parseFloat(scoreMatch[1]);
|
|
74
|
+
}
|
|
75
|
+
// Extract critique section
|
|
76
|
+
let critique = "";
|
|
77
|
+
const critiqueMatch = content.match(/## Critique\s*\n([\s\S]*?)$/);
|
|
78
|
+
if (critiqueMatch) {
|
|
79
|
+
critique = critiqueMatch[1].trim();
|
|
80
|
+
}
|
|
81
|
+
// Parse dimensions — split by ### headers within ## Dimensions section
|
|
82
|
+
const dimensionsSection = content.match(/## Dimensions\s*\n([\s\S]*?)(?=## Critique|$)/);
|
|
83
|
+
const dimensions = [];
|
|
84
|
+
if (dimensionsSection) {
|
|
85
|
+
const dimBlocks = dimensionsSection[1].split(/^### /m).filter(Boolean);
|
|
86
|
+
for (const block of dimBlocks) {
|
|
87
|
+
const nameMatch = block.match(/^(.+?)$/m);
|
|
88
|
+
if (!nameMatch)
|
|
89
|
+
continue;
|
|
90
|
+
const name = nameMatch[1].trim();
|
|
91
|
+
// Handle **Score: N/10** with optional bold markdown and trailing text
|
|
92
|
+
const dimScoreMatch = block.match(/\*?\*?Score:?\s*\*?\*?\s*(\d+)\s*\/\s*10/m);
|
|
93
|
+
const score = dimScoreMatch ? parseInt(dimScoreMatch[1], 10) : 0;
|
|
94
|
+
// Rationale may start with bold or be on same/next line
|
|
95
|
+
const rationaleMatch = block.match(/(?:\*?\*?)?Rationale:?\s*\*?\*?\s*([\s\S]*?)(?=\n\n|\n### |$)/m);
|
|
96
|
+
const rationale = rationaleMatch ? rationaleMatch[1].trim() : "";
|
|
97
|
+
const threshold = thresholdMap.get(name.toLowerCase()) ?? 5;
|
|
98
|
+
dimensions.push({
|
|
99
|
+
id: name.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/-$/, ""),
|
|
100
|
+
name,
|
|
101
|
+
score,
|
|
102
|
+
threshold,
|
|
103
|
+
passed: score >= threshold,
|
|
104
|
+
rationale,
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
// Recompute passed from dimension scores — don't trust agent's "Overall:" line
|
|
109
|
+
const passed = dimensions.length > 0 && dimensions.every(d => d.passed);
|
|
110
|
+
// Derive backward-compat criteria lists
|
|
111
|
+
const passedCriteria = dimensions
|
|
112
|
+
.filter(d => d.passed)
|
|
113
|
+
.map(d => `${d.name}: ${d.score}/10`);
|
|
114
|
+
const failedCriteria = dimensions
|
|
115
|
+
.filter(d => !d.passed)
|
|
116
|
+
.map(d => `${d.name}: ${d.score}/10 (min: ${d.threshold})`);
|
|
117
|
+
return {
|
|
118
|
+
passed,
|
|
119
|
+
critique,
|
|
120
|
+
failedCriteria,
|
|
121
|
+
passedCriteria,
|
|
122
|
+
overallScore,
|
|
123
|
+
dimensions,
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
parseLegacyEvaluation(content) {
|
|
57
127
|
const lines = content.split("\n");
|
|
58
128
|
let passed = false;
|
|
59
129
|
const failedCriteria = [];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"file-protocol.js","sourceRoot":"","sources":["../../src/core/file-protocol.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzG,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,IAAI,MAAM,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAG/D,MAAM,eAAe,GAAG,CAAC,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,YAAY,EAAE,aAAa,CAAC,CAAC;AAEpG,MAAM,iBAAiB,GAAG;IACxB,oCAAoC;IACpC,kBAAkB;IAClB,qBAAqB;IACrB,sBAAsB;IACtB,wBAAwB;IACxB,qBAAqB;IACrB,sBAAsB;IACtB,qBAAqB;IACrB,sBAAsB;CACvB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,UAAU,CAAS;IACnB,WAAW,CAAS;IAE5B,YAAY,WAAmB;QAC7B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;IAClD,CAAC;IAED,SAAS;QACP,SAAS,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,SAAS,CAAC,IAAY,EAAE,OAAe;QACrC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,aAAa,CAAC,QAAkB;QAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,YAAY;QACV,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAC7C,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,SAAS,CAAC,OAAO,CAAa,CAAC;IACxC,CAAC;IAED,eAAe;
|
|
1
|
+
{"version":3,"file":"file-protocol.js","sourceRoot":"","sources":["../../src/core/file-protocol.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,SAAS,EAAE,YAAY,EAAE,aAAa,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,SAAS,CAAC;AACzG,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,IAAI,MAAM,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAG/D,MAAM,eAAe,GAAG,CAAC,aAAa,EAAE,eAAe,EAAE,YAAY,EAAE,YAAY,EAAE,aAAa,CAAC,CAAC;AAEpG,MAAM,iBAAiB,GAAG;IACxB,oCAAoC;IACpC,kBAAkB;IAClB,qBAAqB;IACrB,sBAAsB;IACtB,wBAAwB;IACxB,qBAAqB;IACrB,sBAAsB;IACtB,qBAAqB;IACrB,sBAAsB;CACvB,CAAC;AAEF,MAAM,OAAO,YAAY;IACf,UAAU,CAAS;IACnB,WAAW,CAAS;IAE5B,YAAY,WAAmB;QAC7B,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;IAClD,CAAC;IAED,SAAS;QACP,SAAS,CAAC,IAAI,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAClD,CAAC;IAED,SAAS,CAAC,IAAY,EAAE,OAAe;QACrC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC/D,CAAC;IAED,QAAQ,CAAC,IAAY;QACnB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC7C,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IACzC,CAAC;IAED,aAAa,CAAC,QAAkB;QAC9B,MAAM,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC9B,IAAI,CAAC,SAAS,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;IACtC,CAAC;IAED,YAAY;QACV,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QAC7C,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,SAAS,CAAC,OAAO,CAAa,CAAC;IACxC,CAAC;IAED,eAAe,CAAC,eAAiC;QAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;QAC/C,IAAI,OAAO,KAAK,IAAI,EAAE,CAAC;YACrB,OAAO;gBACL,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,0BAA0B;gBACpC,cAAc,EAAE,EAAE;gBAClB,cAAc,EAAE,EAAE;aACnB,CAAC;QACJ,CAAC;QAED,IAAI,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,EAAE,CAAC;YACtC,OAAO,IAAI,CAAC,qBAAqB,CAAC,OAAO,EAAE,eAAe,CAAC,CAAC;QAC9D,CAAC;QACD,OAAO,IAAI,CAAC,qBAAqB,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEO,qBAAqB,CAAC,OAAe,EAAE,eAAiC;QAC9E,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB,CAAC;QAC/C,IAAI,eAAe,EAAE,CAAC;YACpB,KAAK,
MAAM,GAAG,IAAI,eAAe,EAAE,CAAC;gBAClC,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAED,8DAA8D;QAC9D,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,kDAAkD,CAAC,CAAC;QACrF,IAAI,UAAU,EAAE,CAAC;YACf,YAAY,GAAG,UAAU,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;QAC3C,CAAC;QAED,2BAA2B;QAC3B,IAAI,QAAQ,GAAG,EAAE,CAAC;QAClB,MAAM,aAAa,GAAG,OAAO,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;QACnE,IAAI,aAAa,EAAE,CAAC;YAClB,QAAQ,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACrC,CAAC;QAED,uEAAuE;QACvE,MAAM,iBAAiB,GAAG,OAAO,CAAC,KAAK,CAAC,+CAA+C,CAAC,CAAC;QACzF,MAAM,UAAU,GAAqB,EAAE,CAAC;QAExC,IAAI,iBAAiB,EAAE,CAAC;YACtB,MAAM,SAAS,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;YACvE,KAAK,MAAM,KAAK,IAAI,SAAS,EAAE,CAAC;gBAC9B,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;gBAC1C,IAAI,CAAC,SAAS;oBAAE,SAAS;gBACzB,MAAM,IAAI,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAEjC,uEAAuE;gBACvE,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,2CAA2C,CAAC,CAAC;gBAC/E,MAAM,KAAK,GAAG,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAEjE,wDAAwD;gBACxD,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,gEAAgE,CAAC,CAAC;gBACrG,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBAEjE,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,CAAC,CAAC;gBAC5D,UAAU,CAAC,IAAI,CAAC;oBACd,EAAE,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;oBACpE,IAAI;oBACJ,KAAK;oBACL,SAAS;oBACT,MAAM,EAAE,KAAK,IAAI,SAAS;oBAC1B,SAAS;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,+EAA+E;QAC/E,MAAM,MAAM,GAAG,UAAU,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QAExE,wCAAwC;QACxC,MAAM,cAAc,GAAG,UAAU;aAC9B,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC;aACrB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC;QACxC,MAAM,cAAc,GAAG,UAAU;aAC9B,MAAM,CAAC,CAAC,CAA
C,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;aACtB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,aAAa,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC;QAE9D,OAAO;YACL,MAAM;YACN,QAAQ;YACR,cAAc;YACd,cAAc;YACd,YAAY;YACZ,UAAU;SACX,CAAC;IACJ,CAAC;IAEO,qBAAqB,CAAC,OAAe;QAC3C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,MAAM,GAAG,KAAK,CAAC;QACnB,MAAM,cAAc,GAAa,EAAE,CAAC;QACpC,MAAM,cAAc,GAAa,EAAE,CAAC;QACpC,MAAM,aAAa,GAAa,EAAE,CAAC;QAGnC,IAAI,cAAc,GAAY,MAAM,CAAC;QAErC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC/B,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;gBAClD,MAAM,GAAG,MAAM,KAAK,MAAM,CAAC;gBAC3B,SAAS;YACX,CAAC;YAED,IAAI,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;gBACxC,cAAc,GAAG,QAAQ,CAAC;gBAC1B,SAAS;YACX,CAAC;YAED,IAAI,IAAI,CAAC,UAAU,CAAC,kBAAkB,CAAC,EAAE,CAAC;gBACxC,cAAc,GAAG,QAAQ,CAAC;gBAC1B,SAAS;YACX,CAAC;YAED,IAAI,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;gBACjC,cAAc,GAAG,UAAU,CAAC;gBAC5B,SAAS;YACX,CAAC;YAED,IAAI,cAAc,KAAK,QAAQ,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACzD,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACrC,CAAC;iBAAM,IAAI,cAAc,KAAK,QAAQ,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBAChE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACrC,CAAC;iBAAM,IAAI,cAAc,KAAK,UAAU,EAAE,CAAC;gBACzC,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO;YACL,MAAM;YACN,QAAQ,EAAE,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;YACzC,cAAc;YACd,cAAc;SACf,CAAC;IACJ,CAAC;IAED,eAAe;QACb,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,YAAY,CAAC,CAAC;QAE3D,IAAI,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;YAC9B,MAAM,QAAQ,GAAG,YAAY,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;YACtD,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,CAAC;gBACxC,OAAO;YACT,CAAC;YACD,cAAc,CAAC,aAAa,EAAE,IAAI,GAAG,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QACrF,CAAC;aAAM,CAAC;YACN,aAAa,CAAC,aAAa,EAAE,iBAAiB,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,EAAE,OAAO,CAAC,CAAC;QAC7E,CAAC;IACH,CAAC;IAED
,cAAc;QACZ,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;YAC7C,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACzB,UAAU,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
|
|
@@ -2,15 +2,19 @@ import { EventEmitter } from "node:events";
|
|
|
2
2
|
import { ContextManager } from "./context-manager.js";
|
|
3
3
|
import { FileProtocol } from "./file-protocol.js";
|
|
4
4
|
import { buildProjectContext } from "../discovery/project-context.js";
|
|
5
|
+
import { detectProjectType } from "../defaults/project-type.js";
|
|
6
|
+
import { getDimensions } from "../defaults/criteria.js";
|
|
5
7
|
export class Harness extends EventEmitter {
|
|
6
8
|
contextManager;
|
|
7
9
|
fileProtocol;
|
|
10
|
+
projectContext;
|
|
8
11
|
progress;
|
|
9
12
|
aborted = false;
|
|
10
13
|
options;
|
|
11
14
|
constructor(opts) {
|
|
12
15
|
super();
|
|
13
16
|
const projectContext = buildProjectContext(opts.root, opts.scope ?? null);
|
|
17
|
+
this.projectContext = projectContext;
|
|
14
18
|
// Apply config overrides from .harness/config.yaml
|
|
15
19
|
const config = projectContext.config;
|
|
16
20
|
this.options = {
|
|
@@ -150,8 +154,11 @@ export class Harness extends EventEmitter {
|
|
|
150
154
|
// Evaluate phase
|
|
151
155
|
this.updatePhase("evaluate", sprintNum, attempt);
|
|
152
156
|
await this.runAgentPhase("evaluator", `Evaluate the implementation against the sprint contract. Read .harness/contract.md for requirements. Write your evaluation to .harness/evaluation.md.`, sprintNum);
|
|
153
|
-
// Parse evaluation
|
|
154
|
-
const
|
|
157
|
+
// Parse evaluation with scored dimensions
|
|
158
|
+
const projectType = detectProjectType(this.projectContext);
|
|
159
|
+
const dims = getDimensions(projectType);
|
|
160
|
+
const evalResult = this.fileProtocol.parseEvaluation(dims);
|
|
161
|
+
evalResult.projectType = projectType;
|
|
155
162
|
this.emitEvent({
|
|
156
163
|
type: "evaluation",
|
|
157
164
|
data: { sprint: sprintNum, attempt, result: evalResult },
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAQ3C,OAAO,EAAE,cAAc,EAAuB,MAAM,sBAAsB,CAAC;AAC3E,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,mBAAmB,EAAE,MAAM,iCAAiC,CAAC;AACtE,OAAO,EAAE,iBAAiB,EAAE,MAAM,6BAA6B,CAAC;AAChE,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAYxD,MAAM,OAAO,OAAQ,SAAQ,YAAY;IAC/B,cAAc,CAAiB;IAC/B,YAAY,CAAe;IAC3B,cAAc,CAAiB;IAC/B,QAAQ,CAAW;IACnB,OAAO,GAAG,KAAK,CAAC;IAChB,OAAO,CAKb;IAEF,YAAY,IAAoB;QAC9B,KAAK,EAAE,CAAC;QACR,MAAM,cAAc,GAAG,mBAAmB,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,CAAC;QAC1E,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QAErC,mDAAmD;QACnD,MAAM,MAAM,GAAG,cAAc,CAAC,MAAM,CAAC;QACrC,IAAI,CAAC,OAAO,GAAG;YACb,oBAAoB,EAClB,IAAI,CAAC,oBAAoB,IAAI,MAAM,EAAE,oBAAoB,IAAI,CAAC;YAChE,qBAAqB,EACnB,IAAI,CAAC,qBAAqB,IAAI,MAAM,EAAE,qBAAqB,IAAI,CAAC;YAClE,iBAAiB,EACf,IAAI,CAAC,iBAAiB,IAAI,MAAM,EAAE,iBAAiB,IAAI,EAAE;SAC5D,CAAC;QAEF,IAAI,CAAC,cAAc,GAAG,IAAI,cAAc,CAAC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACnF,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChD,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC;IACxC,CAAC;IAEO,YAAY,CAAC,OAAe;QAClC,OAAO;YACL,MAAM,EAAE,SAAS;YACjB,OAAO;YACP,aAAa,EAAE,CAAC;YAChB,YAAY,EAAE,CAAC;YACf,cAAc,EAAE,CAAC;YACjB,YAAY,EAAE,MAAM;YACpB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,OAAO,EAAE,CAAC;YACV,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB;YAC5C,OAAO,EAAE,EAAE;SACZ,CAAC;IACJ,CAAC;IAEO,SAAS,CAAC,KAAmB;QACnC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;IAC5B,CAAC;IAEO,WAAW,CAAC,KAAY,EAAE,MAAM,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC;QACvD,IAAI,CAAC,QAAQ,CAAC,YAAY,GAAG,KAAK,CAAC;QACnC,IAAI,CAAC,QAAQ,CAAC,aAAa,GAAG,MAAM,CAAC;QACrC,IAAI,CAAC,QAAQ,CAAC,cAAc,GAAG,OAAO,CAAC;QACvC,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,aAAa;YACnB,IAAI,EAAE,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE;SACjC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CACz
B,IAAe,EACf,MAAc,EACd,MAAc;QAEd,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC;YAChD,IAAI;YACJ,MAAM;YACN,UAAU,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;gBAC5B,IAAI,CAAC,SAAS,CAAC;oBACb,IAAI,EAAE,gBAAgB;oBACtB,IAAI,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,OAAO,EAAE,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE,EAAE;iBAC7D,CAAC,CAAC;YACL,CAAC;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC;QACxC,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;YAClC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,OAAO,IAAI,MAAM,CAAC,OAAO,CAAC;QAC1D,CAAC;QAED,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,aAAa;YACnB,IAAI,EAAE;gBACJ,aAAa,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,OAAO,IAAI,CAAC;gBAC1D,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;gBACnC,SAAS,EAAE,IAAI,CAAC,OAAO,CAAC,iBAAiB;aAC1C;SACF,CAAC,CAAC;QAEH,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC/C,OAAO,MAAM,CAAC,QAAQ,CAAC;IACzB,CAAC;IAEO,gBAAgB;QACtB,OAAO,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC;IACjE,CAAC;IAEO,iBAAiB,CAAC,gBAAwB;QAChD,yDAAyD;QACzD,MAAM,OAAO,GAAG,IAAI,CAAC,YAAY,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC;QACzD,IAAI,CAAC,OAAO;YAAE,OAAO,CAAC,CAAC;QAEvB,qDAAqD;QACrD,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC3D,OAAO,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,IAAY;QACpB,IAAI,CAAC,YAAY,CAAC,SAAS,EAAE,CAAC;QAC9B,IAAI,CAAC,YAAY,CAAC,eAAe,EAAE,CAAC;QACpC,IAAI,CAAC,YAAY,CAAC,cAAc,EAAE,CAAC;QACnC,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACxC,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QAErB,IAAI,CAAC;YACH,oEAAoE;YACpE,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;YACzB,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;YAC7C,MAAM,IAAI,CAAC,aAAa,CACtB,SAAS,EACT,kMAAkM,EAClM,CAAC,CACF,CAAC;YAEF,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;gBAAE,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;YAEpE,+CAA+C;YAC/C,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;YAC9B,MAAM,IAAI,CAAC,aAAa,CACtB,SAAS,EACT,qKAAqK,EACrK,CAAC,CACF,CAAC;YAEF,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;gB
AAE,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;YAEpE,MAAM,YAAY,GAAG,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC,CAAC;YAChD,IAAI,CAAC,QAAQ,CAAC,YAAY,GAAG,YAAY,CAAC;YAE1C,kBAAkB;YAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;oBAAE,MAAM;gBACnD,MAAM,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;YACnD,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YAC/C,MAAM,KAAK,CAAC;QACd,CAAC;QAED,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;IACzB,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,SAAiB;QAC3C,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YACtC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG;gBACjC,MAAM,EAAE,aAAa;gBACrB,QAAQ,EAAE,CAAC;gBACX,OAAO,EAAE,CAAC;aACX,CAAC;QACJ,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,aAAa,CAAC;QAC1D,CAAC;QAED,iEAAiE;QACjE,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC,CAAC;QAC3C,MAAM,IAAI,CAAC,aAAa,CACtB,SAAS,EACT,gEAAgE,SAAS,wEAAwE,EACjJ,SAAS,CACV,CAAC;QAEF,eAAe;QACf,MAAM,WAAW,GAAG,IAAI,CAAC,OAAO,CAAC,oBAAoB,CAAC;QACtD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,IAAI,WAAW,EAAE,OAAO,EAAE,EAAE,CAAC;YACxD,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;gBAAE,MAAM;YAEnD,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,QAAQ,GAAG,OAAO,CAAC;YAEpD,iBAAiB;YACjB,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACjD,MAAM,IAAI,CAAC,aAAa,CACtB,WAAW,EACX,6EAA6E,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,uEAAuE,CAAC,CAAC,CAAC,EAAE,EAAE,EACzK,SAAS,CACV,CAAC;YAEF,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;gBAAE,MAAM;YAEnD,iBAAiB;YACjB,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,SAAS,EAAE,OAAO,CAAC,CAAC;YACjD,MAAM,IAAI,CAAC,aAAa,CACtB,WAAW,EACX,uJAAuJ,EACvJ,SAAS,CACV,CAAC;YAEF,0CAA0C;YAC1C,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC3D,MAAM,IAAI,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;YACxC,MAAM,UAAU,GAAG,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;YAC3D,UAAU,CAA
C,WAAW,GAAG,WAAW,CAAC;YACrC,IAAI,CAAC,SAAS,CAAC;gBACb,IAAI,EAAE,YAAY;gBAClB,IAAI,EAAE,EAAE,MAAM,EAAE,SAAS,EAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE;aACzD,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC;gBACtB,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACnD,IAAI,CAAC,SAAS,CAAC;oBACb,IAAI,EAAE,iBAAiB;oBACvB,IAAI,EAAE;wBACJ,MAAM,EAAE,SAAS;wBACjB,MAAM,EAAE,QAAQ;wBAChB,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,OAAO;qBAClD;iBACF,CAAC,CAAC;gBAEH,gDAAgD;gBAChD,IAAI,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC;oBAC3C,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;oBACvC,MAAM,IAAI,CAAC,aAAa,CACtB,SAAS,EACT,UAAU,SAAS,0IAA0I,EAC7J,SAAS,CACV,CAAC;gBACJ,CAAC;gBAED,OAAO,CAAC,8BAA8B;YACxC,CAAC;YAED,oCAAoC;YACpC,IAAI,OAAO,KAAK,WAAW,EAAE,CAAC;gBAC5B,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACnD,IAAI,CAAC,SAAS,CAAC;oBACb,IAAI,EAAE,iBAAiB;oBACvB,IAAI,EAAE;wBACJ,MAAM,EAAE,SAAS;wBACjB,MAAM,EAAE,QAAQ;wBAChB,QAAQ,EAAE,OAAO;wBACjB,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,OAAO;qBAClD;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAEO,QAAQ;QACd,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACrD,CAAC;aAAM,CAAC;YACN,8BAA8B;YAC9B,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,CAC1D,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAC7B,CAAC;YACF,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC5D,CAAC;QAED,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE/C,MAAM,OAAO,GAAG,IAAI,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEzB,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,cAAc;YACpB,IAAI,EAAE;gBACJ,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,MAAM;gBAC5B,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,YAAY;gBACxC,YAAY,EAAE,IAAI,CAAC,QAAQ,CAAC,OAAO;gBACnC,UAAU,EAAE,KAAK,GAAG,OAAO;aAC5B;SACF,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAA
C,MAAM;QACV,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,YAAY,EAAE,CAAC;QAC/C,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAC;QAChE,CAAC;QAED,IAAI,CAAC,QAAQ,GAAG,EAAE,GAAG,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;QAChD,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;QACrB,OAAO,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC;QAE/B,iCAAiC;QACjC,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,IAAI,CAAC,CAAC;QACrD,KAAK,IAAI,CAAC,GAAG,WAAW,EAAE,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,YAAY,EAAE,CAAC,EAAE,EAAE,CAAC;YAC/D,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,gBAAgB,EAAE;gBAAE,MAAM;YACnD,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAChD,IAAI,cAAc,EAAE,MAAM,KAAK,QAAQ;gBAAE,SAAS,CAAC,yBAAyB;YAC5E,MAAM,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;QAC9B,CAAC;QAED,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC;IACzB,CAAC;IAED,IAAI;QACF,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACpB,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,SAAS,CAAC;QACjC,IAAI,CAAC,QAAQ,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QACnD,IAAI,CAAC,YAAY,CAAC,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,WAAW;QACT,OAAO,EAAE,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;IAC9B,CAAC;IAED,SAAS;QACP,OAAO;YACL,OAAO,EAAE,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,SAAS,CAAC;YACvD,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,WAAW,CAAC;YAC3D,SAAS,EAAE,IAAI,CAAC,cAAc,CAAC,eAAe,CAAC,WAAW,CAAC;SAC5D,CAAC;IACJ,CAAC;CACF"}
|
package/dist/core/types.d.ts
CHANGED
|
@@ -67,11 +67,31 @@ export interface Progress {
|
|
|
67
67
|
maxBudgetUsd: number;
|
|
68
68
|
sprints: Record<number, SprintProgress>;
|
|
69
69
|
}
|
|
70
|
+
/** Project category used to select which evaluation dimensions apply. */
export type ProjectType = "frontend" | "backend" | "fullstack" | "universal";

/** Definition of one evaluation dimension. */
export interface EvalDimension {
    /** Stable identifier of the dimension, e.g. "code-quality". */
    id: string;
    /** Display name, e.g. "Code Quality". */
    name: string;
    /** Short description of what the dimension covers. */
    description: string;
    /** Numeric weight of the dimension. */
    weight: number;
    /** Minimum score, out of 10, required for the dimension to pass. */
    threshold: number;
    /** Scoring guidance text for the dimension. */
    rubric: string;
}

/** Score recorded for a single dimension of an evaluation. */
export interface DimensionScore {
    /** Identifier of the scored dimension. */
    id: string;
    /** Name of the scored dimension. */
    name: string;
    /** Score awarded, out of 10. */
    score: number;
    /** Minimum score that was required for this dimension. */
    threshold: number;
    /** Whether the score met the threshold. */
    passed: boolean;
    /** Explanation given for the score. */
    rationale: string;
}

/** Outcome of evaluating one implementation attempt. */
export interface EvalResult {
    /** Overall pass/fail verdict. */
    passed: boolean;
    /** Free-text critique from the evaluation. */
    critique: string;
    /** Entries describing what failed. */
    failedCriteria: string[];
    /** Entries describing what passed. */
    passedCriteria: string[];
    /** Overall score out of 10; present for scored evaluations. */
    overallScore?: number;
    /** Per-dimension scores; present for scored evaluations. */
    dimensions?: DimensionScore[];
    /** Project type the evaluation was scored against, when recorded. */
    projectType?: ProjectType;
}
|
|
76
96
|
export type AgentRole = "planner" | "generator" | "evaluator";
|
|
77
97
|
export interface AgentDefinition {
|
|
@@ -1 +1,4 @@
|
|
|
1
|
+
import type { EvalDimension, ProjectType } from "../core/types.js";
|
|
1
2
|
export declare const DEFAULT_CRITERIA = "## Default Evaluation Criteria\n\n### Correctness\n- All features specified in the contract are implemented and functional\n- No placeholder, stubbed, or mocked implementations in production code\n- Code runs without runtime errors\n\n### Testing\n- New features have corresponding tests\n- All tests pass when the test suite is run\n- Tests cover the primary success path and key edge cases\n\n### Code Quality\n- Code follows project conventions (from CLAUDE.md if present)\n- No leftover TODO or FIXME comments\n- No debug logging or commented-out code left in place\n- Imports are clean \u2014 no unused imports\n\n### Integration\n- New code integrates with existing codebase without breaking existing functionality\n- Existing tests still pass after changes\n";
|
|
3
|
+
export declare function getDimensions(projectType: ProjectType): EvalDimension[];
|
|
4
|
+
export declare function formatDimensionsBlock(dimensions: EvalDimension[]): string;
|
|
@@ -20,4 +20,128 @@ export const DEFAULT_CRITERIA = `## Default Evaluation Criteria
|
|
|
20
20
|
- New code integrates with existing codebase without breaking existing functionality
|
|
21
21
|
- Existing tests still pass after changes
|
|
22
22
|
`;
|
|
23
|
+
// Dimensions included for every project type.
// Each entry: id, display name, description, weight, minimum passing score
// (threshold, out of 10) and a rubric string with anchor descriptions.
const UNIVERSAL_DIMENSIONS = [
    {
        id: "correctness",
        name: "Correctness",
        description: "Features work as specified, no placeholders, no runtime errors",
        weight: 2,
        threshold: 6,
        rubric: "3=major features missing or broken; 5=features work with gaps; 7=solid with minor issues; 9=exceeds requirements",
    },
    {
        id: "testing",
        name: "Testing",
        description: "Tests exist, pass, cover happy path and edge cases",
        weight: 1.5,
        threshold: 5,
        rubric: "3=no tests or most fail; 5=happy path covered; 7=good coverage with edge cases; 9=comprehensive with mocks and integration",
    },
    {
        id: "code-quality",
        name: "Code Quality",
        description: "Follows conventions, no TODOs/dead code, clean imports",
        weight: 1,
        threshold: 5,
        rubric: "3=inconsistent style, dead code; 5=acceptable, minor issues; 7=clean and consistent; 9=exemplary, idiomatic",
    },
    {
        id: "integration",
        name: "Integration",
        description: "Existing tests pass, follows existing patterns, no regressions",
        weight: 1.5,
        threshold: 6,
        rubric: "3=breaks existing tests; 5=works but diverges from patterns; 7=integrates cleanly; 9=enhances existing architecture",
    },
    {
        id: "design-principles",
        name: "Design Principles",
        description: "SOLID, DRY, separation of concerns, appropriate abstractions",
        weight: 1,
        threshold: 5,
        rubric: "3=tangled responsibilities, heavy duplication; 5=reasonable structure; 7=clean separation, minimal duplication; 9=elegant, well-abstracted",
    },
    {
        id: "error-handling",
        name: "Error Handling",
        description: "Proper error propagation, edge cases handled, input validation",
        weight: 1,
        threshold: 5,
        rubric: "3=errors swallowed or crash; 5=basic error handling; 7=graceful handling with informative messages; 9=comprehensive with recovery strategies",
    },
];
|
|
73
|
+
// Extra dimensions for backend and fullstack projects.
// Same entry shape as the other dimension tables: id, name, description,
// weight, threshold (minimum passing score out of 10) and rubric.
const BACKEND_DIMENSIONS = [
    {
        id: "api-design",
        name: "API Design",
        description: "Consistent endpoints, status codes, input validation, error responses",
        weight: 1.5,
        threshold: 6,
        rubric: "3=inconsistent or broken endpoints; 5=functional but inconsistent; 7=clean REST/GraphQL with proper status codes; 9=well-documented, versioned, idiomatic",
    },
    {
        id: "data-integrity",
        name: "Data Integrity",
        description: "Transactions, data validation at boundaries, no data loss paths",
        weight: 1.5,
        threshold: 6,
        rubric: "3=data loss possible; 5=basic validation; 7=proper transactions and boundary checks; 9=bulletproof data handling",
    },
    {
        id: "concurrency-safety",
        name: "Concurrency Safety",
        description: "No race conditions, shared state protected, timeout handling",
        weight: 1,
        threshold: 5,
        rubric: "3=race conditions present; 5=basic locking; 7=proper concurrency patterns; 9=lock-free or formally verified",
    },
];
|
|
99
|
+
// Extra dimensions for frontend and fullstack projects.
// Same entry shape as the other dimension tables: id, name, description,
// weight, threshold (minimum passing score out of 10) and rubric.
const FRONTEND_DIMENSIONS = [
    {
        id: "ui-ux-quality",
        name: "UI/UX Quality",
        description: "Consistent visuals, responsive, loading/error states, intuitive flows",
        weight: 1.5,
        threshold: 5,
        rubric: "3=broken layout or missing states; 5=functional UI with gaps; 7=polished with loading/error states; 9=delightful, pixel-perfect",
    },
    {
        id: "component-architecture",
        name: "Component Architecture",
        description: "Clear responsibilities, proper state management, clean props",
        weight: 1,
        threshold: 5,
        rubric: "3=monolithic components, prop drilling; 5=reasonable split; 7=clean composition with proper state; 9=reusable, well-encapsulated",
    },
    {
        id: "accessibility",
        name: "Accessibility",
        description: "Semantic HTML, keyboard navigation, ARIA labels",
        weight: 1,
        threshold: 4,
        rubric: "3=no semantic HTML; 5=basic semantics; 7=keyboard nav and ARIA; 9=WCAG AA compliant",
    },
];
|
|
125
|
+
/**
 * Select the scoring dimensions that apply to a project type.
 *
 * The universal dimensions are always included; backend and/or frontend
 * dimensions are appended depending on `projectType`. A fresh array is
 * returned on every call, so callers may reorder or filter the result
 * without affecting the module-level lists (the dimension objects
 * themselves are shared, not cloned).
 *
 * @param projectType One of "frontend", "backend", "fullstack" or "universal".
 * @returns The ordered list of dimensions for that project type. Any other
 *   value (including `undefined`) yields the universal dimensions only.
 */
export function getDimensions(projectType) {
    switch (projectType) {
        case "frontend":
            return [...UNIVERSAL_DIMENSIONS, ...FRONTEND_DIMENSIONS];
        case "backend":
            return [...UNIVERSAL_DIMENSIONS, ...BACKEND_DIMENSIONS];
        case "fullstack":
            return [...UNIVERSAL_DIMENSIONS, ...BACKEND_DIMENSIONS, ...FRONTEND_DIMENSIONS];
        case "universal":
        default:
            // This function is part of the public package API, so untyped
            // callers can pass an unexpected value. Without a default branch
            // the function returned undefined, which then fails as
            // "not iterable" when the result is formatted. Fall back to the
            // universal set instead.
            return [...UNIVERSAL_DIMENSIONS];
    }
}
|
|
137
|
+
/**
 * Render scoring dimensions as the Markdown text embedded in the evaluator
 * prompt.
 *
 * Every dimension becomes four lines: a `###` heading carrying the name,
 * weight and minimum score, the description, a `Rubric:` line, and a blank
 * separator line.
 *
 * @param dimensions Iterable of dimension objects exposing `name`, `weight`,
 *   `threshold`, `description` and `rubric`.
 * @returns The joined block; an empty string when there are no dimensions.
 */
export function formatDimensionsBlock(dimensions) {
    const sections = [...dimensions].map((dim) => {
        const heading = `### ${dim.name} (weight: ${dim.weight}, min: ${dim.threshold}/10)`;
        // Joining an array (rather than interpolating) keeps a missing
        // description rendering as an empty line. The trailing "" yields the
        // newline that closes each section.
        return [heading, dim.description, `Rubric: ${dim.rubric}`, ""].join("\n");
    });
    return sections.join("\n");
}
|
|
23
147
|
//# sourceMappingURL=criteria.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"criteria.js","sourceRoot":"","sources":["../../src/defaults/criteria.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"criteria.js","sourceRoot":"","sources":["../../src/defaults/criteria.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,gBAAgB,GAAG;;;;;;;;;;;;;;;;;;;;;CAqB/B,CAAC;AAEF,MAAM,oBAAoB,GAAoB;IAC5C;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,gEAAgE;QAC7E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,kHAAkH;KAC3H;IACD;QACE,EAAE,EAAE,SAAS;QACb,IAAI,EAAE,SAAS;QACf,WAAW,EAAE,oDAAoD;QACjE,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,4HAA4H;KACrI;IACD;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,cAAc;QACpB,WAAW,EAAE,wDAAwD;QACrE,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,6GAA6G;KACtH;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,aAAa;QACnB,WAAW,EAAE,gEAAgE;QAC7E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,qHAAqH;KAC9H;IACD;QACE,EAAE,EAAE,mBAAmB;QACvB,IAAI,EAAE,mBAAmB;QACzB,WAAW,EAAE,8DAA8D;QAC3E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,4IAA4I;KACrJ;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,gEAAgE;QAC7E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,8IAA8I;KACvJ;CACF,CAAC;AAEF,MAAM,kBAAkB,GAAoB;IAC1C;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,YAAY;QAClB,WAAW,EAAE,uEAAuE;QACpF,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,2JAA2J;KACpK;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,gBAAgB;QACtB,WAAW,EAAE,iEAAiE;QAC9E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,kHAAkH;KAC3H;IACD;QACE,EAAE,EAAE,oBAAoB;QACxB,IAAI,EAAE,oBAAoB;QAC1B,WAAW,EAAE,8DAA8D;QAC3E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,6GAA6G;KACtH;CACF,CAAC;AAEF,MAAM,mBAAmB,GAAoB;IAC3C;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,uEAAuE;QACpF,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,iIAAiI;KAC1I;IACD;QACE,EAAE,EAAE,wBAAwB;QAC5B,IAAI,EAAE,wBAAwB;QAC9B,WAAW,EAAE,8DAA8D;QAC3E,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,kIAAkI;KAC3I;IACD;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,eAAe;QACrB,WAAW,EAAE,iDAAiD;QAC9D,MAAM,EAAE,GAAG;QACX,SAAS,EAAE,CAAC;QACZ,MAAM,EAAE,qFAAqF;KAC9F;CACF,CAAC;AAEF,MAAM,UAAU,aAAa,CAAC,WAAwB;IACpD,QAAQ,
WAAW,EAAE,CAAC;QACpB,KAAK,UAAU;YACb,OAAO,CAAC,GAAG,oBAAoB,EAAE,GAAG,mBAAmB,CAAC,CAAC;QAC3D,KAAK,SAAS;YACZ,OAAO,CAAC,GAAG,oBAAoB,EAAE,GAAG,kBAAkB,CAAC,CAAC;QAC1D,KAAK,WAAW;YACd,OAAO,CAAC,GAAG,oBAAoB,EAAE,GAAG,kBAAkB,EAAE,GAAG,mBAAmB,CAAC,CAAC;QAClF,KAAK,WAAW;YACd,OAAO,CAAC,GAAG,oBAAoB,CAAC,CAAC;IACrC,CAAC;AACH,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,UAA2B;IAC/D,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,OAAO,GAAG,CAAC,IAAI,aAAa,GAAG,CAAC,MAAM,UAAU,GAAG,CAAC,SAAS,MAAM,CAAC,CAAC;QAChF,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QAC5B,KAAK,CAAC,IAAI,CAAC,WAAW,GAAG,CAAC,MAAM,EAAE,CAAC,CAAC;QACpC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACjB,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
const FRONTEND_FRAMEWORKS = new Set([
|
|
2
|
+
"vite",
|
|
3
|
+
"react",
|
|
4
|
+
"vue",
|
|
5
|
+
"svelte",
|
|
6
|
+
"angular",
|
|
7
|
+
]);
|
|
8
|
+
const BACKEND_FRAMEWORKS = new Set([
|
|
9
|
+
"django",
|
|
10
|
+
"fastapi",
|
|
11
|
+
"express",
|
|
12
|
+
"flask",
|
|
13
|
+
"hono",
|
|
14
|
+
"gin",
|
|
15
|
+
"rails",
|
|
16
|
+
]);
|
|
17
|
+
const FULLSTACK_FRAMEWORKS = new Set(["nextjs"]);
|
|
18
|
+
const BACKEND_LANGUAGES = new Set(["python", "go", "rust", "java", "ruby"]);
|
|
19
|
+
export function detectProjectType(ctx) {
|
|
20
|
+
let hasFrontend = false;
|
|
21
|
+
let hasBackend = false;
|
|
22
|
+
for (const ws of ctx.workspaces) {
|
|
23
|
+
const { framework, language, devServer } = ws.stack;
|
|
24
|
+
if (framework && FULLSTACK_FRAMEWORKS.has(framework)) {
|
|
25
|
+
return "fullstack";
|
|
26
|
+
}
|
|
27
|
+
if (framework && FRONTEND_FRAMEWORKS.has(framework)) {
|
|
28
|
+
hasFrontend = true;
|
|
29
|
+
}
|
|
30
|
+
else if (framework && BACKEND_FRAMEWORKS.has(framework)) {
|
|
31
|
+
hasBackend = true;
|
|
32
|
+
}
|
|
33
|
+
else if (!framework && BACKEND_LANGUAGES.has(language)) {
|
|
34
|
+
hasBackend = true;
|
|
35
|
+
}
|
|
36
|
+
else if (!framework && devServer) {
|
|
37
|
+
hasFrontend = true;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
if (hasFrontend && hasBackend)
|
|
41
|
+
return "fullstack";
|
|
42
|
+
if (hasFrontend)
|
|
43
|
+
return "frontend";
|
|
44
|
+
if (hasBackend)
|
|
45
|
+
return "backend";
|
|
46
|
+
return "universal";
|
|
47
|
+
}
|
|
48
|
+
//# sourceMappingURL=project-type.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"project-type.js","sourceRoot":"","sources":["../../src/defaults/project-type.ts"],"names":[],"mappings":"AAEA,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,MAAM;IACN,OAAO;IACP,KAAK;IACL,QAAQ;IACR,SAAS;CACV,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACjC,QAAQ;IACR,SAAS;IACT,SAAS;IACT,OAAO;IACP,MAAM;IACN,KAAK;IACL,OAAO;CACR,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;AAEjD,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,CAAC;AAE5E,MAAM,UAAU,iBAAiB,CAAC,GAAmB;IACnD,IAAI,WAAW,GAAG,KAAK,CAAC;IACxB,IAAI,UAAU,GAAG,KAAK,CAAC;IAEvB,KAAK,MAAM,EAAE,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;QAChC,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,SAAS,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC;QAEpD,IAAI,SAAS,IAAI,oBAAoB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YACrD,OAAO,WAAW,CAAC;QACrB,CAAC;QAED,IAAI,SAAS,IAAI,mBAAmB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YACpD,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;aAAM,IAAI,SAAS,IAAI,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;YAC1D,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,CAAC,SAAS,IAAI,iBAAiB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACzD,UAAU,GAAG,IAAI,CAAC;QACpB,CAAC;aAAM,IAAI,CAAC,SAAS,IAAI,SAAS,EAAE,CAAC;YACnC,WAAW,GAAG,IAAI,CAAC;QACrB,CAAC;IACH,CAAC;IAED,IAAI,WAAW,IAAI,UAAU;QAAE,OAAO,WAAW,CAAC;IAClD,IAAI,WAAW;QAAE,OAAO,UAAU,CAAC;IACnC,IAAI,UAAU;QAAE,OAAO,SAAS,CAAC;IACjC,OAAO,WAAW,CAAC;AACrB,CAAC"}
|
package/dist/defaults/prompts.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { getDimensions, formatDimensionsBlock } from "./criteria.js";
|
|
2
|
+
import { detectProjectType } from "./project-type.js";
|
|
2
3
|
const PLANNER_BASE = `You are a product planner. Your job is to convert user descriptions into comprehensive product specifications, break them into sprints, and write sprint contracts.
|
|
3
4
|
|
|
4
5
|
RULES:
|
|
@@ -21,29 +22,47 @@ RULES:
|
|
|
21
22
|
- Commit your work with conventional commit messages.
|
|
22
23
|
- Do NOT evaluate your own work. Do NOT say "this looks good" or "everything is working."
|
|
23
24
|
Your job is to implement, not judge. A separate evaluator will assess your work.`;
|
|
24
|
-
const EVALUATOR_BASE = `You are a
|
|
25
|
+
const EVALUATOR_BASE = `You are a code evaluator. Your job is to assess implementation quality across multiple dimensions.
|
|
25
26
|
|
|
26
27
|
MINDSET:
|
|
27
|
-
- Be
|
|
28
|
-
-
|
|
29
|
-
-
|
|
30
|
-
-
|
|
31
|
-
-
|
|
28
|
+
- Be fair and calibrated. Base scores on evidence.
|
|
29
|
+
- 5 = acceptable, meets minimum expectations
|
|
30
|
+
- 7 = good, solid with minor issues
|
|
31
|
+
- 9-10 = excellent, production-grade
|
|
32
|
+
- 3 or below = significant problems
|
|
33
|
+
- Run the test suite. Read the code. Verify behavior.
|
|
34
|
+
- Stubbed, mocked, or placeholder implementations in production code score low on Correctness.
|
|
35
|
+
|
|
36
|
+
CALIBRATION EXAMPLES:
|
|
37
|
+
- All features work but one edge case unhandled: Correctness = 7
|
|
38
|
+
- Tests exist but only happy path: Testing = 5
|
|
39
|
+
- API returns correct data but 500s on invalid input: Error Handling = 4
|
|
40
|
+
- Code works but duplicates logic across files: Design Principles = 4
|
|
41
|
+
- Clean code, follows all conventions: Code Quality = 8
|
|
32
42
|
|
|
33
43
|
PROCESS:
|
|
34
44
|
1. Read .harness/contract.md for what was promised
|
|
35
45
|
2. Read the actual code that was written (use Grep and Read)
|
|
36
46
|
3. Run the test suite
|
|
37
|
-
4.
|
|
47
|
+
4. Score each dimension below with evidence
|
|
38
48
|
5. Write your evaluation to .harness/evaluation.md
|
|
39
49
|
|
|
50
|
+
{{DIMENSIONS}}
|
|
51
|
+
|
|
40
52
|
YOUR OUTPUT FORMAT (write to .harness/evaluation.md):
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
53
|
+
Overall: PASS or FAIL
|
|
54
|
+
Score: X.X/10
|
|
55
|
+
|
|
56
|
+
## Dimensions
|
|
57
|
+
|
|
58
|
+
### [Dimension Name]
|
|
59
|
+
Score: N/10
|
|
60
|
+
Rationale: (1-2 sentences with specific evidence)
|
|
61
|
+
|
|
62
|
+
(repeat for each dimension)
|
|
63
|
+
|
|
64
|
+
## Critique
|
|
65
|
+
(actionable feedback for improvements — what's wrong and what needs to change)`;
|
|
47
66
|
const BASE_PROMPTS = {
|
|
48
67
|
planner: PLANNER_BASE,
|
|
49
68
|
generator: GENERATOR_BASE,
|
|
@@ -106,13 +125,16 @@ export function buildSystemPrompt(role, ctx, appendPrompt) {
|
|
|
106
125
|
sections.push(BASE_PROMPTS[role]);
|
|
107
126
|
// 2. Project context
|
|
108
127
|
sections.push(`\n\n## PROJECT CONTEXT\n\n${formatProjectContext(ctx)}`);
|
|
109
|
-
// 3. Evaluation
|
|
128
|
+
// 3. Evaluation dimensions (evaluator only)
|
|
110
129
|
if (role === "evaluator") {
|
|
111
|
-
|
|
130
|
+
const projectType = detectProjectType(ctx);
|
|
131
|
+
const dimensions = getDimensions(projectType);
|
|
132
|
+
const dimensionsBlock = formatDimensionsBlock(dimensions);
|
|
133
|
+
// Replace {{DIMENSIONS}} placeholder in base prompt
|
|
134
|
+
sections[0] = sections[0].replace("{{DIMENSIONS}}", `## SCORING DIMENSIONS\n\n${dimensionsBlock}`);
|
|
112
135
|
if (ctx.criteria) {
|
|
113
|
-
|
|
136
|
+
sections.push(`\n\n## CUSTOM CRITERIA\n\n${ctx.criteria}`);
|
|
114
137
|
}
|
|
115
|
-
sections.push(`\n\n## EVALUATION CRITERIA\n\n${criteriaBlock}`);
|
|
116
138
|
}
|
|
117
139
|
// 4. Additional instructions
|
|
118
140
|
if (appendPrompt) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../src/defaults/prompts.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"prompts.js","sourceRoot":"","sources":["../../src/defaults/prompts.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAC;AACrE,OAAO,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AAEtD,MAAM,YAAY,GAAG;;;;;;;;;8DASyC,CAAC;AAE/D,MAAM,cAAc,GAAG;;;;;;;;;;;mFAW4D,CAAC;AAEpF,MAAM,cAAc,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;+EAwCwD,CAAC;AAEhF,MAAM,YAAY,GAA8B;IAC9C,OAAO,EAAE,YAAY;IACrB,SAAS,EAAE,cAAc;IACzB,SAAS,EAAE,cAAc;CAC1B,CAAC;AAEF;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAAC,GAAmB;IACtD,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,CAAC,IAAI,CAAC,oBAAoB,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC/C,KAAK,CAAC,IAAI,CAAC,SAAS,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC;IAEhC,IAAI,GAAG,CAAC,KAAK,IAAI,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtC,KAAK,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACf,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IAC1B,KAAK,MAAM,EAAE,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC;QAChC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7B,KAAK,CAAC,IAAI,CAAC,iBAAiB,EAAE,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;QACjD,IAAI,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,kBAAkB,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC;QACrD,CAAC;QACD,IAAI,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC;YACxB,KAAK,CAAC,IAAI,CAAC,oBAAoB,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC;QACxD,CAAC;QACD,KAAK,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QACxD,IAAI,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;YACzB,KAAK,CAAC,IAAI,CAAC,qBAAqB,EAAE,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;QAC1D,CAAC;QACD,IAAI,EAAE,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC;YAC1B,KAAK,CAAC,IAAI,CAAC,sBAAsB,EAAE,CAAC,KAAK,CAAC,YAAY,EAAE,CAAC,CAAC;QAC5D,CAAC;QACD,IAAI,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,mBAAmB,EAAE,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC;QACtD,CAAC;QACD,IAAI,EAAE,CAAC,QAAQ,EAAE,CAAC;YAChB,KAAK,CAAC,IAAI,CAAC,kBAAkB,EAAE,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,IAAI,GAAG,CAAC,YAAY,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CAA
C,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QACzB,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,UAAU,iBAAiB,CAC/B,IAAe,EACf,GAAmB,EACnB,YAAqB;IAErB,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,iBAAiB;IACjB,QAAQ,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC;IAElC,qBAAqB;IACrB,QAAQ,CAAC,IAAI,CAAC,6BAA6B,oBAAoB,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IAExE,4CAA4C;IAC5C,IAAI,IAAI,KAAK,WAAW,EAAE,CAAC;QACzB,MAAM,WAAW,GAAG,iBAAiB,CAAC,GAAG,CAAC,CAAC;QAC3C,MAAM,UAAU,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;QAC9C,MAAM,eAAe,GAAG,qBAAqB,CAAC,UAAU,CAAC,CAAC;QAE1D,oDAAoD;QACpD,QAAQ,CAAC,CAAC,CAAC,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,gBAAgB,EAAE,4BAA4B,eAAe,EAAE,CAAC,CAAC;QAEnG,IAAI,GAAG,CAAC,QAAQ,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,6BAA6B,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC;QAC7D,CAAC;IACH,CAAC;IAED,6BAA6B;IAC7B,IAAI,YAAY,EAAE,CAAC;QACjB,QAAQ,CAAC,IAAI,CAAC,qCAAqC,YAAY,EAAE,CAAC,CAAC;IACrE,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;AAC3B,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -7,6 +7,7 @@ export { buildProjectContext } from "./discovery/project-context.js";
|
|
|
7
7
|
export { detectStack, detectRepoType, discoverWorkspaces } from "./discovery/stack-detector.js";
|
|
8
8
|
export { loadConfig, loadCriteria } from "./discovery/config-loader.js";
|
|
9
9
|
export { buildSystemPrompt, formatProjectContext } from "./defaults/prompts.js";
|
|
10
|
-
export { DEFAULT_CRITERIA } from "./defaults/criteria.js";
|
|
10
|
+
export { DEFAULT_CRITERIA, getDimensions, formatDimensionsBlock } from "./defaults/criteria.js";
|
|
11
|
+
export { detectProjectType } from "./defaults/project-type.js";
|
|
11
12
|
export { DashboardServer } from "./dashboard/server.js";
|
|
12
|
-
export type { Stack, Workspace, ProjectContext, AgentConfig, WorkspaceConfig, HarnessConfig, SprintStatus, RunStatus, Phase, SprintProgress, Progress, EvalResult, AgentRole, AgentDefinition, PhaseStartEvent, AgentActivityEvent, EvaluationEvent, CostUpdateEvent, SprintCompleteEvent, RunCompleteEvent, HarnessEvent, } from "./core/types.js";
|
|
13
|
+
export type { Stack, Workspace, ProjectContext, AgentConfig, WorkspaceConfig, HarnessConfig, SprintStatus, RunStatus, Phase, SprintProgress, Progress, EvalResult, ProjectType, EvalDimension, DimensionScore, AgentRole, AgentDefinition, PhaseStartEvent, AgentActivityEvent, EvaluationEvent, CostUpdateEvent, SprintCompleteEvent, RunCompleteEvent, HarnessEvent, } from "./core/types.js";
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,8 @@ export { detectStack, detectRepoType, discoverWorkspaces } from "./discovery/sta
|
|
|
8
8
|
export { loadConfig, loadCriteria } from "./discovery/config-loader.js";
|
|
9
9
|
// Defaults
|
|
10
10
|
export { buildSystemPrompt, formatProjectContext } from "./defaults/prompts.js";
|
|
11
|
-
export { DEFAULT_CRITERIA } from "./defaults/criteria.js";
|
|
11
|
+
export { DEFAULT_CRITERIA, getDimensions, formatDimensionsBlock } from "./defaults/criteria.js";
|
|
12
|
+
export { detectProjectType } from "./defaults/project-type.js";
|
|
12
13
|
// Dashboard
|
|
13
14
|
export { DashboardServer } from "./dashboard/server.js";
|
|
14
15
|
//# sourceMappingURL=index.js.map
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO;AACP,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAE3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAEvD,YAAY;AACZ,OAAO,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AACrE,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAChG,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAExE,WAAW;AACX,OAAO,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO;AACP,OAAO,EAAE,OAAO,EAAE,MAAM,wBAAwB,CAAC;AAEjD,OAAO,EAAE,cAAc,EAAE,MAAM,2BAA2B,CAAC;AAE3D,OAAO,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAEvD,YAAY;AACZ,OAAO,EAAE,mBAAmB,EAAE,MAAM,gCAAgC,CAAC;AACrE,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,kBAAkB,EAAE,MAAM,+BAA+B,CAAC;AAChG,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAExE,WAAW;AACX,OAAO,EAAE,iBAAiB,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAChF,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,qBAAqB,EAAE,MAAM,wBAAwB,CAAC;AAChG,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAE/D,YAAY;AACZ,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC"}
|