@jhlee0619/codexloop 0.1.0 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/prompts/evaluate.md +10 -0
- package/prompts/rank.md +4 -0
- package/prompts/suggest.md +10 -0
- package/schemas/operation.schema.json +45 -0
- package/schemas/task-spec.schema.json +85 -0
- package/schemas/validator-result.schema.json +45 -0
- package/scripts/lib/apply.mjs +112 -15
- package/scripts/lib/codex-exec.mjs +25 -0
- package/scripts/lib/convergence.mjs +45 -11
- package/scripts/lib/iteration.mjs +39 -11
- package/scripts/lib/modes/artifact.mjs +56 -0
- package/scripts/lib/modes/code.mjs +33 -0
- package/scripts/lib/modes/index.mjs +30 -0
- package/scripts/lib/operations.mjs +244 -0
- package/scripts/lib/retrieve.mjs +132 -0
- package/scripts/lib/state.mjs +28 -6
- package/scripts/lib/task-spec.mjs +111 -0
- package/scripts/lib/validate.mjs +37 -6
- package/scripts/lib/validators/file-exists.mjs +35 -0
- package/scripts/lib/validators/headings-present.mjs +44 -0
- package/scripts/lib/validators/max-placeholder-count.mjs +58 -0
- package/scripts/lib/validators/registry.mjs +47 -0
- package/scripts/loop-companion.mjs +53 -2
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@jhlee0619/codexloop",
|
|
3
|
-
"version": "0.1.0",
|
|
4
|
-
"description": "CodexLoop
|
|
3
|
+
"version": "0.1.3",
|
|
4
|
+
"description": "CodexLoop — iterative improvement loop that drives OpenAI Codex as a multi-role critic (evaluate → suggest → rank → apply → validate → record).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"cloop": "./bin/cloop"
|
package/prompts/evaluate.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
<mode>
|
|
2
|
+
{{PROMPT_SUFFIX}}
|
|
3
|
+
</mode>
|
|
4
|
+
|
|
1
5
|
<role>
|
|
2
6
|
You are Codex operating as a strict code reviewer and adversarial critic for CodexLoop.
|
|
3
7
|
Your job is to evaluate the current state of a repository against a specific goal and report defects, risks, and missing pieces that block acceptance.
|
|
@@ -28,6 +32,12 @@ Git diff since the loop's seed commit (or "(none)" on the first iteration):
|
|
|
28
32
|
|
|
29
33
|
Latest test/lint/type command results (or "(none)"):
|
|
30
34
|
{{CURRENT_CHECK_STATE}}
|
|
35
|
+
|
|
36
|
+
Task spec (structured, or "(none)" in code mode):
|
|
37
|
+
{{TASK_SPEC}}
|
|
38
|
+
|
|
39
|
+
Relevant file contents (context bundle, or "(none)" in code mode):
|
|
40
|
+
{{CONTEXT_BUNDLE}}
|
|
31
41
|
</task>
|
|
32
42
|
|
|
33
43
|
<operating_stance>
|
package/prompts/rank.md
CHANGED
package/prompts/suggest.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
<mode>
|
|
2
|
+
{{PROMPT_SUFFIX}}
|
|
3
|
+
</mode>
|
|
4
|
+
|
|
1
5
|
<role>
|
|
2
6
|
You are Codex operating simultaneously as a solution generator, a refactoring advisor, and a test designer for CodexLoop.
|
|
3
7
|
You propose concrete patches that the runtime will rank, apply, and validate.
|
|
@@ -22,6 +26,12 @@ Recent rejected proposals (do NOT re-propose these; explain in justifications if
|
|
|
22
26
|
|
|
23
27
|
Git diff since the loop's seed commit:
|
|
24
28
|
{{DIFF_SINCE_SEED}}
|
|
29
|
+
|
|
30
|
+
Task spec (structured, or "(none)" in code mode):
|
|
31
|
+
{{TASK_SPEC}}
|
|
32
|
+
|
|
33
|
+
Relevant file contents (context bundle, or "(none)" in code mode):
|
|
34
|
+
{{CONTEXT_BUNDLE}}
|
|
25
35
|
</task>
|
|
26
36
|
|
|
27
37
|
<proposal_rules>
|
|
package/schemas/operation.schema.json
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/operation.schema.json",
|
|
4
|
+
"title": "CodexLoop Proposal Operation",
|
|
5
|
+
"description": "A single file-editing operation in an artifact-mode proposal. Runtime translates operations into file writes + git commit, same as patch-based proposals.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["kind", "file"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"kind": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"enum": ["replace_section", "insert_after_heading", "replace_file", "append_to_file"],
|
|
12
|
+
"description": "Operation type. v0.2 ships four kinds."
|
|
13
|
+
},
|
|
14
|
+
"file": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"description": "Path relative to the repo root."
|
|
17
|
+
},
|
|
18
|
+
"heading": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"description": "Markdown heading to anchor on (e.g. '## Results'). Required for replace_section and insert_after_heading."
|
|
21
|
+
},
|
|
22
|
+
"content": {
|
|
23
|
+
"type": "string",
|
|
24
|
+
"description": "New content. For replace_section, replaces the section body. For insert_after_heading, inserted after the heading line. For replace_file and append_to_file, used as-is."
|
|
25
|
+
}
|
|
26
|
+
},
|
|
27
|
+
"allOf": [
|
|
28
|
+
{
|
|
29
|
+
"if": { "properties": { "kind": { "const": "replace_section" } } },
|
|
30
|
+
"then": { "required": ["kind", "file", "heading", "content"] }
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"if": { "properties": { "kind": { "const": "insert_after_heading" } } },
|
|
34
|
+
"then": { "required": ["kind", "file", "heading", "content"] }
|
|
35
|
+
},
|
|
36
|
+
{
|
|
37
|
+
"if": { "properties": { "kind": { "const": "replace_file" } } },
|
|
38
|
+
"then": { "required": ["kind", "file", "content"] }
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"if": { "properties": { "kind": { "const": "append_to_file" } } },
|
|
42
|
+
"then": { "required": ["kind", "file", "content"] }
|
|
43
|
+
}
|
|
44
|
+
]
|
|
45
|
+
}
|
|
package/schemas/task-spec.schema.json
@@ -0,0 +1,85 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/task-spec.schema.json",
|
|
4
|
+
"title": "CodexLoop Task Specification",
|
|
5
|
+
"description": "Structured task spec for artifact/paper loops. Lives at cloop.task.json in the target repo root, or assembled by the interview and written to .loop/task-spec.json.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["version", "mode"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"version": {
|
|
11
|
+
"type": "integer",
|
|
12
|
+
"minimum": 1,
|
|
13
|
+
"description": "Spec format version. v0.2 ships version 1."
|
|
14
|
+
},
|
|
15
|
+
"mode": {
|
|
16
|
+
"type": "string",
|
|
17
|
+
"enum": ["code", "artifact", "paper"],
|
|
18
|
+
"description": "Task mode. Determines which adapter drives the loop."
|
|
19
|
+
},
|
|
20
|
+
"required_files": {
|
|
21
|
+
"type": "array",
|
|
22
|
+
"items": { "type": "string" },
|
|
23
|
+
"description": "Files that MUST exist at loop completion. file_exists validator checks these."
|
|
24
|
+
},
|
|
25
|
+
"required_sections": {
|
|
26
|
+
"type": "array",
|
|
27
|
+
"items": {
|
|
28
|
+
"type": "object",
|
|
29
|
+
"additionalProperties": false,
|
|
30
|
+
"required": ["file", "headings"],
|
|
31
|
+
"properties": {
|
|
32
|
+
"file": { "type": "string" },
|
|
33
|
+
"headings": {
|
|
34
|
+
"type": "array",
|
|
35
|
+
"items": { "type": "string" },
|
|
36
|
+
"description": "Markdown headings (including the ## prefix) that must be present."
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
},
|
|
40
|
+
"description": "Per-file heading requirements. headings_present validator checks these."
|
|
41
|
+
},
|
|
42
|
+
"placeholder_policy": {
|
|
43
|
+
"oneOf": [
|
|
44
|
+
{ "type": "null" },
|
|
45
|
+
{
|
|
46
|
+
"type": "object",
|
|
47
|
+
"additionalProperties": false,
|
|
48
|
+
"properties": {
|
|
49
|
+
"max": {
|
|
50
|
+
"type": "integer",
|
|
51
|
+
"minimum": 0,
|
|
52
|
+
"description": "Maximum number of placeholders allowed at loop completion."
|
|
53
|
+
},
|
|
54
|
+
"disallowed_kinds": {
|
|
55
|
+
"type": "array",
|
|
56
|
+
"items": { "type": "string" },
|
|
57
|
+
"description": "Placeholder kinds (regex fragments) that are never acceptable."
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
],
|
|
62
|
+
"description": "Placeholder budget and forbidden kinds. max_placeholder_count validator reads this."
|
|
63
|
+
},
|
|
64
|
+
"validators": {
|
|
65
|
+
"type": "array",
|
|
66
|
+
"items": {
|
|
67
|
+
"type": "object",
|
|
68
|
+
"additionalProperties": false,
|
|
69
|
+
"required": ["name"],
|
|
70
|
+
"properties": {
|
|
71
|
+
"name": {
|
|
72
|
+
"type": "string",
|
|
73
|
+
"description": "Name matching a registered built-in validator (e.g. 'file_exists', 'headings_present')."
|
|
74
|
+
},
|
|
75
|
+
"args": {
|
|
76
|
+
"type": "object",
|
|
77
|
+
"additionalProperties": true,
|
|
78
|
+
"description": "Validator-specific arguments passed to the run function."
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
"description": "Explicit validator list. Runs alongside mode-implicit validators."
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
}
|
|
package/schemas/validator-result.schema.json
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/validator-result.schema.json",
|
|
4
|
+
"title": "CodexLoop Validator Result",
|
|
5
|
+
"description": "Output shape of a single built-in validator. Results are collected in iteration.validate.validatorResults.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["name", "passed", "severity"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"name": {
|
|
11
|
+
"type": "string",
|
|
12
|
+
"description": "Validator name matching the registry key (e.g. 'file_exists')."
|
|
13
|
+
},
|
|
14
|
+
"passed": {
|
|
15
|
+
"type": "boolean",
|
|
16
|
+
"description": "True if the validator's condition is satisfied."
|
|
17
|
+
},
|
|
18
|
+
"severity": {
|
|
19
|
+
"type": "string",
|
|
20
|
+
"enum": ["info", "warn", "error"],
|
|
21
|
+
"description": "info/warn are reported but non-blocking. error blocks the loop (counts as validate.passed = false)."
|
|
22
|
+
},
|
|
23
|
+
"count": {
|
|
24
|
+
"type": ["integer", "null"],
|
|
25
|
+
"description": "Relevant count (e.g. number of missing headings, number of placeholders found)."
|
|
26
|
+
},
|
|
27
|
+
"details": {
|
|
28
|
+
"type": "string",
|
|
29
|
+
"description": "Human-readable summary of what was checked and what failed/passed."
|
|
30
|
+
},
|
|
31
|
+
"evidence": {
|
|
32
|
+
"type": "array",
|
|
33
|
+
"items": {
|
|
34
|
+
"type": "object",
|
|
35
|
+
"additionalProperties": false,
|
|
36
|
+
"properties": {
|
|
37
|
+
"file": { "type": "string" },
|
|
38
|
+
"line": { "type": ["integer", "null"] },
|
|
39
|
+
"snippet": { "type": "string" }
|
|
40
|
+
}
|
|
41
|
+
},
|
|
42
|
+
"description": "Concrete locations that triggered the finding."
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
package/scripts/lib/apply.mjs
CHANGED
|
@@ -16,6 +16,7 @@ import path from "node:path";
|
|
|
16
16
|
import process from "node:process";
|
|
17
17
|
|
|
18
18
|
import { runCommand } from "./process.mjs";
|
|
19
|
+
import { applyOperations } from "./operations.mjs";
|
|
19
20
|
|
|
20
21
|
const TEST_FILE_PATTERNS = [
|
|
21
22
|
/(^|\/)tests?\//i,
|
|
@@ -142,9 +143,27 @@ function buildCommitEnv() {
|
|
|
142
143
|
};
|
|
143
144
|
}
|
|
144
145
|
|
|
145
|
-
//
|
|
146
|
-
|
|
147
|
-
|
|
146
|
+
// Mode-keyed blocking guard sets.
|
|
147
|
+
const CODE_BLOCKING_KINDS = new Set([
|
|
148
|
+
"test-file-deleted",
|
|
149
|
+
"test-disabled",
|
|
150
|
+
"missing-test-justification"
|
|
151
|
+
]);
|
|
152
|
+
const ARTIFACT_BLOCKING_KINDS = new Set([
|
|
153
|
+
"required-file-deleted"
|
|
154
|
+
]);
|
|
155
|
+
|
|
156
|
+
/**
 * Pick the set of reward-hacking finding kinds that block an apply for the
 * given task mode.
 *
 * @param {string} taskMode - Task mode name; "artifact" and "paper" share one
 *   set, every other value (including undefined) gets the code-mode set.
 * @returns {Set<string>} The shared module-level Set for that mode (not a copy).
 */
export function getBlockingKinds(taskMode) {
  switch (taskMode) {
    case "artifact":
    case "paper":
      return ARTIFACT_BLOCKING_KINDS;
    default:
      return CODE_BLOCKING_KINDS;
  }
}
|
|
160
|
+
|
|
161
|
+
// Apply a proposal's patch OR operations to the target repo. Returns a
|
|
162
|
+
// structured record the iteration layer records verbatim. Dispatches:
|
|
163
|
+
// - proposal.operations[] → operations.mjs (artifact/paper mode)
|
|
164
|
+
// - proposal.patch → git apply (code mode, or fallback)
|
|
165
|
+
export async function applyPatch({ cwd, proposal, iterationIndex, taskMode = "code", spec = null }) {
|
|
166
|
+
const hasOperations = Array.isArray(proposal?.operations) && proposal.operations.length > 0;
|
|
148
167
|
const patch = proposal?.patch ?? "";
|
|
149
168
|
const record = {
|
|
150
169
|
applied: false,
|
|
@@ -155,24 +174,45 @@ export async function applyPatch({ cwd, proposal, iterationIndex }) {
|
|
|
155
174
|
hackingFindings: [],
|
|
156
175
|
conflicts: null,
|
|
157
176
|
error: null,
|
|
158
|
-
rolledBack: false
|
|
177
|
+
rolledBack: false,
|
|
178
|
+
usedOperations: hasOperations
|
|
159
179
|
};
|
|
160
180
|
|
|
161
181
|
record.preSha = getHead(cwd);
|
|
162
182
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
(
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
183
|
+
// Reward-hacking scan (patch path — operations are checked below separately)
|
|
184
|
+
if (!hasOperations) {
|
|
185
|
+
record.hackingFindings = scanPatchForRewardHacking(patch, proposal);
|
|
186
|
+
const blockingKinds = getBlockingKinds(taskMode);
|
|
187
|
+
const blocking = record.hackingFindings.find((f) => blockingKinds.has(f.kind));
|
|
188
|
+
if (blocking) {
|
|
189
|
+
record.error = `reward-hacking-guard:${blocking.kind}${blocking.file ? `:${blocking.file}` : ""}`;
|
|
190
|
+
return record;
|
|
191
|
+
}
|
|
192
|
+
} else {
|
|
193
|
+
// Check artifact-mode guards: required-file deletion via operations
|
|
194
|
+
if (Array.isArray(spec?.required_files)) {
|
|
195
|
+
const requiredSet = new Set(spec.required_files);
|
|
196
|
+
for (const op of proposal.operations) {
|
|
197
|
+
if (op.kind === "replace_file" && op.content === "" && requiredSet.has(op.file)) {
|
|
198
|
+
record.hackingFindings.push({
|
|
199
|
+
kind: "required-file-deleted",
|
|
200
|
+
severity: "critical",
|
|
201
|
+
file: op.file,
|
|
202
|
+
summary: `operation would empty required file ${op.file}`
|
|
203
|
+
});
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
const blocking = record.hackingFindings.find((f) => f.kind === "required-file-deleted");
|
|
207
|
+
if (blocking) {
|
|
208
|
+
record.error = `reward-hacking-guard:required-file-deleted:${blocking.file}`;
|
|
209
|
+
return record;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
173
212
|
}
|
|
174
213
|
|
|
175
|
-
|
|
214
|
+
// Empty check
|
|
215
|
+
if (!hasOperations && !patch.trim()) {
|
|
176
216
|
record.empty = true;
|
|
177
217
|
return record;
|
|
178
218
|
}
|
|
@@ -184,6 +224,24 @@ export async function applyPatch({ cwd, proposal, iterationIndex }) {
|
|
|
184
224
|
return record;
|
|
185
225
|
}
|
|
186
226
|
|
|
227
|
+
// ── DISPATCH: operations vs patch ────────────────────────────────
|
|
228
|
+
if (hasOperations) {
|
|
229
|
+
const opResult = applyOperations({ cwd, operations: proposal.operations });
|
|
230
|
+
if (opResult.error) {
|
|
231
|
+
record.error = `operation-failed: ${opResult.error}`;
|
|
232
|
+
hardReset(cwd, record.preSha);
|
|
233
|
+
record.rolledBack = true;
|
|
234
|
+
return record;
|
|
235
|
+
}
|
|
236
|
+
if (!opResult.applied) {
|
|
237
|
+
record.empty = true;
|
|
238
|
+
return record;
|
|
239
|
+
}
|
|
240
|
+
record.filesTouched = opResult.filesTouched;
|
|
241
|
+
// Fall through to git add + commit below.
|
|
242
|
+
} else {
|
|
243
|
+
// Patch path (existing git apply logic)
|
|
244
|
+
|
|
187
245
|
const tmpPatch = path.join(
|
|
188
246
|
os.tmpdir(),
|
|
189
247
|
`codexloop-patch-${process.pid}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}.diff`
|
|
@@ -251,4 +309,43 @@ export async function applyPatch({ cwd, proposal, iterationIndex }) {
|
|
|
251
309
|
} finally {
|
|
252
310
|
try { fs.unlinkSync(tmpPatch); } catch {}
|
|
253
311
|
}
|
|
312
|
+
} // end else (patch path)
|
|
313
|
+
|
|
314
|
+
// ── Operations path: files already written by applyOperations, commit them.
|
|
315
|
+
const env = buildCommitEnv();
|
|
316
|
+
const addRes = runCommand("git", ["add", "-A"], { cwd, env });
|
|
317
|
+
if (addRes.status !== 0) {
|
|
318
|
+
record.error = `git add failed: ${(addRes.stderr || "").trim().slice(0, 300)}`;
|
|
319
|
+
hardReset(cwd, record.preSha);
|
|
320
|
+
record.rolledBack = true;
|
|
321
|
+
return record;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
const commitMsg = `cloop: iter ${iterationIndex} apply ${proposal?.id ?? "?"} (ops)`;
|
|
325
|
+
const commitRes = runCommand("git", ["commit", "-m", commitMsg], { cwd, env });
|
|
326
|
+
if (commitRes.status !== 0) {
|
|
327
|
+
record.error = `git commit failed: ${(commitRes.stderr || commitRes.stdout || "").trim().slice(0, 400)}`;
|
|
328
|
+
hardReset(cwd, record.preSha);
|
|
329
|
+
record.rolledBack = true;
|
|
330
|
+
return record;
|
|
331
|
+
}
|
|
332
|
+
|
|
333
|
+
record.postSha = getHead(cwd);
|
|
334
|
+
record.applied = true;
|
|
335
|
+
|
|
336
|
+
if (record.filesTouched.length === 0 && record.preSha && record.postSha) {
|
|
337
|
+
const diffFiles = runCommand(
|
|
338
|
+
"git",
|
|
339
|
+
["diff", "--name-only", `${record.preSha}..${record.postSha}`],
|
|
340
|
+
{ cwd }
|
|
341
|
+
);
|
|
342
|
+
if (diffFiles.status === 0) {
|
|
343
|
+
record.filesTouched = (diffFiles.stdout || "")
|
|
344
|
+
.split("\n")
|
|
345
|
+
.map((l) => l.trim())
|
|
346
|
+
.filter(Boolean);
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
return record;
|
|
254
351
|
}
|
|
package/scripts/lib/codex-exec.mjs
@@ -173,6 +173,18 @@ export async function runCodex({
|
|
|
173
173
|
const events = parseJsonlEvents(result.stdout);
|
|
174
174
|
const usage = extractUsage(events);
|
|
175
175
|
|
|
176
|
+
// v0.2: check for item.error events in stdout even when status is 0.
|
|
177
|
+
const itemErrors = extractItemErrors(events);
|
|
178
|
+
if (itemErrors.length > 0 && result.status === 0) {
|
|
179
|
+
lastError = new CodexError(
|
|
180
|
+
`Codex emitted ${itemErrors.length} error event(s) in stdout: ${itemErrors.join("; ")}`,
|
|
181
|
+
{ kind: "item-error", stderr: result.stderr, status: 0, attempts: attempt }
|
|
182
|
+
);
|
|
183
|
+
if (attempt >= maxAttempts) break;
|
|
184
|
+
await sleep(backoffMs(attempt));
|
|
185
|
+
continue;
|
|
186
|
+
}
|
|
187
|
+
|
|
176
188
|
if (result.status !== 0) {
|
|
177
189
|
const kind = classifyCodexFailure(result.stderr, result.status);
|
|
178
190
|
lastError = new CodexError(
|
|
@@ -279,6 +291,19 @@ function extractLastAgentMessage(events) {
|
|
|
279
291
|
return null;
|
|
280
292
|
}
|
|
281
293
|
|
|
294
|
+
// v0.2: detect item.error events in stdout JSONL. Some Codex CLI versions
|
|
295
|
+
// emit structured errors as JSONL events rather than (or in addition to) stderr.
|
|
296
|
+
/**
 * Collect a human-readable message for every error event in a parsed Codex
 * JSONL event stream.
 *
 * An event counts as an error when its `type` is "item.error", or when it is
 * an "item.completed" event whose `item.type` is "error".
 *
 * @param {Array<object>} events - Parsed JSONL events. A nullish list is
 *   treated as empty, and nullish entries are skipped.
 * @returns {string[]} One message per error event, each capped at 500 chars.
 */
function extractItemErrors(events) {
  // Render a payload as text. Object payloads (e.g. `error: { message }`)
  // must not go through String(), which would yield "[object Object]" and
  // hide the actual failure reason from the retry/error report.
  const toText = (value) => {
    if (typeof value === "string") return value;
    if (value !== null && typeof value === "object") {
      if (typeof value.message === "string") return value.message;
      // JSON.stringify can return undefined (e.g. toJSON() → undefined).
      return JSON.stringify(value) ?? String(value);
    }
    return String(value);
  };

  const errors = [];
  for (const evt of events ?? []) {
    const isError =
      evt?.type === "item.error" ||
      (evt?.type === "item.completed" && evt.item?.type === "error");
    if (!isError) continue;

    // First non-nullish field wins; with none present, serialize the whole event.
    const payload = evt.item?.text ?? evt.message ?? evt.error ?? evt;
    errors.push(toText(payload).slice(0, 500));
  }
  return errors;
}
|
|
306
|
+
|
|
282
307
|
function extractUsage(events) {
|
|
283
308
|
for (let i = events.length - 1; i >= 0; i -= 1) {
|
|
284
309
|
const evt = events[i];
|
|
package/scripts/lib/convergence.mjs
@@ -14,14 +14,30 @@
|
|
|
14
14
|
// 6. budget-time
|
|
15
15
|
// 7. budget-calls
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
17
|
+
// Mode-keyed weight tables. Code mode is byte-for-byte identical to v0.1.0.
|
|
18
|
+
const ARTIFACT_WEIGHTS = Object.freeze({
|
|
19
|
+
validatorPassRate: 0.35,
|
|
20
|
+
issueReduction: 0.15,
|
|
21
|
+
distanceFromGoal: 0.20,
|
|
22
|
+
winnerConfidence: 0.10,
|
|
23
|
+
testPassRate: 0.10,
|
|
24
|
+
typeClean: 0.10
|
|
24
25
|
});
|
|
26
|
+
export const QUALITY_WEIGHTS_BY_MODE = Object.freeze({
|
|
27
|
+
code: Object.freeze({
|
|
28
|
+
testPassRate: 0.30,
|
|
29
|
+
issueReduction: 0.20,
|
|
30
|
+
winnerConfidence: 0.15,
|
|
31
|
+
typeClean: 0.15,
|
|
32
|
+
lintClean: 0.10,
|
|
33
|
+
distanceFromGoal: 0.10
|
|
34
|
+
}),
|
|
35
|
+
artifact: ARTIFACT_WEIGHTS,
|
|
36
|
+
paper: ARTIFACT_WEIGHTS
|
|
37
|
+
});
|
|
38
|
+
|
|
39
|
+
// Legacy alias for backwards compatibility in tests.
|
|
40
|
+
export const QUALITY_WEIGHTS = QUALITY_WEIGHTS_BY_MODE.code;
|
|
25
41
|
|
|
26
42
|
export const REGRESSION_DROP = 0.10;
|
|
27
43
|
export const PLATEAU_QUALITY_THRESHOLD = 0.75;
|
|
@@ -36,7 +52,10 @@ function clamp01(x) {
|
|
|
36
52
|
export function computeQualityScore(iteration, state) {
|
|
37
53
|
const v = iteration.validate;
|
|
38
54
|
const e = iteration.evaluate;
|
|
55
|
+
const taskMode = state?.taskMode ?? "code";
|
|
56
|
+
const weights = QUALITY_WEIGHTS_BY_MODE[taskMode] ?? QUALITY_WEIGHTS_BY_MODE.code;
|
|
39
57
|
|
|
58
|
+
// Compute all possible terms — some modes use subsets.
|
|
40
59
|
let testPassRate = 0.5;
|
|
41
60
|
if (v && Number.isFinite(v.passingTests) && Number.isFinite(v.failingTests)) {
|
|
42
61
|
const total = v.passingTests + v.failingTests;
|
|
@@ -66,18 +85,33 @@ export function computeQualityScore(iteration, state) {
|
|
|
66
85
|
const distance = clamp01(e?.distanceFromGoal ?? 0.5);
|
|
67
86
|
const distanceFromGoal = 1 - distance;
|
|
68
87
|
|
|
69
|
-
|
|
88
|
+
// Validator pass rate for artifact/paper mode.
|
|
89
|
+
let validatorPassRate = 0.5;
|
|
90
|
+
if (Array.isArray(v?.validatorResults) && v.validatorResults.length > 0) {
|
|
91
|
+
const errorResults = v.validatorResults.filter((r) => r.severity === "error");
|
|
92
|
+
if (errorResults.length > 0) {
|
|
93
|
+
validatorPassRate = errorResults.filter((r) => r.passed).length / errorResults.length;
|
|
94
|
+
} else {
|
|
95
|
+
validatorPassRate = 1;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
const allTerms = {
|
|
70
100
|
testPassRate: clamp01(testPassRate),
|
|
71
101
|
issueReduction: clamp01(issueReduction),
|
|
72
102
|
winnerConfidence: clamp01(winnerConfidenceTerm),
|
|
73
103
|
typeClean: clamp01(typeClean),
|
|
74
104
|
lintClean: clamp01(lintClean),
|
|
75
|
-
distanceFromGoal: clamp01(distanceFromGoal)
|
|
105
|
+
distanceFromGoal: clamp01(distanceFromGoal),
|
|
106
|
+
validatorPassRate: clamp01(validatorPassRate)
|
|
76
107
|
};
|
|
77
108
|
|
|
78
109
|
let total = 0;
|
|
79
|
-
|
|
80
|
-
|
|
110
|
+
const terms = {};
|
|
111
|
+
for (const key of Object.keys(weights)) {
|
|
112
|
+
const termValue = allTerms[key] ?? 0;
|
|
113
|
+
terms[key] = termValue;
|
|
114
|
+
total += weights[key] * termValue;
|
|
81
115
|
}
|
|
82
116
|
return { score: Number(total.toFixed(6)), terms };
|
|
83
117
|
}
|
|
package/scripts/lib/iteration.mjs
@@ -1,9 +1,14 @@
|
|
|
1
|
-
// Single-iteration orchestration:
|
|
1
|
+
// Single-iteration orchestration:
|
|
2
|
+
// retrieve → evaluate → suggest → rank → apply → validate → record
|
|
2
3
|
//
|
|
3
|
-
// This is where the
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
//
|
|
4
|
+
// This is where the loop lives. `runIteration` is called either synchronously
|
|
5
|
+
// by /cloop:iterate or in a loop by the background worker (/cloop:start
|
|
6
|
+
// --background). It loads state from disk, runs the phases, persists state,
|
|
7
|
+
// and returns the full iteration record.
|
|
8
|
+
//
|
|
9
|
+
// v0.2: mode-aware via getModeAdapter(state.taskMode). The adapter provides
|
|
10
|
+
// per-mode prompt vars, validators, quality weights, and guards. Code mode
|
|
11
|
+
// is byte-for-byte identical to v0.1.0 behavior.
|
|
7
12
|
|
|
8
13
|
import process from "node:process";
|
|
9
14
|
|
|
@@ -18,6 +23,8 @@ import {
|
|
|
18
23
|
readProgressLogTail,
|
|
19
24
|
computeGoalHash
|
|
20
25
|
} from "./state.mjs";
|
|
26
|
+
import { getModeAdapter } from "./modes/index.mjs";
|
|
27
|
+
import { collectBundle, formatBundleForPrompt } from "./retrieve.mjs";
|
|
21
28
|
import { recomputeWinner } from "./rank.mjs";
|
|
22
29
|
import { computeQualityScore, checkStopping } from "./convergence.mjs";
|
|
23
30
|
import { applyPatch, rollbackToSha } from "./apply.mjs";
|
|
@@ -84,13 +91,13 @@ function gitDiffSince(cwd, sinceSha) {
|
|
|
84
91
|
|
|
85
92
|
/**
 * Return the goal hash stored on `state.goal`, computing and caching it on
 * first use.
 *
 * @param {object} state - Loop state. Mutated: `state.goal` is created when
 *   missing and `state.goal.goalHash` is filled in when absent or falsy.
 * @returns {*} The cached hash, i.e. whatever `computeGoalHash` returned.
 */
function ensureGoalHash(state) {
  // The guard and the hash call both tolerate a missing goal, but assigning
  // `state.goal.goalHash` on a nullish goal would throw a TypeError. Give the
  // state an empty goal so the hash has somewhere to be cached.
  if (state.goal == null) {
    state.goal = {};
  }
  if (!state.goal.goalHash) {
    state.goal.goalHash = computeGoalHash(state.goal, state.taskMode);
  }
  return state.goal.goalHash;
}
|
|
91
98
|
|
|
92
99
|
function assertGoalImmutable(state, expectedHash) {
|
|
93
|
-
const currentHash = computeGoalHash(state.goal ?? {});
|
|
100
|
+
const currentHash = computeGoalHash(state.goal ?? {}, state.taskMode);
|
|
94
101
|
if (expectedHash && currentHash !== expectedHash) {
|
|
95
102
|
throw new Error(
|
|
96
103
|
`goal hash mismatch (expected ${expectedHash}, got ${currentHash}) — goal text may have drifted`
|
|
@@ -226,6 +233,14 @@ export async function runIteration({ repoRoot, options = {} }) {
|
|
|
226
233
|
stopReason: i.stopReason
|
|
227
234
|
}));
|
|
228
235
|
|
|
236
|
+
// ── MODE ADAPTER LOOKUP ────────────────────────────────────────
|
|
237
|
+
const adapter = getModeAdapter(state.taskMode ?? "code");
|
|
238
|
+
const adapterVars = adapter.getCommonVars(state);
|
|
239
|
+
|
|
240
|
+
// ── RETRIEVE PHASE (context bundle) ──────────────────────────
|
|
241
|
+
const bundle = collectBundle({ state, repoRoot });
|
|
242
|
+
const contextBundleText = formatBundleForPrompt(bundle);
|
|
243
|
+
|
|
229
244
|
const commonVars = {
|
|
230
245
|
GOAL: state.goal?.text ?? "(unspecified)",
|
|
231
246
|
ACCEPTANCE_CRITERIA:
|
|
@@ -241,7 +256,14 @@ export async function runIteration({ repoRoot, options = {} }) {
|
|
|
241
256
|
? state.openIssues
|
|
242
257
|
: "(none)",
|
|
243
258
|
DIFF_SINCE_SEED: diffSinceSeed || "(none)",
|
|
244
|
-
CURRENT_CHECK_STATE: "(none)"
|
|
259
|
+
CURRENT_CHECK_STATE: "(none)",
|
|
260
|
+
// v0.2: mode-aware additions
|
|
261
|
+
TASK_MODE: state.taskMode ?? "code",
|
|
262
|
+
TASK_SPEC: state.goal?.spec ?? "(none)",
|
|
263
|
+
CONTEXT_BUNDLE: contextBundleText,
|
|
264
|
+
PROMPT_SUFFIX: adapter.getPromptSuffix(),
|
|
265
|
+
// Spread adapter-specific vars (e.g. REQUIRED_FILES, REQUIRED_SECTIONS)
|
|
266
|
+
...adapterVars
|
|
245
267
|
};
|
|
246
268
|
|
|
247
269
|
const codexOpts = {
|
|
@@ -269,7 +291,9 @@ export async function runIteration({ repoRoot, options = {} }) {
|
|
|
269
291
|
iteration.validate = await runValidation({
|
|
270
292
|
cwd: repoRoot,
|
|
271
293
|
goal: state.goal,
|
|
272
|
-
previousIteration: state.iterations[state.iterations.length - 1]
|
|
294
|
+
previousIteration: state.iterations[state.iterations.length - 1],
|
|
295
|
+
spec: state.goal?.spec ?? null,
|
|
296
|
+
implicitValidators: adapter.getImplicitValidators(state.goal?.spec)
|
|
273
297
|
});
|
|
274
298
|
} else {
|
|
275
299
|
iteration.validate = { passed: null, regression: false, commands: [], skipped: "dry-run" };
|
|
@@ -366,7 +390,9 @@ export async function runIteration({ repoRoot, options = {} }) {
|
|
|
366
390
|
iteration.apply = await applyPatch({
|
|
367
391
|
cwd: repoRoot,
|
|
368
392
|
proposal: winnerProposal,
|
|
369
|
-
iterationIndex: iteration.index
|
|
393
|
+
iterationIndex: iteration.index,
|
|
394
|
+
taskMode: state.taskMode ?? "code",
|
|
395
|
+
spec: state.goal?.spec ?? null
|
|
370
396
|
});
|
|
371
397
|
|
|
372
398
|
if (!iteration.apply.applied && !iteration.apply.empty) {
|
|
@@ -411,7 +437,9 @@ export async function runIteration({ repoRoot, options = {} }) {
|
|
|
411
437
|
iteration.validate = await runValidation({
|
|
412
438
|
cwd: repoRoot,
|
|
413
439
|
goal: state.goal,
|
|
414
|
-
previousIteration: state.iterations[state.iterations.length - 1]
|
|
440
|
+
previousIteration: state.iterations[state.iterations.length - 1],
|
|
441
|
+
spec: state.goal?.spec ?? null,
|
|
442
|
+
implicitValidators: adapter.getImplicitValidators(state.goal?.spec)
|
|
415
443
|
});
|
|
416
444
|
|
|
417
445
|
if (iteration.validate.regression && iteration.apply.applied && iteration.apply.preSha) {
|