@jhlee0619/codexloop 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +34 -0
- package/.claude-plugin/plugin.json +8 -0
- package/.codex-plugin/plugin.json +38 -0
- package/LICENSE +21 -0
- package/README.md +425 -0
- package/assets/banner.png +0 -0
- package/bin/cloop +45 -0
- package/commands/iterate.md +25 -0
- package/commands/model.md +33 -0
- package/commands/result.md +17 -0
- package/commands/start.md +188 -0
- package/commands/status.md +10 -0
- package/commands/stop.md +12 -0
- package/package.json +60 -0
- package/prompts/evaluate.md +91 -0
- package/prompts/rank.md +97 -0
- package/prompts/suggest.md +69 -0
- package/schemas/evaluation.schema.json +65 -0
- package/schemas/loop-state.schema.json +103 -0
- package/schemas/proposal.schema.json +74 -0
- package/schemas/ranking.schema.json +77 -0
- package/scripts/lib/apply.mjs +254 -0
- package/scripts/lib/args.mjs +202 -0
- package/scripts/lib/codex-exec.mjs +318 -0
- package/scripts/lib/convergence.mjs +153 -0
- package/scripts/lib/iteration.mjs +484 -0
- package/scripts/lib/process.mjs +164 -0
- package/scripts/lib/prompts.mjs +53 -0
- package/scripts/lib/rank.mjs +149 -0
- package/scripts/lib/render.mjs +240 -0
- package/scripts/lib/state.mjs +378 -0
- package/scripts/lib/validate.mjs +71 -0
- package/scripts/lib/workspace.mjs +49 -0
- package/scripts/loop-companion.mjs +849 -0
- package/skills/cloop/SKILL.md +177 -0
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/loop-state.schema.json",
|
|
4
|
+
"title": "CodexLoop State",
|
|
5
|
+
"description": "Canonical shape of <repo>/.loop/state.json. This schema is documentation for human readers and for the state.mjs runtime; it is NOT passed to Codex via --output-schema.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": [
|
|
8
|
+
"version",
|
|
9
|
+
"loopId",
|
|
10
|
+
"status",
|
|
11
|
+
"mode",
|
|
12
|
+
"goal",
|
|
13
|
+
"budget",
|
|
14
|
+
"convergence",
|
|
15
|
+
"iterations"
|
|
16
|
+
],
|
|
17
|
+
"properties": {
|
|
18
|
+
"version": { "type": "integer", "minimum": 1 },
|
|
19
|
+
"loopId": { "type": ["string", "null"] },
|
|
20
|
+
"status": {
|
|
21
|
+
"type": "string",
|
|
22
|
+
"enum": [
|
|
23
|
+
"idle",
|
|
24
|
+
"queued",
|
|
25
|
+
"running",
|
|
26
|
+
"paused",
|
|
27
|
+
"completed",
|
|
28
|
+
"failed",
|
|
29
|
+
"cancelled",
|
|
30
|
+
"crashed"
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
"pid": { "type": ["integer", "null"] },
|
|
34
|
+
"mode": { "type": "string", "enum": ["interactive", "background", "dry-run"] },
|
|
35
|
+
"startedAt": { "type": ["string", "null"], "format": "date-time" },
|
|
36
|
+
"lastIterationAt": { "type": ["string", "null"], "format": "date-time" },
|
|
37
|
+
"completedAt": { "type": ["string", "null"], "format": "date-time" },
|
|
38
|
+
"goal": {
|
|
39
|
+
"type": "object",
|
|
40
|
+
"required": ["text", "acceptanceCriteria"],
|
|
41
|
+
"properties": {
|
|
42
|
+
"text": { "type": "string" },
|
|
43
|
+
"acceptanceCriteria": { "type": "array", "items": { "type": "string" } },
|
|
44
|
+
"seedCommit": { "type": ["string", "null"] },
|
|
45
|
+
"relevantGlobs": { "type": "array", "items": { "type": "string" } },
|
|
46
|
+
"testCmd": { "type": ["string", "null"] },
|
|
47
|
+
"lintCmd": { "type": ["string", "null"] },
|
|
48
|
+
"typeCmd": { "type": ["string", "null"] },
|
|
49
|
+
"goalHash": { "type": ["string", "null"] }
|
|
50
|
+
}
|
|
51
|
+
},
|
|
52
|
+
"budget": {
|
|
53
|
+
"type": "object",
|
|
54
|
+
"required": ["maxIterations", "maxElapsedMs", "maxCodexCalls", "consumed"],
|
|
55
|
+
"properties": {
|
|
56
|
+
"maxIterations": { "type": "integer", "minimum": 1 },
|
|
57
|
+
"maxElapsedMs": { "type": "integer", "minimum": 0 },
|
|
58
|
+
"maxCodexCalls": { "type": "integer", "minimum": 0 },
|
|
59
|
+
"consumed": {
|
|
60
|
+
"type": "object",
|
|
61
|
+
"properties": {
|
|
62
|
+
"iterations": { "type": "integer", "minimum": 0 },
|
|
63
|
+
"elapsedMs": { "type": "integer", "minimum": 0 },
|
|
64
|
+
"codexCalls": { "type": "integer", "minimum": 0 },
|
|
65
|
+
"startedAtMs": { "type": ["integer", "null"] }
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
"convergence": {
|
|
71
|
+
"type": "object",
|
|
72
|
+
"properties": {
|
|
73
|
+
"epsilon": { "type": "number", "minimum": 0 },
|
|
74
|
+
"stableWindow": { "type": "integer", "minimum": 1 },
|
|
75
|
+
"scoreHistory": { "type": "array", "items": { "type": "number" } },
|
|
76
|
+
"stalledSince": { "type": ["integer", "null"] }
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
"iterations": {
|
|
80
|
+
"type": "array",
|
|
81
|
+
"items": {
|
|
82
|
+
"type": "object",
|
|
83
|
+
"required": ["index", "startedAt"],
|
|
84
|
+
"properties": {
|
|
85
|
+
"index": { "type": "integer", "minimum": 1 },
|
|
86
|
+
"startedAt": { "type": "string", "format": "date-time" },
|
|
87
|
+
"completedAt": { "type": ["string", "null"], "format": "date-time" },
|
|
88
|
+
"qualityScore": { "type": ["number", "null"] },
|
|
89
|
+
"qualityDelta": { "type": ["number", "null"] },
|
|
90
|
+
"acceptedProposalId": { "type": ["string", "null"] },
|
|
91
|
+
"stopReason": { "type": ["string", "null"] },
|
|
92
|
+
"error": { "type": ["string", "null"] }
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
},
|
|
96
|
+
"accepted": { "type": "array" },
|
|
97
|
+
"rejected": { "type": "array" },
|
|
98
|
+
"openIssues": { "type": "array" },
|
|
99
|
+
"openIssuesInitial": { "type": "integer", "minimum": 0 },
|
|
100
|
+
"stopReason": { "type": ["string", "null"] },
|
|
101
|
+
"error": { "type": ["object", "string", "null"] }
|
|
102
|
+
}
|
|
103
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/proposal.schema.json",
|
|
4
|
+
"title": "CodexLoop Proposal Set",
|
|
5
|
+
"description": "Structured JSON that Codex must return for the suggest step: ALWAYS at least two proposals. Passed to `codex exec --output-schema`.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["proposals"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"proposals": {
|
|
11
|
+
"type": "array",
|
|
12
|
+
"minItems": 2,
|
|
13
|
+
"maxItems": 5,
|
|
14
|
+
"items": {
|
|
15
|
+
"type": "object",
|
|
16
|
+
"additionalProperties": false,
|
|
17
|
+
"required": [
|
|
18
|
+
"id",
|
|
19
|
+
"approach",
|
|
20
|
+
"patch",
|
|
21
|
+
"justification",
|
|
22
|
+
"estimatedRisk",
|
|
23
|
+
"estimatedImpact",
|
|
24
|
+
"filesTouched",
|
|
25
|
+
"modifiesTests"
|
|
26
|
+
],
|
|
27
|
+
"properties": {
|
|
28
|
+
"id": {
|
|
29
|
+
"type": "string",
|
|
30
|
+
"pattern": "^[a-z][a-z0-9_-]*$",
|
|
31
|
+
"description": "Short stable id like 'a', 'b', 'c'."
|
|
32
|
+
},
|
|
33
|
+
"approach": {
|
|
34
|
+
"type": "string",
|
|
35
|
+
"description": "One-sentence summary of what this proposal does and why."
|
|
36
|
+
},
|
|
37
|
+
"patch": {
|
|
38
|
+
"type": "string",
|
|
39
|
+
"description": "Unified diff ready for `git apply`. If no code change is needed, use the empty string."
|
|
40
|
+
},
|
|
41
|
+
"justification": {
|
|
42
|
+
"type": "string",
|
|
43
|
+
"description": "Why this proposal addresses the open issues identified in evaluation."
|
|
44
|
+
},
|
|
45
|
+
"estimatedRisk": {
|
|
46
|
+
"type": "string",
|
|
47
|
+
"enum": ["low", "medium", "high"]
|
|
48
|
+
},
|
|
49
|
+
"estimatedImpact": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"enum": ["low", "medium", "high"]
|
|
52
|
+
},
|
|
53
|
+
"filesTouched": {
|
|
54
|
+
"type": "array",
|
|
55
|
+
"items": { "type": "string" }
|
|
56
|
+
},
|
|
57
|
+
"modifiesTests": {
|
|
58
|
+
"type": "boolean",
|
|
59
|
+
"description": "True if `patch` modifies, adds, or deletes any test file."
|
|
60
|
+
},
|
|
61
|
+
"testChangeJustification": {
|
|
62
|
+
"type": ["string", "null"],
|
|
63
|
+
"description": "REQUIRED (non-empty) when modifiesTests is true. Explain what the test change is and why it is not reward-hacking."
|
|
64
|
+
},
|
|
65
|
+
"reviewNotes": {
|
|
66
|
+
"type": "array",
|
|
67
|
+
"items": { "type": "string" },
|
|
68
|
+
"description": "Self-review notes: known shortcomings, assumptions, follow-ups."
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://codexloop/ranking.schema.json",
|
|
4
|
+
"title": "CodexLoop Ranking Output",
|
|
5
|
+
"description": "Structured JSON that Codex must return for the rank step. One 'judge' call compares all proposals against six dimensions.",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"required": ["scores", "winner", "rejections"],
|
|
9
|
+
"properties": {
|
|
10
|
+
"scores": {
|
|
11
|
+
"type": "array",
|
|
12
|
+
"minItems": 2,
|
|
13
|
+
"items": {
|
|
14
|
+
"type": "object",
|
|
15
|
+
"additionalProperties": false,
|
|
16
|
+
"required": [
|
|
17
|
+
"proposalId",
|
|
18
|
+
"correctness",
|
|
19
|
+
"requirementSatisfaction",
|
|
20
|
+
"simplicity",
|
|
21
|
+
"maintainability",
|
|
22
|
+
"riskInverse",
|
|
23
|
+
"testability",
|
|
24
|
+
"weighted"
|
|
25
|
+
],
|
|
26
|
+
"properties": {
|
|
27
|
+
"proposalId": { "type": "string" },
|
|
28
|
+
"correctness": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
29
|
+
"requirementSatisfaction": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
30
|
+
"simplicity": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
31
|
+
"maintainability": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
32
|
+
"riskInverse": {
|
|
33
|
+
"type": "number",
|
|
34
|
+
"minimum": 0,
|
|
35
|
+
"maximum": 1,
|
|
36
|
+
"description": "1 = very safe, 0 = extremely risky. Inverted so higher is always better."
|
|
37
|
+
},
|
|
38
|
+
"testability": { "type": "number", "minimum": 0, "maximum": 1 },
|
|
39
|
+
"weighted": {
|
|
40
|
+
"type": "number",
|
|
41
|
+
"minimum": 0,
|
|
42
|
+
"maximum": 1,
|
|
43
|
+
"description": "The judge's own weighted sum. Runtime will re-compute from dimensional scores and override if they disagree."
|
|
44
|
+
},
|
|
45
|
+
"notes": { "type": "string" }
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
},
|
|
49
|
+
"winner": {
|
|
50
|
+
"type": "object",
|
|
51
|
+
"additionalProperties": false,
|
|
52
|
+
"required": ["id", "justification"],
|
|
53
|
+
"properties": {
|
|
54
|
+
"id": { "type": "string" },
|
|
55
|
+
"justification": {
|
|
56
|
+
"type": "string",
|
|
57
|
+
"description": "Why this proposal beats the alternatives on the dimensional rubric."
|
|
58
|
+
},
|
|
59
|
+
"confidence": {
|
|
60
|
+
"type": "number",
|
|
61
|
+
"minimum": 0,
|
|
62
|
+
"maximum": 1,
|
|
63
|
+
"description": "How confident the judge is in this pick."
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
},
|
|
67
|
+
"rejections": {
|
|
68
|
+
"type": "object",
|
|
69
|
+
"description": "Map of rejected proposal id to a concrete 'why not' reason.",
|
|
70
|
+
"additionalProperties": { "type": "string" }
|
|
71
|
+
},
|
|
72
|
+
"tiebreaker": {
|
|
73
|
+
"type": ["string", "null"],
|
|
74
|
+
"description": "If applied, a short explanation of the tiebreaker (correctness, risk, patch size)."
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
// Patch application with hard reward-hacking guards and atomic rollback.
|
|
2
|
+
//
|
|
3
|
+
// Preconditions: the target repo MUST be a git repo with a clean working tree
|
|
4
|
+
// at the start of each iteration. `applyPatch` refuses to run if the working
|
|
5
|
+
// tree has uncommitted changes so there is never any ambiguity about what the
|
|
6
|
+
// iteration mutated.
|
|
7
|
+
//
|
|
8
|
+
// On success, the patch is applied and committed with a codexloop signature
|
|
9
|
+
// message. On any failure (patch rejection, commit failure, reward-hacking
|
|
10
|
+
// detection, or a later regression in validate), we `git reset --hard` back
|
|
11
|
+
// to the recorded preSha and mark the iteration's apply record accordingly.
|
|
12
|
+
|
|
13
|
+
import fs from "node:fs";
|
|
14
|
+
import os from "node:os";
|
|
15
|
+
import path from "node:path";
|
|
16
|
+
import process from "node:process";
|
|
17
|
+
|
|
18
|
+
import { runCommand } from "./process.mjs";
|
|
19
|
+
|
|
20
|
+
// Path shapes that are treated as test files: conventional test directories
// (tests/, test/, __tests__/, spec/) and conventional file suffixes
// (*.test.ext, *.spec.ext, *_test.ext).
const TEST_FILE_PATTERNS = [
  /(^|\/)tests?\//i,
  /(^|\/)__tests__\//i,
  /(^|\/)spec\//i,
  /\.test\.[a-zA-Z0-9]+$/,
  /\.spec\.[a-zA-Z0-9]+$/,
  /_test\.[a-zA-Z0-9]+$/
];

// Added-line shapes that disable or narrow a test run.
const TEST_DISABLE_PATTERNS = [
  /\.(skip|only|xit|xdescribe|todo)\b/,
  /\b(test|it|describe)\.skip\b/,
  /\bxit\s*\(/,
  /\bxdescribe\s*\(/
];

const GIT_DIFF_HEADER_RE = /^diff --git a\/(.+?) b\/(.+)$/;
const HUNK_HEADER_RE = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;

/**
 * Report whether a repo-relative path looks like a test file.
 *
 * @param {unknown} filePath - Candidate path using forward slashes.
 * @returns {boolean} True when the path matches any TEST_FILE_PATTERNS entry;
 *   false for empty or non-string input.
 */
export function isTestPath(filePath) {
  if (!filePath || typeof filePath !== "string") return false;
  return TEST_FILE_PATTERNS.some((re) => re.test(filePath));
}

// Parse a `--- <path>` / `+++ <path>` file header line. Drops an optional
// tab-separated timestamp, surrounding double quotes, and a leading a/ or b/
// prefix. `/dev/null` is reported separately because it marks a created or
// deleted file rather than a real path.
function readFileHeader(line) {
  let target = line.slice(4).split("\t")[0].trim();
  if (target.length >= 2 && target.startsWith('"') && target.endsWith('"')) {
    target = target.slice(1, -1);
  }
  if (target === "/dev/null") return { isDevNull: true, filePath: null };
  const filePath = target.replace(/^[ab]\//, "");
  return { isDevNull: false, filePath: filePath || null };
}

/**
 * Parse a unified diff looking for reward-hacking patterns.
 *
 * Detects three kinds of problems:
 *  - "missing-test-justification": the proposal says it modifies tests but
 *    its testChangeJustification is missing or shorter than 20 characters;
 *  - "test-file-deleted": the patch deletes a file that isTestPath() accepts
 *    (via a `deleted file mode` line or a `+++ /dev/null` header);
 *  - "test-disabled": a line added to a test file matches one of
 *    TEST_DISABLE_PATTERNS.
 *
 * Both git-style patches (`diff --git` headers) and plain unified diffs that
 * only carry `---` / `+++` headers are understood, with LF or CRLF endings.
 *
 * @param {unknown} patch - Unified diff text; non-string or empty input
 *   yields no findings.
 * @param {object} [proposal] - Proposal the patch belongs to; only
 *   `modifiesTests` and `testChangeJustification` are read.
 * @returns {Array<object>} Findings; empty means the patch looks clean.
 */
export function scanPatchForRewardHacking(patch, proposal) {
  const findings = [];
  if (!patch || typeof patch !== "string") return findings;

  // Missing justification when proposal claims to modify tests.
  if (proposal?.modifiesTests === true) {
    const justification = String(proposal.testChangeJustification ?? "").trim();
    if (justification.length < 20) {
      findings.push({
        kind: "missing-test-justification",
        severity: "high",
        summary: "proposal.modifiesTests is true but testChangeJustification is missing or < 20 chars"
      });
    }
  }

  // A git-style deletion carries both `deleted file mode` and `+++ /dev/null`;
  // remember reported files so each deletion is only listed once.
  const reportedDeletions = new Set();
  const reportDeletion = (file) => {
    if (!file || !isTestPath(file) || reportedDeletions.has(file)) return;
    reportedDeletions.add(file);
    findings.push({
      kind: "test-file-deleted",
      severity: "critical",
      file,
      summary: `patch deletes test file ${file}`
    });
  };

  const checkAddedLine = (line, file) => {
    if (!file || !isTestPath(file)) return;
    if (!TEST_DISABLE_PATTERNS.some((re) => re.test(line))) return;
    findings.push({
      kind: "test-disabled",
      severity: "high",
      file,
      snippet: line.slice(1).trim().slice(0, 160),
      summary: `patch adds test skip/only/xit in ${file}`
    });
  };

  let headerFileA = null;
  let headerFileB = null;
  // Lines still owed by the current hunk according to its @@ header. While a
  // hunk is open, lines starting with "---" / "+++" are content, not headers.
  let oldLinesLeft = 0;
  let newLinesLeft = 0;

  // Split on LF or CRLF: a stray "\r" would otherwise stop the header regex
  // from matching and silently disable every path-based check below.
  for (const line of patch.split(/\r?\n/)) {
    if (oldLinesLeft > 0 || newLinesLeft > 0) {
      const marker = line[0];
      if (marker === "+") {
        newLinesLeft -= 1;
        checkAddedLine(line, headerFileB);
        continue;
      }
      if (marker === "-") {
        oldLinesLeft -= 1;
        continue;
      }
      if (marker === "\\") continue; // "\ No newline at end of file"
      if (marker === " " || marker === undefined) {
        oldLinesLeft -= 1;
        newLinesLeft -= 1;
        continue;
      }
      // Not hunk content: the hunk is shorter than declared. Close it and
      // interpret this line as a header below.
      oldLinesLeft = 0;
      newLinesLeft = 0;
    }

    if (line.startsWith("diff --git ")) {
      // Always reset so an unparseable header (e.g. quoted paths) cannot
      // inherit the previous file's paths; the ---/+++ lines fill them in.
      const match = GIT_DIFF_HEADER_RE.exec(line);
      headerFileA = match ? match[1] : null;
      headerFileB = match ? match[2] : null;
      continue;
    }

    if (line.startsWith("deleted file mode")) {
      reportDeletion(headerFileA);
      continue;
    }

    if (line.startsWith("new file mode")) {
      continue;
    }

    if (line.startsWith("--- ")) {
      headerFileA = readFileHeader(line).filePath;
      continue;
    }

    if (line.startsWith("+++ ")) {
      const header = readFileHeader(line);
      if (header.isDevNull) {
        reportDeletion(headerFileA);
        headerFileB = null;
      } else if (header.filePath) {
        headerFileB = header.filePath;
      }
      continue;
    }

    const hunk = HUNK_HEADER_RE.exec(line);
    if (hunk) {
      // An omitted count in "@@ -l[,s] +l[,s] @@" means one line.
      oldLinesLeft = hunk[1] === undefined ? 1 : Number(hunk[1]);
      newLinesLeft = hunk[2] === undefined ? 1 : Number(hunk[2]);
      continue;
    }

    // Added line outside any recognised hunk: still inspect it so malformed
    // hunk headers cannot hide a skip/only/xit.
    if (line.startsWith("+") && !line.startsWith("+++")) {
      checkAddedLine(line, headerFileB);
    }
  }
  return findings;
}
|
|
113
|
+
|
|
114
|
+
/**
 * Resolve the commit that HEAD points at in the repository at `cwd`.
 *
 * @param {string} cwd - Directory in which `git rev-parse HEAD` is run.
 * @returns {string|null} Trimmed command output, or null when the command
 *   exits non-zero or prints nothing.
 */
function getHead(cwd) {
  const { status, stdout } = runCommand("git", ["rev-parse", "HEAD"], { cwd });
  if (status === 0) {
    const sha = stdout.trim();
    return sha === "" ? null : sha;
  }
  return null;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Decide whether the working tree at `cwd` must be treated as dirty.
 *
 * @param {string} cwd - Directory in which `git status --porcelain` is run.
 * @returns {boolean} True when the command fails or reports any entry;
 *   false only when it succeeds with blank output.
 */
function workingTreeDirty(cwd) {
  const result = runCommand("git", ["status", "--porcelain"], { cwd });
  // A failing status call is reported as dirty rather than clean.
  if (result.status !== 0) return true;
  const listing = result.stdout || "";
  return listing.trim().length > 0;
}
|
|
124
|
+
|
|
125
|
+
/**
 * Run `git reset --hard <sha>` in `cwd`.
 *
 * @param {string} cwd - Directory in which git is run.
 * @param {string|null|undefined} sha - Commit to reset to.
 * @returns {{ok: boolean, reason?: string, detail?: string}}
 *   `{ ok: false, reason: "no-sha" }` when `sha` is falsy (git is not run);
 *   otherwise `ok` reflects the exit status and `detail` carries the trimmed
 *   stderr, falling back to stdout.
 */
function hardReset(cwd, sha) {
  if (sha) {
    const outcome = runCommand("git", ["reset", "--hard", sha], { cwd });
    const detail = (outcome.stderr || outcome.stdout || "").trim();
    return { ok: outcome.status === 0, detail };
  }
  return { ok: false, reason: "no-sha" };
}
|
|
130
|
+
|
|
131
|
+
/**
 * Exported entry point for hard-resetting the repository at `cwd` to `sha`.
 *
 * @param {string} cwd - Directory in which git is run.
 * @param {string|null|undefined} sha - Commit to reset to.
 * @returns {{ok: boolean, reason?: string, detail?: string}} The result of
 *   hardReset, passed through unchanged.
 */
export function rollbackToSha(cwd, sha) {
  const outcome = hardReset(cwd, sha);
  return outcome;
}
|
|
134
|
+
|
|
135
|
+
/**
 * Build the environment used for git commands that create the commit.
 *
 * Copies the current process environment and fills in the four git identity
 * variables with CodexLoop defaults when they are not already set. A variable
 * that is set (even to an empty string) is left as is.
 *
 * @returns {Object<string, string>} A new environment object.
 */
function buildCommitEnv() {
  const identityDefaults = [
    ["GIT_AUTHOR_NAME", "CodexLoop"],
    ["GIT_AUTHOR_EMAIL", "codexloop@local"],
    ["GIT_COMMITTER_NAME", "CodexLoop"],
    ["GIT_COMMITTER_EMAIL", "codexloop@local"]
  ];
  const env = { ...process.env };
  for (const [key, fallback] of identityDefaults) {
    env[key] = process.env[key] ?? fallback;
  }
  return env;
}
|
|
144
|
+
|
|
145
|
+
// Apply a proposal's patch to the target repo. Returns a structured record
|
|
146
|
+
// the iteration layer records verbatim.
|
|
147
|
+
// Undo a partially completed apply and record whether the undo really
// happened, so `record.rolledBack` never claims a reset that did not occur.
function rollbackFailedApply(cwd, record) {
  const reset = hardReset(cwd, record.preSha);
  record.rolledBack = reset.ok;
  if (!reset.ok) {
    const why = reset.reason || reset.detail || "unknown";
    record.error = `${record.error} (rollback failed: ${String(why).slice(0, 200)})`;
  }
}

/**
 * Apply a proposal's patch to the repository at `cwd` and commit it.
 *
 * Steps, in order: record HEAD as `preSha`; scan the patch for reward-hacking
 * findings and stop if any is found; stop with `empty: true` for a blank
 * patch; refuse to run on a dirty working tree; write the patch to a temp
 * file; `git apply --check`; `git apply`; `git add -A`; `git commit`. If
 * apply, add, or commit fails, the repository is hard-reset to `preSha`.
 * The temp file is always removed.
 *
 * @param {object} args
 * @param {string} args.cwd - Repository directory.
 * @param {object} args.proposal - Proposal; `patch`, `id`, `filesTouched`,
 *   `modifiesTests` and `testChangeJustification` are read.
 * @param {number} args.iterationIndex - Iteration number used in the commit
 *   message.
 * @returns {Promise<object>} Record with `applied`, `empty`, `filesTouched`,
 *   `preSha`, `postSha`, `hackingFindings`, `conflicts`, `error` and
 *   `rolledBack`. `rolledBack` is true only when the hard reset succeeded;
 *   when it did not, the reason is appended to `error`.
 */
export async function applyPatch({ cwd, proposal, iterationIndex }) {
  const patch = proposal?.patch ?? "";
  const record = {
    applied: false,
    empty: false,
    filesTouched: Array.isArray(proposal?.filesTouched) ? [...proposal.filesTouched] : [],
    preSha: null,
    postSha: null,
    hackingFindings: [],
    conflicts: null,
    error: null,
    rolledBack: false
  };

  record.preSha = getHead(cwd);

  record.hackingFindings = scanPatchForRewardHacking(patch, proposal);
  const blocking = record.hackingFindings.find(
    (f) =>
      f.kind === "test-file-deleted" ||
      f.kind === "test-disabled" ||
      f.kind === "missing-test-justification"
  );
  if (blocking) {
    // Nothing has been written yet, so no rollback is needed.
    record.error = `reward-hacking-guard:${blocking.kind}${blocking.file ? `:${blocking.file}` : ""}`;
    return record;
  }

  if (!patch.trim()) {
    record.empty = true;
    return record;
  }

  if (workingTreeDirty(cwd)) {
    const status = runCommand("git", ["status", "--porcelain"], { cwd });
    record.error =
      `working tree is not clean before apply: ${(status.stdout || "").trim().split("\n").slice(0, 5).join(" | ")}`;
    return record;
  }

  const tmpPatch = path.join(
    os.tmpdir(),
    `codexloop-patch-${process.pid}-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}.diff`
  );
  // The written patch always ends with a newline.
  fs.writeFileSync(tmpPatch, patch.endsWith("\n") ? patch : `${patch}\n`, "utf8");

  try {
    // Dry run first: a rejected patch leaves the tree untouched.
    const check = runCommand("git", ["apply", "--check", tmpPatch], { cwd });
    if (check.status !== 0) {
      record.conflicts = (check.stderr || check.stdout || "unknown conflict").trim();
      record.error = `git apply --check failed: ${record.conflicts.slice(0, 300)}`;
      return record;
    }

    const apply = runCommand("git", ["apply", tmpPatch], { cwd });
    if (apply.status !== 0) {
      record.error = `git apply failed: ${(apply.stderr || apply.stdout || "").trim().slice(0, 300)}`;
      rollbackFailedApply(cwd, record);
      return record;
    }

    const env = buildCommitEnv();
    const addRes = runCommand("git", ["add", "-A"], { cwd, env });
    if (addRes.status !== 0) {
      record.error = `git add failed: ${(addRes.stderr || "").trim().slice(0, 300)}`;
      rollbackFailedApply(cwd, record);
      return record;
    }

    const commitMsg =
      `cloop: iter ${iterationIndex} apply ${proposal?.id ?? "?"}`;
    const commitRes = runCommand(
      "git",
      ["commit", "-m", commitMsg],
      { cwd, env }
    );
    if (commitRes.status !== 0) {
      record.error =
        `git commit failed: ${(commitRes.stderr || commitRes.stdout || "").trim().slice(0, 400)}`;
      rollbackFailedApply(cwd, record);
      return record;
    }

    record.postSha = getHead(cwd);
    record.applied = true;

    // The proposal did not list its files: derive them from the commit.
    if (record.filesTouched.length === 0 && record.preSha && record.postSha) {
      const diffFiles = runCommand(
        "git",
        ["diff", "--name-only", `${record.preSha}..${record.postSha}`],
        { cwd }
      );
      if (diffFiles.status === 0) {
        record.filesTouched = (diffFiles.stdout || "")
          .split("\n")
          .map((l) => l.trim())
          .filter(Boolean);
      }
    }

    return record;
  } finally {
    // Best-effort cleanup: a leftover temp file must not mask the result.
    try { fs.unlinkSync(tmpPatch); } catch {}
  }
}
|