code-review-forge 2.0.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_forge/__init__.py +14 -0
- code_forge/__main__.py +8 -0
- code_forge/autofix.py +78 -0
- code_forge/baseline.py +216 -0
- code_forge/cli.py +983 -0
- code_forge/delta.py +65 -0
- code_forge/diagnose.py +109 -0
- code_forge/diff.py +82 -0
- code_forge/disposition.py +32 -0
- code_forge/e2e_check.py +641 -0
- code_forge/env_resolver.py +91 -0
- code_forge/errors.py +34 -0
- code_forge/exit_codes.py +37 -0
- code_forge/factories.py +191 -0
- code_forge/falsify.py +85 -0
- code_forge/gate_check.py +466 -0
- code_forge/git.py +351 -0
- code_forge/hold.py +126 -0
- code_forge/install_hooks.py +331 -0
- code_forge/lock.py +162 -0
- code_forge/machine.py +792 -0
- code_forge/mode_resolver.py +60 -0
- code_forge/mutation.py +380 -0
- code_forge/parsers/__init__.py +56 -0
- code_forge/parsers/_sarif.py +77 -0
- code_forge/parsers/base.py +65 -0
- code_forge/parsers/checkpatch.py +66 -0
- code_forge/parsers/clippy.py +85 -0
- code_forge/parsers/non_ascii.py +47 -0
- code_forge/parsers/ruff.py +18 -0
- code_forge/parsers/semgrep.py +18 -0
- code_forge/parsers/shellcheck.py +56 -0
- code_forge/registry.py +153 -0
- code_forge/reporter.py +133 -0
- code_forge/runner.py +205 -0
- code_forge/sarif.py +226 -0
- code_forge/skills/adversarial-qe/SKILL.md +272 -0
- code_forge/skills/code-forge/SKILL.md +1193 -0
- code_forge/skills/code-review-expert/SKILL.md +162 -0
- code_forge/skills/code-review-expert/references/code-quality-checklist.md +130 -0
- code_forge/skills/code-review-expert/references/removal-plan.md +52 -0
- code_forge/skills/code-review-expert/references/security-checklist.md +118 -0
- code_forge/skills/code-review-expert/references/solid-checklist.md +65 -0
- code_forge/skills/kernel-fp-verify/SKILL.md +101 -0
- code_forge/skills/qodo-review/SKILL.md +135 -0
- code_forge/skills/smoke-test/SKILL.md +253 -0
- code_forge/skills/smoke-test/references/boundary-cases.md +114 -0
- code_forge/skills/smoke-test/references/concurrency-patterns.md +306 -0
- code_forge/skills/smoke-test/references/injection-payloads.md +124 -0
- code_forge/skills/smoke-test/test-library/shell/README.md +271 -0
- code_forge/skills/smoke-test/test-library/shell/primitives.sh +352 -0
- code_forge/skills/smoke-test/test-library/shell/primitives_test.sh +324 -0
- code_forge/snapshot.py +196 -0
- code_forge/source.py +64 -0
- code_forge/state.py +246 -0
- code_forge/verdict.py +43 -0
- code_review_forge-2.0.0a1.dist-info/METADATA +237 -0
- code_review_forge-2.0.0a1.dist-info/RECORD +62 -0
- code_review_forge-2.0.0a1.dist-info/WHEEL +5 -0
- code_review_forge-2.0.0a1.dist-info/entry_points.txt +2 -0
- code_review_forge-2.0.0a1.dist-info/licenses/LICENSE +179 -0
- code_review_forge-2.0.0a1.dist-info/top_level.txt +1 -0
code_forge/machine.py
ADDED
|
@@ -0,0 +1,792 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
|
|
3
|
+
"""State machine core: loop-until-fixpoint orchestration.
|
|
4
|
+
|
|
5
|
+
Owned by 02-02. Mode-agnostic core; mode (LOCAL / CI) is a constructor
|
|
6
|
+
parameter (D3). Mode resolution (TTY / env / flag) is 02-04 + 02-05.
|
|
7
|
+
|
|
8
|
+
Per-round flow (STATE-08 ordering):
|
|
9
|
+
1. _run_l0_phase: L0 detect -> auto-CONFIRMED StateFindings (GATE-04a)
|
|
10
|
+
2. LOCAL only: _apply_autofix_loop_to(l0_findings) (STATE-03)
|
|
11
|
+
3. _run_l1_phase: L1 candidates -> falsify -> Disposition
|
|
12
|
+
4. _run_l2_phase + _run_e2e_phase, then merge all layers by fingerprint
(FP-04: L0 wins; DISPO-05 UNCERTAIN sticks)
|
|
13
|
+
5. Update state.round_history; save_state
|
|
14
|
+
6. Convergence check / HOLD check
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import hashlib
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import threading
|
|
23
|
+
import time
|
|
24
|
+
from dataclasses import dataclass, field
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Callable, Optional
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
|
|
30
|
+
from .autofix import AutoFixer, FixOutcome
|
|
31
|
+
from .baseline import ResolvedReview
|
|
32
|
+
from .diagnose import diagnose_non_convergence
|
|
33
|
+
from .disposition import (
|
|
34
|
+
Disposition,
|
|
35
|
+
MAX_FIX_ATTEMPTS_PER_FINGERPRINT,
|
|
36
|
+
)
|
|
37
|
+
from .falsify import Falsifier
|
|
38
|
+
from .hold import check_escalated_frozen
|
|
39
|
+
from .parsers.base import Finding, ToolError
|
|
40
|
+
from .state import (
|
|
41
|
+
Mode,
|
|
42
|
+
State,
|
|
43
|
+
StateFinding,
|
|
44
|
+
Verdict,
|
|
45
|
+
load_state,
|
|
46
|
+
save_state,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# L1 candidate provider type alias.
|
|
50
|
+
L1Provider = Callable[[], list[StateFinding]]
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _default_l0_runner(
    registry: dict, files: list[Path]
) -> tuple[list[StateFinding], list[str]]:
    """Run the registered L0 tools and adapt their output to StateFindings.

    Every parsed ``Finding`` becomes a CONFIRMED ``StateFinding`` with
    source "L0". Its ``id`` and ``fingerprint`` are both the first 16 hex
    digits of sha256 over "tool:file:line:rule_id". ``ToolError`` items are
    never promoted to findings; they are returned as infra error strings.

    Args:
        registry: maps a tool name to a config object whose
            ``output_format`` selects the parser; also passed to
            ``run_tools`` unchanged.
        files: paths to analyse; stringified before being handed to
            ``run_tools``.

    Returns:
        ``(state_findings, infra_errors)``.
    """
    from .parsers import parse_output
    from .runner import run_tools

    outputs, _versions, _skipped = run_tools(registry, list(map(str, files)))
    adapted: list[StateFinding] = []
    tool_errors: list[str] = []

    for tool_name, captured in outputs.items():
        # stderr is captured by the runner but not consulted here.
        stdout, returncode, _stderr = captured
        output_format = registry[tool_name].output_format
        parsed_items = parse_output(
            stdout, output_format, tool_name, returncode
        )
        for parsed in parsed_items:
            if isinstance(parsed, ToolError):
                tool_errors.append(
                    "L0 ToolError tool=%s msg=%s"
                    % (tool_name, parsed.message)
                )
                continue

            # Interim fingerprint: truncated hash of the finding's identity.
            identity = "%s:%s:%s:%s" % (
                tool_name, parsed.file, parsed.line, parsed.rule_id
            )
            digest = hashlib.sha256(identity.encode("utf-8")).hexdigest()
            short_fp = digest[:16]
            adapted.append(
                StateFinding(
                    id=short_fp,
                    fingerprint=short_fp,
                    source="L0",
                    disposition=Disposition.CONFIRMED,
                    file=parsed.file,
                    line_range=[parsed.line, parsed.end_line],
                    description=parsed.message,
                    error=None,
                    anchor=None,
                    evidence_files=[],
                )
            )
    return adapted, tool_errors
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class StateMachine:
    """Forge state machine. Constructor wires dependencies; .run() executes.

    Parameters:
        mode: Mode.LOCAL or Mode.CI
        falsifier: Falsifier impl (StubFalsifier for tests)
        autofixer: AutoFixer impl (StubAutoFixer for tests)
        revert_fn: Callable[[StateFinding], None] -- invoked on PARSE_FAIL
        resolved_review: from 02-03 resolve_baseline
        source_hash: from 02-03 compute_source_hash
        baseline_spec_repr: from 02-03 serialize_baseline_spec
        cwd: working directory (state.json at cwd/.code-forge/state.json)
        registry: dict[str, ToolConfig] passed to l0_runner
        l0_runner: callable (registry, files) -> (findings, infra_errors)
        l1_provider: callable returning L1 candidates (default: no L1)
        l2_runner: callable (diff_files, baseline_cmd) -> (findings, infra_errors)
        e2e_runner: callable (diff_text, repo_root) -> (findings, infra_errors)
        post_round_hook: optional callable for test observability (R1 H6)
        max_total_rounds: STATE-04 LOCAL bound (default 20)
        max_fix_attempts: per-fingerprint budget (default from disposition)
    """
    # Execution mode; run() dispatches to _run_local or _run_ci on it.
    mode: Mode
    # falsify(finding) assigns the Disposition of each L1 candidate.
    falsifier: Falsifier
    # fix(finding, mode_hint) is tried on CONFIRMED L0 findings (LOCAL only).
    autofixer: AutoFixer
    # Called with the finding when an autofix attempt ends in PARSE_FAIL.
    revert_fn: Callable[[StateFinding], None]
    # Read for source_files, git_diff and mode_hint.
    resolved_review: ResolvedReview
    # Copied onto the state object at the start of run().
    source_hash: str
    # Copied onto the state object at the start of run().
    baseline_spec_repr: str
    # Root directory; state.json, gate.yaml and mutation-result.json are
    # looked up under cwd/.code-forge/.
    cwd: Path
    # Handed to l0_runner unchanged.
    registry: dict
    # Layer runners. The l1/l2/e2e defaults produce no findings and no
    # infra errors.
    l0_runner: Callable = field(default=_default_l0_runner)
    l1_provider: L1Provider = field(default=lambda: [])
    l2_runner: Callable = field(
        default=lambda diff_files, baseline_cmd: ([], [])
    )
    e2e_runner: Callable = field(
        default=lambda diff_text, repo_root: ([], [])
    )
    # Called with the round index after each round has been persisted.
    post_round_hook: Optional[Callable[[int], None]] = None
    # Upper bound on LOCAL loop iterations.
    max_total_rounds: int = 20
    # Fix attempts allowed per fingerprint before promotion to UNCERTAIN.
    max_fix_attempts: int = MAX_FIX_ATTEMPTS_PER_FINGERPRINT
    # Mutable run state; not a constructor argument (init=False).
    _state: State = field(default_factory=State, init=False)
|
|
145
|
+
|
|
146
|
+
def run(self) -> Verdict:
    """Execute the machine in the configured mode and return its verdict.

    Loads prior state first (a no-op in CI mode), stamps the mode, source
    hash and baseline spec onto the state, then hands over to the LOCAL or
    CI driver.

    Raises:
        ValueError: if ``mode`` is neither Mode.LOCAL nor Mode.CI.
    """
    self._maybe_load_prior_state()

    # Read self._state only after the load above, which may replace it.
    state = self._state
    state.mode = self.mode
    state.source_hash = self.source_hash
    state.baseline_spec_repr = self.baseline_spec_repr

    is_local = self.mode == Mode.LOCAL
    if not is_local and not (self.mode == Mode.CI):
        raise ValueError("unknown mode: %s" % self.mode)
    driver = self._run_local if is_local else self._run_ci
    return driver()
|
|
157
|
+
|
|
158
|
+
def _maybe_load_prior_state(self) -> None:
    """Adopt a previously saved state.json in LOCAL mode; never in CI.

    CI mode (STATE-09) starts from a fresh state on every run; if a prior
    state file is present it is left untouched and a warning is logged on
    the "code_forge" logger. In LOCAL mode the file, when present, is read
    with ``load_state`` and replaces the in-memory state unless the loader
    returns None. Exceptions raised by ``load_state`` are not caught here.
    """
    state_path = self.cwd / ".code-forge" / "state.json"
    has_prior = state_path.exists()

    if self.mode == Mode.CI:
        if has_prior:
            logging.getLogger("code_forge").warning(
                "ignoring prior state.json in CI mode (STATE-09)"
            )
        return

    if not has_prior:
        return

    restored = load_state(state_path)
    if restored is not None:
        self._state = restored
|
|
176
|
+
|
|
177
|
+
def _run_ci(self) -> Verdict:
    """CI: linear single round; FAIL on any CONFIRMED, else PASS.

    Steps:
      1. Execute round 0.
      2. If ``.code-forge/mutation-result.json`` exists, inspect it:
         - status "done" with survivors -> FAIL immediately
         - status "error"               -> FAIL immediately
         - status "running", PID alive  -> note it and do not launch again
         - status "running", PID gone   -> add a DISMISSED
           MUTATION_SKIPPED finding and delete the file
      3. Unless a recorded mutation PID is still alive, launch a new
         background mutation run (or record why it was skipped).
      4. Verdict from the findings: FAIL if any CONFIRMED, else PASS.

    Per R1 H5: converged=True on PASS only; FAIL exits early so
    converged=False.
    02-02: Added async mutation result check and launch.

    Returns:
        Verdict.FAIL or Verdict.PASS. The "PID still running" case used to
        hit a bare ``return`` and hand ``None`` back to ``run()`` without
        setting a verdict or persisting state; it now only suppresses the
        new launch and continues to the normal verdict.
    """
    self._execute_round(round_index=0)

    # Check for prior mutation result
    result_path = self.cwd / ".code-forge" / "mutation-result.json"
    mutation_in_flight = False
    if result_path.exists():
        try:
            with open(result_path, "r", encoding="utf-8") as f:
                result_data = json.load(f)

            # A payload that is not a JSON object (e.g. ``null``) cannot
            # carry a status; report it instead of raising TypeError.
            if (
                not isinstance(result_data, dict)
                or "status" not in result_data
            ):
                self._state.infra_errors.append(
                    "CI: mutation-result.json missing status field"
                )
            else:
                status = result_data["status"]
                if status == "done":
                    survivors = result_data.get("survivors", [])
                    if survivors:
                        self._state.verdict = Verdict.FAIL
                        self._state.converged = False
                        self._state.infra_errors.append(
                            "CI: mutation survivors found: %d survivors"
                            % len(survivors)
                        )
                        self._persist_state()
                        return Verdict.FAIL
                elif status == "running":
                    pid = result_data.get("pid")
                    if pid is not None:
                        try:
                            # Signal 0 checks for existence without
                            # delivering a signal.
                            os.kill(pid, 0)
                        except ProcessLookupError:
                            # PID dead: record it and drop the stale file
                            # so a new launch can proceed below.
                            finding = StateFinding(
                                id="MUTATION_SKIPPED",
                                fingerprint="mutation-process-died",
                                source="MUTANT",
                                disposition=Disposition.DISMISSED,
                                file="",
                                line_range=[],
                                description=(
                                    "CI: mutation process died (PID %d)"
                                    % pid
                                ),
                            )
                            self._state.findings.append(finding)
                            result_path.unlink()
                        else:
                            # PID alive: skip the new launch only; the
                            # verdict below is still computed and saved.
                            self._state.infra_errors.append(
                                "CI: mutation PID %d still running, "
                                "skipping new launch" % pid
                            )
                            mutation_in_flight = True
                elif status == "error":
                    error_msg = result_data.get(
                        "message", "unknown error"
                    )
                    self._state.verdict = Verdict.FAIL
                    self._state.converged = False
                    self._state.infra_errors.append(
                        "CI: mutation error: %s" % error_msg
                    )
                    self._persist_state()
                    return Verdict.FAIL
        except (json.JSONDecodeError, KeyError, OSError) as e:
            self._state.infra_errors.append(
                "CI: failed to read mutation-result.json: %s" % e
            )

    if not mutation_in_flight:
        self._launch_ci_mutation(result_path)

    # Proceed with normal L0+L1 verdict determination
    confirmed = self._count(Disposition.CONFIRMED)
    verdict = Verdict.FAIL if confirmed > 0 else Verdict.PASS
    self._state.verdict = verdict
    self._state.converged = (verdict == Verdict.PASS)
    self._persist_state()
    return verdict

def _launch_ci_mutation(self, result_path: Path) -> None:
    """Start a background mutation run, or record why none was started.

    A run is started only when the reviewed files include at least one
    ``.py`` file, ``mutmut`` is on PATH, and gate.yaml yields a
    ``test.command``. If there are no Python files or mutmut is missing, a
    record with status "error" and an explanatory message is written to
    ``result_path``. If the gate config cannot be read, nothing is written.
    Failures to write the record are ignored (best effort).

    NOTE(review): the skip records use status "error", which the
    prior-result check in ``_run_ci`` treats as a FAIL on the next run --
    confirm this is intended.
    """
    import shutil

    def _record_skip(message: str) -> None:
        # Best-effort write of a "nothing launched" record.
        try:
            with open(result_path, "w", encoding="utf-8") as fh:
                json.dump({"status": "error", "message": message}, fh)
        except OSError:
            pass

    diff_files = [str(f) for f in self._source_files()]
    py_files = [f for f in diff_files if f.endswith(".py")]

    if not py_files:
        _record_skip("no Python files in diff")
        return
    if shutil.which("mutmut") is None:
        _record_skip("mutmut not installed")
        return

    try:
        from .gate_check import load_gate_config

        config = load_gate_config(
            self.cwd / ".code-forge" / "gate.yaml"
        )
        baseline_cmd = config["test"]["command"]
    except Exception:  # noqa: BLE001
        # Missing or unreadable gate config: silently skip the launch.
        baseline_cmd = None

    if baseline_cmd is None:
        return

    from .mutation import run_mutation

    cwd_ref = self.cwd

    def _async_mutation() -> None:
        # Publish a "running" record first so a later run can see the PID.
        started_at = time.time()
        initial_data = {
            "pid": os.getpid(),
            "started_at": started_at,
            "status": "running",
            "survivors": [],
        }
        try:
            with open(result_path, "w", encoding="utf-8") as fh:
                json.dump(initial_data, fh)
        except OSError:
            return

        try:
            mm_findings, _infra = run_mutation(
                diff_files=diff_files,
                baseline_cmd=baseline_cmd,
                cwd=cwd_ref,
            )
            # Survivors are CONFIRMED MUTANT findings other than the
            # MUTATION_ERROR marker; their id is "mutant-{mutant_name}".
            survivor_list = [
                item.id
                for item in mm_findings
                if item.source == "MUTANT"
                and item.disposition == Disposition.CONFIRMED
                and item.id != "MUTATION_ERROR"
            ]
            done_data = {
                "pid": os.getpid(),
                "started_at": started_at,
                "status": "done",
                "survivors": survivor_list,
            }
            with open(result_path, "w", encoding="utf-8") as fh:
                json.dump(done_data, fh)
        except Exception as e:  # noqa: BLE001
            error_data = {
                "pid": os.getpid(),
                "started_at": started_at,
                "status": "error",
                "message": str(e),
            }
            try:
                with open(result_path, "w", encoding="utf-8") as fh:
                    json.dump(error_data, fh)
            except OSError:
                pass

    # NOTE(review): a daemon thread does not keep the interpreter alive,
    # so if this process exits before run_mutation returns, the file
    # stays at status "running" with this process's PID -- confirm the
    # caller keeps the process alive long enough.
    worker = threading.Thread(target=_async_mutation, daemon=True)
    worker.start()
|
|
366
|
+
|
|
367
|
+
def _run_local(self) -> Verdict:
    """LOCAL: iterate rounds until a terminal condition is reached.

    STATE-01 / STATE-02 / STATE-04 / STATE-05 / GATE-01b.

    For each of up to ``max_total_rounds`` rounds, in this order:
      - ESCALATED-frozen check before the round runs: if
        ``check_escalated_frozen`` is true, snapshot an empty round and
        return Verdict.ESCALATED.
      - execute the round;
      - after 3 consecutive rounds with CONFIRMED MUTANT findings,
        return Verdict.FAIL;
      - at fixpoint, finalize and return the state's verdict;
      - if HOLD applies, return Verdict.PENDING.
    When the round budget is exhausted, the non-convergence diagnosis is
    recorded and Verdict.ESCALATED is returned. State is persisted before
    every return.
    """
    for round_index in range(self.max_total_rounds):
        if check_escalated_frozen(self._state):
            self._append_round_snapshot(
                round_index, l0_findings=[], l1_findings=[], l2_findings=[]
            )
            state = self._state
            state.verdict = Verdict.ESCALATED
            state.converged = False

            # CONFIRMED findings that were already promoted once.
            frozen = []
            for item in state.findings:
                if (
                    item.disposition == Disposition.CONFIRMED
                    and item.fingerprint in state.promoted_fingerprints
                ):
                    frozen.append(item.fingerprint)

            # At most three fingerprints are listed in the message.
            suffix = "..." if len(frozen) > 3 else ""
            state.infra_errors.append(
                "ESCALATED frozen (DISPO-05) fingerprints=[%s%s]"
                % (",".join(frozen[:3]), suffix)
            )
            self._persist_state()
            return Verdict.ESCALATED

        self._execute_round(round_index)
        state = self._state

        # Track how many rounds in a row left surviving mutants.
        survivors_present = any(
            item.source == "MUTANT"
            and item.disposition == Disposition.CONFIRMED
            for item in state.findings
        )
        if survivors_present:
            state.consecutive_survivor_rounds += 1
        else:
            state.consecutive_survivor_rounds = 0

        if state.consecutive_survivor_rounds >= 3:
            state.verdict = Verdict.FAIL
            state.converged = False
            state.infra_errors.append(
                "mutation: 3 consecutive rounds with survivors -- "
                "tests are demonstrably weak"
            )
            self._persist_state()
            return Verdict.FAIL

        if self._fixpoint_reached():
            self._finalize_local_terminal()
            return self._state.verdict

        if self._should_enter_hold():
            pending = [
                item for item in state.findings
                if item.disposition == Disposition.UNCERTAIN
            ]
            state.hold_reason = (
                "%d UNCERTAIN finding(s) awaiting human disposition"
                % len(pending)
            )
            state.verdict = Verdict.PENDING
            self._persist_state()
            return Verdict.PENDING

    # MAX_TOTAL_ROUNDS exhausted -> STATE-05 diagnosis + ESCALATED
    state = self._state
    category = diagnose_non_convergence(
        state.round_history, state.infra_errors
    )
    state.verdict = Verdict.ESCALATED
    state.converged = False
    state.infra_errors.append("ESCALATED category=%s" % category)
    self._persist_state()
    return Verdict.ESCALATED
|
|
450
|
+
|
|
451
|
+
def _run_l0_phase(self) -> list[StateFinding]:
|
|
452
|
+
"""STATE-08 L0 detect phase. Returns L0 StateFindings (CONFIRMED).
|
|
453
|
+
|
|
454
|
+
No file mutations here -- L0 detect only; autofix is separate.
|
|
455
|
+
"""
|
|
456
|
+
try:
|
|
457
|
+
l0_findings, l0_infra = self.l0_runner(
|
|
458
|
+
self.registry, self._source_files()
|
|
459
|
+
)
|
|
460
|
+
self._state.infra_errors.extend(l0_infra)
|
|
461
|
+
return l0_findings
|
|
462
|
+
except Exception as exc: # noqa: BLE001
|
|
463
|
+
self._state.infra_errors.append(
|
|
464
|
+
"L0 runner failed: %s" % exc
|
|
465
|
+
)
|
|
466
|
+
return []
|
|
467
|
+
|
|
468
|
+
def _run_l1_phase(self) -> list[StateFinding]:
|
|
469
|
+
"""STATE-08 L1 detect phase. Runs AFTER L0 autofix in LOCAL mode.
|
|
470
|
+
|
|
471
|
+
Both modes invoke L1 per LAYER0-07 (SARIF includes L1 candidates).
|
|
472
|
+
LOCAL L1 sees post-fix code; CI L1 sees raw L0 output (no autofix).
|
|
473
|
+
"""
|
|
474
|
+
l1_candidates = self.l1_provider()
|
|
475
|
+
l1_findings: list[StateFinding] = []
|
|
476
|
+
for f in l1_candidates:
|
|
477
|
+
try:
|
|
478
|
+
f.disposition = self.falsifier.falsify(f)
|
|
479
|
+
except RuntimeError as exc:
|
|
480
|
+
f.disposition = Disposition.UNCERTAIN
|
|
481
|
+
f.error = "falsify() raised: %s" % exc
|
|
482
|
+
self._state.infra_errors.append(
|
|
483
|
+
"falsify exception on %s: %s" % (f.fingerprint, exc)
|
|
484
|
+
)
|
|
485
|
+
l1_findings.append(f)
|
|
486
|
+
return l1_findings
|
|
487
|
+
|
|
488
|
+
def _run_l2_phase(self) -> list[StateFinding]:
    """L2 mutation phase: run ``l2_runner`` against the review's files.

    The baseline test command is read from ``test.command`` in
    cwd/.code-forge/gate.yaml. If the config cannot be loaded or the key
    is missing, an infra error is recorded and no findings are returned.
    Otherwise ``l2_runner`` is called with the stringified source files
    and the command; its infra errors are appended to the state. A failing
    runner is recorded as an infra error and yields no findings.
    """
    try:
        from .gate_check import load_gate_config

        gate_path = self.cwd / ".code-forge" / "gate.yaml"
        baseline_cmd = load_gate_config(gate_path)["test"]["command"]
    except Exception as exc:  # noqa: BLE001
        self._state.infra_errors.append(
            "L2: gate.yaml missing or test.command not configured: %s"
            % exc
        )
        return []

    targets = list(map(str, self._source_files()))

    try:
        mutants, reported = self.l2_runner(targets, baseline_cmd)
        self._state.infra_errors.extend(reported)
    except Exception as exc:  # noqa: BLE001
        self._state.infra_errors.append("L2 runner failed: %s" % exc)
        return []
    return mutants
|
|
517
|
+
|
|
518
|
+
def _run_e2e_phase(self) -> list[StateFinding]:
|
|
519
|
+
"""E2e coverage phase. Runs after L2.
|
|
520
|
+
|
|
521
|
+
Reads diff_text from the resolved review's canonical diff (captured at
|
|
522
|
+
review setup, same scope as L0/L1). Returns E2E_CHECK findings:
|
|
523
|
+
Layer 1 DISMISSED (advisory), Layer 2 UNCERTAIN (enters HOLD for human
|
|
524
|
+
triage). A failing e2e runner degrades to no findings, never crashes
|
|
525
|
+
the round.
|
|
526
|
+
|
|
527
|
+
Non-git mode: no diff is available; records a non-fatal infra signal
|
|
528
|
+
and returns no findings.
|
|
529
|
+
"""
|
|
530
|
+
diff_text = self.resolved_review.git_diff
|
|
531
|
+
if diff_text is None:
|
|
532
|
+
self._state.infra_errors.append(
|
|
533
|
+
"e2e: no git diff available (non-git review)"
|
|
534
|
+
)
|
|
535
|
+
return []
|
|
536
|
+
try:
|
|
537
|
+
e2e_findings, e2e_infra = self.e2e_runner(diff_text, self.cwd)
|
|
538
|
+
self._state.infra_errors.extend(e2e_infra)
|
|
539
|
+
return e2e_findings
|
|
540
|
+
except Exception as exc: # noqa: BLE001
|
|
541
|
+
self._state.infra_errors.append("e2e runner failed: %s" % exc)
|
|
542
|
+
return []
|
|
543
|
+
|
|
544
|
+
def _execute_round(self, round_index: int) -> None:
    """STATE-08: run one full round (L0, L1, L2, E2E) and record it.

    The modes differ only in autofix:
      LOCAL: L0 detect -> L0 autofix loop -> L1 -> L2 -> E2E
      CI:    L0 detect -> L1 -> L2 -> E2E (no autofix loop per STATE-03)

    Afterwards the layers are merged by fingerprint, promotion stickiness
    is applied, the merged list becomes the state's findings, a round
    snapshot is appended, the state is persisted, and ``post_round_hook``
    (if set) is called with the round index.
    """
    self._state.round = round_index

    l0 = self._run_l0_phase()
    if self.mode == Mode.LOCAL:
        # Autofix sees only this round's L0 findings, before L1 runs.
        self._apply_autofix_loop_to(l0)
    l1 = self._run_l1_phase()
    l2 = self._run_l2_phase()
    e2e = self._run_e2e_phase()

    combined = self._merge_findings(l0, l1, l2, e2e)
    self._state.findings = self._apply_promotion_stickiness(combined)

    # The snapshot reads self._state.findings, so it must follow the
    # assignment above.
    self._append_round_snapshot(round_index, l0, l1, l2, e2e)
    self._persist_state()

    hook = self.post_round_hook
    if hook is not None:
        hook(round_index)
|
|
569
|
+
|
|
570
|
+
def _apply_autofix_loop_to(
    self, findings: list[StateFinding]
) -> None:
    """LOCAL only: attempt auto-fix on the given finding list.

    STATE-08 parameterized: operates on the provided list (L0 only)
    rather than the merged full list. Findings are mutated in place.
    Findings whose source is "MUTANT" or "E2E_CHECK", or whose disposition
    is not CONFIRMED, are skipped. For each remaining finding:
      - If fix_attempts >= max_fix_attempts: promote to UNCERTAIN and
        remember the fingerprint in promoted_fingerprints (DISPO-05)
      - Else: invoke autofixer.fix(finding, mode_hint)
          - SUCCESS    -> FIXED
          - PARSE_FAIL -> revert_fn(finding) + fix_attempts++
          - NO_CHANGE  -> fix_attempts++ (no revert needed)
          - EXCEPTION  -> fix_attempts++ + infra_errors append

    An exception raised by the autofixer is treated as EXCEPTION and
    logged with its message. An EXCEPTION outcome that is *returned* is
    logged separately. Whether the call raised is tracked per finding;
    the previous check looked for "autofixer exception" in the last
    infra error, so a returned EXCEPTION went unrecorded whenever the
    preceding finding's autofixer had raised.
    """
    mode_hint = self.resolved_review.mode_hint
    for finding in findings:
        # Coverage-gap findings skip autofix: they are not code defects
        # and the autofix loop cannot add a missing test.
        if finding.source in ("MUTANT", "E2E_CHECK"):
            continue
        if finding.disposition != Disposition.CONFIRMED:
            continue
        fp = finding.fingerprint
        attempts = self._state.fix_attempts.get(fp, 0)

        # DISPO-05: budget exhausted -> promote CONFIRMED to UNCERTAIN.
        if attempts >= self.max_fix_attempts:
            finding.disposition = Disposition.UNCERTAIN
            self._state.promoted_fingerprints.add(fp)
            continue

        raised = False
        try:
            outcome = self.autofixer.fix(finding, mode_hint)
        except Exception as exc:  # noqa: BLE001
            raised = True
            outcome = FixOutcome.EXCEPTION
            self._state.infra_errors.append(
                "autofixer exception on %s: %s" % (fp, exc)
            )

        if outcome == FixOutcome.SUCCESS:
            finding.disposition = Disposition.FIXED
        elif outcome == FixOutcome.PARSE_FAIL:
            # Revert first; the attempt is counted only if revert returns.
            self.revert_fn(finding)
            self._state.fix_attempts[fp] = attempts + 1
        elif outcome == FixOutcome.NO_CHANGE:
            self._state.fix_attempts[fp] = attempts + 1
        elif outcome == FixOutcome.EXCEPTION:
            self._state.fix_attempts[fp] = attempts + 1
            if not raised:
                # Returned (not raised) EXCEPTION: nothing logged yet for
                # this fingerprint.
                self._state.infra_errors.append(
                    "autofixer EXCEPTION on %s" % fp
                )
|
|
625
|
+
|
|
626
|
+
def _fixpoint_reached(self) -> bool:
|
|
627
|
+
"""R1 B3 + R2-2: precise fixpoint (LOCAL only).
|
|
628
|
+
|
|
629
|
+
TRUE iff ALL FOUR conditions hold:
|
|
630
|
+
(a) zero NEW CONFIRMED this round (new = fingerprint not in
|
|
631
|
+
prior round's dispositions; round 0 treats prior as empty)
|
|
632
|
+
(b) zero FIXED->CONFIRMED reversions this round
|
|
633
|
+
(c) zero unfixed CONFIRMED remain in active findings
|
|
634
|
+
(d) zero UNCERTAIN remain in active findings
|
|
635
|
+
"""
|
|
636
|
+
history = self._state.round_history
|
|
637
|
+
current_disps = {
|
|
638
|
+
f.fingerprint: f.disposition
|
|
639
|
+
for f in self._state.findings
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
# Prior round dispositions (empty set for round 0, R3 LOW4)
|
|
643
|
+
if len(history) >= 2:
|
|
644
|
+
prior_disps = history[-2].get("dispositions", {})
|
|
645
|
+
else:
|
|
646
|
+
prior_disps = {}
|
|
647
|
+
|
|
648
|
+
# (a) zero NEW CONFIRMED this round
|
|
649
|
+
for fp, disp in current_disps.items():
|
|
650
|
+
if disp == Disposition.CONFIRMED and fp not in prior_disps:
|
|
651
|
+
return False
|
|
652
|
+
|
|
653
|
+
# (b) zero FIXED->CONFIRMED reversions
|
|
654
|
+
for fp, disp in current_disps.items():
|
|
655
|
+
if (
|
|
656
|
+
disp == Disposition.CONFIRMED
|
|
657
|
+
and prior_disps.get(fp) == "FIXED"
|
|
658
|
+
):
|
|
659
|
+
return False
|
|
660
|
+
|
|
661
|
+
# (c) zero unfixed CONFIRMED remain
|
|
662
|
+
for f in self._state.findings:
|
|
663
|
+
if f.disposition == Disposition.CONFIRMED:
|
|
664
|
+
return False
|
|
665
|
+
|
|
666
|
+
# (d) zero UNCERTAIN remain
|
|
667
|
+
for f in self._state.findings:
|
|
668
|
+
if f.disposition == Disposition.UNCERTAIN:
|
|
669
|
+
return False
|
|
670
|
+
|
|
671
|
+
return True
|
|
672
|
+
|
|
673
|
+
def _should_enter_hold(self) -> bool:
    """GATE-01b: HOLD when UNCERTAIN > 0 AND unfixed CONFIRMED == 0.

    Returns False unconditionally in CI mode. Otherwise returns True
    exactly when at least one finding is UNCERTAIN and none is CONFIRMED.
    """
    if self.mode == Mode.CI:
        return False

    uncertain_seen = False
    for item in self._state.findings:
        if item.disposition == Disposition.CONFIRMED:
            # Any unfixed CONFIRMED finding rules HOLD out.
            return False
        if item.disposition == Disposition.UNCERTAIN:
            uncertain_seen = True
    return uncertain_seen
|
|
689
|
+
|
|
690
|
+
def _finalize_local_terminal(self) -> None:
    """R3 LOW5: record the LOCAL fixpoint exit as a converged PASS.

    Sets verdict to Verdict.PASS and converged to True on the state, then
    persists it.
    """
    state = self._state
    state.verdict, state.converged = Verdict.PASS, True
    self._persist_state()
|
|
695
|
+
|
|
696
|
+
def _merge_findings(
|
|
697
|
+
self,
|
|
698
|
+
l0_findings: list[StateFinding],
|
|
699
|
+
l1_findings: list[StateFinding],
|
|
700
|
+
l2_findings: list[StateFinding] = None,
|
|
701
|
+
e2e_findings: list[StateFinding] = None,
|
|
702
|
+
) -> list[StateFinding]:
|
|
703
|
+
"""Merge L0 + L1 + L2 + E2E by fingerprint. FP-04: L0 wins on conflict.
|
|
704
|
+
|
|
705
|
+
Merge order (lowest priority first; higher overwrites):
|
|
706
|
+
e2e (lowest) -> l2 -> l1 -> l0 (highest).
|
|
707
|
+
E2E fingerprints use an "e2e-" prefix and do not collide with
|
|
708
|
+
L0/L1/L2 fingerprints; the ordering is defensive correctness.
|
|
709
|
+
"""
|
|
710
|
+
merged: dict[str, StateFinding] = {}
|
|
711
|
+
# e2e lowest priority: insert first so l2/l1/l0 can overwrite.
|
|
712
|
+
for f in (e2e_findings or []):
|
|
713
|
+
merged[f.fingerprint] = f
|
|
714
|
+
for f in (l2_findings or []):
|
|
715
|
+
merged[f.fingerprint] = f
|
|
716
|
+
for f in l1_findings:
|
|
717
|
+
merged[f.fingerprint] = f
|
|
718
|
+
for f in l0_findings:
|
|
719
|
+
merged[f.fingerprint] = f
|
|
720
|
+
return list(merged.values())
|
|
721
|
+
|
|
722
|
+
def _apply_promotion_stickiness(
|
|
723
|
+
self, findings: list[StateFinding]
|
|
724
|
+
) -> list[StateFinding]:
|
|
725
|
+
"""DISPO-05: promoted UNCERTAIN sticks against L0 re-detect.
|
|
726
|
+
|
|
727
|
+
If a finding was promoted to UNCERTAIN in a prior round
|
|
728
|
+
(fix_attempts >= max and disposition was UNCERTAIN), preserve
|
|
729
|
+
UNCERTAIN even if L0 re-detected it as CONFIRMED this round.
|
|
730
|
+
"""
|
|
731
|
+
for f in findings:
|
|
732
|
+
fp = f.fingerprint
|
|
733
|
+
attempts = self._state.fix_attempts.get(fp, 0)
|
|
734
|
+
if attempts >= self.max_fix_attempts:
|
|
735
|
+
# Check prior round for promotion evidence
|
|
736
|
+
prior_was_uncertain = False
|
|
737
|
+
if self._state.round_history:
|
|
738
|
+
last_disps = self._state.round_history[-1].get(
|
|
739
|
+
"dispositions", {}
|
|
740
|
+
)
|
|
741
|
+
if last_disps.get(fp) == "UNCERTAIN":
|
|
742
|
+
prior_was_uncertain = True
|
|
743
|
+
if prior_was_uncertain:
|
|
744
|
+
f.disposition = Disposition.UNCERTAIN
|
|
745
|
+
return findings
|
|
746
|
+
|
|
747
|
+
def _append_round_snapshot(
|
|
748
|
+
self,
|
|
749
|
+
round_index: int,
|
|
750
|
+
l0_findings: list[StateFinding],
|
|
751
|
+
l1_findings: list[StateFinding],
|
|
752
|
+
l2_findings: list[StateFinding] = None,
|
|
753
|
+
e2e_findings: list[StateFinding] = None,
|
|
754
|
+
) -> None:
|
|
755
|
+
"""Append per-round snapshot to round_history for STATE-05."""
|
|
756
|
+
snapshot = {
|
|
757
|
+
"round": round_index,
|
|
758
|
+
"l0_fingerprints": [f.fingerprint for f in l0_findings],
|
|
759
|
+
"l1_fingerprints": [f.fingerprint for f in l1_findings],
|
|
760
|
+
"l2_fingerprints": [
|
|
761
|
+
f.fingerprint for f in (l2_findings or [])
|
|
762
|
+
],
|
|
763
|
+
"e2e_fingerprints": [
|
|
764
|
+
f.fingerprint for f in (e2e_findings or [])
|
|
765
|
+
],
|
|
766
|
+
"dispositions": {
|
|
767
|
+
f.fingerprint: f.disposition.value
|
|
768
|
+
for f in self._state.findings
|
|
769
|
+
},
|
|
770
|
+
"fixed_fingerprints": [
|
|
771
|
+
f.fingerprint
|
|
772
|
+
for f in self._state.findings
|
|
773
|
+
if f.disposition == Disposition.FIXED
|
|
774
|
+
],
|
|
775
|
+
}
|
|
776
|
+
self._state.round_history.append(snapshot)
|
|
777
|
+
|
|
778
|
+
def _count(self, disposition: Disposition) -> int:
|
|
779
|
+
"""Count findings with a given disposition."""
|
|
780
|
+
return sum(
|
|
781
|
+
1 for f in self._state.findings
|
|
782
|
+
if f.disposition == disposition
|
|
783
|
+
)
|
|
784
|
+
|
|
785
|
+
def _persist_state(self) -> None:
    """Save the current state to cwd/.code-forge/state.json via save_state."""
    target = self.cwd.joinpath(".code-forge", "state.json")
    save_state(self._state, target)
|
|
789
|
+
|
|
790
|
+
def _source_files(self) -> list[Path]:
|
|
791
|
+
"""Return source files from resolved review."""
|
|
792
|
+
return self.resolved_review.source_files
|