PyPI - code-review-forge - Versions diffs - 2.0.0a1__py3-none-any.whl - Mend

code-review-forge 2.0.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

code_forge/__init__.py +14 -0
code_forge/__main__.py +8 -0
code_forge/autofix.py +78 -0
code_forge/baseline.py +216 -0
code_forge/cli.py +983 -0
code_forge/delta.py +65 -0
code_forge/diagnose.py +109 -0
code_forge/diff.py +82 -0
code_forge/disposition.py +32 -0
code_forge/e2e_check.py +641 -0
code_forge/env_resolver.py +91 -0
code_forge/errors.py +34 -0
code_forge/exit_codes.py +37 -0
code_forge/factories.py +191 -0
code_forge/falsify.py +85 -0
code_forge/gate_check.py +466 -0
code_forge/git.py +351 -0
code_forge/hold.py +126 -0
code_forge/install_hooks.py +331 -0
code_forge/lock.py +162 -0
code_forge/machine.py +792 -0
code_forge/mode_resolver.py +60 -0
code_forge/mutation.py +380 -0
code_forge/parsers/__init__.py +56 -0
code_forge/parsers/_sarif.py +77 -0
code_forge/parsers/base.py +65 -0
code_forge/parsers/checkpatch.py +66 -0
code_forge/parsers/clippy.py +85 -0
code_forge/parsers/non_ascii.py +47 -0
code_forge/parsers/ruff.py +18 -0
code_forge/parsers/semgrep.py +18 -0
code_forge/parsers/shellcheck.py +56 -0
code_forge/registry.py +153 -0
code_forge/reporter.py +133 -0
code_forge/runner.py +205 -0
code_forge/sarif.py +226 -0
code_forge/skills/adversarial-qe/SKILL.md +272 -0
code_forge/skills/code-forge/SKILL.md +1193 -0
code_forge/skills/code-review-expert/SKILL.md +162 -0
code_forge/skills/code-review-expert/references/code-quality-checklist.md +130 -0
code_forge/skills/code-review-expert/references/removal-plan.md +52 -0
code_forge/skills/code-review-expert/references/security-checklist.md +118 -0
code_forge/skills/code-review-expert/references/solid-checklist.md +65 -0
code_forge/skills/kernel-fp-verify/SKILL.md +101 -0
code_forge/skills/qodo-review/SKILL.md +135 -0
code_forge/skills/smoke-test/SKILL.md +253 -0
code_forge/skills/smoke-test/references/boundary-cases.md +114 -0
code_forge/skills/smoke-test/references/concurrency-patterns.md +306 -0
code_forge/skills/smoke-test/references/injection-payloads.md +124 -0
code_forge/skills/smoke-test/test-library/shell/README.md +271 -0
code_forge/skills/smoke-test/test-library/shell/primitives.sh +352 -0
code_forge/skills/smoke-test/test-library/shell/primitives_test.sh +324 -0
code_forge/snapshot.py +196 -0
code_forge/source.py +64 -0
code_forge/state.py +246 -0
code_forge/verdict.py +43 -0
code_review_forge-2.0.0a1.dist-info/METADATA +237 -0
code_review_forge-2.0.0a1.dist-info/RECORD +62 -0
code_review_forge-2.0.0a1.dist-info/WHEEL +5 -0
code_review_forge-2.0.0a1.dist-info/entry_points.txt +2 -0
code_review_forge-2.0.0a1.dist-info/licenses/LICENSE +179 -0
code_review_forge-2.0.0a1.dist-info/top_level.txt +1 -0

code_forge/snapshot.py ADDED Viewed

@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
+"""Snapshot persistence per BASELINE-02 + invalidation per BASELINE-03.
+SCHEMA_VERSION is independent of state.SCHEMA_VERSION (snapshot evolves
+on different cadence from state.json).
+B2 fix: NO Disposition import. finding_dispositions: dict[str, str] stores
+disposition values as strings; state machine (02-02) converts to/from
+Disposition at the read/write boundary.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import warnings
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Optional
+from .errors import (
+    BaselineResolutionError,
+    CorruptedSnapshotError,
+    SnapshotSchemaMismatchError,
+)
+# Default schema_version of new Snapshot objects; load_snapshot raises
+# SnapshotSchemaMismatchError for files carrying any other value.
+SNAPSHOT_SCHEMA_VERSION: int = 1
+@dataclass(frozen=True)
+class SnapshotEntry:
+    """One file's recorded state in the snapshot.
+    Frozen: instances cannot be modified after construction. load_snapshot
+    rebuilds entries from the JSON "files" list via SnapshotEntry(**entry),
+    so the field names below are also the on-disk keys.
+    """
+    path: str  # path-as-posix relative to snapshot root (H3)
+    content_hash: str  # SHA256 (text: normalized; binary: raw bytes)
+@dataclass
+class Snapshot:
+    """A persisted baseline snapshot. BASELINE-02 + BASELINE-03.
+    Written by save_snapshot (through dataclasses.asdict) and rebuilt by
+    load_snapshot.
+    """
+    # Defaults to SNAPSHOT_SCHEMA_VERSION; load_snapshot rejects other values.
+    schema_version: int = SNAPSHOT_SCHEMA_VERSION
+    # source_hash recorded with the snapshot; empty string by default.
+    source_hash: str = ""
+    # One SnapshotEntry per recorded file; validate_snapshot compares these
+    # against the current files.
+    files: list[SnapshotEntry] = field(default_factory=list)
+    # Disposition values kept as plain strings (no Disposition import here,
+    # see the module docstring).
+    finding_dispositions: dict[str, str] = field(default_factory=dict)
+@dataclass(frozen=True)
+class InvalidationResult:
+    """BASELINE-03 partial-invalidation result.
+    missing = files in snapshot but absent from current source.
+    changed = files present in both but content hash differs.
+    unchanged = files where snapshot hash matches current.
+    added = files in current but not in snapshot.
+    As produced by validate_snapshot, every list holds posix-style paths
+    relative to the root and is sorted in ascending order.
+    """
+    missing: list[str]
+    changed: list[str]
+    unchanged: list[str]
+    added: list[str]
+def snapshot_path_for(source_hash: str, cwd: Path) -> Path:
+    """Standard location: .code-forge/snapshots/<source-hash>.json under cwd."""
+    return cwd / ".code-forge" / "snapshots" / ("%s.json" % source_hash)
+def find_existing_snapshot(source_hash: str, cwd: Path) -> Optional[Path]:
+    """H5 fix: snapshot auto-discovery helper.
+    Returns the snapshot path if it exists, else None.
+    """
+    p = snapshot_path_for(source_hash, cwd)
+    return p if p.exists() else None
+SNAPSHOT_COUNT_WARN_THRESHOLD: int = 50
+def save_snapshot(snapshot: Snapshot, path: Path) -> None:
+    """Atomic write of snapshot file. Auto-creates parent dirs (D1).
+    OQ2 fix: after write, count snapshot files in directory; if above
+    threshold, warn user about manual cleanup.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
+    tmp = path.with_suffix(".tmp")
+    tmp.write_text(json.dumps(asdict(snapshot), indent=2))
+    tmp.replace(path)
+    snapshots = list(path.parent.glob("*.json"))
+    if len(snapshots) > SNAPSHOT_COUNT_WARN_THRESHOLD:
+        warnings.warn(
+            "forge: %d snapshot files in %s; "
+            "consider manual cleanup (no auto-GC in v2.0)"
+            % (len(snapshots), path.parent),
+            stacklevel=2,
+        )
+def load_snapshot(path: Path) -> Optional[Snapshot]:
+    """Load snapshot. Returns None on missing file (BASELINE-03).
+    Raises:
+        CorruptedSnapshotError: JSON parse failure or missing/invalid
+            fields in snapshot data.
+        SnapshotSchemaMismatchError: schema_version mismatch.
+    """
+    if not path.exists():
+        return None
+    try:
+        data = json.loads(path.read_text())
+    except json.JSONDecodeError as e:
+        raise CorruptedSnapshotError(
+            "cannot parse %s: %s" % (path, e)
+        ) from e
+    sv = data.get("schema_version")
+    if sv != SNAPSHOT_SCHEMA_VERSION:
+        raise SnapshotSchemaMismatchError(
+            "snapshot schema_version=%s, forge expects %s; "
+            "remove %s to start fresh" % (sv, SNAPSHOT_SCHEMA_VERSION, path)
+        )
+    try:
+        return Snapshot(
+            schema_version=data["schema_version"],
+            source_hash=data["source_hash"],
+            files=[SnapshotEntry(**e) for e in data.get("files", [])],
+            finding_dispositions=dict(
+                data.get("finding_dispositions", {})
+            ),
+        )
+    except (KeyError, TypeError) as e:
+        raise CorruptedSnapshotError(
+            "invalid snapshot data in %s: %s" % (path, e)
+        ) from e
+def validate_snapshot(
+    snapshot: Snapshot, current_files: list[Path], root: Path
+) -> InvalidationResult:
+    """BASELINE-03: classify files as unchanged/changed/added/missing.
+    H6 fix: files outside root raise BaselineResolutionError with
+    explicit file and root context.
+    """
+    snapshot_map = {e.path: e.content_hash for e in snapshot.files}
+    current_map: dict[str, str] = {}
+    for f in current_files:
+        try:
+            rel = f.relative_to(root).as_posix()
+        except ValueError as e:
+            raise BaselineResolutionError(
+                "file %s is outside snapshot root %s "
+                "-- cannot classify against snapshot" % (f, root)
+            ) from e
+        current_map[rel] = _hash_file(f)
+    missing = sorted(p for p in snapshot_map if p not in current_map)
+    added = sorted(p for p in current_map if p not in snapshot_map)
+    changed = sorted(
+        p
+        for p in current_map
+        if p in snapshot_map and current_map[p] != snapshot_map[p]
+    )
+    unchanged = sorted(
+        p
+        for p in current_map
+        if p in snapshot_map and current_map[p] == snapshot_map[p]
+    )
+    return InvalidationResult(
+        missing=missing,
+        changed=changed,
+        unchanged=unchanged,
+        added=added,
+    )
+def _hash_file(path: Path) -> str:
+    """SHA256 of file content.
+    Text files use normalize_text (LF + trailing-ws strip).
+    Binary files hash raw bytes (H1 fix).
+    """
+    from .source import normalize_text
+    try:
+        content = path.read_text(encoding="utf-8")
+        return hashlib.sha256(
+            normalize_text(content).encode("utf-8")
+        ).hexdigest()
+    except UnicodeDecodeError:
+        return hashlib.sha256(path.read_bytes()).hexdigest()

code_forge/source.py ADDED Viewed

@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
+"""source_hash computation per STATE-07.
+Whitespace normalization: trailing-ws strip + LF line endings.
+H1/H3 fixes applied: binary files hashed as raw bytes (preserves invalidation
+correctness); path serialization uses as_posix() for cross-platform determinism.
+"""
+from __future__ import annotations
+import hashlib
+from pathlib import Path
+from typing import Optional
+def normalize_text(text: str) -> str:
+    """Strip trailing whitespace per line; force LF endings.
+    No trailing blank line stripping.
+    """
+    lines = [line.rstrip() for line in text.splitlines()]
+    return "\n".join(lines)
+def compute_source_hash(
+    *,
+    git_diff: Optional[str] = None,
+    files: Optional[list[Path]] = None,
+) -> str:
+    """STATE-07 source_hash.
+    Git mode: caller passes git_diff (unified diff output).
+    Non-git mode: caller passes files. Files are sorted by posix path
+    string for cross-platform deterministic ordering (H3). Binary files
+    (UnicodeDecodeError on utf-8 read) are hashed as raw bytes with a
+    binary marker (H1) -- this preserves invalidation correctness for
+    binary edits and keeps source_hash stable.
+    Exactly one of git_diff / files must be provided. Returns lowercase
+    hex SHA256.
+    """
+    if (git_diff is None) == (files is None):
+        raise ValueError(
+            "compute_source_hash: pass exactly one of git_diff or files"
+        )
+    h = hashlib.sha256()
+    if git_diff is not None:
+        h.update(b"mode=git\n")
+        h.update(normalize_text(git_diff).encode("utf-8"))
+        return h.hexdigest()
+    h.update(b"mode=non-git\n")
+    for f in sorted(files, key=lambda p: p.as_posix()):
+        try:
+            content = f.read_text(encoding="utf-8")
+            h.update(("--- %s text\n" % f.as_posix()).encode("utf-8"))
+            h.update(normalize_text(content).encode("utf-8"))
+        except UnicodeDecodeError:
+            # H1: binary file -- hash raw bytes
+            h.update(("--- %s binary\n" % f.as_posix()).encode("utf-8"))
+            h.update(f.read_bytes())
+        h.update(b"\n")
+    return h.hexdigest()

code_forge/state.py ADDED Viewed

@@ -0,0 +1,246 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
+"""state.json schema + IO.
+Schema owned by 02-01. Subsequent sub-plans add fields ADDITIVELY (no rename,
+no remove). Bump SCHEMA_VERSION on breaking change.
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from enum import Enum
+from pathlib import Path
+from typing import Literal, Optional
+from .disposition import Disposition, DISPOSITION_PROTOCOL_VERSION
+from .errors import CorruptedStateError, SchemaVersionMismatchError
+# Current state.json schema version; load_state raises
+# SchemaVersionMismatchError for files carrying any other value.
+SCHEMA_VERSION: int = 1
+class Mode(str, Enum):
+    """Forge execution mode. Resolved by 02-05, consumed by 02-02.
+    Stored in state.json as the member's string value: save_state writes
+    mode.value and load_state rebuilds the member with Mode(...).
+    """
+    LOCAL = "LOCAL"
+    CI = "CI"
+class Verdict(str, Enum):
+    """Process verdict (terminal). Set by state machine on exit.
+    Stored in state.json as the member's string value: save_state writes
+    verdict.value and load_state rebuilds the member with Verdict(...).
+    PENDING is the default verdict of a new State.
+    """
+    PASS = "PASS"
+    FAIL = "FAIL"
+    ESCALATED = "ESCALATED"
+    PENDING = "PENDING"
+@dataclass
+class StateFinding:
+    """A single finding entry in state.json findings[].
+    Named StateFinding (not Finding) to avoid conflict with Phase 1
+    forge.parsers.base.Finding (parser-emitted record, different shape).
+    Conversion: state machine in 02-02 maps parsers.base.Finding ->
+    StateFinding.
+    Serialized by _finding_to_dict and rebuilt by _finding_from_dict. The
+    three Optional fields default to None and are read with dict.get on
+    load, so they may be absent from the JSON.
+    """
+    id: str  # key of the State.dispositions cache
+    fingerprint: str
+    source: Literal["L0", "L1", "MUTANT", "E2E_CHECK"]
+    disposition: Disposition  # written to JSON as disposition.value
+    file: str
+    line_range: list[int]  # presumably [start, end] -- confirm with producer
+    description: str
+    error: Optional[str] = None
+    anchor: Optional[dict] = None
+    evidence_files: Optional[list[str]] = None
+@dataclass
+class State:
+    """state.json schema. v1.
+    02-02 additions (additive only, no schema_version bump per D2):
+      - baseline_spec_repr: from 02-03 serialize_baseline_spec; recorded so
+        HOLD resume can verify which baseline was used (OQ1 fix from 02-03)
+      - round_history: per-round snapshots for STATE-05 diagnosis
+      - infra_errors: error messages collected during L0/L1/falsify failures
+        (drives STATE-05 Category D classification)
+    02-04 additions (additive per D2):
+      - hold_reason: Optional[str] -- set on HOLD entry; cleared on resume.
+        Disambiguates "interrupted mid-run" from "HOLD pending human input".
+      - promoted_fingerprints: set[str] -- fingerprints promoted CONFIRMED ->
+        UNCERTAIN via DISPO-05. Used by ESCALATED-frozen predicate.
+        Serialized as sorted list (JSON has no native set type).
+    Persistence: save_state writes every field explicitly and load_state
+    restores them, falling back to defaults for the 02-02 / 02-04 additions
+    when their keys are absent from the file.
+    """
+    schema_version: int = SCHEMA_VERSION
+    disposition_protocol_version: int = DISPOSITION_PROTOCOL_VERSION
+    round: int = 0
+    mode: Mode = Mode.LOCAL
+    source_hash: Optional[str] = None
+    findings: list[StateFinding] = field(default_factory=list)
+    # Derived lookup cache (NOT source of truth; SOT = StateFinding.disposition).
+    # save_state rebuilds from findings; load_state verifies cache matches.
+    dispositions: dict[str, Disposition] = field(default_factory=dict)
+    fix_attempts: dict[str, int] = field(default_factory=dict)
+    verdict: Verdict = Verdict.PENDING
+    converged: bool = False
+    # 02-02 additions:
+    baseline_spec_repr: Optional[str] = None
+    round_history: list[dict] = field(default_factory=list)
+    infra_errors: list[str] = field(default_factory=list)
+    # 02-04 additions:
+    hold_reason: Optional[str] = None
+    promoted_fingerprints: set[str] = field(default_factory=set)
+    # Mutation survivor round counter (LOCAL mode):
+    consecutive_survivor_rounds: int = 0  # LOCAL mode only
+def _finding_from_dict(d: dict) -> StateFinding:
+    """Reconstruct StateFinding from JSON dict with enum conversion."""
+    return StateFinding(
+        id=d["id"],
+        fingerprint=d["fingerprint"],
+        source=d["source"],
+        disposition=Disposition(d["disposition"]),
+        file=d["file"],
+        line_range=list(d["line_range"]),
+        description=d["description"],
+        error=d.get("error"),
+        anchor=d.get("anchor"),
+        evidence_files=d.get("evidence_files"),
+    )
+def load_state(path: Path) -> Optional[State]:
+    """Load state.json. Returns None if file does not exist.
+    Raises:
+        CorruptedStateError: JSON parse failure, missing/invalid fields,
+            invalid enum values, or cache mismatch.
+        SchemaVersionMismatchError: schema_version != SCHEMA_VERSION.
+    """
+    if not path.exists():
+        return None
+    try:
+        data = json.loads(path.read_text())
+    except json.JSONDecodeError as e:
+        raise CorruptedStateError(
+            "cannot parse %s: %s" % (path, e)
+        ) from e
+    sv = data.get("schema_version")
+    if sv != SCHEMA_VERSION:
+        raise SchemaVersionMismatchError(
+            "state.json schema_version=%s, forge expects %s; "
+            "remove .code-forge/state.json to start fresh" % (sv, SCHEMA_VERSION)
+        )
+    try:
+        findings = [
+            _finding_from_dict(f) for f in data.get("findings", [])
+        ]
+        dispositions = {
+            k: Disposition(v)
+            for k, v in data.get("dispositions", {}).items()
+        }
+    except (KeyError, ValueError) as e:
+        raise CorruptedStateError(
+            "invalid finding or disposition in %s: %s" % (path, e)
+        ) from e
+    expected = {f.id: f.disposition for f in findings}
+    if dispositions != expected:
+        raise CorruptedStateError(
+            "dispositions cache out of sync with findings (path=%s)" % path
+        )
+    try:
+        state = State(
+            schema_version=data["schema_version"],
+            disposition_protocol_version=data[
+                "disposition_protocol_version"
+            ],
+            round=data["round"],
+            mode=Mode(data["mode"]),
+            source_hash=data.get("source_hash"),
+            findings=findings,
+            dispositions=dispositions,
+            fix_attempts=dict(data.get("fix_attempts", {})),
+            verdict=Verdict(data["verdict"]),
+            converged=bool(data["converged"]),
+        )
+    except (KeyError, ValueError) as e:
+        raise CorruptedStateError(
+            "missing or invalid field in %s: %s" % (path, e)
+        ) from e
+    # 02-02 additions: backward-compat defaults for pre-02-02 state.json
+    # (R1 B1 silent-loss guard). Pre-02-02 files lack these keys; the
+    # loader returns a State with defaults rather than KeyError.
+    state.baseline_spec_repr = data.get("baseline_spec_repr")
+    state.round_history = data.get("round_history", [])
+    state.infra_errors = data.get("infra_errors", [])
+    # 02-04 additions: backward-compat defaults for pre-02-04 state.json.
+    state.hold_reason = data.get("hold_reason")
+    state.promoted_fingerprints = set(
+        data.get("promoted_fingerprints", [])
+    )
+    # 02-02 additions: backward-compat defaults for pre-02-02 state.json.
+    state.consecutive_survivor_rounds = data.get(
+        "consecutive_survivor_rounds", 0
+    )
+    return state
+def _finding_to_dict(f: StateFinding) -> dict:
+    """Serialize StateFinding to JSON-safe dict."""
+    d = {
+        "id": f.id,
+        "fingerprint": f.fingerprint,
+        "source": f.source,
+        "disposition": f.disposition.value,
+        "file": f.file,
+        "line_range": list(f.line_range),
+        "description": f.description,
+        "error": f.error,
+        "anchor": f.anchor,
+        "evidence_files": f.evidence_files,
+    }
+    return d
+def save_state(state: State, path: Path) -> None:
+    """Atomic write of state.json. Rebuilds dispositions cache first.
+    02-04 rewrite: no asdict on State. asdict cannot handle the set-typed
+    promoted_fingerprints field. All fields serialized explicitly.
+    """
+    state.dispositions = {f.id: f.disposition for f in state.findings}
+    data = {
+        "schema_version": state.schema_version,
+        "disposition_protocol_version": state.disposition_protocol_version,
+        "round": state.round,
+        "mode": state.mode.value,
+        "source_hash": state.source_hash,
+        "findings": [_finding_to_dict(f) for f in state.findings],
+        "dispositions": {
+            k: v.value for k, v in state.dispositions.items()
+        },
+        "fix_attempts": dict(state.fix_attempts),
+        "verdict": state.verdict.value,
+        "converged": state.converged,
+        "baseline_spec_repr": state.baseline_spec_repr,
+        "round_history": list(state.round_history),
+        "infra_errors": list(state.infra_errors),
+        "hold_reason": state.hold_reason,
+        "promoted_fingerprints": sorted(state.promoted_fingerprints),
+        "consecutive_survivor_rounds": state.consecutive_survivor_rounds,
+    }
+    path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
+    tmp = path.with_suffix(".tmp")
+    tmp.write_text(json.dumps(data, indent=2))
+    tmp.replace(path)

code_forge/verdict.py ADDED Viewed

@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright (c) 2026, Minxi Hou <houminxi@gmail.com>
+"""Verdict determination -- PASS/FAIL from delta findings.
+Pure function. Phase 1 implements PASS/FAIL only (GATE-01).
+HOLD state is Phase 2+.
+Addresses:
+- Consensus #4: ToolError in results -> FAIL (not false PASS)
+- Consensus #6: uses EXIT_PASS/EXIT_FAIL from code_forge.__init__
+"""
+from code_forge import EXIT_PASS, EXIT_FAIL
+from code_forge.parsers.base import Finding, ToolError
+# Lightweight type alias for readability: the (verdict_string, exit_code)
+# pair returned by determine_verdict.
+# Phase 2 may replace with a proper enum or dataclass when HOLD is added.
+Verdict = tuple[str, int]  # (verdict_string, exit_code)
+def determine_verdict(
+    delta_findings: list[Finding | ToolError],
+) -> Verdict:
+    """Determine verdict from delta findings.
+    Rules:
+    - Empty list: PASS (no new violations)
+    - Any ToolError: FAIL (tool crash = cannot guarantee no violations)
+    - Any Finding: FAIL (new violations found)
+    Per GATE-01, Phase 1 implements PASS/FAIL only.
+    Per GATE-04, all Layer 0 violations are gate-blocking.
+    Args:
+        delta_findings: filtered findings on changed lines
+    Returns:
+        (verdict_string, exit_code) tuple
+    """
+    if not delta_findings:
+        return ("PASS", EXIT_PASS)
+    return ("FAIL", EXIT_FAIL)