revrem 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_loop/__init__.py +5 -0
- code_review_loop/__main__.py +6 -0
- code_review_loop/_compat_jsonschema.py +113 -0
- code_review_loop/_compat_tomli_w.py +97 -0
- code_review_loop/artifacts.py +87 -0
- code_review_loop/budgets.py +129 -0
- code_review_loop/bug_bundle.py +205 -0
- code_review_loop/cli.py +4170 -0
- code_review_loop/diagnostics.py +387 -0
- code_review_loop/events.py +362 -0
- code_review_loop/fingerprints.py +61 -0
- code_review_loop/harnesses.py +318 -0
- code_review_loop/profiles.py +1093 -0
- code_review_loop/progress.py +156 -0
- code_review_loop/prompts/triage_v1.txt +35 -0
- code_review_loop/redaction.py +147 -0
- code_review_loop/run_history.py +85 -0
- code_review_loop/schemas/triage-v1.schema.json +266 -0
- code_review_loop/suppressions.py +392 -0
- code_review_loop/triage.py +93 -0
- code_review_loop/tui.py +316 -0
- code_review_loop/tui_state.py +537 -0
- revrem-0.3.2.dist-info/METADATA +309 -0
- revrem-0.3.2.dist-info/RECORD +29 -0
- revrem-0.3.2.dist-info/WHEEL +5 -0
- revrem-0.3.2.dist-info/entry_points.txt +3 -0
- revrem-0.3.2.dist-info/licenses/LICENSE +201 -0
- revrem-0.3.2.dist-info/licenses/NOTICE +10 -0
- revrem-0.3.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""JSON Schema 2020-12 validation with graceful fallback.
|
|
2
|
+
|
|
3
|
+
When the ``jsonschema`` package is available (the default for installed
|
|
4
|
+
wheels), the real validator is used so that *all* JSON Schema keywords
|
|
5
|
+
including ``minimum``, ``maximum``, ``pattern``, ``$ref``, ``allOf``,
|
|
6
|
+
``anyOf``, ``oneOf``, ``if/then/else``, ``minItems``, ``maxItems``, and
|
|
7
|
+
``patternProperties`` are enforced.
|
|
8
|
+
|
|
9
|
+
When ``jsonschema`` is *not* installed, a minimal compat validator is
|
|
10
|
+
used instead. The compat layer validates ``type``, ``const``, ``enum``,
|
|
11
|
+
``required``, ``additionalProperties``, ``minLength``, and ``items`` but
|
|
12
|
+
silently ignores the keywords listed above. Callers that rely on those
|
|
13
|
+
keywords should ensure ``jsonschema`` is present.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
from jsonschema import ( # type: ignore[attr-defined,import-untyped]
|
|
20
|
+
Draft202012Validator,
|
|
21
|
+
validate,
|
|
22
|
+
)
|
|
23
|
+
except ImportError:
|
|
24
|
+
from collections.abc import Iterator
|
|
25
|
+
|
|
26
|
+
class ValidationError(ValueError):
    """Error raised by the compat validator when an instance is rejected.

    Plays the role of ``jsonschema.exceptions.ValidationError`` when the real
    ``jsonschema`` package cannot be imported.
    """
|
|
28
|
+
|
|
29
|
+
def validate(instance, schema) -> None:  # type: ignore[misc]  # noqa: F811
    """Validate *instance* against *schema* using the compat validator.

    Builds a ``Draft202012Validator`` for *schema* and delegates to its
    ``validate`` method, which raises when the instance is rejected.
    """
    validator = Draft202012Validator(schema)
    validator.validate(instance)
|
|
31
|
+
|
|
32
|
+
class Draft202012Validator:  # type: ignore[no-redef]
    """Small substitute exposing the subset of the validator API used here."""

    def __init__(self, schema):
        # The schema is stored as given; it is not checked at construction time.
        self.schema = schema

    @classmethod
    def check_schema(cls, schema) -> None:
        """Raise ``TypeError`` unless *schema* is a ``dict``."""
        if isinstance(schema, dict):
            return
        raise TypeError("schema must be a mapping")

    def validate(self, instance) -> None:
        """Raise ``ValidationError`` carrying the first problem found, if any."""
        problems = [*self.iter_errors(instance)]
        if not problems:
            return
        raise ValidationError(problems[0])

    def iter_errors(self, instance) -> Iterator[str]:
        """Yield every violation message, with paths rooted at ``$``."""
        for message in _validate(instance, self.schema, path="$"):
            yield message
|
|
48
|
+
|
|
49
|
+
def _validate(instance, schema, *, path: str) -> Iterator[str]:
|
|
50
|
+
if not isinstance(schema, dict):
|
|
51
|
+
return
|
|
52
|
+
|
|
53
|
+
expected = schema.get("type")
|
|
54
|
+
if expected is not None and not _matches_type(instance, expected):
|
|
55
|
+
yield f"{path}: expected type {expected!r}"
|
|
56
|
+
return
|
|
57
|
+
|
|
58
|
+
if "const" in schema and instance != schema["const"]:
|
|
59
|
+
yield f"{path}: expected const {schema['const']!r}"
|
|
60
|
+
return
|
|
61
|
+
|
|
62
|
+
if "enum" in schema and instance not in schema["enum"]:
|
|
63
|
+
yield f"{path}: expected one of {schema['enum']!r}"
|
|
64
|
+
return
|
|
65
|
+
|
|
66
|
+
if isinstance(instance, str):
|
|
67
|
+
min_length = schema.get("minLength")
|
|
68
|
+
if min_length is not None and len(instance) < min_length:
|
|
69
|
+
yield f"{path}: string shorter than {min_length}"
|
|
70
|
+
return
|
|
71
|
+
|
|
72
|
+
if isinstance(instance, list):
|
|
73
|
+
items_schema = schema.get("items")
|
|
74
|
+
if items_schema is not None:
|
|
75
|
+
for index, item in enumerate(instance):
|
|
76
|
+
yield from _validate(item, items_schema, path=f"{path}[{index}]")
|
|
77
|
+
return
|
|
78
|
+
|
|
79
|
+
if isinstance(instance, dict):
|
|
80
|
+
properties = schema.get("properties", {})
|
|
81
|
+
required = schema.get("required", [])
|
|
82
|
+
additional = schema.get("additionalProperties", True)
|
|
83
|
+
|
|
84
|
+
for key in required:
|
|
85
|
+
if key not in instance:
|
|
86
|
+
yield f"{path}: missing required property {key!r}"
|
|
87
|
+
|
|
88
|
+
for key, value in instance.items():
|
|
89
|
+
if key in properties:
|
|
90
|
+
yield from _validate(value, properties[key], path=f"{path}.{key}")
|
|
91
|
+
elif additional is False:
|
|
92
|
+
yield f"{path}: unexpected property {key!r}"
|
|
93
|
+
elif isinstance(additional, dict):
|
|
94
|
+
yield from _validate(value, additional, path=f"{path}.{key}")
|
|
95
|
+
|
|
96
|
+
def _matches_type(instance, expected) -> bool:
|
|
97
|
+
if isinstance(expected, list):
|
|
98
|
+
return any(_matches_type(instance, item) for item in expected)
|
|
99
|
+
if expected == "object":
|
|
100
|
+
return isinstance(instance, dict)
|
|
101
|
+
if expected == "array":
|
|
102
|
+
return isinstance(instance, list)
|
|
103
|
+
if expected == "string":
|
|
104
|
+
return isinstance(instance, str)
|
|
105
|
+
if expected == "integer":
|
|
106
|
+
return isinstance(instance, int) and not isinstance(instance, bool)
|
|
107
|
+
if expected == "number":
|
|
108
|
+
return isinstance(instance, (int, float)) and not isinstance(instance, bool)
|
|
109
|
+
if expected == "boolean":
|
|
110
|
+
return isinstance(instance, bool)
|
|
111
|
+
if expected == "null":
|
|
112
|
+
return instance is None
|
|
113
|
+
return True
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Minimal TOML writer used when the external `tomli-w` package is unavailable."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from collections.abc import Mapping, Sequence
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def dumps(data: Mapping[str, object]) -> str:
    """Serialize *data* to TOML text.

    The emitted lines are joined with ``"\\n"``; no trailing newline is added.
    """
    rendered: list[str] = []
    _write_table(rendered, (), data)
    return "\n".join(rendered)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _write_table(lines: list[str], prefix: tuple[str, ...], table: Mapping[str, object]) -> None:
    """Append the TOML rendering of *table* (located at *prefix*) to *lines*.

    Entries are emitted in three groups: scalar values, then arrays, then
    nested tables, which are written recursively under an extended prefix.
    """
    plain_values: list[tuple[str, object]] = []
    subtables: list[tuple[str, Mapping[str, object]]] = []
    array_values: list[tuple[str, Sequence[object]]] = []

    for name, entry in table.items():
        if isinstance(entry, Mapping):
            subtables.append((name, entry))
        elif isinstance(entry, Sequence) and not isinstance(entry, (str, bytes, bytearray)):
            array_values.append((name, entry))
        else:
            plain_values.append((name, entry))

    # A table holding nothing but sub-tables is implicit: it gets no [header]
    # of its own because each child writes its fully-qualified [a.b.c] path.
    has_direct_values = bool(plain_values or array_values)
    only_subtables = bool(subtables) and not has_direct_values
    emits_header = bool(prefix) and not only_subtables
    if emits_header:
        header = ".".join(_format_key(part) for part in prefix)
        lines.append(f"[{header}]")

    for name, entry in plain_values:
        lines.append(f"{_format_key(name)} = {_format_value(entry)}")

    for name, entry in array_values:
        lines.append(f"{_format_key(name)} = {_format_array(entry)}")

    # A blank separator precedes each child only when this table wrote
    # something itself and there is already output to separate from.
    wrote_own_content = has_direct_values or emits_header
    for name, child in subtables:
        if lines and wrote_own_content:
            lines.append("")
        _write_table(lines, (*prefix, name), child)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _format_key(key: str) -> str:
    """Render *key* as a TOML key, quoting it when it is not a valid bare key.

    A non-empty key made only of ASCII letters, digits, ``-`` and ``_`` is
    returned unchanged. Anything else, including the empty string, is
    written as a double-quoted key.
    """
    if key and all(_is_bare_key_char(ch) for ch in key):
        return key
    # Quoted keys follow basic-string rules, so control characters such as a
    # newline must be escaped as well as backslash and quote; otherwise the
    # output is not valid TOML.
    return f'"{_escape_basic_string(key)}"'
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _is_bare_key_char(ch: str) -> bool:
|
|
53
|
+
return ch.isascii() and (ch.isalnum() or ch in {"-", "_"})
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _format_value(value: object) -> str:
|
|
57
|
+
if isinstance(value, bool):
|
|
58
|
+
return "true" if value else "false"
|
|
59
|
+
if isinstance(value, int) and not isinstance(value, bool):
|
|
60
|
+
return str(value)
|
|
61
|
+
if isinstance(value, float):
|
|
62
|
+
return repr(value)
|
|
63
|
+
if value is None:
|
|
64
|
+
raise TypeError("TOML does not support null/None values")
|
|
65
|
+
if isinstance(value, str):
|
|
66
|
+
escaped = _escape_basic_string(value)
|
|
67
|
+
return f'"{escaped}"'
|
|
68
|
+
raise TypeError(f"unsupported TOML value type: {type(value).__name__}")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _format_array(values: Sequence[object]) -> str:
|
|
72
|
+
return "[" + ", ".join(_format_value(value) for value in values) + "]"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _escape_basic_string(value: str) -> str:
|
|
76
|
+
parts: list[str] = []
|
|
77
|
+
for char in value:
|
|
78
|
+
codepoint = ord(char)
|
|
79
|
+
if char == "\\":
|
|
80
|
+
parts.append("\\\\")
|
|
81
|
+
elif char == '"':
|
|
82
|
+
parts.append('\\"')
|
|
83
|
+
elif char == "\b":
|
|
84
|
+
parts.append("\\b")
|
|
85
|
+
elif char == "\t":
|
|
86
|
+
parts.append("\\t")
|
|
87
|
+
elif char == "\n":
|
|
88
|
+
parts.append("\\n")
|
|
89
|
+
elif char == "\f":
|
|
90
|
+
parts.append("\\f")
|
|
91
|
+
elif char == "\r":
|
|
92
|
+
parts.append("\\r")
|
|
93
|
+
elif codepoint < 0x20 or codepoint == 0x7F:
|
|
94
|
+
parts.append(f"\\u{codepoint:04x}")
|
|
95
|
+
else:
|
|
96
|
+
parts.append(char)
|
|
97
|
+
return "".join(parts)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Artifact writing helpers with canonical JSON and path-safety checks."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import tempfile
|
|
8
|
+
import unicodedata
|
|
9
|
+
from collections.abc import Mapping, Sequence
|
|
10
|
+
from contextlib import suppress
|
|
11
|
+
from decimal import Decimal
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
JSON_SCHEMA_VERSION = "1.0"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ArtifactPathError(ValueError):
    """Signals an artifact path that is not safely contained in the run directory."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def safe_artifact_path(run_dir: Path, relative_path: str | Path) -> Path:
    """Return ``run_dir / relative_path`` after checking it stays inside *run_dir*.

    Raises ``ArtifactPathError`` when the path is absolute, contains an
    empty, ``.`` or ``..`` component, or when the resolved parent directory
    is not located under the resolved run directory. On success the parent
    directory is created and the unresolved target path is returned.
    """
    candidate = Path(relative_path)
    if candidate.is_absolute():
        raise ArtifactPathError("artifact path must be relative")
    forbidden_parts = {"", ".", ".."}
    if not forbidden_parts.isdisjoint(candidate.parts):
        raise ArtifactPathError("artifact path must not contain empty, current, or parent parts")

    run_root = run_dir.resolve()
    destination = run_dir / candidate
    containing_dir = destination.parent
    # Resolving the parent follows any symlinked directory along the way, so
    # a link pointing out of the run directory is caught here.
    if not containing_dir.resolve(strict=False).is_relative_to(run_root):
        raise ArtifactPathError("artifact path resolves outside the run directory")
    containing_dir.mkdir(parents=True, exist_ok=True)
    return destination
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def write_text_artifact(path: Path, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating parent directories first."""
    directory = path.parent
    directory.mkdir(parents=True, exist_ok=True)
    encoded = content.encode("utf-8")
    _atomic_write(path, encoded)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def write_json_artifact(
    run_dir: Path,
    relative_path: str | Path,
    payload: Mapping[str, Any],
    *,
    schema_version: str = JSON_SCHEMA_VERSION,
) -> Path:
    """Write *payload* as canonical JSON under *run_dir* and return the path.

    The destination is validated with ``safe_artifact_path``. A
    ``schema_version`` entry is set on a copy of the payload, replacing any
    existing value. The document is written with sorted keys, two-space
    indentation and a trailing newline.
    """
    target = safe_artifact_path(run_dir, relative_path)
    document = dict(payload)
    document["schema_version"] = schema_version
    text = json.dumps(
        canonicalize_json(document),
        ensure_ascii=False,
        indent=2,
        sort_keys=True,
    )
    _atomic_write(target, f"{text}\n".encode("utf-8"))
    return target
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def canonicalize_json(value: Any) -> Any:
|
|
59
|
+
if isinstance(value, str):
|
|
60
|
+
return unicodedata.normalize("NFC", value)
|
|
61
|
+
if isinstance(value, Decimal):
|
|
62
|
+
return str(value)
|
|
63
|
+
if isinstance(value, Mapping):
|
|
64
|
+
return {str(canonicalize_json(key)): canonicalize_json(item) for key, item in value.items()}
|
|
65
|
+
if isinstance(value, Sequence) and not isinstance(value, bytes | bytearray | str):
|
|
66
|
+
return [canonicalize_json(item) for item in value]
|
|
67
|
+
return value
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _atomic_write(path: Path, content: bytes) -> None:
|
|
71
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
72
|
+
fd, tmp_name = tempfile.mkstemp(prefix=f".{path.name}.", suffix=".tmp", dir=path.parent)
|
|
73
|
+
tmp_path = Path(tmp_name)
|
|
74
|
+
fd_consumed = False
|
|
75
|
+
try:
|
|
76
|
+
with os.fdopen(fd, "wb") as handle:
|
|
77
|
+
fd_consumed = True
|
|
78
|
+
handle.write(content)
|
|
79
|
+
handle.flush()
|
|
80
|
+
os.fsync(handle.fileno())
|
|
81
|
+
os.replace(tmp_path, path)
|
|
82
|
+
except Exception:
|
|
83
|
+
if not fd_consumed:
|
|
84
|
+
os.close(fd)
|
|
85
|
+
with suppress(FileNotFoundError):
|
|
86
|
+
tmp_path.unlink()
|
|
87
|
+
raise
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Run budget accounting for bounded RevRem execution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from decimal import Decimal, InvalidOperation
|
|
7
|
+
from time import monotonic
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class BudgetConfig:
    """Immutable run ceilings; a ceiling left as ``None`` is not enforced."""

    # Wall-clock ceiling in seconds, compared against elapsed monotonic time.
    max_wall_seconds: float | None = None
    # Ceiling on the accumulated token count.
    max_tokens: int | None = None
    # Ceiling on the accumulated USD amount.
    max_usd: Decimal | None = None
    # Fraction of the wall ceiling at which a warning becomes due.
    soft_warn_fraction: float = 0.8
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class BudgetState:
    """Mutable usage counters accumulated during one run."""

    # ``time.monotonic()`` reading taken when the run started.
    started_at_monotonic: float
    # When True, ``wall_warning_due`` no longer reports a warning as due.
    wall_warning_emitted: bool = False
    # Running token total, and whether any token charge was ever reported.
    tokens_used: int = 0
    tokens_reported: bool = False
    # Running USD total, and whether any USD charge was ever reported.
    usd_used: Decimal = Decimal("0")
    usd_reported: bool = False
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BudgetExceeded(Exception):
    """Signals that a configured run ceiling has been reached.

    Attributes:
        ceiling: Name of the ceiling that was hit.
        limit: The configured limit, as supplied by the caller.
        actual: The observed value, as supplied by the caller.
    """

    def __init__(self, *, ceiling: str, limit: Any, actual: Any):
        self.ceiling = ceiling
        self.limit = limit
        self.actual = actual
        message = f"{ceiling} budget reached: {actual} >= {limit}"
        super().__init__(message)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def started_now() -> BudgetState:
    """Return a fresh ``BudgetState`` whose start time is the current monotonic clock."""
    start = monotonic()
    return BudgetState(started_at_monotonic=start)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def parse_usd(value: str | None) -> Decimal | None:
    """Parse a ``--max-usd`` argument into a non-negative finite ``Decimal``.

    Returns ``None`` when *value* is ``None``. Raises ``ValueError`` when the
    text is not a decimal number, is NaN or infinite, or is negative.
    """
    if value is None:
        return None

    try:
        parsed = Decimal(value)
    except InvalidOperation as exc:
        raise ValueError("--max-usd must be a decimal number") from exc

    # The finiteness check comes first so NaN never reaches the ordering
    # comparison below.
    if not parsed.is_finite():
        raise ValueError("--max-usd must be a finite decimal number")
    if parsed < 0:
        raise ValueError("--max-usd must be 0 or greater")
    return parsed
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def validate_config(config: BudgetConfig) -> None:
    """Raise ``ValueError`` when any configured ceiling is out of range.

    Every set ceiling must be 0 or greater, and ``soft_warn_fraction`` must
    lie in ``(0, 1]``. A NaN ``max_wall_seconds`` is rejected as well.
    """
    wall = config.max_wall_seconds
    # ``not wall >= 0`` rather than ``wall < 0``: NaN fails both comparisons,
    # so only this form rejects it. An accepted NaN would silently disable
    # the wall ceiling, since ``elapsed >= nan`` is never true.
    if wall is not None and not wall >= 0:
        raise ValueError("--max-wall-seconds must be 0 or greater")
    if config.max_tokens is not None and config.max_tokens < 0:
        raise ValueError("--max-tokens must be 0 or greater")
    if config.max_usd is not None and config.max_usd < 0:
        raise ValueError("--max-usd must be 0 or greater")
    if not 0 < config.soft_warn_fraction <= 1:
        raise ValueError("--soft-warn-fraction must be greater than 0 and no more than 1")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def wall_elapsed_seconds(state: BudgetState, *, now: float | None = None) -> float:
    """Return seconds elapsed since the run started, never below zero.

    *now* overrides the monotonic clock reading when supplied.
    """
    if now is None:
        now = monotonic()
    elapsed = now - state.started_at_monotonic
    return max(0.0, elapsed)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def wall_warning_due(
    config: BudgetConfig,
    state: BudgetState,
    *,
    now: float | None = None,
) -> tuple[bool, float]:
    """Return ``(due, elapsed)`` for the soft wall-clock warning.

    ``due`` is False when no wall ceiling is configured or the warning was
    already emitted. Otherwise it is True once elapsed time reaches
    ``max_wall_seconds * soft_warn_fraction``.
    """
    elapsed = wall_elapsed_seconds(state, now=now)
    limit = config.max_wall_seconds
    if limit is None or state.wall_warning_emitted:
        return False, elapsed
    threshold = limit * config.soft_warn_fraction
    return elapsed >= threshold, elapsed
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def check_wall_budget(
    config: BudgetConfig,
    state: BudgetState,
    *,
    now: float | None = None,
) -> float:
    """Return elapsed seconds, raising ``BudgetExceeded`` at the wall ceiling.

    The ceiling counts as reached when elapsed time is greater than or equal
    to ``max_wall_seconds``. With no ceiling configured nothing is raised.
    """
    elapsed = wall_elapsed_seconds(state, now=now)
    limit = config.max_wall_seconds
    if limit is not None and elapsed >= limit:
        # Both numbers are rendered with the compact ``g`` format.
        details = {"limit": f"{limit:g}", "actual": f"{elapsed:g}"}
        raise BudgetExceeded(ceiling="wall", **details)
    return elapsed
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def record_charge(
    config: BudgetConfig,
    state: BudgetState,
    *,
    tokens: int | None = None,
    usd: Decimal | None = None,
) -> None:
    """Add a token and/or USD charge to *state* and enforce the ceilings.

    Raises:
        ValueError: if *tokens* or *usd* is negative. *state* is left
            untouched in that case.
        BudgetExceeded: if, after recording, an accumulated total is greater
            than or equal to its configured ceiling. The charge stays
            recorded.
    """
    # Validate every input before mutating anything, so a rejected call
    # cannot leave a half-applied charge behind.
    if tokens is not None and tokens < 0:
        raise ValueError("token charge must be 0 or greater")
    if usd is not None and usd < 0:
        raise ValueError("USD charge must be 0 or greater")

    if tokens is not None:
        state.tokens_reported = True
        state.tokens_used += tokens
    if usd is not None:
        state.usd_reported = True
        state.usd_used += usd

    if config.max_tokens is not None and state.tokens_used >= config.max_tokens:
        raise BudgetExceeded(
            ceiling="tokens",
            limit=config.max_tokens,
            actual=state.tokens_used,
        )
    if config.max_usd is not None and state.usd_used >= config.max_usd:
        raise BudgetExceeded(
            ceiling="usd",
            limit=str(config.max_usd),
            actual=str(state.usd_used),
        )
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Create deterministic, redacted RevRem bug-report bundles."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import gzip
|
|
6
|
+
import io
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import tarfile
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from code_review_loop import artifacts, redaction, suppressions
|
|
14
|
+
|
|
15
|
+
# Value written to the manifest's "schema_version" field.
BUG_BUNDLE_SCHEMA_VERSION = "1.0"
# Archive member name of the manifest; a run-directory file whose relative
# path equals this name is never collected.
MANIFEST_NAME = "bug-bundle.json"
# Base names that are always collected, wherever they sit under the run
# directory.
DEFAULT_JSON_NAMES = {
    "summary.json",
    "diagnostics.json",
    "events.jsonl",
    "doctor.json",
    "preflight.json",
    "profile.json",
}
# Non-JSON base names that are always collected.
DEFAULT_TEXT_NAMES = {"profile.toml"}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class BundleOptions:
    """Inputs controlling how a bug bundle is assembled."""

    # Run directory whose files are collected.
    run_dir: Path
    # Destination archive; when None a default under the current directory is used.
    output_path: Path | None = None
    # Also collect ``*.txt`` files and the raw ``suppressions.audit.jsonl``.
    include_raw_transcripts: bool = False
    # Pass collected file contents through redaction before archiving.
    redact: bool = True
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class BundleResult:
    """Outcome of building a bug bundle."""

    # Resolved location of the written archive.
    output_path: Path
    # Manifest dictionary that was also stored inside the archive.
    manifest: dict[str, object]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def create_bug_bundle(options: BundleOptions) -> BundleResult:
    """Build a gzip-compressed tar bundle from a run directory.

    Selected run files are read as UTF-8 (undecodable bytes replaced),
    redacted when ``options.redact`` is set, and stored under their path
    relative to the run directory. A summary of each suppression audit file
    is stored under ``suppressions/``. The manifest is the first archive
    member. Timestamps, ownership and mode are fixed for every member. The
    archive is written to a temporary file and then moved into place.

    Raises:
        FileNotFoundError: if ``options.run_dir`` is not a directory.
    """
    run_dir = options.run_dir.resolve()
    if not run_dir.is_dir():
        raise FileNotFoundError(f"run directory not found: {options.run_dir}")

    run_id = _run_id(run_dir)
    output_path = (options.output_path or _default_output_path(run_id, run_dir)).resolve()
    entries: list[tuple[str, bytes]] = []
    redaction_counts: dict[str, int] = {}
    for path in _bundle_files(run_dir, include_raw_transcripts=options.include_raw_transcripts):
        arcname = path.relative_to(run_dir).as_posix()
        content = path.read_text(encoding="utf-8", errors="replace")
        if options.redact:
            result = redaction.redact_text(content)
            content = result.text
            _merge_counts(redaction_counts, redaction.redaction_summary(result))
        entries.append((arcname, content.encode("utf-8")))

    # Audit files found in different directories share one file name, so
    # their summaries would otherwise collide on a single archive name and
    # produce duplicate tar members. The first keeps the plain name; later
    # ones get a numeric suffix.
    used_arcnames = {arcname for arcname, _content in entries}
    for path in _suppression_audit_paths(run_dir):
        summary = suppressions.audit_summary(path)
        if summary is None:
            continue
        stem = f"suppressions/{path.stem}"
        arcname = f"{stem}.summary.json"
        ordinal = 1
        while arcname in used_arcnames:
            ordinal += 1
            arcname = f"{stem}-{ordinal}.summary.json"
        used_arcnames.add(arcname)
        content = json.dumps(summary, ensure_ascii=False, indent=2, sort_keys=True) + "\n"
        entries.append((arcname, content.encode("utf-8")))

    manifest = {
        "schema_version": BUG_BUNDLE_SCHEMA_VERSION,
        "run_id": run_id,
        "source_run_dir_name": run_dir.name,
        "include_raw_transcripts": options.include_raw_transcripts,
        "redacted": options.redact,
        "files": [arcname for arcname, _content in entries],
        "suppression_audit_summaries": [
            arcname for arcname, _content in entries if arcname.startswith("suppressions/")
        ],
        "redaction_counts": redaction_counts,
    }
    manifest_bytes = (
        json.dumps(
            artifacts.canonicalize_json(manifest),
            ensure_ascii=False,
            indent=2,
            sort_keys=True,
        )
        + "\n"
    ).encode("utf-8")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = output_path.with_suffix(".tar.gz.tmp")
    try:
        with (
            tmp_path.open("wb") as raw,
            # Empty filename and mtime=0 keep the gzip header free of
            # per-run data.
            gzip.GzipFile(fileobj=raw, mode="wb", filename="", mtime=0) as gz,
            tarfile.open(fileobj=gz, mode="w") as tar,
        ):
            _add_bytes(tar, MANIFEST_NAME, manifest_bytes)
            for arcname, bundle_bytes in entries:
                _add_bytes(tar, arcname, bundle_bytes)
        os.replace(tmp_path, output_path)
    except BaseException:
        # Remove the partial archive on any failure, interrupts included.
        tmp_path.unlink(missing_ok=True)
        raise
    return BundleResult(output_path=output_path, manifest=manifest)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def default_output_path(run_dir: Path) -> Path:
    """Return the archive path used for *run_dir* when none is given."""
    run_id = _run_id(run_dir)
    return _default_output_path(run_id, run_dir)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _default_output_path(run_id: str, run_dir: Path) -> Path:
    """Return ``revrem-bug-<component>.tar.gz`` in the current working directory."""
    component = _bundle_name_component(run_dir, run_id=run_id)
    return Path.cwd() / f"revrem-bug-{component}.tar.gz"
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _bundle_files(run_dir: Path, *, include_raw_transcripts: bool) -> list[Path]:
    """Return the regular files under *run_dir* that belong in a bundle.

    Symlinks, non-files and a top-level manifest file are skipped. Results
    are ordered by POSIX-style path relative to *run_dir*.
    """

    def wanted(name: str, suffix: str) -> bool:
        # Selection depends only on the base name and suffix of the file.
        if name in DEFAULT_JSON_NAMES or name.startswith("check-"):
            return True
        if name in DEFAULT_TEXT_NAMES:
            return True
        if name.startswith("review-") and name.endswith("-status.json"):
            return True
        if suffix == ".json" and name.startswith(("diagnostics-", "triage-")):
            return True
        if include_raw_transcripts:
            return name == "suppressions.audit.jsonl" or suffix == ".txt"
        return False

    selected: list[tuple[str, Path]] = []
    for entry in run_dir.rglob("*"):
        if entry.is_symlink() or not entry.is_file():
            continue
        relative = entry.relative_to(run_dir).as_posix()
        if relative != MANIFEST_NAME and wanted(entry.name, entry.suffix):
            selected.append((relative, entry))
    selected.sort(key=lambda pair: pair[0])
    return [entry for _relative, entry in selected]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _add_bytes(tar: tarfile.TarFile, arcname: str, content: bytes) -> None:
|
|
149
|
+
info = tarfile.TarInfo(arcname)
|
|
150
|
+
info.size = len(content)
|
|
151
|
+
info.mtime = 0
|
|
152
|
+
info.mode = 0o644
|
|
153
|
+
info.uid = 0
|
|
154
|
+
info.gid = 0
|
|
155
|
+
info.uname = ""
|
|
156
|
+
info.gname = ""
|
|
157
|
+
tar.addfile(info, io.BytesIO(content))
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _run_id(run_dir: Path) -> str:
|
|
161
|
+
summary_path = run_dir / "summary.json"
|
|
162
|
+
if summary_path.is_file() and not summary_path.is_symlink():
|
|
163
|
+
try:
|
|
164
|
+
summary = json.loads(summary_path.read_text(encoding="utf-8"))
|
|
165
|
+
except (OSError, json.JSONDecodeError):
|
|
166
|
+
summary = {}
|
|
167
|
+
run_id = summary.get("run_id") if isinstance(summary, dict) else None
|
|
168
|
+
if isinstance(run_id, str):
|
|
169
|
+
return run_id
|
|
170
|
+
return run_dir.name
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _bundle_name_component(run_dir: Path, *, run_id: str | None = None) -> str:
|
|
174
|
+
run_id = run_id if run_id is not None else _run_id(run_dir)
|
|
175
|
+
if run_id:
|
|
176
|
+
candidate = Path(run_id).name
|
|
177
|
+
if candidate not in {"", ".", ".."}:
|
|
178
|
+
return candidate
|
|
179
|
+
if run_dir.name not in {"", ".", ".."}:
|
|
180
|
+
return run_dir.name
|
|
181
|
+
return "run"
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _suppression_audit_paths(run_dir: Path) -> list[Path]:
    """Return the distinct suppression audit files relevant to *run_dir*, sorted.

    Three locations are probed; only regular, non-symlink files are kept.
    """
    candidates = (
        _owning_revrem_audit_path(run_dir),
        suppressions.repo_audit_path(run_dir),
        run_dir / ".revrem" / "suppressions.audit.jsonl",
    )
    existing = {
        candidate
        for candidate in candidates
        if candidate.is_file() and not candidate.is_symlink()
    }
    return sorted(existing)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _owning_revrem_audit_path(run_dir: Path) -> Path:
|
|
197
|
+
for ancestor in (run_dir, *run_dir.parents):
|
|
198
|
+
if ancestor.name == ".revrem":
|
|
199
|
+
return ancestor / "suppressions.audit.jsonl"
|
|
200
|
+
return run_dir / ".revrem" / "suppressions.audit.jsonl"
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _merge_counts(target: dict[str, int], source: dict[str, int]) -> None:
|
|
204
|
+
for key, value in source.items():
|
|
205
|
+
target[key] = target.get(key, 0) + value
|