git-agent-ratchet 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- git_agent_ratchet/__init__.py +5 -0
- git_agent_ratchet/__main__.py +3 -0
- git_agent_ratchet/_version.py +1 -0
- git_agent_ratchet/baseline.py +82 -0
- git_agent_ratchet/cli.py +53 -0
- git_agent_ratchet/hooks/__init__.py +1 -0
- git_agent_ratchet/hooks/anti_bypass.py +66 -0
- git_agent_ratchet/hooks/deny_agent_chatter.py +57 -0
- git_agent_ratchet/hooks/max_file_lines.py +128 -0
- git_agent_ratchet/hooks/no_duplicate_helpers.py +135 -0
- git_agent_ratchet/paths.py +18 -0
- git_agent_ratchet/py.typed +0 -0
- git_agent_ratchet/ratchets/__init__.py +1 -0
- git_agent_ratchet/ratchets/agent_chatter.py +79 -0
- git_agent_ratchet/ratchets/anti_bypass.py +100 -0
- git_agent_ratchet/ratchets/duplicate_helpers.py +89 -0
- git_agent_ratchet/ratchets/extractors/__init__.py +40 -0
- git_agent_ratchet/ratchets/extractors/csharp_ext.py +51 -0
- git_agent_ratchet/ratchets/extractors/python_ext.py +49 -0
- git_agent_ratchet/ratchets/extractors/typescript_ext.py +67 -0
- git_agent_ratchet/ratchets/max_file_lines.py +84 -0
- git_agent_ratchet-1.1.0.dist-info/METADATA +521 -0
- git_agent_ratchet-1.1.0.dist-info/RECORD +26 -0
- git_agent_ratchet-1.1.0.dist-info/WHEEL +4 -0
- git_agent_ratchet-1.1.0.dist-info/entry_points.txt +6 -0
- git_agent_ratchet-1.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.0"
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Baseline registry: load, validate, and persist the ratchet JSON state."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
# Schema identifier written under the "$schema" key of every saved registry.
SCHEMA_URL = "https://git-agent-ratchet.org/schemas/v1.json"
# Value recorded as ``last_updated_by`` whenever this module writes a registry.
DEFAULT_AUTHOR = "git-agent-ratchet-core"


@dataclass
class Baseline:
    """In-memory view of a ratchet baseline registry file.

    The on-disk document is a JSON object with a ``ratchet_meta`` mapping and a
    ``baselines`` mapping keyed by ratchet name. Each baseline entry written by
    :meth:`set_entry` holds a ``metric_value`` and an ``items`` list.
    """

    # Location of the JSON registry file.
    path: Path
    # Metadata block persisted under "ratchet_meta".
    ratchet_meta: dict[str, Any] = field(default_factory=dict)
    # Per-ratchet entries persisted under "baselines".
    baselines: dict[str, dict[str, Any]] = field(default_factory=dict)

    @classmethod
    def load(cls, path: Path) -> Baseline:
        """Load a baseline from disk; return an empty baseline if missing.

        Raises:
            ValueError: if the file is not valid JSON (``json.JSONDecodeError``
                is a ``ValueError`` subclass) or its root is not a JSON object.
        """
        if not path.exists():
            return cls(path=path, ratchet_meta=_empty_meta(), baselines={})
        with path.open("r", encoding="utf-8") as fh:
            data = json.load(fh)
        # A list/number/string root would otherwise fail later with an opaque
        # AttributeError on ``.get``; report the real problem instead.
        if not isinstance(data, dict):
            raise ValueError(
                f"baseline registry {path} must contain a JSON object, "
                f"got {type(data).__name__}"
            )
        return cls(
            path=path,
            ratchet_meta=data.get("ratchet_meta", _empty_meta()),
            baselines=data.get("baselines", {}),
        )

    def get_metric(self, name: str) -> int | None:
        """Return the stored metric_value for a ratchet, or None if absent."""
        entry = self.baselines.get(name)
        if entry is None:
            return None
        value = entry.get("metric_value")
        return int(value) if value is not None else None

    def set_entry(
        self,
        name: str,
        metric_value: int,
        items: list[dict[str, Any]],
    ) -> None:
        """Replace the stored entry for a ratchet with a fresh metric and item list."""
        self.baselines[name] = {
            "metric_value": int(metric_value),
            "items": items,
        }

    def save(self, repo_signature: str | None = None) -> None:
        """Persist the baseline to disk as indented JSON with a trailing newline.

        ``last_updated_by`` is always set to ``DEFAULT_AUTHOR``. An explicit
        ``repo_signature`` is stored as given. Otherwise an existing non-empty
        signature is kept, and a missing or empty one is derived from the
        current baselines.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        meta = dict(self.ratchet_meta)
        meta["last_updated_by"] = DEFAULT_AUTHOR
        if repo_signature is not None:
            meta["repo_signature"] = repo_signature
        elif not meta.get("repo_signature"):
            # ``_empty_meta()`` seeds the key with "", so a plain membership
            # test would never derive a signature for a freshly created file.
            meta["repo_signature"] = _signature_from_baselines(self.baselines)
        payload = {
            "$schema": SCHEMA_URL,
            "ratchet_meta": meta,
            "baselines": self.baselines,
        }
        text = json.dumps(payload, indent=2, sort_keys=False) + "\n"
        self.path.write_text(text, encoding="utf-8")
        # Keep the in-memory view in sync with what was just written.
        self.ratchet_meta = meta


def _empty_meta() -> dict[str, Any]:
    """Return the metadata block used when no registry exists yet."""
    return {"repo_signature": "", "last_updated_by": DEFAULT_AUTHOR}


def _signature_from_baselines(baselines: dict[str, dict[str, Any]]) -> str:
    """Derive a deterministic content signature for change detection.

    The baselines are serialised with sorted keys so equal content always
    hashes to the same ``sha256:<hex>`` string.
    """
    canonical = json.dumps(baselines, sort_keys=True).encode("utf-8")
    return "sha256:" + hashlib.sha256(canonical).hexdigest()
|
git_agent_ratchet/cli.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Unified CLI dispatcher: `git-agent-ratchet <subcommand>`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from collections.abc import Sequence
|
|
7
|
+
|
|
8
|
+
from git_agent_ratchet._version import __version__
|
|
9
|
+
from git_agent_ratchet.hooks import (
|
|
10
|
+
anti_bypass,
|
|
11
|
+
deny_agent_chatter,
|
|
12
|
+
max_file_lines,
|
|
13
|
+
no_duplicate_helpers,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Subcommand name -> ``main`` entry point of the hook module that implements it.
SUBCOMMANDS = {
    "no-duplicate-helpers": no_duplicate_helpers.main,
    "deny-agent-chatter": deny_agent_chatter.main,
    "anti-bypass": anti_bypass.main,
    "max-file-lines": max_file_lines.main,
}


def build_parser() -> argparse.ArgumentParser:
    """Build the dispatcher parser.

    The parser accepts ``--version``, one subcommand name restricted to the
    keys of ``SUBCOMMANDS``, and captures everything after it untouched so it
    can be forwarded to the selected hook.
    """
    blurb = "git-agent-ratchet: deterministic git ratchets for guarding against agent drift."
    dispatcher = argparse.ArgumentParser(prog="git-agent-ratchet", description=blurb)
    dispatcher.add_argument("--version", action="version", version=f"%(prog)s {__version__}")
    dispatcher.add_argument(
        "subcommand", choices=sorted(SUBCOMMANDS), help="The ratchet to invoke."
    )
    dispatcher.add_argument(
        "args",
        help="Arguments forwarded to the chosen subcommand.",
        nargs=argparse.REMAINDER,
    )
    return dispatcher
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Parse the subcommand name and hand the remaining arguments to its hook.

    Returns whatever exit code the selected hook's ``main`` returns.
    """
    parsed = build_parser().parse_args(argv)
    return SUBCOMMANDS[parsed.subcommand](parsed.args)


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Pre-commit hook entry points and the unified CLI dispatcher."""
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"""Hook: ratchet-anti-bypass (Ratchet C)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
|
|
9
|
+
from git_agent_ratchet.ratchets.anti_bypass import BYPASS_KEY_ENV, evaluate
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the parser for the ``ratchet-anti-bypass`` hook.

    ``--enforce-files`` is mandatory and carries a comma-separated path list;
    any positional arguments are collected as ``filenames``.
    """
    summary = (
        "Fail when an automated process attempts to mutate protected ratchet "
        "configuration files without the human bypass key."
    )
    enforce_help = (
        "Comma-separated list of repo-relative file paths that may only be "
        "mutated when HUMAN_RATCHET_BYPASS_KEY is set."
    )
    hook_parser = argparse.ArgumentParser(prog="ratchet-anti-bypass", description=summary)
    hook_parser.add_argument("--enforce-files", help=enforce_help, required=True)
    hook_parser.add_argument(
        "filenames", help="Staged files supplied by pre-commit.", nargs="*"
    )
    return hook_parser
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _split_enforce_files(value: str) -> list[str]:
    """Split a comma-separated string into trimmed entries, dropping blank ones."""
    entries: list[str] = []
    for chunk in value.split(","):
        cleaned = chunk.strip()
        if cleaned:
            entries.append(cleaned)
    return entries
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the anti-bypass gate over the staged files.

    Returns 0 when ``evaluate`` does not block the commit. Otherwise writes a
    report to stderr (reason, touched protected files, optional agent signal,
    remediation hint) and returns 1.
    """
    options = build_parser().parse_args(argv)
    decision = evaluate(
        staged_files=options.filenames,
        protected_files=_split_enforce_files(options.enforce_files),
    )
    if not decision.blocked:
        return 0

    report = [
        "[ratchet] anti_bypass: GATE TRIPPED.",
        f"  reason: {decision.reason}",
        "  protected files in this commit:",
    ]
    report.extend(f"    - {path}" for path in decision.touched_protected_files)
    if decision.agent_signal:
        report.append(f"  agent signal: {decision.agent_signal}")
    report.append(
        f"  Fix: a human operator must export {BYPASS_KEY_ENV}=<value> in their "
        "shell and re-run the commit. Agents must not set this variable."
    )
    for entry in report:
        print(entry, file=sys.stderr)
    return 1


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Hook: ratchet-deny-agent-chatter (Ratchet B)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from git_agent_ratchet.ratchets.agent_chatter import scan_files
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the parser for the ``ratchet-deny-agent-chatter`` hook.

    The only argument is the positional ``filenames`` list (zero or more).
    """
    summary = (
        "Fail when staged files contain conversational agent-chatter artifacts "
        "(e.g. 'Sure, I can help with...', 'As an AI, ...')."  # ratchet-allow: agent_chatter
    )
    hook_parser = argparse.ArgumentParser(
        prog="ratchet-deny-agent-chatter",
        description=summary,
    )
    hook_parser.add_argument(
        "filenames",
        help="Files supplied by pre-commit. Each file is scanned for chatter signatures.",
        nargs="*",
    )
    return hook_parser
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Scan the given files for chatter signatures.

    Returns 0 when ``scan_files`` reports nothing. Otherwise lists every match
    on stderr as ``file:line [signature] text`` and returns 1.
    """
    options = build_parser().parse_args(argv)
    hits = scan_files([Path(name) for name in options.filenames])
    if not hits:
        return 0

    report = [
        "[ratchet] agent_chatter: GATE TRIPPED.",
        "  Conversational agent artifacts detected in the following staged files:",
    ]
    report.extend(
        f"    {hit.file}:{hit.line_number} [{hit.signature}] {hit.line}" for hit in hits
    )
    report.append("  Fix: remove the conversational preamble/postscript and re-stage the file.")
    for entry in report:
        print(entry, file=sys.stderr)
    return 1


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Hook: ratchet-max-file-lines (Ratchet D)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from git_agent_ratchet.baseline import Baseline
|
|
11
|
+
from git_agent_ratchet.ratchets.max_file_lines import (
|
|
12
|
+
DEFAULT_MAX_LINES,
|
|
13
|
+
RATCHET_NAME,
|
|
14
|
+
OversizedFile,
|
|
15
|
+
metric_value,
|
|
16
|
+
scan_directory,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the parser for the ``ratchet-max-file-lines`` hook.

    Options: ``--baseline`` (registry path), ``--dir`` (scan root, stored as
    ``directory``), ``--max`` (stored as ``max_lines``), repeatable
    ``--exclude``, plus positional ``filenames``.
    """
    summary = (
        "Fail when the total line overage across over-sized source files "
        "exceeds the recorded baseline. Shrinks are recorded automatically "
        "and staged back into the commit."
    )
    hook_parser = argparse.ArgumentParser(prog="ratchet-max-file-lines", description=summary)
    hook_parser.add_argument(
        "--baseline",
        default=Path("config/ratchets/file_lines.json"),
        help="Path to the JSON baseline registry file.",
        type=Path,
    )
    hook_parser.add_argument(
        "--dir",
        default=Path("src"),
        dest="directory",
        help="Directory tree to scan for over-sized files.",
        type=Path,
    )
    hook_parser.add_argument(
        "--max",
        default=DEFAULT_MAX_LINES,
        dest="max_lines",
        help=f"Per-file line-count limit (default: {DEFAULT_MAX_LINES}).",
        type=int,
    )
    hook_parser.add_argument(
        "--exclude",
        action="append",
        default=None,
        help="Directory name to exclude (repeatable). Defaults to tests/test.",
    )
    hook_parser.add_argument(
        "filenames",
        help="Files supplied by pre-commit (ignored; full directory scan is used).",
        nargs="*",
    )
    return hook_parser
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _emit_oversized(oversized: list[OversizedFile], max_lines: int) -> str:
    """Render one indented bullet per over-sized file, or a ``(none)`` placeholder.

    Each bullet shows the file's path, line count, and overage relative to
    ``max_lines``; bullets are joined with newlines.
    """
    if oversized:
        return "\n".join(
            f"    - {item.path}: {item.line_count} lines (+{item.overage} over {max_lines})"
            for item in oversized
        )
    return "    (none)"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Compare the current line-overage metric against the recorded baseline.

    * No recorded metric: write the current value as the baseline, return 0.
    * Current above recorded: print a report to stderr, return 1.
    * Current below recorded: overwrite the baseline with the lower value,
      return 0.
    * Equal: return 0 without touching the registry.
    """
    options = build_parser().parse_args(argv)

    excluded = ("tests", "test") if not options.exclude else tuple(options.exclude)
    oversized = scan_directory(
        options.directory,
        max_lines=options.max_lines,
        exclude_dirs=excluded,
    )
    current = metric_value(oversized)

    registry = Baseline.load(options.baseline)
    recorded = registry.get_metric(RATCHET_NAME)

    if recorded is not None:
        if current > recorded:
            report = [
                f"[ratchet] {RATCHET_NAME}: GATE TRIPPED.",
                f"  baseline overage = {recorded}",
                f"  current overage  = {current}",
                f"  delta            = +{current - recorded}",
                f"  over-sized files (max={options.max_lines}):",
                _emit_oversized(oversized, options.max_lines),
                "  Rule: per-file line counts may not grow past their recorded baseline.",
                "  Fix: split the file into focused modules, or extract a helper into "
                "an existing module that already owns the concept.",
            ]
            print("\n".join(report), file=sys.stderr)
            return 1
        if current == recorded:
            return 0

    # Either there is no baseline yet (seed) or the metric shrank (ratchet
    # down); both cases record the current state in the registry.
    registry.set_entry(
        name=RATCHET_NAME,
        metric_value=current,
        items=[entry.to_dict() for entry in oversized],
    )
    registry.save()
    if recorded is None:
        notice = (
            f"[ratchet] {RATCHET_NAME}: seeded baseline at {options.baseline} "
            f"(metric_value={current})."
        )
    else:
        notice = (
            f"[ratchet] {RATCHET_NAME}: baseline ratcheted down "
            f"({recorded} -> {current}); registry restaged."
        )
    print(notice)
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Hook: ratchet-no-duplicate-helpers (Ratchet A)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
from collections.abc import Sequence
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from git_agent_ratchet.baseline import Baseline
|
|
11
|
+
from git_agent_ratchet.ratchets.duplicate_helpers import (
|
|
12
|
+
RATCHET_NAME,
|
|
13
|
+
DuplicateHelper,
|
|
14
|
+
metric_value,
|
|
15
|
+
scan_directory,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the parser for the ``ratchet-no-duplicate-helpers`` hook.

    Options: ``--baseline`` (registry path), ``--dir`` (scan root, stored as
    ``directory``), repeatable ``--exclude``, repeatable ``--lang`` (stored as
    ``languages``), plus positional ``filenames``.
    """
    summary = (
        "Fail when the count of duplicate private helper functions across the "
        "target directory exceeds the recorded baseline. Shrinks are recorded "
        "automatically and staged back into the commit."
    )
    language_help = (
        "Restrict scanning to one or more languages (repeatable). "
        "Default: all registered extractors."
    )
    hook_parser = argparse.ArgumentParser(
        prog="ratchet-no-duplicate-helpers",
        description=summary,
    )
    hook_parser.add_argument(
        "--baseline",
        default=Path("config/ratchets/duplicates.json"),
        help="Path to the JSON baseline registry file.",
        type=Path,
    )
    hook_parser.add_argument(
        "--dir",
        default=Path("src"),
        dest="directory",
        help="Directory tree to scan for duplicate helpers.",
        type=Path,
    )
    hook_parser.add_argument(
        "--exclude",
        action="append",
        default=None,
        help="Directory name to exclude (repeatable). Defaults to tests/test.",
    )
    hook_parser.add_argument(
        "--lang",
        action="append",
        choices=["python", "typescript", "csharp"],
        default=None,
        dest="languages",
        help=language_help,
    )
    hook_parser.add_argument(
        "filenames",
        help="Files supplied by pre-commit (ignored; full directory scan is used).",
        nargs="*",
    )
    return hook_parser
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _emit_duplicates(duplicates: list[DuplicateHelper]) -> str:
    """Render one indented bullet per duplicate helper, or a ``(none)`` placeholder.

    Each bullet shows the helper name followed by its comma-separated
    occurrences in square brackets; bullets are joined with newlines.
    """
    if duplicates:
        return "\n".join(
            f"    - {entry.name} -> [{', '.join(entry.occurrences)}]" for entry in duplicates
        )
    return "    (none)"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Compare the current duplicate-helper metric against the recorded baseline.

    * No recorded metric: write the current value as the baseline, return 0.
    * Current above recorded: print a report to stderr, return 1.
    * Current below recorded: overwrite the baseline with the lower value,
      return 0.
    * Equal: return 0 without touching the registry.
    """
    options = build_parser().parse_args(argv)

    excluded = ("tests", "test") if not options.exclude else tuple(options.exclude)
    duplicates = scan_directory(
        options.directory,
        exclude_dirs=excluded,
        languages=options.languages,
    )
    current = metric_value(duplicates)

    registry = Baseline.load(options.baseline)
    recorded = registry.get_metric(RATCHET_NAME)

    if recorded is not None:
        if current > recorded:
            report = [
                f"[ratchet] {RATCHET_NAME}: GATE TRIPPED.",
                f"  baseline metric_value = {recorded}",
                f"  current metric_value  = {current}",
                f"  delta                 = +{current - recorded}",
                "  duplicates now present:",
                _emit_duplicates(duplicates),
                "  Rule: duplicate-helper occurrences are not permitted to grow.",
                "  Fix: reuse the existing helper instead of forking a new one, "
                "or rename the new function so it's not a private helper.",
            ]
            print("\n".join(report), file=sys.stderr)
            return 1
        if current == recorded:
            return 0

    # Either there is no baseline yet (seed) or the metric shrank (ratchet
    # down); both cases record the current state in the registry.
    registry.set_entry(
        name=RATCHET_NAME,
        metric_value=current,
        items=[entry.to_dict() for entry in duplicates],
    )
    registry.save()
    if recorded is None:
        notice = (
            f"[ratchet] {RATCHET_NAME}: seeded baseline at {options.baseline} "
            f"(metric_value={current})."
        )
    else:
        notice = (
            f"[ratchet] {RATCHET_NAME}: baseline ratcheted down "
            f"({recorded} -> {current}); registry restaged."
        )
    print(notice)
    return 0


if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main())
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Shared path utilities used by the ratchet scanners."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def relative_posix(path: Path, anchor: Path) -> str:
    """Render ``path`` with forward slashes, relative to ``anchor`` if it lies beneath it.

    Both arguments are resolved before comparison. When the resolved ``path``
    is not located under the resolved ``anchor``, the original (unresolved)
    ``path`` is rendered instead.
    """
    try:
        return path.resolve().relative_to(anchor.resolve()).as_posix()
    except ValueError:
        # relative_to() signals "not a sub-path" with ValueError.
        return path.as_posix()
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Ratchet modules: scanners that produce structural metrics for the registry."""
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""Ratchet B: lexical detection of agent-chatter artifacts leaking into files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
RATCHET_NAME = "agent_chatter"

# Per-line opt-out: the scanner skips any line that contains this literal
# substring. It exists for repos that quote chatter on purpose (this codebase,
# security regression tests, docs that explain the rule), in the same spirit
# as ruff's noqa comments.
ALLOW_MARKER = "ratchet-allow: agent_chatter"

# Signature table taken from the spec: (label shown in failure output,
# compiled pattern searched for within each line).
CHATTER_SIGNATURES: tuple[tuple[str, re.Pattern[str]], ...] = (
    ("sure-i-can-help-with", re.compile(r"(?i)(sure,\s)?i\scan\shelp\swith")),
    ("as-an-ai", re.compile(r"(?i)as\san\sai,\s(i\s)?")),
    ("i-have-successfully", re.compile(r"(?i)i\shave\ssuccessfully\s(modified|updated)")),
    ("now-let-me-check", re.compile(r"(?i)now\slet\sme\scheck\sthe\s(docs|dir)")),
)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class ChatterMatch:
    """Immutable record of one line that hit a chatter signature."""

    # Label of the file the line came from.
    file: str
    # 1-based position of the line within the scanned text.
    line_number: int
    # Label of the signature that matched.
    signature: str
    # The matching line with trailing whitespace removed.
    line: str
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def scan_text(text: str, file_label: str) -> list[ChatterMatch]:
    """Return one ``ChatterMatch`` per line of ``text`` that hits a chatter signature.

    Lines containing ``ALLOW_MARKER`` are skipped. A line is reported at most
    once, under the first signature in ``CHATTER_SIGNATURES`` that matches it.
    Line numbers start at 1.
    """
    found: list[ChatterMatch] = []
    for number, content in enumerate(text.splitlines(), start=1):
        if ALLOW_MARKER in content:
            continue
        # First matching signature wins; None means the line is clean.
        label = next(
            (name for name, regex in CHATTER_SIGNATURES if regex.search(content)),
            None,
        )
        if label is None:
            continue
        found.append(
            ChatterMatch(
                file=file_label,
                line_number=number,
                signature=label,
                line=content.rstrip(),
            )
        )
    return found
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def scan_files(paths: Iterable[Path]) -> list[ChatterMatch]:
    """Scan every path as UTF-8 text and collect all chatter matches.

    Files that cannot be read or decoded are skipped without any report.
    Each match is labelled with ``str(path)``.
    """
    found: list[ChatterMatch] = []
    for candidate in paths:
        try:
            content = candidate.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError):
            # Best-effort scan: unreadable or non-UTF-8 files are ignored.
            continue
        found += scan_text(content, file_label=str(candidate))
    return found
|