agent-write-gate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
agentgate/config.py ADDED
@@ -0,0 +1,171 @@
1
+ """config.py -- Load agentgate configuration.
2
+
3
+ Looks for config in order:
4
+ 1. Explicit path (--config flag)
5
+ 2. agentgate.toml in current directory
6
+ 3. [tool.agentgate] in pyproject.toml in current directory
7
+ 4. Built-in defaults
8
+
9
+ Uses tomllib (Python 3.11+) when available; on older Python it falls back to
10
+ the third-party tomli package and raises ConfigError if tomli is missing.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import sys
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional
19
+
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # Default values (mirrors section 4 of the design doc)
23
+ # ---------------------------------------------------------------------------
24
+
25
# File suffixes used as the default for UnicodeConfig.code_extensions.
# Kept as a list; UnicodeConfig copies it for every new instance.
_DEFAULT_CODE_EXTENSIONS = [
    ".py",
    ".js",
    ".ts",
    ".go",
    ".rs",
    ".java",
    ".c",
    ".cpp",
    ".rb",
    ".php",
    ".sh",
    ".sql",
]
29
+
30
+
31
@dataclass
class PolicyConfig:
    """Action name configured for each issue severity.

    Defaults: high -> "block", medium -> "warn", low -> "ignore".
    """

    # Action for issues whose severity is "high".
    high: str = "block"
    # Action for issues whose severity is "medium".
    medium: str = "warn"
    # Action for issues whose severity is "low".
    low: str = "ignore"
36
+
37
+
38
@dataclass
class CjkConfig:
    """Settings for the "cjk" check (disabled by default)."""

    # Whether the check is enabled.
    enabled: bool = False
    # Minimum confidence setting, stored as a string; defaults to "high".
    min_confidence: str = "high"
42
+
43
+
44
@dataclass
class UnicodeConfig:
    """Settings for the "unicode" check (enabled by default)."""

    # Whether the check is enabled.
    enabled: bool = True
    # Optional sub-checks and switches; all off by default.
    homoglyph: bool = False
    strict_zerowidth: bool = False
    allow_bidi_suppression: bool = False
    # Each instance gets its own copy so mutating one config never changes
    # the module-level default list.
    code_extensions: List[str] = field(
        default_factory=lambda: _DEFAULT_CODE_EXTENSIONS.copy()
    )
51
+
52
+
53
@dataclass
class GateConfig:
    """Complete agentgate configuration.

    Holds one settings object per check plus the severity policy. Every
    field is built by its own default factory, so instances share no state.
    """

    # Settings for the "cjk" check.
    cjk: CjkConfig = field(default_factory=CjkConfig)
    # Settings for the "unicode" check.
    unicode: UnicodeConfig = field(default_factory=UnicodeConfig)
    # Severity -> action mapping.
    policy: PolicyConfig = field(default_factory=PolicyConfig)
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # TOML loading (stdlib tomllib on 3.11+; tomli backport otherwise)
62
+ # ---------------------------------------------------------------------------
63
+
64
class ConfigError(Exception):
    """Raised when a config file exists but cannot be read or parsed."""


def _load_toml(path: Path) -> Optional[Dict[str, Any]]:
    """Load a TOML file.

    Returns the parsed document as a dict, or None if ``path`` does not
    exist.

    Raises ConfigError if the file exists but cannot be used, so a broken
    config is never silently ignored. That covers invalid TOML, content that
    is not valid UTF-8, a path that cannot be opened or read (for example a
    directory), and a missing ``tomli`` package on Python < 3.11.
    """
    if not path.exists():
        return None

    if sys.version_info >= (3, 11):
        import tomllib as _toml  # type: ignore
    else:
        try:
            import tomli as _toml  # type: ignore
        except ImportError as exc:  # pragma: no cover - tomli is a declared dep
            raise ConfigError(
                f"{path}: parsing TOML on Python < 3.11 requires the 'tomli' "
                "package (install agent-write-gate, which depends on it)."
            ) from exc

    try:
        with open(path, "rb") as fh:
            return _toml.load(fh)
    except (_toml.TOMLDecodeError, UnicodeDecodeError) as exc:
        # The parser decodes the bytes itself, so invalid UTF-8 surfaces as
        # UnicodeDecodeError rather than TOMLDecodeError.
        raise ConfigError(f"{path}: invalid TOML: {exc}") from exc
    except OSError as exc:
        # The path exists but cannot be opened or read.
        raise ConfigError(f"{path}: cannot read config file: {exc}") from exc
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Config builder
99
+ # ---------------------------------------------------------------------------
100
+
101
# Action names a [policy] entry may use.
_POLICY_ACTIONS = ("block", "warn", "ignore")


def _policy_action(severity: str, value: Any) -> str:
    """Return ``value`` as a canonical policy action for ``severity``.

    Surrounding whitespace and letter case are ignored. Any other value
    raises ConfigError: actions are later compared with exact string
    equality, so a typo such as "blok" would otherwise silently stop the
    gate from blocking.
    """
    action = str(value).strip().lower()
    if action not in _POLICY_ACTIONS:
        raise ConfigError(
            f"policy.{severity}: unknown action {value!r} "
            f"(expected one of: {', '.join(_POLICY_ACTIONS)})"
        )
    return action


def _apply_dict(cfg: "GateConfig", d: Dict[str, Any]) -> None:
    """Apply a parsed TOML dict onto a GateConfig (mutates in place).

    Recognized keys:
      * ``checks.cjk`` / ``checks.unicode`` as a boolean toggle the check.
      * ``checks.cjk`` as a table enables the check unless it sets
        ``enabled``; it may also set ``min_confidence``.
      * ``checks.unicode`` as a table may set ``enabled``, ``homoglyph``,
        ``strict_zerowidth``, ``allow_bidi_suppression`` and
        ``code_extensions`` (the last only when it is a list).
      * ``policy.high`` / ``policy.medium`` / ``policy.low`` set the action
        for that severity.

    Keys that are absent leave the current value untouched. A ``checks`` or
    ``policy`` value that is not a table is ignored.

    Raises ConfigError if a policy entry names an unknown action.
    """
    checks = d.get("checks", {})
    if isinstance(checks, dict):
        cjk_value = checks.get("cjk")
        unicode_value = checks.get("unicode")

        # Shorthand form: a bare boolean directly under [checks].
        if isinstance(cjk_value, bool):
            cfg.cjk.enabled = cjk_value
        if isinstance(unicode_value, bool):
            cfg.unicode.enabled = unicode_value

        # Table form: [checks.cjk]
        if isinstance(cjk_value, dict):
            cfg.cjk.enabled = True  # sub-table presence implies enabled
            if "enabled" in cjk_value:
                cfg.cjk.enabled = bool(cjk_value["enabled"])
            if "min_confidence" in cjk_value:
                cfg.cjk.min_confidence = str(cjk_value["min_confidence"])

        # Table form: [checks.unicode]
        if isinstance(unicode_value, dict):
            for flag in (
                "enabled",
                "homoglyph",
                "strict_zerowidth",
                "allow_bidi_suppression",
            ):
                if flag in unicode_value:
                    setattr(cfg.unicode, flag, bool(unicode_value[flag]))
            extensions = unicode_value.get("code_extensions")
            if isinstance(extensions, list):
                # Copy so the config does not alias the parsed document.
                cfg.unicode.code_extensions = list(extensions)

    policy = d.get("policy", {})
    if isinstance(policy, dict):
        for severity in ("high", "medium", "low"):
            if severity in policy:
                setattr(
                    cfg.policy,
                    severity,
                    _policy_action(severity, policy[severity]),
                )
142
+
143
+
144
def load_config(explicit_path: Optional[Path] = None) -> "GateConfig":
    """Load configuration, returning GateConfig with defaults for any missing keys.

    Lookup order:
      1. ``explicit_path`` when given. No other location is consulted.
      2. ``agentgate.toml`` in the current directory.
      3. The ``[tool.agentgate]`` table of ``pyproject.toml`` in the current
         directory.
      4. Built-in defaults.

    ConfigError raised while loading a file propagates to the caller.
    """
    cfg = GateConfig()

    if explicit_path is not None:
        # NOTE(review): a missing explicit path yields plain defaults without
        # any error; confirm that callers really want a mistyped --config
        # path to go unnoticed.
        raw = _load_toml(explicit_path)
        if raw is not None:
            _apply_dict(cfg, raw)
        return cfg

    # Try agentgate.toml
    raw = _load_toml(Path("agentgate.toml"))
    if raw is not None:
        _apply_dict(cfg, raw)
        return cfg

    # Try [tool.agentgate] in pyproject.toml
    raw = _load_toml(Path("pyproject.toml"))
    if raw is not None:
        # "tool" is normally a table, but TOML also allows a scalar under
        # that key; treat anything that is not a table as "no section"
        # instead of failing with AttributeError.
        tool_table = raw.get("tool")
        section = tool_table.get("agentgate") if isinstance(tool_table, dict) else None
        if isinstance(section, dict):
            _apply_dict(cfg, section)

    return cfg
agentgate/model.py ADDED
@@ -0,0 +1,34 @@
1
+ """model.py -- Core dataclasses for agentgate.
2
+
3
+ WriteEvent: normalized representation of an agent write action.
4
+ Issue: a single finding from a check.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+
11
+
12
@dataclass
class WriteEvent:
    """One agent write action, normalized from a hook payload."""

    # Originating agent: "claude-code" | "codex" | "generic"
    agent: str
    # Hook phase: "pre" | "post" | "unknown"
    phase: str
    # Tool that performed the write: "Write" | "Edit" | "apply_patch" | "unknown"
    tool: str
    # Best-effort target path; "<stdin>" if absent
    file_path: str
    # The added/written text to inspect
    content: str
21
+
22
+
23
@dataclass
class Issue:
    """One finding produced by a check run."""

    # Name of the check that produced the finding: "cjk" | "unicode"
    check: str
    # Rule identifier: "MH001" | "AG-BIDI" | "AG-INVIS" | "AG-HOMO"
    rule_id: str
    # Severity: "high" | "medium" | "low"
    severity: str
    # Position of the finding in the inspected content.
    line: int
    col: int
    # Human-readable description of the finding.
    message: str
    # Text excerpt shown alongside the finding.
    excerpt: str
    # Optional replacement hint; empty when there is none.
    suggestion: str = ""
agentgate/policy.py ADDED
@@ -0,0 +1,94 @@
1
+ """policy.py -- Severity-to-action mapping and suppression rules.
2
+
3
+ Policy:
4
+ high -> block (exit 2)
5
+ medium -> warn (exit 0, logged)
6
+ low -> ignore
7
+
8
+ Suppression:
9
+ Only rule-specific directives: `agentgate: ignore[AG-INVIS]`
10
+ NO bare `agentgate: ignore` (that would let models launder violations).
11
+ AG-BIDI is never suppressible unless cfg.unicode.allow_bidi_suppression is True.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from typing import List, Optional, Set, TYPE_CHECKING
18
+
19
+ if TYPE_CHECKING:
20
+ from .model import Issue
21
+ from .config import GateConfig
22
+
23
# Matches a rule-specific directive such as `agentgate: ignore[RULE1,RULE2]`;
# whitespace after the colon is optional. A bare `agentgate: ignore` without
# a bracketed rule list does not match.
_SUPPRESS_RE = re.compile(r"agentgate:\s*ignore\[([^\]]+)\]")


def _parse_suppression(line_text: str) -> Optional[Set[str]]:
    """Extract the rule IDs named by the first suppression directive on a line.

    Returns the set of non-empty, whitespace-stripped rule IDs, or None when
    the line has no directive or the directive names no rule.
    """
    match = _SUPPRESS_RE.search(line_text)
    if match is None:
        return None

    rule_ids = set()
    for chunk in match.group(1).split(","):
        rule_id = chunk.strip()
        if rule_id:
            rule_ids.add(rule_id)
    return rule_ids or None
35
+
36
+
37
def _severity_to_action(severity: str, cfg: "GateConfig") -> str:
    """Look up the configured action for a severity.

    "high", "medium" and "low" return the matching ``cfg.policy`` value;
    any other severity returns "ignore".
    """
    if severity not in ("high", "medium", "low"):
        return "ignore"
    # The policy attributes are named after the severities themselves.
    return getattr(cfg.policy, severity)
46
+
47
+
48
def apply_suppression(
    issues: List["Issue"],
    content: str,
    cfg: "GateConfig",
) -> List["Issue"]:
    """Drop issues that a same-line suppression directive covers.

    An issue is dropped only when the line it points at carries a directive
    naming its rule ID. AG-BIDI issues are kept even then, unless
    ``cfg.unicode.allow_bidi_suppression`` is true.

    Returns the surviving issues in their original order.
    """
    # 1-based line number -> rule IDs suppressed on that line.
    directives: dict = {}
    for line_no, text in enumerate(content.splitlines(), start=1):
        rule_ids = _parse_suppression(text)
        if rule_ids is not None:
            directives[line_no] = rule_ids

    kept: List["Issue"] = []
    for issue in issues:
        rule_ids = directives.get(issue.line)
        if rule_ids is None or issue.rule_id not in rule_ids:
            kept.append(issue)
            continue
        # A directive names this rule; AG-BIDI still survives unless
        # suppressing it was explicitly allowed.
        if issue.rule_id == "AG-BIDI" and not cfg.unicode.allow_bidi_suppression:
            kept.append(issue)
    return kept
78
+
79
+
80
def decide_block(issues: List["Issue"], cfg: "GateConfig") -> bool:
    """Tell whether at least one issue's severity maps to the 'block' action."""
    return any(
        _severity_to_action(issue.severity, cfg) == "block"
        for issue in issues
    )
87
+
88
+
89
def filter_actionable(issues: List["Issue"], cfg: "GateConfig") -> List["Issue"]:
    """Keep the issues whose configured action is anything other than 'ignore'."""
    actionable: List["Issue"] = []
    for issue in issues:
        if _severity_to_action(issue.severity, cfg) == "ignore":
            continue
        actionable.append(issue)
    return actionable
agentgate/registry.py ADDED
@@ -0,0 +1,61 @@
1
+ """registry.py -- Check registry for agentgate.
2
+
3
+ Built-in checks are registered at import time.
4
+ Third-party checks can be added via register(name, fn).
5
+
6
+ A check function has signature:
7
+ def run(event: WriteEvent, cfg: GateConfig) -> List[Issue]: ...
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from typing import Callable, Dict, List, TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from .model import WriteEvent, Issue
16
+ from .config import GateConfig
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Registry storage
20
+ # ---------------------------------------------------------------------------
21
+
22
# Check name -> check function, in registration order.
_REGISTRY: Dict[str, Callable] = {}


def register(name: str, fn: Callable) -> None:
    """Store ``fn`` in the registry under ``name``.

    Registering a name that is already present replaces the earlier entry.
    """
    _REGISTRY[name] = fn
28
+
29
+
30
def get_enabled(cfg: "GateConfig") -> List[tuple]:
    """Collect the (name, fn) pairs of every check that should run.

    The built-in "unicode" and "cjk" checks come first, in that order, each
    only when its config flag is on and it is registered. Every other
    registered check follows in registration order, with no config gate.
    """
    builtin_flags = (
        ("unicode", cfg.unicode.enabled),
        ("cjk", cfg.cjk.enabled),
    )
    selected = [
        (name, _REGISTRY[name])
        for name, is_on in builtin_flags
        if is_on and name in _REGISTRY
    ]
    # Any additional registered checks (third-party)
    selected.extend(
        (name, fn)
        for name, fn in _REGISTRY.items()
        if name not in ("unicode", "cjk")
    )
    return selected
42
+
43
+
44
def get_all() -> Dict[str, Callable]:
    """Return a shallow copy of the registry, so callers cannot mutate it."""
    return _REGISTRY.copy()
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Register built-ins at import time
51
+ # ---------------------------------------------------------------------------
52
+
53
def _register_builtins() -> None:
    """Import the bundled checks and register them as "unicode" and "cjk".

    The imports live inside the function, so the check modules are loaded
    when it is called rather than when this module is first parsed.
    """
    from .checks.unicode_safety import run as run_unicode_check
    register("unicode", run_unicode_check)

    from .checks.cjk import run as run_cjk_check
    register("cjk", run_cjk_check)


# Populate the registry as a side effect of importing this module.
_register_builtins()
agentgate/report.py ADDED
@@ -0,0 +1,247 @@
1
+ """report.py -- Output formatters for agentgate.
2
+
3
+ block_report(issues) -> str -- model-readable block message for stderr
4
+ format_tty / format_json / format_sarif -- scan output formatters
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ import sys
12
+ from typing import List, TYPE_CHECKING
13
+
14
+ if TYPE_CHECKING:
15
+ from .model import Issue
16
+
17
+ # ---------------------------------------------------------------------------
18
+ # Color helpers
19
+ # ---------------------------------------------------------------------------
20
+
21
# ANSI SGR escape sequences.
# Attributes
_RESET = "\033[0m"
_BOLD = "\033[1m"
_DIM = "\033[2m"
# Foreground colors
_RED = "\033[31m"
_GREEN = "\033[32m"
_YELLOW = "\033[33m"
_CYAN = "\033[36m"
28
+
29
+
30
def _color_enabled() -> bool:
    """Decide whether ANSI colors should be emitted.

    Colors are on only when stdout has an ``isatty`` method that reports a
    terminal, NO_COLOR is unset or empty, and TERM is not "dumb".
    """
    stream = sys.stdout
    if not hasattr(stream, "isatty"):
        return False
    is_terminal = stream.isatty()
    if not is_terminal:
        return is_terminal
    env = os.environ
    if env.get("NO_COLOR", "") != "":
        return False
    return env.get("TERM", "") != "dumb"
37
+
38
+
39
def _c(code: str, text: str) -> str:
    """Wrap ``text`` in ``code`` plus a reset when colors are on; else return it as is."""
    return f"{code}{text}{_RESET}" if _color_enabled() else text
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Block report (model-readable, written to stderr)
47
+ # ---------------------------------------------------------------------------
48
+
49
def _issue_lines(issues: List["Issue"], file_path: str) -> List[str]:
    """Render each issue as a location line followed by its message line.

    Shared by block_report and warn_report so both always use one layout.
    """
    rendered: List[str] = []
    for issue in issues:
        fp = issue_file_path(issue, file_path)
        sev = issue.severity.upper()
        suggestion_str = f" -> {issue.suggestion}" if issue.suggestion else ""
        # NOTE(review): the excerpt is written verbatim, whereas format_tty
        # passes it through repr(). Confirm the checks never put raw control
        # characters or newlines into an excerpt.
        rendered.append(
            f" {fp}:{issue.line}:{issue.col} "
            f"{issue.check}/{issue.rule_id} {sev} "
            f"'{issue.excerpt}'{suggestion_str}"
        )
        rendered.append(f" {issue.message}")
    return rendered


def block_report(issues: List["Issue"], file_path: str = "<stdin>") -> str:
    """Build the model-readable block report string.

    Format mirrors design section 3.5:
    agentgate: BLOCKED -- N issue(s) to fix before this write

    file:line:col check/rule_id SEVERITY 'excerpt' -> suggestion
    message text

    Fix these and re-emit.

    ``file_path`` is the path shown for every issue. The `` -> suggestion``
    part is omitted for issues whose suggestion is empty.
    """
    lines = [
        f"agentgate: BLOCKED -- {len(issues)} issue(s) to fix before this write",
        "",
    ]
    lines.extend(_issue_lines(issues, file_path))
    lines.append("")
    lines.append(" Fix these and re-emit.")
    return "\n".join(lines)


def warn_report(issues: List["Issue"], file_path: str = "<stdin>") -> str:
    """Build a non-blocking warning report.

    Uses the same per-issue layout as block_report, under a WARNING header
    and without the closing "Fix these" line.

    Used in hook mode for warn-level issues: surfaced to stderr, exit 0.
    """
    lines = [
        f"agentgate: WARNING -- {len(issues)} non-blocking issue(s)",
        "",
    ]
    lines.extend(_issue_lines(issues, file_path))
    return "\n".join(lines)


def issue_file_path(issue: "Issue", default: str) -> str:
    """Return the file path to display for an issue.

    The ``issue`` argument is not consulted, so this always returns
    ``default``.
    """
    return default
108
+
109
+
110
+ # ---------------------------------------------------------------------------
111
+ # TTY scan format
112
+ # ---------------------------------------------------------------------------
113
+
114
# Severity -> color used for the severity label in TTY output.
_SEV_COLOR = {"high": _RED, "medium": _YELLOW, "low": _CYAN}


def format_tty(issues: List["Issue"], file_path: str = "") -> str:
    """Render issues as terminal text, colored when colors are enabled.

    Each issue yields a location line and a message line; a summary line
    closes the output. An empty ``file_path`` is shown as "<stdin>".
    """
    shown_path = file_path or "<stdin>"
    out: List[str] = []

    for issue in issues:
        severity_text = _c(_SEV_COLOR.get(issue.severity, ""), issue.severity.upper())
        rule_text = _c(_CYAN, f"{issue.check}/{issue.rule_id}")
        if issue.suggestion:
            suggestion_text = f" {_c(_GREEN, '-> ' + issue.suggestion)}"
        else:
            suggestion_text = ""
        out.append(
            f"{_c(_BOLD, shown_path)}:{issue.line}:{issue.col} "
            f"{rule_text} {severity_text} "
            f"{_c(_BOLD, repr(issue.excerpt))}{suggestion_text}"
        )
        out.append(f" {_c(_DIM, issue.message)}")

    count = len(issues)
    if count == 0:
        out.append(_c(_GREEN, "agentgate: no issues"))
    else:
        noun = "issue" if count == 1 else "issues"
        out.append(_c(_BOLD, f"\nagentgate: {count} {noun}"))
    return "\n".join(out)
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # JSON scan format
146
+ # ---------------------------------------------------------------------------
147
+
148
def _issue_to_dict(issue: "Issue", file_path: str = "") -> dict:
    """Convert an issue to a plain dict for JSON output.

    Key order is check, rule_id, severity, file, line, col, message,
    excerpt, suggestion. ``file`` is ``file_path``, or "" when that is empty.
    """
    record = {name: getattr(issue, name) for name in ("check", "rule_id", "severity")}
    record["file"] = file_path or ""
    for name in ("line", "col", "message", "excerpt", "suggestion"):
        record[name] = getattr(issue, name)
    return record
160
+
161
+
162
def format_json(issues: List["Issue"], file_path: str = "") -> str:
    """Serialize issues as an indented JSON document.

    The document has the keys "version" ("1"), "tool" ("agentgate") and
    "issues" (one dict per issue). Non-ASCII text is written unescaped.
    """
    serialized = [_issue_to_dict(issue, file_path) for issue in issues]
    document = {"version": "1", "tool": "agentgate", "issues": serialized}
    return json.dumps(document, ensure_ascii=False, indent=2)
169
+
170
+
171
+ # ---------------------------------------------------------------------------
172
+ # SARIF 2.1.0 format
173
+ # ---------------------------------------------------------------------------
174
+
175
# Issue severity -> SARIF result level.
_SARIF_LEVEL = {"high": "error", "medium": "warning", "low": "note"}

# Rule ID -> short description published in the SARIF rule list.
_RULE_DESCRIPTIONS = {
    "AG-BIDI": "Bidi control character that can visually reorder source code (Trojan-Source).",
    "AG-INVIS": "Invisible character (zero-width space, soft hyphen, etc.) inside identifier or string.",
    "AG-HOMO": "Homoglyph: Cyrillic or Greek character that looks like ASCII inside an identifier.",
    "MH001": "Known LLM CJK corruption: a character in the mojihen corpus of confirmed near-miss substitutions.",
    "MH002": "Mixed-script token: Han characters mixed with Latin or Cyrillic.",
    "MH003": "Isolated CJK in ASCII context: CJK appearing inside an otherwise-ASCII identifier.",
}

# Rule ID -> symbolic rule name published in the SARIF rule list.
_RULE_NAMES = {
    "AG-BIDI": "bidi-control",
    "AG-INVIS": "invisible-char",
    "AG-HOMO": "homoglyph",
    "MH001": "known-cjk-corruption",
    "MH002": "mixed-script-token",
    "MH003": "isolated-cjk",
}


def format_sarif(issues: List["Issue"], file_path: str = "") -> str:
    """Serialize issues as a SARIF 2.1.0 log containing a single run.

    The rule list holds one entry per distinct rule ID, sorted by ID; a rule
    ID missing from the tables above is used as its own name and
    description. Each issue becomes one result located at ``file_path``
    ("" when empty). A severity missing from the level table maps to
    "warning".
    """
    artifact_uri = file_path or ""

    rule_descriptors = [
        {
            "id": rule_id,
            "name": _RULE_NAMES.get(rule_id, rule_id),
            "shortDescription": {"text": _RULE_DESCRIPTIONS.get(rule_id, rule_id)},
            "defaultConfiguration": {"level": "error"},
        }
        for rule_id in sorted({issue.rule_id for issue in issues})
    ]

    result_entries = [
        {
            "ruleId": issue.rule_id,
            "level": _SARIF_LEVEL.get(issue.severity, "warning"),
            "message": {"text": issue.message},
            "locations": [
                {
                    "physicalLocation": {
                        "artifactLocation": {"uri": artifact_uri},
                        "region": {
                            "startLine": issue.line,
                            "startColumn": issue.col,
                        },
                    }
                }
            ],
        }
        for issue in issues
    ]

    driver = {
        "name": "agentgate",
        "version": "0.1.0",
        "informationUri": "https://github.com/hryoma1217/agentgate",
        "rules": rule_descriptors,
    }
    log = {
        "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
        "version": "2.1.0",
        "runs": [{"tool": {"driver": driver}, "results": result_entries}],
    }
    return json.dumps(log, ensure_ascii=False, indent=2)