PyPI - agent-write-gate - Versions diffs - 0.1.0__py3-none-any.whl - Mend

agent-write-gate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

agent_write_gate-0.1.0.dist-info/METADATA +276 -0
agent_write_gate-0.1.0.dist-info/RECORD +18 -0
agent_write_gate-0.1.0.dist-info/WHEEL +5 -0
agent_write_gate-0.1.0.dist-info/entry_points.txt +2 -0
agent_write_gate-0.1.0.dist-info/licenses/LICENSE +21 -0
agent_write_gate-0.1.0.dist-info/top_level.txt +1 -0
agentgate/__init__.py +5 -0
agentgate/adapter.py +142 -0
agentgate/apply_patch.py +74 -0
agentgate/checks/__init__.py +1 -0
agentgate/checks/cjk.py +81 -0
agentgate/checks/unicode_safety.py +277 -0
agentgate/cli.py +550 -0
agentgate/config.py +171 -0
agentgate/model.py +34 -0
agentgate/policy.py +94 -0
agentgate/registry.py +61 -0
agentgate/report.py +247 -0

agentgate/config.py ADDED Viewed

@@ -0,0 +1,171 @@
+"""config.py -- Load agentgate configuration.
+Looks for config in order:
+  1. Explicit path (--config flag)
+  2. agentgate.toml in current directory
+  3. [tool.agentgate] in pyproject.toml in current directory
+  4. Built-in defaults
+Uses tomllib (Python 3.11+); on older Python the 'tomli' backport is used
+instead. A config file that exists but cannot be parsed raises ConfigError.
+"""
+from __future__ import annotations
+import sys
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+# ---------------------------------------------------------------------------
+# Default values (mirrors section 4 of the design doc)
+# ---------------------------------------------------------------------------
+# Default list of code file extensions. UnicodeConfig.code_extensions copies
+# this list for every new instance, so the module-level list is never shared.
+_DEFAULT_CODE_EXTENSIONS = [
+    ".py", ".js", ".ts", ".go", ".rs", ".java",
+    ".c", ".cpp", ".rb", ".php", ".sh", ".sql",
+]
+@dataclass
+class PolicyConfig:
+    """Action string configured for each issue severity level."""
+    # Defaults: high-severity issues block, medium ones warn, low ones are ignored.
+    high: str = "block"
+    medium: str = "warn"
+    low: str = "ignore"
+@dataclass
+class CjkConfig:
+    """Settings for the "cjk" check."""
+    # Off by default; _apply_dict turns it on via `checks.cjk = true` or a
+    # [checks.cjk] sub-table.
+    enabled: bool = False
+    # Stored as a plain string; how it is interpreted is not visible in this module.
+    min_confidence: str = "high"
+@dataclass
+class UnicodeConfig:
+    """Settings for the "unicode" check."""
+    # Whether the check runs at all; on by default.
+    enabled: bool = True
+    # Optional toggles, both off by default; their exact effect is implemented
+    # outside this module.
+    homoglyph: bool = False
+    strict_zerowidth: bool = False
+    # Off by default. NOTE(review): presumably controls whether AG-BIDI findings
+    # may be suppressed by an ignore directive -- confirm against policy.py.
+    allow_bidi_suppression: bool = False
+    # default_factory copies the module-level list so instances never share it.
+    code_extensions: List[str] = field(default_factory=lambda: list(_DEFAULT_CODE_EXTENSIONS))
+@dataclass
+class GateConfig:
+    """Top-level configuration object returned by load_config.
+    Each field is built from its own dataclass defaults, so GateConfig() with
+    no arguments is the built-in default configuration.
+    """
+    cjk: CjkConfig = field(default_factory=CjkConfig)
+    unicode: UnicodeConfig = field(default_factory=UnicodeConfig)
+    policy: PolicyConfig = field(default_factory=PolicyConfig)
+# ---------------------------------------------------------------------------
+# TOML loading (stdlib tomllib on 3.11+; the 'tomli' backport otherwise)
+# ---------------------------------------------------------------------------
+class ConfigError(Exception):
+    """Raised when a config file exists but cannot be parsed.
+    _load_toml also raises it when no TOML parser can be imported
+    (Python < 3.11 without the 'tomli' package).
+    """
+def _load_toml(path: Path) -> Optional[Dict[str, Any]]:
+    """Load a TOML file.
+    Returns None if the file is absent. Raises ConfigError if the file exists
+    but cannot be parsed (so a broken config is never silently ignored).
+    """
+    if not path.exists():
+        return None
+    if sys.version_info >= (3, 11):
+        import tomllib as _toml  # type: ignore
+        decode_error = _toml.TOMLDecodeError
+    else:
+        try:
+            import tomli as _toml  # type: ignore
+        except ImportError as exc:  # pragma: no cover - tomli is a declared dep
+            raise ConfigError(
+                f"{path}: parsing TOML on Python < 3.11 requires the 'tomli' "
+                "package (install agent-write-gate, which depends on it)."
+            ) from exc
+        decode_error = _toml.TOMLDecodeError
+    try:
+        with open(path, "rb") as fh:
+            return _toml.load(fh)
+    except decode_error as exc:
+        raise ConfigError(f"{path}: invalid TOML: {exc}") from exc
+# ---------------------------------------------------------------------------
+# Config builder
+# ---------------------------------------------------------------------------
+def _apply_dict(cfg: GateConfig, d: Dict[str, Any]) -> None:
+    """Apply a parsed TOML dict onto a GateConfig (mutates in place)."""
+    checks = d.get("checks", {})
+    if isinstance(checks, dict):
+        # Top-level boolean toggles
+        if "cjk" in checks and isinstance(checks["cjk"], bool):
+            cfg.cjk.enabled = checks["cjk"]
+        if "unicode" in checks and isinstance(checks["unicode"], bool):
+            cfg.unicode.enabled = checks["unicode"]
+        # Nested [checks.cjk]
+        cjk_sub = checks.get("cjk")
+        if isinstance(cjk_sub, dict):
+            cfg.cjk.enabled = True  # sub-table presence implies enabled
+            if "enabled" in cjk_sub:
+                cfg.cjk.enabled = bool(cjk_sub["enabled"])
+            if "min_confidence" in cjk_sub:
+                cfg.cjk.min_confidence = str(cjk_sub["min_confidence"])
+        # Nested [checks.unicode]
+        uni_sub = checks.get("unicode")
+        if isinstance(uni_sub, dict):
+            if "enabled" in uni_sub:
+                cfg.unicode.enabled = bool(uni_sub["enabled"])
+            if "homoglyph" in uni_sub:
+                cfg.unicode.homoglyph = bool(uni_sub["homoglyph"])
+            if "strict_zerowidth" in uni_sub:
+                cfg.unicode.strict_zerowidth = bool(uni_sub["strict_zerowidth"])
+            if "allow_bidi_suppression" in uni_sub:
+                cfg.unicode.allow_bidi_suppression = bool(uni_sub["allow_bidi_suppression"])
+            if "code_extensions" in uni_sub and isinstance(uni_sub["code_extensions"], list):
+                cfg.unicode.code_extensions = list(uni_sub["code_extensions"])
+    policy = d.get("policy", {})
+    if isinstance(policy, dict):
+        if "high" in policy:
+            cfg.policy.high = str(policy["high"])
+        if "medium" in policy:
+            cfg.policy.medium = str(policy["medium"])
+        if "low" in policy:
+            cfg.policy.low = str(policy["low"])
+def load_config(explicit_path: Optional[Path] = None) -> GateConfig:
+    """Load configuration, returning GateConfig with defaults for any missing keys."""
+    cfg = GateConfig()
+    raw: Optional[Dict[str, Any]] = None
+    if explicit_path is not None:
+        raw = _load_toml(explicit_path)
+        if raw is not None:
+            _apply_dict(cfg, raw)
+        return cfg
+    # Try agentgate.toml
+    ag_toml = Path("agentgate.toml")
+    raw = _load_toml(ag_toml)
+    if raw is not None:
+        _apply_dict(cfg, raw)
+        return cfg
+    # Try [tool.agentgate] in pyproject.toml
+    pyproject = Path("pyproject.toml")
+    raw = _load_toml(pyproject)
+    if raw is not None:
+        tool_section = raw.get("tool", {}).get("agentgate")
+        if isinstance(tool_section, dict):
+            _apply_dict(cfg, tool_section)
+    return cfg

agentgate/model.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""model.py -- Core dataclasses for agentgate.
+WriteEvent: normalized representation of an agent write action.
+Issue: a single finding from a check.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, field
+@dataclass
+class WriteEvent:
+    """Normalized agent write event extracted from a hook payload.
+    All fields are plain strings; the values listed beside each field are the
+    ones this module documents.
+    """
+    agent: str    # "claude-code" | "codex" | "generic"
+    phase: str    # "pre" | "post" | "unknown"
+    tool: str     # "Write" | "Edit" | "apply_patch" | "unknown"
+    file_path: str  # best-effort; "<stdin>" if absent
+    content: str  # the added/written text to inspect
+@dataclass
+class Issue:
+    """A single finding from a check run.
+    Carries no file path of its own.
+    """
+    check: str      # "cjk" | "unicode"
+    rule_id: str    # e.g. "MH001" | "AG-BIDI" | "AG-INVIS" | "AG-HOMO"
+    severity: str   # "high" | "medium" | "low"
+    line: int       # line of the finding; presumably 1-based -- TODO confirm
+    col: int        # column of the finding; indexing base is not visible here -- TODO confirm
+    message: str
+    excerpt: str
+    suggestion: str = field(default="")  # optional; defaults to the empty string

agentgate/policy.py ADDED Viewed

@@ -0,0 +1,94 @@
+"""policy.py -- Severity-to-action mapping and suppression rules.
+Policy:
+  high   -> block (exit 2)
+  medium -> warn  (exit 0, logged)
+  low    -> ignore
+Suppression:
+  Only rule-specific directives: `agentgate: ignore[AG-INVIS]`
+  NO bare `agentgate: ignore` (that would let models launder violations).
+  AG-BIDI is never suppressible unless cfg.unicode.allow_bidi_suppression is True.
+"""
+from __future__ import annotations
+import re
+from typing import List, Optional, Set, TYPE_CHECKING
+if TYPE_CHECKING:
+    from .model import Issue
+    from .config import GateConfig
+# Pattern: agentgate: ignore[RULE1,RULE2] (with optional whitespace)
+_SUPPRESS_RE = re.compile(r"agentgate:\s*ignore\[([^\]]+)\]")
+def _parse_suppression(line_text: str) -> Optional[Set[str]]:
+    """Return set of suppressed rule IDs found in this line, or None if none."""
+    m = _SUPPRESS_RE.search(line_text)
+    if not m:
+        return None
+    rules_str = m.group(1)
+    rules = {r.strip() for r in rules_str.split(",") if r.strip()}
+    return rules if rules else None
+def _severity_to_action(severity: str, cfg: "GateConfig") -> str:
+    """Map a severity string to an action string."""
+    if severity == "high":
+        return cfg.policy.high
+    if severity == "medium":
+        return cfg.policy.medium
+    if severity == "low":
+        return cfg.policy.low
+    return "ignore"
+def apply_suppression(
+    issues: List["Issue"],
+    content: str,
+    cfg: "GateConfig",
+) -> List["Issue"]:
+    """Filter issues based on per-line suppression directives.
+    Returns issues that survive suppression.
+    """
+    lines = content.splitlines()
+    # Build line->suppressed_rules map
+    suppression_map: dict = {}
+    for i, line in enumerate(lines, start=1):
+        rules = _parse_suppression(line)
+        if rules is not None:
+            suppression_map[i] = rules
+    surviving: List["Issue"] = []
+    for issue in issues:
+        suppressed_rules = suppression_map.get(issue.line)
+        if suppressed_rules is not None and issue.rule_id in suppressed_rules:
+            # AG-BIDI is only suppressible if explicitly allowed
+            if issue.rule_id == "AG-BIDI" and not cfg.unicode.allow_bidi_suppression:
+                surviving.append(issue)  # cannot suppress AG-BIDI
+            else:
+                continue  # suppressed
+        else:
+            surviving.append(issue)
+    return surviving
+def decide_block(issues: List["Issue"], cfg: "GateConfig") -> bool:
+    """Return True if any issue maps to 'block' action."""
+    for issue in issues:
+        action = _severity_to_action(issue.severity, cfg)
+        if action == "block":
+            return True
+    return False
+def filter_actionable(issues: List["Issue"], cfg: "GateConfig") -> List["Issue"]:
+    """Return only issues that are not 'ignore'."""
+    return [
+        issue for issue in issues
+        if _severity_to_action(issue.severity, cfg) != "ignore"
+    ]

agentgate/registry.py ADDED Viewed

@@ -0,0 +1,61 @@
+"""registry.py -- Check registry for agentgate.
+Built-in checks are registered at import time.
+Third-party checks can be added via register(name, fn).
+A check function has signature:
+  def run(event: WriteEvent, cfg: GateConfig) -> List[Issue]: ...
+"""
+from __future__ import annotations
+from typing import Callable, Dict, List, TYPE_CHECKING
+if TYPE_CHECKING:
+    from .model import WriteEvent, Issue
+    from .config import GateConfig
+# ---------------------------------------------------------------------------
+# Registry storage
+# ---------------------------------------------------------------------------
+_REGISTRY: Dict[str, Callable] = {}
+def register(name: str, fn: Callable) -> None:
+    """Register a check function under the given name."""
+    _REGISTRY[name] = fn
+def get_enabled(cfg: "GateConfig") -> List[tuple]:
+    """Return list of (name, fn) pairs for all enabled checks."""
+    enabled = []
+    if cfg.unicode.enabled and "unicode" in _REGISTRY:
+        enabled.append(("unicode", _REGISTRY["unicode"]))
+    if cfg.cjk.enabled and "cjk" in _REGISTRY:
+        enabled.append(("cjk", _REGISTRY["cjk"]))
+    # Any additional registered checks (third-party)
+    for name, fn in _REGISTRY.items():
+        if name not in ("unicode", "cjk"):
+            enabled.append((name, fn))
+    return enabled
+def get_all() -> Dict[str, Callable]:
+    """Return a copy of the full registry."""
+    return dict(_REGISTRY)
+# ---------------------------------------------------------------------------
+# Register built-ins at import time
+# ---------------------------------------------------------------------------
+def _register_builtins() -> None:
+    """Register the built-in "unicode" and "cjk" checks.
+    The check modules are imported inside the function rather than at the top
+    of the file.
+    """
+    from .checks.unicode_safety import run as unicode_run
+    register("unicode", unicode_run)
+    from .checks.cjk import run as cjk_run
+    register("cjk", cjk_run)
+# Runs on import so the registry is populated as soon as this module loads.
+_register_builtins()

agentgate/report.py ADDED Viewed

@@ -0,0 +1,247 @@
+"""report.py -- Output formatters for agentgate.
+block_report(issues) -> str  -- model-readable block message for stderr
+format_tty / format_json / format_sarif -- scan output formatters
+"""
+from __future__ import annotations
+import json
+import os
+import sys
+from typing import List, TYPE_CHECKING
+if TYPE_CHECKING:
+    from .model import Issue
+# ---------------------------------------------------------------------------
+# Color helpers
+# ---------------------------------------------------------------------------
+# ANSI escape sequences; _RESET is appended after colored text by _c().
+_RESET = "\033[0m"
+_BOLD = "\033[1m"
+_RED = "\033[31m"
+_YELLOW = "\033[33m"
+_CYAN = "\033[36m"
+_GREEN = "\033[32m"
+_DIM = "\033[2m"
+def _color_enabled() -> bool:
+    return (
+        hasattr(sys.stdout, "isatty")
+        and sys.stdout.isatty()
+        and os.environ.get("NO_COLOR", "") == ""
+        and os.environ.get("TERM", "") != "dumb"
+    )
+def _c(code: str, text: str) -> str:
+    if _color_enabled():
+        return f"{code}{text}{_RESET}"
+    return text
+# ---------------------------------------------------------------------------
+# Block report (model-readable, written to stderr)
+# ---------------------------------------------------------------------------
+def block_report(issues: List["Issue"], file_path: str = "<stdin>") -> str:
+    """Build the model-readable block report string.
+    Format mirrors design section 3.5:
+      agentgate: BLOCKED -- N issue(s) to fix before this write
+        file:line:col  check/rule_id SEVERITY  'excerpt'  -> suggestion
+            message text
+      Fix these and re-emit.
+    """
+    n = len(issues)
+    lines = [
+        f"agentgate: BLOCKED -- {n} issue(s) to fix before this write",
+        "",
+    ]
+    for issue in issues:
+        fp = issue_file_path(issue, file_path)
+        sev = issue.severity.upper()
+        suggestion_str = f"  -> {issue.suggestion}" if issue.suggestion else ""
+        lines.append(
+            f"  {fp}:{issue.line}:{issue.col}  "
+            f"{issue.check}/{issue.rule_id} {sev}  "
+            f"'{issue.excerpt}'{suggestion_str}"
+        )
+        lines.append(f"      {issue.message}")
+    lines.append("")
+    lines.append("  Fix these and re-emit.")
+    return "\n".join(lines)
+def warn_report(issues: List["Issue"], file_path: str = "<stdin>") -> str:
+    """Build a non-blocking warning report (same layout as block_report).
+    Used in hook mode for warn-level issues: surfaced to stderr, exit 0.
+    """
+    n = len(issues)
+    lines = [
+        f"agentgate: WARNING -- {n} non-blocking issue(s)",
+        "",
+    ]
+    for issue in issues:
+        fp = issue_file_path(issue, file_path)
+        sev = issue.severity.upper()
+        suggestion_str = f"  -> {issue.suggestion}" if issue.suggestion else ""
+        lines.append(
+            f"  {fp}:{issue.line}:{issue.col}  "
+            f"{issue.check}/{issue.rule_id} {sev}  "
+            f"'{issue.excerpt}'{suggestion_str}"
+        )
+        lines.append(f"      {issue.message}")
+    return "\n".join(lines)
+def issue_file_path(issue: "Issue", default: str) -> str:
+    """Return the file path for an issue (uses default if not set on issue)."""
+    return default
+# ---------------------------------------------------------------------------
+# TTY scan format
+# ---------------------------------------------------------------------------
+# Severity -> ANSI color used for the severity label in format_tty.
+# Severities missing from this map are looked up with "" as the color code.
+_SEV_COLOR = {
+    "high": _RED,
+    "medium": _YELLOW,
+    "low": _CYAN,
+}
+def format_tty(issues: List["Issue"], file_path: str = "") -> str:
+    lines: List[str] = []
+    for issue in issues:
+        fp = file_path or "<stdin>"
+        sev_label = _c(_SEV_COLOR.get(issue.severity, ""), issue.severity.upper())
+        rule_label = _c(_CYAN, f"{issue.check}/{issue.rule_id}")
+        suggestion_str = f"  {_c(_GREEN, '-> ' + issue.suggestion)}" if issue.suggestion else ""
+        lines.append(
+            f"{_c(_BOLD, fp)}:{issue.line}:{issue.col}  "
+            f"{rule_label} {sev_label}  "
+            f"{_c(_BOLD, repr(issue.excerpt))}{suggestion_str}"
+        )
+        lines.append(f"  {_c(_DIM, issue.message)}")
+    n = len(issues)
+    if n == 0:
+        lines.append(_c(_GREEN, "agentgate: no issues"))
+    else:
+        plural = "issue" if n == 1 else "issues"
+        lines.append(_c(_BOLD, f"\nagentgate: {n} {plural}"))
+    return "\n".join(lines)
+# ---------------------------------------------------------------------------
+# JSON scan format
+# ---------------------------------------------------------------------------
+def _issue_to_dict(issue: "Issue", file_path: str = "") -> dict:
+    return {
+        "check": issue.check,
+        "rule_id": issue.rule_id,
+        "severity": issue.severity,
+        "file": file_path or "",
+        "line": issue.line,
+        "col": issue.col,
+        "message": issue.message,
+        "excerpt": issue.excerpt,
+        "suggestion": issue.suggestion,
+    }
+def format_json(issues: List["Issue"], file_path: str = "") -> str:
+    payload = {
+        "version": "1",
+        "tool": "agentgate",
+        "issues": [_issue_to_dict(i, file_path) for i in issues],
+    }
+    return json.dumps(payload, ensure_ascii=False, indent=2)
+# ---------------------------------------------------------------------------
+# SARIF 2.1.0 format
+# ---------------------------------------------------------------------------
+# Issue severity -> SARIF result level. format_sarif falls back to "warning"
+# for severities not listed here.
+_SARIF_LEVEL = {
+    "high": "error",
+    "medium": "warning",
+    "low": "note",
+}
+# Rule ID -> text used as the rule's SARIF shortDescription. Unknown rule IDs
+# fall back to the ID itself.
+_RULE_DESCRIPTIONS = {
+    "AG-BIDI": "Bidi control character that can visually reorder source code (Trojan-Source).",
+    "AG-INVIS": "Invisible character (zero-width space, soft hyphen, etc.) inside identifier or string.",
+    "AG-HOMO": "Homoglyph: Cyrillic or Greek character that looks like ASCII inside an identifier.",
+    "MH001": "Known LLM CJK corruption: a character in the mojihen corpus of confirmed near-miss substitutions.",
+    "MH002": "Mixed-script token: Han characters mixed with Latin or Cyrillic.",
+    "MH003": "Isolated CJK in ASCII context: CJK appearing inside an otherwise-ASCII identifier.",
+}
+# Rule ID -> short name used as the rule's SARIF "name". Unknown rule IDs
+# fall back to the ID itself.
+_RULE_NAMES = {
+    "AG-BIDI": "bidi-control",
+    "AG-INVIS": "invisible-char",
+    "AG-HOMO": "homoglyph",
+    "MH001": "known-cjk-corruption",
+    "MH002": "mixed-script-token",
+    "MH003": "isolated-cjk",
+}
+def format_sarif(issues: List["Issue"], file_path: str = "") -> str:
+    rule_ids_seen = sorted({i.rule_id for i in issues})
+    rules = []
+    for rid in rule_ids_seen:
+        rules.append({
+            "id": rid,
+            "name": _RULE_NAMES.get(rid, rid),
+            "shortDescription": {"text": _RULE_DESCRIPTIONS.get(rid, rid)},
+            "defaultConfiguration": {"level": "error"},
+        })
+    results = []
+    for issue in issues:
+        results.append({
+            "ruleId": issue.rule_id,
+            "level": _SARIF_LEVEL.get(issue.severity, "warning"),
+            "message": {"text": issue.message},
+            "locations": [
+                {
+                    "physicalLocation": {
+                        "artifactLocation": {"uri": file_path or ""},
+                        "region": {
+                            "startLine": issue.line,
+                            "startColumn": issue.col,
+                        },
+                    }
+                }
+            ],
+        })
+    sarif = {
+        "$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
+        "version": "2.1.0",
+        "runs": [
+            {
+                "tool": {
+                    "driver": {
+                        "name": "agentgate",
+                        "version": "0.1.0",
+                        "informationUri": "https://github.com/hryoma1217/agentgate",
+                        "rules": rules,
+                    }
+                },
+                "results": results,
+            }
+        ],
+    }
+    return json.dumps(sarif, ensure_ascii=False, indent=2)