PyPI - mcp-github-agent - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mcp-github-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

mcp_github_agent-0.1.0.dist-info/METADATA +153 -0
mcp_github_agent-0.1.0.dist-info/RECORD +27 -0
mcp_github_agent-0.1.0.dist-info/WHEEL +5 -0
mcp_github_agent-0.1.0.dist-info/entry_points.txt +2 -0
mcp_github_agent-0.1.0.dist-info/top_level.txt +2 -0
src/__init__.py +0 -0
src/analyzers/base.py +21 -0
src/analyzers/ruff.py +49 -0
src/audit.py +120 -0
src/config.py +36 -0
src/diff_parser.py +36 -0
src/github_client.py +111 -0
src/main.py +29 -0
src/policy.py +125 -0
src/review.py +107 -0
src/review_engine.py +83 -0
src/tools.py +291 -0
tests/__init__.py +0 -0
tests/test_audit.py +181 -0
tests/test_config.py +41 -0
tests/test_diff_parser.py +74 -0
tests/test_github_client.py +173 -0
tests/test_main.py +48 -0
tests/test_policy.py +133 -0
tests/test_review_engine.py +162 -0
tests/test_ruff_analyzer.py +78 -0
tests/test_tools.py +326 -0

src/policy.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Policy enforcement layer — repo allowlist, branch protection, dry-run delegation."""
+import json
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+# ── Data structures ────────────────────────────────────
+@dataclass
+class PolicyDecision:
+    """Result of one policy check: the verdict, why, and which rule decided it."""
+    # Verdict: "allow" | "deny" | "dry_run"
+    action: str
+    # Human-readable explanation of the verdict
+    reason: str
+    # Identifier of the rule that triggered (for audit)
+    matched_rule: str
+@dataclass
+class PolicyConfig:
+    """Runtime policy loaded from policy.json.
+
+    States:
+      * never loaded (optional policy file missing) → every check allows;
+      * loaded successfully → checks follow the allowlist / protected branches;
+      * file present but unreadable, unparsable, or not shaped like a policy →
+        deny-all, unless the policy was marked required, in which case
+        ``load`` raises instead.
+    """
+    repo_allowlist: list[str] = field(default_factory=list)
+    deny_pr_base: list[str] = field(default_factory=list)
+    deny_force_push: bool = True
+    _required: bool = False
+    _loaded: bool = False
+    # True when a policy file existed but could not be used; every check then denies.
+    _deny_all: bool = False
+    def load(self, path: str, required: bool = False) -> "PolicyConfig":
+        """Load policy from the JSON file at `path` and return self.
+
+        Args:
+            path: Location of the policy JSON file.
+            required: When true, a missing or unusable file is an error
+                instead of falling back to default-allow / deny-all.
+
+        Raises:
+            FileNotFoundError: `required` is true and `path` does not exist.
+            RuntimeError: `required` is true and the file cannot be read,
+                is not valid JSON, or is not a JSON object of the expected shape.
+        """
+        self._required = required
+        if not os.path.exists(path):
+            if required:
+                raise FileNotFoundError(
+                    f"Policy file not found: {path} (GITHUB_POLICY_REQUIRED=true)"
+                )
+            return self  # empty config → default-allow
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+        except (ValueError, OSError) as e:
+            # ValueError covers json.JSONDecodeError and undecodable bytes.
+            return self._reject(path, str(e), cause=e)
+        # Valid JSON of the wrong shape must fail closed as well, not crash
+        # with AttributeError on `.get`.
+        if not isinstance(data, dict):
+            return self._reject(path, "top-level JSON value must be an object")
+        branches = data.get("protected_branches")
+        if branches is None:
+            branches = {}
+        if not isinstance(branches, dict):
+            return self._reject(path, "'protected_branches' must be an object")
+        self.repo_allowlist = _ensure_list(data.get("repo_allowlist"))
+        self.deny_pr_base = _ensure_list(branches.get("deny_pr_base"))
+        self.deny_force_push = branches.get("deny_force_push", True)
+        self._deny_all = False  # a successful (re)load clears an earlier failure
+        self._loaded = True
+        return self
+    def _reject(self, path: str, detail: str, cause: Optional[BaseException] = None) -> "PolicyConfig":
+        """Handle an unusable policy file: raise if required, otherwise deny everything."""
+        if self._required:
+            raise RuntimeError(f"Failed to load policy file {path}: {detail}") from cause
+        # Unusable policy → default-deny (safer than default-allow)
+        self._loaded = True
+        self._deny_all = True
+        self.repo_allowlist = []
+        self.deny_pr_base = []
+        return self
+    def check_repo(self, repo: str) -> PolicyDecision:
+        """Check if `repo` is allowed.
+
+        An empty allowlist allows every repo; a non-empty one denies anything
+        that matches none of its patterns.
+        """
+        if not self._loaded:
+            return PolicyDecision("allow", "policy not loaded", "default-allow")
+        if self._deny_all:
+            return PolicyDecision("deny", "policy load failed — denying all", "policy:invalid-config")
+        for pattern in self.repo_allowlist:
+            if _wildcard_match(pattern, repo):
+                return PolicyDecision(
+                    "allow", f"repo {repo} matches allowlist {pattern}",
+                    f"repo_allowlist:{pattern}"
+                )
+        if self.repo_allowlist:
+            return PolicyDecision(
+                "deny", f"repo {repo} not in allowlist",
+                "repo_allowlist:deny_unlisted"
+            )
+        return PolicyDecision("allow", "allowlist empty", "default-allow")
+    def check_branch_for_pr(self, base_branch: str) -> PolicyDecision:
+        """Check if `base_branch` is protected from PR."""
+        if not self._loaded:
+            return PolicyDecision("allow", "policy not loaded", "default-allow")
+        # Deny-all mode has an empty deny list, so it must be honoured explicitly
+        # here; otherwise this check alone would allow every branch.
+        if self._deny_all:
+            return PolicyDecision("deny", "policy load failed — denying all", "policy:invalid-config")
+        for protected in self.deny_pr_base:
+            if _wildcard_match(protected, base_branch):
+                return PolicyDecision(
+                    "deny",
+                    f"PR to protected branch '{base_branch}' is blocked",
+                    f"protected_branch:{protected}"
+                )
+        return PolicyDecision("allow", f"branch {base_branch} is not protected", "branch_unprotected")
+# ── Helpers ────────────────────────────────────────────
+def _ensure_list(v) -> list:
+    """Coerce `v` to a list: None → [], a list is returned unchanged, anything else is wrapped."""
+    if isinstance(v, list):
+        return v
+    return [] if v is None else [v]
+def _wildcard_match(pattern: str, value: str) -> bool:
+    """Match a glob-like pattern (e.g. 'FMorgan-111/*') against a value.
+
+    Only '*' is special (any run of characters except a newline); every other
+    character matches literally. The bare pattern '*' matches everything.
+    A pattern without '*' must equal `value` exactly.
+    """
+    if pattern == "*":
+        return True
+    if "*" not in pattern:
+        return pattern == value
+    # Escape everything, then turn each escaped '*' back into a wildcard.
+    regex = re.escape(pattern).replace(r"\*", ".*")
+    # fullmatch rather than match + '$': '$' also matches just before a
+    # trailing newline, which would let "name\n" pass a pattern meant for "name".
+    return re.fullmatch(regex, value) is not None
+def resolve_dry_run(dry_run: Optional[bool], env_enabled: bool) -> bool:
+    """Pick the effective dry-run flag: an explicit argument wins, otherwise the env setting."""
+    return env_enabled if dry_run is None else dry_run

src/review.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Local rule-based code review"""
+import re
+from typing import List, Dict
+# Patterns are compiled once at import time instead of once per added line.
+_HUNK_NEW_START_RE = re.compile(r'\+(\d+)')
+_TODO_MARKER_RE = re.compile(r'\b(TODO|FIXME|HACK)\b', re.IGNORECASE)
+_BARE_EXCEPT_RE = re.compile(r'except\s*:')
+_DEF_START_RE = re.compile(r'\s*def\s+')
+_CLASS_START_RE = re.compile(r'\s*class\s+')
+_SECRET_ASSIGNMENT_RES = tuple(
+    re.compile(pattern, re.IGNORECASE) for pattern in (
+        r'password\s*=\s*["\'][^"\']+["\']',
+        r'api_key\s*=\s*["\'][^"\']+["\']',
+        r'token\s*=\s*["\'][^"\']+["\']',
+    )
+)
+# A function with more added body lines than this is reported.
+_MAX_FUNCTION_LINES = 80
+def _header_path(header: str) -> str:
+    """Return the path named by a '+++ ' header line, without the 'b/' prefix."""
+    path = header[4:].split('\t', 1)[0]  # drop an optional tab-separated timestamp
+    return path[2:] if path.startswith('b/') else path
+def _is_test_path(path: str) -> bool:
+    """Tell whether `path` looks like a test file (print() is tolerated there)."""
+    return (
+        '/tests/' in path
+        or path.startswith(('tests/', 'test_'))
+        or path.endswith('_test.py')
+        or '/test_' in path
+    )
+def review_diff(diff_text: str) -> List[Dict]:
+    """Analyze unified-diff text and return code review issues.
+
+    Only added lines are inspected. Each issue is a dict with the keys
+    'severity' ('warning' | 'error'), 'file' (path from the '+++ ' header),
+    'line' (line number in the new file), 'message' and 'rule'.
+
+    Rules: no-print (skipped in test files), no-todo-comments,
+    no-hardcoded-secrets, no-bare-except, and function-length (more than
+    80 added lines between an added 'def' and the next added def/class or
+    the end of that file's diff). Per-line issues come first, in diff order;
+    function-length issues follow.
+    """
+    issues: List[Dict] = []
+    length_issues: List[Dict] = []
+    current_file = ""
+    is_test_file = False
+    current_line = 0
+    open_def = None  # (file, line) of the added 'def' being measured, if any
+    body_lines = 0
+    def flag(severity: str, rule: str, message: str) -> None:
+        issues.append({
+            'severity': severity,
+            'file': current_file,
+            'line': current_line,
+            'message': message,
+            'rule': rule,
+        })
+    def close_def() -> None:
+        # Report the function being measured if it grew past the limit.
+        if open_def is not None and body_lines > _MAX_FUNCTION_LINES:
+            length_issues.append({
+                'severity': 'warning',
+                'file': open_def[0],
+                'line': open_def[1],
+                'message': f'Function is {body_lines} lines long - consider breaking it down',
+                'rule': 'function-length',
+            })
+    for line in diff_text.split('\n'):
+        if line.startswith('+++ '):
+            # New file: a function from the previous file cannot continue here.
+            close_def()
+            open_def, body_lines = None, 0
+            current_file = _header_path(line)
+            is_test_file = _is_test_path(current_file)
+        elif line.startswith('@@'):
+            # Parse line number from @@ -old_start,old_count +new_start,new_count @@
+            match = _HUNK_NEW_START_RE.search(line)
+            if match:
+                current_line = int(match.group(1)) - 1
+        elif line.startswith('+'):
+            current_line += 1
+            content = line[1:]  # Remove '+' prefix
+            if 'print(' in content and not is_test_file:
+                flag('warning', 'no-print',
+                     'Print statement found - consider using logging instead')
+            if _TODO_MARKER_RE.search(content):
+                flag('warning', 'no-todo-comments',
+                     'TODO/FIXME/HACK comment found - should be tracked in issue tracker')
+            for secret_re in _SECRET_ASSIGNMENT_RES:
+                if secret_re.search(content):
+                    flag('error', 'no-hardcoded-secrets',
+                         'Hardcoded secret detected - use environment variables')
+            if _BARE_EXCEPT_RE.search(content):
+                flag('error', 'no-bare-except',
+                     'Bare except clause - specify exception type')
+            # Function-length bookkeeping: a new def/class ends the previous function.
+            if _DEF_START_RE.match(content):
+                close_def()
+                open_def, body_lines = (current_file, current_line), 0
+            elif _CLASS_START_RE.match(content):
+                close_def()
+                open_def, body_lines = None, 0
+            elif open_def is not None:
+                body_lines += 1
+        elif line.startswith(' '):
+            # Context lines advance the new-file line counter; removed lines do not.
+            current_line += 1
+    # Check final function if still in one
+    close_def()
+    return issues + length_issues

src/review_engine.py ADDED Viewed

@@ -0,0 +1,83 @@
+"""ReviewService — orchestrate diff parsing + analyzers + old regex fallback."""
+import logging
+import os
+from .diff_parser import parse_diff
+from .analyzers.base import Finding
+from .analyzers.ruff import RuffAnalyzer
+from .review import review_diff as legacy_review
+# 500KB default; override with GITHUB_REVIEW_MAX_DIFF_BYTES env var
+_DEFAULT_MAX_DIFF_BYTES = 500 * 1024
+# GitHub's PR diff API limit is 1MB; keep this stdio server safely below
+# unbounded memory use even if the env var is misconfigured.
+_MAX_DIFF_BYTES_HARD_CAP = 1024 * 1024
+logger = logging.getLogger(__name__)
+def _get_max_diff_bytes() -> int:
+    """Return the maximum diff size, in bytes, that will be reviewed.
+
+    Reads GITHUB_REVIEW_MAX_DIFF_BYTES. An unset, empty, non-integer, zero or
+    negative value falls back to the 500KB default (with a warning when a
+    value was given); a value above the 1MB hard cap is clamped to the cap.
+    """
+    raw = os.environ.get("GITHUB_REVIEW_MAX_DIFF_BYTES", "").strip()
+    if not raw:
+        return _DEFAULT_MAX_DIFF_BYTES
+    try:
+        max_bytes = int(raw)
+    except ValueError:
+        logger.warning(
+            "GITHUB_REVIEW_MAX_DIFF_BYTES=%r is not an integer; using default of %s bytes.",
+            raw,
+            _DEFAULT_MAX_DIFF_BYTES,
+        )
+        return _DEFAULT_MAX_DIFF_BYTES
+    if max_bytes <= 0:
+        # A non-positive limit would reject every diff as "too large".
+        logger.warning(
+            "GITHUB_REVIEW_MAX_DIFF_BYTES=%s is not positive; using default of %s bytes.",
+            max_bytes,
+            _DEFAULT_MAX_DIFF_BYTES,
+        )
+        return _DEFAULT_MAX_DIFF_BYTES
+    if max_bytes > _MAX_DIFF_BYTES_HARD_CAP:
+        logger.warning(
+            "GITHUB_REVIEW_MAX_DIFF_BYTES=%s exceeds hard cap of %s bytes; clamping.",
+            max_bytes,
+            _MAX_DIFF_BYTES_HARD_CAP,
+        )
+        return _MAX_DIFF_BYTES_HARD_CAP
+    return max_bytes
+class ReviewService:
+    """Runs the available analyzers and the legacy regex rules over a unified diff."""
+    def __init__(self):
+        """Register analyzers; an analyzer that cannot be constructed is skipped."""
+        self.analyzers = []
+        try:
+            self.analyzers.append(RuffAnalyzer())
+        except Exception:
+            # Best-effort: the regex rules still run, but leave a trace of why
+            # the analyzer is missing instead of swallowing the failure.
+            logger.warning(
+                "RuffAnalyzer could not be initialised; continuing with regex rules only",
+                exc_info=True,
+            )
+    def review(self, diff_text: str) -> list[Finding]:
+        """Review `diff_text` and return the combined findings.
+
+        A diff larger than the configured byte limit is not analysed; a single
+        'diff-too-large' error finding is returned instead. Otherwise each
+        analyzer runs on every changed '.py' file (keeping only findings on
+        added lines), followed by the legacy regex rules over the whole diff.
+        A failing analyzer is logged and skipped.
+        """
+        # Guard against oversized diffs that could OOM the process
+        max_bytes = _get_max_diff_bytes()
+        diff_bytes = len(diff_text.encode("utf-8"))
+        if diff_bytes > max_bytes:
+            return [Finding(
+                severity="error",
+                file="",
+                line=0,
+                rule="diff-too-large",
+                message=f"Diff too large ({diff_bytes // 1024} KB, limit {max_bytes // 1024} KB). "
+                        f"Review skipped to avoid OOM.",
+                source="review_engine",
+            )]
+        findings = []
+        changed = parse_diff(diff_text)
+        # Ruff on changed Python files
+        for cf in changed:
+            if not cf.path.endswith(".py"):
+                continue
+            for a in self.analyzers:
+                try:
+                    raw = a.analyze(cf.path)
+                except Exception:
+                    logger.warning(
+                        "Analyzer %s failed on %s; skipping",
+                        type(a).__name__, cf.path, exc_info=True,
+                    )
+                    continue
+                # Filter: only changed lines
+                findings.extend(f for f in raw if f.line in cf.added_lines)
+        # Legacy regex fallback
+        for li in legacy_review(diff_text):
+            findings.append(Finding(
+                severity=li["severity"],
+                file=li.get("file", ""),
+                line=li["line"],
+                rule=li["rule"],
+                message=li["message"],
+                source="regex",
+            ))
+        return findings

src/tools.py ADDED Viewed

@@ -0,0 +1,291 @@
+"""MCP tools for GitHub operations — with policy guard & audit logging."""
+from fastmcp import FastMCP
+from .config import (
+    get_github_token, get_github_api_base,
+    get_policy_path, get_policy_required,
+    get_audit_sink, get_dry_run_enabled,
+)
+from .github_client import GitHubClient
+from .review import review_diff
+from .review_engine import ReviewService
+from .policy import PolicyConfig, resolve_dry_run
+from .audit import AuditLogger
+# Server object; the @mcp.tool() decorators in this module register tools on it.
+mcp = FastMCP("GitHub MCP Agent Server")
+# Lazy-init singletons — created on first access
+# (filled in by _get_policy() / _get_audit(); None until then)
+_policy: PolicyConfig | None = None
+_audit: AuditLogger | None = None
+def _get_policy() -> PolicyConfig:
+    """Return the shared PolicyConfig, loading it from the configured path on first use."""
+    global _policy
+    if _policy is not None:
+        return _policy
+    policy_path = get_policy_path()
+    must_exist = get_policy_required()
+    _policy = PolicyConfig().load(path=policy_path, required=must_exist)
+    return _policy
+def _get_audit() -> AuditLogger:
+    """Return the shared AuditLogger, creating it with the configured sink on first use."""
+    global _audit
+    if _audit is not None:
+        return _audit
+    _audit = AuditLogger(sink=get_audit_sink())
+    return _audit
+# ── Read tools (no guard needed) ───────────────────────
+@mcp.tool()
+def search_code(query: str, repo: str | None = None) -> str:
+    """Search for code in GitHub repositories.
+    Args:
+        query: Search query text.
+        repo: Optional repository to restrict the search to; omit to search without one.
+    Returns:
+        A text listing of up to 10 matches, "No results found", or "Error: ...".
+    """
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    result = client.search_code(query, repo)
+    if isinstance(result, dict) and "error" in result:
+        return f"Error: {result['error']}"
+    items = result.get("items", [])
+    if not items:
+        return "No results found"
+    shown = items[:10]
+    output = [
+        f"• {item['path']} in {item['repo']}\n  {item['url']}"
+        for item in shown
+    ]
+    # Say so when the listing is truncated, so the count and the list agree.
+    truncated = f" (showing first {len(shown)})" if len(items) > len(shown) else ""
+    return f"Found {len(items)} results{truncated}:\n" + "\n\n".join(output)
+@mcp.tool()
+def list_issues(repo: str, state: str = "open") -> str:
+    """List issues in a GitHub repository.
+    Args:
+        repo: Repository to list issues for.
+        state: Issue state filter passed to the client (default "open").
+    Returns:
+        A text listing of up to 10 issues, a "No ... issues found" note, or "Error: ...".
+    """
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    result = client.list_issues(repo, state)
+    # Same guard as search_code: only a dict can carry an "error" key; on the
+    # sliced (list-like) success value, `in` would compare against elements.
+    if isinstance(result, dict) and "error" in result:
+        return f"Error: {result['error']}"
+    if not result:
+        return f"No {state} issues found in {repo}"
+    output = [
+        f"#{issue['number']}: {issue['title']}\n  {issue['html_url']}"
+        for issue in result[:10]
+    ]
+    return f"Issues in {repo} ({state}):\n" + "\n\n".join(output)
+@mcp.tool()
+def get_pr_diff(repo: str, pr_number: int) -> str:
+    """Get the diff for a pull request."""
+    github = GitHubClient(get_github_token(), get_github_api_base())
+    response = github.get_pr_diff(repo, pr_number)
+    # Success path first; anything carrying an "error" key is reported as-is.
+    if "error" not in response:
+        return f"PR #{pr_number} diff:\n\n{response['diff']}"
+    return f"Error: {response['error']}"
+@mcp.tool()
+def review_pr_diff(repo: str, pr_number: int) -> str:
+    """Review a PR diff using ruff + legacy regex rules. Returns structured findings.
+    Args:
+        repo: Repository containing the pull request.
+        pr_number: Pull request number.
+    Returns:
+        One line per finding, a "looks good" note when the review produced no
+        findings, or "Error: ..." when the diff could not be fetched or the
+        review itself failed.
+    """
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    result = client.get_pr_diff(repo, pr_number)
+    if "error" in result:
+        return f"Error: {result['error']}"
+    # Use new review engine (ruff + regex fallback)
+    try:
+        findings = ReviewService().review(result["diff"])
+    except Exception as exc:
+        # A crashed review is not a clean review: report it instead of
+        # telling the caller the PR "looks good".
+        return f"Error: review of PR #{pr_number} failed: {exc}"
+    if not findings:
+        return f"PR #{pr_number} looks good - no issues found!"
+    output = [f"Code review for PR #{pr_number} ({len(findings)} issues):"]
+    for f in findings:
+        icon = "❌" if f.severity == "error" else "⚠️"
+        output.append(
+            f"{icon} {f.file}:{f.line} — {f.message} "
+            f"[{f.rule}/{f.source}]"
+        )
+    return "\n".join(output)
+@mcp.tool()
+def comment_pr_review(repo: str, pr_number: int, dry_run: bool | None = None) -> str:
+    """Fetch PR diff, run code review, and post findings as review comments.
+
+    This tool writes to the repository (it posts comments), so it goes through
+    the same repo allowlist check, dry-run handling and audit logging as the
+    other write tools.
+    Args:
+        repo: Repository in 'owner/repo' format.
+        pr_number: Pull request number.
+        dry_run: If True, report what would be posted without posting. If
+            omitted, the configured default from get_dry_run_enabled() applies.
+    Returns:
+        A summary of what was (or would be) posted, a policy denial, a
+        "looks good" note, or "Error: ...".
+    """
+    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
+    policy = _get_policy()
+    audit = _get_audit()
+    # Guard: repo allowlist check
+    repo_decision = policy.check_repo(repo)
+    if repo_decision.action == "deny":
+        audit.log(
+            tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
+            dry_run=dry, policy_decision="deny",
+            policy_rule=repo_decision.matched_rule,
+            request_body={"pr_number": pr_number},
+            error=repo_decision.reason,
+        )
+        return f"❌ Policy Denied: {repo_decision.reason}"
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    diff_result = client.get_pr_diff(repo, pr_number)
+    if "error" in diff_result:
+        return f"Error: {diff_result['error']}"
+    try:
+        findings = ReviewService().review(diff_result["diff"])
+    except Exception as exc:
+        # A crashed review must not be reported as a clean PR.
+        return f"Error: review of PR #{pr_number} failed: {exc}"
+    if not findings:
+        return f"PR #{pr_number} looks good - no issues found!"
+    # An inline review comment needs a file and a line; findings without them
+    # (e.g. 'diff-too-large', which has file="" and line=0) cannot be anchored.
+    # Limit to top 10 findings to avoid spam
+    postable = [f for f in findings if f.file and f.line > 0][:10]
+    if dry:
+        audit.log(
+            tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
+            dry_run=True, policy_decision="allow",
+            policy_rule=repo_decision.matched_rule,
+            request_body={"pr_number": pr_number, "comments": len(postable)},
+        )
+        return (
+            f"[DRY RUN] Would post {len(postable)} review comments on PR #{pr_number} in {repo} "
+            f"({len(findings)} total issues found)\n"
+            f"  Policy: {repo_decision.reason}"
+        )
+    posted = 0
+    for f in postable:
+        body = f"{f.message}\n\nRule: `{f.rule}` | Source: {f.source}"
+        r = client.create_review_comment(repo, pr_number, body, path=f.file, line=f.line)
+        if "error" not in r:
+            posted += 1
+    audit.log(
+        tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
+        policy_decision="allow", policy_rule=repo_decision.matched_rule,
+        request_body={"pr_number": pr_number, "comments": len(postable)},
+        response={"posted": posted},
+    )
+    return (
+        f"Posted {posted} review comments on PR #{pr_number} "
+        f"({len(findings)} total issues found, at most 10 posted)"
+    )
+# ── Write tools (guarded) ──────────────────────────────
+@mcp.tool()
+def create_issue(repo: str, title: str, body: str, dry_run: bool | None = None) -> str:
+    """Create a new issue in a GitHub repository.
+    Args:
+        repo: Repository in 'owner/repo' format.
+        title: Issue title.
+        body: Issue body text.
+        dry_run: If True, preview the operation without executing. If False,
+            execute. If omitted, the configured default from
+            get_dry_run_enabled() applies.
+    Returns:
+        A confirmation with the issue number and URL, a dry-run preview,
+        a policy denial, or "Error: ...".
+    """
+    # The default is None rather than False: resolve_dry_run only falls back to
+    # the configured setting when no explicit value was given, so a False
+    # default would silently disable the configured dry-run mode.
+    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
+    policy = _get_policy()
+    audit = _get_audit()
+    # Guard: repo allowlist check
+    repo_decision = policy.check_repo(repo)
+    def record(**outcome) -> None:
+        # Every audit entry for this call shares the same identifying fields.
+        audit.log(
+            tool="create_issue", action="issue.create", repo=repo,
+            policy_rule=repo_decision.matched_rule,
+            request_body={"title": title, "body": body},
+            **outcome,
+        )
+    if repo_decision.action == "deny":
+        record(dry_run=dry, policy_decision="deny", error=repo_decision.reason)
+        return f"❌ Policy Denied: {repo_decision.reason}"
+    if dry:
+        record(dry_run=True, policy_decision="allow")
+        return (
+            f"[DRY RUN] Would create issue in {repo}:\n"
+            f"  Title: {title}\n"
+            f"  Body:  {body[:120]}{'...' if len(body) > 120 else ''}\n"
+            f"  Policy: {repo_decision.reason}"
+        )
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    result = client.create_issue(repo, title, body)
+    if "error" in result:
+        record(policy_decision="allow", error=result["error"])
+        return f"Error: {result['error']}"
+    record(policy_decision="allow", response=result)
+    return f"Issue created: #{result['number']}: {result['title']}\n{result['html_url']}"
+@mcp.tool()
+def create_pr(repo: str, title: str, body: str, head: str, base: str,
+              dry_run: bool | None = None) -> str:
+    """Create a new pull request.
+    Args:
+        repo: Repository in 'owner/repo' format.
+        title: PR title.
+        body: PR description.
+        head: Source branch name.
+        base: Target branch name (e.g. 'main').
+        dry_run: If True, preview the operation without executing. If False,
+            execute. If omitted, the configured default from
+            get_dry_run_enabled() applies.
+    Returns:
+        A confirmation with the PR number and URL, a dry-run preview,
+        a policy denial, or "Error: ...".
+    """
+    # The default is None rather than False: resolve_dry_run only falls back to
+    # the configured setting when no explicit value was given, so a False
+    # default would silently disable the configured dry-run mode.
+    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
+    policy = _get_policy()
+    audit = _get_audit()
+    def record(policy_rule: str, **outcome) -> None:
+        # Every audit entry for this call shares the same identifying fields.
+        audit.log(
+            tool="create_pr", action="pull_request.create", repo=repo,
+            policy_rule=policy_rule,
+            request_body={"title": title, "head": head, "base": base},
+            **outcome,
+        )
+    # Guard: repo allowlist
+    repo_decision = policy.check_repo(repo)
+    if repo_decision.action == "deny":
+        record(repo_decision.matched_rule, dry_run=dry,
+               policy_decision="deny", error=repo_decision.reason)
+        return f"❌ Policy Denied: {repo_decision.reason}"
+    # Guard: branch protection
+    branch_decision = policy.check_branch_for_pr(base)
+    if branch_decision.action == "deny":
+        record(branch_decision.matched_rule, dry_run=dry,
+               policy_decision="deny", error=branch_decision.reason)
+        return f"❌ Policy Denied: {branch_decision.reason}"
+    # Both guards passed; allowed entries record both rules.
+    allowed_rules = f"{repo_decision.matched_rule}, {branch_decision.matched_rule}"
+    if dry:
+        record(allowed_rules, dry_run=True, policy_decision="allow")
+        return (
+            f"[DRY RUN] Would create PR in {repo}:\n"
+            f"  Title:  {title}\n"
+            f"  Head:   {head} → Base: {base}\n"
+            f"  Policy: {repo_decision.reason} · {branch_decision.reason}"
+        )
+    client = GitHubClient(get_github_token(), get_github_api_base())
+    result = client.create_pr(repo, title, body, head, base)
+    if "error" in result:
+        record(allowed_rules, policy_decision="allow", error=result["error"])
+        return f"Error: {result['error']}"
+    record(allowed_rules, policy_decision="allow", response=result)
+    return f"PR created: #{result['number']}: {result['title']}\n{result['html_url']}"

tests/__init__.py ADDED Viewed

File without changes