mcp-github-agent 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/policy.py ADDED
@@ -0,0 +1,125 @@
1
+ """Policy enforcement layer — repo allowlist, branch protection, dry-run delegation."""
2
+ import json
3
+ import os
4
+ import re
5
+ from dataclasses import dataclass, field
6
+ from typing import Optional
7
+
8
+ # ── Data structures ────────────────────────────────────
9
@dataclass
class PolicyDecision:
    """Outcome of a single policy check.

    Fields, in positional order:
        action:       "allow" | "deny" | "dry_run"
        reason:       human-readable explanation
        matched_rule: which rule triggered (for audit)
    """

    action: str
    reason: str
    matched_rule: str
14
+
15
@dataclass
class PolicyConfig:
    """Loads and holds runtime policy from policy.json."""

    repo_allowlist: list[str] = field(default_factory=list)
    deny_pr_base: list[str] = field(default_factory=list)
    deny_force_push: bool = True

    _required: bool = False
    _loaded: bool = False
    # True when a policy file existed but could not be used; every check
    # then denies (fail-closed).
    _deny_all: bool = False

    def load(self, path: str, required: bool = False) -> "PolicyConfig":
        """Load policy from a JSON file.

        Args:
            path: Location of the policy JSON file.
            required: When True, a missing or unusable file raises instead
                of degrading to default-allow / deny-all.

        Returns:
            This instance, so the call can be chained after construction.

        Raises:
            FileNotFoundError: ``required`` is True and ``path`` does not exist.
            RuntimeError: ``required`` is True and the file cannot be read,
                is not valid JSON, or does not have the expected object shape.
        """
        self._required = required

        if not os.path.exists(path):
            if required:
                raise FileNotFoundError(
                    f"Policy file not found: {path} (GITHUB_POLICY_REQUIRED=true)"
                )
            return self  # empty config → default-allow

        try:
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            # Validate the shape here so that a well-formed but wrongly
            # shaped file takes the same fail-closed path as invalid JSON
            # instead of crashing on `.get`.
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value must be an object")
            protected = data.get("protected_branches")
            if protected is None:
                protected = {}
            elif not isinstance(protected, dict):
                raise ValueError("'protected_branches' must be an object")
        except (ValueError, OSError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
            if required:
                raise RuntimeError(f"Failed to load policy file {path}: {e}") from e
            # Unusable policy → default-deny (safer than default-allow)
            self._loaded = True
            self._deny_all = True
            self.repo_allowlist = []
            self.deny_pr_base = []
            return self

        self.repo_allowlist = _ensure_list(data.get("repo_allowlist"))
        self.deny_pr_base = _ensure_list(protected.get("deny_pr_base"))
        self.deny_force_push = protected.get("deny_force_push", True)
        self._loaded = True
        # A successful load clears any deny-all left by an earlier failed load.
        self._deny_all = False
        return self

    def check_repo(self, repo: str) -> "PolicyDecision":
        """Check if `repo` is allowed.

        Allows when no policy is loaded or the allowlist is empty; denies
        when the policy failed to load or the allowlist is non-empty and no
        pattern matches.
        """
        if not self._loaded:
            return PolicyDecision("allow", "policy not loaded", "default-allow")

        if self._deny_all:
            return PolicyDecision(
                "deny", "policy load failed — denying all", "policy:invalid-config"
            )

        for pattern in self.repo_allowlist:
            if _wildcard_match(pattern, repo):
                return PolicyDecision(
                    "allow", f"repo {repo} matches allowlist {pattern}",
                    f"repo_allowlist:{pattern}"
                )

        if self.repo_allowlist:
            return PolicyDecision(
                "deny", f"repo {repo} not in allowlist",
                "repo_allowlist:deny_unlisted"
            )

        return PolicyDecision("allow", "allowlist empty", "default-allow")

    def check_branch_for_pr(self, base_branch: str) -> "PolicyDecision":
        """Check if `base_branch` is protected from PR.

        Denies when the policy failed to load (same fail-closed rule as
        `check_repo`) or when `base_branch` matches a `deny_pr_base` pattern.
        """
        if not self._loaded:
            return PolicyDecision("allow", "policy not loaded", "default-allow")

        if self._deny_all:
            return PolicyDecision(
                "deny", "policy load failed — denying all", "policy:invalid-config"
            )

        for protected in self.deny_pr_base:
            if _wildcard_match(protected, base_branch):
                return PolicyDecision(
                    "deny",
                    f"PR to protected branch '{base_branch}' is blocked",
                    f"protected_branch:{protected}"
                )

        return PolicyDecision(
            "allow", f"branch {base_branch} is not protected", "branch_unprotected"
        )
97
+
98
+
99
+ # ── Helpers ────────────────────────────────────────────
100
def _ensure_list(v) -> list:
    """Coerce `v` to a list: None → [], a list is returned as-is, anything else is wrapped."""
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
109
+
110
+
111
def _wildcard_match(pattern: str, value: str) -> bool:
    """Glob-like comparison where '*' stands for any run of characters.

    A bare '*' matches everything; a pattern without '*' must equal
    `value` exactly (e.g. 'FMorgan-111/*' matches 'FMorgan-111/repo').
    """
    if pattern == "*":
        return True
    if "*" not in pattern:
        return pattern == value
    # Escape the literal pieces and rejoin them with '.*' in place of each '*'.
    literal_parts = [re.escape(piece) for piece in pattern.split("*")]
    anchored = "^" + ".*".join(literal_parts) + "$"
    return re.match(anchored, value) is not None
119
+
120
+
121
def resolve_dry_run(dry_run: Optional[bool], env_enabled: bool) -> bool:
    """Pick the effective dry-run flag.

    An explicit `dry_run` (True or False) wins; only when it is None does
    the environment-derived `env_enabled` value apply.
    """
    return env_enabled if dry_run is None else dry_run
src/review.py ADDED
@@ -0,0 +1,107 @@
1
+ """Local rule-based code review"""
2
+ import re
3
+ from typing import List, Dict
4
+
5
+
6
def review_diff(diff_text: str) -> List[Dict]:
    """Analyze unified-diff text and return code review issues.

    Only added ('+') lines are inspected. Each issue is a dict with the
    keys 'severity', 'file', 'line', 'message' and 'rule'; 'line' is the
    line number in the new version of the file, derived from the hunk
    headers. Per-line issues come first in diff order, followed by any
    'function-length' issues.

    Rules:
        no-print              print( on an added line (skipped in test files)
        no-todo-comments      TODO / FIXME / HACK (case-insensitive)
        no-hardcoded-secrets  password / api_key / token assigned a string literal
        no-bare-except        'except:' without an exception type
        function-length       more than 80 added lines after a 'def', up to the
                              next added def/class, next file, or end of diff
    """
    max_function_lines = 80
    secret_patterns = [
        r'password\s*=\s*["\'][^"\']+["\']',
        r'api_key\s*=\s*["\'][^"\']+["\']',
        r'token\s*=\s*["\'][^"\']+["\']',
    ]

    issues: List[Dict] = []
    length_issues: List[Dict] = []  # kept apart so they stay at the end of the result
    current_file = ""
    current_line = 0
    is_test_file = False
    func_start = None  # new-file line of the 'def' currently being measured
    func_lines = 0     # added lines seen since that 'def'

    def make_issue(severity: str, line: int, message: str, rule: str) -> Dict:
        return {
            'severity': severity,
            'file': current_file,
            'line': line,
            'message': message,
            'rule': rule,
        }

    def close_function() -> None:
        """Report the function being measured if it is too long, then reset."""
        nonlocal func_start, func_lines
        if func_start is not None and func_lines > max_function_lines:
            length_issues.append(make_issue(
                'warning', func_start,
                f'Function is {func_lines} lines long - consider breaking it down',
                'function-length',
            ))
        func_start = None
        func_lines = 0

    for line in diff_text.split('\n'):
        if line.startswith('+++'):
            # New file header: a function never spans two files, so close
            # (with the old file name) before switching.
            close_function()
            path = line[3:].strip().split('\t', 1)[0]
            if path.startswith('b/'):
                path = path[2:]
            current_file = path
            is_test_file = (
                '/tests/' in current_file
                or current_file.startswith(('tests/', 'test_'))
                or current_file.endswith('_test.py')
                or '/test_' in current_file
            )
        elif line.startswith('@@'):
            # Parse new_start from @@ -old_start,old_count +new_start,new_count @@
            match = re.search(r'\+(\d+)', line)
            if match:
                current_line = int(match.group(1)) - 1
        elif line.startswith('+'):
            current_line += 1
            content = line[1:]  # Remove '+' prefix

            if 'print(' in content and not is_test_file:
                issues.append(make_issue(
                    'warning', current_line,
                    'Print statement found - consider using logging instead',
                    'no-print',
                ))

            if re.search(r'\b(TODO|FIXME|HACK)\b', content, re.IGNORECASE):
                issues.append(make_issue(
                    'warning', current_line,
                    'TODO/FIXME/HACK comment found - should be tracked in issue tracker',
                    'no-todo-comments',
                ))

            for pattern in secret_patterns:
                if re.search(pattern, content, re.IGNORECASE):
                    issues.append(make_issue(
                        'error', current_line,
                        'Hardcoded secret detected - use environment variables',
                        'no-hardcoded-secrets',
                    ))

            if re.search(r'except\s*:', content):
                issues.append(make_issue(
                    'error', current_line,
                    'Bare except clause - specify exception type',
                    'no-bare-except',
                ))

            # Function-length tracking (simplified: counts added lines only).
            if re.match(r'\s*(async\s+)?def\s+', content):
                close_function()  # the previous function ends where this one starts
                func_start = current_line
            elif re.match(r'\s*class\s+', content):
                close_function()
            elif func_start is not None:
                func_lines += 1
        elif line.startswith(' '):
            # Context line: exists in the new file, so it advances the counter.
            current_line += 1
        # Removed ('-') lines and anything else do not exist in the new file.

    close_function()  # last function in the diff
    return issues + length_issues
src/review_engine.py ADDED
@@ -0,0 +1,83 @@
1
+ """ReviewService — orchestrate diff parsing + analyzers + old regex fallback."""
2
+ import logging
3
+ import os
4
+ from .diff_parser import parse_diff
5
+ from .analyzers.base import Finding
6
+ from .analyzers.ruff import RuffAnalyzer
7
+ from .review import review_diff as legacy_review
8
+
9
# 500KB default; override with GITHUB_REVIEW_MAX_DIFF_BYTES env var
_DEFAULT_MAX_DIFF_BYTES = 500 * 1024
# GitHub's PR diff API limit is 1MB; keep this stdio server safely below
# unbounded memory use even if the env var is misconfigured.
_MAX_DIFF_BYTES_HARD_CAP = 1024 * 1024

logger = logging.getLogger(__name__)


def _get_max_diff_bytes() -> int:
    """Return the maximum diff size, in bytes, that will be reviewed.

    Reads GITHUB_REVIEW_MAX_DIFF_BYTES. An unset or blank variable yields
    the default. A value that is not an integer, or is zero or negative,
    is rejected with a warning and the default is used instead (a
    non-positive limit would flag every diff as too large). Values above
    the hard cap are clamped to it.
    """
    raw = os.environ.get("GITHUB_REVIEW_MAX_DIFF_BYTES", "").strip()
    if not raw:
        return _DEFAULT_MAX_DIFF_BYTES

    try:
        max_bytes = int(raw)
    except ValueError:
        logger.warning(
            "GITHUB_REVIEW_MAX_DIFF_BYTES=%r is not an integer; using default of %s bytes.",
            raw,
            _DEFAULT_MAX_DIFF_BYTES,
        )
        return _DEFAULT_MAX_DIFF_BYTES

    if max_bytes <= 0:
        logger.warning(
            "GITHUB_REVIEW_MAX_DIFF_BYTES=%s must be positive; using default of %s bytes.",
            max_bytes,
            _DEFAULT_MAX_DIFF_BYTES,
        )
        return _DEFAULT_MAX_DIFF_BYTES

    if max_bytes > _MAX_DIFF_BYTES_HARD_CAP:
        logger.warning(
            "GITHUB_REVIEW_MAX_DIFF_BYTES=%s exceeds hard cap of %s bytes; clamping.",
            max_bytes,
            _MAX_DIFF_BYTES_HARD_CAP,
        )
        return _MAX_DIFF_BYTES_HARD_CAP
    return max_bytes
32
+
33
+
34
class ReviewService:
    """Orchestrates diff parsing, the registered analyzers and the legacy regex rules."""

    def __init__(self):
        """Register the analyzers that can be constructed; failures are logged, not raised."""
        self.analyzers = []
        try:
            self.analyzers.append(RuffAnalyzer())
        except Exception:
            # Best-effort: the legacy regex rules still run without this analyzer.
            logger.warning(
                "RuffAnalyzer could not be initialised; continuing without it",
                exc_info=True,
            )

    def review(self, diff_text: str) -> list[Finding]:
        """Review `diff_text` and return the combined findings.

        Returns a single 'diff-too-large' error finding, without running any
        analysis, when the UTF-8 size of the diff exceeds the configured
        limit. Otherwise returns analyzer findings restricted to added lines
        of changed ``.py`` files, followed by the legacy regex findings
        (source="regex").
        """
        # Guard against oversized diffs that could OOM the process
        max_bytes = _get_max_diff_bytes()
        diff_bytes = len(diff_text.encode("utf-8"))
        if diff_bytes > max_bytes:
            return [Finding(
                severity="error",
                file="",
                line=0,
                rule="diff-too-large",
                message=f"Diff too large ({diff_bytes // 1024} KB, limit {max_bytes // 1024} KB). "
                        f"Review skipped to avoid OOM.",
                source="review_engine",
            )]

        findings = []
        changed = parse_diff(diff_text)

        # Analyzers on changed Python files
        for cf in changed:
            if not cf.path.endswith(".py"):
                continue
            for a in self.analyzers:
                try:
                    raw = a.analyze(cf.path)
                    # Filter: only changed lines
                    findings.extend(f for f in raw if f.line in cf.added_lines)
                except Exception:
                    # One failing analyzer must not abort the whole review,
                    # but the failure should be visible in the logs.
                    logger.warning(
                        "%s failed on %s; skipping",
                        type(a).__name__,
                        cf.path,
                        exc_info=True,
                    )

        # Legacy regex fallback
        for li in legacy_review(diff_text):
            findings.append(Finding(
                severity=li["severity"],
                file=li.get("file", ""),
                line=li["line"],
                rule=li["rule"],
                message=li["message"],
                source="regex",
            ))

        return findings
src/tools.py ADDED
@@ -0,0 +1,291 @@
1
+ """MCP tools for GitHub operations — with policy guard & audit logging."""
2
+ from fastmcp import FastMCP
3
+
4
+ from .config import (
5
+ get_github_token, get_github_api_base,
6
+ get_policy_path, get_policy_required,
7
+ get_audit_sink, get_dry_run_enabled,
8
+ )
9
+ from .github_client import GitHubClient
10
+ from .review import review_diff
11
+ from .review_engine import ReviewService
12
+ from .policy import PolicyConfig, resolve_dry_run
13
+ from .audit import AuditLogger
14
+
15
+
16
# Server object; the tool functions in this module register on it via @mcp.tool().
mcp = FastMCP("GitHub MCP Agent Server")

# Module-level singletons, left as None until the matching _get_* accessor
# builds them on first use.
_policy: PolicyConfig | None = None
_audit: AuditLogger | None = None
21
+
22
+
23
def _get_policy() -> PolicyConfig:
    """Return the shared PolicyConfig, loading it from the configured path on first use."""
    global _policy
    if _policy is not None:
        return _policy
    config = PolicyConfig()
    _policy = config.load(path=get_policy_path(), required=get_policy_required())
    return _policy
31
+
32
+
33
def _get_audit() -> AuditLogger:
    """Return the shared AuditLogger, creating it with the configured sink on first use."""
    global _audit
    if _audit is not None:
        return _audit
    _audit = AuditLogger(sink=get_audit_sink())
    return _audit
38
+
39
+
40
+ # ── Read tools (no guard needed) ───────────────────────
41
@mcp.tool()
def search_code(query: str, repo: str = None) -> str:
    """Search for code in GitHub repositories.

    Args:
        query: Search query passed to the GitHub client.
        repo: Optional repository passed through to the client's search.

    Returns:
        An error line, "No results found", or a summary listing at most the
        first 10 matches (path, repo and URL).
    """
    github = GitHubClient(get_github_token(), get_github_api_base())
    response = github.search_code(query, repo)

    failed = isinstance(response, dict) and "error" in response
    if failed:
        return f"Error: {response['error']}"

    matches = response.get("items", [])
    if not matches:
        return "No results found"

    # The header reports the full match count; only the first 10 are listed.
    entries = [
        f"• {m['path']} in {m['repo']}\n {m['url']}"
        for m in matches[:10]
    ]
    return f"Found {len(matches)} results:\n" + "\n\n".join(entries)
59
+
60
+
61
@mcp.tool()
def list_issues(repo: str, state: str = "open") -> str:
    """List issues in a GitHub repository.

    Args:
        repo: Repository to list issues for.
        state: Issue state passed to the client (defaults to "open").

    Returns:
        An error line, a "no issues" line, or a listing of at most the first
        10 issues (number, title and URL).
    """
    github = GitHubClient(get_github_token(), get_github_api_base())
    issues = github.list_issues(repo, state)

    if "error" in issues:
        return f"Error: {issues['error']}"
    if not issues:
        return f"No {state} issues found in {repo}"

    entries = [
        f"#{entry['number']}: {entry['title']}\n {entry['html_url']}"
        for entry in issues[:10]
    ]
    return f"Issues in {repo} ({state}):\n" + "\n\n".join(entries)
78
+
79
+
80
@mcp.tool()
def get_pr_diff(repo: str, pr_number: int) -> str:
    """Get the diff for a pull request.

    Args:
        repo: Repository containing the pull request.
        pr_number: Pull request number.

    Returns:
        The diff text under a "PR #N diff:" header, or an error line.
    """
    github = GitHubClient(get_github_token(), get_github_api_base())
    payload = github.get_pr_diff(repo, pr_number)

    if "error" not in payload:
        return f"PR #{pr_number} diff:\n\n{payload['diff']}"
    return f"Error: {payload['error']}"
90
+
91
+
92
@mcp.tool()
def review_pr_diff(repo: str, pr_number: int) -> str:
    """Review a PR diff using ruff + legacy regex rules. Returns structured findings.

    Args:
        repo: Repository containing the pull request.
        pr_number: Pull request number.

    Returns:
        One line per finding, a "looks good" line when the review produced
        no findings, or an error line when the diff could not be fetched or
        the review itself failed.
    """
    client = GitHubClient(get_github_token(), get_github_api_base())
    result = client.get_pr_diff(repo, pr_number)

    if "error" in result:
        return f"Error: {result['error']}"

    # Use new review engine (ruff + regex fallback)
    try:
        findings = ReviewService().review(result["diff"])
    except Exception as exc:
        # Tool boundary: report the failure. Treating it as "no findings"
        # would tell the caller an unreviewed PR looks good.
        return f"Error: review failed for PR #{pr_number}: {exc}"

    if not findings:
        return f"PR #{pr_number} looks good - no issues found!"

    output = [f"Code review for PR #{pr_number} ({len(findings)} issues):"]
    for f in findings:
        icon = "❌" if f.severity == "error" else "⚠️"
        output.append(
            f"{icon} {f.file}:{f.line} — {f.message} "
            f"[{f.rule}/{f.source}]"
        )

    return "\n".join(output)
120
+
121
+
122
@mcp.tool()
def comment_pr_review(repo: str, pr_number: int, dry_run: bool | None = None) -> str:
    """Fetch PR diff, run code review, and post findings as review comments.

    This tool writes to the repository, so it applies the same repo
    allowlist guard, dry-run handling and audit logging as the other write
    tools in this module.

    Args:
        repo: Repository in 'owner/repo' format.
        pr_number: Pull request number.
        dry_run: If True, preview the comments without posting. If omitted,
            the environment-configured dry-run setting applies.

    Returns:
        A policy-denied line, an error line, a "looks good" line, a dry-run
        preview, or a summary of how many comments were posted.
    """
    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
    policy = _get_policy()
    audit = _get_audit()
    request = {"pr_number": pr_number}

    # Guard: repo allowlist check
    repo_decision = policy.check_repo(repo)
    if repo_decision.action == "deny":
        audit.log(
            tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
            dry_run=dry, policy_decision="deny",
            policy_rule=repo_decision.matched_rule,
            request_body=request,
            error=repo_decision.reason,
        )
        return f"❌ Policy Denied: {repo_decision.reason}"

    client = GitHubClient(get_github_token(), get_github_api_base())
    diff_result = client.get_pr_diff(repo, pr_number)

    if "error" in diff_result:
        return f"Error: {diff_result['error']}"

    try:
        findings = ReviewService().review(diff_result["diff"])
    except Exception as exc:
        # Tool boundary: report the failure instead of claiming the PR is clean.
        return f"Error: review failed for PR #{pr_number}: {exc}"

    if not findings:
        return f"PR #{pr_number} looks good - no issues found!"

    # Limit to top 10 findings to avoid spam
    selected = findings[:10]

    if dry:
        audit.log(
            tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
            dry_run=True, policy_decision="allow",
            policy_rule=repo_decision.matched_rule,
            request_body=request,
        )
        preview = "\n".join(f" {f.file}:{f.line} — {f.message}" for f in selected)
        return (
            f"[DRY RUN] Would post {len(selected)} review comments on PR #{pr_number} in {repo}:\n"
            f"{preview}\n"
            f" Policy: {repo_decision.reason}"
        )

    posted = 0
    for f in selected:
        body = f"{f.message}\n\nRule: `{f.rule}` | Source: {f.source}"
        r = client.create_review_comment(repo, pr_number, body, path=f.file, line=f.line)
        if "error" not in r:
            posted += 1

    audit.log(
        tool="comment_pr_review", action="pull_request.review_comment", repo=repo,
        policy_decision="allow", policy_rule=repo_decision.matched_rule,
        request_body=request,
        response={
            "posted": posted,
            "attempted": len(selected),
            "total_findings": len(findings),
        },
    )
    return (
        f"Posted {posted} review comments on PR #{pr_number} "
        f"({len(findings)} total issues found, top 10 posted)"
    )
152
+
153
+
154
+ # ── Write tools (guarded) ──────────────────────────────
155
@mcp.tool()
def create_issue(repo: str, title: str, body: str, dry_run: bool | None = None) -> str:
    """Create a new issue in a GitHub repository.

    Args:
        repo: Repository in 'owner/repo' format.
        title: Issue title.
        body: Issue body text.
        dry_run: If True, preview the operation without executing. If
            omitted, the environment-configured dry-run setting applies.

    Returns:
        A policy-denied line, a dry-run preview, an error line, or a
        confirmation with the new issue's number, title and URL.
    """
    # The default must be None, not False: resolve_dry_run only falls back to
    # the environment setting when no explicit value was given.
    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
    policy = _get_policy()
    audit = _get_audit()
    request = {"title": title, "body": body}

    # Guard: repo allowlist check
    repo_decision = policy.check_repo(repo)
    if repo_decision.action == "deny":
        audit.log(
            tool="create_issue", action="issue.create", repo=repo,
            dry_run=dry, policy_decision="deny",
            policy_rule=repo_decision.matched_rule,
            request_body=request,
            error=repo_decision.reason,
        )
        return f"❌ Policy Denied: {repo_decision.reason}"

    if dry:
        audit.log(
            tool="create_issue", action="issue.create", repo=repo,
            dry_run=True, policy_decision="allow",
            policy_rule=repo_decision.matched_rule,
            request_body=request,
        )
        return (
            f"[DRY RUN] Would create issue in {repo}:\n"
            f" Title: {title}\n"
            f" Body: {body[:120]}{'...' if len(body) > 120 else ''}\n"
            f" Policy: {repo_decision.reason}"
        )

    client = GitHubClient(get_github_token(), get_github_api_base())
    result = client.create_issue(repo, title, body)

    if "error" in result:
        audit.log(
            tool="create_issue", action="issue.create", repo=repo,
            policy_decision="allow", policy_rule=repo_decision.matched_rule,
            request_body=request,
            error=result["error"],
        )
        return f"Error: {result['error']}"

    audit.log(
        tool="create_issue", action="issue.create", repo=repo,
        policy_decision="allow", policy_rule=repo_decision.matched_rule,
        request_body=request,
        response=result,
    )
    return f"Issue created: #{result['number']}: {result['title']}\n{result['html_url']}"
214
+
215
+
216
@mcp.tool()
def create_pr(repo: str, title: str, body: str, head: str, base: str,
              dry_run: bool | None = None) -> str:
    """Create a new pull request.

    Args:
        repo: Repository in 'owner/repo' format.
        title: PR title.
        body: PR description.
        head: Source branch name.
        base: Target branch name (e.g. 'main').
        dry_run: If True, preview the operation without executing. If
            omitted, the environment-configured dry-run setting applies.

    Returns:
        A policy-denied line, a dry-run preview, an error line, or a
        confirmation with the new PR's number, title and URL.
    """
    # The default must be None, not False: resolve_dry_run only falls back to
    # the environment setting when no explicit value was given.
    dry = resolve_dry_run(dry_run, get_dry_run_enabled())
    policy = _get_policy()
    audit = _get_audit()
    request = {"title": title, "head": head, "base": base}

    # Guard: repo allowlist
    repo_decision = policy.check_repo(repo)
    if repo_decision.action == "deny":
        audit.log(
            tool="create_pr", action="pull_request.create", repo=repo,
            dry_run=dry, policy_decision="deny",
            policy_rule=repo_decision.matched_rule,
            request_body=request,
            error=repo_decision.reason,
        )
        return f"❌ Policy Denied: {repo_decision.reason}"

    # Guard: branch protection
    branch_decision = policy.check_branch_for_pr(base)
    if branch_decision.action == "deny":
        audit.log(
            tool="create_pr", action="pull_request.create", repo=repo,
            dry_run=dry, policy_decision="deny",
            policy_rule=branch_decision.matched_rule,
            request_body=request,
            error=branch_decision.reason,
        )
        return f"❌ Policy Denied: {branch_decision.reason}"

    # Both guards passed; the audit trail records both matched rules.
    matched_rules = f"{repo_decision.matched_rule}, {branch_decision.matched_rule}"

    if dry:
        audit.log(
            tool="create_pr", action="pull_request.create", repo=repo,
            dry_run=True, policy_decision="allow",
            policy_rule=matched_rules,
            request_body=request,
        )
        return (
            f"[DRY RUN] Would create PR in {repo}:\n"
            f" Title: {title}\n"
            f" Head: {head} → Base: {base}\n"
            f" Policy: {repo_decision.reason} · {branch_decision.reason}"
        )

    client = GitHubClient(get_github_token(), get_github_api_base())
    result = client.create_pr(repo, title, body, head, base)

    if "error" in result:
        audit.log(
            tool="create_pr", action="pull_request.create", repo=repo,
            policy_decision="allow",
            policy_rule=matched_rules,
            request_body=request,
            error=result["error"],
        )
        return f"Error: {result['error']}"

    audit.log(
        tool="create_pr", action="pull_request.create", repo=repo,
        policy_decision="allow",
        policy_rule=matched_rules,
        request_body=request,
        response=result,
    )
    return f"PR created: #{result['number']}: {result['title']}\n{result['html_url']}"
tests/__init__.py ADDED
File without changes