cicada-mcp 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cicada-mcp might be problematic. Click here for more details.

Files changed (48) hide show
  1. cicada/__init__.py +30 -0
  2. cicada/clean.py +297 -0
  3. cicada/command_logger.py +293 -0
  4. cicada/dead_code_analyzer.py +282 -0
  5. cicada/extractors/__init__.py +36 -0
  6. cicada/extractors/base.py +66 -0
  7. cicada/extractors/call.py +176 -0
  8. cicada/extractors/dependency.py +361 -0
  9. cicada/extractors/doc.py +179 -0
  10. cicada/extractors/function.py +246 -0
  11. cicada/extractors/module.py +123 -0
  12. cicada/extractors/spec.py +151 -0
  13. cicada/find_dead_code.py +270 -0
  14. cicada/formatter.py +918 -0
  15. cicada/git_helper.py +646 -0
  16. cicada/indexer.py +629 -0
  17. cicada/install.py +724 -0
  18. cicada/keyword_extractor.py +364 -0
  19. cicada/keyword_search.py +553 -0
  20. cicada/lightweight_keyword_extractor.py +298 -0
  21. cicada/mcp_server.py +1559 -0
  22. cicada/mcp_tools.py +291 -0
  23. cicada/parser.py +124 -0
  24. cicada/pr_finder.py +435 -0
  25. cicada/pr_indexer/__init__.py +20 -0
  26. cicada/pr_indexer/cli.py +62 -0
  27. cicada/pr_indexer/github_api_client.py +431 -0
  28. cicada/pr_indexer/indexer.py +297 -0
  29. cicada/pr_indexer/line_mapper.py +209 -0
  30. cicada/pr_indexer/pr_index_builder.py +253 -0
  31. cicada/setup.py +339 -0
  32. cicada/utils/__init__.py +52 -0
  33. cicada/utils/call_site_formatter.py +95 -0
  34. cicada/utils/function_grouper.py +57 -0
  35. cicada/utils/hash_utils.py +173 -0
  36. cicada/utils/index_utils.py +290 -0
  37. cicada/utils/path_utils.py +240 -0
  38. cicada/utils/signature_builder.py +106 -0
  39. cicada/utils/storage.py +111 -0
  40. cicada/utils/subprocess_runner.py +182 -0
  41. cicada/utils/text_utils.py +90 -0
  42. cicada/version_check.py +116 -0
  43. cicada_mcp-0.1.4.dist-info/METADATA +619 -0
  44. cicada_mcp-0.1.4.dist-info/RECORD +48 -0
  45. cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
  46. cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
  47. cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
  48. cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
cicada/pr_finder.py ADDED
@@ -0,0 +1,435 @@
1
+ """
2
+ PR Finder - Find the Pull Request that introduced a specific line of code.
3
+
4
+ Uses git blame to find the commit and GitHub API to find the associated PR.
5
+ """
6
+
7
+ import json
8
+ import subprocess
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Optional, Dict, Any
12
+
13
+ from cicada.utils import load_index
14
+
15
+
16
class PRFinder:
    """Find the Pull Request that introduced a specific line of code.

    The commit is located with ``git blame``. The PR is then resolved either
    from a pre-built JSON index (when one is loaded) or by querying GitHub
    through the ``gh`` command line tool (when no index is in use).
    """

    def __init__(
        self,
        repo_path: str = ".",
        use_index: bool = True,
        index_path: str = ".cicada/pr_index.json",
        verbose: bool = False,
    ):
        """
        Initialize the PR finder.

        Args:
            repo_path: Path to the git repository (defaults to current directory)
            use_index: If True, use cached index for PR lookups (default: True)
            index_path: Path to the PR index file (default: .cicada/pr_index.json)
            verbose: If True, print status messages (default: False)

        Raises:
            ValueError: If ``repo_path`` does not contain a ``.git`` entry.
            RuntimeError: If network lookups will be needed (index disabled
                or not loadable) and the ``gh`` CLI is unavailable.
        """
        self.repo_path = Path(repo_path).resolve()
        self.use_index = use_index
        self.index_path = index_path
        self.index: Optional[Dict[str, Any]] = None
        self.verbose = verbose

        self._validate_git_repo()

        # Load index if enabled
        if self.use_index:
            self.index = self._load_index()
            if self.index and self.verbose:
                # Tolerate an index without metadata instead of failing
                # construction just to print a status line.
                total_prs = self.index.get("metadata", {}).get("total_prs", "unknown")
                print(f"Loaded PR index with {total_prs} PRs")
            elif not self.index:
                # Always show warning (even in non-verbose mode) with color
                print(
                    "\033[33m⚠️ No PR index found - using slower network lookups. Create index: python cicada/pr_indexer.py\033[0m",
                    file=sys.stderr,
                )

        # Only validate gh CLI if we might need it (no index or index disabled)
        if not self.use_index or not self.index:
            self._validate_gh_cli()

    def _validate_git_repo(self):
        """Validate that the path is a git repository.

        Raises:
            ValueError: If ``<repo_path>/.git`` does not exist.
        """
        git_dir = self.repo_path / ".git"
        if not git_dir.exists():
            raise ValueError(f"Not a git repository: {self.repo_path}")

    def _validate_gh_cli(self):
        """Validate that GitHub CLI is installed and available.

        Raises:
            RuntimeError: If ``gh --version`` cannot be executed or exits
                with a non-zero status.
        """
        try:
            subprocess.run(
                ["gh", "--version"], capture_output=True, check=True, cwd=self.repo_path
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
            raise RuntimeError(
                "GitHub CLI (gh) is not installed or not available in PATH. "
                "Install it from https://cli.github.com/"
            ) from err

    def _load_index(self) -> Optional[Dict[str, Any]]:
        """
        Load the PR index from file.

        Returns:
            Index dictionary or None if file doesn't exist or can't be loaded
        """
        index_file = Path(self.index_path)

        # An absolute path is used as given; a relative one is anchored at
        # the repository root rather than the current working directory.
        if not index_file.is_absolute():
            index_file = self.repo_path / self.index_path

        return load_index(index_file, verbose=self.verbose, raise_on_error=False)

    def _lookup_pr_in_index(self, commit_sha: str) -> Optional[Dict[str, Any]]:
        """
        Look up PR information from the index.

        Args:
            commit_sha: Git commit SHA

        Returns:
            PR information dictionary or None if not found
        """
        if not self.index:
            return None

        # Look up commit in the commit_to_pr mapping
        pr_number = self.index.get("commit_to_pr", {}).get(commit_sha)
        if pr_number is None:
            return None

        # PR details are keyed by the PR number rendered as a string.
        return self.index.get("prs", {}).get(str(pr_number))

    @staticmethod
    def _parse_blame_porcelain(porcelain: str) -> Optional[Dict[str, Optional[str]]]:
        """Extract the commit SHA and author from ``git blame --porcelain`` output.

        Args:
            porcelain: Raw stdout of a single-line porcelain blame.

        Returns:
            Dictionary with ``commit``, ``author_name`` and ``author_email``
            keys, or None when the output has no header line to read a SHA
            from (e.g. empty output).
        """
        lines = porcelain.split("\n")

        # The first line starts with the commit SHA; without it there is
        # nothing to report.
        header_fields = lines[0].split()
        if not header_fields:
            return None

        author_name = None
        author_email = None
        for line in lines:
            if line.startswith("author "):
                author_name = line[len("author "):]
            elif line.startswith("author-mail "):
                # The address is wrapped in angle brackets in porcelain output.
                author_email = line[len("author-mail "):].strip("<>")

        return {
            "commit": header_fields[0],
            "author_name": author_name,
            "author_email": author_email,
        }

    def _run_git_blame(
        self, file_path: str, line_number: int
    ) -> Optional[Dict[str, Optional[str]]]:
        """
        Run git blame to find the commit that introduced a specific line.

        Args:
            file_path: Relative path to the file from repo root
            line_number: Line number (1-indexed)

        Returns:
            Dictionary with commit SHA and author information, or None if not found

        Raises:
            RuntimeError: If ``git blame`` exits with a non-zero status.
        """
        try:
            result = subprocess.run(
                [
                    "git",
                    "blame",
                    "-L",
                    f"{line_number},{line_number}",
                    "--porcelain",
                    # "--" ends option parsing so a path that begins with "-"
                    # is never interpreted as a git option.
                    "--",
                    file_path,
                ],
                capture_output=True,
                text=True,
                check=True,
                cwd=self.repo_path,
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError(f"git blame failed: {e.stderr}") from e

        return self._parse_blame_porcelain(result.stdout)

    def _get_repo_info(self) -> Optional[tuple[str, str]]:
        """
        Get the repository owner and name from git remote.

        Returns:
            Tuple of (owner, repo_name), or None if not a GitHub repository
        """
        try:
            result = subprocess.run(
                [
                    "gh",
                    "repo",
                    "view",
                    "--json",
                    "nameWithOwner",
                    "-q",
                    ".nameWithOwner",
                ],
                capture_output=True,
                text=True,
                check=True,
                cwd=self.repo_path,
            )
        except subprocess.CalledProcessError:
            # Not a GitHub repository or no remote configured
            return None

        name_with_owner = result.stdout.strip()
        if not name_with_owner or name_with_owner == "null":
            return None

        # Anything other than exactly "owner/name" is treated as "no usable
        # repository" rather than crashing on tuple unpacking.
        parts = name_with_owner.split("/")
        if len(parts) != 2 or not all(parts):
            return None

        owner, repo_name = parts
        return owner, repo_name

    def _find_pr_for_commit(self, commit_sha: str) -> Optional[Dict[str, Any]]:
        """
        Find the PR that introduced a specific commit.

        Args:
            commit_sha: Git commit SHA

        Returns:
            Dictionary containing PR information, or None if no PR found

        Raises:
            RuntimeError: If the GitHub response cannot be parsed into the
                expected PR fields.
        """
        try:
            repo_info = self._get_repo_info()
            if repo_info is None:
                # Not a GitHub repository
                return None

            owner, repo_name = repo_info

            # Query GitHub API for PRs associated with the commit
            result = subprocess.run(
                ["gh", "api", f"repos/{owner}/{repo_name}/commits/{commit_sha}/pulls"],
                capture_output=True,
                text=True,
                check=True,
                cwd=self.repo_path,
            )

            prs = json.loads(result.stdout)
            if not prs:
                return None

            # Return the first PR (usually there's only one)
            pr = prs[0]
            return {
                "number": pr["number"],
                "title": pr["title"],
                "url": pr["html_url"],
                "state": pr["state"],
                "merged": pr.get("merged_at") is not None,
                "author": pr["user"]["login"],
                "created_at": pr["created_at"],
                "merged_at": pr.get("merged_at"),
            }

        except subprocess.CalledProcessError:
            # Commit might not be associated with a PR
            return None
        except (json.JSONDecodeError, KeyError, TypeError) as e:
            # TypeError covers a null nested object (e.g. "user": null).
            raise RuntimeError(f"Failed to parse PR information: {e}") from e

    def find_pr_for_line(self, file_path: str, line_number: int) -> Dict[str, Any]:
        """
        Find the PR that introduced a specific line of code.

        Args:
            file_path: Path to the file (relative to repo root or absolute)
            line_number: Line number (1-indexed)

        Returns:
            Dictionary containing:
            - commit: The commit SHA
            - author_name: The commit author's name
            - author_email: The commit author's email
            - file_path: The file path
            - line_number: The line number
            - pr: PR information (or None if not found)
            - error: Present only when no commit could be determined

        Raises:
            ValueError: If an absolute ``file_path`` is not located under the
                repository root.
            RuntimeError: If ``git blame`` fails or the GitHub response
                cannot be parsed.
        """
        # Convert to relative path from repo root
        file_path_obj = Path(file_path)
        if file_path_obj.is_absolute():
            file_path_obj = file_path_obj.relative_to(self.repo_path)

        file_path_str = str(file_path_obj)

        # Get commit and author info from git blame
        blame_info = self._run_git_blame(file_path_str, line_number)

        if not blame_info:
            return {
                "file_path": file_path_str,
                "line_number": line_number,
                "commit": None,
                "author_name": None,
                "author_email": None,
                "pr": None,
                "error": "Could not find commit for this line",
            }

        commit_sha = blame_info["commit"]
        assert commit_sha is not None  # narrows the Optional[str] for type checkers

        if self.use_index and self.index:
            # Index lookup (fast, no network). A miss is reported as "no PR";
            # the network is deliberately not consulted here because the gh
            # CLI is only validated when no index is in use.
            pr_info = self._lookup_pr_in_index(commit_sha)
        else:
            # No usable index: ask GitHub through the gh CLI.
            pr_info = self._find_pr_for_commit(commit_sha)

        return {
            "file_path": file_path_str,
            "line_number": line_number,
            "commit": blame_info["commit"],
            "author_name": blame_info["author_name"],
            "author_email": blame_info["author_email"],
            "pr": pr_info,
        }

    def format_result(self, result: Dict[str, Any], output_format: str = "text") -> str:
        """
        Format the result for display.

        Args:
            result: Result dictionary from find_pr_for_line
            output_format: Output format ('text', 'json', or 'markdown');
                any other value is rendered as text

        Returns:
            Formatted string
        """
        if output_format == "json":
            return json.dumps(result, indent=2)

        if result.get("error"):
            return f"Error: {result['error']}"

        pr = result.get("pr")
        commit = result.get("commit")
        author_name = result.get("author_name")
        author_email = result.get("author_email")

        # Use short commit SHA for display (first 7 characters); slicing is
        # already a no-op for shorter strings.
        short_commit = commit[:7] if commit else commit

        # Format author string
        if author_name and author_email:
            author_str = f"{author_name} <{author_email}>"
        elif author_name:
            author_str = author_name
        elif author_email:
            author_str = author_email
        else:
            author_str = "Unknown"

        # A merged PR is reported as "merged" regardless of its raw state.
        pr_status = None
        if pr:
            pr_status = "merged" if pr["merged"] else pr["state"]

        if output_format == "markdown":
            output = [
                f"## Line {result['line_number']} in {result['file_path']}",
                "",
                f"**Commit:** `{short_commit}` ",
                f"**Author:** {author_str}",
            ]

            if pr:
                output.append(
                    f"**PR:** [#{pr['number']}]({pr['url']}) - {pr['title']} (@{pr['author']}, {pr_status})"
                )
            else:
                note = result.get("note", "None")
                output.append(f"**PR:** {note}")

            return "\n".join(output)

        # text format
        output = [
            f"File: {result['file_path']}:{result['line_number']}",
            f"Commit: {short_commit}",
            f"Author: {author_str}",
        ]

        if pr:
            output.append(
                f"PR: #{pr['number']} - {pr['title']} (@{pr['author']}, {pr_status}) - {pr['url']}"
            )
        else:
            note = result.get("note", "None")
            output.append(f"PR: {note}")

        return "\n".join(output)
388
+
389
+
390
def main():
    """Command-line front end: report the PR behind one line of a file.

    Parses the file path, line number and output options, runs the lookup
    against the repository in the current directory, and prints the formatted
    result. Any failure is reported on stderr and ends the process with exit
    status 1.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Find the PR that introduced a specific line of code"
    )
    cli.add_argument("file", help="Path to the file")
    cli.add_argument("line", type=int, help="Line number (1-indexed)")
    cli.add_argument(
        "--format",
        choices=["text", "json", "markdown"],
        default="text",
        help="Output format (default: text)",
    )
    cli.add_argument(
        "--no-index",
        action="store_true",
        help="Disable index lookup and use network instead (slower)",
    )
    cli.add_argument(
        "--index-path",
        default=".cicada/pr_index.json",
        help="Path to PR index file (default: .cicada/pr_index.json)",
    )
    opts = cli.parse_args()

    try:
        # The CLI always runs verbosely and always targets the current directory.
        pr_finder = PRFinder(
            repo_path=".",
            use_index=not opts.no_index,
            index_path=opts.index_path,
            verbose=True,
        )
        lookup = pr_finder.find_pr_for_line(opts.file, opts.line)
        print(pr_finder.format_result(lookup, opts.format))
    except Exception as exc:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,20 @@
1
+ """
2
+ PR Indexer package.
3
+
4
+ This package contains classes for indexing GitHub pull requests,
5
+ separated by responsibility for better maintainability.
6
+ """
7
+
8
+ from .github_api_client import GitHubAPIClient
9
+ from .pr_index_builder import PRIndexBuilder
10
+ from .line_mapper import LineMapper
11
+ from .indexer import PRIndexer
12
+ from .cli import main
13
+
14
+ __all__ = [
15
+ "GitHubAPIClient",
16
+ "PRIndexBuilder",
17
+ "LineMapper",
18
+ "PRIndexer",
19
+ "main",
20
+ ]
@@ -0,0 +1,62 @@
1
+ """CLI entry point for the PR indexer."""
2
+
3
+ import sys
4
+
5
+ from .indexer import PRIndexer
6
+
7
+
8
def main():
    """Command-line front end for building the pull-request index.

    Runs the update check, parses the repository path and output options, and
    indexes the repository (incrementally unless ``--clean`` is given).
    Exits with status 130 on Ctrl-C and 1 on any other error.
    """
    import argparse
    from cicada.version_check import check_for_updates

    # Update check; the helper is expected to be non-blocking and to fail
    # silently — TODO confirm against cicada.version_check.
    check_for_updates()

    cli = argparse.ArgumentParser(
        description="Index GitHub pull requests for fast offline lookup"
    )
    cli.add_argument(
        "repo",
        nargs="?",
        default=".",
        help="Path to git repository (default: current directory)",
    )
    cli.add_argument(
        "--output",
        default=".cicada/pr_index.json",
        help="Output path for the index file (default: .cicada/pr_index.json)",
    )
    cli.add_argument(
        "--clean",
        action="store_true",
        help="Clean and rebuild the entire index from scratch (default: incremental update)",
    )
    opts = cli.parse_args()

    try:
        # --clean switches off the default incremental mode.
        PRIndexer(repo_path=opts.repo).index_repository(
            output_path=opts.output, incremental=not opts.clean
        )
        print(
            "\n✅ Indexing complete! You can now use the MCP tools for PR history lookups."
        )
    except KeyboardInterrupt:
        print("\n\n⚠️ Indexing interrupted by user.")
        print(
            "Partial index may have been saved. Run again to continue (incremental by default)."
        )
        sys.exit(130)  # Standard exit code for SIGINT
    except Exception as exc:
        # Top-level boundary: turn any failure into a message and exit code.
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()