cicada-mcp 0.1.4: cicada_mcp-0.1.4-py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.

Potentially problematic release.


This version of cicada-mcp might be problematic.

Files changed (48)
  1. cicada/__init__.py +30 -0
  2. cicada/clean.py +297 -0
  3. cicada/command_logger.py +293 -0
  4. cicada/dead_code_analyzer.py +282 -0
  5. cicada/extractors/__init__.py +36 -0
  6. cicada/extractors/base.py +66 -0
  7. cicada/extractors/call.py +176 -0
  8. cicada/extractors/dependency.py +361 -0
  9. cicada/extractors/doc.py +179 -0
  10. cicada/extractors/function.py +246 -0
  11. cicada/extractors/module.py +123 -0
  12. cicada/extractors/spec.py +151 -0
  13. cicada/find_dead_code.py +270 -0
  14. cicada/formatter.py +918 -0
  15. cicada/git_helper.py +646 -0
  16. cicada/indexer.py +629 -0
  17. cicada/install.py +724 -0
  18. cicada/keyword_extractor.py +364 -0
  19. cicada/keyword_search.py +553 -0
  20. cicada/lightweight_keyword_extractor.py +298 -0
  21. cicada/mcp_server.py +1559 -0
  22. cicada/mcp_tools.py +291 -0
  23. cicada/parser.py +124 -0
  24. cicada/pr_finder.py +435 -0
  25. cicada/pr_indexer/__init__.py +20 -0
  26. cicada/pr_indexer/cli.py +62 -0
  27. cicada/pr_indexer/github_api_client.py +431 -0
  28. cicada/pr_indexer/indexer.py +297 -0
  29. cicada/pr_indexer/line_mapper.py +209 -0
  30. cicada/pr_indexer/pr_index_builder.py +253 -0
  31. cicada/setup.py +339 -0
  32. cicada/utils/__init__.py +52 -0
  33. cicada/utils/call_site_formatter.py +95 -0
  34. cicada/utils/function_grouper.py +57 -0
  35. cicada/utils/hash_utils.py +173 -0
  36. cicada/utils/index_utils.py +290 -0
  37. cicada/utils/path_utils.py +240 -0
  38. cicada/utils/signature_builder.py +106 -0
  39. cicada/utils/storage.py +111 -0
  40. cicada/utils/subprocess_runner.py +182 -0
  41. cicada/utils/text_utils.py +90 -0
  42. cicada/version_check.py +116 -0
  43. cicada_mcp-0.1.4.dist-info/METADATA +619 -0
  44. cicada_mcp-0.1.4.dist-info/RECORD +48 -0
  45. cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
  46. cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
  47. cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
  48. cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
cicada/pr_indexer/github_api_client.py
@@ -0,0 +1,431 @@
+ """
+ Author: Cursor(Auto)
+
+ GitHub API Client for PR Indexer.
+
+ This module handles all interactions with the GitHub API (both REST and GraphQL),
+ separating API concerns from indexing logic.
+ """
+
+ import json
+ import subprocess
+ from pathlib import Path
+ from typing import Dict, List, Any, Tuple
+
+ from cicada.utils import SubprocessRunner
+
+
+ class GitHubAPIClient:
+     """
+     Handles GitHub API interactions for PR indexing.
+
+     This class encapsulates all GitHub CLI and API calls, providing
+     a clean interface for fetching PR data.
+     """
+
+     def __init__(self, repo_path: Path, repo_owner: str, repo_name: str):
+         """
+         Initialize the GitHub API client.
+
+         Args:
+             repo_path: Path to the git repository
+             repo_owner: GitHub repository owner
+             repo_name: GitHub repository name
+         """
+         self.repo_path = repo_path
+         self.repo_owner = repo_owner
+         self.repo_name = repo_name
+         self.runner = SubprocessRunner(cwd=repo_path)
+
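For orientation: the SubprocessRunner dependency comes from elsewhere in the wheel (cicada/utils/subprocess_runner.py) and is not shown in this diff. The stand-in below is only a sketch of the contract this class appears to rely on (run_gh_command accepts gh arguments, returns an object exposing .stdout and .stderr, and raises subprocess.CalledProcessError or FileNotFoundError on failure); it is not the packaged implementation.

import subprocess
from pathlib import Path


class StubGhRunner:
    """Hypothetical stand-in for cicada.utils.SubprocessRunner (illustrative only)."""

    def __init__(self, cwd: Path):
        self.cwd = cwd

    def run_gh_command(self, args):
        # Accept either a single flag ("--version") or a list of gh arguments,
        # mirroring how this module calls the runner.
        arg_list = [args] if isinstance(args, str) else list(args)
        return subprocess.run(
            ["gh", *arg_list],
            cwd=self.cwd,
            capture_output=True,
            text=True,
            check=True,  # non-zero exit raises subprocess.CalledProcessError
        )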
+     def validate_gh_cli(self) -> None:
+         """
+         Validate that GitHub CLI is installed and available.
+
+         Raises:
+             RuntimeError: If gh CLI is not available
+         """
+         try:
+             self.runner.run_gh_command("--version")
+         except (subprocess.CalledProcessError, FileNotFoundError):
+             raise RuntimeError(
+                 "GitHub CLI (gh) is not installed or not available in PATH. "
+                 "Install it from https://cli.github.com/"
+             )
+
+     def get_repo_info(self) -> Tuple[str, str]:
+         """
+         Get repository owner and name from git remote.
+
+         Returns:
+             Tuple of (owner, repo_name)
+
+         Raises:
+             RuntimeError: If not a GitHub repository
+         """
+         try:
+             result = self.runner.run_gh_command(
+                 ["repo", "view", "--json", "nameWithOwner", "-q", ".nameWithOwner"]
+             )
+
+             name_with_owner = result.stdout.strip()
+             if not name_with_owner or name_with_owner == "null":
+                 raise RuntimeError("Not a GitHub repository or no remote configured")
+
+             parts = name_with_owner.split("/")
+             if len(parts) != 2:
+                 raise RuntimeError(
+                     f"Invalid repository format. Expected owner/repo, got: {name_with_owner}"
+                 )
+
+             owner, repo_name = parts
+             return owner, repo_name
+
+         except subprocess.CalledProcessError:
+             raise RuntimeError("Not a GitHub repository or no remote configured")
+
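get_repo_info leans on gh's built-in jq-style filter: gh repo view --json nameWithOwner -q .nameWithOwner prints a single owner/repo string, which the method validates and splits. A small illustration of that parsing (the repository name is made up):

name_with_owner = "example-org/cicada\n".strip()   # what gh would print, stripped
parts = name_with_owner.split("/")
assert len(parts) == 2
owner, repo_name = parts  # ("example-org", "cicada")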
+     def fetch_pr_list(self, state: str = "all", limit: int = 10000) -> List[int]:
+         """
+         Fetch list of PR numbers.
+
+         Args:
+             state: PR state filter ('all', 'open', 'closed', 'merged')
+             limit: Maximum number of PRs to fetch
+
+         Returns:
+             List of PR numbers
+
+         Raises:
+             RuntimeError: If fetching fails
+         """
+         try:
+             result = self.runner.run_gh_command(
+                 [
+                     "pr",
+                     "list",
+                     "--state",
+                     state,
+                     "--json",
+                     "number",
+                     "--limit",
+                     str(limit),
+                 ]
+             )
+
+             pr_list = json.loads(result.stdout)
+             return [pr["number"] for pr in pr_list]
+
+         except subprocess.CalledProcessError as e:
+             raise RuntimeError(f"Failed to fetch PR list: {e.stderr}")
+         except json.JSONDecodeError as e:
+             raise RuntimeError(f"Failed to parse PR list: {e}")
+
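For reference, gh pr list --json number emits a JSON array of objects, one per PR, so the comprehension above reduces it to plain integers. An illustrative round-trip (the PR numbers are made up):

import json

raw = '[{"number": 212}, {"number": 211}, {"number": 209}]'  # sample gh output
numbers = [pr["number"] for pr in json.loads(raw)]
assert numbers == [212, 211, 209]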
+     def fetch_prs_batch_graphql(self, pr_numbers: List[int]) -> List[Dict[str, Any]]:
+         """
+         Fetch detailed PR information for a batch using GraphQL.
+
+         This is much more efficient than making individual REST calls.
+
+         Args:
+             pr_numbers: List of PR numbers to fetch (max 10 recommended)
+
+         Returns:
+             List of detailed PR dictionaries
+
+         Raises:
+             RuntimeError: If GraphQL query fails
+         """
+         if not pr_numbers:
+             return []
+
+         # Build GraphQL query for batch fetching
+         pr_queries: list[str] = []
+         for i, num in enumerate(pr_numbers):
+             pr_queries.append(
+                 f"""
+                 pr{i}: pullRequest(number: {num}) {{
+                     number
+                     title
+                     url
+                     state
+                     mergedAt
+                     bodyText
+                     author {{ login }}
+                     commits(first: 250) {{
+                         nodes {{ commit {{ oid }} }}
+                     }}
+                     files(first: 100) {{
+                         nodes {{ path }}
+                     }}
+                     reviewThreads(first: 100) {{
+                         nodes {{
+                             isResolved
+                             comments(first: 10) {{
+                                 nodes {{
+                                     id
+                                     body
+                                     createdAt
+                                     author {{ login }}
+                                     path
+                                     position
+                                     originalPosition
+                                     line
+                                     originalLine
+                                     diffHunk
+                                     commit {{ oid }}
+                                 }}
+                             }}
+                         }}
+                     }}
+                 }}
+                 """
+             )
+
+         query = f"""
+         query {{
+             repository(owner: "{self.repo_owner}", name: "{self.repo_name}") {{
+                 {' '.join(pr_queries)}
+             }}
+         }}
+         """
+
+         try:
+             result = self.runner.run_gh_command(
+                 ["api", "graphql", "-f", f"query={query}"]
+             )
+
+             data = json.loads(result.stdout)
+             repo_data = data.get("data", {}).get("repository", {})
+
+             return self._parse_graphql_response(repo_data, len(pr_numbers))
+
+         except subprocess.CalledProcessError as e:
+             raise RuntimeError(f"GraphQL query failed for PRs {pr_numbers}: {e.stderr}")
+         except (json.JSONDecodeError, KeyError) as e:
+             raise RuntimeError(
+                 f"Failed to parse GraphQL response for PRs {pr_numbers}: {e}"
+             )
+
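The batching above uses GraphQL field aliasing: each PR is requested under an alias (pr0, pr1, ...) inside a single repository query, so one gh api graphql call replaces what would otherwise be one round-trip per PR. The response consumed by _parse_graphql_response therefore nests each PR under data.repository.pr<i>. An abbreviated, illustrative shape (all values are made up):

example_response = {
    "data": {
        "repository": {
            "pr0": {
                "number": 212,
                "title": "Add keyword search",
                "url": "https://github.com/example-org/cicada/pull/212",
                "state": "MERGED",
                "mergedAt": "2024-05-01T12:00:00Z",
                "bodyText": "Short description...",
                "author": {"login": "octocat"},
                "commits": {"nodes": [{"commit": {"oid": "abc1234"}}]},
                "files": {"nodes": [{"path": "cicada/keyword_search.py"}]},
                "reviewThreads": {"nodes": []},
            },
            # PRs that cannot be resolved may come back as null, which the
            # parser skips via its `if not pr_data: continue` guard.
            "pr1": None,
        }
    }
}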
+     def _parse_graphql_response(
+         self, repo_data: Dict[str, Any], num_prs: int
+     ) -> List[Dict[str, Any]]:
+         """
+         Parse GraphQL response into PR dictionaries.
+
+         Args:
+             repo_data: Repository data from GraphQL response
+             num_prs: Number of PRs in the batch
+
+         Returns:
+             List of parsed PR dictionaries
+         """
+         detailed_prs = []
+
+         for i in range(num_prs):
+             pr_data = repo_data.get(f"pr{i}")
+             if not pr_data:
+                 continue
+
+             # Extract commits
+             commits = [
+                 node["commit"]["oid"]
+                 for node in pr_data.get("commits", {}).get("nodes", [])
+             ]
+
+             # Extract files
+             files = [node["path"] for node in pr_data.get("files", {}).get("nodes", [])]
+
+             # Extract and flatten review thread comments
+             comments = self._parse_review_comments(pr_data)
+
+             detailed_pr = {
+                 "number": pr_data["number"],
+                 "title": pr_data["title"],
+                 "url": pr_data["url"],
+                 "state": pr_data["state"].lower(),
+                 "merged": pr_data.get("mergedAt") is not None,
+                 "merged_at": pr_data.get("mergedAt"),
+                 "author": (pr_data.get("author") or {}).get("login", "unknown"),
+                 "description": pr_data.get("bodyText", ""),
+                 "commits": commits,
+                 "files_changed": files,
+                 "comments": comments,
+             }
+
+             detailed_prs.append(detailed_pr)
+
+         return detailed_prs
+
+     def _parse_review_comments(self, pr_data: Dict[str, Any]) -> List[Dict[str, Any]]:
+         """
+         Parse review thread comments from PR data.
+
+         Args:
+             pr_data: PR data from GraphQL
+
+         Returns:
+             List of comment dictionaries
+         """
+         comments = []
+
+         for thread in pr_data.get("reviewThreads", {}).get("nodes", []):
+             is_resolved = thread.get("isResolved", False)
+
+             for comment_node in thread.get("comments", {}).get("nodes", []):
+                 # Map comment line to current file line (will be updated later)
+                 mapped_line = comment_node.get("line")
+
+                 comments.append(
+                     {
+                         "id": comment_node.get("id"),
+                         "author": (comment_node.get("author") or {}).get(
+                             "login", "unknown"
+                         ),
+                         "body": comment_node.get("body", ""),
+                         "created_at": comment_node.get("createdAt"),
+                         "path": comment_node.get("path"),
+                         "line": mapped_line,  # Current line (to be mapped)
+                         "original_line": comment_node.get("originalLine"),
+                         "diff_hunk": comment_node.get("diffHunk"),
+                         "resolved": is_resolved,
+                         "commit_sha": (comment_node.get("commit") or {}).get("oid"),
+                     }
+                 )
+
+         return comments
+
+     def fetch_pr_rest(self, pr_number: int) -> Dict[str, Any]:
+         """
+         Fallback method to fetch a single PR using REST API.
+
+         Args:
+             pr_number: PR number to fetch
+
+         Returns:
+             PR dictionary
+
+         Note:
+             This is slower than GraphQL and doesn't include comments.
+             Only used as a fallback if GraphQL fails.
+         """
+         commits = self._fetch_pr_commits_rest(pr_number)
+         files = self._fetch_pr_files_rest(pr_number)
+
+         try:
+             result = self.runner.run_gh_command(
+                 [
+                     "pr",
+                     "view",
+                     str(pr_number),
+                     "--json",
+                     "number,title,url,state,mergedAt,author,body",
+                 ]
+             )
+             pr_data = json.loads(result.stdout)
+
+             return {
+                 "number": pr_number,
+                 "title": pr_data.get("title", ""),
+                 "url": pr_data.get("url", ""),
+                 "state": pr_data.get("state", "").lower(),
+                 "merged": pr_data.get("mergedAt") is not None,
+                 "merged_at": pr_data.get("mergedAt"),
+                 "author": (pr_data.get("author") or {}).get("login", "unknown"),
+                 "description": pr_data.get("body", ""),
+                 "commits": commits,
+                 "files_changed": files,
+                 "comments": [],  # REST fallback doesn't fetch comments
+             }
+         except (subprocess.CalledProcessError, json.JSONDecodeError):
+             # Return minimal PR info if everything fails
+             return {
+                 "number": pr_number,
+                 "title": f"PR #{pr_number}",
+                 "url": f"https://github.com/{self.repo_owner}/{self.repo_name}/pull/{pr_number}",
+                 "state": "unknown",
+                 "merged": False,
+                 "merged_at": None,
+                 "author": "unknown",
+                 "description": "",
+                 "commits": commits,
+                 "files_changed": files,
+                 "comments": [],
+             }
+
+     def _fetch_pr_commits_rest(self, pr_number: int) -> List[str]:
+         """
+         Fetch commit SHAs for a PR using REST API.
+
+         Args:
+             pr_number: PR number
+
+         Returns:
+             List of commit SHAs
+         """
+         try:
+             result = self.runner.run_gh_command(
+                 [
+                     "pr",
+                     "view",
+                     str(pr_number),
+                     "--json",
+                     "commits",
+                     "-q",
+                     ".commits[].oid",
+                 ]
+             )
+
+             commits = [
+                 line.strip()
+                 for line in result.stdout.strip().split("\n")
+                 if line.strip()
+             ]
+             return commits
+
+         except subprocess.CalledProcessError:
+             return []
+
+     def _fetch_pr_files_rest(self, pr_number: int) -> List[str]:
+         """
+         Fetch changed files for a PR using REST API.
+
+         Args:
+             pr_number: PR number
+
+         Returns:
+             List of file paths
+         """
+         try:
+             result = self.runner.run_gh_command(
+                 ["pr", "view", str(pr_number), "--json", "files", "-q", ".files[].path"]
+             )
+
+             files = [
+                 line.strip()
+                 for line in result.stdout.strip().split("\n")
+                 if line.strip()
+             ]
+             return files
+
+         except subprocess.CalledProcessError:
+             return []
+
+     def get_total_pr_count(self) -> int:
+         """
+         Get approximate total number of PRs in the repository.
+
+         Returns:
+             Approximate PR count (based on highest PR number)
+         """
+         try:
+             result = self.runner.run_gh_command(
+                 ["pr", "list", "--state", "all", "--json", "number", "--limit", "1"]
+             )
+
+             # The API returns PRs in descending order, so the first PR's number
+             # is approximately the total count
+             pr_list = json.loads(result.stdout)
+             if pr_list:
+                 return pr_list[0]["number"]
+             return 0
+         except:
+             return 0
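Taken together, the class supports a fetch loop along the lines of the hypothetical driver below. This is not part of the package (the real orchestration presumably lives in cicada/pr_indexer/indexer.py); it only sketches the intended flow: validate the gh CLI, list PR numbers, fetch details in GraphQL batches of about ten, and fall back to the slower per-PR REST path when a batch fails.

from pathlib import Path

from cicada.pr_indexer.github_api_client import GitHubAPIClient


def fetch_all_prs(repo_path: Path, batch_size: int = 10):
    # Hypothetical usage sketch, not the packaged indexer logic.
    client = GitHubAPIClient(repo_path, repo_owner="", repo_name="")
    client.validate_gh_cli()
    # Fill in owner/name from the git remote before issuing GraphQL queries.
    client.repo_owner, client.repo_name = client.get_repo_info()

    numbers = client.fetch_pr_list(state="all")
    detailed = []
    for start in range(0, len(numbers), batch_size):
        batch = numbers[start : start + batch_size]
        try:
            detailed.extend(client.fetch_prs_batch_graphql(batch))
        except RuntimeError:
            # GraphQL failed for this batch; fall back to per-PR REST calls
            # (slower and without review comments).
            detailed.extend(client.fetch_pr_rest(num) for num in batch)
    return detailed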