cicada-mcp 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cicada-mcp may be problematic. Click here for more details.
- cicada/__init__.py +30 -0
- cicada/clean.py +297 -0
- cicada/command_logger.py +293 -0
- cicada/dead_code_analyzer.py +282 -0
- cicada/extractors/__init__.py +36 -0
- cicada/extractors/base.py +66 -0
- cicada/extractors/call.py +176 -0
- cicada/extractors/dependency.py +361 -0
- cicada/extractors/doc.py +179 -0
- cicada/extractors/function.py +246 -0
- cicada/extractors/module.py +123 -0
- cicada/extractors/spec.py +151 -0
- cicada/find_dead_code.py +270 -0
- cicada/formatter.py +918 -0
- cicada/git_helper.py +646 -0
- cicada/indexer.py +629 -0
- cicada/install.py +724 -0
- cicada/keyword_extractor.py +364 -0
- cicada/keyword_search.py +553 -0
- cicada/lightweight_keyword_extractor.py +298 -0
- cicada/mcp_server.py +1559 -0
- cicada/mcp_tools.py +291 -0
- cicada/parser.py +124 -0
- cicada/pr_finder.py +435 -0
- cicada/pr_indexer/__init__.py +20 -0
- cicada/pr_indexer/cli.py +62 -0
- cicada/pr_indexer/github_api_client.py +431 -0
- cicada/pr_indexer/indexer.py +297 -0
- cicada/pr_indexer/line_mapper.py +209 -0
- cicada/pr_indexer/pr_index_builder.py +253 -0
- cicada/setup.py +339 -0
- cicada/utils/__init__.py +52 -0
- cicada/utils/call_site_formatter.py +95 -0
- cicada/utils/function_grouper.py +57 -0
- cicada/utils/hash_utils.py +173 -0
- cicada/utils/index_utils.py +290 -0
- cicada/utils/path_utils.py +240 -0
- cicada/utils/signature_builder.py +106 -0
- cicada/utils/storage.py +111 -0
- cicada/utils/subprocess_runner.py +182 -0
- cicada/utils/text_utils.py +90 -0
- cicada/version_check.py +116 -0
- cicada_mcp-0.1.4.dist-info/METADATA +619 -0
- cicada_mcp-0.1.4.dist-info/RECORD +48 -0
- cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
- cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
- cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
- cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Author: Cursor(Auto)
|
|
3
|
+
|
|
4
|
+
GitHub API Client for PR Indexer.
|
|
5
|
+
|
|
6
|
+
This module handles all interactions with the GitHub API (both REST and GraphQL),
|
|
7
|
+
separating API concerns from indexing logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import subprocess
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Dict, List, Any, Tuple
|
|
14
|
+
|
|
15
|
+
from cicada.utils import SubprocessRunner
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GitHubAPIClient:
|
|
19
|
+
"""
|
|
20
|
+
Handles GitHub API interactions for PR indexing.
|
|
21
|
+
|
|
22
|
+
This class encapsulates all GitHub CLI and API calls, providing
|
|
23
|
+
a clean interface for fetching PR data.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __init__(self, repo_path: Path, repo_owner: str, repo_name: str):
    """
    Create a client bound to one local repository and its GitHub identity.

    Args:
        repo_path: Path to the git repository
        repo_owner: GitHub repository owner
        repo_name: GitHub repository name
    """
    # Identity of the remote repository this client talks to.
    self.repo_owner = repo_owner
    self.repo_name = repo_name
    # Local checkout; all gh invocations run with this as the working dir.
    self.repo_path = repo_path
    self.runner = SubprocessRunner(cwd=repo_path)
|
|
39
|
+
|
|
40
|
+
def validate_gh_cli(self) -> None:
    """
    Validate that GitHub CLI is installed and available.

    Raises:
        RuntimeError: If gh CLI is not available
    """
    try:
        # NOTE(review): passes a bare string where other call sites pass a
        # list — presumably SubprocessRunner accepts both; confirm.
        self.runner.run_gh_command("--version")
    except (subprocess.CalledProcessError, FileNotFoundError) as err:
        # Chain the original error so the root cause stays in the traceback.
        raise RuntimeError(
            "GitHub CLI (gh) is not installed or not available in PATH. "
            "Install it from https://cli.github.com/"
        ) from err
|
|
54
|
+
|
|
55
|
+
def get_repo_info(self) -> Tuple[str, str]:
    """
    Get repository owner and name from git remote.

    Returns:
        Tuple of (owner, repo_name)

    Raises:
        RuntimeError: If not a GitHub repository
    """
    try:
        # Only the CLI call can raise CalledProcessError; keep the try minimal.
        result = self.runner.run_gh_command(
            ["repo", "view", "--json", "nameWithOwner", "-q", ".nameWithOwner"]
        )
    except subprocess.CalledProcessError as err:
        # Chain so the gh failure details remain visible.
        raise RuntimeError("Not a GitHub repository or no remote configured") from err

    name_with_owner = result.stdout.strip()
    # gh emits the literal string "null" when the field is absent.
    if not name_with_owner or name_with_owner == "null":
        raise RuntimeError("Not a GitHub repository or no remote configured")

    parts = name_with_owner.split("/")
    if len(parts) != 2:
        raise RuntimeError(
            f"Invalid repository format. Expected owner/repo, got: {name_with_owner}"
        )

    owner, repo_name = parts
    return owner, repo_name
|
|
85
|
+
|
|
86
|
+
def fetch_pr_list(self, state: str = "all", limit: int = 10000) -> List[int]:
    """
    Fetch list of PR numbers.

    Args:
        state: PR state filter ('all', 'open', 'closed', 'merged')
        limit: Maximum number of PRs to fetch

    Returns:
        List of PR numbers

    Raises:
        RuntimeError: If fetching fails
    """
    try:
        result = self.runner.run_gh_command(
            [
                "pr",
                "list",
                "--state",
                state,
                "--json",
                "number",
                "--limit",
                str(limit),
            ]
        )

        pr_list = json.loads(result.stdout)
        return [pr["number"] for pr in pr_list]

    except subprocess.CalledProcessError as e:
        # Chain so gh's stderr context isn't lost from the traceback.
        raise RuntimeError(f"Failed to fetch PR list: {e.stderr}") from e
    except json.JSONDecodeError as e:
        raise RuntimeError(f"Failed to parse PR list: {e}") from e
|
|
121
|
+
|
|
122
|
+
def fetch_prs_batch_graphql(self, pr_numbers: List[int]) -> List[Dict[str, Any]]:
    """
    Fetch detailed PR information for a batch using GraphQL.

    This is much more efficient than making individual REST calls.

    Args:
        pr_numbers: List of PR numbers to fetch (max 10 recommended)

    Returns:
        List of detailed PR dictionaries

    Raises:
        RuntimeError: If GraphQL query fails
    """
    if not pr_numbers:
        return []

    # Build one aliased sub-query (pr0, pr1, ...) per PR so a single
    # GraphQL round trip fetches the entire batch.
    pr_queries: list[str] = []
    for i, num in enumerate(pr_numbers):
        pr_queries.append(
            f"""
            pr{i}: pullRequest(number: {num}) {{
                number
                title
                url
                state
                mergedAt
                bodyText
                author {{ login }}
                commits(first: 250) {{
                    nodes {{ commit {{ oid }} }}
                }}
                files(first: 100) {{
                    nodes {{ path }}
                }}
                reviewThreads(first: 100) {{
                    nodes {{
                        isResolved
                        comments(first: 10) {{
                            nodes {{
                                id
                                body
                                createdAt
                                author {{ login }}
                                path
                                position
                                originalPosition
                                line
                                originalLine
                                diffHunk
                                commit {{ oid }}
                            }}
                        }}
                    }}
                }}
            }}
            """
        )

    # NOTE(review): owner/name are interpolated directly into the query;
    # they come from `gh repo view`, not user input, so injection risk is
    # limited — but gh's -F/variable mechanism would be safer. Verify.
    query = f"""
    query {{
        repository(owner: "{self.repo_owner}", name: "{self.repo_name}") {{
            {' '.join(pr_queries)}
        }}
    }}
    """

    try:
        result = self.runner.run_gh_command(
            ["api", "graphql", "-f", f"query={query}"]
        )

        data = json.loads(result.stdout)
        repo_data = data.get("data", {}).get("repository", {})

        return self._parse_graphql_response(repo_data, len(pr_numbers))

    except subprocess.CalledProcessError as e:
        # Chain so gh's stderr stays attached to the raised error.
        raise RuntimeError(f"GraphQL query failed for PRs {pr_numbers}: {e.stderr}") from e
    except (json.JSONDecodeError, KeyError) as e:
        raise RuntimeError(
            f"Failed to parse GraphQL response for PRs {pr_numbers}: {e}"
        ) from e
|
|
207
|
+
|
|
208
|
+
def _parse_graphql_response(
|
|
209
|
+
self, repo_data: Dict[str, Any], num_prs: int
|
|
210
|
+
) -> List[Dict[str, Any]]:
|
|
211
|
+
"""
|
|
212
|
+
Parse GraphQL response into PR dictionaries.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
repo_data: Repository data from GraphQL response
|
|
216
|
+
num_prs: Number of PRs in the batch
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
List of parsed PR dictionaries
|
|
220
|
+
"""
|
|
221
|
+
detailed_prs = []
|
|
222
|
+
|
|
223
|
+
for i in range(num_prs):
|
|
224
|
+
pr_data = repo_data.get(f"pr{i}")
|
|
225
|
+
if not pr_data:
|
|
226
|
+
continue
|
|
227
|
+
|
|
228
|
+
# Extract commits
|
|
229
|
+
commits = [
|
|
230
|
+
node["commit"]["oid"]
|
|
231
|
+
for node in pr_data.get("commits", {}).get("nodes", [])
|
|
232
|
+
]
|
|
233
|
+
|
|
234
|
+
# Extract files
|
|
235
|
+
files = [node["path"] for node in pr_data.get("files", {}).get("nodes", [])]
|
|
236
|
+
|
|
237
|
+
# Extract and flatten review thread comments
|
|
238
|
+
comments = self._parse_review_comments(pr_data)
|
|
239
|
+
|
|
240
|
+
detailed_pr = {
|
|
241
|
+
"number": pr_data["number"],
|
|
242
|
+
"title": pr_data["title"],
|
|
243
|
+
"url": pr_data["url"],
|
|
244
|
+
"state": pr_data["state"].lower(),
|
|
245
|
+
"merged": pr_data.get("mergedAt") is not None,
|
|
246
|
+
"merged_at": pr_data.get("mergedAt"),
|
|
247
|
+
"author": (pr_data.get("author") or {}).get("login", "unknown"),
|
|
248
|
+
"description": pr_data.get("bodyText", ""),
|
|
249
|
+
"commits": commits,
|
|
250
|
+
"files_changed": files,
|
|
251
|
+
"comments": comments,
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
detailed_prs.append(detailed_pr)
|
|
255
|
+
|
|
256
|
+
return detailed_prs
|
|
257
|
+
|
|
258
|
+
def _parse_review_comments(self, pr_data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
259
|
+
"""
|
|
260
|
+
Parse review thread comments from PR data.
|
|
261
|
+
|
|
262
|
+
Args:
|
|
263
|
+
pr_data: PR data from GraphQL
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
List of comment dictionaries
|
|
267
|
+
"""
|
|
268
|
+
comments = []
|
|
269
|
+
|
|
270
|
+
for thread in pr_data.get("reviewThreads", {}).get("nodes", []):
|
|
271
|
+
is_resolved = thread.get("isResolved", False)
|
|
272
|
+
|
|
273
|
+
for comment_node in thread.get("comments", {}).get("nodes", []):
|
|
274
|
+
# Map comment line to current file line (will be updated later)
|
|
275
|
+
mapped_line = comment_node.get("line")
|
|
276
|
+
|
|
277
|
+
comments.append(
|
|
278
|
+
{
|
|
279
|
+
"id": comment_node.get("id"),
|
|
280
|
+
"author": (comment_node.get("author") or {}).get(
|
|
281
|
+
"login", "unknown"
|
|
282
|
+
),
|
|
283
|
+
"body": comment_node.get("body", ""),
|
|
284
|
+
"created_at": comment_node.get("createdAt"),
|
|
285
|
+
"path": comment_node.get("path"),
|
|
286
|
+
"line": mapped_line, # Current line (to be mapped)
|
|
287
|
+
"original_line": comment_node.get("originalLine"),
|
|
288
|
+
"diff_hunk": comment_node.get("diffHunk"),
|
|
289
|
+
"resolved": is_resolved,
|
|
290
|
+
"commit_sha": (comment_node.get("commit") or {}).get("oid"),
|
|
291
|
+
}
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
return comments
|
|
295
|
+
|
|
296
|
+
def fetch_pr_rest(self, pr_number: int) -> Dict[str, Any]:
    """
    Fallback method to fetch a single PR using REST API.

    Args:
        pr_number: PR number to fetch

    Returns:
        PR dictionary

    Note:
        This is slower than GraphQL and doesn't include comments.
        Only used as a fallback if GraphQL fails.
    """
    # Commits and files are fetched separately; each degrades to [] on error.
    commits = self._fetch_pr_commits_rest(pr_number)
    files = self._fetch_pr_files_rest(pr_number)

    try:
        result = self.runner.run_gh_command(
            [
                "pr",
                "view",
                str(pr_number),
                "--json",
                "number,title,url,state,mergedAt,author,body",
            ]
        )
        pr_data = json.loads(result.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError):
        # Degrade gracefully: minimal placeholder info if everything fails.
        return {
            "number": pr_number,
            "title": f"PR #{pr_number}",
            "url": f"https://github.com/{self.repo_owner}/{self.repo_name}/pull/{pr_number}",
            "state": "unknown",
            "merged": False,
            "merged_at": None,
            "author": "unknown",
            "description": "",
            "commits": commits,
            "files_changed": files,
            "comments": [],
        }

    return {
        "number": pr_number,
        "title": pr_data.get("title", ""),
        "url": pr_data.get("url", ""),
        "state": pr_data.get("state", "").lower(),
        "merged": pr_data.get("mergedAt") is not None,
        "merged_at": pr_data.get("mergedAt"),
        "author": (pr_data.get("author") or {}).get("login", "unknown"),
        "description": pr_data.get("body", ""),
        "commits": commits,
        "files_changed": files,
        "comments": [],  # REST fallback doesn't fetch comments
    }
|
|
353
|
+
|
|
354
|
+
def _fetch_pr_commits_rest(self, pr_number: int) -> List[str]:
|
|
355
|
+
"""
|
|
356
|
+
Fetch commit SHAs for a PR using REST API.
|
|
357
|
+
|
|
358
|
+
Args:
|
|
359
|
+
pr_number: PR number
|
|
360
|
+
|
|
361
|
+
Returns:
|
|
362
|
+
List of commit SHAs
|
|
363
|
+
"""
|
|
364
|
+
try:
|
|
365
|
+
result = self.runner.run_gh_command(
|
|
366
|
+
[
|
|
367
|
+
"pr",
|
|
368
|
+
"view",
|
|
369
|
+
str(pr_number),
|
|
370
|
+
"--json",
|
|
371
|
+
"commits",
|
|
372
|
+
"-q",
|
|
373
|
+
".commits[].oid",
|
|
374
|
+
]
|
|
375
|
+
)
|
|
376
|
+
|
|
377
|
+
commits = [
|
|
378
|
+
line.strip()
|
|
379
|
+
for line in result.stdout.strip().split("\n")
|
|
380
|
+
if line.strip()
|
|
381
|
+
]
|
|
382
|
+
return commits
|
|
383
|
+
|
|
384
|
+
except subprocess.CalledProcessError:
|
|
385
|
+
return []
|
|
386
|
+
|
|
387
|
+
def _fetch_pr_files_rest(self, pr_number: int) -> List[str]:
|
|
388
|
+
"""
|
|
389
|
+
Fetch changed files for a PR using REST API.
|
|
390
|
+
|
|
391
|
+
Args:
|
|
392
|
+
pr_number: PR number
|
|
393
|
+
|
|
394
|
+
Returns:
|
|
395
|
+
List of file paths
|
|
396
|
+
"""
|
|
397
|
+
try:
|
|
398
|
+
result = self.runner.run_gh_command(
|
|
399
|
+
["pr", "view", str(pr_number), "--json", "files", "-q", ".files[].path"]
|
|
400
|
+
)
|
|
401
|
+
|
|
402
|
+
files = [
|
|
403
|
+
line.strip()
|
|
404
|
+
for line in result.stdout.strip().split("\n")
|
|
405
|
+
if line.strip()
|
|
406
|
+
]
|
|
407
|
+
return files
|
|
408
|
+
|
|
409
|
+
except subprocess.CalledProcessError:
|
|
410
|
+
return []
|
|
411
|
+
|
|
412
|
+
def get_total_pr_count(self) -> int:
    """
    Get approximate total number of PRs in the repository.

    Returns:
        Approximate PR count (based on highest PR number), or 0 on any failure
    """
    try:
        result = self.runner.run_gh_command(
            ["pr", "list", "--state", "all", "--json", "number", "--limit", "1"]
        )

        # The API returns PRs in descending order, so the first PR's number
        # is approximately the total count.
        pr_list = json.loads(result.stdout)
        if pr_list:
            return pr_list[0]["number"]
        return 0
    except (
        subprocess.CalledProcessError,
        OSError,
        json.JSONDecodeError,
        KeyError,
        IndexError,
        TypeError,
    ):
        # Best-effort count: degrade to 0 instead of failing. A bare
        # `except:` here previously swallowed even KeyboardInterrupt.
        return 0
|