cicada-mcp 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cicada/ascii_art.py +60 -0
- cicada/clean.py +195 -60
- cicada/cli.py +757 -0
- cicada/colors.py +27 -0
- cicada/command_logger.py +14 -16
- cicada/dead_code_analyzer.py +12 -19
- cicada/extractors/__init__.py +6 -6
- cicada/extractors/base.py +3 -3
- cicada/extractors/call.py +11 -15
- cicada/extractors/dependency.py +39 -51
- cicada/extractors/doc.py +8 -9
- cicada/extractors/function.py +12 -24
- cicada/extractors/module.py +11 -15
- cicada/extractors/spec.py +8 -12
- cicada/find_dead_code.py +15 -39
- cicada/formatter.py +37 -91
- cicada/git_helper.py +22 -34
- cicada/indexer.py +165 -132
- cicada/interactive_setup.py +490 -0
- cicada/keybert_extractor.py +286 -0
- cicada/keyword_search.py +22 -30
- cicada/keyword_test.py +127 -0
- cicada/lightweight_keyword_extractor.py +5 -13
- cicada/mcp_entry.py +683 -0
- cicada/mcp_server.py +110 -232
- cicada/parser.py +9 -9
- cicada/pr_finder.py +15 -19
- cicada/pr_indexer/__init__.py +3 -3
- cicada/pr_indexer/cli.py +4 -9
- cicada/pr_indexer/github_api_client.py +22 -37
- cicada/pr_indexer/indexer.py +17 -29
- cicada/pr_indexer/line_mapper.py +8 -12
- cicada/pr_indexer/pr_index_builder.py +22 -34
- cicada/setup.py +198 -89
- cicada/utils/__init__.py +9 -9
- cicada/utils/call_site_formatter.py +4 -6
- cicada/utils/function_grouper.py +4 -4
- cicada/utils/hash_utils.py +12 -15
- cicada/utils/index_utils.py +15 -15
- cicada/utils/path_utils.py +24 -29
- cicada/utils/signature_builder.py +3 -3
- cicada/utils/subprocess_runner.py +17 -19
- cicada/utils/text_utils.py +1 -2
- cicada/version_check.py +2 -5
- {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/METADATA +144 -55
- cicada_mcp-0.2.0.dist-info/RECORD +53 -0
- cicada_mcp-0.2.0.dist-info/entry_points.txt +4 -0
- cicada/install.py +0 -741
- cicada_mcp-0.1.5.dist-info/RECORD +0 -47
- cicada_mcp-0.1.5.dist-info/entry_points.txt +0 -9
- {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/WHEEL +0 -0
- {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/top_level.txt +0 -0
cicada/parser.py
CHANGED
|
@@ -7,22 +7,22 @@ Author: Cursor(Auto)
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import tree_sitter_elixir as ts_elixir
|
|
10
|
-
from tree_sitter import
|
|
10
|
+
from tree_sitter import Language, Parser
|
|
11
11
|
|
|
12
12
|
from .extractors import (
|
|
13
|
-
extract_modules,
|
|
14
|
-
extract_functions,
|
|
15
|
-
extract_specs,
|
|
16
|
-
match_specs_to_functions,
|
|
17
|
-
extract_docs,
|
|
18
|
-
match_docs_to_functions,
|
|
19
13
|
extract_aliases,
|
|
14
|
+
extract_behaviours,
|
|
15
|
+
extract_docs,
|
|
16
|
+
extract_function_calls,
|
|
17
|
+
extract_functions,
|
|
20
18
|
extract_imports,
|
|
19
|
+
extract_modules,
|
|
21
20
|
extract_requires,
|
|
21
|
+
extract_specs,
|
|
22
22
|
extract_uses,
|
|
23
|
-
extract_behaviours,
|
|
24
|
-
extract_function_calls,
|
|
25
23
|
extract_value_mentions,
|
|
24
|
+
match_docs_to_functions,
|
|
25
|
+
match_specs_to_functions,
|
|
26
26
|
)
|
|
27
27
|
|
|
28
28
|
|
cicada/pr_finder.py
CHANGED
|
@@ -8,7 +8,7 @@ import json
|
|
|
8
8
|
import subprocess
|
|
9
9
|
import sys
|
|
10
10
|
from pathlib import Path
|
|
11
|
-
from typing import
|
|
11
|
+
from typing import Any
|
|
12
12
|
|
|
13
13
|
from cicada.utils import load_index
|
|
14
14
|
|
|
@@ -48,7 +48,7 @@ class PRFinder:
|
|
|
48
48
|
elif not self.index:
|
|
49
49
|
# Always show warning (even in non-verbose mode) with color
|
|
50
50
|
print(
|
|
51
|
-
|
|
51
|
+
"\033[33m⚠️ No PR index found - using slower network lookups. Create index: cicada index-pr\033[0m",
|
|
52
52
|
file=sys.stderr,
|
|
53
53
|
)
|
|
54
54
|
|
|
@@ -72,9 +72,9 @@ class PRFinder:
|
|
|
72
72
|
raise RuntimeError(
|
|
73
73
|
"GitHub CLI (gh) is not installed or not available in PATH. "
|
|
74
74
|
"Install it from https://cli.github.com/"
|
|
75
|
-
)
|
|
75
|
+
) from None
|
|
76
76
|
|
|
77
|
-
def _load_index(self) ->
|
|
77
|
+
def _load_index(self) -> dict[str, Any] | None:
|
|
78
78
|
"""
|
|
79
79
|
Load the PR index from file.
|
|
80
80
|
|
|
@@ -90,7 +90,7 @@ class PRFinder:
|
|
|
90
90
|
|
|
91
91
|
return load_index(index_file, verbose=self.verbose, raise_on_error=False)
|
|
92
92
|
|
|
93
|
-
def _lookup_pr_in_index(self, commit_sha: str) ->
|
|
93
|
+
def _lookup_pr_in_index(self, commit_sha: str) -> dict[str, Any] | None:
|
|
94
94
|
"""
|
|
95
95
|
Look up PR information from the index.
|
|
96
96
|
|
|
@@ -114,9 +114,7 @@ class PRFinder:
|
|
|
114
114
|
|
|
115
115
|
return pr
|
|
116
116
|
|
|
117
|
-
def _run_git_blame(
|
|
118
|
-
self, file_path: str, line_number: int
|
|
119
|
-
) -> Optional[Dict[str, str | None]]:
|
|
117
|
+
def _run_git_blame(self, file_path: str, line_number: int) -> dict[str, str | None] | None:
|
|
120
118
|
"""
|
|
121
119
|
Run git blame to find the commit that introduced a specific line.
|
|
122
120
|
|
|
@@ -155,9 +153,7 @@ class PRFinder:
|
|
|
155
153
|
if line.startswith("author "):
|
|
156
154
|
author_name = line[7:] # Skip 'author '
|
|
157
155
|
elif line.startswith("author-mail "):
|
|
158
|
-
author_email = line[12:].strip(
|
|
159
|
-
"<>"
|
|
160
|
-
) # Skip 'author-mail ' and remove < >
|
|
156
|
+
author_email = line[12:].strip("<>") # Skip 'author-mail ' and remove < >
|
|
161
157
|
|
|
162
158
|
return {
|
|
163
159
|
"commit": commit_sha,
|
|
@@ -166,9 +162,9 @@ class PRFinder:
|
|
|
166
162
|
}
|
|
167
163
|
|
|
168
164
|
except subprocess.CalledProcessError as e:
|
|
169
|
-
raise RuntimeError(f"git blame failed: {e.stderr}")
|
|
165
|
+
raise RuntimeError(f"git blame failed: {e.stderr}") from e
|
|
170
166
|
|
|
171
|
-
def _get_repo_info(self) ->
|
|
167
|
+
def _get_repo_info(self) -> tuple[str, str] | None:
|
|
172
168
|
"""
|
|
173
169
|
Get the repository owner and name from git remote.
|
|
174
170
|
|
|
@@ -203,7 +199,7 @@ class PRFinder:
|
|
|
203
199
|
# Not a GitHub repository or no remote configured
|
|
204
200
|
return None
|
|
205
201
|
|
|
206
|
-
def _find_pr_for_commit(self, commit_sha: str) ->
|
|
202
|
+
def _find_pr_for_commit(self, commit_sha: str) -> dict[str, Any] | None:
|
|
207
203
|
"""
|
|
208
204
|
Find the PR that introduced a specific commit.
|
|
209
205
|
|
|
@@ -248,13 +244,13 @@ class PRFinder:
|
|
|
248
244
|
"merged_at": pr.get("merged_at"),
|
|
249
245
|
}
|
|
250
246
|
|
|
251
|
-
except subprocess.CalledProcessError
|
|
247
|
+
except subprocess.CalledProcessError:
|
|
252
248
|
# Commit might not be associated with a PR
|
|
253
249
|
return None
|
|
254
250
|
except (json.JSONDecodeError, KeyError) as e:
|
|
255
|
-
raise RuntimeError(f"Failed to parse PR information: {e}")
|
|
251
|
+
raise RuntimeError(f"Failed to parse PR information: {e}") from e
|
|
256
252
|
|
|
257
|
-
def find_pr_for_line(self, file_path: str, line_number: int) ->
|
|
253
|
+
def find_pr_for_line(self, file_path: str, line_number: int) -> dict[str, Any]:
|
|
258
254
|
"""
|
|
259
255
|
Find the PR that introduced a specific line of code.
|
|
260
256
|
|
|
@@ -314,7 +310,7 @@ class PRFinder:
|
|
|
314
310
|
"pr": pr_info,
|
|
315
311
|
}
|
|
316
312
|
|
|
317
|
-
def format_result(self, result:
|
|
313
|
+
def format_result(self, result: dict[str, Any], output_format: str = "text") -> str:
|
|
318
314
|
"""
|
|
319
315
|
Format the result for display.
|
|
320
316
|
|
|
@@ -352,7 +348,7 @@ class PRFinder:
|
|
|
352
348
|
if output_format == "markdown":
|
|
353
349
|
output = [
|
|
354
350
|
f"## Line {result['line_number']} in {result['file_path']}",
|
|
355
|
-
|
|
351
|
+
"",
|
|
356
352
|
f"**Commit:** `{short_commit}` ",
|
|
357
353
|
f"**Author:** {author_str}",
|
|
358
354
|
]
|
cicada/pr_indexer/__init__.py
CHANGED
|
@@ -5,11 +5,11 @@ This package contains classes for indexing GitHub pull requests,
|
|
|
5
5
|
separated by responsibility for better maintainability.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
from .cli import main
|
|
8
9
|
from .github_api_client import GitHubAPIClient
|
|
9
|
-
from .pr_index_builder import PRIndexBuilder
|
|
10
|
-
from .line_mapper import LineMapper
|
|
11
10
|
from .indexer import PRIndexer
|
|
12
|
-
from .
|
|
11
|
+
from .line_mapper import LineMapper
|
|
12
|
+
from .pr_index_builder import PRIndexBuilder
|
|
13
13
|
|
|
14
14
|
__all__ = [
|
|
15
15
|
"GitHubAPIClient",
|
cicada/pr_indexer/cli.py
CHANGED
|
@@ -8,6 +8,7 @@ from .indexer import PRIndexer
|
|
|
8
8
|
def main():
|
|
9
9
|
"""CLI entry point for pr_indexer."""
|
|
10
10
|
import argparse
|
|
11
|
+
|
|
11
12
|
from cicada.version_check import check_for_updates
|
|
12
13
|
|
|
13
14
|
# Check for updates (non-blocking, fails silently)
|
|
@@ -38,19 +39,13 @@ def main():
|
|
|
38
39
|
try:
|
|
39
40
|
indexer = PRIndexer(repo_path=args.repo)
|
|
40
41
|
# Incremental by default, unless --clean is specified
|
|
41
|
-
_ = indexer.index_repository(
|
|
42
|
-
output_path=args.output, incremental=not args.clean
|
|
43
|
-
)
|
|
42
|
+
_ = indexer.index_repository(output_path=args.output, incremental=not args.clean)
|
|
44
43
|
|
|
45
|
-
print(
|
|
46
|
-
"\n✅ Indexing complete! You can now use the MCP tools for PR history lookups."
|
|
47
|
-
)
|
|
44
|
+
print("\nIndexing complete! You can now use the MCP tools for PR history lookups.")
|
|
48
45
|
|
|
49
46
|
except KeyboardInterrupt:
|
|
50
47
|
print("\n\n⚠️ Indexing interrupted by user.")
|
|
51
|
-
print(
|
|
52
|
-
"Partial index may have been saved. Run again to continue (incremental by default)."
|
|
53
|
-
)
|
|
48
|
+
print("Partial index may have been saved. Run again to continue (incremental by default).")
|
|
54
49
|
sys.exit(130) # Standard exit code for SIGINT
|
|
55
50
|
|
|
56
51
|
except Exception as e:
|
cicada/pr_indexer/github_api_client.py
CHANGED
|
@@ -10,7 +10,7 @@ separating API concerns from indexing logic.
|
|
|
10
10
|
import json
|
|
11
11
|
import subprocess
|
|
12
12
|
from pathlib import Path
|
|
13
|
-
from typing import
|
|
13
|
+
from typing import Any
|
|
14
14
|
|
|
15
15
|
from cicada.utils import SubprocessRunner
|
|
16
16
|
|
|
@@ -50,9 +50,9 @@ class GitHubAPIClient:
|
|
|
50
50
|
raise RuntimeError(
|
|
51
51
|
"GitHub CLI (gh) is not installed or not available in PATH. "
|
|
52
52
|
"Install it from https://cli.github.com/"
|
|
53
|
-
)
|
|
53
|
+
) from None
|
|
54
54
|
|
|
55
|
-
def get_repo_info(self) ->
|
|
55
|
+
def get_repo_info(self) -> tuple[str, str]:
|
|
56
56
|
"""
|
|
57
57
|
Get repository owner and name from git remote.
|
|
58
58
|
|
|
@@ -81,9 +81,9 @@ class GitHubAPIClient:
|
|
|
81
81
|
return owner, repo_name
|
|
82
82
|
|
|
83
83
|
except subprocess.CalledProcessError:
|
|
84
|
-
raise RuntimeError("Not a GitHub repository or no remote configured")
|
|
84
|
+
raise RuntimeError("Not a GitHub repository or no remote configured") from None
|
|
85
85
|
|
|
86
|
-
def fetch_pr_list(self, state: str = "all", limit: int = 10000) ->
|
|
86
|
+
def fetch_pr_list(self, state: str = "all", limit: int = 10000) -> list[int]:
|
|
87
87
|
"""
|
|
88
88
|
Fetch list of PR numbers.
|
|
89
89
|
|
|
@@ -115,11 +115,11 @@ class GitHubAPIClient:
|
|
|
115
115
|
return [pr["number"] for pr in pr_list]
|
|
116
116
|
|
|
117
117
|
except subprocess.CalledProcessError as e:
|
|
118
|
-
raise RuntimeError(f"Failed to fetch PR list: {e.stderr}")
|
|
118
|
+
raise RuntimeError(f"Failed to fetch PR list: {e.stderr}") from e
|
|
119
119
|
except json.JSONDecodeError as e:
|
|
120
|
-
raise RuntimeError(f"Failed to parse PR list: {e}")
|
|
120
|
+
raise RuntimeError(f"Failed to parse PR list: {e}") from e
|
|
121
121
|
|
|
122
|
-
def fetch_prs_batch_graphql(self, pr_numbers:
|
|
122
|
+
def fetch_prs_batch_graphql(self, pr_numbers: list[int]) -> list[dict[str, Any]]:
|
|
123
123
|
"""
|
|
124
124
|
Fetch detailed PR information for a batch using GraphQL.
|
|
125
125
|
|
|
@@ -189,9 +189,7 @@ class GitHubAPIClient:
|
|
|
189
189
|
"""
|
|
190
190
|
|
|
191
191
|
try:
|
|
192
|
-
result = self.runner.run_gh_command(
|
|
193
|
-
["api", "graphql", "-f", f"query={query}"]
|
|
194
|
-
)
|
|
192
|
+
result = self.runner.run_gh_command(["api", "graphql", "-f", f"query={query}"])
|
|
195
193
|
|
|
196
194
|
data = json.loads(result.stdout)
|
|
197
195
|
repo_data = data.get("data", {}).get("repository", {})
|
|
@@ -199,15 +197,13 @@ class GitHubAPIClient:
|
|
|
199
197
|
return self._parse_graphql_response(repo_data, len(pr_numbers))
|
|
200
198
|
|
|
201
199
|
except subprocess.CalledProcessError as e:
|
|
202
|
-
raise RuntimeError(f"GraphQL query failed for PRs {pr_numbers}: {e.stderr}")
|
|
200
|
+
raise RuntimeError(f"GraphQL query failed for PRs {pr_numbers}: {e.stderr}") from e
|
|
203
201
|
except (json.JSONDecodeError, KeyError) as e:
|
|
204
|
-
raise RuntimeError(
|
|
205
|
-
f"Failed to parse GraphQL response for PRs {pr_numbers}: {e}"
|
|
206
|
-
)
|
|
202
|
+
raise RuntimeError(f"Failed to parse GraphQL response for PRs {pr_numbers}: {e}") from e
|
|
207
203
|
|
|
208
204
|
def _parse_graphql_response(
|
|
209
|
-
self, repo_data:
|
|
210
|
-
) ->
|
|
205
|
+
self, repo_data: dict[str, Any], num_prs: int
|
|
206
|
+
) -> list[dict[str, Any]]:
|
|
211
207
|
"""
|
|
212
208
|
Parse GraphQL response into PR dictionaries.
|
|
213
209
|
|
|
@@ -227,8 +223,7 @@ class GitHubAPIClient:
|
|
|
227
223
|
|
|
228
224
|
# Extract commits
|
|
229
225
|
commits = [
|
|
230
|
-
node["commit"]["oid"]
|
|
231
|
-
for node in pr_data.get("commits", {}).get("nodes", [])
|
|
226
|
+
node["commit"]["oid"] for node in pr_data.get("commits", {}).get("nodes", [])
|
|
232
227
|
]
|
|
233
228
|
|
|
234
229
|
# Extract files
|
|
@@ -255,7 +250,7 @@ class GitHubAPIClient:
|
|
|
255
250
|
|
|
256
251
|
return detailed_prs
|
|
257
252
|
|
|
258
|
-
def _parse_review_comments(self, pr_data:
|
|
253
|
+
def _parse_review_comments(self, pr_data: dict[str, Any]) -> list[dict[str, Any]]:
|
|
259
254
|
"""
|
|
260
255
|
Parse review thread comments from PR data.
|
|
261
256
|
|
|
@@ -277,9 +272,7 @@ class GitHubAPIClient:
|
|
|
277
272
|
comments.append(
|
|
278
273
|
{
|
|
279
274
|
"id": comment_node.get("id"),
|
|
280
|
-
"author": (comment_node.get("author") or {}).get(
|
|
281
|
-
"login", "unknown"
|
|
282
|
-
),
|
|
275
|
+
"author": (comment_node.get("author") or {}).get("login", "unknown"),
|
|
283
276
|
"body": comment_node.get("body", ""),
|
|
284
277
|
"created_at": comment_node.get("createdAt"),
|
|
285
278
|
"path": comment_node.get("path"),
|
|
@@ -293,7 +286,7 @@ class GitHubAPIClient:
|
|
|
293
286
|
|
|
294
287
|
return comments
|
|
295
288
|
|
|
296
|
-
def fetch_pr_rest(self, pr_number: int) ->
|
|
289
|
+
def fetch_pr_rest(self, pr_number: int) -> dict[str, Any]:
|
|
297
290
|
"""
|
|
298
291
|
Fallback method to fetch a single PR using REST API.
|
|
299
292
|
|
|
@@ -351,7 +344,7 @@ class GitHubAPIClient:
|
|
|
351
344
|
"comments": [],
|
|
352
345
|
}
|
|
353
346
|
|
|
354
|
-
def _fetch_pr_commits_rest(self, pr_number: int) ->
|
|
347
|
+
def _fetch_pr_commits_rest(self, pr_number: int) -> list[str]:
|
|
355
348
|
"""
|
|
356
349
|
Fetch commit SHAs for a PR using REST API.
|
|
357
350
|
|
|
@@ -374,17 +367,13 @@ class GitHubAPIClient:
|
|
|
374
367
|
]
|
|
375
368
|
)
|
|
376
369
|
|
|
377
|
-
commits = [
|
|
378
|
-
line.strip()
|
|
379
|
-
for line in result.stdout.strip().split("\n")
|
|
380
|
-
if line.strip()
|
|
381
|
-
]
|
|
370
|
+
commits = [line.strip() for line in result.stdout.strip().split("\n") if line.strip()]
|
|
382
371
|
return commits
|
|
383
372
|
|
|
384
373
|
except subprocess.CalledProcessError:
|
|
385
374
|
return []
|
|
386
375
|
|
|
387
|
-
def _fetch_pr_files_rest(self, pr_number: int) ->
|
|
376
|
+
def _fetch_pr_files_rest(self, pr_number: int) -> list[str]:
|
|
388
377
|
"""
|
|
389
378
|
Fetch changed files for a PR using REST API.
|
|
390
379
|
|
|
@@ -399,11 +388,7 @@ class GitHubAPIClient:
|
|
|
399
388
|
["pr", "view", str(pr_number), "--json", "files", "-q", ".files[].path"]
|
|
400
389
|
)
|
|
401
390
|
|
|
402
|
-
files = [
|
|
403
|
-
line.strip()
|
|
404
|
-
for line in result.stdout.strip().split("\n")
|
|
405
|
-
if line.strip()
|
|
406
|
-
]
|
|
391
|
+
files = [line.strip() for line in result.stdout.strip().split("\n") if line.strip()]
|
|
407
392
|
return files
|
|
408
393
|
|
|
409
394
|
except subprocess.CalledProcessError:
|
|
@@ -427,5 +412,5 @@ class GitHubAPIClient:
|
|
|
427
412
|
if pr_list:
|
|
428
413
|
return pr_list[0]["number"]
|
|
429
414
|
return 0
|
|
430
|
-
except:
|
|
415
|
+
except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError, IndexError):
|
|
431
416
|
return 0
|
cicada/pr_indexer/indexer.py
CHANGED
|
@@ -5,11 +5,11 @@ Fetches all PRs from a GitHub repository and builds an index mapping commits to
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from pathlib import Path
|
|
8
|
-
from typing import
|
|
8
|
+
from typing import Any
|
|
9
9
|
|
|
10
10
|
from .github_api_client import GitHubAPIClient
|
|
11
|
-
from .pr_index_builder import PRIndexBuilder
|
|
12
11
|
from .line_mapper import LineMapper
|
|
12
|
+
from .pr_index_builder import PRIndexBuilder
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class PRIndexer:
|
|
@@ -36,9 +36,7 @@ class PRIndexer:
|
|
|
36
36
|
self.repo_owner, self.repo_name = temp_client.get_repo_info()
|
|
37
37
|
|
|
38
38
|
# Initialize components
|
|
39
|
-
self.api_client = GitHubAPIClient(
|
|
40
|
-
self.repo_path, self.repo_owner, self.repo_name
|
|
41
|
-
)
|
|
39
|
+
self.api_client = GitHubAPIClient(self.repo_path, self.repo_owner, self.repo_name)
|
|
42
40
|
self.index_builder = PRIndexBuilder(self.repo_owner, self.repo_name)
|
|
43
41
|
self.line_mapper = LineMapper(self.repo_path)
|
|
44
42
|
|
|
@@ -48,7 +46,7 @@ class PRIndexer:
|
|
|
48
46
|
if not git_dir.exists():
|
|
49
47
|
raise ValueError(f"Not a git repository: {self.repo_path}")
|
|
50
48
|
|
|
51
|
-
def fetch_all_prs(self, state: str = "all") ->
|
|
49
|
+
def fetch_all_prs(self, state: str = "all") -> list[dict[str, Any]]:
|
|
52
50
|
"""
|
|
53
51
|
Fetch all pull requests from GitHub using GraphQL for efficiency.
|
|
54
52
|
|
|
@@ -93,11 +91,9 @@ class PRIndexer:
|
|
|
93
91
|
return detailed_prs
|
|
94
92
|
|
|
95
93
|
except RuntimeError as e:
|
|
96
|
-
raise RuntimeError(f"Failed to fetch PRs: {e}")
|
|
94
|
+
raise RuntimeError(f"Failed to fetch PRs: {e}") from e
|
|
97
95
|
|
|
98
|
-
def incremental_update(
|
|
99
|
-
self, existing_index: Dict[str, Any]
|
|
100
|
-
) -> List[Dict[str, Any]]:
|
|
96
|
+
def incremental_update(self, existing_index: dict[str, Any]) -> list[dict[str, Any]]:
|
|
101
97
|
"""
|
|
102
98
|
Fetch PRs bidirectionally: newer (above max) and older (below min).
|
|
103
99
|
|
|
@@ -108,7 +104,7 @@ class PRIndexer:
|
|
|
108
104
|
List of new PRs
|
|
109
105
|
"""
|
|
110
106
|
# Get min and max PR numbers currently in the index
|
|
111
|
-
existing_pr_numbers = [int(num) for num in existing_index.get("prs", {})
|
|
107
|
+
existing_pr_numbers = [int(num) for num in existing_index.get("prs", {})]
|
|
112
108
|
|
|
113
109
|
if not existing_pr_numbers:
|
|
114
110
|
print("Empty index, performing full fetch...")
|
|
@@ -145,13 +141,11 @@ class PRIndexer:
|
|
|
145
141
|
print()
|
|
146
142
|
|
|
147
143
|
# Fetch detailed info for all PRs
|
|
148
|
-
detailed_prs = self._fetch_prs_in_batches(
|
|
149
|
-
newer_pr_numbers, older_pr_numbers, min_pr
|
|
150
|
-
)
|
|
144
|
+
detailed_prs = self._fetch_prs_in_batches(newer_pr_numbers, older_pr_numbers, min_pr)
|
|
151
145
|
|
|
152
146
|
return detailed_prs
|
|
153
147
|
|
|
154
|
-
def _fetch_newer_prs(self, max_pr: int) ->
|
|
148
|
+
def _fetch_newer_prs(self, max_pr: int) -> list[int]:
|
|
155
149
|
"""Fetch PR numbers newer than max_pr."""
|
|
156
150
|
pr_numbers = self.api_client.fetch_pr_list(state="all", limit=1000)
|
|
157
151
|
|
|
@@ -164,7 +158,7 @@ class PRIndexer:
|
|
|
164
158
|
|
|
165
159
|
return newer
|
|
166
160
|
|
|
167
|
-
def _fetch_older_prs(self, min_pr: int) ->
|
|
161
|
+
def _fetch_older_prs(self, min_pr: int) -> list[int]:
|
|
168
162
|
"""Fetch PR numbers older than min_pr."""
|
|
169
163
|
if min_pr <= 1:
|
|
170
164
|
return []
|
|
@@ -184,8 +178,8 @@ class PRIndexer:
|
|
|
184
178
|
return []
|
|
185
179
|
|
|
186
180
|
def _fetch_prs_in_batches(
|
|
187
|
-
self, newer_pr_numbers:
|
|
188
|
-
) ->
|
|
181
|
+
self, newer_pr_numbers: list[int], older_pr_numbers: list[int], min_pr: int
|
|
182
|
+
) -> list[dict[str, Any]]:
|
|
189
183
|
"""Fetch PRs in batches, showing progress."""
|
|
190
184
|
detailed_prs = []
|
|
191
185
|
batch_size = 10
|
|
@@ -197,9 +191,7 @@ class PRIndexer:
|
|
|
197
191
|
print(f"\n⬆️ Fetching {len(newer_pr_numbers)} newer PRs...")
|
|
198
192
|
for i in range(0, len(newer_pr_numbers), batch_size):
|
|
199
193
|
batch = newer_pr_numbers[i : i + batch_size]
|
|
200
|
-
print(
|
|
201
|
-
f" Batch {i//batch_size + 1}/{newer_batches} ({len(batch)} PRs)..."
|
|
202
|
-
)
|
|
194
|
+
print(f" Batch {i//batch_size + 1}/{newer_batches} ({len(batch)} PRs)...")
|
|
203
195
|
batch_prs = self.api_client.fetch_prs_batch_graphql(batch)
|
|
204
196
|
detailed_prs.extend(batch_prs)
|
|
205
197
|
|
|
@@ -212,9 +204,7 @@ class PRIndexer:
|
|
|
212
204
|
)
|
|
213
205
|
for i in range(0, len(older_pr_numbers), batch_size):
|
|
214
206
|
batch = older_pr_numbers[i : i + batch_size]
|
|
215
|
-
print(
|
|
216
|
-
f" Batch {i//batch_size + 1}/{older_batches} ({len(batch)} PRs)..."
|
|
217
|
-
)
|
|
207
|
+
print(f" Batch {i//batch_size + 1}/{older_batches} ({len(batch)} PRs)...")
|
|
218
208
|
batch_prs = self.api_client.fetch_prs_batch_graphql(batch)
|
|
219
209
|
detailed_prs.extend(batch_prs)
|
|
220
210
|
|
|
@@ -227,9 +217,7 @@ class PRIndexer:
|
|
|
227
217
|
|
|
228
218
|
return detailed_prs
|
|
229
219
|
|
|
230
|
-
def index_repository(
|
|
231
|
-
self, output_path: str = ".cicada/pr_index.json", incremental: bool = False
|
|
232
|
-
):
|
|
220
|
+
def index_repository(self, output_path: str, incremental: bool = False):
|
|
233
221
|
"""
|
|
234
222
|
Index the repository's PRs and save to file.
|
|
235
223
|
|
|
@@ -266,8 +254,8 @@ class PRIndexer:
|
|
|
266
254
|
return index
|
|
267
255
|
|
|
268
256
|
def _perform_full_index(
|
|
269
|
-
self, _output_path: str, existing_index:
|
|
270
|
-
) ->
|
|
257
|
+
self, _output_path: str, existing_index: dict[str, Any] | None
|
|
258
|
+
) -> dict[str, Any]:
|
|
271
259
|
"""Perform a full index build."""
|
|
272
260
|
total_prs_in_repo = self.api_client.get_total_pr_count()
|
|
273
261
|
print(f"Starting clean rebuild ({total_prs_in_repo} PRs in repository)...")
|
cicada/pr_indexer/line_mapper.py
CHANGED
|
@@ -7,7 +7,7 @@ allowing comments to track code changes over time.
|
|
|
7
7
|
|
|
8
8
|
import subprocess
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import Any
|
|
11
11
|
|
|
12
12
|
from cicada.utils import SubprocessRunner
|
|
13
13
|
|
|
@@ -31,7 +31,7 @@ class LineMapper:
|
|
|
31
31
|
self.repo_path = repo_path
|
|
32
32
|
self.runner = SubprocessRunner(cwd=repo_path)
|
|
33
33
|
|
|
34
|
-
def map_all_comment_lines(self, prs:
|
|
34
|
+
def map_all_comment_lines(self, prs: list[dict[str, Any]]) -> None:
|
|
35
35
|
"""
|
|
36
36
|
Map all comment lines in PRs to current line numbers.
|
|
37
37
|
|
|
@@ -83,7 +83,7 @@ class LineMapper:
|
|
|
83
83
|
|
|
84
84
|
def map_line_to_current(
|
|
85
85
|
self, file_path: str, original_line: int, commit_sha: str
|
|
86
|
-
) ->
|
|
86
|
+
) -> int | None:
|
|
87
87
|
"""
|
|
88
88
|
Map a line number from a PR commit to the current HEAD.
|
|
89
89
|
|
|
@@ -127,9 +127,7 @@ class LineMapper:
|
|
|
127
127
|
|
|
128
128
|
# Search for the same content in current file
|
|
129
129
|
# Look for exact match near the original line number
|
|
130
|
-
current_line = self._find_matching_line(
|
|
131
|
-
current_lines, original_content, original_line
|
|
132
|
-
)
|
|
130
|
+
current_line = self._find_matching_line(current_lines, original_content, original_line)
|
|
133
131
|
|
|
134
132
|
return current_line
|
|
135
133
|
|
|
@@ -152,7 +150,7 @@ class LineMapper:
|
|
|
152
150
|
except subprocess.CalledProcessError:
|
|
153
151
|
return False
|
|
154
152
|
|
|
155
|
-
def _get_file_lines(self, ref: str, file_path: str) ->
|
|
153
|
+
def _get_file_lines(self, ref: str, file_path: str) -> list[str] | None:
|
|
156
154
|
"""
|
|
157
155
|
Get file lines at a specific git ref.
|
|
158
156
|
|
|
@@ -164,9 +162,7 @@ class LineMapper:
|
|
|
164
162
|
List of file lines, or None if file doesn't exist at that ref
|
|
165
163
|
"""
|
|
166
164
|
try:
|
|
167
|
-
result = self.runner.run_git_command(
|
|
168
|
-
["show", f"{ref}:{file_path}"], check=False
|
|
169
|
-
)
|
|
165
|
+
result = self.runner.run_git_command(["show", f"{ref}:{file_path}"], check=False)
|
|
170
166
|
|
|
171
167
|
if result.returncode != 0:
|
|
172
168
|
return None
|
|
@@ -178,11 +174,11 @@ class LineMapper:
|
|
|
178
174
|
|
|
179
175
|
def _find_matching_line(
|
|
180
176
|
self,
|
|
181
|
-
current_lines:
|
|
177
|
+
current_lines: list[str],
|
|
182
178
|
original_content: str,
|
|
183
179
|
original_line: int,
|
|
184
180
|
search_range: int = 20,
|
|
185
|
-
) ->
|
|
181
|
+
) -> int | None:
|
|
186
182
|
"""
|
|
187
183
|
Find a matching line in the current file.
|
|
188
184
|
|