cicada-mcp 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cicada-mcp might be problematic. Click here for more details.

Files changed (48)
  1. cicada/__init__.py +30 -0
  2. cicada/clean.py +297 -0
  3. cicada/command_logger.py +293 -0
  4. cicada/dead_code_analyzer.py +282 -0
  5. cicada/extractors/__init__.py +36 -0
  6. cicada/extractors/base.py +66 -0
  7. cicada/extractors/call.py +176 -0
  8. cicada/extractors/dependency.py +361 -0
  9. cicada/extractors/doc.py +179 -0
  10. cicada/extractors/function.py +246 -0
  11. cicada/extractors/module.py +123 -0
  12. cicada/extractors/spec.py +151 -0
  13. cicada/find_dead_code.py +270 -0
  14. cicada/formatter.py +918 -0
  15. cicada/git_helper.py +646 -0
  16. cicada/indexer.py +629 -0
  17. cicada/install.py +724 -0
  18. cicada/keyword_extractor.py +364 -0
  19. cicada/keyword_search.py +553 -0
  20. cicada/lightweight_keyword_extractor.py +298 -0
  21. cicada/mcp_server.py +1559 -0
  22. cicada/mcp_tools.py +291 -0
  23. cicada/parser.py +124 -0
  24. cicada/pr_finder.py +435 -0
  25. cicada/pr_indexer/__init__.py +20 -0
  26. cicada/pr_indexer/cli.py +62 -0
  27. cicada/pr_indexer/github_api_client.py +431 -0
  28. cicada/pr_indexer/indexer.py +297 -0
  29. cicada/pr_indexer/line_mapper.py +209 -0
  30. cicada/pr_indexer/pr_index_builder.py +253 -0
  31. cicada/setup.py +339 -0
  32. cicada/utils/__init__.py +52 -0
  33. cicada/utils/call_site_formatter.py +95 -0
  34. cicada/utils/function_grouper.py +57 -0
  35. cicada/utils/hash_utils.py +173 -0
  36. cicada/utils/index_utils.py +290 -0
  37. cicada/utils/path_utils.py +240 -0
  38. cicada/utils/signature_builder.py +106 -0
  39. cicada/utils/storage.py +111 -0
  40. cicada/utils/subprocess_runner.py +182 -0
  41. cicada/utils/text_utils.py +90 -0
  42. cicada/version_check.py +116 -0
  43. cicada_mcp-0.1.4.dist-info/METADATA +619 -0
  44. cicada_mcp-0.1.4.dist-info/RECORD +48 -0
  45. cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
  46. cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
  47. cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
  48. cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ PR Indexer - Indexes pull requests and their commits for fast offline lookup.
3
+
4
+ Fetches all PRs from a GitHub repository and builds an index mapping commits to PRs.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Optional, Dict, List, Any
9
+
10
+ from .github_api_client import GitHubAPIClient
11
+ from .pr_index_builder import PRIndexBuilder
12
+ from .line_mapper import LineMapper
13
+
14
+
15
class PRIndexer:
    """
    Indexes GitHub pull requests for fast offline lookup.

    This class orchestrates the indexing process, delegating to specialized
    components for API interaction, index building, and line mapping.
    """

    # Number of PRs requested per GraphQL batch call.
    BATCH_SIZE = 10

    # Listing limit high enough that no PR is missed even in large repos.
    LIST_LIMIT = 100000

    def __init__(self, repo_path: str = "."):
        """
        Initialize the PR indexer.

        Args:
            repo_path: Path to the git repository (defaults to current directory)

        Raises:
            ValueError: If ``repo_path`` is not a git repository.
        """
        self.repo_path = Path(repo_path).resolve()
        self._validate_git_repo()

        # A throwaway client (owner/name not known yet) is used to verify the
        # `gh` CLI is available and to discover the repository coordinates.
        temp_client = GitHubAPIClient(self.repo_path, "", "")
        temp_client.validate_gh_cli()
        self.repo_owner, self.repo_name = temp_client.get_repo_info()

        # Initialize the real components with the discovered owner/name.
        self.api_client = GitHubAPIClient(
            self.repo_path, self.repo_owner, self.repo_name
        )
        self.index_builder = PRIndexBuilder(self.repo_owner, self.repo_name)
        self.line_mapper = LineMapper(self.repo_path)

    def _validate_git_repo(self):
        """Validate that the path is a git repository."""
        git_dir = self.repo_path / ".git"
        if not git_dir.exists():
            raise ValueError(f"Not a git repository: {self.repo_path}")

    def _fetch_detail_batches(
        self,
        pr_numbers: List[int],
        collected: List[Dict[str, Any]],
        label: str,
    ) -> None:
        """
        Fetch detailed PR data in batches, appending results to ``collected``.

        Progress is printed as ``<label> i/total``. On KeyboardInterrupt the
        exception propagates to the caller with everything fetched so far
        already present in ``collected``.

        Args:
            pr_numbers: PR numbers to fetch, in the order they should be fetched
            collected: Output list that detailed PR dicts are appended to
            label: Progress-message prefix (e.g. "Fetching batch" or "Batch")
        """
        total_batches = (len(pr_numbers) + self.BATCH_SIZE - 1) // self.BATCH_SIZE
        for i in range(0, len(pr_numbers), self.BATCH_SIZE):
            batch = pr_numbers[i : i + self.BATCH_SIZE]
            print(
                f" {label} {i // self.BATCH_SIZE + 1}/{total_batches} "
                f"({len(batch)} PRs)..."
            )
            collected.extend(self.api_client.fetch_prs_batch_graphql(batch))

    def fetch_all_prs(self, state: str = "all") -> List[Dict[str, Any]]:
        """
        Fetch all pull requests from GitHub using GraphQL for efficiency.

        Args:
            state: PR state filter ('all', 'open', 'closed', 'merged')

        Returns:
            List of PR dictionaries with full details. May be partial if the
            user interrupts with Ctrl-C.

        Raises:
            RuntimeError: If the PR listing or batch fetches fail.
        """
        print(f"Fetching PRs from {self.repo_owner}/{self.repo_name}...")

        try:
            # Get list of PR numbers with a very high limit so no PRs are
            # missed in large repositories.
            pr_numbers = self.api_client.fetch_pr_list(state=state, limit=self.LIST_LIMIT)
            print(f"Found {len(pr_numbers)} pull requests")

            detailed_prs: List[Dict[str, Any]] = []
            try:
                self._fetch_detail_batches(pr_numbers, detailed_prs, "Fetching batch")
            except KeyboardInterrupt:
                # Return what we have; the caller saves a partial index.
                print(
                    f"\n\n⚠️ Interrupted by user. Fetched {len(detailed_prs)}/"
                    f"{len(pr_numbers)} PRs."
                )
                print("Saving partial index...")
            return detailed_prs

        except RuntimeError as e:
            # Chain the original error so the root cause stays visible.
            raise RuntimeError(f"Failed to fetch PRs: {e}") from e

    def incremental_update(
        self, existing_index: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Fetch PRs bidirectionally: newer (above max) and older (below min).

        Args:
            existing_index: The existing index dictionary

        Returns:
            List of new PRs (empty if the index is already complete)
        """
        # Get min and max PR numbers currently in the index.
        existing_pr_numbers = [int(num) for num in existing_index.get("prs", {}).keys()]

        if not existing_pr_numbers:
            print("Empty index, performing full fetch...")
            return self.fetch_all_prs()

        min_pr = min(existing_pr_numbers)
        max_pr = max(existing_pr_numbers)
        total_prs_in_repo = self.api_client.get_total_pr_count()

        print(
            f"Performing incremental update (index range: #{min_pr}-#{max_pr}, "
            f"repo has {total_prs_in_repo} PRs)..."
        )

        # Fetch newer PRs (> max_pr) and older PRs (< min_pr).
        newer_pr_numbers = self._fetch_newer_prs(max_pr)
        older_pr_numbers = self._fetch_older_prs(min_pr)

        if not newer_pr_numbers and not older_pr_numbers:
            print("Index is complete (no newer or older PRs to fetch)")
            return []

        print(f"Found {len(newer_pr_numbers)} newer PRs", end="")
        if older_pr_numbers:
            print(
                f" and {len(older_pr_numbers)} older PRs to fetch "
                f"(going downward from #{min_pr})"
            )
        else:
            print()

        # Fetch detailed info for all PRs.
        return self._fetch_prs_in_batches(newer_pr_numbers, older_pr_numbers, min_pr)

    def _fetch_newer_prs(self, max_pr: int) -> List[int]:
        """Return PR numbers strictly greater than ``max_pr``, newest first."""
        # Use the same high limit as the full fetch; a lower cap (the previous
        # 1000) could silently miss newer PRs in very active repositories.
        pr_numbers = self.api_client.fetch_pr_list(state="all", limit=self.LIST_LIMIT)

        # PRs are returned newest-first, so stop at the first one we already have.
        newer = []
        for pr_num in pr_numbers:
            if pr_num <= max_pr:
                break
            newer.append(pr_num)

        return newer

    def _fetch_older_prs(self, min_pr: int) -> List[int]:
        """Return PR numbers strictly less than ``min_pr``, descending."""
        if min_pr <= 1:
            # Nothing can be older than PR #1.
            return []

        try:
            # Fetch all PR numbers and filter for ones < min_pr.
            # Use a very high limit so no older PRs are missed in large repos.
            all_prs = self.api_client.fetch_pr_list(state="all", limit=self.LIST_LIMIT)
            return sorted(
                [pr_num for pr_num in all_prs if pr_num < min_pr],
                reverse=True,  # Descending order
            )

        except RuntimeError as e:
            # Best-effort: older history is optional for an incremental update.
            print(f"Warning: Could not fetch older PRs: {e}")
            return []

    def _fetch_prs_in_batches(
        self, newer_pr_numbers: List[int], older_pr_numbers: List[int], min_pr: int
    ) -> List[Dict[str, Any]]:
        """
        Fetch newer PRs first, then older ones, in batches with progress output.

        Returns whatever was fetched so far if interrupted with Ctrl-C.
        """
        detailed_prs: List[Dict[str, Any]] = []

        try:
            if newer_pr_numbers:
                print(f"\n⬆️ Fetching {len(newer_pr_numbers)} newer PRs...")
                self._fetch_detail_batches(newer_pr_numbers, detailed_prs, "Batch")

            if older_pr_numbers:
                print(
                    f"\n⬇️ Fetching {len(older_pr_numbers)} older PRs "
                    f"(going downward from #{min_pr})..."
                )
                self._fetch_detail_batches(older_pr_numbers, detailed_prs, "Batch")

        except KeyboardInterrupt:
            print(
                f"\n\n⚠️ Interrupted by user. Fetched {len(detailed_prs)}/"
                f"{len(newer_pr_numbers) + len(older_pr_numbers)} PRs."
            )
            print("Saving partial index...")

        return detailed_prs

    def index_repository(
        self, output_path: str = ".cicada/pr_index.json", incremental: bool = False
    ):
        """
        Index the repository's PRs and save to file.

        Args:
            output_path: Path where the index will be saved
            incremental: If True, only fetch new PRs since last index

        Returns:
            The index dictionary that was saved (or the unchanged existing
            index when there is nothing new).
        """
        # Load existing index to preserve last_pr_number if clean build is interrupted.
        existing_index = self.index_builder.load_existing_index(output_path)

        if incremental:
            if existing_index:
                # Fetch only new PRs.
                new_prs = self.incremental_update(existing_index)

                if not new_prs:
                    print("No new PRs found. Index is up to date.")
                    return existing_index

                # Map comment lines, then merge new PRs into the existing index.
                self.line_mapper.map_all_comment_lines(new_prs)
                index = self.index_builder.merge_indexes(existing_index, new_prs)
            else:
                print("No existing index found. Performing full index...")
                index = self._perform_full_index(output_path, existing_index)
        else:
            # Full index (--clean).
            index = self._perform_full_index(output_path, existing_index)

        # Save index.
        self.index_builder.save_index(index, output_path)
        return index

    def _perform_full_index(
        self, _output_path: str, existing_index: Optional[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Perform a full index build, tolerating an interrupted (partial) fetch."""
        total_prs_in_repo = self.api_client.get_total_pr_count()
        print(f"Starting clean rebuild ({total_prs_in_repo} PRs in repository)...")

        prs = self.fetch_all_prs()

        # Map comment lines.
        self.line_mapper.map_all_comment_lines(prs)

        # Check if this is a partial/interrupted fetch.
        is_partial = len(prs) < total_prs_in_repo

        if is_partial:
            print(f"⚠️ Partial fetch: got {len(prs)}/{total_prs_in_repo} PRs.")
            print("   Setting last_pr_number=0 to allow incremental resume...")

            if existing_index:
                # Merge with existing index to preserve PR data.
                print("   Merging with existing index to preserve PR data...")
                new_index = self.index_builder.build_index(prs, preserve_last_pr=0)
                return self.index_builder.merge_partial_clean(existing_index, new_index)
            else:
                # No existing index - build new one with last_pr_number=0.
                return self.index_builder.build_index(prs, preserve_last_pr=0)
        else:
            # Complete fetch.
            return self.index_builder.build_index(prs)
@@ -0,0 +1,209 @@
1
+ """
2
+ Line Mapper for PR Comments.
3
+
4
+ This module handles mapping comment line numbers from PR commits to current HEAD,
5
+ allowing comments to track code changes over time.
6
+ """
7
+
8
+ import subprocess
9
+ from pathlib import Path
10
+ from typing import Dict, List, Any, Optional
11
+
12
+ from cicada.utils import SubprocessRunner
13
+
14
+
15
class LineMapper:
    """
    Maps comment line numbers from PR commits to current file state.

    A review comment is anchored to a line in a specific commit; as the
    codebase evolves that line can move or disappear. This class makes a
    best-effort attempt to locate the same line in the current HEAD.
    """

    def __init__(self, repo_path: Path):
        """
        Initialize the line mapper.

        Args:
            repo_path: Path to the git repository
        """
        self.repo_path = repo_path
        self.runner = SubprocessRunner(cwd=repo_path)

    def map_all_comment_lines(self, prs: List[Dict[str, Any]]) -> None:
        """
        Map all comment lines in PRs to current line numbers.

        Updates the PRs in-place, setting the 'line' field on each comment.

        Args:
            prs: List of PR dictionaries to update in-place
        """
        print("Mapping comment lines to current file state...")

        total_comments = sum(len(pr.get("comments", [])) for pr in prs)
        if total_comments == 0:
            return

        resolved = 0
        lost = 0

        try:
            for pr in prs:
                for comment in pr.get("comments", []):
                    src_line = comment.get("original_line")
                    sha = comment.get("commit_sha")
                    path = comment.get("path")

                    # All three anchors are required to track a comment.
                    if not (src_line and sha and path):
                        comment["line"] = None
                        lost += 1
                        continue

                    target = self.map_line_to_current(path, src_line, sha)
                    comment["line"] = target
                    if target is None:
                        lost += 1
                    else:
                        resolved += 1

        except KeyboardInterrupt:
            print(
                f"\n\n⚠️ Line mapping interrupted. "
                f"Mapped {resolved}/{total_comments} comments."
            )
            print("Saving index with partial line mappings...")
            # Re-raise so the outer handler can still save the index.
            raise

        print(f" Mapped {resolved} comments, {lost} unmappable/outdated")

    def map_line_to_current(
        self, file_path: str, original_line: int, commit_sha: str
    ) -> Optional[int]:
        """
        Map a line number from a PR commit to the current HEAD.

        Best-effort strategy: confirm the file still exists, read it at both
        the original commit and HEAD, then search near the original position
        for a line with identical (stripped) content.

        Args:
            file_path: Path to the file
            original_line: Line number in the PR commit
            commit_sha: The commit SHA where the comment was made

        Returns:
            Current line number, or None if line no longer exists or can't be tracked
        """
        if not (original_line and commit_sha and file_path):
            return None

        try:
            if not self._file_exists(file_path):
                return None

            head_lines = self._get_file_lines("HEAD", file_path)
            if not head_lines:
                return None

            then_lines = self._get_file_lines(commit_sha, file_path)
            if not then_lines or original_line > len(then_lines):
                return None

            # The content of the commented line in the original commit.
            anchor = then_lines[original_line - 1].strip()
            if not anchor:
                # Blank lines have no content to match against.
                return None

            # Look for identical content near the original line number.
            return self._find_matching_line(head_lines, anchor, original_line)

        except (subprocess.CalledProcessError, IndexError, ValueError):
            return None

    def _file_exists(self, file_path: str) -> bool:
        """
        Check if a file exists in the current HEAD.

        Args:
            file_path: Path to the file

        Returns:
            True if file exists
        """
        try:
            listed = self.runner.run_git_command(["ls-files", file_path], check=False)
        except subprocess.CalledProcessError:
            return False
        # `git ls-files` prints the path only when it is tracked.
        return bool(listed.stdout.strip())

    def _get_file_lines(self, ref: str, file_path: str) -> Optional[List[str]]:
        """
        Get file lines at a specific git ref.

        Args:
            ref: Git reference (commit SHA, branch name, HEAD, etc.)
            file_path: Path to the file

        Returns:
            List of file lines, or None if file doesn't exist at that ref
        """
        try:
            shown = self.runner.run_git_command(
                ["show", f"{ref}:{file_path}"], check=False
            )
        except subprocess.CalledProcessError:
            return None

        if shown.returncode != 0:
            return None

        # split("\n") keeps a trailing empty entry when the file ends in a
        # newline; callers index 1-based so this is harmless.
        return shown.stdout.split("\n")

    def _find_matching_line(
        self,
        current_lines: List[str],
        original_content: str,
        original_line: int,
        search_range: int = 20,
    ) -> Optional[int]:
        """
        Find a matching line in the current file.

        Scans a window of ``search_range`` lines above and below the original
        position for an exact (stripped) content match.

        Args:
            current_lines: Lines from the current file
            original_content: Original line content (stripped)
            original_line: Original line number
            search_range: How many lines to search above/below (default: 20)

        Returns:
            Matching line number (1-indexed), or None if not found
        """
        lo = max(0, original_line - search_range - 1)  # 0-based window start
        hi = min(len(current_lines), original_line + search_range)

        for lineno, text in enumerate(current_lines[lo:hi], start=lo + 1):
            if text.strip() == original_content:
                return lineno

        # No match nearby: the line was likely deleted or rewritten.
        return None