cicada-mcp 0.1.4 (py3-none-any.whl)

This diff shows the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of cicada-mcp might be problematic.

Files changed (48)
  1. cicada/__init__.py +30 -0
  2. cicada/clean.py +297 -0
  3. cicada/command_logger.py +293 -0
  4. cicada/dead_code_analyzer.py +282 -0
  5. cicada/extractors/__init__.py +36 -0
  6. cicada/extractors/base.py +66 -0
  7. cicada/extractors/call.py +176 -0
  8. cicada/extractors/dependency.py +361 -0
  9. cicada/extractors/doc.py +179 -0
  10. cicada/extractors/function.py +246 -0
  11. cicada/extractors/module.py +123 -0
  12. cicada/extractors/spec.py +151 -0
  13. cicada/find_dead_code.py +270 -0
  14. cicada/formatter.py +918 -0
  15. cicada/git_helper.py +646 -0
  16. cicada/indexer.py +629 -0
  17. cicada/install.py +724 -0
  18. cicada/keyword_extractor.py +364 -0
  19. cicada/keyword_search.py +553 -0
  20. cicada/lightweight_keyword_extractor.py +298 -0
  21. cicada/mcp_server.py +1559 -0
  22. cicada/mcp_tools.py +291 -0
  23. cicada/parser.py +124 -0
  24. cicada/pr_finder.py +435 -0
  25. cicada/pr_indexer/__init__.py +20 -0
  26. cicada/pr_indexer/cli.py +62 -0
  27. cicada/pr_indexer/github_api_client.py +431 -0
  28. cicada/pr_indexer/indexer.py +297 -0
  29. cicada/pr_indexer/line_mapper.py +209 -0
  30. cicada/pr_indexer/pr_index_builder.py +253 -0
  31. cicada/setup.py +339 -0
  32. cicada/utils/__init__.py +52 -0
  33. cicada/utils/call_site_formatter.py +95 -0
  34. cicada/utils/function_grouper.py +57 -0
  35. cicada/utils/hash_utils.py +173 -0
  36. cicada/utils/index_utils.py +290 -0
  37. cicada/utils/path_utils.py +240 -0
  38. cicada/utils/signature_builder.py +106 -0
  39. cicada/utils/storage.py +111 -0
  40. cicada/utils/subprocess_runner.py +182 -0
  41. cicada/utils/text_utils.py +90 -0
  42. cicada/version_check.py +116 -0
  43. cicada_mcp-0.1.4.dist-info/METADATA +619 -0
  44. cicada_mcp-0.1.4.dist-info/RECORD +48 -0
  45. cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
  46. cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
  47. cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
  48. cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
cicada/utils/call_site_formatter.py
@@ -0,0 +1,95 @@
+ """
+ Call site formatting utilities.
+
+ This module provides utilities for grouping and formatting call sites,
+ eliminating duplication in the formatter module.
+ """
+
+ from typing import Dict, List, Any, Tuple
+
+
+ class CallSiteFormatter:
+     """
+     Formats and groups call sites for display.
+
+     This class consolidates the call site grouping and formatting logic
+     that appears multiple times in the formatter module.
+     """
+
+     @staticmethod
+     def group_by_caller(call_sites: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+         """
+         Group call sites by their caller (calling_module + calling_function).
+
+         When the same function is called multiple times from the same caller,
+         this consolidates those calls into a single entry with multiple line numbers.
+
+         Args:
+             call_sites: List of call site dictionaries with keys:
+                 - calling_module: Module making the call
+                 - calling_function: Function making the call (dict with name, arity)
+                 - file: File path
+                 - line: Line number
+                 - code_line: Optional code snippet
+
+         Returns:
+             List of grouped call site dictionaries with keys:
+                 - calling_module: Module making the call
+                 - calling_function: Function making the call
+                 - file: File path
+                 - lines: List of line numbers (sorted)
+                 - code_lines: List of {line, code} dicts (if present)
+
+         Example:
+             call_sites = [
+                 {'calling_module': 'MyApp.User', 'calling_function': {'name': 'create', 'arity': 2},
+                  'file': 'lib/user.ex', 'line': 10},
+                 {'calling_module': 'MyApp.User', 'calling_function': {'name': 'create', 'arity': 2},
+                  'file': 'lib/user.ex', 'line': 20},
+             ]
+             grouped = CallSiteFormatter.group_by_caller(call_sites)
+             # Returns:
+             # [{
+             #     'calling_module': 'MyApp.User',
+             #     'calling_function': {'name': 'create', 'arity': 2},
+             #     'file': 'lib/user.ex',
+             #     'lines': [10, 20]
+             # }]
+         """
+         grouped: Dict[Tuple, Dict[str, Any]] = {}
+
+         for site in call_sites:
+             # Create a key based on caller identity
+             calling_func = site.get("calling_function")
+             if calling_func:
+                 key = (
+                     site["calling_module"],
+                     calling_func["name"],
+                     calling_func["arity"],
+                 )
+             else:
+                 key = (site["calling_module"], None, None)
+
+             if key not in grouped:
+                 grouped[key] = {
+                     "calling_module": site["calling_module"],
+                     "calling_function": calling_func,
+                     "file": site["file"],
+                     "lines": [],
+                     "code_lines": [],
+                 }
+
+             grouped[key]["lines"].append(site["line"])
+             if "code_line" in site:
+                 grouped[key]["code_lines"].append(
+                     {"line": site["line"], "code": site["code_line"]}
+                 )
+
+         # Convert back to list and sort lines
+         result = []
+         for data in grouped.values():
+             data["lines"].sort()
+             data["code_lines"].sort(key=lambda x: x["line"])
+             result.append(data)
+
+         return result
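
For reference, a minimal usage sketch of the helper above (the import path follows the wheel layout; the call-site data is illustrative, extending the docstring example with an optional code_line):

from cicada.utils.call_site_formatter import CallSiteFormatter

# Two calls from the same caller collapse into one grouped entry.
call_sites = [
    {
        "calling_module": "MyApp.User",
        "calling_function": {"name": "create", "arity": 2},
        "file": "lib/user.ex",
        "line": 10,
        "code_line": "Repo.insert(changeset)",
    },
    {
        "calling_module": "MyApp.User",
        "calling_function": {"name": "create", "arity": 2},
        "file": "lib/user.ex",
        "line": 20,
    },
]

grouped = CallSiteFormatter.group_by_caller(call_sites)
assert grouped[0]["lines"] == [10, 20]
# Only sites that carried a code_line appear in code_lines.
assert grouped[0]["code_lines"] == [{"line": 10, "code": "Repo.insert(changeset)"}]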
cicada/utils/function_grouper.py
@@ -0,0 +1,57 @@
+ """
+ Function grouping utilities.
+
+ This module provides utilities for grouping functions by name and arity,
+ eliminating duplication across formatter and other modules.
+ """
+
+ from typing import Dict, List, Any, Tuple
+
+
+ class FunctionGrouper:
+     """
+     Groups functions by their name and arity.
+
+     This eliminates duplication of the grouping logic that appears
+     multiple times in the formatter module.
+     """
+
+     @staticmethod
+     def group_by_name_arity(
+         functions: List[Dict[str, Any]],
+     ) -> Dict[Tuple[str, int], List[Dict[str, Any]]]:
+         """
+         Group functions by their (name, arity) tuple.
+
+         Multiple function clauses with the same name and arity are grouped
+         together. This is common in Elixir where you can define multiple
+         clauses for the same function.
+
+         Args:
+             functions: List of function dictionaries with 'name' and 'arity' keys
+
+         Returns:
+             Dictionary mapping (name, arity) tuples to lists of function clauses
+
+         Example:
+             functions = [
+                 {'name': 'create', 'arity': 1, 'line': 10},
+                 {'name': 'create', 'arity': 1, 'line': 15},  # Second clause
+                 {'name': 'create', 'arity': 2, 'line': 20},
+             ]
+             grouped = FunctionGrouper.group_by_name_arity(functions)
+             # Returns:
+             # {
+             #     ('create', 1): [{'name': 'create', 'arity': 1, ...}, {...}],
+             #     ('create', 2): [{'name': 'create', 'arity': 2, ...}]
+             # }
+         """
+         grouped: Dict[Tuple[str, int], List[Dict[str, Any]]] = {}
+
+         for func in functions:
+             key = (func["name"], func["arity"])
+             if key not in grouped:
+                 grouped[key] = []
+             grouped[key].append(func)
+
+         return grouped
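
A quick sketch of the grouping behavior, reusing the docstring's own sample data (import path per the wheel layout):

from cicada.utils.function_grouper import FunctionGrouper

functions = [
    {"name": "create", "arity": 1, "line": 10},
    {"name": "create", "arity": 1, "line": 15},  # second clause, same name/arity
    {"name": "create", "arity": 2, "line": 20},
]

grouped = FunctionGrouper.group_by_name_arity(functions)
assert sorted(grouped.keys()) == [("create", 1), ("create", 2)]
assert len(grouped[("create", 1)]) == 2  # both clauses kept, in input order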
cicada/utils/hash_utils.py
@@ -0,0 +1,173 @@
+ """
+ Utilities for computing and managing file hashes for incremental indexing.
+
+ This module provides MD5-based file hashing to detect changes in the codebase
+ and enable incremental reindexing, avoiding reprocessing of unchanged files.
+ """
+
+ import hashlib
+ import json
+ import os
+ from datetime import datetime, timezone
+ from pathlib import Path
+ from typing import Dict, List, Tuple
+
+
+ def compute_file_hash(file_path: str) -> str:
+     """
+     Compute MD5 hash of a file's content.
+
+     Args:
+         file_path: Path to the file to hash
+
+     Returns:
+         MD5 hash as hexadecimal string
+
+     Raises:
+         FileNotFoundError: If file doesn't exist
+         IOError: If file cannot be read
+     """
+     # Note: MD5 is used here for speed, not security. This is for content-based
+     # change detection, not cryptographic purposes. MD5 is significantly faster
+     # than SHA256 and collision risk is negligible for our use case.
+     hash_md5 = hashlib.md5()
+     try:
+         with open(file_path, "rb") as f:
+             # Read in chunks to handle large files efficiently
+             for chunk in iter(lambda: f.read(4096), b""):
+                 hash_md5.update(chunk)
+         return hash_md5.hexdigest()
+     except FileNotFoundError:
+         raise FileNotFoundError(f"File not found: {file_path}")
+     except Exception as e:
+         raise IOError(f"Error reading file {file_path}: {e}")
+
+
+ def load_file_hashes(cicada_dir: str) -> Dict[str, str]:
+     """
+     Load file hashes from .cicada/hashes.json.
+
+     Args:
+         cicada_dir: Path to the .cicada directory
+
+     Returns:
+         Dictionary mapping file paths to MD5 hashes.
+         Returns empty dict if hashes.json doesn't exist.
+     """
+     hashes_path = Path(cicada_dir) / "hashes.json"
+
+     if not hashes_path.exists():
+         return {}
+
+     try:
+         with open(hashes_path, "r", encoding="utf-8") as f:
+             data = json.load(f)
+             return data.get("hashes", {})
+     except (json.JSONDecodeError, IOError) as e:
+         print(f"Warning: Could not load hashes.json: {e}")
+         return {}
+
+
+ def save_file_hashes(cicada_dir: str, hashes: Dict[str, str]) -> None:
+     """
+     Save file hashes to .cicada/hashes.json.
+
+     Args:
+         cicada_dir: Path to the .cicada directory
+         hashes: Dictionary mapping file paths to MD5 hashes
+     """
+     hashes_path = Path(cicada_dir) / "hashes.json"
+
+     # Ensure .cicada directory exists
+     os.makedirs(cicada_dir, exist_ok=True)
+
+     data = {
+         "version": "1.0",
+         "hashes": hashes,
+         "last_updated": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+     }
+
+     try:
+         with open(hashes_path, "w", encoding="utf-8") as f:
+             json.dump(data, f, indent=2)
+     except IOError as e:
+         print(f"Warning: Could not save hashes.json: {e}")
+
+
+ def detect_file_changes(
+     files: List[str], old_hashes: Dict[str, str], repo_path: str | None = None
+ ) -> Tuple[List[str], List[str], List[str]]:
+     """
+     Detect new, modified, and deleted files by comparing hashes.
+
+     Args:
+         files: List of current file paths (relative to repo root)
+         old_hashes: Dictionary of file paths to their previous MD5 hashes
+         repo_path: Optional repository root path. If provided, file paths
+             will be resolved relative to this path.
+
+     Returns:
+         Tuple of (new_files, modified_files, deleted_files)
+         - new_files: Files that didn't exist in old_hashes
+         - modified_files: Files whose hash changed
+         - deleted_files: Files in old_hashes but not in current files list
+     """
+     new_files = []
+     modified_files = []
+
+     current_file_set = set(files)
+     old_file_set = set(old_hashes.keys())
+
+     # Detect deleted files
+     deleted_files = list(old_file_set - current_file_set)
+
+     # Detect new and modified files
+     for file_path in files:
+         # Resolve full path if repo_path provided
+         full_path = os.path.join(repo_path, file_path) if repo_path else file_path
+
+         if file_path not in old_hashes:
+             # New file
+             new_files.append(file_path)
+         else:
+             # Check if modified
+             # Note: Race condition possible if file modified between this check
+             # and actual indexing, but impact is minimal (re-detected next run)
+             try:
+                 current_hash = compute_file_hash(full_path)
+                 if current_hash != old_hashes[file_path]:
+                     modified_files.append(file_path)
+             except (FileNotFoundError, IOError) as e:
+                 # File might have been deleted after listing
+                 print(f"Warning: Could not hash {file_path}: {e}")
+                 deleted_files.append(file_path)
+
+     return new_files, modified_files, deleted_files
+
+
+ def compute_hashes_for_files(
+     files: List[str], repo_path: str | None = None
+ ) -> Dict[str, str]:
+     """
+     Compute MD5 hashes for a list of files.
+
+     Args:
+         files: List of file paths (relative to repo root)
+         repo_path: Optional repository root path. If provided, file paths
+             will be resolved relative to this path.
+
+     Returns:
+         Dictionary mapping file paths to MD5 hashes
+     """
+     hashes = {}
+
+     for file_path in files:
+         # Resolve full path if repo_path provided
+         full_path = os.path.join(repo_path, file_path) if repo_path else file_path
+
+         try:
+             hashes[file_path] = compute_file_hash(full_path)
+         except (FileNotFoundError, IOError) as e:
+             print(f"Warning: Could not hash {file_path}: {e}")
+
+     return hashes
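
The hashing helpers compose into a detect-then-update loop; a minimal end-to-end sketch, assuming a throwaway repo directory (the file name a.ex is hypothetical):

import os
import tempfile

from cicada.utils.hash_utils import (
    compute_hashes_for_files,
    detect_file_changes,
    load_file_hashes,
    save_file_hashes,
)

repo = tempfile.mkdtemp()
with open(os.path.join(repo, "a.ex"), "w") as f:
    f.write("defmodule A do\nend\n")

# First run: hash everything and persist to .cicada/hashes.json.
hashes = compute_hashes_for_files(["a.ex"], repo_path=repo)
save_file_hashes(os.path.join(repo, ".cicada"), hashes)

# Later run: the file changed, so it is reported as modified.
with open(os.path.join(repo, "a.ex"), "a") as f:
    f.write("# touched\n")
old = load_file_hashes(os.path.join(repo, ".cicada"))
new, modified, deleted = detect_file_changes(["a.ex"], old, repo_path=repo)
assert (new, modified, deleted) == ([], ["a.ex"], [])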
cicada/utils/index_utils.py
@@ -0,0 +1,290 @@
+ """
+ Index file loading and saving utilities.
+
+ This module provides centralized functions for loading and saving
+ JSON index files with consistent error handling.
+ """
+
+ import json
+ import sys
+ from pathlib import Path
+ from typing import Optional, Dict, Any, Union
+
+
+ def load_index(
+     index_path: Union[str, Path],
+     verbose: bool = False,
+     raise_on_error: bool = False,
+ ) -> Optional[Dict[str, Any]]:
+     """
+     Load a JSON index file.
+
+     Args:
+         index_path: Path to the index file
+         verbose: If True, print warning messages
+         raise_on_error: If True, raise exceptions instead of returning None
+
+     Returns:
+         Index dictionary, or None if file doesn't exist or can't be loaded
+
+     Raises:
+         FileNotFoundError: If raise_on_error=True and file doesn't exist
+         json.JSONDecodeError: If raise_on_error=True and JSON is invalid
+         IOError: If raise_on_error=True and file can't be read
+     """
+     index_file = Path(index_path)
+
+     if not index_file.exists():
+         if raise_on_error:
+             raise FileNotFoundError(f"Index file not found: {index_path}")
+         if verbose:
+             print(f"Warning: Index not found at {index_path}", file=sys.stderr)
+         return None
+
+     try:
+         with open(index_file, "r") as f:
+             return json.load(f)
+     except json.JSONDecodeError as e:
+         if raise_on_error:
+             raise
+         if verbose:
+             print(f"Warning: Could not parse index: {e}", file=sys.stderr)
+         return None
+     except IOError as e:
+         if raise_on_error:
+             raise
+         if verbose:
+             print(f"Warning: Could not read index: {e}", file=sys.stderr)
+         return None
+
+
+ def save_index(
+     index: Dict[str, Any],
+     output_path: Union[str, Path],
+     indent: int = 2,
+     create_dirs: bool = True,
+     verbose: bool = False,
+ ) -> None:
+     """
+     Save an index dictionary to a JSON file.
+
+     Args:
+         index: Index dictionary to save
+         output_path: Path where the index will be saved
+         indent: JSON indentation (default: 2 spaces)
+         create_dirs: Create parent directories if they don't exist
+         verbose: If True, print confirmation message
+
+     Raises:
+         IOError: If file cannot be written
+         TypeError: If index cannot be serialized to JSON
+     """
+     output_file = Path(output_path)
+
+     if create_dirs:
+         output_file.parent.mkdir(parents=True, exist_ok=True)
+
+     with open(output_file, "w") as f:
+         json.dump(index, f, indent=indent)
+
+     if verbose:
+         print(f"Index saved to: {output_path}")
+
+
+ def validate_index_structure(
+     index: Any,
+     required_keys: Optional[list[str]] = None,
+ ) -> tuple[bool, Optional[str]]:
+     """
+     Validate the structure of an index dictionary.
+
+     Args:
+         index: Index dictionary to validate
+         required_keys: List of required top-level keys (default: ['modules', 'metadata'])
+
+     Returns:
+         Tuple of (is_valid, error_message)
+         error_message is None if index is valid
+
+     Example:
+         valid, error = validate_index_structure(index)
+         if not valid:
+             print(f"Invalid index: {error}")
+     """
+     if not isinstance(index, dict):
+         return False, "Index must be a dictionary"
+
+     if required_keys is None:
+         required_keys = ["modules", "metadata"]
+
+     for key in required_keys:
+         if key not in index:
+             return False, f"Missing required key: {key}"
+
+     # Validate modules structure
+     if "modules" in index and not isinstance(index["modules"], dict):
+         return False, "'modules' must be a dictionary"
+
+     # Validate metadata structure
+     if "metadata" in index and not isinstance(index["metadata"], dict):
+         return False, "'metadata' must be a dictionary"
+
+     return True, None
+
+
+ def merge_indexes(
+     *indexes: Dict[str, Any],
+     strategy: str = "last_wins",
+ ) -> Dict[str, Any]:
+     """
+     Merge multiple index dictionaries.
+
+     Args:
+         *indexes: Variable number of index dictionaries to merge
+         strategy: Merge strategy ('last_wins' or 'first_wins')
+             - 'last_wins': Later indexes override earlier ones
+             - 'first_wins': Earlier indexes take precedence
+
+     Returns:
+         Merged index dictionary
+
+     Example:
+         merged = merge_indexes(index1, index2, index3, strategy='last_wins')
+     """
+     if not indexes:
+         return {}
+
+     # Start with empty structure
+     merged = {
+         "modules": {},
+         "metadata": {},
+     }
+
+     if strategy == "last_wins":
+         index_list = list(indexes)
+     elif strategy == "first_wins":
+         index_list = list(reversed(indexes))
+     else:
+         raise ValueError(f"Unknown merge strategy: {strategy}")
+
+     # Merge modules
+     for index in index_list:
+         if "modules" in index:
+             merged["modules"].update(index["modules"])
+
+     # Merge metadata (later ones override)
+     for index in index_list:
+         if "metadata" in index:
+             merged["metadata"].update(index["metadata"])
+
+     return merged
+
+
+ def get_index_stats(index: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Get statistics about an index.
+
+     Args:
+         index: Index dictionary
+
+     Returns:
+         Dictionary with statistics:
+         - total_modules: Number of modules
+         - total_functions: Total function count
+         - public_functions: Public function count
+         - private_functions: Private function count
+
+     Example:
+         stats = get_index_stats(index)
+         print(f"Index contains {stats['total_modules']} modules")
+     """
+     stats = {
+         "total_modules": 0,
+         "total_functions": 0,
+         "public_functions": 0,
+         "private_functions": 0,
+     }
+
+     if "modules" not in index:
+         return stats
+
+     modules = index["modules"]
+     stats["total_modules"] = len(modules)
+
+     for module_data in modules.values():
+         if "functions" in module_data:
+             functions = module_data["functions"]
+             stats["total_functions"] += len(functions)
+
+             for func in functions:
+                 if func.get("type") == "def":
+                     stats["public_functions"] += 1
+                 elif func.get("type") == "defp":
+                     stats["private_functions"] += 1
+
+     return stats
+
+
+ def merge_indexes_incremental(
+     old_index: Dict[str, Any],
+     new_index: Dict[str, Any],
+     deleted_files: list[str],
+ ) -> Dict[str, Any]:
+     """
+     Merge old and new indexes for incremental reindexing.
+
+     This specialized merge function:
+     1. Keeps all modules from old_index that aren't in deleted files
+     2. Adds/updates modules from new_index (new and modified files)
+     3. Removes modules whose files were deleted
+     4. Updates metadata with new counts
+
+     Args:
+         old_index: Existing index dictionary
+         new_index: Index from newly processed files
+         deleted_files: List of file paths that were deleted
+
+     Returns:
+         Merged index dictionary with updated modules and metadata
+
+     Example:
+         merged = merge_indexes_incremental(
+             old_index=existing_index,
+             new_index=changed_files_index,
+             deleted_files=['lib/deleted.ex']
+         )
+     """
+     # Start with empty structure
+     merged = {
+         "modules": {},
+         "metadata": {},
+     }
+
+     # Convert deleted files list to set for O(1) lookup
+     deleted_set = set(deleted_files)
+
+     # Keep modules from old_index that aren't deleted
+     if "modules" in old_index:
+         for module_name, module_data in old_index["modules"].items():
+             file_path = module_data.get("file", "")
+             if file_path not in deleted_set:
+                 merged["modules"][module_name] = module_data
+
+     # Add/update modules from new_index (overrides old ones with same name)
+     if "modules" in new_index:
+         merged["modules"].update(new_index["modules"])
+
+     # Merge metadata - take from new_index if available, else old_index
+     if "metadata" in new_index:
+         merged["metadata"].update(new_index["metadata"])
+     elif "metadata" in old_index:
+         merged["metadata"].update(old_index["metadata"])
+
+     # Update module and function counts
+     stats = get_index_stats(merged)
+     merged["metadata"]["total_modules"] = stats["total_modules"]
+     merged["metadata"]["total_functions"] = stats["total_functions"]
+     merged["metadata"]["public_functions"] = stats["public_functions"]
+     merged["metadata"]["private_functions"] = stats["private_functions"]
+
+     return merged
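
Read together, merge_indexes_incremental and get_index_stats keep the metadata counts consistent after a partial reindex; a minimal sketch with hypothetical module data (Elixir-style names as in the docstrings):

from cicada.utils.index_utils import (
    merge_indexes_incremental,
    validate_index_structure,
)

old_index = {
    "modules": {
        "MyApp.User": {"file": "lib/user.ex",
                       "functions": [{"name": "create", "arity": 2, "type": "def"}]},
        "MyApp.Old": {"file": "lib/old.ex", "functions": []},
    },
    "metadata": {},
}
new_index = {
    "modules": {
        "MyApp.User": {"file": "lib/user.ex",
                       "functions": [{"name": "create", "arity": 2, "type": "def"},
                                     {"name": "do_create", "arity": 2, "type": "defp"}]},
    },
    "metadata": {},
}

merged = merge_indexes_incremental(old_index, new_index, deleted_files=["lib/old.ex"])
assert validate_index_structure(merged) == (True, None)
assert "MyApp.Old" not in merged["modules"]        # module from a deleted file is dropped
assert merged["metadata"]["total_functions"] == 2  # counts recomputed via get_index_stats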