cicada-mcp 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of cicada-mcp has been flagged as possibly problematic.
- cicada/__init__.py +30 -0
- cicada/clean.py +297 -0
- cicada/command_logger.py +293 -0
- cicada/dead_code_analyzer.py +282 -0
- cicada/extractors/__init__.py +36 -0
- cicada/extractors/base.py +66 -0
- cicada/extractors/call.py +176 -0
- cicada/extractors/dependency.py +361 -0
- cicada/extractors/doc.py +179 -0
- cicada/extractors/function.py +246 -0
- cicada/extractors/module.py +123 -0
- cicada/extractors/spec.py +151 -0
- cicada/find_dead_code.py +270 -0
- cicada/formatter.py +918 -0
- cicada/git_helper.py +646 -0
- cicada/indexer.py +629 -0
- cicada/install.py +724 -0
- cicada/keyword_extractor.py +364 -0
- cicada/keyword_search.py +553 -0
- cicada/lightweight_keyword_extractor.py +298 -0
- cicada/mcp_server.py +1559 -0
- cicada/mcp_tools.py +291 -0
- cicada/parser.py +124 -0
- cicada/pr_finder.py +435 -0
- cicada/pr_indexer/__init__.py +20 -0
- cicada/pr_indexer/cli.py +62 -0
- cicada/pr_indexer/github_api_client.py +431 -0
- cicada/pr_indexer/indexer.py +297 -0
- cicada/pr_indexer/line_mapper.py +209 -0
- cicada/pr_indexer/pr_index_builder.py +253 -0
- cicada/setup.py +339 -0
- cicada/utils/__init__.py +52 -0
- cicada/utils/call_site_formatter.py +95 -0
- cicada/utils/function_grouper.py +57 -0
- cicada/utils/hash_utils.py +173 -0
- cicada/utils/index_utils.py +290 -0
- cicada/utils/path_utils.py +240 -0
- cicada/utils/signature_builder.py +106 -0
- cicada/utils/storage.py +111 -0
- cicada/utils/subprocess_runner.py +182 -0
- cicada/utils/text_utils.py +90 -0
- cicada/version_check.py +116 -0
- cicada_mcp-0.1.4.dist-info/METADATA +619 -0
- cicada_mcp-0.1.4.dist-info/RECORD +48 -0
- cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
- cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
- cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
- cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
cicada/utils/call_site_formatter.py (new file)

@@ -0,0 +1,95 @@
"""
Call site formatting utilities.

This module provides utilities for grouping and formatting call sites,
eliminating duplication in the formatter module.
"""

from typing import Dict, List, Any, Tuple


class CallSiteFormatter:
    """
    Formats and groups call sites for display.

    This class consolidates the call site grouping and formatting logic
    that appears multiple times in the formatter module.
    """

    @staticmethod
    def group_by_caller(call_sites: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Group call sites by their caller (calling_module + calling_function).

        When the same function is called multiple times from the same caller,
        this consolidates those calls into a single entry with multiple line numbers.

        Args:
            call_sites: List of call site dictionaries with keys:
                - calling_module: Module making the call
                - calling_function: Function making the call (dict with name, arity)
                - file: File path
                - line: Line number
                - code_line: Optional code snippet

        Returns:
            List of grouped call site dictionaries with keys:
                - calling_module: Module making the call
                - calling_function: Function making the call
                - file: File path
                - lines: List of line numbers (sorted)
                - code_lines: List of {line, code} dicts (if present)

        Example:
            call_sites = [
                {'calling_module': 'MyApp.User', 'calling_function': {'name': 'create', 'arity': 2},
                 'file': 'lib/user.ex', 'line': 10},
                {'calling_module': 'MyApp.User', 'calling_function': {'name': 'create', 'arity': 2},
                 'file': 'lib/user.ex', 'line': 20},
            ]
            grouped = CallSiteFormatter.group_by_caller(call_sites)
            # Returns:
            # [{
            #     'calling_module': 'MyApp.User',
            #     'calling_function': {'name': 'create', 'arity': 2},
            #     'file': 'lib/user.ex',
            #     'lines': [10, 20]
            # }]
        """
        grouped: Dict[Tuple, Dict[str, Any]] = {}

        for site in call_sites:
            # Create a key based on caller identity
            calling_func = site.get("calling_function")
            if calling_func:
                key = (
                    site["calling_module"],
                    calling_func["name"],
                    calling_func["arity"],
                )
            else:
                key = (site["calling_module"], None, None)

            if key not in grouped:
                grouped[key] = {
                    "calling_module": site["calling_module"],
                    "calling_function": calling_func,
                    "file": site["file"],
                    "lines": [],
                    "code_lines": [],
                }

            grouped[key]["lines"].append(site["line"])
            if "code_line" in site:
                grouped[key]["code_lines"].append(
                    {"line": site["line"], "code": site["code_line"]}
                )

        # Convert back to list and sort lines
        result = []
        for data in grouped.values():
            data["lines"].sort()
            data["code_lines"].sort(key=lambda x: x["line"])
            result.append(data)

        return result
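For orientation, a minimal usage sketch of CallSiteFormatter.group_by_caller, based only on the docstring and code above; the sample call-site dicts are illustrative.

from cicada.utils.call_site_formatter import CallSiteFormatter

# Two calls from the same caller collapse into one entry with sorted line numbers.
call_sites = [
    {"calling_module": "MyApp.User", "calling_function": {"name": "create", "arity": 2},
     "file": "lib/user.ex", "line": 20, "code_line": "Repo.insert(changeset)"},
    {"calling_module": "MyApp.User", "calling_function": {"name": "create", "arity": 2},
     "file": "lib/user.ex", "line": 10},
]

grouped = CallSiteFormatter.group_by_caller(call_sites)
print(grouped[0]["lines"])       # [10, 20]
print(grouped[0]["code_lines"])  # [{"line": 20, "code": "Repo.insert(changeset)"}]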
cicada/utils/function_grouper.py (new file)

@@ -0,0 +1,57 @@
"""
Function grouping utilities.

This module provides utilities for grouping functions by name and arity,
eliminating duplication across formatter and other modules.
"""

from typing import Dict, List, Any, Tuple


class FunctionGrouper:
    """
    Groups functions by their name and arity.

    This eliminates duplication of the grouping logic that appears
    multiple times in the formatter module.
    """

    @staticmethod
    def group_by_name_arity(
        functions: List[Dict[str, Any]],
    ) -> Dict[Tuple[str, int], List[Dict[str, Any]]]:
        """
        Group functions by their (name, arity) tuple.

        Multiple function clauses with the same name and arity are grouped
        together. This is common in Elixir where you can define multiple
        clauses for the same function.

        Args:
            functions: List of function dictionaries with 'name' and 'arity' keys

        Returns:
            Dictionary mapping (name, arity) tuples to lists of function clauses

        Example:
            functions = [
                {'name': 'create', 'arity': 1, 'line': 10},
                {'name': 'create', 'arity': 1, 'line': 15},  # Second clause
                {'name': 'create', 'arity': 2, 'line': 20},
            ]
            grouped = FunctionGrouper.group_by_name_arity(functions)
            # Returns:
            # {
            #     ('create', 1): [{'name': 'create', 'arity': 1, ...}, {...}],
            #     ('create', 2): [{'name': 'create', 'arity': 2, ...}]
            # }
        """
        grouped: Dict[Tuple[str, int], List[Dict[str, Any]]] = {}

        for func in functions:
            key = (func["name"], func["arity"])
            if key not in grouped:
                grouped[key] = []
            grouped[key].append(func)

        return grouped
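A matching usage sketch for FunctionGrouper.group_by_name_arity, mirroring the docstring example above; the function dicts are illustrative.

from cicada.utils.function_grouper import FunctionGrouper

# Two clauses of create/1 plus one clause of create/2.
functions = [
    {"name": "create", "arity": 1, "line": 10},
    {"name": "create", "arity": 1, "line": 15},
    {"name": "create", "arity": 2, "line": 20},
]

grouped = FunctionGrouper.group_by_name_arity(functions)
print(len(grouped[("create", 1)]))  # 2
print(len(grouped[("create", 2)]))  # 1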
cicada/utils/hash_utils.py (new file)

@@ -0,0 +1,173 @@
"""
Utilities for computing and managing file hashes for incremental indexing.

This module provides MD5-based file hashing to detect changes in the codebase
and enable incremental reindexing, avoiding reprocessing of unchanged files.
"""

import hashlib
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Tuple


def compute_file_hash(file_path: str) -> str:
    """
    Compute MD5 hash of a file's content.

    Args:
        file_path: Path to the file to hash

    Returns:
        MD5 hash as hexadecimal string

    Raises:
        FileNotFoundError: If file doesn't exist
        IOError: If file cannot be read
    """
    # Note: MD5 is used here for speed, not security. This is for content-based
    # change detection, not cryptographic purposes. MD5 is significantly faster
    # than SHA256 and collision risk is negligible for our use case.
    hash_md5 = hashlib.md5()
    try:
        with open(file_path, "rb") as f:
            # Read in chunks to handle large files efficiently
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()
    except FileNotFoundError:
        raise FileNotFoundError(f"File not found: {file_path}")
    except Exception as e:
        raise IOError(f"Error reading file {file_path}: {e}")


def load_file_hashes(cicada_dir: str) -> Dict[str, str]:
    """
    Load file hashes from .cicada/hashes.json.

    Args:
        cicada_dir: Path to the .cicada directory

    Returns:
        Dictionary mapping file paths to MD5 hashes.
        Returns empty dict if hashes.json doesn't exist.
    """
    hashes_path = Path(cicada_dir) / "hashes.json"

    if not hashes_path.exists():
        return {}

    try:
        with open(hashes_path, "r", encoding="utf-8") as f:
            data = json.load(f)
            return data.get("hashes", {})
    except (json.JSONDecodeError, IOError) as e:
        print(f"Warning: Could not load hashes.json: {e}")
        return {}


def save_file_hashes(cicada_dir: str, hashes: Dict[str, str]) -> None:
    """
    Save file hashes to .cicada/hashes.json.

    Args:
        cicada_dir: Path to the .cicada directory
        hashes: Dictionary mapping file paths to MD5 hashes
    """
    hashes_path = Path(cicada_dir) / "hashes.json"

    # Ensure .cicada directory exists
    os.makedirs(cicada_dir, exist_ok=True)

    data = {
        "version": "1.0",
        "hashes": hashes,
        "last_updated": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }

    try:
        with open(hashes_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
    except IOError as e:
        print(f"Warning: Could not save hashes.json: {e}")


def detect_file_changes(
    files: List[str], old_hashes: Dict[str, str], repo_path: str | None = None
) -> Tuple[List[str], List[str], List[str]]:
    """
    Detect new, modified, and deleted files by comparing hashes.

    Args:
        files: List of current file paths (relative to repo root)
        old_hashes: Dictionary of file paths to their previous MD5 hashes
        repo_path: Optional repository root path. If provided, file paths
            will be resolved relative to this path.

    Returns:
        Tuple of (new_files, modified_files, deleted_files)
        - new_files: Files that didn't exist in old_hashes
        - modified_files: Files whose hash changed
        - deleted_files: Files in old_hashes but not in current files list
    """
    new_files = []
    modified_files = []

    current_file_set = set(files)
    old_file_set = set(old_hashes.keys())

    # Detect deleted files
    deleted_files = list(old_file_set - current_file_set)

    # Detect new and modified files
    for file_path in files:
        # Resolve full path if repo_path provided
        full_path = os.path.join(repo_path, file_path) if repo_path else file_path

        if file_path not in old_hashes:
            # New file
            new_files.append(file_path)
        else:
            # Check if modified
            # Note: Race condition possible if file modified between this check
            # and actual indexing, but impact is minimal (re-detected next run)
            try:
                current_hash = compute_file_hash(full_path)
                if current_hash != old_hashes[file_path]:
                    modified_files.append(file_path)
            except (FileNotFoundError, IOError) as e:
                # File might have been deleted after listing
                print(f"Warning: Could not hash {file_path}: {e}")
                deleted_files.append(file_path)

    return new_files, modified_files, deleted_files


def compute_hashes_for_files(
    files: List[str], repo_path: str | None = None
) -> Dict[str, str]:
    """
    Compute MD5 hashes for a list of files.

    Args:
        files: List of file paths (relative to repo root)
        repo_path: Optional repository root path. If provided, file paths
            will be resolved relative to this path.

    Returns:
        Dictionary mapping file paths to MD5 hashes
    """
    hashes = {}

    for file_path in files:
        # Resolve full path if repo_path provided
        full_path = os.path.join(repo_path, file_path) if repo_path else file_path

        try:
            hashes[file_path] = compute_file_hash(full_path)
        except (FileNotFoundError, IOError) as e:
            print(f"Warning: Could not hash {file_path}: {e}")

    return hashes
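A minimal sketch of the incremental-hashing round trip these functions support, using only the signatures defined above; the repo path and file list are illustrative.

from cicada.utils.hash_utils import (
    compute_hashes_for_files,
    detect_file_changes,
    load_file_hashes,
    save_file_hashes,
)

repo_path = "/path/to/repo"              # illustrative
files = ["lib/user.ex", "lib/post.ex"]   # illustrative paths relative to repo root

old_hashes = load_file_hashes(".cicada")  # returns {} on the first run
new, modified, deleted = detect_file_changes(files, old_hashes, repo_path)

# ... reindex only `new` and `modified`, drop entries for `deleted` ...

save_file_hashes(".cicada", compute_hashes_for_files(files, repo_path))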
cicada/utils/index_utils.py (new file)

@@ -0,0 +1,290 @@
"""
Index file loading and saving utilities.

This module provides centralized functions for loading and saving
JSON index files with consistent error handling.
"""

import json
import sys
from pathlib import Path
from typing import Optional, Dict, Any, Union


def load_index(
    index_path: Union[str, Path],
    verbose: bool = False,
    raise_on_error: bool = False,
) -> Optional[Dict[str, Any]]:
    """
    Load a JSON index file.

    Args:
        index_path: Path to the index file
        verbose: If True, print warning messages
        raise_on_error: If True, raise exceptions instead of returning None

    Returns:
        Index dictionary, or None if file doesn't exist or can't be loaded

    Raises:
        FileNotFoundError: If raise_on_error=True and file doesn't exist
        json.JSONDecodeError: If raise_on_error=True and JSON is invalid
        IOError: If raise_on_error=True and file can't be read
    """
    index_file = Path(index_path)

    if not index_file.exists():
        if raise_on_error:
            raise FileNotFoundError(f"Index file not found: {index_path}")
        if verbose:
            print(f"Warning: Index not found at {index_path}", file=sys.stderr)
        return None

    try:
        with open(index_file, "r") as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        if raise_on_error:
            raise
        if verbose:
            print(f"Warning: Could not parse index: {e}", file=sys.stderr)
        return None
    except IOError as e:
        if raise_on_error:
            raise
        if verbose:
            print(f"Warning: Could not read index: {e}", file=sys.stderr)
        return None


def save_index(
    index: Dict[str, Any],
    output_path: Union[str, Path],
    indent: int = 2,
    create_dirs: bool = True,
    verbose: bool = False,
) -> None:
    """
    Save an index dictionary to a JSON file.

    Args:
        index: Index dictionary to save
        output_path: Path where the index will be saved
        indent: JSON indentation (default: 2 spaces)
        create_dirs: Create parent directories if they don't exist
        verbose: If True, print confirmation message

    Raises:
        IOError: If file cannot be written
        json.JSONEncodeError: If index cannot be serialized to JSON
    """
    output_file = Path(output_path)

    if create_dirs:
        output_file.parent.mkdir(parents=True, exist_ok=True)

    with open(output_file, "w") as f:
        json.dump(index, f, indent=indent)

    if verbose:
        print(f"Index saved to: {output_path}")


def validate_index_structure(
    index: Any,
    required_keys: Optional[list[str]] = None,
) -> tuple[bool, Optional[str]]:
    """
    Validate the structure of an index dictionary.

    Args:
        index: Index dictionary to validate
        required_keys: List of required top-level keys (default: ['modules', 'metadata'])

    Returns:
        Tuple of (is_valid, error_message)
        error_message is None if index is valid

    Example:
        valid, error = validate_index_structure(index)
        if not valid:
            print(f"Invalid index: {error}")
    """
    if not isinstance(index, dict):
        return False, "Index must be a dictionary"

    if required_keys is None:
        required_keys = ["modules", "metadata"]

    for key in required_keys:
        if key not in index:
            return False, f"Missing required key: {key}"

    # Validate modules structure
    if "modules" in index and not isinstance(index["modules"], dict):
        return False, "'modules' must be a dictionary"

    # Validate metadata structure
    if "metadata" in index and not isinstance(index["metadata"], dict):
        return False, "'metadata' must be a dictionary"

    return True, None


def merge_indexes(
    *indexes: Dict[str, Any],
    strategy: str = "last_wins",
) -> Dict[str, Any]:
    """
    Merge multiple index dictionaries.

    Args:
        *indexes: Variable number of index dictionaries to merge
        strategy: Merge strategy ('last_wins' or 'first_wins')
            - 'last_wins': Later indexes override earlier ones
            - 'first_wins': Earlier indexes take precedence

    Returns:
        Merged index dictionary

    Example:
        merged = merge_indexes(index1, index2, index3, strategy='last_wins')
    """
    if not indexes:
        return {}

    # Start with empty structure
    merged = {
        "modules": {},
        "metadata": {},
    }

    if strategy == "last_wins":
        index_list = list(indexes)
    elif strategy == "first_wins":
        index_list = list(reversed(indexes))
    else:
        raise ValueError(f"Unknown merge strategy: {strategy}")

    # Merge modules
    for index in index_list:
        if "modules" in index:
            merged["modules"].update(index["modules"])

    # Merge metadata (later ones override)
    for index in index_list:
        if "metadata" in index:
            merged["metadata"].update(index["metadata"])

    return merged


def get_index_stats(index: Dict[str, Any]) -> Dict[str, Any]:
    """
    Get statistics about an index.

    Args:
        index: Index dictionary

    Returns:
        Dictionary with statistics:
        - total_modules: Number of modules
        - total_functions: Total function count
        - public_functions: Public function count
        - private_functions: Private function count

    Example:
        stats = get_index_stats(index)
        print(f"Index contains {stats['total_modules']} modules")
    """
    stats = {
        "total_modules": 0,
        "total_functions": 0,
        "public_functions": 0,
        "private_functions": 0,
    }

    if "modules" not in index:
        return stats

    modules = index["modules"]
    stats["total_modules"] = len(modules)

    for module_data in modules.values():
        if "functions" in module_data:
            functions = module_data["functions"]
            stats["total_functions"] += len(functions)

            for func in functions:
                if func.get("type") == "def":
                    stats["public_functions"] += 1
                elif func.get("type") == "defp":
                    stats["private_functions"] += 1

    return stats


def merge_indexes_incremental(
    old_index: Dict[str, Any],
    new_index: Dict[str, Any],
    deleted_files: list[str],
) -> Dict[str, Any]:
    """
    Merge old and new indexes for incremental reindexing.

    This specialized merge function:
    1. Keeps all modules from old_index that aren't in deleted files
    2. Adds/updates modules from new_index (new and modified files)
    3. Removes modules whose files were deleted
    4. Updates metadata with new counts and timestamp

    Args:
        old_index: Existing index dictionary
        new_index: Index from newly processed files
        deleted_files: List of file paths that were deleted

    Returns:
        Merged index dictionary with updated modules and metadata

    Example:
        merged = merge_indexes_incremental(
            old_index=existing_index,
            new_index=changed_files_index,
            deleted_files=['lib/deleted.ex']
        )
    """
    # Start with empty structure
    merged = {
        "modules": {},
        "metadata": {},
    }

    # Convert deleted files list to set for O(1) lookup
    deleted_set = set(deleted_files)

    # Keep modules from old_index that aren't deleted
    if "modules" in old_index:
        for module_name, module_data in old_index["modules"].items():
            file_path = module_data.get("file", "")
            if file_path not in deleted_set:
                merged["modules"][module_name] = module_data

    # Add/update modules from new_index (overrides old ones with same name)
    if "modules" in new_index:
        merged["modules"].update(new_index["modules"])

    # Merge metadata - take from new_index if available, else old_index
    if "metadata" in new_index:
        merged["metadata"].update(new_index["metadata"])
    elif "metadata" in old_index:
        merged["metadata"].update(old_index["metadata"])

    # Update module and function counts
    stats = get_index_stats(merged)
    merged["metadata"]["total_modules"] = stats["total_modules"]
    merged["metadata"]["total_functions"] = stats["total_functions"]
    merged["metadata"]["public_functions"] = stats["public_functions"]
    merged["metadata"]["private_functions"] = stats["private_functions"]

    return merged
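A minimal sketch tying these helpers together for an incremental reindex, using only the functions defined above; the index path and the stub new_index are illustrative, not part of the package.

from cicada.utils.index_utils import (
    load_index,
    merge_indexes_incremental,
    save_index,
    validate_index_structure,
)

index_path = ".cicada/index.json"   # illustrative location
old_index = load_index(index_path, verbose=True) or {"modules": {}, "metadata": {}}

# new_index would normally come from reindexing changed files; stubbed here.
new_index = {
    "modules": {"MyApp.User": {"file": "lib/user.ex", "functions": []}},
    "metadata": {},
}

merged = merge_indexes_incremental(old_index, new_index, deleted_files=["lib/old.ex"])

ok, error = validate_index_structure(merged)
if ok:
    save_index(merged, index_path, verbose=True)
else:
    print(f"Invalid index: {error}")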