cortexcode 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cortexcode/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """CortexCode - Lightweight code indexing for AI assistants."""
2
+
3
+ __version__ = "0.1.0"
cortexcode/analysis.py ADDED
@@ -0,0 +1,331 @@
1
+ """Code analysis — dead code detection, complexity metrics, and change impact analysis."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+
8
def detect_dead_code(index: dict) -> list[dict[str, Any]]:
    """Find symbols that are defined but never called by any other symbol.

    Args:
        index: Project index with "call_graph" (caller name -> list of callee
            names) and "files" (relative path -> per-file dict with "symbols"
            and "imports" lists).

    Returns:
        A list of dicts describing potentially dead symbols, each with
        "name", "type", "file", "line", "framework", and "reason" keys.
    """
    call_graph = index.get("call_graph", {})
    files = index.get("files", {})

    # Build the set of all called names. Self-calls are excluded: a recursive
    # function that no *other* symbol calls is still dead code, and counting
    # its own call would hide it from this report.
    all_called: set[str] = set()
    for caller, callees in call_graph.items():
        all_called.update(c for c in callees if c != caller)

    # Also gather names referenced in imports across files.
    all_imported: set[str] = set()
    for file_data in files.values():
        if not isinstance(file_data, dict):
            continue
        for imp in file_data.get("imports", []):
            all_imported.update(imp.get("imported", []))

    all_referenced = all_called | all_imported

    # Collect all defined symbols that are never referenced.
    dead: list[dict[str, Any]] = []
    for rel_path, file_data in files.items():
        if not isinstance(file_data, dict):
            continue
        for sym in file_data.get("symbols", []):
            name = sym.get("name", "")
            if not name:
                # Unnamed/anonymous symbols cannot be matched against the
                # call graph; skip rather than emit a bogus "" entry.
                continue

            # Skip entry points, constructors, overrides, test helpers.
            if _is_likely_entrypoint(name, sym, rel_path):
                continue

            if name not in all_referenced:
                dead.append({
                    "name": name,
                    "type": sym.get("type", ""),
                    "file": rel_path,
                    "line": sym.get("line", 0),
                    "framework": sym.get("framework"),
                    "reason": "never called or imported by any other symbol",
                })

    return dead


def _is_likely_entrypoint(name: str, sym: dict, file_path: str) -> bool:
    """Check if a symbol is likely an entry point that won't appear in call graph."""
    # Framework-detected symbols are likely wired up by the framework itself.
    if sym.get("framework"):
        return True

    # Common entry point names.
    entrypoint_names = {
        "main", "app", "init", "__init__", "setup", "configure", "register",
        "run", "start", "bootstrap", "index", "default", "handler",
    }
    if name.lower() in entrypoint_names:
        return True

    # Classes are often instantiated dynamically or used externally.
    if sym.get("type") == "class":
        return True

    # Anything in a test/spec file may be invoked by a test runner.
    if "test" in file_path.lower() or "spec" in file_path.lower():
        return True

    # Framework lifecycle methods (React, Angular, iOS, Android, Flutter).
    lifecycle = {
        "componentDidMount", "componentWillUnmount", "render", "build",
        "ngOnInit", "ngOnDestroy", "viewDidLoad", "viewWillAppear",
        "onCreate", "onStart", "onResume", "onPause", "onStop", "onDestroy",
        "initState", "dispose", "didChangeDependencies",
    }
    if name in lifecycle:
        return True

    # Dunder methods are invoked implicitly by the runtime.
    if name.startswith("__") and name.endswith("__"):
        return True

    # Decorator-based routing handlers (likely registered by a framework).
    if name.startswith(("get_", "post_", "handle_")):
        return True

    return False
100
+
101
+
102
def compute_complexity(index: dict, project_root: str | None = None) -> list[dict[str, Any]]:
    """Compute complexity metrics for all functions/methods.

    Metrics per symbol:
    - lines: approximate line count of the function body (when source is readable)
    - params_count: number of parameters
    - calls_count: number of outgoing calls
    - cyclomatic: estimated cyclomatic complexity (branch count + 1)
    - max_nesting: max nesting depth estimate
    - score / rating: aggregate 0-100 score and its qualitative band

    Results are sorted by score, highest first.
    """
    results: list[dict[str, Any]] = []
    base = Path(project_root) if project_root else None

    for rel_path, file_data in index.get("files", {}).items():
        if not isinstance(file_data, dict):
            continue

        # Read the source if possible; without it, line-level metrics are skipped.
        text_lines: list[str] | None = None
        if base is not None:
            try:
                text_lines = (base / rel_path).read_text(encoding="utf-8").split("\n")
            except (OSError, UnicodeDecodeError):
                pass

        for sym in file_data.get("symbols", []):
            if sym.get("type") not in ("function", "method"):
                continue

            start_line = sym.get("line", 0)
            entry: dict[str, Any] = {
                "name": sym.get("name", ""),
                "type": sym.get("type"),
                "file": rel_path,
                "line": start_line,
                "params_count": len(sym.get("params", [])),
                "calls_count": len(sym.get("calls", [])),
            }

            if text_lines and start_line > 0:
                n_body, cyclomatic, depth = _analyze_function_body(text_lines, start_line - 1)
                entry["lines"] = n_body
                entry["cyclomatic"] = cyclomatic
                entry["max_nesting"] = depth

            # Aggregate 0-100 score plus a qualitative band.
            score = _complexity_score(entry)
            entry["score"] = score
            for limit, label in ((20, "low"), (50, "medium"), (80, "high")):
                if score < limit:
                    entry["rating"] = label
                    break
            else:
                entry["rating"] = "critical"

            results.append(entry)

    # Most complex symbols first.
    return sorted(results, key=lambda m: m.get("score", 0), reverse=True)
164
+
165
+
166
+ def _analyze_function_body(lines: list[str], start_idx: int) -> tuple[int, int, int]:
167
+ """Analyze function body for line count, cyclomatic complexity, and nesting depth.
168
+
169
+ Returns: (line_count, cyclomatic_complexity, max_nesting_depth)
170
+ """
171
+ # Branch keywords that increase cyclomatic complexity
172
+ branch_re = re.compile(
173
+ r'\b(if|elif|else if|for|while|catch|except|case|&&|\|\||and |or |when)\b'
174
+ )
175
+
176
+ # Find the end of the function by tracking indentation / braces
177
+ start_line = lines[start_idx] if start_idx < len(lines) else ""
178
+ start_indent = len(start_line) - len(start_line.lstrip())
179
+
180
+ body_lines = 0
181
+ branch_count = 0
182
+ max_nesting = 0
183
+ brace_depth = 0
184
+ indent_based = "def " in start_line or start_line.strip().endswith(":")
185
+
186
+ for i in range(start_idx + 1, min(start_idx + 500, len(lines))):
187
+ line = lines[i]
188
+ stripped = line.strip()
189
+
190
+ if not stripped or stripped.startswith("#") or stripped.startswith("//") or stripped.startswith("*"):
191
+ continue
192
+
193
+ if indent_based:
194
+ # Python-style: end when we see a line at same or lower indent
195
+ current_indent = len(line) - len(line.lstrip())
196
+ if current_indent <= start_indent and stripped and not stripped.startswith((")", "]", "}")):
197
+ break
198
+ nesting = (current_indent - start_indent) // 4
199
+ else:
200
+ # Brace-style
201
+ brace_depth += stripped.count("{") - stripped.count("}")
202
+ if brace_depth <= 0 and body_lines > 0:
203
+ break
204
+ nesting = brace_depth
205
+
206
+ body_lines += 1
207
+ max_nesting = max(max_nesting, nesting)
208
+ branch_count += len(branch_re.findall(stripped))
209
+
210
+ cyclomatic = branch_count + 1
211
+ return body_lines, cyclomatic, max_nesting
212
+
213
+
214
+ def _complexity_score(metrics: dict) -> int:
215
+ """Compute a 0-100 complexity score from metrics."""
216
+ score = 0.0
217
+
218
+ # Line count (0-30 points)
219
+ body_lines = metrics.get("lines", 0)
220
+ if body_lines > 100:
221
+ score += 30
222
+ elif body_lines > 50:
223
+ score += 20
224
+ elif body_lines > 25:
225
+ score += 10
226
+ elif body_lines > 10:
227
+ score += 5
228
+
229
+ # Cyclomatic (0-30 points)
230
+ cyclomatic = metrics.get("cyclomatic", 1)
231
+ if cyclomatic > 20:
232
+ score += 30
233
+ elif cyclomatic > 10:
234
+ score += 20
235
+ elif cyclomatic > 5:
236
+ score += 10
237
+ elif cyclomatic > 3:
238
+ score += 5
239
+
240
+ # Nesting depth (0-20 points)
241
+ nesting = metrics.get("max_nesting", 0)
242
+ if nesting > 5:
243
+ score += 20
244
+ elif nesting > 3:
245
+ score += 10
246
+ elif nesting > 2:
247
+ score += 5
248
+
249
+ # Params count (0-10 points)
250
+ params = metrics.get("params_count", 0)
251
+ if params > 7:
252
+ score += 10
253
+ elif params > 4:
254
+ score += 5
255
+
256
+ # Calls count (0-10 points)
257
+ calls = metrics.get("calls_count", 0)
258
+ if calls > 15:
259
+ score += 10
260
+ elif calls > 8:
261
+ score += 5
262
+
263
+ return min(100, int(score))
264
+
265
+
266
def analyze_change_impact(index: dict, symbol_name: str) -> dict[str, Any]:
    """Analyze what would be impacted if a symbol is changed.

    Returns a dict with:
        direct_callers: symbols that call this one
        indirect_callers: symbols that call the direct callers (2nd-degree)
        affected_files: non-test files containing affected symbols
        affected_tests: test files that may need updating
        importing_files: files whose dependencies include an affected file
        total_impact: count of affected symbols, excluding the symbol itself
        risk: "low" / "medium" / "high"
    """
    call_graph = index.get("call_graph", {})
    files = index.get("files", {})

    # Invert the call graph: callee -> set of callers.
    callers_of: dict[str, set[str]] = {}
    for src, targets in call_graph.items():
        for tgt in targets:
            callers_of.setdefault(tgt, set()).add(src)

    direct = list(callers_of.get(symbol_name, set()))

    # Second-degree callers: anyone calling a direct caller, minus the
    # symbol itself and the direct callers already collected.
    indirect: set[str] = set()
    for d in direct:
        indirect.update(
            c for c in callers_of.get(d, set())
            if c != symbol_name and c not in direct
        )

    # Map each symbol name to the file that defines it.
    location: dict[str, str] = {}
    for rel_path, file_data in files.items():
        if not isinstance(file_data, dict):
            continue
        for sym in file_data.get("symbols", []):
            location[sym.get("name", "")] = rel_path

    impacted = {symbol_name} | set(direct) | indirect
    touched_files = {location[s] for s in impacted if s in location}

    # Split into test and non-test files.
    tests = [f for f in touched_files if "test" in f.lower() or "spec" in f.lower()]
    non_tests = [f for f in touched_files if f not in tests]

    # Files whose dependency list includes a file containing affected symbols.
    deps = index.get("file_dependencies", {})
    dependents = {
        f for f, targets in deps.items()
        if any(af in targets for af in non_tests)
    }

    n_impacted = len(impacted)
    return {
        "symbol": symbol_name,
        "direct_callers": sorted(direct),
        "indirect_callers": sorted(indirect),
        "affected_files": sorted(non_tests),
        "affected_tests": sorted(tests),
        "importing_files": sorted(dependents - touched_files),
        "total_impact": n_impacted - 1,  # exclude self
        "risk": "high" if n_impacted > 10 else "medium" if n_impacted > 3 else "low",
    }