cortexcode 0.2.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexcode/advanced_analysis.py +816 -0
- cortexcode/docs/__init__.py +19 -0
- cortexcode/docs/generator.py +573 -0
- cortexcode/docs/html_generators.py +114 -0
- cortexcode/docs/javascript.py +373 -0
- cortexcode/docs/templates.py +174 -0
- cortexcode/docs.py +38 -1266
- cortexcode/indexer.py +28 -4
- cortexcode/mcp_server.py +142 -0
- {cortexcode-0.2.2.dist-info → cortexcode-0.4.0.dist-info}/METADATA +80 -4
- cortexcode-0.4.0.dist-info/RECORD +27 -0
- cortexcode-0.2.2.dist-info/RECORD +0 -21
- {cortexcode-0.2.2.dist-info → cortexcode-0.4.0.dist-info}/WHEEL +0 -0
- {cortexcode-0.2.2.dist-info → cortexcode-0.4.0.dist-info}/entry_points.txt +0 -0
- {cortexcode-0.2.2.dist-info → cortexcode-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {cortexcode-0.2.2.dist-info → cortexcode-0.4.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,816 @@
|
|
|
1
|
+
"""Advanced code analysis — duplication, security, circular deps, API endpoints, doc generation."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import hashlib
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
from difflib import SequenceMatcher
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# ─── Fuzzy Search ───────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
def fuzzy_search(index: dict, query: str, threshold: float = 0.5, limit: int = 20) -> list[dict[str, Any]]:
    """Rank indexed symbols by how closely their name resembles ``query``.

    All comparisons are case-insensitive. A name that contains the query
    scores 0.9 (1.0 when it equals the query). Otherwise the score is the
    ``SequenceMatcher`` ratio, raised to at least 0.75 when the query occurs
    in the name's initials and to at least 0.8 when every whitespace-separated
    query word occurs in the name.

    Args:
        index: Mapping with a ``"files"`` entry of ``{rel_path: file_data}``;
            non-dict ``file_data`` values are skipped.
        query: Text to look for.
        threshold: Minimum score a symbol needs to be returned.
        limit: Maximum number of results.

    Returns:
        Result dicts (name, type, file, line, params, doc, score) ordered by
        descending score.
    """
    needle = query.lower()
    words = needle.split()
    matches: list[dict[str, Any]] = []

    for rel_path, file_data in index.get("files", {}).items():
        if not isinstance(file_data, dict):
            continue

        for sym in file_data.get("symbols", []):
            name = sym.get("name", "")
            haystack = name.lower()

            if needle in haystack:
                score = 1.0 if needle == haystack else 0.9
            else:
                score = SequenceMatcher(None, needle, haystack).ratio()
                # The bonuses below can only lift a score to 0.75 / 0.8, so
                # they are irrelevant once a substring match already gave 0.9+.
                if needle in _extract_initials(name).lower():
                    score = max(score, 0.75)
                if all(word in haystack for word in words):
                    score = max(score, 0.8)

            if score < threshold:
                continue

            matches.append({
                "name": name,
                "type": sym.get("type"),
                "file": rel_path,
                "line": sym.get("line"),
                "params": sym.get("params", []),
                "doc": sym.get("doc"),
                "score": round(score, 3),
            })

    matches.sort(key=lambda item: item["score"], reverse=True)
    return matches[:limit]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def regex_search(index: dict, pattern: str, sym_type: str | None = None, limit: int = 20) -> list[dict[str, Any]]:
|
|
60
|
+
"""Search symbols using regex pattern."""
|
|
61
|
+
try:
|
|
62
|
+
regex = re.compile(pattern, re.IGNORECASE)
|
|
63
|
+
except re.error as e:
|
|
64
|
+
return [{"error": f"Invalid regex: {e}"}]
|
|
65
|
+
|
|
66
|
+
files = index.get("files", {})
|
|
67
|
+
results = []
|
|
68
|
+
|
|
69
|
+
for rel_path, file_data in files.items():
|
|
70
|
+
if not isinstance(file_data, dict):
|
|
71
|
+
continue
|
|
72
|
+
for sym in file_data.get("symbols", []):
|
|
73
|
+
name = sym.get("name", "")
|
|
74
|
+
if regex.search(name):
|
|
75
|
+
if sym_type and sym.get("type") != sym_type:
|
|
76
|
+
continue
|
|
77
|
+
results.append({
|
|
78
|
+
"name": name,
|
|
79
|
+
"type": sym.get("type"),
|
|
80
|
+
"file": rel_path,
|
|
81
|
+
"line": sym.get("line"),
|
|
82
|
+
"params": sym.get("params", []),
|
|
83
|
+
"doc": sym.get("doc"),
|
|
84
|
+
})
|
|
85
|
+
|
|
86
|
+
return results[:limit]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _extract_initials(name: str) -> str:
|
|
90
|
+
"""Extract initials from camelCase/PascalCase/snake_case name."""
|
|
91
|
+
# camelCase/PascalCase: extract uppercase letters
|
|
92
|
+
initials = re.findall(r'[A-Z]', name)
|
|
93
|
+
if initials:
|
|
94
|
+
return ''.join(initials)
|
|
95
|
+
# snake_case: extract first letter of each word
|
|
96
|
+
parts = name.split('_')
|
|
97
|
+
return ''.join(p[0] for p in parts if p)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# ─── Code Duplication Detection ─────────────────────────────────────────────
|
|
101
|
+
|
|
102
|
+
def detect_duplicates(index: dict, project_root: str | None = None, min_lines: int = 5) -> list[dict[str, Any]]:
|
|
103
|
+
"""Find duplicate or very similar code blocks.
|
|
104
|
+
|
|
105
|
+
Compares function bodies by normalizing whitespace and variable names,
|
|
106
|
+
then computing similarity scores.
|
|
107
|
+
"""
|
|
108
|
+
files = index.get("files", {})
|
|
109
|
+
root = Path(project_root) if project_root else None
|
|
110
|
+
|
|
111
|
+
# Collect all function bodies
|
|
112
|
+
functions: list[dict] = []
|
|
113
|
+
for rel_path, file_data in files.items():
|
|
114
|
+
if not isinstance(file_data, dict):
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
source_lines = None
|
|
118
|
+
if root:
|
|
119
|
+
try:
|
|
120
|
+
source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
|
|
121
|
+
except (OSError, UnicodeDecodeError):
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
if not source_lines:
|
|
125
|
+
continue
|
|
126
|
+
|
|
127
|
+
for sym in file_data.get("symbols", []):
|
|
128
|
+
if sym.get("type") not in ("function", "method"):
|
|
129
|
+
continue
|
|
130
|
+
|
|
131
|
+
line = sym.get("line", 0)
|
|
132
|
+
if line <= 0:
|
|
133
|
+
continue
|
|
134
|
+
|
|
135
|
+
body = _extract_function_body(source_lines, line - 1)
|
|
136
|
+
if len(body.split("\n")) < min_lines:
|
|
137
|
+
continue
|
|
138
|
+
|
|
139
|
+
normalized = _normalize_code(body)
|
|
140
|
+
functions.append({
|
|
141
|
+
"name": sym.get("name", ""),
|
|
142
|
+
"file": rel_path,
|
|
143
|
+
"line": line,
|
|
144
|
+
"body": body,
|
|
145
|
+
"normalized": normalized,
|
|
146
|
+
"hash": hashlib.md5(normalized.encode()).hexdigest(),
|
|
147
|
+
})
|
|
148
|
+
|
|
149
|
+
# Group by hash for exact duplicates
|
|
150
|
+
hash_groups: dict[str, list] = {}
|
|
151
|
+
for func in functions:
|
|
152
|
+
h = func["hash"]
|
|
153
|
+
if h not in hash_groups:
|
|
154
|
+
hash_groups[h] = []
|
|
155
|
+
hash_groups[h].append(func)
|
|
156
|
+
|
|
157
|
+
duplicates = []
|
|
158
|
+
seen_pairs = set()
|
|
159
|
+
|
|
160
|
+
# Exact duplicates
|
|
161
|
+
for h, group in hash_groups.items():
|
|
162
|
+
if len(group) > 1:
|
|
163
|
+
duplicates.append({
|
|
164
|
+
"type": "exact",
|
|
165
|
+
"similarity": 1.0,
|
|
166
|
+
"functions": [
|
|
167
|
+
{"name": f["name"], "file": f["file"], "line": f["line"]}
|
|
168
|
+
for f in group
|
|
169
|
+
],
|
|
170
|
+
"lines": len(group[0]["body"].split("\n")),
|
|
171
|
+
})
|
|
172
|
+
for f in group:
|
|
173
|
+
seen_pairs.add((f["file"], f["line"]))
|
|
174
|
+
|
|
175
|
+
# Near duplicates (similarity > 0.8)
|
|
176
|
+
for i, f1 in enumerate(functions):
|
|
177
|
+
if (f1["file"], f1["line"]) in seen_pairs:
|
|
178
|
+
continue
|
|
179
|
+
for f2 in functions[i + 1:]:
|
|
180
|
+
if (f2["file"], f2["line"]) in seen_pairs:
|
|
181
|
+
continue
|
|
182
|
+
if f1["hash"] == f2["hash"]:
|
|
183
|
+
continue
|
|
184
|
+
|
|
185
|
+
sim = SequenceMatcher(None, f1["normalized"], f2["normalized"]).ratio()
|
|
186
|
+
if sim > 0.8:
|
|
187
|
+
duplicates.append({
|
|
188
|
+
"type": "near",
|
|
189
|
+
"similarity": round(sim, 3),
|
|
190
|
+
"functions": [
|
|
191
|
+
{"name": f1["name"], "file": f1["file"], "line": f1["line"]},
|
|
192
|
+
{"name": f2["name"], "file": f2["file"], "line": f2["line"]},
|
|
193
|
+
],
|
|
194
|
+
"lines": max(
|
|
195
|
+
len(f1["body"].split("\n")),
|
|
196
|
+
len(f2["body"].split("\n")),
|
|
197
|
+
),
|
|
198
|
+
})
|
|
199
|
+
|
|
200
|
+
duplicates.sort(key=lambda x: x["similarity"], reverse=True)
|
|
201
|
+
return duplicates
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def _extract_function_body(lines: list[str], start_idx: int) -> str:
|
|
205
|
+
"""Extract function body from source lines."""
|
|
206
|
+
if start_idx >= len(lines):
|
|
207
|
+
return ""
|
|
208
|
+
|
|
209
|
+
start_line = lines[start_idx]
|
|
210
|
+
start_indent = len(start_line) - len(start_line.lstrip())
|
|
211
|
+
indent_based = "def " in start_line or start_line.strip().endswith(":")
|
|
212
|
+
|
|
213
|
+
body = [lines[start_idx]]
|
|
214
|
+
brace_depth = 0
|
|
215
|
+
|
|
216
|
+
for i in range(start_idx + 1, min(start_idx + 300, len(lines))):
|
|
217
|
+
line = lines[i]
|
|
218
|
+
stripped = line.strip()
|
|
219
|
+
|
|
220
|
+
if not stripped:
|
|
221
|
+
body.append(line)
|
|
222
|
+
continue
|
|
223
|
+
|
|
224
|
+
if indent_based:
|
|
225
|
+
current_indent = len(line) - len(line.lstrip())
|
|
226
|
+
if current_indent <= start_indent and stripped and not stripped.startswith((")", "]", "}")):
|
|
227
|
+
break
|
|
228
|
+
else:
|
|
229
|
+
brace_depth += stripped.count("{") - stripped.count("}")
|
|
230
|
+
if brace_depth <= 0 and len(body) > 1:
|
|
231
|
+
body.append(line)
|
|
232
|
+
break
|
|
233
|
+
|
|
234
|
+
body.append(line)
|
|
235
|
+
|
|
236
|
+
return "\n".join(body)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def _normalize_code(code: str) -> str:
|
|
240
|
+
"""Normalize code for comparison — remove comments, normalize whitespace, replace identifiers."""
|
|
241
|
+
lines = []
|
|
242
|
+
for line in code.split("\n"):
|
|
243
|
+
stripped = line.strip()
|
|
244
|
+
# Remove comments
|
|
245
|
+
if stripped.startswith("#") or stripped.startswith("//"):
|
|
246
|
+
continue
|
|
247
|
+
# Remove inline comments
|
|
248
|
+
stripped = re.sub(r'#.*$', '', stripped)
|
|
249
|
+
stripped = re.sub(r'//.*$', '', stripped)
|
|
250
|
+
stripped = stripped.strip()
|
|
251
|
+
if stripped:
|
|
252
|
+
lines.append(stripped)
|
|
253
|
+
|
|
254
|
+
result = "\n".join(lines)
|
|
255
|
+
# Normalize string literals
|
|
256
|
+
result = re.sub(r'"[^"]*"', '"STR"', result)
|
|
257
|
+
result = re.sub(r"'[^']*'", "'STR'", result)
|
|
258
|
+
# Normalize numbers
|
|
259
|
+
result = re.sub(r'\b\d+\b', 'NUM', result)
|
|
260
|
+
return result
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# ─── Security Scan ──────────────────────────────────────────────────────────
|
|
264
|
+
|
|
265
|
+
# Each table below holds (regex, description, severity) triples. The regexes
# are applied one source line at a time by security_scan, which compiles them
# case-insensitively.

# Credentials and connection strings committed in source.
SECRET_PATTERNS = [
    (r'(?:api[_-]?key|apikey)\s*[:=]\s*["\']?[a-zA-Z0-9_\-]{16,}', "API key", "high"),
    (r'(?:secret|password|passwd|pwd)\s*[:=]\s*["\'][^"\']{4,}["\']', "Hardcoded password/secret", "critical"),
    (r'(?:token|auth[_-]?token|access[_-]?token)\s*[:=]\s*["\']?[a-zA-Z0-9_\-\.]{16,}', "Hardcoded token", "high"),
    (r'(?:aws[_-]?access|aws[_-]?secret)\s*[:=]\s*["\']?[A-Za-z0-9/+=]{16,}', "AWS credential", "critical"),
    (r'(?:private[_-]?key|ssh[_-]?key)\s*[:=]\s*["\'].*["\']', "Private key reference", "critical"),
    (r'(?:jdbc|mongodb|mysql|postgres|redis)://[^\s"\']+', "Database connection string", "high"),
    (r'sk-[a-zA-Z0-9]{20,}', "OpenAI API key", "critical"),
    (r'ghp_[a-zA-Z0-9]{36}', "GitHub personal access token", "critical"),
    (r'xoxb-[a-zA-Z0-9-]+', "Slack bot token", "critical"),
    (r'(?:AKIA|ASIA)[A-Z0-9]{16}', "AWS Access Key ID", "critical"),
]

# Query calls whose SQL text is assembled from strings.
SQL_INJECTION_PATTERNS = [
    (r'(?:execute|query|raw)\s*\(\s*(?:f["\']|["\'].*%|.*\.format\(|.*\+\s*(?:req|request|params|input))', "SQL injection risk — use parameterized queries", "high"),
    (r'(?:cursor\.execute|db\.query)\s*\(\s*["\'].*\{', "SQL injection risk — f-string in query", "high"),
]

# DOM sinks that render unescaped markup.
XSS_PATTERNS = [
    (r'innerHTML\s*=\s*(?![\'"]\s*$)', "Potential XSS — innerHTML assignment", "medium"),
    (r'dangerouslySetInnerHTML', "Potential XSS — dangerouslySetInnerHTML", "medium"),
    (r'document\.write\s*\(', "Potential XSS — document.write", "medium"),
]

# Dynamic evaluation, shell execution, unsafe deserialisation, weak randomness.
UNSAFE_PATTERNS = [
    (r'\beval\s*\(', "Unsafe eval() usage", "high"),
    (r'\bexec\s*\(', "Unsafe exec() usage", "high"),
    (r'subprocess\.(call|run|Popen)\s*\(.*shell\s*=\s*True', "Shell injection risk", "high"),
    (r'os\.system\s*\(', "Shell injection risk — os.system", "high"),
    (r'pickle\.loads?\s*\(', "Unsafe deserialization — pickle", "medium"),
    (r'yaml\.load\s*\([^)]*\)\s*$', "Unsafe YAML load (use safe_load)", "medium"),
    (r'Math\.random\(\)', "Insecure randomness — use crypto.getRandomValues", "low"),
]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def security_scan(project_root: str, index: dict | None = None) -> dict[str, Any]:
    """Scan source files line by line for common security problems.

    Files come from ``index["files"]`` when an index with files is given;
    otherwise the project tree is searched for a fixed set of source
    extensions. Every line that is not a ``#``, ``//`` or ``*`` comment is
    checked against the secret, SQL-injection, XSS and unsafe-code pattern
    tables. At most one finding is kept per (file, line, category). Unsafe-code
    checks are skipped for files whose path contains "test" or "spec".

    Note that each finding's ``snippet`` holds up to 120 characters of the
    matching line, which for secret findings includes the secret itself.

    Args:
        project_root: Directory that index paths are relative to.
        index: Optional index mapping with a ``"files"`` entry.

    Returns:
        Dict with ``scanned_files``, ``total_findings``, ``summary`` (count per
        category), ``severity_counts`` and ``findings`` (most severe first).
    """
    root = Path(project_root)
    files = index.get("files", {}) if index else {}

    if files:
        file_paths = [root / rel_path for rel_path in files]
    else:
        # A tuple plus sorted() keeps the scan order, and with it the order of
        # equally severe findings, stable between runs.
        extensions = (".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php")
        file_paths = [
            found
            for ext in extensions
            for found in sorted(root.rglob(f"*{ext}"))
        ]

    pattern_tables = (
        ("secret", SECRET_PATTERNS),
        ("sql_injection", SQL_INJECTION_PATTERNS),
        ("xss", XSS_PATTERNS),
        ("unsafe_code", UNSAFE_PATTERNS),
    )
    compiled = [
        (re.compile(pattern, re.IGNORECASE), desc, severity, category)
        for category, table in pattern_tables
        for pattern, desc, severity in table
    ]

    findings: list[dict] = []
    seen = set()
    scanned_files = 0

    for file_path in file_paths:
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue

        scanned_files += 1
        try:
            rel = str(file_path.relative_to(root))
        except ValueError:
            # Index entries may be absolute or otherwise outside the root.
            rel = str(file_path)

        # The path does not change per line, so decide this once per file.
        rel_lower = rel.lower()
        is_test = "test" in rel_lower or "spec" in rel_lower

        for line_num, line in enumerate(content.split("\n"), 1):
            stripped = line.strip()
            if stripped.startswith(("#", "//", "*")):
                continue

            for regex, desc, severity, category in compiled:
                if is_test and category == "unsafe_code":
                    continue
                key = (rel, line_num, category)
                if key in seen or not regex.search(line):
                    continue
                seen.add(key)
                findings.append({
                    "file": rel,
                    "line": line_num,
                    "category": category,
                    "severity": severity,
                    "description": desc,
                    "snippet": stripped[:120],
                })

    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda item: severity_order.get(item["severity"], 99))

    summary: dict[str, int] = {}
    severity_counts = {level: 0 for level in ("critical", "high", "medium", "low")}
    for finding in findings:
        summary[finding["category"]] = summary.get(finding["category"], 0) + 1
        if finding["severity"] in severity_counts:
            severity_counts[finding["severity"]] += 1

    return {
        "scanned_files": scanned_files,
        "total_findings": len(findings),
        "summary": summary,
        "severity_counts": severity_counts,
        "findings": findings,
    }
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
# ─── Circular Dependency Detection ──────────────────────────────────────────
|
|
387
|
+
|
|
388
|
+
def detect_circular_deps(index: dict) -> list[dict[str, Any]]:
    """Report cycles in the file import graph and in the call graph.

    Cycles are obtained from ``_find_cycles`` for ``index["file_dependencies"]``
    and ``index["call_graph"]``. A cycle's ``length`` is the number of distinct
    nodes taking part in it; when the cycle list is closed (first node repeated
    at the end) that repetition is not counted, so two files importing each
    other form a cycle of length 2.

    Severity:
        * file imports: ``"high"`` for cycles of at most 2 nodes, else ``"medium"``;
        * call cycles: only cycles of at most 5 nodes are reported, ``"medium"``
          for at most 2 nodes, else ``"low"``.

    Args:
        index: Index mapping; missing graphs are treated as empty.

    Returns:
        Cycle records (type, cycle, length, severity), shortest first.
    """
    def _node_count(cycle: list) -> int:
        closed = len(cycle) > 1 and cycle[0] == cycle[-1]
        return len(cycle) - 1 if closed else len(cycle)

    results = []

    # File-level circular imports.
    for cycle in _find_cycles(index.get("file_dependencies", {})):
        nodes = _node_count(cycle)
        results.append({
            "type": "file_import",
            "cycle": cycle,
            "length": nodes,
            "severity": "high" if nodes <= 2 else "medium",
        })

    # Symbol-level circular calls; long cycles are too noisy to report.
    for cycle in _find_cycles(index.get("call_graph", {})):
        nodes = _node_count(cycle)
        if nodes > 5:
            continue
        results.append({
            "type": "call_cycle",
            "cycle": cycle,
            "length": nodes,
            "severity": "medium" if nodes <= 2 else "low",
        })

    results.sort(key=lambda item: item["length"])
    return results
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
def _find_cycles(graph: dict[str, list]) -> list[list[str]]:
|
|
420
|
+
"""Find all cycles in a directed graph using DFS."""
|
|
421
|
+
cycles = []
|
|
422
|
+
visited = set()
|
|
423
|
+
path = []
|
|
424
|
+
path_set = set()
|
|
425
|
+
|
|
426
|
+
def dfs(node: str):
|
|
427
|
+
if node in path_set:
|
|
428
|
+
# Found a cycle — extract it
|
|
429
|
+
idx = path.index(node)
|
|
430
|
+
cycle = path[idx:] + [node]
|
|
431
|
+
# Normalize cycle so smallest element is first
|
|
432
|
+
min_idx = cycle.index(min(cycle[:-1]))
|
|
433
|
+
normalized = cycle[min_idx:-1] + cycle[:min_idx] + [cycle[min_idx]]
|
|
434
|
+
if normalized not in cycles:
|
|
435
|
+
cycles.append(normalized)
|
|
436
|
+
return
|
|
437
|
+
|
|
438
|
+
if node in visited:
|
|
439
|
+
return
|
|
440
|
+
|
|
441
|
+
visited.add(node)
|
|
442
|
+
path.append(node)
|
|
443
|
+
path_set.add(node)
|
|
444
|
+
|
|
445
|
+
for neighbor in graph.get(node, []):
|
|
446
|
+
if neighbor in graph: # Only follow edges to known nodes
|
|
447
|
+
dfs(neighbor)
|
|
448
|
+
|
|
449
|
+
path.pop()
|
|
450
|
+
path_set.discard(node)
|
|
451
|
+
|
|
452
|
+
for node in graph:
|
|
453
|
+
dfs(node)
|
|
454
|
+
|
|
455
|
+
return cycles
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
# ─── API Endpoint Extraction ────────────────────────────────────────────────
|
|
459
|
+
|
|
460
|
+
# (regex, framework) pairs describing how route declarations look in each
# supported web framework.
ENDPOINT_PATTERNS = [
    # Express.js
    (
        r'(?:app|router)\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)',
        "express",
    ),
    # Flask
    (
        r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)',
        "flask",
    ),
    # Django
    (
        r'path\s*\(\s*["\']([^"\']+)["\']',
        "django",
    ),
    # FastAPI
    (
        r'@(?:app|router)\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)',
        "fastapi",
    ),
    # Next.js API routes (file-based)
    (
        r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH)\s*\(',
        "nextjs",
    ),
    # Spring Boot
    (
        r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)',
        "spring",
    ),
    # Go net/http
    (
        r'(?:Handle|HandleFunc)\s*\(\s*["\']([^"\']+)',
        "go-http",
    ),
    # Ruby on Rails
    (
        r'(?:get|post|put|patch|delete)\s+["\']([^"\']+)',
        "rails",
    ),
]
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def extract_endpoints(index: dict, project_root: str | None = None) -> dict[str, Any]:
|
|
481
|
+
"""Extract API endpoints from source code."""
|
|
482
|
+
root = Path(project_root) if project_root else None
|
|
483
|
+
files = index.get("files", {})
|
|
484
|
+
|
|
485
|
+
endpoints: list[dict] = []
|
|
486
|
+
seen = set()
|
|
487
|
+
|
|
488
|
+
for rel_path, file_data in files.items():
|
|
489
|
+
if not isinstance(file_data, dict):
|
|
490
|
+
continue
|
|
491
|
+
|
|
492
|
+
source = None
|
|
493
|
+
if root:
|
|
494
|
+
try:
|
|
495
|
+
source = (root / rel_path).read_text(encoding="utf-8", errors="ignore")
|
|
496
|
+
except OSError:
|
|
497
|
+
continue
|
|
498
|
+
|
|
499
|
+
if not source:
|
|
500
|
+
continue
|
|
501
|
+
|
|
502
|
+
# Next.js file-based routing (file path based)
|
|
503
|
+
if _is_nextjs_route(rel_path):
|
|
504
|
+
methods = re.findall(
|
|
505
|
+
r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s*\(',
|
|
506
|
+
source
|
|
507
|
+
)
|
|
508
|
+
route_path = _nextjs_file_to_route(rel_path)
|
|
509
|
+
for method in methods:
|
|
510
|
+
key = (method.upper(), route_path, rel_path)
|
|
511
|
+
if key not in seen:
|
|
512
|
+
seen.add(key)
|
|
513
|
+
endpoints.append({
|
|
514
|
+
"method": method.upper(),
|
|
515
|
+
"path": route_path,
|
|
516
|
+
"file": rel_path,
|
|
517
|
+
"framework": "nextjs",
|
|
518
|
+
})
|
|
519
|
+
continue # Skip pattern matching for Next.js route files
|
|
520
|
+
|
|
521
|
+
# Pattern-based extraction - only match at line start (not inside strings)
|
|
522
|
+
lines = source.split("\n")
|
|
523
|
+
|
|
524
|
+
# Skip Next.js route files (already handled above)
|
|
525
|
+
if _is_nextjs_route(rel_path):
|
|
526
|
+
continue
|
|
527
|
+
|
|
528
|
+
for line_num, line in enumerate(lines, 1):
|
|
529
|
+
stripped = line.strip()
|
|
530
|
+
|
|
531
|
+
# Skip empty lines and comments
|
|
532
|
+
if not stripped or stripped.startswith("#") or stripped.startswith("//"):
|
|
533
|
+
continue
|
|
534
|
+
|
|
535
|
+
# Only match patterns at the start of a line (after optional whitespace)
|
|
536
|
+
# This avoids matching strings inside code
|
|
537
|
+
|
|
538
|
+
# Express.js: app.get("/path", ...) - must start with app or router
|
|
539
|
+
if stripped.startswith("app.") or stripped.startswith("router."):
|
|
540
|
+
match = re.search(r'\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)', line)
|
|
541
|
+
if match:
|
|
542
|
+
method = match.group(1).upper()
|
|
543
|
+
path = match.group(2)
|
|
544
|
+
key = (method, path, rel_path)
|
|
545
|
+
if key not in seen:
|
|
546
|
+
seen.add(key)
|
|
547
|
+
endpoints.append({
|
|
548
|
+
"method": method,
|
|
549
|
+
"path": path,
|
|
550
|
+
"file": rel_path,
|
|
551
|
+
"line": line_num,
|
|
552
|
+
"framework": "express",
|
|
553
|
+
})
|
|
554
|
+
continue
|
|
555
|
+
|
|
556
|
+
# Flask/FastAPI: @app.route or @app.get - must start with @
|
|
557
|
+
if stripped.startswith("@"):
|
|
558
|
+
match = re.search(r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', line)
|
|
559
|
+
if match:
|
|
560
|
+
method = match.group(1).upper()
|
|
561
|
+
if method == "ROUTE":
|
|
562
|
+
method = "GET"
|
|
563
|
+
path = match.group(2)
|
|
564
|
+
key = (method, path, rel_path)
|
|
565
|
+
if key not in seen:
|
|
566
|
+
seen.add(key)
|
|
567
|
+
endpoints.append({
|
|
568
|
+
"method": method,
|
|
569
|
+
"path": path,
|
|
570
|
+
"file": rel_path,
|
|
571
|
+
"line": line_num,
|
|
572
|
+
"framework": "flask",
|
|
573
|
+
})
|
|
574
|
+
continue
|
|
575
|
+
|
|
576
|
+
# Django: path( - must start with path(
|
|
577
|
+
if "path(" in stripped:
|
|
578
|
+
match = re.search(r'path\s*\(\s*["\']([^"\']+)["\']', line)
|
|
579
|
+
if match:
|
|
580
|
+
path = match.group(1)
|
|
581
|
+
key = ("GET", path, rel_path)
|
|
582
|
+
if key not in seen:
|
|
583
|
+
seen.add(key)
|
|
584
|
+
endpoints.append({
|
|
585
|
+
"method": "GET",
|
|
586
|
+
"path": path,
|
|
587
|
+
"file": rel_path,
|
|
588
|
+
"line": line_num,
|
|
589
|
+
"framework": "django",
|
|
590
|
+
})
|
|
591
|
+
continue
|
|
592
|
+
|
|
593
|
+
# Spring: @GetMapping, @PostMapping, etc.
|
|
594
|
+
match = re.search(r'@(Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)', line)
|
|
595
|
+
if match:
|
|
596
|
+
method = match.group(1)
|
|
597
|
+
if method == "Request":
|
|
598
|
+
method = "GET"
|
|
599
|
+
path = match.group(2)
|
|
600
|
+
key = (method, path, rel_path)
|
|
601
|
+
if key not in seen:
|
|
602
|
+
seen.add(key)
|
|
603
|
+
endpoints.append({
|
|
604
|
+
"method": method,
|
|
605
|
+
"path": path,
|
|
606
|
+
"file": rel_path,
|
|
607
|
+
"line": line_num,
|
|
608
|
+
"framework": "spring",
|
|
609
|
+
})
|
|
610
|
+
continue
|
|
611
|
+
|
|
612
|
+
# Deduplicate
|
|
613
|
+
seen = set()
|
|
614
|
+
unique = []
|
|
615
|
+
for ep in endpoints:
|
|
616
|
+
key = (ep["method"], ep["path"], ep["file"])
|
|
617
|
+
if key not in seen:
|
|
618
|
+
seen.add(key)
|
|
619
|
+
unique.append(ep)
|
|
620
|
+
|
|
621
|
+
unique.sort(key=lambda x: (x["path"], x["method"]))
|
|
622
|
+
|
|
623
|
+
return {
|
|
624
|
+
"count": len(unique),
|
|
625
|
+
"endpoints": unique,
|
|
626
|
+
"frameworks": list(set(ep["framework"] for ep in unique)),
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def _is_nextjs_route(path: str) -> bool:
|
|
631
|
+
"""Check if a file is a Next.js API/app route."""
|
|
632
|
+
normalized = path.replace("\\", "/")
|
|
633
|
+
return (
|
|
634
|
+
("/api/" in normalized and "route." in normalized) or
|
|
635
|
+
("/app/" in normalized and "route." in normalized)
|
|
636
|
+
)
|
|
637
|
+
|
|
638
|
+
|
|
639
|
+
def _is_inside_string(lines: list[str], line_idx: int) -> bool:
|
|
640
|
+
"""Check if a line is inside a string literal."""
|
|
641
|
+
if line_idx < 0 or line_idx >= len(lines):
|
|
642
|
+
return False
|
|
643
|
+
|
|
644
|
+
# Count unescaped quotes before this line to determine if we're in a string
|
|
645
|
+
quote_count = 0
|
|
646
|
+
in_string = False
|
|
647
|
+
current_quote = None
|
|
648
|
+
|
|
649
|
+
for i in range(line_idx + 1):
|
|
650
|
+
line = lines[i]
|
|
651
|
+
for char in line:
|
|
652
|
+
if char in ('"', "'", '`') and (i != line_idx or True):
|
|
653
|
+
if not in_string:
|
|
654
|
+
in_string = True
|
|
655
|
+
current_quote = char
|
|
656
|
+
quote_count = 1
|
|
657
|
+
elif char == current_quote:
|
|
658
|
+
# Check for escaped quote
|
|
659
|
+
if i == line_idx and line.index(char) > 0 and line[line.index(char) - 1] == '\\':
|
|
660
|
+
continue
|
|
661
|
+
quote_count += 1
|
|
662
|
+
if quote_count % 2 == 0:
|
|
663
|
+
in_string = False
|
|
664
|
+
current_quote = None
|
|
665
|
+
|
|
666
|
+
return in_string
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _nextjs_file_to_route(path: str) -> str:
|
|
670
|
+
"""Convert Next.js file path to route path."""
|
|
671
|
+
normalized = path.replace("\\", "/")
|
|
672
|
+
# src/app/api/users/route.ts -> /api/users
|
|
673
|
+
match = re.search(r'(?:src/)?app(/.*)/route\.(?:ts|js|tsx|jsx)', normalized)
|
|
674
|
+
if match:
|
|
675
|
+
return match.group(1)
|
|
676
|
+
# pages/api/users.ts -> /api/users
|
|
677
|
+
match = re.search(r'pages(/.*?)\.(?:ts|js|tsx|jsx)', normalized)
|
|
678
|
+
if match:
|
|
679
|
+
route = match.group(1)
|
|
680
|
+
if route.endswith("/index"):
|
|
681
|
+
route = route[:-6] or "/"
|
|
682
|
+
return route
|
|
683
|
+
return normalized
|
|
684
|
+
|
|
685
|
+
|
|
686
|
+
# ─── Auto-Generate API Docs ─────────────────────────────────────────────────
|
|
687
|
+
|
|
688
|
+
def generate_api_docs(index: dict, project_root: str | None = None) -> dict[str, Any]:
|
|
689
|
+
"""Generate API documentation from function signatures and docstrings."""
|
|
690
|
+
files = index.get("files", {})
|
|
691
|
+
root = Path(project_root) if project_root else None
|
|
692
|
+
|
|
693
|
+
modules: list[dict] = []
|
|
694
|
+
|
|
695
|
+
for rel_path, file_data in files.items():
|
|
696
|
+
if not isinstance(file_data, dict):
|
|
697
|
+
continue
|
|
698
|
+
|
|
699
|
+
symbols = file_data.get("symbols", [])
|
|
700
|
+
if not symbols:
|
|
701
|
+
continue
|
|
702
|
+
|
|
703
|
+
# Read source for docstrings
|
|
704
|
+
source_lines = None
|
|
705
|
+
if root:
|
|
706
|
+
try:
|
|
707
|
+
source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
|
|
708
|
+
except (OSError, UnicodeDecodeError):
|
|
709
|
+
pass
|
|
710
|
+
|
|
711
|
+
classes = []
|
|
712
|
+
functions = []
|
|
713
|
+
|
|
714
|
+
for sym in symbols:
|
|
715
|
+
name = sym.get("name", "")
|
|
716
|
+
sym_type = sym.get("type", "")
|
|
717
|
+
line = sym.get("line", 0)
|
|
718
|
+
params = sym.get("params", [])
|
|
719
|
+
doc = sym.get("doc", "")
|
|
720
|
+
|
|
721
|
+
# Try to extract docstring from source
|
|
722
|
+
if not doc and source_lines and line > 0:
|
|
723
|
+
doc = _extract_docstring(source_lines, line - 1)
|
|
724
|
+
|
|
725
|
+
entry = {
|
|
726
|
+
"name": name,
|
|
727
|
+
"type": sym_type,
|
|
728
|
+
"line": line,
|
|
729
|
+
"params": params,
|
|
730
|
+
"doc": doc or "",
|
|
731
|
+
"calls": sym.get("calls", []),
|
|
732
|
+
"framework": sym.get("framework"),
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
if sym_type == "class":
|
|
736
|
+
classes.append(entry)
|
|
737
|
+
elif sym_type in ("function", "method"):
|
|
738
|
+
functions.append(entry)
|
|
739
|
+
|
|
740
|
+
if classes or functions:
|
|
741
|
+
modules.append({
|
|
742
|
+
"file": rel_path,
|
|
743
|
+
"classes": classes,
|
|
744
|
+
"functions": functions,
|
|
745
|
+
"imports": file_data.get("imports", []),
|
|
746
|
+
})
|
|
747
|
+
|
|
748
|
+
# Compute summary stats
|
|
749
|
+
total_documented = sum(
|
|
750
|
+
1 for m in modules
|
|
751
|
+
for f in m["functions"] + m["classes"]
|
|
752
|
+
if f["doc"]
|
|
753
|
+
)
|
|
754
|
+
total_symbols = sum(
|
|
755
|
+
len(m["functions"]) + len(m["classes"])
|
|
756
|
+
for m in modules
|
|
757
|
+
)
|
|
758
|
+
|
|
759
|
+
return {
|
|
760
|
+
"modules": modules,
|
|
761
|
+
"total_modules": len(modules),
|
|
762
|
+
"total_symbols": total_symbols,
|
|
763
|
+
"documented": total_documented,
|
|
764
|
+
"undocumented": total_symbols - total_documented,
|
|
765
|
+
"coverage_pct": round(total_documented / max(total_symbols, 1) * 100, 1),
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def _extract_docstring(lines: list[str], start_idx: int) -> str:
|
|
770
|
+
"""Extract docstring from the line after a function/class definition."""
|
|
771
|
+
# Look at next few lines for a docstring
|
|
772
|
+
for i in range(start_idx + 1, min(start_idx + 5, len(lines))):
|
|
773
|
+
stripped = lines[i].strip()
|
|
774
|
+
if not stripped:
|
|
775
|
+
continue
|
|
776
|
+
|
|
777
|
+
# Python triple-quoted docstring
|
|
778
|
+
if stripped.startswith('"""') or stripped.startswith("'''"):
|
|
779
|
+
quote = stripped[:3]
|
|
780
|
+
if stripped.endswith(quote) and len(stripped) > 6:
|
|
781
|
+
return stripped[3:-3].strip()
|
|
782
|
+
# Multi-line docstring
|
|
783
|
+
doc_lines = [stripped[3:]]
|
|
784
|
+
for j in range(i + 1, min(i + 20, len(lines))):
|
|
785
|
+
line = lines[j].strip()
|
|
786
|
+
if line.endswith(quote):
|
|
787
|
+
doc_lines.append(line[:-3])
|
|
788
|
+
return "\n".join(doc_lines).strip()
|
|
789
|
+
doc_lines.append(line)
|
|
790
|
+
break
|
|
791
|
+
|
|
792
|
+
# JSDoc /** ... */
|
|
793
|
+
if stripped.startswith("/**"):
|
|
794
|
+
doc_lines = []
|
|
795
|
+
for j in range(i, min(i + 20, len(lines))):
|
|
796
|
+
line = lines[j].strip()
|
|
797
|
+
if line.endswith("*/"):
|
|
798
|
+
line = line[:-2].strip()
|
|
799
|
+
if line.startswith("/**"):
|
|
800
|
+
line = line[3:].strip()
|
|
801
|
+
elif line.startswith("*"):
|
|
802
|
+
line = line[1:].strip()
|
|
803
|
+
if line:
|
|
804
|
+
doc_lines.append(line)
|
|
805
|
+
return "\n".join(doc_lines).strip()
|
|
806
|
+
if line.startswith("/**"):
|
|
807
|
+
line = line[3:].strip()
|
|
808
|
+
elif line.startswith("*"):
|
|
809
|
+
line = line[1:].strip()
|
|
810
|
+
if line:
|
|
811
|
+
doc_lines.append(line)
|
|
812
|
+
break
|
|
813
|
+
|
|
814
|
+
break
|
|
815
|
+
|
|
816
|
+
return ""
|