cortexcode 0.2.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,816 @@
1
+ """Advanced code analysis — duplication, security, circular deps, API endpoints, doc generation."""
2
+
3
+ import re
4
+ import hashlib
5
+ from pathlib import Path
6
+ from typing import Any
7
+ from difflib import SequenceMatcher
8
+
9
+
10
+ # ─── Fuzzy Search ───────────────────────────────────────────────────────────
11
+
12
def fuzzy_search(index: dict, query: str, threshold: float = 0.5, limit: int = 20) -> list[dict[str, Any]]:
    """Fuzzy search for symbols — finds approximate matches.

    Every symbol name is scored against ``query`` (case-insensitively):

    * ``1.0`` when the name equals the query;
    * ``0.9`` when the query is a substring of the name;
    * otherwise the ``SequenceMatcher`` ratio, raised to at least ``0.75``
      when the query occurs in the name's initials and to at least ``0.8``
      when every whitespace-separated query word occurs in the name.

    Args:
        index: Project index. ``index["files"]`` maps a relative path to a dict
            with a ``"symbols"`` list; non-dict file entries are skipped.
        query: Text to search for.
        threshold: Minimum score a symbol needs in order to be returned.
        limit: Maximum number of results.

    Returns:
        Result dicts (``name``, ``type``, ``file``, ``line``, ``params``,
        ``doc``, ``score``) ordered by descending score.
    """
    query_lower = query.lower()
    # Split once. An empty word list (whitespace-only query) must NOT earn the
    # word bonus: ``all()`` over nothing is vacuously true and would otherwise
    # give every symbol in the index a score of 0.8.
    query_words = query_lower.split()
    results = []

    for rel_path, file_data in index.get("files", {}).items():
        if not isinstance(file_data, dict):
            continue
        for sym in file_data.get("symbols", []):
            # ``or ""`` also covers an explicit ``"name": None`` entry.
            name = sym.get("name") or ""
            name_lower = name.lower()

            if query_lower in name_lower:
                # Substring hit — highest scores; the bonuses below (0.75 / 0.8)
                # could never raise it, so they are only evaluated otherwise.
                score = 1.0 if query_lower == name_lower else 0.9
            else:
                score = SequenceMatcher(None, query_lower, name_lower).ratio()

                # Bonus when the query occurs in the symbol's initials.
                if query_lower in _extract_initials(name).lower():
                    score = max(score, 0.75)

                # Bonus when every query word occurs somewhere in the name
                # (e.g., "user auth" matches "userAuthentication").
                if query_words and all(word in name_lower for word in query_words):
                    score = max(score, 0.8)

            if score >= threshold:
                results.append({
                    "name": name,
                    "type": sym.get("type"),
                    "file": rel_path,
                    "line": sym.get("line"),
                    "params": sym.get("params", []),
                    "doc": sym.get("doc"),
                    "score": round(score, 3),
                })

    results.sort(key=lambda item: item["score"], reverse=True)
    return results[:limit]
57
+
58
+
59
+ def regex_search(index: dict, pattern: str, sym_type: str | None = None, limit: int = 20) -> list[dict[str, Any]]:
60
+ """Search symbols using regex pattern."""
61
+ try:
62
+ regex = re.compile(pattern, re.IGNORECASE)
63
+ except re.error as e:
64
+ return [{"error": f"Invalid regex: {e}"}]
65
+
66
+ files = index.get("files", {})
67
+ results = []
68
+
69
+ for rel_path, file_data in files.items():
70
+ if not isinstance(file_data, dict):
71
+ continue
72
+ for sym in file_data.get("symbols", []):
73
+ name = sym.get("name", "")
74
+ if regex.search(name):
75
+ if sym_type and sym.get("type") != sym_type:
76
+ continue
77
+ results.append({
78
+ "name": name,
79
+ "type": sym.get("type"),
80
+ "file": rel_path,
81
+ "line": sym.get("line"),
82
+ "params": sym.get("params", []),
83
+ "doc": sym.get("doc"),
84
+ })
85
+
86
+ return results[:limit]
87
+
88
+
89
+ def _extract_initials(name: str) -> str:
90
+ """Extract initials from camelCase/PascalCase/snake_case name."""
91
+ # camelCase/PascalCase: extract uppercase letters
92
+ initials = re.findall(r'[A-Z]', name)
93
+ if initials:
94
+ return ''.join(initials)
95
+ # snake_case: extract first letter of each word
96
+ parts = name.split('_')
97
+ return ''.join(p[0] for p in parts if p)
98
+
99
+
100
+ # ─── Code Duplication Detection ─────────────────────────────────────────────
101
+
102
+ def detect_duplicates(index: dict, project_root: str | None = None, min_lines: int = 5) -> list[dict[str, Any]]:
103
+ """Find duplicate or very similar code blocks.
104
+
105
+ Compares function bodies by normalizing whitespace and variable names,
106
+ then computing similarity scores.
107
+ """
108
+ files = index.get("files", {})
109
+ root = Path(project_root) if project_root else None
110
+
111
+ # Collect all function bodies
112
+ functions: list[dict] = []
113
+ for rel_path, file_data in files.items():
114
+ if not isinstance(file_data, dict):
115
+ continue
116
+
117
+ source_lines = None
118
+ if root:
119
+ try:
120
+ source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
121
+ except (OSError, UnicodeDecodeError):
122
+ continue
123
+
124
+ if not source_lines:
125
+ continue
126
+
127
+ for sym in file_data.get("symbols", []):
128
+ if sym.get("type") not in ("function", "method"):
129
+ continue
130
+
131
+ line = sym.get("line", 0)
132
+ if line <= 0:
133
+ continue
134
+
135
+ body = _extract_function_body(source_lines, line - 1)
136
+ if len(body.split("\n")) < min_lines:
137
+ continue
138
+
139
+ normalized = _normalize_code(body)
140
+ functions.append({
141
+ "name": sym.get("name", ""),
142
+ "file": rel_path,
143
+ "line": line,
144
+ "body": body,
145
+ "normalized": normalized,
146
+ "hash": hashlib.md5(normalized.encode()).hexdigest(),
147
+ })
148
+
149
+ # Group by hash for exact duplicates
150
+ hash_groups: dict[str, list] = {}
151
+ for func in functions:
152
+ h = func["hash"]
153
+ if h not in hash_groups:
154
+ hash_groups[h] = []
155
+ hash_groups[h].append(func)
156
+
157
+ duplicates = []
158
+ seen_pairs = set()
159
+
160
+ # Exact duplicates
161
+ for h, group in hash_groups.items():
162
+ if len(group) > 1:
163
+ duplicates.append({
164
+ "type": "exact",
165
+ "similarity": 1.0,
166
+ "functions": [
167
+ {"name": f["name"], "file": f["file"], "line": f["line"]}
168
+ for f in group
169
+ ],
170
+ "lines": len(group[0]["body"].split("\n")),
171
+ })
172
+ for f in group:
173
+ seen_pairs.add((f["file"], f["line"]))
174
+
175
+ # Near duplicates (similarity > 0.8)
176
+ for i, f1 in enumerate(functions):
177
+ if (f1["file"], f1["line"]) in seen_pairs:
178
+ continue
179
+ for f2 in functions[i + 1:]:
180
+ if (f2["file"], f2["line"]) in seen_pairs:
181
+ continue
182
+ if f1["hash"] == f2["hash"]:
183
+ continue
184
+
185
+ sim = SequenceMatcher(None, f1["normalized"], f2["normalized"]).ratio()
186
+ if sim > 0.8:
187
+ duplicates.append({
188
+ "type": "near",
189
+ "similarity": round(sim, 3),
190
+ "functions": [
191
+ {"name": f1["name"], "file": f1["file"], "line": f1["line"]},
192
+ {"name": f2["name"], "file": f2["file"], "line": f2["line"]},
193
+ ],
194
+ "lines": max(
195
+ len(f1["body"].split("\n")),
196
+ len(f2["body"].split("\n")),
197
+ ),
198
+ })
199
+
200
+ duplicates.sort(key=lambda x: x["similarity"], reverse=True)
201
+ return duplicates
202
+
203
+
204
+ def _extract_function_body(lines: list[str], start_idx: int) -> str:
205
+ """Extract function body from source lines."""
206
+ if start_idx >= len(lines):
207
+ return ""
208
+
209
+ start_line = lines[start_idx]
210
+ start_indent = len(start_line) - len(start_line.lstrip())
211
+ indent_based = "def " in start_line or start_line.strip().endswith(":")
212
+
213
+ body = [lines[start_idx]]
214
+ brace_depth = 0
215
+
216
+ for i in range(start_idx + 1, min(start_idx + 300, len(lines))):
217
+ line = lines[i]
218
+ stripped = line.strip()
219
+
220
+ if not stripped:
221
+ body.append(line)
222
+ continue
223
+
224
+ if indent_based:
225
+ current_indent = len(line) - len(line.lstrip())
226
+ if current_indent <= start_indent and stripped and not stripped.startswith((")", "]", "}")):
227
+ break
228
+ else:
229
+ brace_depth += stripped.count("{") - stripped.count("}")
230
+ if brace_depth <= 0 and len(body) > 1:
231
+ body.append(line)
232
+ break
233
+
234
+ body.append(line)
235
+
236
+ return "\n".join(body)
237
+
238
+
239
+ def _normalize_code(code: str) -> str:
240
+ """Normalize code for comparison — remove comments, normalize whitespace, replace identifiers."""
241
+ lines = []
242
+ for line in code.split("\n"):
243
+ stripped = line.strip()
244
+ # Remove comments
245
+ if stripped.startswith("#") or stripped.startswith("//"):
246
+ continue
247
+ # Remove inline comments
248
+ stripped = re.sub(r'#.*$', '', stripped)
249
+ stripped = re.sub(r'//.*$', '', stripped)
250
+ stripped = stripped.strip()
251
+ if stripped:
252
+ lines.append(stripped)
253
+
254
+ result = "\n".join(lines)
255
+ # Normalize string literals
256
+ result = re.sub(r'"[^"]*"', '"STR"', result)
257
+ result = re.sub(r"'[^']*'", "'STR'", result)
258
+ # Normalize numbers
259
+ result = re.sub(r'\b\d+\b', 'NUM', result)
260
+ return result
261
+
262
+
263
+ # ─── Security Scan ──────────────────────────────────────────────────────────
264
+
265
# Pattern tables consumed by security_scan(). Every entry is a
# (regex, description, severity) tuple. security_scan() compiles all of them
# with re.IGNORECASE and matches them against one source line at a time, so
# character classes such as [A-Z] also match lowercase text there.

# Hard-coded credentials: generic key/password/token assignments, database
# connection URLs, and provider-specific token shapes.
SECRET_PATTERNS = [
    (r'(?:api[_-]?key|apikey)\s*[:=]\s*["\']?[a-zA-Z0-9_\-]{16,}', "API key", "high"),
    (r'(?:secret|password|passwd|pwd)\s*[:=]\s*["\'][^"\']{4,}["\']', "Hardcoded password/secret", "critical"),
    (r'(?:token|auth[_-]?token|access[_-]?token)\s*[:=]\s*["\']?[a-zA-Z0-9_\-\.]{16,}', "Hardcoded token", "high"),
    (r'(?:aws[_-]?access|aws[_-]?secret)\s*[:=]\s*["\']?[A-Za-z0-9/+=]{16,}', "AWS credential", "critical"),
    (r'(?:private[_-]?key|ssh[_-]?key)\s*[:=]\s*["\'].*["\']', "Private key reference", "critical"),
    (r'(?:jdbc|mongodb|mysql|postgres|redis)://[^\s"\']+', "Database connection string", "high"),
    (r'sk-[a-zA-Z0-9]{20,}', "OpenAI API key", "critical"),
    (r'ghp_[a-zA-Z0-9]{36}', "GitHub personal access token", "critical"),
    (r'xoxb-[a-zA-Z0-9-]+', "Slack bot token", "critical"),
    (r'(?:AKIA|ASIA)[A-Z0-9]{16}', "AWS Access Key ID", "critical"),
]

# Query calls whose first argument is built with an f-string, %-formatting,
# .format() or concatenation with request-like names.
SQL_INJECTION_PATTERNS = [
    (r'(?:execute|query|raw)\s*\(\s*(?:f["\']|["\'].*%|.*\.format\(|.*\+\s*(?:req|request|params|input))', "SQL injection risk — use parameterized queries", "high"),
    (r'(?:cursor\.execute|db\.query)\s*\(\s*["\'].*\{', "SQL injection risk — f-string in query", "high"),
]

# DOM sinks that render raw HTML.
XSS_PATTERNS = [
    (r'innerHTML\s*=\s*(?![\'"]\s*$)', "Potential XSS — innerHTML assignment", "medium"),
    (r'dangerouslySetInnerHTML', "Potential XSS — dangerouslySetInnerHTML", "medium"),
    (r'document\.write\s*\(', "Potential XSS — document.write", "medium"),
]

# Dynamic code execution, shell invocation, unsafe deserialization and weak
# randomness. security_scan() does not apply this table to files whose path
# contains "test" or "spec".
UNSAFE_PATTERNS = [
    (r'\beval\s*\(', "Unsafe eval() usage", "high"),
    (r'\bexec\s*\(', "Unsafe exec() usage", "high"),
    (r'subprocess\.(call|run|Popen)\s*\(.*shell\s*=\s*True', "Shell injection risk", "high"),
    (r'os\.system\s*\(', "Shell injection risk — os.system", "high"),
    (r'pickle\.loads?\s*\(', "Unsafe deserialization — pickle", "medium"),
    (r'yaml\.load\s*\([^)]*\)\s*$', "Unsafe YAML load (use safe_load)", "medium"),
    (r'Math\.random\(\)', "Insecure randomness — use crypto.getRandomValues", "low"),
]
298
+
299
+
300
def security_scan(project_root: str, index: dict | None = None) -> dict[str, Any]:
    """Scan source code for security issues.

    Every line of every scanned file is matched against the module's pattern
    tables (``SECRET_PATTERNS``, ``SQL_INJECTION_PATTERNS``, ``XSS_PATTERNS``,
    ``UNSAFE_PATTERNS``), all compiled case-insensitively. Lines that start
    with ``#``, ``//`` or ``*`` are treated as comments and skipped. Files
    whose relative path contains "test" or "spec" are exempt from the
    ``unsafe_code`` checks. At most one finding is kept per
    (file, line, category).

    Args:
        project_root: Directory to scan; index paths are resolved against it.
        index: Optional project index. When it lists files
            (``index["files"]``), exactly those are scanned; otherwise the
            tree is searched for a fixed set of source extensions.

    Returns:
        Dict with ``scanned_files``, ``total_findings``, ``summary`` (count per
        category), ``severity_counts`` and ``findings`` (sorted by severity,
        most severe first).
    """
    root = Path(project_root)
    files = index.get("files", {}) if index else {}

    # Get file list from index or scan directory.
    if files:
        file_paths = [root / rel_path for rel_path in files]
    else:
        exts = (".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php")
        # Sorted so that the order of findings does not depend on set or
        # directory iteration order and is reproducible between runs.
        file_paths = sorted(p for ext in exts for p in root.rglob(f"*{ext}"))

    all_patterns = (
        [(p, d, s, "secret") for p, d, s in SECRET_PATTERNS] +
        [(p, d, s, "sql_injection") for p, d, s in SQL_INJECTION_PATTERNS] +
        [(p, d, s, "xss") for p, d, s in XSS_PATTERNS] +
        [(p, d, s, "unsafe_code") for p, d, s in UNSAFE_PATTERNS]
    )
    compiled = [(re.compile(p, re.IGNORECASE), d, s, c) for p, d, s, c in all_patterns]
    # Test/spec files legitimately use eval/exec/subprocess etc.
    compiled_for_tests = [entry for entry in compiled if entry[3] != "unsafe_code"]

    findings: list[dict] = []
    seen = set()
    scanned_files = 0

    for file_path in file_paths:
        try:
            content = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue

        scanned_files += 1
        try:
            rel = str(file_path.relative_to(root))
        except ValueError:
            # An index entry outside the project root (e.g. an absolute path).
            rel = str(file_path)

        rel_lower = rel.lower()
        is_test = "test" in rel_lower or "spec" in rel_lower
        active = compiled_for_tests if is_test else compiled

        for line_num, line in enumerate(content.split("\n"), 1):
            stripped = line.strip()
            # Skip comments.
            if stripped.startswith(("#", "//", "*")):
                continue

            for regex, desc, severity, category in active:
                if not regex.search(line):
                    continue
                # Deduplicate: the first hit per (file, line, category) wins.
                key = (rel, line_num, category)
                if key in seen:
                    continue
                seen.add(key)
                findings.append({
                    "file": rel,
                    "line": line_num,
                    "category": category,
                    "severity": severity,
                    "description": desc,
                    "snippet": stripped[:120],
                })

    # Sort by severity (stable, so file/line order is kept within a level).
    severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
    findings.sort(key=lambda item: severity_order.get(item["severity"], 99))

    summary: dict[str, int] = {}
    severity_counts = {level: 0 for level in ("critical", "high", "medium", "low")}
    for finding in findings:
        summary[finding["category"]] = summary.get(finding["category"], 0) + 1
        if finding["severity"] in severity_counts:
            severity_counts[finding["severity"]] += 1

    return {
        "scanned_files": scanned_files,
        "total_findings": len(findings),
        "summary": summary,
        "severity_counts": severity_counts,
        "findings": findings,
    }
384
+
385
+
386
+ # ─── Circular Dependency Detection ──────────────────────────────────────────
387
+
388
def detect_circular_deps(index: dict) -> list[dict[str, Any]]:
    """Report cycles in the file-import graph and in the call graph.

    Both graphs are taken from ``index`` (``"file_dependencies"`` and
    ``"call_graph"``) and handed to ``_find_cycles``. ``length`` is the number
    of entries in the cycle list exactly as ``_find_cycles`` returns it.

    * File cycles: severity ``"high"`` when ``length <= 2``, else ``"medium"``.
    * Call cycles: only those with ``length <= 5`` are reported; severity
      ``"medium"`` when ``length <= 2``, else ``"low"``.

    Returns:
        Cycle reports sorted by ascending ``length`` (file cycles before call
        cycles for equal lengths).
    """
    import_cycles = [
        {
            "type": "file_import",
            "cycle": cycle,
            "length": len(cycle),
            "severity": "high" if len(cycle) <= 2 else "medium",
        }
        for cycle in _find_cycles(index.get("file_dependencies", {}))
    ]

    call_cycles = [
        {
            "type": "call_cycle",
            "cycle": cycle,
            "length": len(cycle),
            "severity": "medium" if len(cycle) <= 2 else "low",
        }
        for cycle in _find_cycles(index.get("call_graph", {}))
        if len(cycle) <= 5  # only short call cycles are worth reporting
    ]

    # sorted() is stable, so the concatenation order breaks ties.
    return sorted(import_cycles + call_cycles, key=lambda report: report["length"])
417
+
418
+
419
+ def _find_cycles(graph: dict[str, list]) -> list[list[str]]:
420
+ """Find all cycles in a directed graph using DFS."""
421
+ cycles = []
422
+ visited = set()
423
+ path = []
424
+ path_set = set()
425
+
426
+ def dfs(node: str):
427
+ if node in path_set:
428
+ # Found a cycle — extract it
429
+ idx = path.index(node)
430
+ cycle = path[idx:] + [node]
431
+ # Normalize cycle so smallest element is first
432
+ min_idx = cycle.index(min(cycle[:-1]))
433
+ normalized = cycle[min_idx:-1] + cycle[:min_idx] + [cycle[min_idx]]
434
+ if normalized not in cycles:
435
+ cycles.append(normalized)
436
+ return
437
+
438
+ if node in visited:
439
+ return
440
+
441
+ visited.add(node)
442
+ path.append(node)
443
+ path_set.add(node)
444
+
445
+ for neighbor in graph.get(node, []):
446
+ if neighbor in graph: # Only follow edges to known nodes
447
+ dfs(neighbor)
448
+
449
+ path.pop()
450
+ path_set.discard(node)
451
+
452
+ for node in graph:
453
+ dfs(node)
454
+
455
+ return cycles
456
+
457
+
458
+ # ─── API Endpoint Extraction ────────────────────────────────────────────────
459
+
460
# Per-framework route-declaration regexes as (pattern, framework) pairs.
# Patterns with two groups capture (method, path); the Django, Spring and
# Go/Rails ones capture only the path, and the Next.js one only the method.
# NOTE(review): nothing in the visible part of this module reads this table —
# extract_endpoints() uses its own inline patterns. Confirm whether it is
# used elsewhere before changing or removing it.
ENDPOINT_PATTERNS = [
    # Express.js
    (r'(?:app|router)\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)', "express"),
    # Flask
    (r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', "flask"),
    # Django
    (r'path\s*\(\s*["\']([^"\']+)["\']', "django"),
    # FastAPI
    (r'@(?:app|router)\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', "fastapi"),
    # Next.js API routes (file-based)
    (r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH)\s*\(', "nextjs"),
    # Spring Boot
    (r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)', "spring"),
    # Go net/http
    (r'(?:Handle|HandleFunc)\s*\(\s*["\']([^"\']+)', "go-http"),
    # Ruby on Rails
    (r'(?:get|post|put|patch|delete)\s+["\']([^"\']+)', "rails"),
]
478
+
479
+
480
+ def extract_endpoints(index: dict, project_root: str | None = None) -> dict[str, Any]:
481
+ """Extract API endpoints from source code."""
482
+ root = Path(project_root) if project_root else None
483
+ files = index.get("files", {})
484
+
485
+ endpoints: list[dict] = []
486
+ seen = set()
487
+
488
+ for rel_path, file_data in files.items():
489
+ if not isinstance(file_data, dict):
490
+ continue
491
+
492
+ source = None
493
+ if root:
494
+ try:
495
+ source = (root / rel_path).read_text(encoding="utf-8", errors="ignore")
496
+ except OSError:
497
+ continue
498
+
499
+ if not source:
500
+ continue
501
+
502
+ # Next.js file-based routing (file path based)
503
+ if _is_nextjs_route(rel_path):
504
+ methods = re.findall(
505
+ r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s*\(',
506
+ source
507
+ )
508
+ route_path = _nextjs_file_to_route(rel_path)
509
+ for method in methods:
510
+ key = (method.upper(), route_path, rel_path)
511
+ if key not in seen:
512
+ seen.add(key)
513
+ endpoints.append({
514
+ "method": method.upper(),
515
+ "path": route_path,
516
+ "file": rel_path,
517
+ "framework": "nextjs",
518
+ })
519
+ continue # Skip pattern matching for Next.js route files
520
+
521
+ # Pattern-based extraction - only match at line start (not inside strings)
522
+ lines = source.split("\n")
523
+
524
+ # Skip Next.js route files (already handled above)
525
+ if _is_nextjs_route(rel_path):
526
+ continue
527
+
528
+ for line_num, line in enumerate(lines, 1):
529
+ stripped = line.strip()
530
+
531
+ # Skip empty lines and comments
532
+ if not stripped or stripped.startswith("#") or stripped.startswith("//"):
533
+ continue
534
+
535
+ # Only match patterns at the start of a line (after optional whitespace)
536
+ # This avoids matching strings inside code
537
+
538
+ # Express.js: app.get("/path", ...) - must start with app or router
539
+ if stripped.startswith("app.") or stripped.startswith("router."):
540
+ match = re.search(r'\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)', line)
541
+ if match:
542
+ method = match.group(1).upper()
543
+ path = match.group(2)
544
+ key = (method, path, rel_path)
545
+ if key not in seen:
546
+ seen.add(key)
547
+ endpoints.append({
548
+ "method": method,
549
+ "path": path,
550
+ "file": rel_path,
551
+ "line": line_num,
552
+ "framework": "express",
553
+ })
554
+ continue
555
+
556
+ # Flask/FastAPI: @app.route or @app.get - must start with @
557
+ if stripped.startswith("@"):
558
+ match = re.search(r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', line)
559
+ if match:
560
+ method = match.group(1).upper()
561
+ if method == "ROUTE":
562
+ method = "GET"
563
+ path = match.group(2)
564
+ key = (method, path, rel_path)
565
+ if key not in seen:
566
+ seen.add(key)
567
+ endpoints.append({
568
+ "method": method,
569
+ "path": path,
570
+ "file": rel_path,
571
+ "line": line_num,
572
+ "framework": "flask",
573
+ })
574
+ continue
575
+
576
+ # Django: path( - must start with path(
577
+ if "path(" in stripped:
578
+ match = re.search(r'path\s*\(\s*["\']([^"\']+)["\']', line)
579
+ if match:
580
+ path = match.group(1)
581
+ key = ("GET", path, rel_path)
582
+ if key not in seen:
583
+ seen.add(key)
584
+ endpoints.append({
585
+ "method": "GET",
586
+ "path": path,
587
+ "file": rel_path,
588
+ "line": line_num,
589
+ "framework": "django",
590
+ })
591
+ continue
592
+
593
+ # Spring: @GetMapping, @PostMapping, etc.
594
+ match = re.search(r'@(Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)', line)
595
+ if match:
596
+ method = match.group(1)
597
+ if method == "Request":
598
+ method = "GET"
599
+ path = match.group(2)
600
+ key = (method, path, rel_path)
601
+ if key not in seen:
602
+ seen.add(key)
603
+ endpoints.append({
604
+ "method": method,
605
+ "path": path,
606
+ "file": rel_path,
607
+ "line": line_num,
608
+ "framework": "spring",
609
+ })
610
+ continue
611
+
612
+ # Deduplicate
613
+ seen = set()
614
+ unique = []
615
+ for ep in endpoints:
616
+ key = (ep["method"], ep["path"], ep["file"])
617
+ if key not in seen:
618
+ seen.add(key)
619
+ unique.append(ep)
620
+
621
+ unique.sort(key=lambda x: (x["path"], x["method"]))
622
+
623
+ return {
624
+ "count": len(unique),
625
+ "endpoints": unique,
626
+ "frameworks": list(set(ep["framework"] for ep in unique)),
627
+ }
628
+
629
+
630
+ def _is_nextjs_route(path: str) -> bool:
631
+ """Check if a file is a Next.js API/app route."""
632
+ normalized = path.replace("\\", "/")
633
+ return (
634
+ ("/api/" in normalized and "route." in normalized) or
635
+ ("/app/" in normalized and "route." in normalized)
636
+ )
637
+
638
+
639
+ def _is_inside_string(lines: list[str], line_idx: int) -> bool:
640
+ """Check if a line is inside a string literal."""
641
+ if line_idx < 0 or line_idx >= len(lines):
642
+ return False
643
+
644
+ # Count unescaped quotes before this line to determine if we're in a string
645
+ quote_count = 0
646
+ in_string = False
647
+ current_quote = None
648
+
649
+ for i in range(line_idx + 1):
650
+ line = lines[i]
651
+ for char in line:
652
+ if char in ('"', "'", '`') and (i != line_idx or True):
653
+ if not in_string:
654
+ in_string = True
655
+ current_quote = char
656
+ quote_count = 1
657
+ elif char == current_quote:
658
+ # Check for escaped quote
659
+ if i == line_idx and line.index(char) > 0 and line[line.index(char) - 1] == '\\':
660
+ continue
661
+ quote_count += 1
662
+ if quote_count % 2 == 0:
663
+ in_string = False
664
+ current_quote = None
665
+
666
+ return in_string
667
+
668
+
669
+ def _nextjs_file_to_route(path: str) -> str:
670
+ """Convert Next.js file path to route path."""
671
+ normalized = path.replace("\\", "/")
672
+ # src/app/api/users/route.ts -> /api/users
673
+ match = re.search(r'(?:src/)?app(/.*)/route\.(?:ts|js|tsx|jsx)', normalized)
674
+ if match:
675
+ return match.group(1)
676
+ # pages/api/users.ts -> /api/users
677
+ match = re.search(r'pages(/.*?)\.(?:ts|js|tsx|jsx)', normalized)
678
+ if match:
679
+ route = match.group(1)
680
+ if route.endswith("/index"):
681
+ route = route[:-6] or "/"
682
+ return route
683
+ return normalized
684
+
685
+
686
+ # ─── Auto-Generate API Docs ─────────────────────────────────────────────────
687
+
688
+ def generate_api_docs(index: dict, project_root: str | None = None) -> dict[str, Any]:
689
+ """Generate API documentation from function signatures and docstrings."""
690
+ files = index.get("files", {})
691
+ root = Path(project_root) if project_root else None
692
+
693
+ modules: list[dict] = []
694
+
695
+ for rel_path, file_data in files.items():
696
+ if not isinstance(file_data, dict):
697
+ continue
698
+
699
+ symbols = file_data.get("symbols", [])
700
+ if not symbols:
701
+ continue
702
+
703
+ # Read source for docstrings
704
+ source_lines = None
705
+ if root:
706
+ try:
707
+ source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
708
+ except (OSError, UnicodeDecodeError):
709
+ pass
710
+
711
+ classes = []
712
+ functions = []
713
+
714
+ for sym in symbols:
715
+ name = sym.get("name", "")
716
+ sym_type = sym.get("type", "")
717
+ line = sym.get("line", 0)
718
+ params = sym.get("params", [])
719
+ doc = sym.get("doc", "")
720
+
721
+ # Try to extract docstring from source
722
+ if not doc and source_lines and line > 0:
723
+ doc = _extract_docstring(source_lines, line - 1)
724
+
725
+ entry = {
726
+ "name": name,
727
+ "type": sym_type,
728
+ "line": line,
729
+ "params": params,
730
+ "doc": doc or "",
731
+ "calls": sym.get("calls", []),
732
+ "framework": sym.get("framework"),
733
+ }
734
+
735
+ if sym_type == "class":
736
+ classes.append(entry)
737
+ elif sym_type in ("function", "method"):
738
+ functions.append(entry)
739
+
740
+ if classes or functions:
741
+ modules.append({
742
+ "file": rel_path,
743
+ "classes": classes,
744
+ "functions": functions,
745
+ "imports": file_data.get("imports", []),
746
+ })
747
+
748
+ # Compute summary stats
749
+ total_documented = sum(
750
+ 1 for m in modules
751
+ for f in m["functions"] + m["classes"]
752
+ if f["doc"]
753
+ )
754
+ total_symbols = sum(
755
+ len(m["functions"]) + len(m["classes"])
756
+ for m in modules
757
+ )
758
+
759
+ return {
760
+ "modules": modules,
761
+ "total_modules": len(modules),
762
+ "total_symbols": total_symbols,
763
+ "documented": total_documented,
764
+ "undocumented": total_symbols - total_documented,
765
+ "coverage_pct": round(total_documented / max(total_symbols, 1) * 100, 1),
766
+ }
767
+
768
+
769
+ def _extract_docstring(lines: list[str], start_idx: int) -> str:
770
+ """Extract docstring from the line after a function/class definition."""
771
+ # Look at next few lines for a docstring
772
+ for i in range(start_idx + 1, min(start_idx + 5, len(lines))):
773
+ stripped = lines[i].strip()
774
+ if not stripped:
775
+ continue
776
+
777
+ # Python triple-quoted docstring
778
+ if stripped.startswith('"""') or stripped.startswith("'''"):
779
+ quote = stripped[:3]
780
+ if stripped.endswith(quote) and len(stripped) > 6:
781
+ return stripped[3:-3].strip()
782
+ # Multi-line docstring
783
+ doc_lines = [stripped[3:]]
784
+ for j in range(i + 1, min(i + 20, len(lines))):
785
+ line = lines[j].strip()
786
+ if line.endswith(quote):
787
+ doc_lines.append(line[:-3])
788
+ return "\n".join(doc_lines).strip()
789
+ doc_lines.append(line)
790
+ break
791
+
792
+ # JSDoc /** ... */
793
+ if stripped.startswith("/**"):
794
+ doc_lines = []
795
+ for j in range(i, min(i + 20, len(lines))):
796
+ line = lines[j].strip()
797
+ if line.endswith("*/"):
798
+ line = line[:-2].strip()
799
+ if line.startswith("/**"):
800
+ line = line[3:].strip()
801
+ elif line.startswith("*"):
802
+ line = line[1:].strip()
803
+ if line:
804
+ doc_lines.append(line)
805
+ return "\n".join(doc_lines).strip()
806
+ if line.startswith("/**"):
807
+ line = line[3:].strip()
808
+ elif line.startswith("*"):
809
+ line = line[1:].strip()
810
+ if line:
811
+ doc_lines.append(line)
812
+ break
813
+
814
+ break
815
+
816
+ return ""