cortexcode 0.5.0__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cortexcode/advanced_analysis/__init__.py +17 -0
- cortexcode/advanced_analysis/advanced_analysis.py +19 -0
- cortexcode/advanced_analysis/advanced_analysis_cycles.py +67 -0
- cortexcode/advanced_analysis/advanced_analysis_docs.py +126 -0
- cortexcode/advanced_analysis/advanced_analysis_duplicates.py +158 -0
- cortexcode/advanced_analysis/advanced_analysis_endpoints.py +205 -0
- cortexcode/advanced_analysis/advanced_analysis_search.py +85 -0
- cortexcode/advanced_analysis/advanced_analysis_security.py +119 -0
- cortexcode/advanced_analysis.py +18 -815
- cortexcode/ai_docs/__init__.py +15 -0
- cortexcode/ai_docs/config.py +130 -0
- cortexcode/ai_docs/doc_cache.py +40 -0
- cortexcode/ai_docs/doc_generator.py +193 -0
- cortexcode/ai_docs/doc_lookup.py +27 -0
- cortexcode/ai_docs/doc_models.py +12 -0
- cortexcode/ai_docs/explainer.py +237 -0
- cortexcode/ai_docs/llm_client.py +289 -0
- cortexcode/ai_docs/page_generator.py +357 -0
- cortexcode/ai_docs/prompts.py +271 -0
- cortexcode/ai_docs/report_runner.py +251 -0
- cortexcode/analysis/__init__.py +11 -0
- cortexcode/analysis/analysis_complexity.py +147 -0
- cortexcode/analysis/analysis_dead_code.py +83 -0
- cortexcode/analysis/analysis_impact.py +62 -0
- cortexcode/analysis.py +6 -327
- cortexcode/cli/__init__.py +63 -0
- cortexcode/cli/cli_ai_docs.py +28 -0
- cortexcode/cli/cli_complexity.py +62 -0
- cortexcode/cli/cli_config.py +68 -0
- cortexcode/cli/cli_context.py +34 -0
- cortexcode/cli/cli_dashboard.py +31 -0
- cortexcode/cli/cli_dead_code.py +41 -0
- cortexcode/cli/cli_diagrams.py +26 -0
- cortexcode/cli/cli_diff.py +57 -0
- cortexcode/cli/cli_docs.py +37 -0
- cortexcode/cli/cli_explain.py +23 -0
- cortexcode/cli/cli_find.py +40 -0
- cortexcode/cli/cli_impact.py +50 -0
- cortexcode/cli/cli_index.py +110 -0
- cortexcode/cli/cli_report.py +19 -0
- cortexcode/cli/cli_scan.py +34 -0
- cortexcode/cli/cli_search.py +61 -0
- cortexcode/cli/cli_servers.py +10 -0
- cortexcode/cli/cli_stats.py +20 -0
- cortexcode/cli/cli_support.py +30 -0
- cortexcode/cli/cli_watch.py +10 -0
- cortexcode/cli/cli_wiki.py +161 -0
- cortexcode/cli/cli_workspace.py +103 -0
- cortexcode/config.py +159 -0
- cortexcode/context/__init__.py +13 -0
- cortexcode/context/context_format.py +17 -0
- cortexcode/context/context_query.py +195 -0
- cortexcode/context/context_tokens.py +68 -0
- cortexcode/context.py +8 -294
- cortexcode/diagrams/__init__.py +26 -0
- cortexcode/diagrams/architecture.py +60 -0
- cortexcode/diagrams/call_graph.py +71 -0
- cortexcode/diagrams/class_diagram.py +63 -0
- cortexcode/diagrams/dependencies.py +35 -0
- cortexcode/diagrams/directory_tree.py +28 -0
- cortexcode/diagrams/entities.py +83 -0
- cortexcode/diagrams/file_tree.py +33 -0
- cortexcode/diagrams/imports.py +44 -0
- cortexcode/diagrams/save.py +53 -0
- cortexcode/diagrams/sequence.py +47 -0
- cortexcode/diagrams/state.py +22 -0
- cortexcode/diagrams/utils.py +24 -0
- cortexcode/docs/__init__.py +88 -8
- cortexcode/docs/diagrams.py +105 -0
- cortexcode/docs/generator.py +29 -506
- cortexcode/docs/javascript.py +2 -369
- cortexcode/docs/javascript_sections.py +395 -0
- cortexcode/indexer.py +144 -1815
- cortexcode/indexing/__init__.py +58 -0
- cortexcode/indexing/build.py +51 -0
- cortexcode/indexing/calls.py +25 -0
- cortexcode/indexing/config.py +24 -0
- cortexcode/indexing/defaults.py +21 -0
- cortexcode/indexing/dispatch.py +42 -0
- cortexcode/indexing/entities.py +87 -0
- cortexcode/indexing/extensions.py +2 -0
- cortexcode/indexing/extractor_mixin.py +355 -0
- cortexcode/indexing/extractors/__init__.py +9 -0
- cortexcode/indexing/extractors/csharp.py +90 -0
- cortexcode/indexing/extractors/dart.py +165 -0
- cortexcode/indexing/extractors/generic.py +139 -0
- cortexcode/indexing/extractors/java.py +91 -0
- cortexcode/indexing/extractors/javascript.py +194 -0
- cortexcode/indexing/extractors/kotlin.py +92 -0
- cortexcode/indexing/extractors/swift.py +112 -0
- cortexcode/indexing/filtering.py +74 -0
- cortexcode/indexing/frameworks.py +236 -0
- cortexcode/indexing/gitignore.py +65 -0
- cortexcode/indexing/imports_exports.py +172 -0
- cortexcode/indexing/incremental.py +57 -0
- cortexcode/indexing/languages.py +48 -0
- cortexcode/indexing/metadata.py +100 -0
- cortexcode/indexing/nodes.py +8 -0
- cortexcode/indexing/output.py +19 -0
- cortexcode/indexing/params.py +27 -0
- cortexcode/indexing/parsers.py +20 -0
- cortexcode/indexing/pipeline.py +104 -0
- cortexcode/indexing/profile.py +271 -0
- cortexcode/indexing/resolution.py +191 -0
- cortexcode/indexing/routes.py +120 -0
- cortexcode/indexing/session.py +21 -0
- cortexcode/indexing/storage.py +13 -0
- cortexcode/indexing/walk.py +32 -0
- cortexcode/knowledge/__init__.py +21 -0
- cortexcode/knowledge/build.py +119 -0
- cortexcode/knowledge/citations.py +46 -0
- cortexcode/knowledge/concepts.py +241 -0
- cortexcode/knowledge/models.py +111 -0
- cortexcode/knowledge/snippets.py +99 -0
- cortexcode/knowledge/usage.py +86 -0
- cortexcode/main.py +486 -0
- cortexcode/mcp/__init__.py +20 -0
- cortexcode/mcp/mcp_protocol.py +11 -0
- cortexcode/mcp/mcp_registry.py +206 -0
- cortexcode/mcp/mcp_server.py +78 -0
- cortexcode/mcp/mcp_tool_handlers.py +245 -0
- cortexcode/mcp/mcp_transport.py +61 -0
- cortexcode/performance/__init__.py +28 -0
- cortexcode/performance/performance_config.py +176 -0
- cortexcode/performance/performance_index_storage.py +67 -0
- cortexcode/performance/performance_preview.py +60 -0
- cortexcode/performance.py +32 -0
- cortexcode/reports/__init__.py +17 -0
- cortexcode/reports/html/__init__.py +4 -0
- cortexcode/reports/html/dashboard.py +316 -0
- cortexcode/reports/html/dashboard_fragments.py +134 -0
- cortexcode/reports/html/view_model.py +126 -0
- cortexcode/reports/markdown/__init__.py +15 -0
- cortexcode/reports/markdown/api.py +58 -0
- cortexcode/reports/markdown/flows.py +57 -0
- cortexcode/reports/markdown/insights.py +81 -0
- cortexcode/reports/markdown/readme.py +74 -0
- cortexcode/reports/markdown/structure.py +19 -0
- cortexcode/reports/markdown/tech.py +95 -0
- cortexcode/reports/site/__init__.py +5 -0
- cortexcode/reports/site/generator.py +570 -0
- cortexcode/terminal/__init__.py +30 -0
- cortexcode/terminal/analysis.py +63 -0
- cortexcode/terminal/completion.py +39 -0
- cortexcode/terminal/headers.py +36 -0
- cortexcode/terminal/prompts.py +11 -0
- cortexcode/terminal/reports.py +209 -0
- cortexcode/terminal/stats.py +49 -0
- {cortexcode-0.5.0.dist-info → cortexcode-0.6.0.dist-info}/METADATA +29 -1
- cortexcode-0.6.0.dist-info/RECORD +166 -0
- cortexcode-0.6.0.dist-info/entry_points.txt +2 -0
- cortexcode/cli.py +0 -845
- cortexcode/mcp_server.py +0 -597
- cortexcode-0.5.0.dist-info/RECORD +0 -27
- cortexcode-0.5.0.dist-info/entry_points.txt +0 -2
- {cortexcode-0.5.0.dist-info → cortexcode-0.6.0.dist-info}/WHEEL +0 -0
- {cortexcode-0.5.0.dist-info → cortexcode-0.6.0.dist-info}/licenses/LICENSE +0 -0
- {cortexcode-0.5.0.dist-info → cortexcode-0.6.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Advanced analysis modules."""
|
|
2
|
+
|
|
3
|
+
from cortexcode.advanced_analysis.advanced_analysis_cycles import find_circular_dependencies
|
|
4
|
+
from cortexcode.advanced_analysis.advanced_analysis_docs import generate_docs_summary
|
|
5
|
+
from cortexcode.advanced_analysis.advanced_analysis_duplicates import find_duplicates
|
|
6
|
+
from cortexcode.advanced_analysis.advanced_analysis_endpoints import find_api_endpoints
|
|
7
|
+
from cortexcode.advanced_analysis.advanced_analysis_search import search_symbols_by_semantics
|
|
8
|
+
from cortexcode.advanced_analysis.advanced_analysis_security import scan_security_issues
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"find_circular_dependencies",
|
|
12
|
+
"generate_docs_summary",
|
|
13
|
+
"find_duplicates",
|
|
14
|
+
"find_api_endpoints",
|
|
15
|
+
"search_symbols_by_semantics",
|
|
16
|
+
"scan_security_issues",
|
|
17
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Advanced code analysis — duplication, security, circular deps, API endpoints, doc generation."""
|
|
2
|
+
|
|
3
|
+
from cortexcode.advanced_analysis_cycles import detect_circular_deps
|
|
4
|
+
from cortexcode.advanced_analysis_docs import generate_api_docs
|
|
5
|
+
from cortexcode.advanced_analysis_duplicates import detect_duplicates
|
|
6
|
+
from cortexcode.advanced_analysis_endpoints import extract_endpoints
|
|
7
|
+
from cortexcode.advanced_analysis_search import fuzzy_search, regex_search
|
|
8
|
+
from cortexcode.advanced_analysis_security import security_scan
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"fuzzy_search",
|
|
13
|
+
"regex_search",
|
|
14
|
+
"detect_duplicates",
|
|
15
|
+
"security_scan",
|
|
16
|
+
"detect_circular_deps",
|
|
17
|
+
"extract_endpoints",
|
|
18
|
+
"generate_api_docs",
|
|
19
|
+
]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def detect_circular_deps(index: dict) -> list[dict[str, Any]]:
    """Report circular dependencies found in the index.

    Scans two graphs taken from *index*: ``file_dependencies`` (reported
    as ``type="file_import"``) and ``call_graph`` (reported as
    ``type="call_cycle"``, keeping only cycles of at most 5 nodes).
    Results are sorted by cycle length, shortest first.
    """
    findings: list[dict[str, Any]] = []

    # File-import cycles: a tight two-file cycle is the most severe kind.
    for loop in _find_cycles(index.get("file_dependencies", {})):
        findings.append({
            "type": "file_import",
            "cycle": loop,
            "length": len(loop),
            "severity": "high" if len(loop) <= 2 else "medium",
        })

    # Call-graph cycles: long rings are mostly noise, so cap at 5 nodes.
    for loop in _find_cycles(index.get("call_graph", {})):
        if len(loop) > 5:
            continue
        findings.append({
            "type": "call_cycle",
            "cycle": loop,
            "length": len(loop),
            "severity": "medium" if len(loop) <= 2 else "low",
        })

    findings.sort(key=lambda item: item["length"])
    return findings
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _find_cycles(graph: dict[str, list]) -> list[list[str]]:
|
|
34
|
+
"""Find all cycles in a directed graph using DFS."""
|
|
35
|
+
cycles = []
|
|
36
|
+
visited = set()
|
|
37
|
+
path = []
|
|
38
|
+
path_set = set()
|
|
39
|
+
|
|
40
|
+
def dfs(node: str):
|
|
41
|
+
if node in path_set:
|
|
42
|
+
idx = path.index(node)
|
|
43
|
+
cycle = path[idx:] + [node]
|
|
44
|
+
min_idx = cycle.index(min(cycle[:-1]))
|
|
45
|
+
normalized = cycle[min_idx:-1] + cycle[:min_idx] + [cycle[min_idx]]
|
|
46
|
+
if normalized not in cycles:
|
|
47
|
+
cycles.append(normalized)
|
|
48
|
+
return
|
|
49
|
+
|
|
50
|
+
if node in visited:
|
|
51
|
+
return
|
|
52
|
+
|
|
53
|
+
visited.add(node)
|
|
54
|
+
path.append(node)
|
|
55
|
+
path_set.add(node)
|
|
56
|
+
|
|
57
|
+
for neighbor in graph.get(node, []):
|
|
58
|
+
if neighbor in graph:
|
|
59
|
+
dfs(neighbor)
|
|
60
|
+
|
|
61
|
+
path.pop()
|
|
62
|
+
path_set.discard(node)
|
|
63
|
+
|
|
64
|
+
for node in graph:
|
|
65
|
+
dfs(node)
|
|
66
|
+
|
|
67
|
+
return cycles
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def generate_api_docs(index: dict, project_root: str | None = None) -> dict[str, Any]:
|
|
6
|
+
"""Generate API documentation from function signatures and docstrings."""
|
|
7
|
+
files = index.get("files", {})
|
|
8
|
+
root = Path(project_root) if project_root else None
|
|
9
|
+
|
|
10
|
+
modules: list[dict] = []
|
|
11
|
+
|
|
12
|
+
for rel_path, file_data in files.items():
|
|
13
|
+
if not isinstance(file_data, dict):
|
|
14
|
+
continue
|
|
15
|
+
|
|
16
|
+
symbols = file_data.get("symbols", [])
|
|
17
|
+
if not symbols:
|
|
18
|
+
continue
|
|
19
|
+
|
|
20
|
+
source_lines = None
|
|
21
|
+
if root:
|
|
22
|
+
try:
|
|
23
|
+
source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
|
|
24
|
+
except (OSError, UnicodeDecodeError):
|
|
25
|
+
pass
|
|
26
|
+
|
|
27
|
+
classes = []
|
|
28
|
+
functions = []
|
|
29
|
+
|
|
30
|
+
for sym in symbols:
|
|
31
|
+
name = sym.get("name", "")
|
|
32
|
+
sym_type = sym.get("type", "")
|
|
33
|
+
line = sym.get("line", 0)
|
|
34
|
+
params = sym.get("params", [])
|
|
35
|
+
doc = sym.get("doc", "")
|
|
36
|
+
|
|
37
|
+
if not doc and source_lines and line > 0:
|
|
38
|
+
doc = _extract_docstring(source_lines, line - 1)
|
|
39
|
+
|
|
40
|
+
entry = {
|
|
41
|
+
"name": name,
|
|
42
|
+
"type": sym_type,
|
|
43
|
+
"line": line,
|
|
44
|
+
"params": params,
|
|
45
|
+
"doc": doc or "",
|
|
46
|
+
"calls": sym.get("calls", []),
|
|
47
|
+
"framework": sym.get("framework"),
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
if sym_type == "class":
|
|
51
|
+
classes.append(entry)
|
|
52
|
+
elif sym_type in ("function", "method"):
|
|
53
|
+
functions.append(entry)
|
|
54
|
+
|
|
55
|
+
if classes or functions:
|
|
56
|
+
modules.append({
|
|
57
|
+
"file": rel_path,
|
|
58
|
+
"classes": classes,
|
|
59
|
+
"functions": functions,
|
|
60
|
+
"imports": file_data.get("imports", []),
|
|
61
|
+
})
|
|
62
|
+
|
|
63
|
+
total_documented = sum(
|
|
64
|
+
1 for module in modules
|
|
65
|
+
for item in module["functions"] + module["classes"]
|
|
66
|
+
if item["doc"]
|
|
67
|
+
)
|
|
68
|
+
total_symbols = sum(
|
|
69
|
+
len(module["functions"]) + len(module["classes"])
|
|
70
|
+
for module in modules
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
return {
|
|
74
|
+
"modules": modules,
|
|
75
|
+
"total_modules": len(modules),
|
|
76
|
+
"total_symbols": total_symbols,
|
|
77
|
+
"documented": total_documented,
|
|
78
|
+
"undocumented": total_symbols - total_documented,
|
|
79
|
+
"coverage_pct": round(total_documented / max(total_symbols, 1) * 100, 1),
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _extract_docstring(lines: list[str], start_idx: int) -> str:
    """Extract a docstring from the lines after a definition at ``start_idx``.

    Supports Python triple-quoted strings and JSDoc ``/** ... */`` blocks.
    Returns an empty string when no docstring is found or when one is not
    terminated within the scanned window.
    """
    # Only the first few non-blank lines after the definition are checked;
    # anything further away is not attributed to this definition.
    for line_index in range(start_idx + 1, min(start_idx + 5, len(lines))):
        stripped = lines[line_index].strip()
        if not stripped:
            continue

        # --- Python-style triple-quoted docstring ---------------------------
        if stripped.startswith('"""') or stripped.startswith("'''"):
            quote = stripped[:3]
            # One-liner: opener and closer on the same line.
            # (len > 6 guards against a lone opener like `"""` matching itself.)
            if stripped.endswith(quote) and len(stripped) > 6:
                return stripped[3:-3].strip()
            doc_lines = [stripped[3:]]
            # Multi-line: collect up to ~20 lines until the closing quote.
            for doc_line_index in range(line_index + 1, min(line_index + 20, len(lines))):
                line = lines[doc_line_index].strip()
                if line.endswith(quote):
                    doc_lines.append(line[:-3])
                    return "\n".join(doc_lines).strip()
                doc_lines.append(line)
            # Unterminated within the window — treat as no docstring.
            break

        # --- JSDoc-style /** ... */ block comment ---------------------------
        if stripped.startswith("/**"):
            doc_lines = []
            for doc_line_index in range(line_index, min(line_index + 20, len(lines))):
                line = lines[doc_line_index].strip()
                if line.endswith("*/"):
                    # Closing line: drop the terminator and any leading
                    # comment decoration, keep whatever text remains.
                    line = line[:-2].strip()
                    if line.startswith("/**"):
                        line = line[3:].strip()
                    elif line.startswith("*"):
                        line = line[1:].strip()
                    if line:
                        doc_lines.append(line)
                    return "\n".join(doc_lines).strip()
                # Interior line: strip the `/**` / `*` gutter decoration.
                if line.startswith("/**"):
                    line = line[3:].strip()
                elif line.startswith("*"):
                    line = line[1:].strip()
                if line:
                    doc_lines.append(line)
            # Unterminated within the window — give up.
            break

        # First non-blank line is ordinary code: no docstring present.
        break

    return ""
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import hashlib
|
|
2
|
+
import re
|
|
3
|
+
from difflib import SequenceMatcher
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def detect_duplicates(index: dict, project_root: str | None = None, min_lines: int = 5) -> list[dict[str, Any]]:
|
|
9
|
+
"""Find duplicate or very similar code blocks.
|
|
10
|
+
|
|
11
|
+
Compares function bodies by normalizing whitespace and variable names,
|
|
12
|
+
then computing similarity scores.
|
|
13
|
+
"""
|
|
14
|
+
files = index.get("files", {})
|
|
15
|
+
root = Path(project_root) if project_root else None
|
|
16
|
+
|
|
17
|
+
functions: list[dict] = []
|
|
18
|
+
for rel_path, file_data in files.items():
|
|
19
|
+
if not isinstance(file_data, dict):
|
|
20
|
+
continue
|
|
21
|
+
|
|
22
|
+
source_lines = None
|
|
23
|
+
if root:
|
|
24
|
+
try:
|
|
25
|
+
source_lines = (root / rel_path).read_text(encoding="utf-8").split("\n")
|
|
26
|
+
except (OSError, UnicodeDecodeError):
|
|
27
|
+
continue
|
|
28
|
+
|
|
29
|
+
if not source_lines:
|
|
30
|
+
continue
|
|
31
|
+
|
|
32
|
+
for sym in file_data.get("symbols", []):
|
|
33
|
+
if sym.get("type") not in ("function", "method"):
|
|
34
|
+
continue
|
|
35
|
+
|
|
36
|
+
line = sym.get("line", 0)
|
|
37
|
+
if line <= 0:
|
|
38
|
+
continue
|
|
39
|
+
|
|
40
|
+
body = _extract_function_body(source_lines, line - 1)
|
|
41
|
+
if len(body.split("\n")) < min_lines:
|
|
42
|
+
continue
|
|
43
|
+
|
|
44
|
+
normalized = _normalize_code(body)
|
|
45
|
+
functions.append({
|
|
46
|
+
"name": sym.get("name", ""),
|
|
47
|
+
"file": rel_path,
|
|
48
|
+
"line": line,
|
|
49
|
+
"body": body,
|
|
50
|
+
"normalized": normalized,
|
|
51
|
+
"hash": hashlib.md5(normalized.encode()).hexdigest(),
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
hash_groups: dict[str, list] = {}
|
|
55
|
+
for func in functions:
|
|
56
|
+
func_hash = func["hash"]
|
|
57
|
+
if func_hash not in hash_groups:
|
|
58
|
+
hash_groups[func_hash] = []
|
|
59
|
+
hash_groups[func_hash].append(func)
|
|
60
|
+
|
|
61
|
+
duplicates = []
|
|
62
|
+
seen_pairs = set()
|
|
63
|
+
|
|
64
|
+
for func_hash, group in hash_groups.items():
|
|
65
|
+
if len(group) > 1:
|
|
66
|
+
duplicates.append({
|
|
67
|
+
"type": "exact",
|
|
68
|
+
"similarity": 1.0,
|
|
69
|
+
"functions": [
|
|
70
|
+
{"name": func["name"], "file": func["file"], "line": func["line"]}
|
|
71
|
+
for func in group
|
|
72
|
+
],
|
|
73
|
+
"lines": len(group[0]["body"].split("\n")),
|
|
74
|
+
})
|
|
75
|
+
for func in group:
|
|
76
|
+
seen_pairs.add((func["file"], func["line"]))
|
|
77
|
+
|
|
78
|
+
for index_position, first_func in enumerate(functions):
|
|
79
|
+
if (first_func["file"], first_func["line"]) in seen_pairs:
|
|
80
|
+
continue
|
|
81
|
+
for second_func in functions[index_position + 1:]:
|
|
82
|
+
if (second_func["file"], second_func["line"]) in seen_pairs:
|
|
83
|
+
continue
|
|
84
|
+
if first_func["hash"] == second_func["hash"]:
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
similarity = SequenceMatcher(None, first_func["normalized"], second_func["normalized"]).ratio()
|
|
88
|
+
if similarity > 0.8:
|
|
89
|
+
duplicates.append({
|
|
90
|
+
"type": "near",
|
|
91
|
+
"similarity": round(similarity, 3),
|
|
92
|
+
"functions": [
|
|
93
|
+
{"name": first_func["name"], "file": first_func["file"], "line": first_func["line"]},
|
|
94
|
+
{"name": second_func["name"], "file": second_func["file"], "line": second_func["line"]},
|
|
95
|
+
],
|
|
96
|
+
"lines": max(
|
|
97
|
+
len(first_func["body"].split("\n")),
|
|
98
|
+
len(second_func["body"].split("\n")),
|
|
99
|
+
),
|
|
100
|
+
})
|
|
101
|
+
|
|
102
|
+
duplicates.sort(key=lambda x: x["similarity"], reverse=True)
|
|
103
|
+
return duplicates
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _extract_function_body(lines: list[str], start_idx: int) -> str:
    """Extract a function body from source lines, starting at ``start_idx``.

    Handles both indentation-delimited bodies (Python-style, detected by a
    ``def`` keyword or a trailing colon on the signature line) and
    brace-delimited bodies.  Scans at most ~300 lines; longer functions
    are truncated.
    """
    if start_idx >= len(lines):
        return ""

    start_line = lines[start_idx]
    start_indent = len(start_line) - len(start_line.lstrip())
    # Indentation-delimited (Python) vs. brace-delimited body heuristic.
    indent_based = "def " in start_line or start_line.strip().endswith(":")

    body = [lines[start_idx]]
    # NOTE: braces on the signature line itself are never counted (the scan
    # starts on the next line), so a `func() {` style body relies on the
    # depth going to or below zero at its closing brace.
    brace_depth = 0

    for line_index in range(start_idx + 1, min(start_idx + 300, len(lines))):
        line = lines[line_index]
        stripped = line.strip()

        if not stripped:
            body.append(line)  # keep blank lines inside the body
            continue

        if indent_based:
            current_indent = len(line) - len(line.lstrip())
            # A dedent back to (or past) the signature's level ends the body,
            # unless the line is just a dangling closing bracket.
            if current_indent <= start_indent and stripped and not stripped.startswith((")", "]", "}")):
                break
        else:
            brace_depth += stripped.count("{") - stripped.count("}")
            # Balanced (or over-closed) braces end the body; the len(body) > 1
            # guard prevents terminating before any body line was consumed.
            if brace_depth <= 0 and len(body) > 1:
                body.append(line)
                break

        body.append(line)

    return "\n".join(body)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def _normalize_code(code: str) -> str:
|
|
142
|
+
"""Normalize code for comparison — remove comments, normalize whitespace, replace identifiers."""
|
|
143
|
+
lines = []
|
|
144
|
+
for line in code.split("\n"):
|
|
145
|
+
stripped = line.strip()
|
|
146
|
+
if stripped.startswith("#") or stripped.startswith("//"):
|
|
147
|
+
continue
|
|
148
|
+
stripped = re.sub(r'#.*$', '', stripped)
|
|
149
|
+
stripped = re.sub(r'//.*$', '', stripped)
|
|
150
|
+
stripped = stripped.strip()
|
|
151
|
+
if stripped:
|
|
152
|
+
lines.append(stripped)
|
|
153
|
+
|
|
154
|
+
result = "\n".join(lines)
|
|
155
|
+
result = re.sub(r'"[^"]*"', '"STR"', result)
|
|
156
|
+
result = re.sub(r"'[^']*'", "'STR'", result)
|
|
157
|
+
result = re.sub(r'\b\d+\b', 'NUM', result)
|
|
158
|
+
return result
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
# (framework-specific route-declaration regex, framework name) pairs.
# NOTE(review): this table is not referenced by extract_endpoints() below,
# which hard-codes its own per-framework regexes — confirm whether this
# constant is consumed elsewhere or is dead code.
ENDPOINT_PATTERNS = [
    (r'(?:app|router)\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)', "express"),
    (r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', "flask"),
    (r'path\s*\(\s*["\']([^"\']+)["\']', "django"),
    (r'@(?:app|router)\.(get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', "fastapi"),
    (r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH)\s*\(', "nextjs"),
    (r'@(?:Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)', "spring"),
    (r'(?:Handle|HandleFunc)\s*\(\s*["\']([^"\']+)', "go-http"),
    (r'(?:get|post|put|patch|delete)\s+["\']([^"\']+)', "rails"),
]
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def extract_endpoints(index: dict, project_root: str | None = None) -> dict[str, Any]:
|
|
19
|
+
"""Extract API endpoints from source code."""
|
|
20
|
+
root = Path(project_root) if project_root else None
|
|
21
|
+
files = index.get("files", {})
|
|
22
|
+
|
|
23
|
+
endpoints: list[dict] = []
|
|
24
|
+
seen = set()
|
|
25
|
+
|
|
26
|
+
for rel_path, file_data in files.items():
|
|
27
|
+
if not isinstance(file_data, dict):
|
|
28
|
+
continue
|
|
29
|
+
|
|
30
|
+
source = None
|
|
31
|
+
if root:
|
|
32
|
+
try:
|
|
33
|
+
source = (root / rel_path).read_text(encoding="utf-8", errors="ignore")
|
|
34
|
+
except OSError:
|
|
35
|
+
continue
|
|
36
|
+
|
|
37
|
+
if not source:
|
|
38
|
+
continue
|
|
39
|
+
|
|
40
|
+
if _is_nextjs_route(rel_path):
|
|
41
|
+
methods = re.findall(
|
|
42
|
+
r'export\s+(?:async\s+)?function\s+(GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS)\s*\(',
|
|
43
|
+
source,
|
|
44
|
+
)
|
|
45
|
+
route_path = _nextjs_file_to_route(rel_path)
|
|
46
|
+
for method in methods:
|
|
47
|
+
key = (method.upper(), route_path, rel_path)
|
|
48
|
+
if key not in seen:
|
|
49
|
+
seen.add(key)
|
|
50
|
+
endpoints.append({
|
|
51
|
+
"method": method.upper(),
|
|
52
|
+
"path": route_path,
|
|
53
|
+
"file": rel_path,
|
|
54
|
+
"framework": "nextjs",
|
|
55
|
+
})
|
|
56
|
+
continue
|
|
57
|
+
|
|
58
|
+
lines = source.split("\n")
|
|
59
|
+
|
|
60
|
+
if _is_nextjs_route(rel_path):
|
|
61
|
+
continue
|
|
62
|
+
|
|
63
|
+
for line_num, line in enumerate(lines, 1):
|
|
64
|
+
stripped = line.strip()
|
|
65
|
+
|
|
66
|
+
if not stripped or stripped.startswith("#") or stripped.startswith("//"):
|
|
67
|
+
continue
|
|
68
|
+
|
|
69
|
+
if stripped.startswith("app.") or stripped.startswith("router."):
|
|
70
|
+
match = re.search(r'\.(get|post|put|delete|patch|all|use)\s*\(\s*["\']([^"\']+)', line)
|
|
71
|
+
if match:
|
|
72
|
+
method = match.group(1).upper()
|
|
73
|
+
path = match.group(2)
|
|
74
|
+
key = (method, path, rel_path)
|
|
75
|
+
if key not in seen:
|
|
76
|
+
seen.add(key)
|
|
77
|
+
endpoints.append({
|
|
78
|
+
"method": method,
|
|
79
|
+
"path": path,
|
|
80
|
+
"file": rel_path,
|
|
81
|
+
"line": line_num,
|
|
82
|
+
"framework": "express",
|
|
83
|
+
})
|
|
84
|
+
continue
|
|
85
|
+
|
|
86
|
+
if stripped.startswith("@"):
|
|
87
|
+
match = re.search(r'@(?:app|blueprint|bp)\.(route|get|post|put|delete|patch)\s*\(\s*["\']([^"\']+)', line)
|
|
88
|
+
if match:
|
|
89
|
+
method = match.group(1).upper()
|
|
90
|
+
if method == "ROUTE":
|
|
91
|
+
method = "GET"
|
|
92
|
+
path = match.group(2)
|
|
93
|
+
key = (method, path, rel_path)
|
|
94
|
+
if key not in seen:
|
|
95
|
+
seen.add(key)
|
|
96
|
+
endpoints.append({
|
|
97
|
+
"method": method,
|
|
98
|
+
"path": path,
|
|
99
|
+
"file": rel_path,
|
|
100
|
+
"line": line_num,
|
|
101
|
+
"framework": "flask",
|
|
102
|
+
})
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
if "path(" in stripped:
|
|
106
|
+
match = re.search(r'path\s*\(\s*["\']([^"\']+)["\']', line)
|
|
107
|
+
if match:
|
|
108
|
+
path = match.group(1)
|
|
109
|
+
key = ("GET", path, rel_path)
|
|
110
|
+
if key not in seen:
|
|
111
|
+
seen.add(key)
|
|
112
|
+
endpoints.append({
|
|
113
|
+
"method": "GET",
|
|
114
|
+
"path": path,
|
|
115
|
+
"file": rel_path,
|
|
116
|
+
"line": line_num,
|
|
117
|
+
"framework": "django",
|
|
118
|
+
})
|
|
119
|
+
continue
|
|
120
|
+
|
|
121
|
+
match = re.search(r'@(Get|Post|Put|Delete|Patch|Request)Mapping\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)', line)
|
|
122
|
+
if match:
|
|
123
|
+
method = match.group(1)
|
|
124
|
+
if method == "Request":
|
|
125
|
+
method = "GET"
|
|
126
|
+
path = match.group(2)
|
|
127
|
+
key = (method, path, rel_path)
|
|
128
|
+
if key not in seen:
|
|
129
|
+
seen.add(key)
|
|
130
|
+
endpoints.append({
|
|
131
|
+
"method": method,
|
|
132
|
+
"path": path,
|
|
133
|
+
"file": rel_path,
|
|
134
|
+
"line": line_num,
|
|
135
|
+
"framework": "spring",
|
|
136
|
+
})
|
|
137
|
+
continue
|
|
138
|
+
|
|
139
|
+
seen = set()
|
|
140
|
+
unique = []
|
|
141
|
+
for endpoint in endpoints:
|
|
142
|
+
key = (endpoint["method"], endpoint["path"], endpoint["file"])
|
|
143
|
+
if key not in seen:
|
|
144
|
+
seen.add(key)
|
|
145
|
+
unique.append(endpoint)
|
|
146
|
+
|
|
147
|
+
unique.sort(key=lambda x: (x["path"], x["method"]))
|
|
148
|
+
|
|
149
|
+
return {
|
|
150
|
+
"count": len(unique),
|
|
151
|
+
"endpoints": unique,
|
|
152
|
+
"frameworks": list(set(endpoint["framework"] for endpoint in unique)),
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _is_nextjs_route(path: str) -> bool:
|
|
157
|
+
"""Check if a file is a Next.js API/app route."""
|
|
158
|
+
normalized = path.replace("\\", "/")
|
|
159
|
+
return (
|
|
160
|
+
("/api/" in normalized and "route." in normalized)
|
|
161
|
+
or ("/app/" in normalized and "route." in normalized)
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _is_inside_string(lines: list[str], line_idx: int) -> bool:
|
|
166
|
+
"""Check if a line is inside a string literal."""
|
|
167
|
+
if line_idx < 0 or line_idx >= len(lines):
|
|
168
|
+
return False
|
|
169
|
+
|
|
170
|
+
quote_count = 0
|
|
171
|
+
in_string = False
|
|
172
|
+
current_quote = None
|
|
173
|
+
|
|
174
|
+
for current_index in range(line_idx + 1):
|
|
175
|
+
line = lines[current_index]
|
|
176
|
+
for char in line:
|
|
177
|
+
if char in ('"', "'", '`') and (current_index != line_idx or True):
|
|
178
|
+
if not in_string:
|
|
179
|
+
in_string = True
|
|
180
|
+
current_quote = char
|
|
181
|
+
quote_count = 1
|
|
182
|
+
elif char == current_quote:
|
|
183
|
+
if current_index == line_idx and line.index(char) > 0 and line[line.index(char) - 1] == '\\':
|
|
184
|
+
continue
|
|
185
|
+
quote_count += 1
|
|
186
|
+
if quote_count % 2 == 0:
|
|
187
|
+
in_string = False
|
|
188
|
+
current_quote = None
|
|
189
|
+
|
|
190
|
+
return in_string
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _nextjs_file_to_route(path: str) -> str:
|
|
194
|
+
"""Convert Next.js file path to route path."""
|
|
195
|
+
normalized = path.replace("\\", "/")
|
|
196
|
+
match = re.search(r'(?:src/)?app(/.*)/route\.(?:ts|js|tsx|jsx)', normalized)
|
|
197
|
+
if match:
|
|
198
|
+
return match.group(1)
|
|
199
|
+
match = re.search(r'pages(/.*?)\.(?:ts|js|tsx|jsx)', normalized)
|
|
200
|
+
if match:
|
|
201
|
+
route = match.group(1)
|
|
202
|
+
if route.endswith("/index"):
|
|
203
|
+
route = route[:-6] or "/"
|
|
204
|
+
return route
|
|
205
|
+
return normalized
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from difflib import SequenceMatcher
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def fuzzy_search(index: dict, query: str, threshold: float = 0.5, limit: int = 20) -> list[dict[str, Any]]:
    """Fuzzy search for symbols — finds approximate matches.

    Scoring: exact (case-insensitive) name match scores 1.0, substring
    0.9, otherwise sequence similarity; boosted to at least 0.75 on an
    initials match and 0.8 when every query word occurs in the name.
    Returns up to *limit* matches scoring at least *threshold*,
    best first.
    """
    needle = query.lower()
    hits: list[dict[str, Any]] = []

    for file_path, info in index.get("files", {}).items():
        if not isinstance(info, dict):
            continue
        for symbol in info.get("symbols", []):
            label = symbol.get("name", "")
            label_lower = label.lower()

            if needle in label_lower:
                score = 1.0 if needle == label_lower else 0.9
            else:
                score = SequenceMatcher(None, needle, label_lower).ratio()

            # Abbreviation match, e.g. "gud" against get_user_data.
            if needle in _extract_initials(label).lower():
                score = max(score, 0.75)

            # Every query word present somewhere in the name.
            if all(word in label_lower for word in needle.split()):
                score = max(score, 0.8)

            if score < threshold:
                continue
            hits.append({
                "name": label,
                "type": symbol.get("type"),
                "file": file_path,
                "line": symbol.get("line"),
                "params": symbol.get("params", []),
                "doc": symbol.get("doc"),
                "score": round(score, 3),
            })

    hits.sort(key=lambda item: item["score"], reverse=True)
    return hits[:limit]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def regex_search(index: dict, pattern: str, sym_type: str | None = None, limit: int = 20) -> list[dict[str, Any]]:
|
|
50
|
+
"""Search symbols using regex pattern."""
|
|
51
|
+
try:
|
|
52
|
+
regex = re.compile(pattern, re.IGNORECASE)
|
|
53
|
+
except re.error as e:
|
|
54
|
+
return [{"error": f"Invalid regex: {e}"}]
|
|
55
|
+
|
|
56
|
+
files = index.get("files", {})
|
|
57
|
+
results = []
|
|
58
|
+
|
|
59
|
+
for rel_path, file_data in files.items():
|
|
60
|
+
if not isinstance(file_data, dict):
|
|
61
|
+
continue
|
|
62
|
+
for sym in file_data.get("symbols", []):
|
|
63
|
+
name = sym.get("name", "")
|
|
64
|
+
if regex.search(name):
|
|
65
|
+
if sym_type and sym.get("type") != sym_type:
|
|
66
|
+
continue
|
|
67
|
+
results.append({
|
|
68
|
+
"name": name,
|
|
69
|
+
"type": sym.get("type"),
|
|
70
|
+
"file": rel_path,
|
|
71
|
+
"line": sym.get("line"),
|
|
72
|
+
"params": sym.get("params", []),
|
|
73
|
+
"doc": sym.get("doc"),
|
|
74
|
+
})
|
|
75
|
+
|
|
76
|
+
return results[:limit]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _extract_initials(name: str) -> str:
|
|
80
|
+
"""Extract initials from camelCase/PascalCase/snake_case name."""
|
|
81
|
+
initials = re.findall(r'[A-Z]', name)
|
|
82
|
+
if initials:
|
|
83
|
+
return ''.join(initials)
|
|
84
|
+
parts = name.split('_')
|
|
85
|
+
return ''.join(part[0] for part in parts if part)
|