codegraph-nav 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codegraph_nav/__init__.py +194 -0
- codegraph_nav/ast_grep_analyzer.py +448 -0
- codegraph_nav/cli.py +223 -0
- codegraph_nav/code_navigator.py +1328 -0
- codegraph_nav/code_search.py +1009 -0
- codegraph_nav/colors.py +209 -0
- codegraph_nav/completions.py +354 -0
- codegraph_nav/dart_analyzer.py +301 -0
- codegraph_nav/dependency_graph.py +814 -0
- codegraph_nav/domain/__init__.py +20 -0
- codegraph_nav/domain/routes.py +337 -0
- codegraph_nav/domain/schemas.py +229 -0
- codegraph_nav/domain/tags.py +87 -0
- codegraph_nav/exporters.py +563 -0
- codegraph_nav/go_analyzer.py +273 -0
- codegraph_nav/graph/__init__.py +72 -0
- codegraph_nav/graph/builder.py +409 -0
- codegraph_nav/graph/communities.py +402 -0
- codegraph_nav/graph/flows.py +311 -0
- codegraph_nav/graph/query.py +380 -0
- codegraph_nav/graph/schema.py +266 -0
- codegraph_nav/graph/search.py +257 -0
- codegraph_nav/graph/store.py +517 -0
- codegraph_nav/hints.py +195 -0
- codegraph_nav/import_resolver.py +891 -0
- codegraph_nav/js_ts_analyzer.py +564 -0
- codegraph_nav/line_reader.py +664 -0
- codegraph_nav/mcp/__init__.py +39 -0
- codegraph_nav/mcp/__main__.py +5 -0
- codegraph_nav/mcp/server.py +2228 -0
- codegraph_nav/py.typed +2 -0
- codegraph_nav/ruby_analyzer.py +259 -0
- codegraph_nav/rust_analyzer.py +379 -0
- codegraph_nav/token_efficient_renderer.py +743 -0
- codegraph_nav/watcher.py +382 -0
- codegraph_nav-0.1.0.dist-info/METADATA +487 -0
- codegraph_nav-0.1.0.dist-info/RECORD +41 -0
- codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
- codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
- codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
- codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Domain intelligence — routes, schemas, and domain tags.
|
|
2
|
+
|
|
3
|
+
Detects HTTP routes (14 frameworks), ORM schemas (8 ORMs), and infers
|
|
4
|
+
domain tags from imports and symbol names.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .routes import detect_routes, format_routes_minimal, group_crud_routes
|
|
8
|
+
from .schemas import detect_schemas, format_schemas_minimal
|
|
9
|
+
from .tags import TAG_PATTERNS, infer_file_tags, infer_project_tags
|
|
10
|
+
|
|
11
|
+
# Public API of the domain package: every name re-exported by the imports
# above, grouped by the submodule it comes from.
__all__ = [
    # from .routes
    "detect_routes",
    "format_routes_minimal",
    "group_crud_routes",
    # from .schemas
    "detect_schemas",
    "format_schemas_minimal",
    # from .tags
    "TAG_PATTERNS",
    "infer_file_tags",
    "infer_project_tags",
]
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""Framework route detection — 15+ frameworks via regex patterns.
|
|
2
|
+
|
|
3
|
+
Detects HTTP routes from source code using cheap content detection
|
|
4
|
+
followed by regex extraction. Supports Python, JS/TS, Go, Ruby, Java,
|
|
5
|
+
Rust, and PHP frameworks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from collections import defaultdict
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from .tags import infer_file_tags
|
|
16
|
+
|
|
17
|
+
# ==============================================================================
|
|
18
|
+
# Framework Detectors
|
|
19
|
+
# ==============================================================================
|
|
20
|
+
|
|
21
|
+
# Registry of route detectors, keyed by framework name.
#
# Each entry may define:
#   "detect"        - plain substrings; detect_routes() only runs "patterns"
#                     on a file whose content contains at least one of them.
#   "patterns"      - compiled regexes applied to the file content.
#   "file_patterns" - compiled regexes searched in the file *path* instead of
#                     the content (file-based routing).
#
# Most "patterns" capture (verb or decorator name, route path) in that order.
# Two entries differ: django captures (route path, identifier after the first
# comma) and axum captures (verb, first identifier inside the call).
FRAMEWORK_DETECTORS: dict[str, dict[str, Any]] = {
    # Python
    "fastapi": {
        "detect": ["from fastapi", "FastAPI", "APIRouter"],
        "patterns": [
            re.compile(r'@(?:app|router)\.(get|post|put|delete|patch)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    "flask": {
        "detect": ["from flask", "Flask("],
        "patterns": [
            # Group 1 may be "route", which is a decorator name rather than a verb.
            re.compile(
                r'@(?:app|bp|blueprint)\.(route|get|post|put|delete)\(\s*["\']([^"\']+)["\']'
            ),
        ],
    },
    "django": {
        "detect": ["from django", "urlpatterns"],
        "patterns": [
            # Group order is (path, identifier), unlike the other frameworks.
            re.compile(r"path\(\s*['\"]([^'\"]+)['\"].*?,\s*(\w+)"),
        ],
    },
    # JavaScript/TypeScript
    "express": {
        "detect": [
            "express()",
            "require('express')",
            'require("express")',
            "from 'express'",
            'from "express"',
        ],
        "patterns": [
            re.compile(r'(?:app|router)\.(get|post|put|delete|patch|all)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    "nextjs": {
        # No content markers: Next.js routes are recognised purely by file path.
        "detect": [],
        "file_patterns": [
            re.compile(r"app/.*/route\.[jt]sx?$"),
            re.compile(r"pages/api/.*\.[jt]sx?$"),
        ],
    },
    "nestjs": {
        "detect": ["@nestjs", "@Controller"],
        "patterns": [
            # `[^'\"]*` (not `+`) lets an empty path string match.
            re.compile(r"@(Get|Post|Put|Delete|Patch)\(\s*['\"]([^'\"]*)['\"]"),
        ],
    },
    "hono": {
        "detect": ["from 'hono'", 'from "hono"', "new Hono"],
        "patterns": [
            # Same regex as the express entry.
            re.compile(r'(?:app|router)\.(get|post|put|delete|patch|all)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    # Go
    "gin": {
        "detect": ["gin.Default", "gin.New", '"github.com/gin-gonic'],
        "patterns": [
            re.compile(r'(?:r|router|group)\.(GET|POST|PUT|DELETE|PATCH)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    "echo": {
        "detect": ["echo.New", '"github.com/labstack/echo'],
        "patterns": [
            # NOTE(review): `e\.` has no leading word boundary, so any receiver
            # whose name ends in "e" also matches - confirm this is intended.
            re.compile(r'e\.(GET|POST|PUT|DELETE|PATCH)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    # Ruby
    "rails": {
        "detect": ["Rails.application", "ActionController"],
        "patterns": [
            # NOTE(review): no leading word boundary, so the verb can match the
            # tail of a longer word (e.g. "widget 'x'") - confirm acceptable.
            re.compile(r"(get|post|put|patch|delete)\s+['\"]([^'\"]+)['\"]"),
        ],
    },
    # Java
    "spring": {
        "detect": ["@RestController", "@RequestMapping", "springframework"],
        "patterns": [
            # Group 1 is the annotation name (e.g. "GetMapping"), not a bare verb.
            re.compile(
                r'@(GetMapping|PostMapping|PutMapping|DeleteMapping|RequestMapping)\(\s*["\']([^"\']+)["\']'
            ),
        ],
    },
    # Rust
    "actix": {
        "detect": ["actix_web", "HttpServer"],
        "patterns": [
            re.compile(r'#\[(get|post|put|delete)\(\s*["\']([^"\']+)["\']'),
        ],
    },
    "axum": {
        "detect": ["axum::Router", "axum::routing"],
        "patterns": [
            # Group 2 is an identifier inside the call, not a quoted path.
            re.compile(r"\.(get|post|put|delete)\((\w+)"),
        ],
    },
    # PHP
    "laravel": {
        "detect": ["Route::get", "Route::post", "Illuminate"],
        "patterns": [
            re.compile(r"Route::(get|post|put|patch|delete)\(\s*['\"]([^'\"]+)['\"]"),
        ],
    },
}
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
# ==============================================================================
|
|
128
|
+
# Route Detection
|
|
129
|
+
# ==============================================================================
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _normalize_route_method(raw: str) -> str:
    """Turn a captured verb or annotation name into an HTTP method label."""
    method = raw.upper()
    # The Spring pattern captures the annotation name (e.g. "GetMapping"), not
    # the verb, so drop the suffix. "RequestMapping" names no single verb and
    # is reported as "*", the same wildcard used for Django routes below.
    if method.endswith("MAPPING"):
        verb = method[: -len("MAPPING")]
        return "*" if verb == "REQUEST" else verb
    return method


def detect_routes(code_map: dict, root_path: str = "") -> list[dict]:
    """Detect HTTP routes from code_map files.

    Every file listed under ``code_map["files"]`` is read from disk and checked
    against each entry of ``FRAMEWORK_DETECTORS``: first by file path
    (file-based routing such as Next.js), then by content (a cheap substring
    check followed by regex extraction).

    Args:
        code_map: Mapping with a ``"files"`` dict of ``{file_path: file_info}``.
            ``file_info`` may carry ``"imports"`` and ``"symbols"``, which are
            used for tag inference and handler lookup.
        root_path: Directory that ``file_path`` keys are relative to. When
            empty, the keys are opened as given.

    Returns:
        List of ``{method, path, file_path, handler_name, framework, tags,
        confidence}`` dicts. Files that cannot be read are skipped silently.
    """
    routes: list[dict] = []
    files = code_map.get("files", {})

    for file_path, file_info in files.items():
        abs_path = os.path.join(root_path, file_path) if root_path else file_path
        try:
            with open(abs_path, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        # Tags depend only on the file, so compute them at most once per file
        # (and not at all for files without routes).
        cached_tags: list[str] | None = None

        for fw_name, fw_config in FRAMEWORK_DETECTORS.items():
            # File-pattern-based detection (e.g., Next.js): one route per file.
            if any(fp.search(file_path) for fp in fw_config.get("file_patterns", [])):
                if cached_tags is None:
                    cached_tags = infer_file_tags(
                        file_path, file_info.get("imports", []), file_info.get("symbols", [])
                    )
                routes.append(
                    {
                        "method": _infer_nextjs_method(content) if fw_name == "nextjs" else "GET",
                        "path": _file_to_route_path(file_path, fw_name),
                        "file_path": file_path,
                        "handler_name": None,
                        "framework": fw_name,
                        "tags": list(cached_tags),
                        "confidence": "high",
                    }
                )

            # Content-based detection: skip frameworks whose markers are absent.
            detect_strings = fw_config.get("detect", [])
            if not any(marker in content for marker in detect_strings):
                continue

            for pattern in fw_config.get("patterns", []):
                for match in pattern.finditer(content):
                    groups = match.groups()

                    if fw_name == "django":
                        # Django's pattern captures (path, identifier) rather
                        # than (method, path); path() does not state a verb, so
                        # the route is reported as "*" with lower confidence.
                        if groups:
                            routes.append(
                                {
                                    "method": "*",
                                    "path": groups[0],
                                    "file_path": file_path,
                                    "handler_name": None,
                                    "framework": fw_name,
                                    "tags": [],
                                    "confidence": "medium",
                                }
                            )
                        continue

                    if len(groups) < 2:
                        continue

                    if cached_tags is None:
                        cached_tags = infer_file_tags(
                            file_path, file_info.get("imports", []), file_info.get("symbols", [])
                        )
                    routes.append(
                        {
                            "method": _normalize_route_method(groups[0]),
                            "path": groups[1],
                            "file_path": file_path,
                            "handler_name": _find_handler_near_match(content, match, file_info),
                            "framework": fw_name,
                            # Copy so callers can mutate one route's tags safely.
                            "tags": list(cached_tags),
                            "confidence": "high",
                        }
                    )

    return routes
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _infer_nextjs_method(content: str) -> str:
|
|
221
|
+
"""Infer HTTP method from Next.js route handler exports."""
|
|
222
|
+
methods = []
|
|
223
|
+
for method in ("GET", "POST", "PUT", "DELETE", "PATCH"):
|
|
224
|
+
if re.search(rf"export\s+(?:async\s+)?function\s+{method}\b", content):
|
|
225
|
+
methods.append(method)
|
|
226
|
+
return ",".join(methods) if methods else "GET"
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _file_to_route_path(file_path: str, framework: str) -> str:
|
|
230
|
+
"""Convert file path to route path for file-based routing."""
|
|
231
|
+
path = file_path
|
|
232
|
+
# Next.js: app/api/users/[id]/route.ts → /api/users/:id
|
|
233
|
+
path = re.sub(r"^app/", "/", path)
|
|
234
|
+
path = re.sub(r"^pages/", "/", path)
|
|
235
|
+
path = re.sub(r"/route\.[jt]sx?$", "", path)
|
|
236
|
+
path = re.sub(r"/index\.[jt]sx?$", "", path)
|
|
237
|
+
path = re.sub(r"\.[jt]sx?$", "", path)
|
|
238
|
+
path = re.sub(r"\[([^\]]+)\]", r":\1", path) # [id] → :id
|
|
239
|
+
return path or "/"
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def _find_handler_near_match(content: str, match: re.Match, file_info: dict) -> str | None:
|
|
243
|
+
"""Try to find the handler function name near a route match."""
|
|
244
|
+
# Look for the function defined right after/around the match
|
|
245
|
+
pos = match.end()
|
|
246
|
+
# Check for def/function within 200 chars after match
|
|
247
|
+
snippet = content[pos : pos + 200]
|
|
248
|
+
fn_match = re.search(r"(?:def|function|async function)\s+(\w+)", snippet)
|
|
249
|
+
if fn_match:
|
|
250
|
+
return fn_match.group(1)
|
|
251
|
+
|
|
252
|
+
# Check for preceding function (decorator pattern)
|
|
253
|
+
pre_snippet = content[max(0, match.start() - 100) : match.start()]
|
|
254
|
+
fn_match = re.search(r"(?:def|function|async function)\s+(\w+)", pre_snippet)
|
|
255
|
+
if fn_match:
|
|
256
|
+
return fn_match.group(1)
|
|
257
|
+
|
|
258
|
+
# Check symbols at matching line
|
|
259
|
+
line_num = content[: match.start()].count("\n") + 1
|
|
260
|
+
for sym in file_info.get("symbols", []):
|
|
261
|
+
lines = sym.get("lines", [0, 0])
|
|
262
|
+
if lines and abs(lines[0] - line_num) <= 3:
|
|
263
|
+
return str(sym["name"])
|
|
264
|
+
|
|
265
|
+
return None
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ==============================================================================
|
|
269
|
+
# CRUD Grouping
|
|
270
|
+
# ==============================================================================
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def group_crud_routes(routes: list[dict]) -> list[dict]:
    """Group routes by base path, collapsing CRUD endpoints.

    /users GET, POST + /users/:id GET, PUT, DELETE → /users CRUD[5]

    Dynamic segments written as ``:id``, ``{id}`` or ``<id>`` are removed to
    obtain the base path. A route whose ``method`` is a comma-joined list
    (as produced for Next.js files, e.g. ``"GET,POST"``) counts once per
    listed method.

    Args:
        routes: Route dicts with at least ``method``, ``path``, ``file_path``
            and ``framework``; ``tags`` is optional.

    Returns:
        A new list. Groups exposing at least three distinct CRUD methods are
        replaced by one summary dict (``method == "CRUD"`` plus ``count`` and
        ``methods``); all other routes are passed through unchanged.
    """
    crud_methods = {"GET", "POST", "PUT", "DELETE", "PATCH"}
    by_base: dict[str, list[dict]] = defaultdict(list)

    for route in routes:
        # Strip dynamic segments for grouping
        base = re.sub(r"/:[^/]+", "", route["path"])
        base = re.sub(r"/\{[^}]+\}", "", base)
        base = re.sub(r"/<[^>]+>", "", base)  # Flask/Django style <int:id>
        base = base.rstrip("/") or "/"
        by_base[base].append(route)

    grouped = []
    for base, group in by_base.items():
        # One entry per (route, method) pair; splits "GET,POST" style values.
        verbs = [verb.strip() for r in group for verb in r["method"].split(",")]
        methods = sorted(set(verbs))

        # Three distinct CRUD methods imply at least three endpoints.
        if len(crud_methods.intersection(methods)) >= 3:
            tags = sorted({t for r in group for t in r.get("tags", [])})
            grouped.append(
                {
                    "method": "CRUD",
                    "path": base,
                    "file_path": group[0]["file_path"],
                    "handler_name": None,
                    "framework": group[0]["framework"],
                    "tags": tags,
                    "confidence": "high",
                    "count": len(verbs),
                    "methods": methods,
                }
            )
        else:
            grouped.extend(group)

    return grouped
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
# ==============================================================================
|
|
315
|
+
# Formatters
|
|
316
|
+
# ==============================================================================
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def format_routes_minimal(routes: list[dict], group_crud: bool = True, limit: int = 20) -> str:
    """Render routes as a compact multi-line summary.

    The header always reports ``len(routes)``. When ``group_crud`` is true the
    listed entries are the CRUD-collapsed form of ``routes``. At most ``limit``
    entries are listed, followed by a "+N more" line if any were cut off.
    """
    if not routes:
        return "No routes detected."

    shown = group_crud_routes(routes) if group_crud else routes

    def render(entry: dict) -> str:
        # Optional decorations: both collapse to "" when the value is missing/empty.
        tag_part = f" [{','.join(entry['tags'])}]" if entry.get("tags") else ""
        handler_part = f" → {entry['handler_name']}" if entry.get("handler_name") else ""
        if entry["method"] == "CRUD":
            return f" {entry['path']} CRUD[{entry.get('count', '?')}]{tag_part} ({entry['framework']})"
        return f" {entry['method']} {entry['path']}{handler_part}{tag_part} ({entry['framework']})"

    out = [f"{len(routes)} routes detected:"]
    out.extend(render(entry) for entry in shown[:limit])

    hidden = len(shown) - limit
    if hidden > 0:
        out.append(f" ... +{hidden} more")

    return "\n".join(out)
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""ORM schema detection — 8+ ORMs via regex patterns.
|
|
2
|
+
|
|
3
|
+
Detects model definitions from source code by matching ORM-specific patterns
|
|
4
|
+
in class signatures, decorators, and field definitions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
# ORM Detectors
|
|
16
|
+
# ==============================================================================
|
|
17
|
+
|
|
18
|
+
# Registry of ORM model detectors, keyed by ORM name.
#
# Each entry may define:
#   "detect"        - plain substrings; detect_schemas() only applies the
#                     patterns to a file containing at least one of them.
#   "class_pattern" - regex whose group 1 is the model name.
#   "field_pattern" - regex whose groups are (field name, field type); it is
#                     run on the text following a class_pattern match.
#   "file_patterns" - plain strings compared against the file path.
#   "model_pattern" - only present on the prisma entry.
ORM_DETECTORS: dict[str, dict[str, Any]] = {
    "sqlalchemy": {
        "detect": ["from sqlalchemy", "Column(", "relationship(", "mapped_column("],
        "class_pattern": re.compile(r"class\s+(\w+)\(.*?(?:Base|Model|db\.Model|DeclarativeBase)"),
        "field_pattern": re.compile(r"(\w+)\s*[:=]\s*(?:Column|db\.Column|mapped_column)\((\w+)"),
    },
    "django": {
        "detect": ["from django.db", "models.Model"],
        "class_pattern": re.compile(r"class\s+(\w+)\((?:models\.Model|.*Model)\)"),
        "field_pattern": re.compile(r"(\w+)\s*=\s*models\.(\w+Field)"),
    },
    "prisma": {
        # No content markers: Prisma schemas are recognised by file path only.
        "detect": [],
        "file_patterns": ["schema.prisma", "prisma/schema.prisma"],
        # NOTE(review): detect_schemas() hands Prisma files to
        # _extract_prisma_schemas(), which compiles its own patterns; these two
        # do not appear to be read anywhere in this module - confirm.
        "model_pattern": re.compile(r"model\s+(\w+)\s*\{([^}]+)\}"),
        "field_pattern": re.compile(r"(\w+)\s+(\w+)"),
    },
    "sequelize": {
        "detect": ["sequelize", "DataTypes", "Model.init", "define("],
        "class_pattern": re.compile(r"class\s+(\w+)\s+extends\s+Model"),
        "field_pattern": re.compile(r"(\w+):\s*\{[^}]*type:\s*DataTypes\.(\w+)"),
    },
    "typeorm": {
        "detect": ["@Entity", "typeorm", "@Column"],
        "class_pattern": re.compile(r"@Entity\(\)?\s*(?:export\s+)?class\s+(\w+)"),
        "field_pattern": re.compile(r"@Column\([^)]*\)\s*(\w+)\s*[!?]?:\s*(\w+)"),
    },
    "gorm": {
        "detect": ["gorm.Model", "gorm.io"],
        # NOTE(review): matches every struct declaration in a file that passes
        # the "detect" check, not only structs that embed gorm.Model.
        "class_pattern": re.compile(r"type\s+(\w+)\s+struct\s*\{"),
        "field_pattern": re.compile(r"(\w+)\s+(\w+)\s+`gorm:"),
    },
    "drizzle": {
        "detect": ["drizzle-orm", "pgTable", "mysqlTable", "sqliteTable"],
        # Group 1 is the variable the table is assigned to.
        "class_pattern": re.compile(
            r"(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:pg|mysql|sqlite)Table"
        ),
        "field_pattern": re.compile(r"(\w+):\s*(\w+)\("),
    },
    "mongoose": {
        "detect": ["mongoose", "Schema(", "model("],
        # Group 1 is the variable name with its "Schema" suffix removed.
        "class_pattern": re.compile(
            r"(?:const|let|var)\s+(\w+)Schema\s*=\s*new\s+(?:mongoose\.)?Schema"
        ),
        "field_pattern": re.compile(r"(\w+):\s*\{[^}]*type:\s*(\w+)"),
    },
}
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# ==============================================================================
|
|
68
|
+
# Schema Detection
|
|
69
|
+
# ==============================================================================
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def detect_schemas(code_map: dict, root_path: str = "") -> list[dict]:
    """Detect ORM models/schemas from code_map files.

    Each file under ``code_map["files"]`` is read once. Files whose path
    matches an ORM's ``file_patterns`` are parsed as Prisma schemas; every file
    is then checked against the content-based detectors in ``ORM_DETECTORS``.
    Finally, class symbols recorded in the code_map are scanned as a fallback.

    Args:
        code_map: Mapping with a ``"files"`` dict of ``{file_path: file_info}``.
        root_path: Directory that ``file_path`` keys are relative to. When
            empty, the keys are opened as given.

    Returns:
        List of ``{name, file_path, orm, fields: [{name, type}], relations: []}``,
        deduplicated by ``(name, file_path)`` with the first detection kept.
        Unreadable files contribute no content-based results.
    """
    schemas: list[dict] = []
    files = code_map.get("files", {})

    for file_path in files:
        abs_path = os.path.join(root_path, file_path) if root_path else file_path
        try:
            with open(abs_path, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        # Special case: schema files recognised by path (Prisma). Parse once,
        # even if several file patterns match the same path.
        is_schema_file = any(
            fp in file_path
            for orm_config in ORM_DETECTORS.values()
            for fp in orm_config.get("file_patterns", [])
        )
        if is_schema_file:
            schemas.extend(_extract_prisma_schemas(content, file_path))

        # Content-based detection
        for orm_name, orm_config in ORM_DETECTORS.items():
            if not any(marker in content for marker in orm_config.get("detect", [])):
                continue

            class_pattern = orm_config.get("class_pattern")
            if not class_pattern:
                continue
            field_pattern = orm_config.get("field_pattern")

            class_matches = list(class_pattern.finditer(content))
            for index, class_match in enumerate(class_matches):
                fields = []
                if field_pattern:
                    # Look at up to ~2000 chars after the declaration, but stop
                    # at the next model so its fields are not attributed here.
                    body_start = class_match.end()
                    body_end = body_start + 2000
                    if index + 1 < len(class_matches):
                        body_end = min(body_end, class_matches[index + 1].start())
                    fields = [
                        {"name": field_match.group(1), "type": field_match.group(2)}
                        for field_match in field_pattern.finditer(content[body_start:body_end])
                    ]

                schemas.append(
                    {
                        "name": class_match.group(1),
                        "file_path": file_path,
                        "orm": orm_name,
                        "fields": fields,
                        "relations": [],
                    }
                )

    # Also detect from symbols (class signatures)
    schemas.extend(_detect_from_symbols(files))

    # Deduplicate by (name, file_path); earlier (richer) detections win.
    seen = set()
    unique = []
    for schema in schemas:
        key = (schema["name"], schema["file_path"])
        if key not in seen:
            seen.add(key)
            unique.append(schema)

    return unique
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _extract_prisma_schemas(content: str, file_path: str) -> list[dict]:
|
|
155
|
+
"""Extract models from Prisma schema file."""
|
|
156
|
+
schemas = []
|
|
157
|
+
model_pattern = re.compile(r"model\s+(\w+)\s*\{([^}]+)\}")
|
|
158
|
+
field_pattern = re.compile(r"^\s*(\w+)\s+(\w+)", re.MULTILINE)
|
|
159
|
+
|
|
160
|
+
for model_match in model_pattern.finditer(content):
|
|
161
|
+
name = model_match.group(1)
|
|
162
|
+
body = model_match.group(2)
|
|
163
|
+
fields = []
|
|
164
|
+
for field_match in field_pattern.finditer(body):
|
|
165
|
+
fname = field_match.group(1)
|
|
166
|
+
ftype = field_match.group(2)
|
|
167
|
+
if fname not in ("@@", "//"):
|
|
168
|
+
fields.append({"name": fname, "type": ftype})
|
|
169
|
+
schemas.append(
|
|
170
|
+
{
|
|
171
|
+
"name": name,
|
|
172
|
+
"file_path": file_path,
|
|
173
|
+
"orm": "prisma",
|
|
174
|
+
"fields": fields,
|
|
175
|
+
"relations": [],
|
|
176
|
+
}
|
|
177
|
+
)
|
|
178
|
+
return schemas
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _detect_from_symbols(files: dict) -> list[dict]:
|
|
182
|
+
"""Detect ORM models from symbol signatures in code_map."""
|
|
183
|
+
schemas = []
|
|
184
|
+
orm_base_patterns = [
|
|
185
|
+
(re.compile(r"class\s+\w+\(.*(?:Base|Model|db\.Model)"), "sqlalchemy"),
|
|
186
|
+
(re.compile(r"class\s+\w+\(models\.Model\)"), "django"),
|
|
187
|
+
]
|
|
188
|
+
|
|
189
|
+
for file_path, file_info in files.items():
|
|
190
|
+
for sym in file_info.get("symbols", []):
|
|
191
|
+
if sym.get("type") != "class":
|
|
192
|
+
continue
|
|
193
|
+
sig = sym.get("signature", "")
|
|
194
|
+
if not sig:
|
|
195
|
+
continue
|
|
196
|
+
for pattern, orm in orm_base_patterns:
|
|
197
|
+
if pattern.search(sig):
|
|
198
|
+
schemas.append(
|
|
199
|
+
{
|
|
200
|
+
"name": sym["name"],
|
|
201
|
+
"file_path": file_path,
|
|
202
|
+
"orm": orm,
|
|
203
|
+
"fields": [],
|
|
204
|
+
"relations": [],
|
|
205
|
+
}
|
|
206
|
+
)
|
|
207
|
+
break
|
|
208
|
+
return schemas
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# ==============================================================================
|
|
212
|
+
# Formatters
|
|
213
|
+
# ==============================================================================
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def format_schemas_minimal(schemas: list[dict], limit: int = 20) -> str:
    """Render schemas as a compact multi-line summary.

    One line per schema (name, optional ORM label, field count, file basename),
    capped at ``limit`` entries with a trailing "+N more" line when truncated.
    """
    if not schemas:
        return "No schemas detected."

    total = len(schemas)
    rows = [f"{total} schemas detected:"]

    for schema in schemas[:limit]:
        n_fields = len(schema.get("fields", []))
        # The ORM label is omitted entirely when missing or empty.
        orm_part = f" [{schema['orm']}]" if schema.get("orm") else ""
        rows.append(
            f" {schema['name']}{orm_part} {n_fields} fields ({Path(schema['file_path']).name})"
        )

    remaining = total - limit
    if remaining > 0:
        rows.append(f" ... +{remaining} more")

    return "\n".join(rows)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Domain tag inference from imports, symbol names, and file paths.
|
|
2
|
+
|
|
3
|
+
Tags represent high-level domain concerns: auth, db, cache, api, etc.
|
|
4
|
+
Used to enrich search results, flow descriptions, and route metadata.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
# Domain tag -> keywords that indicate it.
#
# infer_file_tags() lower-cases a file's path, imports, symbol names and
# signatures, and assigns a tag as soon as any one of its keywords occurs
# anywhere in that text as a plain substring. Keywords must therefore be
# lower-case.
# NOTE(review): short keywords ("ses", "api", "orm", "job", "ttl", "rest")
# also occur inside unrelated words such as "classes" or "format" - confirm
# whether that level of over-tagging is acceptable.
TAG_PATTERNS: dict[str, list[str]] = {
    "auth": [
        "auth",
        "jwt",
        "token",
        "session",
        "login",
        "password",
        "oauth",
        "credential",
        "permission",
    ],
    "db": [
        "sql",
        "query",
        "model",
        "orm",
        "database",
        "migrate",
        "schema",
        "prisma",
        "drizzle",
        "sequelize",
    ],
    "cache": ["redis", "cache", "memcache", "ttl", "lru"],
    "queue": ["bull", "celery", "rabbitmq", "kafka", "sqs", "queue", "worker", "job"],
    "email": ["email", "smtp", "sendgrid", "resend", "mailgun", "ses"],
    "payment": ["stripe", "payment", "billing", "invoice", "checkout", "charge"],
    "upload": ["upload", "multer", "s3", "storage", "bucket", "blob"],
    "ai": ["openai", "anthropic", "claude", "llm", "embedding", "gpt", "gemini"],
    "api": ["api", "endpoint", "route", "rest", "graphql", "grpc", "websocket"],
    "test": ["test", "spec", "mock", "fixture", "assert", "pytest"],
}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def infer_file_tags(
    file_path: str,
    imports: list[str],
    symbols: list[dict],
) -> list[str]:
    """Derive domain tags for one file.

    The file path, import strings, symbol names and symbol signatures are
    lower-cased and joined into one text; a tag applies when any of its
    ``TAG_PATTERNS`` keywords occurs in that text as a substring.

    Returns the matching tags, sorted and without duplicates.
    """
    pieces = [file_path.lower()]
    pieces += [module.lower() for module in imports]
    pieces += [symbol.get("name", "").lower() for symbol in symbols]
    pieces += [
        symbol.get("signature", "").lower() for symbol in symbols if symbol.get("signature")
    ]
    haystack = " ".join(pieces)

    # TAG_PATTERNS keys are unique, so no explicit de-duplication is needed.
    return sorted(
        tag
        for tag, keywords in TAG_PATTERNS.items()
        if any(keyword in haystack for keyword in keywords)
    )
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def infer_project_tags(code_map: dict) -> dict[str, list[str]]:
    """Compute domain tags for every file listed in a code_map.

    Returns ``{file_path: [tags]}``; files for which no tag is inferred are
    left out of the mapping.
    """
    tagged: dict[str, list[str]] = {}
    for path, info in code_map.get("files", {}).items():
        found = infer_file_tags(path, info.get("imports", []), info.get("symbols", []))
        if not found:
            continue
        tagged[path] = found
    return tagged
|