codegraph-nav 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. codegraph_nav/__init__.py +194 -0
  2. codegraph_nav/ast_grep_analyzer.py +448 -0
  3. codegraph_nav/cli.py +223 -0
  4. codegraph_nav/code_navigator.py +1328 -0
  5. codegraph_nav/code_search.py +1009 -0
  6. codegraph_nav/colors.py +209 -0
  7. codegraph_nav/completions.py +354 -0
  8. codegraph_nav/dart_analyzer.py +301 -0
  9. codegraph_nav/dependency_graph.py +814 -0
  10. codegraph_nav/domain/__init__.py +20 -0
  11. codegraph_nav/domain/routes.py +337 -0
  12. codegraph_nav/domain/schemas.py +229 -0
  13. codegraph_nav/domain/tags.py +87 -0
  14. codegraph_nav/exporters.py +563 -0
  15. codegraph_nav/go_analyzer.py +273 -0
  16. codegraph_nav/graph/__init__.py +72 -0
  17. codegraph_nav/graph/builder.py +409 -0
  18. codegraph_nav/graph/communities.py +402 -0
  19. codegraph_nav/graph/flows.py +311 -0
  20. codegraph_nav/graph/query.py +380 -0
  21. codegraph_nav/graph/schema.py +266 -0
  22. codegraph_nav/graph/search.py +257 -0
  23. codegraph_nav/graph/store.py +517 -0
  24. codegraph_nav/hints.py +195 -0
  25. codegraph_nav/import_resolver.py +891 -0
  26. codegraph_nav/js_ts_analyzer.py +564 -0
  27. codegraph_nav/line_reader.py +664 -0
  28. codegraph_nav/mcp/__init__.py +39 -0
  29. codegraph_nav/mcp/__main__.py +5 -0
  30. codegraph_nav/mcp/server.py +2228 -0
  31. codegraph_nav/py.typed +2 -0
  32. codegraph_nav/ruby_analyzer.py +259 -0
  33. codegraph_nav/rust_analyzer.py +379 -0
  34. codegraph_nav/token_efficient_renderer.py +743 -0
  35. codegraph_nav/watcher.py +382 -0
  36. codegraph_nav-0.1.0.dist-info/METADATA +487 -0
  37. codegraph_nav-0.1.0.dist-info/RECORD +41 -0
  38. codegraph_nav-0.1.0.dist-info/WHEEL +5 -0
  39. codegraph_nav-0.1.0.dist-info/entry_points.txt +4 -0
  40. codegraph_nav-0.1.0.dist-info/licenses/LICENSE +21 -0
  41. codegraph_nav-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,20 @@
1
+ """Domain intelligence — routes, schemas, and domain tags.
2
+
3
+ Detects HTTP routes (15+ frameworks), ORM schemas (8+ ORMs), and infers
4
+ domain tags from imports and symbol names.
5
+ """
6
+
7
+ from .routes import detect_routes, format_routes_minimal, group_crud_routes
8
+ from .schemas import detect_schemas, format_schemas_minimal
9
+ from .tags import TAG_PATTERNS, infer_file_tags, infer_project_tags
10
+
11
# Public API of the domain package: the names imported above from the
# routes, schemas, and tags submodules, re-exported for package-level import.
__all__ = [
    "detect_routes",
    "format_routes_minimal",
    "group_crud_routes",
    "detect_schemas",
    "format_schemas_minimal",
    "TAG_PATTERNS",
    "infer_file_tags",
    "infer_project_tags",
]
@@ -0,0 +1,337 @@
1
+ """Framework route detection — 15+ frameworks via regex patterns.
2
+
3
+ Detects HTTP routes from source code using cheap content detection
4
+ followed by regex extraction. Supports Python, JS/TS, Go, Ruby, Java,
5
+ Rust, and PHP frameworks.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import re
12
+ from collections import defaultdict
13
+ from typing import Any
14
+
15
+ from .tags import infer_file_tags
16
+
17
+ # ==============================================================================
18
+ # Framework Detectors
19
+ # ==============================================================================
20
+
21
+ FRAMEWORK_DETECTORS: dict[str, dict[str, Any]] = {
22
+ # Python
23
+ "fastapi": {
24
+ "detect": ["from fastapi", "FastAPI", "APIRouter"],
25
+ "patterns": [
26
+ re.compile(r'@(?:app|router)\.(get|post|put|delete|patch)\(\s*["\']([^"\']+)["\']'),
27
+ ],
28
+ },
29
+ "flask": {
30
+ "detect": ["from flask", "Flask("],
31
+ "patterns": [
32
+ re.compile(
33
+ r'@(?:app|bp|blueprint)\.(route|get|post|put|delete)\(\s*["\']([^"\']+)["\']'
34
+ ),
35
+ ],
36
+ },
37
+ "django": {
38
+ "detect": ["from django", "urlpatterns"],
39
+ "patterns": [
40
+ re.compile(r"path\(\s*['\"]([^'\"]+)['\"].*?,\s*(\w+)"),
41
+ ],
42
+ },
43
+ # JavaScript/TypeScript
44
+ "express": {
45
+ "detect": [
46
+ "express()",
47
+ "require('express')",
48
+ 'require("express")',
49
+ "from 'express'",
50
+ 'from "express"',
51
+ ],
52
+ "patterns": [
53
+ re.compile(r'(?:app|router)\.(get|post|put|delete|patch|all)\(\s*["\']([^"\']+)["\']'),
54
+ ],
55
+ },
56
+ "nextjs": {
57
+ "detect": [],
58
+ "file_patterns": [
59
+ re.compile(r"app/.*/route\.[jt]sx?$"),
60
+ re.compile(r"pages/api/.*\.[jt]sx?$"),
61
+ ],
62
+ },
63
+ "nestjs": {
64
+ "detect": ["@nestjs", "@Controller"],
65
+ "patterns": [
66
+ re.compile(r"@(Get|Post|Put|Delete|Patch)\(\s*['\"]([^'\"]*)['\"]"),
67
+ ],
68
+ },
69
+ "hono": {
70
+ "detect": ["from 'hono'", 'from "hono"', "new Hono"],
71
+ "patterns": [
72
+ re.compile(r'(?:app|router)\.(get|post|put|delete|patch|all)\(\s*["\']([^"\']+)["\']'),
73
+ ],
74
+ },
75
+ # Go
76
+ "gin": {
77
+ "detect": ["gin.Default", "gin.New", '"github.com/gin-gonic'],
78
+ "patterns": [
79
+ re.compile(r'(?:r|router|group)\.(GET|POST|PUT|DELETE|PATCH)\(\s*["\']([^"\']+)["\']'),
80
+ ],
81
+ },
82
+ "echo": {
83
+ "detect": ["echo.New", '"github.com/labstack/echo'],
84
+ "patterns": [
85
+ re.compile(r'e\.(GET|POST|PUT|DELETE|PATCH)\(\s*["\']([^"\']+)["\']'),
86
+ ],
87
+ },
88
+ # Ruby
89
+ "rails": {
90
+ "detect": ["Rails.application", "ActionController"],
91
+ "patterns": [
92
+ re.compile(r"(get|post|put|patch|delete)\s+['\"]([^'\"]+)['\"]"),
93
+ ],
94
+ },
95
+ # Java
96
+ "spring": {
97
+ "detect": ["@RestController", "@RequestMapping", "springframework"],
98
+ "patterns": [
99
+ re.compile(
100
+ r'@(GetMapping|PostMapping|PutMapping|DeleteMapping|RequestMapping)\(\s*["\']([^"\']+)["\']'
101
+ ),
102
+ ],
103
+ },
104
+ # Rust
105
+ "actix": {
106
+ "detect": ["actix_web", "HttpServer"],
107
+ "patterns": [
108
+ re.compile(r'#\[(get|post|put|delete)\(\s*["\']([^"\']+)["\']'),
109
+ ],
110
+ },
111
+ "axum": {
112
+ "detect": ["axum::Router", "axum::routing"],
113
+ "patterns": [
114
+ re.compile(r"\.(get|post|put|delete)\((\w+)"),
115
+ ],
116
+ },
117
+ # PHP
118
+ "laravel": {
119
+ "detect": ["Route::get", "Route::post", "Illuminate"],
120
+ "patterns": [
121
+ re.compile(r"Route::(get|post|put|patch|delete)\(\s*['\"]([^'\"]+)['\"]"),
122
+ ],
123
+ },
124
+ }
125
+
126
+
127
+ # ==============================================================================
128
+ # Route Detection
129
+ # ==============================================================================
130
+
131
+
132
+ def detect_routes(code_map: dict, root_path: str = "") -> list[dict]:
133
+ """Detect HTTP routes from code_map files.
134
+
135
+ Returns list of {method, path, file_path, handler_name, framework, tags, confidence}.
136
+ """
137
+ routes = []
138
+ files = code_map.get("files", {})
139
+
140
+ for file_path, file_info in files.items():
141
+ # Read file content
142
+ abs_path = os.path.join(root_path, file_path) if root_path else file_path
143
+ try:
144
+ with open(abs_path, encoding="utf-8", errors="replace") as f:
145
+ content = f.read()
146
+ except OSError:
147
+ continue
148
+
149
+ # Try each framework
150
+ for fw_name, fw_config in FRAMEWORK_DETECTORS.items():
151
+ # File-pattern-based detection (e.g., Next.js)
152
+ file_patterns = fw_config.get("file_patterns", [])
153
+ for fp in file_patterns:
154
+ if fp.search(file_path):
155
+ # Infer method from export names or default to GET
156
+ method = _infer_nextjs_method(content) if fw_name == "nextjs" else "GET"
157
+ route_path = _file_to_route_path(file_path, fw_name)
158
+ tags = infer_file_tags(
159
+ file_path, file_info.get("imports", []), file_info.get("symbols", [])
160
+ )
161
+ routes.append(
162
+ {
163
+ "method": method,
164
+ "path": route_path,
165
+ "file_path": file_path,
166
+ "handler_name": None,
167
+ "framework": fw_name,
168
+ "tags": tags,
169
+ "confidence": "high",
170
+ }
171
+ )
172
+ break
173
+
174
+ # Content-based detection
175
+ detect_strings = fw_config.get("detect", [])
176
+ if not detect_strings or not any(d in content for d in detect_strings):
177
+ continue
178
+
179
+ patterns = fw_config.get("patterns", [])
180
+ for pattern in patterns:
181
+ for match in pattern.finditer(content):
182
+ groups = match.groups()
183
+ if len(groups) >= 2:
184
+ method = groups[0].upper()
185
+ path = groups[1]
186
+ # Try to find handler name
187
+ handler = _find_handler_near_match(content, match, file_info)
188
+ tags = infer_file_tags(
189
+ file_path, file_info.get("imports", []), file_info.get("symbols", [])
190
+ )
191
+ routes.append(
192
+ {
193
+ "method": method,
194
+ "path": path,
195
+ "file_path": file_path,
196
+ "handler_name": handler,
197
+ "framework": fw_name,
198
+ "tags": tags,
199
+ "confidence": "high",
200
+ }
201
+ )
202
+ elif len(groups) == 1 and fw_name == "django":
203
+ # Django path() has different capture groups
204
+ path = groups[0]
205
+ routes.append(
206
+ {
207
+ "method": "*",
208
+ "path": path,
209
+ "file_path": file_path,
210
+ "handler_name": None,
211
+ "framework": fw_name,
212
+ "tags": [],
213
+ "confidence": "medium",
214
+ }
215
+ )
216
+
217
+ return routes
218
+
219
+
220
+ def _infer_nextjs_method(content: str) -> str:
221
+ """Infer HTTP method from Next.js route handler exports."""
222
+ methods = []
223
+ for method in ("GET", "POST", "PUT", "DELETE", "PATCH"):
224
+ if re.search(rf"export\s+(?:async\s+)?function\s+{method}\b", content):
225
+ methods.append(method)
226
+ return ",".join(methods) if methods else "GET"
227
+
228
+
229
+ def _file_to_route_path(file_path: str, framework: str) -> str:
230
+ """Convert file path to route path for file-based routing."""
231
+ path = file_path
232
+ # Next.js: app/api/users/[id]/route.ts → /api/users/:id
233
+ path = re.sub(r"^app/", "/", path)
234
+ path = re.sub(r"^pages/", "/", path)
235
+ path = re.sub(r"/route\.[jt]sx?$", "", path)
236
+ path = re.sub(r"/index\.[jt]sx?$", "", path)
237
+ path = re.sub(r"\.[jt]sx?$", "", path)
238
+ path = re.sub(r"\[([^\]]+)\]", r":\1", path) # [id] → :id
239
+ return path or "/"
240
+
241
+
242
+ def _find_handler_near_match(content: str, match: re.Match, file_info: dict) -> str | None:
243
+ """Try to find the handler function name near a route match."""
244
+ # Look for the function defined right after/around the match
245
+ pos = match.end()
246
+ # Check for def/function within 200 chars after match
247
+ snippet = content[pos : pos + 200]
248
+ fn_match = re.search(r"(?:def|function|async function)\s+(\w+)", snippet)
249
+ if fn_match:
250
+ return fn_match.group(1)
251
+
252
+ # Check for preceding function (decorator pattern)
253
+ pre_snippet = content[max(0, match.start() - 100) : match.start()]
254
+ fn_match = re.search(r"(?:def|function|async function)\s+(\w+)", pre_snippet)
255
+ if fn_match:
256
+ return fn_match.group(1)
257
+
258
+ # Check symbols at matching line
259
+ line_num = content[: match.start()].count("\n") + 1
260
+ for sym in file_info.get("symbols", []):
261
+ lines = sym.get("lines", [0, 0])
262
+ if lines and abs(lines[0] - line_num) <= 3:
263
+ return str(sym["name"])
264
+
265
+ return None
266
+
267
+
268
+ # ==============================================================================
269
+ # CRUD Grouping
270
+ # ==============================================================================
271
+
272
+
273
+ def group_crud_routes(routes: list[dict]) -> list[dict]:
274
+ """Group routes by base path, collapsing CRUD endpoints.
275
+
276
+ /users GET, POST + /users/:id GET, PUT, DELETE → /users CRUD[5]
277
+ """
278
+ by_base: dict[str, list[dict]] = defaultdict(list)
279
+
280
+ for route in routes:
281
+ # Strip dynamic segments for grouping
282
+ base = re.sub(r"/:[^/]+", "", route["path"])
283
+ base = re.sub(r"/\{[^}]+\}", "", base)
284
+ base = base.rstrip("/") or "/"
285
+ by_base[base].append(route)
286
+
287
+ grouped = []
288
+ for base, group in by_base.items():
289
+ methods = sorted({r["method"] for r in group})
290
+ crud_methods = {"GET", "POST", "PUT", "DELETE", "PATCH"}
291
+ is_crud = len(set(methods) & crud_methods) >= 3
292
+
293
+ if is_crud and len(group) >= 3:
294
+ tags = sorted({t for r in group for t in r.get("tags", [])})
295
+ grouped.append(
296
+ {
297
+ "method": "CRUD",
298
+ "path": base,
299
+ "file_path": group[0]["file_path"],
300
+ "handler_name": None,
301
+ "framework": group[0]["framework"],
302
+ "tags": tags,
303
+ "confidence": "high",
304
+ "count": len(group),
305
+ "methods": methods,
306
+ }
307
+ )
308
+ else:
309
+ grouped.extend(group)
310
+
311
+ return grouped
312
+
313
+
314
+ # ==============================================================================
315
+ # Formatters
316
+ # ==============================================================================
317
+
318
+
319
+ def format_routes_minimal(routes: list[dict], group_crud: bool = True, limit: int = 20) -> str:
320
+ """Format routes as compact string."""
321
+ if not routes:
322
+ return "No routes detected."
323
+
324
+ display = group_crud_routes(routes) if group_crud else routes
325
+
326
+ lines = [f"{len(routes)} routes detected:"]
327
+ for r in display[:limit]:
328
+ tags_str = f" [{','.join(r['tags'])}]" if r.get("tags") else ""
329
+ handler = f" → {r['handler_name']}" if r.get("handler_name") else ""
330
+ if r["method"] == "CRUD":
331
+ lines.append(f" {r['path']} CRUD[{r.get('count', '?')}]{tags_str} ({r['framework']})")
332
+ else:
333
+ lines.append(f" {r['method']} {r['path']}{handler}{tags_str} ({r['framework']})")
334
+ if len(display) > limit:
335
+ lines.append(f" ... +{len(display) - limit} more")
336
+
337
+ return "\n".join(lines)
@@ -0,0 +1,229 @@
1
+ """ORM schema detection — 8+ ORMs via regex patterns.
2
+
3
+ Detects model definitions from source code by matching ORM-specific patterns
4
+ in class signatures, decorators, and field definitions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ import re
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ # ==============================================================================
15
+ # ORM Detectors
16
+ # ==============================================================================
17
+
18
# Per-ORM detection table consumed by detect_schemas().
#
# Each entry may define:
#   "detect":        substrings; the ORM is only considered for a file whose
#                    content contains at least one of them. An empty list
#                    disables content-based detection for that entry.
#   "class_pattern": compiled regex whose group 1 is the model name.
#   "field_pattern": compiled regex run over the text following a class
#                    match; group 1 is the field name, group 2 its type.
#   "file_patterns": path fragments that mark a dedicated schema file.
ORM_DETECTORS: dict[str, dict[str, Any]] = {
    "sqlalchemy": {
        "detect": ["from sqlalchemy", "Column(", "relationship(", "mapped_column("],
        "class_pattern": re.compile(r"class\s+(\w+)\(.*?(?:Base|Model|db\.Model|DeclarativeBase)"),
        "field_pattern": re.compile(r"(\w+)\s*[:=]\s*(?:Column|db\.Column|mapped_column)\((\w+)"),
    },
    "django": {
        "detect": ["from django.db", "models.Model"],
        "class_pattern": re.compile(r"class\s+(\w+)\((?:models\.Model|.*Model)\)"),
        "field_pattern": re.compile(r"(\w+)\s*=\s*models\.(\w+Field)"),
    },
    "prisma": {
        # Matched by file path only. detect_schemas() hands matching files to
        # _extract_prisma_schemas(), which compiles its own patterns; the two
        # patterns below are not read by the code visible in this module.
        "detect": [],
        "file_patterns": ["schema.prisma", "prisma/schema.prisma"],
        "model_pattern": re.compile(r"model\s+(\w+)\s*\{([^}]+)\}"),
        "field_pattern": re.compile(r"(\w+)\s+(\w+)"),
    },
    "sequelize": {
        "detect": ["sequelize", "DataTypes", "Model.init", "define("],
        "class_pattern": re.compile(r"class\s+(\w+)\s+extends\s+Model"),
        "field_pattern": re.compile(r"(\w+):\s*\{[^}]*type:\s*DataTypes\.(\w+)"),
    },
    "typeorm": {
        "detect": ["@Entity", "typeorm", "@Column"],
        # Matches "@Entity(" with an optional ")" directly after it, so an
        # @Entity(...) call with arguments is not matched.
        "class_pattern": re.compile(r"@Entity\(\)?\s*(?:export\s+)?class\s+(\w+)"),
        "field_pattern": re.compile(r"@Column\([^)]*\)\s*(\w+)\s*[!?]?:\s*(\w+)"),
    },
    "gorm": {
        "detect": ["gorm.Model", "gorm.io"],
        # Matches every struct declaration in a file that mentions gorm.
        "class_pattern": re.compile(r"type\s+(\w+)\s+struct\s*\{"),
        "field_pattern": re.compile(r"(\w+)\s+(\w+)\s+`gorm:"),
    },
    "drizzle": {
        "detect": ["drizzle-orm", "pgTable", "mysqlTable", "sqliteTable"],
        "class_pattern": re.compile(
            r"(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:pg|mysql|sqlite)Table"
        ),
        "field_pattern": re.compile(r"(\w+):\s*(\w+)\("),
    },
    "mongoose": {
        "detect": ["mongoose", "Schema(", "model("],
        # Group 1 is the variable name without its "Schema" suffix.
        "class_pattern": re.compile(
            r"(?:const|let|var)\s+(\w+)Schema\s*=\s*new\s+(?:mongoose\.)?Schema"
        ),
        "field_pattern": re.compile(r"(\w+):\s*\{[^}]*type:\s*(\w+)"),
    },
}
65
+
66
+
67
+ # ==============================================================================
68
+ # Schema Detection
69
+ # ==============================================================================
70
+
71
+
72
def detect_schemas(code_map: dict, root_path: str = "") -> list[dict]:
    """Detect ORM models/schemas from code_map files.

    Each file under ``code_map["files"]`` is read once from disk (joined to
    ``root_path`` when given). Dedicated schema files (matched by an ORM's
    ``file_patterns``) are parsed as Prisma schemas; all files are then
    checked against every ORM's ``detect`` substrings and ``class_pattern``.
    Class symbols recorded in the code_map are used as a final fallback.
    Files that cannot be read are skipped.

    Args:
        code_map: Mapping with a ``"files"`` dict of ``{file_path: file_info}``.
        root_path: Directory that the ``file_path`` keys are relative to.

    Returns:
        List of {name, file_path, orm, fields: [{name, type}], relations: []},
        de-duplicated by (name, file_path) with the first detection winning.
    """
    schemas: list[dict] = []
    files = code_map.get("files", {})

    for file_path in files:
        abs_path = os.path.join(root_path, file_path) if root_path else file_path

        # Read each file once; both detection passes work on the same text.
        try:
            with open(abs_path, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            continue

        # Special case: Prisma schema files, identified by path alone.
        is_schema_file = any(
            fragment in file_path
            for orm_config in ORM_DETECTORS.values()
            for fragment in orm_config.get("file_patterns", [])
        )
        if is_schema_file:
            schemas.extend(_extract_prisma_schemas(content, file_path))

        # Content-based detection
        for orm_name, orm_config in ORM_DETECTORS.items():
            detect_strings = orm_config.get("detect", [])
            if not detect_strings or not any(d in content for d in detect_strings):
                continue

            class_pattern = orm_config.get("class_pattern")
            field_pattern = orm_config.get("field_pattern")
            if not class_pattern:
                continue

            class_matches = list(class_pattern.finditer(content))
            for index, class_match in enumerate(class_matches):
                fields = []
                if field_pattern:
                    # Scan ~2000 chars after the declaration, but stop at the
                    # next model declaration so a model is not credited with
                    # the fields of the model that follows it.
                    body_start = class_match.end()
                    body_end = body_start + 2000
                    if index + 1 < len(class_matches):
                        body_end = min(body_end, class_matches[index + 1].start())
                    fields = [
                        {"name": field_match.group(1), "type": field_match.group(2)}
                        for field_match in field_pattern.finditer(content[body_start:body_end])
                    ]

                schemas.append(
                    {
                        "name": class_match.group(1),
                        "file_path": file_path,
                        "orm": orm_name,
                        "fields": fields,
                        "relations": [],
                    }
                )

    # Also detect from symbols (class signatures)
    schemas.extend(_detect_from_symbols(files))

    # Deduplicate by (name, file_path); earlier (richer) detections win.
    seen = set()
    unique = []
    for schema in schemas:
        key = (schema["name"], schema["file_path"])
        if key not in seen:
            seen.add(key)
            unique.append(schema)

    return unique
152
+
153
+
154
+ def _extract_prisma_schemas(content: str, file_path: str) -> list[dict]:
155
+ """Extract models from Prisma schema file."""
156
+ schemas = []
157
+ model_pattern = re.compile(r"model\s+(\w+)\s*\{([^}]+)\}")
158
+ field_pattern = re.compile(r"^\s*(\w+)\s+(\w+)", re.MULTILINE)
159
+
160
+ for model_match in model_pattern.finditer(content):
161
+ name = model_match.group(1)
162
+ body = model_match.group(2)
163
+ fields = []
164
+ for field_match in field_pattern.finditer(body):
165
+ fname = field_match.group(1)
166
+ ftype = field_match.group(2)
167
+ if fname not in ("@@", "//"):
168
+ fields.append({"name": fname, "type": ftype})
169
+ schemas.append(
170
+ {
171
+ "name": name,
172
+ "file_path": file_path,
173
+ "orm": "prisma",
174
+ "fields": fields,
175
+ "relations": [],
176
+ }
177
+ )
178
+ return schemas
179
+
180
+
181
+ def _detect_from_symbols(files: dict) -> list[dict]:
182
+ """Detect ORM models from symbol signatures in code_map."""
183
+ schemas = []
184
+ orm_base_patterns = [
185
+ (re.compile(r"class\s+\w+\(.*(?:Base|Model|db\.Model)"), "sqlalchemy"),
186
+ (re.compile(r"class\s+\w+\(models\.Model\)"), "django"),
187
+ ]
188
+
189
+ for file_path, file_info in files.items():
190
+ for sym in file_info.get("symbols", []):
191
+ if sym.get("type") != "class":
192
+ continue
193
+ sig = sym.get("signature", "")
194
+ if not sig:
195
+ continue
196
+ for pattern, orm in orm_base_patterns:
197
+ if pattern.search(sig):
198
+ schemas.append(
199
+ {
200
+ "name": sym["name"],
201
+ "file_path": file_path,
202
+ "orm": orm,
203
+ "fields": [],
204
+ "relations": [],
205
+ }
206
+ )
207
+ break
208
+ return schemas
209
+
210
+
211
+ # ==============================================================================
212
+ # Formatters
213
+ # ==============================================================================
214
+
215
+
216
def format_schemas_minimal(schemas: list[dict], limit: int = 20) -> str:
    """Format schemas as compact string.

    One header line with the total count, then one line per schema (name,
    ORM when present, field count, file name) for at most ``limit`` schemas,
    and a trailing line counting whatever was left out.
    """
    if not schemas:
        return "No schemas detected."

    out = [f"{len(schemas)} schemas detected:"]
    for schema in schemas[:limit]:
        orm_part = ""
        if schema.get("orm"):
            orm_part = f" [{schema['orm']}]"
        n_fields = len(schema.get("fields", []))
        file_name = Path(schema["file_path"]).name
        out.append(f" {schema['name']}{orm_part} {n_fields} fields ({file_name})")

    omitted = len(schemas) - limit
    if omitted > 0:
        out.append(f" ... +{omitted} more")

    return "\n".join(out)
@@ -0,0 +1,87 @@
1
+ """Domain tag inference from imports, symbol names, and file paths.
2
+
3
+ Tags represent high-level domain concerns: auth, db, cache, api, etc.
4
+ Used to enrich search results, flow descriptions, and route metadata.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
# Domain tag → keywords. infer_file_tags() assigns a tag when any of its
# keywords occurs as a substring of the lower-cased file path, imports,
# symbol names, or symbol signatures, so all keywords are lower-case.
# NOTE(review): matching is by substring, so short keywords such as "ses",
# "orm", "api" or "job" also hit inside longer, unrelated words.
TAG_PATTERNS: dict[str, list[str]] = {
    "auth": [
        "auth",
        "jwt",
        "token",
        "session",
        "login",
        "password",
        "oauth",
        "credential",
        "permission",
    ],
    "db": [
        "sql",
        "query",
        "model",
        "orm",
        "database",
        "migrate",
        "schema",
        "prisma",
        "drizzle",
        "sequelize",
    ],
    "cache": ["redis", "cache", "memcache", "ttl", "lru"],
    "queue": ["bull", "celery", "rabbitmq", "kafka", "sqs", "queue", "worker", "job"],
    "email": ["email", "smtp", "sendgrid", "resend", "mailgun", "ses"],
    "payment": ["stripe", "payment", "billing", "invoice", "checkout", "charge"],
    "upload": ["upload", "multer", "s3", "storage", "bucket", "blob"],
    "ai": ["openai", "anthropic", "claude", "llm", "embedding", "gpt", "gemini"],
    "api": ["api", "endpoint", "route", "rest", "graphql", "grpc", "websocket"],
    "test": ["test", "spec", "mock", "fixture", "assert", "pytest"],
}
42
+
43
+
44
def infer_file_tags(
    file_path: str,
    imports: list[str],
    symbols: list[dict],
) -> list[str]:
    """Infer domain tags for a file from its imports, symbol names, and path.

    All signals are lower-cased and joined into one string; a tag applies as
    soon as one of its ``TAG_PATTERNS`` keywords occurs in that string.

    Returns sorted list of unique tags.
    """
    # Collect every text signal, lower-cased, in one searchable string.
    parts = [file_path.lower()]
    parts += [module.lower() for module in imports]
    parts += [entry.get("name", "").lower() for entry in symbols]
    parts += [entry.get("signature", "").lower() for entry in symbols if entry.get("signature")]
    haystack = " ".join(parts)

    matched = [
        label
        for label, needles in TAG_PATTERNS.items()
        if any(needle in haystack for needle in needles)
    ]
    return sorted(matched)
71
+
72
+
73
def infer_project_tags(code_map: dict) -> dict[str, list[str]]:
    """Infer domain tags for all files in a code_map.

    Files for which no tag is inferred are left out of the result.

    Returns {file_path: [tags]}.
    """
    tagged: dict[str, list[str]] = {}
    for path, info in code_map.get("files", {}).items():
        found = infer_file_tags(path, info.get("imports", []), info.get("symbols", []))
        if not found:
            continue
        tagged[path] = found
    return tagged