commiter_cli-0.3.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. commiter/__init__.py +3 -0
  2. commiter/adapters/__init__.py +0 -0
  3. commiter/adapters/base.py +96 -0
  4. commiter/adapters/django_rest.py +247 -0
  5. commiter/adapters/express.py +204 -0
  6. commiter/adapters/fastapi.py +170 -0
  7. commiter/adapters/flask.py +169 -0
  8. commiter/adapters/nextjs.py +180 -0
  9. commiter/adapters/prisma.py +76 -0
  10. commiter/adapters/raw_sql.py +191 -0
  11. commiter/adapters/react.py +129 -0
  12. commiter/adapters/sqlalchemy.py +99 -0
  13. commiter/adapters/supabase.py +68 -0
  14. commiter/auth.py +130 -0
  15. commiter/cli.py +667 -0
  16. commiter/correlator.py +208 -0
  17. commiter/extractors/__init__.py +0 -0
  18. commiter/extractors/api_calls.py +91 -0
  19. commiter/extractors/api_endpoints.py +354 -0
  20. commiter/extractors/backend_files.py +33 -0
  21. commiter/extractors/base.py +40 -0
  22. commiter/extractors/db_operations.py +69 -0
  23. commiter/extractors/dependencies.py +219 -0
  24. commiter/generic_resolver.py +204 -0
  25. commiter/handler_index.py +97 -0
  26. commiter/lib.py +63 -0
  27. commiter/middleware_index.py +350 -0
  28. commiter/models.py +117 -0
  29. commiter/parser.py +1283 -0
  30. commiter/prefix_index.py +211 -0
  31. commiter/report/__init__.py +0 -0
  32. commiter/report/ai.py +120 -0
  33. commiter/report/api_guide.py +217 -0
  34. commiter/report/architecture.py +930 -0
  35. commiter/report/console.py +254 -0
  36. commiter/report/json_output.py +122 -0
  37. commiter/report/markdown.py +163 -0
  38. commiter/scanner.py +383 -0
  39. commiter/type_index.py +304 -0
  40. commiter/uploader.py +46 -0
  41. commiter/utils/__init__.py +0 -0
  42. commiter/utils/env_reader.py +78 -0
  43. commiter/utils/file_classifier.py +187 -0
  44. commiter/utils/path_helpers.py +73 -0
  45. commiter/utils/tsconfig_resolver.py +281 -0
  46. commiter/wrapper_index.py +288 -0
  47. commiter_cli-0.3.0.dist-info/METADATA +14 -0
  48. commiter_cli-0.3.0.dist-info/RECORD +96 -0
  49. commiter_cli-0.3.0.dist-info/WHEEL +5 -0
  50. commiter_cli-0.3.0.dist-info/entry_points.txt +2 -0
  51. commiter_cli-0.3.0.dist-info/top_level.txt +2 -0
  52. tests/__init__.py +0 -0
  53. tests/fixtures/arch_backend/app.py +22 -0
  54. tests/fixtures/arch_backend/middleware/__init__.py +0 -0
  55. tests/fixtures/arch_backend/middleware/rate_limit.py +4 -0
  56. tests/fixtures/arch_backend/routes/__init__.py +0 -0
  57. tests/fixtures/arch_backend/routes/analytics.py +20 -0
  58. tests/fixtures/arch_backend/routes/auth.py +29 -0
  59. tests/fixtures/arch_backend/routes/projects.py +60 -0
  60. tests/fixtures/arch_backend/routes/users.py +55 -0
  61. tests/fixtures/arch_monorepo/apps/api/app.py +30 -0
  62. tests/fixtures/arch_monorepo/apps/api/middleware/__init__.py +0 -0
  63. tests/fixtures/arch_monorepo/apps/api/middleware/auth.py +17 -0
  64. tests/fixtures/arch_monorepo/apps/api/middleware/rate_limit.py +10 -0
  65. tests/fixtures/arch_monorepo/apps/api/routes/__init__.py +0 -0
  66. tests/fixtures/arch_monorepo/apps/api/routes/auth.py +46 -0
  67. tests/fixtures/arch_monorepo/apps/api/routes/invites.py +30 -0
  68. tests/fixtures/arch_monorepo/apps/api/routes/notifications.py +25 -0
  69. tests/fixtures/arch_monorepo/apps/api/routes/projects.py +80 -0
  70. tests/fixtures/arch_monorepo/apps/api/routes/tasks.py +91 -0
  71. tests/fixtures/arch_monorepo/apps/api/routes/users.py +48 -0
  72. tests/fixtures/arch_monorepo/apps/api/services/__init__.py +0 -0
  73. tests/fixtures/arch_monorepo/apps/api/services/email.py +11 -0
  74. tests/fixtures/backend_b/app.py +17 -0
  75. tests/fixtures/fastapi_app/app.py +48 -0
  76. tests/fixtures/fastapi_crossfile/routes.py +18 -0
  77. tests/fixtures/fastapi_crossfile/schemas.py +21 -0
  78. tests/fixtures/flask_app/app.py +33 -0
  79. tests/fixtures/flask_blueprint/app.py +7 -0
  80. tests/fixtures/flask_blueprint/routes/items.py +13 -0
  81. tests/fixtures/flask_blueprint/routes/users.py +20 -0
  82. tests/fixtures/middleware_test_flask/routes/public.py +8 -0
  83. tests/fixtures/middleware_test_flask/routes/users.py +26 -0
  84. tests/fixtures/python_deep_imports/app/__init__.py +0 -0
  85. tests/fixtures/python_deep_imports/app/api/__init__.py +0 -0
  86. tests/fixtures/python_deep_imports/app/api/health.py +11 -0
  87. tests/fixtures/python_deep_imports/app/api/v1/__init__.py +0 -0
  88. tests/fixtures/python_deep_imports/app/api/v1/items.py +18 -0
  89. tests/fixtures/python_deep_imports/app/api/v1/users.py +27 -0
  90. tests/fixtures/python_deep_imports/app/schemas/__init__.py +0 -0
  91. tests/fixtures/python_deep_imports/app/schemas/item.py +13 -0
  92. tests/fixtures/python_deep_imports/app/schemas/user.py +15 -0
  93. tests/fixtures/python_deep_imports/app/shared/__init__.py +0 -0
  94. tests/fixtures/python_deep_imports/app/shared/models.py +7 -0
  95. tests/fixtures/raw_sql_test/app.py +54 -0
  96. tests/test_architecture.py +757 -0
@@ -0,0 +1,930 @@
1
+ """Generate architecture graph JSON for the interactive frontend diagram."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import os
8
+ import re
9
+ import subprocess
10
+ from pathlib import Path
11
+
12
+ from commiter.models import (
13
+ APICall,
14
+ APIEndpoint,
15
+ DBOperation,
16
+ FileClassification,
17
+ FileRole,
18
+ RepoDocumentation,
19
+ )
20
+
21
+ # Avoid circular import — ScanResult is only used for type hints.
22
+ from typing import TYPE_CHECKING
23
+ if TYPE_CHECKING:
24
+ from commiter.scanner import ScanResult
25
+ from commiter.middleware_index import MiddlewareIndex
26
+ from commiter.type_index import TypeIndex
27
+
28
+
29
+ # ---------------------------------------------------------------------------
30
+ # Public API
31
+ # ---------------------------------------------------------------------------
32
+
33
+ def _get_repo_full_name(repo_root: str) -> str | None:
34
+ """Extract the GitHub owner/repo from the git remote URL.
35
+
36
+ Handles both SSH (git@github.com:owner/repo.git) and HTTPS
37
+ (https://github.com/owner/repo.git) formats.
38
+ Returns None if not a git repo or no remote configured.
39
+ """
40
+ try:
41
+ result = subprocess.run(
42
+ ["git", "-C", repo_root, "remote", "get-url", "origin"],
43
+ capture_output=True, text=True, timeout=5,
44
+ )
45
+ if result.returncode != 0:
46
+ return None
47
+ url = result.stdout.strip()
48
+
49
+ # SSH: git@github.com:owner/repo.git
50
+ m = re.match(r"git@[^:]+:(.+?)(?:\.git)?$", url)
51
+ if m:
52
+ return m.group(1)
53
+
54
+ # HTTPS: https://github.com/owner/repo.git
55
+ m = re.match(r"https?://[^/]+/(.+?)(?:\.git)?$", url)
56
+ if m:
57
+ return m.group(1)
58
+
59
+ return None
60
+ except Exception:
61
+ return None
62
+
63
+
64
def generate_architecture(scan_results: list[ScanResult]) -> str:
    """Build the full architecture payload and return it as a JSON string."""
    multi_repo = len(scan_results) > 1

    # Nodes and file trees are built per repo; edges need every node at once.
    all_nodes: list[dict] = []
    all_file_trees: list[dict] = []
    for sr in scan_results:
        repo_prefix = sr.doc.repo_name if multi_repo else ""
        all_nodes += _build_nodes(sr.doc, sr.file_list, sr.doc.repo_path, repo_prefix)
        all_file_trees += _build_file_tree(sr.file_list, sr.doc.repo_path)

    # Edges need the full node list to resolve cross-repo references.
    all_edges = _build_edges([sr.doc for sr in scan_results], all_nodes, scan_results)

    # Layout is handled by the frontend (dagre) — the CLI emits x:0, y:0,
    # so _compute_layout is intentionally not called here.
    all_analysis = _build_node_analysis(all_nodes, scan_results, all_edges)
    node_hashes = _compute_node_hashes(all_nodes, all_analysis)

    # Git remote full names let the backend correlate commit history.
    repo_full_names = [
        name
        for name in (_get_repo_full_name(sr.doc.repo_path) for sr in scan_results)
        if name
    ]

    payload = {
        "nodes": all_nodes,
        "edges": all_edges,
        "fileTree": all_file_trees,
        "nodeAnalysis": all_analysis,
        "nodeHashes": node_hashes,
        "repoFullNames": repo_full_names,
    }
    return json.dumps(payload, indent=2, default=str)
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Node building
116
+ # ---------------------------------------------------------------------------
117
+
118
# Regexes / lookup tables used by _classify_node to bucket files into
# architecture node types.

# Next.js-style pages: app-router "page.tsx" files, or files directly under
# a /pages/ directory (excluding /pages/api/ API routes).
_PAGE_PATTERNS = re.compile(
    r"(^|/)page\.(tsx?|jsx?)$"
    r"|/pages/(?!api/)[^/]+\.(tsx?|jsx?)$"
)
# Any path containing "provider" (case-insensitive) counts as a React provider.
_PROVIDER_PATTERN = re.compile(r"provider", re.IGNORECASE)
# Directories whose JS/TS files form the API client / wrapper layer.
_API_CLIENT_DIRS = {"lib/api", "services/api", "utils/api", "lib/services"}
# Conventional backend route directory names.
# NOTE(review): not referenced anywhere in this module's visible code —
# possibly used elsewhere or dead; confirm before removing.
_BACKEND_ROUTE_DIRS = {"routes", "controllers", "handlers", "views", "endpoints", "api"}
# Paths that look like database client modules.
_DB_CLIENT_PATTERNS = re.compile(r"supabase|prisma|database|db", re.IGNORECASE)
# Config files that still deserve an "infra" service node.
_INFRA_FILES = {"Dockerfile", "docker-compose.yml", "docker-compose.yaml", ".env", "vercel.json", "fly.toml"}

# Files that get a node on the canvas but should not be connected with edges.
# Useful for context files (env vars, config) that the AI chat can reference
# but don't represent meaningful architectural connections.
ISOLATED_NODE_FILES = {".env", ".env.local", ".env.production", ".env.development"}
132
+
133
+
134
+ def _slugify(text: str) -> str:
135
+ """Convert a label to a URL-safe node ID slug."""
136
+ s = re.sub(r"[^a-z0-9]+", "-", text.lower()).strip("-")
137
+ return s or "node"
138
+
139
+
140
+ def _label_from_path(rel_path: str) -> str:
141
+ """Derive a human-friendly label from a relative file path."""
142
+ p = Path(rel_path)
143
+ # For page.tsx files use the parent directory name
144
+ if re.match(r"^page\.", p.name) or re.match(r"^route\.", p.name):
145
+ return p.parent.name.replace("-", " ").replace("_", " ").title()
146
+ return p.stem.replace("-", " ").replace("_", " ").title()
147
+
148
+
149
def _classify_node(fc: FileClassification, rel_path: str) -> tuple[str, str] | None:
    """Return (node_type, category) for a file, or None to skip it."""
    role = fc.role
    name = Path(rel_path).name

    # Package markers, tests, and migrations never become nodes.
    if name == "__init__.py":
        return None
    if role in (FileRole.TEST, FileRole.MIGRATION):
        return None

    # Config files are skipped unless they describe infrastructure.
    if role == FileRole.CONFIG:
        return ("service", "infra") if name in _INFRA_FILES else None

    rp = rel_path.replace("\\", "/")

    # Frontend pages win over every other frontend bucket.
    if _PAGE_PATTERNS.search(rp):
        return ("page", "frontend")

    # Remaining frontend files: provider if the path says so, else component.
    if role == FileRole.FRONTEND:
        if _PROVIDER_PATTERN.search(rp):
            return ("provider", "frontend")
        return ("component", "frontend")

    # API client / wrapper layer (JS/TS files in lib/api etc.).
    if fc.language in ("javascript", "typescript", "tsx") and any(
        api_dir in rp for api_dir in _API_CLIENT_DIRS
    ):
        return ("api", "backend")

    # Backend route files — the caller groups these into one service node.
    if role == FileRole.BACKEND:
        return ("service", "backend")

    # DB client files (path looks like a database module).
    if _DB_CLIENT_PATTERNS.search(rp):
        return ("database", "data")

    return None
192
+
193
+
194
def _build_nodes(
    doc: RepoDocumentation,
    file_list: list[str],
    repo_root: str,
    repo_prefix: str,
) -> list[dict]:
    """Assemble architecture nodes using smart grouping.

    Individual files become one node each; backend route files are merged
    into a single service node, and one database node is created per ORM.
    """
    nodes: list[dict] = []
    used_ids: set[str] = set()

    def _rel(path: str) -> str:
        # Repo-relative path with forward slashes (Windows-safe).
        return os.path.relpath(path, repo_root).replace("\\", "/")

    def _node(node_id: str, label: str, node_type: str, category: str, files: list[str]) -> dict:
        # Key order matters for the serialized JSON — keep it stable.
        return {
            "id": node_id,
            "label": label,
            "type": node_type,
            "category": category,
            "x": 0,
            "y": 0,
            "description": "",
            "files": files,
        }

    # file_path -> classification for quick lookup.
    fc_map: dict[str, FileClassification] = {
        fc.file_path: fc for fc in doc.file_classifications
    }
    # Files that declare endpoints are backend regardless of classifier result.
    endpoint_files: set[str] = {ep.file_path for ep in doc.endpoints}

    backend_route_files: list[str] = []

    for abs_path in file_list:
        fc = fc_map.get(abs_path)
        if fc is None:
            continue
        rel = _rel(abs_path)

        classification = _classify_node(fc, rel)
        if classification is None and abs_path in endpoint_files:
            classification = ("service", "backend")
        if classification is None:
            continue

        node_type, category = classification
        if (node_type, category) == ("service", "backend"):
            # Grouped into one service node after the loop.
            backend_route_files.append(abs_path)
            continue

        label = _label_from_path(rel)
        node = _node(
            _make_id(node_type, label, repo_prefix, used_ids),
            label, node_type, category, [rel],
        )
        if Path(rel).name in ISOLATED_NODE_FILES:
            node["isolated"] = True
        nodes.append(node)

    # --- Grouped backend service node ---
    if backend_route_files:
        # Label from the primary framework when known.
        fw = doc.frameworks[0] if doc.frameworks else "Backend"
        label = f"{fw.title()} Backend"
        nodes.append(_node(
            _make_id("service", label, repo_prefix, used_ids),
            label, "service", "backend",
            [_rel(f) for f in backend_route_files],
        ))

    # --- Database nodes (one per ORM detected) ---
    db_orms: dict[str, list[str]] = {}
    for op in doc.db_operations:
        db_orms.setdefault(op.orm_library, []).append(op.file_path)

    for orm, file_paths in db_orms.items():
        label = "Database" if orm == "raw_sql" else orm.title()
        nodes.append(_node(
            _make_id("database", label, repo_prefix, used_ids),
            label, "database", "data",
            sorted({_rel(f) for f in file_paths}),
        ))

    return nodes
299
+
300
+
301
def _make_id(node_type: str, label: str, repo_prefix: str, used: set[str]) -> str:
    """Generate a unique node ID, suffixing -2, -3, ... on collision.

    The chosen ID is recorded in *used* so later calls stay unique.
    """
    parts = [node_type, _slugify(label)]
    if repo_prefix:
        parts.insert(0, _slugify(repo_prefix))
    base = "-".join(parts)

    candidate = base
    n = 1
    while candidate in used:
        n += 1
        candidate = f"{base}-{n}"
    used.add(candidate)
    return candidate
313
+
314
+
315
+ # ---------------------------------------------------------------------------
316
+ # Edge building
317
+ # ---------------------------------------------------------------------------
318
+
319
+ def _find_node_for_file(nodes: list[dict], rel_path: str) -> dict | None:
320
+ """Find the node that owns a given relative file path."""
321
+ for node in nodes:
322
+ if rel_path in node["files"]:
323
+ return node
324
+ return None
325
+
326
+
327
+ def _find_node_for_abs_file(nodes: list[dict], abs_path: str, repo_root: str) -> dict | None:
328
+ """Find the node that owns a given absolute file path."""
329
+ rel = os.path.relpath(abs_path, repo_root).replace("\\", "/")
330
+ return _find_node_for_file(nodes, rel)
331
+
332
+
333
+ def _find_api_gateway(call: APICall, nodes: list[dict]) -> dict | None:
334
+ """If a call was traced through a wrapper in an api-type node, return that node.
335
+
336
+ Detects patterns like 'apiClient() in client.ts:5' from the traced_from field,
337
+ then finds the api-type node whose files contain that wrapper file.
338
+ """
339
+ traced = call.traced_from
340
+ if not traced or " in " not in traced:
341
+ return None
342
+ file_part = traced.split(" in ", 1)[1].split(":")[0] # "client.ts"
343
+ for node in nodes:
344
+ if node["type"] != "api":
345
+ continue
346
+ for f in node["files"]:
347
+ if f.endswith(file_part) or f.endswith("/" + file_part):
348
+ return node
349
+ return None
350
+
351
+
352
def _build_edges(
    docs: list[RepoDocumentation],
    nodes: list[dict],
    scan_results: list[ScanResult],
) -> list[dict]:
    """Build edges from extracted relationships.

    Runs four passes over the scan results, in order:
      1. API edges   — frontend api_calls matched to backend endpoints.
      2. Data edges  — files with DB operations linked to database nodes.
      3. Auth edges  — auth-guarded endpoints linked to an auth provider node.
      4. Import edges — render/dependency edges from per-file import scanning.

    Edges touching isolated nodes are suppressed, self-loops are dropped,
    and (from, to, type) triples are deduplicated.

    NOTE(review): the *docs* parameter is never read in this body — every
    pass iterates scan_results instead; presumably kept for interface
    compatibility. Confirm with callers before removing.
    """
    edges: list[dict] = []
    edge_counter = 0
    seen_edges: set[tuple[str, str, str]] = set()  # (from, to, type) dedup
    # Nodes flagged isolated (e.g. .env files) never participate in edges.
    isolated_ids = {n["id"] for n in nodes if n.get("isolated")}

    def _clean_edge_label(label: str) -> str:
        """Strip protocol, host, and port from URL labels to keep just the path."""
        if not label:
            return label
        cleaned = re.sub(r"^https?://[^/]+", "", label)
        # Remove leading env var placeholders like :NEXT_PUBLIC_API_URL
        cleaned = re.sub(r"^:[A-Z_]+", "", cleaned)
        # Fall back to the original label if cleaning stripped everything.
        return cleaned.strip() or label

    def _add_edge(from_id: str, to_id: str, edge_type: str, label: str = "") -> None:
        # Central choke point: filtering, dedup, id assignment all happen here.
        nonlocal edge_counter
        if from_id in isolated_ids or to_id in isolated_ids:
            return
        key = (from_id, to_id, edge_type)
        if key in seen_edges or from_id == to_id:
            return
        seen_edges.add(key)
        edge_counter += 1
        label = _clean_edge_label(label)
        edges.append({
            "id": f"e-{edge_counter}",
            "from": from_id,
            "to": to_id,
            "type": edge_type,
            # Omit the label key entirely when empty.
            **({"label": label} if label else {}),
        })

    # Build a lookup: endpoint route -> node_id (for correlating calls)
    ep_route_to_node: dict[str, str] = {}
    for sr in scan_results:
        doc = sr.doc
        for ep in doc.endpoints:
            rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
            node = _find_node_for_file(nodes, rel)
            if node:
                # Key format must match service_relationships.target_endpoint,
                # e.g. "GET /users/:id".
                key = f"{ep.http_method.upper()} {ep.route_pattern}"
                ep_route_to_node[key] = node["id"]

    # --- API edges: frontend call → backend endpoint ---
    for sr in scan_results:
        doc = sr.doc
        for call in doc.api_calls:
            rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
            source_node = _find_node_for_file(nodes, rel)
            if not source_node:
                continue

            # Try to find matching endpoint via service_relationships
            target_node_id = None
            for srel in doc.service_relationships:
                # startswith — presumably source_file may carry a ":line"
                # suffix beyond the bare path; verify against the correlator.
                if srel.source_file.startswith(call.file_path) and srel.connection_type == "api_call":
                    target_key = srel.target_endpoint
                    target_node_id = ep_route_to_node.get(target_key)
                    break

            # Fallback: try matching by URL pattern within same repo
            if not target_node_id:
                for ep in doc.endpoints:
                    if _url_matches(call.url_pattern, ep.route_pattern):
                        ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
                        target = _find_node_for_file(nodes, ep_rel)
                        if target:
                            target_node_id = target["id"]
                            break

            # Also try cross-repo endpoints
            if not target_node_id:
                for other_sr in scan_results:
                    if other_sr is sr:
                        continue
                    for ep in other_sr.doc.endpoints:
                        if _url_matches(call.url_pattern, ep.route_pattern):
                            ep_rel = os.path.relpath(ep.file_path, other_sr.doc.repo_path).replace("\\", "/")
                            target = _find_node_for_file(nodes, ep_rel)
                            if target:
                                target_node_id = target["id"]
                                break
                    if target_node_id:
                        break

            if target_node_id:
                # Route through API gateway node if the call was traced through a wrapper:
                # source -> gateway (labelled), gateway -> endpoint (unlabelled).
                gateway = _find_api_gateway(call, nodes)
                if gateway and gateway["id"] != source_node["id"]:
                    _add_edge(source_node["id"], gateway["id"], "api", call.url_pattern)
                    _add_edge(gateway["id"], target_node_id, "api", "")
                else:
                    _add_edge(source_node["id"], target_node_id, "api", call.url_pattern)

    # --- Data edges: node with DB operations → database node ---
    db_nodes = [n for n in nodes if n["type"] == "database"]
    for sr in scan_results:
        doc = sr.doc
        for op in doc.db_operations:
            rel = os.path.relpath(op.file_path, doc.repo_path).replace("\\", "/")
            source_node = _find_node_for_file(nodes, rel)
            if not source_node:
                continue
            # Find the database node for this ORM; "Database" is the
            # catch-all label used for raw SQL (see _build_nodes).
            for db_node in db_nodes:
                if op.orm_library.lower() in db_node["label"].lower() or db_node["label"] == "Database":
                    _add_edge(source_node["id"], db_node["id"], "data", op.table_name)
                    break

    # --- Auth edges: service node → auth provider (heuristic) ---
    # First provider node whose label mentions "auth" wins.
    auth_provider = None
    for n in nodes:
        if n["type"] == "provider" and re.search(r"auth", n["label"], re.IGNORECASE):
            auth_provider = n
            break

    if auth_provider:
        for sr in scan_results:
            for ep in sr.doc.endpoints:
                # Guarded endpoint: explicit auth decorator, or any middleware
                # whose name smells like authentication.
                if ep.auth_decorators or any(
                    re.search(r"auth|jwt|login|protect", mw, re.IGNORECASE)
                    for mw in ep.middleware
                ):
                    rel = os.path.relpath(ep.file_path, sr.doc.repo_path).replace("\\", "/")
                    source_node = _find_node_for_file(nodes, rel)
                    if source_node:
                        _add_edge(source_node["id"], auth_provider["id"], "auth", "requires auth")

    # --- Dependency / render edges: page/component imports → other nodes ---
    # NOTE(review): every node is probed against every repo root; files from
    # other repos are filtered only by the os.path.isfile() check below.
    for sr in scan_results:
        doc = sr.doc
        alias_resolver = sr.alias_resolver
        for node in nodes:
            for rel_file in node["files"]:
                abs_path = os.path.join(doc.repo_path, rel_file)
                if not os.path.isfile(abs_path):
                    continue
                imports = _extract_imports_simple(abs_path)
                for imp_path in imports:
                    target = _resolve_import_to_node(
                        imp_path, rel_file, doc.repo_path, nodes,
                        alias_resolver=alias_resolver, caller_abs=abs_path,
                    )
                    if target and target["id"] != node["id"]:
                        # UI-layer imports render as "render" edges; everything
                        # else is a generic dependency.
                        edge_type = "render" if node["type"] in ("page", "component") and target["type"] in ("component", "provider") else "dependency"
                        _add_edge(node["id"], target["id"], edge_type, "")

    return edges
506
+
507
+
508
+ def _url_matches(call_url: str, route_pattern: str) -> bool:
509
+ """Quick check if a frontend URL could match a backend route."""
510
+ # Normalize both to comparable segments
511
+ call_clean = re.sub(r"\$\{[^}]+\}", ":param", call_url)
512
+ call_clean = re.sub(r"^https?://[^/]+", "", call_clean).strip("/")
513
+ # Normalize :varName params (from template literal resolution) to :param
514
+ call_clean = re.sub(r":(\w+)", ":param", call_clean)
515
+
516
+ route_clean = re.sub(r"<(?:\w+:)?(\w+)>", r":param", route_pattern)
517
+ route_clean = re.sub(r"\{(\w+)\}", r":param", route_clean)
518
+ route_clean = re.sub(r"\[(\w+)\]", r":param", route_clean).strip("/")
519
+
520
+ if not call_clean or not route_clean:
521
+ return False
522
+
523
+ # Check if one ends with the other (frontend may include /api prefix)
524
+ return call_clean.endswith(route_clean) or route_clean.endswith(call_clean) or call_clean == route_clean
525
+
526
+
527
+ def _extract_imports_simple(file_path: str) -> list[str]:
528
+ """Extract import paths from a file using simple regex (no AST needed)."""
529
+ try:
530
+ with open(file_path, "r", encoding="utf-8", errors="replace") as f:
531
+ source = f.read(50_000) # cap read size
532
+ except OSError:
533
+ return []
534
+
535
+ imports = []
536
+ # JS/TS: import ... from "path"
537
+ for m in re.finditer(r'''(?:import|from)\s+.*?["']([^"']+)["']''', source):
538
+ imports.append(m.group(1))
539
+ # Python: from path import ... / import path
540
+ for m in re.finditer(r"^(?:from\s+([\w.]+)|import\s+([\w.]+))", source, re.MULTILINE):
541
+ imports.append(m.group(1) or m.group(2))
542
+ return imports
543
+
544
+
545
+ def _resolve_import_to_node(
546
+ import_path: str,
547
+ source_rel: str,
548
+ repo_root: str,
549
+ nodes: list[dict],
550
+ alias_resolver: object | None = None,
551
+ caller_abs: str = "",
552
+ ) -> dict | None:
553
+ """Try to resolve an import path to a node by matching against node file lists."""
554
+ # Try tsconfig alias resolution first (handles @/, ~/, etc.)
555
+ if alias_resolver and not import_path.startswith("."):
556
+ resolved_abs = alias_resolver.resolve(import_path, caller_abs)
557
+ if resolved_abs:
558
+ resolved_rel = os.path.relpath(resolved_abs, repo_root).replace("\\", "/")
559
+ for node in nodes:
560
+ for nf in node["files"]:
561
+ if nf.replace("\\", "/") == resolved_rel:
562
+ return node
563
+ return None
564
+
565
+ if not import_path.startswith("."):
566
+ # Non-relative imports without alias resolver — skip
567
+ return None
568
+
569
+ source_dir = str(Path(source_rel).parent)
570
+ resolved = os.path.normpath(os.path.join(source_dir, import_path)).replace("\\", "/")
571
+
572
+ # Try common extensions
573
+ candidates = [resolved]
574
+ for ext in (".ts", ".tsx", ".js", ".jsx", "/index.ts", "/index.tsx", "/index.js"):
575
+ candidates.append(resolved + ext)
576
+
577
+ for node in nodes:
578
+ for node_file in node["files"]:
579
+ nf = node_file.replace("\\", "/")
580
+ for candidate in candidates:
581
+ if nf == candidate or nf.startswith(candidate + "/"):
582
+ return node
583
+ return None
584
+
585
+
586
+ # ---------------------------------------------------------------------------
587
+ # Per-node hashing (for incremental cache)
588
+ # ---------------------------------------------------------------------------
589
+
590
+ def _compute_node_hashes(
591
+ nodes: list[dict],
592
+ node_analysis: dict[str, dict],
593
+ ) -> dict[str, str]:
594
+ """Compute a SHA-256 hash per node based on its structural data.
595
+
596
+ The hash covers the node's identity, files, and analysis content (endpoints,
597
+ rules, data sources) but NOT descriptions — those are what the AI generates.
598
+ A changed hash means the node needs re-enrichment.
599
+ """
600
+ hashes: dict[str, str] = {}
601
+ for node in nodes:
602
+ node_data: dict = {
603
+ "id": node["id"],
604
+ "type": node["type"],
605
+ "category": node["category"],
606
+ "files": sorted(node["files"]),
607
+ }
608
+ analysis = node_analysis.get(node["id"], {})
609
+ node_data["endpoints"] = [
610
+ {"method": ep["method"], "path": ep["path"], "category": ep.get("category", "")}
611
+ for ep in analysis.get("endpoints", [])
612
+ ]
613
+ node_data["rules"] = [r["name"] for r in analysis.get("rules", [])]
614
+ node_data["dataUsed"] = [d["source"] for d in analysis.get("dataUsed", [])]
615
+
616
+ content = json.dumps(node_data, sort_keys=True)
617
+ hashes[node["id"]] = hashlib.sha256(content.encode()).hexdigest()
618
+ return hashes
619
+
620
+
621
+ # ---------------------------------------------------------------------------
622
+ # Layout
623
+ # ---------------------------------------------------------------------------
624
+
625
+ _LAYER_ORDER = [
626
+ ("frontend", "page"),
627
+ ("frontend", "component"),
628
+ ("frontend", "provider"),
629
+ ("backend", "api"),
630
+ ("backend", "service"),
631
+ ("infra", "service"),
632
+ ("data", "database"),
633
+ ]
634
+
635
+ _LAYER_Y = {
636
+ 0: 80,
637
+ 1: 280,
638
+ 2: 280, # components + providers share a row
639
+ 3: 480,
640
+ 4: 680,
641
+ 5: 680, # infra shares row with backend services
642
+ 6: 880,
643
+ }
644
+
645
+
646
+ def _compute_layout(nodes: list[dict]) -> None:
647
+ """Assign x/y coordinates using a simple layered layout."""
648
+ layers: dict[int, list[dict]] = {}
649
+ for node in nodes:
650
+ key = (node["category"], node["type"])
651
+ layer_idx = None
652
+ for i, (cat, ntype) in enumerate(_LAYER_ORDER):
653
+ if key == (cat, ntype):
654
+ layer_idx = i
655
+ break
656
+ if layer_idx is None:
657
+ layer_idx = 4 # default to backend service layer
658
+
659
+ layers.setdefault(layer_idx, []).append(node)
660
+
661
+ for layer_idx, layer_nodes in layers.items():
662
+ y = _LAYER_Y.get(layer_idx, 400)
663
+ count = len(layer_nodes)
664
+ spacing = 280 if count <= 5 else max(160, 1400 // count)
665
+ start_x = 120
666
+
667
+ for i, node in enumerate(layer_nodes):
668
+ node["x"] = start_x + i * spacing
669
+ node["y"] = y
670
+
671
+
672
+ # ---------------------------------------------------------------------------
673
+ # File tree
674
+ # ---------------------------------------------------------------------------
675
+
676
+ def _build_file_tree(file_list: list[str], repo_root: str) -> list[dict]:
677
+ """Convert a flat file list into a nested FileTreeNode structure."""
678
+ tree: dict = {}
679
+
680
+ for abs_path in file_list:
681
+ rel = os.path.relpath(abs_path, repo_root).replace("\\", "/")
682
+ parts = rel.split("/")
683
+ current = tree
684
+ for i, part in enumerate(parts):
685
+ if part not in current:
686
+ current[part] = {} if i < len(parts) - 1 else None
687
+ if current[part] is not None:
688
+ current = current[part]
689
+
690
+ return _dict_to_tree(tree, "")
691
+
692
+
693
+ def _dict_to_tree(d: dict, prefix: str) -> list[dict]:
694
+ """Recursively convert a nested dict into FileTreeNode list."""
695
+ result = []
696
+ for name in sorted(d.keys()):
697
+ path = f"{prefix}/{name}".lstrip("/") if prefix else name
698
+ if d[name] is None:
699
+ # File
700
+ result.append({"name": name, "type": "file", "path": path})
701
+ else:
702
+ # Folder
703
+ children = _dict_to_tree(d[name], path)
704
+ result.append({"name": name, "type": "folder", "path": path, "children": children})
705
+ return result
706
+
707
+
708
+ # ---------------------------------------------------------------------------
709
+ # Node analysis
710
+ # ---------------------------------------------------------------------------
711
+
712
+ def _build_node_analysis(
713
+ nodes: list[dict],
714
+ scan_results: list[ScanResult],
715
+ edges: list[dict],
716
+ ) -> dict[str, dict]:
717
+ """Assemble NodeAnalysis data for each node."""
718
+ analysis: dict[str, dict] = {}
719
+
720
+ # Build reverse lookup: node_id -> list of edges targeting it
721
+ edges_by_source: dict[str, list[dict]] = {}
722
+ for e in edges:
723
+ edges_by_source.setdefault(e["from"], []).append(e)
724
+
725
+ # Flatten all docs and build lookup helpers
726
+ all_endpoints: list[tuple[RepoDocumentation, ScanResult, APIEndpoint]] = []
727
+ all_calls: list[tuple[RepoDocumentation, APICall]] = []
728
+ all_db_ops: list[tuple[RepoDocumentation, DBOperation]] = []
729
+
730
+ for sr in scan_results:
731
+ doc = sr.doc
732
+ for ep in doc.endpoints:
733
+ all_endpoints.append((doc, sr, ep))
734
+ for call in doc.api_calls:
735
+ all_calls.append((doc, call))
736
+ for op in doc.db_operations:
737
+ all_db_ops.append((doc, op))
738
+
739
+ # Build lookup: endpoint route_pattern -> list of frontend node labels that call it
740
+ ep_used_by: dict[str, list[str]] = {}
741
+ for doc, call in all_calls:
742
+ call_rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
743
+ caller_node = _find_node_for_file(nodes, call_rel)
744
+ if not caller_node:
745
+ continue
746
+ for _, _, ep in all_endpoints:
747
+ if _url_matches(call.url_pattern, ep.route_pattern):
748
+ if caller_node["label"] not in ep_used_by.get(ep.route_pattern, []):
749
+ ep_used_by.setdefault(ep.route_pattern, []).append(caller_node["label"])
750
+
751
+ for node in nodes:
752
+ node_files = set(node["files"])
753
+
754
+ # --- dataUsed: API calls made by this node's files ---
755
+ data_used = []
756
+ seen_sources: set[str] = set()
757
+ for doc, call in all_calls:
758
+ call_rel = os.path.relpath(call.file_path, doc.repo_path).replace("\\", "/")
759
+ if call_rel in node_files:
760
+ source = f"{call.http_method} {call.url_pattern} via {call.client_library}"
761
+ if source in seen_sources:
762
+ continue
763
+ seen_sources.add(source)
764
+ url_name = call.url_pattern.rstrip("/").rsplit("/", 1)[-1] or call.url_pattern
765
+ data_used.append({
766
+ "name": url_name.replace("-", " ").replace("_", " ").title(),
767
+ "source": source,
768
+ })
769
+
770
+ # --- endpoints: for service/api nodes ---
771
+ endpoints_data = None
772
+ if node["type"] in ("service", "api"):
773
+ endpoints_data = []
774
+ for doc, sr, ep in all_endpoints:
775
+ ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
776
+ if ep_rel in node_files:
777
+ # Derive category from route path
778
+ category = _category_from_route(ep.route_pattern)
779
+
780
+ # Resolve request/response shapes via type index
781
+ req_shape = _resolve_type_shape(ep.request_body_type, ep.request_body_fields, sr.type_index)
782
+ res_shape = _resolve_type_shape(ep.response_type, ep.response_fields, sr.type_index)
783
+
784
+ # DB operations for this endpoint
785
+ db_ops = []
786
+ for table in ep.db_tables:
787
+ # Find matching operation type
788
+ op_types = set()
789
+ for d, op in all_db_ops:
790
+ if op.table_name == table:
791
+ op_types.add(op.operation_type.upper())
792
+ for op_type in sorted(op_types) or ["SELECT"]:
793
+ db_ops.append({
794
+ "type": op_type,
795
+ "table": table,
796
+ "description": "",
797
+ })
798
+
799
+ used_by = ep_used_by.get(ep.route_pattern, [])
800
+
801
+ ep_dict: dict = {
802
+ "method": ep.http_method.upper(),
803
+ "path": ep.route_pattern,
804
+ "description": "",
805
+ "category": category,
806
+ "usedBy": used_by,
807
+ }
808
+ if req_shape:
809
+ ep_dict["requestShape"] = req_shape
810
+ if res_shape:
811
+ ep_dict["responseShape"] = res_shape
812
+ if db_ops:
813
+ ep_dict["dbOperations"] = db_ops
814
+
815
+ endpoints_data.append(ep_dict)
816
+
817
+ # --- rules: middleware/decorators for service nodes ---
818
+ rules_data = None
819
+ if node["type"] in ("service",) and endpoints_data:
820
+ rules_data = _build_rules(node, scan_results)
821
+
822
+ entry: dict = {
823
+ "userFacing": "",
824
+ "dataUsed": data_used,
825
+ "commits": [],
826
+ "issues": [],
827
+ }
828
+ if endpoints_data:
829
+ entry["endpoints"] = endpoints_data
830
+ if rules_data:
831
+ entry["rules"] = rules_data
832
+
833
+ analysis[node["id"]] = entry
834
+
835
+ return analysis
836
+
837
+
838
+ def _category_from_route(route: str) -> str:
839
+ """Derive a category label from a route pattern's first meaningful segment."""
840
+ parts = [p for p in route.strip("/").split("/") if p and not re.match(r"^(v\d+|api)$", p)]
841
+ if parts:
842
+ return parts[0].replace("-", " ").replace("_", " ").title()
843
+ return "General"
844
+
845
+
846
+ def _resolve_type_shape(
847
+ type_name: str | None,
848
+ flat_fields: list[str],
849
+ type_index: TypeIndex,
850
+ ) -> dict | None:
851
+ """Resolve a type name to a {field: type} dict via the type index."""
852
+ if type_name:
853
+ typedef = type_index.resolve(type_name, None, "")
854
+ if typedef and typedef.fields:
855
+ shape = {}
856
+ for field in typedef.fields:
857
+ field_type = field.type_str
858
+ if field.optional:
859
+ field_type += "?"
860
+ shape[field.name] = field_type
861
+ return shape
862
+
863
+ # Fallback: use flat field list
864
+ if flat_fields:
865
+ return {f.split(":")[0].strip(): f.split(":", 1)[1].strip() if ":" in f else "unknown" for f in flat_fields}
866
+
867
+ return None
868
+
869
+
870
def _build_rules(node: dict, scan_results: list[ScanResult]) -> list[dict]:
    """Extract middleware/decorator rules for a service node.

    Walks every endpoint belonging to the node's files and collects its
    auth decorators and middleware into rule dicts. Each distinct name
    yields one rule; subsequent endpoints using the same name only extend
    that rule's "appliedTo" route list.

    Args:
        node: Architecture node dict with a "files" list of repo-relative paths.
        scan_results: Scan results whose docs carry the endpoints to inspect.

    Returns:
        List of rule dicts with "name", "type", "appliedTo", "description"
        and "implementation" (file:line of the first endpoint seen).
    """
    rules: list[dict] = []
    seen_names: set[str] = set()
    # Loop-invariant: hoisted out of the scan-result loop.
    node_files = set(node["files"])

    def _record(name: str, route: str, implementation: str) -> None:
        """Register *name* as a new rule, or add *route* to its existing one."""
        if name not in seen_names:
            seen_names.add(name)
            rules.append({
                "name": name,
                "type": _classify_rule_type(name),
                "appliedTo": [route],
                "description": "",
                "implementation": implementation,
            })
            return
        for r in rules:
            if r["name"] == name and route not in r["appliedTo"]:
                r["appliedTo"].append(route)

    for sr in scan_results:
        doc = sr.doc

        # Collect unique middleware/decorators from endpoints in this node.
        for ep in doc.endpoints:
            ep_rel = os.path.relpath(ep.file_path, doc.repo_path).replace("\\", "/")
            if ep_rel not in node_files:
                continue
            implementation = f"{ep_rel}:{ep.line}"
            # Auth decorators first, then middleware (original ordering).
            for name in (*ep.auth_decorators, *ep.middleware):
                _record(name, ep.route_pattern, implementation)

    return rules
919
+
920
+
921
+ def _classify_rule_type(name: str) -> str:
922
+ """Classify a middleware/decorator name into a rule type."""
923
+ lower = name.lower()
924
+ if re.search(r"auth|jwt|login|protect|require_auth|token", lower):
925
+ return "guard"
926
+ if re.search(r"valid|sanitiz|schema|check", lower):
927
+ return "validator"
928
+ if name.startswith("@"):
929
+ return "decorator"
930
+ return "middleware"