minder-cli 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132)
  1. minder/__init__.py +12 -0
  2. minder/api/routers/prompts.py +177 -0
  3. minder/application/__init__.py +1 -0
  4. minder/application/admin/__init__.py +11 -0
  5. minder/application/admin/dto.py +453 -0
  6. minder/application/admin/jobs.py +327 -0
  7. minder/application/admin/use_cases.py +1895 -0
  8. minder/auth/__init__.py +12 -0
  9. minder/auth/context.py +26 -0
  10. minder/auth/middleware.py +70 -0
  11. minder/auth/principal.py +59 -0
  12. minder/auth/rate_limiter.py +89 -0
  13. minder/auth/rbac.py +60 -0
  14. minder/auth/service.py +541 -0
  15. minder/bootstrap/__init__.py +9 -0
  16. minder/bootstrap/providers.py +109 -0
  17. minder/bootstrap/transport.py +807 -0
  18. minder/cache/__init__.py +10 -0
  19. minder/cache/providers.py +140 -0
  20. minder/chunking/__init__.py +4 -0
  21. minder/chunking/code_splitter.py +184 -0
  22. minder/chunking/splitter.py +136 -0
  23. minder/cli.py +1542 -0
  24. minder/config.py +179 -0
  25. minder/continuity.py +363 -0
  26. minder/dev.py +160 -0
  27. minder/embedding/__init__.py +9 -0
  28. minder/embedding/base.py +7 -0
  29. minder/embedding/local.py +65 -0
  30. minder/embedding/openai.py +7 -0
  31. minder/graph/__init__.py +11 -0
  32. minder/graph/edges.py +13 -0
  33. minder/graph/executor.py +127 -0
  34. minder/graph/graph.py +263 -0
  35. minder/graph/nodes/__init__.py +27 -0
  36. minder/graph/nodes/evaluator.py +21 -0
  37. minder/graph/nodes/guard.py +64 -0
  38. minder/graph/nodes/llm.py +59 -0
  39. minder/graph/nodes/planning.py +30 -0
  40. minder/graph/nodes/reasoning.py +87 -0
  41. minder/graph/nodes/reranker.py +141 -0
  42. minder/graph/nodes/retriever.py +86 -0
  43. minder/graph/nodes/verification.py +230 -0
  44. minder/graph/nodes/workflow_planner.py +250 -0
  45. minder/graph/runtime.py +15 -0
  46. minder/graph/state.py +26 -0
  47. minder/llm/__init__.py +5 -0
  48. minder/llm/base.py +14 -0
  49. minder/llm/local.py +381 -0
  50. minder/llm/openai.py +89 -0
  51. minder/models/__init__.py +109 -0
  52. minder/models/base.py +10 -0
  53. minder/models/client.py +137 -0
  54. minder/models/document.py +34 -0
  55. minder/models/error.py +32 -0
  56. minder/models/graph.py +114 -0
  57. minder/models/history.py +32 -0
  58. minder/models/job.py +62 -0
  59. minder/models/prompt.py +41 -0
  60. minder/models/repository.py +62 -0
  61. minder/models/rule.py +68 -0
  62. minder/models/session.py +51 -0
  63. minder/models/skill.py +52 -0
  64. minder/models/user.py +41 -0
  65. minder/models/workflow.py +35 -0
  66. minder/observability/__init__.py +57 -0
  67. minder/observability/audit.py +243 -0
  68. minder/observability/logging.py +253 -0
  69. minder/observability/metrics.py +448 -0
  70. minder/observability/tracing.py +215 -0
  71. minder/presentation/__init__.py +1 -0
  72. minder/presentation/http/__init__.py +1 -0
  73. minder/presentation/http/admin/__init__.py +3 -0
  74. minder/presentation/http/admin/api.py +1309 -0
  75. minder/presentation/http/admin/context.py +94 -0
  76. minder/presentation/http/admin/dashboard.py +111 -0
  77. minder/presentation/http/admin/jobs.py +208 -0
  78. minder/presentation/http/admin/memories.py +185 -0
  79. minder/presentation/http/admin/prompts.py +219 -0
  80. minder/presentation/http/admin/routes.py +127 -0
  81. minder/presentation/http/admin/runtime.py +650 -0
  82. minder/presentation/http/admin/search.py +368 -0
  83. minder/presentation/http/admin/skills.py +230 -0
  84. minder/prompts/__init__.py +646 -0
  85. minder/prompts/formatter.py +142 -0
  86. minder/resources/__init__.py +318 -0
  87. minder/retrieval/__init__.py +5 -0
  88. minder/retrieval/hybrid.py +178 -0
  89. minder/retrieval/mmr.py +116 -0
  90. minder/retrieval/multi_hop.py +115 -0
  91. minder/runtime.py +15 -0
  92. minder/server.py +145 -0
  93. minder/store/__init__.py +64 -0
  94. minder/store/document.py +115 -0
  95. minder/store/error.py +82 -0
  96. minder/store/feedback.py +114 -0
  97. minder/store/graph.py +588 -0
  98. minder/store/history.py +57 -0
  99. minder/store/interfaces.py +512 -0
  100. minder/store/milvus/__init__.py +11 -0
  101. minder/store/milvus/client.py +26 -0
  102. minder/store/milvus/collections.py +15 -0
  103. minder/store/milvus/vector_store.py +232 -0
  104. minder/store/mongodb/__init__.py +11 -0
  105. minder/store/mongodb/client.py +49 -0
  106. minder/store/mongodb/indexes.py +90 -0
  107. minder/store/mongodb/operational_store.py +993 -0
  108. minder/store/relational.py +1087 -0
  109. minder/store/repo_state.py +58 -0
  110. minder/store/rule.py +93 -0
  111. minder/store/vector.py +79 -0
  112. minder/tools/__init__.py +47 -0
  113. minder/tools/auth.py +94 -0
  114. minder/tools/graph.py +839 -0
  115. minder/tools/ingest.py +353 -0
  116. minder/tools/memory.py +381 -0
  117. minder/tools/query.py +307 -0
  118. minder/tools/registry.py +269 -0
  119. minder/tools/repo_scanner.py +1266 -0
  120. minder/tools/search.py +15 -0
  121. minder/tools/session.py +316 -0
  122. minder/tools/skills.py +899 -0
  123. minder/tools/workflow.py +215 -0
  124. minder/transport/__init__.py +4 -0
  125. minder/transport/base.py +286 -0
  126. minder/transport/sse.py +252 -0
  127. minder/transport/stdio.py +29 -0
  128. minder_cli-0.2.0.dist-info/METADATA +318 -0
  129. minder_cli-0.2.0.dist-info/RECORD +132 -0
  130. minder_cli-0.2.0.dist-info/WHEEL +4 -0
  131. minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
  132. minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1266 @@
1
+ """Repository graph extraction and sync-payload building."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import ast
6
+ import json
7
+ import re
8
+ import subprocess
9
+ import tomllib
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING, Any
13
+
14
+ if TYPE_CHECKING:
15
+ from minder.store.graph import KnowledgeGraphStore
16
+
17
+ _SOURCE_SUFFIXES = {
18
+ ".py",
19
+ ".ts",
20
+ ".tsx",
21
+ ".js",
22
+ ".jsx",
23
+ ".java",
24
+ ".go",
25
+ ".rs",
26
+ ".md",
27
+ ".json",
28
+ ".toml",
29
+ ".yaml",
30
+ ".yml",
31
+ ".txt",
32
+ }
33
+ _PYTHON_SUFFIXES = {".py"}
34
+ _SCRIPT_SUFFIXES = {".ts", ".tsx", ".js", ".jsx", ".java", ".go", ".rs"}
35
+ _MARKDOWN_SUFFIXES = {".md"}
36
+ _STRUCTURED_SUFFIXES = {".json", ".toml", ".yaml", ".yml"}
37
+ _SERVICE_MARKERS = {"pyproject.toml", "package.json", "go.mod", "Cargo.toml"}
38
+ _HTTP_ROUTE_DECORATORS = {"get", "post", "put", "patch", "delete", "route"}
39
+ _MQ_PUBLISH_CALLS = {"publish", "send", "produce", "emit"}
40
+ _MQ_CONSUME_CALLS = {"consume", "subscribe", "listen"}
41
+
42
+ # Spring Boot route annotation detection (Java)
43
+ _SPRING_ROUTE_PATTERN = re.compile(
44
+ r'@(GetMapping|PostMapping|PutMapping|PatchMapping|DeleteMapping|RequestMapping)'
45
+ r'\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)["\']',
46
+ re.MULTILINE,
47
+ )
48
+ # NestJS decorator detection (TypeScript) — @Get/@Post etc. at class level prefix
49
+ _NESTJS_CONTROLLER_PATTERN = re.compile(
50
+ r'@Controller\s*\(\s*["\']([^"\']*)["\']',
51
+ re.MULTILINE,
52
+ )
53
+ _NESTJS_ROUTE_PATTERN = re.compile(
54
+ r'@(Get|Post|Put|Patch|Delete|All)\s*\(\s*(?:["\']([^"\']*)["\'])?\s*\)',
55
+ re.MULTILINE,
56
+ )
57
+ # WebSocket endpoint detection
58
+ _WS_GATEWAY_PATTERN = re.compile(
59
+ r'@WebSocketGateway\s*\(\s*(?:(?:path\s*=\s*)?["\']([^"\']*)["\'])?\s*\)',
60
+ re.MULTILINE,
61
+ )
62
+ _WS_SUBSCRIBE_PATTERN = re.compile(
63
+ r'@SubscribeMessage\s*\(\s*["\']([^"\']+)["\']',
64
+ re.MULTILINE,
65
+ )
66
+ _SPRING_WS_MAPPING_PATTERN = re.compile(
67
+ r'@MessageMapping\s*\(\s*["\']([^"\']+)["\']',
68
+ re.MULTILINE,
69
+ )
70
+ # Go/Gin/Fiber route patterns
71
+ _GO_ROUTE_PATTERN = re.compile(
72
+ r'(?:r|router|app|engine)\.(GET|POST|PUT|PATCH|DELETE)\s*\(\s*"([^"]+)"',
73
+ re.MULTILINE,
74
+ )
75
+ # Rust/axum/actix-web route patterns
76
+ _RUST_ROUTE_ATTR_PATTERN = re.compile(
77
+ r'#\[(?:get|post|put|patch|delete)\s*\(\s*"([^"]+)"\s*\)\]',
78
+ re.MULTILINE,
79
+ )
80
+ _TODO_PATTERN = re.compile(r"(?:#|//|/\*+|\*+)\s*TODO\s*:?\s*(.+)?", re.IGNORECASE)
81
+ _MARKDOWN_TASK_PATTERN = re.compile(r"^\s*[-*]\s+\[\s\]\s+(.+)$")
82
+ _URL_PATTERN = re.compile(r"https?://[^\s\"')]+")
83
+ _MARKDOWN_HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$", re.MULTILINE)
84
+ _YAML_KEY_PATTERN = re.compile(r"^([A-Za-z0-9_.-]+)\s*:", re.MULTILINE)
85
+ _INI_KEY_PATTERN = re.compile(r"^([A-Za-z0-9_.-]+)\s*=", re.MULTILINE)
86
+ _LANGUAGE_BY_SUFFIX = {
87
+ ".py": "python",
88
+ ".ts": "typescript",
89
+ ".tsx": "typescript",
90
+ ".js": "javascript",
91
+ ".jsx": "javascript",
92
+ ".java": "java",
93
+ ".go": "go",
94
+ ".rs": "rust",
95
+ ".md": "markdown",
96
+ ".json": "json",
97
+ ".toml": "toml",
98
+ ".yaml": "yaml",
99
+ ".yml": "yaml",
100
+ ".txt": "text",
101
+ }
102
+
103
+
104
+ @dataclass(slots=True)
105
+ class _NodeSpec:
106
+ node_type: str
107
+ name: str
108
+ metadata: dict[str, Any]
109
+
110
+
111
+ @dataclass(slots=True)
112
+ class _EdgeSpec:
113
+ source_type: str
114
+ source_name: str
115
+ target_type: str
116
+ target_name: str
117
+ relation: str
118
+ weight: float = 1.0
119
+
120
+
121
+ class RepoScanner:
122
+ def __init__(
123
+ self,
124
+ graph_store: "KnowledgeGraphStore",
125
+ repo_root: str,
126
+ *,
127
+ project: str | None = None,
128
+ ) -> None:
129
+ self._store = graph_store
130
+ self._root = Path(repo_root).resolve()
131
+ self._project = project or self._root.name
132
+ self._git_metadata_cache: dict[str, dict[str, Any]] = {}
133
+ self._git_line_commit_cache: dict[tuple[str, int], dict[str, str] | None] = {}
134
+ self._git_commit_detail_cache: dict[str, dict[str, str]] = {}
135
+
136
async def scan(self) -> dict[str, Any]:
    """Scan the repository and upsert its nodes and edges into the store.

    Service directories become ``service`` nodes, each discovered source
    file becomes a ``file`` node, and each import becomes a ``module`` node
    linked to the file by an ``imports`` edge. Extractor-produced nodes are
    upserted next; an extractor-produced edge is written only when both of
    its endpoints were upserted while handling the same file.

    Returns:
        A summary holding the project name, the number of files scanned
        and the number of node and edge upsert calls issued.
    """
    service_dirs = self._discover_service_boundaries()
    source_files = self._discover_source_files()
    project_tag = {"project": self._project}

    node_total = 0
    edge_total = 0

    service_ids: dict[Path, Any] = {}
    for service_dir in service_dirs:
        service_node = await self._store.upsert_node(
            node_type="service",
            name=str(service_dir.relative_to(self._root)),
            metadata={**project_tag, "path": str(service_dir)},
        )
        service_ids[service_dir] = service_node.id
        node_total += 1

    for file_path in source_files:
        rel_path = str(file_path.relative_to(self._root))
        file_metadata, node_specs, edge_specs = self._extract_file_metadata(file_path, rel_path)
        file_scope = self._build_file_scoped_metadata(
            rel_path=rel_path,
            language=str(file_metadata.get("language", "text") or "text"),
            change_metadata=self._git_file_change_metadata(rel_path),
        )

        file_node = await self._store.upsert_node(
            node_type="file",
            name=rel_path,
            metadata={**project_tag, **file_scope, **file_metadata},
        )
        node_total += 1
        # Ids of everything upserted for this file, keyed by (type, name).
        ids_by_key: dict[tuple[str, str], Any] = {("file", rel_path): file_node.id}

        owner_dir = self._find_owning_service(file_path, service_dirs)
        owner_id = None if owner_dir is None else service_ids[owner_dir]
        if owner_dir is not None:
            await self._store.upsert_edge(
                source_id=owner_id,
                target_id=file_node.id,
                relation="contains",
            )
            edge_total += 1

        for module_name in self._extract_imports(file_path):
            module_node = await self._store.upsert_node(
                node_type="module",
                name=module_name,
                metadata=dict(project_tag),
            )
            node_total += 1
            ids_by_key[("module", module_name)] = module_node.id

            await self._store.upsert_edge(
                source_id=file_node.id,
                target_id=module_node.id,
                relation="imports",
            )
            edge_total += 1

            if owner_dir is None:
                continue
            # An import whose leading segment matches another service
            # directory's name is recorded as a service dependency.
            leading_segment = module_name.split(".")[0].split("/")[0].split(":")[0]
            for other_dir, other_id in service_ids.items():
                if other_dir == owner_dir or other_dir.name != leading_segment:
                    continue
                await self._store.upsert_edge(
                    source_id=owner_id,
                    target_id=other_id,
                    relation="depends_on",
                )
                edge_total += 1

        for spec in node_specs:
            node_scope = self._build_node_scoped_metadata(
                rel_path=rel_path,
                base_metadata=file_scope,
                node_metadata=spec.metadata,
            )
            stored = await self._store.upsert_node(
                node_type=spec.node_type,
                name=spec.name,
                metadata={**project_tag, **node_scope, **spec.metadata},
            )
            ids_by_key[(spec.node_type, spec.name)] = stored.id
            node_total += 1

        for edge in edge_specs:
            from_id = ids_by_key.get((edge.source_type, edge.source_name))
            to_id = ids_by_key.get((edge.target_type, edge.target_name))
            if from_id is None or to_id is None:
                continue
            await self._store.upsert_edge(
                source_id=from_id,
                target_id=to_id,
                relation=edge.relation,
                weight=edge.weight,
            )
            edge_total += 1

    return {
        "project": self._project,
        "files_scanned": len(source_files),
        "nodes_upserted": node_total,
        "edges_upserted": edge_total,
    }
240
+
241
@classmethod
def build_sync_payload(
    cls,
    repo_root: str,
    *,
    project: str | None = None,
    branch: str | None = None,
    diff_base: str | None = None,
    changed_files: list[str] | None = None,
    deleted_files: list[str] | None = None,
    branch_relationships: list[dict[str, Any]] | None = None,
    payload_version: str = "2026-04-15",
    source: str = "minder-cli",
) -> dict[str, Any]:
    """Build a serializable graph payload without touching a graph store.

    A scanner instance is created via ``__new__`` so no store is required;
    only the root, project and git caches are initialized on it.

    Args:
        repo_root: Repository directory; resolved to an absolute path.
        project: Project label; defaults to the root directory's name.
        branch: Copied into the payload as-is.
        diff_base: Copied into the payload as-is.
        changed_files: Paths relative to the root to include. When empty
            or omitted, every discoverable source file is included.
        deleted_files: Paths reported in the payload, sorted.
        branch_relationships: Copied into the payload as a new list.
        payload_version: Version tag written to the payload.
        source: Producer tag written to the payload.

    Returns:
        A dict with payload header fields, ``sync_metadata`` counters, and
        de-duplicated ``nodes`` and ``edges`` lists in first-seen order.
    """
    builder = cls.__new__(cls)
    builder._root = Path(repo_root).resolve()
    builder._project = project or builder._root.name
    builder._git_metadata_cache = {}
    builder._git_line_commit_cache = {}
    builder._git_commit_detail_cache = {}

    service_dirs = builder._discover_service_boundaries()
    source_files = builder._resolve_source_files(changed_files)
    # Keyed by (node_type, name) so repeated nodes merge in O(1); dict
    # insertion order keeps the first-seen ordering of the output list.
    nodes_by_key: dict[tuple[str, str], dict[str, Any]] = {}
    edges: list[dict[str, Any]] = []
    seen_edges: set[tuple[str, str, str, str, str]] = set()

    def add_node(node_type: str, name: str, metadata: dict[str, Any]) -> None:
        key = (node_type, name)
        existing = nodes_by_key.get(key)
        if existing is None:
            nodes_by_key[key] = {"node_type": node_type, "name": name, "metadata": metadata}
            return
        # Later metadata wins on key conflicts.
        existing["metadata"] = {**existing["metadata"], **metadata}

    def add_edge(edge_spec: _EdgeSpec) -> None:
        key = (
            edge_spec.source_type,
            edge_spec.source_name,
            edge_spec.target_type,
            edge_spec.target_name,
            edge_spec.relation,
        )
        if key in seen_edges:
            return
        seen_edges.add(key)
        edges.append(
            {
                "source": {"node_type": edge_spec.source_type, "name": edge_spec.source_name},
                "target": {"node_type": edge_spec.target_type, "name": edge_spec.target_name},
                "relation": edge_spec.relation,
                "weight": edge_spec.weight,
            }
        )

    for file_path in source_files:
        rel_path = str(file_path.relative_to(builder._root))
        file_metadata, extracted_nodes, extracted_edges = builder._extract_file_metadata(file_path, rel_path)
        change_metadata = builder._git_file_change_metadata(rel_path)
        common_metadata = builder._build_file_scoped_metadata(
            rel_path=rel_path,
            language=str(file_metadata.get("language", "text") or "text"),
            change_metadata=change_metadata,
        )
        add_node("file", rel_path, {"project": builder._project, **common_metadata, **file_metadata})

        owning_svc = builder._find_owning_service(file_path, service_dirs)
        if owning_svc is not None:
            service_name = str(owning_svc.relative_to(builder._root))
            add_node("service", service_name, {"project": builder._project, "path": str(owning_svc)})
            add_edge(_EdgeSpec("service", service_name, "file", rel_path, "contains"))

        for module_name in builder._extract_imports(file_path):
            add_node("module", module_name, {"project": builder._project})
            add_edge(_EdgeSpec("file", rel_path, "module", module_name, "imports"))

        for node_spec in extracted_nodes:
            node_common_metadata = builder._build_node_scoped_metadata(
                rel_path=rel_path,
                base_metadata=common_metadata,
                node_metadata=node_spec.metadata,
            )
            add_node(
                node_spec.node_type,
                node_spec.name,
                {"project": builder._project, **node_common_metadata, **node_spec.metadata},
            )
        for edge_spec in extracted_edges:
            add_edge(edge_spec)

    return {
        "payload_version": payload_version,
        "source": source,
        "repo_path": str(builder._root),
        "branch": branch,
        "diff_base": diff_base,
        "deleted_files": sorted(deleted_files or []),
        "sync_metadata": {
            "project": builder._project,
            "changed_file_count": len(source_files),
            "changed_files": [str(file_path.relative_to(builder._root)) for file_path in source_files],
            "deleted_file_count": len(deleted_files or []),
            "branch_relationship_count": len(branch_relationships or []),
        },
        "nodes": list(nodes_by_key.values()),
        "edges": edges,
        "branch_relationships": list(branch_relationships or []),
    }
352
+
353
def _discover_service_boundaries(self) -> list[Path]:
    """Return directories that contain a service marker file, deepest first.

    Markers located under a dot-prefixed directory *inside* the repository
    are ignored. The check uses the path relative to the root, so a
    repository that itself lives under a hidden directory (for example
    ``~/.cache/repo``) is still scanned.

    Returns:
        Unique marker parent directories sorted by descending depth, with
        ties broken by path so the order is reproducible.
    """
    found: set[Path] = set()
    for marker in _SERVICE_MARKERS:
        for marker_path in self._root.rglob(marker):
            inner_parts = marker_path.relative_to(self._root).parts
            if any(part.startswith(".") for part in inner_parts):
                continue
            found.add(marker_path.parent)
    # Deepest first so the most specific service wins ownership lookups.
    return sorted(found, key=lambda path: (-len(path.parts), str(path)))
363
+
364
def _discover_source_files(self) -> list[Path]:
    """Return every file under the root whose suffix is a known source suffix.

    Paths that pass through a dot-prefixed directory or ``__pycache__``
    *inside* the repository are skipped. The check uses the path relative
    to the root, so hidden directories above the repository do not cause
    the whole tree to be filtered out.

    Returns:
        A sorted list of unique file paths.
    """
    found: set[Path] = set()
    for suffix in _SOURCE_SUFFIXES:
        for path in self._root.rglob(f"*{suffix}"):
            inner_parts = path.relative_to(self._root).parts
            if any(part.startswith(".") or part == "__pycache__" for part in inner_parts):
                continue
            if path.is_file():
                found.add(path)
    return sorted(found)
373
+
374
def _resolve_source_files(self, changed_files: list[str] | None) -> list[Path]:
    """Map a changed-file list onto existing source files inside the root.

    Args:
        changed_files: Paths relative to the repository root. When empty
            or ``None`` the full discovery walk is used instead.

    Returns:
        A sorted list of unique resolved paths that exist, carry a known
        source suffix and lie inside the repository root. Entries that
        resolve outside the root (``..`` segments, absolute paths,
        symlinks) are dropped, because callers compute paths relative to
        the root and would otherwise raise ``ValueError``.
    """
    if not changed_files:
        return self._discover_source_files()
    resolved: set[Path] = set()
    for changed_file in changed_files:
        candidate = (self._root / changed_file).resolve()
        if not candidate.is_relative_to(self._root):
            continue
        if candidate.is_file() and candidate.suffix.lower() in _SOURCE_SUFFIXES:
            resolved.add(candidate)
    return sorted(resolved)
383
+
384
+ @staticmethod
385
+ def _find_owning_service(file_path: Path, service_dirs: list[Path]) -> Path | None:
386
+ for svc_dir in service_dirs:
387
+ try:
388
+ file_path.relative_to(svc_dir)
389
+ return svc_dir
390
+ except ValueError:
391
+ continue
392
+ return None
393
+
394
def _extract_file_metadata(
    self,
    file_path: Path,
    rel_path: str,
) -> tuple[dict[str, Any], list[_NodeSpec], list[_EdgeSpec]]:
    """Read one file and collect its metadata, node specs and edge specs.

    The file is decoded as UTF-8 with undecodable bytes replaced. Basic
    facts (path, language, line count, byte size) are always recorded; the
    suffix then selects a Python, script, markdown or structured-data
    extractor, and any other suffix only gets a non-empty line count. TODO
    nodes are appended for every file.

    Returns:
        ``(file_metadata, nodes, edges)`` with nodes and edges de-duplicated.
    """
    text = file_path.read_text(encoding="utf-8", errors="replace")
    ext = file_path.suffix.lower()
    details: dict[str, Any] = {
        "path": rel_path,
        "language": _LANGUAGE_BY_SUFFIX.get(ext, ext.lstrip(".")),
        "line_count": text.count("\n") + (1 if text else 0),
        "size_bytes": file_path.stat().st_size,
    }
    found_nodes: list[_NodeSpec] = []
    found_edges: list[_EdgeSpec] = []

    if ext in _PYTHON_SUFFIXES:
        py_nodes, py_edges = self._extract_python_metadata(file_path, rel_path)
        found_nodes += py_nodes
        found_edges += py_edges
    elif ext in _SCRIPT_SUFFIXES:
        extra, script_nodes, script_edges = self._extract_script_metadata(text, rel_path)
        details.update(extra)
        found_nodes += script_nodes
        found_edges += script_edges
    elif ext in _MARKDOWN_SUFFIXES:
        details.update(self._extract_markdown_metadata(text))
        found_nodes += self._extract_markdown_task_nodes(text, rel_path)
    elif ext in _STRUCTURED_SUFFIXES:
        details.update(self._extract_structured_metadata(text, ext))
    else:
        details["non_empty_line_count"] = sum(1 for line in text.splitlines() if line.strip())

    found_nodes += self._extract_todo_nodes(text, rel_path)
    return details, self._dedupe_node_specs(found_nodes), self._dedupe_edge_specs(found_edges)
430
+
431
+ def _build_file_scoped_metadata(
432
+ self,
433
+ *,
434
+ rel_path: str,
435
+ language: str,
436
+ change_metadata: dict[str, Any],
437
+ ) -> dict[str, Any]:
438
+ return {
439
+ "path": rel_path,
440
+ "language": language,
441
+ "history_scope": "file",
442
+ **change_metadata,
443
+ }
444
+
445
def _build_node_scoped_metadata(
    self,
    *,
    rel_path: str,
    base_metadata: dict[str, Any],
    node_metadata: dict[str, Any],
) -> dict[str, Any]:
    """Overlay node-level git history on a copy of the file-level metadata.

    ``base_metadata`` is not modified; node-level entries replace
    file-level ones on key conflicts.
    """
    node_history = self._git_node_change_metadata(
        rel_path=rel_path,
        node_metadata=node_metadata,
        file_change_metadata=base_metadata,
    )
    return {**base_metadata, **node_history}
461
+
462
def _git_file_change_metadata(self, rel_path: str) -> dict[str, Any]:
    """Return git state and recent history for a file, memoized per path.

    The file counts as tracked when ``git log`` returned at least one
    commit for it; the newest commit supplies the ``last_commit_*`` fields.
    """
    if (hit := self._git_metadata_cache.get(rel_path)) is not None:
        return hit

    history = self._git_recent_commits(rel_path)
    state = self._git_status(rel_path, tracked=bool(history))
    newest = history[0] if history else {}
    entry = {
        "last_state": state,
        "last_commit_sha": newest.get("sha"),
        "last_commit_at": newest.get("committed_at"),
        "last_commit_summary": newest.get("summary"),
        "history_summary": self._build_history_summary(history, state),
        "recent_commits": history,
    }
    self._git_metadata_cache[rel_path] = entry
    return entry
480
+
481
def _git_node_change_metadata(
    self,
    *,
    rel_path: str,
    node_metadata: dict[str, Any],
    file_change_metadata: dict[str, Any],
) -> dict[str, Any]:
    """Build line- or symbol-level git history for a node.

    Returns an empty dict when the node carries no usable line number.
    When blame yields no commit for that line, only the line number and
    the file-level commit fields (re-keyed with a ``file_`` prefix) are
    returned. Otherwise the blamed commit supplies the ``last_commit_*``
    fields and the scope is ``"symbol"`` if a history subject exists,
    ``"line"`` if not.
    """
    line_number = self._node_line_number(node_metadata)
    if line_number is None:
        return {}

    # File-level history preserved alongside the node-level fields.
    file_level = {
        "file_last_commit_sha": file_change_metadata.get("last_commit_sha"),
        "file_last_commit_at": file_change_metadata.get("last_commit_at"),
        "file_last_commit_summary": file_change_metadata.get("last_commit_summary"),
        "file_history_summary": file_change_metadata.get("history_summary"),
    }

    blamed = self._git_line_commit(rel_path, line_number)
    if blamed is None:
        return {
            "history_scope": "line",
            "last_touch_line": line_number,
            **file_level,
        }

    subject = self._history_subject(node_metadata)
    related = self._build_symbol_recent_commits(
        subject=subject,
        line_commit=blamed,
        file_recent_commits=file_change_metadata.get("recent_commits"),
    )
    narrative = self._build_symbol_history_summary(
        subject=subject,
        status=str(file_change_metadata.get("last_state", "") or ""),
        line_commit=blamed,
        recent_commits=related,
    )
    return {
        "history_scope": "symbol" if subject else "line",
        "last_touch_line": line_number,
        "last_commit_sha": blamed.get("sha"),
        "last_commit_at": blamed.get("committed_at"),
        "last_commit_summary": blamed.get("summary"),
        "history_summary": narrative,
        "recent_commits": related,
        **file_level,
    }
527
+
528
+ def _git_recent_commits(self, rel_path: str, limit: int = 5) -> list[dict[str, str]]:
529
+ result = subprocess.run(
530
+ [
531
+ "git",
532
+ "log",
533
+ "--follow",
534
+ "--format=%H%x1f%cI%x1f%s",
535
+ "-n",
536
+ str(limit),
537
+ "--",
538
+ rel_path,
539
+ ],
540
+ cwd=self._root,
541
+ capture_output=True,
542
+ text=True,
543
+ check=False,
544
+ )
545
+ if result.returncode != 0:
546
+ return []
547
+ commits: list[dict[str, str]] = []
548
+ for line in result.stdout.splitlines():
549
+ sha, _, rest = line.partition("\x1f")
550
+ committed_at, _, summary = rest.partition("\x1f")
551
+ if not sha or not summary:
552
+ continue
553
+ commits.append(
554
+ {
555
+ "sha": sha.strip(),
556
+ "committed_at": committed_at.strip(),
557
+ "summary": summary.strip(),
558
+ }
559
+ )
560
+ return commits
561
+
562
+ def _git_line_commit(self, rel_path: str, line_number: int) -> dict[str, str] | None:
563
+ cache_key = (rel_path, line_number)
564
+ if cache_key in self._git_line_commit_cache:
565
+ return self._git_line_commit_cache[cache_key]
566
+
567
+ result = subprocess.run(
568
+ [
569
+ "git",
570
+ "blame",
571
+ "--line-porcelain",
572
+ "-L",
573
+ f"{line_number},{line_number}",
574
+ "--",
575
+ rel_path,
576
+ ],
577
+ cwd=self._root,
578
+ capture_output=True,
579
+ text=True,
580
+ check=False,
581
+ )
582
+ if result.returncode != 0:
583
+ self._git_line_commit_cache[cache_key] = None
584
+ return None
585
+
586
+ first_line = next((line for line in result.stdout.splitlines() if line.strip()), "")
587
+ sha = first_line.split(" ", 1)[0].strip()
588
+ if not sha or set(sha) == {"0"}:
589
+ self._git_line_commit_cache[cache_key] = None
590
+ return None
591
+
592
+ details = self._git_commit_details(sha)
593
+ self._git_line_commit_cache[cache_key] = details
594
+ return details
595
+
596
+ def _git_commit_details(self, sha: str) -> dict[str, str]:
597
+ cached = self._git_commit_detail_cache.get(sha)
598
+ if cached is not None:
599
+ return cached
600
+
601
+ result = subprocess.run(
602
+ ["git", "show", "-s", "--format=%H%x1f%cI%x1f%s", sha],
603
+ cwd=self._root,
604
+ capture_output=True,
605
+ text=True,
606
+ check=False,
607
+ )
608
+ if result.returncode != 0:
609
+ details = {"sha": sha, "committed_at": "", "summary": ""}
610
+ self._git_commit_detail_cache[sha] = details
611
+ return details
612
+
613
+ raw = result.stdout.strip().splitlines()
614
+ if not raw:
615
+ details = {"sha": sha, "committed_at": "", "summary": ""}
616
+ self._git_commit_detail_cache[sha] = details
617
+ return details
618
+
619
+ commit_sha, _, rest = raw[0].partition("\x1f")
620
+ committed_at, _, summary = rest.partition("\x1f")
621
+ details = {
622
+ "sha": commit_sha.strip() or sha,
623
+ "committed_at": committed_at.strip(),
624
+ "summary": summary.strip(),
625
+ }
626
+ self._git_commit_detail_cache[sha] = details
627
+ return details
628
+
629
+ def _git_status(self, rel_path: str, *, tracked: bool) -> str:
630
+ result = subprocess.run(
631
+ ["git", "status", "--short", "--", rel_path],
632
+ cwd=self._root,
633
+ capture_output=True,
634
+ text=True,
635
+ check=False,
636
+ )
637
+ if result.returncode != 0:
638
+ return "tracked" if tracked else "untracked"
639
+ raw_status = result.stdout.strip()
640
+ if not raw_status:
641
+ return "clean" if tracked else "untracked"
642
+ status_code = raw_status[:2]
643
+ if status_code == "??":
644
+ return "untracked"
645
+ if "R" in status_code:
646
+ return "renamed"
647
+ if "D" in status_code:
648
+ return "deleted"
649
+ if "A" in status_code:
650
+ return "added"
651
+ if "M" in status_code:
652
+ return "modified"
653
+ return "changed"
654
+
655
+ @staticmethod
656
+ def _node_line_number(node_metadata: dict[str, Any]) -> int | None:
657
+ raw_line = node_metadata.get("line")
658
+ if isinstance(raw_line, int):
659
+ return raw_line
660
+ if isinstance(raw_line, str) and raw_line.isdigit():
661
+ return int(raw_line)
662
+ return None
663
+
664
+ @staticmethod
665
+ def _history_subject(node_metadata: dict[str, Any]) -> str:
666
+ for key in ("symbol", "route_path", "handler", "text"):
667
+ value = node_metadata.get(key)
668
+ if isinstance(value, str) and value.strip():
669
+ return value.strip()
670
+ return ""
671
+
672
+ @staticmethod
673
+ def _build_symbol_recent_commits(
674
+ *,
675
+ subject: str,
676
+ line_commit: dict[str, str],
677
+ file_recent_commits: Any,
678
+ ) -> list[dict[str, str]]:
679
+ commits: list[dict[str, str]] = []
680
+ seen: set[str] = set()
681
+
682
+ def add(commit: dict[str, str]) -> None:
683
+ sha = str(commit.get("sha", "") or "")
684
+ if not sha or sha in seen:
685
+ return
686
+ seen.add(sha)
687
+ commits.append(commit)
688
+
689
+ add(line_commit)
690
+ normalized_subject = subject.lower().strip()
691
+ if isinstance(file_recent_commits, list):
692
+ for commit in file_recent_commits:
693
+ if not isinstance(commit, dict):
694
+ continue
695
+ summary = str(commit.get("summary", "") or "")
696
+ if normalized_subject and normalized_subject in summary.lower():
697
+ add(
698
+ {
699
+ "sha": str(commit.get("sha", "") or ""),
700
+ "committed_at": str(commit.get("committed_at", "") or ""),
701
+ "summary": summary,
702
+ }
703
+ )
704
+ for commit in file_recent_commits:
705
+ if not isinstance(commit, dict) or len(commits) >= 3:
706
+ continue
707
+ add(
708
+ {
709
+ "sha": str(commit.get("sha", "") or ""),
710
+ "committed_at": str(commit.get("committed_at", "") or ""),
711
+ "summary": str(commit.get("summary", "") or ""),
712
+ }
713
+ )
714
+
715
+ return commits[:5]
716
+
717
+ @staticmethod
718
+ def _build_symbol_history_summary(
719
+ *,
720
+ subject: str,
721
+ status: str,
722
+ line_commit: dict[str, str],
723
+ recent_commits: list[dict[str, str]],
724
+ ) -> str:
725
+ subject_label = subject or "this node"
726
+ prefix = f"Current state: {status}. " if status and status != "clean" else ""
727
+ summary = line_commit.get("summary", "").strip()
728
+ if not summary:
729
+ return f"{prefix}No symbol-level git history available yet.".strip()
730
+
731
+ trailing = [
732
+ commit.get("summary", "").strip()
733
+ for commit in recent_commits[1:3]
734
+ if commit.get("summary")
735
+ ]
736
+ if trailing:
737
+ return f"{prefix}Last touch for {subject_label}: {summary}. Related changes: {'; '.join(trailing)}".strip()
738
+ return f"{prefix}Last touch for {subject_label}: {summary}.".strip()
739
+
740
+ @staticmethod
741
+ def _build_history_summary(recent_commits: list[dict[str, str]], status: str) -> str:
742
+ if not recent_commits:
743
+ if status == "untracked":
744
+ return "New file not committed yet."
745
+ return "No git history available for this node yet."
746
+ summaries = [commit.get("summary", "").strip() for commit in recent_commits if commit.get("summary")]
747
+ compact = "; ".join(summaries[:3])
748
+ prefix = f"Current state: {status}. " if status and status != "clean" else ""
749
+ return f"{prefix}Recent changes: {compact}".strip()
750
+
751
@staticmethod
def _extract_imports(file_path: Path) -> list[str]:
    """Return the sorted, unique module names a source file imports.

    Python files are parsed with ``ast``; a file that fails to parse
    yields an empty list, and ``from . import x`` forms with no module
    name are skipped. Other languages are matched with regular
    expressions:

    * TypeScript/JavaScript: ``import ... from '...'``, bare
      ``import '...'`` and ``require('...')``.
    * Java: ``import a.b.C;`` lines.
    * Go: paths in single-line ``import "x"`` declarations (optionally
      aliased) and in parenthesized ``import ( ... )`` blocks. Other
      string literals in the file are not treated as imports.
    * Rust: the path following ``use``.

    Files with any other suffix yield an empty list.
    """
    source = file_path.read_text(encoding="utf-8", errors="replace")
    suffix = file_path.suffix.lower()
    if suffix in _PYTHON_SUFFIXES:
        try:
            tree = ast.parse(source, filename=str(file_path))
        except (SyntaxError, ValueError):
            return []
        python_modules: set[str] = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                python_modules.update(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom) and node.module:
                python_modules.add(node.module)
        return sorted(python_modules)

    modules: set[str] = set()
    if suffix in {".ts", ".tsx", ".js", ".jsx"}:
        for match in re.finditer(r"import\s+(?:[^;]*?from\s+)?['\"]([^'\"]+)['\"]", source):
            modules.add(match.group(1))
        for match in re.finditer(r"require\(\s*['\"]([^'\"]+)['\"]\s*\)", source):
            modules.add(match.group(1))
    elif suffix == ".java":
        for match in re.finditer(r"^\s*import\s+([\w.]+);", source, flags=re.MULTILINE):
            modules.add(match.group(1))
    elif suffix == ".go":
        # An import spec is an optional alias (identifier, "_" or ".")
        # followed by the quoted import path.
        import_spec = r'(?:[\w.]+\s+)?"([^"]+)"'
        for match in re.finditer(rf"^\s*import\s+{import_spec}", source, flags=re.MULTILINE):
            modules.add(match.group(1))
        for block in re.finditer(r"^\s*import\s*\((.*?)\)", source, flags=re.MULTILINE | re.DOTALL):
            for match in re.finditer(rf"^\s*{import_spec}", block.group(1), flags=re.MULTILINE):
                modules.add(match.group(1))
    elif suffix == ".rs":
        for match in re.finditer(r"^\s*use\s+([\w:]+)", source, flags=re.MULTILINE):
            modules.add(match.group(1))
    return sorted(modules)
785
+
786
@classmethod
def _extract_python_metadata(
    cls,
    file_path: Path,
    rel_path: str,
) -> tuple[list[_NodeSpec], list[_EdgeSpec]]:
    """Build node and edge specs for one Python file by walking its AST.

    Emits TODO nodes, class/function nodes with "contains" edges, "controller"
    and "route" nodes for route-decorated handlers, "external_service_api"
    nodes for recognised HTTP client calls, and "mq_topic" nodes for
    publish/consume calls.

    Args:
        file_path: File to read (decoded as UTF-8 with replacement characters).
        rel_path: Path used in node names and stored in node metadata.

    Returns:
        ``(nodes, edges)``, both de-duplicated. If the file cannot be parsed,
        only the TODO nodes are returned, together with an empty edge list.
    """
    # Pre-bind so the except branch can use whatever text was read without
    # inspecting locals().
    source = ""
    try:
        source = file_path.read_text(encoding="utf-8", errors="replace")
        tree = ast.parse(source, filename=str(file_path))
    except (SyntaxError, ValueError):
        return cls._extract_todo_nodes(source, rel_path), []

    nodes: list[_NodeSpec] = cls._extract_todo_nodes(source, rel_path)
    edges: list[_EdgeSpec] = []

    class MetadataVisitor(ast.NodeVisitor):
        """Collects specs into the enclosing ``nodes`` / ``edges`` lists."""

        def __init__(self) -> None:
            # (node_type, qualified_name) of each class currently being visited.
            self.class_stack: list[tuple[str, str]] = []
            # Local names bound by imports of httpx / requests.
            self.http_aliases: set[str] = set()

        def visit_Import(self, node: ast.Import) -> None:
            for alias in node.names:
                alias_name = alias.asname or alias.name.split(".")[0]
                if alias.name.split(".")[0] in {"httpx", "requests"}:
                    self.http_aliases.add(alias_name)
            self.generic_visit(node)

        def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
            module = node.module or ""
            for alias in node.names:
                alias_name = alias.asname or alias.name
                if module.split(".")[0] in {"httpx", "requests"}:
                    self.http_aliases.add(alias_name)
            self.generic_visit(node)

        def visit_ClassDef(self, node: ast.ClassDef) -> None:
            class_type = cls._class_node_type(node)
            class_name = cls._qualified_symbol_name(rel_path, node.name, self.class_stack)
            nodes.append(_NodeSpec(class_type, class_name, {
                "path": rel_path,
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
                "symbol": node.name,
            }))
            edges.append(_EdgeSpec("file", rel_path, class_type, class_name, "contains"))
            if cls._is_controller_class(node):
                nodes.append(_NodeSpec("controller", class_name, {"path": rel_path, "line": node.lineno, "symbol": node.name}))
                edges.append(_EdgeSpec(class_type, class_name, "controller", class_name, "tracks"))

            # Members visited below are qualified against this class.
            self.class_stack.append((class_type, class_name))
            self.generic_visit(node)
            self.class_stack.pop()

        def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
            self._visit_function_like(node)

        def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
            self._visit_function_like(node)

        def _visit_function_like(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
            function_name = cls._qualified_symbol_name(rel_path, node.name, self.class_stack)
            nodes.append(_NodeSpec("function", function_name, {
                "path": rel_path,
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
                "symbol": node.name,
                "is_async": isinstance(node, ast.AsyncFunctionDef),
            }))
            # The owner is the innermost enclosing class, or the file itself.
            owner_type = self.class_stack[-1][0] if self.class_stack else "file"
            owner_name = self.class_stack[-1][1] if self.class_stack else rel_path
            edges.append(_EdgeSpec(owner_type, owner_name, "function", function_name, "contains"))

            route_info = cls._route_info(node)
            if route_info is not None:
                method, path = route_info
                route_name = f"{method} {path}"
                nodes.append(_NodeSpec("route", route_name, {
                    "path": rel_path,
                    "method": method,
                    "route_path": path,
                    "line": node.lineno,
                    "handler": function_name,
                }))
                route_source_type = "controller" if self.class_stack else "function"
                route_source_name = self.class_stack[-1][1] if self.class_stack else function_name
                edges.append(_EdgeSpec(route_source_type, route_source_name, "route", route_name, "exposes_route"))

            # ast.walk covers the whole subtree, so calls inside nested
            # functions are attributed to this function as well.
            for child in ast.walk(node):
                if not isinstance(child, ast.Call):
                    continue
                external_call = cls._external_service_from_call(child, self.http_aliases)
                if external_call is not None:
                    nodes.append(_NodeSpec("external_service_api", external_call, {
                        "path": rel_path,
                        "line": getattr(child, "lineno", node.lineno),
                        "caller": function_name,
                    }))
                    edges.append(_EdgeSpec("function", function_name, "external_service_api", external_call, "uses_external_service"))

                mq_info = cls._mq_topic_from_call(child)
                if mq_info is not None:
                    relation, topic_name = mq_info
                    nodes.append(_NodeSpec("mq_topic", topic_name, {"path": rel_path, "line": getattr(child, "lineno", node.lineno)}))
                    edges.append(_EdgeSpec("function", function_name, "mq_topic", topic_name, relation))

            self.generic_visit(node)

    MetadataVisitor().visit(tree)
    return cls._dedupe_node_specs(nodes), cls._dedupe_edge_specs(edges)
895
+
896
@classmethod
def _extract_script_metadata(
    cls,
    source: str,
    rel_path: str,
) -> tuple[dict[str, Any], list[_NodeSpec], list[_EdgeSpec]]:
    """Extract graph metadata from non-Python source text using regex heuristics.

    Detects interfaces/traits, classes/structs, functions, HTTP routes
    (Express-style, NestJS, Spring, Go and Rust patterns), WebSocket
    endpoints, URL literals and message-queue publish/consume calls.

    Args:
        source: Full text of the file.
        rel_path: Path used in node names and stored in node metadata.

    Returns:
        ``(summary, nodes, edges)`` where ``summary`` holds ``symbol_count``,
        ``route_count``, ``external_service_count`` and ``mq_topic_count``.
        ``nodes`` and ``edges`` are de-duplicated; the symbol and route counts
        are tallied per regex match, before de-duplication.
    """
    nodes: list[_NodeSpec] = []
    edges: list[_EdgeSpec] = []
    symbol_count = 0
    route_count = 0
    external_services: set[str] = set()
    mq_topics: set[str] = set()

    for match in re.finditer(r"(?:export\s+)?interface\s+([A-Za-z_][A-Za-z0-9_]*)|\btrait\s+([A-Za-z_][A-Za-z0-9_]*)", source):
        name = match.group(1) or match.group(2)
        symbol_name = f"{rel_path}::{name}"
        nodes.append(_NodeSpec("interface", symbol_name, {"path": rel_path, "line": _line_number(source, match.start()), "symbol": name}))
        edges.append(_EdgeSpec("file", rel_path, "interface", symbol_name, "contains"))
        symbol_count += 1

    for match in re.finditer(r"(?:export\s+)?(abstract\s+)?class\s+([A-Za-z_][A-Za-z0-9_]*)|\bstruct\s+([A-Za-z_][A-Za-z0-9_]*)", source):
        is_abstract = bool(match.group(1))
        name = match.group(2) or match.group(3)
        symbol_name = f"{rel_path}::{name}"
        node_type = "abstract_class" if is_abstract else "class"
        line = _line_number(source, match.start())
        nodes.append(_NodeSpec(node_type, symbol_name, {"path": rel_path, "line": line, "symbol": name}))
        edges.append(_EdgeSpec("file", rel_path, node_type, symbol_name, "contains"))
        if name.endswith("Controller"):
            nodes.append(_NodeSpec("controller", symbol_name, {"path": rel_path, "line": line, "symbol": name}))
            edges.append(_EdgeSpec(node_type, symbol_name, "controller", symbol_name, "tracks"))
        symbol_count += 1

    patterns = [
        r"(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(",
        r"(?:export\s+)?const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:async\s*)?\(",
        r"\bfunc\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)\s*\(",
        r"\bfn\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(",
    ]
    for pattern in patterns:
        for match in re.finditer(pattern, source):
            name = match.group(1)
            symbol_name = f"{rel_path}::{name}"
            nodes.append(_NodeSpec("function", symbol_name, {"path": rel_path, "line": _line_number(source, match.start()), "symbol": name}))
            edges.append(_EdgeSpec("file", rel_path, "function", symbol_name, "contains"))
            symbol_count += 1

    # --- HTTP route detection (Express/Fastify/Koa patterns) ---
    for match in re.finditer(
        r"(?:router|app|server)\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)['\"]",
        source,
    ):
        method = match.group(1).upper()
        path = match.group(2)
        route_name = f"{method} {path}"
        nodes.append(_NodeSpec("route", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": path, "framework": "express",
        }))
        edges.append(_EdgeSpec("file", rel_path, "route", route_name, "exposes_route"))
        route_count += 1

    # --- NestJS @Get/@Post etc. ---
    # Detect controller prefix for full path reconstruction
    controller_prefix = ""
    ctrl_match = _NESTJS_CONTROLLER_PATTERN.search(source)
    if ctrl_match:
        controller_prefix = ctrl_match.group(1).rstrip("/")

    for match in _NESTJS_ROUTE_PATTERN.finditer(source):
        method = match.group(1).upper()
        sub_path = (match.group(2) or "").strip("/")
        full_path = f"/{controller_prefix}/{sub_path}".replace("//", "/").rstrip("/") or "/"
        route_name = f"{method} {full_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": full_path, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Spring Boot @GetMapping / @PostMapping etc. ---
    # Built once; the mapping does not depend on the individual match.
    method_map = {
        "GetMapping": "GET", "PostMapping": "POST", "PutMapping": "PUT",
        "PatchMapping": "PATCH", "DeleteMapping": "DELETE", "RequestMapping": "ANY",
    }
    for match in _SPRING_ROUTE_PATTERN.finditer(source):
        annotation = match.group(1)
        route_path = match.group(2)
        method = method_map.get(annotation, "ANY")
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "spring",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Go Gin/Fiber/Chi route patterns ---
    for match in _GO_ROUTE_PATTERN.finditer(source):
        method = match.group(1).upper()
        route_path = match.group(2)
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "gin",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Rust axum/actix-web route attributes ---
    rust_method_pattern = re.compile(r"get|post|put|patch|delete")
    for match in _RUST_ROUTE_ATTR_PATTERN.finditer(source):
        route_path = match.group(1)
        # Infer method from attribute name (e.g. #[get("/")] → GET).
        # The route path itself is excluded from the inspected text: a plain
        # substring test over a window containing the path would turn
        # #[get("/posts")] into POST.
        window_start = max(0, match.start() - 5)
        window_end = match.start() + 30
        path_start, path_end = match.span(1)
        if path_start < 0:
            # Group 1 did not participate; fall back to the whole window.
            before, after = source[window_start:window_end], ""
        else:
            before = source[window_start:min(path_start, window_end)]
            after = source[path_end:window_end]
        method = "GET"
        preceding = rust_method_pattern.findall(before)
        if preceding:
            # The keyword closest to the path is the attribute name.
            method = preceding[-1].upper()
        else:
            following = rust_method_pattern.search(after)
            if following:
                method = following.group(0).upper()
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "axum",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- WebSocket: NestJS @WebSocketGateway + @SubscribeMessage ---
    ws_gateway_path = ""
    gw_match = _WS_GATEWAY_PATTERN.search(source)
    if gw_match:
        ws_gateway_path = gw_match.group(1) or ""
        nodes.append(_NodeSpec("websocket_endpoint", f"WS {ws_gateway_path or '/'}", {
            "path": rel_path, "line": _line_number(source, gw_match.start()),
            "gateway_path": ws_gateway_path, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", f"WS {ws_gateway_path or '/'}", "exposes_websocket"))

    for match in _WS_SUBSCRIBE_PATTERN.finditer(source):
        event_name = match.group(1)
        ws_endpoint_name = f"WS:{event_name}"
        nodes.append(_NodeSpec("websocket_endpoint", ws_endpoint_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "event": event_name, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", ws_endpoint_name, "websocket"))

    # --- Spring WebSocket @MessageMapping ---
    for match in _SPRING_WS_MAPPING_PATTERN.finditer(source):
        dest = match.group(1)
        ws_endpoint_name = f"WS:{dest}"
        nodes.append(_NodeSpec("websocket_endpoint", ws_endpoint_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "event": dest, "framework": "spring",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", ws_endpoint_name, "websocket"))

    # --- External service calls (URL literals) ---
    for match in _URL_PATTERN.finditer(source):
        url = match.group(0)
        external_services.add(url)
        nodes.append(_NodeSpec("external_service_api", url, {
            "path": rel_path, "line": _line_number(source, match.start()),
        }))
        edges.append(_EdgeSpec("file", rel_path, "external_service_api", url, "uses_external_service"))

    # --- Message queue: publish / consume calls ---
    # Sorted so that node/edge order does not depend on set iteration order.
    for action in sorted(_MQ_PUBLISH_CALLS.union(_MQ_CONSUME_CALLS)):
        for match in re.finditer(rf"\.{action}\s*\(\s*['\"]([^'\"]+)['\"]", source):
            topic_name = match.group(1)
            relation = "publishes" if action in _MQ_PUBLISH_CALLS else "consumes"
            node_type = "mq_producer" if action in _MQ_PUBLISH_CALLS else "mq_consumer"
            mq_topics.add(topic_name)
            nodes.append(_NodeSpec("mq_topic", topic_name, {
                "path": rel_path, "line": _line_number(source, match.start()),
            }))
            nodes.append(_NodeSpec(node_type, f"{node_type}:{topic_name}", {
                "path": rel_path, "line": _line_number(source, match.start()),
                "topic": topic_name,
            }))
            edges.append(_EdgeSpec("file", rel_path, node_type, f"{node_type}:{topic_name}", relation))
            edges.append(_EdgeSpec(node_type, f"{node_type}:{topic_name}", "mq_topic", topic_name, relation))

    return {
        "symbol_count": symbol_count,
        "route_count": route_count,
        "external_service_count": len(external_services),
        "mq_topic_count": len(mq_topics),
    }, cls._dedupe_node_specs(nodes), cls._dedupe_edge_specs(edges)
1086
+
1087
@staticmethod
def _extract_markdown_metadata(source: str) -> dict[str, Any]:
    """Summarise a Markdown document: heading count, first 20 headings, URL count."""
    heading_titles: list[str] = []
    for heading in _MARKDOWN_HEADING_PATTERN.finditer(source):
        heading_titles.append(heading.group(2).strip())
    url_hits = _URL_PATTERN.findall(source)
    summary: dict[str, Any] = {}
    summary["heading_count"] = len(heading_titles)
    summary["headings"] = heading_titles[:20]
    summary["link_count"] = len(url_hits)
    return summary
1095
+
1096
@staticmethod
def _extract_markdown_task_nodes(source: str, rel_path: str) -> list[_NodeSpec]:
    """Return one "todo" node per line that matches the Markdown task pattern.

    Node names embed the 1-based line number; metadata carries the path, the
    line number and the stripped text captured by the pattern's first group.
    """
    numbered_hits = (
        (line_no, _MARKDOWN_TASK_PATTERN.match(text))
        for line_no, text in enumerate(source.splitlines(), start=1)
    )
    return [
        _NodeSpec("todo", f"{rel_path}::TODO:{line_no}", {"path": rel_path, "line": line_no, "text": hit.group(1).strip()})
        for line_no, hit in numbered_hits
        if hit is not None
    ]
1105
+
1106
+ @staticmethod
1107
+ def _extract_structured_metadata(source: str, suffix: str) -> dict[str, Any]:
1108
+ keys: list[str] = []
1109
+ if suffix == ".json":
1110
+ try:
1111
+ parsed = json.loads(source)
1112
+ if isinstance(parsed, dict):
1113
+ keys = sorted(str(key) for key in parsed.keys())
1114
+ except json.JSONDecodeError:
1115
+ keys = []
1116
+ elif suffix == ".toml":
1117
+ try:
1118
+ parsed = tomllib.loads(source)
1119
+ if isinstance(parsed, dict):
1120
+ keys = sorted(str(key) for key in parsed.keys())
1121
+ except tomllib.TOMLDecodeError:
1122
+ keys = []
1123
+ else:
1124
+ keys = sorted({match.group(1) for match in _YAML_KEY_PATTERN.finditer(source)})
1125
+ if not keys:
1126
+ keys = sorted({match.group(1) for match in _INI_KEY_PATTERN.finditer(source)})
1127
+ return {"top_level_keys": keys[:50], "top_level_key_count": len(keys)}
1128
+
1129
@staticmethod
def _extract_todo_nodes(source: str, rel_path: str) -> list[_NodeSpec]:
    """Return one "todo" node for every line in which the TODO pattern is found.

    The node text is the stripped first capture group, or the literal "TODO"
    when that group is missing or blank.
    """
    found: list[_NodeSpec] = []
    line_no = 0
    for raw_line in source.splitlines():
        line_no += 1
        hit = _TODO_PATTERN.search(raw_line)
        if hit is not None:
            captured = hit.group(1) or ""
            label = captured.strip() or "TODO"
            found.append(_NodeSpec("todo", f"{rel_path}::TODO:{line_no}", {"path": rel_path, "line": line_no, "text": label}))
    return found
1139
+
1140
+ @staticmethod
1141
+ def _qualified_symbol_name(rel_path: str, symbol_name: str, class_stack: list[tuple[str, str]]) -> str:
1142
+ if not class_stack:
1143
+ return f"{rel_path}::{symbol_name}"
1144
+ owner_name = class_stack[-1][1].split("::", 1)[1]
1145
+ return f"{rel_path}::{owner_name}.{symbol_name}"
1146
+
1147
+ @staticmethod
1148
+ def _class_node_type(node: ast.ClassDef) -> str:
1149
+ base_names = {RepoScanner._base_name(base) for base in node.bases}
1150
+ has_abstract_method = any(
1151
+ isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef))
1152
+ and any(RepoScanner._base_name(dec) == "abstractmethod" for dec in child.decorator_list)
1153
+ for child in node.body
1154
+ )
1155
+ if "Protocol" in base_names:
1156
+ return "interface"
1157
+ if "ABC" in base_names or "ABCMeta" in base_names or has_abstract_method:
1158
+ return "abstract_class"
1159
+ return "class"
1160
+
1161
+ @staticmethod
1162
+ def _is_controller_class(node: ast.ClassDef) -> bool:
1163
+ if node.name.endswith("Controller"):
1164
+ return True
1165
+ for child in node.body:
1166
+ if isinstance(child, (ast.FunctionDef, ast.AsyncFunctionDef)) and RepoScanner._route_info(child) is not None:
1167
+ return True
1168
+ return False
1169
+
1170
+ @staticmethod
1171
+ def _route_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> tuple[str, str] | None:
1172
+ for decorator in node.decorator_list:
1173
+ if not isinstance(decorator, ast.Call) or not isinstance(decorator.func, ast.Attribute):
1174
+ continue
1175
+ method_name = decorator.func.attr.lower()
1176
+ if method_name not in _HTTP_ROUTE_DECORATORS:
1177
+ continue
1178
+ route_path = RepoScanner._string_arg_value(decorator.args)
1179
+ if route_path is None:
1180
+ continue
1181
+ method = RepoScanner._route_methods_from_keywords(decorator.keywords) if method_name == "route" else method_name.upper()
1182
+ return method, route_path
1183
+ return None
1184
+
1185
+ @staticmethod
1186
+ def _route_methods_from_keywords(keywords: list[ast.keyword]) -> str:
1187
+ for keyword in keywords:
1188
+ if keyword.arg != "methods" or not isinstance(keyword.value, (ast.List, ast.Tuple)):
1189
+ continue
1190
+ methods = [elt.value.upper() for elt in keyword.value.elts if isinstance(elt, ast.Constant) and isinstance(elt.value, str)]
1191
+ if methods:
1192
+ return "/".join(methods)
1193
+ return "ROUTE"
1194
+
1195
+ @staticmethod
1196
+ def _string_arg_value(args: list[ast.expr]) -> str | None:
1197
+ if not args:
1198
+ return None
1199
+ first = args[0]
1200
+ if isinstance(first, ast.Constant) and isinstance(first.value, str):
1201
+ return first.value
1202
+ return None
1203
+
1204
+ @staticmethod
1205
+ def _external_service_from_call(call: ast.Call, http_aliases: set[str]) -> str | None:
1206
+ func = call.func
1207
+ if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
1208
+ if func.value.id in http_aliases and func.attr.lower() in _HTTP_ROUTE_DECORATORS.union({"request"}):
1209
+ return RepoScanner._extract_url_from_call(call)
1210
+ if isinstance(func, ast.Name) and func.id in http_aliases:
1211
+ return RepoScanner._extract_url_from_call(call)
1212
+ return None
1213
+
1214
+ @staticmethod
1215
+ def _extract_url_from_call(call: ast.Call) -> str | None:
1216
+ for candidate in [*call.args, *(kw.value for kw in call.keywords if kw.arg == "url")]:
1217
+ if isinstance(candidate, ast.Constant) and isinstance(candidate.value, str) and _URL_PATTERN.match(candidate.value):
1218
+ return candidate.value
1219
+ return None
1220
+
1221
+ @staticmethod
1222
+ def _mq_topic_from_call(call: ast.Call) -> tuple[str, str] | None:
1223
+ func = call.func
1224
+ if not isinstance(func, ast.Attribute):
1225
+ return None
1226
+ action = func.attr.lower()
1227
+ if action in _MQ_PUBLISH_CALLS:
1228
+ relation = "publishes"
1229
+ elif action in _MQ_CONSUME_CALLS:
1230
+ relation = "consumes"
1231
+ else:
1232
+ return None
1233
+ topic_name = RepoScanner._string_arg_value(call.args)
1234
+ if topic_name is None:
1235
+ return None
1236
+ return relation, topic_name
1237
+
1238
+ @staticmethod
1239
+ def _base_name(node: ast.expr) -> str:
1240
+ if isinstance(node, ast.Name):
1241
+ return node.id
1242
+ if isinstance(node, ast.Attribute):
1243
+ return node.attr
1244
+ if isinstance(node, ast.Subscript):
1245
+ return RepoScanner._base_name(node.value)
1246
+ return ""
1247
+
1248
@staticmethod
def _dedupe_node_specs(nodes: list[_NodeSpec]) -> list[_NodeSpec]:
    """Collapse node specs sharing ``(node_type, name)`` into a single spec.

    Metadata of duplicates is merged, with later entries overriding earlier
    keys. Output order follows the first occurrence of each key.
    """
    unique: dict[tuple[str, str], _NodeSpec] = {}
    for spec in nodes:
        identity = (spec.node_type, spec.name)
        earlier = unique.get(identity)
        if earlier is not None:
            combined = {**earlier.metadata, **spec.metadata}
            spec = _NodeSpec(spec.node_type, spec.name, combined)
        unique[identity] = spec
    return list(unique.values())
1256
+
1257
@staticmethod
def _dedupe_edge_specs(edges: list[_EdgeSpec]) -> list[_EdgeSpec]:
    """Drop edge specs that repeat the same endpoints and relation.

    For each distinct (source type, source name, target type, target name,
    relation) tuple the last matching edge is kept, at the position of the
    first occurrence.
    """
    unique: dict[tuple[str, str, str, str, str], _EdgeSpec] = {
        (item.source_type, item.source_name, item.target_type, item.target_name, item.relation): item
        for item in edges
    }
    return list(unique.values())
1263
+
1264
+
1265
+ def _line_number(source: str, offset: int) -> int:
1266
+ return source.count("\n", 0, offset) + 1