minder-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- minder/__init__.py +12 -0
- minder/api/routers/prompts.py +177 -0
- minder/application/__init__.py +1 -0
- minder/application/admin/__init__.py +11 -0
- minder/application/admin/dto.py +453 -0
- minder/application/admin/jobs.py +327 -0
- minder/application/admin/use_cases.py +1895 -0
- minder/auth/__init__.py +12 -0
- minder/auth/context.py +26 -0
- minder/auth/middleware.py +70 -0
- minder/auth/principal.py +59 -0
- minder/auth/rate_limiter.py +89 -0
- minder/auth/rbac.py +60 -0
- minder/auth/service.py +541 -0
- minder/bootstrap/__init__.py +9 -0
- minder/bootstrap/providers.py +109 -0
- minder/bootstrap/transport.py +807 -0
- minder/cache/__init__.py +10 -0
- minder/cache/providers.py +140 -0
- minder/chunking/__init__.py +4 -0
- minder/chunking/code_splitter.py +184 -0
- minder/chunking/splitter.py +136 -0
- minder/cli.py +1542 -0
- minder/config.py +179 -0
- minder/continuity.py +363 -0
- minder/dev.py +160 -0
- minder/embedding/__init__.py +9 -0
- minder/embedding/base.py +7 -0
- minder/embedding/local.py +65 -0
- minder/embedding/openai.py +7 -0
- minder/graph/__init__.py +11 -0
- minder/graph/edges.py +13 -0
- minder/graph/executor.py +127 -0
- minder/graph/graph.py +263 -0
- minder/graph/nodes/__init__.py +27 -0
- minder/graph/nodes/evaluator.py +21 -0
- minder/graph/nodes/guard.py +64 -0
- minder/graph/nodes/llm.py +59 -0
- minder/graph/nodes/planning.py +30 -0
- minder/graph/nodes/reasoning.py +87 -0
- minder/graph/nodes/reranker.py +141 -0
- minder/graph/nodes/retriever.py +86 -0
- minder/graph/nodes/verification.py +230 -0
- minder/graph/nodes/workflow_planner.py +250 -0
- minder/graph/runtime.py +15 -0
- minder/graph/state.py +26 -0
- minder/llm/__init__.py +5 -0
- minder/llm/base.py +14 -0
- minder/llm/local.py +381 -0
- minder/llm/openai.py +89 -0
- minder/models/__init__.py +109 -0
- minder/models/base.py +10 -0
- minder/models/client.py +137 -0
- minder/models/document.py +34 -0
- minder/models/error.py +32 -0
- minder/models/graph.py +114 -0
- minder/models/history.py +32 -0
- minder/models/job.py +62 -0
- minder/models/prompt.py +41 -0
- minder/models/repository.py +62 -0
- minder/models/rule.py +68 -0
- minder/models/session.py +51 -0
- minder/models/skill.py +52 -0
- minder/models/user.py +41 -0
- minder/models/workflow.py +35 -0
- minder/observability/__init__.py +57 -0
- minder/observability/audit.py +243 -0
- minder/observability/logging.py +253 -0
- minder/observability/metrics.py +448 -0
- minder/observability/tracing.py +215 -0
- minder/presentation/__init__.py +1 -0
- minder/presentation/http/__init__.py +1 -0
- minder/presentation/http/admin/__init__.py +3 -0
- minder/presentation/http/admin/api.py +1309 -0
- minder/presentation/http/admin/context.py +94 -0
- minder/presentation/http/admin/dashboard.py +111 -0
- minder/presentation/http/admin/jobs.py +208 -0
- minder/presentation/http/admin/memories.py +185 -0
- minder/presentation/http/admin/prompts.py +219 -0
- minder/presentation/http/admin/routes.py +127 -0
- minder/presentation/http/admin/runtime.py +650 -0
- minder/presentation/http/admin/search.py +368 -0
- minder/presentation/http/admin/skills.py +230 -0
- minder/prompts/__init__.py +646 -0
- minder/prompts/formatter.py +142 -0
- minder/resources/__init__.py +318 -0
- minder/retrieval/__init__.py +5 -0
- minder/retrieval/hybrid.py +178 -0
- minder/retrieval/mmr.py +116 -0
- minder/retrieval/multi_hop.py +115 -0
- minder/runtime.py +15 -0
- minder/server.py +145 -0
- minder/store/__init__.py +64 -0
- minder/store/document.py +115 -0
- minder/store/error.py +82 -0
- minder/store/feedback.py +114 -0
- minder/store/graph.py +588 -0
- minder/store/history.py +57 -0
- minder/store/interfaces.py +512 -0
- minder/store/milvus/__init__.py +11 -0
- minder/store/milvus/client.py +26 -0
- minder/store/milvus/collections.py +15 -0
- minder/store/milvus/vector_store.py +232 -0
- minder/store/mongodb/__init__.py +11 -0
- minder/store/mongodb/client.py +49 -0
- minder/store/mongodb/indexes.py +90 -0
- minder/store/mongodb/operational_store.py +993 -0
- minder/store/relational.py +1087 -0
- minder/store/repo_state.py +58 -0
- minder/store/rule.py +93 -0
- minder/store/vector.py +79 -0
- minder/tools/__init__.py +47 -0
- minder/tools/auth.py +94 -0
- minder/tools/graph.py +839 -0
- minder/tools/ingest.py +353 -0
- minder/tools/memory.py +381 -0
- minder/tools/query.py +307 -0
- minder/tools/registry.py +269 -0
- minder/tools/repo_scanner.py +1266 -0
- minder/tools/search.py +15 -0
- minder/tools/session.py +316 -0
- minder/tools/skills.py +899 -0
- minder/tools/workflow.py +215 -0
- minder/transport/__init__.py +4 -0
- minder/transport/base.py +286 -0
- minder/transport/sse.py +252 -0
- minder/transport/stdio.py +29 -0
- minder_cli-0.2.0.dist-info/METADATA +318 -0
- minder_cli-0.2.0.dist-info/RECORD +132 -0
- minder_cli-0.2.0.dist-info/WHEEL +4 -0
- minder_cli-0.2.0.dist-info/entry_points.txt +2 -0
- minder_cli-0.2.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,1266 @@
|
|
|
1
|
+
"""Repository graph extraction and sync-payload building."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import ast
|
|
6
|
+
import json
|
|
7
|
+
import re
|
|
8
|
+
import subprocess
|
|
9
|
+
import tomllib
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import TYPE_CHECKING, Any
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from minder.store.graph import KnowledgeGraphStore
|
|
16
|
+
|
|
17
|
+
# File suffixes grouped by the extractor that handles them.
_PYTHON_SUFFIXES = {".py"}
_SCRIPT_SUFFIXES = {".ts", ".tsx", ".js", ".jsx", ".java", ".go", ".rs"}
_MARKDOWN_SUFFIXES = {".md"}
_STRUCTURED_SUFFIXES = {".json", ".toml", ".yaml", ".yml"}
# Every suffix the scanner picks up: the four groups above plus plain text.
_SOURCE_SUFFIXES = (
    _PYTHON_SUFFIXES
    | _SCRIPT_SUFFIXES
    | _MARKDOWN_SUFFIXES
    | _STRUCTURED_SUFFIXES
    | {".txt"}
)

# A directory holding one of these files is treated as a service root.
_SERVICE_MARKERS = {"pyproject.toml", "package.json", "go.mod", "Cargo.toml"}
_HTTP_ROUTE_DECORATORS = {"get", "post", "put", "patch", "delete", "route"}
_MQ_PUBLISH_CALLS = {"publish", "send", "produce", "emit"}
_MQ_CONSUME_CALLS = {"consume", "subscribe", "listen"}

# Spring Boot route annotation detection (Java)
_SPRING_ROUTE_PATTERN = re.compile(
    r'@(GetMapping|PostMapping|PutMapping|PatchMapping|DeleteMapping|RequestMapping)'
    r'\s*\(\s*(?:value\s*=\s*)?["\']([^"\']+)["\']',
    re.M,
)

# NestJS decorator detection (TypeScript): @Controller supplies the
# class-level prefix, @Get/@Post etc. the per-handler path.
_NESTJS_CONTROLLER_PATTERN = re.compile(r'@Controller\s*\(\s*["\']([^"\']*)["\']', re.M)
_NESTJS_ROUTE_PATTERN = re.compile(
    r'@(Get|Post|Put|Patch|Delete|All)\s*\(\s*(?:["\']([^"\']*)["\'])?\s*\)',
    re.M,
)

# WebSocket endpoint detection
_WS_GATEWAY_PATTERN = re.compile(
    r'@WebSocketGateway\s*\(\s*(?:(?:path\s*=\s*)?["\']([^"\']*)["\'])?\s*\)',
    re.M,
)
_WS_SUBSCRIBE_PATTERN = re.compile(r'@SubscribeMessage\s*\(\s*["\']([^"\']+)["\']', re.M)
_SPRING_WS_MAPPING_PATTERN = re.compile(r'@MessageMapping\s*\(\s*["\']([^"\']+)["\']', re.M)

# Go/Gin/Fiber route patterns
_GO_ROUTE_PATTERN = re.compile(
    r'(?:r|router|app|engine)\.(GET|POST|PUT|PATCH|DELETE)\s*\(\s*"([^"]+)"',
    re.M,
)

# Rust/axum/actix-web route patterns
_RUST_ROUTE_ATTR_PATTERN = re.compile(
    r'#\[(?:get|post|put|patch|delete)\s*\(\s*"([^"]+)"\s*\)\]',
    re.M,
)

# Generic text patterns: TODO comments, unchecked markdown tasks, URLs,
# markdown headings and top-level YAML / INI style keys.
_TODO_PATTERN = re.compile(r"(?:#|//|/\*+|\*+)\s*TODO\s*:?\s*(.+)?", re.IGNORECASE)
_MARKDOWN_TASK_PATTERN = re.compile(r"^\s*[-*]\s+\[\s\]\s+(.+)$")
_URL_PATTERN = re.compile(r"https?://[^\s\"')]+")
_MARKDOWN_HEADING_PATTERN = re.compile(r"^(#{1,6})\s+(.+)$", re.M)
_YAML_KEY_PATTERN = re.compile(r"^([A-Za-z0-9_.-]+)\s*:", re.M)
_INI_KEY_PATTERN = re.compile(r"^([A-Za-z0-9_.-]+)\s*=", re.M)

# Language label recorded in file metadata for each supported suffix.
_LANGUAGE_BY_SUFFIX = {
    ".py": "python",
    ".ts": "typescript",
    ".tsx": "typescript",
    ".js": "javascript",
    ".jsx": "javascript",
    ".java": "java",
    ".go": "go",
    ".rs": "rust",
    ".md": "markdown",
    ".json": "json",
    ".toml": "toml",
    ".yaml": "yaml",
    ".yml": "yaml",
    ".txt": "text",
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass(slots=True)
|
|
105
|
+
class _NodeSpec:
|
|
106
|
+
node_type: str
|
|
107
|
+
name: str
|
|
108
|
+
metadata: dict[str, Any]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass(slots=True)
|
|
112
|
+
class _EdgeSpec:
|
|
113
|
+
source_type: str
|
|
114
|
+
source_name: str
|
|
115
|
+
target_type: str
|
|
116
|
+
target_name: str
|
|
117
|
+
relation: str
|
|
118
|
+
weight: float = 1.0
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class RepoScanner:
|
|
122
|
+
def __init__(
|
|
123
|
+
self,
|
|
124
|
+
graph_store: "KnowledgeGraphStore",
|
|
125
|
+
repo_root: str,
|
|
126
|
+
*,
|
|
127
|
+
project: str | None = None,
|
|
128
|
+
) -> None:
|
|
129
|
+
self._store = graph_store
|
|
130
|
+
self._root = Path(repo_root).resolve()
|
|
131
|
+
self._project = project or self._root.name
|
|
132
|
+
self._git_metadata_cache: dict[str, dict[str, Any]] = {}
|
|
133
|
+
self._git_line_commit_cache: dict[tuple[str, int], dict[str, str] | None] = {}
|
|
134
|
+
self._git_commit_detail_cache: dict[str, dict[str, str]] = {}
|
|
135
|
+
|
|
136
|
+
async def scan(self) -> dict[str, Any]:
    """Scan the repository and upsert its graph into the configured store.

    Service nodes are upserted first. Then, for every discovered source
    file, the file node, its owning-service edge, imported modules,
    extracted nodes and extracted edges are upserted in that order.

    Returns:
        A summary with ``project``, ``files_scanned`` and the number of
        node / edge upsert calls issued. Repeated upserts of the same node
        or edge are counted every time.
    """
    store = self._store
    service_dirs = self._discover_service_boundaries()
    source_files = self._discover_source_files()

    node_count = 0
    edge_count = 0

    service_ids: dict[Path, Any] = {}
    for service_dir in service_dirs:
        service_node = await store.upsert_node(
            node_type="service",
            name=str(service_dir.relative_to(self._root)),
            metadata={"project": self._project, "path": str(service_dir)},
        )
        service_ids[service_dir] = service_node.id
        node_count += 1

    for file_path in source_files:
        rel_path = str(file_path.relative_to(self._root))
        file_metadata, node_specs, edge_specs = self._extract_file_metadata(file_path, rel_path)
        common_metadata = self._build_file_scoped_metadata(
            rel_path=rel_path,
            language=str(file_metadata.get("language", "text") or "text"),
            change_metadata=self._git_file_change_metadata(rel_path),
        )

        file_node = await store.upsert_node(
            node_type="file",
            name=rel_path,
            metadata={"project": self._project, **common_metadata, **file_metadata},
        )
        node_count += 1
        # Edge endpoints are resolved only against nodes seen for this file.
        ids_by_key: dict[tuple[str, str], Any] = {("file", rel_path): file_node.id}

        owner = self._find_owning_service(file_path, service_dirs)
        if owner is not None:
            await store.upsert_edge(
                source_id=service_ids[owner],
                target_id=file_node.id,
                relation="contains",
            )
            edge_count += 1

        for module_name in self._extract_imports(file_path):
            module_node = await store.upsert_node(
                node_type="module",
                name=module_name,
                metadata={"project": self._project},
            )
            node_count += 1
            ids_by_key[("module", module_name)] = module_node.id

            await store.upsert_edge(
                source_id=file_node.id,
                target_id=module_node.id,
                relation="imports",
            )
            edge_count += 1

            if owner is None:
                continue
            # An import whose top-level package matches another service's
            # directory name is recorded as a service-to-service dependency.
            top_level = module_name.split(".")[0].split("/")[0].split(":")[0]
            for other_dir, other_id in service_ids.items():
                if other_dir != owner and other_dir.name == top_level:
                    await store.upsert_edge(
                        source_id=service_ids[owner],
                        target_id=other_id,
                        relation="depends_on",
                    )
                    edge_count += 1

        for spec in node_specs:
            scoped_metadata = self._build_node_scoped_metadata(
                rel_path=rel_path,
                base_metadata=common_metadata,
                node_metadata=spec.metadata,
            )
            stored = await store.upsert_node(
                node_type=spec.node_type,
                name=spec.name,
                metadata={"project": self._project, **scoped_metadata, **spec.metadata},
            )
            ids_by_key[(spec.node_type, spec.name)] = stored.id
            node_count += 1

        for edge in edge_specs:
            source_id = ids_by_key.get((edge.source_type, edge.source_name))
            target_id = ids_by_key.get((edge.target_type, edge.target_name))
            # Skip edges whose endpoints were not produced by this file.
            if source_id is None or target_id is None:
                continue
            await store.upsert_edge(
                source_id=source_id,
                target_id=target_id,
                relation=edge.relation,
                weight=edge.weight,
            )
            edge_count += 1

    return {
        "project": self._project,
        "files_scanned": len(source_files),
        "nodes_upserted": node_count,
        "edges_upserted": edge_count,
    }
|
|
240
|
+
|
|
241
|
+
@classmethod
def build_sync_payload(
    cls,
    repo_root: str,
    *,
    project: str | None = None,
    branch: str | None = None,
    diff_base: str | None = None,
    changed_files: list[str] | None = None,
    deleted_files: list[str] | None = None,
    branch_relationships: list[dict[str, Any]] | None = None,
    payload_version: str = "2026-04-15",
    source: str = "minder-cli",
) -> dict[str, Any]:
    """Build a serializable graph payload for a repository without a store.

    Args:
        repo_root: Path to the repository; resolved to an absolute path.
        project: Project label; defaults to the root directory name.
        branch: Branch name copied into the payload as-is.
        diff_base: Diff base copied into the payload as-is.
        changed_files: Repo-relative paths to include. When empty or None
            every discoverable source file is included.
        deleted_files: Paths reported (sorted) under ``deleted_files``.
        branch_relationships: Entries copied into the payload as a list.
        payload_version: Version tag stored in the payload.
        source: Producer tag stored in the payload.

    Returns:
        A dict with payload header fields, ``sync_metadata`` counters and
        the de-duplicated ``nodes`` and ``edges`` lists.
    """
    # Bypass __init__: no graph store is needed here, only the root, the
    # project label and the git caches used by the helper methods.
    builder = cls.__new__(cls)
    builder._root = Path(repo_root).resolve()
    builder._project = project or builder._root.name
    builder._git_metadata_cache = {}
    builder._git_line_commit_cache = {}
    builder._git_commit_detail_cache = {}

    service_dirs = builder._discover_service_boundaries()
    source_files = builder._resolve_source_files(changed_files)

    # Nodes are indexed by (type, name) so a repeated node is merged in O(1)
    # instead of rescanning the whole list; dict order keeps first-seen order.
    nodes_by_key: dict[tuple[str, str], dict[str, Any]] = {}
    edges: list[dict[str, Any]] = []
    seen_edges: set[tuple[str, str, str, str, str]] = set()

    def add_node(node_type: str, name: str, metadata: dict[str, Any]) -> None:
        key = (node_type, name)
        existing = nodes_by_key.get(key)
        if existing is not None:
            # Later metadata wins on key conflicts.
            existing["metadata"] = {**existing["metadata"], **metadata}
            return
        nodes_by_key[key] = {"node_type": node_type, "name": name, "metadata": metadata}

    def add_edge(edge_spec: _EdgeSpec) -> None:
        key = (
            edge_spec.source_type,
            edge_spec.source_name,
            edge_spec.target_type,
            edge_spec.target_name,
            edge_spec.relation,
        )
        if key in seen_edges:
            return
        seen_edges.add(key)
        edges.append(
            {
                "source": {"node_type": edge_spec.source_type, "name": edge_spec.source_name},
                "target": {"node_type": edge_spec.target_type, "name": edge_spec.target_name},
                "relation": edge_spec.relation,
                "weight": edge_spec.weight,
            }
        )

    changed_paths: list[str] = []
    for file_path in source_files:
        rel_path = str(file_path.relative_to(builder._root))
        changed_paths.append(rel_path)
        file_metadata, extracted_nodes, extracted_edges = builder._extract_file_metadata(file_path, rel_path)
        change_metadata = builder._git_file_change_metadata(rel_path)
        common_metadata = builder._build_file_scoped_metadata(
            rel_path=rel_path,
            language=str(file_metadata.get("language", "text") or "text"),
            change_metadata=change_metadata,
        )
        add_node("file", rel_path, {"project": builder._project, **common_metadata, **file_metadata})

        owning_svc = builder._find_owning_service(file_path, service_dirs)
        if owning_svc is not None:
            service_name = str(owning_svc.relative_to(builder._root))
            add_node("service", service_name, {"project": builder._project, "path": str(owning_svc)})
            add_edge(_EdgeSpec("service", service_name, "file", rel_path, "contains"))

        for module_name in builder._extract_imports(file_path):
            add_node("module", module_name, {"project": builder._project})
            add_edge(_EdgeSpec("file", rel_path, "module", module_name, "imports"))

        for node_spec in extracted_nodes:
            node_common_metadata = builder._build_node_scoped_metadata(
                rel_path=rel_path,
                base_metadata=common_metadata,
                node_metadata=node_spec.metadata,
            )
            add_node(
                node_spec.node_type,
                node_spec.name,
                {"project": builder._project, **node_common_metadata, **node_spec.metadata},
            )
        for edge_spec in extracted_edges:
            add_edge(edge_spec)

    deleted = list(deleted_files or [])
    relationships = list(branch_relationships or [])
    return {
        "payload_version": payload_version,
        "source": source,
        "repo_path": str(builder._root),
        "branch": branch,
        "diff_base": diff_base,
        "deleted_files": sorted(deleted),
        "sync_metadata": {
            "project": builder._project,
            "changed_file_count": len(source_files),
            "changed_files": changed_paths,
            "deleted_file_count": len(deleted),
            "branch_relationship_count": len(relationships),
        },
        "nodes": list(nodes_by_key.values()),
        "edges": edges,
        "branch_relationships": relationships,
    }
|
|
352
|
+
|
|
353
|
+
def _discover_service_boundaries(self) -> list[Path]:
    """Return directories that contain a service marker file.

    A directory qualifies when it holds any name in ``_SERVICE_MARKERS``
    and no path component *below the repository root* starts with a dot.

    Returns:
        Service directories ordered deepest first (ties broken by path
        string), so the first match for a file is its innermost service.
    """
    found: set[Path] = set()
    for marker in _SERVICE_MARKERS:
        for marker_path in self._root.rglob(marker):
            # Only inspect components relative to the root: checking the
            # absolute path would discard every marker when the repository
            # itself lives under a dot-directory (e.g. ~/.cache/repo).
            rel_parts = marker_path.relative_to(self._root).parts
            if any(part.startswith(".") for part in rel_parts):
                continue
            found.add(marker_path.parent)
    # The secondary key keeps the order independent of set iteration order.
    return sorted(found, key=lambda path: (-len(path.parts), str(path)))
|
|
363
|
+
|
|
364
|
+
def _discover_source_files(self) -> list[Path]:
    """Return every scannable source file under the repository root.

    A file is included when its lower-cased suffix is in
    ``_SOURCE_SUFFIXES`` and no path component below the root starts with
    a dot or equals ``__pycache__``.

    Returns:
        Sorted, de-duplicated list of absolute file paths.
    """
    files: set[Path] = set()
    # One tree walk instead of one per suffix. The suffix is lower-cased to
    # match how _resolve_source_files and _extract_file_metadata treat it.
    for path in self._root.rglob("*"):
        if path.suffix.lower() not in _SOURCE_SUFFIXES:
            continue
        # Only components relative to the root are inspected: the absolute
        # path would exclude everything when the repository sits under a
        # dot-directory.
        rel_parts = path.relative_to(self._root).parts
        if any(part.startswith(".") or part == "__pycache__" for part in rel_parts):
            continue
        if path.is_file():
            files.add(path)
    return sorted(files)
|
|
373
|
+
|
|
374
|
+
def _resolve_source_files(self, changed_files: list[str] | None) -> list[Path]:
|
|
375
|
+
if not changed_files:
|
|
376
|
+
return self._discover_source_files()
|
|
377
|
+
files: list[Path] = []
|
|
378
|
+
for changed_file in changed_files:
|
|
379
|
+
candidate = (self._root / changed_file).resolve()
|
|
380
|
+
if candidate.is_file() and candidate.suffix.lower() in _SOURCE_SUFFIXES:
|
|
381
|
+
files.append(candidate)
|
|
382
|
+
return sorted(set(files))
|
|
383
|
+
|
|
384
|
+
@staticmethod
|
|
385
|
+
def _find_owning_service(file_path: Path, service_dirs: list[Path]) -> Path | None:
|
|
386
|
+
for svc_dir in service_dirs:
|
|
387
|
+
try:
|
|
388
|
+
file_path.relative_to(svc_dir)
|
|
389
|
+
return svc_dir
|
|
390
|
+
except ValueError:
|
|
391
|
+
continue
|
|
392
|
+
return None
|
|
393
|
+
|
|
394
|
+
def _extract_file_metadata(
    self,
    file_path: Path,
    rel_path: str,
) -> tuple[dict[str, Any], list[_NodeSpec], list[_EdgeSpec]]:
    """Read a file and collect its metadata plus extracted nodes and edges.

    The suffix selects the extractor (Python, script languages, markdown,
    structured config); any other suffix only gets a non-empty line count.
    TODO nodes are extracted for every file type.

    Returns:
        ``(file_metadata, nodes, edges)`` with nodes and edges de-duplicated.
    """
    source = file_path.read_text(encoding="utf-8", errors="replace")
    suffix = file_path.suffix.lower()
    # Newline count plus one for any non-empty file, so a file that ends
    # with a newline reports one more than its number of text lines.
    line_count = source.count("\n") + 1 if source else 0
    file_metadata: dict[str, Any] = {
        "path": rel_path,
        "language": _LANGUAGE_BY_SUFFIX.get(suffix, suffix.lstrip(".")),
        "line_count": line_count,
        "size_bytes": file_path.stat().st_size,
    }
    collected_nodes: list[_NodeSpec] = []
    collected_edges: list[_EdgeSpec] = []

    if suffix in _PYTHON_SUFFIXES:
        py_nodes, py_edges = self._extract_python_metadata(file_path, rel_path)
        collected_nodes += py_nodes
        collected_edges += py_edges
    elif suffix in _SCRIPT_SUFFIXES:
        extra_metadata, script_nodes, script_edges = self._extract_script_metadata(source, rel_path)
        file_metadata.update(extra_metadata)
        collected_nodes += script_nodes
        collected_edges += script_edges
    elif suffix in _MARKDOWN_SUFFIXES:
        file_metadata.update(self._extract_markdown_metadata(source))
        collected_nodes += self._extract_markdown_task_nodes(source, rel_path)
    elif suffix in _STRUCTURED_SUFFIXES:
        file_metadata.update(self._extract_structured_metadata(source, suffix))
    else:
        file_metadata["non_empty_line_count"] = sum(
            1 for text_line in source.splitlines() if text_line.strip()
        )

    collected_nodes += self._extract_todo_nodes(source, rel_path)
    return (
        file_metadata,
        self._dedupe_node_specs(collected_nodes),
        self._dedupe_edge_specs(collected_edges),
    )
|
|
430
|
+
|
|
431
|
+
def _build_file_scoped_metadata(
|
|
432
|
+
self,
|
|
433
|
+
*,
|
|
434
|
+
rel_path: str,
|
|
435
|
+
language: str,
|
|
436
|
+
change_metadata: dict[str, Any],
|
|
437
|
+
) -> dict[str, Any]:
|
|
438
|
+
return {
|
|
439
|
+
"path": rel_path,
|
|
440
|
+
"language": language,
|
|
441
|
+
"history_scope": "file",
|
|
442
|
+
**change_metadata,
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
def _build_node_scoped_metadata(
|
|
446
|
+
self,
|
|
447
|
+
*,
|
|
448
|
+
rel_path: str,
|
|
449
|
+
base_metadata: dict[str, Any],
|
|
450
|
+
node_metadata: dict[str, Any],
|
|
451
|
+
) -> dict[str, Any]:
|
|
452
|
+
scoped_metadata = dict(base_metadata)
|
|
453
|
+
scoped_metadata.update(
|
|
454
|
+
self._git_node_change_metadata(
|
|
455
|
+
rel_path=rel_path,
|
|
456
|
+
node_metadata=node_metadata,
|
|
457
|
+
file_change_metadata=base_metadata,
|
|
458
|
+
)
|
|
459
|
+
)
|
|
460
|
+
return scoped_metadata
|
|
461
|
+
|
|
462
|
+
def _git_file_change_metadata(self, rel_path: str) -> dict[str, Any]:
|
|
463
|
+
cached = self._git_metadata_cache.get(rel_path)
|
|
464
|
+
if cached is not None:
|
|
465
|
+
return cached
|
|
466
|
+
|
|
467
|
+
recent_commits = self._git_recent_commits(rel_path)
|
|
468
|
+
status = self._git_status(rel_path, tracked=bool(recent_commits))
|
|
469
|
+
latest_commit = recent_commits[0] if recent_commits else {}
|
|
470
|
+
metadata = {
|
|
471
|
+
"last_state": status,
|
|
472
|
+
"last_commit_sha": latest_commit.get("sha"),
|
|
473
|
+
"last_commit_at": latest_commit.get("committed_at"),
|
|
474
|
+
"last_commit_summary": latest_commit.get("summary"),
|
|
475
|
+
"history_summary": self._build_history_summary(recent_commits, status),
|
|
476
|
+
"recent_commits": recent_commits,
|
|
477
|
+
}
|
|
478
|
+
self._git_metadata_cache[rel_path] = metadata
|
|
479
|
+
return metadata
|
|
480
|
+
|
|
481
|
+
def _git_node_change_metadata(
|
|
482
|
+
self,
|
|
483
|
+
*,
|
|
484
|
+
rel_path: str,
|
|
485
|
+
node_metadata: dict[str, Any],
|
|
486
|
+
file_change_metadata: dict[str, Any],
|
|
487
|
+
) -> dict[str, Any]:
|
|
488
|
+
line_number = self._node_line_number(node_metadata)
|
|
489
|
+
if line_number is None:
|
|
490
|
+
return {}
|
|
491
|
+
|
|
492
|
+
line_commit = self._git_line_commit(rel_path, line_number)
|
|
493
|
+
if line_commit is None:
|
|
494
|
+
return {
|
|
495
|
+
"history_scope": "line",
|
|
496
|
+
"last_touch_line": line_number,
|
|
497
|
+
"file_last_commit_sha": file_change_metadata.get("last_commit_sha"),
|
|
498
|
+
"file_last_commit_at": file_change_metadata.get("last_commit_at"),
|
|
499
|
+
"file_last_commit_summary": file_change_metadata.get("last_commit_summary"),
|
|
500
|
+
"file_history_summary": file_change_metadata.get("history_summary"),
|
|
501
|
+
}
|
|
502
|
+
|
|
503
|
+
subject = self._history_subject(node_metadata)
|
|
504
|
+
recent_commits = self._build_symbol_recent_commits(
|
|
505
|
+
subject=subject,
|
|
506
|
+
line_commit=line_commit,
|
|
507
|
+
file_recent_commits=file_change_metadata.get("recent_commits"),
|
|
508
|
+
)
|
|
509
|
+
return {
|
|
510
|
+
"history_scope": "symbol" if subject else "line",
|
|
511
|
+
"last_touch_line": line_number,
|
|
512
|
+
"last_commit_sha": line_commit.get("sha"),
|
|
513
|
+
"last_commit_at": line_commit.get("committed_at"),
|
|
514
|
+
"last_commit_summary": line_commit.get("summary"),
|
|
515
|
+
"history_summary": self._build_symbol_history_summary(
|
|
516
|
+
subject=subject,
|
|
517
|
+
status=str(file_change_metadata.get("last_state", "") or ""),
|
|
518
|
+
line_commit=line_commit,
|
|
519
|
+
recent_commits=recent_commits,
|
|
520
|
+
),
|
|
521
|
+
"recent_commits": recent_commits,
|
|
522
|
+
"file_last_commit_sha": file_change_metadata.get("last_commit_sha"),
|
|
523
|
+
"file_last_commit_at": file_change_metadata.get("last_commit_at"),
|
|
524
|
+
"file_last_commit_summary": file_change_metadata.get("last_commit_summary"),
|
|
525
|
+
"file_history_summary": file_change_metadata.get("history_summary"),
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
def _git_recent_commits(self, rel_path: str, limit: int = 5) -> list[dict[str, str]]:
|
|
529
|
+
result = subprocess.run(
|
|
530
|
+
[
|
|
531
|
+
"git",
|
|
532
|
+
"log",
|
|
533
|
+
"--follow",
|
|
534
|
+
"--format=%H%x1f%cI%x1f%s",
|
|
535
|
+
"-n",
|
|
536
|
+
str(limit),
|
|
537
|
+
"--",
|
|
538
|
+
rel_path,
|
|
539
|
+
],
|
|
540
|
+
cwd=self._root,
|
|
541
|
+
capture_output=True,
|
|
542
|
+
text=True,
|
|
543
|
+
check=False,
|
|
544
|
+
)
|
|
545
|
+
if result.returncode != 0:
|
|
546
|
+
return []
|
|
547
|
+
commits: list[dict[str, str]] = []
|
|
548
|
+
for line in result.stdout.splitlines():
|
|
549
|
+
sha, _, rest = line.partition("\x1f")
|
|
550
|
+
committed_at, _, summary = rest.partition("\x1f")
|
|
551
|
+
if not sha or not summary:
|
|
552
|
+
continue
|
|
553
|
+
commits.append(
|
|
554
|
+
{
|
|
555
|
+
"sha": sha.strip(),
|
|
556
|
+
"committed_at": committed_at.strip(),
|
|
557
|
+
"summary": summary.strip(),
|
|
558
|
+
}
|
|
559
|
+
)
|
|
560
|
+
return commits
|
|
561
|
+
|
|
562
|
+
def _git_line_commit(self, rel_path: str, line_number: int) -> dict[str, str] | None:
|
|
563
|
+
cache_key = (rel_path, line_number)
|
|
564
|
+
if cache_key in self._git_line_commit_cache:
|
|
565
|
+
return self._git_line_commit_cache[cache_key]
|
|
566
|
+
|
|
567
|
+
result = subprocess.run(
|
|
568
|
+
[
|
|
569
|
+
"git",
|
|
570
|
+
"blame",
|
|
571
|
+
"--line-porcelain",
|
|
572
|
+
"-L",
|
|
573
|
+
f"{line_number},{line_number}",
|
|
574
|
+
"--",
|
|
575
|
+
rel_path,
|
|
576
|
+
],
|
|
577
|
+
cwd=self._root,
|
|
578
|
+
capture_output=True,
|
|
579
|
+
text=True,
|
|
580
|
+
check=False,
|
|
581
|
+
)
|
|
582
|
+
if result.returncode != 0:
|
|
583
|
+
self._git_line_commit_cache[cache_key] = None
|
|
584
|
+
return None
|
|
585
|
+
|
|
586
|
+
first_line = next((line for line in result.stdout.splitlines() if line.strip()), "")
|
|
587
|
+
sha = first_line.split(" ", 1)[0].strip()
|
|
588
|
+
if not sha or set(sha) == {"0"}:
|
|
589
|
+
self._git_line_commit_cache[cache_key] = None
|
|
590
|
+
return None
|
|
591
|
+
|
|
592
|
+
details = self._git_commit_details(sha)
|
|
593
|
+
self._git_line_commit_cache[cache_key] = details
|
|
594
|
+
return details
|
|
595
|
+
|
|
596
|
+
def _git_commit_details(self, sha: str) -> dict[str, str]:
|
|
597
|
+
cached = self._git_commit_detail_cache.get(sha)
|
|
598
|
+
if cached is not None:
|
|
599
|
+
return cached
|
|
600
|
+
|
|
601
|
+
result = subprocess.run(
|
|
602
|
+
["git", "show", "-s", "--format=%H%x1f%cI%x1f%s", sha],
|
|
603
|
+
cwd=self._root,
|
|
604
|
+
capture_output=True,
|
|
605
|
+
text=True,
|
|
606
|
+
check=False,
|
|
607
|
+
)
|
|
608
|
+
if result.returncode != 0:
|
|
609
|
+
details = {"sha": sha, "committed_at": "", "summary": ""}
|
|
610
|
+
self._git_commit_detail_cache[sha] = details
|
|
611
|
+
return details
|
|
612
|
+
|
|
613
|
+
raw = result.stdout.strip().splitlines()
|
|
614
|
+
if not raw:
|
|
615
|
+
details = {"sha": sha, "committed_at": "", "summary": ""}
|
|
616
|
+
self._git_commit_detail_cache[sha] = details
|
|
617
|
+
return details
|
|
618
|
+
|
|
619
|
+
commit_sha, _, rest = raw[0].partition("\x1f")
|
|
620
|
+
committed_at, _, summary = rest.partition("\x1f")
|
|
621
|
+
details = {
|
|
622
|
+
"sha": commit_sha.strip() or sha,
|
|
623
|
+
"committed_at": committed_at.strip(),
|
|
624
|
+
"summary": summary.strip(),
|
|
625
|
+
}
|
|
626
|
+
self._git_commit_detail_cache[sha] = details
|
|
627
|
+
return details
|
|
628
|
+
|
|
629
|
+
def _git_status(self, rel_path: str, *, tracked: bool) -> str:
|
|
630
|
+
result = subprocess.run(
|
|
631
|
+
["git", "status", "--short", "--", rel_path],
|
|
632
|
+
cwd=self._root,
|
|
633
|
+
capture_output=True,
|
|
634
|
+
text=True,
|
|
635
|
+
check=False,
|
|
636
|
+
)
|
|
637
|
+
if result.returncode != 0:
|
|
638
|
+
return "tracked" if tracked else "untracked"
|
|
639
|
+
raw_status = result.stdout.strip()
|
|
640
|
+
if not raw_status:
|
|
641
|
+
return "clean" if tracked else "untracked"
|
|
642
|
+
status_code = raw_status[:2]
|
|
643
|
+
if status_code == "??":
|
|
644
|
+
return "untracked"
|
|
645
|
+
if "R" in status_code:
|
|
646
|
+
return "renamed"
|
|
647
|
+
if "D" in status_code:
|
|
648
|
+
return "deleted"
|
|
649
|
+
if "A" in status_code:
|
|
650
|
+
return "added"
|
|
651
|
+
if "M" in status_code:
|
|
652
|
+
return "modified"
|
|
653
|
+
return "changed"
|
|
654
|
+
|
|
655
|
+
@staticmethod
|
|
656
|
+
def _node_line_number(node_metadata: dict[str, Any]) -> int | None:
|
|
657
|
+
raw_line = node_metadata.get("line")
|
|
658
|
+
if isinstance(raw_line, int):
|
|
659
|
+
return raw_line
|
|
660
|
+
if isinstance(raw_line, str) and raw_line.isdigit():
|
|
661
|
+
return int(raw_line)
|
|
662
|
+
return None
|
|
663
|
+
|
|
664
|
+
@staticmethod
|
|
665
|
+
def _history_subject(node_metadata: dict[str, Any]) -> str:
|
|
666
|
+
for key in ("symbol", "route_path", "handler", "text"):
|
|
667
|
+
value = node_metadata.get(key)
|
|
668
|
+
if isinstance(value, str) and value.strip():
|
|
669
|
+
return value.strip()
|
|
670
|
+
return ""
|
|
671
|
+
|
|
672
|
+
@staticmethod
|
|
673
|
+
def _build_symbol_recent_commits(
|
|
674
|
+
*,
|
|
675
|
+
subject: str,
|
|
676
|
+
line_commit: dict[str, str],
|
|
677
|
+
file_recent_commits: Any,
|
|
678
|
+
) -> list[dict[str, str]]:
|
|
679
|
+
commits: list[dict[str, str]] = []
|
|
680
|
+
seen: set[str] = set()
|
|
681
|
+
|
|
682
|
+
def add(commit: dict[str, str]) -> None:
|
|
683
|
+
sha = str(commit.get("sha", "") or "")
|
|
684
|
+
if not sha or sha in seen:
|
|
685
|
+
return
|
|
686
|
+
seen.add(sha)
|
|
687
|
+
commits.append(commit)
|
|
688
|
+
|
|
689
|
+
add(line_commit)
|
|
690
|
+
normalized_subject = subject.lower().strip()
|
|
691
|
+
if isinstance(file_recent_commits, list):
|
|
692
|
+
for commit in file_recent_commits:
|
|
693
|
+
if not isinstance(commit, dict):
|
|
694
|
+
continue
|
|
695
|
+
summary = str(commit.get("summary", "") or "")
|
|
696
|
+
if normalized_subject and normalized_subject in summary.lower():
|
|
697
|
+
add(
|
|
698
|
+
{
|
|
699
|
+
"sha": str(commit.get("sha", "") or ""),
|
|
700
|
+
"committed_at": str(commit.get("committed_at", "") or ""),
|
|
701
|
+
"summary": summary,
|
|
702
|
+
}
|
|
703
|
+
)
|
|
704
|
+
for commit in file_recent_commits:
|
|
705
|
+
if not isinstance(commit, dict) or len(commits) >= 3:
|
|
706
|
+
continue
|
|
707
|
+
add(
|
|
708
|
+
{
|
|
709
|
+
"sha": str(commit.get("sha", "") or ""),
|
|
710
|
+
"committed_at": str(commit.get("committed_at", "") or ""),
|
|
711
|
+
"summary": str(commit.get("summary", "") or ""),
|
|
712
|
+
}
|
|
713
|
+
)
|
|
714
|
+
|
|
715
|
+
return commits[:5]
|
|
716
|
+
|
|
717
|
+
@staticmethod
|
|
718
|
+
def _build_symbol_history_summary(
|
|
719
|
+
*,
|
|
720
|
+
subject: str,
|
|
721
|
+
status: str,
|
|
722
|
+
line_commit: dict[str, str],
|
|
723
|
+
recent_commits: list[dict[str, str]],
|
|
724
|
+
) -> str:
|
|
725
|
+
subject_label = subject or "this node"
|
|
726
|
+
prefix = f"Current state: {status}. " if status and status != "clean" else ""
|
|
727
|
+
summary = line_commit.get("summary", "").strip()
|
|
728
|
+
if not summary:
|
|
729
|
+
return f"{prefix}No symbol-level git history available yet.".strip()
|
|
730
|
+
|
|
731
|
+
trailing = [
|
|
732
|
+
commit.get("summary", "").strip()
|
|
733
|
+
for commit in recent_commits[1:3]
|
|
734
|
+
if commit.get("summary")
|
|
735
|
+
]
|
|
736
|
+
if trailing:
|
|
737
|
+
return f"{prefix}Last touch for {subject_label}: {summary}. Related changes: {'; '.join(trailing)}".strip()
|
|
738
|
+
return f"{prefix}Last touch for {subject_label}: {summary}.".strip()
|
|
739
|
+
|
|
740
|
+
@staticmethod
|
|
741
|
+
def _build_history_summary(recent_commits: list[dict[str, str]], status: str) -> str:
|
|
742
|
+
if not recent_commits:
|
|
743
|
+
if status == "untracked":
|
|
744
|
+
return "New file not committed yet."
|
|
745
|
+
return "No git history available for this node yet."
|
|
746
|
+
summaries = [commit.get("summary", "").strip() for commit in recent_commits if commit.get("summary")]
|
|
747
|
+
compact = "; ".join(summaries[:3])
|
|
748
|
+
prefix = f"Current state: {status}. " if status and status != "clean" else ""
|
|
749
|
+
return f"{prefix}Recent changes: {compact}".strip()
|
|
750
|
+
|
|
751
|
+
@staticmethod
def _extract_imports(file_path: Path) -> list[str]:
    """Return the sorted, de-duplicated modules imported by ``file_path``.

    The file is read as UTF-8 with undecodable bytes replaced. Python files
    (suffix in ``_PYTHON_SUFFIXES``) are parsed with ``ast``; a file that
    does not parse yields an empty list. Other languages are handled with
    regular expressions keyed on the file suffix:

    * ``.ts/.tsx/.js/.jsx`` - ``import ... from '...'``, bare
      ``import '...'`` and ``require('...')``;
    * ``.java`` - ``import a.b.C;`` lines;
    * ``.go`` - single ``import "path"`` specs and grouped
      ``import ( ... )`` blocks;
    * ``.rs`` - the leading path of ``use`` statements.

    Unknown suffixes yield an empty list.
    """
    source = file_path.read_text(encoding="utf-8", errors="replace")
    suffix = file_path.suffix.lower()
    if suffix in _PYTHON_SUFFIXES:
        try:
            tree = ast.parse(source, filename=str(file_path))
        except (SyntaxError, ValueError):
            return []
        python_modules: set[str] = set()
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                python_modules.update(alias.name for alias in node.names)
            elif isinstance(node, ast.ImportFrom) and node.module:
                # Relative imports without a module ("from . import x") are skipped.
                python_modules.add(node.module)
        return sorted(python_modules)

    modules: set[str] = set()
    if suffix in {".ts", ".tsx", ".js", ".jsx"}:
        for match in re.finditer(r"import\s+(?:[^;]*?from\s+)?['\"]([^'\"]+)['\"]", source):
            modules.add(match.group(1))
        for match in re.finditer(r"require\(\s*['\"]([^'\"]+)['\"]\s*\)", source):
            modules.add(match.group(1))
    elif suffix == ".java":
        for match in re.finditer(r"^\s*import\s+([\w.]+);", source, flags=re.MULTILINE):
            modules.add(match.group(1))
    elif suffix == ".go":
        # Only look at import declarations; matching every quoted string in
        # the file would report ordinary string literals as imports.
        spec = r'(?:[\w.]+[ \t]+)?"([^"]+)"'  # optional alias / "_" / "." then the path
        for match in re.finditer(rf"^[ \t]*import[ \t]+{spec}", source, flags=re.MULTILINE):
            modules.add(match.group(1))
        for group in re.finditer(r"^[ \t]*import[ \t]*\(([^)]*)\)", source, flags=re.MULTILINE):
            for match in re.finditer(rf"^[ \t]*{spec}", group.group(1), flags=re.MULTILINE):
                modules.add(match.group(1))
    elif suffix == ".rs":
        for match in re.finditer(r"^\s*use\s+([\w:]+)", source, flags=re.MULTILINE):
            modules.add(match.group(1))
    return sorted(modules)
|
|
785
|
+
|
|
786
|
+
@classmethod
def _extract_python_metadata(
    cls,
    file_path: Path,
    rel_path: str,
) -> tuple[list[_NodeSpec], list[_EdgeSpec]]:
    """Derive graph nodes and edges from one Python source file.

    The file is parsed with ``ast``. TODO nodes are always collected from
    the raw text; when parsing fails they are the only result. Otherwise a
    visitor adds class / function / controller / route /
    external-service / message-queue nodes and the edges linking them.
    Both lists are de-duplicated before being returned.
    """
    source = ""
    try:
        source = file_path.read_text(encoding="utf-8", errors="replace")
        tree = ast.parse(source, filename=str(file_path))
    except (SyntaxError, ValueError):
        # Fall back to whatever text was read (possibly nothing).
        return cls._extract_todo_nodes(source, rel_path), []

    node_specs: list[_NodeSpec] = cls._extract_todo_nodes(source, rel_path)
    edge_specs: list[_EdgeSpec] = []
    http_modules = {"httpx", "requests"}

    class MetadataVisitor(ast.NodeVisitor):
        def __init__(self) -> None:
            # (node_type, qualified_name) of the classes currently being visited.
            self.class_stack: list[tuple[str, str]] = []
            # Local names bound to httpx/requests or to something imported from them.
            self.http_aliases: set[str] = set()

        def visit_Import(self, node: ast.Import) -> None:
            for alias in node.names:
                top_level = alias.name.split(".")[0]
                if top_level in http_modules:
                    self.http_aliases.add(alias.asname or top_level)
            self.generic_visit(node)

        def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
            top_level = (node.module or "").split(".")[0]
            if top_level in http_modules:
                self.http_aliases.update(alias.asname or alias.name for alias in node.names)
            self.generic_visit(node)

        def visit_ClassDef(self, node: ast.ClassDef) -> None:
            kind = cls._class_node_type(node)
            qualified = cls._qualified_symbol_name(rel_path, node.name, self.class_stack)
            class_metadata = {
                "path": rel_path,
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
                "symbol": node.name,
            }
            node_specs.append(_NodeSpec(kind, qualified, class_metadata))
            edge_specs.append(_EdgeSpec("file", rel_path, kind, qualified, "contains"))
            if cls._is_controller_class(node):
                controller_metadata = {"path": rel_path, "line": node.lineno, "symbol": node.name}
                node_specs.append(_NodeSpec("controller", qualified, controller_metadata))
                edge_specs.append(_EdgeSpec(kind, qualified, "controller", qualified, "tracks"))

            self.class_stack.append((kind, qualified))
            self.generic_visit(node)
            self.class_stack.pop()

        def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
            self._visit_function_like(node)

        def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
            self._visit_function_like(node)

        def _visit_function_like(self, node: ast.FunctionDef | ast.AsyncFunctionDef) -> None:
            qualified = cls._qualified_symbol_name(rel_path, node.name, self.class_stack)
            function_metadata = {
                "path": rel_path,
                "line": node.lineno,
                "end_line": getattr(node, "end_lineno", node.lineno),
                "symbol": node.name,
                "is_async": isinstance(node, ast.AsyncFunctionDef),
            }
            node_specs.append(_NodeSpec("function", qualified, function_metadata))
            if self.class_stack:
                owner_type, owner_name = self.class_stack[-1][0], self.class_stack[-1][1]
            else:
                owner_type, owner_name = "file", rel_path
            edge_specs.append(_EdgeSpec(owner_type, owner_name, "function", qualified, "contains"))

            route = cls._route_info(node)
            if route is not None:
                http_method, url_path = route
                route_name = f"{http_method} {url_path}"
                route_metadata = {
                    "path": rel_path,
                    "method": http_method,
                    "route_path": url_path,
                    "line": node.lineno,
                    "handler": qualified,
                }
                node_specs.append(_NodeSpec("route", route_name, route_metadata))
                # Methods are attributed to their class's controller node,
                # free functions to themselves.
                if self.class_stack:
                    exposer_type, exposer_name = "controller", self.class_stack[-1][1]
                else:
                    exposer_type, exposer_name = "function", qualified
                edge_specs.append(_EdgeSpec(exposer_type, exposer_name, "route", route_name, "exposes_route"))

            for call in ast.walk(node):
                if not isinstance(call, ast.Call):
                    continue
                call_line = getattr(call, "lineno", node.lineno)

                service_url = cls._external_service_from_call(call, self.http_aliases)
                if service_url is not None:
                    service_metadata = {"path": rel_path, "line": call_line, "caller": qualified}
                    node_specs.append(_NodeSpec("external_service_api", service_url, service_metadata))
                    edge_specs.append(
                        _EdgeSpec("function", qualified, "external_service_api", service_url, "uses_external_service")
                    )

                queue_use = cls._mq_topic_from_call(call)
                if queue_use is not None:
                    relation, topic_name = queue_use
                    node_specs.append(_NodeSpec("mq_topic", topic_name, {"path": rel_path, "line": call_line}))
                    edge_specs.append(_EdgeSpec("function", qualified, "mq_topic", topic_name, relation))

            self.generic_visit(node)

    MetadataVisitor().visit(tree)
    return cls._dedupe_node_specs(node_specs), cls._dedupe_edge_specs(edge_specs)
|
|
895
|
+
|
|
896
|
+
@classmethod
def _extract_script_metadata(
    cls,
    source: str,
    rel_path: str,
) -> tuple[dict[str, Any], list[_NodeSpec], list[_EdgeSpec]]:
    """Extract graph metadata from non-Python source text using regexes.

    Args:
        source: Full text of the file.
        rel_path: Path recorded on every node and used as the ``file``
            endpoint of file-level edges.

    Returns:
        A 3-tuple of
        * a summary dict with ``symbol_count``, ``route_count``,
          ``external_service_count`` and ``mq_topic_count``;
        * the de-duplicated node specs (interfaces, classes, controllers,
          functions, routes / API endpoints, websocket endpoints, external
          service URLs, message-queue topics and producers/consumers);
        * the de-duplicated edge specs connecting them.

    Counts are incremented per regex match, i.e. before de-duplication.
    """
    nodes: list[_NodeSpec] = []
    edges: list[_EdgeSpec] = []
    symbol_count = 0
    route_count = 0
    external_services: set[str] = set()
    mq_topics: set[str] = set()

    # --- Interfaces / traits ---
    for match in re.finditer(r"(?:export\s+)?interface\s+([A-Za-z_][A-Za-z0-9_]*)|\btrait\s+([A-Za-z_][A-Za-z0-9_]*)", source):
        name = match.group(1) or match.group(2)
        symbol_name = f"{rel_path}::{name}"
        nodes.append(_NodeSpec("interface", symbol_name, {"path": rel_path, "line": _line_number(source, match.start()), "symbol": name}))
        edges.append(_EdgeSpec("file", rel_path, "interface", symbol_name, "contains"))
        symbol_count += 1

    # --- Classes / structs (classes named *Controller also become controllers) ---
    for match in re.finditer(r"(?:export\s+)?(abstract\s+)?class\s+([A-Za-z_][A-Za-z0-9_]*)|\bstruct\s+([A-Za-z_][A-Za-z0-9_]*)", source):
        is_abstract = bool(match.group(1))
        name = match.group(2) or match.group(3)
        symbol_name = f"{rel_path}::{name}"
        node_type = "abstract_class" if is_abstract else "class"
        line = _line_number(source, match.start())
        nodes.append(_NodeSpec(node_type, symbol_name, {"path": rel_path, "line": line, "symbol": name}))
        edges.append(_EdgeSpec("file", rel_path, node_type, symbol_name, "contains"))
        if name.endswith("Controller"):
            nodes.append(_NodeSpec("controller", symbol_name, {"path": rel_path, "line": line, "symbol": name}))
            edges.append(_EdgeSpec(node_type, symbol_name, "controller", symbol_name, "tracks"))
        symbol_count += 1

    # --- Functions: JS/TS declarations, const arrow functions, Go func, Rust fn ---
    patterns = [
        r"(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(",
        r"(?:export\s+)?const\s+([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(?:async\s*)?\(",
        r"\bfunc\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)\s*\(",
        r"\bfn\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(",
    ]
    for pattern in patterns:
        for match in re.finditer(pattern, source):
            name = match.group(1)
            symbol_name = f"{rel_path}::{name}"
            nodes.append(_NodeSpec("function", symbol_name, {"path": rel_path, "line": _line_number(source, match.start()), "symbol": name}))
            edges.append(_EdgeSpec("file", rel_path, "function", symbol_name, "contains"))
            symbol_count += 1

    # --- HTTP route detection (Express/Fastify/Koa patterns) ---
    for match in re.finditer(
        r"(?:router|app|server)\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)['\"]",
        source,
    ):
        method = match.group(1).upper()
        path = match.group(2)
        route_name = f"{method} {path}"
        nodes.append(_NodeSpec("route", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": path, "framework": "express",
        }))
        edges.append(_EdgeSpec("file", rel_path, "route", route_name, "exposes_route"))
        route_count += 1

    # --- NestJS @Get/@Post etc. ---
    # Detect controller prefix for full path reconstruction
    controller_prefix = ""
    ctrl_match = _NESTJS_CONTROLLER_PATTERN.search(source)
    if ctrl_match:
        controller_prefix = ctrl_match.group(1).rstrip("/")

    for match in _NESTJS_ROUTE_PATTERN.finditer(source):
        method = match.group(1).upper()
        sub_path = (match.group(2) or "").strip("/")
        full_path = f"/{controller_prefix}/{sub_path}".replace("//", "/").rstrip("/") or "/"
        route_name = f"{method} {full_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": full_path, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Spring Boot @GetMapping / @PostMapping etc. ---
    # Annotation name -> HTTP method; built once instead of per match.
    method_map = {
        "GetMapping": "GET", "PostMapping": "POST", "PutMapping": "PUT",
        "PatchMapping": "PATCH", "DeleteMapping": "DELETE", "RequestMapping": "ANY",
    }
    for match in _SPRING_ROUTE_PATTERN.finditer(source):
        annotation = match.group(1)
        route_path = match.group(2)
        method = method_map.get(annotation, "ANY")
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "spring",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Go Gin/Fiber/Chi route patterns ---
    for match in _GO_ROUTE_PATTERN.finditer(source):
        method = match.group(1).upper()
        route_path = match.group(2)
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "gin",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- Rust axum/actix-web route attributes ---
    for match in _RUST_ROUTE_ATTR_PATTERN.finditer(source):
        route_path = match.group(1)
        # Infer method from the text around the attribute (e.g. #[get("/")] -> GET).
        # The route path itself is cut out of the inspected window so that a
        # path such as "/posts" or "/deleted" cannot be mistaken for a verb.
        window_start = max(0, match.start() - 5)
        window_end = match.start() + 30
        path_start, path_end = match.span(1)
        if path_start < 0:
            # Group 1 did not participate; nothing to cut out.
            attr_line = source[window_start:window_end]
        else:
            attr_line = (
                source[window_start:min(path_start, window_end)]
                + " "
                + source[path_end:window_end]
            )
        method = "GET"
        for m in ("post", "put", "patch", "delete"):
            if m in attr_line:
                method = m.upper()
                break
        route_name = f"{method} {route_path}"
        nodes.append(_NodeSpec("api_endpoint", route_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "method": method, "route_path": route_path, "framework": "axum",
        }))
        edges.append(_EdgeSpec("file", rel_path, "api_endpoint", route_name, "exposes_route"))
        route_count += 1

    # --- WebSocket: NestJS @WebSocketGateway + @SubscribeMessage ---
    ws_gateway_path = ""
    gw_match = _WS_GATEWAY_PATTERN.search(source)
    if gw_match:
        ws_gateway_path = gw_match.group(1) or ""
        nodes.append(_NodeSpec("websocket_endpoint", f"WS {ws_gateway_path or '/'}", {
            "path": rel_path, "line": _line_number(source, gw_match.start()),
            "gateway_path": ws_gateway_path, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", f"WS {ws_gateway_path or '/'}", "exposes_websocket"))

    for match in _WS_SUBSCRIBE_PATTERN.finditer(source):
        event_name = match.group(1)
        ws_endpoint_name = f"WS:{event_name}"
        nodes.append(_NodeSpec("websocket_endpoint", ws_endpoint_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "event": event_name, "framework": "nestjs",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", ws_endpoint_name, "websocket"))

    # --- Spring WebSocket @MessageMapping ---
    for match in _SPRING_WS_MAPPING_PATTERN.finditer(source):
        dest = match.group(1)
        ws_endpoint_name = f"WS:{dest}"
        nodes.append(_NodeSpec("websocket_endpoint", ws_endpoint_name, {
            "path": rel_path, "line": _line_number(source, match.start()),
            "event": dest, "framework": "spring",
        }))
        edges.append(_EdgeSpec("file", rel_path, "websocket_endpoint", ws_endpoint_name, "websocket"))

    # --- External service calls (URL literals) ---
    for match in _URL_PATTERN.finditer(source):
        url = match.group(0)
        external_services.add(url)
        nodes.append(_NodeSpec("external_service_api", url, {
            "path": rel_path, "line": _line_number(source, match.start()),
        }))
        edges.append(_EdgeSpec("file", rel_path, "external_service_api", url, "uses_external_service"))

    # --- Message queue: publish / consume calls ---
    for action in _MQ_PUBLISH_CALLS.union(_MQ_CONSUME_CALLS):
        for match in re.finditer(rf"\.{action}\s*\(\s*['\"]([^'\"]+)['\"]", source):
            topic_name = match.group(1)
            relation = "publishes" if action in _MQ_PUBLISH_CALLS else "consumes"
            node_type = "mq_producer" if action in _MQ_PUBLISH_CALLS else "mq_consumer"
            mq_topics.add(topic_name)
            nodes.append(_NodeSpec("mq_topic", topic_name, {
                "path": rel_path, "line": _line_number(source, match.start()),
            }))
            nodes.append(_NodeSpec(node_type, f"{node_type}:{topic_name}", {
                "path": rel_path, "line": _line_number(source, match.start()),
                "topic": topic_name,
            }))
            # file -> producer/consumer -> topic, both labelled with the same relation.
            edges.append(_EdgeSpec("file", rel_path, node_type, f"{node_type}:{topic_name}", relation))
            edges.append(_EdgeSpec(node_type, f"{node_type}:{topic_name}", "mq_topic", topic_name, relation))

    return {
        "symbol_count": symbol_count,
        "route_count": route_count,
        "external_service_count": len(external_services),
        "mq_topic_count": len(mq_topics),
    }, cls._dedupe_node_specs(nodes), cls._dedupe_edge_specs(edges)
|
|
1086
|
+
|
|
1087
|
+
@staticmethod
def _extract_markdown_metadata(source: str) -> dict[str, Any]:
    """Summarise a Markdown document.

    Returns the number of headings matched by ``_MARKDOWN_HEADING_PATTERN``,
    the stripped text of the first 20 of them, and the number of
    ``_URL_PATTERN`` matches in ``source``.
    """
    titles: list[str] = []
    for found in _MARKDOWN_HEADING_PATTERN.finditer(source):
        titles.append(found.group(2).strip())
    link_total = len(_URL_PATTERN.findall(source))
    return {
        "heading_count": len(titles),
        "headings": titles[:20],
        "link_count": link_total,
    }
|
|
1095
|
+
|
|
1096
|
+
@staticmethod
def _extract_markdown_task_nodes(source: str, rel_path: str) -> list[_NodeSpec]:
    """Create one ``todo`` node per line matched by ``_MARKDOWN_TASK_PATTERN``.

    Nodes are named ``"<rel_path>::TODO:<line>"`` (1-based line number) and
    carry the path, the line and the stripped first capture group as text.
    """
    return [
        _NodeSpec(
            "todo",
            f"{rel_path}::TODO:{lineno}",
            {"path": rel_path, "line": lineno, "text": hit.group(1).strip()},
        )
        for lineno, text in enumerate(source.splitlines(), start=1)
        if (hit := _MARKDOWN_TASK_PATTERN.match(text)) is not None
    ]
|
|
1105
|
+
|
|
1106
|
+
@staticmethod
|
|
1107
|
+
def _extract_structured_metadata(source: str, suffix: str) -> dict[str, Any]:
|
|
1108
|
+
keys: list[str] = []
|
|
1109
|
+
if suffix == ".json":
|
|
1110
|
+
try:
|
|
1111
|
+
parsed = json.loads(source)
|
|
1112
|
+
if isinstance(parsed, dict):
|
|
1113
|
+
keys = sorted(str(key) for key in parsed.keys())
|
|
1114
|
+
except json.JSONDecodeError:
|
|
1115
|
+
keys = []
|
|
1116
|
+
elif suffix == ".toml":
|
|
1117
|
+
try:
|
|
1118
|
+
parsed = tomllib.loads(source)
|
|
1119
|
+
if isinstance(parsed, dict):
|
|
1120
|
+
keys = sorted(str(key) for key in parsed.keys())
|
|
1121
|
+
except tomllib.TOMLDecodeError:
|
|
1122
|
+
keys = []
|
|
1123
|
+
else:
|
|
1124
|
+
keys = sorted({match.group(1) for match in _YAML_KEY_PATTERN.finditer(source)})
|
|
1125
|
+
if not keys:
|
|
1126
|
+
keys = sorted({match.group(1) for match in _INI_KEY_PATTERN.finditer(source)})
|
|
1127
|
+
return {"top_level_keys": keys[:50], "top_level_key_count": len(keys)}
|
|
1128
|
+
|
|
1129
|
+
@staticmethod
def _extract_todo_nodes(source: str, rel_path: str) -> list[_NodeSpec]:
    """Create one ``todo`` node per line in which ``_TODO_PATTERN`` is found.

    The node text is the stripped first capture group, or the literal
    ``"TODO"`` when that group is missing or blank. Nodes are named
    ``"<rel_path>::TODO:<line>"`` with 1-based line numbers.
    """
    found: list[_NodeSpec] = []
    for lineno, text in enumerate(source.splitlines(), start=1):
        hit = _TODO_PATTERN.search(text)
        if hit is not None:
            label = (hit.group(1) or "").strip() or "TODO"
            metadata = {"path": rel_path, "line": lineno, "text": label}
            found.append(_NodeSpec("todo", f"{rel_path}::TODO:{lineno}", metadata))
    return found
|
|
1139
|
+
|
|
1140
|
+
@staticmethod
|
|
1141
|
+
def _qualified_symbol_name(rel_path: str, symbol_name: str, class_stack: list[tuple[str, str]]) -> str:
|
|
1142
|
+
if not class_stack:
|
|
1143
|
+
return f"{rel_path}::{symbol_name}"
|
|
1144
|
+
owner_name = class_stack[-1][1].split("::", 1)[1]
|
|
1145
|
+
return f"{rel_path}::{owner_name}.{symbol_name}"
|
|
1146
|
+
|
|
1147
|
+
@staticmethod
def _class_node_type(node: ast.ClassDef) -> str:
    """Classify a class definition as interface, abstract class or class.

    A ``Protocol`` base makes it an ``"interface"``. An ``ABC`` /
    ``ABCMeta`` base, or any directly contained method decorated with
    ``abstractmethod``, makes it an ``"abstract_class"``. Everything else
    is a plain ``"class"``.
    """
    base_names = {RepoScanner._base_name(base) for base in node.bases}
    if "Protocol" in base_names:
        return "interface"
    if base_names & {"ABC", "ABCMeta"}:
        return "abstract_class"
    for member in node.body:
        if not isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        for decorator in member.decorator_list:
            if RepoScanner._base_name(decorator) == "abstractmethod":
                return "abstract_class"
    return "class"
|
|
1160
|
+
|
|
1161
|
+
@staticmethod
def _is_controller_class(node: ast.ClassDef) -> bool:
    """Tell whether a class should also be recorded as a controller.

    True when the class name ends with ``"Controller"`` or when any method
    defined directly in its body yields route information.
    """
    if node.name.endswith("Controller"):
        return True
    return any(
        isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef))
        and RepoScanner._route_info(member) is not None
        for member in node.body
    )
|
|
1169
|
+
|
|
1170
|
+
@staticmethod
def _route_info(node: ast.FunctionDef | ast.AsyncFunctionDef) -> tuple[str, str] | None:
    """Return ``(method, path)`` from the first usable route decorator.

    Only decorators of the form ``something.<name>(...)`` are considered,
    where the lower-cased ``<name>`` is in ``_HTTP_ROUTE_DECORATORS`` and
    the first positional argument is a string literal. For ``route`` the
    method comes from the ``methods=`` keyword; otherwise it is the
    upper-cased decorator name. ``None`` means no decorator qualified.
    """
    for decorator in node.decorator_list:
        is_attribute_call = isinstance(decorator, ast.Call) and isinstance(decorator.func, ast.Attribute)
        if not is_attribute_call:
            continue
        verb = decorator.func.attr.lower()
        if verb in _HTTP_ROUTE_DECORATORS:
            target = RepoScanner._string_arg_value(decorator.args)
            if target is not None:
                if verb == "route":
                    http_method = RepoScanner._route_methods_from_keywords(decorator.keywords)
                else:
                    http_method = verb.upper()
                return http_method, target
    return None
|
|
1184
|
+
|
|
1185
|
+
@staticmethod
|
|
1186
|
+
def _route_methods_from_keywords(keywords: list[ast.keyword]) -> str:
|
|
1187
|
+
for keyword in keywords:
|
|
1188
|
+
if keyword.arg != "methods" or not isinstance(keyword.value, (ast.List, ast.Tuple)):
|
|
1189
|
+
continue
|
|
1190
|
+
methods = [elt.value.upper() for elt in keyword.value.elts if isinstance(elt, ast.Constant) and isinstance(elt.value, str)]
|
|
1191
|
+
if methods:
|
|
1192
|
+
return "/".join(methods)
|
|
1193
|
+
return "ROUTE"
|
|
1194
|
+
|
|
1195
|
+
@staticmethod
|
|
1196
|
+
def _string_arg_value(args: list[ast.expr]) -> str | None:
|
|
1197
|
+
if not args:
|
|
1198
|
+
return None
|
|
1199
|
+
first = args[0]
|
|
1200
|
+
if isinstance(first, ast.Constant) and isinstance(first.value, str):
|
|
1201
|
+
return first.value
|
|
1202
|
+
return None
|
|
1203
|
+
|
|
1204
|
+
@staticmethod
def _external_service_from_call(call: ast.Call, http_aliases: set[str]) -> str | None:
    """Return the URL literal of an HTTP-client call, if ``call`` is one.

    Two call shapes are recognised: ``alias.<verb>(...)`` where ``alias`` is
    in ``http_aliases`` and the lower-cased verb is ``request`` or a member
    of ``_HTTP_ROUTE_DECORATORS``, and a direct ``alias(...)`` call. The URL
    is then looked up in the call's arguments; ``None`` is returned when
    the call does not match or carries no URL literal.
    """
    target = call.func
    if isinstance(target, ast.Attribute):
        receiver = target.value
        if (
            isinstance(receiver, ast.Name)
            and receiver.id in http_aliases
            and target.attr.lower() in _HTTP_ROUTE_DECORATORS.union({"request"})
        ):
            return RepoScanner._extract_url_from_call(call)
        return None
    if isinstance(target, ast.Name) and target.id in http_aliases:
        return RepoScanner._extract_url_from_call(call)
    return None
|
|
1213
|
+
|
|
1214
|
+
@staticmethod
def _extract_url_from_call(call: ast.Call) -> str | None:
    """Find the first URL string literal passed to ``call``.

    Positional arguments are checked first, then ``url=`` keyword values. A
    candidate qualifies when it is a string constant that ``_URL_PATTERN``
    matches from its start.
    """
    candidates = list(call.args)
    candidates.extend(keyword.value for keyword in call.keywords if keyword.arg == "url")
    for candidate in candidates:
        if not isinstance(candidate, ast.Constant):
            continue
        literal = candidate.value
        if isinstance(literal, str) and _URL_PATTERN.match(literal):
            return literal
    return None
|
|
1220
|
+
|
|
1221
|
+
@staticmethod
def _mq_topic_from_call(call: ast.Call) -> tuple[str, str] | None:
    """Recognise a message-queue publish/consume call.

    ``call`` must be an attribute call whose lower-cased method name is in
    ``_MQ_PUBLISH_CALLS`` (relation ``"publishes"``) or ``_MQ_CONSUME_CALLS``
    (relation ``"consumes"``) and whose first positional argument is a
    string literal naming the topic. Returns ``(relation, topic)`` or
    ``None``.
    """
    target = call.func
    if not isinstance(target, ast.Attribute):
        return None

    verb = target.attr.lower()
    if verb in _MQ_PUBLISH_CALLS:
        relation = "publishes"
    elif verb in _MQ_CONSUME_CALLS:
        relation = "consumes"
    else:
        return None

    topic = RepoScanner._string_arg_value(call.args)
    return None if topic is None else (relation, topic)
|
|
1237
|
+
|
|
1238
|
+
@staticmethod
|
|
1239
|
+
def _base_name(node: ast.expr) -> str:
|
|
1240
|
+
if isinstance(node, ast.Name):
|
|
1241
|
+
return node.id
|
|
1242
|
+
if isinstance(node, ast.Attribute):
|
|
1243
|
+
return node.attr
|
|
1244
|
+
if isinstance(node, ast.Subscript):
|
|
1245
|
+
return RepoScanner._base_name(node.value)
|
|
1246
|
+
return ""
|
|
1247
|
+
|
|
1248
|
+
@staticmethod
def _dedupe_node_specs(nodes: list[_NodeSpec]) -> list[_NodeSpec]:
    """Collapse node specs sharing ``(node_type, name)`` into one.

    The first occurrence fixes the position in the result. Metadata of
    later duplicates is merged on top of the earlier metadata, so later
    values win on key conflicts.
    """
    merged: dict[tuple[str, str], _NodeSpec] = {}
    for spec in nodes:
        identity = (spec.node_type, spec.name)
        earlier = merged.get(identity)
        if earlier is None:
            merged[identity] = spec
        else:
            combined = {**earlier.metadata, **spec.metadata}
            merged[identity] = _NodeSpec(spec.node_type, spec.name, combined)
    return list(merged.values())
|
|
1256
|
+
|
|
1257
|
+
@staticmethod
def _dedupe_edge_specs(edges: list[_EdgeSpec]) -> list[_EdgeSpec]:
    """Drop repeated edges, keyed on source, target and relation.

    The first occurrence fixes the position in the result; the object kept
    for a key is the last one seen.
    """
    unique = {
        (edge.source_type, edge.source_name, edge.target_type, edge.target_name, edge.relation): edge
        for edge in edges
    }
    return list(unique.values())
|
|
1263
|
+
|
|
1264
|
+
|
|
1265
|
+
def _line_number(source: str, offset: int) -> int:
|
|
1266
|
+
return source.count("\n", 0, offset) + 1
|