codebeacon 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
"""Entity / ORM model extraction for all supported frameworks.
|
|
2
|
+
|
|
3
|
+
Public API:
|
|
4
|
+
extract_entities(file_path, framework) -> list[EntityInfo]
|
|
5
|
+
|
|
6
|
+
Supported ORMs:
|
|
7
|
+
JPA (@Entity), Django ORM (models.Model), SQLAlchemy/Pydantic,
|
|
8
|
+
Eloquent, EF Core (DbSet<>), GORM (struct tags), Active Record,
|
|
9
|
+
Diesel/SeaORM (#[derive]), Fluent (Vapor), Exposed (Ktor), TypeORM/Mongoose (NestJS).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import re
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
|
|
16
|
+
from codebeacon.common.types import EntityInfo
|
|
17
|
+
from codebeacon.extract.base import (
|
|
18
|
+
extract_sfc_sections,
|
|
19
|
+
load_query_file,
|
|
20
|
+
node_text,
|
|
21
|
+
parse_file,
|
|
22
|
+
parse_sfc_script,
|
|
23
|
+
run_query,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# ── Framework → query file stem ───────────────────────────────────────────────

# Maps a lower-cased framework id (as produced by the detector) to the
# .scm query-file stem under extract/queries/.  Several frameworks share
# one file because they target the same tree-sitter grammar:
# koa/fastify reuse express, echo/fiber/go reuse gin, axum/rust reuse
# actix, and nextjs reuses the react queries.
_FW_TO_QUERY: dict[str, str] = {
    "spring-boot": "spring_boot",
    "express": "express",
    "koa": "express",
    "fastify": "express",
    "nestjs": "nestjs",
    "nextjs": "react",
    "react": "react",
    "fastapi": "fastapi",
    "django": "django",
    "flask": "flask",
    "gin": "gin",
    "echo": "gin",
    "fiber": "gin",
    "go": "gin",
    "rails": "rails",
    "laravel": "laravel",
    "aspnet": "aspnet",
    "actix": "actix",
    "axum": "actix",
    "rust": "actix",
    "vapor": "vapor",
    "ktor": "ktor",
}

# GORM struct tag parser: `gorm:"column:name;primaryKey"`
# Captures everything between the quotes of the gorm:"..." tag.
_GORM_TAG_RE = re.compile(r'gorm:"([^"]*)"')
# Matches a single key[:value] segment inside the tag.
# NOTE(review): _GORM_KEY_RE is not referenced anywhere in this module —
# presumably reserved for per-key tag parsing; confirm before removing.
_GORM_KEY_RE = re.compile(r'(\w+)(?::(\w+))?')
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── Public function ───────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
def extract_entities(file_path: str, framework: str) -> list[EntityInfo]:
    """Extract entity / ORM model definitions from *file_path*.

    Best-effort: returns an empty list when the framework is unknown,
    the query file is missing, the file cannot be parsed, the grammar is
    not allowed for that query, or the query/interpreter raises.
    """
    normalized = framework.lower()
    stem = _FW_TO_QUERY.get(normalized)
    if not stem:
        return []

    pattern = load_query_file(stem)
    if not pattern:
        return []

    parsed = parse_file(file_path)
    if parsed is None:
        return []
    root, lang = parsed

    # Imported lazily, as in the original — presumably to sidestep an
    # import cycle with codebeacon.extract.base; TODO confirm.
    from codebeacon.extract.base import is_grammar_allowed

    if not is_grammar_allowed(stem, lang):
        return []

    try:
        matched = run_query(lang, pattern, root)
    except Exception:
        # A malformed query or grammar mismatch simply yields no entities.
        return []

    dispatch = {
        "spring_boot": _interpret_spring_boot,
        "express": _interpret_noop,
        "nestjs": _interpret_nestjs,
        "fastapi": _interpret_python_orm,
        "django": _interpret_django,
        "flask": _interpret_python_orm,
        "gin": _interpret_gorm,
        "rails": _interpret_rails,
        "laravel": _interpret_laravel,
        "aspnet": _interpret_aspnet,
        "actix": _interpret_rust,
        "vapor": _interpret_vapor,
        "ktor": _interpret_ktor,
        "react": _interpret_noop,
    }

    handler = dispatch.get(stem, _interpret_noop)
    try:
        return handler(file_path, matched, normalized)
    except Exception:
        # Interpreter bugs on unusual ASTs must not break the scan.
        return []
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _interpret_noop(file_path: str, matches: list, framework: str) -> list[EntityInfo]:
|
|
111
|
+
return []
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ── Per-framework interpreters ────────────────────────────────────────────────
|
|
115
|
+
|
|
116
|
+
def _interpret_spring_boot(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """JPA @Entity with @Table(name=...), @Id, @Column, @ManyToOne etc.

    Classes, table annotations, and fields arrive as separate matches;
    fields and @Table are attached to whichever registered class byte
    span contains them.
    """
    entities: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # class start_byte → (start, end) byte span
    # NOTE(review): table_names is written nowhere and read nowhere in this
    # function — apparently dead; confirm before removing.
    table_names: dict[int, str] = {}  # entity class start_byte → table name

    for _idx, caps in matches:
        # @Entity class
        if "entity.class" in caps and "entity.class_name" in caps:
            cls = caps["entity.class"][0]
            name = node_text(caps["entity.class_name"][0])
            key = cls.start_byte
            entities[key] = EntityInfo(
                name=name,
                table_name="",  # filled in below if an @Table annotation is found
                source_file=file_path,
                line=cls.start_point[0] + 1,  # tree-sitter rows are 0-based
                framework="jpa",
            )
            class_ranges[key] = (cls.start_byte, cls.end_byte)

        # @Table(name="...") annotation
        if "entity.table_annotation" in caps and "entity.table_name" in caps:
            tbl_node = caps["entity.table_annotation"][0]
            table = node_text(caps["entity.table_name"][0]).strip('"\'')
            # Attach to the enclosing class; assumes the class match was
            # seen before its @Table match — TODO confirm query ordering.
            for key, (start, end) in class_ranges.items():
                if start <= tbl_node.start_byte <= end:
                    entities[key].table_name = table
                    break

        # Entity fields with JPA annotations
        if "entity.field" in caps and "entity.field_name" in caps:
            field_node = caps["entity.field"][0]
            field_name = node_text(caps["entity.field_name"][0])
            field_type = node_text(caps["entity.field_type"][0]) if "entity.field_type" in caps else ""
            ann = node_text(caps["entity.field_annotation"][0]) if "entity.field_annotation" in caps else ""

            for key, (start, end) in class_ranges.items():
                if start <= field_node.start_byte <= end:
                    field_info = {"name": field_name, "type": field_type, "annotations": [ann] if ann else []}
                    # Relations: relationship annotations become relations
                    # (target = the field's declared type), everything else
                    # is recorded as a plain field.
                    if ann in ("ManyToOne", "OneToMany", "ManyToMany", "OneToOne"):
                        entities[key].relations.append({"type": ann, "target": field_type})
                    else:
                        entities[key].fields.append(field_info)
                    break

    return list(entities.values())
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _interpret_nestjs(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """NestJS: TypeORM @Entity() / Mongoose @Schema().

    Emits one EntityInfo per unique class name; duplicates (the same
    class captured by multiple patterns) are skipped.
    """
    found: list[EntityInfo] = []
    names_seen: set[str] = set()

    for _pattern, caps in matches:
        if "entity.class" not in caps or "entity.class_name" not in caps:
            continue
        cls_name = node_text(caps["entity.class_name"][0])
        if cls_name in names_seen:
            continue
        names_seen.add(cls_name)
        cls_node = caps["entity.class"][0]
        found.append(EntityInfo(
            name=cls_name,
            table_name="",
            source_file=file_path,
            line=cls_node.start_point[0] + 1,  # tree-sitter rows are 0-based
            framework="typeorm",
        ))

    return found
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _interpret_python_orm(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """FastAPI/Flask: SQLAlchemy Base / Pydantic BaseModel.

    Model classes can be captured under two names ("entity.class" for
    ordinary subclassing, "entity.class_attr" — presumably for
    attribute-style bases like db.Model; confirm against the query file).
    Fields arrive with a separate "entity.with_fields" capture and are
    matched back to their class by byte span.
    """
    entities: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # class start_byte → byte span

    for _idx, caps in matches:
        # Register the class under whichever capture name matched first.
        for cls_key in ("entity.class", "entity.class_attr"):
            if cls_key in caps and "entity.class_name" in caps:
                cls = caps[cls_key][0]
                name = node_text(caps["entity.class_name"][0])
                key = cls.start_byte
                # Multiple patterns can hit the same class; keep the first.
                if key not in entities:
                    entities[key] = EntityInfo(
                        name=name,
                        table_name="",
                        source_file=file_path,
                        line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                        framework="sqlalchemy",
                    )
                    class_ranges[key] = (cls.start_byte, cls.end_byte)
                break

        # Fields: type-annotated assignments
        if "entity.with_fields" in caps and "entity.field_name" in caps:
            cls = caps["entity.with_fields"][0]
            # Field names and types are parallel capture lists; zip pairs
            # them positionally (unpaired trailing captures are dropped).
            for fn, ft in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
            ):
                field_name = node_text(fn)
                field_type = node_text(ft)
                # Attach to the registered class whose span contains the
                # with_fields class node (its own start_byte, when the
                # same class was registered above).
                for key, (start, end) in class_ranges.items():
                    if start <= cls.start_byte <= end:
                        entities[key].fields.append({
                            "name": field_name, "type": field_type, "annotations": [],
                        })
                        break

    return list(entities.values())
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _interpret_django(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Django: models.Model subclass with CharField, ForeignKey, etc.

    Relationship field types (ForeignKey & friends) are reported as
    relations keyed by the attribute name; everything else is a field.
    """
    relation_kinds = ("ForeignKey", "OneToOneField", "ManyToManyField")
    results: list[EntityInfo] = []

    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue

        model_node = caps["entity.model"][0]
        model_name = node_text(caps["entity.class_name"][0])
        plain: list = []
        assoc: list = []

        # Field names/types are parallel capture lists paired positionally.
        for name_node, type_node in zip(
            caps.get("entity.field_name", []),
            caps.get("entity.field_type", []),
        ):
            attr = node_text(name_node)
            kind = node_text(type_node)
            if kind in relation_kinds:
                assoc.append({"type": kind, "target": attr})
            else:
                plain.append({"name": attr, "type": kind, "annotations": []})

        results.append(EntityInfo(
            name=model_name,
            table_name="",
            source_file=file_path,
            line=model_node.start_point[0] + 1,  # 0-based row → 1-based line
            framework="django-orm",
            fields=plain,
            relations=assoc,
        ))

    return results
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _interpret_gorm(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Go: GORM struct with struct tags parsed via regex.

    Two query shapes: "entity.struct" for structs with tagged fields
    (field name/type/tag arrive as parallel capture lists), and
    "entity.struct_bare" for structs captured without tags.
    """
    entities: dict[int, EntityInfo] = {}  # struct start_byte → EntityInfo

    for _idx, caps in matches:
        # Struct with tagged fields
        if "entity.struct" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct"][0]
            name = node_text(caps["entity.struct_name"][0])
            key = cls.start_byte
            # The same struct may match once per tagged field; register once.
            if key not in entities:
                entities[key] = EntityInfo(
                    name=name,
                    table_name="",
                    source_file=file_path,
                    line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                    framework="gorm",
                )
            # Parse fields + tags — zip pairs the parallel capture lists;
            # a field missing any of the three captures is dropped.
            for fn, ft, tag_node in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
                caps.get("entity.field_tag", []),
            ):
                field_name = node_text(fn)
                field_type = node_text(ft)
                tag_raw = node_text(tag_node)
                annotations = _parse_gorm_tag(tag_raw)
                entities[key].fields.append({
                    "name": field_name, "type": field_type, "annotations": annotations,
                })

        # Struct without tags (bare struct) — recorded with no fields.
        elif "entity.struct_bare" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct_bare"][0]
            name = node_text(caps["entity.struct_name"][0])
            key = cls.start_byte
            if key not in entities:
                entities[key] = EntityInfo(
                    name=name,
                    table_name="",
                    source_file=file_path,
                    line=cls.start_point[0] + 1,
                    framework="gorm",
                )

    return list(entities.values())
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _parse_gorm_tag(raw: str) -> list[str]:
    """Parse a GORM struct tag like `gorm:"column:name;primaryKey"`.

    Returns one "gorm:<segment>" annotation per non-empty,
    semicolon-separated segment; an empty list when no gorm tag is found.
    """
    match = _GORM_TAG_RE.search(raw)
    if match is None:
        return []
    return [
        f"gorm:{segment.strip()}"
        for segment in match.group(1).split(";")
        if segment.strip()
    ]
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _interpret_rails(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Rails: ApplicationRecord subclass + has_many/belongs_to associations.

    Two passes: register every model class first, then attach each
    association macro to the class whose byte span encloses it.
    """
    models: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    spans: dict[int, tuple[int, int]] = {}

    # Pass 1: model classes.
    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue
        model_node = caps["entity.model"][0]
        anchor = model_node.start_byte
        models[anchor] = EntityInfo(
            name=node_text(caps["entity.class_name"][0]),
            table_name="",
            source_file=file_path,
            line=model_node.start_point[0] + 1,  # 0-based row → 1-based line
            framework="active-record",
        )
        spans[anchor] = (model_node.start_byte, model_node.end_byte)

    # Pass 2: associations (has_many, belongs_to, ...).
    for _pattern, caps in matches:
        if "entity.association" not in caps or "entity.relation_type" not in caps:
            continue
        assoc_node = caps["entity.association"][0]
        kind = node_text(caps["entity.relation_type"][0])
        target = ""
        if "entity.relation_target" in caps:
            # Ruby symbol argument, e.g. :comments — drop the leading colon.
            target = node_text(caps["entity.relation_target"][0]).strip(":")
        for anchor, (lo, hi) in spans.items():
            if lo <= assoc_node.start_byte <= hi:
                models[anchor].relations.append({"type": kind, "target": target})
                break

    return list(models.values())
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _interpret_laravel(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Laravel: Eloquent Model subclass + relation methods.

    Two passes: register every model class first, then attach each
    relation call (hasMany, belongsTo, ...) to the class whose byte span
    encloses it.
    """
    models: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    spans: dict[int, tuple[int, int]] = {}

    # Pass 1: model classes.
    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue
        model_node = caps["entity.model"][0]
        anchor = model_node.start_byte
        models[anchor] = EntityInfo(
            name=node_text(caps["entity.class_name"][0]),
            table_name="",
            source_file=file_path,
            line=model_node.start_point[0] + 1,  # 0-based row → 1-based line
            framework="eloquent",
        )
        spans[anchor] = (model_node.start_byte, model_node.end_byte)

    # Pass 2: relation methods.
    for _pattern, caps in matches:
        if "entity.relation" not in caps or "entity.relation_type" not in caps:
            continue
        rel_node = caps["entity.relation"][0]
        kind = node_text(caps["entity.relation_type"][0])
        target = ""
        if "entity.relation_model" in caps:
            target = node_text(caps["entity.relation_model"][0])
        for anchor, (lo, hi) in spans.items():
            if lo <= rel_node.start_byte <= hi:
                models[anchor].relations.append({"type": kind, "target": target})
                break

    return list(models.values())
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _interpret_aspnet(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """ASP.NET: EF Core DbSet<T> properties on DbContext.

    One EntityInfo per unique DbSet element type; the DbSet property name
    is recorded as the table name.
    """
    found: list[EntityInfo] = []
    already: set[str] = set()

    for _pattern, caps in matches:
        if "entity.dbset" not in caps or "entity.class_name" not in caps:
            continue
        entity_name = node_text(caps["entity.class_name"][0])
        if entity_name in already:
            continue
        already.add(entity_name)

        prop_name = ""
        if "entity.dbset_name" in caps:
            prop_name = node_text(caps["entity.dbset_name"][0])
        dbset_node = caps["entity.dbset"][0]

        found.append(EntityInfo(
            name=entity_name,
            table_name=prop_name,  # DbSet property name doubles as the table hint
            source_file=file_path,
            line=dbset_node.start_point[0] + 1,  # 0-based row → 1-based line
            framework="ef-core",
        ))

    return found
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def _interpret_rust(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Rust: #[derive(Queryable/DeriveEntityModel)] structs + fields.

    The derive traits decide which ORM the struct belongs to:
    DeriveEntityModel/DeriveRelation → sea-orm, FromRow → sqlx,
    anything else defaults to diesel.  Fields arrive in a second pass
    keyed by byte span.
    """
    entities: dict[int, EntityInfo] = {}  # struct start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # struct start_byte → byte span

    for _idx, caps in matches:
        # Struct with derive macros
        if "entity.struct" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct"][0]
            name = node_text(caps["entity.struct_name"][0])
            traits = [node_text(n) for n in caps.get("entity.derive_trait", [])]
            key = cls.start_byte

            # Classify the ORM by the derive traits present; diesel is the
            # fallback when no sea-orm/sqlx marker trait is found.
            orm_type = "diesel"
            if any(t in ("DeriveEntityModel", "DeriveRelation") for t in traits):
                orm_type = "sea-orm"
            elif any(t in ("FromRow",) for t in traits):
                orm_type = "sqlx"

            entities[key] = EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                framework=orm_type,
            )
            class_ranges[key] = (cls.start_byte, cls.end_byte)

    # Second pass: attach fields to the registered struct whose span
    # contains the struct_with_fields node.
    for _idx, caps in matches:
        if "entity.struct_with_fields" in caps and "entity.field_name" in caps:
            cls = caps["entity.struct_with_fields"][0]
            # Field names/types are parallel capture lists paired by zip.
            for fn, ft in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
            ):
                fname = node_text(fn)
                ftype = node_text(ft)
                for key, (start, end) in class_ranges.items():
                    if start <= cls.start_byte <= end:
                        entities[key].fields.append({
                            "name": fname, "type": ftype, "annotations": [],
                        })
                        break

    return list(entities.values())
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
def _interpret_vapor(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Vapor Fluent: Model class + @Field/@ID property wrappers.

    Two passes: register every Fluent model class, then attach @Field and
    @ID property wrappers to the model whose byte span encloses them.
    """
    models: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    spans: dict[int, tuple[int, int]] = {}

    # Pass 1: model classes.
    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue
        model_node = caps["entity.model"][0]
        anchor = model_node.start_byte
        models[anchor] = EntityInfo(
            name=node_text(caps["entity.class_name"][0]),
            table_name="",
            source_file=file_path,
            line=model_node.start_point[0] + 1,  # 0-based row → 1-based line
            framework="fluent",
        )
        spans[anchor] = (model_node.start_byte, model_node.end_byte)

    def _attach(node, field_record):
        # Append the field to whichever registered model encloses *node*.
        for anchor, (lo, hi) in spans.items():
            if lo <= node.start_byte <= hi:
                models[anchor].fields.append(field_record)
                return

    # Pass 2: property wrappers. A single match may carry both a @Field
    # and an @ID capture, so both checks run for every match.
    for _pattern, caps in matches:
        # @Field(key: "column") var fieldName
        if "entity.field" in caps and "entity.field_name" in caps:
            fname = node_text(caps["entity.field_name"][0])
            column = ""
            if "entity.field_key" in caps:
                column = node_text(caps["entity.field_key"][0])
            _attach(caps["entity.field"][0], {
                "name": fname,
                "type": "",
                "annotations": [f"key:{column}"] if column else [],
            })

        # @ID var id
        if "entity.id_field" in caps and "entity.id_name" in caps:
            _attach(caps["entity.id_field"][0], {
                "name": node_text(caps["entity.id_name"][0]),
                "type": "",
                "annotations": ["@ID"],
            })

    return list(models.values())
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _interpret_ktor(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Ktor: Exposed Table objects + columns, data classes.

    Emits one EntityInfo per unique name across both shapes (Exposed
    Table objects and Kotlin data classes); the first occurrence wins.
    """
    entities: list[EntityInfo] = []
    seen: set[str] = set()  # dedupe across both Table and data-class captures

    for _idx, caps in matches:
        # Exposed Table object
        if "entity.table" in caps and "entity.table_name" in caps:
            name = node_text(caps["entity.table_name"][0])
            if name in seen:
                # NOTE(review): this `continue` also skips the data-class
                # check below for the same match — confirm that a single
                # match never carries both capture sets.
                continue
            seen.add(name)
            node = caps["entity.table"][0]
            fields = []
            # Column names/types are parallel capture lists paired by zip.
            for cn, ct in zip(
                caps.get("entity.column_name", []),
                caps.get("entity.column_type", []),
            ):
                fields.append({
                    "name": node_text(cn), "type": node_text(ct), "annotations": [],
                })
            entities.append(EntityInfo(
                name=name,
                # Heuristic: Exposed objects are conventionally named after
                # their table; lower-cased object name stands in for it.
                table_name=name.lower(),
                source_file=file_path,
                line=node.start_point[0] + 1,  # 0-based row → 1-based line
                framework="exposed",
                fields=fields,
            ))

        # Kotlin data class
        if "entity.data_class" in caps and "entity.class_name" in caps:
            name = node_text(caps["entity.class_name"][0])
            if name in seen:
                continue
            seen.add(name)
            node = caps["entity.data_class"][0]
            entities.append(EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=node.start_point[0] + 1,
                framework="kotlin-data",
            ))

    return entities
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# codebeacon query files
|
|
2
|
+
|
|
3
|
+
Each `.scm` file defines tree-sitter queries for one framework or language.
|
|
4
|
+
Queries are loaded at runtime by `extract/base.py → load_query_file(grammar)`.
|
|
5
|
+
|
|
6
|
+
## Adding a new framework
|
|
7
|
+
|
|
8
|
+
### 1. Identify the grammar
|
|
9
|
+
|
|
10
|
+
Map the file extension to a tree-sitter grammar in `extract/base.py`:
|
|
11
|
+
|
|
12
|
+
```python
|
|
13
|
+
# EXT_TO_GRAMMAR
|
|
14
|
+
".ex": "elixir",
|
|
15
|
+
|
|
16
|
+
# _GRAMMAR_MODULES
|
|
17
|
+
"elixir": "tree_sitter_elixir",
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
If the grammar package exposes a non-standard function (like `language_typescript()`
|
|
21
|
+
instead of `language()`), add special handling in `get_language()`.
|
|
22
|
+
|
|
23
|
+
### 2. Explore the AST
|
|
24
|
+
|
|
25
|
+
Use the tree-sitter playground or this snippet to understand node types:
|
|
26
|
+
|
|
27
|
+
```python
|
|
28
|
+
from codebeacon.extract.base import parse_source, node_text
|
|
29
|
+
|
|
30
|
+
src = b'your framework code here'
|
|
31
|
+
root, lang = parse_source(src, 'your_grammar')
|
|
32
|
+
|
|
33
|
+
def walk(n, d=0):
|
|
34
|
+
print(' '*d + n.type + ' ' + repr(node_text(n)[:40]))
|
|
35
|
+
for c in n.named_children: walk(c, d+2)
|
|
36
|
+
walk(root)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
### 3. Write the .scm file
|
|
40
|
+
|
|
41
|
+
Name it after the framework's query stem (frameworks that share a grammar share one file — e.g. Koa and Fastify reuse `express.scm`):
|
|
42
|
+
|
|
43
|
+
| Framework | Grammar | File |
|
|
44
|
+
|-------------|------------|-----------------|
|
|
45
|
+
| Spring Boot | java | spring_boot.scm |
|
|
46
|
+
| NestJS | typescript | nestjs.scm |
|
|
47
|
+
| Gin/Echo | go | gin.scm |
|
|
48
|
+
| Ktor | kotlin | ktor.scm |
|
|
49
|
+
| Actix/Axum | rust | actix.scm |
|
|
50
|
+
|
|
51
|
+
**Capture naming convention:**
|
|
52
|
+
|
|
53
|
+
| Prefix | Meaning |
|
|
54
|
+
|--------------|--------------------------------------|
|
|
55
|
+
| `@route.*` | Route path, method, handler |
|
|
56
|
+
| `@service.*` | Service class, DI relationships |
|
|
57
|
+
| `@entity.*` | ORM models, fields, relations |
|
|
58
|
+
| `@component.*` | Frontend components, props |
|
|
59
|
+
| `@di.*` | DI bindings (unresolved refs) |
|
|
60
|
+
| `@module.*` | Module-level groupings |
|
|
61
|
+
| `@hook.*` | Hooks / composables usage |
|
|
62
|
+
| `@import.*` | Import/require statements |
|
|
63
|
+
|
|
64
|
+
**Grammar quirks to watch:**
|
|
65
|
+
|
|
66
|
+
- **Java**: `marker_annotation` (no args) vs `annotation` (with args) — use `[...]` alternation
|
|
67
|
+
- **PHP**: `scoped_call_expression` for `Class::method()`, `encapsed_string` for strings
|
|
68
|
+
- **Rust**: `attribute_item` wraps `attribute`, proc macro args in `token_tree` (unparsed)
|
|
69
|
+
- **Kotlin**: trailing lambdas via `annotated_lambda` / `lambda_literal`
|
|
70
|
+
- **Swift**: route paths are multi-argument: `app.get("a", "b")` — join in extractor
|
|
71
|
+
- **Vue/Svelte**: SFC files use section extraction; queries apply to `<script>` content only
|
|
72
|
+
|
|
73
|
+
### 4. Wire up the extractor
|
|
74
|
+
|
|
75
|
+
Add dispatch in the relevant extractor module:
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
# extract/routes.py
|
|
79
|
+
elif framework == "phoenix":
|
|
80
|
+
return _extract_phoenix_routes(file_path, root, lang)
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 5. Add fixtures and tests
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
tests/fixtures/phoenix/
|
|
87
|
+
router.ex
|
|
88
|
+
user_controller.ex
|
|
89
|
+
|
|
90
|
+
tests/test_routes.py
|
|
91
|
+
def test_phoenix_routes():
|
|
92
|
+
...
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Query file structure
|
|
96
|
+
|
|
97
|
+
Each file should include:
|
|
98
|
+
1. Header comment: framework name, grammar, important AST notes
|
|
99
|
+
2. Capture documentation table
|
|
100
|
+
3. Grouped sections (routes → services → entities → imports)
|
|
101
|
+
4. `; ──` separators between sections
|
|
102
|
+
|
|
103
|
+
## tree-sitter 0.25 API note
|
|
104
|
+
|
|
105
|
+
```python
|
|
106
|
+
from tree_sitter import Query, QueryCursor
|
|
107
|
+
|
|
108
|
+
q = Query(language, pattern_string)
|
|
109
|
+
cursor = QueryCursor(q)
|
|
110
|
+
for pattern_idx, captures in cursor.matches(root_node):
|
|
111
|
+
for capture_name, nodes in captures.items():
|
|
112
|
+
for node in nodes:
|
|
113
|
+
print(capture_name, node.text)
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
`Language.query()` is deprecated in 0.25 — always use `Query(language, pattern)`.
|