codebeacon 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. codebeacon/__init__.py +1 -0
  2. codebeacon/__main__.py +3 -0
  3. codebeacon/cache.py +136 -0
  4. codebeacon/cli.py +391 -0
  5. codebeacon/common/__init__.py +0 -0
  6. codebeacon/common/filters.py +170 -0
  7. codebeacon/common/symbols.py +121 -0
  8. codebeacon/common/types.py +98 -0
  9. codebeacon/config.py +144 -0
  10. codebeacon/contextmap/__init__.py +0 -0
  11. codebeacon/contextmap/generator.py +602 -0
  12. codebeacon/discover/__init__.py +0 -0
  13. codebeacon/discover/detector.py +388 -0
  14. codebeacon/discover/scanner.py +192 -0
  15. codebeacon/export/__init__.py +0 -0
  16. codebeacon/export/mcp.py +515 -0
  17. codebeacon/export/obsidian.py +812 -0
  18. codebeacon/extract/__init__.py +22 -0
  19. codebeacon/extract/base.py +372 -0
  20. codebeacon/extract/components.py +357 -0
  21. codebeacon/extract/dependencies.py +140 -0
  22. codebeacon/extract/entities.py +575 -0
  23. codebeacon/extract/queries/README.md +116 -0
  24. codebeacon/extract/queries/actix.scm +115 -0
  25. codebeacon/extract/queries/angular.scm +155 -0
  26. codebeacon/extract/queries/aspnet.scm +159 -0
  27. codebeacon/extract/queries/django.scm +122 -0
  28. codebeacon/extract/queries/express.scm +124 -0
  29. codebeacon/extract/queries/fastapi.scm +152 -0
  30. codebeacon/extract/queries/flask.scm +120 -0
  31. codebeacon/extract/queries/gin.scm +142 -0
  32. codebeacon/extract/queries/ktor.scm +144 -0
  33. codebeacon/extract/queries/laravel.scm +172 -0
  34. codebeacon/extract/queries/nestjs.scm +183 -0
  35. codebeacon/extract/queries/rails.scm +114 -0
  36. codebeacon/extract/queries/react.scm +111 -0
  37. codebeacon/extract/queries/spring_boot.scm +204 -0
  38. codebeacon/extract/queries/svelte.scm +73 -0
  39. codebeacon/extract/queries/vapor.scm +130 -0
  40. codebeacon/extract/queries/vue.scm +123 -0
  41. codebeacon/extract/routes.py +910 -0
  42. codebeacon/extract/semantic.py +280 -0
  43. codebeacon/extract/services.py +597 -0
  44. codebeacon/graph/__init__.py +1 -0
  45. codebeacon/graph/analyze.py +281 -0
  46. codebeacon/graph/build.py +320 -0
  47. codebeacon/graph/cluster.py +160 -0
  48. codebeacon/graph/enrich.py +206 -0
  49. codebeacon/skill/SKILL.md +127 -0
  50. codebeacon/wave.py +292 -0
  51. codebeacon/wiki/__init__.py +0 -0
  52. codebeacon/wiki/generator.py +376 -0
  53. codebeacon/wiki/index.py +95 -0
  54. codebeacon/wiki/templates.py +467 -0
  55. codebeacon-0.1.2.dist-info/METADATA +319 -0
  56. codebeacon-0.1.2.dist-info/RECORD +59 -0
  57. codebeacon-0.1.2.dist-info/WHEEL +4 -0
  58. codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
  59. codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,575 @@
1
+ """Entity / ORM model extraction for all supported frameworks.
2
+
3
+ Public API:
4
+ extract_entities(file_path, framework) -> list[EntityInfo]
5
+
6
+ Supported ORMs:
7
+ JPA (@Entity), Django ORM (models.Model), SQLAlchemy/Pydantic,
8
+ Eloquent, EF Core (DbSet<>), GORM (struct tags), Active Record,
9
+ Diesel/SeaORM (#[derive]), Fluent (Vapor), Exposed (Ktor), TypeORM/Mongoose (NestJS).
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from pathlib import Path
15
+
16
+ from codebeacon.common.types import EntityInfo
17
+ from codebeacon.extract.base import (
18
+ extract_sfc_sections,
19
+ load_query_file,
20
+ node_text,
21
+ parse_file,
22
+ parse_sfc_script,
23
+ run_query,
24
+ )
25
+
26
+
27
+ # ── Framework → query file stem ───────────────────────────────────────────────
28
+
29
# Maps a detected framework id (lowercase) to the stem of its .scm query
# file under extract/queries/.  Several frameworks deliberately share one
# query file because they share a grammar: koa/fastify reuse the express
# queries, echo/fiber/plain "go" reuse gin, axum/plain "rust" reuse actix,
# and nextjs reuses the react queries.
_FW_TO_QUERY: dict[str, str] = {
    "spring-boot": "spring_boot",
    "express": "express",
    "koa": "express",
    "fastify": "express",
    "nestjs": "nestjs",
    "nextjs": "react",
    "react": "react",
    "fastapi": "fastapi",
    "django": "django",
    "flask": "flask",
    "gin": "gin",
    "echo": "gin",
    "fiber": "gin",
    "go": "gin",
    "rails": "rails",
    "laravel": "laravel",
    "aspnet": "aspnet",
    "actix": "actix",
    "axum": "actix",
    "rust": "actix",
    "vapor": "vapor",
    "ktor": "ktor",
}
53
+
54
# GORM struct tag parser: `gorm:"column:name;primaryKey"`
# Captures the quoted tag body; segments are split on ";" downstream.
_GORM_TAG_RE = re.compile(r'gorm:"([^"]*)"')
# Splits a single tag segment into key[:value] pairs.
# NOTE(review): appears unused anywhere in this module — confirm before removing.
_GORM_KEY_RE = re.compile(r'(\w+)(?::(\w+))?')
57
+
58
+
59
+ # ── Public function ───────────────────────────────────────────────────────────
60
+
61
def extract_entities(file_path: str, framework: str) -> list[EntityInfo]:
    """Extract entity / ORM model definitions from *file_path*.

    Resolves the framework to a tree-sitter query file, parses the source,
    runs the query, and hands the raw matches to a per-framework
    interpreter.  Any failure along the way (unknown framework, missing
    query, unparseable file, disallowed grammar, query or interpreter
    error) yields an empty list rather than raising.
    """
    fw = framework.lower()
    query_name = _FW_TO_QUERY.get(fw)
    if query_name is None:
        return []

    query_src = load_query_file(query_name)
    if not query_src:
        return []

    parsed = parse_file(file_path)
    if parsed is None:
        return []
    root, lang = parsed

    # Local import mirrors the original module's lazy-import choice.
    from codebeacon.extract.base import is_grammar_allowed
    if not is_grammar_allowed(query_name, lang):
        return []

    try:
        matches = run_query(lang, query_src, root)
    except Exception:
        return []

    # Query stem → interpreter. Built at call time since the interpreter
    # functions are defined further down in this module.
    dispatch = {
        "spring_boot": _interpret_spring_boot,
        "express": _interpret_noop,
        "nestjs": _interpret_nestjs,
        "fastapi": _interpret_python_orm,
        "django": _interpret_django,
        "flask": _interpret_python_orm,
        "gin": _interpret_gorm,
        "rails": _interpret_rails,
        "laravel": _interpret_laravel,
        "aspnet": _interpret_aspnet,
        "actix": _interpret_rust,
        "vapor": _interpret_vapor,
        "ktor": _interpret_ktor,
        "react": _interpret_noop,
    }

    try:
        # Interpreters are best-effort: a surprising AST must not crash extraction.
        return dispatch.get(query_name, _interpret_noop)(file_path, matches, fw)
    except Exception:
        return []
108
+
109
+
110
+ def _interpret_noop(file_path: str, matches: list, framework: str) -> list[EntityInfo]:
111
+ return []
112
+
113
+
114
+ # ── Per-framework interpreters ────────────────────────────────────────────────
115
+
116
def _interpret_spring_boot(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """JPA @Entity with @Table(name=...), @Id, @Column, @ManyToOne etc.

    Single pass over the query matches:
      * an ``entity.class`` capture opens a new EntityInfo, keyed by the
        class node's start byte;
      * an ``entity.table_annotation`` is attached to the first already
        registered class whose byte range contains it;
      * an ``entity.field`` is attached the same way — relation
        annotations (ManyToOne/OneToMany/ManyToMany/OneToOne) go into
        ``relations``, everything else into ``fields``.

    NOTE(review): table/field captures that arrive before their class
    capture find an empty ``class_ranges`` and are silently dropped —
    this relies on the query yielding classes first; confirm against
    spring_boot.scm.
    """
    entities: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # start_byte → (start, end) byte span
    table_names: dict[int, str] = {}  # entity class start_byte → table name
    # NOTE(review): ``table_names`` is never read or written below — dead local?

    for _idx, caps in matches:
        # @Entity class
        if "entity.class" in caps and "entity.class_name" in caps:
            cls = caps["entity.class"][0]
            name = node_text(caps["entity.class_name"][0])
            key = cls.start_byte
            entities[key] = EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=cls.start_point[0] + 1,  # tree-sitter rows are 0-based
                framework="jpa",
            )
            class_ranges[key] = (cls.start_byte, cls.end_byte)

        # @Table(name="...") annotation
        if "entity.table_annotation" in caps and "entity.table_name" in caps:
            tbl_node = caps["entity.table_annotation"][0]
            # Strip surrounding quotes from the captured string literal.
            table = node_text(caps["entity.table_name"][0]).strip('"\'')
            for key, (start, end) in class_ranges.items():
                if start <= tbl_node.start_byte <= end:
                    entities[key].table_name = table
                    break

        # Entity fields with JPA annotations
        if "entity.field" in caps and "entity.field_name" in caps:
            field_node = caps["entity.field"][0]
            field_name = node_text(caps["entity.field_name"][0])
            field_type = node_text(caps["entity.field_type"][0]) if "entity.field_type" in caps else ""
            ann = node_text(caps["entity.field_annotation"][0]) if "entity.field_annotation" in caps else ""

            for key, (start, end) in class_ranges.items():
                if start <= field_node.start_byte <= end:
                    field_info = {"name": field_name, "type": field_type, "annotations": [ann] if ann else []}
                    # Relations: the relation's target entity is the field's declared type.
                    if ann in ("ManyToOne", "OneToMany", "ManyToMany", "OneToOne"):
                        entities[key].relations.append({"type": ann, "target": field_type})
                    else:
                        entities[key].fields.append(field_info)
                    break

    return list(entities.values())
166
+
167
+
168
def _interpret_nestjs(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """NestJS: TypeORM @Entity() / Mongoose @Schema() classes.

    Emits one EntityInfo per distinct class name; fields and relations
    are not extracted here.
    """
    results: list[EntityInfo] = []
    names_seen: set[str] = set()

    for _pattern, caps in matches:
        if "entity.class" not in caps or "entity.class_name" not in caps:
            continue
        cls_name = node_text(caps["entity.class_name"][0])
        if cls_name in names_seen:
            continue  # de-duplicate repeated captures of the same class
        names_seen.add(cls_name)
        cls_node = caps["entity.class"][0]
        results.append(
            EntityInfo(
                name=cls_name,
                table_name="",
                source_file=file_path,
                line=cls_node.start_point[0] + 1,
                framework="typeorm",
            )
        )

    return results
191
+
192
+
193
def _interpret_python_orm(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """FastAPI/Flask: SQLAlchemy Base / Pydantic BaseModel classes.

    A class may arrive under ``entity.class`` or ``entity.class_attr``;
    the first capture name present wins.  Fields come from annotated
    assignments captured alongside an ``entity.with_fields`` class node.

    NOTE(review): every entity is tagged framework="sqlalchemy" even when
    the match came from a Pydantic pattern — confirm this is intended.
    """
    entities: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # start_byte → (start, end) byte span

    for _idx, caps in matches:
        # Register the class once, under whichever capture name carried it.
        for cls_key in ("entity.class", "entity.class_attr"):
            if cls_key in caps and "entity.class_name" in caps:
                cls = caps[cls_key][0]
                name = node_text(caps["entity.class_name"][0])
                key = cls.start_byte
                if key not in entities:
                    entities[key] = EntityInfo(
                        name=name,
                        table_name="",
                        source_file=file_path,
                        line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                        framework="sqlalchemy",
                    )
                class_ranges[key] = (cls.start_byte, cls.end_byte)
                break

        # Fields: type-annotated assignments
        if "entity.with_fields" in caps and "entity.field_name" in caps:
            cls = caps["entity.with_fields"][0]
            # The field_name / field_type capture lists are assumed parallel.
            for fn, ft in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
            ):
                field_name = node_text(fn)
                field_type = node_text(ft)
                # Attach to the first registered class whose byte range
                # contains this class node (normally the class itself).
                for key, (start, end) in class_ranges.items():
                    if start <= cls.start_byte <= end:
                        entities[key].fields.append({
                            "name": field_name, "type": field_type, "annotations": [],
                        })
                        break

    return list(entities.values())
234
+
235
+
236
def _interpret_django(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Django: models.Model subclasses with field assignments.

    Field types ForeignKey / OneToOneField / ManyToManyField are recorded
    as relations (target = the captured attribute name); every other
    field type becomes a plain field entry.
    """
    relation_types = ("ForeignKey", "OneToOneField", "ManyToManyField")
    results: list[EntityInfo] = []

    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue

        model_node = caps["entity.model"][0]
        model_name = node_text(caps["entity.class_name"][0])

        plain_fields: list = []
        relations: list = []
        name_nodes = caps.get("entity.field_name", [])
        type_nodes = caps.get("entity.field_type", [])
        for name_node, type_node in zip(name_nodes, type_nodes):
            fname = node_text(name_node)
            ftype = node_text(type_node)
            if ftype in relation_types:
                relations.append({"type": ftype, "target": fname})
            else:
                plain_fields.append({"name": fname, "type": ftype, "annotations": []})

        results.append(
            EntityInfo(
                name=model_name,
                table_name="",
                source_file=file_path,
                line=model_node.start_point[0] + 1,
                framework="django-orm",
                fields=plain_fields,
                relations=relations,
            )
        )

    return results
270
+
271
+
272
def _interpret_gorm(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Go: GORM structs; struct tags are parsed with a regex.

    Two match shapes are handled:
      * ``entity.struct``      — struct with tagged fields; each tag is
        turned into ``gorm:*`` annotations by :func:`_parse_gorm_tag`;
      * ``entity.struct_bare`` — struct without tags; registered with no
        fields.

    NOTE(review): zipping field_name/field_type/field_tag assumes the
    three capture lists are parallel; a field missing its tag would shift
    the pairing — confirm gin.scm guarantees alignment.
    """
    entities: dict[int, EntityInfo] = {}  # struct start_byte → EntityInfo

    for _idx, caps in matches:
        # Struct with tagged fields
        if "entity.struct" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct"][0]
            name = node_text(caps["entity.struct_name"][0])
            key = cls.start_byte
            if key not in entities:
                entities[key] = EntityInfo(
                    name=name,
                    table_name="",
                    source_file=file_path,
                    line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                    framework="gorm",
                )
            # Parse fields + tags
            for fn, ft, tag_node in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
                caps.get("entity.field_tag", []),
            ):
                field_name = node_text(fn)
                field_type = node_text(ft)
                tag_raw = node_text(tag_node)
                annotations = _parse_gorm_tag(tag_raw)
                entities[key].fields.append({
                    "name": field_name, "type": field_type, "annotations": annotations,
                })

        # Struct without tags (bare struct)
        elif "entity.struct_bare" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct_bare"][0]
            name = node_text(caps["entity.struct_name"][0])
            key = cls.start_byte
            if key not in entities:
                entities[key] = EntityInfo(
                    name=name,
                    table_name="",
                    source_file=file_path,
                    line=cls.start_point[0] + 1,
                    framework="gorm",
                )

    return list(entities.values())
321
+
322
+
323
+ def _parse_gorm_tag(raw: str) -> list[str]:
324
+ """Parse GORM struct tag like `gorm:"column:name;primaryKey"` into annotations."""
325
+ m = _GORM_TAG_RE.search(raw)
326
+ if not m:
327
+ return []
328
+ parts = m.group(1).split(";")
329
+ annotations = []
330
+ for part in parts:
331
+ part = part.strip()
332
+ if part:
333
+ annotations.append(f"gorm:{part}")
334
+ return annotations
335
+
336
+
337
def _interpret_rails(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Rails: ApplicationRecord subclasses plus has_many/belongs_to calls.

    Pass 1 registers each model class and its byte span; pass 2 attaches
    every captured association to the first model whose span contains it.
    """
    models: dict[int, EntityInfo] = {}
    spans: dict[int, tuple[int, int]] = {}

    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue
        cls_node = caps["entity.model"][0]
        start = cls_node.start_byte
        models[start] = EntityInfo(
            name=node_text(caps["entity.class_name"][0]),
            table_name="",
            source_file=file_path,
            line=cls_node.start_point[0] + 1,
            framework="active-record",
        )
        spans[start] = (start, cls_node.end_byte)

    for _pattern, caps in matches:
        if "entity.association" not in caps or "entity.relation_type" not in caps:
            continue
        assoc_node = caps["entity.association"][0]
        rel_type = node_text(caps["entity.relation_type"][0])
        if "entity.relation_target" in caps:
            # Drop the leading ":" from the Ruby symbol.
            target = node_text(caps["entity.relation_target"][0]).strip(":")
        else:
            target = ""
        for key, (lo, hi) in spans.items():
            if lo <= assoc_node.start_byte <= hi:
                models[key].relations.append({"type": rel_type, "target": target})
                break

    return list(models.values())
369
+
370
+
371
def _interpret_laravel(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Laravel: Eloquent Model subclasses plus relation methods.

    First pass registers each model and its byte span; second pass maps
    every captured relation call onto the model whose span contains it.
    """
    models: dict[int, EntityInfo] = {}
    spans: dict[int, tuple[int, int]] = {}

    for _pattern, caps in matches:
        if "entity.model" not in caps or "entity.class_name" not in caps:
            continue
        model_node = caps["entity.model"][0]
        start = model_node.start_byte
        models[start] = EntityInfo(
            name=node_text(caps["entity.class_name"][0]),
            table_name="",
            source_file=file_path,
            line=model_node.start_point[0] + 1,
            framework="eloquent",
        )
        spans[start] = (start, model_node.end_byte)

    for _pattern, caps in matches:
        if "entity.relation" not in caps or "entity.relation_type" not in caps:
            continue
        rel_node = caps["entity.relation"][0]
        rel_type = node_text(caps["entity.relation_type"][0])
        target = ""
        if "entity.relation_model" in caps:
            target = node_text(caps["entity.relation_model"][0])
        for key, (lo, hi) in spans.items():
            if lo <= rel_node.start_byte <= hi:
                models[key].relations.append({"type": rel_type, "target": target})
                break

    return list(models.values())
403
+
404
+
405
def _interpret_aspnet(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """ASP.NET: EF Core ``DbSet<T>`` properties on a DbContext.

    The generic argument T becomes the entity name and the property name
    is recorded as the table name; duplicate type names are skipped.
    """
    found: list[EntityInfo] = []
    known: set[str] = set()

    for _pattern, caps in matches:
        if "entity.dbset" not in caps or "entity.class_name" not in caps:
            continue
        type_name = node_text(caps["entity.class_name"][0])
        if type_name in known:
            continue
        known.add(type_name)
        prop_name = ""
        if "entity.dbset_name" in caps:
            prop_name = node_text(caps["entity.dbset_name"][0])
        dbset_node = caps["entity.dbset"][0]
        found.append(
            EntityInfo(
                name=type_name,
                table_name=prop_name,
                source_file=file_path,
                line=dbset_node.start_point[0] + 1,
                framework="ef-core",
            )
        )

    return found
429
+
430
+
431
def _interpret_rust(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Rust: #[derive(Queryable/DeriveEntityModel)] structs + fields.

    The derive trait list decides the ORM flavour recorded in
    ``framework``:
      * DeriveEntityModel / DeriveRelation → "sea-orm"
      * FromRow                            → "sqlx"
      * anything else                      → "diesel" (default)

    A second pass attaches field captures from
    ``entity.struct_with_fields`` to the first struct whose byte range
    contains them.
    """
    entities: dict[int, EntityInfo] = {}  # struct start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # start_byte → (start, end) byte span

    for _idx, caps in matches:
        # Struct with derive macros
        if "entity.struct" in caps and "entity.struct_name" in caps:
            cls = caps["entity.struct"][0]
            name = node_text(caps["entity.struct_name"][0])
            traits = [node_text(n) for n in caps.get("entity.derive_trait", [])]
            key = cls.start_byte

            orm_type = "diesel"
            if any(t in ("DeriveEntityModel", "DeriveRelation") for t in traits):
                orm_type = "sea-orm"
            elif any(t in ("FromRow",) for t in traits):
                orm_type = "sqlx"

            entities[key] = EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                framework=orm_type,
            )
            class_ranges[key] = (cls.start_byte, cls.end_byte)

    for _idx, caps in matches:
        if "entity.struct_with_fields" in caps and "entity.field_name" in caps:
            cls = caps["entity.struct_with_fields"][0]
            # field_name / field_type capture lists are assumed parallel.
            for fn, ft in zip(
                caps.get("entity.field_name", []),
                caps.get("entity.field_type", []),
            ):
                fname = node_text(fn)
                ftype = node_text(ft)
                for key, (start, end) in class_ranges.items():
                    if start <= cls.start_byte <= end:
                        entities[key].fields.append({
                            "name": fname, "type": ftype, "annotations": [],
                        })
                        break

    return list(entities.values())
478
+
479
+
480
def _interpret_vapor(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Vapor Fluent: Model class + @Field/@ID property wrappers.

    Pass 1 registers each Fluent model class and its byte span.  Pass 2
    attaches property-wrapper captures to the model whose span contains
    them: ``@Field(key: ...)`` fields carry a ``key:<column>`` annotation
    when the key was captured; ``@ID`` fields carry an ``@ID`` annotation.
    Field types are not captured and are recorded as "".
    """
    entities: dict[int, EntityInfo] = {}  # class start_byte → EntityInfo
    class_ranges: dict[int, tuple[int, int]] = {}  # start_byte → (start, end) byte span

    for _idx, caps in matches:
        if "entity.model" in caps and "entity.class_name" in caps:
            cls = caps["entity.model"][0]
            name = node_text(caps["entity.class_name"][0])
            key = cls.start_byte
            entities[key] = EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=cls.start_point[0] + 1,  # 0-based row → 1-based line
                framework="fluent",
            )
            class_ranges[key] = (cls.start_byte, cls.end_byte)

    for _idx, caps in matches:
        # @Field(key: "column") var fieldName
        if "entity.field" in caps and "entity.field_name" in caps:
            field_node = caps["entity.field"][0]
            fname = node_text(caps["entity.field_name"][0])
            col_key = node_text(caps["entity.field_key"][0]) if "entity.field_key" in caps else ""
            for key, (start, end) in class_ranges.items():
                if start <= field_node.start_byte <= end:
                    entities[key].fields.append({
                        "name": fname, "type": "", "annotations": [f"key:{col_key}"] if col_key else [],
                    })
                    break

        # @ID var id
        if "entity.id_field" in caps and "entity.id_name" in caps:
            field_node = caps["entity.id_field"][0]
            fname = node_text(caps["entity.id_name"][0])
            for key, (start, end) in class_ranges.items():
                if start <= field_node.start_byte <= end:
                    entities[key].fields.append({
                        "name": fname, "type": "", "annotations": ["@ID"],
                    })
                    break

    return list(entities.values())
526
+
527
+
528
def _interpret_ktor(
    file_path: str, matches: list, framework: str,
) -> list[EntityInfo]:
    """Ktor: Exposed Table objects + columns, data classes.

    Two independent match shapes, de-duplicated by name across both:
      * ``entity.table``      — an Exposed Table object with zipped
        column_name/column_type captures as fields;
      * ``entity.data_class`` — a plain Kotlin data class, recorded with
        no fields.
    """
    entities: list[EntityInfo] = []
    seen: set[str] = set()

    for _idx, caps in matches:
        # Exposed Table object
        if "entity.table" in caps and "entity.table_name" in caps:
            name = node_text(caps["entity.table_name"][0])
            if name in seen:
                continue
            seen.add(name)
            node = caps["entity.table"][0]
            fields = []
            # column_name / column_type capture lists are assumed parallel.
            for cn, ct in zip(
                caps.get("entity.column_name", []),
                caps.get("entity.column_type", []),
            ):
                fields.append({
                    "name": node_text(cn), "type": node_text(ct), "annotations": [],
                })
            entities.append(EntityInfo(
                name=name,
                # Heuristic: assumes the DB table name is the lowered object
                # name — TODO confirm against Exposed naming conventions.
                table_name=name.lower(),
                source_file=file_path,
                line=node.start_point[0] + 1,  # 0-based row → 1-based line
                framework="exposed",
                fields=fields,
            ))

        # Kotlin data class
        if "entity.data_class" in caps and "entity.class_name" in caps:
            name = node_text(caps["entity.class_name"][0])
            if name in seen:
                continue
            seen.add(name)
            node = caps["entity.data_class"][0]
            entities.append(EntityInfo(
                name=name,
                table_name="",
                source_file=file_path,
                line=node.start_point[0] + 1,
                framework="kotlin-data",
            ))

    return entities
@@ -0,0 +1,116 @@
1
+ # codebeacon query files
2
+
3
+ Each `.scm` file defines tree-sitter queries for one framework or language.
4
+ Queries are loaded at runtime by `extract/base.py → load_query_file(grammar)`.
5
+
6
+ ## Adding a new framework
7
+
8
+ ### 1. Identify the grammar
9
+
10
+ Map the file extension to a tree-sitter grammar in `extract/base.py`:
11
+
12
+ ```python
13
+ # EXT_TO_GRAMMAR
14
+ ".ex": "elixir",
15
+
16
+ # _GRAMMAR_MODULES
17
+ "elixir": "tree_sitter_elixir",
18
+ ```
19
+
20
+ If the grammar package exposes a non-standard function (like `language_typescript()`
21
+ instead of `language()`), add special handling in `get_language()`.
22
+
23
+ ### 2. Explore the AST
24
+
25
+ Use the tree-sitter playground or this snippet to understand node types:
26
+
27
+ ```python
28
+ from codebeacon.extract.base import parse_source, node_text
29
+
30
+ src = b'your framework code here'
31
+ root, lang = parse_source(src, 'your_grammar')
32
+
33
+ def walk(n, d=0):
34
+ print(' '*d + n.type + ' ' + repr(node_text(n)[:40]))
35
+ for c in n.named_children: walk(c, d+2)
36
+ walk(root)
37
+ ```
38
+
39
+ ### 3. Write the .scm file
40
+
41
+ Name it after the **framework** it targets (the extractor maps framework → query file; several frameworks can share one grammar):
42
+
43
+ | Framework | Grammar | File |
44
+ |-------------|------------|-----------------|
45
+ | Spring Boot | java | spring_boot.scm |
46
+ | NestJS | typescript | nestjs.scm |
47
+ | Gin/Echo | go | gin.scm |
48
+ | Ktor | kotlin | ktor.scm |
49
+ | Actix/Axum | rust | actix.scm |
50
+
51
+ **Capture naming convention:**
52
+
53
+ | Prefix | Meaning |
54
+ |--------------|--------------------------------------|
55
+ | `@route.*` | Route path, method, handler |
56
+ | `@service.*` | Service class, DI relationships |
57
+ | `@entity.*` | ORM models, fields, relations |
58
+ | `@component.*` | Frontend components, props |
59
+ | `@di.*` | DI bindings (unresolved refs) |
60
+ | `@module.*` | Module-level groupings |
61
+ | `@hook.*` | Hooks / composables usage |
62
+ | `@import.*` | Import/require statements |
63
+
64
+ **Grammar quirks to watch:**
65
+
66
+ - **Java**: `marker_annotation` (no args) vs `annotation` (with args) — use `[...]` alternation
67
+ - **PHP**: `scoped_call_expression` for `Class::method()`, `encapsed_string` for strings
68
+ - **Rust**: `attribute_item` wraps `attribute`, proc macro args in `token_tree` (unparsed)
69
+ - **Kotlin**: trailing lambdas via `annotated_lambda` / `lambda_literal`
70
+ - **Swift**: route paths are multi-argument: `app.get("a", "b")` — join in extractor
71
+ - **Vue/Svelte**: SFC files use section extraction; queries apply to `<script>` content only
72
+
73
+ ### 4. Wire up the extractor
74
+
75
+ Add dispatch in the relevant extractor module:
76
+
77
+ ```python
78
+ # extract/routes.py
79
+ elif framework == "phoenix":
80
+ return _extract_phoenix_routes(file_path, root, lang)
81
+ ```
82
+
83
+ ### 5. Add fixtures and tests
84
+
85
+ ```
86
+ tests/fixtures/phoenix/
87
+ router.ex
88
+ user_controller.ex
89
+
90
+ tests/test_routes.py
91
+ def test_phoenix_routes():
92
+ ...
93
+ ```
94
+
95
+ ## Query file structure
96
+
97
+ Each file should include:
98
+ 1. Header comment: framework name, grammar, important AST notes
99
+ 2. Capture documentation table
100
+ 3. Grouped sections (routes → services → entities → imports)
101
+ 4. `; ──` separators between sections
102
+
103
+ ## tree-sitter 0.25 API note
104
+
105
+ ```python
106
+ from tree_sitter import Query, QueryCursor
107
+
108
+ q = Query(language, pattern_string)
109
+ cursor = QueryCursor(q)
110
+ for pattern_idx, captures in cursor.matches(root_node):
111
+ for capture_name, nodes in captures.items():
112
+ for node in nodes:
113
+ print(capture_name, node.text)
114
+ ```
115
+
116
+ `Language.query()` is deprecated in 0.25 — always use `Query(language, pattern)`.