codebeacon 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. codebeacon/__init__.py +1 -0
  2. codebeacon/__main__.py +3 -0
  3. codebeacon/cache.py +136 -0
  4. codebeacon/cli.py +391 -0
  5. codebeacon/common/__init__.py +0 -0
  6. codebeacon/common/filters.py +170 -0
  7. codebeacon/common/symbols.py +121 -0
  8. codebeacon/common/types.py +98 -0
  9. codebeacon/config.py +144 -0
  10. codebeacon/contextmap/__init__.py +0 -0
  11. codebeacon/contextmap/generator.py +602 -0
  12. codebeacon/discover/__init__.py +0 -0
  13. codebeacon/discover/detector.py +388 -0
  14. codebeacon/discover/scanner.py +192 -0
  15. codebeacon/export/__init__.py +0 -0
  16. codebeacon/export/mcp.py +515 -0
  17. codebeacon/export/obsidian.py +812 -0
  18. codebeacon/extract/__init__.py +22 -0
  19. codebeacon/extract/base.py +372 -0
  20. codebeacon/extract/components.py +357 -0
  21. codebeacon/extract/dependencies.py +140 -0
  22. codebeacon/extract/entities.py +575 -0
  23. codebeacon/extract/queries/README.md +116 -0
  24. codebeacon/extract/queries/actix.scm +115 -0
  25. codebeacon/extract/queries/angular.scm +155 -0
  26. codebeacon/extract/queries/aspnet.scm +159 -0
  27. codebeacon/extract/queries/django.scm +122 -0
  28. codebeacon/extract/queries/express.scm +124 -0
  29. codebeacon/extract/queries/fastapi.scm +152 -0
  30. codebeacon/extract/queries/flask.scm +120 -0
  31. codebeacon/extract/queries/gin.scm +142 -0
  32. codebeacon/extract/queries/ktor.scm +144 -0
  33. codebeacon/extract/queries/laravel.scm +172 -0
  34. codebeacon/extract/queries/nestjs.scm +183 -0
  35. codebeacon/extract/queries/rails.scm +114 -0
  36. codebeacon/extract/queries/react.scm +111 -0
  37. codebeacon/extract/queries/spring_boot.scm +204 -0
  38. codebeacon/extract/queries/svelte.scm +73 -0
  39. codebeacon/extract/queries/vapor.scm +130 -0
  40. codebeacon/extract/queries/vue.scm +123 -0
  41. codebeacon/extract/routes.py +910 -0
  42. codebeacon/extract/semantic.py +280 -0
  43. codebeacon/extract/services.py +597 -0
  44. codebeacon/graph/__init__.py +1 -0
  45. codebeacon/graph/analyze.py +281 -0
  46. codebeacon/graph/build.py +320 -0
  47. codebeacon/graph/cluster.py +160 -0
  48. codebeacon/graph/enrich.py +206 -0
  49. codebeacon/skill/SKILL.md +127 -0
  50. codebeacon/wave.py +292 -0
  51. codebeacon/wiki/__init__.py +0 -0
  52. codebeacon/wiki/generator.py +376 -0
  53. codebeacon/wiki/index.py +95 -0
  54. codebeacon/wiki/templates.py +467 -0
  55. codebeacon-0.1.2.dist-info/METADATA +319 -0
  56. codebeacon-0.1.2.dist-info/RECORD +59 -0
  57. codebeacon-0.1.2.dist-info/WHEEL +4 -0
  58. codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
  59. codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,22 @@
1
+ """Extraction layer: AST-based analysis of source files.
2
+
3
+ Public API:
4
+ extract_routes(file_path, framework, project_path="") -> list[RouteInfo]
5
+ extract_services(file_path, framework) -> tuple[list[ServiceInfo], list[UnresolvedRef]]
6
+ extract_entities(file_path, framework) -> list[EntityInfo]
7
+ extract_components(file_path, framework, project_path="") -> list[ComponentInfo]
8
+ extract_dependencies(file_path, framework) -> list[Edge]
9
+ """
10
+ from codebeacon.extract.routes import extract_routes
11
+ from codebeacon.extract.services import extract_services
12
+ from codebeacon.extract.entities import extract_entities
13
+ from codebeacon.extract.components import extract_components
14
+ from codebeacon.extract.dependencies import extract_dependencies
15
+
16
+ __all__ = [
17
+ "extract_routes",
18
+ "extract_services",
19
+ "extract_entities",
20
+ "extract_components",
21
+ "extract_dependencies",
22
+ ]
@@ -0,0 +1,372 @@
1
+ """tree-sitter Language/Parser management.
2
+
3
+ API note: tree-sitter 0.25+ uses QueryCursor for running queries.
4
+ Query(language, pattern) → QueryCursor(query) → cursor.matches(node)
5
+ Each match is (pattern_idx, {capture_name: [Node, ...]}).
6
+
7
+ Grammar packages: tree-sitter-python, tree-sitter-java, etc.
8
+ If a grammar is not installed, that language is gracefully skipped with a warning.
9
+ """
10
+
11
from __future__ import annotations

import importlib
import re
import warnings
from pathlib import Path
from typing import Iterator, Optional

from tree_sitter import Language, Parser, Query, QueryCursor, Node
19
+
20
# ── Grammar registry ─────────────────────────────────────────────────────────

# Cache of loaded Language objects, keyed by grammar name. A value of None
# records a failed load so the import attempt (and its warning) happens at
# most once per grammar.
_LANG_CACHE: dict[str, Optional[Language]] = {}

# Grammar name -> importable module name of the pip package providing it.
_GRAMMAR_MODULES: dict[str, str] = {
    "python": "tree_sitter_python",
    "java": "tree_sitter_java",
    "kotlin": "tree_sitter_kotlin",
    "javascript": "tree_sitter_javascript",
    "typescript": "tree_sitter_typescript",
    "tsx": "tree_sitter_typescript",  # tsx dialect ships in the same package
    "go": "tree_sitter_go",
    "ruby": "tree_sitter_ruby",
    "php": "tree_sitter_php",
    "csharp": "tree_sitter_c_sharp",
    "rust": "tree_sitter_rust",
    "swift": "tree_sitter_swift",
    "html": "tree_sitter_html",
    "svelte": "tree_sitter_svelte",
}

# Query files that are only valid for specific grammar families.
# Keys are query file stems (e.g. "nestjs"); values are sets of grammar names
# that the query can compile against. If a file's grammar is not in this set,
# the extractor skips running the query rather than emitting a warning.
QUERY_GRAMMAR_ALLOWLIST: dict[str, frozenset[str]] = {
    # TypeScript/JavaScript families
    "react": frozenset({"typescript", "tsx", "javascript"}),
    "svelte": frozenset({"typescript", "tsx", "javascript"}),
    "nestjs": frozenset({"typescript", "tsx"}),
    "angular": frozenset({"typescript", "tsx"}),
    "express": frozenset({"typescript", "tsx", "javascript"}),
    "vue": frozenset({"typescript", "tsx", "javascript"}),
    # Python families
    "fastapi": frozenset({"python"}),
    "django": frozenset({"python"}),
    "flask": frozenset({"python"}),
    # JVM families
    "spring_boot": frozenset({"java", "kotlin"}),
    "ktor": frozenset({"kotlin"}),
    # Other single-language families
    "gin": frozenset({"go"}),
    "rails": frozenset({"ruby"}),
    "laravel": frozenset({"php"}),
    "aspnet": frozenset({"csharp"}),
    "actix": frozenset({"rust"}),
    "vapor": frozenset({"swift"}),
}

# Extensions that map to a grammar name
EXT_TO_GRAMMAR: dict[str, str] = {
    ".py": "python",
    ".java": "java",
    ".kt": "kotlin",
    ".kts": "kotlin",
    ".js": "javascript",
    ".jsx": "javascript",
    ".mjs": "javascript",
    ".cjs": "javascript",
    ".ts": "typescript",
    ".tsx": "tsx",
    ".go": "go",
    ".rb": "ruby",
    ".php": "php",
    ".cs": "csharp",
    ".rs": "rust",
    ".swift": "swift",
    ".html": "html",
    ".svelte": "svelte",
    # Vue: SFC section extraction → use typescript + html
    ".vue": "_vue_sfc",
}
92
+
93
+
94
def is_grammar_allowed(query_name: str, lang: Language) -> bool:
    """Return True if *lang* is compatible with the given query file.

    Uses QUERY_GRAMMAR_ALLOWLIST; if the query is not listed, all grammars
    are allowed (unknown queries fall through gracefully).
    """
    allowed = QUERY_GRAMMAR_ALLOWLIST.get(query_name)
    if allowed is None:
        # No restriction defined for this query — attempt it.
        return True
    # Reverse-lookup the grammar name from the cached Language object.
    for cached_name, cached_lang in _LANG_CACHE.items():
        if cached_lang is lang:
            return cached_name in allowed
    # Grammar name could not be determined — let the query run.
    return True
108
+
109
+
110
def get_language(name: str) -> Optional[Language]:
    """Return a Language object for the given grammar name, or None if not installed.

    Results — including failures — are memoized in _LANG_CACHE, so the import
    attempt and any warning happen at most once per grammar name.
    """
    if name in _LANG_CACHE:
        return _LANG_CACHE[name]

    module_name = _GRAMMAR_MODULES.get(name)
    if not module_name:
        _LANG_CACHE[name] = None
        return None

    try:
        mod = importlib.import_module(module_name)
        # Some packages expose dialect-specific functions instead of language().
        if name == "php":
            lang = Language(mod.language_php())
        elif name == "typescript":
            # tree-sitter-typescript: language_typescript() / language_tsx()
            lang = Language(mod.language_typescript())
        elif name == "tsx":
            # mod is already tree_sitter_typescript (see _GRAMMAR_MODULES).
            lang = Language(mod.language_tsx())
        elif name == "kotlin":
            # tree-sitter-kotlin may expose language_kotlin or language
            fn = getattr(mod, "language_kotlin", None) or getattr(mod, "language", None)
            if fn is None:
                # Neither entry point exists: treat as a missing grammar
                # rather than crashing with TypeError on fn().
                raise AttributeError(f"no language entry point in {module_name}")
            lang = Language(fn())
        else:
            lang = Language(mod.language())
        _LANG_CACHE[name] = lang
        return lang
    except (ImportError, AttributeError):
        warnings.warn(
            f"Grammar '{name}' not installed. "
            f"Install with: pip install codebeacon[{_pip_extra(name)}]",
            stacklevel=3,
        )
        _LANG_CACHE[name] = None
        return None
147
+
148
+
149
+ def _pip_extra(name: str) -> str:
150
+ extras = {
151
+ "java": "java", "kotlin": "kotlin",
152
+ "python": "python",
153
+ "javascript": "js", "typescript": "js",
154
+ "go": "go", "ruby": "ruby", "php": "php",
155
+ "csharp": "csharp", "rust": "rust",
156
+ "swift": "swift", "html": "html", "svelte": "svelte",
157
+ }
158
+ return extras.get(name, name)
159
+
160
+
161
def get_parser(grammar: str) -> Optional[Parser]:
    """Return a Parser configured for the given grammar, or None."""
    language = get_language(grammar)
    return Parser(language) if language is not None else None
167
+
168
+
169
+ # ── Query helpers ─────────────────────────────────────────────────────────────
170
+
171
QueryMatch = tuple[int, dict[str, list[Node]]]  # (pattern_idx, captures)


def run_query(language: Language, pattern: str, node: Node) -> list[QueryMatch]:
    """Run a tree-sitter query and return all matches.

    Returns list of (pattern_index, {capture_name: [Node, ...]}); on any
    compilation or execution error a warning is emitted and [] is returned.
    """
    try:
        compiled = Query(language, pattern)
        matches = QueryCursor(compiled).matches(node)
        return list(matches)
    except Exception as e:
        warnings.warn(f"Query error: {e}", stacklevel=2)
        return []
186
+
187
+
188
def query_captures_flat(language: Language, pattern: str, node: Node) -> list[tuple[str, Node]]:
    """Convenience: return flat list of (capture_name, Node) pairs from all matches."""
    return [
        (capture_name, captured)
        for _pattern_idx, captures in run_query(language, pattern, node)
        for capture_name, capture_nodes in captures.items()
        for captured in capture_nodes
    ]
196
+
197
+
198
def load_query_file(grammar: str) -> Optional[str]:
    """Load the .scm query file for a grammar from extract/queries/.

    Returns None when no query file exists for the grammar.
    """
    scm_path = Path(__file__).parent / "queries" / f"{grammar}.scm"
    if not scm_path.exists():
        return None
    return scm_path.read_text(encoding="utf-8")
205
+
206
+
207
+ # ── File parsing ──────────────────────────────────────────────────────────────
208
+
209
def parse_file(file_path: str) -> Optional[tuple[Node, Language]]:
    """Parse a source file and return (root_node, language).

    Returns None when the extension is unmapped, the grammar is not
    installed, or reading/parsing fails. For .vue files, returns None —
    use extract_sfc_sections() instead.
    """
    path = Path(file_path)
    grammar = EXT_TO_GRAMMAR.get(path.suffix.lower())

    if grammar is None:
        return None  # unknown extension
    if grammar == "_vue_sfc":
        return None  # SFCs are handled separately

    lang = get_language(grammar)
    if lang is None:
        return None

    try:
        content = path.read_bytes()
        parser = Parser(lang)
        tree = parser.parse(content)
        return (tree.root_node, lang)
    except Exception:
        # Best-effort: any read or parse failure just skips the file.
        # The original tuple (OSError, UnicodeDecodeError, Exception) was
        # redundant — Exception already subsumes both, and read_bytes()
        # cannot raise UnicodeDecodeError at all.
        return None
233
+
234
+
235
def parse_source(source: bytes, grammar: str) -> Optional[tuple[Node, Language]]:
    """Parse raw bytes with the given grammar; None if it is not installed."""
    language = get_language(grammar)
    if language is None:
        return None
    tree = Parser(language).parse(source)
    return (tree.root_node, language)
243
+
244
+
245
+ # ── SFC section extraction (Vue / Svelte) ────────────────────────────────────
246
+
247
+ _SCRIPT_RE = re.compile(
248
+ r"<script(?:\s[^>]*)?>(.+?)</script>",
249
+ re.DOTALL | re.IGNORECASE,
250
+ )
251
+ _TEMPLATE_RE = re.compile(
252
+ r"<template(?:\s[^>]*)?>(.+?)</template>",
253
+ re.DOTALL | re.IGNORECASE,
254
+ )
255
+ _SCRIPT_LANG_RE = re.compile(r'lang=["\'](\w+)["\']', re.IGNORECASE)
256
+
257
+
258
+ class SFCSection:
259
+ __slots__ = ("script_src", "script_lang", "script_offset", "template_src", "template_offset")
260
+
261
+ def __init__(
262
+ self,
263
+ script_src: bytes,
264
+ script_lang: str,
265
+ script_offset: int,
266
+ template_src: bytes,
267
+ template_offset: int,
268
+ ) -> None:
269
+ self.script_src = script_src
270
+ self.script_lang = script_lang # "ts" or "js"
271
+ self.script_offset = script_offset # byte offset in original file
272
+ self.template_src = template_src
273
+ self.template_offset = template_offset
274
+
275
+
276
+ def extract_sfc_sections(file_path: str) -> Optional[SFCSection]:
277
+ """Extract <script> and <template> sections from a .vue or .svelte SFC file."""
278
+ try:
279
+ raw = Path(file_path).read_text(encoding="utf-8", errors="replace")
280
+ except OSError:
281
+ return None
282
+
283
+ # Script section
284
+ script_match = _SCRIPT_RE.search(raw)
285
+ script_src = b""
286
+ script_lang = "js"
287
+ script_offset = 0
288
+ if script_match:
289
+ # Detect lang attribute on <script> tag
290
+ tag_content = raw[script_match.start():script_match.start(1)]
291
+ lang_m = _SCRIPT_LANG_RE.search(tag_content)
292
+ if lang_m and lang_m.group(1).lower() in ("ts", "typescript"):
293
+ script_lang = "ts"
294
+ script_src = script_match.group(1).encode("utf-8", errors="replace")
295
+ script_offset = script_match.start(1)
296
+
297
+ # Template section
298
+ template_match = _TEMPLATE_RE.search(raw)
299
+ template_src = b""
300
+ template_offset = 0
301
+ if template_match:
302
+ template_src = template_match.group(1).encode("utf-8", errors="replace")
303
+ template_offset = template_match.start(1)
304
+
305
+ return SFCSection(
306
+ script_src=script_src,
307
+ script_lang=script_lang,
308
+ script_offset=script_offset,
309
+ template_src=template_src,
310
+ template_offset=template_offset,
311
+ )
312
+
313
+
314
def parse_sfc_script(sfc: SFCSection) -> Optional[tuple[Node, Language]]:
    """Parse the <script> section of an SFC using typescript or javascript grammar."""
    if sfc.script_lang == "ts":
        return parse_source(sfc.script_src, "typescript")
    return parse_source(sfc.script_src, "javascript")
318
+
319
+
320
def parse_sfc_template(sfc: SFCSection) -> Optional[tuple[Node, Language]]:
    """Parse the <template> section of an SFC using the html grammar."""
    template_grammar = "html"
    return parse_source(sfc.template_src, template_grammar)
323
+
324
+
325
+ # ── Utility ───────────────────────────────────────────────────────────────────
326
+
327
def node_text(node: Node) -> str:
    """Return the UTF-8 decoded text of a node ("" when it has none)."""
    raw = node.text
    if not raw:
        raw = b""
    return raw.decode("utf-8", errors="replace")
330
+
331
+
332
def first_child_of_type(node: Node, *types: str) -> Optional[Node]:
    """Return first named child matching any of the given types."""
    return next(
        (child for child in node.named_children if child.type in types),
        None,
    )
338
+
339
+
340
def find_nodes_by_type(root: Node, node_type: str) -> list[Node]:
    """DFS: collect all nodes of a given type, in preorder."""
    result: list[Node] = []
    _dfs_collect(root, node_type, result)
    return result


def _dfs_collect(node: Node, target_type: str, result: list[Node]) -> None:
    # Explicit stack instead of recursion: deeply nested source files produce
    # ASTs that can exceed Python's recursion limit. Children are pushed in
    # reverse so pop() visits them left-to-right (preorder, as before).
    stack = [node]
    while stack:
        current = stack.pop()
        if current.type == target_type:
            result.append(current)
        stack.extend(reversed(current.children))
352
+
353
+
354
def get_annotation_names(node: Node) -> list[str]:
    """Extract annotation/decorator names from a modifiers/decorators node."""
    found: list[str] = []
    for member in node.named_children:
        # Java/Kotlin-style annotations carry an explicit "name" field.
        if member.type == "annotation":
            ident = member.child_by_field_name("name")
            if ident:
                found.append(node_text(ident))
            continue
        if member.type != "decorator" or not member.named_children:
            continue
        # Python/TS decorators: @name or @name(...)
        expr = member.named_children[0]
        if expr.type == "identifier":
            found.append(node_text(expr))
        elif expr.type == "call":
            callee = expr.child_by_field_name("function")
            if callee:
                found.append(node_text(callee))
    return found