codebeacon 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. codebeacon/__init__.py +1 -0
  2. codebeacon/__main__.py +3 -0
  3. codebeacon/cache.py +136 -0
  4. codebeacon/cli.py +391 -0
  5. codebeacon/common/__init__.py +0 -0
  6. codebeacon/common/filters.py +170 -0
  7. codebeacon/common/symbols.py +121 -0
  8. codebeacon/common/types.py +98 -0
  9. codebeacon/config.py +144 -0
  10. codebeacon/contextmap/__init__.py +0 -0
  11. codebeacon/contextmap/generator.py +602 -0
  12. codebeacon/discover/__init__.py +0 -0
  13. codebeacon/discover/detector.py +388 -0
  14. codebeacon/discover/scanner.py +192 -0
  15. codebeacon/export/__init__.py +0 -0
  16. codebeacon/export/mcp.py +515 -0
  17. codebeacon/export/obsidian.py +812 -0
  18. codebeacon/extract/__init__.py +22 -0
  19. codebeacon/extract/base.py +372 -0
  20. codebeacon/extract/components.py +357 -0
  21. codebeacon/extract/dependencies.py +140 -0
  22. codebeacon/extract/entities.py +575 -0
  23. codebeacon/extract/queries/README.md +116 -0
  24. codebeacon/extract/queries/actix.scm +115 -0
  25. codebeacon/extract/queries/angular.scm +155 -0
  26. codebeacon/extract/queries/aspnet.scm +159 -0
  27. codebeacon/extract/queries/django.scm +122 -0
  28. codebeacon/extract/queries/express.scm +124 -0
  29. codebeacon/extract/queries/fastapi.scm +152 -0
  30. codebeacon/extract/queries/flask.scm +120 -0
  31. codebeacon/extract/queries/gin.scm +142 -0
  32. codebeacon/extract/queries/ktor.scm +144 -0
  33. codebeacon/extract/queries/laravel.scm +172 -0
  34. codebeacon/extract/queries/nestjs.scm +183 -0
  35. codebeacon/extract/queries/rails.scm +114 -0
  36. codebeacon/extract/queries/react.scm +111 -0
  37. codebeacon/extract/queries/spring_boot.scm +204 -0
  38. codebeacon/extract/queries/svelte.scm +73 -0
  39. codebeacon/extract/queries/vapor.scm +130 -0
  40. codebeacon/extract/queries/vue.scm +123 -0
  41. codebeacon/extract/routes.py +910 -0
  42. codebeacon/extract/semantic.py +280 -0
  43. codebeacon/extract/services.py +597 -0
  44. codebeacon/graph/__init__.py +1 -0
  45. codebeacon/graph/analyze.py +281 -0
  46. codebeacon/graph/build.py +320 -0
  47. codebeacon/graph/cluster.py +160 -0
  48. codebeacon/graph/enrich.py +206 -0
  49. codebeacon/skill/SKILL.md +127 -0
  50. codebeacon/wave.py +292 -0
  51. codebeacon/wiki/__init__.py +0 -0
  52. codebeacon/wiki/generator.py +376 -0
  53. codebeacon/wiki/index.py +95 -0
  54. codebeacon/wiki/templates.py +467 -0
  55. codebeacon-0.1.2.dist-info/METADATA +319 -0
  56. codebeacon-0.1.2.dist-info/RECORD +59 -0
  57. codebeacon-0.1.2.dist-info/WHEEL +4 -0
  58. codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
  59. codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,170 @@
1
+ """Edge and node filters for graph cleanup.
2
+
3
+ Three main filters applied after Pass-2 symbol resolution:
4
+ 1. filter_build_artifacts() — Remove nodes from build output dirs
5
+ 2. filter_cross_language() — Remove spurious Java/Kotlin↔TypeScript/JavaScript import edges
6
+ 3. filter_cross_service() — Remove false cross-service edges (preserve calls_api, shares_db_entity)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+
13
+ from codebeacon.common.types import Edge, Node
14
+
15
# Directory names treated as build output. filter_build_artifacts() drops a node
# when ANY segment of its source_file path equals one of these names (exact,
# case-sensitive comparison).
_ARTIFACT_DIRS: frozenset[str] = frozenset({
    "target", "build", "dist", "node_modules", ".next", ".nuxt",
    "out", "output", "__pycache__", ".gradle", "vendor",
    "bin", "obj", ".dart_tool", ".build", ".cache",
})

# Java/Kotlin file extensions (compared against the lower-cased file suffix)
_JAVA_EXTS: frozenset[str] = frozenset({".java", ".kt", ".kts"})
# TypeScript/JavaScript file extensions (compared against the lower-cased file suffix)
_TS_EXTS: frozenset[str] = frozenset({".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"})

# Relations the cross-language / cross-service filters never drop
_PRESERVE_RELATIONS: frozenset[str] = frozenset({"calls_api", "shares_db_entity"})

# Import-type relations: the only relations the cross-service/cross-language
# filters may drop; every other relation passes through untouched
_IMPORT_RELATIONS: frozenset[str] = frozenset({"imports", "imports_from"})

# Shared library directory markers (heuristic); matched against lower-cased
# path segments by _is_shared_lib()
_SHARED_MARKERS: frozenset[str] = frozenset({
    "shared", "common", "lib", "libs", "core", "utils", "util", "commons", "base",
})
37
+
38
+
39
def filter_build_artifacts(
    nodes: list[Node],
    edges: list[Edge],
) -> tuple[list[Node], list[Edge]]:
    """Drop nodes located in build-output directories, plus their edges.

    A node is discarded when any segment of its ``source_file`` path is one of
    the names in ``_ARTIFACT_DIRS``. An edge is discarded when its source or
    its target is a discarded node.

    Returns:
        (kept_nodes, kept_edges), each in the original input order.
    """
    dropped_ids: set[str] = set()
    kept_nodes: list[Node] = []

    for candidate in nodes:
        segments = Path(candidate.source_file).parts
        if _ARTIFACT_DIRS.isdisjoint(segments):
            kept_nodes.append(candidate)
        else:
            dropped_ids.add(candidate.id)

    kept_edges = [
        edge for edge in edges
        if not (edge.source in dropped_ids or edge.target in dropped_ids)
    ]
    return kept_nodes, kept_edges
65
+
66
+
67
def filter_cross_language(
    edges: list[Edge],
    nodes: dict[str, Node],
) -> list[Edge]:
    """Drop import edges that link a Java/Kotlin file with a TS/JS file.

    Kept unconditionally:
      - calls_api and shares_db_entity edges
      - every relation that is not an import relation (calls, injects, etc.)
      - import edges whose source or target node is not present in ``nodes``

    Args:
        edges: list of all edges
        nodes: node_id → Node mapping

    Returns:
        The surviving edges, in their original order.
    """
    kept: list[Edge] = []
    for edge in edges:
        # Only import-type relations are candidates for removal.
        if edge.relation in _PRESERVE_RELATIONS or edge.relation not in _IMPORT_RELATIONS:
            kept.append(edge)
            continue

        source_node = nodes.get(edge.source)
        target_node = nodes.get(edge.target)
        if not (source_node and target_node):
            # Cannot judge the languages without both endpoints: keep.
            kept.append(edge)
            continue

        source_suffix = Path(source_node.source_file).suffix.lower()
        target_suffix = Path(target_node.source_file).suffix.lower()

        java_to_ts = source_suffix in _JAVA_EXTS and target_suffix in _TS_EXTS
        ts_to_java = source_suffix in _TS_EXTS and target_suffix in _JAVA_EXTS

        # An import between the two language families is treated as spurious.
        if not (java_to_ts or ts_to_java):
            kept.append(edge)
    return kept
111
+
112
+
113
def filter_cross_service(
    edges: list[Edge],
    nodes: dict[str, Node],
    service_roots: dict[str, str],  # node_id → service/project name
) -> list[Edge]:
    """Drop import edges that cross a service boundary without good reason.

    Such edges are typically produced by name collisions, e.g.
    front-pms/Button ↔ front-pvms/Button, which should NOT be linked.

    An edge survives when any of the following holds:
      - its relation is calls_api or shares_db_entity
      - its relation is not an import relation (calls, injects, ...)
      - the service of either endpoint is unknown
      - both endpoints belong to the same service
      - the target node lives in a shared-library directory (see _is_shared_lib)

    Args:
        edges: list of all edges
        nodes: node_id → Node mapping
        service_roots: node_id → project/service name

    Returns:
        The surviving edges, in their original order.
    """

    def _keep(edge: Edge) -> bool:
        relation = edge.relation
        if relation in _PRESERVE_RELATIONS or relation not in _IMPORT_RELATIONS:
            return True

        source_service = service_roots.get(edge.source)
        target_service = service_roots.get(edge.target)

        # Unknown affiliation → keep (conservative)
        if not source_service or not target_service:
            return True

        if source_service == target_service:
            return True

        # Cross-service import: acceptable only when it targets a shared library.
        target_node = nodes.get(edge.target)
        return bool(target_node and _is_shared_lib(target_node.source_file))

    return [edge for edge in edges if _keep(edge)]
165
+
166
+
167
def _is_shared_lib(file_path: str) -> bool:
    """Heuristic: does any lower-cased path segment equal a shared-lib marker?"""
    lowered_segments = (segment.lower() for segment in Path(file_path).parts)
    return not _SHARED_MARKERS.isdisjoint(lowered_segments)
@@ -0,0 +1,121 @@
1
+ """Global symbol table for two-pass DI resolution (Pass 2).
2
+
3
+ SymbolTable:
4
+ - Pass 1: receives all extracted nodes, builds class_name → node-ID and implements/extends maps
5
+ - Pass 2: resolves UnresolvedRef objects → concrete Edge objects
6
+
7
+ Resolution priority:
8
+ 1. Interface → Impl mapping (Spring Boot @Service/@Component implements chain)
9
+ 2. Direct class name match
10
+ 3. Unresolved → return None
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import Optional
16
+
17
+ from codebeacon.common.types import Edge, Node, UnresolvedRef
18
+
19
+
20
class SymbolTable:
    """Manages global symbol mappings for cross-file dependency resolution.

    Usage: call build() once with every extracted node, then resolve_ref() /
    resolve_all() to turn UnresolvedRef objects into ``injects`` edges.

    Node IDs of the form ``"<project>::<rest>"`` are treated as project-scoped:
    the text before the first ``"::"`` is the project name, and targets in the
    same project as the referencing node are preferred.
    """

    # Name suffixes that mark a class as the conventional concrete implementation.
    _IMPL_SUFFIXES = ("Impl", "Implementation")

    def __init__(self) -> None:
        # class/type label → [node_id, ...] (the same label may be defined
        # several times, e.g. once per project in a monorepo)
        self._class_map: dict[str, list[str]] = {}
        # interface or parent-class name → [implementing/extending class label, ...]
        self._implements_map: dict[str, list[str]] = {}
        # All known node IDs
        self._node_ids: set[str] = set()

    def build(self, nodes: list[Node]) -> None:
        """Build symbol maps from a flat list of all extracted nodes.

        Must be called after all Pass-1 extraction is complete. Entries are
        de-duplicated, so feeding the same node twice is harmless.
        """
        for node in nodes:
            self._node_ids.add(node.id)

            label = node.label
            node_ids = self._class_map.setdefault(label, [])
            if node.id not in node_ids:
                node_ids.append(node.id)

            # Register implements/extends relationships from metadata; both
            # kinds feed the same parent → children map.
            meta = node.metadata or {}
            for key in ("implements", "extends"):
                for parent in meta.get(key, []):
                    children = self._implements_map.setdefault(parent, [])
                    if label not in children:
                        children.append(label)

    def _choose_impl(self, impls: list[str], project_prefix: str) -> str:
        """Pick the implementation label to use for an interface reference.

        Implementations that have at least one node whose ID starts with
        ``project_prefix`` are preferred; if there are none, all of ``impls``
        are considered. Within the candidates, the first label ending in
        "Impl"/"Implementation" wins, otherwise the first candidate.
        """
        local = [
            impl for impl in impls
            if any(
                nid.startswith(project_prefix)
                for nid in self._class_map.get(impl, ())
            )
        ]
        candidates = local or impls
        for impl in candidates:
            if impl.endswith(self._IMPL_SUFFIXES):
                return impl
        return candidates[0]

    def resolve_ref(self, ref: UnresolvedRef) -> Optional[Edge]:
        """Attempt to resolve a single UnresolvedRef into a concrete Edge.

        Resolution order:
          1. If ``ref.ref_name`` has registered implementers/subclasses, switch
             to one of them (same-project first, then the *Impl naming
             convention).
          2. Look the resulting name up among the known class labels and pick a
             node ID, preferring one in the referencing node's project.

        Returns:
            An ``injects`` Edge (INFERRED / 0.8 when step 1 changed the name,
            EXTRACTED / 1.0 otherwise), or None if the name is unknown.
        """
        source_id = ref.source_node_id
        source_project = source_id.split("::")[0] if "::" in source_id else ""
        project_prefix = source_project + "::"

        target_name = ref.ref_name

        # Step 1: interface → impl mapping (Spring Boot / Laravel / Angular pattern)
        impls = self._implements_map.get(target_name)
        if impls:
            target_name = self._choose_impl(impls, project_prefix)

        # Step 2: direct class match
        target_node_ids = self._class_map.get(target_name)
        if not target_node_ids:
            return None

        # Several nodes may share the label; stay inside the source project
        # when possible, otherwise fall back to the first one registered.
        target_id = next(
            (nid for nid in target_node_ids if nid.startswith(project_prefix)),
            target_node_ids[0],
        )

        is_interface_resolved = target_name != ref.ref_name
        return Edge(
            source=ref.source_node_id,
            target=target_id,
            relation="injects",
            confidence="INFERRED" if is_interface_resolved else "EXTRACTED",
            confidence_score=0.8 if is_interface_resolved else 1.0,
            # NOTE(review): UnresolvedRef carries no file path, so the source
            # node ID is stored here — confirm consumers of Edge.source_file
            # expect that.
            source_file=ref.source_node_id,
        )

    def resolve_all(
        self, unresolved: list[UnresolvedRef]
    ) -> tuple[list[Edge], list[UnresolvedRef]]:
        """Resolve all UnresolvedRefs.

        Returns:
            (resolved_edges, still_unresolved) tuple, each preserving the
            input order.
        """
        resolved: list[Edge] = []
        still_unresolved: list[UnresolvedRef] = []
        for ref in unresolved:
            edge = self.resolve_ref(ref)
            if edge is None:
                still_unresolved.append(ref)
            else:
                resolved.append(edge)
        return resolved, still_unresolved

    def known_classes(self) -> set[str]:
        """Return the set of all known class/type names."""
        return set(self._class_map)

    def known_node_ids(self) -> set[str]:
        """Return a copy of the set of all registered node IDs."""
        return set(self._node_ids)
@@ -0,0 +1,98 @@
1
+ """Core data types for codebeacon. All dataclasses use slots=True for memory efficiency."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Optional
5
+
6
+
7
@dataclass(slots=True)
class Node:
    """A single extracted symbol, i.e. one node of the code graph."""

    id: str  # unique node identifier; edges refer to nodes by this value
    label: str  # display/class name; used as the lookup key during symbol resolution
    type: str  # "class", "method", "entity", "route", "component"
    source_file: str  # path of the file the node was extracted from
    line: int
    metadata: dict  # framework-specific extras (e.g. "implements" / "extends" lists)
15
+
16
+
17
@dataclass(slots=True)
class Edge:
    """A directed relation between two nodes, identified by their node IDs."""

    source: str  # node ID of the edge's origin
    target: str  # node ID of the edge's destination
    relation: str  # "imports", "calls", "injects", "calls_api", "shares_db_entity"
    confidence: str  # "EXTRACTED", "INFERRED", "UNRESOLVED"
    confidence_score: float  # numeric companion of `confidence` (e.g. 1.0 extracted, 0.8 inferred)
    source_file: str
25
+
26
+
27
@dataclass(slots=True)
class UnresolvedRef:
    """A by-name reference recorded in Pass 1 and resolved to an Edge in Pass 2."""

    source_node_id: str  # ID of the node that holds the reference
    ref_type: str  # "autowired", "depends", "inject", "import"
    ref_name: str  # referenced symbol name, e.g. "AlertService", "get_db"
    framework: str
33
+
34
+
35
@dataclass(slots=True)
class LocalExtractResult:
    """Everything extracted from one source file."""

    file_path: str
    nodes: list  # list[Node]
    unresolved: list  # list[UnresolvedRef]
    imports: list  # list[str] - raw import statements
41
+
42
+
43
@dataclass(slots=True)
class RouteInfo:
    """One extracted route: method + path, the handler serving it, and where it was found."""

    method: str  # "GET", "POST", "PUT", "DELETE", "PATCH", "ANY"
    path: str  # "/api/users/{id}"
    handler: str  # "UserController.getUser"
    source_file: str
    line: int
    framework: str
    prefix: str = ""  # accumulated prefix from router.use() / Blueprint / etc.
    tags: list = field(default_factory=list)  # ["auth", "db", "cache"]
53
+
54
+
55
@dataclass(slots=True)
class ServiceInfo:
    """One extracted service class with its methods, injected dependencies and annotations."""

    name: str  # "UserService"
    class_name: str
    source_file: str
    line: int
    framework: str
    methods: list = field(default_factory=list)  # list[str] - method names
    dependencies: list = field(default_factory=list)  # list[str] - injected type names (unresolved)
    annotations: list = field(default_factory=list)  # list[str] - @Service, @Injectable, etc.
65
+
66
+
67
@dataclass(slots=True)
class EntityInfo:
    """One extracted ORM/database entity with its fields and relations."""

    name: str  # "User"
    table_name: str  # "users" or "" if not explicit
    source_file: str
    line: int
    framework: str  # "jpa", "django-orm", "sqlalchemy", "eloquent", "ef-core", "gorm", "active-record", "diesel", "sea-orm"
    fields: list = field(default_factory=list)  # list[dict]: {"name", "type", "annotations"}
    relations: list = field(default_factory=list)  # list[dict]: {"type": "hasMany", "target": "Order"}
76
+
77
+
78
@dataclass(slots=True)
class ComponentInfo:
    """One extracted UI component with its props, hooks and imported components."""

    name: str  # "UserCard"
    source_file: str
    line: int
    framework: str  # "react", "vue", "svelte", "angular"
    props: list = field(default_factory=list)  # list[str] - prop names
    hooks: list = field(default_factory=list)  # list[str] - used hooks/composables
    imports: list = field(default_factory=list)  # list[str] - imported component names
    is_page: bool = False  # true if this is a route-level page component
    route_path: str = ""  # Next.js/Nuxt/SvelteKit derived route path
89
+
90
+
91
@dataclass(slots=True)
class ProjectInfo:
    """One detected project: where it lives and which framework/language was detected."""

    name: str
    path: str
    framework: str  # detected framework
    language: str  # primary language
    signature_file: str  # the file that triggered detection (pom.xml, package.json, etc.)
    is_multi: bool = False  # NOTE(review): presumably marks a multi-project root — confirm against the detector
codebeacon/config.py ADDED
@@ -0,0 +1,144 @@
1
+ """codebeacon.yaml loader and validator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ import yaml
11
+
12
+
13
@dataclass
class ProjectConfig:
    """One entry of the ``projects:`` list in codebeacon.yaml."""

    name: str
    path: str  # load_config() resolves relative paths against the config file's directory
    type: str = "auto"  # framework type or "auto" for detection
18
+
19
+
20
@dataclass
class OutputConfig:
    """The ``output:`` section of codebeacon.yaml: output directory and per-artifact switches."""

    dir: str = ".codebeacon"
    wiki: bool = True
    obsidian: bool = True
    graph_html: bool = True
    # Read from ``output.context_map.targets`` in the YAML file
    context_map_targets: list = field(default_factory=lambda: ["CLAUDE.md", ".cursorrules", "AGENTS.md"])
27
+
28
+
29
@dataclass
class WaveConfig:
    """The ``wave:`` section of codebeacon.yaml."""

    auto: bool = True
    chunk_size: int = 300  # NOTE(review): presumably items (files?) per chunk — confirm against wave.py
    max_parallel: int = 5  # NOTE(review): presumably the concurrency cap — confirm against wave.py
34
+
35
+
36
@dataclass
class SemanticConfig:
    """The ``semantic:`` section of codebeacon.yaml; disabled by default."""

    enabled: bool = False
39
+
40
+
41
@dataclass
class CodebeaconConfig:
    """Fully parsed codebeacon.yaml, as returned by load_config()."""

    version: int  # load_config() accepts only version 1
    projects: list  # list[ProjectConfig]
    output: OutputConfig = field(default_factory=OutputConfig)
    wave: WaveConfig = field(default_factory=WaveConfig)
    semantic: SemanticConfig = field(default_factory=SemanticConfig)
    config_file: str = ""  # path to the loaded yaml file
49
+
50
+
51
def load_config(path: str | Path) -> CodebeaconConfig:
    """Load and validate codebeacon.yaml from the given path.

    Relative project paths are resolved against the directory that contains
    the config file. The optional sections (``output``, ``output.context_map``,
    ``wave``, ``semantic``) fall back to their defaults when they are missing
    or left empty (``key:`` with no value).

    Args:
        path: Location of the YAML config file.

    Returns:
        The parsed configuration, with ``config_file`` set to ``str(path)``.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If the document is not a mapping, the version is not 1,
            ``projects`` is missing or empty, a project entry is not a mapping
            containing ``name`` and ``path``, or an optional section is
            present but is not a mapping.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    # Read as UTF-8 explicitly instead of relying on the platform's locale
    # encoding, so non-ASCII project names/paths load the same everywhere.
    with open(path, encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    if not isinstance(raw, dict):
        raise ValueError(f"Invalid config file: {path}")

    def _section(parent: dict, key: str) -> dict:
        """Return the mapping stored under ``key``; {} when absent or empty."""
        value = parent.get(key)
        if value is None:
            return {}
        if not isinstance(value, dict):
            raise ValueError(f"Config section '{key}' must be a mapping: {path}")
        return value

    version = raw.get("version", 1)
    if version != 1:
        raise ValueError(f"Unsupported config version: {version}. Expected 1.")

    projects_raw = raw.get("projects", [])
    if not isinstance(projects_raw, list) or not projects_raw:
        raise ValueError("Config must contain at least one project under 'projects:'")

    projects = []
    for p in projects_raw:
        if not isinstance(p, dict) or "name" not in p or "path" not in p:
            raise ValueError(f"Project entry missing 'name' or 'path': {p}")
        # Resolve path relative to config file location
        proj_path = p["path"]
        if not os.path.isabs(proj_path):
            proj_path = str(path.parent / proj_path)
        projects.append(ProjectConfig(
            name=p["name"],
            path=proj_path,
            type=p.get("type", "auto"),
        ))

    output_raw = _section(raw, "output")
    context_map = _section(output_raw, "context_map")
    output = OutputConfig(
        dir=output_raw.get("dir", ".codebeacon"),
        wiki=output_raw.get("wiki", True),
        obsidian=output_raw.get("obsidian", True),
        graph_html=output_raw.get("graph_html", True),
        context_map_targets=context_map.get("targets", ["CLAUDE.md", ".cursorrules", "AGENTS.md"]),
    )

    wave_raw = _section(raw, "wave")
    wave = WaveConfig(
        auto=wave_raw.get("auto", True),
        chunk_size=wave_raw.get("chunk_size", 300),
        max_parallel=wave_raw.get("max_parallel", 5),
    )

    semantic_raw = _section(raw, "semantic")
    semantic = SemanticConfig(
        enabled=semantic_raw.get("enabled", False),
    )

    return CodebeaconConfig(
        version=version,
        projects=projects,
        output=output,
        wave=wave,
        semantic=semantic,
        config_file=str(path),
    )
115
+
116
+
117
def find_config(start_dir: str | Path) -> Optional[Path]:
    """Return the codebeacon config file located directly in ``start_dir``.

    Only ``start_dir`` itself is inspected; parent directories are not
    searched. ``codebeacon.yaml`` takes precedence over ``codebeacon.yml``.

    Args:
        start_dir: Directory to look in.

    Returns:
        Path to the config file, or None when neither file exists.
    """
    base = Path(start_dir)
    for filename in ("codebeacon.yaml", "codebeacon.yml"):
        candidate = base / filename
        # is_file() rather than exists(): a directory that happens to carry
        # the config name cannot be loaded and must not shadow a real file.
        if candidate.is_file():
            return candidate
    return None
128
+
129
+
130
def generate_config(projects: list, output_dir: str, config_path: str | Path) -> None:
    """Write an auto-generated codebeacon.yaml for multi-project scans.

    Args:
        projects: Objects exposing ``name``, ``path`` and ``framework``
            attributes; one ``projects:`` entry is written per object.
        output_dir: Value stored under ``output.dir``.
        config_path: Destination file; an existing file is overwritten.
    """
    config_path = Path(config_path)
    data = {
        "version": 1,
        "projects": [
            {"name": p.name, "path": p.path, "type": p.framework}
            for p in projects
        ],
        "output": {"dir": output_dir},
        "wave": {"auto": True, "chunk_size": 300, "max_parallel": 5},
        "semantic": {"enabled": False},
    }
    # allow_unicode=True emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8; with the locale default (e.g. cp1252) the write can
    # raise UnicodeEncodeError for such project names or paths.
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
File without changes