codebeacon 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Edge and node filters for graph cleanup.
|
|
2
|
+
|
|
3
|
+
Three main filters applied after Pass-2 symbol resolution:
|
|
4
|
+
1. filter_build_artifacts() — Remove nodes from build output dirs
|
|
5
|
+
2. filter_cross_language() — Remove spurious Java/Kotlin↔TS/JS import edges
|
|
6
|
+
3. filter_cross_service() — Remove false cross-service edges (preserve calls_api, shares_db_entity)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from codebeacon.common.types import Edge, Node
|
|
14
|
+
|
|
15
|
+
# Directory names treated as build output / vendored code. A node is considered
# an artifact when any segment of its source path equals one of these names.
_ARTIFACT_DIRS: frozenset[str] = frozenset((
    "target",
    "build",
    "dist",
    "node_modules",
    ".next",
    ".nuxt",
    "out",
    "output",
    "__pycache__",
    ".gradle",
    "vendor",
    "bin",
    "obj",
    ".dart_tool",
    ".build",
    ".cache",
))

# File suffixes classified as Java/Kotlin sources.
_JAVA_EXTS: frozenset[str] = frozenset((".java", ".kt", ".kts"))
# File suffixes classified as TypeScript/JavaScript sources.
_TS_EXTS: frozenset[str] = frozenset((".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs"))

# Relations the filters never drop, whatever the other checks say.
_PRESERVE_RELATIONS: frozenset[str] = frozenset(("calls_api", "shares_db_entity"))

# Import-style relations: the only ones the cross-language and cross-service
# filters are allowed to remove.
_IMPORT_RELATIONS: frozenset[str] = frozenset(("imports", "imports_from"))

# Path segment names that (heuristically) mark a shared library directory.
_SHARED_MARKERS: frozenset[str] = frozenset((
    "shared",
    "common",
    "lib",
    "libs",
    "core",
    "utils",
    "util",
    "commons",
    "base",
))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def filter_build_artifacts(
    nodes: list[Node],
    edges: list[Edge],
) -> tuple[list[Node], list[Edge]]:
    """Drop nodes located under a build-artifact directory, plus their edges.

    A node is dropped when any segment of its ``source_file`` path is listed
    in ``_ARTIFACT_DIRS``. Every edge whose source or target is a dropped node
    ID is removed as well.

    Returns:
        (clean_nodes, clean_edges)
    """
    kept_nodes: list[Node] = []
    dropped_ids: set[str] = set()

    for candidate in nodes:
        segments = Path(candidate.source_file).parts
        if _ARTIFACT_DIRS.isdisjoint(segments):
            kept_nodes.append(candidate)
        else:
            dropped_ids.add(candidate.id)

    # An edge survives only when neither endpoint was dropped.
    kept_edges = [
        edge for edge in edges
        if dropped_ids.isdisjoint((edge.source, edge.target))
    ]
    return kept_nodes, kept_edges
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def filter_cross_language(
    edges: list[Edge],
    nodes: dict[str, Node],
) -> list[Edge]:
    """Drop import edges that link a Java/Kotlin file with a TS/JS file.

    Kept unconditionally:
      - relations in ``_PRESERVE_RELATIONS`` (calls_api, shares_db_entity)
      - any relation that is not an import relation (calls, injects, etc.)
      - import edges whose source or target node is missing from ``nodes``

    Args:
        edges: list of all edges
        nodes: node_id → Node mapping
    """

    def _is_spurious(edge: Edge) -> bool:
        # Only plain import relations are ever candidates for removal.
        relation = edge.relation
        if relation in _PRESERVE_RELATIONS or relation not in _IMPORT_RELATIONS:
            return False

        origin = nodes.get(edge.source)
        destination = nodes.get(edge.target)
        if not origin or not destination:
            return False

        origin_suffix = Path(origin.source_file).suffix.lower()
        destination_suffix = Path(destination.source_file).suffix.lower()

        # Java/Kotlin ↔ TypeScript/JavaScript imports are treated as spurious.
        jvm_to_js = origin_suffix in _JAVA_EXTS and destination_suffix in _TS_EXTS
        js_to_jvm = origin_suffix in _TS_EXTS and destination_suffix in _JAVA_EXTS
        return jvm_to_js or js_to_jvm

    return [edge for edge in edges if not _is_spurious(edge)]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def filter_cross_service(
    edges: list[Edge],
    nodes: dict[str, Node],
    service_roots: dict[str, str],  # node_id → service/project name
) -> list[Edge]:
    """Drop import edges between different services unless the target is shared.

    Such edges are typically name collisions, e.g. front-pms/Button ↔
    front-pvms/Button should NOT be linked.

    An edge is kept when any of the following holds:
      - its relation is in ``_PRESERVE_RELATIONS`` (calls_api, shares_db_entity)
      - its relation is not an import relation (calls, injects, ...)
      - the service of its source or target is unknown
      - source and target belong to the same service
      - the target node's path contains a shared-library marker
        ('shared', 'common', etc.)

    Args:
        edges: list of all edges
        nodes: node_id → Node mapping
        service_roots: node_id → project/service name
    """

    def _keep(edge: Edge) -> bool:
        relation = edge.relation
        if relation in _PRESERVE_RELATIONS or relation not in _IMPORT_RELATIONS:
            return True

        origin_service = service_roots.get(edge.source)
        destination_service = service_roots.get(edge.target)

        # Unknown affiliation → keep (conservative).
        if not (origin_service and destination_service):
            return True

        # Same service → always keep.
        if origin_service == destination_service:
            return True

        # Cross-service import: only legitimate when it points into a shared library.
        destination = nodes.get(edge.target)
        return bool(destination and _is_shared_lib(destination.source_file))

    return [edge for edge in edges if _keep(edge)]
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _is_shared_lib(file_path: str) -> bool:
    """Heuristic: does any path segment (case-insensitive) equal a shared-lib marker?"""
    return any(
        segment.lower() in _SHARED_MARKERS
        for segment in Path(file_path).parts
    )
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Global symbol table for two-pass DI resolution (Pass 2).
|
|
2
|
+
|
|
3
|
+
SymbolTable:
|
|
4
|
+
- Pass 1: receives all extracted nodes, builds class_name → node_id + implements/extends maps
|
|
5
|
+
- Pass 2: resolves UnresolvedRef objects → concrete Edge objects
|
|
6
|
+
|
|
7
|
+
Resolution priority:
|
|
8
|
+
1. Interface → Impl mapping (Spring Boot @Service/@Component implements chain)
|
|
9
|
+
2. Direct class name match
|
|
10
|
+
3. Unresolved → return None
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
from codebeacon.common.types import Edge, Node, UnresolvedRef
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SymbolTable:
    """Manages global symbol mappings for cross-file dependency resolution.

    ``build`` indexes every node by its label and records implements/extends
    relationships found in node metadata. ``resolve_ref`` / ``resolve_all``
    then turn ``UnresolvedRef`` objects into ``injects`` edges.
    """

    def __init__(self) -> None:
        # label (class/type name) → [node_id, ...]; several nodes may share a
        # label, e.g. identically named classes in different projects.
        self._class_map: dict[str, list[str]] = {}
        # interface or parent name → [label of implementing/extending class, ...]
        self._implements_map: dict[str, list[str]] = {}
        # All known node IDs
        self._node_ids: set[str] = set()

    @staticmethod
    def _as_names(value: object) -> list[str]:
        """Normalise an ``implements``/``extends`` metadata value to a list of names.

        A missing or empty value yields ``[]``; a bare string is treated as a
        single name rather than being iterated character by character.
        """
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    def build(self, nodes: list[Node]) -> None:
        """Build symbol maps from a flat list of all extracted nodes.

        Must be called after all Pass-1 extraction is complete. Calling it
        again adds to the existing maps; nothing is cleared.
        """
        for node in nodes:
            self._node_ids.add(node.id)

            label = node.label
            ids_for_label = self._class_map.setdefault(label, [])
            if node.id not in ids_for_label:
                ids_for_label.append(node.id)

            # Register implements/extends relationships from metadata. Both
            # kinds feed the same map, so a ref to a parent class resolves to
            # a subclass the same way a ref to an interface resolves to an impl.
            meta = node.metadata or {}
            for key in ("implements", "extends"):
                for parent in self._as_names(meta.get(key)):
                    impl_labels = self._implements_map.setdefault(parent, [])
                    if label not in impl_labels:
                        impl_labels.append(label)

    def resolve_ref(self, ref: UnresolvedRef) -> Optional[Edge]:
        """Attempt to resolve a single UnresolvedRef into a concrete Edge.

        Resolution order:
          1. If ``ref.ref_name`` has registered implementers/subclasses, pick
             one (preferring a name ending in ``Impl``/``Implementation``,
             otherwise the first registered).
          2. Look the chosen name up in the label → node ID map, preferring a
             node ID that shares the source's ``project::`` prefix.

        Returns:
            An ``injects`` Edge, marked INFERRED (0.8) when step 1 substituted
            an implementation and EXTRACTED (1.0) otherwise; None if the name
            is unknown.
        """
        target_name = ref.ref_name

        # Step 1: interface → impl mapping.
        impls = self._implements_map.get(target_name)
        if impls:
            target_name = next(
                (name for name in impls if name.endswith(("Impl", "Implementation"))),
                impls[0],
            )

        # Step 2: direct label match.
        target_node_ids = self._class_map.get(target_name)
        if not target_node_ids:
            return None

        # Default to the first registered node; prefer one from the same project.
        target_id = target_node_ids[0]
        source_project, separator, _ = ref.source_node_id.partition("::")
        if separator:
            # Only meaningful when the source ID actually carries a project
            # prefix; otherwise we would match any ID that starts with "::".
            prefix = source_project + "::"
            target_id = next(
                (nid for nid in target_node_ids if nid.startswith(prefix)),
                target_id,
            )

        is_interface_resolved = target_name != ref.ref_name
        return Edge(
            source=ref.source_node_id,
            target=target_id,
            relation="injects",
            confidence="INFERRED" if is_interface_resolved else "EXTRACTED",
            confidence_score=0.8 if is_interface_resolved else 1.0,
            # NOTE(review): this stores the source node ID, not a file path;
            # UnresolvedRef as used here exposes no file path — confirm intent.
            source_file=ref.source_node_id,
        )

    def resolve_all(
        self, unresolved: list[UnresolvedRef]
    ) -> tuple[list[Edge], list[UnresolvedRef]]:
        """Resolve all UnresolvedRefs.

        Returns:
            (resolved_edges, still_unresolved) tuple, each in input order.
        """
        resolved: list[Edge] = []
        still_unresolved: list[UnresolvedRef] = []
        for ref in unresolved:
            edge = self.resolve_ref(ref)
            if edge is not None:
                resolved.append(edge)
            else:
                still_unresolved.append(ref)
        return resolved, still_unresolved

    def known_classes(self) -> set[str]:
        """Return the set of all known class/type names (node labels)."""
        return set(self._class_map)

    def known_node_ids(self) -> set[str]:
        """Return a copy of all registered node IDs."""
        return set(self._node_ids)
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""Core data types for codebeacon. All dataclasses use slots=True for memory efficiency."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(slots=True)
|
|
8
|
+
class Node:
|
|
9
|
+
id: str
|
|
10
|
+
label: str
|
|
11
|
+
type: str # "class", "method", "entity", "route", "component"
|
|
12
|
+
source_file: str
|
|
13
|
+
line: int
|
|
14
|
+
metadata: dict # framework-specific extras
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(slots=True)
|
|
18
|
+
class Edge:
|
|
19
|
+
source: str
|
|
20
|
+
target: str
|
|
21
|
+
relation: str # "imports", "calls", "injects", "calls_api", "shares_db_entity"
|
|
22
|
+
confidence: str # "EXTRACTED", "INFERRED", "UNRESOLVED"
|
|
23
|
+
confidence_score: float
|
|
24
|
+
source_file: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass(slots=True)
|
|
28
|
+
class UnresolvedRef:
|
|
29
|
+
source_node_id: str
|
|
30
|
+
ref_type: str # "autowired", "depends", "inject", "import"
|
|
31
|
+
ref_name: str # "AlertService", "get_db"
|
|
32
|
+
framework: str
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
|
|
36
|
+
class LocalExtractResult:
|
|
37
|
+
file_path: str
|
|
38
|
+
nodes: list # list[Node]
|
|
39
|
+
unresolved: list # list[UnresolvedRef]
|
|
40
|
+
imports: list # list[str] - raw import statements
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass(slots=True)
|
|
44
|
+
class RouteInfo:
|
|
45
|
+
method: str # "GET", "POST", "PUT", "DELETE", "PATCH", "ANY"
|
|
46
|
+
path: str # "/api/users/{id}"
|
|
47
|
+
handler: str # "UserController.getUser"
|
|
48
|
+
source_file: str
|
|
49
|
+
line: int
|
|
50
|
+
framework: str
|
|
51
|
+
prefix: str = "" # accumulated prefix from router.use() / Blueprint / etc.
|
|
52
|
+
tags: list = field(default_factory=list) # ["auth", "db", "cache"]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(slots=True)
|
|
56
|
+
class ServiceInfo:
|
|
57
|
+
name: str # "UserService"
|
|
58
|
+
class_name: str
|
|
59
|
+
source_file: str
|
|
60
|
+
line: int
|
|
61
|
+
framework: str
|
|
62
|
+
methods: list = field(default_factory=list) # list[str] - method names
|
|
63
|
+
dependencies: list = field(default_factory=list) # list[str] - injected type names (unresolved)
|
|
64
|
+
annotations: list = field(default_factory=list) # list[str] - @Service, @Injectable, etc.
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(slots=True)
|
|
68
|
+
class EntityInfo:
|
|
69
|
+
name: str # "User"
|
|
70
|
+
table_name: str # "users" or "" if not explicit
|
|
71
|
+
source_file: str
|
|
72
|
+
line: int
|
|
73
|
+
framework: str # "jpa", "django-orm", "sqlalchemy", "eloquent", "ef-core", "gorm", "active-record", "diesel", "sea-orm"
|
|
74
|
+
fields: list = field(default_factory=list) # list[dict]: {"name", "type", "annotations"}
|
|
75
|
+
relations: list = field(default_factory=list) # list[dict]: {"type": "hasMany", "target": "Order"}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(slots=True)
|
|
79
|
+
class ComponentInfo:
|
|
80
|
+
name: str # "UserCard"
|
|
81
|
+
source_file: str
|
|
82
|
+
line: int
|
|
83
|
+
framework: str # "react", "vue", "svelte", "angular"
|
|
84
|
+
props: list = field(default_factory=list) # list[str] - prop names
|
|
85
|
+
hooks: list = field(default_factory=list) # list[str] - used hooks/composables
|
|
86
|
+
imports: list = field(default_factory=list) # list[str] - imported component names
|
|
87
|
+
is_page: bool = False # true if this is a route-level page component
|
|
88
|
+
route_path: str = "" # Next.js/Nuxt/SvelteKit derived route path
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass(slots=True)
|
|
92
|
+
class ProjectInfo:
|
|
93
|
+
name: str
|
|
94
|
+
path: str
|
|
95
|
+
framework: str # detected framework
|
|
96
|
+
language: str # primary language
|
|
97
|
+
signature_file: str # the file that triggered detection (pom.xml, package.json, etc.)
|
|
98
|
+
is_multi: bool = False
|
codebeacon/config.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""codebeacon.yaml loader and validator."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class ProjectConfig:
    """One entry of the ``projects:`` list in codebeacon.yaml."""

    name: str
    path: str
    # Framework type, or "auto" for detection
    type: str = "auto"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class OutputConfig:
    """Settings of the ``output:`` section of codebeacon.yaml."""

    dir: str = ".codebeacon"
    wiki: bool = True
    obsidian: bool = True
    graph_html: bool = True
    # Context-map target file names; a fresh list is built for every instance.
    context_map_targets: list = field(
        default_factory=lambda: ["CLAUDE.md", ".cursorrules", "AGENTS.md"]
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class WaveConfig:
    """Settings of the ``wave:`` section of codebeacon.yaml."""

    auto: bool = True
    chunk_size: int = 300
    max_parallel: int = 5
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class SemanticConfig:
    """Settings of the ``semantic:`` section of codebeacon.yaml."""

    enabled: bool = False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class CodebeaconConfig:
    """Top-level configuration object built from codebeacon.yaml."""

    version: int
    projects: list  # list[ProjectConfig]
    output: OutputConfig = field(default_factory=OutputConfig)
    wave: WaveConfig = field(default_factory=WaveConfig)
    semantic: SemanticConfig = field(default_factory=SemanticConfig)
    # Path to the loaded yaml file ("" when not loaded from disk)
    config_file: str = ""
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def load_config(path: str | Path) -> CodebeaconConfig:
    """Load and validate codebeacon.yaml from the given path.

    Relative project paths are resolved against the directory containing the
    config file. Sections that are absent or explicitly null (``output:``,
    ``output.context_map:``, ``wave:``, ``semantic:``) fall back to defaults.

    Args:
        path: location of the YAML config file.

    Returns:
        The populated CodebeaconConfig, with ``config_file`` set to ``path``.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        ValueError: if the document is not a mapping, the version is not 1,
            ``projects`` is missing/empty, a project entry is not a mapping
            with ``name`` and ``path``, or a section is not a mapping.

    YAML parse errors raised by ``yaml.safe_load`` propagate unchanged.
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"Config file not found: {path}")

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding (generate_config writes UTF-8 as well).
    with open(path, encoding="utf-8") as f:
        raw = yaml.safe_load(f)

    if not isinstance(raw, dict):
        raise ValueError(f"Invalid config file: {path}")

    def _mapping(parent: dict, key: str) -> dict:
        # A key written with no value (e.g. "output:") parses to None; treat it
        # like an absent section instead of crashing on ``None.get``.
        value = parent.get(key)
        if value is None:
            return {}
        if not isinstance(value, dict):
            raise ValueError(
                f"Config key '{key}' must be a mapping, got {type(value).__name__}"
            )
        return value

    version = raw.get("version", 1)
    if version != 1:
        raise ValueError(f"Unsupported config version: {version}. Expected 1.")

    projects_raw = raw.get("projects", [])
    if not isinstance(projects_raw, list) or not projects_raw:
        raise ValueError("Config must contain at least one project under 'projects:'")

    projects = []
    for p in projects_raw:
        # The isinstance check prevents a plain string entry from passing the
        # key test as a substring match and failing later with a TypeError.
        if not isinstance(p, dict) or "name" not in p or "path" not in p:
            raise ValueError(f"Project entry missing 'name' or 'path': {p}")
        # Resolve path relative to config file location
        proj_path = p["path"]
        if not os.path.isabs(proj_path):
            proj_path = str(path.parent / proj_path)
        projects.append(ProjectConfig(
            name=p["name"],
            path=proj_path,
            type=p.get("type", "auto"),
        ))

    output_raw = _mapping(raw, "output")
    context_map = _mapping(output_raw, "context_map")
    output = OutputConfig(
        dir=output_raw.get("dir", ".codebeacon"),
        wiki=output_raw.get("wiki", True),
        obsidian=output_raw.get("obsidian", True),
        graph_html=output_raw.get("graph_html", True),
        context_map_targets=context_map.get("targets", ["CLAUDE.md", ".cursorrules", "AGENTS.md"]),
    )

    wave_raw = _mapping(raw, "wave")
    wave = WaveConfig(
        auto=wave_raw.get("auto", True),
        chunk_size=wave_raw.get("chunk_size", 300),
        max_parallel=wave_raw.get("max_parallel", 5),
    )

    semantic_raw = _mapping(raw, "semantic")
    semantic = SemanticConfig(
        enabled=semantic_raw.get("enabled", False),
    )

    return CodebeaconConfig(
        version=version,
        projects=projects,
        output=output,
        wave=wave,
        semantic=semantic,
        config_file=str(path),
    )
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def find_config(start_dir: str | Path) -> Optional[Path]:
    """Return the codebeacon config file located directly in ``start_dir``.

    Only ``start_dir`` itself is checked (parent directories are not walked).
    ``codebeacon.yaml`` takes precedence over ``codebeacon.yml``.

    Args:
        start_dir: directory to look in.

    Returns:
        Path to the config file, or None when neither name exists as a file.
    """
    start_dir = Path(start_dir)
    for filename in ("codebeacon.yaml", "codebeacon.yml"):
        candidate = start_dir / filename
        # is_file() rather than exists(): a directory with this name cannot be
        # loaded as a config and must not shadow a real config file.
        if candidate.is_file():
            return candidate
    return None
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def generate_config(projects: list, output_dir: str, config_path: str | Path) -> None:
    """Write an auto-generated codebeacon.yaml for multi-project scans.

    Args:
        projects: objects exposing ``name``, ``path`` and ``framework``
            attributes; each becomes one entry under ``projects:``.
        output_dir: value written to ``output.dir``.
        config_path: destination file; overwritten if it already exists.
    """
    config_path = Path(config_path)
    data = {
        "version": 1,
        "projects": [
            {"name": p.name, "path": p.path, "type": p.framework}
            for p in projects
        ],
        "output": {"dir": output_dir},
        "wave": {"auto": True, "chunk_size": 300, "max_parallel": 5},
        "semantic": {"enabled": False},
    }
    # allow_unicode=True emits non-ASCII characters verbatim, so the file must
    # be opened as UTF-8; the platform default encoding may be unable to
    # represent them (or would produce a file that is not valid UTF-8 YAML).
    with open(config_path, "w", encoding="utf-8") as f:
        yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
|
|
File without changes
|