codebeacon 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
codebeacon/wave.py
ADDED
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
"""Automatic wave / segment processing (Pass 1).
|
|
2
|
+
|
|
3
|
+
auto_wave() splits source files into chunks and processes them in parallel
|
|
4
|
+
using a ThreadPoolExecutor. Each file is run through all extractors:
|
|
5
|
+
routes, services, entities, components, dependencies.
|
|
6
|
+
|
|
7
|
+
Results are merged into a WaveResult.
|
|
8
|
+
Pass 2 (symbol resolution + graph wiring) happens in graph/build.py after
|
|
9
|
+
all waves complete.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import concurrent.futures
|
|
15
|
+
import warnings
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any, Callable, Optional
|
|
18
|
+
|
|
19
|
+
from codebeacon.common.types import (
|
|
20
|
+
ComponentInfo,
|
|
21
|
+
Edge,
|
|
22
|
+
EntityInfo,
|
|
23
|
+
ProjectInfo,
|
|
24
|
+
RouteInfo,
|
|
25
|
+
ServiceInfo,
|
|
26
|
+
UnresolvedRef,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class WaveResult:
    """Aggregated Pass-1 extraction results across all chunks for one project."""
    # The project being scanned; supplies framework + root path to auto_wave().
    project: ProjectInfo
    routes: list[RouteInfo] = field(default_factory=list)
    services: list[ServiceInfo] = field(default_factory=list)
    entities: list[EntityInfo] = field(default_factory=list)
    components: list[ComponentInfo] = field(default_factory=list)
    # Import edges (plus semantic-comment edges when enabled), pre-resolution.
    import_edges: list[Edge] = field(default_factory=list)
    # References Pass 2 (graph/build.py) must resolve into concrete edges.
    unresolved: list[UnresolvedRef] = field(default_factory=list)
    # Total files submitted to this wave (set up front, not incremented).
    file_count: int = 0
    skipped_count: int = 0  # cache hits
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ── Single-file extraction ────────────────────────────────────────────────────
|
|
45
|
+
|
|
46
|
+
def _extract_file(
    file_path: str,
    framework: str,
    project_path: str,
    cache=None,
    semantic: bool = False,
) -> Optional[dict]:
    """Run all extractors on a single file.

    Returns a plain JSON-serializable dict, or None on hard failure.
    Results served from the cache carry a '_cache_hit' marker key.
    """
    # Fast path: reuse the cached extraction and skip parsing entirely.
    if cache is not None:
        hit = cache.get(file_path)
        if hit is not None:
            return {"_cache_hit": True, **hit}

    try:
        # Deferred imports: worker threads only pay for them on first use.
        from codebeacon.extract.routes import extract_routes
        from codebeacon.extract.services import extract_services
        from codebeacon.extract.entities import extract_entities
        from codebeacon.extract.components import extract_components
        from codebeacon.extract.dependencies import extract_dependencies

        route_list = extract_routes(file_path, framework, project_path)
        service_list, unresolved_refs = extract_services(file_path, framework)
        entity_list = extract_entities(file_path, framework)
        component_list = extract_components(file_path, framework, project_path)
        edge_list = extract_dependencies(file_path, framework)

        # Optional structured-comment extraction; its edges ride along with
        # the import edges.
        if semantic:
            from codebeacon.extract.semantic import extract_semantic_refs
            edge_list = edge_list + extract_semantic_refs(file_path, framework)

        payload: dict[str, Any] = {
            "routes": [_route_to_dict(r) for r in route_list],
            "services": [_service_to_dict(s) for s in service_list],
            "entities": [_entity_to_dict(e) for e in entity_list],
            "components": [_component_to_dict(c) for c in component_list],
            "import_edges": [_edge_to_dict(e) for e in edge_list],
            "unresolved": [_unresolved_to_dict(u) for u in unresolved_refs],
        }

        # Persist for the next incremental run.
        if cache is not None:
            payload_hash = cache.file_hash(file_path)
            cache.put(file_path, payload, payload_hash)

        return payload

    except Exception as exc:
        # Best-effort: one bad file must not sink the whole wave.
        warnings.warn(f"Extraction failed [{framework}] {file_path}: {exc}", stacklevel=2)
        return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
# ── Main public function ──────────────────────────────────────────────────────
|
|
104
|
+
|
|
105
|
+
def auto_wave(
    project: ProjectInfo,
    files: list[str],
    chunk_size: int = 300,
    max_parallel: int = 5,
    cache=None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
    semantic: bool = False,
) -> WaveResult:
    """Process all files in parallel chunks and merge results (Pass 1).

    Args:
        project: the ProjectInfo for the project being scanned
        files: absolute file paths to process
        chunk_size: files per wave chunk (controls peak memory)
        max_parallel: max ThreadPoolExecutor workers per chunk
        cache: optional Cache instance for incremental processing
        progress_callback: optional callable(processed_count, total_count)
        semantic: also run structured-comment (semantic) extraction

    Returns:
        WaveResult with all extraction data merged.
        Pass 2 (symbol resolve + graph wiring) is NOT done here.
    """
    merged = WaveResult(project=project, file_count=len(files))
    if not files:
        return merged

    total = len(files)
    done = 0

    # Walk the file list one chunk at a time to bound peak memory.
    for start in range(0, total, chunk_size):
        batch = files[start:start + chunk_size]
        outcomes = _process_chunk(
            batch, project.framework, project.path, cache, max_parallel, semantic
        )
        for outcome in outcomes:
            if outcome is None:
                # Hard extraction failure; already warned about downstream.
                continue
            if outcome.get("_cache_hit"):
                merged.skipped_count += 1
            _merge_file_result(outcome, merged)

        # Progress counts submitted files, including failures and cache hits.
        done += len(batch)
        if progress_callback:
            progress_callback(done, total)

    return merged
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _process_chunk(
    chunk: list[str],
    framework: str,
    project_path: str,
    cache,
    max_workers: int,
    semantic: bool = False,
) -> list[Optional[dict]]:
    """Extract every file in *chunk* concurrently; one entry per file.

    Entries arrive in completion order, not submission order; failures
    are recorded as None so callers can count them.
    """
    gathered: list[Optional[dict]] = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {
            pool.submit(_extract_file, path, framework, project_path, cache, semantic): path
            for path in chunk
        }
        for fut in concurrent.futures.as_completed(pending):
            try:
                gathered.append(fut.result())
            except Exception as exc:
                # _extract_file guards itself, so this catches executor-level
                # failures (e.g. submission/teardown errors).
                warnings.warn(f"Chunk worker failed for {pending[fut]}: {exc}", stacklevel=2)
                gathered.append(None)
    return gathered
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _merge_file_result(result: dict, wave: WaveResult) -> None:
    """Merge one file's extraction dict into the WaveResult."""
    # (payload key, dict→dataclass converter, destination list) triples.
    routing = (
        ("routes", _dict_to_route, wave.routes),
        ("services", _dict_to_service, wave.services),
        ("entities", _dict_to_entity, wave.entities),
        ("components", _dict_to_component, wave.components),
        ("import_edges", _dict_to_edge, wave.import_edges),
        ("unresolved", _dict_to_unresolved, wave.unresolved),
    )
    for key, convert, bucket in routing:
        bucket.extend(convert(item) for item in result.get(key, []))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
# ── Serialisation helpers (dataclass ↔ JSON-safe dict) ───────────────────────
|
|
193
|
+
|
|
194
|
+
def _route_to_dict(r: RouteInfo) -> dict:
|
|
195
|
+
return {
|
|
196
|
+
"method": r.method, "path": r.path, "handler": r.handler,
|
|
197
|
+
"source_file": r.source_file, "line": r.line,
|
|
198
|
+
"framework": r.framework, "prefix": r.prefix,
|
|
199
|
+
"tags": list(r.tags),
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
def _service_to_dict(s: ServiceInfo) -> dict:
|
|
203
|
+
return {
|
|
204
|
+
"name": s.name, "class_name": s.class_name,
|
|
205
|
+
"source_file": s.source_file, "line": s.line,
|
|
206
|
+
"framework": s.framework,
|
|
207
|
+
"methods": list(s.methods),
|
|
208
|
+
"dependencies": list(s.dependencies),
|
|
209
|
+
"annotations": list(s.annotations),
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
def _entity_to_dict(e: EntityInfo) -> dict:
|
|
213
|
+
return {
|
|
214
|
+
"name": e.name, "table_name": e.table_name,
|
|
215
|
+
"source_file": e.source_file, "line": e.line,
|
|
216
|
+
"framework": e.framework,
|
|
217
|
+
"fields": list(e.fields),
|
|
218
|
+
"relations": list(e.relations),
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
def _component_to_dict(c: ComponentInfo) -> dict:
|
|
222
|
+
return {
|
|
223
|
+
"name": c.name, "source_file": c.source_file, "line": c.line,
|
|
224
|
+
"framework": c.framework,
|
|
225
|
+
"props": list(c.props), "hooks": list(c.hooks), "imports": list(c.imports),
|
|
226
|
+
"is_page": c.is_page, "route_path": c.route_path,
|
|
227
|
+
}
|
|
228
|
+
|
|
229
|
+
def _edge_to_dict(e: Edge) -> dict:
|
|
230
|
+
return {
|
|
231
|
+
"source": e.source, "target": e.target,
|
|
232
|
+
"relation": e.relation, "confidence": e.confidence,
|
|
233
|
+
"confidence_score": e.confidence_score,
|
|
234
|
+
"source_file": e.source_file,
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
def _unresolved_to_dict(u: UnresolvedRef) -> dict:
|
|
238
|
+
return {
|
|
239
|
+
"source_node_id": u.source_node_id, "ref_type": u.ref_type,
|
|
240
|
+
"ref_name": u.ref_name, "framework": u.framework,
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _dict_to_route(d: dict) -> RouteInfo:
    """Rehydrate a RouteInfo from its JSON-safe dict form."""
    kwargs = dict(
        method=d["method"],
        path=d["path"],
        handler=d["handler"],
        source_file=d["source_file"],
        line=d["line"],
        framework=d["framework"],
        # prefix/tags may be absent in older cached payloads.
        prefix=d.get("prefix", ""),
        tags=d.get("tags", []),
    )
    return RouteInfo(**kwargs)
|
|
251
|
+
|
|
252
|
+
def _dict_to_service(d: dict) -> ServiceInfo:
    """Rehydrate a ServiceInfo from its JSON-safe dict form."""
    kwargs = dict(
        name=d["name"],
        class_name=d["class_name"],
        source_file=d["source_file"],
        line=d["line"],
        framework=d["framework"],
        # List fields default to empty for older cached payloads.
        methods=d.get("methods", []),
        dependencies=d.get("dependencies", []),
        annotations=d.get("annotations", []),
    )
    return ServiceInfo(**kwargs)
|
|
261
|
+
|
|
262
|
+
def _dict_to_entity(d: dict) -> EntityInfo:
    """Rehydrate an EntityInfo from its JSON-safe dict form."""
    kwargs = dict(
        name=d["name"],
        table_name=d["table_name"],
        source_file=d["source_file"],
        line=d["line"],
        framework=d["framework"],
        # List fields default to empty for older cached payloads.
        fields=d.get("fields", []),
        relations=d.get("relations", []),
    )
    return EntityInfo(**kwargs)
|
|
270
|
+
|
|
271
|
+
def _dict_to_component(d: dict) -> ComponentInfo:
    """Rehydrate a ComponentInfo from its JSON-safe dict form."""
    kwargs = dict(
        name=d["name"],
        source_file=d["source_file"],
        line=d["line"],
        framework=d["framework"],
        # Optional fields default for older cached payloads.
        props=d.get("props", []),
        hooks=d.get("hooks", []),
        imports=d.get("imports", []),
        is_page=d.get("is_page", False),
        route_path=d.get("route_path", ""),
    )
    return ComponentInfo(**kwargs)
|
|
279
|
+
|
|
280
|
+
def _dict_to_edge(d: dict) -> Edge:
    """Rehydrate an Edge from its JSON-safe dict form."""
    kwargs = dict(
        source=d["source"],
        target=d["target"],
        relation=d["relation"],
        confidence=d["confidence"],
        confidence_score=d["confidence_score"],
        source_file=d["source_file"],
    )
    return Edge(**kwargs)
|
|
287
|
+
|
|
288
|
+
def _dict_to_unresolved(d: dict) -> UnresolvedRef:
    """Rehydrate an UnresolvedRef from its JSON-safe dict form."""
    kwargs = dict(
        source_node_id=d["source_node_id"],
        ref_type=d["ref_type"],
        ref_name=d["ref_name"],
        framework=d["framework"],
    )
    return UnresolvedRef(**kwargs)
|
|
File without changes
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
"""Wiki generator: read the NetworkX graph, write per-project markdown articles.
|
|
2
|
+
|
|
3
|
+
Output structure:
|
|
4
|
+
<output_dir>/wiki/
|
|
5
|
+
index.md ← global index (short)
|
|
6
|
+
overview.md ← platform stats + cross-project
|
|
7
|
+
routes.md ← all routes table
|
|
8
|
+
cross-project/
|
|
9
|
+
connections.md ← cross-service edges
|
|
10
|
+
<project>/
|
|
11
|
+
index.md ← project index
|
|
12
|
+
routes.md ← project routes
|
|
13
|
+
controllers/<Name>.md
|
|
14
|
+
services/<Name>.md
|
|
15
|
+
entities/<Name>.md
|
|
16
|
+
components/<Name>.md
|
|
17
|
+
|
|
18
|
+
Public API:
|
|
19
|
+
generate_wiki(G, communities, output_dir) → None
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import os
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
import networkx as nx
|
|
29
|
+
|
|
30
|
+
from codebeacon.wiki import templates
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# ── Classification helpers ────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
_CONTROLLER_ANNOTATIONS = frozenset({
|
|
36
|
+
# Spring
|
|
37
|
+
"@Controller", "@RestController",
|
|
38
|
+
# NestJS
|
|
39
|
+
"@Controller",
|
|
40
|
+
# ASP.NET
|
|
41
|
+
"[ApiController]", "[Controller]",
|
|
42
|
+
# Generic
|
|
43
|
+
"Controller", "RestController",
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
_CONTROLLER_NAME_SUFFIXES = ("Controller", "Router", "Handler", "Resource")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _is_controller(label: str, annotations: list[str]) -> bool:
    """Heuristic: treat this class node as a controller rather than a service.

    A known controller annotation wins; otherwise fall back to the
    class-name suffix convention.
    """
    for ann in annotations:
        if ann in _CONTROLLER_ANNOTATIONS:
            return True
    return label.endswith(_CONTROLLER_NAME_SUFFIXES)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _safe_filename(label: str) -> str:
|
|
57
|
+
"""Strip characters that are unsafe in filenames."""
|
|
58
|
+
return "".join(c if c.isalnum() or c in "-_." else "_" for c in label)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# ── Node neighbour helpers ────────────────────────────────────────────────────
|
|
62
|
+
|
|
63
|
+
# Edge relation types treated as runtime call/usage dependencies.
_CALL_RELATIONS = frozenset({"calls", "injects", "depends"})
# Node types classified as persistence entities.
_ENTITY_TYPES = frozenset({"entity"})
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _predecessors_labels(G: nx.DiGraph, node_id: str, relations: frozenset[str]) -> list[str]:
|
|
68
|
+
"""Labels of predecessors connected via the given relation types."""
|
|
69
|
+
result = []
|
|
70
|
+
for pred in G.predecessors(node_id):
|
|
71
|
+
edge_data = G.edges[pred, node_id]
|
|
72
|
+
if edge_data.get("relation") in relations:
|
|
73
|
+
result.append(G.nodes[pred].get("label", pred))
|
|
74
|
+
return result
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _successors_labels(G: nx.DiGraph, node_id: str, relations: frozenset[str]) -> list[str]:
|
|
78
|
+
"""Labels of successors connected via the given relation types."""
|
|
79
|
+
result = []
|
|
80
|
+
for succ in G.successors(node_id):
|
|
81
|
+
edge_data = G.edges[node_id, succ]
|
|
82
|
+
if edge_data.get("relation") in relations:
|
|
83
|
+
result.append(G.nodes[succ].get("label", succ))
|
|
84
|
+
return result
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _related_entities(G: nx.DiGraph, node_id: str) -> list[str]:
    """Labels of successor nodes whose type marks them as entities.

    Note: despite the edge-relation mention elsewhere, the filter is on
    the successor node's 'type' attribute only.
    """
    labels = []
    for succ in G.successors(node_id):
        attrs = G.nodes[succ]
        if attrs.get("type") in _ENTITY_TYPES:
            labels.append(attrs.get("label", succ))
    return labels
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ── Cross-project connections ─────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
def _cross_project_edges(G: nx.DiGraph) -> list[dict[str, Any]]:
|
|
99
|
+
"""Edges that cross project boundaries."""
|
|
100
|
+
result = []
|
|
101
|
+
for src, tgt, data in G.edges(data=True):
|
|
102
|
+
src_proj = G.nodes[src].get("project", "")
|
|
103
|
+
tgt_proj = G.nodes[tgt].get("project", "")
|
|
104
|
+
if src_proj and tgt_proj and src_proj != tgt_proj:
|
|
105
|
+
result.append({
|
|
106
|
+
"source": G.nodes[src].get("label", src),
|
|
107
|
+
"target": G.nodes[tgt].get("label", tgt),
|
|
108
|
+
"relation": data.get("relation", ""),
|
|
109
|
+
"source_project": src_proj,
|
|
110
|
+
"target_project": tgt_proj,
|
|
111
|
+
})
|
|
112
|
+
return result
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# ── Route collector ───────────────────────────────────────────────────────────
|
|
116
|
+
|
|
117
|
+
def _collect_routes(G: nx.DiGraph) -> dict[str, list[dict[str, Any]]]:
|
|
118
|
+
"""Collect route nodes grouped by project."""
|
|
119
|
+
routes_by_project: dict[str, list[dict[str, Any]]] = {}
|
|
120
|
+
for node_id, data in G.nodes(data=True):
|
|
121
|
+
if data.get("type") != "route":
|
|
122
|
+
continue
|
|
123
|
+
project = data.get("project", "_unknown")
|
|
124
|
+
routes_by_project.setdefault(project, []).append({
|
|
125
|
+
"method": data.get("method", ""),
|
|
126
|
+
"path": data.get("path", ""),
|
|
127
|
+
"handler": data.get("label", ""),
|
|
128
|
+
"source_file": data.get("source_file", ""),
|
|
129
|
+
"framework": data.get("framework", ""),
|
|
130
|
+
"tags": data.get("tags", []),
|
|
131
|
+
})
|
|
132
|
+
return routes_by_project
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ── Main generator ────────────────────────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
def generate_wiki(
    G: nx.DiGraph,
    communities: dict[str, int],
    output_dir: str | Path,
) -> None:
    """Generate full wiki from the knowledge graph.

    Writes per-project article trees under <output_dir>/wiki/<project>/ and
    then delegates the global files (index, overview, routes, cross-project)
    to wiki/index.py's generate_index.

    Args:
        G: built NetworkX DiGraph (output of graph/build.py + enrich.py)
        communities: node_id → community_id (output of graph/cluster.py)
        output_dir: root output directory (e.g. /path/to/project/.codebeacon)
    """
    wiki_dir = Path(output_dir) / "wiki"
    wiki_dir.mkdir(parents=True, exist_ok=True)

    # Group nodes by project and type
    projects: dict[str, dict[str, list[tuple[str, dict]]]] = {}
    # project → type → [(node_id, data)]

    for node_id, data in G.nodes(data=True):
        project = data.get("project", "_unknown")
        ntype = data.get("type", "unknown")
        projects.setdefault(project, {}).setdefault(ntype, []).append((node_id, data))

    # Collect routes for routes.md (all projects)
    routes_by_project = _collect_routes(G)

    # Per-project stats accumulator for overview
    project_summary: list[dict[str, Any]] = []

    for project_name, type_map in sorted(projects.items()):
        proj_dir = wiki_dir / project_name
        _write_project(G, project_name, type_map, routes_by_project, proj_dir)

        # Collect summary stats
        route_count = len(routes_by_project.get(project_name, []))
        service_count = 0
        entity_count = 0
        component_count = 0
        framework = ""

        # Only non-controller class nodes count as services; controllers are
        # reflected via route_count instead. The framework is taken from the
        # last class node that declares one.
        for node_id, data in type_map.get("class", []):
            annotations = data.get("annotations", [])
            if not _is_controller(data.get("label", ""), annotations):
                service_count += 1
            fw = data.get("framework", "")
            if fw:
                framework = fw

        entity_count = len(type_map.get("entity", []))
        component_count = len(type_map.get("component", []))

        project_summary.append({
            "name": project_name,
            "framework": framework,
            "route_count": route_count,
            "service_count": service_count,
            "entity_count": entity_count,
            "component_count": component_count,
        })

    # Global stats
    total_routes = sum(len(rs) for rs in routes_by_project.values())
    total_services = sum(p["service_count"] for p in project_summary)
    total_entities = sum(p["entity_count"] for p in project_summary)
    total_components = sum(p["component_count"] for p in project_summary)
    total_stats = {
        "nodes": G.number_of_nodes(),
        "edges": G.number_of_edges(),
        "communities": len(set(communities.values())) if communities else 0,
        "routes": total_routes,
        "services": total_services,
        "entities": total_entities,
        "components": total_components,
    }

    # Cross-project connections
    cross_edges = _cross_project_edges(G)

    # Write global files (delegated to index.py's generate_index).
    # Local import to avoid a module-level import cycle with wiki/index.py.
    from codebeacon.wiki.index import generate_index
    generate_index(
        wiki_dir=wiki_dir,
        project_summary=project_summary,
        routes_by_project=routes_by_project,
        cross_edges=cross_edges,
        total_stats=total_stats,
    )
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ── Per-project writer ────────────────────────────────────────────────────────
|
|
228
|
+
|
|
229
|
+
def _write_project(
    G: nx.DiGraph,
    project_name: str,
    type_map: dict[str, list[tuple[str, dict]]],
    routes_by_project: dict[str, list[dict[str, Any]]],
    proj_dir: Path,
) -> None:
    """Write all wiki files for one project.

    Emits controllers/, services/, entities/, components/ article files,
    then routes.md (if any routes) and index.md under *proj_dir*.
    """
    proj_dir.mkdir(parents=True, exist_ok=True)

    controllers: list[str] = []
    services: list[str] = []

    # Class nodes → controller or service
    for node_id, data in type_map.get("class", []):
        label = data.get("label", node_id)
        annotations = data.get("annotations", [])
        methods = data.get("methods", [])
        dependencies = data.get("dependencies", [])
        source_file = data.get("source_file", "")
        framework = data.get("framework", "")

        called_by = _predecessors_labels(G, node_id, _CALL_RELATIONS)
        calls = _successors_labels(G, node_id, _CALL_RELATIONS)

        if _is_controller(label, annotations):
            controllers.append(label)
            # Gather routes for this controller.
            # NOTE(review): substring match on the handler string — a label
            # that is a prefix of another controller's name may over-match.
            ctrl_routes = [
                r for r in routes_by_project.get(project_name, [])
                if label in r.get("handler", "")
            ]
            content = templates.controller_article(
                label=label,
                routes=ctrl_routes,
                source_file=source_file,
                called_by=called_by,
                calls=calls,
                project_name=project_name,
            )
            _write_file(proj_dir / "controllers" / f"{_safe_filename(label)}.md", content)
        else:
            services.append(label)
            entities = _related_entities(G, node_id)
            content = templates.service_article(
                label=label,
                methods=methods,
                dependencies=dependencies,
                source_file=source_file,
                called_by=called_by,
                calls=calls,
                related_entities=entities,
                annotations=annotations,
                project_name=project_name,
            )
            _write_file(proj_dir / "services" / f"{_safe_filename(label)}.md", content)

    # Entity nodes
    entity_names: list[str] = []
    for node_id, data in type_map.get("entity", []):
        label = data.get("label", node_id)
        entity_names.append(label)
        table_name = data.get("table_name", "")
        fields = data.get("fields", [])
        relations = data.get("relations", [])
        source_file = data.get("source_file", "")
        framework = data.get("framework", "")
        # Entities are "used by" anything importing or calling them.
        used_by = _predecessors_labels(G, node_id, frozenset({"imports", "imports_from", "calls"}))

        content = templates.entity_article(
            label=label,
            table_name=table_name,
            fields=fields,
            relations=relations,
            source_file=source_file,
            used_by=used_by,
            framework=framework,
            project_name=project_name,
        )
        _write_file(proj_dir / "entities" / f"{_safe_filename(label)}.md", content)

    # Component nodes
    component_names: list[str] = []
    for node_id, data in type_map.get("component", []):
        label = data.get("label", node_id)
        component_names.append(label)
        props = data.get("props", [])
        hooks = data.get("hooks", [])
        imports_list = data.get("imports", [])
        is_page = data.get("is_page", False)
        route_path = data.get("route_path", "")
        source_file = data.get("source_file", "")
        framework = data.get("framework", "")

        content = templates.component_article(
            label=label,
            props=props,
            hooks=hooks,
            imports=imports_list,
            is_page=is_page,
            route_path=route_path,
            source_file=source_file,
            framework=framework,
            project_name=project_name,
        )
        _write_file(proj_dir / "components" / f"{_safe_filename(label)}.md", content)

    # Detect framework from any node in this project (first one found wins).
    framework = ""
    for type_nodes in type_map.values():
        for _, data in type_nodes:
            fw = data.get("framework", "")
            if fw:
                framework = fw
                break
        if framework:
            break

    # Per-project routes.md
    proj_routes = routes_by_project.get(project_name, [])
    if proj_routes:
        content = templates.routes_summary({project_name: proj_routes})
        _write_file(proj_dir / "routes.md", content)

    # Per-project index.md
    stats = {
        "routes": len(proj_routes),
        "services": len(services),
        "entities": len(entity_names),
        "components": len(component_names),
    }
    content = templates.project_index(
        project_name=project_name,
        framework=framework,
        stats=stats,
        controllers=controllers,
        services=services,
        entities=entity_names,
        components=component_names,
    )
    _write_file(proj_dir / "index.md", content)
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
# ── File writer ───────────────────────────────────────────────────────────────
|
|
373
|
+
|
|
374
|
+
def _write_file(path: Path, content: str) -> None:
|
|
375
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
376
|
+
path.write_text(content, encoding="utf-8")
|