codebeacon 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,812 @@
|
|
|
1
|
+
"""Obsidian vault export with 12-step post-processing.
|
|
2
|
+
|
|
3
|
+
Generates a fully organised Obsidian vault from the codebeacon knowledge graph.
|
|
4
|
+
|
|
5
|
+
Public API:
|
|
6
|
+
generate_obsidian_vault(G, communities, output_dir, obsidian_dir=None) → int
|
|
7
|
+
Returns number of notes written.
|
|
8
|
+
|
|
9
|
+
12-step pipeline:
|
|
10
|
+
(1) Basic note generation — one .md per graph node
|
|
11
|
+
(2) Broken wikilink fix — case-mismatch normalisation
|
|
12
|
+
(3) Cross-language imports edge removal — Java↔TS/TSX, preserves calls_api
|
|
13
|
+
(4) Community tag fix — Community_N → service folder name from source_file
|
|
14
|
+
(5) source_file-based service subfolder move
|
|
15
|
+
(6) Same source_file dedup — priority: .java.md > bare > _N
|
|
16
|
+
(7) Members section injection — from methods/fields metadata
|
|
17
|
+
(8) Remaining root-level notes → service folder
|
|
18
|
+
(9) Service index hub note creation + backlinks from all notes
|
|
19
|
+
(10) Wikilink qualification — [[X]] → [[svc/X]]
|
|
20
|
+
(11) Cross-service false link removal — preserves calls_api / shares_db_entity
|
|
21
|
+
(12) .obsidian/graph.json colour groups — one colour per service
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import hashlib
|
|
27
|
+
import json
|
|
28
|
+
import re
|
|
29
|
+
import shutil
|
|
30
|
+
from collections import defaultdict
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any
|
|
33
|
+
|
|
34
|
+
import networkx as nx
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# ── Regexes ────────────────────────────────────────────────────────────────────

# Frontmatter extractors — anchored per-line via MULTILINE, capture the quoted value.
_SOURCE_RE = re.compile(r'^source_file:\s*["\']([^"\']*)["\']', re.MULTILINE)
_PROJECT_RE = re.compile(r'^community:\s*["\']([^"\']*)["\']', re.MULTILINE)
# Community rewriters used by step 4: frontmatter value, YAML tag-list entry,
# and inline "#community/..." body tag.
_COMM_FRONT = re.compile(r'^community:\s*["\'][^"\']*["\']', re.MULTILINE)
_COMM_YAML = re.compile(r' - community/[^\n]+\n')
_COMM_BODY = re.compile(r'#community/\S+')
# Unqualified wikilink [[X]] — the target may not contain "]", "/", "|" or "#".
_WIKILINK_RE = re.compile(r'\[\[([^\]/|#\]]+?)\]\]')
# Numeric dedup suffix on a note stem, e.g. "Foo_2".
_SUFFIX_RE = re.compile(r'^(.+?)_\d+$')

_IMPORT_RELS = frozenset({"imports", "imports_from"})
_KEEP_RELS = frozenset({"calls_api", "shares_db_entity"})  # always preserved cross-service

# Language-file-extension sets for cross-language filter
_JAVA_EXTS = frozenset({".java", ".kt"})
_TS_EXTS = frozenset({".ts", ".tsx", ".js", ".jsx"})
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# ── Public entry point ─────────────────────────────────────────────────────────
|
|
56
|
+
|
|
57
|
+
def generate_obsidian_vault(
    G: nx.DiGraph,
    communities: dict[str, int],
    output_dir: str | Path,
    obsidian_dir: str | Path | None = None,
) -> int:
    """Generate a fully post-processed Obsidian vault.

    Args:
        G: knowledge graph (output of graph/build.py + enrich.py)
        communities: node_id → community_id mapping
        output_dir: codebeacon output root (.codebeacon/)
        obsidian_dir: override vault path; defaults to output_dir/obsidian/

    Returns:
        Total number of notes written.
    """
    vault = Path(obsidian_dir) if obsidian_dir else Path(output_dir) / "obsidian"
    vault.mkdir(parents=True, exist_ok=True)

    # The 12-step pipeline: each step mutates the vault in place, in order.
    pipeline = (
        lambda: _step1_generate_notes(G, communities, vault),   # basic note generation
        lambda: _step2_fix_wikilinks(vault),                    # broken wikilink normalisation
        lambda: _step3_remove_cross_language(vault),            # Java↔TS import removal
        lambda: _step4_fix_community_tags(vault),               # Community_N tag → service name
        lambda: _step5_move_to_subfolders(vault),               # service subfolder move
        lambda: _step6_dedup_notes(vault),                      # same source_file dedup
        lambda: _step7_inject_members(G, vault),                # Members section injection
        lambda: _step8_move_remaining(vault),                   # remaining root-level notes
        lambda: _step9_hub_notes(vault),                        # service hubs + backlinks
        lambda: _step10_qualify_wikilinks(vault),               # [[X]] → [[svc/X]]
        lambda: _step11_remove_cross_service_links(vault),      # cross-service false links
        lambda: _step12_graph_json(vault),                      # graph colour groups
    )
    for step in pipeline:
        step()

    # Count whatever survived post-processing.
    return sum(1 for _ in vault.rglob("*.md"))
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# ── Step 1: Generate notes ─────────────────────────────────────────────────────
|
|
118
|
+
|
|
119
|
+
def _step1_generate_notes(
    G: nx.DiGraph,
    communities: dict[str, int],
    vault: Path,
) -> None:
    """One Obsidian note per graph node (skipping external stubs)."""

    # Index edges once so each node's note can list both directions.
    outgoing: dict[str, list[tuple[str, dict]]] = defaultdict(list)
    incoming: dict[str, list[tuple[str, dict]]] = defaultdict(list)
    for u, v, edata in G.edges(data=True):
        outgoing[u].append((v, edata))
        incoming[v].append((u, edata))

    for nid, ndata in G.nodes(data=True):
        node_type = ndata.get("type", "unknown")
        if node_type == "external":
            # External stub nodes get no note of their own.
            continue

        title = ndata.get("label", nid)
        rendered = _build_note(
            node_id=nid,
            label=title,
            ntype=node_type,
            data=ndata,
            project=ndata.get("project", "_unknown"),
            source_file=ndata.get("source_file", ""),
            framework=ndata.get("framework", ""),
            community_id=communities.get(nid, -1),
            out_edges=outgoing.get(nid, []),
            in_edges=incoming.get(nid, []),
            G=G,
        )
        (vault / f"{_safe_note_name(title)}.md").write_text(rendered, encoding="utf-8")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _build_note(
    node_id: str,
    label: str,
    ntype: str,
    data: dict,
    project: str,
    source_file: str,
    framework: str,
    community_id: int,
    out_edges: list[tuple[str, dict]],
    in_edges: list[tuple[str, dict]],
    G: nx.DiGraph,
) -> str:
    """Render a single Obsidian note from node data.

    NOTE(review): later pipeline steps (3, 4, 7, 11) re-parse the exact text
    emitted here with regexes, so the output format must stay stable.
    `community_id` is currently unused here — kept for interface stability.
    """

    # ── Frontmatter ──
    lines = [
        "---",
        f'source_file: "{source_file}"',
        f'type: "code"',
        f'community: "{project}"',
        "tags:",
        " - codebeacon/code",
        f" - codebeacon/{ntype}",
        " - codebeacon/EXTRACTED",
        f" - community/{project}",
        "---",
        "",
        f"# {label}",
        "",
    ]

    # ── Type header ──
    type_label = _type_display(ntype, data, framework)
    lines.append(f"**Type:** {type_label}")
    if framework:
        lines.append(f"**Framework:** {framework}")
    if source_file:
        lines.append(f"**Source:** `{source_file}`")
    lines.append("")

    # ── Type-specific body ──
    if ntype == "class":
        _append_class_body(lines, data)
    elif ntype == "entity":
        _append_entity_body(lines, data)
    elif ntype == "component":
        _append_component_body(lines, data)
    elif ntype == "route":
        _append_route_body(lines, data)

    # ── Connections ──
    all_conn_lines = []

    # Outgoing edges, sorted by neighbour id for stable output.
    for tgt_id, edata in sorted(out_edges, key=lambda x: x[0]):
        tgt_data = G.nodes.get(tgt_id, {})
        if tgt_data.get("type") == "external":
            continue
        tgt_label = tgt_data.get("label", tgt_id)
        tgt_name = _safe_note_name(tgt_label)
        relation = edata.get("relation", "")
        conf = edata.get("confidence", "EXTRACTED")
        all_conn_lines.append(f"- [[{tgt_name}]] - `{relation}` [{conf}]")

    # Incoming edges (reverse direction labelled)
    for src_id, edata in sorted(in_edges, key=lambda x: x[0]):
        src_data = G.nodes.get(src_id, {})
        if src_data.get("type") == "external":
            continue
        src_label = src_data.get("label", src_id)
        src_name = _safe_note_name(src_label)
        relation = edata.get("relation", "")
        conf = edata.get("confidence", "EXTRACTED")
        # Label incoming as reverse perspective
        reverse = _reverse_relation(relation)
        all_conn_lines.append(f"- [[{src_name}]] - `{reverse}` [{conf}]")

    if all_conn_lines:
        lines += ["## Connections", ""]
        # sorted(set(...)) dedups identical bullets and fixes the order.
        lines += sorted(set(all_conn_lines))
        lines.append("")

    # ── Footer tags + service backlink ──
    lines.append(f"#codebeacon/code #codebeacon/{ntype} #community/{project}")
    lines.append("")
    lines.append(f"**Service:** [[{project}]]")

    return "\n".join(lines) + "\n"
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _append_class_body(lines: list[str], data: dict) -> None:
|
|
257
|
+
annotations = data.get("annotations", [])
|
|
258
|
+
methods = data.get("methods", [])
|
|
259
|
+
dependencies = data.get("dependencies", [])
|
|
260
|
+
|
|
261
|
+
if annotations:
|
|
262
|
+
lines += ["### Annotations", ""]
|
|
263
|
+
for ann in annotations:
|
|
264
|
+
lines.append(f"- `{ann}`")
|
|
265
|
+
lines.append("")
|
|
266
|
+
|
|
267
|
+
if methods:
|
|
268
|
+
lines += ["### Methods", ""]
|
|
269
|
+
for m in methods:
|
|
270
|
+
lines.append(f"- `{m}()`")
|
|
271
|
+
lines.append("")
|
|
272
|
+
|
|
273
|
+
if dependencies:
|
|
274
|
+
lines += ["### Fields (Injected)", ""]
|
|
275
|
+
for dep in dependencies:
|
|
276
|
+
lines.append(f"- `{dep}`")
|
|
277
|
+
lines.append("")
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def _append_entity_body(lines: list[str], data: dict) -> None:
|
|
281
|
+
table_name = data.get("table_name", "")
|
|
282
|
+
fields = data.get("fields", [])
|
|
283
|
+
relations = data.get("relations", [])
|
|
284
|
+
|
|
285
|
+
if table_name:
|
|
286
|
+
lines.append(f"**Table:** `{table_name}`")
|
|
287
|
+
lines.append("")
|
|
288
|
+
|
|
289
|
+
if fields:
|
|
290
|
+
lines += ["### Fields", ""]
|
|
291
|
+
for f in fields:
|
|
292
|
+
name = f.get("name", "")
|
|
293
|
+
ftype = f.get("type", "")
|
|
294
|
+
anns = f.get("annotations", [])
|
|
295
|
+
ann_str = f" ({', '.join(anns)})" if anns else ""
|
|
296
|
+
lines.append(f"- `{ftype} {name}`{ann_str}")
|
|
297
|
+
lines.append("")
|
|
298
|
+
|
|
299
|
+
if relations:
|
|
300
|
+
lines += ["### Relations", ""]
|
|
301
|
+
for r in relations:
|
|
302
|
+
rtype = r.get("type", "")
|
|
303
|
+
target = r.get("target", "")
|
|
304
|
+
lines.append(f"- `{rtype}` → `{target}`")
|
|
305
|
+
lines.append("")
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _append_component_body(lines: list[str], data: dict) -> None:
|
|
309
|
+
props = data.get("props", [])
|
|
310
|
+
hooks = data.get("hooks", [])
|
|
311
|
+
is_page = data.get("is_page", False)
|
|
312
|
+
route_path = data.get("route_path", "")
|
|
313
|
+
|
|
314
|
+
if is_page and route_path:
|
|
315
|
+
lines.append(f"**Route:** `{route_path}`")
|
|
316
|
+
lines.append("")
|
|
317
|
+
|
|
318
|
+
if props:
|
|
319
|
+
lines += ["### Props", ""]
|
|
320
|
+
for p in props:
|
|
321
|
+
lines.append(f"- `{p}`")
|
|
322
|
+
lines.append("")
|
|
323
|
+
|
|
324
|
+
if hooks:
|
|
325
|
+
lines += ["### Hooks", ""]
|
|
326
|
+
for h in hooks:
|
|
327
|
+
lines.append(f"- `{h}`")
|
|
328
|
+
lines.append("")
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _append_route_body(lines: list[str], data: dict) -> None:
|
|
332
|
+
method = data.get("method", "")
|
|
333
|
+
path = data.get("path", "")
|
|
334
|
+
tags = data.get("tags", [])
|
|
335
|
+
|
|
336
|
+
if method and path:
|
|
337
|
+
lines.append(f"**Route:** `{method} {path}`")
|
|
338
|
+
if tags:
|
|
339
|
+
lines.append(f"**Tags:** {', '.join(tags)}")
|
|
340
|
+
lines.append("")
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _type_display(ntype: str, data: dict, framework: str) -> str:
|
|
344
|
+
"""Human-readable type label."""
|
|
345
|
+
if ntype == "class":
|
|
346
|
+
anns = data.get("annotations", [])
|
|
347
|
+
for a in anns:
|
|
348
|
+
al = a.lower()
|
|
349
|
+
if "restcontroller" in al or "controller" in al:
|
|
350
|
+
return "REST Controller"
|
|
351
|
+
if "service" in al:
|
|
352
|
+
return "Service"
|
|
353
|
+
if "repository" in al:
|
|
354
|
+
return "Repository"
|
|
355
|
+
if "component" in al:
|
|
356
|
+
return "Component"
|
|
357
|
+
label = data.get("label", "")
|
|
358
|
+
if label.endswith("Controller"):
|
|
359
|
+
return "Controller"
|
|
360
|
+
if label.endswith("Repository"):
|
|
361
|
+
return "Repository"
|
|
362
|
+
return "Service"
|
|
363
|
+
mapping = {
|
|
364
|
+
"entity": "Entity",
|
|
365
|
+
"component": "Frontend Component",
|
|
366
|
+
"route": "API Route",
|
|
367
|
+
}
|
|
368
|
+
return mapping.get(ntype, ntype.title())
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _reverse_relation(relation: str) -> str:
|
|
372
|
+
"""Invert an edge label for the incoming-edge perspective."""
|
|
373
|
+
inv = {
|
|
374
|
+
"imports": "imported_by",
|
|
375
|
+
"imports_from": "imported_by",
|
|
376
|
+
"calls": "called_by",
|
|
377
|
+
"injects": "injected_by",
|
|
378
|
+
"calls_api": "api_called_by",
|
|
379
|
+
"shares_db_entity": "shares_db_entity",
|
|
380
|
+
}
|
|
381
|
+
return inv.get(relation, f"←{relation}")
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _safe_note_name(label: str) -> str:
|
|
385
|
+
"""Convert node label to a safe filename stem (no path separators)."""
|
|
386
|
+
# Replace characters that confuse Obsidian wikilinks
|
|
387
|
+
return re.sub(r'[/\\#^|[\]]', "_", label).strip()
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
# ── Step 2: Fix broken wikilinks ──────────────────────────────────────────────
|
|
391
|
+
|
|
392
|
+
def _step2_fix_wikilinks(vault: Path) -> None:
    """Normalise wikilinks whose target has a case mismatch."""

    def _norm(s: str) -> str:
        # Case/spacing/underscore/hyphen-insensitive comparison key.
        return re.sub(r"[\s_-]", "", s).lower()

    stems = {f.stem for f in vault.glob("*.md")}
    canonical = {_norm(stem): stem for stem in stems}

    # Pass 1: collect broken-link → actual-note mapping.
    fixes: dict[str, str] = {}
    for note in vault.glob("*.md"):
        for match in _WIKILINK_RE.finditer(note.read_text(errors="ignore")):
            target = match.group(1)
            if target not in stems and _norm(target) in canonical:
                fixes[target] = canonical[_norm(target)]

    if not fixes:
        return

    # Pass 2: rewrite only the files that actually change.
    for note in vault.glob("*.md"):
        text = note.read_text(errors="ignore")
        patched = text
        for broken, actual in fixes.items():
            patched = patched.replace(f"[[{broken}]]", f"[[{actual}]]")
        if patched != text:
            note.write_text(patched, encoding="utf-8")
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
# ── Step 3: Cross-language imports removal ────────────────────────────────────
|
|
423
|
+
|
|
424
|
+
def _step3_remove_cross_language(vault: Path) -> None:
|
|
425
|
+
"""Remove Java↔TS import edges from notes. Preserves calls_api."""
|
|
426
|
+
|
|
427
|
+
# Patterns that match Java→TS or TS→Java import connection lines
|
|
428
|
+
# e.g.: - [[SomeJavaClass.java]] - `imports_from` [EXTRACTED]
|
|
429
|
+
ts_drop = re.compile(r"^- \[\[[^\]]*\.(?:tsx?|jsx?)\]\] - `imports(?:_from)?` .*\n?", re.MULTILINE)
|
|
430
|
+
java_drop = re.compile(r"^- \[\[[^\]]*\.(?:java|kt)\]\] - `imports(?:_from)?` .*\n?", re.MULTILINE)
|
|
431
|
+
|
|
432
|
+
for md in vault.glob("*.md"):
|
|
433
|
+
name = md.name
|
|
434
|
+
content = md.read_text(errors="ignore")
|
|
435
|
+
new_c = content
|
|
436
|
+
|
|
437
|
+
if name.endswith((".ts.md", ".tsx.md", ".js.md", ".jsx.md")):
|
|
438
|
+
new_c = ts_drop.sub("", new_c)
|
|
439
|
+
elif name.endswith((".java.md", ".kt.md")):
|
|
440
|
+
new_c = java_drop.sub("", new_c)
|
|
441
|
+
|
|
442
|
+
if new_c != content:
|
|
443
|
+
md.write_text(new_c, encoding="utf-8")
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
# ── Step 4: Fix community tags ────────────────────────────────────────────────
|
|
447
|
+
|
|
448
|
+
def _step4_fix_community_tags(vault: Path) -> None:
    """Normalise community tags to the service folder name.

    Reads the `community:` frontmatter value and rewrites all three places a
    community appears: the frontmatter value itself, the YAML tag-list entry,
    and the inline `#community/...` body tag. Notes without a community value
    get the markers stripped instead.

    NOTE(review): the historical summary said "from source_file", but the
    folder is actually taken from the `community:` frontmatter (_PROJECT_RE).
    """
    for md in vault.glob("*.md"):
        content = md.read_text(errors="ignore")
        m = _PROJECT_RE.search(content)
        folder = m.group(1) if m and m.group(1) else None

        new_c = content
        if folder:
            # Rewrite all three community markers consistently.
            new_c = _COMM_FRONT.sub(f'community: "{folder}"', new_c)
            new_c = _COMM_YAML.sub(f" - community/{folder}\n", new_c)
            new_c = _COMM_BODY.sub(f"#community/{folder}", new_c)
        else:
            # No community recorded — blank the value and drop the tags.
            new_c = _COMM_FRONT.sub('community: ""', new_c)
            new_c = _COMM_YAML.sub("", new_c)
            new_c = _COMM_BODY.sub("", new_c)

        if new_c != content:
            md.write_text(new_c, encoding="utf-8")
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
# ── Step 5: Move to service subfolders ───────────────────────────────────────
|
|
471
|
+
|
|
472
|
+
def _step5_move_to_subfolders(vault: Path) -> None:
    """Move root-level notes into <vault>/<service>/ based on source_file."""
    # Materialise the glob first — we rename/delete while iterating.
    for note in list(vault.glob("*.md")):
        text = note.read_text(errors="ignore")
        match = _PROJECT_RE.search(text)
        service = match.group(1) if match and match.group(1) else "_unknown"

        target_dir = vault / service
        target_dir.mkdir(exist_ok=True)
        target = target_dir / note.name

        if target.exists():
            # Collision — keep the existing note; step 6 dedups anyway.
            note.unlink()
        else:
            note.rename(target)
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
# ── Step 6: Deduplicate same source_file ──────────────────────────────────────
|
|
492
|
+
|
|
493
|
+
def _step6_dedup_notes(vault: Path) -> None:
    """Keep one note per source_file; remove duplicates.

    Priority: file-extension.md > bare name > _N suffix
    (see _pick_primary). Wikilinks that pointed at a deleted duplicate are
    rewritten to the surviving note's stem.
    """
    # Group notes by their source_file frontmatter value.
    by_src: dict[str, list[Path]] = defaultdict(list)

    for md in vault.rglob("*.md"):
        content = md.read_text(errors="ignore")
        m = _SOURCE_RE.search(content)
        if m and m.group(1):
            by_src[m.group(1)].append(md)

    # deleted stem → surviving stem, for link rewriting below.
    remap: dict[str, str] = {}

    for sf, files in by_src.items():
        if len(files) < 2:
            continue
        primary = _pick_primary(files)
        for f in files:
            if f != primary:
                remap[f.stem] = primary.stem
                f.unlink()

    # Rewrite wikilinks pointing at deleted notes
    if remap:
        for md in vault.rglob("*.md"):
            content = md.read_text(errors="ignore")
            new_c = content
            for old, new in remap.items():
                new_c = new_c.replace(f"[[{old}]]", f"[[{new}]]")
            if new_c != content:
                md.write_text(new_c, encoding="utf-8")
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _pick_primary(files: list[Path]) -> Path:
    """Choose the canonical note from a group with the same source_file."""
    # 1st choice: a stem carrying a real source-file extension (Foo.java.md),
    # checked in language-priority order (extension order, then file order).
    for extension in (".java", ".kt", ".ts", ".tsx", ".js", ".py", ".go", ".cs", ".rs", ".rb", ".php"):
        match = next((f for f in files if f.stem.endswith(extension)), None)
        if match is not None:
            return match
    # 2nd choice: a bare name without a numeric "_N" dedup suffix.
    for candidate in files:
        if _SUFFIX_RE.match(candidate.stem) is None:
            return candidate
    # Last resort: first of the group.
    return files[0]
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
# ── Step 7: Members section injection ────────────────────────────────────────
|
|
543
|
+
|
|
544
|
+
def _step7_inject_members(G: nx.DiGraph, vault: Path) -> None:
    """Add ## Members section to notes from class node methods/fields metadata.

    In codebeacon's graph there are no method child-nodes; method names are
    stored directly in the class node's `methods` metadata list.
    """
    # Build: source_file → merged member list. Methods render as ".name()",
    # injected fields verbatim; merged across all nodes sharing a source_file.
    sf_methods: dict[str, list[str]] = {}

    for node_id, data in G.nodes(data=True):
        sf = data.get("source_file", "")
        if not sf:
            continue
        methods = data.get("methods", [])
        fields = data.get("dependencies", [])  # injected fields
        members = [f".{m}()" for m in methods] + [f"{dep}" for dep in fields]
        if members:
            existing = sf_methods.get(sf, [])
            # dict.fromkeys keeps insertion order while dropping duplicates.
            sf_methods[sf] = list(dict.fromkeys(existing + members))

    # Build: source_file → vault note path (from each note's frontmatter).
    sf_to_note: dict[str, Path] = {}
    for md in vault.rglob("*.md"):
        content = md.read_text(errors="ignore")
        m = _SOURCE_RE.search(content)
        if m and m.group(1):
            sf_to_note[m.group(1)] = md

    # Boilerplate accessors / Lombok-style members are noise — skip them.
    # (Only matches the ".method()" form produced above, never plain fields.)
    _SKIP = re.compile(r"^\.(?:get[A-Z]|set[A-Z]|is[A-Z]|has[A-Z]|builder|toString|hashCode|equals|canEqual)")

    for sf, members in sf_methods.items():
        note = sf_to_note.get(sf)
        if not note:
            continue

        real = sorted(set(m for m in members if not _SKIP.match(m)))
        if not real:
            continue

        content = note.read_text(errors="ignore")
        if "## Members" in content:
            continue  # already injected — keeps the step idempotent

        member_block = "\n## Members\n" + "\n".join(f"- `{m}`" for m in real) + "\n"

        # Insert before ## Connections or before footer tags
        conn_match = re.search(r"\n## Connections\n", content)
        tag_match = re.search(r"\n#codebeacon/", content)
        # Fallback: append at the end of the trimmed content.
        pos = conn_match.start() if conn_match else (tag_match.start() if tag_match else len(content.rstrip()))

        content = content[:pos] + member_block + content[pos:]
        note.write_text(content, encoding="utf-8")
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
# ── Step 8: Move remaining root-level notes ───────────────────────────────────
|
|
599
|
+
|
|
600
|
+
def _step8_move_remaining(vault: Path) -> None:
    """Move any notes still at root level to their service subfolder."""
    # Materialise first — we rename/delete while iterating.
    for note in list(vault.glob("*.md")):
        match = _PROJECT_RE.search(note.read_text(errors="ignore"))
        if not (match and match.group(1)):
            # Orphan with no community frontmatter — delete it.
            note.unlink()
            continue

        target_dir = vault / match.group(1)
        target_dir.mkdir(exist_ok=True)
        target = target_dir / note.name
        if target.exists():
            # Collision — the subfolder copy wins.
            note.unlink()
        else:
            note.rename(target)
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# ── Step 9: Hub notes + backlinks ─────────────────────────────────────────────
|
|
621
|
+
|
|
622
|
+
def _step9_hub_notes(vault: Path) -> None:
|
|
623
|
+
"""Create a service/<service>.md index hub note + add backlinks."""
|
|
624
|
+
|
|
625
|
+
service_dirs = [d for d in vault.iterdir() if d.is_dir() and not d.name.startswith(".")]
|
|
626
|
+
|
|
627
|
+
for svc_dir in service_dirs:
|
|
628
|
+
svc = svc_dir.name
|
|
629
|
+
notes = sorted(svc_dir.glob("*.md"), key=lambda f: f.name)
|
|
630
|
+
|
|
631
|
+
# Hub note content
|
|
632
|
+
lines = [
|
|
633
|
+
"---",
|
|
634
|
+
f'type: "folder-index"',
|
|
635
|
+
f'community: "{svc}"',
|
|
636
|
+
"tags:",
|
|
637
|
+
" - codebeacon/folder-index",
|
|
638
|
+
f" - community/{svc}",
|
|
639
|
+
"---",
|
|
640
|
+
"",
|
|
641
|
+
f"# {svc}",
|
|
642
|
+
"",
|
|
643
|
+
f"Service folder — {len(notes)} node(s)",
|
|
644
|
+
"",
|
|
645
|
+
"## All Files",
|
|
646
|
+
"",
|
|
647
|
+
]
|
|
648
|
+
for note in sorted(notes, key=lambda f: f.stem):
|
|
649
|
+
if note.stem != svc:
|
|
650
|
+
lines.append(f"- [[{svc}/{note.stem}]]")
|
|
651
|
+
|
|
652
|
+
lines += ["", f"#codebeacon/folder-index #community/{svc}"]
|
|
653
|
+
hub_path = svc_dir / f"{svc}.md"
|
|
654
|
+
hub_path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
655
|
+
|
|
656
|
+
# Add back-link from each note → service hub
|
|
657
|
+
for note in notes:
|
|
658
|
+
if note.stem == svc:
|
|
659
|
+
continue
|
|
660
|
+
content = note.read_text(errors="ignore")
|
|
661
|
+
if f"[[{svc}]]" not in content:
|
|
662
|
+
note.write_text(content.rstrip() + f"\n\n**Service:** [[{svc}]]\n", encoding="utf-8")
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
# ── Step 10: Qualify wikilinks ────────────────────────────────────────────────
|
|
666
|
+
|
|
667
|
+
def _step10_qualify_wikilinks(vault: Path) -> None:
    """Rewrite [[X]] → [[svc/X]] to disambiguate same-name notes in different services."""

    # Build: stem → set of services containing that stem
    stem_svcs: dict[str, set[str]] = defaultdict(set)
    for md in vault.rglob("*.md"):
        stem_svcs[md.stem].add(md.parent.name)

    all_svc_names = {d.name for d in vault.iterdir() if d.is_dir() and not d.name.startswith(".")}

    for md in vault.rglob("*.md"):
        my_svc = md.parent.name
        content = md.read_text(errors="ignore")

        def _qualify(m: re.Match) -> str:
            stem = m.group(1).strip()
            # Don't qualify service index links
            if stem in all_svc_names:
                return m.group(0)
            # Already qualified (contains /)
            # NOTE(review): _WIKILINK_RE excludes "/" from the target, so this
            # branch appears unreachable — defensive only; confirm intent.
            if "/" in stem:
                return m.group(0)
            svcs = stem_svcs.get(stem, set())
            if not svcs:
                return m.group(0)  # dangling link — leave untouched
            if my_svc in svcs:
                # Prefer the copy inside this note's own service.
                return f"[[{my_svc}/{stem}]]"
            if len(svcs) == 1:
                only = next(iter(svcs))
                return f"[[{only}/{stem}]]"
            # Ambiguous — leave unqualified
            return m.group(0)

        new_c = _WIKILINK_RE.sub(_qualify, content)
        if new_c != content:
            md.write_text(new_c, encoding="utf-8")
|
|
703
|
+
|
|
704
|
+
|
|
705
|
+
# ── Step 11: Remove cross-service false links ─────────────────────────────────
|
|
706
|
+
|
|
707
|
+
def _step11_remove_cross_service_links(vault: Path) -> None:
|
|
708
|
+
"""Remove wikilinks pointing to a different service (except calls_api/shares_db_entity)."""
|
|
709
|
+
|
|
710
|
+
all_svc_names = {d.name for d in vault.iterdir() if d.is_dir() and not d.name.startswith(".")}
|
|
711
|
+
|
|
712
|
+
# Build stem → service mapping (post qualification, most links have svc/stem)
|
|
713
|
+
stem_to_svc: dict[str, str] = {}
|
|
714
|
+
for md in vault.rglob("*.md"):
|
|
715
|
+
stem_to_svc[md.stem] = md.parent.name
|
|
716
|
+
|
|
717
|
+
# Identify frontend service names heuristically
|
|
718
|
+
_front_prefixes = ("front-", "dring-", "app-", "web-", "ui-")
|
|
719
|
+
|
|
720
|
+
def _is_frontend(svc: str) -> bool:
|
|
721
|
+
return any(svc.startswith(p) for p in _front_prefixes)
|
|
722
|
+
|
|
723
|
+
for md in vault.rglob("*.md"):
|
|
724
|
+
src_svc = md.parent.name
|
|
725
|
+
content = md.read_text(errors="ignore")
|
|
726
|
+
lines = content.split("\n")
|
|
727
|
+
new_lines = []
|
|
728
|
+
changed = False
|
|
729
|
+
|
|
730
|
+
for line in lines:
|
|
731
|
+
# Always keep lines with intentional cross-service relations
|
|
732
|
+
if any(rel in line for rel in ("calls_api", "api_called_by", "shares_db_entity", "Called By", "API Connections")):
|
|
733
|
+
new_lines.append(line)
|
|
734
|
+
continue
|
|
735
|
+
|
|
736
|
+
has_cross = False
|
|
737
|
+
for m in _WIKILINK_RE.finditer(line):
|
|
738
|
+
link = m.group(1).strip()
|
|
739
|
+
if link in all_svc_names:
|
|
740
|
+
continue # service index links are fine
|
|
741
|
+
if "/" in link:
|
|
742
|
+
tgt_svc = link.split("/")[0]
|
|
743
|
+
else:
|
|
744
|
+
tgt_svc = stem_to_svc.get(link, "")
|
|
745
|
+
|
|
746
|
+
if tgt_svc in all_svc_names and tgt_svc != src_svc:
|
|
747
|
+
src_front = _is_frontend(src_svc)
|
|
748
|
+
tgt_front = _is_frontend(tgt_svc)
|
|
749
|
+
# Remove front↔front or front↔backend false edges
|
|
750
|
+
if src_front or tgt_front:
|
|
751
|
+
has_cross = True
|
|
752
|
+
break
|
|
753
|
+
|
|
754
|
+
if has_cross and line.strip().startswith("- [["):
|
|
755
|
+
changed = True
|
|
756
|
+
continue # drop the entire bullet line
|
|
757
|
+
|
|
758
|
+
new_lines.append(line)
|
|
759
|
+
|
|
760
|
+
if changed:
|
|
761
|
+
md.write_text("\n".join(new_lines), encoding="utf-8")
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
# ── Step 12: .obsidian/graph.json ────────────────────────────────────────────
|
|
765
|
+
|
|
766
|
+
def _step12_graph_json(vault: Path) -> None:
|
|
767
|
+
"""Write Obsidian graph view config with per-service colour groups."""
|
|
768
|
+
|
|
769
|
+
obsidian_config = vault / ".obsidian"
|
|
770
|
+
obsidian_config.mkdir(exist_ok=True)
|
|
771
|
+
|
|
772
|
+
service_dirs = sorted(
|
|
773
|
+
[d for d in vault.iterdir() if d.is_dir() and not d.name.startswith(".")],
|
|
774
|
+
key=lambda d: d.name,
|
|
775
|
+
)
|
|
776
|
+
|
|
777
|
+
color_groups = []
|
|
778
|
+
for svc_dir in service_dirs:
|
|
779
|
+
svc = svc_dir.name
|
|
780
|
+
rgb = int(hashlib.md5(svc.encode()).hexdigest()[:6], 16)
|
|
781
|
+
color_groups.append({
|
|
782
|
+
"query": f"path:{svc}",
|
|
783
|
+
"color": {"a": 1, "rgb": rgb},
|
|
784
|
+
})
|
|
785
|
+
|
|
786
|
+
graph_config = {
|
|
787
|
+
"collapse-filter": True,
|
|
788
|
+
"search": "",
|
|
789
|
+
"showTags": False,
|
|
790
|
+
"showAttachments": False,
|
|
791
|
+
"hideUnresolved": False,
|
|
792
|
+
"showOrphans": True,
|
|
793
|
+
"collapse-color-groups": False,
|
|
794
|
+
"colorGroups": color_groups,
|
|
795
|
+
"collapse-display": False,
|
|
796
|
+
"showArrow": True,
|
|
797
|
+
"textFadeMultiplier": 0,
|
|
798
|
+
"nodeSizeMultiplier": 1,
|
|
799
|
+
"lineSizeMultiplier": 1,
|
|
800
|
+
"collapse-forces": True,
|
|
801
|
+
"centerStrength": 0.5,
|
|
802
|
+
"repelStrength": 10,
|
|
803
|
+
"linkStrength": 1,
|
|
804
|
+
"linkDistance": 250,
|
|
805
|
+
"scale": 0.05,
|
|
806
|
+
"close": False,
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
(obsidian_config / "graph.json").write_text(
|
|
810
|
+
json.dumps(graph_config, indent=2),
|
|
811
|
+
encoding="utf-8",
|
|
812
|
+
)
|