codebeacon 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. codebeacon/__init__.py +1 -0
  2. codebeacon/__main__.py +3 -0
  3. codebeacon/cache.py +136 -0
  4. codebeacon/cli.py +391 -0
  5. codebeacon/common/__init__.py +0 -0
  6. codebeacon/common/filters.py +170 -0
  7. codebeacon/common/symbols.py +121 -0
  8. codebeacon/common/types.py +98 -0
  9. codebeacon/config.py +144 -0
  10. codebeacon/contextmap/__init__.py +0 -0
  11. codebeacon/contextmap/generator.py +602 -0
  12. codebeacon/discover/__init__.py +0 -0
  13. codebeacon/discover/detector.py +388 -0
  14. codebeacon/discover/scanner.py +192 -0
  15. codebeacon/export/__init__.py +0 -0
  16. codebeacon/export/mcp.py +515 -0
  17. codebeacon/export/obsidian.py +812 -0
  18. codebeacon/extract/__init__.py +22 -0
  19. codebeacon/extract/base.py +372 -0
  20. codebeacon/extract/components.py +357 -0
  21. codebeacon/extract/dependencies.py +140 -0
  22. codebeacon/extract/entities.py +575 -0
  23. codebeacon/extract/queries/README.md +116 -0
  24. codebeacon/extract/queries/actix.scm +115 -0
  25. codebeacon/extract/queries/angular.scm +155 -0
  26. codebeacon/extract/queries/aspnet.scm +159 -0
  27. codebeacon/extract/queries/django.scm +122 -0
  28. codebeacon/extract/queries/express.scm +124 -0
  29. codebeacon/extract/queries/fastapi.scm +152 -0
  30. codebeacon/extract/queries/flask.scm +120 -0
  31. codebeacon/extract/queries/gin.scm +142 -0
  32. codebeacon/extract/queries/ktor.scm +144 -0
  33. codebeacon/extract/queries/laravel.scm +172 -0
  34. codebeacon/extract/queries/nestjs.scm +183 -0
  35. codebeacon/extract/queries/rails.scm +114 -0
  36. codebeacon/extract/queries/react.scm +111 -0
  37. codebeacon/extract/queries/spring_boot.scm +204 -0
  38. codebeacon/extract/queries/svelte.scm +73 -0
  39. codebeacon/extract/queries/vapor.scm +130 -0
  40. codebeacon/extract/queries/vue.scm +123 -0
  41. codebeacon/extract/routes.py +910 -0
  42. codebeacon/extract/semantic.py +280 -0
  43. codebeacon/extract/services.py +597 -0
  44. codebeacon/graph/__init__.py +1 -0
  45. codebeacon/graph/analyze.py +281 -0
  46. codebeacon/graph/build.py +320 -0
  47. codebeacon/graph/cluster.py +160 -0
  48. codebeacon/graph/enrich.py +206 -0
  49. codebeacon/skill/SKILL.md +127 -0
  50. codebeacon/wave.py +292 -0
  51. codebeacon/wiki/__init__.py +0 -0
  52. codebeacon/wiki/generator.py +376 -0
  53. codebeacon/wiki/index.py +95 -0
  54. codebeacon/wiki/templates.py +467 -0
  55. codebeacon-0.1.2.dist-info/METADATA +319 -0
  56. codebeacon-0.1.2.dist-info/RECORD +59 -0
  57. codebeacon-0.1.2.dist-info/WHEEL +4 -0
  58. codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
  59. codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,160 @@
1
+ """Community detection for the codebeacon knowledge graph.
2
+
3
+ Attempts clustering in order of quality:
4
+ 1. graspologic Leiden — best quality, requires: pip install graspologic
5
+ 2. leidenalg — good quality, requires: pip install leidenalg igraph
6
+ 3. NetworkX Louvain — built into networkx >= 3.0 (seed-stable)
7
+ 4. Weakly connected components — always available fallback
8
+
9
+ Public API:
10
+ cluster(G) → dict[node_id, community_id]
11
+ apply_communities(G, communities) → writes community attr to G nodes
12
+ score_all(G, communities) → dict[community_id, cohesion_score]
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import warnings
18
+ from typing import Optional
19
+
20
+ import networkx as nx
21
+
22
+
23
def cluster(G: nx.DiGraph) -> dict[str, int]:
    """Assign every node of *G* to a community.

    The clustering backends are tried from best to most basic (graspologic
    Leiden, leidenalg, NetworkX Louvain); the first one that returns a
    partition wins.  If none does, weakly connected components are used.

    Args:
        G: Graph to cluster.

    Returns:
        node_id → community_id mapping; an empty dict for an empty graph.
    """
    if not G.number_of_nodes():
        return {}

    # Each backend signals "unavailable / not applicable" by returning None.
    for backend in (_try_graspologic, _try_leidenalg, _try_louvain):
        partition = backend(G)
        if partition is not None:
            return partition

    return _connected_components(G)
46
+
47
+
48
def apply_communities(G: nx.DiGraph, communities: dict[str, int]) -> None:
    """Store each node's community id in its ``community`` node attribute.

    *G* is modified in place.  Entries of *communities* whose node is not
    present in the graph are ignored.
    """
    present = (pair for pair in communities.items() if pair[0] in G)
    for node_id, community_id in present:
        G.nodes[node_id]["community"] = community_id
53
+
54
+
55
def score_all(G: nx.DiGraph, communities: dict[str, int]) -> dict[int, float]:
    """Compute a simple edge-based cohesion score for each community.

    Cohesion = internal_edges / (internal_edges + boundary_edges), where an
    internal edge has both endpoints in the community and a boundary edge
    has exactly one.  A score of 1.0 means all edges stay within the
    community; a community that touches no edge at all also scores 1.0.

    The edges are scanned once (O(E)) rather than once per community.

    Args:
        G: Graph whose edges are counted.
        communities: node_id → community_id mapping.

    Returns:
        community_id → cohesion score (0.0–1.0), in order of first
        appearance of each community id in *communities*.
    """
    if not communities:
        return {}

    # Seed the counters from the mapping so that communities without any
    # edge are still scored, and so that result order is stable.
    internal: dict[int, int] = dict.fromkeys(communities.values(), 0)
    boundary: dict[int, int] = dict.fromkeys(communities.values(), 0)

    for u, v in G.edges():
        u_known = u in communities
        v_known = v in communities
        if u_known and v_known and communities[u] == communities[v]:
            internal[communities[u]] += 1
            continue
        # The edge leaves the community of each endpoint that has one.
        if u_known:
            boundary[communities[u]] += 1
        if v_known:
            boundary[communities[v]] += 1

    scores: dict[int, float] = {}
    for cid, inside in internal.items():
        total = inside + boundary[cid]
        scores[cid] = inside / total if total > 0 else 1.0
    return scores
86
+
87
+
88
+ # ── Algorithm implementations ─────────────────────────────────────────────────
89
+
90
def _try_graspologic(G: nx.DiGraph) -> Optional[dict[str, int]]:
    """Leiden via graspologic.

    Args:
        G: Graph to cluster; it is converted to an undirected copy first.

    Returns:
        node_id (as ``str``) → community_id, or ``None`` when graspologic is
        not installed, the graph has no edges, or the call fails (a warning
        is emitted in the failure case).
    """
    try:
        from graspologic.partition import leiden

        UG = G.to_undirected()
        if UG.number_of_edges() == 0:
            return None

        # graspologic documents leiden() as returning a single
        # node → community dict.  Unpacking it as a 2-tuple raised on every
        # call, which silently disabled this backend.  A tuple result is
        # still tolerated in case some release returns (partition, extra).
        partition = leiden(UG)
        if isinstance(partition, tuple):
            partition = partition[0]
        return {str(k): int(v) for k, v in partition.items()}
    except ImportError:
        return None
    except Exception as exc:
        warnings.warn(f"graspologic leiden failed: {exc}", stacklevel=2)
        return None
106
+
107
+
108
def _try_leidenalg(G: nx.DiGraph) -> Optional[dict[str, int]]:
    """Leiden via leidenalg + python-igraph.

    Returns:
        node_id → community_id, or ``None`` when the packages cannot be
        imported, the graph has no edges, or the call fails (a warning is
        emitted in the failure case).
    """
    try:
        import leidenalg
        import igraph as ig

        undirected = G.to_undirected()
        if not undirected.number_of_edges():
            return None

        # The igraph graph is built from integer vertex indices, so map each
        # node to its position and translate the edges accordingly.
        labels = list(undirected.nodes())
        index_of = dict(zip(labels, range(len(labels))))
        edge_list = [(index_of[a], index_of[b]) for a, b in undirected.edges()]

        graph = ig.Graph(n=len(labels), edges=edge_list, directed=False)
        partition = leidenalg.find_partition(graph, leidenalg.ModularityVertexPartition)

        # Translate vertex indices back to the original node ids.
        return {
            labels[idx]: cid
            for cid, group in enumerate(partition)
            for idx in group
        }
    except ImportError:
        return None
    except Exception as exc:
        warnings.warn(f"leidenalg failed: {exc}", stacklevel=2)
        return None
135
+
136
+
137
def _try_louvain(G: nx.DiGraph) -> Optional[dict[str, int]]:
    """Louvain via the networkx built-in, with a fixed seed (42).

    Args:
        G: Graph to cluster; it is converted to an undirected copy first.

    Returns:
        node_id → community_id, or ``None`` when the graph has no edges,
        the installed networkx lacks ``louvain_communities``, or the call
        fails (a warning is emitted in the failure case).
    """
    try:
        UG = G.to_undirected()
        if UG.number_of_edges() == 0:
            return None
        communities = nx.community.louvain_communities(UG, seed=42)
    except AttributeError:
        # Expected on a networkx without louvain_communities: fall through
        # to the next strategy without noise.
        return None
    except Exception as exc:
        # Report unexpected failures like the other backends do instead of
        # swallowing them silently.
        warnings.warn(f"networkx louvain failed: {exc}", stacklevel=2)
        return None

    return {
        node: community_id
        for community_id, members in enumerate(communities)
        for node in members
    }
152
+
153
+
154
def _connected_components(G: nx.DiGraph) -> dict[str, int]:
    """Last-resort clustering: one community per weakly connected component.

    Components are numbered from 0 in the order networkx yields them.
    """
    return {
        member: index
        for index, group in enumerate(nx.weakly_connected_components(G))
        for member in group
    }
@@ -0,0 +1,206 @@
1
+ """Graph enrichment: HTTP API cross-service edges + shared DB entity edges.
2
+
3
+ Two enrichment passes run AFTER the base graph is built by build.py:
4
+ 1. enrich_http_api() — frontend URL calls → backend controller routes (calls_api edges)
5
+ 2. enrich_shared_db() — same DAO/Entity used by multiple services (shares_db_entity edges)
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from pathlib import Path
12
+
13
+ import networkx as nx
14
+
15
+
16
# Regexes to extract API URLs from frontend source files.
# Every pattern captures the URL literal in group 1.  The first four exclude
# `$` from the captured text, so a literal containing `$` (for example a
# template string with `${...}` interpolation) is not captured by them.
_API_URL_RES = [
    # fetch("..."), invoke("..."), $fetch("...")
    re.compile(r'''(?:fetch|invoke|\$fetch)\s*\(\s*[`"']([^`"'$]+)[`"']'''),
    # axios.<method>("...")
    re.compile(r'''axios\.\w+\s*\(\s*[`"']([^`"'$]+)[`"']'''),
    # api.<method>("..."), http.<method>("..."), client.<method>("...")
    re.compile(r'''(?:api|http|client)\.\w+\s*\(\s*[`"']([^`"'$]+)[`"']'''),
    # url: "..." or url = "..."
    re.compile(r'''url\s*[:=]\s*[`"']([^`"'$]+)[`"']'''),
    # Any quoted literal that starts with /api/
    re.compile(r'''["'](/api/[^"'`\s]+)["'`]'''),
]
# A captured string counts as a URL only if it starts with "/" followed by
# an ASCII letter.
_URL_LIKE = re.compile(r'^/[a-zA-Z]')
25
+
26
+
27
def _extract_api_urls(source_file: str) -> list[str]:
    """Scan a source file for HTTP API URL patterns.

    Args:
        source_file: Path of the file to scan.  It is read as UTF-8 with
            undecodable bytes replaced.

    Returns:
        The distinct URL paths found, with query string and fragment
        removed, in sorted order.  An empty list if the file cannot be read.
    """
    try:
        content = Path(source_file).read_text(encoding="utf-8", errors="replace")
    except OSError:
        return []

    urls: set[str] = set()
    for pat in _API_URL_RES:
        for m in pat.finditer(content):
            url = m.group(1).split("?")[0].split("#")[0].strip()
            if _URL_LIKE.match(url):
                urls.add(url)

    # Sort: set iteration order for strings varies between interpreter
    # runs, which would make the order of the returned URLs — and anything
    # derived from it — irreproducible.
    return sorted(urls)
40
+
41
+
42
def enrich_http_api(G: nx.DiGraph) -> int:
    """Add calls_api edges where frontend URL patterns match backend route paths.

    Strategy:
      - Collect all 'route' nodes with their path attribute
      - For each 'component' or 'class' node, take its ``api_calls``
        attribute if set, otherwise scan its source file for API URL patterns
      - Match URLs to routes: exact first, then parameterized
      - Only link nodes whose ``project`` attributes differ

    Args:
        G: Graph to enrich; edges are added in place.

    Returns:
        Number of new calls_api edges added.
    """
    added = 0

    # Build route lookup: normalized_path → (node_id, project)
    # NOTE(review): routes that normalize to the same path overwrite each
    # other here, so only the last such route node can be linked — confirm
    # this is intended when several methods/projects share a path.
    route_map: dict[str, tuple[str, str]] = {}
    for node_id, data in G.nodes(data=True):
        if data.get("type") != "route":
            continue
        path = data.get("path", "")
        if path:
            proj = data.get("project", "")
            route_map[_normalize_path(path)] = (node_id, proj)

    if not route_map:
        return 0

    # Find component/class nodes and scan their source for API calls
    for node_id, data in G.nodes(data=True):
        if data.get("type") not in ("component", "class"):
            continue
        src_proj = data.get("project", "")

        # Nodes without a source file are skipped, even if api_calls is set.
        src_file = data.get("source_file", "")
        if not src_file:
            continue

        # Use metadata api_calls if set, otherwise scan file
        api_calls = data.get("api_calls", [])
        if not api_calls:
            api_calls = _extract_api_urls(src_file)

        for url in api_calls:
            normalized = _normalize_path(url)

            # Exact match
            if normalized in route_map:
                target_id, target_proj = route_map[normalized]
                # Only create cross-project edges (skip same-project)
                if target_proj == src_proj:
                    continue
                if not G.has_edge(node_id, target_id):
                    G.add_edge(
                        node_id, target_id,
                        relation="calls_api",
                        confidence="EXTRACTED",
                        confidence_score=1.0,
                        source_file=src_file,
                    )
                    added += 1
                # An exact hit never falls through to parameterized matching.
                continue

            # Parameterized match: /api/users/123 → /api/users/:id
            for route_path, (route_node_id, route_proj) in route_map.items():
                if route_proj == src_proj:
                    continue
                if _paths_match(normalized, route_path):
                    if not G.has_edge(node_id, route_node_id):
                        G.add_edge(
                            node_id, route_node_id,
                            relation="calls_api",
                            confidence="INFERRED",
                            confidence_score=0.8,
                            source_file=src_file,
                        )
                        added += 1
                    # Stop at the first matching route for this URL.
                    break

    return added
120
+
121
+
122
def enrich_shared_db(G: nx.DiGraph) -> int:
    """Add shares_db_entity edges when one entity is referenced from several projects.

    Detection strategy:
      - Find all nodes of type 'entity', in graph node order
      - For each entity, collect its neighbours (in- or out-edges) of type
        'class', 'route' or 'component'
      - Keep the first such neighbour per non-empty ``project`` value as
        that project's representative
      - If two or more projects are represented, link every pair of
        representatives that has no edge yet

    Args:
        G: Graph to enrich; edges are added in place.

    Returns:
        Number of new shares_db_entity edges added.
    """
    added = 0

    # A list in node order, not a set: set iteration order for strings
    # varies between interpreter runs, and because only the first entity
    # shared by a pair of representatives creates the edge, the stored
    # ``shared_entity`` would otherwise be irreproducible.
    entity_ids = [
        node_id
        for node_id, data in G.nodes(data=True)
        if data.get("type") == "entity"
    ]

    for entity_id in entity_ids:
        entity_src = G.nodes[entity_id].get("source_file", "")

        # project → first referencing node, used as that project's representative.
        project_reps: dict[str, str] = {}
        for ref_id in [*G.predecessors(entity_id), *G.successors(entity_id)]:
            ref_data = G.nodes[ref_id]
            if ref_data.get("type") not in ("class", "route", "component"):
                continue
            proj = ref_data.get("project", "")
            if proj:
                project_reps.setdefault(proj, ref_id)

        if len(project_reps) < 2:
            continue  # only interesting when multiple projects share an entity

        reps = list(project_reps.values())
        for i, src_rep in enumerate(reps):
            for tgt_rep in reps[i + 1:]:
                if G.has_edge(src_rep, tgt_rep):
                    continue  # never overwrite an existing edge
                G.add_edge(
                    src_rep, tgt_rep,
                    relation="shares_db_entity",
                    confidence="INFERRED",
                    confidence_score=0.9,
                    source_file=entity_src,
                    shared_entity=entity_id,
                )
                added += 1

    return added
182
+
183
+
184
+ # ── URL / path utilities ──────────────────────────────────────────────────────
185
+
186
def _normalize_path(path: str) -> str:
    """Return *path* in canonical form for route/URL comparison.

    The query string and fragment are dropped, trailing slashes are removed
    (an empty result becomes "/"), and the text is lowercased.
    """
    without_query, _, _ = path.partition("?")
    bare, _, _ = without_query.partition("#")
    trimmed = bare.rstrip("/")
    if not trimmed:
        trimmed = "/"
    return trimmed.lower()
191
+
192
+
193
def _paths_match(url: str, route_pattern: str) -> bool:
    """Check if a concrete URL matches a parameterized route pattern.

    Handles :param, {param}, [param], and <param> styles (including typed
    forms such as ``<int:id>``).  Each placeholder matches one or more
    characters other than "/"; all other text must match literally.

    Args:
        url: Concrete path, e.g. ``/api/users/123``.
        route_pattern: Route path, e.g. ``/api/users/:id``.

    Returns:
        True if the whole *url* matches *route_pattern*.
    """
    # Placeholders are located in a single pass.  Chained substitutions
    # re-matched their own output: the "[^/]+" inserted for ":id" was picked
    # up again by the [param] rule, yielding "[^/]++", which is a regex
    # error before Python 3.11 — so every such route failed to match there.
    placeholder = r":[^/]+|\{[^}]+\}|\[[^\]]+\]|<[^>]+>"

    parts: list[str] = []
    pos = 0
    for m in re.finditer(placeholder, route_pattern):
        # Literal text is escaped so that "." or "+" in a route is not
        # interpreted as regex syntax.
        parts.append(re.escape(route_pattern[pos:m.start()]))
        parts.append(r"[^/]+")
        pos = m.end()
    parts.append(re.escape(route_pattern[pos:]))

    return re.fullmatch("".join(parts), url) is not None
@@ -0,0 +1,127 @@
1
+ ---
2
+ name: codebeacon
3
+ description: Scan a codebase → AST extraction → knowledge graph → wiki + CLAUDE.md context map. Supports 17 frameworks (Spring Boot, NestJS, Django, FastAPI, Rails, Express, React, Vue, Angular, and more).
4
+ trigger: /codebeacon
5
+ ---
6
+
7
+ # /codebeacon
8
+
9
+ Scan source code with AST analysis → build a knowledge graph → generate a navigable wiki + `CLAUDE.md` context map ready for AI agents.
10
+
11
+ ## Usage
12
+
13
+ ```
14
+ /codebeacon # scan current directory
15
+ /codebeacon <path> # scan specific path or workspace root
16
+ /codebeacon <path> --update # incremental: only reprocess changed files
17
+ /codebeacon <path> --wiki-only # regenerate wiki without re-extracting
18
+ /codebeacon sync # sync from codebeacon.yaml (multi-project)
19
+ /codebeacon serve <path> # start MCP server pointing at .codebeacon/
20
+ ```
21
+
22
+ ## What You Must Do When Invoked
23
+
24
+ If no path was given, use `.` (current directory). Do not ask the user for a path.
25
+
26
+ Follow these steps in order.
27
+
28
+ ### Step 1 — Ensure codebeacon is installed
29
+
30
+ ```bash
31
+ python3 -c "import codebeacon" 2>/dev/null || pip install codebeacon -q --break-system-packages 2>&1 | tail -5
32
+ python3 -c "import sys; open('.codebeacon_python', 'w').write(sys.executable)"
33
+ ```
34
+
35
+ In every subsequent bash block, replace `python3` with `$(cat .codebeacon_python)`.
36
+
37
+ If the import succeeds, print nothing and move on to Step 2.
38
+
39
+ ### Step 2 — Detect mode and run
40
+
41
+ Check if `codebeacon.yaml` exists in the target directory:
42
+
43
+ ```bash
44
+ TARGET="${1:-.}"
45
+
46
+ if [ -f "$TARGET/codebeacon.yaml" ]; then
47
+ echo "Found codebeacon.yaml — running sync mode"
48
+ $(cat .codebeacon_python) -m codebeacon sync --config "$TARGET/codebeacon.yaml"
49
+ else
50
+ echo "Scanning $TARGET ..."
51
+ $(cat .codebeacon_python) -m codebeacon scan "$TARGET"
52
+ fi
53
+ ```
54
+
55
+ The command prints wave progress as it goes:
56
+ - Framework detection per project
57
+ - `[pct%] done/total files processed` (wave progress per project)
58
+ - Route / Service / Entity / Component counts after each project
59
+ - Final: `Nodes: N, Edges: E, Communities: K`
60
+
61
+ Let it run to completion. Do not interrupt.
62
+
63
+ ### Step 3 — Report results
64
+
65
+ After the command exits, read the first 40 lines of `REPORT.md`, if the file exists:
66
+
67
+ ```bash
68
+ TARGET="${1:-.}"
69
+ OUTPUT_DIR="$TARGET/.codebeacon"
70
+ [ -f "$OUTPUT_DIR/REPORT.md" ] && head -40 "$OUTPUT_DIR/REPORT.md"
71
+ ```
72
+
73
+ Then summarise for the user:
74
+ - Which projects/frameworks were detected
75
+ - Total nodes, edges, communities
76
+ - Output location (`.codebeacon/wiki/`, `.codebeacon/CLAUDE.md`, etc.)
77
+ - Any god nodes or surprising connections worth mentioning
78
+
79
+ ### Step 4 — (Optional) MCP serve
80
+
81
+ If the user asked for `serve`:
82
+
83
+ ```bash
84
+ TARGET="${1:-.}"
85
+ $(cat .codebeacon_python) -m codebeacon serve --dir "$TARGET/.codebeacon"
86
+ ```
87
+
88
+ This blocks — run it only when the user explicitly wants an MCP server.
89
+
90
+ ## Output structure
91
+
92
+ ```
93
+ .codebeacon/
94
+ beacon.json ← full knowledge graph (node-link JSON)
95
+ REPORT.md ← god nodes, surprising connections, hub files
96
+ CLAUDE.md ← AI context map (also written to project root)
97
+ .cursorrules ← Cursor IDE context
98
+ AGENTS.md ← OpenAI Agents context
99
+ wiki/
100
+ index.md ← global index (~200 tokens)
101
+ overview.md ← platform stats + cross-project connections
102
+ routes.md ← all routes table
103
+ cross-project/
104
+ connections.md ← cross-service edges
105
+ <project>/
106
+ index.md
107
+ routes.md
108
+ controllers/<Name>.md
109
+ services/<Name>.md
110
+ entities/<Name>.md
111
+ components/<Name>.md
112
+ obsidian/ ← Obsidian vault (one note per node)
113
+ ```
114
+
115
+ ## Supported frameworks
116
+
117
+ | Language | Frameworks |
118
+ |----------|-----------|
119
+ | Java/Kotlin | Spring Boot, Ktor |
120
+ | Python | Django, FastAPI, Flask |
121
+ | JavaScript/TypeScript | Express, NestJS, React, Vue, Angular, Svelte |
122
+ | Go | Gin |
123
+ | Ruby | Rails |
124
+ | PHP | Laravel |
125
+ | Rust | Actix-Web |
126
+ | C# | ASP.NET Core |
127
+ | Swift | Vapor |