codebeacon 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codebeacon/__init__.py +1 -0
- codebeacon/__main__.py +3 -0
- codebeacon/cache.py +136 -0
- codebeacon/cli.py +391 -0
- codebeacon/common/__init__.py +0 -0
- codebeacon/common/filters.py +170 -0
- codebeacon/common/symbols.py +121 -0
- codebeacon/common/types.py +98 -0
- codebeacon/config.py +144 -0
- codebeacon/contextmap/__init__.py +0 -0
- codebeacon/contextmap/generator.py +602 -0
- codebeacon/discover/__init__.py +0 -0
- codebeacon/discover/detector.py +388 -0
- codebeacon/discover/scanner.py +192 -0
- codebeacon/export/__init__.py +0 -0
- codebeacon/export/mcp.py +515 -0
- codebeacon/export/obsidian.py +812 -0
- codebeacon/extract/__init__.py +22 -0
- codebeacon/extract/base.py +372 -0
- codebeacon/extract/components.py +357 -0
- codebeacon/extract/dependencies.py +140 -0
- codebeacon/extract/entities.py +575 -0
- codebeacon/extract/queries/README.md +116 -0
- codebeacon/extract/queries/actix.scm +115 -0
- codebeacon/extract/queries/angular.scm +155 -0
- codebeacon/extract/queries/aspnet.scm +159 -0
- codebeacon/extract/queries/django.scm +122 -0
- codebeacon/extract/queries/express.scm +124 -0
- codebeacon/extract/queries/fastapi.scm +152 -0
- codebeacon/extract/queries/flask.scm +120 -0
- codebeacon/extract/queries/gin.scm +142 -0
- codebeacon/extract/queries/ktor.scm +144 -0
- codebeacon/extract/queries/laravel.scm +172 -0
- codebeacon/extract/queries/nestjs.scm +183 -0
- codebeacon/extract/queries/rails.scm +114 -0
- codebeacon/extract/queries/react.scm +111 -0
- codebeacon/extract/queries/spring_boot.scm +204 -0
- codebeacon/extract/queries/svelte.scm +73 -0
- codebeacon/extract/queries/vapor.scm +130 -0
- codebeacon/extract/queries/vue.scm +123 -0
- codebeacon/extract/routes.py +910 -0
- codebeacon/extract/semantic.py +280 -0
- codebeacon/extract/services.py +597 -0
- codebeacon/graph/__init__.py +1 -0
- codebeacon/graph/analyze.py +281 -0
- codebeacon/graph/build.py +320 -0
- codebeacon/graph/cluster.py +160 -0
- codebeacon/graph/enrich.py +206 -0
- codebeacon/skill/SKILL.md +127 -0
- codebeacon/wave.py +292 -0
- codebeacon/wiki/__init__.py +0 -0
- codebeacon/wiki/generator.py +376 -0
- codebeacon/wiki/index.py +95 -0
- codebeacon/wiki/templates.py +467 -0
- codebeacon-0.1.2.dist-info/METADATA +319 -0
- codebeacon-0.1.2.dist-info/RECORD +59 -0
- codebeacon-0.1.2.dist-info/WHEEL +4 -0
- codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
- codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,388 @@
|
|
|
1
|
+
"""Framework auto-detection and multi/single project determination."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
from codebeacon.common.types import ProjectInfo
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# ── Signature files → (framework, language) ─────────────────────────────────
|
|
14
|
+
|
|
15
|
+
# Each entry maps a marker file found at the project root to a best-guess
# framework and primary language. Generic entries ("node", "python", "go",
# "rust", and the Gradle files) are refined further by the _refine_* helpers
# in detect_framework().
SIGNATURE_MAP: list[tuple[str, str, str]] = [
    # (filename_or_glob, framework, primary_language)
    # Order matters: more specific first
    ("angular.json", "angular", "typescript"),
    ("nuxt.config.ts", "nuxt", "typescript"),
    ("nuxt.config.js", "nuxt", "typescript"),
    ("svelte.config.js", "sveltekit", "typescript"),
    ("svelte.config.ts", "sveltekit", "typescript"),
    ("build.gradle.kts", "ktor", "kotlin"),
    ("build.gradle", "spring-boot", "java"),  # could be Ktor too, check below
    ("pom.xml", "spring-boot", "java"),
    ("Package.swift", "vapor", "swift"),
    ("Cargo.toml", "rust", "rust"),  # actix/axum refined below
    ("composer.json", "laravel", "php"),
    ("Gemfile", "rails", "ruby"),
    ("go.mod", "go", "go"),  # gin/echo/fiber refined below
    ("package.json", "node", "typescript"),  # express/nest/next refined below
    ("requirements.txt", "python", "python"),  # fastapi/django/flask refined below
    ("pyproject.toml", "python", "python"),  # fastapi/django/flask refined below
    ("setup.py", "python", "python"),
    ("*.csproj", "aspnet", "csharp"),  # glob pattern — handled specially in detect_framework
]

# Refinement patterns: read content of specific files to narrow down framework.
# Each list is scanned in order; the first substring hit wins.

# Substrings searched for in the raw package.json text (not parsed as JSON).
_PACKAGE_JSON_REFINEMENTS: list[tuple[str, str]] = [
    # (pattern_in_deps_or_scripts, framework)
    # Order matters: more specific first
    ("@nestjs/core", "nestjs"),
    ('"next"', "nextjs"),  # "next": "..." — avoid matching "nextjs" etc.
    ("nuxt", "nuxt"),
    ("@sveltejs/kit", "sveltekit"),
    ("@angular/core", "angular"),
    ("fastify", "fastify"),
    ("koa", "koa"),
    ("express", "express"),
    ('"react"', "react"),  # plain React (CRA, Vite, etc.)
    ('"react-dom"', "react"),
]

# Substrings searched (case-insensitively) in Python dependency manifests.
_REQUIREMENTS_REFINEMENTS: list[tuple[str, str]] = [
    ("fastapi", "fastapi"),
    ("django", "django"),
    ("flask", "flask"),
    ("tornado", "tornado"),
    ("aiohttp", "aiohttp"),
]

# Module paths searched in go.mod to identify the Go web framework.
_GO_MOD_REFINEMENTS: list[tuple[str, str]] = [
    ("github.com/gofiber/fiber", "fiber"),
    ("github.com/labstack/echo", "echo"),
    ("github.com/gin-gonic/gin", "gin"),
]

# Crate names searched in Cargo.toml to identify the Rust web framework.
_CARGO_REFINEMENTS: list[tuple[str, str]] = [
    ("axum", "axum"),
    ("actix-web", "actix"),
]

# Substrings searched (case-insensitively) in Gradle build scripts to decide
# between Ktor and Spring Boot. "ktor" is listed first so it wins when both appear.
_BUILD_GRADLE_KOTLIN_REFINEMENTS: list[tuple[str, str]] = [
    ("ktor", "ktor"),
    ("spring", "spring-boot"),
]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _read_safe(path: Path) -> str:
|
|
80
|
+
try:
|
|
81
|
+
return path.read_text(encoding="utf-8", errors="replace")
|
|
82
|
+
except OSError:
|
|
83
|
+
return ""
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _refine_node_framework(project_dir: Path) -> str:
    """Narrow a generic Node project to a concrete framework via package.json.

    Scans the raw manifest text for known dependency markers; first hit wins.
    Falls back to "node" when nothing matches (or package.json is unreadable).
    """
    manifest = _read_safe(project_dir / "package.json")
    hits = (framework for needle, framework in _PACKAGE_JSON_REFINEMENTS if needle in manifest)
    return next(hits, "node")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _refine_python_framework(project_dir: Path) -> str:
    """Narrow a generic Python project by scanning its dependency manifests.

    Checks each known manifest in turn and returns the first framework whose
    marker appears (case-insensitively); falls back to "python".
    """
    for manifest_name in ("requirements.txt", "pyproject.toml", "setup.py", "Pipfile"):
        text = _read_safe(project_dir / manifest_name).lower()
        if not text:
            continue
        for needle, framework in _REQUIREMENTS_REFINEMENTS:
            if needle in text:
                return framework
    return "python"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _refine_go_framework(project_dir: Path) -> str:
    """Pick the Go web framework from module requirements in go.mod."""
    gomod = _read_safe(project_dir / "go.mod")
    hit = next((fw for needle, fw in _GO_MOD_REFINEMENTS if needle in gomod), None)
    # Default to gin — the most common Go web framework.
    return hit if hit is not None else "gin"
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _refine_rust_framework(project_dir: Path) -> str:
    """Pick the Rust web framework from crate names in Cargo.toml."""
    cargo = _read_safe(project_dir / "Cargo.toml")
    hit = next((fw for needle, fw in _CARGO_REFINEMENTS if needle in cargo), None)
    # Default to actix when no known crate marker is found.
    return hit if hit is not None else "actix"
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _refine_gradle_framework(project_dir: Path) -> tuple[str, str]:
    """Decide between Ktor (Kotlin) and Spring Boot (Java) from Gradle build files.

    Prefers build.gradle.kts, falling back to build.gradle when the Kotlin
    script is absent or empty. Defaults to ("spring-boot", "java").
    """
    build_script = _read_safe(project_dir / "build.gradle.kts") or _read_safe(
        project_dir / "build.gradle"
    )
    lowered = build_script.lower()
    for needle, framework in _BUILD_GRADLE_KOTLIN_REFINEMENTS:
        if needle in lowered:
            return ("ktor", "kotlin") if framework == "ktor" else ("spring-boot", "java")
    return ("spring-boot", "java")
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def detect_framework(project_dir: str | Path) -> tuple[str, str, str]:
    """Identify (framework, language, signature_file) for *project_dir*.

    Checks signature files from SIGNATURE_MAP in order, refining generic
    entries (node/python/go/rust/gradle) by inspecting file contents.
    Returns ("unknown", "unknown", "") when nothing matches.
    """
    directory = Path(project_dir)

    # *.csproj is the only glob-style signature; handle it up front.
    for candidate in directory.glob("*.csproj"):
        return ("aspnet", "csharp", str(candidate))

    for signature, framework, language in SIGNATURE_MAP:
        if signature.startswith("*"):
            # Glob signatures were already handled above.
            continue
        marker = directory / signature
        if not marker.exists():
            continue
        # Generic entries get a content-based second pass.
        if framework == "node":
            return (_refine_node_framework(directory), "typescript", str(marker))
        if framework == "python":
            return (_refine_python_framework(directory), "python", str(marker))
        if framework == "go":
            return (_refine_go_framework(directory), "go", str(marker))
        if framework == "rust":
            return (_refine_rust_framework(directory), "rust", str(marker))
        if signature in ("build.gradle.kts", "build.gradle"):
            refined_fw, refined_lang = _refine_gradle_framework(directory)
            return (refined_fw, refined_lang, str(marker))
        return (framework, language, str(marker))

    # No signature file found — caller may fall back to code-file heuristics.
    return ("unknown", "unknown", "")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _has_project_signature(directory: Path) -> bool:
|
|
177
|
+
"""Return True if directory looks like a project root (has a build/config file)."""
|
|
178
|
+
signature_files = [
|
|
179
|
+
"pom.xml", "build.gradle", "build.gradle.kts",
|
|
180
|
+
"package.json", "requirements.txt", "pyproject.toml", "setup.py",
|
|
181
|
+
"go.mod", "Gemfile", "composer.json", "Cargo.toml",
|
|
182
|
+
"Package.swift", "angular.json",
|
|
183
|
+
"nuxt.config.ts", "nuxt.config.js",
|
|
184
|
+
"svelte.config.js", "svelte.config.ts",
|
|
185
|
+
]
|
|
186
|
+
for sig in signature_files:
|
|
187
|
+
if (directory / sig).exists():
|
|
188
|
+
return True
|
|
189
|
+
# Check for *.csproj
|
|
190
|
+
if list(directory.glob("*.csproj")):
|
|
191
|
+
return True
|
|
192
|
+
return False
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _detect_language_from_files(directory: Path) -> str:
|
|
196
|
+
"""Detect dominant language by counting code files."""
|
|
197
|
+
counts: dict[str, int] = {}
|
|
198
|
+
ext_to_lang = {
|
|
199
|
+
".java": "java", ".kt": "kotlin",
|
|
200
|
+
".py": "python",
|
|
201
|
+
".ts": "typescript", ".tsx": "typescript", ".js": "javascript", ".jsx": "javascript",
|
|
202
|
+
".go": "go",
|
|
203
|
+
".rb": "ruby",
|
|
204
|
+
".php": "php",
|
|
205
|
+
".cs": "csharp",
|
|
206
|
+
".rs": "rust",
|
|
207
|
+
".swift": "swift",
|
|
208
|
+
}
|
|
209
|
+
try:
|
|
210
|
+
for entry in directory.rglob("*"):
|
|
211
|
+
if entry.is_file() and entry.suffix in ext_to_lang:
|
|
212
|
+
lang = ext_to_lang[entry.suffix]
|
|
213
|
+
counts[lang] = counts.get(lang, 0) + 1
|
|
214
|
+
except (PermissionError, OSError):
|
|
215
|
+
pass
|
|
216
|
+
|
|
217
|
+
if not counts:
|
|
218
|
+
return "unknown"
|
|
219
|
+
return max(counts, key=lambda k: counts[k])
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def discover_projects(paths: list[str]) -> list[ProjectInfo]:
    """Resolve user-supplied paths into a list of detected projects.

    Behavior:
    - 2+ paths → treat each as a separate project (multi mode)
    - 1 path with a signature file → single project mode
    - 1 path without → scan immediate subdirectories for projects (multi mode)
    - nothing found but code files exist → one generic single-mode project

    Raises FileNotFoundError for a missing path, ValueError for a non-directory
    path or a directory with no detectable projects or code.
    """
    if len(paths) > 1:
        return _multi_from_paths(paths)

    root = Path(paths[0]).resolve()
    if not root.exists():
        raise FileNotFoundError(f"Path does not exist: {root}")
    if not root.is_dir():
        raise ValueError(f"Path must be a directory: {root}")

    if _has_project_signature(root):
        return [_build_project_info(root, multi=False)]

    # No signature at the top level: look one directory down.
    skip_names = {
        "node_modules", "__pycache__", ".git", "dist", "build", "target",
    }
    candidates = [
        child
        for child in sorted(root.iterdir())
        if child.is_dir()
        and not child.name.startswith(".")
        and child.name not in skip_names
    ]
    found = [
        _build_project_info(child, multi=True)
        for child in candidates
        if _has_project_signature(child)
    ]
    if found:
        return found

    # No project signatures found anywhere: try generic mode.
    language = _detect_language_from_files(root)
    if language == "unknown":
        raise ValueError(
            f"No projects found under {root}.\n"
            "Make sure the path contains source code or a project file "
            "(pom.xml, package.json, go.mod, etc.)"
        )

    return [ProjectInfo(
        name=root.name,
        path=str(root),
        framework="generic",
        language=language,
        signature_file="",
        is_multi=False,
    )]
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
def _multi_from_paths(paths: list[str]) -> list[ProjectInfo]:
|
|
285
|
+
"""Treat each path as an independent project."""
|
|
286
|
+
projects = []
|
|
287
|
+
for p in paths:
|
|
288
|
+
resolved = Path(p).resolve()
|
|
289
|
+
if not resolved.exists():
|
|
290
|
+
raise FileNotFoundError(f"Path does not exist: {resolved}")
|
|
291
|
+
projects.append(_build_project_info(resolved, multi=True))
|
|
292
|
+
return projects
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _build_project_info(directory: Path, multi: bool) -> ProjectInfo:
    """Assemble a ProjectInfo for *directory*.

    Uses detect_framework first; when that yields "unknown", falls back to a
    generic project classified by dominant file extension.
    """
    framework, language, signature = detect_framework(directory)
    if framework != "unknown":
        return ProjectInfo(
            name=directory.name,
            path=str(directory),
            framework=framework,
            language=language,
            signature_file=signature,
            is_multi=multi,
        )
    # No signature file matched: classify by counting code files instead.
    fallback_lang = _detect_language_from_files(directory)
    return ProjectInfo(
        name=directory.name,
        path=str(directory),
        framework="generic",
        language=fallback_lang or "unknown",
        signature_file="",
        is_multi=multi,
    )
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def extract_convention_routes(project: ProjectInfo) -> list[str]:
    """Extract file-system based routes for Next.js / Nuxt / SvelteKit.

    Returns a list of route path strings. Actual RouteInfo objects are built
    in extract/routes.py; this is a discovery-time stub that returns raw
    route strings. Frameworks without file-system routing yield [].
    """
    root = Path(project.path)
    routes: list[str] = []

    if project.framework in ("nextjs", "next"):
        # Pages Router: pages/**/*.{ts,tsx,js,jsx} → route
        pages_dir = root / "pages"
        if pages_dir.exists():
            routes.extend(_fs_routes_from_dir(pages_dir, pages_dir))
        # App Router: app/**/page.{ts,tsx,js,jsx} → route
        app_dir = root / "app"
        if app_dir.exists():
            # Fix: previously only page.tsx / page.ts were globbed, which
            # silently missed JavaScript App Router projects (page.jsx / page.js).
            for page_name in ("page.tsx", "page.ts", "page.jsx", "page.js"):
                for f in app_dir.rglob(page_name):
                    routes.append(_app_router_path(f, app_dir))

    elif project.framework == "nuxt":
        pages_dir = root / "pages"
        if pages_dir.exists():
            routes.extend(_fs_routes_from_dir(pages_dir, pages_dir))

    elif project.framework == "sveltekit":
        routes_dir = root / "src" / "routes"
        if routes_dir.exists():
            for f in routes_dir.rglob("+page.svelte"):
                route = "/" + str(f.parent.relative_to(routes_dir)).replace(os.sep, "/")
                if route == "/.":
                    # +page.svelte directly under src/routes is the site root.
                    route = "/"
                routes.append(route)

    return routes
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _fs_routes_from_dir(file_dir: Path, base_dir: Path) -> list[str]:
|
|
357
|
+
"""Convert Next.js / Nuxt pages directory files to route strings."""
|
|
358
|
+
routes = []
|
|
359
|
+
for f in file_dir.rglob("*"):
|
|
360
|
+
if f.is_file() and f.suffix in {".tsx", ".ts", ".jsx", ".js", ".vue"}:
|
|
361
|
+
rel = f.relative_to(base_dir)
|
|
362
|
+
parts = list(rel.parts)
|
|
363
|
+
# Remove extension from last part
|
|
364
|
+
last = parts[-1]
|
|
365
|
+
stem = last.rsplit(".", 1)[0]
|
|
366
|
+
# Skip _app, _document, _error in Next.js
|
|
367
|
+
if stem.startswith("_"):
|
|
368
|
+
continue
|
|
369
|
+
parts[-1] = stem
|
|
370
|
+
# index → ""
|
|
371
|
+
if parts[-1] == "index":
|
|
372
|
+
parts = parts[:-1]
|
|
373
|
+
route = "/" + "/".join(parts)
|
|
374
|
+
# Convert [param] → :param, [...slug] → *
|
|
375
|
+
route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
|
|
376
|
+
route = re.sub(r"\[(\w+)\]", r":\1", route)
|
|
377
|
+
routes.append(route)
|
|
378
|
+
return routes
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def _app_router_path(page_file: Path, app_dir: Path) -> str:
|
|
382
|
+
rel = page_file.parent.relative_to(app_dir)
|
|
383
|
+
parts = list(rel.parts)
|
|
384
|
+
route = "/" + "/".join(parts) if parts else "/"
|
|
385
|
+
route = re.sub(r"\(.*?\)/", "", route) # route groups: (group)/
|
|
386
|
+
route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
|
|
387
|
+
route = re.sub(r"\[(\w+)\]", r":\1", route)
|
|
388
|
+
return route or "/"
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""Recursive file collector with ignore patterns and hash caching."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Iterator
|
|
10
|
+
|
|
11
|
+
# Directory names never descended into during a scan: package/vendor trees,
# build outputs, virtualenvs, editor settings, and tool caches.
IGNORE_DIRS: set[str] = {
    "node_modules",
    ".git",
    ".next",
    ".nuxt",
    ".svelte-kit",
    "__pycache__",
    ".venv",
    "venv",
    "env",
    ".env",
    "dist",
    "build",
    "out",
    ".output",
    "coverage",
    ".turbo",
    ".vercel",
    ".codebeacon",
    ".codesight",
    ".ai-codex",
    "vendor",
    ".cache",
    ".parcel-cache",
    ".gradle",
    "target",  # Maven/Cargo build output
    ".idea",
    ".vscode",
    "tmp",
    "temp",
    ".DS_Store",  # NOTE(review): macOS metadata *file*, not a directory — harmless but likely unintended here
    "bin",
    "obj",  # .NET build output
    ".bundle",  # Ruby bundler
    "public",  # usually static assets
    ".terraform",
}

# File suffixes treated as source code worth collecting during the walk.
CODE_EXTENSIONS: set[str] = {
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    ".py",
    ".go",
    ".vue", ".svelte",
    ".rb",
    ".java", ".kt",
    ".rs",
    ".php",
    ".swift",
    ".cs",
    ".ex", ".exs",
    ".dart",
    ".scala",
    ".clj",
    ".hs",
    ".graphql", ".gql",
    ".proto",
    ".sql",
}
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def read_ignore_file(root: str | Path, filename: str = ".codebeaconignore") -> list[str]:
    """Read .codebeaconignore at the project root and return ignore patterns.

    Blank lines and '#' comment lines are dropped; a missing or unreadable
    file yields an empty list.
    """
    try:
        raw = (Path(root) / filename).read_text(encoding="utf-8")
    except (FileNotFoundError, OSError):
        return []
    stripped = (line.strip() for line in raw.splitlines())
    return [line for line in stripped if line and not line.startswith("#")]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _should_ignore_dir(name: str, extra_ignore: set[str]) -> bool:
    """Return True when a directory name should be skipped during the walk.

    Skips the built-in IGNORE_DIRS set, any user-supplied names, and all
    hidden (dot-prefixed) directories.
    """
    return name in IGNORE_DIRS or name in extra_ignore or name.startswith(".")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def collect_files(
    root: str | Path,
    max_depth: int = 15,
    extra_ignore: list[str] | None = None,
) -> list[str]:
    """Recursively collect code files under root.

    Returns absolute paths sorted by directory then filename. Ignore patterns
    come from .codebeaconignore plus *extra_ignore*; simple glob-ish forms
    ("/dist/**", "build/") are reduced to bare directory names.
    """
    base = Path(root).resolve()
    patterns = read_ignore_file(base) + list(extra_ignore or [])

    ignore_names = {
        cleaned
        for cleaned in (
            p.lstrip("/").rstrip("/").rstrip("*").rstrip("/") for p in patterns
        )
        if cleaned
    }

    collected: list[str] = []
    _walk(base, base, 0, max_depth, ignore_names, collected)
    return sorted(collected)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _walk(
|
|
123
|
+
base: Path,
|
|
124
|
+
current: Path,
|
|
125
|
+
depth: int,
|
|
126
|
+
max_depth: int,
|
|
127
|
+
extra_ignore: set[str],
|
|
128
|
+
result: list[str],
|
|
129
|
+
) -> None:
|
|
130
|
+
if depth > max_depth:
|
|
131
|
+
return
|
|
132
|
+
try:
|
|
133
|
+
entries = sorted(current.iterdir(), key=lambda e: (e.is_file(), e.name))
|
|
134
|
+
except PermissionError:
|
|
135
|
+
return
|
|
136
|
+
|
|
137
|
+
for entry in entries:
|
|
138
|
+
if entry.is_symlink():
|
|
139
|
+
continue
|
|
140
|
+
if entry.is_dir():
|
|
141
|
+
if not _should_ignore_dir(entry.name, extra_ignore):
|
|
142
|
+
_walk(base, entry, depth + 1, max_depth, extra_ignore, result)
|
|
143
|
+
elif entry.is_file():
|
|
144
|
+
if entry.suffix in CODE_EXTENSIONS:
|
|
145
|
+
result.append(str(entry))
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def hash_file(path: str | Path) -> str:
    """Return SHA-256 hex digest (first 12 chars) of file content; '' on error."""
    digest = hashlib.sha256()
    try:
        digest.update(Path(path).read_bytes())
    except OSError:
        return ""
    return digest.hexdigest()[:12]
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def load_hash_cache(cache_dir: str | Path) -> dict:
    """Load the file hash cache from cache_dir/cache.json.

    Returns a fresh empty cache ({"version": 1, "hashes": {}}) when the file
    is missing, unreadable, malformed JSON, or not the expected shape.
    """
    cache_path = Path(cache_dir) / "cache.json"
    try:
        # Explicit encoding: the cache is UTF-8 JSON; relying on the locale
        # default can break round-tripping on non-UTF-8 platforms.
        loaded = json.loads(cache_path.read_text(encoding="utf-8"))
    except (FileNotFoundError, json.JSONDecodeError, OSError):
        return {"version": 1, "hashes": {}}
    # Guard against valid-JSON-but-wrong-shape content (e.g. a list, or a
    # dict whose "hashes" is not a mapping) so callers can always rely on
    # cache["hashes"] being a dict.
    if not isinstance(loaded, dict) or not isinstance(loaded.get("hashes"), dict):
        return {"version": 1, "hashes": {}}
    return loaded
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def save_hash_cache(cache_dir: str | Path, cache: dict) -> None:
    """Persist the hash cache to cache_dir/cache.json; non-fatal if it fails.

    Creates the cache directory if missing. Fix: writes UTF-8 explicitly so
    the file round-trips with the loader regardless of the platform's locale
    encoding (previously write_text used the locale default).
    """
    cache_path = Path(cache_dir) / "cache.json"
    try:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_text(json.dumps(cache, indent=2), encoding="utf-8")
    except OSError:
        # Caching is best-effort; a failed write must not abort the scan.
        pass
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def get_changed_files(files: list[str], cache: dict) -> tuple[list[str], dict]:
    """Compare current file hashes against *cache*.

    Returns (changed_files, refreshed_cache). A file counts as changed when
    its current hash differs from the cached one (or it was never cached).
    Entries for files no longer in *files* are pruned from the new cache.
    """
    previous: dict = cache.get("hashes", {})
    current: dict[str, str] = {}
    changed: list[str] = []

    for path in files:
        digest = hash_file(path)
        current[path] = digest
        if previous.get(path) != digest:
            changed.append(path)

    return changed, {"version": 1, "hashes": current}
|
|
File without changes
|