codebeacon 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. codebeacon/__init__.py +1 -0
  2. codebeacon/__main__.py +3 -0
  3. codebeacon/cache.py +136 -0
  4. codebeacon/cli.py +391 -0
  5. codebeacon/common/__init__.py +0 -0
  6. codebeacon/common/filters.py +170 -0
  7. codebeacon/common/symbols.py +121 -0
  8. codebeacon/common/types.py +98 -0
  9. codebeacon/config.py +144 -0
  10. codebeacon/contextmap/__init__.py +0 -0
  11. codebeacon/contextmap/generator.py +602 -0
  12. codebeacon/discover/__init__.py +0 -0
  13. codebeacon/discover/detector.py +388 -0
  14. codebeacon/discover/scanner.py +192 -0
  15. codebeacon/export/__init__.py +0 -0
  16. codebeacon/export/mcp.py +515 -0
  17. codebeacon/export/obsidian.py +812 -0
  18. codebeacon/extract/__init__.py +22 -0
  19. codebeacon/extract/base.py +372 -0
  20. codebeacon/extract/components.py +357 -0
  21. codebeacon/extract/dependencies.py +140 -0
  22. codebeacon/extract/entities.py +575 -0
  23. codebeacon/extract/queries/README.md +116 -0
  24. codebeacon/extract/queries/actix.scm +115 -0
  25. codebeacon/extract/queries/angular.scm +155 -0
  26. codebeacon/extract/queries/aspnet.scm +159 -0
  27. codebeacon/extract/queries/django.scm +122 -0
  28. codebeacon/extract/queries/express.scm +124 -0
  29. codebeacon/extract/queries/fastapi.scm +152 -0
  30. codebeacon/extract/queries/flask.scm +120 -0
  31. codebeacon/extract/queries/gin.scm +142 -0
  32. codebeacon/extract/queries/ktor.scm +144 -0
  33. codebeacon/extract/queries/laravel.scm +172 -0
  34. codebeacon/extract/queries/nestjs.scm +183 -0
  35. codebeacon/extract/queries/rails.scm +114 -0
  36. codebeacon/extract/queries/react.scm +111 -0
  37. codebeacon/extract/queries/spring_boot.scm +204 -0
  38. codebeacon/extract/queries/svelte.scm +73 -0
  39. codebeacon/extract/queries/vapor.scm +130 -0
  40. codebeacon/extract/queries/vue.scm +123 -0
  41. codebeacon/extract/routes.py +910 -0
  42. codebeacon/extract/semantic.py +280 -0
  43. codebeacon/extract/services.py +597 -0
  44. codebeacon/graph/__init__.py +1 -0
  45. codebeacon/graph/analyze.py +281 -0
  46. codebeacon/graph/build.py +320 -0
  47. codebeacon/graph/cluster.py +160 -0
  48. codebeacon/graph/enrich.py +206 -0
  49. codebeacon/skill/SKILL.md +127 -0
  50. codebeacon/wave.py +292 -0
  51. codebeacon/wiki/__init__.py +0 -0
  52. codebeacon/wiki/generator.py +376 -0
  53. codebeacon/wiki/index.py +95 -0
  54. codebeacon/wiki/templates.py +467 -0
  55. codebeacon-0.1.2.dist-info/METADATA +319 -0
  56. codebeacon-0.1.2.dist-info/RECORD +59 -0
  57. codebeacon-0.1.2.dist-info/WHEEL +4 -0
  58. codebeacon-0.1.2.dist-info/entry_points.txt +2 -0
  59. codebeacon-0.1.2.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,388 @@
1
+ """Framework auto-detection and multi/single project determination."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import re
7
+ from pathlib import Path
8
+ from typing import Optional
9
+
10
+ from codebeacon.common.types import ProjectInfo
11
+
12
+
13
+ # ── Signature files → (framework, language) ─────────────────────────────────
14
+
15
# Each entry is (signature filename or glob, framework, primary language).
# The table is scanned top to bottom and the first existing file wins, so the
# more specific signatures are listed before the generic ones.
SIGNATURE_MAP: list[tuple[str, str, str]] = [
    ("angular.json", "angular", "typescript"),
    ("nuxt.config.ts", "nuxt", "typescript"),
    ("nuxt.config.js", "nuxt", "typescript"),
    ("svelte.config.js", "sveltekit", "typescript"),
    ("svelte.config.ts", "sveltekit", "typescript"),
    ("build.gradle.kts", "ktor", "kotlin"),
    ("build.gradle", "spring-boot", "java"),  # may be Ktor as well; refined later
    ("pom.xml", "spring-boot", "java"),
    ("Package.swift", "vapor", "swift"),
    ("Cargo.toml", "rust", "rust"),  # refined to actix/axum later
    ("composer.json", "laravel", "php"),
    ("Gemfile", "rails", "ruby"),
    ("go.mod", "go", "go"),  # refined to gin/echo/fiber later
    ("package.json", "node", "typescript"),  # refined to express/nest/next later
    ("requirements.txt", "python", "python"),  # refined to fastapi/django/flask later
    ("pyproject.toml", "python", "python"),  # refined to fastapi/django/flask later
    ("setup.py", "python", "python"),
    ("*.csproj", "aspnet", "csharp"),
]

# Refinement tables: (substring to look for in a manifest, framework).
# They are applied to the content of specific files to narrow a generic
# detection down to a concrete framework; within each table the first
# matching entry wins, so more specific markers come first.
_PACKAGE_JSON_REFINEMENTS: list[tuple[str, str]] = [
    ("@nestjs/core", "nestjs"),
    ('"next"', "nextjs"),  # quoted so that e.g. "nextjs" does not match
    ("nuxt", "nuxt"),
    ("@sveltejs/kit", "sveltekit"),
    ("@angular/core", "angular"),
    ("fastify", "fastify"),
    ("koa", "koa"),
    ("express", "express"),
    ('"react"', "react"),  # plain React (CRA, Vite, etc.)
    ('"react-dom"', "react"),
]

_REQUIREMENTS_REFINEMENTS: list[tuple[str, str]] = [
    ("fastapi", "fastapi"),
    ("django", "django"),
    ("flask", "flask"),
    ("tornado", "tornado"),
    ("aiohttp", "aiohttp"),
]

_GO_MOD_REFINEMENTS: list[tuple[str, str]] = [
    ("github.com/gofiber/fiber", "fiber"),
    ("github.com/labstack/echo", "echo"),
    ("github.com/gin-gonic/gin", "gin"),
]

_CARGO_REFINEMENTS: list[tuple[str, str]] = [
    ("axum", "axum"),
    ("actix-web", "actix"),
]

_BUILD_GRADLE_KOTLIN_REFINEMENTS: list[tuple[str, str]] = [
    ("ktor", "ktor"),
    ("spring", "spring-boot"),
]
77
+
78
+
79
def _read_safe(path: Path) -> str:
    """Return the text of *path*, or "" when the file cannot be read.

    The file is decoded as UTF-8; undecodable bytes are replaced instead of
    raising, so only OS-level failures lead to the empty-string fallback.
    """
    try:
        text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""
    return text
84
+
85
+
86
def _refine_node_framework(project_dir: Path) -> str:
    """Narrow a generic Node project down to a concrete framework.

    The raw text of ``package.json`` is searched for each marker in
    ``_PACKAGE_JSON_REFINEMENTS`` in table order. The framework of the first
    marker found is returned; "node" is returned when none is present
    (including when the file is unreadable).
    """
    manifest_text = _read_safe(project_dir / "package.json")
    return next(
        (
            framework
            for marker, framework in _PACKAGE_JSON_REFINEMENTS
            if marker in manifest_text
        ),
        "node",
    )
93
+
94
+
95
def _refine_python_framework(project_dir: Path) -> str:
    """Narrow a generic Python project down to a concrete web framework.

    Dependency manifests are inspected in a fixed order; each one is
    lower-cased and searched for the markers in ``_REQUIREMENTS_REFINEMENTS``.
    The first marker found in the first manifest that contains any marker
    decides the result. Falls back to "python".
    """
    manifests = ("requirements.txt", "pyproject.toml", "setup.py", "Pipfile")
    for manifest in manifests:
        text = _read_safe(project_dir / manifest).lower()
        if not text:
            # Missing, unreadable or empty manifest: nothing to search.
            continue
        for marker, framework in _REQUIREMENTS_REFINEMENTS:
            if marker in text:
                return framework
    return "python"
104
+
105
+
106
def _refine_go_framework(project_dir: Path) -> str:
    """Pick the Go web framework referenced by ``go.mod``.

    Returns the framework of the first ``_GO_MOD_REFINEMENTS`` marker that
    occurs in the file. When no marker is found the result defaults to "gin".
    """
    go_mod_text = _read_safe(project_dir / "go.mod")
    matches = (
        framework
        for marker, framework in _GO_MOD_REFINEMENTS
        if marker in go_mod_text
    )
    # "gin" is the fallback: treated as the most common Go web framework.
    return next(matches, "gin")
112
+
113
+
114
def _refine_rust_framework(project_dir: Path) -> str:
    """Pick the Rust web framework referenced by ``Cargo.toml``.

    Returns the framework of the first ``_CARGO_REFINEMENTS`` marker that
    occurs in the file, or "actix" when none does.
    """
    cargo_text = _read_safe(project_dir / "Cargo.toml")
    matches = (
        framework
        for marker, framework in _CARGO_REFINEMENTS
        if marker in cargo_text
    )
    return next(matches, "actix")
120
+
121
+
122
def _refine_gradle_framework(project_dir: Path) -> tuple[str, str]:
    """Decide between Ktor and Spring Boot for a Gradle project.

    ``build.gradle.kts`` is read first; ``build.gradle`` is used only when
    the Kotlin script is missing, unreadable or empty. The lower-cased script
    is searched with ``_BUILD_GRADLE_KOTLIN_REFINEMENTS``.

    Returns ("ktor", "kotlin") when the first matching marker is Ktor's, and
    ("spring-boot", "java") in every other case, including no match at all.
    """
    script = _read_safe(project_dir / "build.gradle.kts") or _read_safe(
        project_dir / "build.gradle"
    )
    script = script.lower()
    first_match = next(
        (
            framework
            for marker, framework in _BUILD_GRADLE_KOTLIN_REFINEMENTS
            if marker in script
        ),
        None,
    )
    if first_match == "ktor":
        return ("ktor", "kotlin")
    return ("spring-boot", "java")
133
+
134
+
135
def detect_framework(project_dir: str | Path) -> tuple[str, str, str]:
    """Detect the framework, language and signature file for a project directory.

    A ``*.csproj`` file takes precedence. Otherwise ``SIGNATURE_MAP`` is
    scanned in order and the first signature file that exists decides the
    result; generic results (node, python, go, rust, Gradle builds) are then
    narrowed by the matching ``_refine_*`` helper.

    Returns (framework, language, signature_file).
    Returns ("unknown", "unknown", "") if nothing detected.
    """
    project_dir = Path(project_dir)

    # Path.glob yields entries in arbitrary order; sort so the reported
    # signature file is stable when a directory holds several .csproj files.
    csproj_files = sorted(project_dir.glob("*.csproj"))
    if csproj_files:
        return ("aspnet", "csharp", str(csproj_files[0]))

    # Generic framework name -> (refiner, language reported for that ecosystem).
    refiners = {
        "node": (_refine_node_framework, "typescript"),
        "python": (_refine_python_framework, "python"),
        "go": (_refine_go_framework, "go"),
        "rust": (_refine_rust_framework, "rust"),
    }

    for sig, fw, lang in SIGNATURE_MAP:
        if sig.startswith("*"):
            continue  # glob signatures were handled above
        sig_path = project_dir / sig
        if not sig_path.exists():
            continue
        if fw in refiners:
            refine, refined_lang = refiners[fw]
            return (refine(project_dir), refined_lang, str(sig_path))
        if sig in ("build.gradle.kts", "build.gradle"):
            fw, lang = _refine_gradle_framework(project_dir)
        return (fw, lang, str(sig_path))

    # No signature file found.
    return ("unknown", "unknown", "")
174
+
175
+
176
def _has_project_signature(directory: Path) -> bool:
    """Tell whether *directory* looks like a project root.

    A directory qualifies when it directly contains one of the known
    build/config files, or any ``*.csproj`` file.
    """
    known_markers = (
        "pom.xml", "build.gradle", "build.gradle.kts",
        "package.json", "requirements.txt", "pyproject.toml", "setup.py",
        "go.mod", "Gemfile", "composer.json", "Cargo.toml",
        "Package.swift", "angular.json",
        "nuxt.config.ts", "nuxt.config.js",
        "svelte.config.js", "svelte.config.ts",
    )
    if any((directory / marker).exists() for marker in known_markers):
        return True
    # .csproj files carry the project name, so they need a glob.
    return next(directory.glob("*.csproj"), None) is not None
193
+
194
+
195
def _detect_language_from_files(directory: Path) -> str:
    """Detect the dominant language under *directory* by counting code files.

    Hidden directories and common dependency/build directories are not
    descended into, so vendored packages and build output cannot outvote the
    project's own sources. Directories that cannot be listed are skipped
    rather than aborting the scan.

    Returns the language with the most files, or "unknown" when no file with
    a recognised extension is found.
    """
    ext_to_lang = {
        ".java": "java", ".kt": "kotlin",
        ".py": "python",
        ".ts": "typescript", ".tsx": "typescript", ".js": "javascript", ".jsx": "javascript",
        ".go": "go",
        ".rb": "ruby",
        ".php": "php",
        ".cs": "csharp",
        ".rs": "rust",
        ".swift": "swift",
    }
    # Same non-source directories that discover_projects refuses to treat as
    # sub-projects (hidden directories are filtered separately below).
    skip_dirs = {"node_modules", "__pycache__", "dist", "build", "target"}

    counts: dict[str, int] = {}
    # os.walk ignores directories it cannot list (onerror defaults to None).
    for _dirpath, dirnames, filenames in os.walk(directory):
        # Prune in place so os.walk never descends into skipped directories;
        # sorting keeps the traversal, and therefore tie-breaking, stable.
        dirnames[:] = sorted(
            name for name in dirnames
            if not name.startswith(".") and name not in skip_dirs
        )
        for filename in sorted(filenames):
            lang = ext_to_lang.get(os.path.splitext(filename)[1])
            if lang is not None:
                counts[lang] = counts.get(lang, 0) + 1

    if not counts:
        return "unknown"
    # On a tie, max() keeps the language that was encountered first.
    return max(counts, key=lambda k: counts[k])
220
+
221
+
222
def discover_projects(paths: list[str]) -> list[ProjectInfo]:
    """Main entry point: given a list of input paths, return discovered projects.

    Logic:
    - 2+ paths → treat each as a separate project (multi mode)
    - 1 path with signature → single project mode
    - 1 path without signature → scan 1-depth subdirs for projects
      - if any subdir has a signature → return those sub-projects (each is
        built with multi=True, even when only one is found)
      - if none but code files exist → generic single mode

    Raises:
        ValueError: if *paths* is empty, the single path is not a directory,
            or no project and no recognised code files are found.
        FileNotFoundError: if the single path does not exist.
    """
    if not paths:
        # Previously surfaced as an opaque IndexError from paths[0].
        raise ValueError("At least one path is required")

    if len(paths) > 1:
        return _multi_from_paths(paths)

    single_path = Path(paths[0]).resolve()

    if not single_path.exists():
        raise FileNotFoundError(f"Path does not exist: {single_path}")

    if not single_path.is_dir():
        raise ValueError(f"Path must be a directory: {single_path}")

    if _has_project_signature(single_path):
        return [_build_project_info(single_path, multi=False)]

    # No signature at the root: look one level down, skipping hidden
    # directories and well-known dependency/build output directories.
    skipped_names = {"node_modules", "__pycache__", "dist", "build", "target"}
    subdirs = [
        d for d in sorted(single_path.iterdir())
        if d.is_dir() and not d.name.startswith(".") and d.name not in skipped_names
    ]

    subprojects: list[ProjectInfo] = [
        _build_project_info(subdir, multi=True)
        for subdir in subdirs
        if _has_project_signature(subdir)
    ]
    if subprojects:
        return subprojects

    # No project signatures found anywhere: try generic mode
    lang = _detect_language_from_files(single_path)
    if lang == "unknown":
        raise ValueError(
            f"No projects found under {single_path}.\n"
            "Make sure the path contains source code or a project file "
            "(pom.xml, package.json, go.mod, etc.)"
        )

    return [ProjectInfo(
        name=single_path.name,
        path=str(single_path),
        framework="generic",
        language=lang,
        signature_file="",
        is_multi=False,
    )]
282
+
283
+
284
def _multi_from_paths(paths: list[str]) -> list[ProjectInfo]:
    """Treat each path as an independent project.

    Every path is resolved and validated the same way the single-path mode
    of ``discover_projects`` validates its input, then turned into a
    ``ProjectInfo`` with ``multi=True``.

    Raises:
        FileNotFoundError: if a path does not exist.
        ValueError: if a path exists but is not a directory.
    """
    projects = []
    for p in paths:
        resolved = Path(p).resolve()
        if not resolved.exists():
            raise FileNotFoundError(f"Path does not exist: {resolved}")
        if not resolved.is_dir():
            # A plain file would otherwise become a bogus "generic" project.
            raise ValueError(f"Path must be a directory: {resolved}")
        projects.append(_build_project_info(resolved, multi=True))
    return projects
293
+
294
+
295
def _build_project_info(directory: Path, multi: bool) -> ProjectInfo:
    """Build the ``ProjectInfo`` record for *directory*.

    When no framework signature is detected the project is reported as
    "generic" with no signature file, and its language is taken from a count
    of the code files under the directory.
    """
    framework, language, sig_file = detect_framework(directory)
    if framework == "unknown":
        # Fall back to a generic project described only by its language.
        framework = "generic"
        language = _detect_language_from_files(directory) or "unknown"
        sig_file = ""
    return ProjectInfo(
        name=directory.name,
        path=str(directory),
        framework=framework,
        language=language,
        signature_file=sig_file,
        is_multi=multi,
    )
315
+
316
+
317
def extract_convention_routes(project: ProjectInfo) -> list[str]:
    """Extract file-system based routes for Next.js / Nuxt / SvelteKit.

    Returns a list of route path strings. Actual RouteInfo objects are built in extract/routes.py.
    This is a discovery-time stub that returns raw route strings.

    Only ``project.path`` and ``project.framework`` are read. Files are
    visited in sorted order within each scan so the result is stable across
    runs and platforms. Any other framework yields an empty list.
    """
    root = Path(project.path)
    routes: list[str] = []

    if project.framework in ("nextjs", "next"):
        # Pages Router: pages/**/*.{ts,tsx,js,jsx} → route
        pages_dir = root / "pages"
        if pages_dir.exists():
            routes.extend(_fs_routes_from_dir(pages_dir, pages_dir))
        # App Router: app/**/page.{ts,tsx,js,jsx} → route.
        # JavaScript page files are valid too; only .tsx/.ts used to be scanned.
        app_dir = root / "app"
        if app_dir.exists():
            for page_name in ("page.tsx", "page.ts", "page.jsx", "page.js"):
                for f in sorted(app_dir.rglob(page_name)):
                    routes.append(_app_router_path(f, app_dir))

    elif project.framework == "nuxt":
        pages_dir = root / "pages"
        if pages_dir.exists():
            routes.extend(_fs_routes_from_dir(pages_dir, pages_dir))

    elif project.framework == "sveltekit":
        routes_dir = root / "src" / "routes"
        if routes_dir.exists():
            for f in sorted(routes_dir.rglob("+page.svelte")):
                route = "/" + str(f.parent.relative_to(routes_dir)).replace(os.sep, "/")
                # The routes directory itself is relative path ".", i.e. the site root.
                if route == "/.":
                    route = "/"
                routes.append(route)

    return routes
354
+
355
+
356
def _fs_routes_from_dir(file_dir: Path, base_dir: Path) -> list[str]:
    """Convert Next.js / Nuxt pages directory files to route strings.

    Every ``.tsx``/``.ts``/``.jsx``/``.js``/``.vue`` file under *file_dir*
    becomes one route, relative to *base_dir*:

    - files whose name starts with ``_`` are skipped (``_app``, ``_document``, ...)
    - a trailing ``index`` segment is dropped
    - ``[param]`` → ``:param``; ``[...slug]`` and ``[[...slug]]`` → ``*``

    Files are visited in sorted order so the result is deterministic.
    """
    page_suffixes = {".tsx", ".ts", ".jsx", ".js", ".vue"}
    routes = []
    # rglob order depends on the file system; sort for a stable result.
    for f in sorted(file_dir.rglob("*")):
        if not (f.is_file() and f.suffix in page_suffixes):
            continue
        parts = list(f.relative_to(base_dir).parts)
        # Remove extension from last part
        stem = parts[-1].rsplit(".", 1)[0]
        # Skip _app, _document, _error in Next.js
        if stem.startswith("_"):
            continue
        parts[-1] = stem
        # index → ""
        if stem == "index":
            parts = parts[:-1]
        route = "/" + "/".join(parts)
        # Optional catch-all must be rewritten before the single-bracket
        # rules, which would otherwise leave a stray "[*]" behind.
        route = re.sub(r"\[\[\.\.\.(\w+)\]\]", "*", route)
        # Convert [...slug] → *, [param] → :param
        route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
        route = re.sub(r"\[(\w+)\]", r":\1", route)
        routes.append(route)
    return routes
379
+
380
+
381
def _app_router_path(page_file: Path, app_dir: Path) -> str:
    """Return the route served by a Next.js App Router page file.

    The route is built from the directories between *app_dir* and
    *page_file*:

    - route-group segments such as ``(marketing)`` are removed wherever they
      appear, including as the last segment
    - ``[param]`` → ``:param``; ``[...slug]`` and ``[[...slug]]`` → ``*``

    A page directly in *app_dir* (or only inside route groups) maps to "/".
    """
    # Filter whole segments instead of regex-stripping "(group)/": the old
    # pattern needed a trailing slash and so missed a group in last position.
    segments = [
        segment
        for segment in page_file.parent.relative_to(app_dir).parts
        if not (segment.startswith("(") and segment.endswith(")"))
    ]
    route = "/" + "/".join(segments)
    # Optional catch-all first, otherwise the rules below leave "[*]" behind.
    route = re.sub(r"\[\[\.\.\.(\w+)\]\]", "*", route)
    route = re.sub(r"\[\.\.\.(\w+)\]", "*", route)
    route = re.sub(r"\[(\w+)\]", r":\1", route)
    return route
@@ -0,0 +1,192 @@
1
+ """Recursive file collector with ignore patterns and hash caching."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+ import json
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Iterator
10
+
11
# Directory names that are never descended into while collecting files.
IGNORE_DIRS: set[str] = {
    # Version control, editors, tooling state
    ".git", ".idea", ".vscode", ".DS_Store",
    ".codebeacon", ".codesight", ".ai-codex",
    ".terraform",
    # Dependencies and virtual environments
    "node_modules", "vendor",
    ".venv", "venv", "env", ".env",
    ".bundle",  # Ruby bundler
    # Framework / bundler caches
    ".next", ".nuxt", ".svelte-kit", ".output",
    ".turbo", ".vercel", ".cache", ".parcel-cache", ".gradle",
    "__pycache__",
    # Build output
    "dist", "build", "out",
    "target",  # Maven/Cargo build output
    "bin",
    "obj",  # .NET build output
    "coverage",
    # Scratch and static assets
    "tmp", "temp",
    "public",  # usually static assets
}

# File suffixes that count as source code.
CODE_EXTENSIONS: set[str] = {
    ".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs",
    ".vue", ".svelte",
    ".py",
    ".go",
    ".rb",
    ".java", ".kt", ".scala", ".clj",
    ".rs",
    ".php",
    ".swift",
    ".cs",
    ".ex", ".exs",
    ".dart",
    ".hs",
    ".graphql", ".gql",
    ".proto",
    ".sql",
}
69
+
70
+
71
def read_ignore_file(root: str | Path, filename: str = ".codebeaconignore") -> list[str]:
    """Read .codebeaconignore at the project root and return ignore patterns.

    Blank lines and lines starting with ``#`` are dropped; the remaining
    lines are returned stripped of surrounding whitespace. Returns an empty
    list when the file is missing or cannot be read.
    """
    ignore_path = Path(root) / filename
    try:
        # errors="replace": a stray non-UTF-8 byte must not abort the scan
        # with UnicodeDecodeError; the other patterns are still usable.
        content = ignore_path.read_text(encoding="utf-8", errors="replace")
    except OSError:  # includes FileNotFoundError
        return []
    stripped_lines = (line.strip() for line in content.splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]
83
+
84
+
85
def _should_ignore_dir(name: str, extra_ignore: set[str]) -> bool:
    """Tell whether a directory called *name* must be skipped.

    A directory is skipped when its name is in ``IGNORE_DIRS``, is in
    *extra_ignore*, or starts with a dot (every hidden directory is skipped).
    """
    return name in IGNORE_DIRS or name in extra_ignore or name.startswith(".")
94
+
95
+
96
def collect_files(
    root: str | Path,
    max_depth: int = 15,
    extra_ignore: list[str] | None = None,
) -> list[str]:
    """Recursively collect code files under root.

    Ignore patterns come from the ``.codebeaconignore`` file at *root* plus
    *extra_ignore*. Each pattern is reduced to a bare name by stripping
    leading ``/`` and trailing ``/``, ``*`` and ``/``; directories with that
    exact name are then skipped during the walk.

    Returns absolute paths as strings, sorted.
    """
    root = Path(root).resolve()

    patterns = read_ignore_file(root)
    if extra_ignore:
        patterns.extend(extra_ignore)

    # Simple patterns only: "/foo/", "foo/*" and "foo/**" all become "foo".
    cleaned = (
        pattern.lstrip("/").rstrip("/").rstrip("*").rstrip("/")
        for pattern in patterns
    )
    ignored_names: set[str] = {name for name in cleaned if name}

    collected: list[str] = []
    _walk(root, root, 0, max_depth, ignored_names, collected)
    return sorted(collected)
120
+
121
+
122
def _walk(
    base: Path,
    current: Path,
    depth: int,
    max_depth: int,
    extra_ignore: set[str],
    result: list[str],
) -> None:
    """Append the code files found under *current* to *result*, depth-first.

    Args:
        base: Root of the scan; accepted for call compatibility, not read here.
        current: Directory being listed.
        depth: Depth of *current* relative to the root (the root is 0).
        max_depth: Directories deeper than this are not listed.
        extra_ignore: Additional directory names to skip.
        result: Output list, extended in place with file paths as strings.

    Symlinks are never followed or collected. A directory that cannot be
    listed is skipped silently.
    """
    if depth > max_depth:
        return
    try:
        # Directories sort before files, then by name.
        entries = sorted(current.iterdir(), key=lambda e: (e.is_file(), e.name))
    except OSError:
        # Covers PermissionError as well as directories that vanish or turn
        # out to be unreadable mid-scan; one bad directory must not abort
        # the whole collection.
        return

    for entry in entries:
        if entry.is_symlink():
            continue
        if entry.is_dir():
            if not _should_ignore_dir(entry.name, extra_ignore):
                _walk(base, entry, depth + 1, max_depth, extra_ignore, result)
        elif entry.is_file():
            if entry.suffix in CODE_EXTENSIONS:
                result.append(str(entry))
146
+
147
+
148
def hash_file(path: str | Path) -> str:
    """Return the first 12 hex characters of the file's SHA-256 digest.

    Returns "" when the file cannot be read.
    """
    try:
        digest = hashlib.sha256(Path(path).read_bytes())
    except OSError:
        return ""
    return digest.hexdigest()[:12]
155
+
156
+
157
def load_hash_cache(cache_dir: str | Path) -> dict:
    """Load the file hash cache from cache_dir/cache.json.

    Returns a fresh empty cache (``{"version": 1, "hashes": {}}``) when the
    file is missing, unreadable, not valid UTF-8 JSON, or does not contain a
    JSON object.
    """
    cache_path = Path(cache_dir) / "cache.json"
    try:
        loaded = json.loads(cache_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return {"version": 1, "hashes": {}}
    if not isinstance(loaded, dict):
        # Valid JSON of the wrong shape (list, number, ...) would break
        # callers that use cache.get(...); treat it as a corrupt cache.
        return {"version": 1, "hashes": {}}
    return loaded
164
+
165
+
166
def save_hash_cache(cache_dir: str | Path, cache: dict) -> None:
    """Write *cache* as indented JSON to cache_dir/cache.json.

    The cache directory is created when missing. OS-level failures are
    ignored on purpose: failing to persist the cache is non-fatal.
    """
    target = Path(cache_dir) / "cache.json"
    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        serialized = json.dumps(cache, indent=2)
        target.write_text(serialized)
    except OSError:
        return
174
+
175
+
176
def get_changed_files(files: list[str], cache: dict) -> tuple[list[str], dict]:
    """Compare current file hashes against *cache*.

    Returns a pair: the files whose hash differs from the cached value (or
    that are not cached yet), and a new cache dict holding hashes for exactly
    the files in *files*.
    """
    previous = cache.get("hashes", {})
    merged: dict[str, str] = {**previous}
    changed: list[str] = []

    for path in files:
        digest = hash_file(path)
        merged[path] = digest
        if previous.get(path) != digest:
            changed.append(path)

    # Drop cached entries for files that are no longer part of the scan.
    still_present = set(files)
    pruned = {path: digest for path, digest in merged.items() if path in still_present}

    return changed, {"version": 1, "hashes": pruned}
File without changes