codemap-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. analysis/__init__.py +1 -0
  2. analysis/architecture/__init__.py +1 -0
  3. analysis/architecture/architecture_engine.py +155 -0
  4. analysis/architecture/dependency_cycles.py +103 -0
  5. analysis/architecture/risk_radar.py +220 -0
  6. analysis/call_graph/__init__.py +1 -0
  7. analysis/call_graph/call_extractor.py +91 -0
  8. analysis/call_graph/call_graph_builder.py +1 -0
  9. analysis/call_graph/call_resolver.py +56 -0
  10. analysis/call_graph/context_models.py +1 -0
  11. analysis/call_graph/cross_file_resolver.py +122 -0
  12. analysis/call_graph/execution_tracker.py +1 -0
  13. analysis/call_graph/flow_builder.py +1 -0
  14. analysis/call_graph/models.py +1 -0
  15. analysis/core/__init__.py +1 -0
  16. analysis/core/ast_context.py +1 -0
  17. analysis/core/ast_parser.py +8 -0
  18. analysis/core/class_extractor.py +35 -0
  19. analysis/core/function_extractor.py +16 -0
  20. analysis/core/import_extractor.py +43 -0
  21. analysis/explain/__init__.py +1 -0
  22. analysis/explain/docstring_extractor.py +45 -0
  23. analysis/explain/explain_runner.py +177 -0
  24. analysis/explain/repo_summary_generator.py +138 -0
  25. analysis/explain/return_analyzer.py +114 -0
  26. analysis/explain/risk_flags.py +1 -0
  27. analysis/explain/signature_extractor.py +104 -0
  28. analysis/explain/summary_generator.py +282 -0
  29. analysis/graph/__init__.py +1 -0
  30. analysis/graph/callgraph_index.py +117 -0
  31. analysis/graph/entrypoint_detector.py +1 -0
  32. analysis/graph/impact_analyzer.py +210 -0
  33. analysis/indexing/__init__.py +1 -0
  34. analysis/indexing/import_resolver.py +156 -0
  35. analysis/indexing/symbol_index.py +150 -0
  36. analysis/runners/__init__.py +1 -0
  37. analysis/runners/phase4_runner.py +137 -0
  38. analysis/utils/__init__.py +1 -0
  39. analysis/utils/ast_helpers.py +1 -0
  40. analysis/utils/cache_manager.py +659 -0
  41. analysis/utils/path_resolver.py +1 -0
  42. analysis/utils/repo_fetcher.py +469 -0
  43. cli.py +1728 -0
  44. codemap_cli.py +11 -0
  45. codemap_python-0.1.0.dist-info/METADATA +399 -0
  46. codemap_python-0.1.0.dist-info/RECORD +58 -0
  47. codemap_python-0.1.0.dist-info/WHEEL +5 -0
  48. codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
  49. codemap_python-0.1.0.dist-info/top_level.txt +5 -0
  50. security_utils.py +51 -0
  51. ui/__init__.py +1 -0
  52. ui/app.py +2160 -0
  53. ui/device_id.py +27 -0
  54. ui/static/app.js +2703 -0
  55. ui/static/styles.css +1268 -0
  56. ui/templates/index.html +231 -0
  57. ui/utils/__init__.py +1 -0
  58. ui/utils/registry_manager.py +190 -0
@@ -0,0 +1,659 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import os
6
+ import shutil
7
+ import tempfile
8
+ from datetime import datetime, timezone
9
+ from threading import RLock
10
+ from typing import Any, Dict, List, Optional, Tuple
11
+
12
+ from security_utils import redact_secrets
13
+
14
+ _LOCK = RLock()
15
+ _SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
16
+ _SKIP_DIRS = {".git", "__pycache__", ".codemap_cache", ".venv", "venv", "node_modules"}
17
+
18
+
19
+ def _project_root() -> str:
20
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
21
+
22
+
23
def cache_root(base_dir: Optional[str] = None) -> str:
    """Return (and create if missing) the cache root directory.

    Uses *base_dir* when given, otherwise ``<project root>/.codemap_cache``.
    """
    chosen = base_dir if base_dir else os.path.join(_project_root(), ".codemap_cache")
    root = os.path.abspath(chosen)
    os.makedirs(root, exist_ok=True)
    return root
27
+
28
+
29
+ def _now_iso() -> str:
30
+ return datetime.now(timezone.utc).isoformat()
31
+
32
+
33
+ def _parse_iso(value: Optional[str]) -> Optional[datetime]:
34
+ if not value:
35
+ return None
36
+ try:
37
+ return datetime.fromisoformat(str(value).replace("Z", "+00:00"))
38
+ except Exception:
39
+ return None
40
+
41
+
42
+ def _safe_int(value: Any, default: int) -> int:
43
+ try:
44
+ return int(value)
45
+ except Exception:
46
+ return int(default)
47
+
48
+
49
+ def _sha256_text(text: str) -> str:
50
+ return hashlib.sha256(text.encode("utf-8", errors="ignore")).hexdigest()
51
+
52
+
53
+ def _normalize_target(value: str) -> str:
54
+ raw = str(value or "").strip()
55
+ if not raw:
56
+ return ""
57
+ if os.path.exists(raw):
58
+ raw = os.path.abspath(raw)
59
+ raw = os.path.normpath(raw)
60
+ return raw.replace("\\", "/").lower()
61
+
62
+
63
def compute_repo_hash(repo_target: str) -> str:
    """First 16 hex chars of the SHA-256 of the normalized target.

    A blank target hashes the sentinel "<empty>" so the result is stable.
    """
    normalized = _normalize_target(repo_target) or "<empty>"
    return _sha256_text(normalized)[:16]
68
+
69
+
70
+ def _is_probable_repo_hash(value: str) -> bool:
71
+ v = str(value or "").strip().lower()
72
+ if len(v) < 8 or len(v) > 64:
73
+ return False
74
+ return all(ch in "0123456789abcdef" for ch in v)
75
+
76
+
77
def get_cache_dir(repo_target: str, base_dir: Optional[str] = None) -> str:
    """Resolve the cache directory for a repo target or an existing hash.

    A target that already looks like a repo hash AND has a directory under
    the cache root is used verbatim; anything else is hashed first.
    """
    root = cache_root(base_dir)
    target = str(repo_target or "").strip()
    reuse_as_hash = _is_probable_repo_hash(target) and os.path.isdir(os.path.join(root, target))
    repo_hash = target if reuse_as_hash else compute_repo_hash(target)
    return os.path.join(root, repo_hash)
85
+
86
+
87
def _metadata_path(repo_hash: str, base_dir: Optional[str] = None) -> str:
    """Path of ``metadata.json`` inside the repo-hash cache directory."""
    directory = os.path.join(cache_root(base_dir), repo_hash)
    return os.path.join(directory, "metadata.json")
89
+
90
+
91
def _manifest_path(repo_dir: str, base_dir: Optional[str] = None) -> str:
    """Path of ``manifest.json`` for *repo_dir*'s cache directory."""
    cache_dir = get_cache_dir(repo_dir, base_dir=base_dir)
    return os.path.join(cache_dir, "manifest.json")
93
+
94
+
95
def _policy_path(base_dir: Optional[str] = None) -> str:
    """Path of the retention policy file directly under the cache root."""
    return os.path.join(cache_root(base_dir), "retention.json")
97
+
98
+
99
def _workspaces_path(base_dir: Optional[str] = None) -> str:
    """Path of the workspaces registry file directly under the cache root."""
    return os.path.join(cache_root(base_dir), "workspaces.json")
101
+
102
+
103
+ def _dir_size(path: str) -> int:
104
+ total = 0
105
+ if not os.path.isdir(path):
106
+ return 0
107
+ for root, _dirs, files in os.walk(path):
108
+ for name in files:
109
+ fp = os.path.join(root, name)
110
+ try:
111
+ total += int(os.path.getsize(fp))
112
+ except OSError:
113
+ continue
114
+ return int(total)
115
+
116
+
117
+ def _atomic_json_write(path: str, payload: Dict[str, Any]) -> None:
118
+ os.makedirs(os.path.dirname(path), exist_ok=True)
119
+ fd, tmp = tempfile.mkstemp(prefix=".tmp_", suffix=".json", dir=os.path.dirname(path))
120
+ try:
121
+ with os.fdopen(fd, "w", encoding="utf-8") as f:
122
+ json.dump(payload, f, indent=2)
123
+ os.replace(tmp, path)
124
+ finally:
125
+ if os.path.exists(tmp):
126
+ try:
127
+ os.remove(tmp)
128
+ except OSError:
129
+ pass
130
+
131
+
132
+ def _load_json(path: str, default: Any) -> Any:
133
+ if not os.path.exists(path):
134
+ return default
135
+ try:
136
+ with open(path, "r", encoding="utf-8") as f:
137
+ data = json.load(f)
138
+ return data
139
+ except Exception:
140
+ return default
141
+
142
+
143
def _scrub_payload(payload: Any) -> Any:
    """Recursively remove sensitive-looking dict keys and redact string values.

    Keys whose lowercase form contains any _SENSITIVE_KEYS marker are dropped
    entirely; strings pass through redact_secrets; other scalars are returned
    unchanged.
    """
    if isinstance(payload, dict):
        cleaned: Dict[str, Any] = {}
        for raw_key, raw_val in payload.items():
            key = str(raw_key or "")
            if any(marker in key.lower() for marker in _SENSITIVE_KEYS):
                continue  # e.g. api_key / token / password entries
            cleaned[key] = _scrub_payload(raw_val)
        return cleaned
    if isinstance(payload, list):
        return [_scrub_payload(item) for item in payload]
    if isinstance(payload, str):
        return redact_secrets(payload)
    return payload
158
+
159
+
160
def load_policy(base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load the retention policy, layered over defaults and sanitized.

    TTL fields are clamped to non-negative ints and malformed list/dict
    fields are reset; the result is scrubbed of sensitive keys.
    """
    defaults: Dict[str, Any] = {
        "default_ttl_days": 14,
        "workspaces_ttl_days": 7,
        "never_delete_repo_hashes": [],
        "repo_policies": {},
        "last_cleanup_iso": "",
    }
    stored = _load_json(_policy_path(base_dir), defaults)
    if not isinstance(stored, dict):
        return dict(defaults)
    merged = {**defaults, **stored}
    merged["default_ttl_days"] = max(0, _safe_int(merged.get("default_ttl_days"), 14))
    merged["workspaces_ttl_days"] = max(0, _safe_int(merged.get("workspaces_ttl_days"), 7))
    if not isinstance(merged.get("never_delete_repo_hashes"), list):
        merged["never_delete_repo_hashes"] = []
    if not isinstance(merged.get("repo_policies"), dict):
        merged["repo_policies"] = {}
    return _scrub_payload(merged)
180
+
181
+
182
def save_policy(policy: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Merge *policy* over the stored policy, sanitize, persist, and return it.

    Runs under the module lock so concurrent saves cannot interleave.
    """
    with _LOCK:
        merged = dict(load_policy(base_dir))
        if isinstance(policy, dict):
            merged.update(policy)
        merged["default_ttl_days"] = max(0, _safe_int(merged.get("default_ttl_days"), 14))
        merged["workspaces_ttl_days"] = max(0, _safe_int(merged.get("workspaces_ttl_days"), 7))
        merged["last_cleanup_iso"] = str(merged.get("last_cleanup_iso", "") or "")
        merged = _scrub_payload(merged)
        _atomic_json_write(_policy_path(base_dir), merged)
        return merged
194
+
195
+
196
def collect_fingerprints(repo_dir: str) -> Dict[str, Dict[str, int]]:
    """Fingerprint every ``*.py`` file under *repo_dir*.

    Returns repo-relative forward-slash paths mapped to
    ``{"mtime_ns", "size"}``. Directories in _SKIP_DIRS are pruned and
    unreadable files are silently skipped.
    """
    repo_root = os.path.abspath(repo_dir)
    fingerprints: Dict[str, Dict[str, int]] = {}
    if not os.path.isdir(repo_root):
        return fingerprints
    for dirpath, subdirs, filenames in os.walk(repo_root):
        subdirs[:] = [d for d in subdirs if d not in _SKIP_DIRS]
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            full = os.path.join(dirpath, filename)
            try:
                st = os.stat(full)
            except OSError:
                continue
            rel = os.path.relpath(full, repo_root).replace("\\", "/")
            # st_mtime_ns is standard; fall back to seconds-derived ns just in case.
            mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1e9)))
            fingerprints[rel] = {"mtime_ns": mtime_ns, "size": int(st.st_size)}
    return fingerprints
214
+
215
+
216
def diff_fingerprints(previous: Dict[str, Any], current: Dict[str, Any]) -> Dict[str, Any]:
    """Report which file keys differ between two fingerprint maps.

    Non-dict inputs are treated as empty; keys present in either map with
    unequal values (including additions/removals) count as changed.
    """
    prev = previous if isinstance(previous, dict) else {}
    cur = current if isinstance(current, dict) else {}
    all_keys = set(prev) | set(cur)
    changed = [key for key in sorted(all_keys) if prev.get(key) != cur.get(key)]
    return {"changed_files": changed, "changed_count": len(changed)}
225
+
226
+
227
def build_manifest(repo_dir: str, fingerprints: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Assemble a manifest payload for *repo_dir*.

    Extra *metadata* is scrubbed and merged on top, so it may override the
    base fields (matching the original behavior).
    """
    extra = metadata if isinstance(metadata, dict) else {}
    version = str(extra.get("analysis_version", "2.2") or "2.2")
    payload: Dict[str, Any] = {
        "repo_hash": compute_repo_hash(repo_dir),
        "repo_dir": os.path.abspath(repo_dir),
        "analysis_version": version,
        "updated_at": _now_iso(),
        "fingerprints": fingerprints if isinstance(fingerprints, dict) else {},
    }
    payload.update(_scrub_payload(extra))
    return payload
240
+
241
+
242
def load_manifest(repo_dir: str, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load the cached manifest for *repo_dir*; {} when absent or malformed."""
    loaded = _load_json(_manifest_path(repo_dir, base_dir), {})
    if isinstance(loaded, dict):
        return loaded
    return {}
245
+
246
+
247
def save_manifest(repo_dir: str, manifest: Dict[str, Any], base_dir: Optional[str] = None) -> None:
    """Scrub and atomically persist *manifest* for *repo_dir* under the lock."""
    with _LOCK:
        body = manifest if isinstance(manifest, dict) else {}
        _atomic_json_write(_manifest_path(repo_dir, base_dir), _scrub_payload(body))
251
+
252
+
253
def should_rebuild(repo_dir: str, analysis_version: str = "2.2", base_dir: Optional[str] = None) -> bool:
    """Decide whether *repo_dir* needs re-analysis.

    True when the manifest is missing, was produced by a different analysis
    version, or any tracked file fingerprint has changed.
    """
    manifest = load_manifest(repo_dir, base_dir=base_dir)
    if not manifest:
        return True
    if str(manifest.get("analysis_version", "") or "") != str(analysis_version or ""):
        return True
    previous = manifest.get("fingerprints")
    if not isinstance(previous, dict):
        previous = {}
    delta = diff_fingerprints(previous, collect_fingerprints(repo_dir))
    return bool(delta.get("changed_count", 0))
263
+
264
+
265
def _default_metadata(repo_hash: str) -> Dict[str, Any]:
    """Fresh metadata record for *repo_hash*, both timestamps set to now."""
    stamp = _now_iso()
    record: Dict[str, Any] = {
        "repo_hash": repo_hash,
        "source": "filesystem",
        "repo_path": "",
        "repo_url": "",
        "ref": "",
        "workspace_dir": "",
        "analysis_version": "2.2",
        "created_at": stamp,
        "last_accessed_at": stamp,
        "retention_days": 14,
        "private_mode": False,
        "ai_fingerprint_source": "",
    }
    return record
281
+
282
+
283
def _load_metadata(repo_hash: str, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load metadata for *repo_hash*, layered over defaults and sanitized."""
    stored = _load_json(_metadata_path(repo_hash, base_dir=base_dir), {})
    merged = _default_metadata(repo_hash)
    if isinstance(stored, dict):
        merged.update(stored)
    merged["repo_hash"] = repo_hash  # never trust a stored hash over the argument
    merged["retention_days"] = max(0, _safe_int(merged.get("retention_days"), 14))
    merged["private_mode"] = bool(merged.get("private_mode", False))
    return _scrub_payload(merged)
293
+
294
+
295
def _save_metadata(repo_hash: str, payload: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Merge *payload* over defaults, sanitize, persist atomically, return it."""
    record = _default_metadata(repo_hash)
    if isinstance(payload, dict):
        record.update(payload)
    record["repo_hash"] = repo_hash  # keep the key authoritative
    record["retention_days"] = max(0, _safe_int(record.get("retention_days"), 14))
    record["private_mode"] = bool(record.get("private_mode", False))
    record = _scrub_payload(record)
    _atomic_json_write(_metadata_path(repo_hash, base_dir=base_dir), record)
    return record
305
+
306
+
307
def upsert_metadata(repo_hash: str, **fields: Any) -> Dict[str, Any]:
    """Update stored metadata for *repo_hash* with scrubbed *fields*.

    Timestamps that are empty after the merge are backfilled with now.
    """
    with _LOCK:
        record = _load_metadata(repo_hash)
        record.update(_scrub_payload(fields))
        for stamp in ("created_at", "last_accessed_at"):
            if not str(record.get(stamp, "") or ""):
                record[stamp] = _now_iso()
        return _save_metadata(repo_hash, record)
316
+
317
+
318
def set_retention(repo_hash: str, days: int) -> Dict[str, Any]:
    """Set the per-repo retention TTL (clamped to >= 0) and touch access time."""
    with _LOCK:
        record = _load_metadata(repo_hash)
        record["retention_days"] = max(0, int(days))
        record["last_accessed_at"] = _now_iso()
        return _save_metadata(repo_hash, record)
324
+
325
+
326
def touch_last_accessed(repo_hash: str) -> Dict[str, Any]:
    """Refresh ``last_accessed_at`` for *repo_hash* and persist the record."""
    with _LOCK:
        record = _load_metadata(repo_hash)
        record["last_accessed_at"] = _now_iso()
        return _save_metadata(repo_hash, record)
331
+
332
+
333
def compute_analysis_fingerprint(repo_dir: str) -> str:
    """Fingerprint the cached analysis artifacts for *repo_dir*.

    Hashes the manifest's analysis version plus the size and mtime of each
    key artifact file (missing artifacts contribute a "missing" token), so
    the fingerprint changes whenever any artifact is rewritten.

    Fix: the previous ``(manifest or {}).get(...)`` raised AttributeError
    when manifest.json contained a non-dict JSON value (e.g. an array);
    the manifest is now normalized to a dict first.
    """
    cache_dir = get_cache_dir(repo_dir)
    manifest = _load_json(os.path.join(cache_dir, "manifest.json"), {})
    if not isinstance(manifest, dict):
        manifest = {}
    parts: List[str] = [str(manifest.get("analysis_version", "") or "")]
    for name in ("resolved_calls.json", "project_tree.json", "risk_radar.json", "analysis_metrics.json"):
        path = os.path.join(cache_dir, name)
        if os.path.exists(path):
            try:
                st = os.stat(path)
                mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1e9)))
                parts.append(f"{name}:{int(st.st_size)}:{mtime_ns}")
            except OSError:
                # Raced away between exists() and stat(); treat as missing.
                parts.append(f"{name}:missing")
        else:
            parts.append(f"{name}:missing")
    return _sha256_text("|".join(parts))
349
+
350
+
351
+ def _artifact_flags(cache_dir: str) -> Dict[str, bool]:
352
+ return {
353
+ "resolved_calls": os.path.exists(os.path.join(cache_dir, "resolved_calls.json")),
354
+ "explain": os.path.exists(os.path.join(cache_dir, "explain.json")),
355
+ "project_tree": os.path.exists(os.path.join(cache_dir, "project_tree.json")),
356
+ "risk_radar": os.path.exists(os.path.join(cache_dir, "risk_radar.json")),
357
+ "dependency_cycles": os.path.exists(os.path.join(cache_dir, "dependency_cycles.json")),
358
+ }
359
+
360
+
361
def _compute_expiry(meta: Dict[str, Any], policy: Dict[str, Any], now: Optional[datetime] = None) -> Dict[str, Any]:
    """Compute the expiry state for one cache entry.

    Entries in the policy's never-delete list, or with an effective TTL of
    exactly 0, are "pinned" and never expire. Otherwise the age since last
    access (falling back to creation time) is compared against the TTL.
    A negative stored TTL means "use the policy default".
    """
    moment = now or datetime.now(timezone.utc)
    repo_hash = str(meta.get("repo_hash", "") or "")
    pinned_hashes = {str(h) for h in (policy.get("never_delete_repo_hashes") or [])}
    if repo_hash in pinned_hashes:
        return {"mode": "pinned", "days_left": None, "expired": False}

    ttl_days = _safe_int(meta.get("retention_days"), -1)
    if ttl_days < 0:
        ttl_days = _safe_int(policy.get("default_ttl_days"), 14)
    if ttl_days == 0:
        return {"mode": "pinned", "days_left": None, "expired": False}

    anchor = _parse_iso(str(meta.get("last_accessed_at", "") or "")) or _parse_iso(str(meta.get("created_at", "") or ""))
    if anchor is None:
        # No usable timestamp: report the full TTL remaining, never expired.
        return {"mode": "ttl", "days_left": ttl_days, "expired": False}

    age_days = (moment - anchor).total_seconds() / 86400.0
    return {
        "mode": "ttl",
        "days_left": int(ttl_days - age_days),
        "expired": bool(age_days >= float(ttl_days)),
    }
382
+
383
+
384
def _list_repo_hash_dirs(base_dir: Optional[str] = None) -> List[str]:
    """Sorted names of cache-root subdirectories that look like repo hashes.

    The reserved "workspaces" and "_local" entries are excluded.
    """
    root = cache_root(base_dir)
    names: List[str] = []
    for entry in sorted(os.listdir(root)):
        if entry in {"workspaces", "_local"}:
            continue
        if not os.path.isdir(os.path.join(root, entry)):
            continue
        if _is_probable_repo_hash(entry):
            names.append(entry)
    return names
397
+
398
+
399
def list_caches(base_dir: Optional[str] = None) -> List[Dict[str, Any]]:
    """Return one summary row per cached repo under the cache root.

    Each row carries provenance (source/url/path/ref), timestamps, retention
    settings, on-disk size, artifact presence flags, and computed expiry.

    Fix: the second, unguarded ``manifest.get("analysis_version", ...)`` call
    crashed with AttributeError when manifest.json contained a non-dict JSON
    value; the manifest is now normalized to a dict once, up front.
    """
    root = cache_root(base_dir)
    policy = load_policy(base_dir)
    now = datetime.now(timezone.utc)
    rows: List[Dict[str, Any]] = []

    for repo_hash in _list_repo_hash_dirs(base_dir):
        cache_dir = os.path.join(root, repo_hash)
        meta = _load_metadata(repo_hash, base_dir=base_dir)
        manifest = _load_json(os.path.join(cache_dir, "manifest.json"), {})
        if not isinstance(manifest, dict):
            manifest = {}
        # Backfill the analysis version from the manifest when metadata lacks it.
        if manifest.get("analysis_version") and not meta.get("analysis_version"):
            meta["analysis_version"] = manifest.get("analysis_version")

        expires = _compute_expiry(meta, policy, now=now)
        rows.append(
            {
                "repo_hash": repo_hash,
                "cache_dir": cache_dir,
                "source": str(meta.get("source", "filesystem") or "filesystem"),
                "repo_url": str(meta.get("repo_url", "") or ""),
                "repo_path": str(meta.get("repo_path", "") or ""),
                "ref": str(meta.get("ref", "") or ""),
                "workspace_dir": str(meta.get("workspace_dir", "") or ""),
                "analysis_version": str(meta.get("analysis_version", "") or manifest.get("analysis_version", "") or ""),
                "created_at": str(meta.get("created_at", "") or ""),
                "last_accessed_at": str(meta.get("last_accessed_at", "") or ""),
                "retention_days": int(meta.get("retention_days", policy.get("default_ttl_days", 14)) or 14),
                "private_mode": bool(meta.get("private_mode", False)),
                "size_bytes": _dir_size(cache_dir),
                "has": _artifact_flags(cache_dir),
                "expires": expires,
            }
        )

    return rows
434
+
435
+
436
def _load_workspaces(base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load the workspaces registry, normalized to its two expected keys."""
    raw = _load_json(_workspaces_path(base_dir), {})
    if not isinstance(raw, dict):
        return {"active_repo_hash": "", "repos": []}
    repos = raw.get("repos")
    if not isinstance(repos, list):
        repos = []
    return {"active_repo_hash": str(raw.get("active_repo_hash", "") or ""), "repos": repos}
443
+
444
+
445
def _save_workspaces(data: Dict[str, Any], base_dir: Optional[str] = None) -> None:
    """Normalize, scrub and atomically persist the workspaces registry.

    An active hash that no longer matches any registered repo is cleared.
    """
    incoming = data if isinstance(data, dict) else {}
    repos = incoming.get("repos")
    if not isinstance(repos, list):
        repos = []
    payload = {
        "active_repo_hash": str(incoming.get("active_repo_hash", "") or ""),
        "repos": _scrub_payload(repos),
    }
    known_hashes = {str((r or {}).get("repo_hash", "")) for r in repos}
    if payload["active_repo_hash"] and payload["active_repo_hash"] not in known_hashes:
        payload["active_repo_hash"] = ""
    _atomic_json_write(_workspaces_path(base_dir), payload)
455
+
456
+
457
def _workspace_refcounts(base_dir: Optional[str] = None) -> Dict[str, int]:
    """Count how many cache entries / registry repos reference each workspace dir.

    Used by clear_cache to decide whether a workspace directory can be
    deleted (refcount <= 1) or must be preserved. Only paths that resolve
    inside ``<cache root>/workspaces`` are counted; anything outside is
    ignored for safety.
    """
    refs: Dict[str, int] = {}
    root = cache_root(base_dir)
    ws_root = os.path.realpath(os.path.join(root, "workspaces"))

    # Pass 1: each cache entry's recorded workspace_dir counts as one reference.
    for item in list_caches(base_dir):
        ws = str(item.get("workspace_dir", "") or "").strip()
        if not ws:
            continue
        ws_real = os.path.realpath(ws)
        try:
            # Containment check: only count dirs under the workspaces root.
            if os.path.commonpath([ws_root, ws_real]) != ws_root:
                continue
        except ValueError:
            # commonpath raises for mixed absolute/relative or cross-drive paths.
            continue
        refs[ws_real] = refs.get(ws_real, 0) + 1

    # Pass 2: repos in the workspaces registry reference the PARENT of their
    # recorded path (the registry stores the checkout path inside the workspace).
    ws = _load_workspaces(base_dir)
    for repo in ws.get("repos", []):
        path = str((repo or {}).get("path", "") or "").strip()
        if not path:
            continue
        real = os.path.realpath(path)
        try:
            if os.path.commonpath([ws_root, real]) != ws_root:
                continue
        except ValueError:
            continue
        workspace_dir = os.path.dirname(real)
        refs[workspace_dir] = refs.get(workspace_dir, 0) + 1

    return refs
489
+
490
+
491
+ def _on_rm_error(func, path, exc_info):
492
+ try:
493
+ os.chmod(path, 0o700)
494
+ except OSError:
495
+ pass
496
+ try:
497
+ func(path)
498
+ except Exception:
499
+ pass
500
+
501
+
502
def _safe_rmtree(path: str, allowed_root: str) -> Tuple[bool, Optional[str]]:
    """Recursively delete *path* only if it resolves inside *allowed_root*.

    Returns ``(deleted, error)``: ``(False, None)`` when there was nothing
    to do, ``(False, code)`` on a containment or deletion failure, and
    ``(True, None)`` on success. Symlinks are resolved before the check.
    """
    if not path or not os.path.exists(path):
        return False, None
    root_real = os.path.realpath(allowed_root)
    target_real = os.path.realpath(path)
    try:
        inside = os.path.commonpath([root_real, target_real]) == root_real
    except ValueError:
        inside = False  # e.g. different drives / mixed path kinds
    if not inside:
        return False, "TARGET_OUTSIDE_ALLOWED_ROOT"
    try:
        shutil.rmtree(target_real, onerror=_on_rm_error)
    except Exception as exc:
        return False, str(exc)
    if os.path.exists(target_real):
        return False, "DELETE_INCOMPLETE"
    return True, None
521
+
522
+
523
def clear_cache(repo_hash: str, dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Delete a repo's cache directory and (when unshared) its workspace.

    Plans the deletion first (``would_delete``), estimates freed bytes, and
    returns the plan unchanged when *dry_run* is True. Otherwise deletes via
    _safe_rmtree (which refuses targets outside the cache / workspaces roots)
    and removes the repo from the workspaces registry. A workspace directory
    referenced by more than one cache entry is preserved, not deleted.
    """
    repo_hash = str(repo_hash or "").strip()
    root = cache_root(base_dir)
    cache_dir = os.path.join(root, repo_hash)
    # Metadata is read BEFORE deletion so workspace_dir is still available.
    meta = _load_metadata(repo_hash, base_dir=base_dir)
    workspace_dir = str(meta.get("workspace_dir", "") or "")

    would_delete: List[str] = []
    workspace_preserved: List[str] = []
    errors: List[str] = []

    if os.path.isdir(cache_dir):
        would_delete.append(os.path.abspath(cache_dir))

    # Only delete the workspace when this entry is its sole referrer.
    ws_refs = _workspace_refcounts(base_dir)
    if workspace_dir and os.path.isdir(workspace_dir):
        ws_real = os.path.realpath(workspace_dir)
        if ws_refs.get(ws_real, 0) <= 1:
            would_delete.append(os.path.abspath(workspace_dir))
        else:
            workspace_preserved.append(os.path.abspath(workspace_dir))

    # Size estimate must happen before anything is removed.
    freed = sum(_dir_size(p) for p in would_delete if os.path.exists(p))

    if dry_run:
        return {
            "ok": True,
            "repo_hash": repo_hash,
            "dry_run": True,
            "deleted": False,
            "cache_dir": cache_dir,
            "workspace_dir": workspace_dir or None,
            "would_delete": would_delete,
            "workspace_preserved": workspace_preserved,
            "freed_bytes_estimate": int(freed),
            "errors": errors,
            "message": "Dry run only",
        }

    deleted_any = False
    # The cache dir must stay inside the cache root.
    if os.path.isdir(cache_dir):
        ok, err = _safe_rmtree(cache_dir, root)
        if ok:
            deleted_any = True
        elif err:
            errors.append(f"cache_dir:{err}")

    # The workspace must stay inside <root>/workspaces; only delete it if it
    # survived the refcount check above (i.e. it is in would_delete).
    if workspace_dir and os.path.isdir(workspace_dir) and os.path.abspath(workspace_dir) in would_delete:
        ws_root = os.path.join(root, "workspaces")
        ok, err = _safe_rmtree(workspace_dir, ws_root)
        if ok:
            deleted_any = True
        elif err:
            errors.append(f"workspace_dir:{err}")

    # Drop this repo from the workspaces registry and clear a dangling
    # active-repo pointer.
    ws = _load_workspaces(base_dir)
    repos = ws.get("repos", []) if isinstance(ws.get("repos"), list) else []
    repos = [r for r in repos if str((r or {}).get("repo_hash", "") or "") != repo_hash]
    ws["repos"] = repos
    active = str(ws.get("active_repo_hash", "") or "")
    if active == repo_hash:
        ws["active_repo_hash"] = ""
    _save_workspaces(ws, base_dir)

    return {
        "ok": True,
        "repo_hash": repo_hash,
        "dry_run": False,
        "deleted": bool(deleted_any and not errors),
        "cache_dir": cache_dir,
        "workspace_dir": workspace_dir or None,
        "would_delete": would_delete,
        "workspace_preserved": workspace_preserved,
        "freed_bytes_estimate": int(freed),
        "errors": errors,
        "message": "Deleted" if deleted_any and not errors else "Nothing deleted" if not would_delete else "Completed with warnings",
    }
600
+
601
+
602
def delete_repo(repo_hash: str, dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Compatibility alias: delegate straight to clear_cache."""
    return clear_cache(repo_hash=repo_hash, dry_run=dry_run, base_dir=base_dir)
604
+
605
+
606
def sweep_expired(
    dry_run: bool = False,
    base_dir: Optional[str] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Run clear_cache over every expired cache entry and aggregate results.

    Expiry is whatever list_caches computed via _compute_expiry (relative to
    *now*, defaulting to current UTC time). Updates the policy's
    ``last_cleanup_iso`` stamp even on a dry run. Returns totals: removed
    hashes, removed workspace paths, planned deletions, estimated freed
    bytes, and accumulated errors.
    """
    current = now or datetime.now(timezone.utc)
    removed_hashes: List[str] = []
    workspaces_removed: List[str] = []
    would_delete: List[str] = []
    errors: List[str] = []
    freed = 0

    for item in list_caches(base_dir):
        exp = item.get("expires", {}) if isinstance(item.get("expires"), dict) else {}
        if not bool(exp.get("expired", False)):
            continue
        repo_hash = str(item.get("repo_hash", "") or "")
        if not repo_hash:
            continue
        # clear_cache honors dry_run, so a dry sweep only plans deletions.
        result = clear_cache(repo_hash=repo_hash, dry_run=dry_run, base_dir=base_dir)
        paths = [str(p) for p in result.get("would_delete", []) if str(p)]
        would_delete.extend(paths)
        freed += int(result.get("freed_bytes_estimate", 0) or 0)
        if not result.get("errors"):
            removed_hashes.append(repo_hash)
        else:
            errors.extend(result.get("errors", []))
        for p in paths:
            # Paths whose parent is the "workspaces" dir are workspace checkouts.
            if os.path.basename(os.path.dirname(p)) == "workspaces":
                workspaces_removed.append(p)

    # Record when the sweep ran (uses *now* when supplied, e.g. in tests).
    policy = load_policy(base_dir)
    policy["last_cleanup_iso"] = current.isoformat()
    save_policy(policy, base_dir=base_dir)

    return {
        "ok": True,
        "dry_run": bool(dry_run),
        "deleted": bool((not dry_run) and (not errors)),
        "caches_removed": removed_hashes,
        "workspaces_removed": workspaces_removed,
        "would_delete": would_delete,
        "freed_bytes_estimate": int(freed),
        "errors": errors,
    }
651
+
652
+
653
def apply_retention(base_dir: Optional[str] = None, now: Optional[datetime] = None, dry_run: bool = False) -> Dict[str, Any]:
    """Compatibility alias: run sweep_expired with the same arguments."""
    return sweep_expired(dry_run=dry_run, base_dir=base_dir, now=now)
655
+
656
+
657
def cleanup(dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Compatibility alias: sweep expired caches using the current time."""
    return sweep_expired(dry_run=dry_run, base_dir=base_dir)
659
+
@@ -0,0 +1 @@
1
+ # Path resolver utilities