codemap-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- analysis/__init__.py +1 -0
- analysis/architecture/__init__.py +1 -0
- analysis/architecture/architecture_engine.py +155 -0
- analysis/architecture/dependency_cycles.py +103 -0
- analysis/architecture/risk_radar.py +220 -0
- analysis/call_graph/__init__.py +1 -0
- analysis/call_graph/call_extractor.py +91 -0
- analysis/call_graph/call_graph_builder.py +1 -0
- analysis/call_graph/call_resolver.py +56 -0
- analysis/call_graph/context_models.py +1 -0
- analysis/call_graph/cross_file_resolver.py +122 -0
- analysis/call_graph/execution_tracker.py +1 -0
- analysis/call_graph/flow_builder.py +1 -0
- analysis/call_graph/models.py +1 -0
- analysis/core/__init__.py +1 -0
- analysis/core/ast_context.py +1 -0
- analysis/core/ast_parser.py +8 -0
- analysis/core/class_extractor.py +35 -0
- analysis/core/function_extractor.py +16 -0
- analysis/core/import_extractor.py +43 -0
- analysis/explain/__init__.py +1 -0
- analysis/explain/docstring_extractor.py +45 -0
- analysis/explain/explain_runner.py +177 -0
- analysis/explain/repo_summary_generator.py +138 -0
- analysis/explain/return_analyzer.py +114 -0
- analysis/explain/risk_flags.py +1 -0
- analysis/explain/signature_extractor.py +104 -0
- analysis/explain/summary_generator.py +282 -0
- analysis/graph/__init__.py +1 -0
- analysis/graph/callgraph_index.py +117 -0
- analysis/graph/entrypoint_detector.py +1 -0
- analysis/graph/impact_analyzer.py +210 -0
- analysis/indexing/__init__.py +1 -0
- analysis/indexing/import_resolver.py +156 -0
- analysis/indexing/symbol_index.py +150 -0
- analysis/runners/__init__.py +1 -0
- analysis/runners/phase4_runner.py +137 -0
- analysis/utils/__init__.py +1 -0
- analysis/utils/ast_helpers.py +1 -0
- analysis/utils/cache_manager.py +659 -0
- analysis/utils/path_resolver.py +1 -0
- analysis/utils/repo_fetcher.py +469 -0
- cli.py +1728 -0
- codemap_cli.py +11 -0
- codemap_python-0.1.0.dist-info/METADATA +399 -0
- codemap_python-0.1.0.dist-info/RECORD +58 -0
- codemap_python-0.1.0.dist-info/WHEEL +5 -0
- codemap_python-0.1.0.dist-info/entry_points.txt +2 -0
- codemap_python-0.1.0.dist-info/top_level.txt +5 -0
- security_utils.py +51 -0
- ui/__init__.py +1 -0
- ui/app.py +2160 -0
- ui/device_id.py +27 -0
- ui/static/app.js +2703 -0
- ui/static/styles.css +1268 -0
- ui/templates/index.html +231 -0
- ui/utils/__init__.py +1 -0
- ui/utils/registry_manager.py +190 -0
|
@@ -0,0 +1,659 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import shutil
|
|
7
|
+
import tempfile
|
|
8
|
+
from datetime import datetime, timezone
|
|
9
|
+
from threading import RLock
|
|
10
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
11
|
+
|
|
12
|
+
from security_utils import redact_secrets
|
|
13
|
+
|
|
14
|
+
# Serializes all metadata/policy/manifest reads-modify-writes in this process.
_LOCK = RLock()
# Substrings that mark a dict key as sensitive; matching keys are dropped by
# _scrub_payload before any payload is persisted or returned.
_SENSITIVE_KEYS = ("api_key", "token", "authorization", "bearer", "basic", "secret", "password")
# Directory names pruned when walking a repo to collect fingerprints.
_SKIP_DIRS = {".git", "__pycache__", ".codemap_cache", ".venv", "venv", "node_modules"}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _project_root() -> str:
    """Return the absolute path two directory levels above this module."""
    here = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(here, "..", ".."))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def cache_root(base_dir: Optional[str] = None) -> str:
    """Return the cache root directory, creating it if needed.

    Defaults to ``<project_root>/.codemap_cache`` when *base_dir* is falsy.
    """
    if base_dir:
        root = os.path.abspath(base_dir)
    else:
        root = os.path.abspath(os.path.join(_project_root(), ".codemap_cache"))
    os.makedirs(root, exist_ok=True)
    return root
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _now_iso() -> str:
    """Current UTC time as an ISO-8601 string (always offset-aware)."""
    utc_now = datetime.now(timezone.utc)
    return utc_now.isoformat()
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _parse_iso(value: Optional[str]) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp, tolerating a trailing 'Z'.

    Returns None for empty input or anything fromisoformat cannot parse.
    """
    if not value:
        return None
    try:
        normalized = str(value).replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    except Exception:
        return None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _safe_int(value: Any, default: int) -> int:
    """Coerce *value* to int, falling back to *default* on any failure."""
    try:
        return int(value)
    except Exception:
        pass
    return int(default)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _sha256_text(text: str) -> str:
    """Hex SHA-256 digest of *text* (UTF-8; unencodable characters ignored)."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8", errors="ignore"))
    return digest.hexdigest()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _normalize_target(value: str) -> str:
    """Canonicalize a repo target string for hashing.

    Existing filesystem paths are made absolute; separators are unified to
    '/' and the result lower-cased so equivalent targets hash identically.
    Empty / None input yields "".
    """
    target = str(value or "").strip()
    if not target:
        return ""
    if os.path.exists(target):
        target = os.path.abspath(target)
    target = os.path.normpath(target)
    target = target.replace("\\", "/")
    return target.lower()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def compute_repo_hash(repo_target: str) -> str:
    """Stable 16-hex-character identifier for a repository target."""
    key = _normalize_target(repo_target) or "<empty>"
    return _sha256_text(key)[:16]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _is_probable_repo_hash(value: str) -> bool:
    """True when *value* looks like a hex repo hash (8-64 hex chars)."""
    candidate = str(value or "").strip().lower()
    if not 8 <= len(candidate) <= 64:
        return False
    hex_digits = set("0123456789abcdef")
    return set(candidate) <= hex_digits
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_cache_dir(repo_target: str, base_dir: Optional[str] = None) -> str:
    """Resolve the cache directory for a repo target or an existing hash.

    If *repo_target* already looks like a repo hash AND a matching cache
    directory exists, it is used verbatim; otherwise the target is hashed.
    """
    root = cache_root(base_dir)
    candidate = str(repo_target or "").strip()
    reuse = _is_probable_repo_hash(candidate) and os.path.isdir(os.path.join(root, candidate))
    repo_hash = candidate if reuse else compute_repo_hash(candidate)
    return os.path.join(root, repo_hash)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _metadata_path(repo_hash: str, base_dir: Optional[str] = None) -> str:
    """Location of the per-repo ``metadata.json``."""
    repo_dir = os.path.join(cache_root(base_dir), repo_hash)
    return os.path.join(repo_dir, "metadata.json")
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _manifest_path(repo_dir: str, base_dir: Optional[str] = None) -> str:
    """Location of the per-repo ``manifest.json``."""
    cache_dir = get_cache_dir(repo_dir, base_dir=base_dir)
    return os.path.join(cache_dir, "manifest.json")
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _policy_path(base_dir: Optional[str] = None) -> str:
    """Location of the global retention-policy file."""
    root = cache_root(base_dir)
    return os.path.join(root, "retention.json")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _workspaces_path(base_dir: Optional[str] = None) -> str:
    """Location of the global workspaces registry file."""
    root = cache_root(base_dir)
    return os.path.join(root, "workspaces.json")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _dir_size(path: str) -> int:
    """Total size in bytes of all files under *path*.

    Returns 0 when *path* is not a directory; files that disappear or fail
    to stat mid-walk are skipped.
    """
    if not os.path.isdir(path):
        return 0
    total = 0
    for dirpath, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            try:
                total += int(os.path.getsize(os.path.join(dirpath, filename)))
            except OSError:
                pass
    return int(total)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _atomic_json_write(path: str, payload: Dict[str, Any]) -> None:
    """Atomically write *payload* as pretty-printed JSON to *path*.

    The data is written to a temp file in the destination directory and then
    swapped into place with os.replace, so readers never see a partial file.
    The temp file is removed if anything fails before the rename.

    Fix: a bare relative filename has an empty dirname, which previously
    crashed in ``os.makedirs("")``; fall back to the current directory.
    """
    directory = os.path.dirname(path) or "."
    os.makedirs(directory, exist_ok=True)
    fd, tmp = tempfile.mkstemp(prefix=".tmp_", suffix=".json", dir=directory)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
        # Atomic replacement on both POSIX and Windows.
        os.replace(tmp, path)
    finally:
        # Clean up the temp file if the dump or rename failed.
        if os.path.exists(tmp):
            try:
                os.remove(tmp)
            except OSError:
                pass
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _load_json(path: str, default: Any) -> Any:
    """Read JSON from *path*; return *default* when absent or unparseable."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception:
        return default
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _scrub_payload(payload: Any) -> Any:
    """Recursively drop sensitive keys and redact secrets from strings.

    Dict keys containing any substring from _SENSITIVE_KEYS (matched
    case-insensitively) are removed entirely; string values pass through
    redact_secrets; lists are scrubbed element-wise; everything else is
    returned unchanged.
    """
    if isinstance(payload, dict):
        cleaned: Dict[str, Any] = {}
        for raw_key, item in payload.items():
            key = str(raw_key or "")
            lowered = key.lower()
            if not any(marker in lowered for marker in _SENSITIVE_KEYS):
                cleaned[key] = _scrub_payload(item)
        return cleaned
    if isinstance(payload, list):
        return [_scrub_payload(item) for item in payload]
    if isinstance(payload, str):
        return redact_secrets(payload)
    return payload
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def load_policy(base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load the global cache-retention policy, merged over safe defaults.

    Malformed fields are coerced back to sane values, and the result is
    scrubbed of sensitive keys before being returned.
    """
    # Defaults used when retention.json is absent or unreadable.
    default = {
        "default_ttl_days": 14,
        "workspaces_ttl_days": 7,
        "never_delete_repo_hashes": [],
        "repo_policies": {},
        "last_cleanup_iso": "",
    }
    raw = _load_json(_policy_path(base_dir), default)
    if not isinstance(raw, dict):
        # Corrupt file (e.g. a JSON list): hand back a fresh copy of defaults.
        return dict(default)
    merged = dict(default)
    merged.update(raw)
    # Clamp TTLs to non-negative ints; invalid values fall back to defaults.
    merged["default_ttl_days"] = max(0, _safe_int(merged.get("default_ttl_days"), 14))
    merged["workspaces_ttl_days"] = max(0, _safe_int(merged.get("workspaces_ttl_days"), 7))
    if not isinstance(merged.get("never_delete_repo_hashes"), list):
        merged["never_delete_repo_hashes"] = []
    if not isinstance(merged.get("repo_policies"), dict):
        merged["repo_policies"] = {}
    return _scrub_payload(merged)
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def save_policy(policy: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Merge *policy* over the stored policy, sanitize, persist, and return it.

    Non-dict input is ignored (the current policy is re-saved unchanged).
    """
    with _LOCK:
        current = load_policy(base_dir)
        merged = dict(current)
        if isinstance(policy, dict):
            merged.update(policy)
        # Re-clamp after the merge so caller-supplied values cannot go negative.
        merged["default_ttl_days"] = max(0, _safe_int(merged.get("default_ttl_days"), 14))
        merged["workspaces_ttl_days"] = max(0, _safe_int(merged.get("workspaces_ttl_days"), 7))
        merged["last_cleanup_iso"] = str(merged.get("last_cleanup_iso", "") or "")
        merged = _scrub_payload(merged)
        _atomic_json_write(_policy_path(base_dir), merged)
        return merged
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def collect_fingerprints(repo_dir: str) -> Dict[str, Dict[str, int]]:
    """Map each ``.py`` file under *repo_dir* to its mtime/size fingerprint.

    Keys are forward-slash relative paths; directories in _SKIP_DIRS are
    pruned from the walk; files that fail to stat are skipped.  Returns {}
    when *repo_dir* is not a directory.
    """
    base = os.path.abspath(repo_dir)
    fingerprints: Dict[str, Dict[str, int]] = {}
    if not os.path.isdir(base):
        return fingerprints
    for current, subdirs, filenames in os.walk(base):
        # Prune in place so os.walk never descends into skipped directories.
        subdirs[:] = [d for d in subdirs if d not in _SKIP_DIRS]
        for filename in filenames:
            if not filename.endswith(".py"):
                continue
            full = os.path.join(current, filename)
            try:
                st = os.stat(full)
            except OSError:
                continue
            rel = os.path.relpath(full, base).replace("\\", "/")
            mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1e9)))
            fingerprints[rel] = {"mtime_ns": mtime_ns, "size": int(st.st_size)}
    return fingerprints
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def diff_fingerprints(previous: Dict[str, Any], current: Dict[str, Any]) -> Dict[str, Any]:
    """Report files whose fingerprint differs between two snapshots.

    Non-dict inputs are treated as empty.  A file counts as changed when it
    was added, removed, or its fingerprint value differs; the list is sorted.
    """
    prev_map = previous if isinstance(previous, dict) else {}
    cur_map = current if isinstance(current, dict) else {}
    all_paths = sorted(set(prev_map) | set(cur_map))
    changed = [p for p in all_paths if prev_map.get(p) != cur_map.get(p)]
    return {"changed_files": changed, "changed_count": len(changed)}
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def build_manifest(repo_dir: str, fingerprints: Dict[str, Any], metadata: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Assemble the manifest dict for *repo_dir*.

    Scrubbed *metadata* entries are merged over the base fields, so callers
    may override e.g. ``analysis_version``.
    """
    extra = metadata if isinstance(metadata, dict) else {}
    version = str(extra.get("analysis_version", "2.2") or "2.2")
    manifest: Dict[str, Any] = {
        "repo_hash": compute_repo_hash(repo_dir),
        "repo_dir": os.path.abspath(repo_dir),
        "analysis_version": version,
        "updated_at": _now_iso(),
        "fingerprints": fingerprints if isinstance(fingerprints, dict) else {},
    }
    manifest.update(_scrub_payload(extra))
    return manifest
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def load_manifest(repo_dir: str, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load the cached manifest for *repo_dir*; {} when absent or invalid."""
    loaded = _load_json(_manifest_path(repo_dir, base_dir), {})
    if isinstance(loaded, dict):
        return loaded
    return {}
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def save_manifest(repo_dir: str, manifest: Dict[str, Any], base_dir: Optional[str] = None) -> None:
    """Atomically persist *manifest* for *repo_dir*, scrubbed of secrets."""
    with _LOCK:
        data = manifest if isinstance(manifest, dict) else {}
        _atomic_json_write(_manifest_path(repo_dir, base_dir), _scrub_payload(data))
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def should_rebuild(repo_dir: str, analysis_version: str = "2.2", base_dir: Optional[str] = None) -> bool:
    """Decide whether *repo_dir* needs re-analysis.

    True when there is no manifest, the analysis version changed, or any
    tracked .py file's fingerprint differs from the stored snapshot.
    """
    manifest = load_manifest(repo_dir, base_dir=base_dir)
    if not manifest:
        return True
    stored_version = str(manifest.get("analysis_version", "") or "")
    if stored_version != str(analysis_version or ""):
        return True
    stored = manifest.get("fingerprints")
    previous = stored if isinstance(stored, dict) else {}
    delta = diff_fingerprints(previous, collect_fingerprints(repo_dir))
    return bool(delta.get("changed_count", 0))
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def _default_metadata(repo_hash: str) -> Dict[str, Any]:
    """Fresh metadata record for *repo_hash* with every field defaulted."""
    timestamp = _now_iso()
    meta: Dict[str, Any] = {
        "repo_hash": repo_hash,
        "source": "filesystem",
        "repo_path": "",
        "repo_url": "",
        "ref": "",
        "workspace_dir": "",
        "analysis_version": "2.2",
        "created_at": timestamp,
        "last_accessed_at": timestamp,
        "retention_days": 14,
        "private_mode": False,
        "ai_fingerprint_source": "",
    }
    return meta
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def _load_metadata(repo_hash: str, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load metadata for *repo_hash*, merged over defaults and scrubbed."""
    stored = _load_json(_metadata_path(repo_hash, base_dir=base_dir), {})
    meta = _default_metadata(repo_hash)
    if isinstance(stored, dict):
        meta.update(stored)
    # Re-assert invariants the stored file may have violated.
    meta["repo_hash"] = repo_hash
    meta["retention_days"] = max(0, _safe_int(meta.get("retention_days"), 14))
    meta["private_mode"] = bool(meta.get("private_mode", False))
    return _scrub_payload(meta)
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _save_metadata(repo_hash: str, payload: Dict[str, Any], base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Persist metadata for *repo_hash* atomically; returns what was written."""
    meta = _default_metadata(repo_hash)
    if isinstance(payload, dict):
        meta.update(payload)
    # Enforce the same invariants as _load_metadata before writing.
    meta["repo_hash"] = repo_hash
    meta["retention_days"] = max(0, _safe_int(meta.get("retention_days"), 14))
    meta["private_mode"] = bool(meta.get("private_mode", False))
    scrubbed = _scrub_payload(meta)
    _atomic_json_write(_metadata_path(repo_hash, base_dir=base_dir), scrubbed)
    return scrubbed
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def upsert_metadata(repo_hash: str, **fields: Any) -> Dict[str, Any]:
    """Merge *fields* into the stored metadata for *repo_hash* and save.

    Missing created/last-accessed timestamps are backfilled with now (UTC).
    """
    with _LOCK:
        meta = _load_metadata(repo_hash)
        meta.update(_scrub_payload(fields))
        for stamp in ("created_at", "last_accessed_at"):
            if not str(meta.get(stamp, "") or ""):
                meta[stamp] = _now_iso()
        return _save_metadata(repo_hash, meta)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def set_retention(repo_hash: str, days: int) -> Dict[str, Any]:
    """Set the per-repo TTL (clamped to >= 0) and refresh last access."""
    with _LOCK:
        meta = _load_metadata(repo_hash)
        meta["retention_days"] = max(0, int(days))
        meta["last_accessed_at"] = _now_iso()
        return _save_metadata(repo_hash, meta)
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def touch_last_accessed(repo_hash: str) -> Dict[str, Any]:
    """Refresh the last-accessed timestamp for *repo_hash* and save."""
    with _LOCK:
        meta = _load_metadata(repo_hash)
        meta["last_accessed_at"] = _now_iso()
        return _save_metadata(repo_hash, meta)
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def compute_analysis_fingerprint(repo_dir: str) -> str:
    """Fingerprint the cached analysis artifacts for *repo_dir*.

    Combines the manifest's analysis_version with the size/mtime of a fixed
    set of artifact files, so the hash changes whenever any artifact is
    rewritten (or appears/disappears).
    """
    cache_dir = get_cache_dir(repo_dir)
    manifest = _load_json(os.path.join(cache_dir, "manifest.json"), {})
    analysis_version = str((manifest or {}).get("analysis_version", "") or "")
    parts: List[str] = [analysis_version]
    for name in ("resolved_calls.json", "project_tree.json", "risk_radar.json", "analysis_metrics.json"):
        path = os.path.join(cache_dir, name)
        if os.path.exists(path):
            try:
                st = os.stat(path)
                # st_mtime_ns preferred; fall back to float mtime scaled to ns.
                parts.append(f"{name}:{int(st.st_size)}:{int(getattr(st, 'st_mtime_ns', int(st.st_mtime*1e9)))}")
            except OSError:
                # File vanished between exists() and stat(): treat as missing.
                parts.append(f"{name}:missing")
        else:
            parts.append(f"{name}:missing")
    return _sha256_text("|".join(parts))
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _artifact_flags(cache_dir: str) -> Dict[str, bool]:
    """Report which analysis artifact files exist inside *cache_dir*."""
    artifact_files = {
        "resolved_calls": "resolved_calls.json",
        "explain": "explain.json",
        "project_tree": "project_tree.json",
        "risk_radar": "risk_radar.json",
        "dependency_cycles": "dependency_cycles.json",
    }
    return {key: os.path.exists(os.path.join(cache_dir, fname)) for key, fname in artifact_files.items()}
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def _compute_expiry(meta: Dict[str, Any], policy: Dict[str, Any], now: Optional[datetime] = None) -> Dict[str, Any]:
    """Classify a cache entry's TTL state.

    Returns {"mode": "pinned"|"ttl", "days_left": int|None, "expired": bool}.
    Pinned entries (hash on the never-delete list, or an effective TTL of 0)
    never expire.
    """
    current = now or datetime.now(timezone.utc)
    repo_hash = str(meta.get("repo_hash", "") or "")
    never = set(str(x) for x in (policy.get("never_delete_repo_hashes") or []))
    if repo_hash in never:
        return {"mode": "pinned", "days_left": None, "expired": False}

    # Per-repo TTL wins; missing/invalid values fall back to the policy default.
    ttl_days = _safe_int(meta.get("retention_days"), -1)
    if ttl_days < 0:
        ttl_days = _safe_int(policy.get("default_ttl_days"), 14)
    if ttl_days == 0:
        # A TTL of 0 means "keep forever", not "expire immediately".
        return {"mode": "pinned", "days_left": None, "expired": False}

    # Age is measured from last access, falling back to creation time.
    last = _parse_iso(str(meta.get("last_accessed_at", "") or "")) or _parse_iso(str(meta.get("created_at", "") or ""))
    if last is None:
        # No usable timestamp: report a full TTL and never expire here.
        return {"mode": "ttl", "days_left": ttl_days, "expired": False}

    # NOTE(review): _parse_iso can return a naive datetime for strings with no
    # UTC offset; subtracting it from the aware `current` would raise TypeError
    # — confirm stored timestamps always carry an offset.
    age_days = (current - last).total_seconds() / 86400.0
    days_left = int(ttl_days - age_days)
    expired = age_days >= float(ttl_days)
    return {"mode": "ttl", "days_left": days_left, "expired": bool(expired)}
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def _list_repo_hash_dirs(base_dir: Optional[str] = None) -> List[str]:
    """Sorted repo-hash subdirectory names under the cache root.

    Skips the shared 'workspaces'/'_local' entries, plain files, and any
    name that does not look like a hex repo hash.
    """
    root = cache_root(base_dir)
    hashes: List[str] = []
    for entry in sorted(os.listdir(root)):
        if entry in {"workspaces", "_local"}:
            continue
        full = os.path.join(root, entry)
        if os.path.isdir(full) and _is_probable_repo_hash(entry):
            hashes.append(entry)
    return hashes
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def list_caches(base_dir: Optional[str] = None) -> List[Dict[str, Any]]:
    """Describe every repo cache under the cache root.

    Each row combines the repo's metadata, its manifest's analysis version
    (as a fallback), on-disk size, artifact presence flags, and its TTL
    expiry state as computed against the current retention policy.
    """
    root = cache_root(base_dir)
    policy = load_policy(base_dir)
    now = datetime.now(timezone.utc)
    rows: List[Dict[str, Any]] = []

    for repo_hash in _list_repo_hash_dirs(base_dir):
        cache_dir = os.path.join(root, repo_hash)
        meta = _load_metadata(repo_hash, base_dir=base_dir)
        manifest = _load_json(os.path.join(cache_dir, "manifest.json"), {})
        # Older caches may lack analysis_version in metadata; borrow the
        # manifest's value so the row is still informative.
        if isinstance(manifest, dict) and manifest.get("analysis_version") and not meta.get("analysis_version"):
            meta["analysis_version"] = manifest.get("analysis_version")

        expires = _compute_expiry(meta, policy, now=now)
        rows.append(
            {
                "repo_hash": repo_hash,
                "cache_dir": cache_dir,
                "source": str(meta.get("source", "filesystem") or "filesystem"),
                "repo_url": str(meta.get("repo_url", "") or ""),
                "repo_path": str(meta.get("repo_path", "") or ""),
                "ref": str(meta.get("ref", "") or ""),
                "workspace_dir": str(meta.get("workspace_dir", "") or ""),
                "analysis_version": str(meta.get("analysis_version", "") or manifest.get("analysis_version", "") or ""),
                "created_at": str(meta.get("created_at", "") or ""),
                "last_accessed_at": str(meta.get("last_accessed_at", "") or ""),
                "retention_days": int(meta.get("retention_days", policy.get("default_ttl_days", 14)) or 14),
                "private_mode": bool(meta.get("private_mode", False)),
                "size_bytes": _dir_size(cache_dir),
                "has": _artifact_flags(cache_dir),
                "expires": expires,
            }
        )

    return rows
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _load_workspaces(base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Load workspaces.json, normalized to {'active_repo_hash', 'repos'}."""
    raw = _load_json(_workspaces_path(base_dir), {})
    if not isinstance(raw, dict):
        return {"active_repo_hash": "", "repos": []}
    repo_list = raw.get("repos")
    if not isinstance(repo_list, list):
        repo_list = []
    active = str(raw.get("active_repo_hash", "") or "")
    return {"active_repo_hash": active, "repos": repo_list}
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _save_workspaces(data: Dict[str, Any], base_dir: Optional[str] = None) -> None:
    """Atomically persist the workspaces registry.

    An active_repo_hash that no longer matches any registered repo is
    cleared, and the repo entries are scrubbed of sensitive keys first.
    """
    source = data if isinstance(data, dict) else {}
    repos = source.get("repos") if isinstance(source.get("repos"), list) else []
    active = str(source.get("active_repo_hash", "") or "")
    if active:
        known = {str((entry or {}).get("repo_hash", "")) for entry in repos}
        if active not in known:
            active = ""
    payload = {"active_repo_hash": active, "repos": _scrub_payload(repos)}
    _atomic_json_write(_workspaces_path(base_dir), payload)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def _workspace_refcounts(base_dir: Optional[str] = None) -> Dict[str, int]:
    """Count references to each workspace directory under <cache_root>/workspaces.

    A workspace (keyed by realpath) is counted once per cache metadata entry
    whose workspace_dir points at it, plus once per workspaces-registry entry
    whose repo path lives inside it.  clear_cache uses these counts to avoid
    deleting a workspace still shared by another cache.
    """
    refs: Dict[str, int] = {}
    root = cache_root(base_dir)
    ws_root = os.path.realpath(os.path.join(root, "workspaces"))

    # Pass 1: references from per-repo cache metadata (workspace_dir field).
    for item in list_caches(base_dir):
        ws = str(item.get("workspace_dir", "") or "").strip()
        if not ws:
            continue
        ws_real = os.path.realpath(ws)
        try:
            # Ignore directories that resolve outside the managed workspaces root.
            if os.path.commonpath([ws_root, ws_real]) != ws_root:
                continue
        except ValueError:
            # commonpath raises for mixed absolute/relative or cross-drive paths.
            continue
        refs[ws_real] = refs.get(ws_real, 0) + 1

    # Pass 2: references from the workspaces registry (per-repo checkout paths).
    ws = _load_workspaces(base_dir)
    for repo in ws.get("repos", []):
        path = str((repo or {}).get("path", "") or "").strip()
        if not path:
            continue
        real = os.path.realpath(path)
        try:
            if os.path.commonpath([ws_root, real]) != ws_root:
                continue
        except ValueError:
            continue
        # Registry entries point at the repo inside the workspace, so the
        # workspace itself is the parent directory.
        workspace_dir = os.path.dirname(real)
        refs[workspace_dir] = refs.get(workspace_dir, 0) + 1

    return refs
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def _on_rm_error(func, path, exc_info):
    """shutil.rmtree onerror hook: loosen permissions and retry once.

    Best-effort by design — every failure here is swallowed so a single
    stubborn entry cannot abort the whole tree removal; _safe_rmtree
    re-checks afterwards whether the tree is actually gone.
    """
    try:
        # presumably clears a read-only permission that blocked the delete
        # — TODO confirm (common failure mode on Windows).
        os.chmod(path, 0o700)
    except OSError:
        pass
    try:
        # Retry the operation (e.g. os.remove / os.rmdir) that failed.
        func(path)
    except Exception:
        pass
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def _safe_rmtree(path: str, allowed_root: str) -> Tuple[bool, Optional[str]]:
    """Recursively delete *path*, but only if it lies under *allowed_root*.

    Returns (deleted, error_code): (False, None) means nothing to do;
    (False, <code>) reports a refusal or a failure.  Symlinks are resolved
    via realpath before the containment check so a link cannot escape the
    allowed root.
    """
    if not path:
        return False, None
    if not os.path.exists(path):
        return False, None
    root_real = os.path.realpath(allowed_root)
    target_real = os.path.realpath(path)
    try:
        # commonpath equals the root only when the target is inside (or equal
        # to) it.
        if os.path.commonpath([root_real, target_real]) != root_real:
            return False, "TARGET_OUTSIDE_ALLOWED_ROOT"
    except ValueError:
        # Raised for incomparable paths (e.g. different drives) — treat as outside.
        return False, "TARGET_OUTSIDE_ALLOWED_ROOT"
    try:
        # _on_rm_error chmods and retries entries that initially fail to delete.
        shutil.rmtree(target_real, onerror=_on_rm_error)
        if os.path.exists(target_real):
            # Some entries survived the best-effort removal.
            return False, "DELETE_INCOMPLETE"
        return True, None
    except Exception as e:
        return False, str(e)
|
|
521
|
+
|
|
522
|
+
|
|
523
|
+
def clear_cache(repo_hash: str, dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Delete the cache directory (and, when unshared, the workspace) for a repo.

    Parameters:
        repo_hash: hex identifier of the cached repo (see compute_repo_hash).
        dry_run: when True, only report what would be deleted.
        base_dir: optional override of the cache root.

    Returns a status dict describing what was (or would be) deleted, an
    estimate of bytes freed, and any errors encountered.

    Fix: an empty/blank repo_hash previously made ``os.path.join(root, "")``
    resolve to the cache root itself, which passes _safe_rmtree's containment
    check — so ``clear_cache("")`` would delete EVERY cache.  It now fails
    fast with an explicit error instead.
    """
    repo_hash = str(repo_hash or "").strip()
    root = cache_root(base_dir)
    if not repo_hash:
        # Guard against wiping the whole cache root (see docstring).
        return {
            "ok": False,
            "repo_hash": "",
            "dry_run": bool(dry_run),
            "deleted": False,
            "cache_dir": root,
            "workspace_dir": None,
            "would_delete": [],
            "workspace_preserved": [],
            "freed_bytes_estimate": 0,
            "errors": ["EMPTY_REPO_HASH"],
            "message": "Missing repo hash",
        }
    cache_dir = os.path.join(root, repo_hash)
    meta = _load_metadata(repo_hash, base_dir=base_dir)
    workspace_dir = str(meta.get("workspace_dir", "") or "")

    would_delete: List[str] = []
    workspace_preserved: List[str] = []
    errors: List[str] = []

    if os.path.isdir(cache_dir):
        would_delete.append(os.path.abspath(cache_dir))

    # A workspace checkout may be shared between caches; only schedule it for
    # deletion when this repo is its sole remaining user.
    ws_refs = _workspace_refcounts(base_dir)
    if workspace_dir and os.path.isdir(workspace_dir):
        ws_real = os.path.realpath(workspace_dir)
        if ws_refs.get(ws_real, 0) <= 1:
            would_delete.append(os.path.abspath(workspace_dir))
        else:
            workspace_preserved.append(os.path.abspath(workspace_dir))

    freed = sum(_dir_size(p) for p in would_delete if os.path.exists(p))

    if dry_run:
        return {
            "ok": True,
            "repo_hash": repo_hash,
            "dry_run": True,
            "deleted": False,
            "cache_dir": cache_dir,
            "workspace_dir": workspace_dir or None,
            "would_delete": would_delete,
            "workspace_preserved": workspace_preserved,
            "freed_bytes_estimate": int(freed),
            "errors": errors,
            "message": "Dry run only",
        }

    deleted_any = False
    if os.path.isdir(cache_dir):
        ok, err = _safe_rmtree(cache_dir, root)
        if ok:
            deleted_any = True
        elif err:
            errors.append(f"cache_dir:{err}")

    # Workspaces may only be removed from under <cache_root>/workspaces.
    if workspace_dir and os.path.isdir(workspace_dir) and os.path.abspath(workspace_dir) in would_delete:
        ws_root = os.path.join(root, "workspaces")
        ok, err = _safe_rmtree(workspace_dir, ws_root)
        if ok:
            deleted_any = True
        elif err:
            errors.append(f"workspace_dir:{err}")

    # Drop this repo from the workspaces registry and clear the active
    # pointer if it referenced the repo just removed.
    ws = _load_workspaces(base_dir)
    repos = ws.get("repos", []) if isinstance(ws.get("repos"), list) else []
    repos = [r for r in repos if str((r or {}).get("repo_hash", "") or "") != repo_hash]
    ws["repos"] = repos
    active = str(ws.get("active_repo_hash", "") or "")
    if active == repo_hash:
        ws["active_repo_hash"] = ""
    _save_workspaces(ws, base_dir)

    return {
        "ok": True,
        "repo_hash": repo_hash,
        "dry_run": False,
        "deleted": bool(deleted_any and not errors),
        "cache_dir": cache_dir,
        "workspace_dir": workspace_dir or None,
        "would_delete": would_delete,
        "workspace_preserved": workspace_preserved,
        "freed_bytes_estimate": int(freed),
        "errors": errors,
        "message": "Deleted" if deleted_any and not errors else "Nothing deleted" if not would_delete else "Completed with warnings",
    }
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def delete_repo(repo_hash: str, dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Alias for clear_cache, kept for API compatibility."""
    outcome = clear_cache(repo_hash=repo_hash, dry_run=dry_run, base_dir=base_dir)
    return outcome
|
|
604
|
+
|
|
605
|
+
|
|
606
|
+
def sweep_expired(
    dry_run: bool = False,
    base_dir: Optional[str] = None,
    now: Optional[datetime] = None,
) -> Dict[str, Any]:
    """Delete (or, with dry_run, just report) every expired cache.

    Expiry is taken from the "expires" field that list_caches computes per
    repo.  Returns the removed hashes, removed workspace paths, the full
    would-delete list, a freed-bytes estimate, and accumulated errors.
    """
    current = now or datetime.now(timezone.utc)
    removed_hashes: List[str] = []
    workspaces_removed: List[str] = []
    would_delete: List[str] = []
    errors: List[str] = []
    freed = 0

    for item in list_caches(base_dir):
        exp = item.get("expires", {}) if isinstance(item.get("expires"), dict) else {}
        if not bool(exp.get("expired", False)):
            continue
        repo_hash = str(item.get("repo_hash", "") or "")
        if not repo_hash:
            continue
        # clear_cache handles both the cache dir and any unshared workspace.
        result = clear_cache(repo_hash=repo_hash, dry_run=dry_run, base_dir=base_dir)
        paths = [str(p) for p in result.get("would_delete", []) if str(p)]
        would_delete.extend(paths)
        freed += int(result.get("freed_bytes_estimate", 0) or 0)
        if not result.get("errors"):
            removed_hashes.append(repo_hash)
        else:
            errors.extend(result.get("errors", []))
        # A path whose parent directory is named "workspaces" was a workspace.
        for p in paths:
            if os.path.basename(os.path.dirname(p)) == "workspaces":
                workspaces_removed.append(p)

    # NOTE(review): last_cleanup_iso is persisted even for dry runs — confirm
    # that is intended before relying on it as a "last real cleanup" marker.
    policy = load_policy(base_dir)
    policy["last_cleanup_iso"] = current.isoformat()
    save_policy(policy, base_dir=base_dir)

    return {
        "ok": True,
        "dry_run": bool(dry_run),
        "deleted": bool((not dry_run) and (not errors)),
        "caches_removed": removed_hashes,
        "workspaces_removed": workspaces_removed,
        "would_delete": would_delete,
        "freed_bytes_estimate": int(freed),
        "errors": errors,
    }
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def apply_retention(base_dir: Optional[str] = None, now: Optional[datetime] = None, dry_run: bool = False) -> Dict[str, Any]:
    """Run the TTL sweep; thin alias over sweep_expired."""
    outcome = sweep_expired(dry_run=dry_run, base_dir=base_dir, now=now)
    return outcome
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
def cleanup(dry_run: bool = False, base_dir: Optional[str] = None) -> Dict[str, Any]:
    """Convenience alias: sweep expired caches using the current clock."""
    outcome = sweep_expired(dry_run=dry_run, base_dir=base_dir)
    return outcome
|
|
659
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Path resolver utilities
|