claude-turing 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +48 -7
- package/commands/brief.md +13 -1
- package/commands/card.md +36 -0
- package/commands/init.md +13 -0
- package/commands/train.md +16 -7
- package/commands/turing.md +4 -2
- package/package.json +1 -1
- package/src/install.js +1 -1
- package/src/verify.js +1 -0
- package/templates/model_contract.md +49 -0
- package/templates/model_registry.yaml +69 -0
- package/templates/program.md +2 -0
- package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/cleanup.py +599 -0
- package/templates/scripts/cost_frontier.py +292 -0
- package/templates/scripts/diff_configs.py +534 -0
- package/templates/scripts/export_results.py +457 -0
- package/templates/scripts/generate_brief.py +54 -0
- package/templates/scripts/generate_model_card.py +342 -0
- package/templates/scripts/leaderboard.py +508 -0
- package/templates/scripts/plot_trajectory.py +611 -0
- package/templates/scripts/scaffold.py +9 -0
- package/templates/scripts/show_metrics.py +23 -2
- package/templates/tests/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/tests/__pycache__/conftest.cpython-314-pytest-9.0.2.pyc +0 -0
- package/templates/tests/__pycache__/test_cost_frontier.cpython-314-pytest-9.0.2.pyc +0 -0
- package/templates/tests/test_cost_frontier.py +222 -0
|
@@ -0,0 +1,599 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Archive old experiment branches and model artifacts to reclaim disk space.
|
|
3
|
+
|
|
4
|
+
Nothing is deleted. Experiment branches are preserved as lightweight git tags
|
|
5
|
+
(archive/exp-NNN) before the branch is removed. Model artifacts are moved to
|
|
6
|
+
models/_archive/ — not deleted.
|
|
7
|
+
|
|
8
|
+
Archival policy:
|
|
9
|
+
- Git branches: merged branches whose tip commit is older than N days get
|
|
10
|
+
tagged as archive/<branch-name> and then deleted locally.
|
|
11
|
+
- Model files: all files in models/ (excluding models/best/ and
|
|
12
|
+
models/_archive/) are sorted by modification time. The N most recent are
|
|
13
|
+
kept in place; the rest are moved to models/_archive/.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python scripts/cleanup.py # Archive with defaults
|
|
17
|
+
python scripts/cleanup.py --dry-run # Preview only (no changes)
|
|
18
|
+
python scripts/cleanup.py --keep-models 10 # Keep 10 most recent models
|
|
19
|
+
python scripts/cleanup.py --branch-age 60 # Archive branches older than 60 days
|
|
20
|
+
python scripts/cleanup.py --compress # Also compress models/_archive/
|
|
21
|
+
python scripts/cleanup.py --models-dir path/ # Override models directory
|
|
22
|
+
python scripts/cleanup.py --log-path path/ # Override experiments log path
|
|
23
|
+
|
|
24
|
+
Exit codes:
|
|
25
|
+
0 — success (or dry-run completed)
|
|
26
|
+
1 — fatal error (e.g. not in a git repo)
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import argparse
|
|
32
|
+
import os
|
|
33
|
+
import shutil
|
|
34
|
+
import subprocess
|
|
35
|
+
import sys
|
|
36
|
+
import tarfile
|
|
37
|
+
from datetime import datetime, timezone, timedelta
|
|
38
|
+
from pathlib import Path
|
|
39
|
+
from typing import NamedTuple
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ---------------------------------------------------------------------------
|
|
43
|
+
# Data containers
|
|
44
|
+
# ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
class BranchInfo(NamedTuple):
    """Metadata for one merged local git branch considered for archival."""

    # Branch name as printed by `git branch --format=%(refname:short)`.
    name: str
    # Full hash of the branch tip commit; "" when rev-parse failed upstream.
    commit_hash: str
    # Author date of the tip commit (made timezone-aware by the caller).
    commit_date: datetime
    # Whole days between "now" and commit_date at collection time.
    age_days: int
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ModelFileInfo(NamedTuple):
    """Metadata for one model artifact file found under the models directory."""

    # Filesystem path to the file as discovered.
    path: Path
    # Modification time taken from st_mtime, expressed in UTC.
    mtime: datetime
    # File size in bytes (st_size).
    size_bytes: int
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class ArchiveSummary(NamedTuple):
    """Aggregated results of one cleanup run, rendered by print_summary()."""

    # Branches that were tagged + deleted (or would be, in dry-run mode).
    branches_archived: list[str]
    # Branches that could not be archived (warnings printed to stderr).
    branches_skipped: list[str]
    # Model file paths (relative to the models dir) moved into _archive/.
    models_moved: list[str]
    bytes_reclaimed_by_move: int  # always 0 — files are moved, not deleted
    # Whether --compress was requested on the command line.
    archive_compressed: bool
    # Path of the generated .tar.gz, or None when nothing was compressed.
    compressed_path: str | None
    # Whether this run was a dry run (no changes made).
    dry_run: bool
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# ---------------------------------------------------------------------------
|
|
70
|
+
# Git helpers
|
|
71
|
+
# ---------------------------------------------------------------------------
|
|
72
|
+
|
|
73
|
+
def _run_git(args: list[str], cwd: Path | None = None) -> str:
    """Execute ``git <args>`` and return its stdout with whitespace stripped.

    Args:
        args: git subcommand and options, without the leading "git".
        cwd: working directory for the command; None uses the current one.

    Raises:
        RuntimeError: when git exits with a non-zero status.
    """
    workdir = str(cwd) if cwd else None
    proc = subprocess.run(
        ["git", *args],
        capture_output=True,
        text=True,
        cwd=workdir,
    )
    if proc.returncode == 0:
        return proc.stdout.strip()
    raise RuntimeError(
        f"git {' '.join(args)} failed:\n{proc.stderr.strip()}"
    )
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def find_git_root() -> Path | None:
    """Locate the top-level directory of the enclosing git repository.

    Returns None when the current directory is not inside a git work tree
    (or git itself is unavailable).
    """
    try:
        return Path(_run_git(["rev-parse", "--show-toplevel"]))
    except RuntimeError:
        return None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def get_merged_branches(git_root: Path) -> list[str]:
    """List local branches that are already merged into HEAD.

    The current branch and the protected names main/master/HEAD are
    excluded. Returns an empty list when git fails.
    """
    try:
        raw = _run_git(["branch", "--merged", "HEAD", "--format=%(refname:short)"], cwd=git_root)
    except RuntimeError:
        return []

    # Best-effort: an empty "current" simply excludes nothing extra.
    try:
        current = _run_git(["branch", "--show-current"], cwd=git_root)
    except RuntimeError:
        current = ""

    protected = {current, "main", "master", "HEAD"}
    return [
        name
        for name in (line.strip() for line in raw.splitlines())
        if name and name not in protected
    ]
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def get_branch_commit_date(branch: str, git_root: Path) -> datetime | None:
    """Return the author date of the branch tip commit, or None.

    The timestamp comes from ``git log --format=%aI`` (strict ISO 8601
    with a UTC offset, e.g. "2024-01-02T03:04:05+05:30").

    Returns:
        A datetime (timezone-aware whenever the offset could be parsed),
        or None when git fails or prints nothing.
    """
    try:
        ts = _run_git(
            ["log", "-1", "--format=%aI", branch], cwd=git_root
        )
    except RuntimeError:
        return None
    if not ts:
        return None

    # fromisoformat handles %aI output on all supported Pythons (3.11+
    # accepts any ISO 8601 form; 3.7-3.10 accept the "+HH:MM" offsets).
    try:
        return datetime.fromisoformat(ts)
    except ValueError:
        pass

    # BUGFIX: the previous fallback sliced off the offset (ts[:19]) and
    # stamped the naive time as UTC, mislabeling non-UTC commit times and
    # skewing branch ages by up to a day. Parse with %z to keep the real
    # offset; only if that also fails, fall back to the old assume-UTC path.
    try:
        return datetime.strptime(ts, "%Y-%m-%dT%H:%M:%S%z")
    except ValueError:
        return datetime.fromisoformat(ts[:19]).replace(tzinfo=timezone.utc)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def get_branch_commit_hash(branch: str, git_root: Path) -> str | None:
    """Resolve a branch name to the full hash of its tip commit.

    Returns None when the branch cannot be resolved.
    """
    try:
        resolved = _run_git(["rev-parse", branch], cwd=git_root)
    except RuntimeError:
        return None
    return resolved
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def tag_exists(tag_name: str, git_root: Path) -> bool:
    """Check whether a tag named ``tag_name`` already exists in the repo."""
    ref = f"refs/tags/{tag_name}"
    try:
        _run_git(["rev-parse", "--verify", ref], cwd=git_root)
    except RuntimeError:
        return False
    return True
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def create_archive_tag(branch: str, commit_hash: str, git_root: Path) -> str:
    """Create a lightweight tag archive/<branch> pointing at commit_hash.

    If the tag already exists at the same commit it is simply reused. If it
    exists at a *different* commit, a numbered variant is chosen instead
    (archive/<branch>-2, -3, ... up to -99).

    Returns:
        The tag name actually created (or reused).

    Raises:
        RuntimeError: if git fails, or no free tag name is available.
    """
    tag_name = f"archive/{branch}"
    if tag_exists(tag_name, git_root):
        existing_hash = _run_git(["rev-parse", tag_name], cwd=git_root)
        if existing_hash == commit_hash:
            # Already tagged at this exact commit — nothing to do.
            return tag_name
        # Different commit — append a counter to avoid collision.
        for i in range(2, 100):
            candidate = f"archive/{branch}-{i}"
            if not tag_exists(candidate, git_root):
                tag_name = candidate
                break
        else:
            # BUGFIX: previously this fell through with tag_name still set
            # to the existing tag, so `git tag` failed with an opaque
            # "already exists" error. Fail explicitly instead.
            raise RuntimeError(
                f"no free archive tag name for branch {branch!r}: "
                f"archive/{branch} and variants -2..-99 all exist"
            )

    _run_git(["tag", tag_name, commit_hash], cwd=git_root)
    return tag_name
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def delete_branch(branch: str, git_root: Path) -> None:
    """Delete a local branch. The branch must already be tagged."""
    # `-d` (not `-D`): git refuses to delete an unmerged branch, which acts
    # as a safety net on top of the tag-before-delete workflow.
    _run_git(["branch", "-d", branch], cwd=git_root)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
# ---------------------------------------------------------------------------
|
|
185
|
+
# Branch archival
|
|
186
|
+
# ---------------------------------------------------------------------------
|
|
187
|
+
|
|
188
|
+
def collect_stale_branches(
    git_root: Path,
    max_age_days: int,
) -> tuple[list[BranchInfo], list[BranchInfo]]:
    """Partition merged branches into archive candidates and recent ones.

    A branch is stale when its tip commit is older than ``max_age_days``.
    Branches whose tip date cannot be determined are silently skipped.

    Returns:
        (stale, recent) — stale is sorted oldest-first.
    """
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(days=max_age_days)

    stale: list[BranchInfo] = []
    recent: list[BranchInfo] = []

    for name in get_merged_branches(git_root):
        tip_date = get_branch_commit_date(name, git_root)
        if tip_date is None:
            continue
        # Normalize naive datetimes so the subtraction below is legal.
        if tip_date.tzinfo is None:
            tip_date = tip_date.replace(tzinfo=timezone.utc)
        info = BranchInfo(
            name=name,
            commit_hash=get_branch_commit_hash(name, git_root) or "",
            commit_date=tip_date,
            age_days=(now - tip_date).days,
        )
        (stale if tip_date < cutoff else recent).append(info)

    stale.sort(key=lambda b: b.commit_date)
    return stale, recent
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def archive_branches(
    stale: list[BranchInfo],
    git_root: Path,
    dry_run: bool,
) -> tuple[list[str], list[str]]:
    """Tag each stale branch as archive/<name>, then drop the branch ref.

    In dry-run mode nothing is changed; actions are only reported.

    Returns:
        (archived_names, skipped_names).
    """
    archived: list[str] = []
    skipped: list[str] = []

    for branch_info in stale:
        # No hash means we cannot tag safely — skip rather than guess.
        if not branch_info.commit_hash:
            print(
                f" [skip] {branch_info.name} — could not resolve commit hash",
                file=sys.stderr,
            )
            skipped.append(branch_info.name)
            continue

        tag_name = f"archive/{branch_info.name}"
        if dry_run:
            print(f" [dry-run] Would tag {branch_info.name} -> {tag_name} and delete branch")
            archived.append(branch_info.name)
            continue

        try:
            actual_tag = create_archive_tag(
                branch_info.name, branch_info.commit_hash, git_root
            )
            delete_branch(branch_info.name, git_root)
        except RuntimeError as exc:
            print(f" [skip] {branch_info.name} — {exc}", file=sys.stderr)
            skipped.append(branch_info.name)
        else:
            print(f" Archived branch: {branch_info.name} -> tag {actual_tag}")
            archived.append(branch_info.name)

    return archived, skipped
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
# ---------------------------------------------------------------------------
|
|
269
|
+
# Model artifact archival
|
|
270
|
+
# ---------------------------------------------------------------------------
|
|
271
|
+
|
|
272
|
+
def collect_model_files(models_dir: Path) -> list[ModelFileInfo]:
    """Enumerate candidate model files under ``models_dir``, newest first.

    Collects plain files directly inside models_dir, plus everything
    (recursively) inside its subdirectories — except ``best/`` (current
    best, never touched) and ``_archive/`` (already archived).

    Returns:
        ModelFileInfo entries sorted by mtime, most recent first; [] when
        the directory does not exist.
    """
    if not models_dir.exists():
        return []

    def _info(p: Path) -> ModelFileInfo:
        # Snapshot mtime/size with a single stat() per file.
        st = p.stat()
        return ModelFileInfo(
            path=p,
            mtime=datetime.fromtimestamp(st.st_mtime, tz=timezone.utc),
            size_bytes=st.st_size,
        )

    # Top-level files (directories handled below).
    files = [_info(entry) for entry in models_dir.iterdir() if not entry.is_dir()]

    # Recurse into subdirectories other than the protected pair.
    protected = ("best", "_archive")
    for subdir in sorted(models_dir.iterdir()):
        if subdir.is_dir() and subdir.name not in protected:
            files.extend(_info(e) for e in subdir.rglob("*") if e.is_file())

    # Sort newest first.
    files.sort(key=lambda f: f.mtime, reverse=True)
    return files
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def archive_models(
    models_dir: Path,
    keep_n: int,
    dry_run: bool,
) -> list[str]:
    """Move all but the ``keep_n`` newest model files into models/_archive/.

    Nothing is deleted — files are moved, preserving their relative layout
    under _archive/. ``keep_n`` is clamped to >= 0 so a negative value
    archives everything.

    Returns:
        List of moved file paths, relative to models_dir.
    """
    all_files = collect_model_files(models_dir)

    if not all_files:
        return []

    # BUGFIX: a negative keep_n (e.g. `--keep-models -1`) previously hit
    # Python's negative-slice semantics — all_files[:-1] "kept" almost
    # everything while all_files[-1:] archived only the oldest file.
    # Clamp so negative values mean "keep nothing".
    keep_n = max(0, keep_n)

    # all_files is sorted newest-first: keep the head, archive the tail.
    to_keep = all_files[:keep_n]
    to_archive = all_files[keep_n:]

    if not to_archive:
        return []

    archive_dir = models_dir / "_archive"
    moved: list[str] = []

    for file_info in to_archive:
        rel = file_info.path.relative_to(models_dir)
        dest = archive_dir / rel

        if dry_run:
            print(f" [dry-run] Would move models/{rel} -> models/_archive/{rel}")
            moved.append(str(rel))
            continue

        dest.parent.mkdir(parents=True, exist_ok=True)
        try:
            shutil.move(str(file_info.path), str(dest))
            print(f" Archived model: models/{rel} -> models/_archive/{rel}")
            moved.append(str(rel))
        except OSError as exc:
            # Best-effort: report and continue with the remaining files.
            print(f" [skip] {file_info.path} — {exc}", file=sys.stderr)

    if to_keep:
        print(f" Kept {len(to_keep)} most recent model(s) in models/:")
        for fi in to_keep:
            rel = fi.path.relative_to(models_dir)
            print(f" models/{rel} ({fi.mtime.strftime('%Y-%m-%d')})")

    return moved
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
# ---------------------------------------------------------------------------
|
|
369
|
+
# Compression
|
|
370
|
+
# ---------------------------------------------------------------------------
|
|
371
|
+
|
|
372
|
+
def compress_archive(models_dir: Path, dry_run: bool) -> str | None:
    """Tar.gz the models/_archive/ directory.

    The archive is written as ``models_archive.tar.gz`` in the parent of
    ``models_dir`` (i.e. alongside the models directory, not inside it).
    The previous docstring incorrectly said ``models/_archive.tar.gz``.
    The _archive/ directory itself is left in place — nothing is deleted.

    Returns:
        Path to the .tar.gz as a string, or None when there is nothing to
        compress or compression fails.
    """
    archive_dir = models_dir / "_archive"
    if not archive_dir.exists() or not any(archive_dir.rglob("*")):
        print(" Nothing in models/_archive/ to compress.")
        return None

    out_path = models_dir.parent / "models_archive.tar.gz"

    if dry_run:
        print(f" [dry-run] Would compress models/_archive/ -> {out_path}")
        return str(out_path)

    try:
        # Members are stored under the "_archive" prefix inside the tarball.
        with tarfile.open(str(out_path), "w:gz") as tar:
            tar.add(str(archive_dir), arcname="_archive")
        size_mb = out_path.stat().st_size / (1024 * 1024)
        print(f" Compressed models/_archive/ -> {out_path} ({size_mb:.1f} MB)")
        return str(out_path)
    except OSError as exc:
        # Best-effort: compression failure must not abort the cleanup run.
        print(f" [warn] Compression failed: {exc}", file=sys.stderr)
        return None
|
|
398
|
+
|
|
399
|
+
|
|
400
|
+
# ---------------------------------------------------------------------------
|
|
401
|
+
# Reporting
|
|
402
|
+
# ---------------------------------------------------------------------------
|
|
403
|
+
|
|
404
|
+
def print_summary(summary: ArchiveSummary) -> None:
    """Render the final human-readable report for one cleanup run."""
    rule = "=" * 60
    mode = "DRY RUN — " if summary.dry_run else ""

    print()
    print(rule)
    print(f" {mode}Archive Summary")
    print(rule)

    # --- Branches -------------------------------------------------------
    print("\n Git branches:")
    if not summary.branches_archived:
        print(" No branches archived.")
    else:
        print(f" Archived : {len(summary.branches_archived)}")
        for name in summary.branches_archived:
            print(f" - {name} -> archive/{name}")
    if summary.branches_skipped:
        print(f" Skipped : {len(summary.branches_skipped)} (see warnings above)")

    # --- Models ---------------------------------------------------------
    print("\n Model artifacts:")
    if not summary.models_moved:
        print(" No model files needed archiving.")
    else:
        print(f" Moved to _archive/: {len(summary.models_moved)}")
        for name in summary.models_moved:
            print(f" - {name}")

    # --- Compression ----------------------------------------------------
    print("\n Compression:")
    if not summary.archive_compressed:
        print(" Not requested (use --compress to enable).")
    elif summary.compressed_path:
        print(f" Archive written to: {summary.compressed_path}")
    else:
        print(" Compression requested but no archive content found.")

    print()
    if summary.dry_run:
        print(" NOTE: Dry-run mode — no changes were made.")
    print(rule)
    print()
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
# ---------------------------------------------------------------------------
|
|
449
|
+
# CLI
|
|
450
|
+
# ---------------------------------------------------------------------------
|
|
451
|
+
|
|
452
|
+
def build_parser() -> argparse.ArgumentParser:
    """Construct the command-line interface for the cleanup script.

    Note: the module docstring also mentions a --log-path option which is
    not (yet) defined here.
    """
    parser = argparse.ArgumentParser(
        description=(
            "Archive old experiment branches and model artifacts.\n\n"
            "Branches are preserved as git tags (archive/<name>) before "
            "deletion. Models are MOVED to models/_archive/ — never deleted."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "--dry-run", action="store_true", default=False,
        help="Show what WOULD be archived without making any changes.",
    )
    parser.add_argument(
        "--keep-models", type=int, default=5, metavar="N",
        help="Number of most-recent model files to keep in place (default: 5).",
    )
    parser.add_argument(
        "--branch-age", type=int, default=30, metavar="DAYS",
        help="Archive merged branches whose tip commit is older than DAYS days (default: 30).",
    )
    parser.add_argument(
        "--compress", action="store_true", default=False,
        help="Tar.gz models/_archive/ after archiving.",
    )
    parser.add_argument(
        "--models-dir", default="models", metavar="PATH",
        help="Path to the models directory (default: models/).",
    )
    parser.add_argument(
        "--no-branches", action="store_true", default=False,
        help="Skip branch archival entirely.",
    )
    parser.add_argument(
        "--no-models", action="store_true", default=False,
        help="Skip model archival entirely.",
    )
    return parser
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def main() -> None:
    """CLI entry point.

    Orchestrates one cleanup run: parse args, locate the git root, archive
    stale merged branches, archive old model files, optionally compress
    models/_archive/, then print a summary. Exits with status 1 only when
    branch archival was requested but no git repository is available.
    """
    parser = build_parser()
    args = parser.parse_args()

    if args.dry_run:
        print("[DRY RUN] No changes will be made.\n")

    # -----------------------------------------------------------------------
    # Git root
    # -----------------------------------------------------------------------
    git_root = find_git_root()
    if git_root is None:
        print(
            "Error: Not inside a git repository. Cannot archive branches.",
            file=sys.stderr,
        )
        # We can still archive models — don't hard-fail if only models are requested
        if not args.no_branches:
            sys.exit(1)

    # -----------------------------------------------------------------------
    # Resolve models directory relative to cwd (or git root if available)
    # -----------------------------------------------------------------------
    base_dir = git_root if git_root is not None else Path.cwd()
    models_dir = (base_dir / args.models_dir).resolve()

    # -----------------------------------------------------------------------
    # Branch archival
    # -----------------------------------------------------------------------
    branches_archived: list[str] = []
    branches_skipped: list[str] = []

    if not args.no_branches and git_root is not None:
        print(f"Scanning for merged branches older than {args.branch_age} day(s)...")
        stale, recent = collect_stale_branches(git_root, args.branch_age)

        if not stale and not recent:
            print(" No merged branches found (besides main/master).")
        else:
            print(f" Found {len(stale)} stale + {len(recent)} recent merged branch(es).")

        if stale:
            archived, skipped = archive_branches(stale, git_root, args.dry_run)
            branches_archived.extend(archived)
            branches_skipped.extend(skipped)
        else:
            print(f" No merged branches older than {args.branch_age} days to archive.")

    # -----------------------------------------------------------------------
    # Model archival
    # -----------------------------------------------------------------------
    models_moved: list[str] = []

    if not args.no_models:
        if not models_dir.exists():
            print(f"\nModels directory '{models_dir}' does not exist — skipping model archival.")
        else:
            print(f"\nScanning {models_dir} for model files (keeping {args.keep_models} most recent)...")
            # NOTE(review): collect_model_files runs again inside
            # archive_models — the directory is scanned twice per run.
            all_files = collect_model_files(models_dir)
            if not all_files:
                print(" No model files found outside best/ and _archive/.")
            else:
                print(f" Found {len(all_files)} model file(s).")
                moved = archive_models(models_dir, args.keep_models, args.dry_run)
                models_moved.extend(moved)

    # -----------------------------------------------------------------------
    # Compression
    # -----------------------------------------------------------------------
    compressed_path: str | None = None
    if args.compress:
        print("\nCompressing models/_archive/...")
        compressed_path = compress_archive(models_dir, args.dry_run)

    # -----------------------------------------------------------------------
    # Summary
    # -----------------------------------------------------------------------
    summary = ArchiveSummary(
        branches_archived=branches_archived,
        branches_skipped=branches_skipped,
        models_moved=models_moved,
        bytes_reclaimed_by_move=0,  # files moved, not deleted — disk usage unchanged
        archive_compressed=args.compress,
        compressed_path=compressed_path,
        dry_run=args.dry_run,
    )
    print_summary(summary)
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
if __name__ == "__main__":
    # Script entry point: run the cleanup when executed directly.
    main()
|