autoresearchstudio 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ *.pyc
2
+ __pycache__/
3
+ db.sqlite3
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: autoresearchstudio
3
+ Version: 0.1.0
4
+ Summary: Generalized autonomous ML research framework
5
+ License-Expression: MIT
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: pyyaml>=6.0
8
+ Requires-Dist: requests>=2.28
9
+ Description-Content-Type: text/markdown
10
+
11
+ # autoresearchstudio
12
+
13
+ Generalized autonomous ML research framework. Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch).
14
+
15
+ ## Install
16
+
17
+ ```bash
18
+ pip install autoresearchstudio
19
+ ```
20
+
21
+ ## Quick start
22
+
23
+ ```bash
24
+ # Initialize a project (use --from-template karpathy for LLM pretraining)
25
+ ars init
26
+
27
+ # Edit autoresearch.yaml to match your project
28
+
29
+ # Start a run
30
+ ars setup --tag mar21
31
+
32
+ # The AI agent then uses these commands in a loop:
33
+ ars run --description "baseline"
34
+ ars log --description "baseline"
35
+ ars judge
36
+ ```
@@ -0,0 +1,26 @@
1
+ # autoresearchstudio
2
+
3
+ Generalized autonomous ML research framework. Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch).
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install autoresearchstudio
9
+ ```
10
+
11
+ ## Quick start
12
+
13
+ ```bash
14
+ # Initialize a project (use --from-template karpathy for LLM pretraining)
15
+ ars init
16
+
17
+ # Edit autoresearch.yaml to match your project
18
+
19
+ # Start a run
20
+ ars setup --tag mar21
21
+
22
+ # The AI agent then uses these commands in a loop:
23
+ ars run --description "baseline"
24
+ ars log --description "baseline"
25
+ ars judge
26
+ ```
@@ -0,0 +1,18 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "autoresearchstudio"
7
+ version = "0.1.0"
8
+ description = "Generalized autonomous ML research framework"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ dependencies = [
13
+ "pyyaml>=6.0",
14
+ "requests>=2.28",
15
+ ]
16
+
17
+ [project.scripts]
18
+ ars = "autoresearchstudio.cli:main"
@@ -0,0 +1,3 @@
1
+ """autoresearchstudio - Generalized autonomous ML research framework."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,600 @@
1
+ """CLI entry point: all `ars` subcommands."""
2
+
3
+ import argparse
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import sys
8
+ from datetime import datetime, timezone
9
+
10
+ from . import __version__
11
+ from .config import (
12
+ Config, load_config, validate_config, config_to_yaml,
13
+ CONFIG_FILENAME, ProjectConfig, FilesConfig, ExperimentConfig,
14
+ MetricConfig, JudgeConfig, ApiConfig, SecondaryMetric,
15
+ )
16
+ from .judge import Judge
17
+ from .prompt import generate_program_md
18
+ from .runner import run_experiment, read_log_tail
19
+ from .tracker import Tracker
20
+
21
+
22
+ # ---------------------------------------------------------------------------
23
+ # Helpers
24
+ # ---------------------------------------------------------------------------
25
+
26
def _git(*args, check=True, capture=True) -> str:
    """Invoke git with the given arguments and return its stripped stdout.

    When *capture* is false, output goes straight to the terminal and ""
    is returned. When *check* is true, a non-zero exit status raises
    subprocess.CalledProcessError.
    """
    cmd = ["git", *args]
    proc = subprocess.run(cmd, capture_output=capture, text=True, check=check)
    if not capture:
        return ""
    return proc.stdout.strip()
35
+
36
+
37
def _git_commit_hash() -> str:
    """Short (7-char) hash of HEAD, or "" when git fails (e.g. no commits)."""
    try:
        head = _git("rev-parse", "--short=7", "HEAD")
    except subprocess.CalledProcessError:
        return ""
    return head
42
+
43
+
44
def _git_branch() -> str:
    """Name of the current branch, or "" when git fails (e.g. not a repo)."""
    try:
        branch = _git("rev-parse", "--abbrev-ref", "HEAD")
    except subprocess.CalledProcessError:
        return ""
    return branch
49
+
50
+
51
def _git_diff_from_parent() -> str:
    """Diff of HEAD against its parent, or "" when there is no parent commit."""
    try:
        diff = _git("diff", "HEAD~1", "HEAD")
    except subprocess.CalledProcessError:
        diff = ""
    return diff
56
+
57
+
58
+ def _extract_metric(log_file: str, pattern: str) -> float | None:
59
+ """Extract a metric value from a log file using a regex pattern."""
60
+ try:
61
+ with open(log_file) as f:
62
+ for line in f:
63
+ m = re.search(pattern, line)
64
+ if m:
65
+ return float(m.group(1))
66
+ except (FileNotFoundError, ValueError):
67
+ pass
68
+ return None
69
+
70
+
71
def _load_config_or_exit() -> Config:
    """Load the project config, exiting with an error message if absent."""
    if os.path.exists(CONFIG_FILENAME):
        return load_config()
    print(f"Error: {CONFIG_FILENAME} not found. Run `ars init` first.")
    sys.exit(1)
76
+
77
+
78
def _load_tracker(config: Config) -> Tracker:
    """Construct the experiment tracker for this project's config."""
    return Tracker(config)
80
+
81
+
82
def _get_run_tag(tracker: Tracker) -> str:
    """Return the active run tag, exiting if `ars setup` has not been run."""
    tag = tracker.local.get_meta("run_tag")
    if tag:
        return tag
    print("Error: no active run. Run `ars setup --tag <tag>` first.")
    sys.exit(1)
88
+
89
+
90
+ # ---------------------------------------------------------------------------
91
+ # Commands
92
+ # ---------------------------------------------------------------------------
93
+
94
def cmd_init(args):
    """Initialize a new autoresearch project.

    Writes the config file (refusing to overwrite unless --force) and a
    generated program.md, then prints next steps for the user.
    """
    if os.path.exists(CONFIG_FILENAME) and not args.force:
        print(f"{CONFIG_FILENAME} already exists. Use --force to overwrite.")
        sys.exit(1)

    if args.from_template == "karpathy":
        # Pre-filled config for an LLM-pretraining workflow: minimize val_bpb,
        # track VRAM/MFU as secondary metrics, run via uv.
        config = Config(
            project=ProjectConfig(
                name="autoresearch",
                description="Autonomous pretraining research — generalized from Karpathy's autoresearch.",
                goal="Get the lowest val_bpb.",
            ),
            files=FilesConfig(
                editable=["train.py"],
                readonly=["prepare.py"],
                context=["README.md"],
            ),
            experiment=ExperimentConfig(
                run_command="uv run train.py",
                timeout=600,
                setup_command="uv run prepare.py",
                constraints=(
                    "VRAM is a soft constraint. Some increase is acceptable for "
                    "meaningful val_bpb gains, but it should not blow up dramatically."
                ),
            ),
            metric=MetricConfig(
                name="val_bpb",
                pattern=r"^val_bpb:\s+([\d.]+)",
                direction="minimize",
                secondary=[
                    SecondaryMetric(name="peak_vram_mb", pattern=r"^peak_vram_mb:\s+([\d.]+)"),
                    SecondaryMetric(name="mfu_percent", pattern=r"^mfu_percent:\s+([\d.]+)"),
                ],
            ),
            judge=JudgeConfig(
                threshold=0.0,
                simplicity_note=(
                    "**Simplicity criterion**: All else being equal, simpler is better. "
                    "A small improvement that adds ugly complexity is not worth it. "
                    "Conversely, removing something and getting equal or better results "
                    "is a great outcome."
                ),
            ),
        )
    else:
        # Default (empty) template the user is expected to edit.
        config = Config()

    yaml_str = config_to_yaml(config)
    with open(CONFIG_FILENAME, "w") as f:
        f.write(yaml_str)

    print(f"Created {CONFIG_FILENAME}")

    # Generate program.md — the generated instructions document.
    program = generate_program_md(config)
    with open("program.md", "w") as f:
        f.write(program)
    print("Created program.md")

    print("\nNext steps:")
    print(" 1. Edit autoresearch.yaml to match your project")
    print(" 2. Run `ars setup --tag <tag>` to start a run")
158
+
159
+
160
def cmd_setup(args):
    """Set up a fresh experiment run.

    Validates the config, creates a dedicated git branch (autoresearch/<tag>),
    records the run tag in the tracker, runs the optional setup command, and
    regenerates program.md.
    """
    config = _load_config_or_exit()
    errors = validate_config(config)
    if errors:
        print("Config validation errors:")
        for e in errors:
            print(f" - {e}")
        sys.exit(1)

    tag = args.tag
    branch = f"autoresearch/{tag}"

    # Refuse to reuse a tag: each run gets its own branch.
    if not args.skip_branch:
        result = subprocess.run(
            ["git", "rev-parse", "--verify", branch],
            capture_output=True, text=True,
        )
        if result.returncode == 0:
            print(f"Error: branch {branch} already exists. Pick a different tag.")
            sys.exit(1)

        _git("checkout", "-b", branch)
        print(f"Created branch: {branch}")

    # Initialize tracker state for this run.
    tracker = _load_tracker(config)
    tracker.local.set_meta("run_tag", tag)
    tracker.local.set_meta("project_name", config.project.name)

    # Run the one-time setup command (e.g. data preparation) if configured;
    # a failure is reported but does not abort setup.
    if config.experiment.setup_command and not args.skip_setup:
        print(f"Running setup: {config.experiment.setup_command}")
        result = subprocess.run(config.experiment.setup_command, shell=True)
        if result.returncode != 0:
            print("Warning: setup command exited with non-zero status.")

    # Regenerate program.md with current config
    program = generate_program_md(config)
    with open("program.md", "w") as f:
        f.write(program)

    tracker.close()

    # Plain strings below: the originals were f-strings with no placeholders.
    print("\nSetup complete!")
    print(f" Branch: {branch}")
    print(f" Metric: {config.metric.name} ({config.metric.direction})")
    print(f" Run command: {config.experiment.run_command}")
    print(f" Timeout: {config.experiment.timeout}s")
    print("\nReady. Run baseline with: ars run --description \"baseline\"")
211
+
212
+
213
def cmd_run(args):
    """Execute an experiment with timeout and output capture.

    Creates a tracker record up front (status "running"), runs the configured
    command, then updates the record with duration / crash info and syncs it.
    """
    config = _load_config_or_exit()
    tracker = _load_tracker(config)
    run_tag = _get_run_tag(tracker)

    command = args.run_command or config.experiment.run_command
    timeout = args.timeout or config.experiment.timeout
    log_file = config.experiment.log_file
    description = args.description or ""

    # Create the experiment record before launching so even a hard crash
    # leaves a row behind.
    exp_num = tracker.local.next_experiment_number(run_tag)
    commit_hash = _git_commit_hash()
    parent_hash = ""
    try:
        parent_hash = _git("rev-parse", "--short=7", "HEAD~1")
    except subprocess.CalledProcessError:
        pass  # first commit on the branch has no parent

    exp_id = tracker.create_experiment(
        run_tag=run_tag,
        experiment_number=exp_num,
        commit_hash=commit_hash,
        parent_commit_hash=parent_hash,
        status="running",
        description=description,
        started_at=datetime.now(timezone.utc).isoformat(),
    )

    print(f"Run #{exp_num} started | commit: {commit_hash} | timeout: {timeout}s")
    print(f"Command: {command}")
    print(f"Log: {log_file}")

    # Execute
    result = run_experiment(command, log_file, timeout)

    # Update record. Timeout and non-zero exit share identical crash
    # bookkeeping (originally duplicated, plus an unused `reason` local);
    # they differ only in how the failure is reported.
    now = datetime.now(timezone.utc).isoformat()
    if result.timed_out or result.exit_code != 0:
        tail = read_log_tail(log_file)
        tracker.update_experiment(exp_id,
            status="crash",
            duration_seconds=result.duration_seconds,
            finished_at=now,
            stdout_tail=tail)
        if result.timed_out:
            print(f"\nRun #{exp_num} TIMEOUT after {result.duration_seconds:.0f}s")
        else:
            print(f"\nRun #{exp_num} CRASHED (exit code {result.exit_code}) after {result.duration_seconds:.0f}s")
            print(f"Tail of log:\n{tail[-500:]}")
    else:
        tracker.update_experiment(exp_id,
            duration_seconds=result.duration_seconds,
            finished_at=now)
        print(f"\nRun #{exp_num} finished in {result.duration_seconds:.0f}s | exit code: 0")

    tracker.sync(exp_id)
    tracker.close()
280
+
281
+
282
def cmd_log(args):
    """Extract metrics from run output and record results."""
    config = _load_config_or_exit()
    tracker = _load_tracker(config)

    latest = tracker.get_latest()
    if not latest:
        print("Error: no experiments found. Run `ars run` first.")
        sys.exit(1)

    log_file = args.file or config.experiment.log_file
    description = args.description

    # Primary metric: a manual --metric override wins over log extraction.
    if args.metric is None:
        metric_value = _extract_metric(log_file, config.metric.pattern)
    else:
        metric_value = args.metric

    # Secondary metrics: keep only those actually present in the log.
    secondary = {
        sm.name: val
        for sm in config.metric.secondary
        if (val := _extract_metric(log_file, sm.pattern)) is not None
    }

    # Code diff relative to the parent commit.
    diff = _git_diff_from_parent()

    # Persist everything on the latest experiment row and sync it.
    updates = {
        "metric_value": metric_value,
        "secondary_metrics": secondary,
        "diff": diff,
    }
    if description:
        updates["description"] = description

    tracker.update_experiment(latest.id, **updates)
    tracker.sync(latest.id)

    # Human-readable summary.
    print(f"Experiment #{latest.experiment_number}:")
    if metric_value is None:
        print(f" {config.metric.name}: NOT FOUND (crash?)")
    else:
        print(f" {config.metric.name}: {metric_value:.6f}")
    for name, val in secondary.items():
        print(f" {name}: {val:.1f}")
    if description:
        print(f" Description: {description}")

    tracker.close()
335
+
336
+
337
def cmd_judge(args):
    """Decide keep/discard based on metric comparison with current best.

    --force-keep / --force-discard bypass the Judge entirely; a forced
    discard also resets HEAD to its parent.
    NOTE(review): the CLI parses --threshold but this function never reads
    it — the judge presumably uses the configured threshold; confirm intent.
    """
    config = _load_config_or_exit()
    tracker = _load_tracker(config)

    latest = tracker.get_latest()
    if not latest:
        print("Error: no experiments found.")
        sys.exit(1)

    judge = Judge(config, tracker)

    # Manual overrides short-circuit the automatic evaluation.
    if args.force_keep:
        tracker.update_experiment(latest.id, status="keep")
        tracker.sync(latest.id)
        print(f"KEEP (forced): experiment #{latest.experiment_number}")
        tracker.close()
        return

    if args.force_discard:
        tracker.update_experiment(latest.id, status="discard")
        try:
            _git("reset", "--hard", "HEAD~1")
            reverted_to = _git_commit_hash()
            print(f"DISCARD (forced): experiment #{latest.experiment_number}")
            print(f"Reverted to {reverted_to}")
        except subprocess.CalledProcessError:
            print(f"DISCARD (forced): experiment #{latest.experiment_number}")
            print("Warning: git reset failed")
        tracker.sync(latest.id)
        tracker.close()
        return

    # Reload latest so the judge sees metric values written by `ars log`.
    latest = tracker.local.get_experiment(latest.id)

    result = judge.evaluate(latest)
    judge.apply(latest, result)

    # Print result. The discard and crash branches were duplicated verbatim;
    # both report the commit now at HEAD (assumed reverted by judge.apply —
    # confirm against Judge.apply).
    label = result.decision.upper()
    if result.is_baseline:
        print(f"{label} (baseline): {result.reason}")
    elif result.decision == "keep":
        print(f"{label}: {result.reason}")
    elif result.decision in ("discard", "crash"):
        reverted_to = _git_commit_hash()
        print(f"{label}: {result.reason}")
        print(f"Reverted to {reverted_to}")

    tracker.close()
393
+
394
+
395
def cmd_results(args):
    """Show results table in table, tsv, or json format.

    Honors --status (filter) and --last (limit). Table format marks the
    best experiment with a trailing '*'.
    """
    config = _load_config_or_exit()
    tracker = _load_tracker(config)

    experiments = tracker.get_all(status=args.status, last_n=args.last)

    if not experiments:
        print("No experiments recorded yet.")
        tracker.close()
        return

    fmt = args.format

    if fmt == "tsv":
        # Machine-readable output. NOTE(review): a missing metric is emitted
        # as 0.000000 here, indistinguishable from a true zero — confirm
        # downstream consumers expect that.
        print("commit\t{}\tmemory_gb\tstatus\tdescription".format(config.metric.name))
        for e in experiments:
            metric_str = f"{e.metric_value:.6f}" if e.metric_value is not None else "0.000000"
            mem = e.secondary_metrics.get("peak_vram_mb", 0)
            mem_gb = mem / 1024 if mem else 0.0  # MB -> GB; 0.0 when absent
            print(f"{e.commit_hash}\t{metric_str}\t{mem_gb:.1f}\t{e.status}\t{e.description}")

    elif fmt == "json":
        import json  # local import: only needed for this output format
        data = []
        for e in experiments:
            data.append({
                "experiment": e.experiment_number,
                "commit": e.commit_hash,
                "metric": e.metric_value,
                "status": e.status,
                "description": e.description,
                "duration": e.duration_seconds,
                "secondary": e.secondary_metrics,
            })
        print(json.dumps(data, indent=2))

    else:  # table
        # Header
        metric_col = config.metric.name
        print(f"{'#':>4} {'commit':>7} {metric_col:>12} {'status':>8} {'dur':>5} description")
        print("-" * 80)

        best = tracker.get_best()
        best_id = best.id if best else None

        for e in experiments:
            num = e.experiment_number
            commit = e.commit_hash[:7] if e.commit_hash else "-------"
            metric_str = f"{e.metric_value:.6f}" if e.metric_value is not None else " --- "
            status = e.status
            dur = f"{e.duration_seconds:.0f}s" if e.duration_seconds else " -"
            desc = e.description or ""
            marker = " *" if e.id == best_id else ""  # flag the best row
            print(f"{num:4d} {commit} {metric_str:>12} {status:>8} {dur:>5} {desc}{marker}")

        if best:
            print(f"\n* Best: {config.metric.name} = {best.metric_value:.6f} (experiment #{best.experiment_number})")

    tracker.close()
455
+
456
+
457
def cmd_status(args):
    """Show current project state: git info, run tag, metric, best/latest results."""
    config = _load_config_or_exit()
    tracker = _load_tracker(config)

    branch = _git_branch()
    commit = _git_commit_hash()
    run_tag = tracker.local.get_meta("run_tag") or "(none)"
    stats = tracker.get_stats()
    best = tracker.get_best()
    latest = tracker.get_latest()

    print(f"Project: {config.project.name}")
    print(f"Branch: {branch}")
    print(f"Commit: {commit}")
    print(f"Run tag: {run_tag}")
    print(f"Metric: {config.metric.name} ({config.metric.direction})")
    print(f"Run command: {config.experiment.run_command}")

    if best:
        print(f"Best: {config.metric.name} = {best.metric_value:.6f} (experiment #{best.experiment_number})")
    else:
        # Plain string: the original was an f-string with no placeholders.
        print("Best: (no results yet)")

    # Per-status counts; missing statuses default to 0.
    total = stats.get("total", 0)
    keep = stats.get("keep", 0)
    discard = stats.get("discard", 0)
    crash = stats.get("crash", 0)
    running = stats.get("running", 0)
    print(f"Experiments: {total} total ({keep} keep, {discard} discard, {crash} crash, {running} running)")

    # Latest finished experiment summary; a missing metric prints as 0.000000.
    if latest and latest.status != "running":
        val = latest.metric_value if latest.metric_value is not None else 0.0
        print(f"Last: #{latest.experiment_number} {latest.status} "
              f"{config.metric.name}={val:.6f} "
              f"\"{latest.description}\"")

    api_status = "connected" if config.api.key else "not configured"
    print(f"API: {api_status}")

    tracker.close()
498
+
499
+
500
def cmd_generate(args):
    """Regenerate program.md from current config."""
    config = _load_config_or_exit()

    target = args.output or "program.md"
    with open(target, "w") as f:
        f.write(generate_program_md(config))
    print(f"Generated {target}")
509
+
510
+
511
+ # ---------------------------------------------------------------------------
512
+ # Main
513
+ # ---------------------------------------------------------------------------
514
+
515
def main():
    """CLI entry point: build the argparse tree and dispatch to a subcommand.

    With no subcommand, prints help and exits 0.
    """
    parser = argparse.ArgumentParser(
        prog="ars",
        description="autoresearchstudio — autonomous ML research framework",
    )
    parser.add_argument("--version", action="version", version=f"ars {__version__}")
    subparsers = parser.add_subparsers(dest="command")

    # ars init
    p_init = subparsers.add_parser("init", help="Initialize a new autoresearch project")
    p_init.add_argument("--from-template", default=None, choices=["karpathy"],
                        help="Use a predefined template")
    p_init.add_argument("--force", action="store_true",
                        help="Overwrite existing config")

    # ars setup
    p_setup = subparsers.add_parser("setup", help="Set up a fresh experiment run")
    p_setup.add_argument("--tag", required=True, help="Run tag (e.g. mar21)")
    p_setup.add_argument("--skip-branch", action="store_true",
                         help="Don't create a new git branch")
    p_setup.add_argument("--skip-setup", action="store_true",
                         help="Don't run the setup command")

    # ars run
    p_run = subparsers.add_parser("run", help="Execute an experiment")
    p_run.add_argument("run_command", nargs="?", default=None,
                       help="Command to run (default from config)")
    p_run.add_argument("--timeout", type=int, default=None,
                       help="Timeout in seconds (default from config)")
    p_run.add_argument("--description", "-d", default=None,
                       help="Description of this experiment")

    # ars log
    p_log = subparsers.add_parser("log", help="Extract and record metrics from latest run")
    p_log.add_argument("--file", "-f", default=None,
                       help="Log file to read (default from config)")
    p_log.add_argument("--description", "-d", default=None,
                       help="Experiment description")
    p_log.add_argument("--metric", type=float, default=None,
                       help="Manually provide metric value")

    # ars judge
    p_judge = subparsers.add_parser("judge", help="Decide keep/discard")
    # --force-keep and --force-discard are contradictory. Previously both
    # could be passed together and --force-keep silently won; make argparse
    # reject the combination instead.
    force = p_judge.add_mutually_exclusive_group()
    force.add_argument("--force-keep", action="store_true")
    force.add_argument("--force-discard", action="store_true")
    p_judge.add_argument("--threshold", type=float, default=None)

    # ars results
    p_results = subparsers.add_parser("results", help="Show results table")
    p_results.add_argument("--format", choices=["table", "tsv", "json"], default="table")
    p_results.add_argument("--status", default=None,
                           help="Filter by status (keep, discard, crash)")
    p_results.add_argument("--last", type=int, default=None,
                           help="Show last N experiments")

    # ars status
    subparsers.add_parser("status", help="Show current project state")

    # ars generate
    p_gen = subparsers.add_parser("generate", help="Regenerate program.md from config")
    p_gen.add_argument("--output", "-o", default=None,
                       help="Output file (default: program.md)")

    args = parser.parse_args()

    # No subcommand: show help, exit successfully.
    if args.command is None:
        parser.print_help()
        sys.exit(0)

    # Dispatch table: subcommand name -> handler.
    commands = {
        "init": cmd_init,
        "setup": cmd_setup,
        "run": cmd_run,
        "log": cmd_log,
        "judge": cmd_judge,
        "results": cmd_results,
        "status": cmd_status,
        "generate": cmd_generate,
    }

    cmd_func = commands.get(args.command)
    if cmd_func:
        cmd_func(args)
    else:
        parser.print_help()
        sys.exit(1)