loopkit 0.0.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
loopkit/cli/sweep.py ADDED
@@ -0,0 +1,313 @@
1
+ import argparse
2
+ import copy
3
+ import itertools
4
+ import json
5
+ import re
6
+ import subprocess
7
+ import sys
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ import yaml
13
+
14
+
15
def generate_sweep_configs(
    base_config: Dict[str, Any],
    sweep_params: Dict[str, List[Any]],
) -> List[Dict[str, Any]]:
    """Generate all configurations for a grid search.

    Args:
        base_config: Base configuration dictionary.
        sweep_params: Dictionary mapping dot-separated parameter paths to
            lists of values.
            Example: {"training.lr": [0.001, 0.01],
                      "model.hidden_dim": [128, 256]}

    Returns:
        List of configuration dictionaries, one per element of the Cartesian
        product of the parameter value lists. An empty sweep_params yields a
        single copy of base_config.
    """
    # Get all parameter names and their values
    param_names = list(sweep_params.keys())
    param_values = [sweep_params[name] for name in param_names]

    # Generate all combinations
    configs = []
    for values in itertools.product(*param_values):
        # Deep copy the base config so each generated config (and the
        # caller's base_config) stays independent of the others.
        config = copy.deepcopy(base_config)

        # Apply sweep parameters
        for param_name, value in zip(param_names, values):
            # Convert dot notation to nested dict.
            keys = param_name.split(".")
            current = config
            for key in keys[:-1]:
                # BUGFIX: also replace intermediates that exist but are not
                # dicts (e.g. an empty YAML section loaded as None); the old
                # `if key not in current` check let the assignment below
                # raise TypeError in that case.
                if not isinstance(current.get(key), dict):
                    current[key] = {}
                current = current[key]
            current[keys[-1]] = value

        configs.append(config)

    return configs
54
+
55
+
56
def _dotted_lookup(config: Dict[str, Any], dotted_key: str) -> Any:
    """Resolve a dot-separated key path in a nested dict ({} for missing keys)."""
    current: Any = config
    for key in dotted_key.split("."):
        if not isinstance(current, dict):
            return {}
        current = current.get(key, {})
    return current


def _execute_experiment(
    exp_name: str, config: Dict[str, Any], command: str
) -> Dict[str, Any]:
    """Run one experiment command in a shell and return its result record."""
    try:
        result = subprocess.run(
            command,
            shell=True,  # command is a user-supplied template; run as written
            capture_output=True,
            text=True,
            cwd=Path.cwd(),
        )
    except Exception as e:
        print(f"❌ Exception: {e}")
        return {
            "exp_name": exp_name,
            "config": config,
            "command": command,
            "status": "exception",
            "error": str(e),
        }

    success = result.returncode == 0
    if success:
        print("✅ Completed successfully")
    else:
        print(f"❌ Failed with return code {result.returncode}")
        if result.stderr:
            print(f"Error: {result.stderr[:500]}")
    return {
        "exp_name": exp_name,
        "config": config,
        "command": command,
        "returncode": result.returncode,
        "status": "success" if success else "failed",
    }


def run_sweep(
    command_template: str,
    sweep_params: Dict[str, List[Any]],
    base_config_file: Optional[str] = None,
    runs_dir: str = "runs",
    sweep_name: Optional[str] = None,
    dry_run: bool = False,
    parallel: bool = False,
    max_parallel: int = 4,
):
    """Run a hyperparameter sweep.

    Args:
        command_template: Command to run, e.g.:
            "python train.py {config}"
            "torchrun --nproc_per_node=2 train.py {config}"
            "bash train.sh 2 {config}"
        sweep_params: Parameters to sweep over
        base_config_file: Base config file (optional)
        runs_dir: Directory to save runs
        sweep_name: Name for this sweep
        dry_run: If True, only print commands without running
        parallel: Run experiments in parallel
        max_parallel: Maximum number of parallel runs

    Returns:
        List of per-experiment result dictionaries (also written to
        ``<sweep_dir>/sweep_results.json``).

    Example:
        >>> run_sweep(
        ...     command_template="python train.py {config}",
        ...     sweep_params={
        ...         "training.lr": [0.001, 0.01, 0.1],
        ...         "model.hidden_dim": [128, 256],
        ...     },
        ...     base_config_file="conf/default.yaml",
        ...     sweep_name="lr_hidden_sweep"
        ... )
    """
    # Create sweep directory
    if sweep_name is None:
        sweep_name = datetime.now().strftime("sweep_%Y%m%d_%H%M%S")

    sweep_dir = Path(runs_dir) / sweep_name
    sweep_dir.mkdir(parents=True, exist_ok=True)

    # Load base config if provided
    base_config = {}
    if base_config_file:
        with open(base_config_file) as f:
            base_config = yaml.safe_load(f)

    # Generate all configurations
    configs = generate_sweep_configs(base_config, sweep_params)

    print(f"\n{'=' * 80}")
    print(f"Hyperparameter Sweep: {sweep_name}")
    print(f"{'=' * 80}")
    print(f"Total experiments: {len(configs)}")
    print(f"Sweep directory: {sweep_dir}")
    print("\nSweep parameters:")
    for param, values in sweep_params.items():
        print(f"  {param}: {values}")
    print(f"{'=' * 80}\n")

    # Save sweep configuration for reproducibility
    sweep_info = {
        "sweep_name": sweep_name,
        "command_template": command_template,
        "sweep_params": sweep_params,
        "base_config_file": base_config_file,
        "num_experiments": len(configs),
        "timestamp": datetime.now().isoformat(),
    }
    with open(sweep_dir / "sweep_info.json", "w") as f:
        json.dump(sweep_info, f, indent=2)

    # Materialize every experiment (directory, config file, command) up
    # front so the execution strategy below is independent of the setup.
    jobs = []
    for idx, config in enumerate(configs):
        exp_name = f"exp_{idx:03d}"
        exp_dir = sweep_dir / exp_name
        exp_dir.mkdir(exist_ok=True)

        config_file = exp_dir / "config.yaml"
        with open(config_file, "w") as f:
            yaml.dump(config, f)

        command = command_template.format(config=str(config_file))
        # Add run_dir override if the template did not set one itself.
        if "run_dir" not in command:
            command += f" run_dir={exp_dir}"
        jobs.append((exp_name, config, command))

    def announce(idx: int, exp_name: str, config: Dict[str, Any], command: str):
        # Print the header and the swept parameter values for one experiment.
        print(f"\n[{idx + 1}/{len(configs)}] Running: {exp_name}")
        print(f"Command: {command}")
        print("Parameters:")
        for param in sweep_params:
            print(f"  {param}: {_dotted_lookup(config, param)}")

    results: List[Dict[str, Any]] = []

    if dry_run:
        for idx, (exp_name, config, command) in enumerate(jobs):
            announce(idx, exp_name, config, command)
            results.append({"exp_name": exp_name, "status": "dry_run"})
    elif parallel:
        # BUGFIX: `parallel`/`max_parallel` were previously accepted but
        # silently ignored; run experiments concurrently in a thread pool
        # (subprocess.run releases the interpreter while waiting). Results
        # keep submission order.
        from concurrent.futures import ThreadPoolExecutor

        with ThreadPoolExecutor(max_workers=max_parallel) as pool:
            futures = []
            for idx, job in enumerate(jobs):
                announce(idx, *job)
                futures.append(pool.submit(_execute_experiment, *job))
            results = [future.result() for future in futures]
    else:
        for idx, job in enumerate(jobs):
            announce(idx, *job)
            results.append(_execute_experiment(*job))

    # Save results
    results_file = sweep_dir / "sweep_results.json"
    with open(results_file, "w") as f:
        json.dump(results, f, indent=2)

    # Print summary
    print(f"\n{'=' * 80}")
    print("Sweep Summary")
    print(f"{'=' * 80}")

    successful = sum(1 for r in results if r["status"] == "success")
    failed = sum(1 for r in results if r["status"] in ["failed", "exception"])

    print(f"Total experiments: {len(results)}")
    print(f"Successful: {successful}")
    print(f"Failed: {failed}")
    print(f"\nResults saved to: {sweep_dir}")
    print(f"{'=' * 80}\n")

    return results
227
+
228
+
229
def setup_sweep_parser(subparsers):
    """Register the 'sweep' subcommand and its arguments."""
    parser = subparsers.add_parser(
        "sweep",
        help="Run hyperparameter sweeps",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Basic sweep
    loopkit sweep "python train.py {config}" \\
        --config config.yaml \\
        --sweep "training.lr=[0.001,0.01]" "model.hidden_dim=[128,256]"

    # With torchrun
    loopkit sweep "torchrun --nproc_per_node=2 train.py {config}" \\
        --config config.yaml \\
        --sweep "training.lr=[0.001,0.01]"

    # Dry run to see commands
    loopkit sweep "python train.py {config}" \\
        --config config.yaml \\
        --sweep "training.lr=[0.001,0.01]" \\
        --dry-run
        """,
    )

    # Positional: the shell command executed once per generated config.
    parser.add_argument("command", help="Command template with {config} placeholder")

    # Required options.
    parser.add_argument("--config", "-c", required=True, help="Base configuration file")
    parser.add_argument(
        "--sweep",
        "-s",
        nargs="+",
        required=True,
        help="Sweep parameters in format: param=[val1,val2,...]",
    )

    # Optional behavior tweaks.
    parser.add_argument("--name", "-n", help="Sweep name")
    parser.add_argument("--runs-dir", "-d", default="runs", help="Directory for runs")
    parser.add_argument(
        "--dry-run", action="store_true", help="Print commands without running"
    )
274
+
275
+
276
def run_sweep_command(args):
    """Entry point for the 'sweep' CLI subcommand: parse specs, run the sweep."""

    def coerce(raw: str):
        # Interpret a value token as int, then float, else keep the string.
        raw = raw.strip()
        for cast in (int, float):
            try:
                return cast(raw)
            except ValueError:
                pass
        return raw

    # Parse "param=[v1,v2,...]" specs into {param: [values]}.
    sweep_params = {}
    for spec in args.sweep:
        parsed = re.match(r"(\S+)=\[([^\]]+)\]", spec)
        if parsed is None:
            print(f"❌ Invalid sweep spec: {spec}")
            print("   Expected format: param=[val1,val2,...]")
            continue
        name, raw_values = parsed.groups()
        sweep_params[name] = [coerce(token) for token in raw_values.split(",")]

    if not sweep_params:
        print("❌ No valid sweep parameters provided")
        sys.exit(1)

    run_sweep(
        command_template=args.command,
        sweep_params=sweep_params,
        base_config_file=args.config,
        sweep_name=args.name,
        runs_dir=args.runs_dir,
        dry_run=args.dry_run,
    )
@@ -0,0 +1,285 @@
1
+ import argparse
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List, Optional, Union
5
+
6
+ import matplotlib.pyplot as plt
7
+ import polars as pl
8
+
9
+
10
def plot_metrics(
    run_dirs: Union[str, Path, List[Union[str, Path]]],
    metrics: Optional[List[str]] = None,
    output_file: Optional[str] = None,
    show: bool = True,
    style: str = "default",
):
    """Plot metrics from one or more runs.

    Args:
        run_dirs: Single run directory or list of run directories.
        metrics: List of metric names to plot (None = all).
        output_file: Save plot to file (e.g., 'plot.png').
        show: Whether to display the plot.
        style: Plot style ('default', 'seaborn', 'ggplot').
    """
    # Accept a single directory as well as a list of them.
    if isinstance(run_dirs, (str, Path)):
        run_dirs = [run_dirs]

    if style != "default":
        plt.style.use(style)

    # Collect a metrics frame per run, tagging each row with its run id.
    frames = []
    for raw_dir in run_dirs:
        run_path = Path(raw_dir)
        metrics_file = run_path / "metrics.csv"
        if not metrics_file.exists():
            print(f"⚠️ No metrics found in {run_path}")
            continue
        frames.append(
            pl.read_csv(metrics_file).with_columns(run_id=pl.lit(run_path.name))
        )

    if not frames:
        print("❌ No metrics data found")
        return

    combined_df = pl.concat(frames)

    # Optionally restrict to the requested metric names.
    if metrics:
        combined_df = combined_df.filter(pl.col("name").is_in(metrics))

    unique_metrics = combined_df["name"].unique()
    unique_splits = combined_df["split"].unique()

    # Lay out subplots: at most two columns, as many rows as needed.
    n_metrics = len(unique_metrics)
    n_cols = min(2, n_metrics)
    n_rows = (n_metrics + n_cols - 1) // n_cols

    fig, axes = plt.subplots(n_rows, n_cols, figsize=(6 * n_cols, 4 * n_rows))
    # Normalize to a flat, indexable sequence of Axes regardless of grid shape.
    if n_metrics == 1:
        axes = [axes]
    elif n_rows > 1 and n_cols > 1:
        axes = axes.flatten()
    else:
        axes = list(axes) if n_rows > 1 or n_cols > 1 else [axes]

    multi_run = len(run_dirs) > 1
    for idx, metric_name in enumerate(sorted(unique_metrics)):
        ax = axes[idx]

        # One line per (run, split) combination that has data.
        for run_id in combined_df["run_id"].unique():
            for split in unique_splits:
                subset = combined_df.filter(
                    (pl.col("name") == metric_name)
                    & (pl.col("run_id") == run_id)
                    & (pl.col("split") == split)
                )
                if subset.height == 0:
                    continue
                ax.plot(
                    subset["step"],
                    subset["value"],
                    label=f"{run_id}_{split}" if multi_run else split,
                    marker="o",
                    markersize=3,
                )

        ax.set_xlabel("Step")
        ax.set_ylabel("Value")
        ax.set_title(metric_name)
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Blank out any leftover grid cells.
    for idx in range(n_metrics, len(axes)):
        axes[idx].axis("off")

    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=150, bbox_inches="tight")
        print(f"📊 Plot saved to {output_file}")

    if show:
        plt.show()
    else:
        plt.close()
117
+
118
+
119
def plot_resource_usage(
    run_dir: Union[str, Path],
    output_file: Optional[str] = None,
    show: bool = True,
):
    """Plot resource usage over time.

    Reads ``resource_usage.jsonl`` (one JSON record per line) from the run
    directory and draws one subplot per resource kind present in the first
    record: GPU memory, CPU percent, and system memory percent.

    Args:
        run_dir: Run directory.
        output_file: Save plot to file.
        show: Whether to display the plot.
    """
    run_dir = Path(run_dir)
    resource_file = run_dir / "resource_usage.jsonl"

    if not resource_file.exists():
        print(f"❌ No resource monitoring data found in {run_dir}")
        return

    # Load data: one JSON record per line.
    data = []
    with open(resource_file) as f:
        for line in f:
            data.append(json.loads(line))

    if not data:
        print("❌ No resource data found")
        return

    # Convert absolute timestamps to minutes elapsed since the first sample.
    timestamps = [d["timestamp"] for d in data]
    start_time = timestamps[0]
    elapsed_times = [(t - start_time) / 60 for t in timestamps]

    # The first record determines which resource kinds get a subplot.
    # NOTE(review): assumes later records have the same keys — TODO confirm
    # against the monitor that writes this file.
    has_gpu = "gpu" in data[0]
    has_cpu = "cpu" in data[0]
    has_memory = "memory" in data[0]

    n_plots = sum([has_gpu, has_cpu, has_memory])

    if n_plots == 0:
        print("❌ No resource data to plot")
        return

    fig, axes = plt.subplots(n_plots, 1, figsize=(10, 4 * n_plots))
    if n_plots == 1:
        axes = [axes]  # plt.subplots returns a bare Axes for a single plot

    plot_idx = 0

    # Plot GPU memory: one line per device index present in the first record.
    if has_gpu:
        ax = axes[plot_idx]
        plot_idx += 1

        for i in range(len(data[0]["gpu"])):
            gpu_mem = [d["gpu"][i]["allocated_mb"] for d in data if "gpu" in d]
            # BUGFIX: truncate the time axis to the series length (as the CPU
            # and memory plots already did) so records missing the "gpu" key
            # cannot cause an x/y length mismatch in ax.plot.
            ax.plot(
                elapsed_times[: len(gpu_mem)],
                gpu_mem,
                label=f"GPU {i}",
                marker="o",
                markersize=2,
            )

        ax.set_xlabel("Time (minutes)")
        ax.set_ylabel("Memory (MB)")
        ax.set_title("GPU Memory Usage")
        ax.legend()
        ax.grid(True, alpha=0.3)

    # Plot CPU usage percentage.
    if has_cpu:
        ax = axes[plot_idx]
        plot_idx += 1

        cpu_percent = [d["cpu"]["percent"] for d in data if "cpu" in d]
        ax.plot(
            elapsed_times[: len(cpu_percent)], cpu_percent, marker="o", markersize=2
        )

        ax.set_xlabel("Time (minutes)")
        ax.set_ylabel("CPU %")
        ax.set_title("CPU Usage")
        ax.grid(True, alpha=0.3)

    # Plot system memory usage percentage.
    if has_memory:
        ax = axes[plot_idx]
        plot_idx += 1

        mem_percent = [d["memory"]["percent"] for d in data if "memory" in d]
        ax.plot(
            elapsed_times[: len(mem_percent)], mem_percent, marker="o", markersize=2
        )

        ax.set_xlabel("Time (minutes)")
        ax.set_ylabel("Memory %")
        ax.set_title("System Memory Usage")
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=150, bbox_inches="tight")
        print(f"📊 Resource plot saved to {output_file}")

    if show:
        plt.show()
    else:
        plt.close()
226
+
227
+
228
def setup_visualize_parser(subparsers):
    """Register the 'viz' subcommand and its arguments."""
    parser = subparsers.add_parser(
        "viz",
        help="Visualize experiment metrics",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Plot all metrics from multiple runs
    loopkit viz runs/exp1 runs/exp2 runs/exp3

    # Plot specific metrics
    loopkit viz runs/exp_* --metrics loss accuracy

    # Save plot to file
    loopkit viz runs/exp1 --output plot.png

    # Plot resource usage
    loopkit viz runs/exp1 --resources
        """,
    )

    # Positional: one or more run directories.
    parser.add_argument("run_dirs", nargs="+", help="Run directories to plot")

    # Output/selection options.
    parser.add_argument("--metrics", "-m", nargs="+", help="Specific metrics to plot")
    parser.add_argument("--output", "-o", help="Save plot to file (e.g., plot.png)")
    parser.add_argument("--no-show", action="store_true", help="Don't display plot")
    parser.add_argument(
        "--style",
        default="default",
        choices=["default", "seaborn", "ggplot"],
        help="Plot style",
    )
    parser.add_argument(
        "--resources",
        "-r",
        action="store_true",
        help="Plot resource usage instead of metrics",
    )
+
268
+
269
def run_visualize_command(args):
    """Entry point for the 'viz' CLI subcommand."""
    show = not args.no_show

    if not args.resources:
        # Default mode: plot metric curves from all given run directories.
        plot_metrics(
            args.run_dirs,
            metrics=args.metrics,
            output_file=args.output,
            show=show,
            style=args.style,
        )
        return

    # Resource mode handles a single run; warn if extra directories were given.
    if len(args.run_dirs) > 1:
        print("⚠️ Resource plotting only supports single run directory")
    print(f"Plotting resources for: {args.run_dirs[0]}")
    plot_resource_usage(args.run_dirs[0], output_file=args.output, show=show)
+ )