sixseven 0.1.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,445 @@
1
+ """Main Run class for tracking ML experiments."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import atexit
6
+ import signal
7
+ import sys
8
+ import threading
9
+ import time
10
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
11
+
12
+ from sixtyseven.config import SDKConfig, get_config
13
+ from sixtyseven.exceptions import ValidationError
14
+ from sixtyseven.utils import generate_run_name, get_git_info, get_system_info
15
+
16
+ if TYPE_CHECKING:
17
+ from sixtyseven.client import SixtySevenClient
18
+ from sixtyseven.metrics import MetricsBatcher
19
+ from sixtyseven.local import LocalWriter, LocalBatcher
20
+ from sixtyseven.server import ServerManager
21
+
22
+
23
+ class Run:
24
+ """
25
+ Main class for tracking ML experiments with Sixtyseven.
26
+
27
+ A Run represents a single training session or experiment. It tracks:
28
+ - Configuration/hyperparameters
29
+ - Metrics over time (loss, accuracy, etc.)
30
+ - System information
31
+ - Git information
32
+
33
+ Supports two modes:
34
+ - Local mode (default): Writes to local SQLite files, viewable with `sixtyseven --logdir`
35
+ - Remote mode: Sends to a Sixtyseven API server
36
+
37
+ Usage:
38
+ # Context manager (recommended)
39
+ with Run(project="my-project") as run:
40
+ run.log_config({"lr": 0.001})
41
+ for epoch in range(100):
42
+ run.log_metrics({"loss": 0.5}, step=epoch)
43
+
44
+ # Explicit management
45
+ run = Run(project="my-project")
46
+ run.log_metrics({"loss": 0.5}, step=1)
47
+ run.complete() # or run.fail("error message")
48
+
49
+ Environment variables:
50
+ SIXTYSEVEN_LOGDIR: Set to enable local mode with custom log directory
51
+ SIXTYSEVEN_URL: Set to enable remote mode with API server
52
+ SIXTYSEVEN_API_KEY: API key for remote mode authentication
53
+ """
54
+
55
def __init__(
    self,
    project: str,
    api_key: Optional[str] = None,
    name: Optional[str] = None,
    tags: Optional[List[str]] = None,
    config: Optional[Dict[str, Any]] = None,
    # Advanced options
    mode: Optional[str] = None,  # "local" or "remote"
    logdir: Optional[str] = None,
    base_url: Optional[str] = None,
    batch_size: Optional[int] = None,
    flush_interval: Optional[float] = None,
    capture_git: Optional[bool] = None,
    capture_system: Optional[bool] = None,
    # Server option (local mode only)
    start_server: bool = False,
):
    """
    Initialize a new run.

    Explicit arguments take precedence over the global SDK configuration
    returned by ``get_config()``.

    Args:
        project: Project identifier. In local mode, this is just a project name.
            In remote mode, use "team-slug/app-slug" format.
        api_key: API key for authentication (remote mode only)
        name: Run name (auto-generated if not provided)
        tags: List of tags for organizing runs
        config: Initial configuration/hyperparameters
        mode: Operating mode ("local" or "remote"). Falls back to the global
            configuration if not specified.
        logdir: Directory for logs (local mode only)
        base_url: API base URL (remote mode only)
        batch_size: Number of metrics to batch before sending/writing
        flush_interval: Seconds between automatic flushes
        capture_git: Whether to capture git information
        capture_system: Whether to capture system information
        start_server: Automatically start the sixtyseven viewer server and open the
            browser (local mode only). The server stops when the run ends.

    Raises:
        ValidationError: If project format is invalid (remote mode)
        AuthenticationError: If API key is invalid (remote mode)
        ServerError: If start_server=True but the sixtyseven binary cannot be found
    """
    global_config = get_config()

    # Build the effective config: per-run overrides win over global values.
    # The two boolean flags use an explicit None test so that False is honoured.
    self._config = SDKConfig(
        mode=mode or global_config.mode,
        logdir=logdir or global_config.logdir,
        base_url=base_url or global_config.base_url,
        api_key=api_key or global_config.api_key,
        batch_size=batch_size or global_config.batch_size,
        flush_interval=flush_interval or global_config.flush_interval,
        capture_git=(
            capture_git if capture_git is not None else global_config.capture_git
        ),
        capture_system=(
            capture_system
            if capture_system is not None
            else global_config.capture_system
        ),
    )

    self._project = project
    self._run_name = name or generate_run_name()
    # Copy so later changes to the caller's list cannot leak into the run.
    self._tags = list(tags) if tags else []
    self._initial_config = config

    # Run state is set up before any backend is started so the object is
    # fully formed by the time background work begins.
    self._step = 0
    self._closed = False
    # Re-entrant on purpose: the signal handler runs in the main thread and
    # calls _finalize(), which takes this lock. With a plain Lock a signal
    # arriving while log_metrics() holds the lock would deadlock.
    self._lock = threading.RLock()

    # Mode-specific handles; only the ones for the active mode get populated.
    self._local_writer: Optional["LocalWriter"] = None
    self._local_batcher: Optional["LocalBatcher"] = None
    self._remote_client: Optional["SixtySevenClient"] = None
    self._remote_batcher: Optional["MetricsBatcher"] = None
    self._server_manager: Optional["ServerManager"] = None
    self._start_server = start_server

    # Capture environment info before creating the run so it can be passed
    # to the writer/client at creation time.
    self._git_info = get_git_info() if self._config.capture_git else None
    self._system_info = get_system_info() if self._config.capture_system else None

    if self._config.mode == "local":
        self._init_local_mode()
    else:
        self._init_remote_mode()

    # Register cleanup handlers only once the run exists.
    atexit.register(self._cleanup)
    self._setup_signal_handlers()
151
+
152
def _init_local_mode(self) -> None:
    """
    Initialize local file-based storage.

    Creates the ``LocalWriter`` and ``LocalBatcher`` for this run, starts the
    batcher, prints where the run is being logged and, when ``start_server``
    was requested, starts the viewer server.

    If the viewer server cannot be started, the batcher is stopped and the
    writer is closed before the error is re-raised, so a failed construction
    does not leave them running.
    """
    from sixtyseven.local import LocalWriter, LocalBatcher

    writer = LocalWriter(
        logdir=self._config.logdir,
        project=self._project,
        run_name=self._run_name,
        tags=self._tags,
        config=self._initial_config,
        git_info=self._git_info,
        system_info=self._system_info,
    )
    self._local_writer = writer
    self._run_id = writer.run_id

    batcher = LocalBatcher(
        writer=writer,
        batch_size=self._config.batch_size,
        flush_interval=self._config.flush_interval,
    )
    self._local_batcher = batcher
    batcher.start()

    # Print local mode info
    print(f"Sixtyseven: Logging to {writer.run_dir}")

    if not self._start_server:
        return

    # Start the viewer server if requested
    try:
        from sixtyseven.server import ServerManager

        manager = ServerManager(
            logdir=self._config.logdir,
            open_browser=True,
            project=self._project,
            run_id=self._run_id,
        )
        self._server_manager = manager
        manager.start()
    except Exception:
        # The constructor is about to fail: release what was already started.
        self._server_manager = None
        try:
            batcher.stop()
        finally:
            writer.close()
        raise
188
+
189
def _init_remote_mode(self) -> None:
    """
    Initialize the remote API backend.

    Splits the project into team and app slugs, builds the API client,
    creates the run on the server (which supplies the run ID) and starts a
    metrics batcher bound to that run.
    """
    from sixtyseven.client import SixtySevenClient
    from sixtyseven.metrics import MetricsBatcher

    # Remote projects are addressed as "team-slug/app-slug".
    team_slug, app_slug = self._parse_project(self._project)
    self._team_slug = team_slug
    self._app_slug = app_slug

    client = SixtySevenClient(self._config, self._config.api_key)
    self._remote_client = client

    # Register the run with the server.
    run_fields = {
        "team_slug": team_slug,
        "app_slug": app_slug,
        "name": self._run_name,
        "tags": self._tags,
        "config": self._initial_config or {},
        "git_info": self._git_info,
        "system_info": self._system_info,
    }
    self._run_id = client.create_run(**run_fields)

    batcher = MetricsBatcher(
        client=client,
        run_id=self._run_id,
        batch_size=self._config.batch_size,
        flush_interval=self._config.flush_interval,
    )
    self._remote_batcher = batcher
    batcher.start()
217
+
218
@property
def id(self) -> str:
    """Identifier of this run, as set during mode initialization."""
    run_id = self._run_id
    return run_id
222
+
223
@property
def name(self) -> str:
    """Name of this run (user-supplied or auto-generated)."""
    run_name = self._run_name
    return run_name
227
+
228
@property
def project(self) -> str:
    """Project identifier exactly as passed to the constructor."""
    project_id = self._project
    return project_id
232
+
233
@property
def mode(self) -> str:
    """Operating mode taken from the effective config ('local' or 'remote')."""
    effective = self._config
    return effective.mode
237
+
238
@property
def logdir(self) -> Optional[str]:
    """Run directory of the local writer as a string, or None without one."""
    writer = self._local_writer
    if not writer:
        # No local writer was created (e.g. remote mode).
        return None
    return str(writer.run_dir)
244
+
245
def log_config(self, config: Dict[str, Any]) -> None:
    """
    Log configuration/hyperparameters.

    The values are handed to the active backend: the local writer in local
    mode, otherwise the remote client together with the run ID. May be
    called more than once.

    Args:
        config: Dictionary of configuration values

    Example:
        run.log_config({
            "learning_rate": 0.001,
            "batch_size": 32,
            "optimizer": "adam",
        })
    """
    if self._config.mode != "local":
        self._remote_client.update_run_config(self._run_id, config)
        return
    self._local_writer.log_config(config)
266
+
267
def log_metrics(
    self,
    metrics: Dict[str, float],
    step: Optional[int] = None,
    timestamp: Optional[float] = None,
) -> None:
    """
    Log metrics for the current step.

    Each metric is handed to the active batcher (local or remote), which
    buffers it. Use flush() to force the batcher to flush.

    Args:
        metrics: Dictionary of metric name -> value. Values are converted
            with ``float()``.
        step: Step number. When omitted, the internal counter is used and
            then incremented. When given, the counter is advanced to at
            least ``step + 1``.
        timestamp: Unix timestamp (current time if not provided). An
            explicit ``0.0`` is kept as-is.

    Example:
        run.log_metrics({
            "train/loss": 0.45,
            "train/accuracy": 0.82,
            "val/loss": 0.52,
        }, step=epoch)
    """
    with self._lock:
        if step is None:
            step = self._step
            self._step += 1
        else:
            self._step = max(self._step, step + 1)

        # Test against None rather than truthiness: 0.0 is a valid timestamp
        # and must not be replaced by the current time.
        ts = time.time() if timestamp is None else timestamp

        batcher = (
            self._local_batcher
            if self._config.mode == "local"
            else self._remote_batcher
        )
        for name, value in metrics.items():
            batcher.add(name=name, value=float(value), step=step, timestamp=ts)
307
+
308
def log(self, name: str, value: float, step: Optional[int] = None) -> None:
    """
    Log a single metric.

    Thin convenience wrapper that forwards a one-entry dictionary to
    log_metrics().

    Args:
        name: Metric name
        value: Metric value
        step: Step number (passed through to log_metrics unchanged)
    """
    single_metric = {name: value}
    self.log_metrics(single_metric, step=step)
320
+
321
def flush(self) -> None:
    """Flush the batcher that belongs to the active mode."""
    use_local = self._config.mode == "local"
    active_batcher = self._local_batcher if use_local else self._remote_batcher
    active_batcher.flush()
327
+
328
def add_tags(self, tags: List[str]) -> None:
    """
    Add tags to the run.

    In local mode the tags stored in the run's metadata are merged with
    ``tags``: existing tags keep their position, new ones are appended in
    the order given, and duplicates are dropped. In remote mode the tags
    are forwarded to the API client.

    Args:
        tags: List of tags to add
    """
    if self._config.mode == "local":
        # For local mode, update the stored metadata
        meta = self._local_writer._read_meta()
        # Tolerate a missing or null "tags" entry.
        existing_tags = meta.get("tags") or []
        # dict.fromkeys de-duplicates while keeping first-seen order; a set
        # would reshuffle the tags unpredictably on every write.
        meta["tags"] = list(dict.fromkeys([*existing_tags, *tags]))
        self._local_writer._write_meta(meta)
    else:
        self._remote_client.add_run_tags(self._run_id, tags)
343
+
344
def complete(self) -> None:
    """Finalize the run with status ``"completed"``."""
    outcome = "completed"
    self._finalize(outcome)
347
+
348
def fail(self, error: Optional[str] = None) -> None:
    """
    Finalize the run with status ``"failed"``.

    Args:
        error: Optional error message passed along to finalization
    """
    outcome = "failed"
    self._finalize(outcome, error=error)
356
+
357
def abort(self) -> None:
    """Finalize the run with status ``"aborted"``."""
    outcome = "aborted"
    self._finalize(outcome)
360
+
361
def _finalize(self, status: str, error: Optional[str] = None) -> None:
    """
    Finalize the run with the given status.

    Only the first call does any work; later calls return immediately.
    Buffered metrics are flushed, the batcher is stopped and the status is
    recorded (in the local writer or on the server). In local mode the
    writer is then closed and a viewer server started by this run is
    stopped.

    Shutdown steps are guarded with ``try``/``finally`` so that a failing
    flush or status update still stops the batcher, closes the writer and
    stops the viewer server. The original exception propagates to the
    caller.

    Args:
        status: Final status to record for the run
        error: Optional error message recorded with the status
    """
    # The lock only guards the closed flag, so concurrent or repeated calls
    # finalize exactly once; the I/O below runs outside it.
    with self._lock:
        if self._closed:
            return
        self._closed = True

    if self._config.mode == "local":
        try:
            try:
                self._local_batcher.flush()
            finally:
                # Stop the batcher even when the final flush fails.
                self._local_batcher.stop()
            # Record the final status through the writer
            self._local_writer.update_status(status, error)
        finally:
            try:
                self._local_writer.close()
            finally:
                # Stop the server if we started it
                if self._server_manager is not None:
                    self._server_manager.stop()
    else:
        try:
            self._remote_batcher.flush()
        finally:
            # Stop the batcher even when the final flush fails.
            self._remote_batcher.stop()
        # Update status on server
        self._remote_client.update_run_status(self._run_id, status, error=error)
384
+
385
def __enter__(self) -> "Run":
    """Return this run so ``with Run(...) as run`` binds the run itself."""
    run = self
    return run
388
+
389
def __exit__(self, exc_type, exc_val, exc_tb) -> None:
    """
    Exit context manager, marking run as completed or failed.

    Without an exception the run is completed. Otherwise it is failed with
    the exception's message; when the exception carries no message (for
    example a bare ``assert``), the exception type name is used so the
    recorded error is never an empty string. Returns None, so the exception
    is not suppressed.
    """
    if exc_type is None:
        self.complete()
        return

    if exc_val is None:
        message = "Unknown error"
    else:
        # str() of a message-less exception is "", which would record a
        # failure with no explanation at all.
        message = str(exc_val) or exc_type.__name__
    self.fail(message)
395
+
396
def _setup_signal_handlers(self) -> None:
    """
    Set up signal handlers for graceful shutdown.

    Remembers the current SIGINT and SIGTERM handlers and, when called from
    the main thread, installs a handler that restores the previous handlers,
    finalizes a still-open run as "canceled" and then exits: SIGINT raises
    KeyboardInterrupt, any other signal calls ``sys.exit(128 + signum)``.
    """
    self._original_sigint = signal.getsignal(signal.SIGINT)
    self._original_sigterm = signal.getsignal(signal.SIGTERM)

    def restore(signum, previous):
        """Reinstall a previously captured handler."""
        # getsignal() returns None when the earlier handler was not
        # installed from Python; signal.signal() rejects None with a
        # TypeError, so fall back to the default disposition.
        signal.signal(signum, signal.SIG_DFL if previous is None else previous)

    def handle_signal(signum, frame):
        """Handle termination signals."""
        # Immediately restore original handlers to prevent re-entry
        restore(signal.SIGINT, self._original_sigint)
        restore(signal.SIGTERM, self._original_sigterm)

        # Mark run as canceled
        if not self._closed:
            sig_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM"
            print(f"\nSixtyseven: Run canceled ({sig_name})")
            self._finalize("canceled", error=f"Interrupted by {sig_name}")

        # Re-raise to exit
        if signum == signal.SIGINT:
            raise KeyboardInterrupt
        sys.exit(128 + signum)

    # Only set handlers in main thread
    if threading.current_thread() is threading.main_thread():
        signal.signal(signal.SIGINT, handle_signal)
        signal.signal(signal.SIGTERM, handle_signal)
423
+
424
def _cleanup(self) -> None:
    """atexit hook: finalize a run that is still open as "canceled"."""
    if self._closed:
        # Already finalized explicitly; nothing left to do.
        return
    self._finalize("canceled", error="Process exited unexpectedly")
428
+
429
@staticmethod
def _parse_project(project: str) -> tuple:
    """
    Parse project string into team and app slugs (remote mode only).

    Args:
        project: Identifier in ``"team-slug/app-slug"`` form

    Returns:
        A ``(team_slug, app_slug)`` tuple, both exactly as written in
        ``project``.

    Raises:
        ValidationError: If ``project`` does not consist of exactly two
            non-blank parts separated by a single ``/``.
    """
    parts = project.split("/")
    # Exactly two parts, and neither may be empty or whitespace-only:
    # "team/", "/app" and "/" are not usable identifiers.
    if len(parts) != 2 or not all(part.strip() for part in parts):
        raise ValidationError(
            f"Invalid project format: '{project}'. Expected 'team-slug/app-slug' format for remote mode."
        )
    team_slug, app_slug = parts
    return team_slug, app_slug
438
+
439
def __repr__(self) -> str:
    """Debug representation: id, project, mode and the mode-specific target."""
    if self._config.mode == "local":
        mode_info = f"logdir='{self.logdir}'"
    else:
        mode_info = f"url='{self._config.base_url}'"
    head = f"Run(id='{self._run_id}', project='{self.project}', mode='{self.mode}'"
    return f"{head}, {mode_info})"