sixtyseven 0.1.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sixtyseven-0.1.0.data/purelib/sixtyseven/__init__.py +36 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/cli.py +64 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/client.py +190 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/config.py +161 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/exceptions.py +40 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/local.py +335 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/metrics.py +157 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/run.py +445 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/server.py +383 -0
- sixtyseven-0.1.0.data/purelib/sixtyseven/utils.py +171 -0
- sixtyseven-0.1.0.dist-info/METADATA +84 -0
- sixtyseven-0.1.0.dist-info/RECORD +15 -0
- sixtyseven-0.1.0.dist-info/WHEEL +5 -0
- sixtyseven-0.1.0.dist-info/entry_points.txt +2 -0
- sixtyseven-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
"""Main Run class for tracking ML experiments."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import atexit
|
|
6
|
+
import signal
|
|
7
|
+
import sys
|
|
8
|
+
import threading
|
|
9
|
+
import time
|
|
10
|
+
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from sixtyseven.config import SDKConfig, get_config
|
|
13
|
+
from sixtyseven.exceptions import ValidationError
|
|
14
|
+
from sixtyseven.utils import generate_run_name, get_git_info, get_system_info
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from sixtyseven.client import SixtySevenClient
|
|
18
|
+
from sixtyseven.metrics import MetricsBatcher
|
|
19
|
+
from sixtyseven.local import LocalWriter, LocalBatcher
|
|
20
|
+
from sixtyseven.server import ServerManager
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Run:
    """
    Main class for tracking ML experiments with Sixtyseven.

    A Run represents a single training session or experiment. It tracks:
    - Configuration/hyperparameters
    - Metrics over time (loss, accuracy, etc.)
    - System information
    - Git information

    Supports two modes:
    - Local mode (default): Writes to local SQLite files, viewable with `sixtyseven --logdir`
    - Remote mode: Sends to a Sixtyseven API server

    Usage:
        # Context manager (recommended)
        with Run(project="my-project") as run:
            run.log_config({"lr": 0.001})
            for epoch in range(100):
                run.log_metrics({"loss": 0.5}, step=epoch)

        # Explicit management
        run = Run(project="my-project")
        run.log_metrics({"loss": 0.5}, step=1)
        run.complete()  # or run.fail("error message")

    A run that is never finalized explicitly is marked "canceled" by the
    atexit hook or by the SIGINT/SIGTERM handlers installed in __init__.

    Environment variables:
        SIXTYSEVEN_LOGDIR: Set to enable local mode with custom log directory
        SIXTYSEVEN_URL: Set to enable remote mode with API server
        SIXTYSEVEN_API_KEY: API key for remote mode authentication
    """

    def __init__(
        self,
        project: str,
        api_key: Optional[str] = None,
        name: Optional[str] = None,
        tags: Optional[List[str]] = None,
        config: Optional[Dict[str, Any]] = None,
        # Advanced options
        mode: Optional[str] = None,  # "local" or "remote"
        logdir: Optional[str] = None,
        base_url: Optional[str] = None,
        batch_size: Optional[int] = None,
        flush_interval: Optional[float] = None,
        capture_git: Optional[bool] = None,
        capture_system: Optional[bool] = None,
        # Server option (local mode only)
        start_server: bool = False,
    ):
        """
        Initialize a new run.

        Args:
            project: Project identifier. In local mode, this is just a project name.
                     In remote mode, use "team-slug/app-slug" format.
            api_key: API key for authentication (remote mode only)
            name: Run name (auto-generated if not provided)
            tags: List of tags for organizing runs
            config: Initial configuration/hyperparameters
            mode: Operating mode ("local" or "remote"). Auto-detected if not specified.
            logdir: Directory for logs (local mode only)
            base_url: API base URL (remote mode only)
            batch_size: Number of metrics to batch before sending/writing
            flush_interval: Seconds between automatic flushes
            capture_git: Whether to capture git information
            capture_system: Whether to capture system information
            start_server: Automatically start the sixtyseven viewer server and open the
                         browser (local mode only). The server stops when the run ends.

        Raises:
            ValidationError: If project format is invalid (remote mode)
            AuthenticationError: If API key is invalid (remote mode)
            ServerError: If start_server=True but the sixtyseven binary cannot be found
        """
        # Get global config
        global_config = get_config()

        # Build effective config: explicit arguments win over the global
        # configuration. The two booleans use "is not None" so that an
        # explicit False is honoured.
        self._config = SDKConfig(
            mode=mode or global_config.mode,
            logdir=logdir or global_config.logdir,
            base_url=base_url or global_config.base_url,
            api_key=api_key or global_config.api_key,
            batch_size=batch_size or global_config.batch_size,
            flush_interval=flush_interval or global_config.flush_interval,
            capture_git=capture_git
            if capture_git is not None
            else global_config.capture_git,
            capture_system=capture_system
            if capture_system is not None
            else global_config.capture_system,
        )

        self._project = project
        self._run_name = name or generate_run_name()
        self._tags = tags or []
        self._initial_config = config

        # Mode-specific initialization
        self._local_writer: Optional["LocalWriter"] = None
        self._local_batcher: Optional["LocalBatcher"] = None
        self._remote_client: Optional["SixtySevenClient"] = None
        self._remote_batcher: Optional["MetricsBatcher"] = None
        self._server_manager: Optional["ServerManager"] = None
        self._start_server = start_server

        # Capture info before creating run
        self._git_info = None
        self._system_info = None
        if self._config.capture_git:
            self._git_info = get_git_info()
        if self._config.capture_system:
            self._system_info = get_system_info()

        if self._config.mode == "local":
            self._init_local_mode()
        else:
            self._init_remote_mode()

        # Track state
        self._step = 0
        self._closed = False
        # Re-entrant on purpose: the signal handler runs on the main thread
        # and calls _finalize(), possibly while that same thread is inside
        # log_metrics() holding this lock. A plain Lock would deadlock there.
        self._lock = threading.RLock()

        # Register cleanup handlers
        atexit.register(self._cleanup)
        self._setup_signal_handlers()

    def _init_local_mode(self) -> None:
        """Initialize local file-based storage (and optionally the viewer server)."""
        from sixtyseven.local import LocalWriter, LocalBatcher

        self._local_writer = LocalWriter(
            logdir=self._config.logdir,
            project=self._project,
            run_name=self._run_name,
            tags=self._tags,
            config=self._initial_config,
            git_info=self._git_info,
            system_info=self._system_info,
        )
        self._run_id = self._local_writer.run_id

        self._local_batcher = LocalBatcher(
            writer=self._local_writer,
            batch_size=self._config.batch_size,
            flush_interval=self._config.flush_interval,
        )
        self._local_batcher.start()

        # Print local mode info
        print(f"Sixtyseven: Logging to {self._local_writer.run_dir}")

        # Start the viewer server if requested
        if self._start_server:
            from sixtyseven.server import ServerManager

            self._server_manager = ServerManager(
                logdir=self._config.logdir,
                open_browser=True,
                project=self._project,
                run_id=self._run_id,
            )
            self._server_manager.start()

    def _init_remote_mode(self) -> None:
        """Initialize the remote API client and create the run on the server."""
        from sixtyseven.client import SixtySevenClient
        from sixtyseven.metrics import MetricsBatcher

        # Parse project for remote mode
        self._team_slug, self._app_slug = self._parse_project(self._project)

        self._remote_client = SixtySevenClient(self._config, self._config.api_key)

        # Create run on server
        self._run_id = self._remote_client.create_run(
            team_slug=self._team_slug,
            app_slug=self._app_slug,
            name=self._run_name,
            tags=self._tags,
            config=self._initial_config or {},
            git_info=self._git_info,
            system_info=self._system_info,
        )

        self._remote_batcher = MetricsBatcher(
            client=self._remote_client,
            run_id=self._run_id,
            batch_size=self._config.batch_size,
            flush_interval=self._config.flush_interval,
        )
        self._remote_batcher.start()

    @property
    def id(self) -> str:
        """Return the run ID."""
        return self._run_id

    @property
    def name(self) -> str:
        """Return the run name."""
        return self._run_name

    @property
    def project(self) -> str:
        """Return the project identifier."""
        return self._project

    @property
    def mode(self) -> str:
        """Return the operating mode ('local' or 'remote')."""
        return self._config.mode

    @property
    def logdir(self) -> Optional[str]:
        """Return the run's log directory, or None when there is no local writer."""
        if self._local_writer:
            return str(self._local_writer.run_dir)
        return None

    def log_config(self, config: Dict[str, Any]) -> None:
        """
        Log configuration/hyperparameters.

        This merges with any config provided at initialization.
        Can be called multiple times to add more config.

        Args:
            config: Dictionary of configuration values

        Example:
            run.log_config({
                "learning_rate": 0.001,
                "batch_size": 32,
                "optimizer": "adam",
            })
        """
        if self._config.mode == "local":
            self._local_writer.log_config(config)
        else:
            self._remote_client.update_run_config(self._run_id, config)

    def log_metrics(
        self,
        metrics: Dict[str, float],
        step: Optional[int] = None,
        timestamp: Optional[float] = None,
    ) -> None:
        """
        Log metrics for the current step.

        Metrics are automatically batched for efficiency. Use flush()
        to force immediate sending.

        Args:
            metrics: Dictionary of metric name -> value
            step: Step number (auto-incremented if not provided)
            timestamp: Unix timestamp (current time if not provided)

        Example:
            run.log_metrics({
                "train/loss": 0.45,
                "train/accuracy": 0.82,
                "val/loss": 0.52,
            }, step=epoch)
        """
        with self._lock:
            if step is None:
                step = self._step
                self._step += 1
            else:
                # Keep the auto-increment counter ahead of any explicit step.
                self._step = max(self._step, step + 1)

            # "is not None" rather than truthiness: 0.0 is a valid timestamp.
            ts = timestamp if timestamp is not None else time.time()

            batcher = (
                self._local_batcher
                if self._config.mode == "local"
                else self._remote_batcher
            )
            for name, value in metrics.items():
                batcher.add(name=name, value=float(value), step=step, timestamp=ts)

    def log(self, name: str, value: float, step: Optional[int] = None) -> None:
        """
        Log a single metric.

        Convenience method for logging one metric at a time.

        Args:
            name: Metric name
            value: Metric value
            step: Step number (auto-incremented if not provided)
        """
        self.log_metrics({name: value}, step=step)

    def flush(self) -> None:
        """Force flush all buffered metrics."""
        if self._config.mode == "local":
            self._local_batcher.flush()
        else:
            self._remote_batcher.flush()

    def add_tags(self, tags: List[str]) -> None:
        """
        Add tags to the run.

        In local mode the tags are merged into the stored metadata; existing
        tags keep their position and new ones are appended in the order given,
        with duplicates dropped.

        Args:
            tags: List of tags to add
        """
        if self._config.mode == "local":
            # For local mode, update the meta.json
            meta = self._local_writer._read_meta()
            existing_tags = meta.get("tags") or []
            # dict.fromkeys de-duplicates while keeping first-seen order, so
            # the stored list is deterministic (a set would scramble it).
            meta["tags"] = list(dict.fromkeys([*existing_tags, *tags]))
            self._local_writer._write_meta(meta)
        else:
            self._remote_client.add_run_tags(self._run_id, tags)

    def complete(self) -> None:
        """Mark the run as completed successfully."""
        self._finalize("completed")

    def fail(self, error: Optional[str] = None) -> None:
        """
        Mark the run as failed.

        Args:
            error: Optional error message
        """
        self._finalize("failed", error=error)

    def abort(self) -> None:
        """Mark the run as aborted."""
        self._finalize("aborted")

    def _finalize(self, status: str, error: Optional[str] = None) -> None:
        """
        Finalize the run with the given status.

        Only the first call has any effect; later calls return immediately.
        The steps run in this order: flush the batcher, stop it, record the
        status, then (local mode) close the writer and stop the viewer server.
        Every step is attempted even if an earlier one raises, so a failed
        flush cannot leave the batcher or server running; the error is still
        propagated to the caller afterwards.

        Args:
            status: Final status to record (e.g. "completed", "failed",
                "aborted", "canceled")
            error: Optional error message stored with the status
        """
        from contextlib import ExitStack

        with self._lock:
            if self._closed:
                return
            self._closed = True

            # Callbacks run in reverse registration order (LIFO) when the
            # stack exits, so they are registered last-step-first.
            with ExitStack() as teardown:
                if self._config.mode == "local":
                    # Stop the server if we started it
                    if self._server_manager is not None:
                        teardown.callback(self._server_manager.stop)
                    teardown.callback(self._local_writer.close)
                    # Update status in meta.json
                    teardown.callback(self._local_writer.update_status, status, error)
                    teardown.callback(self._local_batcher.stop)
                    self._local_batcher.flush()
                else:
                    # Update status on server
                    teardown.callback(
                        self._remote_client.update_run_status,
                        self._run_id,
                        status,
                        error=error,
                    )
                    teardown.callback(self._remote_batcher.stop)
                    self._remote_batcher.flush()

    def __enter__(self) -> "Run":
        """Enter context manager."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Exit context manager, marking run as completed or failed."""
        if exc_type is not None:
            self.fail(str(exc_val) if exc_val else "Unknown error")
        else:
            self.complete()

    def _setup_signal_handlers(self) -> None:
        """
        Set up signal handlers for graceful shutdown.

        On SIGINT/SIGTERM the run is finalized as "canceled", the previous
        handlers are put back, and the process is interrupted
        (KeyboardInterrupt for SIGINT, sys.exit(128 + signum) otherwise).
        Handlers are only installed when called from the main thread.
        """

        def previous_handler(signum):
            # getsignal() returns None when the current handler was not
            # installed from Python; signal.signal() rejects None, so fall
            # back to the default disposition to keep restoration safe.
            handler = signal.getsignal(signum)
            return signal.SIG_DFL if handler is None else handler

        self._original_sigint = previous_handler(signal.SIGINT)
        self._original_sigterm = previous_handler(signal.SIGTERM)

        def handle_signal(signum, frame):
            """Handle termination signals."""
            # Immediately restore original handlers to prevent re-entry
            signal.signal(signal.SIGINT, self._original_sigint)
            signal.signal(signal.SIGTERM, self._original_sigterm)

            # Mark run as canceled
            if not self._closed:
                sig_name = "SIGINT" if signum == signal.SIGINT else "SIGTERM"
                print(f"\nSixtyseven: Run canceled ({sig_name})")
                self._finalize("canceled", error=f"Interrupted by {sig_name}")

            # Re-raise to exit
            if signum == signal.SIGINT:
                raise KeyboardInterrupt
            else:
                sys.exit(128 + signum)

        # Only set handlers in main thread
        if threading.current_thread() is threading.main_thread():
            signal.signal(signal.SIGINT, handle_signal)
            signal.signal(signal.SIGTERM, handle_signal)

    def _cleanup(self) -> None:
        """Cleanup handler for atexit - marks incomplete runs as canceled."""
        if not self._closed:
            self._finalize("canceled", error="Process exited unexpectedly")

    @staticmethod
    def _parse_project(project: str) -> tuple[str, str]:
        """
        Parse project string into team and app slugs (remote mode only).

        Args:
            project: Identifier in "team-slug/app-slug" form

        Returns:
            A (team_slug, app_slug) tuple

        Raises:
            ValidationError: If the string does not consist of exactly two
                non-empty parts separated by a single "/"
        """
        parts = project.split("/")
        # all(parts) rejects empty slugs such as "team/" or "/app".
        if len(parts) != 2 or not all(parts):
            raise ValidationError(
                f"Invalid project format: '{project}'. Expected 'team-slug/app-slug' format for remote mode."
            )
        return parts[0], parts[1]

    def __repr__(self) -> str:
        mode_info = (
            f"logdir='{self.logdir}'"
            if self._config.mode == "local"
            else f"url='{self._config.base_url}'"
        )
        return f"Run(id='{self._run_id}', project='{self.project}', mode='{self.mode}', {mode_info})"
|