openrunner-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
openrunner/__init__.py ADDED
@@ -0,0 +1,273 @@
1
+ """OpenRunner SDK - W&B-compatible ML experiment tracking.
2
+
3
+ Public API:
4
+ openrunner.init(project=..., name=..., config=...) -> Run
5
+ openrunner.log({"loss": 0.5}, step=10) -> None
6
+ openrunner.finish(exit_code=0) -> None
7
+ openrunner.config -> Config proxy
8
+ openrunner.summary -> Summary proxy
9
+ openrunner.run -> active Run or None
10
+ openrunner.Image -> Image class for media logging
11
+ openrunner.Table -> Table class for structured data
12
+ openrunner.Artifact -> Artifact class for versioned file collections
13
+ openrunner.log_artifact -> Upload an artifact
14
+ openrunner.use_artifact -> Download an artifact (cached)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ from pathlib import Path
21
+ from typing import Any
22
+
23
+ from openrunner.artifact import Artifact
24
+ from openrunner.config import Config
25
+ from openrunner.media import Image, Table
26
+ from openrunner.run import Run
27
+ from openrunner.settings import SDKSettings
28
+ from openrunner.summary import Summary
29
+
30
+ __version__ = "0.1.0"
31
+
32
+ logger = logging.getLogger("openrunner")
33
+
34
+ # Active run state
35
+ _active_run: Run | None = None
36
+
37
+
38
+ # ---------------------------------------------------------------------------
39
+ # Proxy objects for module-level config/summary access
40
+ # ---------------------------------------------------------------------------
41
+
42
class _ConfigProxy:
    """Module-level stand-in for the active run's ``Config``.

    A module cannot expose a property, so one instance of this class is bound
    to ``openrunner.config`` and every operation is forwarded to
    ``_active_run.config`` at call time. Reads without an active run raise;
    writes without an active run are dropped after a warning is logged.
    """

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal attribute lookup on the proxy itself fails.
        if _active_run is None:
            raise AttributeError("No active run -- call openrunner.init() first")
        return getattr(_active_run.config, name)

    def __setattr__(self, name: str, value: Any) -> None:
        if _active_run is None:
            logger.warning("openrunner.config: no active run -- call openrunner.init() first")
            return
        setattr(_active_run.config, name, value)

    def __getitem__(self, key: str) -> Any:
        if _active_run is None:
            raise KeyError("No active run -- call openrunner.init() first")
        return _active_run.config[key]

    def __setitem__(self, key: str, value: Any) -> None:
        if _active_run is None:
            logger.warning("openrunner.config: no active run -- call openrunner.init() first")
            return
        _active_run.config[key] = value

    def __repr__(self) -> str:
        if _active_run is None:
            return "ConfigProxy(no active run)"
        return repr(_active_run.config)
75
+
76
+
77
class _SummaryProxy:
    """Module-level stand-in for the active run's ``Summary``.

    Every operation is forwarded to ``_active_run.summary`` at call time.
    Reads without an active run raise; writes without an active run are
    dropped after a warning is logged.
    """

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal attribute lookup on the proxy itself fails.
        if _active_run is None:
            raise AttributeError("No active run -- call openrunner.init() first")
        return getattr(_active_run.summary, name)

    def __setattr__(self, name: str, value: Any) -> None:
        if _active_run is None:
            logger.warning("openrunner.summary: no active run -- call openrunner.init() first")
            return
        setattr(_active_run.summary, name, value)

    def __getitem__(self, key: str) -> Any:
        if _active_run is None:
            raise KeyError("No active run -- call openrunner.init() first")
        return _active_run.summary[key]

    def __setitem__(self, key: str, value: Any) -> None:
        if _active_run is None:
            logger.warning("openrunner.summary: no active run -- call openrunner.init() first")
            return
        _active_run.summary[key] = value

    def __repr__(self) -> str:
        if _active_run is None:
            return "SummaryProxy(no active run)"
        return repr(_active_run.summary)
106
+
107
+
108
# Module-level proxy instances.
# NOTE(review): these assignments rebind the names ``config`` and ``summary``
# in this module's namespace; the earlier ``from openrunner.config import ...``
# and ``from openrunner.summary import ...`` statements presumably left those
# names pointing at the submodules -- confirm the shadowing is intentional.
config = _ConfigProxy()
summary = _SummaryProxy()
111
+
112
+
113
+ # ---------------------------------------------------------------------------
114
+ # Public API functions
115
+ # ---------------------------------------------------------------------------
116
+
117
def init(
    project: str | None = None,
    name: str | None = None,
    config: dict[str, Any] | None = None,
    tags: list[str] | None = None,
    notes: str | None = None,
    group: str | None = None,
    job_type: str | None = None,
    id: str | None = None,
    resume: bool | str | None = None,
    **kwargs: Any,
) -> Run | None:
    """Initialize a new run and make it the module's active run.

    Builds an ``SDKSettings`` instance, constructs a ``Run`` from the given
    arguments and stores it as the active run used by the module-level
    helpers. Never raises -- SDK failures must not crash training code.

    Args:
        project: Project name. Falls back to ``settings.project`` and then
            to ``"uncategorized"``.
        name: Display name for the run.
        config: Hyperparameter dict, passed to ``Run`` as ``config_dict``.
        tags: List of tags for the run.
        notes: Notes/description for the run.
        group: Group name for related runs.
        job_type: Job type label.
        id: Custom run ID, passed to ``Run`` as ``run_id``.
        resume: Resume mode -- True/"allow" (resume if exists, fresh otherwise)
            or "must" (error if parent not found). The ``id`` parameter
            is treated as the parent run ID when resuming.
        **kwargs: Accepted for call-site compatibility but not used; a
            warning listing the ignored names is logged so typos and
            unsupported options do not go unnoticed.

    Returns:
        The Run object, or None if initialization fails.
    """
    global _active_run

    try:
        # Surface silently-dropped arguments (e.g. a misspelled ``projct=``).
        if kwargs:
            logger.warning(
                "openrunner.init(): ignoring unsupported arguments: %s",
                ", ".join(sorted(kwargs)),
            )

        settings = SDKSettings()

        # Explicit argument wins over settings, which win over the default.
        resolved_project = project or settings.project or "uncategorized"

        # Warn if no API key (skip in offline mode)
        if not settings.api_key and settings.mode != "offline":
            logger.warning(
                "No API key set. Set OPENRUNNER_API_KEY or WANDB_API_KEY "
                "environment variable for server communication."
            )

        run = Run(
            project=resolved_project,
            name=name,
            config_dict=config,
            tags=tags,
            notes=notes,
            group=group,
            job_type=job_type,
            run_id=id,
            settings=settings,
            resume=resume,
        )

        # The previous run is not finished here; only tell the user that the
        # module-level helpers will stop targeting it.
        if _active_run is not None:
            logger.warning(
                "openrunner.init(): replacing an active run that was not "
                "finished -- call openrunner.finish() before starting a new run"
            )

        _active_run = run
        return run

    except Exception as e:
        logger.warning("openrunner.init() failed: %s", e)
        return None
186
+
187
+
188
def log(
    data: dict[str, Any],
    step: int | None = None,
    commit: bool = True,
) -> None:
    """Forward metrics to the active run. Never raises.

    Args:
        data: Dict of metric key-value pairs.
        step: Explicit step value.
        commit: If True (default), finalize the current step.
    """
    try:
        active = _active_run
        if active is not None:
            active.log(data, step=step, commit=commit)
            return
        logger.warning("openrunner.log(): no active run -- call openrunner.init() first")
    except Exception as err:
        # Training must not be interrupted by a logging failure.
        logger.warning("openrunner.log() failed: %s", err)
207
+
208
+
209
def finish(
    exit_code: int | None = None,
    quiet: bool | None = None,
) -> None:
    """Finish the active run and clear the module's active-run slot.

    Does nothing when no run is active. Exceptions raised while finishing
    are logged as warnings and the slot is cleared regardless.

    Args:
        exit_code: Optional exit code.
        quiet: If True, suppress the finish message.
    """
    global _active_run

    active = _active_run
    if active is None:
        return

    try:
        active.finish(exit_code=exit_code, quiet=quiet if quiet else False)
    except Exception as err:
        logger.warning("openrunner.finish() failed: %s", err)

    # Reached on success and after a logged failure alike.
    _active_run = None
229
+
230
+
231
def log_artifact(artifact: Artifact) -> dict[str, Any] | None:
    """Hand an artifact to the active run for upload. Never raises.

    Args:
        artifact: The Artifact object with files added via add_file/add_dir.

    Returns:
        Whatever the active run's ``log_artifact`` returns, or None when no
        run is active or the call fails.
    """
    try:
        active = _active_run
        if active is not None:
            return active.log_artifact(artifact)
        logger.warning("openrunner.log_artifact(): no active run")
    except Exception as err:
        logger.warning("openrunner.log_artifact() failed: %s", err)
    return None
248
+
249
+
250
def use_artifact(name: str, version: int | None = None) -> Path | None:
    """Ask the active run for an artifact. Never raises.

    Args:
        name: Artifact name.
        version: Specific version number, or None for latest.

    Returns:
        Whatever the active run's ``use_artifact`` returns (a local path),
        or None when no run is active or the call fails.
    """
    try:
        active = _active_run
        if active is not None:
            return active.use_artifact(name, version)
        logger.warning("openrunner.use_artifact(): no active run")
    except Exception as err:
        logger.warning("openrunner.use_artifact() failed: %s", err)
    return None
268
+
269
+
270
# ``@property`` has no effect on a module-level function: ``openrunner.run``
# would evaluate to the ``property`` object itself, never to a Run or None.
# Giving the module a ModuleType subclass that defines the property makes
# ``openrunner.run`` resolve to the current value of ``_active_run``.
import sys as _sys
import types as _types


class _OpenRunnerModule(_types.ModuleType):
    """Module type exposing ``run`` as a live, read-only attribute."""

    @property
    def run(self) -> Run | None:
        """Return the active run, or None."""
        return _active_run


# A property is a data descriptor, so it takes precedence over any ``run``
# entry already present in the module's own namespace.
_sys.modules[__name__].__class__ = _OpenRunnerModule
@@ -0,0 +1,368 @@
1
+ """HTTP client for OpenRunner server communication.
2
+
3
+ Uses httpx.Client (synchronous, thread-safe) with defensive error handling.
4
+ All methods log warnings on failure and never raise exceptions to the caller.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import logging
10
+ from typing import Any
11
+
12
+ import httpx
13
+
14
+ logger = logging.getLogger("openrunner")
15
+
16
+
17
class APIClient:
    """HTTP client for the OpenRunner server API.

    Every public method is defensive: exceptions are caught, logged as
    warnings on the module logger, and replaced by a neutral return value
    (``None``, ``0``, ``[]`` or ``False``) so that SDK failures never crash
    training code.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        transport: httpx.BaseTransport | None = None,
    ) -> None:
        """Create the underlying ``httpx.Client``.

        Args:
            base_url: Server root URL; trailing slashes are stripped and
                ``/api/v1`` is appended.
            api_key: Bearer token. When empty, no Authorization header is
                set instead of sending a malformed ``Bearer `` value.
            transport: Optional httpx transport passed to the client. It is
                not used by the presigned-URL helpers, which call module-level
                ``httpx`` functions.
        """
        headers: dict[str, str] = {}
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"

        kwargs: dict[str, Any] = {
            "base_url": f"{base_url.rstrip('/')}/api/v1",
            "headers": headers,
            "timeout": 30.0,
        }
        if transport is not None:
            kwargs["transport"] = transport

        self._client = httpx.Client(**kwargs)

    # -- Internal helpers ------------------------------------------------------

    @staticmethod
    def _extract_list(payload: Any, *keys: str) -> list[dict[str, Any]]:
        """Return the list inside a listing response, or ``[]``.

        Accepts either a bare JSON list or a dict wrapping the list under one
        of ``keys`` (checked in order). Anything else yields an empty list so
        callers always receive a list.
        """
        if isinstance(payload, list):
            return payload
        if isinstance(payload, dict):
            for key in keys:
                value = payload.get(key)
                if isinstance(value, list):
                    return value
        return []

    def _post_metric_batch(
        self, op: str, path: str, metrics: list[dict[str, Any]]
    ) -> int:
        """POST ``{"metrics": metrics}`` to ``path`` and return the ``count`` field.

        ``op`` is the public method name used in the warning message. Returns
        0 on any failure or when the response carries no count.
        """
        try:
            response = self._client.post(path, json={"metrics": metrics})
            response.raise_for_status()
            count = response.json().get("count")
            return int(count) if count is not None else 0
        except Exception as e:
            logger.warning("%s failed: %s", op, e)
            return 0

    # -- Run methods -----------------------------------------------------------

    def create_run(self, data: dict[str, Any]) -> dict[str, Any] | None:
        """POST /runs -- create a new run on the server.

        Returns the response JSON dict, or None on failure.
        """
        try:
            response = self._client.post("/runs", json=data)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("create_run failed: %s", e)
            return None

    def update_run(self, run_id: str, data: dict[str, Any]) -> dict[str, Any] | None:
        """PATCH /runs/{run_id} -- update run state/summary/config.

        Returns the response JSON dict, or None on failure.
        """
        try:
            response = self._client.patch(f"/runs/{run_id}", json=data)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("update_run failed: %s", e)
            return None

    def post_metrics(self, run_id: str, metrics: list[dict[str, Any]]) -> int:
        """POST /runs/{run_id}/metrics -- send a batch of metric records.

        Returns the count of metrics accepted, or 0 on failure.
        """
        return self._post_metric_batch(
            "post_metrics", f"/runs/{run_id}/metrics", metrics
        )

    def get_run(self, run_id: str) -> dict[str, Any] | None:
        """GET /runs/{run_id} -- fetch run data.

        Returns the response JSON dict, or None on failure/not-found.
        A 404 is treated as "not found" and is not logged.
        """
        try:
            response = self._client.get(f"/runs/{run_id}")
            if response.status_code == 404:
                return None
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("get_run failed: %s", e)
            return None

    def get_run_max_step(self, run_id: str) -> int:
        """GET /runs/{run_id}/metrics -- return max step across all metric keys.

        Requests the metrics with ``max_points=10`` and returns the largest
        ``step`` found in the response. Malformed points (non-dict entries,
        missing or non-numeric steps) are skipped rather than discarding the
        whole result. Returns 0 if no metrics exist, on 404, or on failure.
        """
        try:
            response = self._client.get(
                f"/runs/{run_id}/metrics", params={"max_points": 10}
            )
            if response.status_code == 404:
                return 0
            response.raise_for_status()
            data = response.json()
            # Expected shape: dict of metric_key -> list of {step, value, ...}
            if not isinstance(data, dict):
                return 0
            max_step = 0
            for series in data.values():
                if not isinstance(series, list):
                    continue
                for point in series:
                    step = point.get("step") if isinstance(point, dict) else None
                    if isinstance(step, (int, float)) and step > max_step:
                        max_step = step
            return int(max_step)
        except Exception as e:
            logger.warning("get_run_max_step failed: %s", e)
            return 0

    def sync_metrics(self, run_id: str, metrics: list[dict[str, Any]]) -> int:
        """POST /runs/{run_id}/sync-metrics -- idempotent metric sync.

        Calls the sync-metrics endpoint which uses ON CONFLICT DO NOTHING
        to handle duplicate metric points. Returns count or 0 on failure.
        """
        return self._post_metric_batch(
            "sync_metrics", f"/runs/{run_id}/sync-metrics", metrics
        )

    def post_heartbeat(self, run_id: str) -> None:
        """POST /runs/{run_id}/heartbeat -- send a heartbeat signal.

        Logs a warning on failure but never raises.
        """
        try:
            response = self._client.post(f"/runs/{run_id}/heartbeat")
            response.raise_for_status()
        except Exception as e:
            logger.warning("post_heartbeat failed: %s", e)

    # -- Listing methods -------------------------------------------------------

    def list_projects(self) -> list[dict[str, Any]]:
        """GET /projects -- list all projects the user has access to.

        Accepts a bare list or a dict with a ``projects`` or ``items`` list.
        Returns a list of project dicts, or empty list on failure.
        """
        try:
            response = self._client.get("/projects")
            response.raise_for_status()
            return self._extract_list(response.json(), "projects", "items")
        except Exception as e:
            logger.warning("list_projects failed: %s", e)
            return []

    def list_runs(self, project_id: str) -> list[dict[str, Any]]:
        """GET /projects/{project_id}/runs -- list runs in a project.

        Accepts a bare list or a dict with a ``runs`` or ``items`` list.
        Returns a list of run dicts, or empty list on failure.
        """
        try:
            response = self._client.get(f"/projects/{project_id}/runs")
            response.raise_for_status()
            return self._extract_list(response.json(), "runs", "items")
        except Exception as e:
            logger.warning("list_runs failed: %s", e)
            return []

    # -- Artifact methods ------------------------------------------------------

    def create_artifact_version(
        self,
        run_id: str,
        name: str,
        type: str,
        files: list[dict[str, Any]],
        description: str | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None:
        """POST /runs/{run_id}/artifacts -- create a new artifact version.

        ``description`` and ``metadata`` are only included in the request
        body when they are not None.

        Returns {version_id, version, upload_urls}, or None on failure.
        """
        try:
            body: dict[str, Any] = {
                "name": name,
                "type": type,
                "files": files,
            }
            if description is not None:
                body["description"] = description
            if metadata is not None:
                body["metadata"] = metadata
            response = self._client.post(f"/runs/{run_id}/artifacts", json=body)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("create_artifact_version failed: %s", e)
            return None

    def confirm_artifact_version(
        self, version_id: str
    ) -> dict[str, Any] | None:
        """POST /artifacts/versions/{version_id}/confirm -- confirm upload.

        Returns {status, version}, or None on failure.
        """
        try:
            response = self._client.post(
                f"/artifacts/versions/{version_id}/confirm"
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("confirm_artifact_version failed: %s", e)
            return None

    def use_artifact(
        self,
        run_id: str,
        artifact_name: str,
        version: int | None = None,
    ) -> dict[str, Any] | None:
        """POST /runs/{run_id}/use-artifact -- get download URLs.

        ``version`` is only sent when it is not None.

        Returns download info dict, or None on failure.
        """
        try:
            body: dict[str, Any] = {"artifact_name": artifact_name}
            if version is not None:
                body["version"] = version
            response = self._client.post(
                f"/runs/{run_id}/use-artifact", json=body
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("use_artifact failed: %s", e)
            return None

    def upload_file_to_presigned_url(
        self, presigned_url: str, file_path: str
    ) -> bool:
        """PUT raw file bytes to a presigned URL.

        Uses module-level ``httpx.put`` rather than ``self._client`` so the
        request carries neither the API Authorization header nor the API
        base URL. Returns True on success, False on failure.
        """
        try:
            # NOTE(review): the whole file is read into memory before upload;
            # large artifacts may warrant streaming -- confirm storage and
            # httpx support before changing.
            with open(file_path, "rb") as f:
                data = f.read()
            resp = httpx.put(presigned_url, content=data, timeout=300.0)
            resp.raise_for_status()
            return True
        except Exception as e:
            logger.warning("upload_file_to_presigned_url failed: %s", e)
            return False

    def download_file_from_presigned_url(
        self, presigned_url: str
    ) -> bytes | None:
        """GET file bytes from a presigned URL.

        Uses module-level ``httpx.get`` rather than ``self._client`` so the
        request carries neither the API Authorization header nor the API
        base URL. Returns the response body, or None on failure.
        """
        try:
            resp = httpx.get(presigned_url, timeout=300.0)
            resp.raise_for_status()
            return resp.content
        except Exception as e:
            logger.warning("download_file_from_presigned_url failed: %s", e)
            return None

    # -- Media methods -------------------------------------------------------

    def create_media_file(
        self,
        run_id: str,
        key: str,
        media_type: str,
        step: int | None = None,
        caption: str | None = None,
        content_type: str | None = None,
        width: int | None = None,
        height: int | None = None,
        data: dict[str, Any] | None = None,
    ) -> dict[str, Any] | None:
        """POST /runs/{run_id}/media -- create a media file record.

        ``key`` and ``media_type`` are always sent; each optional field is
        included only when it is not None.

        Returns {id, storage_key, presigned_url}, or None on failure.
        """
        try:
            body: dict[str, Any] = {
                "key": key,
                "media_type": media_type,
            }
            optional_fields = {
                "step": step,
                "caption": caption,
                "content_type": content_type,
                "width": width,
                "height": height,
                "data": data,
            }
            body.update(
                {field: value for field, value in optional_fields.items() if value is not None}
            )
            response = self._client.post(f"/runs/{run_id}/media", json=body)
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.warning("create_media_file failed: %s", e)
            return None

    def upload_media_bytes(
        self,
        presigned_url: str,
        data_bytes: bytes,
        content_type: str = "image/png",
    ) -> bool:
        """PUT bytes to a presigned URL with Content-Type header.

        Uses module-level ``httpx.put`` rather than ``self._client`` so the
        request carries neither the API Authorization header nor the API
        base URL. Returns True on success, False on failure.
        """
        try:
            resp = httpx.put(
                presigned_url,
                content=data_bytes,
                headers={"Content-Type": content_type},
                timeout=300.0,
            )
            resp.raise_for_status()
            return True
        except Exception as e:
            logger.warning("upload_media_bytes failed: %s", e)
            return False

    # -- Lifecycle -----------------------------------------------------------

    def close(self) -> None:
        """Close the underlying httpx client. Never raises."""
        try:
            self._client.close()
        except Exception as e:
            # Best-effort cleanup: record the failure instead of hiding it.
            logger.debug("close failed: %s", e)