fraclab_sdk-0.1.0-py3-none-any.whl

This diff represents the content of a publicly available package version as released to a supported registry. It is provided for informational purposes only and reflects the package exactly as it appears in the public registry.
Files changed (47)
  1. README.md +1601 -0
  2. fraclab_sdk/__init__.py +34 -0
  3. fraclab_sdk/algorithm/__init__.py +13 -0
  4. fraclab_sdk/algorithm/export.py +1 -0
  5. fraclab_sdk/algorithm/library.py +378 -0
  6. fraclab_sdk/cli.py +381 -0
  7. fraclab_sdk/config.py +54 -0
  8. fraclab_sdk/devkit/__init__.py +25 -0
  9. fraclab_sdk/devkit/compile.py +342 -0
  10. fraclab_sdk/devkit/export.py +354 -0
  11. fraclab_sdk/devkit/validate.py +1043 -0
  12. fraclab_sdk/errors.py +124 -0
  13. fraclab_sdk/materialize/__init__.py +8 -0
  14. fraclab_sdk/materialize/fsops.py +125 -0
  15. fraclab_sdk/materialize/hash.py +28 -0
  16. fraclab_sdk/materialize/materializer.py +241 -0
  17. fraclab_sdk/models/__init__.py +52 -0
  18. fraclab_sdk/models/bundle_manifest.py +51 -0
  19. fraclab_sdk/models/dataspec.py +65 -0
  20. fraclab_sdk/models/drs.py +47 -0
  21. fraclab_sdk/models/output_contract.py +111 -0
  22. fraclab_sdk/models/run_output_manifest.py +119 -0
  23. fraclab_sdk/results/__init__.py +25 -0
  24. fraclab_sdk/results/preview.py +150 -0
  25. fraclab_sdk/results/reader.py +329 -0
  26. fraclab_sdk/run/__init__.py +10 -0
  27. fraclab_sdk/run/logs.py +42 -0
  28. fraclab_sdk/run/manager.py +403 -0
  29. fraclab_sdk/run/subprocess_runner.py +153 -0
  30. fraclab_sdk/runtime/__init__.py +11 -0
  31. fraclab_sdk/runtime/artifacts.py +303 -0
  32. fraclab_sdk/runtime/data_client.py +123 -0
  33. fraclab_sdk/runtime/runner_main.py +286 -0
  34. fraclab_sdk/runtime/snapshot_provider.py +1 -0
  35. fraclab_sdk/selection/__init__.py +11 -0
  36. fraclab_sdk/selection/model.py +247 -0
  37. fraclab_sdk/selection/validate.py +54 -0
  38. fraclab_sdk/snapshot/__init__.py +12 -0
  39. fraclab_sdk/snapshot/index.py +94 -0
  40. fraclab_sdk/snapshot/library.py +205 -0
  41. fraclab_sdk/snapshot/loader.py +217 -0
  42. fraclab_sdk/specs/manifest.py +89 -0
  43. fraclab_sdk/utils/io.py +32 -0
  44. fraclab_sdk-0.1.0.dist-info/METADATA +1622 -0
  45. fraclab_sdk-0.1.0.dist-info/RECORD +47 -0
  46. fraclab_sdk-0.1.0.dist-info/WHEEL +4 -0
  47. fraclab_sdk-0.1.0.dist-info/entry_points.txt +4 -0
fraclab_sdk/run/manager.py
@@ -0,0 +1,403 @@
+"""Run manager implementation."""
+
+import json
+import shutil
+import sys
+import uuid
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from pathlib import Path
+from typing import Any
+
+from fraclab_sdk.algorithm import AlgorithmLibrary
+from fraclab_sdk.config import SDKConfig
+from fraclab_sdk.errors import RunError
+from fraclab_sdk.materialize import Materializer
+from fraclab_sdk.run.logs import tail_stderr, tail_stdout
+from fraclab_sdk.run.subprocess_runner import SubprocessRunner
+from fraclab_sdk.selection.model import SelectionModel
+from fraclab_sdk.snapshot import SnapshotLibrary
+from fraclab_sdk.utils.io import atomic_write_json
+
+
+class RunStatus(Enum):
+    """Status of a run."""
+
+    PENDING = "pending"
+    RUNNING = "running"
+    SUCCEEDED = "succeeded"
+    FAILED = "failed"
+    TIMEOUT = "timeout"
+
+
+@dataclass
+class RunMeta:
+    """Metadata for a run."""
+
+    run_id: str
+    snapshot_id: str
+    algorithm_id: str
+    algorithm_version: str
+    status: RunStatus
+    created_at: str
+    started_at: str | None = None
+    completed_at: str | None = None
+    error: str | None = None
+
+
+@dataclass
+class RunResult:
+    """Result of run execution."""
+
+    run_id: str
+    status: RunStatus
+    exit_code: int | None = None
+    error: str | None = None
+    stdout: str | None = None
+    stderr: str | None = None
+
+
+class RunIndex:
+    """Manages the run index file."""
+
+    def __init__(self, runs_dir: Path) -> None:
+        """Initialize run index."""
+        self._runs_dir = runs_dir
+        self._index_path = runs_dir / "index.json"
+
+    def _load(self) -> dict[str, dict]:
+        """Load index from disk."""
+        if not self._index_path.exists():
+            return {}
+        return json.loads(self._index_path.read_text())
+
+    def _save(self, data: dict[str, dict]) -> None:
+        """Save index to disk."""
+        self._runs_dir.mkdir(parents=True, exist_ok=True)
+        atomic_write_json(self._index_path, data)
+
+    def add(self, meta: RunMeta) -> None:
+        """Add a run to the index."""
+        data = self._load()
+        data[meta.run_id] = {
+            "run_id": meta.run_id,
+            "snapshot_id": meta.snapshot_id,
+            "algorithm_id": meta.algorithm_id,
+            "algorithm_version": meta.algorithm_version,
+            "status": meta.status.value,
+            "created_at": meta.created_at,
+            "started_at": meta.started_at,
+            "completed_at": meta.completed_at,
+            "error": meta.error,
+        }
+        self._save(data)
+
+    def update(self, meta: RunMeta) -> None:
+        """Update a run in the index."""
+        self.add(meta)
+
+    def remove(self, run_id: str) -> None:
+        """Remove a run from the index."""
+        data = self._load()
+        if run_id in data:
+            del data[run_id]
+            self._save(data)
+
+    def get(self, run_id: str) -> RunMeta | None:
+        """Get run metadata."""
+        data = self._load()
+        if run_id not in data:
+            return None
+        entry = data[run_id]
+        return RunMeta(
+            run_id=entry["run_id"],
+            snapshot_id=entry["snapshot_id"],
+            algorithm_id=entry["algorithm_id"],
+            algorithm_version=entry["algorithm_version"],
+            status=self._coerce_status(entry.get("status", "")),
+            created_at=entry["created_at"],
+            started_at=entry.get("started_at"),
+            completed_at=entry.get("completed_at"),
+            error=entry.get("error"),
+        )
+
+    def list_all(self) -> list[RunMeta]:
+        """List all runs."""
+        data = self._load()
+        return [
+            RunMeta(
+                run_id=entry["run_id"],
+                snapshot_id=entry["snapshot_id"],
+                algorithm_id=entry["algorithm_id"],
+                algorithm_version=entry["algorithm_version"],
+                status=self._coerce_status(entry.get("status", "")),
+                created_at=entry["created_at"],
+                started_at=entry.get("started_at"),
+                completed_at=entry.get("completed_at"),
+                error=entry.get("error"),
+            )
+            for entry in data.values()
+        ]
+
+    @staticmethod
+    def _coerce_status(value: str) -> RunStatus:
+        """Map legacy statuses to new enum."""
+        mapping = {
+            "completed": RunStatus.SUCCEEDED,
+            "failed": RunStatus.FAILED,
+            "pending": RunStatus.PENDING,
+            "running": RunStatus.RUNNING,
+            "timeout": RunStatus.TIMEOUT,
+            "succeeded": RunStatus.SUCCEEDED,
+        }
+        if value in mapping:
+            return mapping[value]
+        try:
+            return RunStatus(value)
+        except Exception:
+            return RunStatus.FAILED
+
+
+class RunManager:
+    """Manages algorithm runs."""
+
+    def __init__(self, config: SDKConfig | None = None) -> None:
+        """Initialize run manager.
+
+        Args:
+            config: SDK configuration. If None, uses default.
+        """
+        self._config = config or SDKConfig()
+        self._index = RunIndex(self._config.runs_dir)
+        self._snapshot_lib = SnapshotLibrary(self._config)
+        self._algorithm_lib = AlgorithmLibrary(self._config)
+        self._materializer = Materializer()
+
+    def create_run(
+        self,
+        snapshot_id: str,
+        algorithm_id: str,
+        algorithm_version: str,
+        selection: SelectionModel,
+        params: dict[str, Any],
+    ) -> str:
+        """Create a new run.
+
+        Args:
+            snapshot_id: The snapshot ID.
+            algorithm_id: The algorithm ID.
+            algorithm_version: The algorithm version.
+            selection: The selection model with selected items.
+            params: Algorithm parameters.
+
+        Returns:
+            The run ID.
+
+        Raises:
+            RunError: If run creation fails.
+        """
+        # Validate selection
+        errors = selection.validate()
+        if errors:
+            error_msgs = [f"{e.dataset_key}: {e.message}" for e in errors]
+            raise RunError(f"Selection validation failed: {'; '.join(error_msgs)}")
+
+        # Get handles
+        snapshot = self._snapshot_lib.get_snapshot(snapshot_id)
+        algorithm = self._algorithm_lib.get_algorithm(algorithm_id, algorithm_version)
+
+        # Generate run ID
+        run_id = str(uuid.uuid4())[:8]
+
+        # Create run directory
+        self._config.ensure_dirs()
+        run_dir = self._config.runs_dir / run_id
+        run_dir.mkdir(parents=True)
+
+        # Build run DataSpec
+        run_ds = selection.build_run_ds()
+
+        # Build run context
+        run_context = {
+            "runId": run_id,
+            "snapshotId": snapshot_id,
+            "algorithmId": algorithm_id,
+            "algorithmVersion": algorithm_version,
+            "contractVersion": algorithm.manifest.contractVersion,
+        }
+
+        # Materialize input
+        self._materializer.materialize(
+            run_dir=run_dir,
+            snapshot=snapshot,
+            run_ds=run_ds,
+            drs=algorithm.drs,
+            params=params,
+            run_context=run_context,
+        )
+
+        # Create run metadata
+        meta = RunMeta(
+            run_id=run_id,
+            snapshot_id=snapshot_id,
+            algorithm_id=algorithm_id,
+            algorithm_version=algorithm_version,
+            status=RunStatus.PENDING,
+            created_at=datetime.now().isoformat(),
+        )
+        self._index.add(meta)
+
+        # Write run_meta.json
+        run_meta_path = run_dir / "run_meta.json"
+        run_meta_path.write_text(
+            json.dumps(
+                {
+                    "run_id": run_id,
+                    "snapshot_id": snapshot_id,
+                    "algorithm_id": algorithm_id,
+                    "algorithm_version": algorithm_version,
+                    "created_at": meta.created_at,
+                },
+                indent=2,
+            )
+        )
+
+        return run_id
+
+    def delete_run(self, run_id: str) -> None:
+        """Delete a run and its outputs."""
+        run_dir = self._config.runs_dir / run_id
+        if run_dir.exists():
+            shutil.rmtree(run_dir)
+        self._index.remove(run_id)
+
+    def execute(
+        self,
+        run_id: str,
+        timeout_s: int | None = None,
+    ) -> RunResult:
+        """Execute a run.
+
+        Args:
+            run_id: The run ID.
+            timeout_s: Optional timeout in seconds.
+
+        Returns:
+            RunResult with execution outcome.
+
+        Raises:
+            RunError: If run not found or already executed.
+        """
+        meta = self._index.get(run_id)
+        if meta is None:
+            raise RunError(f"Run not found: {run_id}")
+
+        if meta.status not in (RunStatus.PENDING, RunStatus.FAILED, RunStatus.TIMEOUT):
+            raise RunError(f"Run {run_id} already executed with status {meta.status}")
+
+        run_dir = self._config.runs_dir / run_id
+        algorithm = self._algorithm_lib.get_algorithm(
+            meta.algorithm_id, meta.algorithm_version
+        )
+
+        # Update status to running
+        meta.status = RunStatus.RUNNING
+        meta.started_at = datetime.now().isoformat()
+        self._index.update(meta)
+
+        # Execute via subprocess runner (streaming logs)
+        cmd = [
+            sys.executable,
+            "-m",
+            "fraclab_sdk.runtime.runner_main",
+            str(run_dir),
+            str(algorithm.algorithm_path),
+        ]
+
+        stdout_log = run_dir / "output" / "_logs" / "stdout.log"
+        stderr_log = run_dir / "output" / "_logs" / "stderr.log"
+        execute_meta = run_dir / "output" / "_logs" / "execute.json"
+
+        runner = SubprocessRunner(cmd=cmd, cwd=run_dir, timeout_s=timeout_s)
+        exit_code, timed_out = runner.run(stdout_log, stderr_log, execute_meta)
+
+        error = None
+        if timed_out:
+            error = f"Timeout after {timeout_s}s"
+        elif exit_code != 0:
+            error = f"Exit code: {exit_code}"
+
+        # Update final status
+        if timed_out:
+            meta.status = RunStatus.TIMEOUT
+        elif exit_code == 0:
+            meta.status = RunStatus.SUCCEEDED
+        else:
+            meta.status = RunStatus.FAILED
+        meta.completed_at = datetime.now().isoformat()
+        meta.error = error
+        self._index.update(meta)
+
+        return RunResult(
+            run_id=run_id,
+            status=meta.status,
+            exit_code=exit_code,
+            error=error,
+            stdout=tail_stdout(run_dir),
+            stderr=tail_stderr(run_dir),
+        )
+
+    def get_run_status(self, run_id: str) -> RunStatus:
+        """Get the status of a run.
+
+        Args:
+            run_id: The run ID.
+
+        Returns:
+            Run status.
+
+        Raises:
+            RunError: If run not found.
+        """
+        meta = self._index.get(run_id)
+        if meta is None:
+            raise RunError(f"Run not found: {run_id}")
+        return meta.status
+
+    def get_run(self, run_id: str) -> RunMeta:
+        """Get run metadata.
+
+        Args:
+            run_id: The run ID.
+
+        Returns:
+            Run metadata.
+
+        Raises:
+            RunError: If run not found.
+        """
+        meta = self._index.get(run_id)
+        if meta is None:
+            raise RunError(f"Run not found: {run_id}")
+        return meta
+
+    def get_run_dir(self, run_id: str) -> Path:
+        """Get the run directory path.
+
+        Args:
+            run_id: The run ID.
+
+        Returns:
+            Path to run directory.
+        """
+        return self._config.runs_dir / run_id
+
+    def list_runs(self) -> list[RunMeta]:
+        """List all runs.
+
+        Returns:
+            List of run metadata.
+        """
+        return self._index.list_all()
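
For orientation, the sketch below drives the RunManager API added in this hunk (create_run, execute, get_run_dir). It is illustrative only: the snapshot and algorithm identifiers and the params dict are placeholders, and constructing a SelectionModel is not shown in this diff, so one is taken as an argument.

from fraclab_sdk.run.manager import RunManager, RunStatus
from fraclab_sdk.selection.model import SelectionModel


def run_once(selection: SelectionModel) -> None:
    """Create, execute, and report one run; the identifiers below are placeholders."""
    manager = RunManager()  # default SDKConfig paths

    run_id = manager.create_run(
        snapshot_id="snap-0001",        # placeholder
        algorithm_id="example-algo",    # placeholder
        algorithm_version="1.0.0",      # placeholder
        selection=selection,            # must pass selection.validate()
        params={"threshold": 0.5},      # algorithm-specific parameters
    )

    result = manager.execute(run_id, timeout_s=600)
    if result.status is RunStatus.SUCCEEDED:
        print("outputs in", manager.get_run_dir(run_id) / "output")
    else:
        print(f"run {run_id} ended as {result.status.value}: {result.error}")
        print(result.stderr or "")
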
fraclab_sdk/run/subprocess_runner.py
@@ -0,0 +1,153 @@
+"""Subprocess runner implementation."""
+
+from __future__ import annotations
+
+import json
+import os
+import signal
+import subprocess
+import sys
+import threading
+import time
+from collections.abc import Iterable, Mapping
+from pathlib import Path
+
+_IS_WINDOWS = sys.platform == "win32"
+
+# Grace period for SIGTERM before escalating to SIGKILL (seconds)
+_TERM_GRACE_SECONDS = 2.0
+
+
+def _kill_process_tree(proc: subprocess.Popen) -> str:
+    """Kill a process and its entire tree. Returns the kill strategy used."""
+    if _IS_WINDOWS:
+        # Windows: try CTRL_BREAK_EVENT first, then kill
+        try:
+            proc.send_signal(signal.CTRL_BREAK_EVENT)
+            try:
+                proc.wait(timeout=_TERM_GRACE_SECONDS)
+                return "ctrl_break"
+            except subprocess.TimeoutExpired:
+                pass
+        except OSError:
+            pass
+        proc.kill()
+        proc.wait()
+        return "kill"
+    else:
+        # POSIX: use process group kill
+        pgid = proc.pid
+        try:
+            os.killpg(pgid, signal.SIGTERM)
+            try:
+                proc.wait(timeout=_TERM_GRACE_SECONDS)
+                return "killpg_term"
+            except subprocess.TimeoutExpired:
+                os.killpg(pgid, signal.SIGKILL)
+                proc.wait()
+                return "killpg_kill"
+        except OSError:
+            # Fallback if process group doesn't exist
+            proc.kill()
+            proc.wait()
+            return "kill"
+
+
+class SubprocessRunner:
+    """Run subprocess with streaming logs and metadata."""
+
+    def __init__(
+        self,
+        cmd: Iterable[str],
+        cwd: Path,
+        env: Mapping[str, str] | None = None,
+        timeout_s: int | None = None,
+    ) -> None:
+        self._cmd = list(cmd)
+        self._cwd = Path(cwd)
+        self._env = {**os.environ, **(env or {})}
+        self._env["PYTHONUNBUFFERED"] = "1"
+        self._timeout_s = timeout_s
+
+    def run(self, stdout_path: Path, stderr_path: Path, execute_path: Path) -> tuple[int, bool]:
+        """Execute the subprocess, streaming logs and writing metadata.
+
+        Returns:
+            (return_code, timed_out)
+        """
+        stdout_path.parent.mkdir(parents=True, exist_ok=True)
+        stderr_path.parent.mkdir(parents=True, exist_ok=True)
+        execute_path.parent.mkdir(parents=True, exist_ok=True)
+
+        start_ts = time.time()
+
+        # Platform-specific process group setup
+        popen_kwargs: dict = {
+            "cwd": self._cwd,
+            "env": self._env,
+            "stdout": subprocess.PIPE,
+            "stderr": subprocess.PIPE,
+            "text": True,
+            "bufsize": 1,
+        }
+        if _IS_WINDOWS:
+            popen_kwargs["creationflags"] = subprocess.CREATE_NEW_PROCESS_GROUP
+        else:
+            popen_kwargs["start_new_session"] = True
+
+        proc = subprocess.Popen(self._cmd, **popen_kwargs)
+
+        timed_out = False
+        kill_strategy: str | None = None
+        terminated_at: float | None = None
+
+        def _pipe_to_file(pipe, path: Path):
+            with path.open("a", encoding="utf-8") as f:
+                for line in pipe:
+                    f.write(line)
+                    f.flush()
+
+        threads: list[threading.Thread] = []
+        if proc.stdout:
+            t_out = threading.Thread(
+                target=_pipe_to_file, args=(proc.stdout, stdout_path), daemon=True
+            )
+            threads.append(t_out)
+            t_out.start()
+        if proc.stderr:
+            t_err = threading.Thread(
+                target=_pipe_to_file, args=(proc.stderr, stderr_path), daemon=True
+            )
+            threads.append(t_err)
+            t_err.start()
+
+        try:
+            proc.wait(timeout=self._timeout_s)
+        except subprocess.TimeoutExpired:
+            timed_out = True
+            terminated_at = time.time()
+            kill_strategy = _kill_process_tree(proc)
+
+        for t in threads:
+            t.join(timeout=5.0)
+
+        end_ts = time.time()
+
+        meta = {
+            "cmd": self._cmd,
+            "cwd": str(self._cwd),
+            "env": {"PYTHONUNBUFFERED": self._env.get("PYTHONUNBUFFERED", "1")},
+            "startedAt": start_ts,
+            "endedAt": end_ts,
+            "returnCode": proc.returncode,
+            "timeout": timed_out,
+            "timeoutSeconds": self._timeout_s,
+            "killStrategy": kill_strategy,
+            "terminatedAt": terminated_at,
+        }
+        execute_path.write_text(json.dumps(meta, indent=2), encoding="utf-8")
+
+        return proc.returncode or 0, timed_out
+
+
+__all__ = ["SubprocessRunner"]
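
The following is a minimal, standalone sketch of SubprocessRunner as defined in this hunk, used outside of RunManager. The command, working directory, and log paths are arbitrary examples chosen for illustration.

import sys
from pathlib import Path

from fraclab_sdk.run.subprocess_runner import SubprocessRunner

work = Path("scratch-run")            # example working directory
logs = work / "output" / "_logs"      # mirrors the layout RunManager uses
work.mkdir(parents=True, exist_ok=True)

runner = SubprocessRunner(
    cmd=[sys.executable, "-c", "print('hello from the child process')"],
    cwd=work,
    timeout_s=30,
)
exit_code, timed_out = runner.run(
    stdout_path=logs / "stdout.log",
    stderr_path=logs / "stderr.log",
    execute_path=logs / "execute.json",
)
print("exit code:", exit_code, "timed out:", timed_out)
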
fraclab_sdk/runtime/__init__.py
@@ -0,0 +1,11 @@
+"""Runtime components for algorithm execution."""
+
+from fraclab_sdk.runtime.artifacts import ArtifactWriter
+from fraclab_sdk.runtime.data_client import DataClient
+from fraclab_sdk.runtime.runner_main import RunContext
+
+__all__ = [
+    "ArtifactWriter",
+    "DataClient",
+    "RunContext",
+]