fraclab-sdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. README.md +1601 -0
  2. fraclab_sdk/__init__.py +34 -0
  3. fraclab_sdk/algorithm/__init__.py +13 -0
  4. fraclab_sdk/algorithm/export.py +1 -0
  5. fraclab_sdk/algorithm/library.py +378 -0
  6. fraclab_sdk/cli.py +381 -0
  7. fraclab_sdk/config.py +54 -0
  8. fraclab_sdk/devkit/__init__.py +25 -0
  9. fraclab_sdk/devkit/compile.py +342 -0
  10. fraclab_sdk/devkit/export.py +354 -0
  11. fraclab_sdk/devkit/validate.py +1043 -0
  12. fraclab_sdk/errors.py +124 -0
  13. fraclab_sdk/materialize/__init__.py +8 -0
  14. fraclab_sdk/materialize/fsops.py +125 -0
  15. fraclab_sdk/materialize/hash.py +28 -0
  16. fraclab_sdk/materialize/materializer.py +241 -0
  17. fraclab_sdk/models/__init__.py +52 -0
  18. fraclab_sdk/models/bundle_manifest.py +51 -0
  19. fraclab_sdk/models/dataspec.py +65 -0
  20. fraclab_sdk/models/drs.py +47 -0
  21. fraclab_sdk/models/output_contract.py +111 -0
  22. fraclab_sdk/models/run_output_manifest.py +119 -0
  23. fraclab_sdk/results/__init__.py +25 -0
  24. fraclab_sdk/results/preview.py +150 -0
  25. fraclab_sdk/results/reader.py +329 -0
  26. fraclab_sdk/run/__init__.py +10 -0
  27. fraclab_sdk/run/logs.py +42 -0
  28. fraclab_sdk/run/manager.py +403 -0
  29. fraclab_sdk/run/subprocess_runner.py +153 -0
  30. fraclab_sdk/runtime/__init__.py +11 -0
  31. fraclab_sdk/runtime/artifacts.py +303 -0
  32. fraclab_sdk/runtime/data_client.py +123 -0
  33. fraclab_sdk/runtime/runner_main.py +286 -0
  34. fraclab_sdk/runtime/snapshot_provider.py +1 -0
  35. fraclab_sdk/selection/__init__.py +11 -0
  36. fraclab_sdk/selection/model.py +247 -0
  37. fraclab_sdk/selection/validate.py +54 -0
  38. fraclab_sdk/snapshot/__init__.py +12 -0
  39. fraclab_sdk/snapshot/index.py +94 -0
  40. fraclab_sdk/snapshot/library.py +205 -0
  41. fraclab_sdk/snapshot/loader.py +217 -0
  42. fraclab_sdk/specs/manifest.py +89 -0
  43. fraclab_sdk/utils/io.py +32 -0
  44. fraclab_sdk-0.1.0.dist-info/METADATA +1622 -0
  45. fraclab_sdk-0.1.0.dist-info/RECORD +47 -0
  46. fraclab_sdk-0.1.0.dist-info/WHEEL +4 -0
  47. fraclab_sdk-0.1.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,217 @@
1
+ """Snapshot loader implementation."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+
6
+ from fraclab_sdk.errors import SnapshotError
7
+ from fraclab_sdk.models import DRS, BundleManifest, DataSpec, DataSpecItem
8
+
9
+
10
class SnapshotHandle:
    """Handle for accessing snapshot contents.

    The bundle manifest, DataSpec and DRS documents are parsed on first access
    and cached on the instance. All JSON text is read as UTF-8 so results do
    not depend on the platform's default encoding.
    """

    def __init__(self, snapshot_dir: Path) -> None:
        """Initialize snapshot handle.

        Args:
            snapshot_dir: Path to the snapshot directory.
        """
        self._dir = snapshot_dir
        # Parsed documents are loaded lazily by the properties below.
        self._manifest: BundleManifest | None = None
        self._dataspec: DataSpec | None = None
        self._drs: DRS | None = None

    @property
    def directory(self) -> Path:
        """Get snapshot directory path."""
        return self._dir

    @property
    def manifest(self) -> BundleManifest:
        """Get bundle manifest.

        Raises:
            SnapshotError: If manifest.json is missing from the snapshot directory.
        """
        if self._manifest is None:
            manifest_path = self._dir / "manifest.json"
            if not manifest_path.exists():
                raise SnapshotError(f"Manifest not found: {manifest_path}")
            self._manifest = BundleManifest.model_validate_json(
                manifest_path.read_text(encoding="utf-8")
            )
        return self._manifest

    @property
    def dataspec(self) -> DataSpec:
        """Get data specification.

        Raises:
            SnapshotError: If the DataSpec file named by the manifest is missing.
        """
        if self._dataspec is None:
            ds_path = self._dir / self.manifest.specFiles.dsPath
            if not ds_path.exists():
                raise SnapshotError(f"DataSpec not found: {ds_path}")
            self._dataspec = DataSpec.model_validate_json(
                ds_path.read_text(encoding="utf-8")
            )
        return self._dataspec

    @property
    def drs(self) -> DRS:
        """Get data requirement specification.

        Raises:
            SnapshotError: If the DRS file named by the manifest is missing.
        """
        if self._drs is None:
            drs_path = self._dir / self.manifest.specFiles.drsPath
            if not drs_path.exists():
                raise SnapshotError(f"DRS not found: {drs_path}")
            self._drs = DRS.model_validate_json(drs_path.read_text(encoding="utf-8"))
        return self._drs

    @property
    def data_root(self) -> Path:
        """Get data root directory path."""
        return self._dir / self.manifest.dataRoot

    def get_raw_ds_bytes(self) -> bytes:
        """Get raw bytes of ds.json for hash verification."""
        return (self._dir / self.manifest.specFiles.dsPath).read_bytes()

    def get_raw_drs_bytes(self) -> bytes:
        """Get raw bytes of drs.json for hash verification."""
        return (self._dir / self.manifest.specFiles.drsPath).read_bytes()

    def get_datasets(self) -> list[dict]:
        """Get list of datasets with metadata.

        Returns:
            List of dicts with dataset_key, resource_type, layout, item_count.
        """
        return [
            {
                "dataset_key": ds.datasetKey,
                "resource_type": ds.resourceType,
                "layout": ds.layout,
                "item_count": len(ds.items),
            }
            for ds in self.dataspec.datasets
        ]

    def get_items(self, dataset_key: str) -> list[tuple[int, DataSpecItem]]:
        """Get items for a dataset as (index, item) tuples.

        Args:
            dataset_key: The dataset key.

        Returns:
            List of (index, DataSpecItem) tuples.

        Raises:
            SnapshotError: If dataset not found.
        """
        dataset = self.dataspec.get_dataset(dataset_key)
        if dataset is None:
            raise SnapshotError(f"Dataset not found: {dataset_key}")
        return list(enumerate(dataset.items))

    def get_layout(self, dataset_key: str) -> str | None:
        """Get the layout type for a dataset.

        Args:
            dataset_key: The dataset key.

        Raises:
            SnapshotError: If dataset not found.
        """
        dataset = self.dataspec.get_dataset(dataset_key)
        if dataset is None:
            raise SnapshotError(f"Dataset not found: {dataset_key}")
        return dataset.layout

    def read_object_line(self, dataset_key: str, item_index: int) -> dict:
        """Read a single line from object.ndjson by index.

        Args:
            dataset_key: The dataset key.
            item_index: The item index (0-based).

        Returns:
            Parsed JSON object from the line.

        Raises:
            SnapshotError: If dataset not found, the layout is not
                'object_ndjson_lines', object.ndjson is missing, or the index
                does not resolve to a line.
        """
        layout = self.get_layout(dataset_key)
        if layout != "object_ndjson_lines":
            raise SnapshotError(
                f"Cannot read object line from layout '{layout}', expected 'object_ndjson_lines'"
            )

        ndjson_path = self.get_ndjson_path(dataset_key)
        if not ndjson_path.exists():
            raise SnapshotError(f"object.ndjson not found: {ndjson_path}")

        # Check for index file for faster random access
        idx_path = ndjson_path.with_name("object.idx.u64")
        if idx_path.exists():
            return self._read_object_line_indexed(ndjson_path, idx_path, item_index)

        # Fallback to linear scan
        return self._read_object_line_linear(ndjson_path, item_index)

    def _read_object_line_indexed(
        self, ndjson_path: Path, idx_path: Path, item_index: int
    ) -> dict:
        """Read object line using index file for random access.

        The index is read as a flat array of little-endian unsigned 64-bit byte
        offsets into the ndjson file. Only the one entry that is needed is read
        from disk.

        Raises:
            SnapshotError: If item_index is outside the index, the index entry
                cannot be read in full, or the offset points at or past the end
                of the ndjson file.
        """
        import struct

        entry_size = struct.calcsize("<Q")
        # Any trailing partial entry is ignored by the floor division.
        num_entries = idx_path.stat().st_size // entry_size

        if item_index < 0 or item_index >= num_entries:
            raise SnapshotError(f"Item index {item_index} out of range [0, {num_entries})")

        with idx_path.open("rb") as idx_file:
            idx_file.seek(item_index * entry_size)
            raw_entry = idx_file.read(entry_size)
        if len(raw_entry) != entry_size:
            raise SnapshotError(f"Truncated index entry {item_index} in {idx_path}")
        (offset,) = struct.unpack("<Q", raw_entry)

        with ndjson_path.open("rb") as f:
            f.seek(offset)
            line = f.readline()
        if not line:
            # An empty read means the offset is at or beyond EOF (stale index).
            raise SnapshotError(
                f"Index offset {offset} for item {item_index} is past the end of {ndjson_path}"
            )
        return json.loads(line)

    def _read_object_line_linear(self, ndjson_path: Path, item_index: int) -> dict:
        """Read object line via linear scan.

        Raises:
            SnapshotError: If the file has no line at item_index.
        """
        # A negative index can never match, so skip the scan entirely.
        if item_index >= 0:
            with ndjson_path.open(encoding="utf-8") as f:
                for i, line in enumerate(f):
                    if i == item_index:
                        return json.loads(line)
        raise SnapshotError(f"Item index {item_index} not found in {ndjson_path}")

    def read_frame_parts(self, dataset_key: str, item_index: int) -> list[Path]:
        """Get paths to parquet files for an item.

        Args:
            dataset_key: The dataset key.
            item_index: The item index (0-based).

        Returns:
            Sorted list of paths to parquet files found recursively in the
            item directory.

        Raises:
            SnapshotError: If dataset not found, the layout is not
                'frame_parquet_item_dirs', or the item directory is missing.
        """
        layout = self.get_layout(dataset_key)
        if layout != "frame_parquet_item_dirs":
            raise SnapshotError(
                f"Cannot read frame parts from layout '{layout}', "
                f"expected 'frame_parquet_item_dirs'"
            )

        item_dir = self.get_item_dir(dataset_key, item_index)
        if not item_dir.exists():
            raise SnapshotError(f"Item directory not found: {item_dir}")

        # rglob yields entries in filesystem order; sort for a stable result.
        return sorted(item_dir.rglob("*.parquet"))

    def get_item_dir(self, dataset_key: str, item_index: int) -> Path:
        """Get the directory path for a parquet item.

        Args:
            dataset_key: The dataset key.
            item_index: The item index (0-based).

        Returns:
            Path to the item directory.
        """
        return self.data_root / dataset_key / "parquet" / f"item-{item_index:05d}"

    def get_ndjson_path(self, dataset_key: str) -> Path:
        """Get the path to object.ndjson for a dataset.

        Args:
            dataset_key: The dataset key.

        Returns:
            Path to object.ndjson file.
        """
        return self.data_root / dataset_key / "object.ndjson"
@@ -0,0 +1,89 @@
1
+ """Algorithm manifest specification (FracLabAlgorithmManifestV1)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from typing import Annotated, List, Literal, Optional
7
+
8
+ from pydantic import BaseModel, ConfigDict, StringConstraints, model_validator
9
+
10
# The only manifest schema version this module accepts.
ManifestVersion = Literal["1"]

# MAJOR.MINOR.PATCH, optionally followed by a "-prerelease" part and/or a
# "+build" part. Both suffixes may appear together (e.g. 1.0.0-alpha+001),
# as Semantic Versioning allows.
_SEMVER_RE = re.compile(r"^\d+\.\d+\.\d+(-[0-9A-Za-z.-]+)?(\+[0-9A-Za-z.-]+)?$")
13
+
14
# Constrained string aliases shared by the manifest models. Each one strips
# surrounding whitespace and then enforces the length bounds shown.
NonEmptyStr = Annotated[
    str,
    StringConstraints(strip_whitespace=True, min_length=1, max_length=256),
]
IdStr = Annotated[
    str,
    StringConstraints(strip_whitespace=True, min_length=1, max_length=128),
]
UrlStr = Annotated[
    str,
    StringConstraints(strip_whitespace=True, min_length=1, max_length=2048),
]
# Length-bounded only; the address format itself is not checked here.
EmailStr = Annotated[
    str,
    StringConstraints(strip_whitespace=True, min_length=3, max_length=320),
]
18
+
19
+
20
class Author(BaseModel):
    """One author entry of an algorithm manifest.

    Only ``name`` is required. Keys other than the declared fields are ignored
    during validation.
    """

    model_config = ConfigDict(extra="ignore")

    # Required display name.
    name: NonEmptyStr
    # Optional contact details.
    email: Optional[EmailStr] = None
    organization: Optional[NonEmptyStr] = None
28
+
29
+
30
class Compatibility(BaseModel):
    """Version requirements declared for the SDK and the core.

    Both fields are optional, but any value that is supplied must be
    semver-like. Keys other than the declared fields are ignored.
    """

    model_config = ConfigDict(extra="ignore")

    sdk: Optional[NonEmptyStr] = None
    core: Optional[NonEmptyStr] = None

    @model_validator(mode="after")
    def _validate_semver_like(self) -> "Compatibility":
        """Reject any supplied version string that is not semver-like."""
        declared = {"sdk": self.sdk, "core": self.core}
        for label, version in declared.items():
            if version is None:
                continue
            if _SEMVER_RE.match(version) is None:
                raise ValueError(
                    f"requires.{label} must be semver-like (e.g. 1.2.3), got: {version}"
                )
        return self
44
+
45
+
46
class FracLabAlgorithmManifestV1(BaseModel):
    """Algorithm manifest, schema version 1.

    Keys that are not declared below are allowed. After field validation the
    model additionally requires at least one author and a semver-like
    ``contractVersion``; ``codeVersion`` only has to be a non-empty string.
    """

    model_config = ConfigDict(extra="allow")

    manifestVersion: ManifestVersion

    # Identity and human-readable description.
    algorithmId: IdStr
    name: NonEmptyStr
    summary: NonEmptyStr

    notes: Optional[str] = None
    tags: Optional[List[NonEmptyStr]] = None

    authors: List[Author]

    # Version information.
    contractVersion: NonEmptyStr
    codeVersion: NonEmptyStr

    requires: Optional[Compatibility] = None

    # Optional project metadata.
    repository: Optional[UrlStr] = None
    homepage: Optional[UrlStr] = None
    license: Optional[NonEmptyStr] = None

    @model_validator(mode="after")
    def _validate_minimal(self) -> "FracLabAlgorithmManifestV1":
        """Enforce the cross-field minimums that field types cannot express."""
        if not self.authors:
            raise ValueError("authors must contain at least one author")

        contract_ok = _SEMVER_RE.match(self.contractVersion) is not None
        if not contract_ok:
            raise ValueError(
                f"contractVersion must be semver-like (e.g. 1.2.3), got: {self.contractVersion}"
            )

        return self
82
+
83
+
84
+ __all__ = [
85
+ "ManifestVersion",
86
+ "FracLabAlgorithmManifestV1",
87
+ "Author",
88
+ "Compatibility",
89
+ ]
@@ -0,0 +1,32 @@
1
+ """IO utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+ import tempfile
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
def atomic_write_json(path: Path, data: Any) -> None:
    """Atomically write JSON to path (tmp -> replace).

    The payload is written to a temporary file in the target's directory,
    flushed and fsynced, then moved over the target with ``os.replace``. If
    any step fails, the temporary file is removed before the error is
    re-raised, so failed writes do not leave ``*.tmp`` files behind.

    Args:
        path: Target file path.
        data: JSON-serializable object.

    Raises:
        TypeError: If ``data`` is not JSON-serializable (raised before any
            file is created).
        OSError: If writing the temporary file or replacing the target fails.
    """
    path = path.expanduser().resolve()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Serialize first so unserializable data never creates a temp file.
    content = json.dumps(data, indent=2)

    tmp_path: Path | None = None
    try:
        # Same directory as the target so the final rename stays on one filesystem.
        with tempfile.NamedTemporaryFile(
            mode="w", encoding="utf-8", dir=path.parent, delete=False, suffix=".tmp"
        ) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(content)
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # delete=False means nothing else will clean up the temp file.
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                # Best effort: never mask the original failure.
                pass
        raise
30
+
31
+
32
+ __all__ = ["atomic_write_json"]