fraclab-sdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- README.md +1601 -0
- fraclab_sdk/__init__.py +34 -0
- fraclab_sdk/algorithm/__init__.py +13 -0
- fraclab_sdk/algorithm/export.py +1 -0
- fraclab_sdk/algorithm/library.py +378 -0
- fraclab_sdk/cli.py +381 -0
- fraclab_sdk/config.py +54 -0
- fraclab_sdk/devkit/__init__.py +25 -0
- fraclab_sdk/devkit/compile.py +342 -0
- fraclab_sdk/devkit/export.py +354 -0
- fraclab_sdk/devkit/validate.py +1043 -0
- fraclab_sdk/errors.py +124 -0
- fraclab_sdk/materialize/__init__.py +8 -0
- fraclab_sdk/materialize/fsops.py +125 -0
- fraclab_sdk/materialize/hash.py +28 -0
- fraclab_sdk/materialize/materializer.py +241 -0
- fraclab_sdk/models/__init__.py +52 -0
- fraclab_sdk/models/bundle_manifest.py +51 -0
- fraclab_sdk/models/dataspec.py +65 -0
- fraclab_sdk/models/drs.py +47 -0
- fraclab_sdk/models/output_contract.py +111 -0
- fraclab_sdk/models/run_output_manifest.py +119 -0
- fraclab_sdk/results/__init__.py +25 -0
- fraclab_sdk/results/preview.py +150 -0
- fraclab_sdk/results/reader.py +329 -0
- fraclab_sdk/run/__init__.py +10 -0
- fraclab_sdk/run/logs.py +42 -0
- fraclab_sdk/run/manager.py +403 -0
- fraclab_sdk/run/subprocess_runner.py +153 -0
- fraclab_sdk/runtime/__init__.py +11 -0
- fraclab_sdk/runtime/artifacts.py +303 -0
- fraclab_sdk/runtime/data_client.py +123 -0
- fraclab_sdk/runtime/runner_main.py +286 -0
- fraclab_sdk/runtime/snapshot_provider.py +1 -0
- fraclab_sdk/selection/__init__.py +11 -0
- fraclab_sdk/selection/model.py +247 -0
- fraclab_sdk/selection/validate.py +54 -0
- fraclab_sdk/snapshot/__init__.py +12 -0
- fraclab_sdk/snapshot/index.py +94 -0
- fraclab_sdk/snapshot/library.py +205 -0
- fraclab_sdk/snapshot/loader.py +217 -0
- fraclab_sdk/specs/manifest.py +89 -0
- fraclab_sdk/utils/io.py +32 -0
- fraclab_sdk-0.1.0.dist-info/METADATA +1622 -0
- fraclab_sdk-0.1.0.dist-info/RECORD +47 -0
- fraclab_sdk-0.1.0.dist-info/WHEEL +4 -0
- fraclab_sdk-0.1.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Snapshot data provider for algorithm runtime."""
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Selection management."""
|
|
2
|
+
|
|
3
|
+
from fraclab_sdk.selection.model import SelectableDataset, SelectionModel
|
|
4
|
+
from fraclab_sdk.selection.validate import ValidationError, validate_cardinality
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"SelectableDataset",
|
|
8
|
+
"SelectionModel",
|
|
9
|
+
"ValidationError",
|
|
10
|
+
"validate_cardinality",
|
|
11
|
+
]
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
"""Selection model implementation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from fraclab_sdk.errors import DatasetKeyError
|
|
6
|
+
from fraclab_sdk.models import DRS, DataSpec, DataSpecDataset, DataSpecItem, DRSDataset
|
|
7
|
+
from fraclab_sdk.selection.validate import ValidationError, validate_cardinality
|
|
8
|
+
from fraclab_sdk.snapshot.loader import SnapshotHandle
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class SelectableDataset:
    """Information about a selectable dataset."""

    # Key identifying the dataset (matches the DRS / snapshot dataspec key).
    dataset_key: str
    # Cardinality constraint from the DRS ("one", "many", "zeroOrMany").
    cardinality: str
    # Number of items available for this dataset in the snapshot.
    total_items: int
    # Resource type as declared in the DRS, if any.
    resource_type: str | None = None
    # Human-readable description from the DRS, if any.
    description: str | None = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SelectionModel:
    """Manages selection state for creating a run.

    Selection uses snapshot item indices (integers).
    Items are automatically sorted and deduplicated.
    build_run_ds() produces a re-indexed DataSpec (0..N-1).
    """

    def __init__(
        self,
        snapshot: SnapshotHandle,
        drs: DRS,
    ) -> None:
        """Initialize selection model.

        Args:
            snapshot: The snapshot handle.
            drs: The data requirement specification (from algorithm).

        Raises:
            DatasetKeyError: If DRS requires a dataset not in snapshot.
        """
        self._snapshot = snapshot

        # If DRS is empty, infer from snapshot dataspec so users can still select data.
        if not drs.datasets:
            inferred = [
                DRSDataset(
                    datasetKey=ds.datasetKey,
                    resourceType=ds.resourceType,
                    # "many" is the most permissive non-empty cardinality.
                    cardinality="many",
                    # NOTE(review): the dataset layout is reused as the description
                    # here — dataspec datasets carry no description field; confirm
                    # this is intentional.
                    description=ds.layout,
                )
                for ds in snapshot.dataspec.datasets
            ]
            self._drs = DRS(schemaVersion=drs.schemaVersion, datasets=inferred)
        else:
            self._drs = drs

        # Maps dataset key -> sorted, deduplicated snapshot item indices.
        self._selections: dict[str, list[int]] = {}

        # Validate that all DRS dataset keys exist in snapshot, and initialize
        # an empty selection for each.
        snapshot_keys = set(snapshot.dataspec.get_dataset_keys())
        for drs_dataset in self._drs.datasets:
            if drs_dataset.datasetKey not in snapshot_keys:
                raise DatasetKeyError(
                    dataset_key=drs_dataset.datasetKey,
                    available_keys=list(snapshot_keys),
                )
            self._selections[drs_dataset.datasetKey] = []

    @classmethod
    def from_snapshot_and_drs(
        cls,
        snapshot: SnapshotHandle,
        drs: DRS,
    ) -> "SelectionModel":
        """Create a SelectionModel from snapshot and DRS.

        Args:
            snapshot: The snapshot handle.
            drs: The data requirement specification.

        Returns:
            Initialized SelectionModel.
        """
        return cls(snapshot=snapshot, drs=drs)

    def _require_dataset_key(self, dataset_key: str) -> None:
        """Raise DatasetKeyError unless dataset_key is part of this selection."""
        if dataset_key not in self._selections:
            raise DatasetKeyError(
                dataset_key=dataset_key,
                available_keys=list(self._selections.keys()),
            )

    def get_selectable_datasets(self) -> list[SelectableDataset]:
        """Get list of datasets that can be selected.

        Returns:
            List of SelectableDataset with metadata.
        """
        result = []
        for drs_dataset in self._drs.datasets:
            snapshot_dataset = self._snapshot.dataspec.get_dataset(drs_dataset.datasetKey)
            # A key missing from the snapshot is silently skipped here;
            # __init__ already guarantees all DRS keys exist.
            if snapshot_dataset:
                result.append(
                    SelectableDataset(
                        dataset_key=drs_dataset.datasetKey,
                        cardinality=drs_dataset.cardinality,
                        total_items=len(snapshot_dataset.items),
                        resource_type=drs_dataset.resourceType,
                        description=drs_dataset.description,
                    )
                )
        return result

    def set_selected(self, dataset_key: str, item_indices: list[int]) -> None:
        """Set selected items for a dataset.

        Items are automatically sorted (ascending) and deduplicated.

        Args:
            dataset_key: The dataset key.
            item_indices: List of item indices to select.

        Raises:
            DatasetKeyError: If dataset_key is not in the selection.
        """
        self._require_dataset_key(dataset_key)
        # Sort and deduplicate
        self._selections[dataset_key] = sorted(set(item_indices))

    def get_selected(self, dataset_key: str) -> list[int]:
        """Get selected item indices for a dataset.

        Returns:
            Sorted list of selected item indices. A copy is returned, so
            mutating it does not affect the stored selection.

        Raises:
            DatasetKeyError: If dataset_key is not in the selection.
        """
        self._require_dataset_key(dataset_key)
        # Defensive copy: callers must not be able to bypass set_selected's
        # sort/dedup invariant by mutating the returned list in place.
        return list(self._selections[dataset_key])

    def validate(self) -> list[ValidationError]:
        """Validate all selections against cardinality constraints.

        Returns:
            List of validation errors (empty if all valid).
        """
        errors = []
        for drs_dataset in self._drs.datasets:
            selected = self._selections.get(drs_dataset.datasetKey, [])
            error = validate_cardinality(
                dataset_key=drs_dataset.datasetKey,
                cardinality=drs_dataset.cardinality,
                selected_count=len(selected),
            )
            if error:
                errors.append(error)
        return errors

    def is_valid(self) -> bool:
        """Check if current selection is valid.

        Returns:
            True if all selections satisfy cardinality constraints.
        """
        return len(self.validate()) == 0

    def _infer_layout(self, dataset_key: str) -> str | None:
        """Infer layout from dataspec, manifest, or on-disk data.

        Checks, in order: the dataspec dataset's layout, the manifest
        dataset's layout, then well-known files under the snapshot's data
        root. Returns None when nothing matches.
        """
        ds = self._snapshot.dataspec.get_dataset(dataset_key)
        if ds and ds.layout:
            return ds.layout

        manifest_ds = self._snapshot.manifest.datasets.get(dataset_key)
        # getattr: manifest dataset entries may not declare a layout field.
        if manifest_ds and getattr(manifest_ds, "layout", None):
            return manifest_ds.layout

        # Fall back to probing the on-disk structure.
        data_root = self._snapshot.manifest.dataRoot or "data"
        base = self._snapshot.directory / data_root / dataset_key
        if (base / "object.ndjson").exists():
            return "object_ndjson_lines"
        if (base / "parquet").exists():
            return "frame_parquet_item_dirs"
        return None

    def build_run_ds(self) -> DataSpec:
        """Build a run DataSpec from current selection.

        Selected items are re-indexed to 0..N-1 (compact indices).
        Each item includes sourceItemIndex for traceability.

        Returns:
            DataSpec with selected items, re-indexed.
        """
        datasets = []

        for drs_dataset in self._drs.datasets:
            snapshot_dataset = self._snapshot.dataspec.get_dataset(drs_dataset.datasetKey)
            if not snapshot_dataset:
                continue

            selected_indices = self._selections.get(drs_dataset.datasetKey, [])

            # Build re-indexed items; out-of-range indices are silently dropped.
            items = []
            for snapshot_index in selected_indices:
                if 0 <= snapshot_index < len(snapshot_dataset.items):
                    original_item = snapshot_dataset.items[snapshot_index]
                    # Create new item with sourceItemIndex for traceability
                    new_item = DataSpecItem(
                        owner=original_item.owner,
                        resolutionParams=original_item.resolutionParams,
                        range=original_item.range,
                        sourceItemIndex=snapshot_index,
                    )
                    items.append(new_item)

            datasets.append(
                DataSpecDataset(
                    datasetKey=snapshot_dataset.datasetKey,
                    resourceType=snapshot_dataset.resourceType,
                    layout=self._infer_layout(snapshot_dataset.datasetKey),
                    items=items,
                )
            )

        return DataSpec(
            schemaVersion=self._snapshot.dataspec.schemaVersion,
            datasets=datasets,
        )

    def get_selection_mapping(self, dataset_key: str) -> list[tuple[int, int]]:
        """Get mapping from run index to snapshot index.

        Args:
            dataset_key: The dataset key.

        Returns:
            List of (run_index, snapshot_index) tuples.

        Raises:
            DatasetKeyError: If dataset_key is not in the selection.
        """
        selected = self.get_selected(dataset_key)
        return list(enumerate(selected))
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Selection validation implementation."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@dataclass
|
|
7
|
+
class ValidationError:
|
|
8
|
+
"""Represents a validation error."""
|
|
9
|
+
|
|
10
|
+
dataset_key: str
|
|
11
|
+
message: str
|
|
12
|
+
error_type: str # "cardinality", "missing_key", etc.
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def validate_cardinality(
|
|
16
|
+
dataset_key: str,
|
|
17
|
+
cardinality: str,
|
|
18
|
+
selected_count: int,
|
|
19
|
+
) -> ValidationError | None:
|
|
20
|
+
"""Validate selection against cardinality constraint.
|
|
21
|
+
|
|
22
|
+
Args:
|
|
23
|
+
dataset_key: The dataset key.
|
|
24
|
+
cardinality: The cardinality constraint ("one", "many", "zeroOrMany").
|
|
25
|
+
selected_count: Number of selected items.
|
|
26
|
+
|
|
27
|
+
Returns:
|
|
28
|
+
ValidationError if invalid, None if valid.
|
|
29
|
+
"""
|
|
30
|
+
if cardinality == "one":
|
|
31
|
+
if selected_count != 1:
|
|
32
|
+
return ValidationError(
|
|
33
|
+
dataset_key=dataset_key,
|
|
34
|
+
message=f"Cardinality 'one' requires exactly 1 item, got {selected_count}",
|
|
35
|
+
error_type="cardinality",
|
|
36
|
+
)
|
|
37
|
+
elif cardinality == "many":
|
|
38
|
+
if selected_count < 1:
|
|
39
|
+
return ValidationError(
|
|
40
|
+
dataset_key=dataset_key,
|
|
41
|
+
message=f"Cardinality 'many' requires at least 1 item, got {selected_count}",
|
|
42
|
+
error_type="cardinality",
|
|
43
|
+
)
|
|
44
|
+
elif cardinality == "zeroOrMany":
|
|
45
|
+
# Always valid
|
|
46
|
+
pass
|
|
47
|
+
else:
|
|
48
|
+
return ValidationError(
|
|
49
|
+
dataset_key=dataset_key,
|
|
50
|
+
message=f"Unknown cardinality: {cardinality}",
|
|
51
|
+
error_type="unknown_cardinality",
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
return None
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Snapshot management."""
|
|
2
|
+
|
|
3
|
+
from fraclab_sdk.snapshot.index import SnapshotIndex, SnapshotMeta
|
|
4
|
+
from fraclab_sdk.snapshot.library import SnapshotLibrary
|
|
5
|
+
from fraclab_sdk.snapshot.loader import SnapshotHandle
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"SnapshotIndex",
|
|
9
|
+
"SnapshotLibrary",
|
|
10
|
+
"SnapshotHandle",
|
|
11
|
+
"SnapshotMeta",
|
|
12
|
+
]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Snapshot index management."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass, field
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fraclab_sdk.utils.io import atomic_write_json
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class SnapshotMeta:
    """Metadata for an indexed snapshot."""

    # Content-derived identifier used as the index key.
    snapshot_id: str
    # Identifier of the bundle the snapshot originated from.
    bundle_id: str
    # Creation timestamp carried over from the source bundle (stored as a string).
    created_at: str
    # Optional human-readable description.
    description: str | None = None
    # Import time as ISO-8601. NOTE(review): datetime.now() is naive local
    # time (no timezone); confirm whether a UTC-aware timestamp is wanted.
    imported_at: str = field(default_factory=lambda: datetime.now().isoformat())
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class SnapshotIndex:
    """Manages the snapshot index file (``<snapshots_dir>/index.json``)."""

    def __init__(self, snapshots_dir: Path) -> None:
        """Initialize snapshot index.

        Args:
            snapshots_dir: Directory containing snapshots.
        """
        self._snapshots_dir = snapshots_dir
        self._index_path = snapshots_dir / "index.json"

    def _load(self) -> dict[str, dict]:
        """Load the index (snapshot_id -> entry dict) from disk.

        Returns an empty mapping when the index file does not exist yet.
        """
        if not self._index_path.exists():
            return {}
        return json.loads(self._index_path.read_text())

    def _save(self, data: dict[str, dict]) -> None:
        """Persist the index atomically, creating the directory if needed."""
        self._snapshots_dir.mkdir(parents=True, exist_ok=True)
        atomic_write_json(self._index_path, data)

    @staticmethod
    def _meta_from_entry(entry: dict) -> SnapshotMeta:
        """Rebuild a SnapshotMeta from a raw index entry.

        Optional keys are tolerated for forward/backward compatibility of
        the on-disk format.
        """
        return SnapshotMeta(
            snapshot_id=entry["snapshot_id"],
            bundle_id=entry["bundle_id"],
            created_at=entry["created_at"],
            description=entry.get("description"),
            imported_at=entry.get("imported_at", ""),
        )

    def add(self, meta: SnapshotMeta) -> None:
        """Add (or overwrite) a snapshot entry in the index."""
        data = self._load()
        data[meta.snapshot_id] = {
            "snapshot_id": meta.snapshot_id,
            "bundle_id": meta.bundle_id,
            "created_at": meta.created_at,
            "description": meta.description,
            "imported_at": meta.imported_at,
        }
        self._save(data)

    def remove(self, snapshot_id: str) -> None:
        """Remove a snapshot from the index (no-op if absent)."""
        data = self._load()
        if snapshot_id in data:
            del data[snapshot_id]
            self._save(data)

    def get(self, snapshot_id: str) -> SnapshotMeta | None:
        """Get snapshot metadata by ID.

        Returns:
            SnapshotMeta for the entry, or None if not indexed.
        """
        entry = self._load().get(snapshot_id)
        if entry is None:
            return None
        return self._meta_from_entry(entry)

    def list_all(self) -> list[SnapshotMeta]:
        """List all indexed snapshots."""
        return [self._meta_from_entry(entry) for entry in self._load().values()]

    def contains(self, snapshot_id: str) -> bool:
        """Check if a snapshot is in the index."""
        return snapshot_id in self._load()
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""Snapshot library implementation."""
|
|
2
|
+
|
|
3
|
+
import shutil
|
|
4
|
+
import zipfile
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from fraclab_sdk.config import SDKConfig
|
|
8
|
+
from fraclab_sdk.errors import HashMismatchError, PathTraversalError, SnapshotError
|
|
9
|
+
from fraclab_sdk.materialize.hash import compute_sha256
|
|
10
|
+
from fraclab_sdk.models import BundleManifest
|
|
11
|
+
from fraclab_sdk.snapshot.index import SnapshotIndex, SnapshotMeta
|
|
12
|
+
from fraclab_sdk.snapshot.loader import SnapshotHandle
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _is_safe_path(path: str) -> bool:
|
|
16
|
+
"""Check if a path is safe (no traversal attacks)."""
|
|
17
|
+
if path.startswith("/") or path.startswith("\\"):
|
|
18
|
+
return False
|
|
19
|
+
if ".." in path:
|
|
20
|
+
return False
|
|
21
|
+
return not any(c in path for c in [":", "*", "?", '"', "<", ">", "|"])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _generate_snapshot_id(manifest_bytes: bytes) -> str:
    """Generate snapshot ID from manifest content hash.

    Uses SHA256 of manifest bytes, truncated to 16 chars for readability.
    """
    # Truncation keeps IDs short while remaining collision-resistant in practice.
    return compute_sha256(manifest_bytes)[:16]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SnapshotLibrary:
    """Library for managing snapshots."""

    def __init__(self, config: SDKConfig | None = None) -> None:
        """Initialize snapshot library.

        Args:
            config: SDK configuration. If None, uses default.
        """
        self._config = config or SDKConfig()
        self._index = SnapshotIndex(self._config.snapshots_dir)

    def import_snapshot(self, path: Path) -> str:
        """Import a snapshot from a directory or zip file.

        Args:
            path: Path to snapshot directory or zip file.

        Returns:
            The snapshot_id of the imported snapshot.

        Raises:
            SnapshotError: If import fails.
            HashMismatchError: If hash verification fails.
            PathTraversalError: If zip contains unsafe paths.
        """
        path = path.resolve()
        if not path.exists():
            raise SnapshotError(f"Path does not exist: {path}")

        if path.is_file() and path.suffix == ".zip":
            return self._import_from_zip(path)
        if path.is_dir():
            return self._import_from_dir(path)
        raise SnapshotError(f"Path must be a directory or .zip file: {path}")

    def _import_from_zip(self, zip_path: Path) -> str:
        """Extract a snapshot zip to a temp dir and import it from there."""
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = Path(tmp_dir)

            with zipfile.ZipFile(zip_path) as zf:
                # Security check: reject absolute / traversal paths before
                # extracting anything (zip-slip protection).
                for name in zf.namelist():
                    if not _is_safe_path(name):
                        raise PathTraversalError(name)
                zf.extractall(tmp_path)

            # Find the actual snapshot root (may be in a subdirectory)
            snapshot_root = self._find_snapshot_root(tmp_path)
            return self._import_from_dir(snapshot_root)

    def _find_snapshot_root(self, path: Path) -> Path:
        """Find the snapshot root directory (contains manifest.json).

        Checks the given directory itself, then exactly one level of
        subdirectories (covers zips with a single wrapping folder).

        Raises:
            SnapshotError: If no manifest.json is found.
        """
        if (path / "manifest.json").exists():
            return path

        # Check one level of subdirectories
        for subdir in path.iterdir():
            if subdir.is_dir() and (subdir / "manifest.json").exists():
                return subdir

        raise SnapshotError(f"No manifest.json found in {path}")

    def _import_from_dir(self, source_dir: Path) -> str:
        """Validate, hash-verify, and copy a snapshot directory into the library.

        Returns:
            The content-derived snapshot_id. Importing the same snapshot
            twice is a no-op and returns the existing id.
        """
        # Validate manifest exists
        manifest_path = source_dir / "manifest.json"
        if not manifest_path.exists():
            raise SnapshotError(f"manifest.json not found in {source_dir}")

        # Parse manifest and get file paths
        manifest_bytes = manifest_path.read_bytes()
        manifest = BundleManifest.model_validate_json(manifest_bytes.decode())

        ds_path = source_dir / manifest.specFiles.dsPath
        drs_path = source_dir / manifest.specFiles.drsPath
        data_dir = source_dir / manifest.dataRoot

        # Validate required files exist
        if not ds_path.exists():
            raise SnapshotError(f"{manifest.specFiles.dsPath} not found in {source_dir}")
        if not drs_path.exists():
            raise SnapshotError(
                f"{manifest.specFiles.drsPath} not found (REQUIRED): {drs_path}"
            )
        if not data_dir.exists():
            raise SnapshotError(f"{manifest.dataRoot}/ directory not found in {source_dir}")

        # Verify hashes on raw bytes (pre-decode, so the hash matches what
        # the bundle producer signed).
        ds_bytes = ds_path.read_bytes()
        ds_hash = compute_sha256(ds_bytes)
        if ds_hash != manifest.specFiles.dsSha256:
            raise HashMismatchError(
                manifest.specFiles.dsPath, manifest.specFiles.dsSha256, ds_hash
            )

        drs_bytes = drs_path.read_bytes()
        drs_hash = compute_sha256(drs_bytes)
        if drs_hash != manifest.specFiles.drsSha256:
            raise HashMismatchError(
                manifest.specFiles.drsPath, manifest.specFiles.drsSha256, drs_hash
            )

        # Generate snapshot_id from manifest hash
        snapshot_id = _generate_snapshot_id(manifest_bytes)

        # Create target directory
        self._config.ensure_dirs()
        target_dir = self._config.snapshots_dir / snapshot_id

        if target_dir.exists():
            # Already imported (ids are content-derived, so same dir == same snapshot)
            return snapshot_id

        # Copy to library
        shutil.copytree(source_dir, target_dir)

        # Add to index
        self._index.add(
            SnapshotMeta(
                snapshot_id=snapshot_id,
                # NOTE(review): bundle_id is set to the snapshot_id rather than
                # an id from the manifest — confirm this is intentional.
                bundle_id=snapshot_id,
                created_at=str(manifest.createdAtUs),
                description=None,
            )
        )

        return snapshot_id

    def list_snapshots(self) -> list[SnapshotMeta]:
        """List all imported snapshots.

        Returns:
            List of snapshot metadata.
        """
        return self._index.list_all()

    def _existing_snapshot_dir(self, snapshot_id: str) -> Path:
        """Return the on-disk directory for a snapshot, verifying it exists.

        Raises:
            SnapshotError: If snapshot not found.
        """
        snapshot_dir = self._config.snapshots_dir / snapshot_id
        if not snapshot_dir.exists():
            raise SnapshotError(f"Snapshot not found: {snapshot_id}")
        return snapshot_dir

    def get_snapshot(self, snapshot_id: str) -> SnapshotHandle:
        """Get a handle to a snapshot.

        Args:
            snapshot_id: The snapshot ID.

        Returns:
            SnapshotHandle for accessing snapshot contents.

        Raises:
            SnapshotError: If snapshot not found.
        """
        return SnapshotHandle(self._existing_snapshot_dir(snapshot_id))

    def delete_snapshot(self, snapshot_id: str) -> None:
        """Delete a snapshot from the library.

        Removes the snapshot directory from disk, then drops its index entry.

        Args:
            snapshot_id: The snapshot ID to delete.

        Raises:
            SnapshotError: If snapshot not found.
        """
        snapshot_dir = self._existing_snapshot_dir(snapshot_id)
        shutil.rmtree(snapshot_dir)
        self._index.remove(snapshot_id)
|