juniper-data 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data/__init__.py +88 -0
- juniper_data/__main__.py +78 -0
- juniper_data/api/__init__.py +10 -0
- juniper_data/api/app.py +111 -0
- juniper_data/api/middleware.py +95 -0
- juniper_data/api/routes/__init__.py +9 -0
- juniper_data/api/routes/datasets.py +414 -0
- juniper_data/api/routes/generators.py +125 -0
- juniper_data/api/routes/health.py +49 -0
- juniper_data/api/security.py +238 -0
- juniper_data/api/settings.py +109 -0
- juniper_data/core/__init__.py +32 -0
- juniper_data/core/artifacts.py +63 -0
- juniper_data/core/dataset_id.py +38 -0
- juniper_data/core/models.py +135 -0
- juniper_data/core/split.py +120 -0
- juniper_data/generators/__init__.py +15 -0
- juniper_data/generators/arc_agi/__init__.py +11 -0
- juniper_data/generators/arc_agi/generator.py +229 -0
- juniper_data/generators/arc_agi/params.py +56 -0
- juniper_data/generators/checkerboard/__init__.py +15 -0
- juniper_data/generators/checkerboard/generator.py +114 -0
- juniper_data/generators/checkerboard/params.py +32 -0
- juniper_data/generators/circles/__init__.py +11 -0
- juniper_data/generators/circles/generator.py +112 -0
- juniper_data/generators/circles/params.py +31 -0
- juniper_data/generators/csv_import/__init__.py +15 -0
- juniper_data/generators/csv_import/generator.py +198 -0
- juniper_data/generators/csv_import/params.py +48 -0
- juniper_data/generators/gaussian/__init__.py +11 -0
- juniper_data/generators/gaussian/generator.py +149 -0
- juniper_data/generators/gaussian/params.py +53 -0
- juniper_data/generators/mnist/__init__.py +11 -0
- juniper_data/generators/mnist/generator.py +124 -0
- juniper_data/generators/mnist/params.py +39 -0
- juniper_data/generators/spiral/__init__.py +57 -0
- juniper_data/generators/spiral/defaults.py +39 -0
- juniper_data/generators/spiral/generator.py +206 -0
- juniper_data/generators/spiral/params.py +148 -0
- juniper_data/generators/xor/__init__.py +11 -0
- juniper_data/generators/xor/generator.py +162 -0
- juniper_data/generators/xor/params.py +30 -0
- juniper_data/storage/__init__.py +120 -0
- juniper_data/storage/base.py +279 -0
- juniper_data/storage/cached.py +211 -0
- juniper_data/storage/hf_store.py +257 -0
- juniper_data/storage/kaggle_store.py +333 -0
- juniper_data/storage/local_fs.py +232 -0
- juniper_data/storage/memory.py +136 -0
- juniper_data/storage/postgres_store.py +373 -0
- juniper_data/storage/redis_store.py +264 -0
- juniper_data/tests/__init__.py +1 -0
- juniper_data/tests/conftest.py +68 -0
- juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
- juniper_data/tests/integration/__init__.py +1 -0
- juniper_data/tests/integration/test_api.py +283 -0
- juniper_data/tests/integration/test_e2e_workflow.py +378 -0
- juniper_data/tests/integration/test_lifecycle_api.py +304 -0
- juniper_data/tests/integration/test_security_integration.py +189 -0
- juniper_data/tests/integration/test_storage_workflow.py +259 -0
- juniper_data/tests/performance/__init__.py +1 -0
- juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
- juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
- juniper_data/tests/unit/__init__.py +1 -0
- juniper_data/tests/unit/test_api_app.py +206 -0
- juniper_data/tests/unit/test_api_routes.py +407 -0
- juniper_data/tests/unit/test_api_settings.py +100 -0
- juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
- juniper_data/tests/unit/test_artifacts.py +145 -0
- juniper_data/tests/unit/test_cached_store.py +423 -0
- juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
- juniper_data/tests/unit/test_circles_generator.py +256 -0
- juniper_data/tests/unit/test_csv_import_generator.py +345 -0
- juniper_data/tests/unit/test_dataset_id.py +181 -0
- juniper_data/tests/unit/test_gaussian_generator.py +333 -0
- juniper_data/tests/unit/test_hf_store.py +416 -0
- juniper_data/tests/unit/test_init.py +93 -0
- juniper_data/tests/unit/test_kaggle_store.py +469 -0
- juniper_data/tests/unit/test_lifecycle.py +394 -0
- juniper_data/tests/unit/test_main.py +127 -0
- juniper_data/tests/unit/test_middleware.py +79 -0
- juniper_data/tests/unit/test_mnist_generator.py +370 -0
- juniper_data/tests/unit/test_postgres_store.py +490 -0
- juniper_data/tests/unit/test_redis_store.py +500 -0
- juniper_data/tests/unit/test_security.py +281 -0
- juniper_data/tests/unit/test_security_boundaries.py +517 -0
- juniper_data/tests/unit/test_spiral_generator.py +566 -0
- juniper_data/tests/unit/test_split.py +245 -0
- juniper_data/tests/unit/test_storage.py +767 -0
- juniper_data/tests/unit/test_xor_generator.py +223 -0
- juniper_data-0.4.2.dist-info/METADATA +216 -0
- juniper_data-0.4.2.dist-info/RECORD +95 -0
- juniper_data-0.4.2.dist-info/WHEEL +5 -0
- juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
- juniper_data-0.4.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,767 @@
|
|
|
1
|
+
"""Unit tests for storage module."""
|
|
2
|
+
|
|
3
|
+
import contextlib
|
|
4
|
+
import io
|
|
5
|
+
import tempfile
|
|
6
|
+
from datetime import UTC, datetime
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from juniper_data.core.models import DatasetMeta
|
|
13
|
+
from juniper_data.storage import DatasetStore, InMemoryDatasetStore, LocalFSDatasetStore
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@pytest.fixture
def sample_meta() -> DatasetMeta:
    """Return a fully populated ``DatasetMeta`` describing a small spiral dataset.

    The record is shared by most storage tests as the metadata half of a
    ``save()`` call; its ``created_at`` is a fixed naive timestamp.
    """
    meta_fields = {
        "dataset_id": "test-dataset-001",
        "generator": "spiral",
        "generator_version": "1.0.0",
        "params": {"n_spirals": 2, "n_points_per_spiral": 100, "noise": 0.1},
        "n_samples": 200,
        "n_features": 2,
        "n_classes": 2,
        "n_train": 160,
        "n_test": 40,
        "class_distribution": {"0": 100, "1": 100},
        "artifact_formats": ["npz"],
        "created_at": datetime(2026, 1, 30, 12, 0, 0),
        "checksum": "abc123",
    }
    return DatasetMeta(**meta_fields)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.fixture
def sample_arrays() -> dict[str, np.ndarray]:
    """Create a reproducible set of train/test arrays for storage tests.

    Returns:
        A dict with four ``float32`` arrays:

        * ``X_train`` -- shape ``(160, 2)``, standard-normal features
        * ``y_train`` -- shape ``(160, 2)``, one-hot labels over two classes
        * ``X_test``  -- shape ``(40, 2)``, standard-normal features
        * ``y_test``  -- shape ``(40, 2)``, one-hot labels over two classes
    """
    # A locally seeded generator keeps the fixture deterministic from run to
    # run and avoids touching NumPy's global random state.
    rng = np.random.default_rng(20260130)
    # Indexing the identity matrix with integer labels yields one-hot rows;
    # fancy indexing returns a fresh array each time, so nothing is shared.
    one_hot = np.eye(2, dtype=np.float32)
    return {
        "X_train": rng.standard_normal((160, 2)).astype(np.float32),
        "y_train": one_hot[rng.integers(0, 2, size=160)],
        "X_test": rng.standard_normal((40, 2)).astype(np.float32),
        "y_test": one_hot[rng.integers(0, 2, size=40)],
    }
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.fixture
def memory_store() -> InMemoryDatasetStore:
    """Provide a newly constructed in-memory dataset store for each test."""
    store = InMemoryDatasetStore()
    return store
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@pytest.fixture
def temp_dir():
    """Yield a scratch directory as a ``Path`` for filesystem tests.

    The directory is created by ``tempfile.TemporaryDirectory`` and is removed
    when the context manager exits after the test has finished.
    """
    with tempfile.TemporaryDirectory() as scratch:
        scratch_path = Path(scratch)
        yield scratch_path
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@pytest.fixture
def fs_store(temp_dir: Path) -> LocalFSDatasetStore:
    """Provide a ``LocalFSDatasetStore`` rooted at the per-test temporary directory."""
    store = LocalFSDatasetStore(temp_dir)
    return store
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class TestInMemoryDatasetStore:
    """Behavioural checks for ``InMemoryDatasetStore``."""

    @pytest.mark.unit
    def test_init_creates_empty_store(self, memory_store: InMemoryDatasetStore):
        """A newly constructed store lists no datasets."""
        listing = memory_store.list_datasets()
        assert listing == []

    @pytest.mark.unit
    def test_save_and_get_meta(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Metadata passed to ``save`` comes back from ``get_meta``."""
        memory_store.save("ds-001", sample_meta, sample_arrays)
        fetched = memory_store.get_meta("ds-001")

        assert fetched is not None
        for field_name in ("dataset_id", "generator", "n_samples"):
            assert getattr(fetched, field_name) == getattr(sample_meta, field_name)

    @pytest.mark.unit
    def test_get_meta_nonexistent(self, memory_store: InMemoryDatasetStore):
        """``get_meta`` yields ``None`` for an id that was never saved."""
        missing = memory_store.get_meta("nonexistent")
        assert missing is None

    @pytest.mark.unit
    def test_save_and_get_artifact_bytes(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """The stored artifact is a non-empty NPZ payload holding the saved arrays."""
        memory_store.save("ds-001", sample_meta, sample_arrays)
        payload = memory_store.get_artifact_bytes("ds-001")

        assert payload is not None
        assert len(payload) > 0

        archive = np.load(io.BytesIO(payload))
        assert set(archive.files) == set(sample_arrays.keys())
        for array_name, expected in sample_arrays.items():
            np.testing.assert_array_almost_equal(archive[array_name], expected)

    @pytest.mark.unit
    def test_get_artifact_bytes_nonexistent(self, memory_store: InMemoryDatasetStore):
        """``get_artifact_bytes`` yields ``None`` for an id that was never saved."""
        missing = memory_store.get_artifact_bytes("nonexistent")
        assert missing is None

    @pytest.mark.unit
    def test_exists_true(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """``exists`` reports ``True`` once a dataset has been saved."""
        memory_store.save("ds-001", sample_meta, sample_arrays)
        present = memory_store.exists("ds-001")
        assert present is True

    @pytest.mark.unit
    def test_exists_false(self, memory_store: InMemoryDatasetStore):
        """``exists`` reports ``False`` for an id that was never saved."""
        present = memory_store.exists("nonexistent")
        assert present is False

    @pytest.mark.unit
    def test_delete_existing(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Deleting a saved dataset returns ``True`` and makes it unreachable."""
        memory_store.save("ds-001", sample_meta, sample_arrays)
        outcome = memory_store.delete("ds-001")
        assert outcome is True
        assert memory_store.exists("ds-001") is False
        assert memory_store.get_meta("ds-001") is None

    @pytest.mark.unit
    def test_delete_nonexistent(self, memory_store: InMemoryDatasetStore):
        """Deleting an unknown id returns ``False``."""
        outcome = memory_store.delete("nonexistent")
        assert outcome is False

    @pytest.mark.unit
    def test_list_datasets_empty(self, memory_store: InMemoryDatasetStore):
        """Listing an empty store gives an empty list."""
        listing = memory_store.list_datasets()
        assert listing == []

    @pytest.mark.unit
    def test_list_datasets_multiple(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """All saved ids are listed, in sorted order."""
        dataset_ids = [f"ds-00{i}" for i in range(5)]
        for dataset_id in dataset_ids:
            memory_store.save(dataset_id, sample_meta, sample_arrays)

        listing = memory_store.list_datasets()
        assert len(listing) == 5
        assert listing == sorted(listing)

    @pytest.mark.unit
    def test_list_datasets_with_limit(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """``limit`` caps the number of ids returned."""
        dataset_ids = [f"ds-{i:03d}" for i in range(10)]
        for dataset_id in dataset_ids:
            memory_store.save(dataset_id, sample_meta, sample_arrays)

        listing = memory_store.list_datasets(limit=3)
        assert len(listing) == 3

    @pytest.mark.unit
    def test_list_datasets_with_offset(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """``offset`` skips the leading ids of the listing."""
        dataset_ids = [f"ds-{i:03d}" for i in range(10)]
        for dataset_id in dataset_ids:
            memory_store.save(dataset_id, sample_meta, sample_arrays)

        listing = memory_store.list_datasets(offset=5)
        assert len(listing) == 5
        assert listing[0] == "ds-005"

    @pytest.mark.unit
    def test_list_datasets_with_limit_and_offset(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """``limit`` and ``offset`` combine to select a window of the listing."""
        dataset_ids = [f"ds-{i:03d}" for i in range(10)]
        for dataset_id in dataset_ids:
            memory_store.save(dataset_id, sample_meta, sample_arrays)

        listing = memory_store.list_datasets(limit=3, offset=2)
        assert len(listing) == 3
        assert listing == ["ds-002", "ds-003", "ds-004"]

    @pytest.mark.unit
    def test_clear(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """``clear`` removes every stored dataset."""
        dataset_ids = [f"ds-00{i}" for i in range(5)]
        for dataset_id in dataset_ids:
            memory_store.save(dataset_id, sample_meta, sample_arrays)

        count_before = len(memory_store.list_datasets())
        assert count_before == 5
        memory_store.clear()
        count_after = len(memory_store.list_datasets())
        assert count_after == 0

    @pytest.mark.unit
    def test_save_copies_arrays(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Mutating the caller's arrays after ``save`` does not alter the stored data."""
        memory_store.save("ds-001", sample_meta, sample_arrays)

        train_features = sample_arrays["X_train"]
        value_before = train_features[0, 0].copy()
        # Overwrite the caller-side array; the stored artifact must not follow.
        train_features[0, 0] = 999.0

        payload = memory_store.get_artifact_bytes("ds-001")
        assert payload is not None
        archive = np.load(io.BytesIO(payload))
        assert archive["X_train"][0, 0] == value_before

    @pytest.mark.unit
    def test_overwrite_existing(
        self, memory_store: InMemoryDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """A second ``save`` under the same id replaces the earlier metadata."""
        memory_store.save("ds-001", sample_meta, sample_arrays)

        replacement_arrays = {"X": np.zeros((10, 2), dtype=np.float32)}
        replacement_meta = DatasetMeta(
            dataset_id="ds-001-updated",
            generator="spiral",
            generator_version="2.0.0",
            params={},
            n_samples=100,
            n_features=2,
            n_classes=2,
            n_train=80,
            n_test=20,
            class_distribution={"0": 50, "1": 50},
            created_at=datetime.now(),
        )

        memory_store.save("ds-001", replacement_meta, replacement_arrays)

        fetched = memory_store.get_meta("ds-001")
        assert fetched is not None
        assert fetched.generator_version == "2.0.0"

    @pytest.mark.unit
    def test_record_access_nonexistent_noop(self, memory_store: InMemoryDatasetStore):
        """``record_access`` on an unknown id neither raises nor creates the dataset."""
        unknown_id = "nonexistent-id"
        memory_store.record_access(unknown_id)
        assert not memory_store.exists(unknown_id)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
class TestLocalFSDatasetStore:
    """Tests for LocalFSDatasetStore.

    Each test runs against a store rooted in a per-test temporary directory.
    The tests rely on the on-disk layout ``<id>.meta.json`` + ``<id>.npz``
    directly under ``base_path``.
    """

    @pytest.mark.unit
    def test_init_creates_directory(self, temp_dir: Path):
        """Test that initialization creates the (nested) base directory."""
        subdir = temp_dir / "datasets" / "nested"
        store = LocalFSDatasetStore(subdir)
        assert subdir.exists()
        assert store.base_path == subdir

    @pytest.mark.unit
    def test_save_creates_files(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test that save creates both the meta and the npz file."""
        fs_store.save("ds-001", sample_meta, sample_arrays)

        meta_path = fs_store.base_path / "ds-001.meta.json"
        npz_path = fs_store.base_path / "ds-001.npz"

        assert meta_path.exists()
        assert npz_path.exists()

    @pytest.mark.unit
    def test_save_and_get_meta(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test saving and retrieving metadata, including ``created_at``."""
        fs_store.save("ds-001", sample_meta, sample_arrays)
        retrieved = fs_store.get_meta("ds-001")

        assert retrieved is not None
        assert retrieved.dataset_id == sample_meta.dataset_id
        assert retrieved.generator == sample_meta.generator
        assert retrieved.n_samples == sample_meta.n_samples
        assert retrieved.created_at == sample_meta.created_at

    @pytest.mark.unit
    def test_get_meta_nonexistent(self, fs_store: LocalFSDatasetStore):
        """Test getting metadata for nonexistent dataset returns None."""
        assert fs_store.get_meta("nonexistent") is None

    @pytest.mark.unit
    def test_save_and_get_artifact_bytes(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test saving and retrieving artifact bytes, including array contents."""
        fs_store.save("ds-001", sample_meta, sample_arrays)
        artifact_bytes = fs_store.get_artifact_bytes("ds-001")

        assert artifact_bytes is not None
        assert len(artifact_bytes) > 0

        loaded = np.load(io.BytesIO(artifact_bytes))
        assert set(loaded.files) == set(sample_arrays.keys())
        # Check the values too, not just the key names, matching the
        # equivalent in-memory store test.
        for key in sample_arrays:
            np.testing.assert_array_almost_equal(loaded[key], sample_arrays[key])

    @pytest.mark.unit
    def test_get_artifact_bytes_nonexistent(self, fs_store: LocalFSDatasetStore):
        """Test getting artifact bytes for nonexistent dataset returns None."""
        assert fs_store.get_artifact_bytes("nonexistent") is None

    @pytest.mark.unit
    def test_exists_true(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test exists returns True for saved dataset."""
        fs_store.save("ds-001", sample_meta, sample_arrays)
        assert fs_store.exists("ds-001") is True

    @pytest.mark.unit
    def test_exists_false(self, fs_store: LocalFSDatasetStore):
        """Test exists returns False for nonexistent dataset."""
        assert fs_store.exists("nonexistent") is False

    @pytest.mark.unit
    def test_exists_partial_files(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test exists returns False when only the meta file remains."""
        fs_store.save("ds-001", sample_meta, sample_arrays)

        (fs_store.base_path / "ds-001.npz").unlink()
        assert fs_store.exists("ds-001") is False

    @pytest.mark.unit
    def test_delete_existing(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test deleting an existing dataset returns True and removes both files."""
        fs_store.save("ds-001", sample_meta, sample_arrays)
        deleted = fs_store.delete("ds-001")
        assert deleted is True
        assert fs_store.exists("ds-001") is False

        assert not (fs_store.base_path / "ds-001.meta.json").exists()
        assert not (fs_store.base_path / "ds-001.npz").exists()

    @pytest.mark.unit
    def test_delete_nonexistent(self, fs_store: LocalFSDatasetStore):
        """Test deleting a nonexistent dataset returns False."""
        deleted = fs_store.delete("nonexistent")
        assert deleted is False

    @pytest.mark.unit
    def test_delete_partial_files(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test deleting when only meta file exists."""
        fs_store.save("ds-001", sample_meta, sample_arrays)

        # Remove the artifact so the dataset really is in the partial state
        # this test is named for (metadata present, NPZ missing).
        (fs_store.base_path / "ds-001.npz").unlink()

        deleted = fs_store.delete("ds-001")
        assert deleted is True
        # Nothing is left, so a second delete has nothing to remove.
        assert fs_store.delete("ds-001") is False
        assert not (fs_store.base_path / "ds-001.meta.json").exists()

    @pytest.mark.unit
    def test_list_datasets_empty(self, fs_store: LocalFSDatasetStore):
        """Test listing datasets in empty store."""
        assert fs_store.list_datasets() == []

    @pytest.mark.unit
    def test_list_datasets_multiple(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test listing multiple datasets returns all ids in sorted order."""
        for i in range(5):
            fs_store.save(f"ds-00{i}", sample_meta, sample_arrays)

        datasets = fs_store.list_datasets()
        assert len(datasets) == 5
        assert datasets == sorted(datasets)

    @pytest.mark.unit
    def test_list_datasets_with_limit(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test listing datasets with limit."""
        for i in range(10):
            fs_store.save(f"ds-{i:03d}", sample_meta, sample_arrays)

        datasets = fs_store.list_datasets(limit=3)
        assert len(datasets) == 3

    @pytest.mark.unit
    def test_list_datasets_with_offset(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test listing datasets with offset."""
        for i in range(10):
            fs_store.save(f"ds-{i:03d}", sample_meta, sample_arrays)

        datasets = fs_store.list_datasets(offset=5)
        assert len(datasets) == 5
        assert datasets[0] == "ds-005"

    @pytest.mark.unit
    def test_list_datasets_with_limit_and_offset(
        self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
    ):
        """Test listing datasets with both limit and offset."""
        for i in range(10):
            fs_store.save(f"ds-{i:03d}", sample_meta, sample_arrays)

        datasets = fs_store.list_datasets(limit=3, offset=2)
        assert len(datasets) == 3
        assert datasets == ["ds-002", "ds-003", "ds-004"]

    @pytest.mark.unit
    def test_base_path_property(self, temp_dir: Path):
        """Test base_path property returns the path the store was built with."""
        store = LocalFSDatasetStore(temp_dir)
        assert store.base_path == temp_dir

    @pytest.mark.unit
    def test_datetime_serialization(self, fs_store: LocalFSDatasetStore, sample_arrays: dict[str, np.ndarray]):
        """Test that a naive datetime survives a save/get_meta round trip."""
        specific_time = datetime(2026, 6, 15, 10, 30, 45)
        meta = DatasetMeta(
            dataset_id="dt-test",
            generator="test",
            generator_version="1.0.0",
            params={},
            n_samples=100,
            n_features=2,
            n_classes=2,
            n_train=80,
            n_test=20,
            class_distribution={"0": 50, "1": 50},
            created_at=specific_time,
        )

        fs_store.save("dt-test", meta, sample_arrays)
        retrieved = fs_store.get_meta("dt-test")

        assert retrieved is not None
        assert retrieved.created_at == specific_time
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
class TestDatasetStoreInterface:
    """Checks that the concrete stores are instances of the ``DatasetStore`` base."""

    @pytest.mark.unit
    def test_memory_store_is_dataset_store(self, memory_store: InMemoryDatasetStore):
        """The in-memory store is a ``DatasetStore``."""
        conforms = isinstance(memory_store, DatasetStore)
        assert conforms

    @pytest.mark.unit
    def test_fs_store_is_dataset_store(self, fs_store: LocalFSDatasetStore):
        """The local-filesystem store is a ``DatasetStore``."""
        conforms = isinstance(fs_store, DatasetStore)
        assert conforms
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
class TestLocalFSUpdateAndList:
    """Tests for LocalFSDatasetStore update_meta, list_all_metadata and save failure paths."""

    @pytest.mark.unit
    def test_update_meta(self, fs_store, sample_meta, sample_arrays):
        """Test update_meta replaces the stored metadata and returns True."""
        fs_store.save("ds-upd", sample_meta, sample_arrays)
        new_meta = DatasetMeta(
            dataset_id="ds-upd-v2",
            generator="spiral",
            generator_version="2.0.0",
            params={},
            n_samples=200,
            n_features=2,
            n_classes=2,
            n_train=160,
            n_test=40,
            class_distribution={"0": 100, "1": 100},
            created_at=datetime(2026, 2, 1, 12, 0, 0),
        )
        result = fs_store.update_meta("ds-upd", new_meta)
        assert result is True
        retrieved = fs_store.get_meta("ds-upd")
        # Guard first so a missing record fails as a clear assertion instead
        # of an AttributeError on None.
        assert retrieved is not None
        assert retrieved.generator_version == "2.0.0"

    @pytest.mark.unit
    def test_update_meta_nonexistent(self, fs_store, sample_meta):
        """Test update_meta returns False for nonexistent dataset."""
        assert fs_store.update_meta("nonexistent", sample_meta) is False

    @pytest.mark.unit
    def test_list_all_metadata(self, fs_store, sample_meta, sample_arrays):
        """Test list_all_metadata returns one entry per stored dataset."""
        fs_store.save("ds-a", sample_meta, sample_arrays)
        fs_store.save("ds-b", sample_meta, sample_arrays)
        result = fs_store.list_all_metadata()
        assert len(result) == 2

    @pytest.mark.unit
    def test_list_all_metadata_empty(self, fs_store):
        """Test list_all_metadata returns empty list when no datasets."""
        result = fs_store.list_all_metadata()
        assert result == []

    @pytest.mark.unit
    def test_save_error_cleanup(self, fs_store, sample_meta, sample_arrays):
        """Test save leaves no ``*.tmp`` files behind when writing the NPZ fails."""
        from unittest.mock import patch

        # Make the NPZ write itself fail; the error must propagate to the caller.
        with (
            patch("numpy.savez_compressed", side_effect=OSError("disk full")),
            pytest.raises(OSError, match="disk full"),
        ):
            fs_store.save("ds-fail", sample_meta, sample_arrays)

        tmp_files = list(fs_store.base_path.glob("*.tmp"))
        assert not tmp_files

    @pytest.mark.unit
    def test_save_cleanup_oserror_suppressed(self, fs_store, sample_meta, sample_arrays):
        """Test that a failing temp-file cleanup does not mask the original save error.

        ``Path.replace`` is forced to fail for the ``.npz.tmp`` file and
        ``Path.unlink`` is forced to fail for any ``.tmp`` file. ``save`` must
        still raise the *replace* error, not the cleanup error.
        """
        from unittest.mock import patch

        path_cls = type(fs_store.base_path)
        original_replace = path_cls.replace
        original_unlink = path_cls.unlink

        def failing_replace(self_path, target):
            # Only sabotage the artifact's temp file; other replaces go through.
            if str(self_path).endswith(".npz.tmp"):
                raise OSError("Simulated disk error during replace")
            return original_replace(self_path, target)

        def failing_unlink(self_path, missing_ok=False):
            # Any attempt to remove a temp file fails, simulating a broken cleanup.
            if str(self_path).endswith(".tmp"):
                raise OSError("Simulated permission error during cleanup")
            return original_unlink(self_path, missing_ok=missing_ok)

        with (
            patch.object(path_cls, "replace", failing_replace),
            patch.object(path_cls, "unlink", failing_unlink),
            pytest.raises(OSError, match="Simulated disk error"),
        ):
            fs_store.save("ds-cleanup-err", sample_meta, sample_arrays)
|
|
548
|
+
|
|
549
|
+
|
|
550
|
+
class TestLocalFSEdgeCases:
|
|
551
|
+
"""Additional edge case tests for LocalFSDatasetStore."""
|
|
552
|
+
|
|
553
|
+
@pytest.mark.unit
|
|
554
|
+
def test_json_serializer_raises_for_unknown_type(self):
|
|
555
|
+
"""Test _json_serializer raises TypeError for unknown types."""
|
|
556
|
+
from juniper_data.storage.local_fs import _json_serializer
|
|
557
|
+
|
|
558
|
+
with pytest.raises(TypeError) as exc_info:
|
|
559
|
+
_json_serializer(object())
|
|
560
|
+
|
|
561
|
+
assert "not JSON serializable" in str(exc_info.value)
|
|
562
|
+
|
|
563
|
+
@pytest.mark.unit
|
|
564
|
+
def test_get_meta_skips_datetime_conversion_for_non_string(
|
|
565
|
+
self, fs_store: LocalFSDatasetStore, sample_arrays: dict[str, np.ndarray]
|
|
566
|
+
):
|
|
567
|
+
"""Test get_meta skips datetime conversion when created_at is already parsed or not a string."""
|
|
568
|
+
import json
|
|
569
|
+
|
|
570
|
+
meta = DatasetMeta(
|
|
571
|
+
dataset_id="test-date-type",
|
|
572
|
+
generator="test",
|
|
573
|
+
generator_version="1.0.0",
|
|
574
|
+
params={},
|
|
575
|
+
n_samples=100,
|
|
576
|
+
n_features=2,
|
|
577
|
+
n_classes=2,
|
|
578
|
+
n_train=80,
|
|
579
|
+
n_test=20,
|
|
580
|
+
class_distribution={"0": 50, "1": 50},
|
|
581
|
+
created_at=datetime(2026, 1, 30, 12, 0, 0),
|
|
582
|
+
)
|
|
583
|
+
|
|
584
|
+
fs_store.save("test-date-type", meta, sample_arrays)
|
|
585
|
+
|
|
586
|
+
meta_path = fs_store._meta_path("test-date-type")
|
|
587
|
+
meta_dict = json.loads(meta_path.read_text())
|
|
588
|
+
assert isinstance(meta_dict["created_at"], str)
|
|
589
|
+
meta_dict["created_at"] = 1234567890
|
|
590
|
+
meta_path.write_text(json.dumps(meta_dict))
|
|
591
|
+
|
|
592
|
+
retrieved = fs_store.get_meta("test-date-type")
|
|
593
|
+
assert retrieved is not None
|
|
594
|
+
|
|
595
|
+
@pytest.mark.unit
|
|
596
|
+
def test_delete_only_npz_exists(
|
|
597
|
+
self, fs_store: LocalFSDatasetStore, sample_meta: DatasetMeta, sample_arrays: dict[str, np.ndarray]
|
|
598
|
+
):
|
|
599
|
+
"""Test delete when only NPZ file exists (meta was deleted)."""
|
|
600
|
+
fs_store.save("ds-partial-npz", sample_meta, sample_arrays)
|
|
601
|
+
|
|
602
|
+
(fs_store.base_path / "ds-partial-npz.meta.json").unlink()
|
|
603
|
+
|
|
604
|
+
result = fs_store.delete("ds-partial-npz")
|
|
605
|
+
assert result is True
|
|
606
|
+
assert not (fs_store.base_path / "ds-partial-npz.npz").exists()
|
|
607
|
+
|
|
608
|
+
@pytest.mark.unit
def test_get_meta_with_timezone_aware_datetime(
    self, fs_store: LocalFSDatasetStore, sample_arrays: dict[str, np.ndarray]
):
    """Round-trip metadata with a timezone-aware created_at through save and get_meta.

    Only the year and month of the restored timestamp are checked.
    """
    dataset_id = "tz-test"
    created = datetime(2026, 6, 15, 10, 30, 45, tzinfo=UTC)
    original_meta = DatasetMeta(
        dataset_id="tz-test",
        generator="test",
        generator_version="1.0.0",
        params={},
        n_samples=100,
        n_features=2,
        n_classes=2,
        n_train=80,
        n_test=20,
        class_distribution={"0": 50, "1": 50},
        created_at=created,
    )

    fs_store.save(dataset_id, original_meta, sample_arrays)
    restored = fs_store.get_meta(dataset_id)

    assert restored is not None
    assert restored.created_at is not None
    assert restored.created_at.year == 2026
    assert restored.created_at.month == 6
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
class TestDatasetStoreAbstractMethods:
    """Checks on the abstract interface declared by DatasetStore."""

    @pytest.mark.unit
    def test_cannot_instantiate_abstract_base(self):
        """Instantiating DatasetStore directly must raise TypeError."""
        with pytest.raises(TypeError):
            DatasetStore()

    @pytest.mark.unit
    def test_abstract_methods_exist(self):
        """Every expected store operation must be flagged abstract on DatasetStore."""
        import inspect

        # Collect the names of all functions on the class that carry the
        # __isabstractmethod__ marker.
        declared_abstract = []
        for member_name, member in inspect.getmembers(DatasetStore, predicate=inspect.isfunction):
            if getattr(member, "__isabstractmethod__", False):
                declared_abstract.append(member_name)

        required_names = ("save", "get_meta", "get_artifact_bytes", "exists", "delete", "list_datasets")
        for required in required_names:
            assert required in declared_abstract, f"Missing abstract method: {required}"
|
|
661
|
+
|
|
662
|
+
|
|
663
|
+
class TestStorageModuleFactories:
|
|
664
|
+
"""Tests for storage module factory functions and imports."""
|
|
665
|
+
|
|
666
|
+
@pytest.mark.unit
def test_storage_module_exports(self):
    """Check that the core store classes import from juniper_data.storage and are not None."""
    from juniper_data.storage import CachedDatasetStore, DatasetStore, InMemoryDatasetStore, LocalFSDatasetStore

    core_exports = (DatasetStore, InMemoryDatasetStore, LocalFSDatasetStore, CachedDatasetStore)
    for exported in core_exports:
        assert exported is not None
|
|
675
|
+
|
|
676
|
+
@pytest.mark.unit
def test_optional_imports_are_none_or_class(self):
    """Each optional store name on juniper_data.storage is missing, None, or a class."""
    import juniper_data.storage as storage_mod

    optional_names = (
        "RedisDatasetStore",
        "HuggingFaceDatasetStore",
        "PostgresDatasetStore",
        "KaggleDatasetStore",
    )
    for optional_name in optional_names:
        # A name that is not defined at all is treated the same as None.
        exported = getattr(storage_mod, optional_name, None)
        assert exported is None or isinstance(exported, type)
|
|
684
|
+
|
|
685
|
+
@pytest.mark.unit
def test_get_redis_store_raises_import_error(self):
    """Call get_redis_store() with no arguments, tolerating ImportError.

    ImportError is swallowed, so the call may either succeed or raise ImportError;
    any other exception fails the test.
    """
    from juniper_data.storage import get_redis_store

    try:
        get_redis_store()
    except ImportError:
        # Accepted outcome -- presumably the optional backend is not installed.
        pass
|
|
692
|
+
|
|
693
|
+
@pytest.mark.unit
def test_get_hf_store_raises_import_error(self):
    """Call get_hf_store() with no arguments, tolerating ImportError.

    ImportError is swallowed, so the call may either succeed or raise ImportError;
    any other exception fails the test.
    """
    from juniper_data.storage import get_hf_store

    try:
        get_hf_store()
    except ImportError:
        # Accepted outcome -- presumably the optional backend is not installed.
        pass
|
|
700
|
+
|
|
701
|
+
@pytest.mark.unit
def test_get_postgres_store_raises_import_error(self):
    """Call get_postgres_store() with no arguments, tolerating ImportError.

    ImportError is swallowed, so the call may either succeed or raise ImportError;
    any other exception fails the test.
    """
    from juniper_data.storage import get_postgres_store

    try:
        get_postgres_store()
    except ImportError:
        # Accepted outcome -- presumably the optional backend is not installed.
        pass
|
|
708
|
+
|
|
709
|
+
@pytest.mark.unit
def test_get_kaggle_store_raises_import_error(self):
    """Call get_kaggle_store() with no arguments, tolerating ImportError.

    ImportError is swallowed, so the call may either succeed or raise ImportError;
    any other exception fails the test.
    """
    from juniper_data.storage import get_kaggle_store

    try:
        get_kaggle_store()
    except ImportError:
        # Accepted outcome -- presumably the optional backend is not installed.
        pass
|
|
716
|
+
|
|
717
|
+
@pytest.mark.unit
def test_optional_imports_fallback_to_none(self):
    """Test that optional imports fall back to None when packages are missing.

    The four optional store submodules are made unimportable, ``juniper_data.storage``
    is imported afresh, and the optional store class names are expected to be ``None``
    on the fresh module and absent from its ``__all__``. Interpreter-wide import
    state is put back afterwards so later tests see the original module.
    """
    import importlib
    import sys

    storage_name = "juniper_data.storage"
    modules_to_block = (
        "juniper_data.storage.redis_store",
        "juniper_data.storage.hf_store",
        "juniper_data.storage.postgres_store",
        "juniper_data.storage.kaggle_store",
    )
    optional_classes = (
        "RedisDatasetStore",
        "HuggingFaceDatasetStore",
        "PostgresDatasetStore",
        "KaggleDatasetStore",
    )

    # Importing a submodule binds it as an attribute on its parent package, so the
    # re-import below rebinds ``juniper_data.storage``. Remember the current binding
    # so it can be put back; restoring sys.modules alone does not undo it.
    parent_pkg = importlib.import_module("juniper_data")
    unset = object()
    original_binding = getattr(parent_pkg, "storage", unset)

    try:
        with pytest.MonkeyPatch.context() as mp:
            # A None entry in sys.modules makes every import of that name raise
            # ModuleNotFoundError (a subclass of ImportError), whether the import is
            # absolute, relative, or ``from package import submodule``. Matching on
            # the name passed to ``builtins.__import__`` only catches the absolute form.
            for blocked in modules_to_block:
                mp.setitem(sys.modules, blocked, None)
            # Drop the cached package so its __init__ runs again under the block.
            mp.delitem(sys.modules, storage_name, raising=False)

            storage_mod = importlib.import_module(storage_name)

            for class_name in optional_classes:
                assert getattr(storage_mod, class_name) is None, f"{class_name} should fall back to None"
                assert class_name not in storage_mod.__all__, f"{class_name} should not be exported"
    finally:
        # Leaving the MonkeyPatch context has already restored sys.modules; now undo
        # the attribute rebinding on the parent package as well.
        if original_binding is unset:
            if hasattr(parent_pkg, "storage"):
                delattr(parent_pkg, "storage")
        else:
            parent_pkg.storage = original_binding
        # Force reimport to restore normal state
        importlib.import_module(storage_name)
|