juniper-data 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95):
  1. juniper_data/__init__.py +88 -0
  2. juniper_data/__main__.py +78 -0
  3. juniper_data/api/__init__.py +10 -0
  4. juniper_data/api/app.py +111 -0
  5. juniper_data/api/middleware.py +95 -0
  6. juniper_data/api/routes/__init__.py +9 -0
  7. juniper_data/api/routes/datasets.py +414 -0
  8. juniper_data/api/routes/generators.py +125 -0
  9. juniper_data/api/routes/health.py +49 -0
  10. juniper_data/api/security.py +238 -0
  11. juniper_data/api/settings.py +109 -0
  12. juniper_data/core/__init__.py +32 -0
  13. juniper_data/core/artifacts.py +63 -0
  14. juniper_data/core/dataset_id.py +38 -0
  15. juniper_data/core/models.py +135 -0
  16. juniper_data/core/split.py +120 -0
  17. juniper_data/generators/__init__.py +15 -0
  18. juniper_data/generators/arc_agi/__init__.py +11 -0
  19. juniper_data/generators/arc_agi/generator.py +229 -0
  20. juniper_data/generators/arc_agi/params.py +56 -0
  21. juniper_data/generators/checkerboard/__init__.py +15 -0
  22. juniper_data/generators/checkerboard/generator.py +114 -0
  23. juniper_data/generators/checkerboard/params.py +32 -0
  24. juniper_data/generators/circles/__init__.py +11 -0
  25. juniper_data/generators/circles/generator.py +112 -0
  26. juniper_data/generators/circles/params.py +31 -0
  27. juniper_data/generators/csv_import/__init__.py +15 -0
  28. juniper_data/generators/csv_import/generator.py +198 -0
  29. juniper_data/generators/csv_import/params.py +48 -0
  30. juniper_data/generators/gaussian/__init__.py +11 -0
  31. juniper_data/generators/gaussian/generator.py +149 -0
  32. juniper_data/generators/gaussian/params.py +53 -0
  33. juniper_data/generators/mnist/__init__.py +11 -0
  34. juniper_data/generators/mnist/generator.py +124 -0
  35. juniper_data/generators/mnist/params.py +39 -0
  36. juniper_data/generators/spiral/__init__.py +57 -0
  37. juniper_data/generators/spiral/defaults.py +39 -0
  38. juniper_data/generators/spiral/generator.py +206 -0
  39. juniper_data/generators/spiral/params.py +148 -0
  40. juniper_data/generators/xor/__init__.py +11 -0
  41. juniper_data/generators/xor/generator.py +162 -0
  42. juniper_data/generators/xor/params.py +30 -0
  43. juniper_data/storage/__init__.py +120 -0
  44. juniper_data/storage/base.py +279 -0
  45. juniper_data/storage/cached.py +211 -0
  46. juniper_data/storage/hf_store.py +257 -0
  47. juniper_data/storage/kaggle_store.py +333 -0
  48. juniper_data/storage/local_fs.py +232 -0
  49. juniper_data/storage/memory.py +136 -0
  50. juniper_data/storage/postgres_store.py +373 -0
  51. juniper_data/storage/redis_store.py +264 -0
  52. juniper_data/tests/__init__.py +1 -0
  53. juniper_data/tests/conftest.py +68 -0
  54. juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
  55. juniper_data/tests/integration/__init__.py +1 -0
  56. juniper_data/tests/integration/test_api.py +283 -0
  57. juniper_data/tests/integration/test_e2e_workflow.py +378 -0
  58. juniper_data/tests/integration/test_lifecycle_api.py +304 -0
  59. juniper_data/tests/integration/test_security_integration.py +189 -0
  60. juniper_data/tests/integration/test_storage_workflow.py +259 -0
  61. juniper_data/tests/performance/__init__.py +1 -0
  62. juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
  63. juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
  64. juniper_data/tests/unit/__init__.py +1 -0
  65. juniper_data/tests/unit/test_api_app.py +206 -0
  66. juniper_data/tests/unit/test_api_routes.py +407 -0
  67. juniper_data/tests/unit/test_api_settings.py +100 -0
  68. juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
  69. juniper_data/tests/unit/test_artifacts.py +145 -0
  70. juniper_data/tests/unit/test_cached_store.py +423 -0
  71. juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
  72. juniper_data/tests/unit/test_circles_generator.py +256 -0
  73. juniper_data/tests/unit/test_csv_import_generator.py +345 -0
  74. juniper_data/tests/unit/test_dataset_id.py +181 -0
  75. juniper_data/tests/unit/test_gaussian_generator.py +333 -0
  76. juniper_data/tests/unit/test_hf_store.py +416 -0
  77. juniper_data/tests/unit/test_init.py +93 -0
  78. juniper_data/tests/unit/test_kaggle_store.py +469 -0
  79. juniper_data/tests/unit/test_lifecycle.py +394 -0
  80. juniper_data/tests/unit/test_main.py +127 -0
  81. juniper_data/tests/unit/test_middleware.py +79 -0
  82. juniper_data/tests/unit/test_mnist_generator.py +370 -0
  83. juniper_data/tests/unit/test_postgres_store.py +490 -0
  84. juniper_data/tests/unit/test_redis_store.py +500 -0
  85. juniper_data/tests/unit/test_security.py +281 -0
  86. juniper_data/tests/unit/test_security_boundaries.py +517 -0
  87. juniper_data/tests/unit/test_spiral_generator.py +566 -0
  88. juniper_data/tests/unit/test_split.py +245 -0
  89. juniper_data/tests/unit/test_storage.py +767 -0
  90. juniper_data/tests/unit/test_xor_generator.py +223 -0
  91. juniper_data-0.4.2.dist-info/METADATA +216 -0
  92. juniper_data-0.4.2.dist-info/RECORD +95 -0
  93. juniper_data-0.4.2.dist-info/WHEEL +5 -0
  94. juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
  95. juniper_data-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,416 @@
1
+ """Unit tests for HuggingFaceDatasetStore."""
2
+
3
+ from datetime import UTC, datetime
4
+ from unittest.mock import MagicMock, patch
5
+
6
+ import numpy as np
7
+ import pytest
8
+
9
+ from juniper_data.core.models import DatasetMeta
10
+ from juniper_data.storage.memory import InMemoryDatasetStore
11
+
12
+
13
@pytest.fixture
def sample_meta() -> DatasetMeta:
    """Build the DatasetMeta record shared by the delegation tests.

    Returns:
        Metadata for a dataset with id ``"test-dataset"``: 100 samples,
        2 features, 2 classes, an 80/20 train/test split and a creation
        timestamp taken in UTC at fixture time.
    """
    fields = {
        "dataset_id": "test-dataset",
        "generator": "test",
        "generator_version": "1.0.0",
        "params": {"seed": 42},
        "n_samples": 100,
        "n_features": 2,
        "n_classes": 2,
        "n_train": 80,
        "n_test": 20,
        "class_distribution": {"0": 50, "1": 50},
        "created_at": datetime.now(UTC),
    }
    return DatasetMeta(**fields)
29
+
30
+
31
@pytest.fixture
def sample_arrays() -> dict[str, np.ndarray]:
    """Return float32 train/test arrays drawn from a generator seeded with 42.

    Returns:
        A dict with ``X_train``/``y_train`` of shape (80, 2) and
        ``X_test``/``y_test`` of shape (20, 2).
    """
    generator = np.random.default_rng(42)
    # Dict insertion order fixes the draw order, so values are reproducible.
    shapes = {
        "X_train": (80, 2),
        "y_train": (80, 2),
        "X_test": (20, 2),
        "y_test": (20, 2),
    }
    return {
        name: generator.standard_normal(shape).astype(np.float32)
        for name, shape in shapes.items()
    }
41
+
42
+
43
def _make_mock_hf_dataset(n_samples=20, n_classes=3, feature_type="tabular"):
    """Create a mock HuggingFace dataset object.

    Args:
        n_samples: Number of rows the mock reports through ``len()``.
        n_classes: Number of distinct labels; labels cycle through
            ``0 .. n_classes - 1``.
        feature_type: ``"tabular"`` builds ``feature1``/``feature2``/``label``
            columns; any other value builds an ``image``/``label`` dataset
            whose images are mocks with a ``convert()`` returning a 28x28
            uint8 array.

    Returns:
        A ``(mock_ds, labels)`` tuple: the configured ``MagicMock`` and the
        list of integer labels it serves for the ``"label"`` column.
    """
    is_tabular = feature_type == "tabular"
    labels = [index % n_classes for index in range(n_samples)]

    mock_ds = MagicMock()
    mock_ds.column_names = ["feature1", "feature2", "label"] if is_tabular else ["image", "label"]
    mock_ds.__len__ = MagicMock(return_value=n_samples)
    mock_ds.__getitem__ = MagicMock()

    if is_tabular:

        def getitem(key):
            if key == "feature1":
                return list(range(n_samples))
            if key == "feature2":
                return list(range(n_samples, 2 * n_samples))
            if key == "label":
                return labels
            return []

    else:
        # Seeded generator keeps the fake pixel data identical across runs.
        rng = np.random.default_rng(0)
        mock_images = []
        for index in range(n_samples):
            mock_img = MagicMock()
            mock_img.convert.return_value = rng.integers(0, 255, (28, 28), dtype=np.uint8)
            mock_images.append({"image": mock_img, "label": labels[index]})

        # side_effect builds a fresh iterator per call; a single shared
        # iterator (return_value=iter(...)) would be empty on a second pass.
        mock_ds.__iter__ = MagicMock(side_effect=lambda: iter(mock_images))

        def getitem(key):
            if key == "label":
                return labels
            if key == "image":
                return [row["image"] for row in mock_images]
            return []

    mock_ds.__getitem__.side_effect = getitem

    # shuffle/select return the same mock so chained calls keep working.
    mock_ds.shuffle.return_value = mock_ds
    mock_ds.select.return_value = mock_ds

    return mock_ds, labels
82
+
83
+
84
@pytest.fixture
def mock_hf_module():
    """Patch hf_store so it sees the HF datasets package as available.

    Yields:
        The ``MagicMock`` installed in place of
        ``juniper_data.storage.hf_store.hf_load_dataset``; both patches are
        undone when the fixture is torn down.
    """
    fake_loader = MagicMock()
    available = patch("juniper_data.storage.hf_store.HF_AVAILABLE", True)
    loader = patch("juniper_data.storage.hf_store.hf_load_dataset", fake_loader)

    with available, loader:
        yield fake_loader
92
+
93
+
94
@pytest.mark.unit
@pytest.mark.storage
class TestHuggingFaceDatasetStoreInit:
    """Constructor behaviour of HuggingFaceDatasetStore."""

    def test_init_default(self, mock_hf_module) -> None:
        """No arguments: in-memory cache store and no cache directory."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        assert isinstance(hf_store._cache_store, InMemoryDatasetStore)
        assert hf_store._cache_dir is None

    def test_init_custom_cache_store(self, mock_hf_module) -> None:
        """A supplied cache store is kept by identity."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        supplied = InMemoryDatasetStore()
        hf_store = HuggingFaceDatasetStore(cache_store=supplied)

        assert hf_store._cache_store is supplied

    def test_init_with_cache_dir(self, mock_hf_module) -> None:
        """A supplied cache directory is stored unchanged."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore(cache_dir="/tmp/hf_cache")

        assert hf_store._cache_dir == "/tmp/hf_cache"

    def test_init_raises_without_datasets(self) -> None:
        """Construction raises ImportError when HF_AVAILABLE is False."""
        unavailable = patch("juniper_data.storage.hf_store.HF_AVAILABLE", False)
        with unavailable:
            from juniper_data.storage.hf_store import HuggingFaceDatasetStore

            expected = pytest.raises(ImportError, match="Hugging Face datasets package not installed")
            with expected:
                HuggingFaceDatasetStore()
129
+
130
+
131
@pytest.mark.unit
@pytest.mark.storage
class TestHuggingFaceDatasetStoreLoadDataset:
    """Behaviour of load_hf_dataset against a mocked HF loader."""

    def test_load_tabular_dataset(self, mock_hf_module) -> None:
        """A tabular load yields an hf-prefixed id, huggingface meta and float32 arrays."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _labels = _make_mock_hf_dataset(n_samples=20, n_classes=3, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        dataset_id, meta, arrays = hf_store.load_hf_dataset(
            "test-dataset",
            feature_columns=["feature1", "feature2"],
            label_column="label",
        )

        assert "hf-test-dataset" in dataset_id
        assert meta.generator == "huggingface"
        for expected_key in ("X_train", "y_train", "X_full"):
            assert expected_key in arrays
        assert arrays["X_full"].dtype == np.float32

    def test_load_with_config_name(self, mock_hf_module) -> None:
        """The config name appears inside the generated dataset id."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=10, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        dataset_id, _, _ = hf_store.load_hf_dataset(
            "test-dataset",
            config_name="v2",
            feature_columns=["feature1", "feature2"],
        )

        assert "-v2-" in dataset_id

    def test_load_with_seed(self, mock_hf_module) -> None:
        """Passing a seed shuffles the dataset exactly once with that seed."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=10, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        hf_store.load_hf_dataset("test-dataset", seed=42, feature_columns=["feature1", "feature2"])

        fake_ds.shuffle.assert_called_once_with(seed=42)

    def test_load_with_n_samples(self, mock_hf_module) -> None:
        """Passing n_samples calls select on the dataset exactly once."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=20, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        hf_store.load_hf_dataset("test-dataset", n_samples=5, feature_columns=["feature1", "feature2"])

        fake_ds.select.assert_called_once()

    def test_load_without_one_hot(self, mock_hf_module) -> None:
        """With one_hot_labels=False the label array has a single column."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=10, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        _, _meta, arrays = hf_store.load_hf_dataset(
            "test-dataset",
            one_hot_labels=False,
            feature_columns=["feature1", "feature2"],
        )

        label_columns = arrays["y_full"].shape[1]
        assert label_columns == 1

    def test_load_with_normalization(self, mock_hf_module) -> None:
        """With normalize=True no feature value exceeds 1.0."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=10, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        loaded = hf_store.load_hf_dataset(
            "test-dataset",
            normalize=True,
            feature_columns=["feature1", "feature2"],
        )
        _, _, arrays = loaded

        assert arrays["X_full"].max() <= 1.0

    def test_load_saves_to_cache(self, mock_hf_module) -> None:
        """After a load the dataset id exists in the cache store."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        fake_ds, _ = _make_mock_hf_dataset(n_samples=10, n_classes=2, feature_type="tabular")
        mock_hf_module.return_value = fake_ds

        hf_store = HuggingFaceDatasetStore()
        dataset_id, _, _ = hf_store.load_hf_dataset("test-dataset", feature_columns=["feature1", "feature2"])

        assert hf_store._cache_store.exists(dataset_id)
230
+
231
+
232
@pytest.mark.unit
@pytest.mark.storage
class TestHuggingFaceDatasetStoreExtractImages:
    """Behaviour of _extract_images for the supported image value kinds."""

    def test_extract_images_pil_like(self, mock_hf_module) -> None:
        """Objects exposing convert() are flattened to 784 values, max <= 1.0."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        pil_like = MagicMock()
        pil_like.convert.return_value = np.random.randint(0, 255, (28, 28), dtype=np.uint8)
        rows = [{"image": pil_like}, {"image": pil_like}]

        extracted = hf_store._extract_images(rows, "image", flatten=True, normalize=True)

        assert extracted.shape == (2, 784)
        assert extracted.max() <= 1.0

    def test_extract_images_numpy_like(self, mock_hf_module) -> None:
        """Objects exposing only numpy() keep their 28x28 shape as float32."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        # spec=["numpy"] restricts the mock to a single numpy attribute.
        tensor_like = MagicMock(spec=["numpy"])
        tensor_like.numpy.return_value = np.random.randint(0, 255, (28, 28), dtype=np.uint8)
        rows = [{"image": tensor_like}, {"image": tensor_like}]

        extracted = hf_store._extract_images(rows, "image", flatten=False, normalize=False)

        assert extracted.shape == (2, 28, 28)
        assert extracted.dtype == np.float32

    def test_extract_images_raw_array(self, mock_hf_module) -> None:
        """Nested lists are accepted and flattened to four values per row."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        pixels = [[1, 2], [3, 4]]
        rows = [{"image": pixels}, {"image": pixels}]

        extracted = hf_store._extract_images(rows, "image", flatten=True, normalize=True)

        assert extracted.shape == (2, 4)
276
+
277
+
278
@pytest.mark.unit
@pytest.mark.storage
class TestHuggingFaceDatasetStoreExtractFeaturesLabels:
    """Behaviour of _extract_features_labels on mocked datasets."""

    def test_auto_detect_feature_columns(self, mock_hf_module) -> None:
        """With feature_columns=None the extracted features are float32."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        def column(key):
            if key in ("feature1", "feature2"):
                return [1.0, 2.0]
            return [0, 1]

        fake_ds = MagicMock()
        fake_ds.column_names = ["feature1", "feature2", "label", "idx"]
        fake_ds.__getitem__.side_effect = column

        features, _labels, _n_classes = hf_store._extract_features_labels(
            fake_ds,
            feature_columns=None,
            label_column="label",
            flatten=True,
            normalize=False,
            one_hot_labels=True,
        )

        assert features.dtype == np.float32

    def test_single_image_feature(self, mock_hf_module) -> None:
        """A lone image column yields one feature row per image and two classes."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        pil_like = MagicMock()
        pil_like.convert.return_value = np.random.randint(0, 255, (28, 28), dtype=np.uint8)

        def column(key):
            if key == "label":
                return [0, 1]
            return [pil_like, pil_like]

        fake_ds = MagicMock()
        fake_ds.column_names = ["image", "label"]
        fake_ds.__iter__ = MagicMock(return_value=iter([{"image": pil_like}, {"image": pil_like}]))
        fake_ds.__getitem__.side_effect = column

        features, _labels, n_classes = hf_store._extract_features_labels(
            fake_ds,
            feature_columns=["image"],
            label_column="label",
            flatten=True,
            normalize=True,
            one_hot_labels=True,
        )

        assert features.shape[0] == 2
        assert n_classes == 2

    def test_tensor_feature_columns(self, mock_hf_module) -> None:
        """Tensor-like feature values with one_hot_labels=False give one label column."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        tensor_like = MagicMock()
        tensor_like.numpy.return_value = np.array(1.0)

        def column(key):
            if key == "feat":
                return [tensor_like, tensor_like]
            return [0, 1]

        fake_ds = MagicMock()
        fake_ds.column_names = ["feat", "label"]
        fake_ds.__getitem__.side_effect = column

        _features, labels, _n_classes = hf_store._extract_features_labels(
            fake_ds,
            feature_columns=["feat"],
            label_column="label",
            flatten=True,
            normalize=False,
            one_hot_labels=False,
        )

        assert labels.shape[1] == 1
336
+
337
+
338
@pytest.mark.unit
@pytest.mark.storage
class TestHuggingFaceDatasetStoreDelegation:
    """Store operations that are forwarded to the underlying cache store."""

    def test_save_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """save() makes the dataset visible in the cache store."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store.save("test-1", sample_meta, sample_arrays)

        assert hf_store._cache_store.exists("test-1")

    def test_get_meta_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """get_meta() returns metadata that was saved on the cache store."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        fetched = hf_store.get_meta("test-1")

        assert fetched is not None
        assert fetched.dataset_id == sample_meta.dataset_id

    def test_get_artifact_bytes_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """get_artifact_bytes() returns a value for a cached dataset."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        payload = hf_store.get_artifact_bytes("test-1")

        assert payload is not None

    def test_exists_delegates(self, mock_hf_module) -> None:
        """exists() is False for an id that was never saved."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()

        assert hf_store.exists("nonexistent") is False

    def test_delete_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """delete() returns True and the dataset no longer exists afterwards."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        assert hf_store.delete("test-1") is True
        assert not hf_store.exists("test-1")

    def test_list_datasets_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """list_datasets() includes an id saved on the cache store."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        listed = hf_store.list_datasets()

        assert "test-1" in listed

    def test_update_meta_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """update_meta() returns True for a dataset present in the cache store."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        updated = hf_store.update_meta("test-1", sample_meta)

        assert updated is True

    def test_list_all_metadata_delegates(self, mock_hf_module, sample_meta, sample_arrays) -> None:
        """list_all_metadata() returns one entry after a single save."""
        from juniper_data.storage.hf_store import HuggingFaceDatasetStore

        hf_store = HuggingFaceDatasetStore()
        hf_store._cache_store.save("test-1", sample_meta, sample_arrays)

        all_meta = hf_store.list_all_metadata()

        assert len(all_meta) == 1
@@ -0,0 +1,93 @@
1
+ """Unit tests for juniper_data package __init__."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+
7
+ from juniper_data import (
8
+ __version__,
9
+ get_arc_agi_api,
10
+ get_arc_agi_api_url,
11
+ get_arc_agi_arcade,
12
+ get_arc_agi_env,
13
+ get_arc_api_key,
14
+ reload_arc_agi_env,
15
+ )
16
+
17
+
18
@pytest.mark.unit
class TestPackageInit:
    """Tests for the helpers exported from the juniper_data package root."""

    def test_version_is_string(self) -> None:
        """__version__ is a str."""
        assert isinstance(__version__, str)

    def test_get_arc_agi_api_url_returns_none_when_unset(self, monkeypatch) -> None:
        """get_arc_agi_api_url returns None when ARC_AGI_API is not set."""
        monkeypatch.delenv("ARC_AGI_API", raising=False)

        assert get_arc_agi_api_url() is None

    def test_get_arc_agi_api_url_returns_value_when_set(self, monkeypatch) -> None:
        """get_arc_agi_api_url returns the ARC_AGI_API value when set."""
        monkeypatch.setenv("ARC_AGI_API", "http://localhost:9000")

        assert get_arc_agi_api_url() == "http://localhost:9000"

    def test_get_arc_agi_api_delegates_to_url(self, monkeypatch) -> None:
        """get_arc_agi_api returns the ARC_AGI_API value when set."""
        monkeypatch.setenv("ARC_AGI_API", "http://example.com")

        assert get_arc_agi_api() == "http://example.com"

    def test_get_arc_agi_api_returns_none_when_unset(self, monkeypatch) -> None:
        """get_arc_agi_api returns None when ARC_AGI_API is not set."""
        monkeypatch.delenv("ARC_AGI_API", raising=False)

        assert get_arc_agi_api() is None

    def test_get_arc_agi_env_returns_true_when_set(self, monkeypatch) -> None:
        """get_arc_agi_env returns True when ARC_AGI_ENV is set."""
        monkeypatch.setenv("ARC_AGI_ENV", "1")

        assert get_arc_agi_env() is True

    def test_get_arc_agi_env_calls_load_dotenv_when_unset(self, monkeypatch) -> None:
        """get_arc_agi_env calls load_dotenv then returns False when ARC_AGI_ENV remains unset."""
        monkeypatch.delenv("ARC_AGI_ENV", raising=False)

        with patch("juniper_data.load_dotenv", return_value=True) as fake_load:
            outcome = get_arc_agi_env()

        fake_load.assert_called_once()
        assert outcome is False

    def test_reload_arc_agi_env(self) -> None:
        """reload_arc_agi_env calls load_dotenv and returns its result."""
        with patch("juniper_data.load_dotenv", return_value=True) as fake_load:
            outcome = reload_arc_agi_env()

        fake_load.assert_called_once()
        assert outcome is True

    def test_reload_arc_agi_env_returns_false(self) -> None:
        """reload_arc_agi_env returns False when load_dotenv returns False."""
        with patch("juniper_data.load_dotenv", return_value=False):
            outcome = reload_arc_agi_env()
            assert outcome is False

    def test_get_arc_api_key_returns_none_when_unset(self, monkeypatch) -> None:
        """get_arc_api_key returns None when ARC_API_KEY is not set."""
        monkeypatch.delenv("ARC_API_KEY", raising=False)

        assert get_arc_api_key() is None

    def test_get_arc_api_key_returns_value_when_set(self, monkeypatch) -> None:
        """get_arc_api_key returns the key value when set."""
        monkeypatch.setenv("ARC_API_KEY", "test-key-123")

        assert get_arc_api_key() == "test-key-123"

    def test_get_arc_api_key_returns_none_for_empty_string(self, monkeypatch) -> None:
        """get_arc_api_key returns None when ARC_API_KEY is empty string."""
        monkeypatch.setenv("ARC_API_KEY", "")

        assert get_arc_api_key() is None

    def test_get_arc_agi_arcade_returns_arcade_instance(self, monkeypatch) -> None:
        """get_arc_agi_arcade returns what arc_agi.Arcade() produces when available."""
        monkeypatch.delenv("ARC_API_KEY", raising=False)

        arcade_instance = MagicMock()
        fake_arc_agi = MagicMock()
        fake_arc_agi.Arcade.return_value = arcade_instance

        available = patch("juniper_data.ARC_AGI_AVAILABLE", True)
        module = patch("juniper_data.arc_agi", fake_arc_agi)
        with available, module:
            outcome = get_arc_agi_arcade()
            assert outcome is arcade_instance

    def test_get_arc_agi_arcade_raises_when_not_installed(self) -> None:
        """get_arc_agi_arcade raises ImportError when arc-agi is not installed."""
        unavailable = patch("juniper_data.ARC_AGI_AVAILABLE", False)
        expected = pytest.raises(ImportError, match="arc-agi package not installed")
        with unavailable, expected:
            get_arc_agi_arcade()