juniper-data 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. juniper_data/__init__.py +88 -0
  2. juniper_data/__main__.py +78 -0
  3. juniper_data/api/__init__.py +10 -0
  4. juniper_data/api/app.py +111 -0
  5. juniper_data/api/middleware.py +95 -0
  6. juniper_data/api/routes/__init__.py +9 -0
  7. juniper_data/api/routes/datasets.py +414 -0
  8. juniper_data/api/routes/generators.py +125 -0
  9. juniper_data/api/routes/health.py +49 -0
  10. juniper_data/api/security.py +238 -0
  11. juniper_data/api/settings.py +109 -0
  12. juniper_data/core/__init__.py +32 -0
  13. juniper_data/core/artifacts.py +63 -0
  14. juniper_data/core/dataset_id.py +38 -0
  15. juniper_data/core/models.py +135 -0
  16. juniper_data/core/split.py +120 -0
  17. juniper_data/generators/__init__.py +15 -0
  18. juniper_data/generators/arc_agi/__init__.py +11 -0
  19. juniper_data/generators/arc_agi/generator.py +229 -0
  20. juniper_data/generators/arc_agi/params.py +56 -0
  21. juniper_data/generators/checkerboard/__init__.py +15 -0
  22. juniper_data/generators/checkerboard/generator.py +114 -0
  23. juniper_data/generators/checkerboard/params.py +32 -0
  24. juniper_data/generators/circles/__init__.py +11 -0
  25. juniper_data/generators/circles/generator.py +112 -0
  26. juniper_data/generators/circles/params.py +31 -0
  27. juniper_data/generators/csv_import/__init__.py +15 -0
  28. juniper_data/generators/csv_import/generator.py +198 -0
  29. juniper_data/generators/csv_import/params.py +48 -0
  30. juniper_data/generators/gaussian/__init__.py +11 -0
  31. juniper_data/generators/gaussian/generator.py +149 -0
  32. juniper_data/generators/gaussian/params.py +53 -0
  33. juniper_data/generators/mnist/__init__.py +11 -0
  34. juniper_data/generators/mnist/generator.py +124 -0
  35. juniper_data/generators/mnist/params.py +39 -0
  36. juniper_data/generators/spiral/__init__.py +57 -0
  37. juniper_data/generators/spiral/defaults.py +39 -0
  38. juniper_data/generators/spiral/generator.py +206 -0
  39. juniper_data/generators/spiral/params.py +148 -0
  40. juniper_data/generators/xor/__init__.py +11 -0
  41. juniper_data/generators/xor/generator.py +162 -0
  42. juniper_data/generators/xor/params.py +30 -0
  43. juniper_data/storage/__init__.py +120 -0
  44. juniper_data/storage/base.py +279 -0
  45. juniper_data/storage/cached.py +211 -0
  46. juniper_data/storage/hf_store.py +257 -0
  47. juniper_data/storage/kaggle_store.py +333 -0
  48. juniper_data/storage/local_fs.py +232 -0
  49. juniper_data/storage/memory.py +136 -0
  50. juniper_data/storage/postgres_store.py +373 -0
  51. juniper_data/storage/redis_store.py +264 -0
  52. juniper_data/tests/__init__.py +1 -0
  53. juniper_data/tests/conftest.py +68 -0
  54. juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
  55. juniper_data/tests/integration/__init__.py +1 -0
  56. juniper_data/tests/integration/test_api.py +283 -0
  57. juniper_data/tests/integration/test_e2e_workflow.py +378 -0
  58. juniper_data/tests/integration/test_lifecycle_api.py +304 -0
  59. juniper_data/tests/integration/test_security_integration.py +189 -0
  60. juniper_data/tests/integration/test_storage_workflow.py +259 -0
  61. juniper_data/tests/performance/__init__.py +1 -0
  62. juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
  63. juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
  64. juniper_data/tests/unit/__init__.py +1 -0
  65. juniper_data/tests/unit/test_api_app.py +206 -0
  66. juniper_data/tests/unit/test_api_routes.py +407 -0
  67. juniper_data/tests/unit/test_api_settings.py +100 -0
  68. juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
  69. juniper_data/tests/unit/test_artifacts.py +145 -0
  70. juniper_data/tests/unit/test_cached_store.py +423 -0
  71. juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
  72. juniper_data/tests/unit/test_circles_generator.py +256 -0
  73. juniper_data/tests/unit/test_csv_import_generator.py +345 -0
  74. juniper_data/tests/unit/test_dataset_id.py +181 -0
  75. juniper_data/tests/unit/test_gaussian_generator.py +333 -0
  76. juniper_data/tests/unit/test_hf_store.py +416 -0
  77. juniper_data/tests/unit/test_init.py +93 -0
  78. juniper_data/tests/unit/test_kaggle_store.py +469 -0
  79. juniper_data/tests/unit/test_lifecycle.py +394 -0
  80. juniper_data/tests/unit/test_main.py +127 -0
  81. juniper_data/tests/unit/test_middleware.py +79 -0
  82. juniper_data/tests/unit/test_mnist_generator.py +370 -0
  83. juniper_data/tests/unit/test_postgres_store.py +490 -0
  84. juniper_data/tests/unit/test_redis_store.py +500 -0
  85. juniper_data/tests/unit/test_security.py +281 -0
  86. juniper_data/tests/unit/test_security_boundaries.py +517 -0
  87. juniper_data/tests/unit/test_spiral_generator.py +566 -0
  88. juniper_data/tests/unit/test_split.py +245 -0
  89. juniper_data/tests/unit/test_storage.py +767 -0
  90. juniper_data/tests/unit/test_xor_generator.py +223 -0
  91. juniper_data-0.4.2.dist-info/METADATA +216 -0
  92. juniper_data-0.4.2.dist-info/RECORD +95 -0
  93. juniper_data-0.4.2.dist-info/WHEEL +5 -0
  94. juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
  95. juniper_data-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,469 @@
1
+ """Unit tests for KaggleDatasetStore."""
2
+
3
+ import csv
4
+ from datetime import UTC, datetime
5
+ from pathlib import Path
6
+ from unittest.mock import MagicMock, patch
7
+
8
+ import numpy as np
9
+ import pytest
10
+
11
+ from juniper_data.core.models import DatasetMeta
12
+ from juniper_data.storage.memory import InMemoryDatasetStore
13
+
14
+
15
@pytest.fixture
def sample_meta() -> DatasetMeta:
    """Provide a fully populated ``DatasetMeta`` record for store tests.

    The record describes a two-class, two-feature dataset of 100 samples
    (80 train / 20 test, 50 samples per class) and is stamped with the
    current UTC time at fixture creation.
    """
    fields = {
        "dataset_id": "test-dataset",
        "generator": "test",
        "generator_version": "1.0.0",
        "params": {"seed": 42},
        "n_samples": 100,
        "n_features": 2,
        "n_classes": 2,
        "n_train": 80,
        "n_test": 20,
        "class_distribution": {"0": 50, "1": 50},
        "created_at": datetime.now(UTC),
    }
    return DatasetMeta(**fields)
31
+
32
+
33
@pytest.fixture
def sample_arrays() -> dict[str, np.ndarray]:
    """Provide deterministic float32 train/test arrays for store tests.

    All four arrays have two columns; the train arrays have 80 rows and the
    test arrays 20. Values come from a generator seeded with 42, drawn in the
    order X_train, y_train, X_test, y_test.
    """
    rng = np.random.default_rng(42)
    # Insertion order doubles as draw order, which keeps the values reproducible.
    row_counts = {"X_train": 80, "y_train": 80, "X_test": 20, "y_test": 20}
    return {
        name: rng.standard_normal((n_rows, 2)).astype(np.float32)
        for name, n_rows in row_counts.items()
    }
43
+
44
+
45
@pytest.fixture
def mock_kaggle_module():
    """Replace the Kaggle client inside ``kaggle_store`` with mocks.

    While the fixture is active, ``KAGGLE_AVAILABLE`` is patched to ``True``
    and ``KaggleApi`` is patched with a mock class whose call returns one
    shared mock instance.

    Yields:
        A ``(api_class_mock, api_instance_mock)`` tuple.
    """
    api_instance = MagicMock()
    api_class = MagicMock(return_value=api_instance)

    availability_patch = patch("juniper_data.storage.kaggle_store.KAGGLE_AVAILABLE", True)
    api_patch = patch("juniper_data.storage.kaggle_store.KaggleApi", api_class)
    with availability_patch, api_patch:
        yield api_class, api_instance
55
+
56
+
57
+ def _write_csv(path: Path, rows: list[dict]) -> None:
58
+ """Helper to write a CSV file."""
59
+ path.parent.mkdir(parents=True, exist_ok=True)
60
+ with open(path, "w", newline="", encoding="utf-8") as f:
61
+ writer = csv.DictWriter(f, fieldnames=rows[0].keys())
62
+ writer.writeheader()
63
+ writer.writerows(rows)
64
+
65
+
66
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreInit:
    """Tests for KaggleDatasetStore initialization."""

    def test_init_default(self, mock_kaggle_module, tmp_path) -> None:
        """Default construction authenticates once and uses an in-memory cache."""
        _, mock_api_instance = mock_kaggle_module
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        store = KaggleDatasetStore(download_path=tmp_path / "kaggle")
        mock_api_instance.authenticate.assert_called_once()
        assert isinstance(store._cache_store, InMemoryDatasetStore)

    def test_init_custom_cache_store(self, mock_kaggle_module, tmp_path) -> None:
        """A caller-supplied cache store is used as-is."""
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        custom_cache = InMemoryDatasetStore()
        store = KaggleDatasetStore(download_path=tmp_path / "kaggle", cache_store=custom_cache)
        assert store._cache_store is custom_cache

    def test_init_no_auto_authenticate(self, mock_kaggle_module, tmp_path) -> None:
        """With auto_authenticate=False no API client is set and none is authenticated."""
        _, mock_api_instance = mock_kaggle_module
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        store = KaggleDatasetStore(download_path=tmp_path / "kaggle", auto_authenticate=False)
        assert store._api is None
        # Checking ``_api`` alone would miss an authenticate() call that was
        # made and then discarded, so assert on the mock as well.
        mock_api_instance.authenticate.assert_not_called()

    def test_init_raises_without_kaggle(self) -> None:
        """Raises ImportError when kaggle package is not available."""
        with patch("juniper_data.storage.kaggle_store.KAGGLE_AVAILABLE", False):
            from juniper_data.storage.kaggle_store import KaggleDatasetStore

            with pytest.raises(ImportError, match="Kaggle package not installed"):
                KaggleDatasetStore()
103
+
104
+
105
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreDownload:
    """Exercise ``download_dataset`` against the mocked Kaggle API."""

    @staticmethod
    def _make_store(root, **options):
        """Build a store whose download path is ``root / "kaggle"``."""
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        return KaggleDatasetStore(download_path=root / "kaggle", **options)

    def test_download_dataset(self, mock_kaggle_module, tmp_path) -> None:
        """A first download returns a path and hits the API exactly once."""
        api = mock_kaggle_module[1]
        kaggle_store = self._make_store(tmp_path)

        downloaded = kaggle_store.download_dataset("owner/dataset-name")

        assert isinstance(downloaded, Path)
        api.dataset_download_files.assert_called_once()

    def test_download_dataset_cached(self, mock_kaggle_module, tmp_path) -> None:
        """An existing dataset directory is returned without calling the API."""
        api = mock_kaggle_module[1]
        kaggle_store = self._make_store(tmp_path)
        existing_dir = tmp_path / "kaggle" / "owner_dataset-name"
        existing_dir.mkdir(parents=True, exist_ok=True)

        downloaded = kaggle_store.download_dataset("owner/dataset-name")

        assert downloaded == existing_dir
        api.dataset_download_files.assert_not_called()

    def test_download_dataset_force(self, mock_kaggle_module, tmp_path) -> None:
        """``force=True`` downloads again even though the directory exists."""
        api = mock_kaggle_module[1]
        kaggle_store = self._make_store(tmp_path)
        (tmp_path / "kaggle" / "owner_dataset-name").mkdir(parents=True, exist_ok=True)

        kaggle_store.download_dataset("owner/dataset-name", force=True)

        api.dataset_download_files.assert_called_once()

    def test_download_dataset_not_authenticated(self, mock_kaggle_module, tmp_path) -> None:
        """Downloading from an unauthenticated store raises RuntimeError."""
        kaggle_store = self._make_store(tmp_path, auto_authenticate=False)

        with pytest.raises(RuntimeError, match="not authenticated"):
            kaggle_store.download_dataset("owner/dataset")
154
+
155
+
156
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreLoadDataset:
    """Tests for load_kaggle_dataset operation.

    Each test pre-creates the dataset directory under the store's download
    path and, where needed, writes a CSV into it before loading.
    NOTE(review): presumably the existing directory is what lets the store
    skip a real download; confirm against ``kaggle_store``.
    """

    @staticmethod
    def _store_and_dir(tmp_path, dir_name):
        """Create a store under ``tmp_path / "kaggle"`` and the named dataset directory.

        Returns:
            A ``(store, dataset_dir)`` tuple; ``dataset_dir`` already exists.
        """
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        store = KaggleDatasetStore(download_path=tmp_path / "kaggle")
        dataset_dir = tmp_path / "kaggle" / dir_name
        dataset_dir.mkdir(parents=True, exist_ok=True)
        return store, dataset_dir

    def test_load_csv_dataset(self, mock_kaggle_module, tmp_path) -> None:
        """Load a CSV dataset from Kaggle."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_iris")
        rows = [
            {"sepal_length": "5.1", "sepal_width": "3.5", "label": "0"},
            {"sepal_length": "7.0", "sepal_width": "3.2", "label": "1"},
            {"sepal_length": "6.3", "sepal_width": "3.3", "label": "2"},
            {"sepal_length": "5.0", "sepal_width": "3.6", "label": "0"},
            {"sepal_length": "6.7", "sepal_width": "3.1", "label": "1"},
        ]
        _write_csv(dataset_dir / "data.csv", rows)

        dataset_id, meta, arrays = store.load_kaggle_dataset("owner/iris", file_name="data.csv")

        assert "kaggle-owner-iris" in dataset_id
        assert meta.generator == "kaggle"
        assert meta.n_samples == 5
        assert meta.n_features == 2
        assert meta.n_classes == 3
        assert arrays["X_full"].shape == (5, 2)

    def test_load_with_auto_detect_csv(self, mock_kaggle_module, tmp_path) -> None:
        """Auto-detect CSV when specified file not found."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_test")
        rows = [
            {"feature": "1.0", "label": "a"},
            {"feature": "2.0", "label": "b"},
        ]
        _write_csv(dataset_dir / "actual.csv", rows)

        _, meta, _ = store.load_kaggle_dataset("owner/test", file_name="missing.csv")
        assert meta.n_samples == 2

    def test_load_file_not_found(self, mock_kaggle_module, tmp_path) -> None:
        """Raises FileNotFoundError when no CSV found."""
        store, _ = self._store_and_dir(tmp_path, "owner_test")

        with pytest.raises(FileNotFoundError, match="not found"):
            store.load_kaggle_dataset("owner/test", file_name="missing.csv")

    def test_load_empty_csv(self, mock_kaggle_module, tmp_path) -> None:
        """Raises ValueError when CSV has a header but no data rows."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_empty")
        empty_csv = dataset_dir / "data.csv"
        empty_csv.write_text("col1,col2,label\n")

        with pytest.raises(ValueError, match="No data found"):
            store.load_kaggle_dataset("owner/empty", file_name="data.csv")

    def test_load_with_seed(self, mock_kaggle_module, tmp_path) -> None:
        """Loading twice with the same seed yields identical feature arrays."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_seed")
        rows = [{"feature": str(i), "label": str(i % 2)} for i in range(10)]
        _write_csv(dataset_dir / "data.csv", rows)

        _, _, arrays1 = store.load_kaggle_dataset("owner/seed", file_name="data.csv", seed=42)
        _, _, arrays2 = store.load_kaggle_dataset("owner/seed", file_name="data.csv", seed=42)

        np.testing.assert_array_equal(arrays1["X_full"], arrays2["X_full"])

    def test_load_with_n_samples(self, mock_kaggle_module, tmp_path) -> None:
        """Load with n_samples limits data."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_limit")
        rows = [{"feature": str(i), "label": str(i % 2)} for i in range(20)]
        _write_csv(dataset_dir / "data.csv", rows)

        _, meta, _ = store.load_kaggle_dataset("owner/limit", file_name="data.csv", n_samples=5)
        assert meta.n_samples == 5

    def test_load_without_one_hot(self, mock_kaggle_module, tmp_path) -> None:
        """Load without one-hot encoding gives a single label column."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_nohot")
        rows = [{"feature": str(i), "label": str(i % 2)} for i in range(10)]
        _write_csv(dataset_dir / "data.csv", rows)

        _, _, arrays = store.load_kaggle_dataset("owner/nohot", file_name="data.csv", one_hot_labels=False)
        assert arrays["y_full"].shape[1] == 1

    def test_load_with_normalization(self, mock_kaggle_module, tmp_path) -> None:
        """Load with feature normalization keeps features within [0, 1]."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_norm")
        rows = [{"feature": str(i * 10), "label": str(i % 2)} for i in range(10)]
        _write_csv(dataset_dir / "data.csv", rows)

        _, _, arrays = store.load_kaggle_dataset("owner/norm", file_name="data.csv", normalize_features=True)
        # Small tolerance on both bounds absorbs float rounding.
        assert arrays["X_full"].max() <= 1.0 + 1e-6
        assert arrays["X_full"].min() >= 0.0 - 1e-6

    def test_load_with_invalid_values(self, mock_kaggle_module, tmp_path) -> None:
        """Non-numeric feature values are treated as 0.0."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_bad")
        rows = [
            {"feature": "abc", "label": "0"},
            {"feature": "1.5", "label": "1"},
        ]
        _write_csv(dataset_dir / "data.csv", rows)

        _, _, arrays = store.load_kaggle_dataset("owner/bad", file_name="data.csv")
        assert arrays["X_full"][0, 0] == 0.0
        assert arrays["X_full"][1, 0] == 1.5

    def test_load_with_feature_columns(self, mock_kaggle_module, tmp_path) -> None:
        """Load with explicit feature columns."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_cols")
        rows = [
            {"a": "1", "b": "2", "c": "3", "label": "0"},
            {"a": "4", "b": "5", "c": "6", "label": "1"},
        ]
        _write_csv(dataset_dir / "data.csv", rows)

        _, meta, _ = store.load_kaggle_dataset("owner/cols", file_name="data.csv", feature_columns=["a", "b"])
        assert meta.n_features == 2

    def test_load_saves_to_cache(self, mock_kaggle_module, tmp_path) -> None:
        """Load saves the result to cache store."""
        store, dataset_dir = self._store_and_dir(tmp_path, "owner_cache")
        rows = [{"feature": "1", "label": "0"}, {"feature": "2", "label": "1"}]
        _write_csv(dataset_dir / "data.csv", rows)

        dataset_id, _, _ = store.load_kaggle_dataset("owner/cache", file_name="data.csv")
        assert store._cache_store.exists(dataset_id)
330
+
331
+
332
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreListCompetitions:
    """Exercise ``list_competitions`` against the mocked Kaggle API."""

    def test_list_competitions(self, mock_kaggle_module, tmp_path) -> None:
        """One API competition comes back as one entry exposing ``ref`` and ``title``."""
        api = mock_kaggle_module[1]
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        kaggle_store = KaggleDatasetStore(download_path=tmp_path / "kaggle")

        # Constructor keyword arguments set the same attributes on the mock.
        competition = MagicMock(
            ref="competition-1",
            title="Test Competition",
            deadline="2026-12-31",
            category="Getting Started",
        )
        api.competitions_list.return_value = [competition]

        listing = kaggle_store.list_competitions(search="test")

        assert len(listing) == 1
        entry = listing[0]
        assert entry["ref"] == "competition-1"
        assert entry["title"] == "Test Competition"

    def test_list_competitions_not_authenticated(self, mock_kaggle_module, tmp_path) -> None:
        """Listing competitions from an unauthenticated store raises RuntimeError."""
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        kaggle_store = KaggleDatasetStore(download_path=tmp_path / "kaggle", auto_authenticate=False)

        with pytest.raises(RuntimeError, match="not authenticated"):
            kaggle_store.list_competitions()
364
+
365
+
366
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreListKaggleDatasets:
    """Exercise ``list_kaggle_datasets`` against the mocked Kaggle API."""

    def test_list_kaggle_datasets(self, mock_kaggle_module, tmp_path) -> None:
        """Search and page are forwarded to the API and the result is formatted."""
        api = mock_kaggle_module[1]
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        kaggle_store = KaggleDatasetStore(download_path=tmp_path / "kaggle")

        # Constructor keyword arguments set the same attributes on the mock.
        remote_dataset = MagicMock(
            ref="owner/dataset",
            title="Test Dataset",
            totalBytes=1024,
            lastUpdated="2026-01-01",
        )
        api.dataset_list.return_value = [remote_dataset]

        listing = kaggle_store.list_kaggle_datasets(search="test", page=2)

        assert len(listing) == 1
        assert listing[0]["ref"] == "owner/dataset"
        api.dataset_list.assert_called_once_with(search="test", page=2)

    def test_list_kaggle_datasets_not_authenticated(self, mock_kaggle_module, tmp_path) -> None:
        """Listing datasets from an unauthenticated store raises RuntimeError."""
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        kaggle_store = KaggleDatasetStore(download_path=tmp_path / "kaggle", auto_authenticate=False)

        with pytest.raises(RuntimeError, match="not authenticated"):
            kaggle_store.list_kaggle_datasets()
398
+
399
+
400
@pytest.mark.unit
@pytest.mark.storage
class TestKaggleDatasetStoreDelegation:
    """Check that the generic store methods operate on the wrapped cache store."""

    @staticmethod
    def _fresh_store(tmp_path):
        """Build a store whose download path is ``tmp_path / "kaggle"``."""
        from juniper_data.storage.kaggle_store import KaggleDatasetStore

        return KaggleDatasetStore(download_path=tmp_path / "kaggle")

    @classmethod
    def _seeded_store(cls, tmp_path, meta, arrays):
        """Build a store and put dataset ``"test-1"`` directly into its cache store."""
        kaggle_store = cls._fresh_store(tmp_path)
        kaggle_store._cache_store.save("test-1", meta, arrays)
        return kaggle_store

    def test_save_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """A dataset saved through the store is present in the cache store."""
        kaggle_store = self._fresh_store(tmp_path)
        kaggle_store.save("test-1", sample_meta, sample_arrays)
        assert kaggle_store._cache_store.exists("test-1")

    def test_get_meta_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """Metadata seeded into the cache store is readable through the store."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        fetched = kaggle_store.get_meta("test-1")
        assert fetched is not None

    def test_get_artifact_bytes_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """Artifact bytes for a cached dataset are readable through the store."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        payload = kaggle_store.get_artifact_bytes("test-1")
        assert payload is not None

    def test_exists_delegates(self, mock_kaggle_module, tmp_path) -> None:
        """An unknown dataset id is reported as not existing."""
        kaggle_store = self._fresh_store(tmp_path)
        assert kaggle_store.exists("nonexistent") is False

    def test_delete_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """Deleting a cached dataset through the store returns True."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        assert kaggle_store.delete("test-1") is True

    def test_list_datasets_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """A cached dataset id shows up in the store's dataset listing."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        assert "test-1" in kaggle_store.list_datasets()

    def test_update_meta_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """Updating metadata of a cached dataset through the store returns True."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        assert kaggle_store.update_meta("test-1", sample_meta) is True

    def test_list_all_metadata_delegates(self, mock_kaggle_module, tmp_path, sample_meta, sample_arrays) -> None:
        """With one cached dataset the metadata listing has exactly one entry."""
        kaggle_store = self._seeded_store(tmp_path, sample_meta, sample_arrays)
        all_meta = kaggle_store.list_all_metadata()
        assert len(all_meta) == 1