juniper-data 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data/__init__.py +88 -0
- juniper_data/__main__.py +78 -0
- juniper_data/api/__init__.py +10 -0
- juniper_data/api/app.py +111 -0
- juniper_data/api/middleware.py +95 -0
- juniper_data/api/routes/__init__.py +9 -0
- juniper_data/api/routes/datasets.py +414 -0
- juniper_data/api/routes/generators.py +125 -0
- juniper_data/api/routes/health.py +49 -0
- juniper_data/api/security.py +238 -0
- juniper_data/api/settings.py +109 -0
- juniper_data/core/__init__.py +32 -0
- juniper_data/core/artifacts.py +63 -0
- juniper_data/core/dataset_id.py +38 -0
- juniper_data/core/models.py +135 -0
- juniper_data/core/split.py +120 -0
- juniper_data/generators/__init__.py +15 -0
- juniper_data/generators/arc_agi/__init__.py +11 -0
- juniper_data/generators/arc_agi/generator.py +229 -0
- juniper_data/generators/arc_agi/params.py +56 -0
- juniper_data/generators/checkerboard/__init__.py +15 -0
- juniper_data/generators/checkerboard/generator.py +114 -0
- juniper_data/generators/checkerboard/params.py +32 -0
- juniper_data/generators/circles/__init__.py +11 -0
- juniper_data/generators/circles/generator.py +112 -0
- juniper_data/generators/circles/params.py +31 -0
- juniper_data/generators/csv_import/__init__.py +15 -0
- juniper_data/generators/csv_import/generator.py +198 -0
- juniper_data/generators/csv_import/params.py +48 -0
- juniper_data/generators/gaussian/__init__.py +11 -0
- juniper_data/generators/gaussian/generator.py +149 -0
- juniper_data/generators/gaussian/params.py +53 -0
- juniper_data/generators/mnist/__init__.py +11 -0
- juniper_data/generators/mnist/generator.py +124 -0
- juniper_data/generators/mnist/params.py +39 -0
- juniper_data/generators/spiral/__init__.py +57 -0
- juniper_data/generators/spiral/defaults.py +39 -0
- juniper_data/generators/spiral/generator.py +206 -0
- juniper_data/generators/spiral/params.py +148 -0
- juniper_data/generators/xor/__init__.py +11 -0
- juniper_data/generators/xor/generator.py +162 -0
- juniper_data/generators/xor/params.py +30 -0
- juniper_data/storage/__init__.py +120 -0
- juniper_data/storage/base.py +279 -0
- juniper_data/storage/cached.py +211 -0
- juniper_data/storage/hf_store.py +257 -0
- juniper_data/storage/kaggle_store.py +333 -0
- juniper_data/storage/local_fs.py +232 -0
- juniper_data/storage/memory.py +136 -0
- juniper_data/storage/postgres_store.py +373 -0
- juniper_data/storage/redis_store.py +264 -0
- juniper_data/tests/__init__.py +1 -0
- juniper_data/tests/conftest.py +68 -0
- juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
- juniper_data/tests/integration/__init__.py +1 -0
- juniper_data/tests/integration/test_api.py +283 -0
- juniper_data/tests/integration/test_e2e_workflow.py +378 -0
- juniper_data/tests/integration/test_lifecycle_api.py +304 -0
- juniper_data/tests/integration/test_security_integration.py +189 -0
- juniper_data/tests/integration/test_storage_workflow.py +259 -0
- juniper_data/tests/performance/__init__.py +1 -0
- juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
- juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
- juniper_data/tests/unit/__init__.py +1 -0
- juniper_data/tests/unit/test_api_app.py +206 -0
- juniper_data/tests/unit/test_api_routes.py +407 -0
- juniper_data/tests/unit/test_api_settings.py +100 -0
- juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
- juniper_data/tests/unit/test_artifacts.py +145 -0
- juniper_data/tests/unit/test_cached_store.py +423 -0
- juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
- juniper_data/tests/unit/test_circles_generator.py +256 -0
- juniper_data/tests/unit/test_csv_import_generator.py +345 -0
- juniper_data/tests/unit/test_dataset_id.py +181 -0
- juniper_data/tests/unit/test_gaussian_generator.py +333 -0
- juniper_data/tests/unit/test_hf_store.py +416 -0
- juniper_data/tests/unit/test_init.py +93 -0
- juniper_data/tests/unit/test_kaggle_store.py +469 -0
- juniper_data/tests/unit/test_lifecycle.py +394 -0
- juniper_data/tests/unit/test_main.py +127 -0
- juniper_data/tests/unit/test_middleware.py +79 -0
- juniper_data/tests/unit/test_mnist_generator.py +370 -0
- juniper_data/tests/unit/test_postgres_store.py +490 -0
- juniper_data/tests/unit/test_redis_store.py +500 -0
- juniper_data/tests/unit/test_security.py +281 -0
- juniper_data/tests/unit/test_security_boundaries.py +517 -0
- juniper_data/tests/unit/test_spiral_generator.py +566 -0
- juniper_data/tests/unit/test_split.py +245 -0
- juniper_data/tests/unit/test_storage.py +767 -0
- juniper_data/tests/unit/test_xor_generator.py +223 -0
- juniper_data-0.4.2.dist-info/METADATA +216 -0
- juniper_data-0.4.2.dist-info/RECORD +95 -0
- juniper_data-0.4.2.dist-info/WHEEL +5 -0
- juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
- juniper_data-0.4.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,490 @@
|
|
|
1
|
+
"""Unit tests for PostgresDatasetStore."""
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import json
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from unittest.mock import MagicMock, patch
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from juniper_data.core.models import DatasetMeta
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@pytest.fixture
def sample_meta() -> DatasetMeta:
    """Provide a populated ``DatasetMeta`` shared by the store tests.

    The metadata describes a 100-sample, 2-feature, 2-class dataset split
    80/20 between train and test, with a fixed timezone-aware timestamp.
    """
    fields = {
        "dataset_id": "test-dataset",
        "generator": "test",
        "generator_version": "1.0.0",
        "params": {"seed": 42},
        "n_samples": 100,
        "n_features": 2,
        "n_classes": 2,
        "n_train": 80,
        "n_test": 20,
        "class_distribution": {"0": 50, "1": 50},
        "created_at": datetime(2026, 1, 1, tzinfo=UTC),
    }
    return DatasetMeta(**fields)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@pytest.fixture
def sample_arrays() -> dict[str, np.ndarray]:
    """Provide seeded float32 arrays keyed by split name.

    Every array has two columns; the train arrays have 80 rows and the test
    arrays have 20 rows.
    """
    rng = np.random.default_rng(42)
    # The arrays are drawn from one shared generator, so this ordering fixes
    # the values each key receives.
    row_counts = (("X_train", 80), ("y_train", 80), ("X_test", 20), ("y_test", 20))
    return {
        name: rng.standard_normal((rows, 2)).astype(np.float32)
        for name, rows in row_counts
    }
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@pytest.fixture
def mock_psycopg2():
    """Patch a fake ``psycopg2`` into ``juniper_data.storage.postgres_store``.

    While the fixture is active, ``sys.modules`` resolves ``psycopg2`` and
    ``psycopg2.extras`` to mocks, and the store module's
    ``POSTGRES_AVAILABLE``, ``psycopg2`` and ``RealDictCursor`` names are
    replaced.

    Yields:
        A ``(mock_pg, mock_conn, mock_cursor)`` tuple: the fake module, the
        connection returned by ``mock_pg.connect()``, and the cursor returned
        by ``mock_conn.cursor()``.
    """
    mock_pg = MagicMock()
    mock_conn = MagicMock()
    mock_cursor = MagicMock()

    mock_pg.connect.return_value = mock_conn
    # Connection and cursor are usable as context managers: entering yields
    # the same mock, and exiting returns False so exceptions propagate.
    mock_conn.__enter__ = MagicMock(return_value=mock_conn)
    mock_conn.__exit__ = MagicMock(return_value=False)
    mock_conn.cursor.return_value = mock_cursor
    mock_cursor.__enter__ = MagicMock(return_value=mock_cursor)
    mock_cursor.__exit__ = MagicMock(return_value=False)

    mock_pg.extras = MagicMock()
    mock_pg.extras.RealDictCursor = MagicMock()

    store_module = "juniper_data.storage.postgres_store"
    with (
        patch.dict("sys.modules", {"psycopg2": mock_pg, "psycopg2.extras": mock_pg.extras}),
        patch(f"{store_module}.POSTGRES_AVAILABLE", True),
        # create=True matches the RealDictCursor patch below.
        # NOTE(review): presumably the module-level name can be missing when
        # the optional psycopg2 import failed -- confirm against the module.
        patch(f"{store_module}.psycopg2", mock_pg, create=True),
        patch(f"{store_module}.RealDictCursor", mock_pg.extras.RealDictCursor, create=True),
    ):
        yield mock_pg, mock_conn, mock_cursor
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreInit:
    """Constructor behaviour of PostgresDatasetStore."""

    def test_init_default_params(self, mock_psycopg2, tmp_path) -> None:
        """Default connection parameters are localhost and port "5432"."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        artifact_dir = tmp_path / "artifacts"
        # Path.mkdir is stubbed out while the store is being constructed.
        with patch.object(Path, "mkdir"):
            store = PostgresDatasetStore(artifact_path=artifact_dir)

        conn_params = store._conn_params
        assert conn_params["host"] == "localhost"
        assert conn_params["port"] == "5432"

    def test_init_custom_params(self, mock_psycopg2, tmp_path) -> None:
        """Explicit connection arguments end up in ``_conn_params``."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        custom = {
            "host": "db.example.com",
            "port": 5433,
            "database": "mydb",
            "user": "admin",
            "password": "secret",
        }
        store = PostgresDatasetStore(artifact_path=tmp_path / "data", **custom)

        # The integer port is expected back as a string.
        expected = {**custom, "port": "5433"}
        for key, value in expected.items():
            assert store._conn_params[key] == value

    def test_init_with_connection_string(self, mock_psycopg2, tmp_path) -> None:
        """A connection string becomes the only entry, under the "dsn" key."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        dsn = "postgresql://user:pass@host/db"
        store = PostgresDatasetStore(connection_string=dsn, artifact_path=tmp_path / "data")

        assert store._conn_params == {"dsn": "postgresql://user:pass@host/db"}

    def test_init_without_auto_schema(self, mock_psycopg2, tmp_path) -> None:
        """With ``auto_create_schema=False`` no connection is opened."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        fake_pg = mock_psycopg2[0]
        connects_before = fake_pg.connect.call_count

        _unused_store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        assert fake_pg.connect.call_count == connects_before

    def test_init_raises_without_psycopg2(self) -> None:
        """Construction raises ImportError when POSTGRES_AVAILABLE is False."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        with (
            patch("juniper_data.storage.postgres_store.POSTGRES_AVAILABLE", False),
            pytest.raises(ImportError, match="psycopg2 package not installed"),
        ):
            PostgresDatasetStore()
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreMetaConversion:
    """Round-trip checks between ``DatasetMeta`` objects and database rows."""

    @staticmethod
    def _db_row(**overrides) -> dict:
        """Return a complete metadata row; ``overrides`` replace default values."""
        row = {
            "dataset_id": "test-dataset",
            "generator": "test",
            "generator_version": "1.0.0",
            "params": {"seed": 42},
            "n_samples": 100,
            "n_features": 2,
            "n_classes": 2,
            "n_train": 80,
            "n_test": 20,
            "class_distribution": {"0": 50, "1": 50},
            "artifact_formats": ["npz"],
            "created_at": datetime(2026, 1, 1, tzinfo=UTC),
            "checksum": None,
            "tags": ["test"],
            "ttl_seconds": None,
            "expires_at": None,
            "last_accessed_at": None,
            "access_count": 0,
        }
        row.update(overrides)
        return row

    def test_meta_to_row(self, mock_psycopg2, tmp_path, sample_meta) -> None:
        """``_meta_to_row`` yields a dict whose ``params`` is a JSON string."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        converted = store._meta_to_row(sample_meta)

        assert converted["dataset_id"] == "test-dataset"
        assert converted["generator"] == "test"
        assert converted["n_samples"] == 100
        serialized_params = converted["params"]
        assert isinstance(serialized_params, str)
        assert json.loads(serialized_params) == {"seed": 42}

    def test_row_to_meta_with_dict_params(self, mock_psycopg2, tmp_path, sample_meta) -> None:
        """``_row_to_meta`` accepts ``params`` that is already a dict."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        restored = store._row_to_meta(self._db_row())

        assert restored.dataset_id == "test-dataset"
        assert restored.params == {"seed": 42}

    def test_row_to_meta_with_string_params(self, mock_psycopg2, tmp_path) -> None:
        """``_row_to_meta`` decodes JSON-string columns and maps null tags to ``[]``."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        string_row = self._db_row(
            params='{"seed": 42}',
            class_distribution='{"0": 50, "1": 50}',
            tags=None,
        )

        restored = store._row_to_meta(string_row)

        assert restored.params == {"seed": 42}
        assert restored.class_distribution == {"0": 50, "1": 50}
        assert restored.tags == []
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreSave:
    """Behaviour of ``PostgresDatasetStore.save``."""

    def test_save(self, mock_psycopg2, tmp_path, sample_meta, sample_arrays) -> None:
        """Saving executes a statement on the cursor and creates the ``.npz`` artifact."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        artifact_dir = tmp_path / "data"
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=artifact_dir)

        store.save("test-dataset", sample_meta, sample_arrays)

        cursor.execute.assert_called()
        assert (artifact_dir / "test-dataset.npz").exists()
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreGetMeta:
    """Behaviour of ``PostgresDatasetStore.get_meta``."""

    def test_get_meta_found(self, mock_psycopg2, tmp_path, sample_meta) -> None:
        """A row returned by the cursor is converted into metadata."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        # Row handed back by the mocked cursor's fetchone().
        cursor.fetchone.return_value = {
            "dataset_id": "test-dataset",
            "generator": "test",
            "generator_version": "1.0.0",
            "params": {"seed": 42},
            "n_samples": 100,
            "n_features": 2,
            "n_classes": 2,
            "n_train": 80,
            "n_test": 20,
            "class_distribution": {"0": 50, "1": 50},
            "artifact_formats": ["npz"],
            "created_at": datetime(2026, 1, 1, tzinfo=UTC),
            "checksum": None,
            "tags": [],
            "ttl_seconds": None,
            "expires_at": None,
            "last_accessed_at": None,
            "access_count": 0,
        }

        fetched = store.get_meta("test-dataset")

        assert fetched is not None
        assert fetched.dataset_id == "test-dataset"

    def test_get_meta_not_found(self, mock_psycopg2, tmp_path) -> None:
        """``None`` is returned when the cursor yields no row."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchone.return_value = None

        fetched = store.get_meta("nonexistent")

        assert fetched is None
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreGetArtifact:
    """Behaviour of ``PostgresDatasetStore.get_artifact_bytes``."""

    def test_get_artifact_bytes_found(self, mock_psycopg2, tmp_path, sample_arrays) -> None:
        """Non-empty bytes come back when the artifact file exists."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        artifact_dir = tmp_path / "data"
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=artifact_dir)
        artifact_dir.mkdir(parents=True, exist_ok=True)

        # Write a compressed .npz archive where the store is expected to look.
        archive = io.BytesIO()
        np.savez_compressed(archive, **sample_arrays)
        (artifact_dir / "test-dataset.npz").write_bytes(archive.getvalue())

        payload = store.get_artifact_bytes("test-dataset")

        assert payload is not None
        assert len(payload) > 0

    def test_get_artifact_bytes_not_found(self, mock_psycopg2, tmp_path) -> None:
        """``None`` is returned when no artifact file exists."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        payload = store.get_artifact_bytes("nonexistent")

        assert payload is None
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreExists:
    """Behaviour of ``PostgresDatasetStore.exists``."""

    def test_exists_true(self, mock_psycopg2, tmp_path) -> None:
        """``True`` is returned when the cursor yields a row."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchone.return_value = (1,)

        found = store.exists("test-dataset")

        assert found is True

    def test_exists_false(self, mock_psycopg2, tmp_path) -> None:
        """``False`` is returned when the cursor yields no row."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchone.return_value = None

        found = store.exists("nonexistent")

        assert found is False
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreDelete:
    """Behaviour of ``PostgresDatasetStore.delete``."""

    def test_delete_existing_with_artifact(self, mock_psycopg2, tmp_path) -> None:
        """Deleting a known dataset returns ``True`` and removes its artifact file."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        artifact_dir = tmp_path / "data"
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=artifact_dir)

        artifact_dir.mkdir(parents=True, exist_ok=True)
        artifact_file = artifact_dir / "test-dataset.npz"
        artifact_file.write_bytes(b"dummy")
        cursor.fetchone.return_value = ("test-dataset",)

        deleted = store.delete("test-dataset")

        assert deleted is True
        assert not artifact_file.exists()

    def test_delete_existing_no_artifact(self, mock_psycopg2, tmp_path) -> None:
        """Deleting still returns ``True`` when there is no artifact file on disk."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchone.return_value = ("test-dataset",)

        deleted = store.delete("test-dataset")

        assert deleted is True

    def test_delete_nonexistent(self, mock_psycopg2, tmp_path) -> None:
        """Deleting returns ``False`` when the cursor yields no row."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchone.return_value = None

        deleted = store.delete("nonexistent")

        assert deleted is False
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreListDatasets:
    """Behaviour of ``PostgresDatasetStore.list_datasets``."""

    def test_list_datasets(self, mock_psycopg2, tmp_path) -> None:
        """The first column of each fetched row is returned as a dataset ID."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.fetchall.return_value = [("ds-1",), ("ds-2",)]

        dataset_ids = store.list_datasets()

        assert dataset_ids == ["ds-1", "ds-2"]
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreUpdateMeta:
    """Behaviour of ``PostgresDatasetStore.update_meta``."""

    def test_update_meta_found(self, mock_psycopg2, tmp_path, sample_meta) -> None:
        """``True`` is returned when the cursor reports one affected row."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.rowcount = 1

        updated = store.update_meta("test-dataset", sample_meta)

        assert updated is True

    def test_update_meta_not_found(self, mock_psycopg2, tmp_path, sample_meta) -> None:
        """``False`` is returned when the cursor reports zero affected rows."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")
        cursor.rowcount = 0

        updated = store.update_meta("nonexistent", sample_meta)

        assert updated is False
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreListAllMetadata:
    """Behaviour of ``PostgresDatasetStore.list_all_metadata``."""

    def test_list_all_metadata(self, mock_psycopg2, tmp_path) -> None:
        """Each fetched row is returned as one metadata object."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        cursor = mock_psycopg2[2]
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=tmp_path / "data")

        only_row = {
            "dataset_id": "ds-1",
            "generator": "test",
            "generator_version": "1.0.0",
            "params": {"seed": 42},
            "n_samples": 100,
            "n_features": 2,
            "n_classes": 2,
            "n_train": 80,
            "n_test": 20,
            "class_distribution": {"0": 50, "1": 50},
            "artifact_formats": ["npz"],
            "created_at": datetime(2026, 1, 1, tzinfo=UTC),
            "checksum": None,
            "tags": [],
            "ttl_seconds": None,
            "expires_at": None,
            "last_accessed_at": None,
            "access_count": 0,
        }
        cursor.fetchall.return_value = [only_row]

        all_meta = store.list_all_metadata()

        assert len(all_meta) == 1
        assert all_meta[0].dataset_id == "ds-1"
|
|
464
|
+
|
|
465
|
+
|
|
466
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreClose:
    """Behaviour of ``PostgresDatasetStore.close``."""

    def test_close(self, mock_psycopg2, tmp_path) -> None:
        """Calling ``close`` on a fresh store does not raise."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        artifact_dir = tmp_path / "data"
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=artifact_dir)

        # The test passes as long as this call completes without raising.
        store.close()
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
@pytest.mark.unit
@pytest.mark.storage
class TestPostgresDatasetStoreArtifactFile:
    """Behaviour of the ``_artifact_file`` path helper."""

    def test_artifact_file_path(self, mock_psycopg2, tmp_path) -> None:
        """The helper returns ``<artifact_path>/<dataset_id>.npz``."""
        from juniper_data.storage.postgres_store import PostgresDatasetStore

        artifact_dir = tmp_path / "data"
        store = PostgresDatasetStore(auto_create_schema=False, artifact_path=artifact_dir)

        resolved = store._artifact_file("my-dataset")

        assert resolved == artifact_dir / "my-dataset.npz"
|