juniper-data 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data/__init__.py +88 -0
- juniper_data/__main__.py +78 -0
- juniper_data/api/__init__.py +10 -0
- juniper_data/api/app.py +111 -0
- juniper_data/api/middleware.py +95 -0
- juniper_data/api/routes/__init__.py +9 -0
- juniper_data/api/routes/datasets.py +414 -0
- juniper_data/api/routes/generators.py +125 -0
- juniper_data/api/routes/health.py +49 -0
- juniper_data/api/security.py +238 -0
- juniper_data/api/settings.py +109 -0
- juniper_data/core/__init__.py +32 -0
- juniper_data/core/artifacts.py +63 -0
- juniper_data/core/dataset_id.py +38 -0
- juniper_data/core/models.py +135 -0
- juniper_data/core/split.py +120 -0
- juniper_data/generators/__init__.py +15 -0
- juniper_data/generators/arc_agi/__init__.py +11 -0
- juniper_data/generators/arc_agi/generator.py +229 -0
- juniper_data/generators/arc_agi/params.py +56 -0
- juniper_data/generators/checkerboard/__init__.py +15 -0
- juniper_data/generators/checkerboard/generator.py +114 -0
- juniper_data/generators/checkerboard/params.py +32 -0
- juniper_data/generators/circles/__init__.py +11 -0
- juniper_data/generators/circles/generator.py +112 -0
- juniper_data/generators/circles/params.py +31 -0
- juniper_data/generators/csv_import/__init__.py +15 -0
- juniper_data/generators/csv_import/generator.py +198 -0
- juniper_data/generators/csv_import/params.py +48 -0
- juniper_data/generators/gaussian/__init__.py +11 -0
- juniper_data/generators/gaussian/generator.py +149 -0
- juniper_data/generators/gaussian/params.py +53 -0
- juniper_data/generators/mnist/__init__.py +11 -0
- juniper_data/generators/mnist/generator.py +124 -0
- juniper_data/generators/mnist/params.py +39 -0
- juniper_data/generators/spiral/__init__.py +57 -0
- juniper_data/generators/spiral/defaults.py +39 -0
- juniper_data/generators/spiral/generator.py +206 -0
- juniper_data/generators/spiral/params.py +148 -0
- juniper_data/generators/xor/__init__.py +11 -0
- juniper_data/generators/xor/generator.py +162 -0
- juniper_data/generators/xor/params.py +30 -0
- juniper_data/storage/__init__.py +120 -0
- juniper_data/storage/base.py +279 -0
- juniper_data/storage/cached.py +211 -0
- juniper_data/storage/hf_store.py +257 -0
- juniper_data/storage/kaggle_store.py +333 -0
- juniper_data/storage/local_fs.py +232 -0
- juniper_data/storage/memory.py +136 -0
- juniper_data/storage/postgres_store.py +373 -0
- juniper_data/storage/redis_store.py +264 -0
- juniper_data/tests/__init__.py +1 -0
- juniper_data/tests/conftest.py +68 -0
- juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
- juniper_data/tests/integration/__init__.py +1 -0
- juniper_data/tests/integration/test_api.py +283 -0
- juniper_data/tests/integration/test_e2e_workflow.py +378 -0
- juniper_data/tests/integration/test_lifecycle_api.py +304 -0
- juniper_data/tests/integration/test_security_integration.py +189 -0
- juniper_data/tests/integration/test_storage_workflow.py +259 -0
- juniper_data/tests/performance/__init__.py +1 -0
- juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
- juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
- juniper_data/tests/unit/__init__.py +1 -0
- juniper_data/tests/unit/test_api_app.py +206 -0
- juniper_data/tests/unit/test_api_routes.py +407 -0
- juniper_data/tests/unit/test_api_settings.py +100 -0
- juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
- juniper_data/tests/unit/test_artifacts.py +145 -0
- juniper_data/tests/unit/test_cached_store.py +423 -0
- juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
- juniper_data/tests/unit/test_circles_generator.py +256 -0
- juniper_data/tests/unit/test_csv_import_generator.py +345 -0
- juniper_data/tests/unit/test_dataset_id.py +181 -0
- juniper_data/tests/unit/test_gaussian_generator.py +333 -0
- juniper_data/tests/unit/test_hf_store.py +416 -0
- juniper_data/tests/unit/test_init.py +93 -0
- juniper_data/tests/unit/test_kaggle_store.py +469 -0
- juniper_data/tests/unit/test_lifecycle.py +394 -0
- juniper_data/tests/unit/test_main.py +127 -0
- juniper_data/tests/unit/test_middleware.py +79 -0
- juniper_data/tests/unit/test_mnist_generator.py +370 -0
- juniper_data/tests/unit/test_postgres_store.py +490 -0
- juniper_data/tests/unit/test_redis_store.py +500 -0
- juniper_data/tests/unit/test_security.py +281 -0
- juniper_data/tests/unit/test_security_boundaries.py +517 -0
- juniper_data/tests/unit/test_spiral_generator.py +566 -0
- juniper_data/tests/unit/test_split.py +245 -0
- juniper_data/tests/unit/test_storage.py +767 -0
- juniper_data/tests/unit/test_xor_generator.py +223 -0
- juniper_data-0.4.2.dist-info/METADATA +216 -0
- juniper_data-0.4.2.dist-info/RECORD +95 -0
- juniper_data-0.4.2.dist-info/WHEEL +5 -0
- juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
- juniper_data-0.4.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,517 @@
|
|
|
1
|
+
"""Security boundary tests for JuniperData.
|
|
2
|
+
|
|
3
|
+
Tests for path traversal prevention, input injection, parameter bounds
|
|
4
|
+
enforcement, and resource exhaustion protection.
|
|
5
|
+
|
|
6
|
+
Source: RD-006 (TEST_SUITE_AUDIT_DATA_CLAUDE.md Section 1.8, TEST_SUITE_AUDIT_DATA_AMP_.md)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import tempfile
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pytest
|
|
15
|
+
from fastapi.testclient import TestClient
|
|
16
|
+
from pydantic import ValidationError
|
|
17
|
+
|
|
18
|
+
from juniper_data.api.app import create_app
|
|
19
|
+
from juniper_data.api.routes import datasets
|
|
20
|
+
from juniper_data.api.settings import Settings
|
|
21
|
+
from juniper_data.core.models import (
|
|
22
|
+
BatchDeleteRequest,
|
|
23
|
+
CreateDatasetRequest,
|
|
24
|
+
DatasetMeta,
|
|
25
|
+
)
|
|
26
|
+
from juniper_data.generators.csv_import.params import CsvImportParams
|
|
27
|
+
from juniper_data.generators.spiral.params import SpiralParams
|
|
28
|
+
from juniper_data.storage import LocalFSDatasetStore
|
|
29
|
+
from juniper_data.storage.memory import InMemoryDatasetStore
|
|
30
|
+
|
|
31
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
32
|
+
# Fixtures
|
|
33
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@pytest.fixture
def temp_dir():
    """Yield a scratch directory as a ``Path``; it is deleted on fixture teardown."""
    scratch = tempfile.TemporaryDirectory()
    try:
        yield Path(scratch.name)
    finally:
        # Same cleanup the context-manager form performs on exit.
        scratch.cleanup()
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@pytest.fixture
def fs_store(temp_dir: Path) -> LocalFSDatasetStore:
    """Return a ``LocalFSDatasetStore`` rooted at the per-test temporary directory."""
    store = LocalFSDatasetStore(temp_dir)
    return store
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@pytest.fixture
def sample_meta() -> DatasetMeta:
    """Return a fixed ``DatasetMeta`` record for a two-class spiral dataset (160 train / 40 test)."""
    fields = {
        "dataset_id": "test-dataset-001",
        "generator": "spiral",
        "generator_version": "1.0.0",
        "params": {"n_spirals": 2, "n_points_per_spiral": 100},
        "n_samples": 200,
        "n_features": 2,
        "n_classes": 2,
        "n_train": 160,
        "n_test": 40,
        "class_distribution": {"0": 100, "1": 100},
        "created_at": datetime(2026, 1, 30, 12, 0, 0),
    }
    return DatasetMeta(**fields)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@pytest.fixture
def sample_arrays() -> dict[str, np.ndarray]:
    """Return seeded float32 train/test arrays: 2-column inputs plus one-hot 2-class labels."""
    gen = np.random.default_rng(42)

    # The draws must happen in this order (X_train, y_train, X_test, y_test)
    # so the generated values stay reproducible for the fixed seed.
    train_inputs = gen.standard_normal((160, 2)).astype(np.float32)
    train_labels = np.eye(2, dtype=np.float32)[gen.integers(0, 2, 160)]
    test_inputs = gen.standard_normal((40, 2)).astype(np.float32)
    test_labels = np.eye(2, dtype=np.float32)[gen.integers(0, 2, 40)]

    arrays = {}
    arrays["X_train"] = train_inputs
    arrays["y_train"] = train_labels
    arrays["X_test"] = test_inputs
    arrays["y_test"] = test_labels
    return arrays
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@pytest.fixture
def memory_store() -> InMemoryDatasetStore:
    """Return a fresh ``InMemoryDatasetStore`` for each test."""
    store = InMemoryDatasetStore()
    return store
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@pytest.fixture
def client(memory_store: InMemoryDatasetStore) -> TestClient:
    """Return a ``TestClient`` for a freshly created app, with the in-memory store registered."""
    application = create_app(settings=Settings(storage_path="/tmp/juniper_test"))
    # Register the in-memory store with the datasets routes module once the app exists.
    datasets.set_store(memory_store)
    http = TestClient(application)
    return http
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
95
|
+
# TestPathTraversalPrevention
|
|
96
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@pytest.mark.unit
class TestPathTraversalPrevention:
    """Tests for path-traversal payloads supplied as ``dataset_id``.

    The storage-level tests pin down the path-construction behaviour that the
    assertions encode; the API-level tests check that traversal payloads only
    ever produce a 404/422 (or a plain 200 for batch delete).
    """

    def test_storage_path_with_dotdot_stays_in_base(self, fs_store: LocalFSDatasetStore, temp_dir: Path) -> None:
        """Document that a dataset ID containing '..' resolves outside the base directory.

        Despite the test name, the assertions record the risk (the constructed
        paths escape the base directory) rather than a containment guarantee.
        """
        malicious_id = "../../../etc/passwd"
        meta_path = fs_store._meta_path(malicious_id)
        npz_path = fs_store._npz_path(malicious_id)

        # Compare against the *resolved* base. If the temporary directory sits
        # behind a symlink, a resolved path is never relative to the unresolved
        # base, and the negative assertions below would pass vacuously.
        base = temp_dir.resolve()

        # Verify the constructed paths resolve outside base_path (demonstrating the risk)
        assert not meta_path.resolve().is_relative_to(base)
        assert not npz_path.resolve().is_relative_to(base)

    def test_storage_absolute_path_in_dataset_id(self, fs_store: LocalFSDatasetStore, temp_dir: Path) -> None:
        """Document how an absolute-path dataset ID is joined onto the base directory."""
        malicious_id = "/etc/shadow"
        meta_path = fs_store._meta_path(malicious_id)
        # Joining an absolute component onto a base path discards the base on
        # POSIX, so the expected value below is effectively /etc/shadow.meta.json.
        # This demonstrates the path construction behavior.
        assert meta_path == temp_dir / "/etc/shadow.meta.json"

    def test_dataset_id_with_null_bytes(self, fs_store: LocalFSDatasetStore, temp_dir: Path) -> None:
        """Dataset ID with null bytes should not allow file creation."""
        malicious_id = "dataset\x00.meta.json"
        meta_path = fs_store._meta_path(malicious_id)
        # Writing to a path with an embedded null byte is expected to raise
        # ValueError (or an OSError, depending on the platform).
        with pytest.raises((ValueError, OSError)):
            meta_path.write_text("test", encoding="utf-8")

    def test_api_dataset_id_with_path_traversal(self, client: TestClient) -> None:
        """API endpoints should handle dataset IDs with path traversal characters."""
        traversal_ids = [
            "../../../etc/passwd",
            "..%2F..%2F..%2Fetc%2Fpasswd",
            "dataset/../../../etc/shadow",
        ]
        for malicious_id in traversal_ids:
            response = client.get(f"/v1/datasets/{malicious_id}")
            # Should return 404 (not found in store), not 500 or file contents
            assert response.status_code in (404, 422), (
                f"Unexpected status for ID '{malicious_id}': {response.status_code}"
            )

    def test_api_artifact_download_with_traversal(self, client: TestClient) -> None:
        """Artifact download should not serve files outside storage via traversal."""
        response = client.get("/v1/datasets/../../etc/passwd/artifact")
        assert response.status_code in (404, 422)

    def test_batch_delete_with_traversal_ids(self, client: TestClient) -> None:
        """Batch delete should handle dataset IDs containing traversal sequences."""
        response = client.post(
            "/v1/datasets/batch-delete",
            json={"dataset_ids": ["../../../etc/passwd", "valid-id"]},
        )
        # Should complete without file-system side effects outside storage
        assert response.status_code == 200
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
159
|
+
# TestCsvImportPathSecurity
|
|
160
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@pytest.mark.unit
class TestCsvImportPathSecurity:
    """Checks how CSV import reacts to hostile values of the ``file_path`` parameter."""

    def test_absolute_path_outside_working_dir(self) -> None:
        """Generating from an absolute path to a sensitive file must raise."""
        shadow_params = CsvImportParams(file_path="/etc/shadow")
        from juniper_data.generators.csv_import.generator import CsvImportGenerator

        # Accepted outcomes: missing file, unreadable file, or a ValueError
        # (e.g. when the format cannot be auto-detected from the extension).
        acceptable = (FileNotFoundError, PermissionError, ValueError)
        with pytest.raises(acceptable):
            CsvImportGenerator.generate(shadow_params)

    def test_relative_path_traversal(self) -> None:
        """A relative '../' path is not rejected up front; generation itself must raise."""
        traversal_params = CsvImportParams(file_path="../../../etc/passwd")
        from juniper_data.generators.csv_import.generator import CsvImportGenerator

        acceptable = (FileNotFoundError, PermissionError, ValueError)
        with pytest.raises(acceptable):
            CsvImportGenerator.generate(traversal_params)

    def test_file_path_with_null_bytes(self) -> None:
        """A path containing a null byte must make generation raise."""
        null_byte_params = CsvImportParams(file_path="/tmp/test\x00malicious.csv")
        from juniper_data.generators.csv_import.generator import CsvImportGenerator

        acceptable = (FileNotFoundError, ValueError, OSError)
        with pytest.raises(acceptable):
            CsvImportGenerator.generate(null_byte_params)

    def test_csv_import_via_api_with_traversal_path(self) -> None:
        """A traversal path sent through the API must fail without leaking file contents."""
        application = create_app(settings=Settings(storage_path="/tmp/juniper_test"))
        datasets.set_store(InMemoryDatasetStore())
        # With raise_server_exceptions=False the client hands back the 500
        # response instead of re-raising the server-side exception in the test.
        api = TestClient(application, raise_server_exceptions=False)

        body = {
            "generator": "csv_import",
            "params": {"file_path": "../../../etc/passwd"},
        }
        reply = api.post("/v1/datasets", json=body)

        # Current behavior being documented: the generator's runtime error is
        # not translated into a client error, so the request surfaces as a 500.
        assert reply.status_code == 500
        # The error response must not contain passwd-style file contents.
        assert "root:" not in reply.text
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
218
|
+
# TestInputBoundaryEnforcement
|
|
219
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@pytest.mark.unit
class TestInputBoundaryEnforcement:
    """Boundary checks for request models, generator parameters and query arguments."""

    def test_spiral_n_points_at_maximum(self) -> None:
        """The upper boundary value for n_points_per_spiral (10000) is accepted."""
        at_limit = SpiralParams(n_points_per_spiral=10000)
        assert at_limit.n_points_per_spiral == 10000

    def test_spiral_n_points_above_maximum(self) -> None:
        """One past the upper boundary for n_points_per_spiral is rejected."""
        with pytest.raises(ValidationError):
            SpiralParams(n_points_per_spiral=10001)

    def test_spiral_n_points_below_minimum(self) -> None:
        """An n_points_per_spiral value of 9 is rejected as below the minimum."""
        with pytest.raises(ValidationError):
            SpiralParams(n_points_per_spiral=9)

    def test_spiral_negative_noise(self) -> None:
        """A negative noise value is rejected."""
        with pytest.raises(ValidationError):
            SpiralParams(noise=-0.1)

    def test_spiral_train_test_ratio_sum_exceeds_one(self) -> None:
        """Ratios whose sum exceeds 1.0 (0.8 + 0.3) are rejected."""
        with pytest.raises(ValidationError):
            SpiralParams(train_ratio=0.8, test_ratio=0.3)

    def test_spiral_n_spirals_at_boundaries(self) -> None:
        """n_spirals is limited to the inclusive range 2-10."""
        # Just outside the range on either side: rejected.
        for out_of_range in (1, 11):
            with pytest.raises(ValidationError):
                SpiralParams(n_spirals=out_of_range)
        # The two edge values themselves: accepted and stored unchanged.
        for edge in (2, 10):
            accepted = SpiralParams(n_spirals=edge)
            assert accepted.n_spirals == edge

    def test_api_extreme_n_points_rejected(self, client: TestClient) -> None:
        """An absurdly large n_points_per_spiral sent to the API yields a 400."""
        payload = {
            "generator": "spiral",
            "params": {"n_points_per_spiral": 999999999},
        }
        reply = client.post("/v1/datasets", json=payload)
        assert reply.status_code == 400
        assert "Invalid parameters" in reply.json()["detail"]

    def test_api_negative_parameters_rejected(self, client: TestClient) -> None:
        """A negative n_points_per_spiral sent to the API yields a 400."""
        payload = {
            "generator": "spiral",
            "params": {"n_points_per_spiral": -100},
        }
        reply = client.post("/v1/datasets", json=payload)
        assert reply.status_code == 400

    def test_api_string_in_numeric_field(self, client: TestClient) -> None:
        """A non-numeric string in a numeric parameter yields a 400."""
        payload = {
            "generator": "spiral",
            "params": {"n_points_per_spiral": "DROP TABLE datasets"},
        }
        reply = client.post("/v1/datasets", json=payload)
        assert reply.status_code == 400

    def test_ttl_seconds_zero_rejected(self) -> None:
        """ttl_seconds=0 is rejected (the smallest allowed value is 1)."""
        with pytest.raises(ValidationError):
            CreateDatasetRequest(generator="spiral", ttl_seconds=0)

    def test_ttl_seconds_negative_rejected(self) -> None:
        """A negative ttl_seconds is rejected."""
        with pytest.raises(ValidationError):
            CreateDatasetRequest(generator="spiral", ttl_seconds=-1)

    def test_batch_delete_empty_list_rejected(self) -> None:
        """An empty dataset_ids list is rejected (min_length=1)."""
        with pytest.raises(ValidationError):
            BatchDeleteRequest(dataset_ids=[])

    def test_batch_delete_exceeds_max_rejected(self) -> None:
        """A dataset_ids list of 101 entries is rejected (max_length=100)."""
        too_many = [f"id-{i}" for i in range(101)]
        with pytest.raises(ValidationError):
            BatchDeleteRequest(dataset_ids=too_many)

    def test_batch_delete_at_max_accepted(self) -> None:
        """A dataset_ids list of exactly 100 entries is accepted."""
        full_batch = [f"id-{i}" for i in range(100)]
        request = BatchDeleteRequest(dataset_ids=full_batch)
        assert len(request.dataset_ids) == 100

    def test_list_limit_boundaries(self, client: TestClient) -> None:
        """The list endpoint restricts ``limit`` to the inclusive range 1-1000."""
        # Same request order as before: below minimum, above maximum, then both edges.
        expectations = (
            ("/v1/datasets?limit=0", 422),
            ("/v1/datasets?limit=1001", 422),
            ("/v1/datasets?limit=1", 200),
            ("/v1/datasets?limit=1000", 200),
        )
        for url, expected_status in expectations:
            reply = client.get(url)
            assert reply.status_code == expected_status

    def test_list_offset_negative_rejected(self, client: TestClient) -> None:
        """The list endpoint answers 422 for a negative ``offset``."""
        reply = client.get("/v1/datasets?offset=-1")
        assert reply.status_code == 422

    def test_preview_n_boundaries(self, client: TestClient) -> None:
        """The preview endpoint answers 422 for ``n`` outside 1-1000."""
        # Below the minimum first, then above the maximum.
        for url in (
            "/v1/datasets/some-id/preview?n=0",
            "/v1/datasets/some-id/preview?n=1001",
        ):
            reply = client.get(url)
            assert reply.status_code == 422

    def test_filter_tags_match_pattern_enforcement(self, client: TestClient) -> None:
        """The filter endpoint accepts only 'any' or 'all' for ``tags_match``."""
        expectations = (
            ("/v1/datasets/filter?tags_match=invalid", 422),
            ("/v1/datasets/filter?tags_match=any", 200),
            ("/v1/datasets/filter?tags_match=all", 200),
        )
        for url, expected_status in expectations:
            reply = client.get(url)
            assert reply.status_code == expected_status
|
|
362
|
+
|
|
363
|
+
|
|
364
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
365
|
+
# TestResourceExhaustion
|
|
366
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
@pytest.mark.unit
class TestResourceExhaustion:
    """Checks that oversized requests are refused before any work is done."""

    def test_very_large_n_points_rejected_by_pydantic(self) -> None:
        """SpiralParams refuses a point count one above its 10000 limit."""
        with pytest.raises(ValidationError):
            SpiralParams(n_points_per_spiral=10001)

    def test_api_rejects_very_large_dataset_request(self, client: TestClient) -> None:
        """A create request whose point count exceeds the limit yields a 400."""
        payload = {
            "generator": "spiral",
            "params": {"n_spirals": 10, "n_points_per_spiral": 10001},
        }
        reply = client.post("/v1/datasets", json=payload)
        assert reply.status_code == 400

    def test_batch_delete_max_enforcement(self, client: TestClient) -> None:
        """A batch-delete request carrying 101 IDs yields a 422."""
        oversized = [f"id-{i}" for i in range(101)]
        reply = client.post("/v1/datasets/batch-delete", json={"dataset_ids": oversized})
        assert reply.status_code == 422
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
399
|
+
# TestAPIBoundaries
|
|
400
|
+
# ═══════════════════════════════════════════════════════════════════════════════
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
@pytest.mark.unit
class TestAPIBoundaries:
    """Tests for API-level input handling and malformed request resilience."""

    def test_malformed_json_body(self, client: TestClient) -> None:
        """API should return 422 for malformed JSON in request body."""
        response = client.post(
            "/v1/datasets",
            content=b"not valid json{{{",
            headers={"Content-Type": "application/json"},
        )
        assert response.status_code == 422

    def test_missing_required_field(self, client: TestClient) -> None:
        """API should return 422 when required 'generator' field is missing."""
        response = client.post("/v1/datasets", json={"params": {}})
        assert response.status_code == 422

    def test_wrong_type_for_generator(self, client: TestClient) -> None:
        """API should not crash when 'generator' is an integer instead of a string.

        The exact status is deliberately not pinned: the value may be coerced
        to a string or rejected by validation, so 201, 400 and 422 are all
        accepted. Only a server error counts as a failure.
        """
        response = client.post("/v1/datasets", json={"generator": 12345})
        # The key check is that it doesn't crash
        assert response.status_code in (201, 400, 422), (
            f"Unexpected status for integer generator: {response.status_code}"
        )

    def test_extra_fields_ignored(self, client: TestClient) -> None:
        """API should handle unexpected extra fields gracefully."""
        response = client.post(
            "/v1/datasets",
            json={
                "generator": "spiral",
                "params": {"n_spirals": 2, "n_points_per_spiral": 50, "seed": 42},
                "persist": False,
                "evil_field": "<script>alert('xss')</script>",
                "__proto__": {"admin": True},
            },
        )
        # Extra fields should be ignored, request should succeed
        assert response.status_code == 201

    def test_dataset_id_special_characters(self, client: TestClient) -> None:
        """API should handle dataset IDs with special characters without crashing."""
        special_ids = [
            "id with spaces",
            "id<script>alert(1)</script>",
            "id'; DROP TABLE datasets;--",
            "a" * 1000,  # very long ID
        ]
        for special_id in special_ids:
            response = client.get(f"/v1/datasets/{special_id}")
            # Should return 404 (not found) or 422, not 500
            assert response.status_code in (404, 422), (
                f"Unexpected status for ID '{special_id[:50]}': {response.status_code}"
            )

    def test_dataset_id_non_printable_characters(self) -> None:
        """URLs containing non-printable characters are rejected by the HTTP client.

        This exercises ``httpx.URL`` only; no request is sent to the API,
        because the URL cannot be constructed in the first place.
        """
        import httpx

        # Tabs and newlines are invalid in URLs, so httpx refuses to build the
        # URL before anything could reach the API.
        with pytest.raises(httpx.InvalidURL):
            httpx.URL("http://localhost/v1/datasets/id\t\n\r")

    def test_tags_with_special_characters(self, client: TestClient) -> None:
        """Dataset creation with special characters in tags should not crash."""
        response = client.post(
            "/v1/datasets",
            json={
                "generator": "spiral",
                "params": {"n_spirals": 2, "n_points_per_spiral": 50, "seed": 42},
                "persist": False,
                "tags": [
                    "normal-tag",
                    "<script>alert('xss')</script>",
                    "'; DROP TABLE datasets;--",
                    "a" * 500,
                ],
            },
        )
        # Tags are stored as-is (no injection risk in JSON/Pydantic models)
        assert response.status_code == 201

    def test_empty_body_rejected(self, client: TestClient) -> None:
        """API should return 422 for empty body on POST."""
        response = client.post(
            "/v1/datasets",
            content=b"",
            headers={"Content-Type": "application/json"},
        )
        assert response.status_code == 422

    def test_content_type_mismatch(self, client: TestClient) -> None:
        """API should return 422 when the body is form-encoded instead of JSON."""
        response = client.post(
            "/v1/datasets",
            content=b"generator=spiral",
            headers={"Content-Type": "application/x-www-form-urlencoded"},
        )
        assert response.status_code == 422

    def test_generator_name_injection(self, client: TestClient) -> None:
        """Generator name with injection payloads should return 400."""
        injection_names = [
            "'; DROP TABLE generators;--",
            "../generators/spiral",
            "__import__('os').system('rm -rf /')",
        ]
        for name in injection_names:
            response = client.post(
                "/v1/datasets",
                json={"generator": name, "params": {}},
            )
            # Name the payload in the failure message so a failing iteration
            # can be identified without re-running the loop by hand.
            assert response.status_code == 400, (
                f"Unexpected status for generator '{name}': {response.status_code}"
            )
            assert "Unknown generator" in response.json()["detail"], (
                f"Unexpected error detail for generator '{name}'"
            )
|