juniper-data 0.4.2__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {juniper_data-0.4.2 → juniper_data-0.6.0}/PKG-INFO +69 -22
  2. {juniper_data-0.4.2 → juniper_data-0.6.0}/README.md +63 -19
  3. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/__init__.py +1 -1
  4. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/app.py +47 -17
  5. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/middleware.py +48 -0
  6. juniper_data-0.6.0/juniper_data/api/models/__init__.py +1 -0
  7. juniper_data-0.6.0/juniper_data/api/models/health.py +26 -0
  8. juniper_data-0.6.0/juniper_data/api/observability.py +227 -0
  9. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/routes/datasets.py +228 -5
  10. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/routes/generators.py +8 -16
  11. juniper_data-0.6.0/juniper_data/api/routes/health.py +76 -0
  12. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/settings.py +49 -2
  13. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/core/models.py +83 -0
  14. juniper_data-0.6.0/juniper_data/core/secrets.py +25 -0
  15. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/core/split.py +3 -10
  16. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/arc_agi/generator.py +1 -4
  17. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/csv_import/generator.py +10 -17
  18. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/spiral/params.py +1 -3
  19. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/base.py +79 -5
  20. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/kaggle_store.py +1 -3
  21. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/postgres_store.py +88 -14
  22. juniper_data-0.6.0/juniper_data/tests/api/__init__.py +1 -0
  23. juniper_data-0.6.0/juniper_data/tests/api/test_batch_operations.py +449 -0
  24. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/test_api.py +4 -2
  25. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/test_lifecycle_api.py +4 -12
  26. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/test_storage_workflow.py +1 -3
  27. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_api_app.py +8 -7
  28. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_api_routes.py +5 -5
  29. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_api_settings.py +5 -5
  30. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_arc_agi_generator.py +5 -15
  31. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_checkerboard_generator.py +1 -3
  32. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_csv_import_generator.py +71 -89
  33. juniper_data-0.6.0/juniper_data/tests/unit/test_dataset_versioning.py +663 -0
  34. juniper_data-0.6.0/juniper_data/tests/unit/test_health_enhanced.py +138 -0
  35. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_hf_store.py +7 -21
  36. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_main.py +6 -4
  37. juniper_data-0.6.0/juniper_data/tests/unit/test_observability.py +352 -0
  38. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_postgres_store.py +207 -6
  39. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_redis_store.py +3 -9
  40. juniper_data-0.6.0/juniper_data/tests/unit/test_secrets.py +85 -0
  41. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_security_boundaries.py +14 -21
  42. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_storage.py +26 -80
  43. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data.egg-info/PKG-INFO +69 -22
  44. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data.egg-info/SOURCES.txt +10 -0
  45. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data.egg-info/requires.txt +6 -2
  46. {juniper_data-0.4.2 → juniper_data-0.6.0}/pyproject.toml +13 -6
  47. juniper_data-0.4.2/juniper_data/api/routes/health.py +0 -49
  48. {juniper_data-0.4.2 → juniper_data-0.6.0}/LICENSE +0 -0
  49. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/__main__.py +0 -0
  50. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/__init__.py +0 -0
  51. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/routes/__init__.py +0 -0
  52. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/api/security.py +0 -0
  53. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/core/__init__.py +0 -0
  54. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/core/artifacts.py +0 -0
  55. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/core/dataset_id.py +0 -0
  56. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/__init__.py +0 -0
  57. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/arc_agi/__init__.py +0 -0
  58. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/arc_agi/params.py +0 -0
  59. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/checkerboard/__init__.py +0 -0
  60. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/checkerboard/generator.py +0 -0
  61. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/checkerboard/params.py +0 -0
  62. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/circles/__init__.py +0 -0
  63. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/circles/generator.py +0 -0
  64. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/circles/params.py +0 -0
  65. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/csv_import/__init__.py +0 -0
  66. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/csv_import/params.py +0 -0
  67. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/gaussian/__init__.py +0 -0
  68. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/gaussian/generator.py +0 -0
  69. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/gaussian/params.py +0 -0
  70. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/mnist/__init__.py +0 -0
  71. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/mnist/generator.py +0 -0
  72. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/mnist/params.py +0 -0
  73. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/spiral/__init__.py +0 -0
  74. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/spiral/defaults.py +0 -0
  75. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/spiral/generator.py +0 -0
  76. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/xor/__init__.py +0 -0
  77. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/xor/generator.py +0 -0
  78. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/generators/xor/params.py +0 -0
  79. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/__init__.py +0 -0
  80. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/cached.py +0 -0
  81. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/hf_store.py +0 -0
  82. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/local_fs.py +0 -0
  83. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/memory.py +0 -0
  84. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/storage/redis_store.py +0 -0
  85. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/__init__.py +0 -0
  86. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/conftest.py +0 -0
  87. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/fixtures/generate_golden_datasets.py +0 -0
  88. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/__init__.py +0 -0
  89. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/test_e2e_workflow.py +0 -0
  90. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/integration/test_security_integration.py +0 -0
  91. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/performance/__init__.py +0 -0
  92. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/performance/test_generator_benchmarks.py +0 -0
  93. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/performance/test_storage_benchmarks.py +0 -0
  94. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/__init__.py +0 -0
  95. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_artifacts.py +0 -0
  96. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_cached_store.py +0 -0
  97. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_circles_generator.py +0 -0
  98. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_dataset_id.py +0 -0
  99. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_gaussian_generator.py +0 -0
  100. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_init.py +0 -0
  101. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_kaggle_store.py +0 -0
  102. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_lifecycle.py +0 -0
  103. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_middleware.py +0 -0
  104. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_mnist_generator.py +0 -0
  105. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_security.py +0 -0
  106. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_spiral_generator.py +0 -0
  107. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_split.py +0 -0
  108. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data/tests/unit/test_xor_generator.py +0 -0
  109. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data.egg-info/dependency_links.txt +0 -0
  110. {juniper_data-0.4.2 → juniper_data-0.6.0}/juniper_data.egg-info/top_level.txt +0 -0
  111. {juniper_data-0.4.2 → juniper_data-0.6.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: juniper-data
3
- Version: 0.4.2
3
+ Version: 0.6.0
4
4
  Summary: Dataset generation and management service for the Juniper ecosystem
5
5
  Author: Paul Calnon
6
6
  License: MIT
@@ -25,14 +25,17 @@ Requires-Dist: pytest-benchmark>=4.0.0; extra == "test"
25
25
  Requires-Dist: httpx>=0.24.0; extra == "test"
26
26
  Requires-Dist: coverage[toml]>=7.0.0; extra == "test"
27
27
  Requires-Dist: juniper-data-client>=0.3.0; extra == "test"
28
+ Provides-Extra: observability
29
+ Requires-Dist: prometheus-client>=0.20.0; extra == "observability"
30
+ Requires-Dist: sentry-sdk[fastapi]>=2.0.0; extra == "observability"
28
31
  Provides-Extra: dev
29
32
  Requires-Dist: ruff>=0.9.0; extra == "dev"
30
33
  Requires-Dist: mypy>=1.0.0; extra == "dev"
31
- Requires-Dist: bandit[sarif]>=1.7.9; extra == "dev"
34
+ Requires-Dist: bandit[sarif]>=1.9.4; extra == "dev"
32
35
  Requires-Dist: pip-audit>=2.7.0; extra == "dev"
33
36
  Requires-Dist: pre-commit>=3.0.0; extra == "dev"
34
37
  Provides-Extra: all
35
- Requires-Dist: juniper-data[api,arc-agi,dev,test]; extra == "all"
38
+ Requires-Dist: juniper-data[api,arc-agi,dev,observability,test]; extra == "all"
36
39
  Dynamic: license-file
37
40
 
38
41
  # Juniper Data
@@ -52,7 +55,7 @@ Verified compatible versions:
52
55
  |---|---|---|---|---|---|
53
56
  | 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
54
57
 
55
- For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
58
+ For full-stack Docker deployment and integration tests, see `juniper-deploy`.
56
59
 
57
60
  ## Architecture
58
61
 
@@ -94,10 +97,20 @@ JuniperData is the **foundational data layer** of the Juniper ecosystem. Juniper
94
97
 
95
98
  ```bash
96
99
  # Full stack with all three services:
97
- git clone https://github.com/pcalnon/juniper-deploy.git
100
+ git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
98
101
  cd juniper-deploy && docker compose up --build
99
102
  ```
100
103
 
104
+ ## Dependency Lockfile
105
+
106
+ The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
107
+
108
+ **Regenerate after changing dependencies in `pyproject.toml`:**
109
+
110
+ ```bash
111
+ uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
112
+ ```
113
+
101
114
  ## Installation
102
115
 
103
116
  ### Basic Installation
@@ -143,28 +156,62 @@ uvicorn juniper_data.api.app:app --reload
143
156
 
144
157
  ## API Endpoints
145
158
 
146
- | Endpoint | Method | Description |
147
- | ------------------------------- | ------ | ---------------------------------- |
148
- | `/v1/health` | GET | Health check endpoint |
149
- | `/v1/datasets` | GET | List available datasets |
150
- | `/v1/datasets/{id}` | GET | Get a specific dataset |
151
- | `/v1/generators/spiral` | POST | Generate a new spiral dataset |
152
- | `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
159
+ | Endpoint | Method | Description |
160
+ | ------------------------------------- | ------ | ---------------------------------------------------- |
161
+ | `/v1/health` | GET | Health check |
162
+ | `/v1/health/live` | GET | Liveness probe |
163
+ | `/v1/health/ready` | GET | Readiness probe (checks storage) |
164
+ | `/v1/generators` | GET | List all generators with schemas |
165
+ | `/v1/generators/{name}/schema` | GET | Get parameter schema for a generator |
166
+ | `/v1/datasets` | POST | Create dataset (or return cached dataset) |
167
+ | `/v1/datasets` | GET | List dataset IDs |
168
+ | `/v1/datasets/filter` | GET | Filter metadata by generator/tags/date/name/version |
169
+ | `/v1/datasets/stats` | GET | Aggregate dataset statistics |
170
+ | `/v1/datasets/versions` | GET | List all versions for a logical dataset name |
171
+ | `/v1/datasets/latest` | GET | Get latest version for a logical dataset name |
172
+ | `/v1/datasets/batch-create` | POST | Create multiple datasets |
173
+ | `/v1/datasets/batch-delete` | POST | Delete multiple datasets |
174
+ | `/v1/datasets/batch-tags` | PATCH | Update tags on multiple datasets |
175
+ | `/v1/datasets/batch-export` | POST | Export multiple datasets as ZIP |
176
+ | `/v1/datasets/cleanup-expired` | POST | Delete expired datasets |
177
+ | `/v1/datasets/{id}` | GET | Get dataset metadata |
178
+ | `/v1/datasets/{id}` | DELETE | Delete a dataset |
179
+ | `/v1/datasets/{id}/artifact` | GET | Download NPZ artifact |
180
+ | `/v1/datasets/{id}/preview` | GET | Preview first N samples as JSON |
181
+ | `/v1/datasets/{id}/tags` | PATCH | Add/remove tags on one dataset |
182
+
183
+ See [docs/api/JUNIPER_DATA_API.md](docs/api/JUNIPER_DATA_API.md) for full endpoint documentation including filtering, batch operations, and tagging.
184
+
185
+ ### Named Dataset Versioning
186
+
187
+ `POST /v1/datasets` supports logical names for versioned datasets:
188
+
189
+ - Set `name` to group related datasets into a version series.
190
+ - Each persisted create with the same `name` auto-increments `meta.dataset_version` (`1`, `2`, `3`, ...).
191
+ - Repeating an identical request returns the cached dataset and keeps its existing version.
192
+ - Use `GET /v1/datasets/versions?name=<dataset_name>` to view history and `GET /v1/datasets/latest?name=<dataset_name>` to resolve the latest.
153
193
 
154
194
  ## Project Structure
155
195
 
156
196
  ```bash
157
- JuniperData/
197
+ juniper-data/
158
198
  ├── juniper_data/
159
199
  │ ├── core/ # Core functionality and base classes
160
- │ ├── generators/ # Dataset generators
161
- │ │ └── spiral/ # Spiral dataset generator
200
+ │ ├── generators/ # Dataset generators (8 types)
201
+ │ │ ├── spiral/ # Multi-spiral classification
202
+ │ │ ├── xor/ # XOR classification
203
+ │ │ ├── gaussian/ # Mixture of Gaussians
204
+ │ │ ├── circles/ # Concentric circles
205
+ │ │ ├── checkerboard/ # 2D checkerboard pattern
206
+ │ │ ├── csv_import/ # CSV/JSON file import
207
+ │ │ ├── mnist/ # MNIST / Fashion-MNIST
208
+ │ │ └── arc_agi/ # ARC-AGI visual reasoning
162
209
  │ ├── storage/ # Dataset persistence layer
163
- └── api/ # FastAPI application
164
- └── routes/ # API route handlers
165
- ├── tests/
166
- ├── unit/ # Unit tests
167
- └── integration/ # Integration tests
210
+ ├── api/ # FastAPI application
211
+ └── routes/ # API route handlers
212
+ │ └── tests/ # Test suite
213
+ ├── unit/ # Unit tests
214
+ └── integration/ # Integration tests
168
215
  ├── pyproject.toml # Project configuration
169
216
  └── README.md # This file
170
217
  ```
@@ -186,8 +233,8 @@ pytest --cov=juniper_data --cov-report=html
186
233
  ### Code Formatting
187
234
 
188
235
  ```bash
189
- black juniper_data tests
190
- isort juniper_data tests
236
+ ruff format juniper_data tests
237
+ ruff check --fix juniper_data tests
191
238
  ```
192
239
 
193
240
  ### Type Checking
@@ -15,7 +15,7 @@ Verified compatible versions:
15
15
  |---|---|---|---|---|---|
16
16
  | 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
17
17
 
18
- For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
18
+ For full-stack Docker deployment and integration tests, see `juniper-deploy`.
19
19
 
20
20
  ## Architecture
21
21
 
@@ -57,10 +57,20 @@ JuniperData is the **foundational data layer** of the Juniper ecosystem. Juniper
57
57
 
58
58
  ```bash
59
59
  # Full stack with all three services:
60
- git clone https://github.com/pcalnon/juniper-deploy.git
60
+ git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
61
61
  cd juniper-deploy && docker compose up --build
62
62
  ```
63
63
 
64
+ ## Dependency Lockfile
65
+
66
+ The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
67
+
68
+ **Regenerate after changing dependencies in `pyproject.toml`:**
69
+
70
+ ```bash
71
+ uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
72
+ ```
73
+
64
74
  ## Installation
65
75
 
66
76
  ### Basic Installation
@@ -106,28 +116,62 @@ uvicorn juniper_data.api.app:app --reload
106
116
 
107
117
  ## API Endpoints
108
118
 
109
- | Endpoint | Method | Description |
110
- | ------------------------------- | ------ | ---------------------------------- |
111
- | `/v1/health` | GET | Health check endpoint |
112
- | `/v1/datasets` | GET | List available datasets |
113
- | `/v1/datasets/{id}` | GET | Get a specific dataset |
114
- | `/v1/generators/spiral` | POST | Generate a new spiral dataset |
115
- | `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
119
+ | Endpoint | Method | Description |
120
+ | ------------------------------------- | ------ | ---------------------------------------------------- |
121
+ | `/v1/health` | GET | Health check |
122
+ | `/v1/health/live` | GET | Liveness probe |
123
+ | `/v1/health/ready` | GET | Readiness probe (checks storage) |
124
+ | `/v1/generators` | GET | List all generators with schemas |
125
+ | `/v1/generators/{name}/schema` | GET | Get parameter schema for a generator |
126
+ | `/v1/datasets` | POST | Create dataset (or return cached dataset) |
127
+ | `/v1/datasets` | GET | List dataset IDs |
128
+ | `/v1/datasets/filter` | GET | Filter metadata by generator/tags/date/name/version |
129
+ | `/v1/datasets/stats` | GET | Aggregate dataset statistics |
130
+ | `/v1/datasets/versions` | GET | List all versions for a logical dataset name |
131
+ | `/v1/datasets/latest` | GET | Get latest version for a logical dataset name |
132
+ | `/v1/datasets/batch-create` | POST | Create multiple datasets |
133
+ | `/v1/datasets/batch-delete` | POST | Delete multiple datasets |
134
+ | `/v1/datasets/batch-tags` | PATCH | Update tags on multiple datasets |
135
+ | `/v1/datasets/batch-export` | POST | Export multiple datasets as ZIP |
136
+ | `/v1/datasets/cleanup-expired` | POST | Delete expired datasets |
137
+ | `/v1/datasets/{id}` | GET | Get dataset metadata |
138
+ | `/v1/datasets/{id}` | DELETE | Delete a dataset |
139
+ | `/v1/datasets/{id}/artifact` | GET | Download NPZ artifact |
140
+ | `/v1/datasets/{id}/preview` | GET | Preview first N samples as JSON |
141
+ | `/v1/datasets/{id}/tags` | PATCH | Add/remove tags on one dataset |
142
+
143
+ See [docs/api/JUNIPER_DATA_API.md](docs/api/JUNIPER_DATA_API.md) for full endpoint documentation including filtering, batch operations, and tagging.
144
+
145
+ ### Named Dataset Versioning
146
+
147
+ `POST /v1/datasets` supports logical names for versioned datasets:
148
+
149
+ - Set `name` to group related datasets into a version series.
150
+ - Each persisted create with the same `name` auto-increments `meta.dataset_version` (`1`, `2`, `3`, ...).
151
+ - Repeating an identical request returns the cached dataset and keeps its existing version.
152
+ - Use `GET /v1/datasets/versions?name=<dataset_name>` to view history and `GET /v1/datasets/latest?name=<dataset_name>` to resolve the latest.
116
153
 
117
154
  ## Project Structure
118
155
 
119
156
  ```bash
120
- JuniperData/
157
+ juniper-data/
121
158
  ├── juniper_data/
122
159
  │ ├── core/ # Core functionality and base classes
123
- │ ├── generators/ # Dataset generators
124
- │ │ └── spiral/ # Spiral dataset generator
160
+ │ ├── generators/ # Dataset generators (8 types)
161
+ │ │ ├── spiral/ # Multi-spiral classification
162
+ │ │ ├── xor/ # XOR classification
163
+ │ │ ├── gaussian/ # Mixture of Gaussians
164
+ │ │ ├── circles/ # Concentric circles
165
+ │ │ ├── checkerboard/ # 2D checkerboard pattern
166
+ │ │ ├── csv_import/ # CSV/JSON file import
167
+ │ │ ├── mnist/ # MNIST / Fashion-MNIST
168
+ │ │ └── arc_agi/ # ARC-AGI visual reasoning
125
169
  │ ├── storage/ # Dataset persistence layer
126
- └── api/ # FastAPI application
127
- └── routes/ # API route handlers
128
- ├── tests/
129
- ├── unit/ # Unit tests
130
- └── integration/ # Integration tests
170
+ ├── api/ # FastAPI application
171
+ └── routes/ # API route handlers
172
+ │ └── tests/ # Test suite
173
+ ├── unit/ # Unit tests
174
+ └── integration/ # Integration tests
131
175
  ├── pyproject.toml # Project configuration
132
176
  └── README.md # This file
133
177
  ```
@@ -149,8 +193,8 @@ pytest --cov=juniper_data --cov-report=html
149
193
  ### Code Formatting
150
194
 
151
195
  ```bash
152
- black juniper_data tests
153
- isort juniper_data tests
196
+ ruff format juniper_data tests
197
+ ruff check --fix juniper_data tests
154
198
  ```
155
199
 
156
200
  ### Type Checking
@@ -14,7 +14,7 @@ except ImportError:
14
14
  ARC_AGI_AVAILABLE = False
15
15
  arc_agi = None # type: ignore[assignment]
16
16
 
17
- __version__ = "0.4.2"
17
+ __version__ = "0.6.0"
18
18
  __author__ = "Paul Calnon"
19
19
 
20
20
 
@@ -12,7 +12,15 @@ from fastapi.responses import JSONResponse
12
12
  from juniper_data import __version__
13
13
  from juniper_data.storage import LocalFSDatasetStore
14
14
 
15
- from .middleware import SecurityMiddleware
15
+ from .middleware import RequestBodyLimitMiddleware, SecurityHeadersMiddleware, SecurityMiddleware
16
+ from .observability import (
17
+ PrometheusMiddleware,
18
+ RequestIdMiddleware,
19
+ configure_logging,
20
+ configure_sentry,
21
+ get_prometheus_app,
22
+ set_build_info,
23
+ )
16
24
  from .routes import datasets, generators, health
17
25
  from .security import APIKeyAuth, RateLimiter
18
26
  from .settings import Settings, get_settings
@@ -26,10 +34,11 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
26
34
  store = LocalFSDatasetStore(storage_path)
27
35
  datasets.set_store(store)
28
36
 
29
- logging.basicConfig(
30
- level=getattr(logging, settings.log_level.upper(), logging.INFO),
31
- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
32
- )
37
+ configure_logging(settings.log_level, settings.log_format, "juniper-data")
38
+ configure_sentry(settings.sentry_dsn, "juniper-data", __version__, send_pii=settings.sentry_send_pii, traces_sample_rate=settings.sentry_traces_sample_rate)
39
+ if settings.metrics_enabled:
40
+ set_build_info("juniper_data", __version__)
41
+
33
42
  logger = logging.getLogger("juniper_data")
34
43
  logger.info(f"JuniperData API v{__version__} starting")
35
44
  logger.info(f"Storage path: {storage_path.absolute()}")
@@ -52,28 +61,37 @@ def create_app(settings: Settings | None = None) -> FastAPI:
52
61
  if settings is None:
53
62
  settings = get_settings()
54
63
 
64
+ # Disable interactive API docs when authentication is enabled (production).
65
+ docs_enabled = not settings.api_keys
55
66
  app = FastAPI(
56
67
  title="Juniper Data API",
57
68
  description="Dataset generation and management service for the Juniper ecosystem",
58
69
  version=__version__,
59
70
  lifespan=lifespan,
71
+ docs_url="/docs" if docs_enabled else None,
72
+ redoc_url="/redoc" if docs_enabled else None,
73
+ openapi_url="/openapi.json" if docs_enabled else None,
60
74
  )
61
75
 
62
76
  app.state.settings = settings
63
77
 
64
- # Only allow credentialed CORS requests when origins are explicitly specified.
65
- # Browsers do not permit Access-Control-Allow-Credentials: true with a wildcard
66
- # origin (Access-Control-Allow-Origin: "*"), so the default ["*"] intentionally
67
- # disables credentials unless concrete origins are configured.
78
+ # CORS: only enable when origins are explicitly configured.
68
79
  allow_credentials = bool(settings.cors_origins) and "*" not in settings.cors_origins
69
80
 
70
- app.add_middleware(
71
- CORSMiddleware,
72
- allow_origins=settings.cors_origins,
73
- allow_credentials=allow_credentials,
74
- allow_methods=["*"],
75
- allow_headers=["*"],
76
- )
81
+ if settings.cors_origins:
82
+ app.add_middleware(
83
+ CORSMiddleware,
84
+ allow_origins=settings.cors_origins,
85
+ allow_credentials=allow_credentials,
86
+ allow_methods=["*"],
87
+ allow_headers=["*"],
88
+ )
89
+
90
+ # Request body size limit
91
+ app.add_middleware(RequestBodyLimitMiddleware)
92
+
93
+ # Security headers (wraps the body-limit and CORS middleware; middleware added later wraps this one)
94
+ app.add_middleware(SecurityHeadersMiddleware)
77
95
 
78
96
  api_key_auth = APIKeyAuth(settings.api_keys)
79
97
  rate_limiter = RateLimiter(
@@ -86,15 +104,27 @@ def create_app(settings: Settings | None = None) -> FastAPI:
86
104
  rate_limiter=rate_limiter,
87
105
  )
88
106
 
107
+ # Observability middleware: registered after SecurityMiddleware, so it runs ahead of it (and of CORS)
108
+ # Middleware execution is LIFO: last added runs first.
109
+ # Order: RequestIdMiddleware → PrometheusMiddleware → SecurityMiddleware → SecurityHeaders → RequestBodyLimit → CORS
110
+ if settings.metrics_enabled:
111
+ app.add_middleware(PrometheusMiddleware, service_name="juniper-data", namespace="juniper_data")
112
+ app.add_middleware(RequestIdMiddleware)
113
+
89
114
  app.include_router(health.router, prefix="/v1")
90
115
  app.include_router(generators.router, prefix="/v1")
91
116
  app.include_router(datasets.router, prefix="/v1")
92
117
 
118
+ # Mount Prometheus metrics endpoint
119
+ if settings.metrics_enabled:
120
+ app.mount("/metrics", get_prometheus_app())
121
+
93
122
  @app.exception_handler(ValueError)
94
123
  async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
124
+ logging.getLogger("juniper_data").debug("Validation error: %s", exc)
95
125
  return JSONResponse(
96
126
  status_code=400,
97
- content={"detail": str(exc)},
127
+ content={"detail": "Invalid request parameters"},
98
128
  )
99
129
 
100
130
  @app.exception_handler(Exception)
@@ -16,6 +16,54 @@ EXEMPT_PATHS = {
16
16
  "/redoc",
17
17
  }
18
18
 
19
+ # Default Content-Security-Policy for API-only services.
20
+ _DEFAULT_CSP = "default-src 'none'; frame-ancestors 'none'"
21
+
22
+
23
+ class SecurityHeadersMiddleware(BaseHTTPMiddleware):
24
+ """Add security headers to all responses.
25
+
26
+ Injects standard security headers (X-Content-Type-Options, X-Frame-Options,
27
+ Referrer-Policy, Permissions-Policy, CSP, and conditional HSTS) into every
28
+ HTTP response.
29
+ """
30
+
31
+ def __init__(self, app: ASGIApp, content_security_policy: str = _DEFAULT_CSP) -> None:
32
+ super().__init__(app)
33
+ self._csp = content_security_policy
34
+
35
+ async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
36
+ response = await call_next(request)
37
+
38
+ response.headers["X-Content-Type-Options"] = "nosniff"
39
+ response.headers["X-Frame-Options"] = "DENY"
40
+ response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
41
+ response.headers["Permissions-Policy"] = "camera=(), microphone=(), geolocation=()"
42
+ response.headers["Content-Security-Policy"] = self._csp
43
+
44
+ # Only add HSTS when a reverse proxy reports the original request as HTTPS (X-Forwarded-Proto header)
45
+ if request.headers.get("X-Forwarded-Proto") == "https":
46
+ response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
47
+
48
+ return response
49
+
50
+
51
+ _MAX_REQUEST_BODY_BYTES = 10 * 1024 * 1024 # 10 MB
52
+
53
+
54
+ class RequestBodyLimitMiddleware(BaseHTTPMiddleware):
55
+ """Reject requests whose Content-Length exceeds a configurable limit."""
56
+
57
+ def __init__(self, app: ASGIApp, max_bytes: int = _MAX_REQUEST_BODY_BYTES) -> None:
58
+ super().__init__(app)
59
+ self._max_bytes = max_bytes
60
+
61
+ async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response:
62
+ content_length = request.headers.get("content-length")
63
+ if content_length is not None and int(content_length) > self._max_bytes:
64
+ return JSONResponse(status_code=413, content={"detail": "Request body too large"})
65
+ return await call_next(request)
66
+
19
67
 
20
68
  class SecurityMiddleware(BaseHTTPMiddleware):
21
69
  """Middleware for API key authentication and rate limiting.
@@ -0,0 +1 @@
1
+ """API response models."""
@@ -0,0 +1,26 @@
1
+ """Health check response models for standardized readiness reporting."""
2
+
3
+ from datetime import datetime
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
+ class DependencyStatus(BaseModel):
10
+ """Health status of a single dependency."""
11
+
12
+ name: str
13
+ status: Literal["healthy", "unhealthy", "degraded", "not_configured"]
14
+ latency_ms: float | None = None
15
+ message: str | None = None
16
+
17
+
18
+ class ReadinessResponse(BaseModel):
19
+ """Standard /v1/health/ready response for all Juniper services."""
20
+
21
+ status: Literal["ready", "degraded", "not_ready"]
22
+ version: str
23
+ service: str
24
+ timestamp: float = Field(default_factory=lambda: datetime.now().timestamp())
25
+ dependencies: dict[str, DependencyStatus] = {}
26
+ details: dict[str, object] = {}