juniper-data 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. juniper_data-0.4.2/LICENSE +9 -0
  2. juniper_data-0.4.2/PKG-INFO +216 -0
  3. juniper_data-0.4.2/README.md +179 -0
  4. juniper_data-0.4.2/juniper_data/__init__.py +88 -0
  5. juniper_data-0.4.2/juniper_data/__main__.py +78 -0
  6. juniper_data-0.4.2/juniper_data/api/__init__.py +10 -0
  7. juniper_data-0.4.2/juniper_data/api/app.py +111 -0
  8. juniper_data-0.4.2/juniper_data/api/middleware.py +95 -0
  9. juniper_data-0.4.2/juniper_data/api/routes/__init__.py +9 -0
  10. juniper_data-0.4.2/juniper_data/api/routes/datasets.py +414 -0
  11. juniper_data-0.4.2/juniper_data/api/routes/generators.py +125 -0
  12. juniper_data-0.4.2/juniper_data/api/routes/health.py +49 -0
  13. juniper_data-0.4.2/juniper_data/api/security.py +238 -0
  14. juniper_data-0.4.2/juniper_data/api/settings.py +109 -0
  15. juniper_data-0.4.2/juniper_data/core/__init__.py +32 -0
  16. juniper_data-0.4.2/juniper_data/core/artifacts.py +63 -0
  17. juniper_data-0.4.2/juniper_data/core/dataset_id.py +38 -0
  18. juniper_data-0.4.2/juniper_data/core/models.py +135 -0
  19. juniper_data-0.4.2/juniper_data/core/split.py +120 -0
  20. juniper_data-0.4.2/juniper_data/generators/__init__.py +15 -0
  21. juniper_data-0.4.2/juniper_data/generators/arc_agi/__init__.py +11 -0
  22. juniper_data-0.4.2/juniper_data/generators/arc_agi/generator.py +229 -0
  23. juniper_data-0.4.2/juniper_data/generators/arc_agi/params.py +56 -0
  24. juniper_data-0.4.2/juniper_data/generators/checkerboard/__init__.py +15 -0
  25. juniper_data-0.4.2/juniper_data/generators/checkerboard/generator.py +114 -0
  26. juniper_data-0.4.2/juniper_data/generators/checkerboard/params.py +32 -0
  27. juniper_data-0.4.2/juniper_data/generators/circles/__init__.py +11 -0
  28. juniper_data-0.4.2/juniper_data/generators/circles/generator.py +112 -0
  29. juniper_data-0.4.2/juniper_data/generators/circles/params.py +31 -0
  30. juniper_data-0.4.2/juniper_data/generators/csv_import/__init__.py +15 -0
  31. juniper_data-0.4.2/juniper_data/generators/csv_import/generator.py +198 -0
  32. juniper_data-0.4.2/juniper_data/generators/csv_import/params.py +48 -0
  33. juniper_data-0.4.2/juniper_data/generators/gaussian/__init__.py +11 -0
  34. juniper_data-0.4.2/juniper_data/generators/gaussian/generator.py +149 -0
  35. juniper_data-0.4.2/juniper_data/generators/gaussian/params.py +53 -0
  36. juniper_data-0.4.2/juniper_data/generators/mnist/__init__.py +11 -0
  37. juniper_data-0.4.2/juniper_data/generators/mnist/generator.py +124 -0
  38. juniper_data-0.4.2/juniper_data/generators/mnist/params.py +39 -0
  39. juniper_data-0.4.2/juniper_data/generators/spiral/__init__.py +57 -0
  40. juniper_data-0.4.2/juniper_data/generators/spiral/defaults.py +39 -0
  41. juniper_data-0.4.2/juniper_data/generators/spiral/generator.py +206 -0
  42. juniper_data-0.4.2/juniper_data/generators/spiral/params.py +148 -0
  43. juniper_data-0.4.2/juniper_data/generators/xor/__init__.py +11 -0
  44. juniper_data-0.4.2/juniper_data/generators/xor/generator.py +162 -0
  45. juniper_data-0.4.2/juniper_data/generators/xor/params.py +30 -0
  46. juniper_data-0.4.2/juniper_data/storage/__init__.py +120 -0
  47. juniper_data-0.4.2/juniper_data/storage/base.py +279 -0
  48. juniper_data-0.4.2/juniper_data/storage/cached.py +211 -0
  49. juniper_data-0.4.2/juniper_data/storage/hf_store.py +257 -0
  50. juniper_data-0.4.2/juniper_data/storage/kaggle_store.py +333 -0
  51. juniper_data-0.4.2/juniper_data/storage/local_fs.py +232 -0
  52. juniper_data-0.4.2/juniper_data/storage/memory.py +136 -0
  53. juniper_data-0.4.2/juniper_data/storage/postgres_store.py +373 -0
  54. juniper_data-0.4.2/juniper_data/storage/redis_store.py +264 -0
  55. juniper_data-0.4.2/juniper_data/tests/__init__.py +1 -0
  56. juniper_data-0.4.2/juniper_data/tests/conftest.py +68 -0
  57. juniper_data-0.4.2/juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
  58. juniper_data-0.4.2/juniper_data/tests/integration/__init__.py +1 -0
  59. juniper_data-0.4.2/juniper_data/tests/integration/test_api.py +283 -0
  60. juniper_data-0.4.2/juniper_data/tests/integration/test_e2e_workflow.py +378 -0
  61. juniper_data-0.4.2/juniper_data/tests/integration/test_lifecycle_api.py +304 -0
  62. juniper_data-0.4.2/juniper_data/tests/integration/test_security_integration.py +189 -0
  63. juniper_data-0.4.2/juniper_data/tests/integration/test_storage_workflow.py +259 -0
  64. juniper_data-0.4.2/juniper_data/tests/performance/__init__.py +1 -0
  65. juniper_data-0.4.2/juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
  66. juniper_data-0.4.2/juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
  67. juniper_data-0.4.2/juniper_data/tests/unit/__init__.py +1 -0
  68. juniper_data-0.4.2/juniper_data/tests/unit/test_api_app.py +206 -0
  69. juniper_data-0.4.2/juniper_data/tests/unit/test_api_routes.py +407 -0
  70. juniper_data-0.4.2/juniper_data/tests/unit/test_api_settings.py +100 -0
  71. juniper_data-0.4.2/juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
  72. juniper_data-0.4.2/juniper_data/tests/unit/test_artifacts.py +145 -0
  73. juniper_data-0.4.2/juniper_data/tests/unit/test_cached_store.py +423 -0
  74. juniper_data-0.4.2/juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
  75. juniper_data-0.4.2/juniper_data/tests/unit/test_circles_generator.py +256 -0
  76. juniper_data-0.4.2/juniper_data/tests/unit/test_csv_import_generator.py +345 -0
  77. juniper_data-0.4.2/juniper_data/tests/unit/test_dataset_id.py +181 -0
  78. juniper_data-0.4.2/juniper_data/tests/unit/test_gaussian_generator.py +333 -0
  79. juniper_data-0.4.2/juniper_data/tests/unit/test_hf_store.py +416 -0
  80. juniper_data-0.4.2/juniper_data/tests/unit/test_init.py +93 -0
  81. juniper_data-0.4.2/juniper_data/tests/unit/test_kaggle_store.py +469 -0
  82. juniper_data-0.4.2/juniper_data/tests/unit/test_lifecycle.py +394 -0
  83. juniper_data-0.4.2/juniper_data/tests/unit/test_main.py +127 -0
  84. juniper_data-0.4.2/juniper_data/tests/unit/test_middleware.py +79 -0
  85. juniper_data-0.4.2/juniper_data/tests/unit/test_mnist_generator.py +370 -0
  86. juniper_data-0.4.2/juniper_data/tests/unit/test_postgres_store.py +490 -0
  87. juniper_data-0.4.2/juniper_data/tests/unit/test_redis_store.py +500 -0
  88. juniper_data-0.4.2/juniper_data/tests/unit/test_security.py +281 -0
  89. juniper_data-0.4.2/juniper_data/tests/unit/test_security_boundaries.py +517 -0
  90. juniper_data-0.4.2/juniper_data/tests/unit/test_spiral_generator.py +566 -0
  91. juniper_data-0.4.2/juniper_data/tests/unit/test_split.py +245 -0
  92. juniper_data-0.4.2/juniper_data/tests/unit/test_storage.py +767 -0
  93. juniper_data-0.4.2/juniper_data/tests/unit/test_xor_generator.py +223 -0
  94. juniper_data-0.4.2/juniper_data.egg-info/PKG-INFO +216 -0
  95. juniper_data-0.4.2/juniper_data.egg-info/SOURCES.txt +98 -0
  96. juniper_data-0.4.2/juniper_data.egg-info/dependency_links.txt +1 -0
  97. juniper_data-0.4.2/juniper_data.egg-info/requires.txt +31 -0
  98. juniper_data-0.4.2/juniper_data.egg-info/top_level.txt +1 -0
  99. juniper_data-0.4.2/pyproject.toml +224 -0
  100. juniper_data-0.4.2/setup.cfg +4 -0
@@ -0,0 +1,9 @@
1
+ # Juniper Canopy project
2
+
3
+ Copyright 2024, 2025 Paul Calnon
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,216 @@
1
+ Metadata-Version: 2.4
2
+ Name: juniper-data
3
+ Version: 0.4.2
4
+ Summary: Dataset generation and management service for the Juniper ecosystem
5
+ Author: Paul Calnon
6
+ License: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy>=1.24.0
11
+ Requires-Dist: pydantic>=2.0.0
12
+ Requires-Dist: python-dotenv>=1.0.0
13
+ Provides-Extra: arc-agi
14
+ Requires-Dist: arc-agi>=0.9.0; extra == "arc-agi"
15
+ Provides-Extra: api
16
+ Requires-Dist: fastapi>=0.100.0; extra == "api"
17
+ Requires-Dist: uvicorn[standard]>=0.23.0; extra == "api"
18
+ Requires-Dist: pydantic-settings>=2.0.0; extra == "api"
19
+ Provides-Extra: test
20
+ Requires-Dist: pytest>=7.0.0; extra == "test"
21
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
22
+ Requires-Dist: pytest-timeout>=2.2.0; extra == "test"
23
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
24
+ Requires-Dist: pytest-benchmark>=4.0.0; extra == "test"
25
+ Requires-Dist: httpx>=0.24.0; extra == "test"
26
+ Requires-Dist: coverage[toml]>=7.0.0; extra == "test"
27
+ Requires-Dist: juniper-data-client>=0.3.0; extra == "test"
28
+ Provides-Extra: dev
29
+ Requires-Dist: ruff>=0.9.0; extra == "dev"
30
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
31
+ Requires-Dist: bandit[sarif]>=1.7.9; extra == "dev"
32
+ Requires-Dist: pip-audit>=2.7.0; extra == "dev"
33
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
34
+ Provides-Extra: all
35
+ Requires-Dist: juniper-data[api,arc-agi,dev,test]; extra == "all"
36
+ Dynamic: license-file
37
+
38
+ # Juniper Data
39
+
40
+ Dataset generation and management service for the Juniper ecosystem.
41
+
42
+ ## Overview
43
+
44
+ Juniper Data provides a centralized service for generating, storing, and serving datasets used by the Juniper neural network projects. It supports various dataset types including the classic two-spiral classification problem.
45
+
46
+ ## Ecosystem Compatibility
47
+
48
+ This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
49
+ Verified compatible versions:
50
+
51
+ | juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
52
+ |---|---|---|---|---|---|
53
+ | 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
54
+
55
+ For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
56
+
57
+ ## Architecture
58
+
59
+ JuniperData is the **foundational data layer** of the Juniper ecosystem. JuniperCascor and juniper-canopy both call JuniperData to generate and retrieve datasets.
60
+
61
+ ```
62
+ ┌─────────────────────┐ REST+WS ┌──────────────────────┐
63
+ │ juniper-canopy │ ◄──────────────► │ JuniperCascor │
64
+ │ Dashboard │ │ Training Svc │
65
+ │ Port 8050 │ │ Port 8200 │
66
+ └──────────┬──────────┘ └──────────┬───────────┘
67
+ │ REST │ REST
68
+ ▼ ▼
69
+ ┌──────────────────────────────────────────────────────────────┐
70
+ │ JuniperData ◄── (this service) │
71
+ │ Dataset Service · Port 8100 │
72
+ └──────────────────────────────────────────────────────────────┘
73
+ ```
74
+
75
+ **Data contract**: datasets are served as NPZ archives with keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full` (all `float32`).
76
+
77
+ ## Related Services
78
+
79
+ | Service | Relationship | Environment Variable |
80
+ |---------|-------------|---------------------|
81
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes JuniperData for training datasets | `JUNIPER_DATA_URL=http://localhost:8100` |
82
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes JuniperData for visualization data | `JUNIPER_DATA_URL=http://localhost:8100` |
83
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI client library for this service | `pip install juniper-data-client` |
84
+
85
+ ### Service Configuration
86
+
87
+ | Variable | Default | Description |
88
+ |----------|---------|-------------|
89
+ | `JUNIPER_DATA_HOST` | `0.0.0.0` | Listen address |
90
+ | `JUNIPER_DATA_PORT` | `8100` | Service port |
91
+ | `JUNIPER_DATA_LOG_LEVEL` | `INFO` | Log verbosity |
92
+
93
+ ### Docker Deployment
94
+
95
+ ```bash
96
+ # Full stack with all three services:
97
+ git clone https://github.com/pcalnon/juniper-deploy.git
98
+ cd juniper-deploy && docker compose up --build
99
+ ```
100
+
101
+ ## Installation
102
+
103
+ ### Basic Installation
104
+
105
+ ```bash
106
+ pip install -e .
107
+ ```
108
+
109
+ ### With API Support
110
+
111
+ ```bash
112
+ pip install -e ".[api]"
113
+ ```
114
+
115
+ ### Development Installation
116
+
117
+ ```bash
118
+ pip install -e ".[dev]"
119
+ ```
120
+
121
+ ### Full Installation
122
+
123
+ ```bash
124
+ pip install -e ".[all]"
125
+ ```
126
+
127
+ ## Quick Start
128
+
129
+ ### Generate a Spiral Dataset
130
+
131
+ ```python
132
+ from juniper_data.generators.spiral import SpiralGenerator
133
+
134
+ generator = SpiralGenerator()
135
+ dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
136
+ ```
137
+
138
+ ### Start the API Server
139
+
140
+ ```bash
141
+ uvicorn juniper_data.api.app:app --reload
142
+ ```
143
+
144
+ ## API Endpoints
145
+
146
+ | Endpoint | Method | Description |
147
+ | ------------------------------- | ------ | ---------------------------------- |
148
+ | `/v1/health` | GET | Health check endpoint |
149
+ | `/v1/datasets` | GET | List available datasets |
150
+ | `/v1/datasets/{id}` | GET | Get a specific dataset |
151
+ | `/v1/generators/spiral` | POST | Generate a new spiral dataset |
152
+ | `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
153
+
154
+ ## Project Structure
155
+
156
+ ```bash
157
+ JuniperData/
158
+ ├── juniper_data/
159
+ │ ├── core/ # Core functionality and base classes
160
+ │ ├── generators/ # Dataset generators
161
+ │ │ └── spiral/ # Spiral dataset generator
162
+ │ ├── storage/ # Dataset persistence layer
163
+ │ └── api/ # FastAPI application
164
+ │ └── routes/ # API route handlers
165
+ ├── tests/
166
+ │ ├── unit/ # Unit tests
167
+ │ └── integration/ # Integration tests
168
+ ├── pyproject.toml # Project configuration
169
+ └── README.md # This file
170
+ ```
171
+
172
+ ## Development
173
+
174
+ ### Running Tests
175
+
176
+ ```bash
177
+ pytest
178
+ ```
179
+
180
+ ### Running Tests with Coverage
181
+
182
+ ```bash
183
+ pytest --cov=juniper_data --cov-report=html
184
+ ```
185
+
186
+ ### Code Formatting
187
+
188
+ ```bash
189
+ ruff format juniper_data
190
+ ruff check --select I --fix juniper_data
191
+ ```
192
+
193
+ ### Type Checking
194
+
195
+ ```bash
196
+ mypy juniper_data
197
+ ```
198
+
199
+ ## Juniper Ecosystem
200
+
201
+ | Repository | Description |
202
+ |-----------|-------------|
203
+ | [juniper-data](https://github.com/pcalnon/juniper-data) | Dataset generation service (this repo) |
204
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | CasCor neural network training service |
205
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Real-time monitoring dashboard |
206
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI: `juniper-data-client` |
207
+ | [juniper-cascor-client](https://github.com/pcalnon/juniper-cascor-client) | PyPI: `juniper-cascor-client` |
208
+ | [juniper-cascor-worker](https://github.com/pcalnon/juniper-cascor-worker) | PyPI: `juniper-cascor-worker` |
209
+
210
+ ## License
211
+
212
+ MIT License - Copyright (c) 2024-2026 Paul Calnon
213
+
214
+ ## Git Leaks
215
+
216
+ ![gitleaks badge](https://img.shields.io/badge/protected%20by-gitleaks-blue)
@@ -0,0 +1,179 @@
1
+ # Juniper Data
2
+
3
+ Dataset generation and management service for the Juniper ecosystem.
4
+
5
+ ## Overview
6
+
7
+ Juniper Data provides a centralized service for generating, storing, and serving datasets used by the Juniper neural network projects. It supports various dataset types including the classic two-spiral classification problem.
8
+
9
+ ## Ecosystem Compatibility
10
+
11
+ This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
12
+ Verified compatible versions:
13
+
14
+ | juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
15
+ |---|---|---|---|---|---|
16
+ | 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
17
+
18
+ For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
19
+
20
+ ## Architecture
21
+
22
+ JuniperData is the **foundational data layer** of the Juniper ecosystem. JuniperCascor and juniper-canopy both call JuniperData to generate and retrieve datasets.
23
+
24
+ ```
25
+ ┌─────────────────────┐ REST+WS ┌──────────────────────┐
26
+ │ juniper-canopy │ ◄──────────────► │ JuniperCascor │
27
+ │ Dashboard │ │ Training Svc │
28
+ │ Port 8050 │ │ Port 8200 │
29
+ └──────────┬──────────┘ └──────────┬───────────┘
30
+ │ REST │ REST
31
+ ▼ ▼
32
+ ┌──────────────────────────────────────────────────────────────┐
33
+ │ JuniperData ◄── (this service) │
34
+ │ Dataset Service · Port 8100 │
35
+ └──────────────────────────────────────────────────────────────┘
36
+ ```
37
+
38
+ **Data contract**: datasets are served as NPZ archives with keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full` (all `float32`).
39
+
40
+ ## Related Services
41
+
42
+ | Service | Relationship | Environment Variable |
43
+ |---------|-------------|---------------------|
44
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes JuniperData for training datasets | `JUNIPER_DATA_URL=http://localhost:8100` |
45
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes JuniperData for visualization data | `JUNIPER_DATA_URL=http://localhost:8100` |
46
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI client library for this service | `pip install juniper-data-client` |
47
+
48
+ ### Service Configuration
49
+
50
+ | Variable | Default | Description |
51
+ |----------|---------|-------------|
52
+ | `JUNIPER_DATA_HOST` | `0.0.0.0` | Listen address |
53
+ | `JUNIPER_DATA_PORT` | `8100` | Service port |
54
+ | `JUNIPER_DATA_LOG_LEVEL` | `INFO` | Log verbosity |
55
+
56
+ ### Docker Deployment
57
+
58
+ ```bash
59
+ # Full stack with all three services:
60
+ git clone https://github.com/pcalnon/juniper-deploy.git
61
+ cd juniper-deploy && docker compose up --build
62
+ ```
63
+
64
+ ## Installation
65
+
66
+ ### Basic Installation
67
+
68
+ ```bash
69
+ pip install -e .
70
+ ```
71
+
72
+ ### With API Support
73
+
74
+ ```bash
75
+ pip install -e ".[api]"
76
+ ```
77
+
78
+ ### Development Installation
79
+
80
+ ```bash
81
+ pip install -e ".[dev]"
82
+ ```
83
+
84
+ ### Full Installation
85
+
86
+ ```bash
87
+ pip install -e ".[all]"
88
+ ```
89
+
90
+ ## Quick Start
91
+
92
+ ### Generate a Spiral Dataset
93
+
94
+ ```python
95
+ from juniper_data.generators.spiral import SpiralGenerator
96
+
97
+ generator = SpiralGenerator()
98
+ dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
99
+ ```
100
+
101
+ ### Start the API Server
102
+
103
+ ```bash
104
+ uvicorn juniper_data.api.app:app --reload
105
+ ```
106
+
107
+ ## API Endpoints
108
+
109
+ | Endpoint | Method | Description |
110
+ | ------------------------------- | ------ | ---------------------------------- |
111
+ | `/v1/health` | GET | Health check endpoint |
112
+ | `/v1/datasets` | GET | List available datasets |
113
+ | `/v1/datasets/{id}` | GET | Get a specific dataset |
114
+ | `/v1/generators/spiral` | POST | Generate a new spiral dataset |
115
+ | `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
116
+
117
+ ## Project Structure
118
+
119
+ ```bash
120
+ JuniperData/
121
+ ├── juniper_data/
122
+ │ ├── core/ # Core functionality and base classes
123
+ │ ├── generators/ # Dataset generators
124
+ │ │ └── spiral/ # Spiral dataset generator
125
+ │ ├── storage/ # Dataset persistence layer
126
+ │ └── api/ # FastAPI application
127
+ │ └── routes/ # API route handlers
128
+ ├── tests/
129
+ │ ├── unit/ # Unit tests
130
+ │ └── integration/ # Integration tests
131
+ ├── pyproject.toml # Project configuration
132
+ └── README.md # This file
133
+ ```
134
+
135
+ ## Development
136
+
137
+ ### Running Tests
138
+
139
+ ```bash
140
+ pytest
141
+ ```
142
+
143
+ ### Running Tests with Coverage
144
+
145
+ ```bash
146
+ pytest --cov=juniper_data --cov-report=html
147
+ ```
148
+
149
+ ### Code Formatting
150
+
151
+ ```bash
152
+ ruff format juniper_data
153
+ ruff check --select I --fix juniper_data
154
+ ```
155
+
156
+ ### Type Checking
157
+
158
+ ```bash
159
+ mypy juniper_data
160
+ ```
161
+
162
+ ## Juniper Ecosystem
163
+
164
+ | Repository | Description |
165
+ |-----------|-------------|
166
+ | [juniper-data](https://github.com/pcalnon/juniper-data) | Dataset generation service (this repo) |
167
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | CasCor neural network training service |
168
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Real-time monitoring dashboard |
169
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI: `juniper-data-client` |
170
+ | [juniper-cascor-client](https://github.com/pcalnon/juniper-cascor-client) | PyPI: `juniper-cascor-client` |
171
+ | [juniper-cascor-worker](https://github.com/pcalnon/juniper-cascor-worker) | PyPI: `juniper-cascor-worker` |
172
+
173
+ ## License
174
+
175
+ MIT License - Copyright (c) 2024-2026 Paul Calnon
176
+
177
+ ## Git Leaks
178
+
179
+ ![gitleaks badge](https://img.shields.io/badge/protected%20by-gitleaks-blue)
@@ -0,0 +1,88 @@
1
+ """
2
+ Juniper Data - Dataset generation and management service for the Juniper ecosystem.
3
+ """
4
+
5
+ import os
6
+
7
+ from dotenv import load_dotenv
8
+
9
+ try:
10
+ import arc_agi
11
+
12
+ ARC_AGI_AVAILABLE = True
13
+ except ImportError:
14
+ ARC_AGI_AVAILABLE = False
15
+ arc_agi = None # type: ignore[assignment]
16
+
17
+ __version__ = "0.4.2"
18
+ __author__ = "Paul Calnon"
19
+
20
+
21
def get_arc_agi_env() -> bool:
    """
    Report whether ``ARC_AGI_ENV`` is set, after attempting a ``.env`` load.

    ``load_dotenv()`` is called on every invocation and its own return value is
    deliberately ignored: the answer depends only on what the process
    environment contains once the load attempt has finished.

    Returns:
        bool: True if ``ARC_AGI_ENV`` holds a non-empty value, otherwise False.
    """
    load_dotenv()
    arc_agi_env = os.getenv("ARC_AGI_ENV")
    return bool(arc_agi_env)
35
+
36
+
37
def reload_arc_agi_env() -> bool:
    """
    Re-read the ``.env`` file, refreshing variables that are already set.

    ``load_dotenv()`` leaves existing environment variables untouched by default,
    which would make a "reload" a no-op for anything loaded earlier. Passing
    ``override=True`` makes the values from the ``.env`` file replace whatever is
    currently in ``os.environ``.

    Returns:
        bool: The truthiness of ``load_dotenv``'s result, i.e. True if environment
        variables were loaded from a .env file, False otherwise.
    """
    return bool(load_dotenv(override=True))
45
+
46
+
47
+ def get_arc_api_key() -> str | None:
48
+ """
49
+ Return the current value of the ARC_API_KEY environment variable as a string.
50
+ """
51
+ return os.getenv("ARC_API_KEY") or None
52
+
53
+
54
+ def get_arc_agi_api_url() -> str | None:
55
+ """
56
+ Return the current value of the ARC_AGI_API as a URL/endpoint string.
57
+
58
+ Reading the environment at call time avoids import-time side effects
59
+ and makes it easier to adjust configuration in tests.
60
+ """
61
+ return os.getenv("ARC_AGI_API") or None
62
+
63
+
64
def get_arc_agi_arcade() -> "arc_agi.Arcade | None":
    """
    Create and return an :class:`arc_agi.Arcade` instance configured from environment variables.

    The API key is read from the environment via :func:`get_arc_api_key` at call
    time, avoiding import-time side effects and making it easier to adjust
    configuration in tests. When no key is configured, ``None`` is passed as
    ``arc_api_key``.

    Returns:
        The newly constructed ``arc_agi.Arcade`` instance. The ``| None`` in the
        annotation is retained only for backward compatibility with existing
        callers; the instance is returned as constructed, without any
        truthiness-based coercion to ``None``.

    Raises:
        ImportError: If the ``arc-agi`` package is not installed.
    """
    if not ARC_AGI_AVAILABLE:
        raise ImportError("arc-agi package not installed. Install with: pip install 'juniper-data[arc-agi]'")
    # The key is passed explicitly so the value resolved by get_arc_api_key()
    # (which treats an empty string as "not set") is the one that gets used.
    return arc_agi.Arcade(arc_api_key=get_arc_api_key())
78
+
79
+
80
+ # Deprecated
81
def get_arc_agi_api() -> str | None:
    """
    Deprecated alias for :func:`get_arc_agi_api_url`.

    Delegates directly to :func:`get_arc_agi_api_url` and returns its result
    unchanged. This alias will be removed in a future release; call
    :func:`get_arc_agi_api_url` instead.
    """
    api_url = get_arc_agi_api_url()
    return api_url
@@ -0,0 +1,78 @@
1
+ """Entry point for running the JuniperData API with uvicorn."""
2
+
3
+ import argparse
4
+ import sys
5
+
6
+
7
def main() -> int:
    """Run the JuniperData API server.

    Command-line options take precedence over the values held by ``Settings``;
    any option left unset falls back to the corresponding setting.

    Returns:
        1 if uvicorn is not installed, otherwise 0 once ``uvicorn.run`` returns.
    """
    try:
        import uvicorn
    except ImportError:
        print("Error: uvicorn not installed. Install with: pip install 'juniper-data[api]'")
        return 1

    from juniper_data.api.settings import Settings

    parser = argparse.ArgumentParser(
        description="Run the JuniperData API server",
        prog="python -m juniper_data",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=None,
        help="Host to bind to (default: from settings, which default to 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind to (default: from settings or 8100)",
    )
    parser.add_argument(
        "--storage-path",
        type=str,
        default=None,
        help="Path to dataset storage directory",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["TRACE", "VERBOSE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "FATAL"],
        default=None,
        help="Logging level",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )

    args = parser.parse_args()

    settings = Settings()

    host = args.host if args.host is not None else settings.host
    port = args.port if args.port is not None else settings.port
    log_level_source = args.log_level if args.log_level is not None else settings.log_level
    log_level = log_level_source.lower()

    # uvicorn only recognises critical/error/warning/info/debug/trace and fails
    # on any other name, so translate the extra levels accepted above.
    # NOTE(review): "verbose" is mapped to "debug" as the nearest standard
    # level; confirm this matches the intended Juniper level ordering.
    uvicorn_level_aliases = {"verbose": "debug", "fatal": "critical"}
    log_level = uvicorn_level_aliases.get(log_level, log_level)

    if args.storage_path is not None:
        import os

        # The application is imported by uvicorn from its import string below,
        # so the override is handed over through the environment.
        os.environ["JUNIPER_DATA_STORAGE_PATH"] = args.storage_path

    uvicorn.run(
        "juniper_data.api.app:app",
        host=host,
        port=port,
        log_level=log_level,
        reload=args.reload,
    )

    return 0
75
+
76
+
77
+ if __name__ == "__main__":
78
+ sys.exit(main())
@@ -0,0 +1,10 @@
1
+ """API module for Juniper Data service."""
2
+
3
+ from .app import create_app
4
+ from .settings import Settings, get_settings
5
+
6
+ __all__ = [
7
+ "create_app",
8
+ "Settings",
9
+ "get_settings",
10
+ ]
@@ -0,0 +1,111 @@
1
+ """FastAPI application factory and configuration."""
2
+
3
+ import logging
4
+ from collections.abc import AsyncGenerator
5
+ from contextlib import asynccontextmanager
6
+ from pathlib import Path
7
+
8
+ from fastapi import FastAPI, Request
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import JSONResponse
11
+
12
+ from juniper_data import __version__
13
+ from juniper_data.storage import LocalFSDatasetStore
14
+
15
+ from .middleware import SecurityMiddleware
16
+ from .routes import datasets, generators, health
17
+ from .security import APIKeyAuth, RateLimiter
18
+ from .settings import Settings, get_settings
19
+
20
+
21
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Startup/shutdown hook for the FastAPI application.

    On startup it reads the settings stored on ``app.state``, registers a
    ``LocalFSDatasetStore`` rooted at the configured storage path with the
    datasets routes, configures logging and logs the version and storage
    location. On shutdown it logs a closing message.

    Args:
        app: The application whose ``state.settings`` holds the configuration.
    """
    cfg: Settings = app.state.settings
    data_dir = Path(cfg.storage_path)
    datasets.set_store(LocalFSDatasetStore(data_dir))

    # Level names that the logging module does not define fall back to INFO.
    level = getattr(logging, cfg.log_level.upper(), logging.INFO)
    logging.basicConfig(
        level=level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    log = logging.getLogger("juniper_data")
    log.info(f"JuniperData API v{__version__} starting")
    log.info(f"Storage path: {data_dir.absolute()}")

    yield

    log.info("JuniperData API shutting down")
40
+
41
+
42
def create_app(settings: Settings | None = None) -> FastAPI:
    """Build the Juniper Data FastAPI application.

    Args:
        settings: Configuration to use. When omitted, ``get_settings()``
            supplies it.

    Returns:
        A FastAPI instance with the settings attached to ``app.state``, CORS and
        security middleware installed, the health, generators and datasets
        routers mounted under ``/v1``, and JSON exception handlers registered.
    """
    settings = get_settings() if settings is None else settings

    app = FastAPI(
        title="Juniper Data API",
        description="Dataset generation and management service for the Juniper ecosystem",
        version=__version__,
        lifespan=lifespan,
    )
    app.state.settings = settings

    # Credentialed CORS requests are enabled only for an explicit origin list:
    # browsers reject Access-Control-Allow-Credentials: true combined with a
    # wildcard Access-Control-Allow-Origin, so an empty list or one containing
    # "*" leaves credentials switched off.
    origins = settings.cors_origins
    credentials_allowed = bool(origins) and "*" not in origins
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.cors_origins,
        allow_credentials=credentials_allowed,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # NOTE(review): SecurityMiddleware is registered after CORSMiddleware;
    # confirm this ordering is the intended one for CORS preflight requests.
    app.add_middleware(
        SecurityMiddleware,
        api_key_auth=APIKeyAuth(settings.api_keys),
        rate_limiter=RateLimiter(
            requests_per_minute=settings.rate_limit_requests_per_minute,
            enabled=settings.rate_limit_enabled,
        ),
    )

    for route_module in (health, generators, datasets):
        app.include_router(route_module.router, prefix="/v1")

    @app.exception_handler(ValueError)
    async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
        # A ValueError is reported to the client as a 400 carrying its message.
        body = {"detail": str(exc)}
        return JSONResponse(status_code=400, content=body)

    @app.exception_handler(Exception)
    async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
        # Anything else is logged with its traceback and reported generically.
        logging.getLogger("juniper_data").exception("Unhandled exception")
        body = {"detail": "Internal server error"}
        return JSONResponse(status_code=500, content=body)

    return app
109
+
110
+
111
+ app = create_app()