juniper-data 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data-0.4.2/LICENSE +9 -0
- juniper_data-0.4.2/PKG-INFO +216 -0
- juniper_data-0.4.2/README.md +179 -0
- juniper_data-0.4.2/juniper_data/__init__.py +88 -0
- juniper_data-0.4.2/juniper_data/__main__.py +78 -0
- juniper_data-0.4.2/juniper_data/api/__init__.py +10 -0
- juniper_data-0.4.2/juniper_data/api/app.py +111 -0
- juniper_data-0.4.2/juniper_data/api/middleware.py +95 -0
- juniper_data-0.4.2/juniper_data/api/routes/__init__.py +9 -0
- juniper_data-0.4.2/juniper_data/api/routes/datasets.py +414 -0
- juniper_data-0.4.2/juniper_data/api/routes/generators.py +125 -0
- juniper_data-0.4.2/juniper_data/api/routes/health.py +49 -0
- juniper_data-0.4.2/juniper_data/api/security.py +238 -0
- juniper_data-0.4.2/juniper_data/api/settings.py +109 -0
- juniper_data-0.4.2/juniper_data/core/__init__.py +32 -0
- juniper_data-0.4.2/juniper_data/core/artifacts.py +63 -0
- juniper_data-0.4.2/juniper_data/core/dataset_id.py +38 -0
- juniper_data-0.4.2/juniper_data/core/models.py +135 -0
- juniper_data-0.4.2/juniper_data/core/split.py +120 -0
- juniper_data-0.4.2/juniper_data/generators/__init__.py +15 -0
- juniper_data-0.4.2/juniper_data/generators/arc_agi/__init__.py +11 -0
- juniper_data-0.4.2/juniper_data/generators/arc_agi/generator.py +229 -0
- juniper_data-0.4.2/juniper_data/generators/arc_agi/params.py +56 -0
- juniper_data-0.4.2/juniper_data/generators/checkerboard/__init__.py +15 -0
- juniper_data-0.4.2/juniper_data/generators/checkerboard/generator.py +114 -0
- juniper_data-0.4.2/juniper_data/generators/checkerboard/params.py +32 -0
- juniper_data-0.4.2/juniper_data/generators/circles/__init__.py +11 -0
- juniper_data-0.4.2/juniper_data/generators/circles/generator.py +112 -0
- juniper_data-0.4.2/juniper_data/generators/circles/params.py +31 -0
- juniper_data-0.4.2/juniper_data/generators/csv_import/__init__.py +15 -0
- juniper_data-0.4.2/juniper_data/generators/csv_import/generator.py +198 -0
- juniper_data-0.4.2/juniper_data/generators/csv_import/params.py +48 -0
- juniper_data-0.4.2/juniper_data/generators/gaussian/__init__.py +11 -0
- juniper_data-0.4.2/juniper_data/generators/gaussian/generator.py +149 -0
- juniper_data-0.4.2/juniper_data/generators/gaussian/params.py +53 -0
- juniper_data-0.4.2/juniper_data/generators/mnist/__init__.py +11 -0
- juniper_data-0.4.2/juniper_data/generators/mnist/generator.py +124 -0
- juniper_data-0.4.2/juniper_data/generators/mnist/params.py +39 -0
- juniper_data-0.4.2/juniper_data/generators/spiral/__init__.py +57 -0
- juniper_data-0.4.2/juniper_data/generators/spiral/defaults.py +39 -0
- juniper_data-0.4.2/juniper_data/generators/spiral/generator.py +206 -0
- juniper_data-0.4.2/juniper_data/generators/spiral/params.py +148 -0
- juniper_data-0.4.2/juniper_data/generators/xor/__init__.py +11 -0
- juniper_data-0.4.2/juniper_data/generators/xor/generator.py +162 -0
- juniper_data-0.4.2/juniper_data/generators/xor/params.py +30 -0
- juniper_data-0.4.2/juniper_data/storage/__init__.py +120 -0
- juniper_data-0.4.2/juniper_data/storage/base.py +279 -0
- juniper_data-0.4.2/juniper_data/storage/cached.py +211 -0
- juniper_data-0.4.2/juniper_data/storage/hf_store.py +257 -0
- juniper_data-0.4.2/juniper_data/storage/kaggle_store.py +333 -0
- juniper_data-0.4.2/juniper_data/storage/local_fs.py +232 -0
- juniper_data-0.4.2/juniper_data/storage/memory.py +136 -0
- juniper_data-0.4.2/juniper_data/storage/postgres_store.py +373 -0
- juniper_data-0.4.2/juniper_data/storage/redis_store.py +264 -0
- juniper_data-0.4.2/juniper_data/tests/__init__.py +1 -0
- juniper_data-0.4.2/juniper_data/tests/conftest.py +68 -0
- juniper_data-0.4.2/juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
- juniper_data-0.4.2/juniper_data/tests/integration/__init__.py +1 -0
- juniper_data-0.4.2/juniper_data/tests/integration/test_api.py +283 -0
- juniper_data-0.4.2/juniper_data/tests/integration/test_e2e_workflow.py +378 -0
- juniper_data-0.4.2/juniper_data/tests/integration/test_lifecycle_api.py +304 -0
- juniper_data-0.4.2/juniper_data/tests/integration/test_security_integration.py +189 -0
- juniper_data-0.4.2/juniper_data/tests/integration/test_storage_workflow.py +259 -0
- juniper_data-0.4.2/juniper_data/tests/performance/__init__.py +1 -0
- juniper_data-0.4.2/juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
- juniper_data-0.4.2/juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
- juniper_data-0.4.2/juniper_data/tests/unit/__init__.py +1 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_api_app.py +206 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_api_routes.py +407 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_api_settings.py +100 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_artifacts.py +145 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_cached_store.py +423 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_circles_generator.py +256 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_csv_import_generator.py +345 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_dataset_id.py +181 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_gaussian_generator.py +333 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_hf_store.py +416 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_init.py +93 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_kaggle_store.py +469 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_lifecycle.py +394 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_main.py +127 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_middleware.py +79 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_mnist_generator.py +370 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_postgres_store.py +490 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_redis_store.py +500 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_security.py +281 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_security_boundaries.py +517 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_spiral_generator.py +566 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_split.py +245 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_storage.py +767 -0
- juniper_data-0.4.2/juniper_data/tests/unit/test_xor_generator.py +223 -0
- juniper_data-0.4.2/juniper_data.egg-info/PKG-INFO +216 -0
- juniper_data-0.4.2/juniper_data.egg-info/SOURCES.txt +98 -0
- juniper_data-0.4.2/juniper_data.egg-info/dependency_links.txt +1 -0
- juniper_data-0.4.2/juniper_data.egg-info/requires.txt +31 -0
- juniper_data-0.4.2/juniper_data.egg-info/top_level.txt +1 -0
- juniper_data-0.4.2/pyproject.toml +224 -0
- juniper_data-0.4.2/setup.cfg +4 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# Juniper Data project
|
|
2
|
+
|
|
3
|
+
Copyright 2024, 2025 Paul Calnon
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: juniper-data
|
|
3
|
+
Version: 0.4.2
|
|
4
|
+
Summary: Dataset generation and management service for the Juniper ecosystem
|
|
5
|
+
Author: Paul Calnon
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: numpy>=1.24.0
|
|
11
|
+
Requires-Dist: pydantic>=2.0.0
|
|
12
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
13
|
+
Provides-Extra: arc-agi
|
|
14
|
+
Requires-Dist: arc-agi>=0.9.0; extra == "arc-agi"
|
|
15
|
+
Provides-Extra: api
|
|
16
|
+
Requires-Dist: fastapi>=0.100.0; extra == "api"
|
|
17
|
+
Requires-Dist: uvicorn[standard]>=0.23.0; extra == "api"
|
|
18
|
+
Requires-Dist: pydantic-settings>=2.0.0; extra == "api"
|
|
19
|
+
Provides-Extra: test
|
|
20
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
21
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
22
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == "test"
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
|
|
24
|
+
Requires-Dist: pytest-benchmark>=4.0.0; extra == "test"
|
|
25
|
+
Requires-Dist: httpx>=0.24.0; extra == "test"
|
|
26
|
+
Requires-Dist: coverage[toml]>=7.0.0; extra == "test"
|
|
27
|
+
Requires-Dist: juniper-data-client>=0.3.0; extra == "test"
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
30
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
31
|
+
Requires-Dist: bandit[sarif]>=1.7.9; extra == "dev"
|
|
32
|
+
Requires-Dist: pip-audit>=2.7.0; extra == "dev"
|
|
33
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
34
|
+
Provides-Extra: all
|
|
35
|
+
Requires-Dist: juniper-data[api,arc-agi,dev,test]; extra == "all"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# Juniper Data
|
|
39
|
+
|
|
40
|
+
Dataset generation and management service for the Juniper ecosystem.
|
|
41
|
+
|
|
42
|
+
## Overview
|
|
43
|
+
|
|
44
|
+
Juniper Data provides a centralized service for generating, storing, and serving datasets used by the Juniper neural network projects. It supports various dataset types including the classic two-spiral classification problem.
|
|
45
|
+
|
|
46
|
+
## Ecosystem Compatibility
|
|
47
|
+
|
|
48
|
+
This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
|
|
49
|
+
Verified compatible versions:
|
|
50
|
+
|
|
51
|
+
| juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
|
|
52
|
+
|---|---|---|---|---|---|
|
|
53
|
+
| 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
|
|
54
|
+
|
|
55
|
+
For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
|
|
56
|
+
|
|
57
|
+
## Architecture
|
|
58
|
+
|
|
59
|
+
JuniperData is the **foundational data layer** of the Juniper ecosystem. JuniperCascor and juniper-canopy both call JuniperData to generate and retrieve datasets.
|
|
60
|
+
|
|
61
|
+
```
|
|
62
|
+
┌─────────────────────┐ REST+WS ┌──────────────────────┐
|
|
63
|
+
│ juniper-canopy │ ◄──────────────► │ JuniperCascor │
|
|
64
|
+
│ Dashboard │ │ Training Svc │
|
|
65
|
+
│ Port 8050 │ │ Port 8200 │
|
|
66
|
+
└──────────┬──────────┘ └──────────┬───────────┘
|
|
67
|
+
│ REST │ REST
|
|
68
|
+
▼ ▼
|
|
69
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
70
|
+
│ JuniperData ◄── (this service) │
|
|
71
|
+
│ Dataset Service · Port 8100 │
|
|
72
|
+
└──────────────────────────────────────────────────────────────┘
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
**Data contract**: datasets are served as NPZ archives with keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full` (all `float32`).
|
|
76
|
+
|
|
77
|
+
## Related Services
|
|
78
|
+
|
|
79
|
+
| Service | Relationship | Environment Variable |
|
|
80
|
+
|---------|-------------|---------------------|
|
|
81
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes JuniperData for training datasets | `JUNIPER_DATA_URL=http://localhost:8100` |
|
|
82
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes JuniperData for visualization data | `JUNIPER_DATA_URL=http://localhost:8100` |
|
|
83
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI client library for this service | `pip install juniper-data-client` |
|
|
84
|
+
|
|
85
|
+
### Service Configuration
|
|
86
|
+
|
|
87
|
+
| Variable | Default | Description |
|
|
88
|
+
|----------|---------|-------------|
|
|
89
|
+
| `JUNIPER_DATA_HOST` | `0.0.0.0` | Listen address |
|
|
90
|
+
| `JUNIPER_DATA_PORT` | `8100` | Service port |
|
|
91
|
+
| `JUNIPER_DATA_LOG_LEVEL` | `INFO` | Log verbosity |
|
|
92
|
+
|
|
93
|
+
### Docker Deployment
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
# Full stack with all three services:
|
|
97
|
+
git clone https://github.com/pcalnon/juniper-deploy.git
|
|
98
|
+
cd juniper-deploy && docker compose up --build
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Installation
|
|
102
|
+
|
|
103
|
+
### Basic Installation
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
pip install -e .
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### With API Support
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
pip install -e ".[api]"
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Development Installation
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
pip install -e ".[dev]"
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Full Installation
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
pip install -e ".[all]"
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Quick Start
|
|
128
|
+
|
|
129
|
+
### Generate a Spiral Dataset
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from juniper_data.generators.spiral import SpiralGenerator
|
|
133
|
+
|
|
134
|
+
generator = SpiralGenerator()
|
|
135
|
+
dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Start the API Server
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
uvicorn juniper_data.api.app:app --reload
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## API Endpoints
|
|
145
|
+
|
|
146
|
+
| Endpoint | Method | Description |
|
|
147
|
+
| ------------------------------- | ------ | ---------------------------------- |
|
|
148
|
+
| `/v1/health` | GET | Health check endpoint |
|
|
149
|
+
| `/v1/datasets` | GET | List available datasets |
|
|
150
|
+
| `/v1/datasets/{id}` | GET | Get a specific dataset |
|
|
151
|
+
| `/v1/generators/spiral` | POST | Generate a new spiral dataset |
|
|
152
|
+
| `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
|
|
153
|
+
|
|
154
|
+
## Project Structure
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
JuniperData/
|
|
158
|
+
├── juniper_data/
|
|
159
|
+
│ ├── core/ # Core functionality and base classes
|
|
160
|
+
│ ├── generators/ # Dataset generators
|
|
161
|
+
│ │ └── spiral/ # Spiral dataset generator
|
|
162
|
+
│ ├── storage/ # Dataset persistence layer
|
|
163
|
+
│ └── api/ # FastAPI application
|
|
164
|
+
│ └── routes/ # API route handlers
|
|
165
|
+
├── tests/
|
|
166
|
+
│ ├── unit/ # Unit tests
|
|
167
|
+
│ └── integration/ # Integration tests
|
|
168
|
+
├── pyproject.toml # Project configuration
|
|
169
|
+
└── README.md # This file
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## Development
|
|
173
|
+
|
|
174
|
+
### Running Tests
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
pytest
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### Running Tests with Coverage
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
pytest --cov=juniper_data --cov-report=html
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Code Formatting
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
black juniper_data tests
|
|
190
|
+
isort juniper_data tests
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Type Checking
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
mypy juniper_data
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Juniper Ecosystem
|
|
200
|
+
|
|
201
|
+
| Repository | Description |
|
|
202
|
+
|-----------|-------------|
|
|
203
|
+
| [juniper-data](https://github.com/pcalnon/juniper-data) | Dataset generation service (this repo) |
|
|
204
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | CasCor neural network training service |
|
|
205
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Real-time monitoring dashboard |
|
|
206
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI: `juniper-data-client` |
|
|
207
|
+
| [juniper-cascor-client](https://github.com/pcalnon/juniper-cascor-client) | PyPI: `juniper-cascor-client` |
|
|
208
|
+
| [juniper-cascor-worker](https://github.com/pcalnon/juniper-cascor-worker) | PyPI: `juniper-cascor-worker` |
|
|
209
|
+
|
|
210
|
+
## License
|
|
211
|
+
|
|
212
|
+
MIT License - Copyright (c) 2024-2026 Paul Calnon
|
|
213
|
+
|
|
214
|
+
## Git Leaks
|
|
215
|
+
|
|
216
|
+

|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Juniper Data
|
|
2
|
+
|
|
3
|
+
Dataset generation and management service for the Juniper ecosystem.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Juniper Data provides a centralized service for generating, storing, and serving datasets used by the Juniper neural network projects. It supports various dataset types including the classic two-spiral classification problem.
|
|
8
|
+
|
|
9
|
+
## Ecosystem Compatibility
|
|
10
|
+
|
|
11
|
+
This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
|
|
12
|
+
Verified compatible versions:
|
|
13
|
+
|
|
14
|
+
| juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
|
|
15
|
+
|---|---|---|---|---|---|
|
|
16
|
+
| 0.4.x | 0.3.x | 0.2.x | >=0.3.1 | >=0.1.0 | >=0.1.0 |
|
|
17
|
+
|
|
18
|
+
For full-stack Docker deployment and integration tests, see [juniper-deploy](https://github.com/pcalnon/juniper-deploy).
|
|
19
|
+
|
|
20
|
+
## Architecture
|
|
21
|
+
|
|
22
|
+
JuniperData is the **foundational data layer** of the Juniper ecosystem. JuniperCascor and juniper-canopy both call JuniperData to generate and retrieve datasets.
|
|
23
|
+
|
|
24
|
+
```
|
|
25
|
+
┌─────────────────────┐ REST+WS ┌──────────────────────┐
|
|
26
|
+
│ juniper-canopy │ ◄──────────────► │ JuniperCascor │
|
|
27
|
+
│ Dashboard │ │ Training Svc │
|
|
28
|
+
│ Port 8050 │ │ Port 8200 │
|
|
29
|
+
└──────────┬──────────┘ └──────────┬───────────┘
|
|
30
|
+
│ REST │ REST
|
|
31
|
+
▼ ▼
|
|
32
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
33
|
+
│ JuniperData ◄── (this service) │
|
|
34
|
+
│ Dataset Service · Port 8100 │
|
|
35
|
+
└──────────────────────────────────────────────────────────────┘
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
**Data contract**: datasets are served as NPZ archives with keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full` (all `float32`).
|
|
39
|
+
|
|
40
|
+
## Related Services
|
|
41
|
+
|
|
42
|
+
| Service | Relationship | Environment Variable |
|
|
43
|
+
|---------|-------------|---------------------|
|
|
44
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes JuniperData for training datasets | `JUNIPER_DATA_URL=http://localhost:8100` |
|
|
45
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes JuniperData for visualization data | `JUNIPER_DATA_URL=http://localhost:8100` |
|
|
46
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI client library for this service | `pip install juniper-data-client` |
|
|
47
|
+
|
|
48
|
+
### Service Configuration
|
|
49
|
+
|
|
50
|
+
| Variable | Default | Description |
|
|
51
|
+
|----------|---------|-------------|
|
|
52
|
+
| `JUNIPER_DATA_HOST` | `0.0.0.0` | Listen address |
|
|
53
|
+
| `JUNIPER_DATA_PORT` | `8100` | Service port |
|
|
54
|
+
| `JUNIPER_DATA_LOG_LEVEL` | `INFO` | Log verbosity |
|
|
55
|
+
|
|
56
|
+
### Docker Deployment
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# Full stack with all three services:
|
|
60
|
+
git clone https://github.com/pcalnon/juniper-deploy.git
|
|
61
|
+
cd juniper-deploy && docker compose up --build
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Installation
|
|
65
|
+
|
|
66
|
+
### Basic Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install -e .
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### With API Support
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pip install -e ".[api]"
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Development Installation
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
pip install -e ".[dev]"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Full Installation
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
pip install -e ".[all]"
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Quick Start
|
|
91
|
+
|
|
92
|
+
### Generate a Spiral Dataset
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from juniper_data.generators.spiral import SpiralGenerator
|
|
96
|
+
|
|
97
|
+
generator = SpiralGenerator()
|
|
98
|
+
dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
### Start the API Server
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
uvicorn juniper_data.api.app:app --reload
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## API Endpoints
|
|
108
|
+
|
|
109
|
+
| Endpoint | Method | Description |
|
|
110
|
+
| ------------------------------- | ------ | ---------------------------------- |
|
|
111
|
+
| `/v1/health` | GET | Health check endpoint |
|
|
112
|
+
| `/v1/datasets` | GET | List available datasets |
|
|
113
|
+
| `/v1/datasets/{id}` | GET | Get a specific dataset |
|
|
114
|
+
| `/v1/generators/spiral` | POST | Generate a new spiral dataset |
|
|
115
|
+
| `/v1/generators/spiral/config` | GET | Get spiral generator configuration |
|
|
116
|
+
|
|
117
|
+
## Project Structure
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
JuniperData/
|
|
121
|
+
├── juniper_data/
|
|
122
|
+
│ ├── core/ # Core functionality and base classes
|
|
123
|
+
│ ├── generators/ # Dataset generators
|
|
124
|
+
│ │ └── spiral/ # Spiral dataset generator
|
|
125
|
+
│ ├── storage/ # Dataset persistence layer
|
|
126
|
+
│ └── api/ # FastAPI application
|
|
127
|
+
│ └── routes/ # API route handlers
|
|
128
|
+
├── tests/
|
|
129
|
+
│ ├── unit/ # Unit tests
|
|
130
|
+
│ └── integration/ # Integration tests
|
|
131
|
+
├── pyproject.toml # Project configuration
|
|
132
|
+
└── README.md # This file
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Development
|
|
136
|
+
|
|
137
|
+
### Running Tests
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
pytest
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
### Running Tests with Coverage
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
pytest --cov=juniper_data --cov-report=html
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### Code Formatting
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
black juniper_data tests
|
|
153
|
+
isort juniper_data tests
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
### Type Checking
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
mypy juniper_data
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Juniper Ecosystem
|
|
163
|
+
|
|
164
|
+
| Repository | Description |
|
|
165
|
+
|-----------|-------------|
|
|
166
|
+
| [juniper-data](https://github.com/pcalnon/juniper-data) | Dataset generation service (this repo) |
|
|
167
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | CasCor neural network training service |
|
|
168
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Real-time monitoring dashboard |
|
|
169
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | PyPI: `juniper-data-client` |
|
|
170
|
+
| [juniper-cascor-client](https://github.com/pcalnon/juniper-cascor-client) | PyPI: `juniper-cascor-client` |
|
|
171
|
+
| [juniper-cascor-worker](https://github.com/pcalnon/juniper-cascor-worker) | PyPI: `juniper-cascor-worker` |
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT License - Copyright (c) 2024-2026 Paul Calnon
|
|
176
|
+
|
|
177
|
+
## Git Leaks
|
|
178
|
+
|
|
179
|
+

|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Juniper Data - Dataset generation and management service for the Juniper ecosystem.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
from dotenv import load_dotenv
|
|
8
|
+
|
|
9
|
+
try:
|
|
10
|
+
import arc_agi
|
|
11
|
+
|
|
12
|
+
ARC_AGI_AVAILABLE = True
|
|
13
|
+
except ImportError:
|
|
14
|
+
ARC_AGI_AVAILABLE = False
|
|
15
|
+
arc_agi = None # type: ignore[assignment]
|
|
16
|
+
|
|
17
|
+
__version__ = "0.4.2"
|
|
18
|
+
__author__ = "Paul Calnon"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_arc_agi_env() -> bool:
    """
    Report whether the ``ARC_AGI_ENV`` environment variable is set.

    A ``.env`` file is loaded first (via ``load_dotenv``) so that a value defined
    there becomes visible. The outcome of that load is deliberately ignored: the
    answer depends only on the environment as it stands afterwards.

    Returns:
        bool: True when ``ARC_AGI_ENV`` holds a non-empty value, otherwise False.
    """
    load_dotenv()
    arc_agi_env = os.getenv("ARC_AGI_ENV")
    return bool(arc_agi_env)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def reload_arc_agi_env() -> bool:
    """
    Re-read the ``.env`` file and refresh the process environment from it.

    Unlike a plain ``load_dotenv()`` call, values from the ``.env`` file replace
    variables that are already present in the environment, so edits made to the
    file after an earlier load take effect.

    Returns:
        bool: The truthiness of ``load_dotenv``'s result, i.e. whether it reports
        having loaded variables from a ``.env`` file.
    """
    # override=True is what makes this a *re*load: with the default (False),
    # variables that were already set would silently keep their old values.
    return bool(load_dotenv(override=True))
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_arc_api_key() -> str | None:
|
|
48
|
+
"""
|
|
49
|
+
Return the current value of the ARC_API_KEY environment variable as a string.
|
|
50
|
+
"""
|
|
51
|
+
return os.getenv("ARC_API_KEY") or None
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_arc_agi_api_url() -> str | None:
|
|
55
|
+
"""
|
|
56
|
+
Return the current value of the ARC_AGI_API as a URL/endpoint string.
|
|
57
|
+
|
|
58
|
+
Reading the environment at call time avoids import-time side effects
|
|
59
|
+
and makes it easier to adjust configuration in tests.
|
|
60
|
+
"""
|
|
61
|
+
return os.getenv("ARC_AGI_API") or None
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def get_arc_agi_arcade() -> "arc_agi.Arcade | None":
    """
    Create and return an :class:`arc_agi.Arcade` instance configured from environment variables.

    The API key is read at call time via :func:`get_arc_api_key`, so nothing is
    read from the environment at import time and tests can change the key
    between calls. When no key is configured, ``None`` is passed as ``arc_api_key``.

    Returns:
        arc_agi.Arcade: The newly constructed client. The ``| None`` in the
        annotation is retained only to keep the published signature unchanged.

    Raises:
        ImportError: If the ``arc-agi`` package is not installed.
    """
    if not ARC_AGI_AVAILABLE:
        raise ImportError("arc-agi package not installed. Install with: pip install 'juniper-data[arc-agi]'")

    # The key is passed explicitly instead of relying on the library to find it.
    # The instance is returned as-is: coercing it with ``or None`` would discard a
    # valid client if the class ever evaluated as falsy.
    return arc_agi.Arcade(arc_api_key=get_arc_api_key())
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# Deprecated
|
|
81
|
+
def get_arc_agi_api() -> str | None:
    """
    Deprecated: use :func:`get_arc_agi_api_url` instead.

    Kept as a backward-compatible alias that forwards to
    :func:`get_arc_agi_api_url` and returns exactly what that function returns.
    It will be removed in a future release.
    """
    api_url = get_arc_agi_api_url()
    return api_url
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""Entry point for running the JuniperData API with uvicorn."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main() -> int:
    """Run the JuniperData API server.

    Parses the command-line options, falls back to ``Settings`` for any option
    that was not given, and hands control to ``uvicorn.run``.

    Returns:
        int: 1 if uvicorn is not installed, otherwise 0 once ``uvicorn.run`` returns.
    """
    try:
        import uvicorn
    except ImportError:
        print("Error: uvicorn not installed. Install with: pip install 'juniper-data[api]'")
        return 1

    import os

    parser = argparse.ArgumentParser(
        description="Run the JuniperData API server",
        prog="python -m juniper_data",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=None,
        help="Host to bind to (default: from settings, which default to 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind to (default: from settings or 8100)",
    )
    parser.add_argument(
        "--storage-path",
        type=str,
        default=None,
        help="Path to dataset storage directory",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["TRACE", "VERBOSE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "FATAL"],
        default=None,
        help="Logging level",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )

    args = parser.parse_args()

    # Export the storage override *before* the settings module (and anything its
    # package pulls in) is imported, so settings built from the environment at
    # import time can see it.
    if args.storage_path is not None:
        os.environ["JUNIPER_DATA_STORAGE_PATH"] = args.storage_path

    from juniper_data.api.settings import Settings

    settings = Settings()

    # Explicit command-line values win; anything omitted comes from settings.
    host = args.host if args.host is not None else settings.host
    port = args.port if args.port is not None else settings.port
    log_level_source = args.log_level if args.log_level is not None else settings.log_level
    log_level = log_level_source.lower()

    # uvicorn only recognises critical/error/warning/info/debug/trace; any other
    # name fails inside its logging setup. FATAL is the stdlib alias of CRITICAL.
    # VERBOSE is a project-specific name - DEBUG is assumed to be the closest
    # uvicorn level (TODO confirm against the project's logging configuration).
    uvicorn_level_aliases = {"fatal": "critical", "verbose": "debug"}
    log_level = uvicorn_level_aliases.get(log_level, log_level)

    uvicorn.run(
        "juniper_data.api.app:app",
        host=host,
        port=port,
        log_level=log_level,
        reload=args.reload,
    )

    return 0
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
if __name__ == "__main__":
|
|
78
|
+
sys.exit(main())
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""FastAPI application factory and configuration."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from collections.abc import AsyncGenerator
|
|
5
|
+
from contextlib import asynccontextmanager
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fastapi import FastAPI, Request
|
|
9
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
10
|
+
from fastapi.responses import JSONResponse
|
|
11
|
+
|
|
12
|
+
from juniper_data import __version__
|
|
13
|
+
from juniper_data.storage import LocalFSDatasetStore
|
|
14
|
+
|
|
15
|
+
from .middleware import SecurityMiddleware
|
|
16
|
+
from .routes import datasets, generators, health
|
|
17
|
+
from .security import APIKeyAuth, RateLimiter
|
|
18
|
+
from .settings import Settings, get_settings
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Manage application startup and shutdown.

    Startup: builds a ``LocalFSDatasetStore`` from the storage path held in
    ``app.state.settings``, registers it with the datasets routes, configures
    logging from the configured log level and logs the version and storage path.
    Shutdown: logs a closing message after the application has finished.

    Args:
        app: The FastAPI application; ``app.state.settings`` must already be set.
    """
    cfg: Settings = app.state.settings
    data_dir = Path(cfg.storage_path)
    datasets.set_store(LocalFSDatasetStore(data_dir))

    # Level names the logging module does not define fall back to INFO.
    numeric_level = getattr(logging, cfg.log_level.upper(), logging.INFO)
    logging.basicConfig(
        level=numeric_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )

    log = logging.getLogger("juniper_data")
    log.info(f"JuniperData API v{__version__} starting")
    log.info(f"Storage path: {data_dir.absolute()}")

    yield

    log.info("JuniperData API shutting down")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def create_app(settings: Settings | None = None) -> FastAPI:
    """Build a fully configured FastAPI application.

    The application gets, in this order: CORS middleware, the security
    middleware (API-key auth plus rate limiting), the health, generators and
    datasets routers under ``/v1``, and two exception handlers.

    Args:
        settings: Settings to use. When omitted, they are obtained from
            ``get_settings()``.

    Returns:
        The configured FastAPI application instance.
    """
    if settings is None:
        settings = get_settings()

    app = FastAPI(
        title="Juniper Data API",
        description="Dataset generation and management service for the Juniper ecosystem",
        version=__version__,
        lifespan=lifespan,
    )
    # The lifespan handler reads its configuration from here.
    app.state.settings = settings

    # Credentialed CORS requests are only enabled for explicitly listed origins.
    # Browsers reject Access-Control-Allow-Credentials: true combined with a
    # wildcard Access-Control-Allow-Origin, so an empty list or any "*" entry
    # switches credentials off.
    origins = settings.cors_origins
    credentials_allowed = bool(origins) and "*" not in origins
    app.add_middleware(
        CORSMiddleware,
        allow_origins=origins,
        allow_credentials=credentials_allowed,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.add_middleware(
        SecurityMiddleware,
        api_key_auth=APIKeyAuth(settings.api_keys),
        rate_limiter=RateLimiter(
            requests_per_minute=settings.rate_limit_requests_per_minute,
            enabled=settings.rate_limit_enabled,
        ),
    )

    for route_module in (health, generators, datasets):
        app.include_router(route_module.router, prefix="/v1")

    @app.exception_handler(ValueError)
    async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
        # A ValueError is reported to the client as a 400 carrying its message.
        return JSONResponse(status_code=400, content={"detail": str(exc)})

    @app.exception_handler(Exception)
    async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
        # Anything else is logged with its traceback; the client sees a generic 500.
        logging.getLogger("juniper_data").exception("Unhandled exception")
        return JSONResponse(status_code=500, content={"detail": "Internal server error"})

    return app
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
app = create_app()
|