juniper-data 0.6.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- juniper_data-0.7.0/PKG-INFO +243 -0
- juniper_data-0.7.0/README.md +195 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/__init__.py +9 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/__main__.py +2 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/app.py +41 -8
- juniper_data-0.7.0/juniper_data/api/constants.py +117 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/middleware.py +43 -25
- juniper_data-0.7.0/juniper_data/api/models/health.py +21 -0
- juniper_data-0.7.0/juniper_data/api/observability.py +187 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/routes/datasets.py +200 -62
- juniper_data-0.7.0/juniper_data/api/routes/generators.py +206 -0
- juniper_data-0.7.0/juniper_data/api/routes/health.py +240 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/security.py +86 -14
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/settings.py +36 -0
- juniper_data-0.7.0/juniper_data/core/constants.py +41 -0
- juniper_data-0.7.0/juniper_data/core/dataset_id.py +61 -0
- juniper_data-0.7.0/juniper_data/core/meta.py +145 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/models.py +46 -12
- juniper_data-0.7.0/juniper_data/core/scaling.py +92 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/split.py +32 -0
- juniper_data-0.7.0/juniper_data/generators/_sequence.py +349 -0
- juniper_data-0.7.0/juniper_data/generators/_synthetic.py +115 -0
- juniper_data-0.7.0/juniper_data/generators/ar_p/__init__.py +11 -0
- juniper_data-0.7.0/juniper_data/generators/ar_p/generator.py +73 -0
- juniper_data-0.7.0/juniper_data/generators/ar_p/params.py +38 -0
- juniper_data-0.7.0/juniper_data/generators/arc_agi/defaults.py +29 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/generator.py +23 -3
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/params.py +27 -12
- juniper_data-0.7.0/juniper_data/generators/checkerboard/defaults.py +24 -0
- juniper_data-0.7.0/juniper_data/generators/checkerboard/params.py +46 -0
- juniper_data-0.7.0/juniper_data/generators/circles/defaults.py +27 -0
- juniper_data-0.7.0/juniper_data/generators/circles/params.py +43 -0
- juniper_data-0.7.0/juniper_data/generators/csv_import/defaults.py +20 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/generator.py +3 -2
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/params.py +19 -8
- juniper_data-0.7.0/juniper_data/generators/equities/__init__.py +19 -0
- juniper_data-0.7.0/juniper_data/generators/equities/defaults.py +61 -0
- juniper_data-0.7.0/juniper_data/generators/equities/generator.py +448 -0
- juniper_data-0.7.0/juniper_data/generators/equities/params.py +123 -0
- juniper_data-0.7.0/juniper_data/generators/equities_seq/__init__.py +19 -0
- juniper_data-0.7.0/juniper_data/generators/equities_seq/generator.py +174 -0
- juniper_data-0.7.0/juniper_data/generators/equities_seq/params.py +34 -0
- juniper_data-0.7.0/juniper_data/generators/gaussian/defaults.py +25 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/params.py +24 -8
- juniper_data-0.7.0/juniper_data/generators/irregular_sine/__init__.py +11 -0
- juniper_data-0.7.0/juniper_data/generators/irregular_sine/generator.py +90 -0
- juniper_data-0.7.0/juniper_data/generators/irregular_sine/params.py +44 -0
- juniper_data-0.7.0/juniper_data/generators/mackey_glass/__init__.py +11 -0
- juniper_data-0.7.0/juniper_data/generators/mackey_glass/generator.py +80 -0
- juniper_data-0.7.0/juniper_data/generators/mackey_glass/params.py +34 -0
- juniper_data-0.7.0/juniper_data/generators/mnist/defaults.py +24 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/params.py +15 -6
- juniper_data-0.7.0/juniper_data/generators/moon/__init__.py +17 -0
- juniper_data-0.7.0/juniper_data/generators/moon/defaults.py +19 -0
- juniper_data-0.7.0/juniper_data/generators/moon/generator.py +107 -0
- juniper_data-0.7.0/juniper_data/generators/moon/params.py +35 -0
- juniper_data-0.7.0/juniper_data/generators/multi_sine/__init__.py +11 -0
- juniper_data-0.7.0/juniper_data/generators/multi_sine/generator.py +84 -0
- juniper_data-0.7.0/juniper_data/generators/multi_sine/params.py +41 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/defaults.py +1 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/params.py +2 -2
- juniper_data-0.7.0/juniper_data/generators/xor/defaults.py +25 -0
- juniper_data-0.7.0/juniper_data/generators/xor/params.py +43 -0
- juniper_data-0.7.0/juniper_data/provenance.py +33 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/base.py +106 -10
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/cached.py +49 -2
- juniper_data-0.7.0/juniper_data/storage/constants.py +47 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/kaggle_store.py +2 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/local_fs.py +101 -21
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/memory.py +9 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/postgres_store.py +8 -3
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/redis_store.py +24 -13
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/api/test_batch_operations.py +59 -0
- juniper_data-0.7.0/juniper_data/tests/conftest.py +111 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/fixtures/generate_golden_datasets.py +5 -5
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_api.py +9 -3
- juniper_data-0.7.0/juniper_data/tests/integration/test_dataset_generation_metrics_live.py +192 -0
- juniper_data-0.7.0/juniper_data/tests/integration/test_dataset_post_total_metric.py +288 -0
- juniper_data-0.7.0/juniper_data/tests/integration/test_e2e_synthetic_regression.py +119 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_security_integration.py +9 -3
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_app.py +23 -16
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_routes.py +24 -3
- juniper_data-0.7.0/juniper_data/tests/unit/test_ar_p_generator.py +113 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_arc_agi_generator.py +59 -2
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_cached_store.py +158 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_dataset_id.py +14 -5
- juniper_data-0.7.0/juniper_data/tests/unit/test_env_file_isolation.py +83 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_equities_generator.py +253 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_equities_seq_generator.py +146 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_health_enhanced.py +314 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_irregular_sine_generator.py +129 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_local_fs_path_traversal.py +105 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_mackey_glass_generator.py +109 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_main.py +3 -2
- juniper_data-0.7.0/juniper_data/tests/unit/test_meta_dispatch.py +155 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_metadata_cache.py +212 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_middleware.py +8 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_moon_generator.py +137 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_multi_sine_generator.py +128 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_observability.py +200 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_phase1d_security.py +584 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_phase_2b_data_integrity.py +303 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_phase_2d_metrics.py +233 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_r2_1_2_wire_compat.py +86 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_record_access_concurrency.py +190 -0
- juniper_data-0.7.0/juniper_data/tests/unit/test_scaling.py +81 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_security.py +79 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_security_boundaries.py +30 -31
- juniper_data-0.7.0/juniper_data/tests/unit/test_sequence_windowing_leakage.py +234 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_split.py +22 -1
- juniper_data-0.7.0/juniper_data/tests/unit/test_synthetic_scaling.py +90 -0
- juniper_data-0.7.0/juniper_data.egg-info/PKG-INFO +243 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/SOURCES.txt +60 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/requires.txt +10 -1
- {juniper_data-0.6.0 → juniper_data-0.7.0}/pyproject.toml +44 -4
- juniper_data-0.6.0/PKG-INFO +0 -263
- juniper_data-0.6.0/README.md +0 -223
- juniper_data-0.6.0/juniper_data/api/models/health.py +0 -26
- juniper_data-0.6.0/juniper_data/api/observability.py +0 -227
- juniper_data-0.6.0/juniper_data/api/routes/generators.py +0 -117
- juniper_data-0.6.0/juniper_data/api/routes/health.py +0 -76
- juniper_data-0.6.0/juniper_data/core/dataset_id.py +0 -38
- juniper_data-0.6.0/juniper_data/generators/checkerboard/params.py +0 -32
- juniper_data-0.6.0/juniper_data/generators/circles/params.py +0 -31
- juniper_data-0.6.0/juniper_data/generators/xor/params.py +0 -30
- juniper_data-0.6.0/juniper_data/tests/conftest.py +0 -68
- juniper_data-0.6.0/juniper_data/tests/unit/test_health_enhanced.py +0 -138
- juniper_data-0.6.0/juniper_data/tests/unit/test_observability.py +0 -352
- juniper_data-0.6.0/juniper_data.egg-info/PKG-INFO +0 -263
- {juniper_data-0.6.0 → juniper_data-0.7.0}/LICENSE +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/models/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/routes/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/artifacts.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/secrets.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/checkerboard/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/checkerboard/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/circles/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/circles/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/xor/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/xor/generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/hf_store.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/api/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_e2e_workflow.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_lifecycle_api.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_storage_workflow.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/test_generator_benchmarks.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/test_storage_benchmarks.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/__init__.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_settings.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_artifacts.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_checkerboard_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_circles_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_csv_import_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_dataset_versioning.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_gaussian_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_hf_store.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_init.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_kaggle_store.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_lifecycle.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_mnist_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_postgres_store.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_redis_store.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_secrets.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_spiral_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_storage.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_xor_generator.py +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/dependency_links.txt +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/top_level.txt +0 -0
- {juniper_data-0.6.0 → juniper_data-0.7.0}/setup.cfg +0 -0
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: juniper-data
|
|
3
|
+
Version: 0.7.0
|
|
4
|
+
Summary: Dataset generation and management service for the Juniper ecosystem
|
|
5
|
+
Author: Paul Calnon
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.12
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: cachetools>=5.3.0
|
|
11
|
+
Requires-Dist: numpy>=1.24.0
|
|
12
|
+
Requires-Dist: pydantic>=2.0.0
|
|
13
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
14
|
+
Provides-Extra: arc-agi
|
|
15
|
+
Requires-Dist: arc-agi>=0.9.0; extra == "arc-agi"
|
|
16
|
+
Provides-Extra: equities
|
|
17
|
+
Requires-Dist: yfinance>=0.2.40; extra == "equities"
|
|
18
|
+
Requires-Dist: pandas>=2.0.0; extra == "equities"
|
|
19
|
+
Provides-Extra: api
|
|
20
|
+
Requires-Dist: fastapi>=0.100.0; extra == "api"
|
|
21
|
+
Requires-Dist: starlette>=1.0.1; extra == "api"
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.23.0; extra == "api"
|
|
23
|
+
Requires-Dist: pydantic-settings>=2.0.0; extra == "api"
|
|
24
|
+
Requires-Dist: juniper-observability>=0.4.0; extra == "api"
|
|
25
|
+
Provides-Extra: test
|
|
26
|
+
Requires-Dist: pytest>=7.0.0; extra == "test"
|
|
27
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "test"
|
|
28
|
+
Requires-Dist: pytest-timeout>=2.2.0; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
|
|
30
|
+
Requires-Dist: pytest-benchmark>=4.0.0; extra == "test"
|
|
31
|
+
Requires-Dist: hypothesis>=6.0.0; extra == "test"
|
|
32
|
+
Requires-Dist: httpx>=0.24.0; extra == "test"
|
|
33
|
+
Requires-Dist: coverage[toml]>=7.0.0; extra == "test"
|
|
34
|
+
Requires-Dist: juniper-data-client>=0.3.0; extra == "test"
|
|
35
|
+
Requires-Dist: PyYAML>=6.0; extra == "test"
|
|
36
|
+
Provides-Extra: observability
|
|
37
|
+
Requires-Dist: prometheus-client>=0.20.0; extra == "observability"
|
|
38
|
+
Requires-Dist: sentry-sdk[fastapi]>=2.0.0; extra == "observability"
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
41
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: bandit[sarif]>=1.9.4; extra == "dev"
|
|
43
|
+
Requires-Dist: pip-audit>=2.7.0; extra == "dev"
|
|
44
|
+
Requires-Dist: pre-commit>=3.0.0; extra == "dev"
|
|
45
|
+
Provides-Extra: all
|
|
46
|
+
Requires-Dist: juniper-data[api,arc-agi,dev,equities,observability,test]; extra == "all"
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
|
|
49
|
+
<!-- markdownlint-disable MD013 MD033 MD041 -->
|
|
50
|
+
<!--
|
|
51
|
+
MD013 (line-length): README contains prose paragraphs that intentionally
|
|
52
|
+
exceed the 512-char ecosystem limit. Disabled file-wide
|
|
53
|
+
since wrapping mid-sentence harms PyPI rendering.
|
|
54
|
+
MD033 (no-inline-html): The right-aligned logo + spacing rely on HTML.
|
|
55
|
+
MD041 (first-line-heading): The HTML logo is the first line by design.
|
|
56
|
+
-->
|
|
57
|
+
<div align="right" width="150px" height="150px" valign="top"> <img src="images/Juniper_Logo_150px.png" alt="Juniper" align="right" valign="top" width="150px" /></div>
|
|
58
|
+
<br /> <br /> <br /> <br />
|
|
59
|
+
|
|
60
|
+
# Juniper: Dynamic Neural Network Research Platform
|
|
61
|
+
|
|
62
|
+
Juniper is an AI/ML research platform for investigating dynamic neural network architectures and novel learning paradigms. The project emphasizes ground-up implementations from primary literature, enabling a more transparent exploration of fundamental algorithms.
|
|
63
|
+
|
|
64
|
+
## Juniper Data
|
|
65
|
+
|
|
66
|
+
`juniper-data` is the **dataset-generation service** of the Juniper platform. It is a FastAPI service that produces NPZ-formatted datasets from a catalogue of generators — including the classic two-spiral and concentric-circles problems, XOR and Gaussian mixtures, a CSV/JSON import path, MNIST/Fashion-MNIST, and the ARC-AGI visual-reasoning task families — and serves them through a REST surface that supports a named-version registry, batch creation and export, tag-based filtering, and per-dataset preview. `juniper-data` is the upstream of both `juniper-cascor` (training) and `juniper-canopy` (visualisation): the dataset identifiers it returns are the substrate on which the rest of the platform conducts comparative work.
|
|
67
|
+
|
|
68
|
+
## Distribution
|
|
69
|
+
|
|
70
|
+
`juniper-data` is published on PyPI as **[`juniper-data`](https://pypi.org/project/juniper-data/)**.
|
|
71
|
+
The package is also surfaced through the platform meta-distribution
|
|
72
|
+
**[`juniper-ml`](https://pypi.org/project/juniper-ml/)**, which installs
|
|
73
|
+
the full client stack via `pip install juniper-ml[all]`.
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install juniper-data
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
## Ecosystem Compatibility
|
|
80
|
+
|
|
81
|
+
This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
|
|
82
|
+
Verified compatible versions:
|
|
83
|
+
|
|
84
|
+
| juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
|
|
85
|
+
|--------------|----------------|----------------|-------------|---------------|---------------|
|
|
86
|
+
| 0.6.x | 0.5.x | 0.5.x | >=0.4.1 | >=0.4.0 | >=0.4.0 |
|
|
87
|
+
|
|
88
|
+
For full-stack Docker deployment and integration tests, see [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy).
|
|
89
|
+
|
|
90
|
+
## Architecture
|
|
91
|
+
|
|
92
|
+
`juniper-data` is the **foundational data layer** of the Juniper ecosystem. Both `juniper-cascor` and `juniper-canopy` call `juniper-data` to generate, version, and retrieve datasets.
|
|
93
|
+
|
|
94
|
+
```text
|
|
95
|
+
┌─────────────────────┐ REST+WS ┌──────────────────────┐
|
|
96
|
+
│ juniper-canopy │ ◄──────────────► │ juniper-cascor │
|
|
97
|
+
│ Dashboard │ │ Training Svc │
|
|
98
|
+
│ Port 8050 │ │ Port 8200 │
|
|
99
|
+
└──────────┬──────────┘ └──────────┬───────────┘
|
|
100
|
+
│ REST │ REST
|
|
101
|
+
▼ ▼
|
|
102
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
103
|
+
│ juniper-data ◄── (this service) │
|
|
104
|
+
│ Dataset Service · Port 8100 │
|
|
105
|
+
└──────────────────────────────────────────────────────────────┘
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
**Data contract**: datasets are served as NPZ archives with the keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full`, all of dtype `float32`.
|
|
109
|
+
|
|
110
|
+
## Related Services
|
|
111
|
+
|
|
112
|
+
| Service | Relationship | Notes |
|
|
113
|
+
|---------|-------------|-------|
|
|
114
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes `juniper-data` for training datasets | Set `JUNIPER_DATA_URL` |
|
|
115
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes `juniper-data` for visualisation data | Set `JUNIPER_DATA_URL` |
|
|
116
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | Python HTTP client for this service | `pip install juniper-data-client` |
|
|
117
|
+
|
|
118
|
+
## Service Configuration
|
|
119
|
+
|
|
120
|
+
Configuration is sourced from `juniper_data/api/settings.py` (Pydantic `BaseSettings`, `env_prefix="JUNIPER_DATA_"`). The complete env-var surface is listed below.
|
|
121
|
+
|
|
122
|
+
| Variable | Required | Default | Description |
|
|
123
|
+
|----------|----------|---------|-------------|
|
|
124
|
+
| `JUNIPER_DATA_HOST` | No | `127.0.0.1` | Bind address (override to `0.0.0.0` for Docker) |
|
|
125
|
+
| `JUNIPER_DATA_PORT` | No | `8100` | Service port |
|
|
126
|
+
| `JUNIPER_DATA_STORAGE_PATH` | No | `./data/datasets` | Filesystem path for persisted dataset artifacts |
|
|
127
|
+
| `JUNIPER_DATA_IMPORT_DIR` | No | `/data/imports` | Filesystem path for CSV/JSON imports |
|
|
128
|
+
| `JUNIPER_DATA_LOG_LEVEL` | No | `INFO` | Log verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
|
|
129
|
+
| `JUNIPER_DATA_LOG_FORMAT` | No | `text` | `text` or `json` (structured logging) |
|
|
130
|
+
| `JUNIPER_DATA_CORS_ORIGINS` | No | `[]` | Allowed CORS origins |
|
|
131
|
+
| `JUNIPER_DATA_API_KEYS` | No | `None` | Comma-separated or JSON-array API keys; authentication disabled when unset; Docker-secrets file path supported via the implicit `*_FILE` convention |
|
|
132
|
+
| `JUNIPER_DATA_RATE_LIMIT_ENABLED` | No | `true` | Enforce per-IP request rate limiting |
|
|
133
|
+
| `JUNIPER_DATA_RATE_LIMIT_REQUESTS_PER_MINUTE` | No | `60` | Per-IP rate limit |
|
|
134
|
+
| `JUNIPER_DATA_SENTRY_DSN` | No | `None` | Sentry DSN for error tracking |
|
|
135
|
+
| `JUNIPER_DATA_SENTRY_SEND_PII` | No | `false` | Whether Sentry should send personally identifiable information |
|
|
136
|
+
| `JUNIPER_DATA_SENTRY_TRACES_SAMPLE_RATE` | No | `0.1` | Sentry tracing sample rate |
|
|
137
|
+
| `JUNIPER_DATA_METRICS_ENABLED` | No | `false` | Expose `/metrics` for Prometheus scraping |
|
|
138
|
+
| `JUNIPER_DATA_METRICS_TRUSTED_IPS` | No | `["127.0.0.1", "::1"]` | IPs allowed to scrape `/metrics` |
|
|
139
|
+
|
|
140
|
+
## Docker Deployment
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Full stack (recommended) — see juniper-deploy:
|
|
144
|
+
git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
|
|
145
|
+
cd juniper-deploy && docker compose up --build
|
|
146
|
+
|
|
147
|
+
# Standalone:
|
|
148
|
+
docker build -t juniper-data:latest .
|
|
149
|
+
docker run --rm -p 8100:8100 -e JUNIPER_DATA_HOST=0.0.0.0 juniper-data:latest
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
The Dockerfile is multi-stage (Python 3.14-slim builder + runtime). Container health is probed against `/v1/health/ready`.
|
|
153
|
+
|
|
154
|
+
## Dependency Lockfile
|
|
155
|
+
|
|
156
|
+
The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
|
|
157
|
+
|
|
158
|
+
Regenerate after changing dependencies in `pyproject.toml`:
|
|
159
|
+
|
|
160
|
+
```bash
|
|
161
|
+
uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
The ecosystem-wide lockfile-freshness gate enforces regeneration on every PR that touches `pyproject.toml`. If regeneration hits the self-pin trap — `uv pip compile -o requirements.lock` reads the existing `requirements.lock` and carries its pins forward — compile to `/tmp/requirements.lock` instead and `mv` the result into place.
|
|
165
|
+
|
|
166
|
+
## Active Research Components
|
|
167
|
+
|
|
168
|
+
`juniper-data` contributes three research components to the Juniper platform: the **ARC-AGI dataset families** (ARC-AGI-1 and ARC-AGI-2), loadable from the Hugging Face Hub or from local copies and exposed through the same NPZ-artifact contract as the simpler generators, which makes them directly usable as the substrate for comparative architecture-growth experiments; the **named-version dataset registry** (`POST /v1/datasets` with a `name` parameter auto-increments `meta.dataset_version`; `GET /v1/datasets/versions` and `/v1/datasets/latest` resolve the history), which gives experiments reproducible dataset references rather than opaque UUIDs; and the **dataset-API surface** itself — preview, filtering by tags, batch operations, and tag-based metadata queries — which together comprise the operational interface through which platform users compose and curate dataset corpora. The implementation of these surfaces is engineering rather than research; the **availability** of curated datasets and stable versioned references is itself the research artifact.
|
|
169
|
+
|
|
170
|
+
## Quick Start Guide
|
|
171
|
+
|
|
172
|
+
### Prerequisites
|
|
173
|
+
|
|
174
|
+
- Python ≥ 3.12 (Docker image uses 3.14)
|
|
175
|
+
- Conda environment `JuniperData`
|
|
176
|
+
- For ARC-AGI loading from the Hub: internet access at first load; subsequent loads are cached
|
|
177
|
+
|
|
178
|
+
### Installation
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
git clone https://github.com/pcalnon/juniper-data.git
|
|
182
|
+
cd juniper-data
|
|
183
|
+
conda activate JuniperData
|
|
184
|
+
pip install -e ".[all]"
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
The PyPI release is installable via `pip install juniper-data`; the editable-clone form above is the standard for active development. The optional-dependency extras are `api`, `arc-agi`, `equities`, `observability`, `test`, `dev`, and `all`.
|
|
188
|
+
|
|
189
|
+
### Verification
|
|
190
|
+
|
|
191
|
+
Start the service:
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
uvicorn --factory juniper_data.api.app:get_app --reload --port 8100
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
Confirm the service responds:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
curl http://localhost:8100/v1/health
|
|
201
|
+
curl http://localhost:8100/v1/health/ready
|
|
202
|
+
curl http://localhost:8100/v1/generators
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Generate a small dataset directly from Python:
|
|
206
|
+
|
|
207
|
+
```python
|
|
208
|
+
from juniper_data.generators.spiral import SpiralGenerator
|
|
209
|
+
|
|
210
|
+
generator = SpiralGenerator()
|
|
211
|
+
dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### Next Steps
|
|
215
|
+
|
|
216
|
+
- [`docs/QUICK_START.md`](docs/QUICK_START.md) — complete installation guide
|
|
217
|
+
- [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) — comprehensive usage guide
|
|
218
|
+
- [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) — full REST endpoint reference (filtering, batch operations, tagging, versioning)
|
|
219
|
+
- [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy) — Docker Compose orchestration for the full-stack platform
|
|
220
|
+
- [`juniper-ml`](https://pypi.org/project/juniper-ml/) — platform meta-package on PyPI
|
|
221
|
+
|
|
222
|
+
## Research Philosophy
|
|
223
|
+
|
|
224
|
+
The Juniper platform exists to study learning algorithms whose network architecture is not fixed in advance. Its initial anchor is the Cascade-Correlation algorithm of Fahlman and Lebiere (1990), implemented from the primary literature without recourse to higher-level abstractions that elide the algorithm's operational detail. The organising commitment is that algorithm implementations remain inspectable at the level at which they were originally specified: candidate units, correlation objectives, weight-freezing semantics, and the structural events that grow the network are first-class artifacts of the codebase rather than internal details of a library wrapper. This permits comparative work — across algorithms, datasets, and hyperparameter regimes — to be conducted on a known and reproducible substrate.
|
|
225
|
+
|
|
226
|
+
The current platform comprises a Cascade-Correlation training service exposing a REST and WebSocket interface, a dataset-generation service with a named-version registry that includes the ARC-AGI families, a real-time monitoring dashboard for inspecting training dynamics as they occur, and a distributed worker that parallelises candidate-unit training across hosts. Near-term work extends the architectural-growth catalogue beyond Cascade-Correlation, introduces multi-network orchestration for comparative experiments at the level of network populations rather than individual runs, and tightens the dataset–training–monitoring loop into a reproducible research workbench. The longer-term direction is the systematic empirical study of constructive and architecture-growing learning algorithms, with first-class infrastructure for the ablation, comparison, and replication that such a study requires.
|
|
227
|
+
|
|
228
|
+
## Documentation
|
|
229
|
+
|
|
230
|
+
| Document | Purpose |
|
|
231
|
+
|----------|---------|
|
|
232
|
+
| [`docs/DOCUMENTATION_OVERVIEW.md`](docs/DOCUMENTATION_OVERVIEW.md) | Navigation index for all `juniper-data` documentation |
|
|
233
|
+
| [`docs/QUICK_START.md`](docs/QUICK_START.md) | Get running in five minutes |
|
|
234
|
+
| [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) | Comprehensive usage guide |
|
|
235
|
+
| [`docs/REFERENCE.md`](docs/REFERENCE.md) | Configuration, environment variables, and operational reference |
|
|
236
|
+
| [`docs/ENVIRONMENT_SETUP.md`](docs/ENVIRONMENT_SETUP.md) | Conda environment and editable-install setup |
|
|
237
|
+
| [`docs/DEVELOPER_CHEATSHEET.md`](docs/DEVELOPER_CHEATSHEET.md) | Quick-reference card for development tasks |
|
|
238
|
+
| [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) | Complete REST endpoint reference |
|
|
239
|
+
| [`CHANGELOG.md`](CHANGELOG.md) | Version history |
|
|
240
|
+
|
|
241
|
+
## License
|
|
242
|
+
|
|
243
|
+
MIT License — Copyright (c) 2024-2026 Paul Calnon
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
<!-- markdownlint-disable MD013 MD033 MD041 -->
|
|
2
|
+
<!--
|
|
3
|
+
MD013 (line-length): README contains prose paragraphs that intentionally
|
|
4
|
+
exceed the 512-char ecosystem limit. Disabled file-wide
|
|
5
|
+
since wrapping mid-sentence harms PyPI rendering.
|
|
6
|
+
MD033 (no-inline-html): The right-aligned logo + spacing rely on HTML.
|
|
7
|
+
MD041 (first-line-heading): The HTML logo is the first line by design.
|
|
8
|
+
-->
|
|
9
|
+
<div align="right" width="150px" height="150px" valign="top"> <img src="images/Juniper_Logo_150px.png" alt="Juniper" align="right" valign="top" width="150px" /></div>
|
|
10
|
+
<br /> <br /> <br /> <br />
|
|
11
|
+
|
|
12
|
+
# Juniper: Dynamic Neural Network Research Platform
|
|
13
|
+
|
|
14
|
+
Juniper is an AI/ML research platform for investigating dynamic neural network architectures and novel learning paradigms. The project emphasizes ground-up implementations from primary literature, enabling a more transparent exploration of fundamental algorithms.
|
|
15
|
+
|
|
16
|
+
## Juniper Data
|
|
17
|
+
|
|
18
|
+
`juniper-data` is the **dataset-generation service** of the Juniper platform. It is a FastAPI service that produces NPZ-formatted datasets from a catalogue of generators — including the classic two-spiral and concentric-circles problems, XOR and Gaussian mixtures, a CSV/JSON import path, MNIST/Fashion-MNIST, and the ARC-AGI visual-reasoning task families — and serves them through a REST surface that supports a named-version registry, batch creation and export, tag-based filtering, and per-dataset preview. `juniper-data` is the upstream of both `juniper-cascor` (training) and `juniper-canopy` (visualisation): the dataset identifiers it returns are the substrate on which the rest of the platform conducts comparative work.
|
|
19
|
+
|
|
20
|
+
## Distribution
|
|
21
|
+
|
|
22
|
+
`juniper-data` is published on PyPI as **[`juniper-data`](https://pypi.org/project/juniper-data/)**.
|
|
23
|
+
The package is also surfaced through the platform meta-distribution
|
|
24
|
+
**[`juniper-ml`](https://pypi.org/project/juniper-ml/)**, which installs
|
|
25
|
+
the full client stack via `pip install "juniper-ml[all]"`.
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install juniper-data
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Ecosystem Compatibility
|
|
32
|
+
|
|
33
|
+
This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
|
|
34
|
+
Verified compatible versions:
|
|
35
|
+
|
|
36
|
+
| juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
|
|
37
|
+
|--------------|----------------|----------------|-------------|---------------|---------------|
|
|
38
|
+
| 0.6.x | 0.5.x | 0.5.x | >=0.4.1 | >=0.4.0 | >=0.4.0 |
|
|
39
|
+
|
|
40
|
+
For full-stack Docker deployment and integration tests, see [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy).
|
|
41
|
+
|
|
42
|
+
## Architecture
|
|
43
|
+
|
|
44
|
+
`juniper-data` is the **foundational data layer** of the Juniper ecosystem. Both `juniper-cascor` and `juniper-canopy` call `juniper-data` to generate, version, and retrieve datasets.
|
|
45
|
+
|
|
46
|
+
```text
|
|
47
|
+
┌─────────────────────┐ REST+WS ┌──────────────────────┐
|
|
48
|
+
│ juniper-canopy │ ◄──────────────► │ juniper-cascor │
|
|
49
|
+
│ Dashboard │ │ Training Svc │
|
|
50
|
+
│ Port 8050 │ │ Port 8200 │
|
|
51
|
+
└──────────┬──────────┘ └──────────┬───────────┘
|
|
52
|
+
│ REST │ REST
|
|
53
|
+
▼ ▼
|
|
54
|
+
┌──────────────────────────────────────────────────────────────┐
|
|
55
|
+
│ juniper-data ◄── (this service) │
|
|
56
|
+
│ Dataset Service · Port 8100 │
|
|
57
|
+
└──────────────────────────────────────────────────────────────┘
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Data contract**: datasets are served as NPZ archives with the keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full`, all of dtype `float32`.
|
|
61
|
+
|
|
62
|
+
## Related Services
|
|
63
|
+
|
|
64
|
+
| Service | Relationship | Notes |
|
|
65
|
+
|---------|-------------|-------|
|
|
66
|
+
| [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes `juniper-data` for training datasets | Set `JUNIPER_DATA_URL` |
|
|
67
|
+
| [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes `juniper-data` for visualisation data | Set `JUNIPER_DATA_URL` |
|
|
68
|
+
| [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | Python HTTP client for this service | `pip install juniper-data-client` |
|
|
69
|
+
|
|
70
|
+
## Service Configuration
|
|
71
|
+
|
|
72
|
+
Configuration is sourced from `juniper_data/api/settings.py` (Pydantic `BaseSettings`, `env_prefix="JUNIPER_DATA_"`). The complete env-var surface is listed below.
|
|
73
|
+
|
|
74
|
+
| Variable | Required | Default | Description |
|
|
75
|
+
|----------|----------|---------|-------------|
|
|
76
|
+
| `JUNIPER_DATA_HOST` | No | `127.0.0.1` | Bind address (override to `0.0.0.0` for Docker) |
|
|
77
|
+
| `JUNIPER_DATA_PORT` | No | `8100` | Service port |
|
|
78
|
+
| `JUNIPER_DATA_STORAGE_PATH` | No | `./data/datasets` | Filesystem path for persisted dataset artifacts |
|
|
79
|
+
| `JUNIPER_DATA_IMPORT_DIR` | No | `/data/imports` | Filesystem path for CSV/JSON imports |
|
|
80
|
+
| `JUNIPER_DATA_LOG_LEVEL` | No | `INFO` | Log verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
|
|
81
|
+
| `JUNIPER_DATA_LOG_FORMAT` | No | `text` | `text` or `json` (structured logging) |
|
|
82
|
+
| `JUNIPER_DATA_CORS_ORIGINS` | No | `[]` | Allowed CORS origins |
|
|
83
|
+
| `JUNIPER_DATA_API_KEYS` | No | `None` | Comma-separated or JSON-array API keys; authentication disabled when unset; Docker-secrets file path supported via the implicit `*_FILE` convention |
|
|
84
|
+
| `JUNIPER_DATA_RATE_LIMIT_ENABLED` | No | `true` | Enforce per-IP request rate limiting |
|
|
85
|
+
| `JUNIPER_DATA_RATE_LIMIT_REQUESTS_PER_MINUTE` | No | `60` | Per-IP rate limit |
|
|
86
|
+
| `JUNIPER_DATA_SENTRY_DSN` | No | `None` | Sentry DSN for error tracking |
|
|
87
|
+
| `JUNIPER_DATA_SENTRY_SEND_PII` | No | `false` | Whether Sentry should send personally identifiable information |
|
|
88
|
+
| `JUNIPER_DATA_SENTRY_TRACES_SAMPLE_RATE` | No | `0.1` | Sentry tracing sample rate |
|
|
89
|
+
| `JUNIPER_DATA_METRICS_ENABLED` | No | `false` | Expose `/metrics` for Prometheus scraping |
|
|
90
|
+
| `JUNIPER_DATA_METRICS_TRUSTED_IPS` | No | `["127.0.0.1", "::1"]` | IPs allowed to scrape `/metrics` |
|
|
91
|
+
|
|
92
|
+
## Docker Deployment
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
# Full stack (recommended) — see juniper-deploy:
|
|
96
|
+
git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
|
|
97
|
+
cd juniper-deploy && docker compose up --build
|
|
98
|
+
|
|
99
|
+
# Standalone:
|
|
100
|
+
docker build -t juniper-data:latest .
|
|
101
|
+
docker run --rm -p 8100:8100 -e JUNIPER_DATA_HOST=0.0.0.0 juniper-data:latest
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The Dockerfile is multi-stage (Python 3.14-slim builder + runtime). Container health is probed against `/v1/health/ready`.
|
|
105
|
+
|
|
106
|
+
## Dependency Lockfile
|
|
107
|
+
|
|
108
|
+
The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
|
|
109
|
+
|
|
110
|
+
Regenerate after changing dependencies in `pyproject.toml`:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
The ecosystem-wide lockfile-freshness gate enforces regeneration on every PR that touches `pyproject.toml`. Note the self-pin trap: `uv pip compile -o requirements.lock` reads the existing output file and may carry its pins forward. If regeneration hits this, compile to `/tmp/requirements.lock` instead and `mv` the result into place.
|
|
117
|
+
|
|
118
|
+
## Active Research Components
|
|
119
|
+
|
|
120
|
+
`juniper-data` contributes three research components to the Juniper platform: the **ARC-AGI dataset families** (ARC-AGI-1 and ARC-AGI-2), loadable from the Hugging Face Hub or from local copies and exposed through the same NPZ-artifact contract as the simpler generators, which makes them directly usable as the substrate for comparative architecture-growth experiments; the **named-version dataset registry** (`POST /v1/datasets` with a `name` parameter auto-increments `meta.dataset_version`; `GET /v1/datasets/versions` and `/v1/datasets/latest` resolve the history), which gives experiments reproducible dataset references rather than opaque UUIDs; and the **dataset-API surface** itself — preview, filtering by tags, batch operations, and tag-based metadata queries — which together comprise the operational interface through which platform users compose and curate dataset corpora. The implementation of these surfaces is engineering rather than research; the **availability** of curated datasets and stable versioned references is itself the research artifact.
|
|
121
|
+
|
|
122
|
+
## Quick Start Guide
|
|
123
|
+
|
|
124
|
+
### Prerequisites
|
|
125
|
+
|
|
126
|
+
- Python ≥ 3.12 (Docker image uses 3.14)
|
|
127
|
+
- Conda environment `JuniperData`
|
|
128
|
+
- For ARC-AGI loading from the Hub: internet access at first load; subsequent loads are cached
|
|
129
|
+
|
|
130
|
+
### Installation
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
git clone https://github.com/pcalnon/juniper-data.git
|
|
134
|
+
cd juniper-data
|
|
135
|
+
conda activate JuniperData
|
|
136
|
+
pip install -e ".[all]"
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
The PyPI release is installable via `pip install juniper-data`; the editable-clone form above is the standard for active development. The optional-dependency extras are `api`, `arc-agi`, `observability`, `test`, `dev`, and `all`.
|
|
140
|
+
|
|
141
|
+
### Verification
|
|
142
|
+
|
|
143
|
+
Start the service:
|
|
144
|
+
|
|
145
|
+
```bash
|
|
146
|
+
uvicorn --factory juniper_data.api.app:get_app --port 8100 --reload
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Confirm the service responds:
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
curl http://localhost:8100/v1/health
|
|
153
|
+
curl http://localhost:8100/v1/health/ready
|
|
154
|
+
curl http://localhost:8100/v1/generators
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Generate a small dataset directly from Python:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from juniper_data.generators.spiral import SpiralGenerator
|
|
161
|
+
|
|
162
|
+
generator = SpiralGenerator()
|
|
163
|
+
dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Next Steps
|
|
167
|
+
|
|
168
|
+
- [`docs/QUICK_START.md`](docs/QUICK_START.md) — complete installation guide
|
|
169
|
+
- [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) — comprehensive usage guide
|
|
170
|
+
- [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) — full REST endpoint reference (filtering, batch operations, tagging, versioning)
|
|
171
|
+
- [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy) — Docker Compose orchestration for the full-stack platform
|
|
172
|
+
- [`juniper-ml`](https://pypi.org/project/juniper-ml/) — platform meta-package on PyPI
|
|
173
|
+
|
|
174
|
+
## Research Philosophy
|
|
175
|
+
|
|
176
|
+
The Juniper platform exists to study learning algorithms whose network architecture is not fixed in advance. Its initial anchor is the Cascade-Correlation algorithm of Fahlman and Lebiere (1990), implemented from the primary literature without recourse to higher-level abstractions that elide the algorithm's operational detail. The organising commitment is that algorithm implementations remain inspectable at the level at which they were originally specified: candidate units, correlation objectives, weight-freezing semantics, and the structural events that grow the network are first-class artifacts of the codebase rather than internal details of a library wrapper. This permits comparative work — across algorithms, datasets, and hyperparameter regimes — to be conducted on a known and reproducible substrate.
|
|
177
|
+
|
|
178
|
+
The current platform comprises a Cascade-Correlation training service exposing a REST and WebSocket interface, a dataset-generation service with a named-version registry that includes the ARC-AGI families, a real-time monitoring dashboard for inspecting training dynamics as they occur, and a distributed worker that parallelises candidate-unit training across hosts. Near-term work extends the architectural-growth catalogue beyond Cascade-Correlation, introduces multi-network orchestration for comparative experiments at the level of network populations rather than individual runs, and tightens the dataset–training–monitoring loop into a reproducible research workbench. The longer-term direction is the systematic empirical study of constructive and architecture-growing learning algorithms, with first-class infrastructure for the ablation, comparison, and replication that such a study requires.
|
|
179
|
+
|
|
180
|
+
## Documentation
|
|
181
|
+
|
|
182
|
+
| Document | Purpose |
|
|
183
|
+
|----------|---------|
|
|
184
|
+
| [`docs/DOCUMENTATION_OVERVIEW.md`](docs/DOCUMENTATION_OVERVIEW.md) | Navigation index for all `juniper-data` documentation |
|
|
185
|
+
| [`docs/QUICK_START.md`](docs/QUICK_START.md) | Get running in five minutes |
|
|
186
|
+
| [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) | Comprehensive usage guide |
|
|
187
|
+
| [`docs/REFERENCE.md`](docs/REFERENCE.md) | Configuration, environment variables, and operational reference |
|
|
188
|
+
| [`docs/ENVIRONMENT_SETUP.md`](docs/ENVIRONMENT_SETUP.md) | Conda environment and editable-install setup |
|
|
189
|
+
| [`docs/DEVELOPER_CHEATSHEET.md`](docs/DEVELOPER_CHEATSHEET.md) | Quick-reference card for development tasks |
|
|
190
|
+
| [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) | Complete REST endpoint reference |
|
|
191
|
+
| [`CHANGELOG.md`](CHANGELOG.md) | Version history |
|
|
192
|
+
|
|
193
|
+
## License
|
|
194
|
+
|
|
195
|
+
MIT License — Copyright (c) 2024-2026 Paul Calnon
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
Juniper Data - Dataset generation and management service for the Juniper ecosystem.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
+
import importlib.metadata
|
|
5
6
|
import os
|
|
6
7
|
|
|
7
8
|
from dotenv import load_dotenv
|
|
@@ -14,7 +15,14 @@ except ImportError:
|
|
|
14
15
|
ARC_AGI_AVAILABLE = False
|
|
15
16
|
arc_agi = None # type: ignore[assignment]
|
|
16
17
|
|
|
17
|
-
|
|
18
|
+
# Single source of truth: the installed distribution's metadata (OQ-1 of the
|
|
19
|
+
# build-provenance effort — juniper-ml notes/BUILD_PROVENANCE_DESIGN_2026-06-14.md).
|
|
20
|
+
# Falls back to the literal only in a bare source checkout where the package is
|
|
21
|
+
# not installed, so this constant can no longer drift from pyproject's version.
|
|
22
|
+
try:
|
|
23
|
+
__version__ = importlib.metadata.version("juniper-data")
|
|
24
|
+
except importlib.metadata.PackageNotFoundError: # pragma: no cover - source checkout
|
|
25
|
+
__version__ = "0.7.0"
|
|
18
26
|
__author__ = "Paul Calnon"
|
|
19
27
|
|
|
20
28
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
"""FastAPI application factory and configuration."""
|
|
2
2
|
|
|
3
|
+
import functools
|
|
3
4
|
import logging
|
|
4
5
|
from collections.abc import AsyncGenerator
|
|
5
6
|
from contextlib import asynccontextmanager
|
|
@@ -8,12 +9,14 @@ from pathlib import Path
|
|
|
8
9
|
from fastapi import FastAPI, Request
|
|
9
10
|
from fastapi.middleware.cors import CORSMiddleware
|
|
10
11
|
from fastapi.responses import JSONResponse
|
|
12
|
+
from starlette import status
|
|
11
13
|
|
|
12
|
-
from juniper_data import __version__
|
|
14
|
+
from juniper_data import __version__, provenance
|
|
13
15
|
from juniper_data.storage import LocalFSDatasetStore
|
|
14
16
|
|
|
15
17
|
from .middleware import RequestBodyLimitMiddleware, SecurityHeadersMiddleware, SecurityMiddleware
|
|
16
18
|
from .observability import (
|
|
19
|
+
MetricsAuthMiddleware,
|
|
17
20
|
PrometheusMiddleware,
|
|
18
21
|
RequestIdMiddleware,
|
|
19
22
|
configure_logging,
|
|
@@ -37,11 +40,17 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
|
|
|
37
40
|
configure_logging(settings.log_level, settings.log_format, "juniper-data")
|
|
38
41
|
configure_sentry(settings.sentry_dsn, "juniper-data", __version__, send_pii=settings.sentry_send_pii, traces_sample_rate=settings.sentry_traces_sample_rate)
|
|
39
42
|
if settings.metrics_enabled:
|
|
40
|
-
set_build_info("juniper_data", __version__)
|
|
43
|
+
set_build_info("juniper_data", __version__, git_sha=provenance.git_sha(), build_date=provenance.build_date())
|
|
41
44
|
|
|
42
45
|
logger = logging.getLogger("juniper_data")
|
|
43
46
|
logger.info(f"JuniperData API v{__version__} starting")
|
|
44
|
-
|
|
47
|
+
# ``Path.absolute()`` is pure path manipulation (no I/O); the
|
|
48
|
+
# ASYNC240 rule is over-conservative here and flags every
|
|
49
|
+
# ``pathlib.Path`` method without distinguishing stat-bound ones
|
|
50
|
+
# from text-only ones. Lifespan startup is also a one-shot
|
|
51
|
+
# event, not a request handler — even if there were I/O it
|
|
52
|
+
# wouldn't block per-request latency.
|
|
53
|
+
logger.info(f"Storage path: {storage_path.absolute()}") # noqa: ASYNC240
|
|
45
54
|
|
|
46
55
|
yield
|
|
47
56
|
|
|
@@ -115,15 +124,19 @@ def create_app(settings: Settings | None = None) -> FastAPI:
|
|
|
115
124
|
app.include_router(generators.router, prefix="/v1")
|
|
116
125
|
app.include_router(datasets.router, prefix="/v1")
|
|
117
126
|
|
|
118
|
-
# Mount Prometheus metrics endpoint
|
|
127
|
+
# Mount Prometheus metrics endpoint (SEC-16: wrap with trusted-IP
|
|
128
|
+
# auth because ASGI sub-app mounts bypass SecurityMiddleware).
|
|
119
129
|
if settings.metrics_enabled:
|
|
120
|
-
app.mount(
|
|
130
|
+
app.mount(
|
|
131
|
+
"/metrics",
|
|
132
|
+
MetricsAuthMiddleware(get_prometheus_app(), settings.metrics_trusted_ips),
|
|
133
|
+
)
|
|
121
134
|
|
|
122
135
|
@app.exception_handler(ValueError)
|
|
123
136
|
async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
|
|
124
137
|
logging.getLogger("juniper_data").debug("Validation error: %s", exc)
|
|
125
138
|
return JSONResponse(
|
|
126
|
-
status_code=
|
|
139
|
+
status_code=status.HTTP_400_BAD_REQUEST,
|
|
127
140
|
content={"detail": "Invalid request parameters"},
|
|
128
141
|
)
|
|
129
142
|
|
|
@@ -131,11 +144,31 @@ def create_app(settings: Settings | None = None) -> FastAPI:
|
|
|
131
144
|
async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
|
|
132
145
|
logging.getLogger("juniper_data").exception("Unhandled exception")
|
|
133
146
|
return JSONResponse(
|
|
134
|
-
status_code=
|
|
147
|
+
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
|
135
148
|
content={"detail": "Internal server error"},
|
|
136
149
|
)
|
|
137
150
|
|
|
138
151
|
return app
|
|
139
152
|
|
|
140
153
|
|
|
141
|
-
|
|
154
|
+
@functools.lru_cache(maxsize=1)
|
|
155
|
+
def get_app() -> FastAPI:
|
|
156
|
+
"""Return the singleton FastAPI app instance (lazy factory).
|
|
157
|
+
|
|
158
|
+
Use with uvicorn's factory mode::
|
|
159
|
+
|
|
160
|
+
uvicorn --factory juniper_data.api.app:get_app
|
|
161
|
+
|
|
162
|
+
or programmatically::
|
|
163
|
+
|
|
164
|
+
uvicorn.run("juniper_data.api.app:get_app", factory=True)
|
|
165
|
+
|
|
166
|
+
The first call builds the app via :func:`create_app` with default
|
|
167
|
+
settings; subsequent calls return the same instance from
|
|
168
|
+
``functools.lru_cache``. Replaces the previous module-level
|
|
169
|
+
``app = create_app()`` (CLN-JD-03), which read environment variables
|
|
170
|
+
and registered middleware at import time. Tests that need a fresh
|
|
171
|
+
instance with overridden settings should continue to call
|
|
172
|
+
:func:`create_app` directly with explicit ``Settings``.
|
|
173
|
+
"""
|
|
174
|
+
return create_app()
|