juniper-data 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. juniper_data-0.7.0/PKG-INFO +243 -0
  2. juniper_data-0.7.0/README.md +195 -0
  3. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/__init__.py +9 -1
  4. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/__main__.py +2 -1
  5. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/app.py +41 -8
  6. juniper_data-0.7.0/juniper_data/api/constants.py +117 -0
  7. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/middleware.py +43 -25
  8. juniper_data-0.7.0/juniper_data/api/models/health.py +21 -0
  9. juniper_data-0.7.0/juniper_data/api/observability.py +187 -0
  10. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/routes/datasets.py +200 -62
  11. juniper_data-0.7.0/juniper_data/api/routes/generators.py +206 -0
  12. juniper_data-0.7.0/juniper_data/api/routes/health.py +240 -0
  13. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/security.py +86 -14
  14. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/settings.py +36 -0
  15. juniper_data-0.7.0/juniper_data/core/constants.py +41 -0
  16. juniper_data-0.7.0/juniper_data/core/dataset_id.py +61 -0
  17. juniper_data-0.7.0/juniper_data/core/meta.py +145 -0
  18. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/models.py +46 -12
  19. juniper_data-0.7.0/juniper_data/core/scaling.py +92 -0
  20. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/split.py +32 -0
  21. juniper_data-0.7.0/juniper_data/generators/_sequence.py +349 -0
  22. juniper_data-0.7.0/juniper_data/generators/_synthetic.py +115 -0
  23. juniper_data-0.7.0/juniper_data/generators/ar_p/__init__.py +11 -0
  24. juniper_data-0.7.0/juniper_data/generators/ar_p/generator.py +73 -0
  25. juniper_data-0.7.0/juniper_data/generators/ar_p/params.py +38 -0
  26. juniper_data-0.7.0/juniper_data/generators/arc_agi/defaults.py +29 -0
  27. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/generator.py +23 -3
  28. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/params.py +27 -12
  29. juniper_data-0.7.0/juniper_data/generators/checkerboard/defaults.py +24 -0
  30. juniper_data-0.7.0/juniper_data/generators/checkerboard/params.py +46 -0
  31. juniper_data-0.7.0/juniper_data/generators/circles/defaults.py +27 -0
  32. juniper_data-0.7.0/juniper_data/generators/circles/params.py +43 -0
  33. juniper_data-0.7.0/juniper_data/generators/csv_import/defaults.py +20 -0
  34. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/generator.py +3 -2
  35. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/params.py +19 -8
  36. juniper_data-0.7.0/juniper_data/generators/equities/__init__.py +19 -0
  37. juniper_data-0.7.0/juniper_data/generators/equities/defaults.py +61 -0
  38. juniper_data-0.7.0/juniper_data/generators/equities/generator.py +448 -0
  39. juniper_data-0.7.0/juniper_data/generators/equities/params.py +123 -0
  40. juniper_data-0.7.0/juniper_data/generators/equities_seq/__init__.py +19 -0
  41. juniper_data-0.7.0/juniper_data/generators/equities_seq/generator.py +174 -0
  42. juniper_data-0.7.0/juniper_data/generators/equities_seq/params.py +34 -0
  43. juniper_data-0.7.0/juniper_data/generators/gaussian/defaults.py +25 -0
  44. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/params.py +24 -8
  45. juniper_data-0.7.0/juniper_data/generators/irregular_sine/__init__.py +11 -0
  46. juniper_data-0.7.0/juniper_data/generators/irregular_sine/generator.py +90 -0
  47. juniper_data-0.7.0/juniper_data/generators/irregular_sine/params.py +44 -0
  48. juniper_data-0.7.0/juniper_data/generators/mackey_glass/__init__.py +11 -0
  49. juniper_data-0.7.0/juniper_data/generators/mackey_glass/generator.py +80 -0
  50. juniper_data-0.7.0/juniper_data/generators/mackey_glass/params.py +34 -0
  51. juniper_data-0.7.0/juniper_data/generators/mnist/defaults.py +24 -0
  52. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/params.py +15 -6
  53. juniper_data-0.7.0/juniper_data/generators/moon/__init__.py +17 -0
  54. juniper_data-0.7.0/juniper_data/generators/moon/defaults.py +19 -0
  55. juniper_data-0.7.0/juniper_data/generators/moon/generator.py +107 -0
  56. juniper_data-0.7.0/juniper_data/generators/moon/params.py +35 -0
  57. juniper_data-0.7.0/juniper_data/generators/multi_sine/__init__.py +11 -0
  58. juniper_data-0.7.0/juniper_data/generators/multi_sine/generator.py +84 -0
  59. juniper_data-0.7.0/juniper_data/generators/multi_sine/params.py +41 -0
  60. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/defaults.py +1 -1
  61. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/params.py +2 -2
  62. juniper_data-0.7.0/juniper_data/generators/xor/defaults.py +25 -0
  63. juniper_data-0.7.0/juniper_data/generators/xor/params.py +43 -0
  64. juniper_data-0.7.0/juniper_data/provenance.py +33 -0
  65. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/base.py +106 -10
  66. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/cached.py +49 -2
  67. juniper_data-0.7.0/juniper_data/storage/constants.py +47 -0
  68. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/kaggle_store.py +2 -1
  69. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/local_fs.py +101 -21
  70. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/memory.py +9 -1
  71. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/postgres_store.py +8 -3
  72. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/redis_store.py +24 -13
  73. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/api/test_batch_operations.py +59 -0
  74. juniper_data-0.7.0/juniper_data/tests/conftest.py +111 -0
  75. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/fixtures/generate_golden_datasets.py +5 -5
  76. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_api.py +9 -3
  77. juniper_data-0.7.0/juniper_data/tests/integration/test_dataset_generation_metrics_live.py +192 -0
  78. juniper_data-0.7.0/juniper_data/tests/integration/test_dataset_post_total_metric.py +288 -0
  79. juniper_data-0.7.0/juniper_data/tests/integration/test_e2e_synthetic_regression.py +119 -0
  80. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_security_integration.py +9 -3
  81. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_app.py +23 -16
  82. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_routes.py +24 -3
  83. juniper_data-0.7.0/juniper_data/tests/unit/test_ar_p_generator.py +113 -0
  84. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_arc_agi_generator.py +59 -2
  85. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_cached_store.py +158 -0
  86. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_dataset_id.py +14 -5
  87. juniper_data-0.7.0/juniper_data/tests/unit/test_env_file_isolation.py +83 -0
  88. juniper_data-0.7.0/juniper_data/tests/unit/test_equities_generator.py +253 -0
  89. juniper_data-0.7.0/juniper_data/tests/unit/test_equities_seq_generator.py +146 -0
  90. juniper_data-0.7.0/juniper_data/tests/unit/test_health_enhanced.py +314 -0
  91. juniper_data-0.7.0/juniper_data/tests/unit/test_irregular_sine_generator.py +129 -0
  92. juniper_data-0.7.0/juniper_data/tests/unit/test_local_fs_path_traversal.py +105 -0
  93. juniper_data-0.7.0/juniper_data/tests/unit/test_mackey_glass_generator.py +109 -0
  94. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_main.py +3 -2
  95. juniper_data-0.7.0/juniper_data/tests/unit/test_meta_dispatch.py +155 -0
  96. juniper_data-0.7.0/juniper_data/tests/unit/test_metadata_cache.py +212 -0
  97. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_middleware.py +8 -0
  98. juniper_data-0.7.0/juniper_data/tests/unit/test_moon_generator.py +137 -0
  99. juniper_data-0.7.0/juniper_data/tests/unit/test_multi_sine_generator.py +128 -0
  100. juniper_data-0.7.0/juniper_data/tests/unit/test_observability.py +200 -0
  101. juniper_data-0.7.0/juniper_data/tests/unit/test_phase1d_security.py +584 -0
  102. juniper_data-0.7.0/juniper_data/tests/unit/test_phase_2b_data_integrity.py +303 -0
  103. juniper_data-0.7.0/juniper_data/tests/unit/test_phase_2d_metrics.py +233 -0
  104. juniper_data-0.7.0/juniper_data/tests/unit/test_r2_1_2_wire_compat.py +86 -0
  105. juniper_data-0.7.0/juniper_data/tests/unit/test_record_access_concurrency.py +190 -0
  106. juniper_data-0.7.0/juniper_data/tests/unit/test_scaling.py +81 -0
  107. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_security.py +79 -1
  108. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_security_boundaries.py +30 -31
  109. juniper_data-0.7.0/juniper_data/tests/unit/test_sequence_windowing_leakage.py +234 -0
  110. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_split.py +22 -1
  111. juniper_data-0.7.0/juniper_data/tests/unit/test_synthetic_scaling.py +90 -0
  112. juniper_data-0.7.0/juniper_data.egg-info/PKG-INFO +243 -0
  113. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/SOURCES.txt +60 -0
  114. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/requires.txt +10 -1
  115. {juniper_data-0.6.0 → juniper_data-0.7.0}/pyproject.toml +44 -4
  116. juniper_data-0.6.0/PKG-INFO +0 -263
  117. juniper_data-0.6.0/README.md +0 -223
  118. juniper_data-0.6.0/juniper_data/api/models/health.py +0 -26
  119. juniper_data-0.6.0/juniper_data/api/observability.py +0 -227
  120. juniper_data-0.6.0/juniper_data/api/routes/generators.py +0 -117
  121. juniper_data-0.6.0/juniper_data/api/routes/health.py +0 -76
  122. juniper_data-0.6.0/juniper_data/core/dataset_id.py +0 -38
  123. juniper_data-0.6.0/juniper_data/generators/checkerboard/params.py +0 -32
  124. juniper_data-0.6.0/juniper_data/generators/circles/params.py +0 -31
  125. juniper_data-0.6.0/juniper_data/generators/xor/params.py +0 -30
  126. juniper_data-0.6.0/juniper_data/tests/conftest.py +0 -68
  127. juniper_data-0.6.0/juniper_data/tests/unit/test_health_enhanced.py +0 -138
  128. juniper_data-0.6.0/juniper_data/tests/unit/test_observability.py +0 -352
  129. juniper_data-0.6.0/juniper_data.egg-info/PKG-INFO +0 -263
  130. {juniper_data-0.6.0 → juniper_data-0.7.0}/LICENSE +0 -0
  131. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/__init__.py +0 -0
  132. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/models/__init__.py +0 -0
  133. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/api/routes/__init__.py +0 -0
  134. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/__init__.py +0 -0
  135. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/artifacts.py +0 -0
  136. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/core/secrets.py +0 -0
  137. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/__init__.py +0 -0
  138. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/arc_agi/__init__.py +0 -0
  139. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/checkerboard/__init__.py +0 -0
  140. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/checkerboard/generator.py +0 -0
  141. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/circles/__init__.py +0 -0
  142. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/circles/generator.py +0 -0
  143. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/csv_import/__init__.py +0 -0
  144. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/__init__.py +0 -0
  145. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/gaussian/generator.py +0 -0
  146. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/__init__.py +0 -0
  147. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/mnist/generator.py +0 -0
  148. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/__init__.py +0 -0
  149. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/spiral/generator.py +0 -0
  150. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/xor/__init__.py +0 -0
  151. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/generators/xor/generator.py +0 -0
  152. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/__init__.py +0 -0
  153. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/storage/hf_store.py +0 -0
  154. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/__init__.py +0 -0
  155. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/api/__init__.py +0 -0
  156. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/__init__.py +0 -0
  157. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_e2e_workflow.py +0 -0
  158. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_lifecycle_api.py +0 -0
  159. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/integration/test_storage_workflow.py +0 -0
  160. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/__init__.py +0 -0
  161. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/test_generator_benchmarks.py +0 -0
  162. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/performance/test_storage_benchmarks.py +0 -0
  163. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/__init__.py +0 -0
  164. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_api_settings.py +0 -0
  165. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_artifacts.py +0 -0
  166. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_checkerboard_generator.py +0 -0
  167. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_circles_generator.py +0 -0
  168. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_csv_import_generator.py +0 -0
  169. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_dataset_versioning.py +0 -0
  170. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_gaussian_generator.py +0 -0
  171. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_hf_store.py +0 -0
  172. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_init.py +0 -0
  173. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_kaggle_store.py +0 -0
  174. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_lifecycle.py +0 -0
  175. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_mnist_generator.py +0 -0
  176. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_postgres_store.py +0 -0
  177. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_redis_store.py +0 -0
  178. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_secrets.py +0 -0
  179. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_spiral_generator.py +0 -0
  180. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_storage.py +0 -0
  181. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data/tests/unit/test_xor_generator.py +0 -0
  182. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/dependency_links.txt +0 -0
  183. {juniper_data-0.6.0 → juniper_data-0.7.0}/juniper_data.egg-info/top_level.txt +0 -0
  184. {juniper_data-0.6.0 → juniper_data-0.7.0}/setup.cfg +0 -0
@@ -0,0 +1,243 @@
1
+ Metadata-Version: 2.4
2
+ Name: juniper-data
3
+ Version: 0.7.0
4
+ Summary: Dataset generation and management service for the Juniper ecosystem
5
+ Author: Paul Calnon
6
+ License: MIT
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: cachetools>=5.3.0
11
+ Requires-Dist: numpy>=1.24.0
12
+ Requires-Dist: pydantic>=2.0.0
13
+ Requires-Dist: python-dotenv>=1.0.0
14
+ Provides-Extra: arc-agi
15
+ Requires-Dist: arc-agi>=0.9.0; extra == "arc-agi"
16
+ Provides-Extra: equities
17
+ Requires-Dist: yfinance>=0.2.40; extra == "equities"
18
+ Requires-Dist: pandas>=2.0.0; extra == "equities"
19
+ Provides-Extra: api
20
+ Requires-Dist: fastapi>=0.100.0; extra == "api"
21
+ Requires-Dist: starlette>=1.0.1; extra == "api"
22
+ Requires-Dist: uvicorn[standard]>=0.23.0; extra == "api"
23
+ Requires-Dist: pydantic-settings>=2.0.0; extra == "api"
24
+ Requires-Dist: juniper-observability>=0.4.0; extra == "api"
25
+ Provides-Extra: test
26
+ Requires-Dist: pytest>=7.0.0; extra == "test"
27
+ Requires-Dist: pytest-cov>=4.0.0; extra == "test"
28
+ Requires-Dist: pytest-timeout>=2.2.0; extra == "test"
29
+ Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
30
+ Requires-Dist: pytest-benchmark>=4.0.0; extra == "test"
31
+ Requires-Dist: hypothesis>=6.0.0; extra == "test"
32
+ Requires-Dist: httpx>=0.24.0; extra == "test"
33
+ Requires-Dist: coverage[toml]>=7.0.0; extra == "test"
34
+ Requires-Dist: juniper-data-client>=0.3.0; extra == "test"
35
+ Requires-Dist: PyYAML>=6.0; extra == "test"
36
+ Provides-Extra: observability
37
+ Requires-Dist: prometheus-client>=0.20.0; extra == "observability"
38
+ Requires-Dist: sentry-sdk[fastapi]>=2.0.0; extra == "observability"
39
+ Provides-Extra: dev
40
+ Requires-Dist: ruff>=0.9.0; extra == "dev"
41
+ Requires-Dist: mypy>=1.0.0; extra == "dev"
42
+ Requires-Dist: bandit[sarif]>=1.9.4; extra == "dev"
43
+ Requires-Dist: pip-audit>=2.7.0; extra == "dev"
44
+ Requires-Dist: pre-commit>=3.0.0; extra == "dev"
45
+ Provides-Extra: all
46
+ Requires-Dist: juniper-data[api,arc-agi,dev,equities,observability,test]; extra == "all"
47
+ Dynamic: license-file
48
+
49
+ <!-- markdownlint-disable MD013 MD033 MD041 -->
50
+ <!--
51
+ MD013 (line-length): README contains prose paragraphs that intentionally
52
+ exceed the 512-char ecosystem limit. Disabled file-wide
53
+ since wrapping mid-sentence harms PyPI rendering.
54
+ MD033 (no-inline-html): The right-aligned logo + spacing rely on HTML.
55
+ MD041 (first-line-heading): The HTML logo is the first line by design.
56
+ -->
57
+ <div align="right" width="150px" height="150px" valign="top"> <img src="images/Juniper_Logo_150px.png" alt="Juniper" align="right" valign="top" width="150px" /></div>
58
+ <br /> <br /> <br /> <br />
59
+
60
+ # Juniper: Dynamic Neural Network Research Platform
61
+
62
+ Juniper is an AI/ML research platform for investigating dynamic neural network architectures and novel learning paradigms. The project emphasizes ground-up implementations from primary literature, enabling a more transparent exploration of fundamental algorithms.
63
+
64
+ ## Juniper Data
65
+
66
+ `juniper-data` is the **dataset-generation service** of the Juniper platform. It is a FastAPI service that produces NPZ-formatted datasets from a catalogue of generators — including the classic two-spiral and concentric-circles problems, XOR and Gaussian mixtures, a CSV/JSON import path, MNIST/Fashion-MNIST, and the ARC-AGI visual-reasoning task families — and serves them through a REST surface that supports a named-version registry, batch creation and export, tag-based filtering, and per-dataset preview. `juniper-data` is the upstream of both `juniper-cascor` (training) and `juniper-canopy` (visualisation): the dataset identifiers it returns are the substrate on which the rest of the platform conducts comparative work.
67
+
68
+ ## Distribution
69
+
70
+ `juniper-data` is published on PyPI as **[`juniper-data`](https://pypi.org/project/juniper-data/)**.
71
+ The package is also surfaced through the platform meta-distribution
72
+ **[`juniper-ml`](https://pypi.org/project/juniper-ml/)**, which installs
73
+ the full client stack via `pip install juniper-ml[all]`.
74
+
75
+ ```bash
76
+ pip install juniper-data
77
+ ```
78
+
79
+ ## Ecosystem Compatibility
80
+
81
+ This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
82
+ Verified compatible versions:
83
+
84
+ | juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
85
+ |--------------|----------------|----------------|-------------|---------------|---------------|
86
+ | 0.6.x | 0.5.x | 0.5.x | >=0.4.1 | >=0.4.0 | >=0.4.0 |
87
+
88
+ For full-stack Docker deployment and integration tests, see [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy).
89
+
90
+ ## Architecture
91
+
92
+ `juniper-data` is the **foundational data layer** of the Juniper ecosystem. Both `juniper-cascor` and `juniper-canopy` call `juniper-data` to generate, version, and retrieve datasets.
93
+
94
+ ```text
95
+ ┌─────────────────────┐ REST+WS ┌──────────────────────┐
96
+ │ juniper-canopy │ ◄──────────────► │ juniper-cascor │
97
+ │ Dashboard │ │ Training Svc │
98
+ │ Port 8050 │ │ Port 8200 │
99
+ └──────────┬──────────┘ └──────────┬───────────┘
100
+ │ REST │ REST
101
+ ▼ ▼
102
+ ┌──────────────────────────────────────────────────────────────┐
103
+ │ juniper-data ◄── (this service) │
104
+ │ Dataset Service · Port 8100 │
105
+ └──────────────────────────────────────────────────────────────┘
106
+ ```
107
+
108
+ **Data contract**: datasets are served as NPZ archives with the keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full`, all of dtype `float32`.
109
+
110
+ ## Related Services
111
+
112
+ | Service | Relationship | Notes |
113
+ |---------|-------------|-------|
114
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes `juniper-data` for training datasets | Set `JUNIPER_DATA_URL` |
115
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes `juniper-data` for visualisation data | Set `JUNIPER_DATA_URL` |
116
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | Python HTTP client for this service | `pip install juniper-data-client` |
117
+
118
+ ## Service Configuration
119
+
120
+ Configuration is sourced from `juniper_data/api/settings.py` (Pydantic `BaseSettings`, `env_prefix="JUNIPER_DATA_"`). The complete env-var surface is listed below.
121
+
122
+ | Variable | Required | Default | Description |
123
+ |----------|----------|---------|-------------|
124
+ | `JUNIPER_DATA_HOST` | No | `127.0.0.1` | Bind address (override to `0.0.0.0` for Docker) |
125
+ | `JUNIPER_DATA_PORT` | No | `8100` | Service port |
126
+ | `JUNIPER_DATA_STORAGE_PATH` | No | `./data/datasets` | Filesystem path for persisted dataset artifacts |
127
+ | `JUNIPER_DATA_IMPORT_DIR` | No | `/data/imports` | Filesystem path for CSV/JSON imports |
128
+ | `JUNIPER_DATA_LOG_LEVEL` | No | `INFO` | Log verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
129
+ | `JUNIPER_DATA_LOG_FORMAT` | No | `text` | `text` or `json` (structured logging) |
130
+ | `JUNIPER_DATA_CORS_ORIGINS` | No | `[]` | Allowed CORS origins |
131
+ | `JUNIPER_DATA_API_KEYS` | No | `None` | Comma-separated or JSON-array API keys; authentication disabled when unset; Docker-secrets file path supported via the implicit `*_FILE` convention |
132
+ | `JUNIPER_DATA_RATE_LIMIT_ENABLED` | No | `true` | Enforce per-IP request rate limiting |
133
+ | `JUNIPER_DATA_RATE_LIMIT_REQUESTS_PER_MINUTE` | No | `60` | Per-IP rate limit |
134
+ | `JUNIPER_DATA_SENTRY_DSN` | No | `None` | Sentry DSN for error tracking |
135
+ | `JUNIPER_DATA_SENTRY_SEND_PII` | No | `false` | Whether Sentry should send personally identifiable information |
136
+ | `JUNIPER_DATA_SENTRY_TRACES_SAMPLE_RATE` | No | `0.1` | Sentry tracing sample rate |
137
+ | `JUNIPER_DATA_METRICS_ENABLED` | No | `false` | Expose `/metrics` for Prometheus scraping |
138
+ | `JUNIPER_DATA_METRICS_TRUSTED_IPS` | No | `["127.0.0.1", "::1"]` | IPs allowed to scrape `/metrics` |
139
+
140
+ ## Docker Deployment
141
+
142
+ ```bash
143
+ # Full stack (recommended) — see juniper-deploy:
144
+ git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
145
+ cd juniper-deploy && docker compose up --build
146
+
147
+ # Standalone:
148
+ docker build -t juniper-data:latest .
149
+ docker run --rm -p 8100:8100 -e JUNIPER_DATA_HOST=0.0.0.0 juniper-data:latest
150
+ ```
151
+
152
+ The Dockerfile is multi-stage (Python 3.14-slim builder + runtime). Container health is probed against `/v1/health/ready`.
153
+
154
+ ## Dependency Lockfile
155
+
156
+ The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
157
+
158
+ Regenerate after changing dependencies in `pyproject.toml`:
159
+
160
+ ```bash
161
+ uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
162
+ ```
163
+
164
+ The ecosystem-wide lockfile-freshness gate enforces regeneration on every PR that touches `pyproject.toml`; if regeneration triggers the self-pin trap of `uv pip compile -o requirements.lock` reading the existing file, compile to `/tmp/requirements.lock` and `mv` into place.
165
+
166
+ ## Active Research Components
167
+
168
+ `juniper-data` contributes three research components to the Juniper platform: the **ARC-AGI dataset families** (ARC-AGI-1 and ARC-AGI-2), loadable from the Hugging Face Hub or from local copies and exposed through the same NPZ-artifact contract as the simpler generators, which makes them directly usable as the substrate for comparative architecture-growth experiments; the **named-version dataset registry** (`POST /v1/datasets` with a `name` parameter auto-increments `meta.dataset_version`; `GET /v1/datasets/versions` and `/v1/datasets/latest` resolve the history), which gives experiments reproducible dataset references rather than opaque UUIDs; and the **dataset-API surface** itself — preview, filtering by tags, batch operations, and tag-based metadata queries — which together comprise the operational interface through which platform users compose and curate dataset corpora. The implementation of these surfaces is engineering rather than research; the **availability** of curated datasets and stable versioned references is itself the research artifact.
169
+
170
+ ## Quick Start Guide
171
+
172
+ ### Prerequisites
173
+
174
+ - Python ≥ 3.12 (Docker image uses 3.14)
175
+ - Conda environment `JuniperData`
176
+ - For ARC-AGI loading from the Hub: internet access at first load; subsequent loads are cached
177
+
178
+ ### Installation
179
+
180
+ ```bash
181
+ git clone https://github.com/pcalnon/juniper-data.git
182
+ cd juniper-data
183
+ conda activate JuniperData
184
+ pip install -e ".[all]"
185
+ ```
186
+
187
+ The PyPI release is installable via `pip install juniper-data`; the editable-clone form above is the standard for active development. The optional-dependency extras are `api`, `arc-agi`, `equities`, `observability`, `test`, `dev`, and `all`.
188
+
189
+ ### Verification
190
+
191
+ Start the service:
192
+
193
+ ```bash
194
+ uvicorn --factory juniper_data.api.app:get_app --port 8100 --reload
195
+ ```
196
+
197
+ Confirm the service responds:
198
+
199
+ ```bash
200
+ curl http://localhost:8100/v1/health
201
+ curl http://localhost:8100/v1/health/ready
202
+ curl http://localhost:8100/v1/generators
203
+ ```
204
+
205
+ Generate a small dataset directly from Python:
206
+
207
+ ```python
208
+ from juniper_data.generators.spiral import SpiralGenerator
209
+
210
+ generator = SpiralGenerator()
211
+ dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
212
+ ```
213
+
214
+ ### Next Steps
215
+
216
+ - [`docs/QUICK_START.md`](docs/QUICK_START.md) — complete installation guide
217
+ - [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) — comprehensive usage guide
218
+ - [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) — full REST endpoint reference (filtering, batch operations, tagging, versioning)
219
+ - [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy) — Docker Compose orchestration for the full-stack platform
220
+ - [`juniper-ml`](https://pypi.org/project/juniper-ml/) — platform meta-package on PyPI
221
+
222
+ ## Research Philosophy
223
+
224
+ The Juniper platform exists to study learning algorithms whose network architecture is not fixed in advance. Its initial anchor is the Cascade-Correlation algorithm of Fahlman and Lebiere (1990), implemented from the primary literature without recourse to higher-level abstractions that elide the algorithm's operational detail. The organising commitment is that algorithm implementations remain inspectable at the level at which they were originally specified: candidate units, correlation objectives, weight-freezing semantics, and the structural events that grow the network are first-class artifacts of the codebase rather than internal details of a library wrapper. This permits comparative work — across algorithms, datasets, and hyperparameter regimes — to be conducted on a known and reproducible substrate.
225
+
226
+ The current platform comprises a Cascade-Correlation training service exposing a REST and WebSocket interface, a dataset-generation service with a named-version registry that includes the ARC-AGI families, a real-time monitoring dashboard for inspecting training dynamics as they occur, and a distributed worker that parallelises candidate-unit training across hosts. Near-term work extends the architectural-growth catalogue beyond Cascade-Correlation, introduces multi-network orchestration for comparative experiments at the level of network populations rather than individual runs, and tightens the dataset–training–monitoring loop into a reproducible research workbench. The longer-term direction is the systematic empirical study of constructive and architecture-growing learning algorithms, with first-class infrastructure for the ablation, comparison, and replication that such a study requires.
227
+
228
+ ## Documentation
229
+
230
+ | Document | Purpose |
231
+ |----------|---------|
232
+ | [`docs/DOCUMENTATION_OVERVIEW.md`](docs/DOCUMENTATION_OVERVIEW.md) | Navigation index for all `juniper-data` documentation |
233
+ | [`docs/QUICK_START.md`](docs/QUICK_START.md) | Get running in five minutes |
234
+ | [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) | Comprehensive usage guide |
235
+ | [`docs/REFERENCE.md`](docs/REFERENCE.md) | Configuration, environment variables, and operational reference |
236
+ | [`docs/ENVIRONMENT_SETUP.md`](docs/ENVIRONMENT_SETUP.md) | Conda environment and editable-install setup |
237
+ | [`docs/DEVELOPER_CHEATSHEET.md`](docs/DEVELOPER_CHEATSHEET.md) | Quick-reference card for development tasks |
238
+ | [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) | Complete REST endpoint reference |
239
+ | [`CHANGELOG.md`](CHANGELOG.md) | Version history |
240
+
241
+ ## License
242
+
243
+ MIT License — Copyright (c) 2024-2026 Paul Calnon
@@ -0,0 +1,195 @@
1
+ <!-- markdownlint-disable MD013 MD033 MD041 -->
2
+ <!--
3
+ MD013 (line-length): README contains prose paragraphs that intentionally
4
+ exceed the 512-char ecosystem limit. Disabled file-wide
5
+ since wrapping mid-sentence harms PyPI rendering.
6
+ MD033 (no-inline-html): The right-aligned logo + spacing rely on HTML.
7
+ MD041 (first-line-heading): The HTML logo is the first line by design.
8
+ -->
9
+ <div align="right" width="150px" height="150px" valign="top"> <img src="images/Juniper_Logo_150px.png" alt="Juniper" align="right" valign="top" width="150px" /></div>
10
+ <br /> <br /> <br /> <br />
11
+
12
+ # Juniper: Dynamic Neural Network Research Platform
13
+
14
+ Juniper is an AI/ML research platform for investigating dynamic neural network architectures and novel learning paradigms. The project emphasizes ground-up implementations from primary literature, enabling a more transparent exploration of fundamental algorithms.
15
+
16
+ ## Juniper Data
17
+
18
+ `juniper-data` is the **dataset-generation service** of the Juniper platform. It is a FastAPI service that produces NPZ-formatted datasets from a catalogue of generators — including the classic two-spiral and concentric-circles problems, XOR and Gaussian mixtures, a CSV/JSON import path, MNIST/Fashion-MNIST, and the ARC-AGI visual-reasoning task families — and serves them through a REST surface that supports a named-version registry, batch creation and export, tag-based filtering, and per-dataset preview. `juniper-data` is the upstream of both `juniper-cascor` (training) and `juniper-canopy` (visualisation): the dataset identifiers it returns are the substrate on which the rest of the platform conducts comparative work.
19
+
20
+ ## Distribution
21
+
22
+ `juniper-data` is published on PyPI as **[`juniper-data`](https://pypi.org/project/juniper-data/)**.
23
+ The package is also surfaced through the platform meta-distribution
24
+ **[`juniper-ml`](https://pypi.org/project/juniper-ml/)**, which installs
25
+ the full client stack via `pip install "juniper-ml[all]"`.
26
+
27
+ ```bash
28
+ pip install juniper-data
29
+ ```
30
+
31
+ ## Ecosystem Compatibility
32
+
33
+ This service is part of the [Juniper](https://github.com/pcalnon/juniper-ml) ecosystem.
34
+ Verified compatible versions:
35
+
36
+ | juniper-data | juniper-cascor | juniper-canopy | data-client | cascor-client | cascor-worker |
37
+ |--------------|----------------|----------------|-------------|---------------|---------------|
38
+ | 0.6.x | 0.5.x | 0.5.x | >=0.4.1 | >=0.4.0 | >=0.4.0 |
39
+
40
+ For full-stack Docker deployment and integration tests, see [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy).
41
+
42
+ ## Architecture
43
+
44
+ `juniper-data` is the **foundational data layer** of the Juniper ecosystem. Both `juniper-cascor` and `juniper-canopy` call `juniper-data` to generate, version, and retrieve datasets.
45
+
46
+ ```text
47
+ ┌─────────────────────┐ REST+WS ┌──────────────────────┐
48
+ │ juniper-canopy │ ◄──────────────► │ juniper-cascor │
49
+ │ Dashboard │ │ Training Svc │
50
+ │ Port 8050 │ │ Port 8200 │
51
+ └──────────┬──────────┘ └──────────┬───────────┘
52
+ │ REST │ REST
53
+ ▼ ▼
54
+ ┌──────────────────────────────────────────────────────────────┐
55
+ │ juniper-data ◄── (this service) │
56
+ │ Dataset Service · Port 8100 │
57
+ └──────────────────────────────────────────────────────────────┘
58
+ ```
59
+
60
+ **Data contract**: datasets are served as NPZ archives with the keys `X_train`, `y_train`, `X_test`, `y_test`, `X_full`, `y_full`, all of dtype `float32`.
61
+
62
+ ## Related Services
63
+
64
+ | Service | Relationship | Notes |
65
+ |---------|-------------|-------|
66
+ | [juniper-cascor](https://github.com/pcalnon/juniper-cascor) | Consumes `juniper-data` for training datasets | Set `JUNIPER_DATA_URL` |
67
+ | [juniper-canopy](https://github.com/pcalnon/juniper-canopy) | Consumes `juniper-data` for visualisation data | Set `JUNIPER_DATA_URL` |
68
+ | [juniper-data-client](https://github.com/pcalnon/juniper-data-client) | Python HTTP client for this service | `pip install juniper-data-client` |
69
+
70
+ ## Service Configuration
71
+
72
+ Configuration is sourced from `juniper_data/api/settings.py` (Pydantic `BaseSettings`, `env_prefix="JUNIPER_DATA_"`). The complete env-var surface is listed below.
73
+
74
+ | Variable | Required | Default | Description |
75
+ |----------|----------|---------|-------------|
76
+ | `JUNIPER_DATA_HOST` | No | `127.0.0.1` | Bind address (override to `0.0.0.0` for Docker) |
77
+ | `JUNIPER_DATA_PORT` | No | `8100` | Service port |
78
+ | `JUNIPER_DATA_STORAGE_PATH` | No | `./data/datasets` | Filesystem path for persisted dataset artifacts |
79
+ | `JUNIPER_DATA_IMPORT_DIR` | No | `/data/imports` | Filesystem path for CSV/JSON imports |
80
+ | `JUNIPER_DATA_LOG_LEVEL` | No | `INFO` | Log verbosity (`DEBUG`, `INFO`, `WARNING`, `ERROR`) |
81
+ | `JUNIPER_DATA_LOG_FORMAT` | No | `text` | `text` or `json` (structured logging) |
82
+ | `JUNIPER_DATA_CORS_ORIGINS` | No | `[]` | Allowed CORS origins |
83
+ | `JUNIPER_DATA_API_KEYS` | No | `None` | Comma-separated or JSON-array API keys; authentication disabled when unset; Docker-secrets file path supported via the implicit `*_FILE` convention |
84
+ | `JUNIPER_DATA_RATE_LIMIT_ENABLED` | No | `true` | Enforce per-IP request rate limiting |
85
+ | `JUNIPER_DATA_RATE_LIMIT_REQUESTS_PER_MINUTE` | No | `60` | Per-IP rate limit |
86
+ | `JUNIPER_DATA_SENTRY_DSN` | No | `None` | Sentry DSN for error tracking |
87
+ | `JUNIPER_DATA_SENTRY_SEND_PII` | No | `false` | Whether Sentry should send personally identifiable information |
88
+ | `JUNIPER_DATA_SENTRY_TRACES_SAMPLE_RATE` | No | `0.1` | Sentry tracing sample rate |
89
+ | `JUNIPER_DATA_METRICS_ENABLED` | No | `false` | Expose `/metrics` for Prometheus scraping |
90
+ | `JUNIPER_DATA_METRICS_TRUSTED_IPS` | No | `["127.0.0.1", "::1"]` | IPs allowed to scrape `/metrics` |
91
+
92
+ ## Docker Deployment
93
+
94
+ ```bash
95
+ # Full stack (recommended) — see juniper-deploy:
96
+ git clone https://github.com/pcalnon/juniper-deploy.git # (private repository)
97
+ cd juniper-deploy && docker compose up --build
98
+
99
+ # Standalone:
100
+ docker build -t juniper-data:latest .
101
+ docker run --rm -p 8100:8100 -e JUNIPER_DATA_HOST=0.0.0.0 juniper-data:latest
102
+ ```
103
+
104
+ The Dockerfile is multi-stage (Python 3.14-slim builder + runtime). Container health is probed against `/v1/health/ready`.
105
+
106
+ ## Dependency Lockfile
107
+
108
+ The `requirements.lock` file pins exact dependency versions for reproducible Docker builds. The `pyproject.toml` retains flexible `>=` ranges for local development.
109
+
110
+ Regenerate after changing dependencies in `pyproject.toml`:
111
+
112
+ ```bash
113
+ uv pip compile pyproject.toml --extra api --extra observability -o requirements.lock
114
+ ```
115
+
116
+ The ecosystem-wide lockfile-freshness gate enforces regeneration on every PR that touches `pyproject.toml`; if regeneration triggers the self-pin trap of `uv pip compile -o requirements.lock` reading the existing file, compile to `/tmp/requirements.lock` and `mv` into place.
117
+
118
+ ## Active Research Components
119
+
120
+ `juniper-data` contributes three research components to the Juniper platform: the **ARC-AGI dataset families** (ARC-AGI-1 and ARC-AGI-2), loadable from the Hugging Face Hub or from local copies and exposed through the same NPZ-artifact contract as the simpler generators, which makes them directly usable as the substrate for comparative architecture-growth experiments; the **named-version dataset registry** (`POST /v1/datasets` with a `name` parameter auto-increments `meta.dataset_version`; `GET /v1/datasets/versions` and `/v1/datasets/latest` resolve the history), which gives experiments reproducible dataset references rather than opaque UUIDs; and the **dataset-API surface** itself — preview, filtering by tags, batch operations, and tag-based metadata queries — which together comprise the operational interface through which platform users compose and curate dataset corpora. The implementation of these surfaces is engineering rather than research; the **availability** of curated datasets and stable versioned references is itself the research artifact.
121
+
122
+ ## Quick Start Guide
123
+
124
+ ### Prerequisites
125
+
126
+ - Python ≥ 3.12 (Docker image uses 3.14)
127
+ - Conda environment `JuniperData`
128
+ - For ARC-AGI loading from the Hub: internet access at first load; subsequent loads are cached
129
+
130
+ ### Installation
131
+
132
+ ```bash
133
+ git clone https://github.com/pcalnon/juniper-data.git
134
+ cd juniper-data
135
+ conda activate JuniperData
136
+ pip install -e ".[all]"
137
+ ```
138
+
139
+ The PyPI release is installable via `pip install juniper-data`; the editable-clone form above is the standard for active development. The optional-dependency extras are `api`, `arc-agi`, `observability`, `test`, `dev`, and `all`.
140
+
141
+ ### Verification
142
+
143
+ Start the service:
144
+
145
+ ```bash
146
+ uvicorn --factory juniper_data.api.app:get_app --port 8100 --reload
147
+ ```
148
+
149
+ Confirm the service responds:
150
+
151
+ ```bash
152
+ curl http://localhost:8100/v1/health
153
+ curl http://localhost:8100/v1/health/ready
154
+ curl http://localhost:8100/v1/generators
155
+ ```
156
+
157
+ Generate a small dataset directly from Python:
158
+
159
+ ```python
160
+ from juniper_data.generators.spiral import SpiralGenerator
161
+
162
+ generator = SpiralGenerator()
163
+ dataset = generator.generate(n_points=100, n_spirals=2, noise=0.1)
164
+ ```
165
+
166
+ ### Next Steps
167
+
168
+ - [`docs/QUICK_START.md`](docs/QUICK_START.md) — complete installation guide
169
+ - [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) — comprehensive usage guide
170
+ - [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) — full REST endpoint reference (filtering, batch operations, tagging, versioning)
171
+ - [`juniper-deploy`](https://github.com/pcalnon/juniper-deploy) — Docker Compose orchestration for the full-stack platform
172
+ - [`juniper-ml`](https://pypi.org/project/juniper-ml/) — platform meta-package on PyPI
173
+
174
+ ## Research Philosophy
175
+
176
+ The Juniper platform exists to study learning algorithms whose network architecture is not fixed in advance. Its initial anchor is the Cascade-Correlation algorithm of Fahlman and Lebiere (1990), implemented from the primary literature without recourse to higher-level abstractions that elide the algorithm's operational detail. The organising commitment is that algorithm implementations remain inspectable at the level at which they were originally specified: candidate units, correlation objectives, weight-freezing semantics, and the structural events that grow the network are first-class artifacts of the codebase rather than internal details of a library wrapper. This permits comparative work — across algorithms, datasets, and hyperparameter regimes — to be conducted on a known and reproducible substrate.
177
+
178
+ The current platform comprises a Cascade-Correlation training service exposing a REST and WebSocket interface, a dataset-generation service with a named-version registry that includes the ARC-AGI families, a real-time monitoring dashboard for inspecting training dynamics as they occur, and a distributed worker that parallelises candidate-unit training across hosts. Near-term work extends the architectural-growth catalogue beyond Cascade-Correlation, introduces multi-network orchestration for comparative experiments at the level of network populations rather than individual runs, and tightens the dataset–training–monitoring loop into a reproducible research workbench. The longer-term direction is the systematic empirical study of constructive and architecture-growing learning algorithms, with first-class infrastructure for the ablation, comparison, and replication that such a study requires.
179
+
180
+ ## Documentation
181
+
182
+ | Document | Purpose |
183
+ |----------|---------|
184
+ | [`docs/DOCUMENTATION_OVERVIEW.md`](docs/DOCUMENTATION_OVERVIEW.md) | Navigation index for all `juniper-data` documentation |
185
+ | [`docs/QUICK_START.md`](docs/QUICK_START.md) | Get running in five minutes |
186
+ | [`docs/USER_MANUAL.md`](docs/USER_MANUAL.md) | Comprehensive usage guide |
187
+ | [`docs/REFERENCE.md`](docs/REFERENCE.md) | Configuration, environment variables, and operational reference |
188
+ | [`docs/ENVIRONMENT_SETUP.md`](docs/ENVIRONMENT_SETUP.md) | Conda environment and editable-install setup |
189
+ | [`docs/DEVELOPER_CHEATSHEET.md`](docs/DEVELOPER_CHEATSHEET.md) | Quick-reference card for development tasks |
190
+ | [`docs/api/JUNIPER_DATA_API.md`](docs/api/JUNIPER_DATA_API.md) | Complete REST endpoint reference |
191
+ | [`CHANGELOG.md`](CHANGELOG.md) | Version history |
192
+
193
+ ## License
194
+
195
+ MIT License — Copyright (c) 2024-2026 Paul Calnon
@@ -2,6 +2,7 @@
2
2
  Juniper Data - Dataset generation and management service for the Juniper ecosystem.
3
3
  """
4
4
 
5
+ import importlib.metadata
5
6
  import os
6
7
 
7
8
  from dotenv import load_dotenv
@@ -14,7 +15,14 @@ except ImportError:
14
15
  ARC_AGI_AVAILABLE = False
15
16
  arc_agi = None # type: ignore[assignment]
16
17
 
17
- __version__ = "0.6.0"
18
+ # Single source of truth: the installed distribution's metadata (OQ-1 of the
19
+ # build-provenance effort — juniper-ml notes/BUILD_PROVENANCE_DESIGN_2026-06-14.md).
20
+ # Falls back to the literal only in a bare source checkout where the package is
21
+ # not installed, so this constant can no longer drift from pyproject's version.
22
+ try:
23
+ __version__ = importlib.metadata.version("juniper-data")
24
+ except importlib.metadata.PackageNotFoundError: # pragma: no cover - source checkout
25
+ __version__ = "0.7.0"
18
26
  __author__ = "Paul Calnon"
19
27
 
20
28
 
@@ -64,7 +64,8 @@ def main() -> int:
64
64
  os.environ["JUNIPER_DATA_STORAGE_PATH"] = args.storage_path
65
65
 
66
66
  uvicorn.run(
67
- "juniper_data.api.app:app",
67
+ "juniper_data.api.app:get_app",
68
+ factory=True,
68
69
  host=host,
69
70
  port=port,
70
71
  log_level=log_level,
@@ -1,5 +1,6 @@
1
1
  """FastAPI application factory and configuration."""
2
2
 
3
+ import functools
3
4
  import logging
4
5
  from collections.abc import AsyncGenerator
5
6
  from contextlib import asynccontextmanager
@@ -8,12 +9,14 @@ from pathlib import Path
8
9
  from fastapi import FastAPI, Request
9
10
  from fastapi.middleware.cors import CORSMiddleware
10
11
  from fastapi.responses import JSONResponse
12
+ from starlette import status
11
13
 
12
- from juniper_data import __version__
14
+ from juniper_data import __version__, provenance
13
15
  from juniper_data.storage import LocalFSDatasetStore
14
16
 
15
17
  from .middleware import RequestBodyLimitMiddleware, SecurityHeadersMiddleware, SecurityMiddleware
16
18
  from .observability import (
19
+ MetricsAuthMiddleware,
17
20
  PrometheusMiddleware,
18
21
  RequestIdMiddleware,
19
22
  configure_logging,
@@ -37,11 +40,17 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
37
40
  configure_logging(settings.log_level, settings.log_format, "juniper-data")
38
41
  configure_sentry(settings.sentry_dsn, "juniper-data", __version__, send_pii=settings.sentry_send_pii, traces_sample_rate=settings.sentry_traces_sample_rate)
39
42
  if settings.metrics_enabled:
40
- set_build_info("juniper_data", __version__)
43
+ set_build_info("juniper_data", __version__, git_sha=provenance.git_sha(), build_date=provenance.build_date())
41
44
 
42
45
  logger = logging.getLogger("juniper_data")
43
46
  logger.info(f"JuniperData API v{__version__} starting")
44
- logger.info(f"Storage path: {storage_path.absolute()}")
47
+ # ``Path.absolute()`` is pure path manipulation (no I/O); the
48
+ # ASYNC240 rule is over-conservative here and flags every
49
+ # ``pathlib.Path`` method without distinguishing stat-bound ones
50
+ # from text-only ones. Lifespan startup is also a one-shot
51
+ # event, not a request handler — even if there were I/O it
52
+ # wouldn't block per-request latency.
53
+ logger.info(f"Storage path: {storage_path.absolute()}") # noqa: ASYNC240
45
54
 
46
55
  yield
47
56
 
@@ -115,15 +124,19 @@ def create_app(settings: Settings | None = None) -> FastAPI:
115
124
  app.include_router(generators.router, prefix="/v1")
116
125
  app.include_router(datasets.router, prefix="/v1")
117
126
 
118
- # Mount Prometheus metrics endpoint
127
+ # Mount Prometheus metrics endpoint (SEC-16: wrap with trusted-IP
128
+ # auth because ASGI sub-app mounts bypass SecurityMiddleware).
119
129
  if settings.metrics_enabled:
120
- app.mount("/metrics", get_prometheus_app())
130
+ app.mount(
131
+ "/metrics",
132
+ MetricsAuthMiddleware(get_prometheus_app(), settings.metrics_trusted_ips),
133
+ )
121
134
 
122
135
  @app.exception_handler(ValueError)
123
136
  async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
124
137
  logging.getLogger("juniper_data").debug("Validation error: %s", exc)
125
138
  return JSONResponse(
126
- status_code=400,
139
+ status_code=status.HTTP_400_BAD_REQUEST,
127
140
  content={"detail": "Invalid request parameters"},
128
141
  )
129
142
 
@@ -131,11 +144,31 @@ def create_app(settings: Settings | None = None) -> FastAPI:
131
144
  async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
132
145
  logging.getLogger("juniper_data").exception("Unhandled exception")
133
146
  return JSONResponse(
134
- status_code=500,
147
+ status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
135
148
  content={"detail": "Internal server error"},
136
149
  )
137
150
 
138
151
  return app
139
152
 
140
153
 
141
- app = create_app()
154
+ @functools.lru_cache(maxsize=1)
155
+ def get_app() -> FastAPI:
156
+ """Return the singleton FastAPI app instance (lazy factory).
157
+
158
+ Use with uvicorn's factory mode::
159
+
160
+ uvicorn --factory juniper_data.api.app:get_app
161
+
162
+ or programmatically::
163
+
164
+ uvicorn.run("juniper_data.api.app:get_app", factory=True)
165
+
166
+ The first call builds the app via :func:`create_app` with default
167
+ settings; subsequent calls return the same instance from
168
+ ``functools.lru_cache``. Replaces the previous module-level
169
+ ``app = create_app()`` (CLN-JD-03), which read environment variables
170
+ and registered middleware at import time. Tests that need a fresh
171
+ instance with overridden settings should continue to call
172
+ :func:`create_app` directly with explicit ``Settings``.
173
+ """
174
+ return create_app()