juniper-data 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95):
  1. juniper_data/__init__.py +88 -0
  2. juniper_data/__main__.py +78 -0
  3. juniper_data/api/__init__.py +10 -0
  4. juniper_data/api/app.py +111 -0
  5. juniper_data/api/middleware.py +95 -0
  6. juniper_data/api/routes/__init__.py +9 -0
  7. juniper_data/api/routes/datasets.py +414 -0
  8. juniper_data/api/routes/generators.py +125 -0
  9. juniper_data/api/routes/health.py +49 -0
  10. juniper_data/api/security.py +238 -0
  11. juniper_data/api/settings.py +109 -0
  12. juniper_data/core/__init__.py +32 -0
  13. juniper_data/core/artifacts.py +63 -0
  14. juniper_data/core/dataset_id.py +38 -0
  15. juniper_data/core/models.py +135 -0
  16. juniper_data/core/split.py +120 -0
  17. juniper_data/generators/__init__.py +15 -0
  18. juniper_data/generators/arc_agi/__init__.py +11 -0
  19. juniper_data/generators/arc_agi/generator.py +229 -0
  20. juniper_data/generators/arc_agi/params.py +56 -0
  21. juniper_data/generators/checkerboard/__init__.py +15 -0
  22. juniper_data/generators/checkerboard/generator.py +114 -0
  23. juniper_data/generators/checkerboard/params.py +32 -0
  24. juniper_data/generators/circles/__init__.py +11 -0
  25. juniper_data/generators/circles/generator.py +112 -0
  26. juniper_data/generators/circles/params.py +31 -0
  27. juniper_data/generators/csv_import/__init__.py +15 -0
  28. juniper_data/generators/csv_import/generator.py +198 -0
  29. juniper_data/generators/csv_import/params.py +48 -0
  30. juniper_data/generators/gaussian/__init__.py +11 -0
  31. juniper_data/generators/gaussian/generator.py +149 -0
  32. juniper_data/generators/gaussian/params.py +53 -0
  33. juniper_data/generators/mnist/__init__.py +11 -0
  34. juniper_data/generators/mnist/generator.py +124 -0
  35. juniper_data/generators/mnist/params.py +39 -0
  36. juniper_data/generators/spiral/__init__.py +57 -0
  37. juniper_data/generators/spiral/defaults.py +39 -0
  38. juniper_data/generators/spiral/generator.py +206 -0
  39. juniper_data/generators/spiral/params.py +148 -0
  40. juniper_data/generators/xor/__init__.py +11 -0
  41. juniper_data/generators/xor/generator.py +162 -0
  42. juniper_data/generators/xor/params.py +30 -0
  43. juniper_data/storage/__init__.py +120 -0
  44. juniper_data/storage/base.py +279 -0
  45. juniper_data/storage/cached.py +211 -0
  46. juniper_data/storage/hf_store.py +257 -0
  47. juniper_data/storage/kaggle_store.py +333 -0
  48. juniper_data/storage/local_fs.py +232 -0
  49. juniper_data/storage/memory.py +136 -0
  50. juniper_data/storage/postgres_store.py +373 -0
  51. juniper_data/storage/redis_store.py +264 -0
  52. juniper_data/tests/__init__.py +1 -0
  53. juniper_data/tests/conftest.py +68 -0
  54. juniper_data/tests/fixtures/generate_golden_datasets.py +199 -0
  55. juniper_data/tests/integration/__init__.py +1 -0
  56. juniper_data/tests/integration/test_api.py +283 -0
  57. juniper_data/tests/integration/test_e2e_workflow.py +378 -0
  58. juniper_data/tests/integration/test_lifecycle_api.py +304 -0
  59. juniper_data/tests/integration/test_security_integration.py +189 -0
  60. juniper_data/tests/integration/test_storage_workflow.py +259 -0
  61. juniper_data/tests/performance/__init__.py +1 -0
  62. juniper_data/tests/performance/test_generator_benchmarks.py +178 -0
  63. juniper_data/tests/performance/test_storage_benchmarks.py +257 -0
  64. juniper_data/tests/unit/__init__.py +1 -0
  65. juniper_data/tests/unit/test_api_app.py +206 -0
  66. juniper_data/tests/unit/test_api_routes.py +407 -0
  67. juniper_data/tests/unit/test_api_settings.py +100 -0
  68. juniper_data/tests/unit/test_arc_agi_generator.py +525 -0
  69. juniper_data/tests/unit/test_artifacts.py +145 -0
  70. juniper_data/tests/unit/test_cached_store.py +423 -0
  71. juniper_data/tests/unit/test_checkerboard_generator.py +232 -0
  72. juniper_data/tests/unit/test_circles_generator.py +256 -0
  73. juniper_data/tests/unit/test_csv_import_generator.py +345 -0
  74. juniper_data/tests/unit/test_dataset_id.py +181 -0
  75. juniper_data/tests/unit/test_gaussian_generator.py +333 -0
  76. juniper_data/tests/unit/test_hf_store.py +416 -0
  77. juniper_data/tests/unit/test_init.py +93 -0
  78. juniper_data/tests/unit/test_kaggle_store.py +469 -0
  79. juniper_data/tests/unit/test_lifecycle.py +394 -0
  80. juniper_data/tests/unit/test_main.py +127 -0
  81. juniper_data/tests/unit/test_middleware.py +79 -0
  82. juniper_data/tests/unit/test_mnist_generator.py +370 -0
  83. juniper_data/tests/unit/test_postgres_store.py +490 -0
  84. juniper_data/tests/unit/test_redis_store.py +500 -0
  85. juniper_data/tests/unit/test_security.py +281 -0
  86. juniper_data/tests/unit/test_security_boundaries.py +517 -0
  87. juniper_data/tests/unit/test_spiral_generator.py +566 -0
  88. juniper_data/tests/unit/test_split.py +245 -0
  89. juniper_data/tests/unit/test_storage.py +767 -0
  90. juniper_data/tests/unit/test_xor_generator.py +223 -0
  91. juniper_data-0.4.2.dist-info/METADATA +216 -0
  92. juniper_data-0.4.2.dist-info/RECORD +95 -0
  93. juniper_data-0.4.2.dist-info/WHEEL +5 -0
  94. juniper_data-0.4.2.dist-info/licenses/LICENSE +9 -0
  95. juniper_data-0.4.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,88 @@
1
+ """
2
+ Juniper Data - Dataset generation and management service for the Juniper ecosystem.
3
+ """
4
+
5
+ import os
6
+
7
+ from dotenv import load_dotenv
8
+
9
# Optional dependency probe: ``arc_agi`` is bound to the imported module when
# the package is installed and to ``None`` otherwise; ``ARC_AGI_AVAILABLE``
# records which of the two happened.
try:
    import arc_agi
except ImportError:
    arc_agi = None  # type: ignore[assignment]
    ARC_AGI_AVAILABLE = False
else:
    ARC_AGI_AVAILABLE = True
16
+
17
+ __version__ = "0.4.2"
18
+ __author__ = "Paul Calnon"
19
+
20
+
21
def get_arc_agi_env() -> bool:
    """Report whether ``ARC_AGI_ENV`` is set, calling ``load_dotenv()`` first.

    ``load_dotenv()`` runs on every call. Its return value is ignored on
    purpose: the result depends only on whether ``ARC_AGI_ENV`` is present in
    the process environment afterwards.

    Returns:
        bool: True when ``ARC_AGI_ENV`` holds a non-empty value, False otherwise.
    """
    load_dotenv()
    env_value = os.getenv("ARC_AGI_ENV")
    return bool(env_value)
35
+
36
+
37
def reload_arc_agi_env() -> bool:
    """Call ``load_dotenv()`` again and report its outcome.

    The call passes no arguments, so python-dotenv's defaults apply.
    NOTE(review): ``override`` is not requested here, so variables already
    present in the process environment are presumably left unchanged —
    confirm against the python-dotenv documentation.

    Returns:
        bool: The truthiness of the value returned by ``load_dotenv()``.
    """
    loaded = load_dotenv()
    return bool(loaded)
45
+
46
+
47
+ def get_arc_api_key() -> str | None:
48
+ """
49
+ Return the current value of the ARC_API_KEY environment variable as a string.
50
+ """
51
+ return os.getenv("ARC_API_KEY") or None
52
+
53
+
54
+ def get_arc_agi_api_url() -> str | None:
55
+ """
56
+ Return the current value of the ARC_AGI_API as a URL/endpoint string.
57
+
58
+ Reading the environment at call time avoids import-time side effects
59
+ and makes it easier to adjust configuration in tests.
60
+ """
61
+ return os.getenv("ARC_AGI_API") or None
62
+
63
+
64
def get_arc_agi_arcade() -> "arc_agi.Arcade | None":
    """Build an :class:`arc_agi.Arcade` using the API key from the environment.

    The key is obtained through :func:`get_arc_api_key` at call time and passed
    explicitly as ``arc_api_key``.

    Returns:
        The constructed ``Arcade`` instance, or ``None`` if that instance
        evaluates as falsy.

    Raises:
        ImportError: If the ``arc-agi`` package is not installed.
    """
    if ARC_AGI_AVAILABLE:
        arcade = arc_agi.Arcade(arc_api_key=get_arc_api_key())
        return arcade or None
    raise ImportError("arc-agi package not installed. Install with: pip install 'juniper-data[arc-agi]'")
78
+
79
+
80
+ # Deprecated
81
def get_arc_agi_api() -> str | None:
    """Deprecated alias for :func:`get_arc_agi_api_url`.

    Delegates to :func:`get_arc_agi_api_url` and returns its result unchanged.
    Scheduled for removal in a future release; call
    :func:`get_arc_agi_api_url` directly instead.
    """
    api_url = get_arc_agi_api_url()
    return api_url
@@ -0,0 +1,78 @@
1
+ """Entry point for running the JuniperData API with uvicorn."""
2
+
3
+ import argparse
4
+ import sys
5
+
6
+
7
# Level names uvicorn accepts for ``log_level`` when given as a string.
_UVICORN_LOG_LEVELS = frozenset({"critical", "error", "warning", "info", "debug", "trace"})

# Level names this CLI/settings may supply that uvicorn does not recognise,
# mapped to the closest name it does accept.
_LOG_LEVEL_ALIASES = {
    "fatal": "critical",  # stdlib logging treats FATAL as an alias of CRITICAL
    "verbose": "debug",  # NOTE(review): nearest supported level — confirm intent
    "warn": "warning",
}


def _to_uvicorn_log_level(level: str) -> str:
    """Translate a configured level name into one uvicorn accepts.

    Matching is case-insensitive. Known aliases are mapped to their uvicorn
    equivalent; any name that is still unsupported falls back to ``"info"``
    instead of making the server fail at startup.
    """
    name = level.strip().lower()
    name = _LOG_LEVEL_ALIASES.get(name, name)
    return name if name in _UVICORN_LOG_LEVELS else "info"


def main() -> int:
    """Run the JuniperData API server.

    Command-line options take precedence over values from ``Settings``. The
    chosen log level is normalised to a name uvicorn supports before the
    server is started.

    Returns:
        int: 1 when uvicorn is not installed, otherwise 0 after
        ``uvicorn.run`` returns.
    """
    try:
        import uvicorn
    except ImportError:
        print("Error: uvicorn not installed. Install with: pip install 'juniper-data[api]'")
        return 1

    from juniper_data.api.settings import Settings

    parser = argparse.ArgumentParser(
        description="Run the JuniperData API server",
        prog="python -m juniper_data",
    )
    parser.add_argument(
        "--host",
        type=str,
        default=None,
        help="Host to bind to (default: from settings, which default to 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=None,
        help="Port to bind to (default: from settings or 8100)",
    )
    parser.add_argument(
        "--storage-path",
        type=str,
        default=None,
        help="Path to dataset storage directory",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=["TRACE", "VERBOSE", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL", "FATAL"],
        default=None,
        help="Logging level",
    )
    parser.add_argument(
        "--reload",
        action="store_true",
        help="Enable auto-reload for development",
    )

    args = parser.parse_args()

    settings = Settings()

    host = args.host if args.host is not None else settings.host
    port = args.port if args.port is not None else settings.port
    log_level_source = args.log_level if args.log_level is not None else settings.log_level
    # uvicorn rejects names such as "verbose" or "fatal"; map them first.
    log_level = _to_uvicorn_log_level(log_level_source)

    if args.storage_path is not None:
        import os

        # Exported through the environment because uvicorn imports the app by
        # its import string below rather than receiving a settings object.
        os.environ["JUNIPER_DATA_STORAGE_PATH"] = args.storage_path

    uvicorn.run(
        "juniper_data.api.app:app",
        host=host,
        port=port,
        log_level=log_level,
        reload=args.reload,
    )

    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,10 @@
1
+ """API module for Juniper Data service."""
2
+
3
+ from .app import create_app
4
+ from .settings import Settings, get_settings
5
+
6
+ __all__ = [
7
+ "create_app",
8
+ "Settings",
9
+ "get_settings",
10
+ ]
@@ -0,0 +1,111 @@
1
+ """FastAPI application factory and configuration."""
2
+
3
+ import logging
4
+ from collections.abc import AsyncGenerator
5
+ from contextlib import asynccontextmanager
6
+ from pathlib import Path
7
+
8
+ from fastapi import FastAPI, Request
9
+ from fastapi.middleware.cors import CORSMiddleware
10
+ from fastapi.responses import JSONResponse
11
+
12
+ from juniper_data import __version__
13
+ from juniper_data.storage import LocalFSDatasetStore
14
+
15
+ from .middleware import SecurityMiddleware
16
+ from .routes import datasets, generators, health
17
+ from .security import APIKeyAuth, RateLimiter
18
+ from .settings import Settings, get_settings
19
+
20
+
21
+ @asynccontextmanager
22
+ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
23
+ """Application lifespan handler for startup/shutdown."""
24
+ settings: Settings = app.state.settings
25
+ storage_path = Path(settings.storage_path)
26
+ store = LocalFSDatasetStore(storage_path)
27
+ datasets.set_store(store)
28
+
29
+ logging.basicConfig(
30
+ level=getattr(logging, settings.log_level.upper(), logging.INFO),
31
+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
32
+ )
33
+ logger = logging.getLogger("juniper_data")
34
+ logger.info(f"JuniperData API v{__version__} starting")
35
+ logger.info(f"Storage path: {storage_path.absolute()}")
36
+
37
+ yield
38
+
39
+ logger.info("JuniperData API shutting down")
40
+
41
+
42
def create_app(settings: Settings | None = None) -> FastAPI:
    """Create and configure the FastAPI application.

    Args:
        settings: Optional settings override. When omitted, settings are
            obtained from ``get_settings()``.

    Returns:
        The configured FastAPI application: settings stored on ``app.state``,
        security and CORS middleware installed, the health, generators and
        datasets routers mounted under ``/v1``, and JSON handlers registered
        for ``ValueError`` (400) and any other ``Exception`` (500).
    """
    if settings is None:
        settings = get_settings()

    app = FastAPI(
        title="Juniper Data API",
        description="Dataset generation and management service for the Juniper ecosystem",
        version=__version__,
        lifespan=lifespan,
    )

    app.state.settings = settings

    # Middleware added later wraps middleware added earlier, so the security
    # layer is registered first and CORS last. With CORS outermost, browser
    # preflight (OPTIONS) requests, which carry no API key, are answered before
    # authentication runs, and 401/429 responses still receive CORS headers.
    api_key_auth = APIKeyAuth(settings.api_keys)
    rate_limiter = RateLimiter(
        requests_per_minute=settings.rate_limit_requests_per_minute,
        enabled=settings.rate_limit_enabled,
    )
    app.add_middleware(
        SecurityMiddleware,
        api_key_auth=api_key_auth,
        rate_limiter=rate_limiter,
    )

    # Only allow credentialed CORS requests when origins are explicitly specified.
    # Browsers do not permit Access-Control-Allow-Credentials: true with a wildcard
    # origin (Access-Control-Allow-Origin: "*"), so the default ["*"] intentionally
    # disables credentials unless concrete origins are configured.
    allow_credentials = bool(settings.cors_origins) and "*" not in settings.cors_origins

    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.cors_origins,
        allow_credentials=allow_credentials,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(health.router, prefix="/v1")
    app.include_router(generators.router, prefix="/v1")
    app.include_router(datasets.router, prefix="/v1")

    @app.exception_handler(ValueError)
    async def value_error_handler(request: Request, exc: ValueError) -> JSONResponse:
        # A ValueError is reported to the client as a 400 with its message.
        return JSONResponse(
            status_code=400,
            content={"detail": str(exc)},
        )

    @app.exception_handler(Exception)
    async def general_exception_handler(request: Request, exc: Exception) -> JSONResponse:
        # Log the traceback server-side; the client only sees a generic message.
        logging.getLogger("juniper_data").exception("Unhandled exception")
        return JSONResponse(
            status_code=500,
            content={"detail": "Internal server error"},
        )

    return app
109
+
110
+
111
+ app = create_app()
@@ -0,0 +1,95 @@
1
+ """FastAPI middleware for security and request processing."""
2
+
3
+ from fastapi import HTTPException, Request, Response
4
+ from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
5
+ from starlette.responses import JSONResponse
6
+ from starlette.types import ASGIApp
7
+
8
+ from .security import APIKeyAuth, RateLimiter
9
+
10
# Request paths for which the security middleware skips its checks:
# health endpoints first, then the API documentation endpoints.
EXEMPT_PATHS = {"/v1/health", "/v1/health/live", "/v1/health/ready"} | {
    "/docs",
    "/openapi.json",
    "/redoc",
}
18
+
19
+
20
class SecurityMiddleware(BaseHTTPMiddleware):
    """Gate requests behind API-key authentication and rate limiting.

    Requests whose path is listed in ``EXEMPT_PATHS`` skip both checks. Every
    other request is authenticated and rate limited, each only when the
    corresponding component reports itself as enabled.
    """

    def __init__(
        self,
        app: ASGIApp,
        api_key_auth: APIKeyAuth,
        rate_limiter: RateLimiter,
    ) -> None:
        """Store the security components used by :meth:`dispatch`.

        Args:
            app: The wrapped ASGI application.
            api_key_auth: Callable that authenticates a request.
            rate_limiter: Callable that enforces the request rate limit.
        """
        super().__init__(app)
        self._api_key_auth = api_key_auth
        self._rate_limiter = rate_limiter

    async def dispatch(
        self,
        request: Request,
        call_next: RequestResponseEndpoint,
    ) -> Response:
        """Run the security checks, then forward the request.

        Args:
            request: The incoming request.
            call_next: The next middleware/handler in the chain.

        Returns:
            The downstream response, or a JSON error response built from the
            ``HTTPException`` raised by a failed check.
        """
        if self._is_exempt(request.url.path):
            return await call_next(request)

        try:
            # The API key (None when authentication is disabled) is handed to
            # the rate limiter together with the request.
            api_key = await self._api_key_auth(request) if self._api_key_auth.enabled else None
            if self._rate_limiter.enabled:
                await self._rate_limiter(request, api_key)
        except HTTPException as denied:
            return JSONResponse(
                status_code=denied.status_code,
                content={"detail": denied.detail},
                headers=denied.headers,
            )

        response = await call_next(request)

        # Rate-limit headers are attached only when the request state carries
        # a ``rate_limit_remaining`` attribute.
        if self._rate_limiter.enabled and hasattr(request.state, "rate_limit_remaining"):
            out_headers = response.headers
            out_headers["X-RateLimit-Limit"] = str(self._rate_limiter.limit)
            out_headers["X-RateLimit-Remaining"] = str(request.state.rate_limit_remaining)
            out_headers["X-RateLimit-Reset"] = str(request.state.rate_limit_reset)

        return response

    def _is_exempt(self, path: str) -> bool:
        """Tell whether ``path`` bypasses the security checks.

        Args:
            path: The request path.

        Returns:
            True when ``path`` exactly matches an entry of ``EXEMPT_PATHS``.
        """
        exempt = path in EXEMPT_PATHS
        return exempt
@@ -0,0 +1,9 @@
1
+ """API routes module."""
2
+
3
+ from . import datasets, generators, health
4
+
5
+ __all__ = [
6
+ "datasets",
7
+ "generators",
8
+ "health",
9
+ ]