pulse-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse_engine/__init__.py +0 -0
- pulse_engine/adapters/__init__.py +58 -0
- pulse_engine/adapters/audio_transcription.py +167 -0
- pulse_engine/adapters/batcher.py +36 -0
- pulse_engine/adapters/digital_news.py +128 -0
- pulse_engine/adapters/digital_news_metadata.py +536 -0
- pulse_engine/adapters/exceptions.py +10 -0
- pulse_engine/adapters/models.py +134 -0
- pulse_engine/adapters/opensearch_storage.py +160 -0
- pulse_engine/adapters/speech_content.py +130 -0
- pulse_engine/adapters/speech_metadata.py +374 -0
- pulse_engine/adapters/twitter.py +423 -0
- pulse_engine/adapters/youtube_downloader.py +186 -0
- pulse_engine/adapters/youtube_metadata.py +261 -0
- pulse_engine/api/__init__.py +0 -0
- pulse_engine/api/v1/__init__.py +0 -0
- pulse_engine/api/v1/auth.py +91 -0
- pulse_engine/api/v1/health.py +62 -0
- pulse_engine/api/v1/router.py +16 -0
- pulse_engine/chain_recovery.py +131 -0
- pulse_engine/cli/__init__.py +0 -0
- pulse_engine/cli/main.py +169 -0
- pulse_engine/cli/templates/cookiecutter.json +4 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
- pulse_engine/client.py +95 -0
- pulse_engine/config.py +157 -0
- pulse_engine/core/__init__.py +0 -0
- pulse_engine/core/error_handlers.py +64 -0
- pulse_engine/core/exceptions.py +67 -0
- pulse_engine/core/job_token.py +109 -0
- pulse_engine/core/logging.py +45 -0
- pulse_engine/core/scope.py +23 -0
- pulse_engine/core/security.py +130 -0
- pulse_engine/database.py +30 -0
- pulse_engine/dependencies.py +166 -0
- pulse_engine/deployment/__init__.py +0 -0
- pulse_engine/deployment/backend_deployment_repository.py +83 -0
- pulse_engine/deployment/backends/__init__.py +0 -0
- pulse_engine/deployment/backends/base.py +50 -0
- pulse_engine/deployment/backends/exceptions.py +20 -0
- pulse_engine/deployment/backends/native_lambda.py +125 -0
- pulse_engine/deployment/backends/prefect_ecs.py +116 -0
- pulse_engine/deployment/backends/prefect_k8s.py +131 -0
- pulse_engine/deployment/backends/registry.py +50 -0
- pulse_engine/deployment/infra_provisioner.py +285 -0
- pulse_engine/deployment/job_launcher.py +178 -0
- pulse_engine/deployment/models.py +48 -0
- pulse_engine/deployment/repository.py +54 -0
- pulse_engine/deployment/router.py +22 -0
- pulse_engine/deployment/schemas.py +18 -0
- pulse_engine/deployment/service.py +65 -0
- pulse_engine/extractor/__init__.py +0 -0
- pulse_engine/extractor/adapters/__init__.py +0 -0
- pulse_engine/extractor/base.py +48 -0
- pulse_engine/extractor/models.py +50 -0
- pulse_engine/extractor/orchestrator/__init__.py +15 -0
- pulse_engine/extractor/orchestrator/base.py +34 -0
- pulse_engine/extractor/orchestrator/noop.py +37 -0
- pulse_engine/extractor/orchestrator/prefect.py +163 -0
- pulse_engine/extractor/repository.py +163 -0
- pulse_engine/extractor/router.py +102 -0
- pulse_engine/extractor/schemas.py +93 -0
- pulse_engine/extractor/service.py +431 -0
- pulse_engine/extractor/stage_models.py +36 -0
- pulse_engine/extractor/stage_repository.py +109 -0
- pulse_engine/main.py +195 -0
- pulse_engine/mcp/__init__.py +0 -0
- pulse_engine/mcp/__main__.py +5 -0
- pulse_engine/mcp/server.py +108 -0
- pulse_engine/mcp/tools_jobs.py +159 -0
- pulse_engine/mcp/tools_kb.py +88 -0
- pulse_engine/mcp/tools_modules.py +115 -0
- pulse_engine/mcp/tools_pipelines.py +215 -0
- pulse_engine/mcp/tools_processor.py +208 -0
- pulse_engine/middleware/__init__.py +0 -0
- pulse_engine/middleware/rate_limit.py +144 -0
- pulse_engine/middleware/request_id.py +16 -0
- pulse_engine/middleware/security_headers.py +25 -0
- pulse_engine/middleware/tenant.py +90 -0
- pulse_engine/pipeline/__init__.py +0 -0
- pulse_engine/pipeline/config_parser.py +148 -0
- pulse_engine/pipeline/expression.py +268 -0
- pulse_engine/pipeline/models.py +98 -0
- pulse_engine/pipeline/repositories.py +224 -0
- pulse_engine/pipeline/router_modules.py +66 -0
- pulse_engine/pipeline/router_pipelines.py +198 -0
- pulse_engine/pipeline/schemas.py +200 -0
- pulse_engine/pipeline/service.py +250 -0
- pulse_engine/pipeline/translators/__init__.py +44 -0
- pulse_engine/pipeline/translators/airflow_status.py +11 -0
- pulse_engine/pipeline/translators/airflow_translator.py +22 -0
- pulse_engine/pipeline/translators/base.py +42 -0
- pulse_engine/pipeline/translators/prefect_status.py +93 -0
- pulse_engine/pipeline/translators/prefect_translator.py +195 -0
- pulse_engine/processor/__init__.py +0 -0
- pulse_engine/processor/base.py +36 -0
- pulse_engine/processor/core/__init__.py +0 -0
- pulse_engine/processor/core/analysis.py +148 -0
- pulse_engine/processor/core/chunking.py +158 -0
- pulse_engine/processor/core/prompts.py +340 -0
- pulse_engine/processor/core/topic_splitter.py +105 -0
- pulse_engine/processor/defaults/__init__.py +11 -0
- pulse_engine/processor/defaults/core_processor.py +12 -0
- pulse_engine/processor/defaults/postprocessor.py +12 -0
- pulse_engine/processor/defaults/preprocessor.py +12 -0
- pulse_engine/processor/llm/__init__.py +0 -0
- pulse_engine/processor/llm/provider.py +58 -0
- pulse_engine/processor/ocr/gemini.py +52 -0
- pulse_engine/processor/pipeline.py +107 -0
- pulse_engine/processor/postprocessor/__init__.py +0 -0
- pulse_engine/processor/postprocessor/embeddings.py +34 -0
- pulse_engine/processor/postprocessor/tasks.py +180 -0
- pulse_engine/processor/preprocessor/__init__.py +0 -0
- pulse_engine/processor/preprocessor/tasks.py +71 -0
- pulse_engine/processor/router.py +192 -0
- pulse_engine/processor/schemas.py +167 -0
- pulse_engine/registry.py +117 -0
- pulse_engine/runners/__init__.py +0 -0
- pulse_engine/runners/lambda_runner.py +26 -0
- pulse_engine/runners/pipeline_runner.py +43 -0
- pulse_engine/runners/prefect_pipeline_flow.py +904 -0
- pulse_engine/runners/prefect_runner.py +33 -0
- pulse_engine/s3.py +72 -0
- pulse_engine/secrets.py +46 -0
- pulse_engine/services/__init__.py +0 -0
- pulse_engine/services/bootstrap.py +211 -0
- pulse_engine/services/opensearch.py +84 -0
- pulse_engine/storage/__init__.py +0 -0
- pulse_engine/storage/connectors/__init__.py +0 -0
- pulse_engine/storage/connectors/athena.py +226 -0
- pulse_engine/storage/connectors/base.py +32 -0
- pulse_engine/storage/connectors/opensearch.py +344 -0
- pulse_engine/storage/knowledge_base.py +68 -0
- pulse_engine/storage/router.py +78 -0
- pulse_engine/storage/schemas.py +93 -0
- pulse_engine/testing/__init__.py +13 -0
- pulse_engine/testing/fixtures.py +50 -0
- pulse_engine/testing/mocks.py +104 -0
- pulse_engine/worker.py +53 -0
- pulse_engine-0.2.0.dist-info/METADATA +654 -0
- pulse_engine-0.2.0.dist-info/RECORD +150 -0
- pulse_engine-0.2.0.dist-info/WHEEL +4 -0
- pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# src/pulse_engine/core/job_token.py
|
|
2
|
+
"""Job-scoped JWT issuance and verification for container callbacks."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from jose import JWTError, jwt
|
|
11
|
+
|
|
12
|
+
from pulse_engine.core.exceptions import UnauthorizedError
|
|
13
|
+
|
|
14
|
+
_ALGORITHM = "HS256"
|
|
15
|
+
_DEFAULT_TTL = 86400 # 24 hours
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class JobClaims:
    """Typed view of the claims carried by a job-scoped JWT."""

    # Token subject, formatted as "job:{job_id}".
    sub: str
    # Tenant the job belongs to (read from the "custom:tenant_id" claim).
    tenant_id: str
    product: str
    # "extraction" | "processing" | "storage"; run-scoped tokens use "pipeline".
    stage: str
    # Permissions granted to the token holder.
    scope: list[str]
    orchestrator: str = field(default="prefect")
    compute: str = field(default="ecs")
    # Complete decoded payload, kept for callers that need extra claims.
    raw: dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class JobTokenIssuer:
    """Mint HMAC-signed JWTs whose authority is limited to one job stage."""

    def __init__(self, secret: str) -> None:
        # Symmetric signing key; the verifying side needs the same value.
        self._secret = secret

    def issue(
        self,
        job_id: str,
        tenant_id: str,
        product: str,
        stage: str,
        scope: list[str],
        orchestrator: str = "prefect",
        compute: str = "ecs",
        ttl_seconds: int = _DEFAULT_TTL,
    ) -> str:
        """Return a signed token describing one job stage.

        The subject is ``job:{job_id}``; ``iat`` is the current Unix time and
        ``exp`` is ``iat + ttl_seconds``.
        """
        issued_at = int(time.time())
        expires_at = issued_at + ttl_seconds

        # Claim insertion order is kept identical to the wire format callers
        # already receive.
        claims: dict[str, Any] = {
            "sub": f"job:{job_id}",
            "custom:tenant_id": tenant_id,
        }
        claims.update(
            product=product,
            stage=stage,
            scope=scope,
            orchestrator=orchestrator,
            compute=compute,
            iat=issued_at,
            exp=expires_at,
        )
        return jwt.encode(claims, self._secret, algorithm=_ALGORITHM)

    def issue_token(
        self,
        pipeline_run_id: str,
        tenant_id: str,
        ttl_seconds: int = _DEFAULT_TTL,
    ) -> str:
        """Issue a run-scoped token for pipeline orchestration.

        The run id takes the place of the job id; product and stage are both
        set to ``"pipeline"`` and the token is granted the ``pipeline:run``
        and ``kb:write`` scopes.
        """
        run_scopes = ["pipeline:run", "kb:write"]
        return self.issue(
            pipeline_run_id,
            tenant_id,
            "pipeline",
            "pipeline",
            run_scopes,
            ttl_seconds=ttl_seconds,
        )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class JobTokenVerifier:
    """Verify HMAC-signed job-scoped JWTs and convert them into ``JobClaims``."""

    def __init__(self, secret: str) -> None:
        # Symmetric key; must match the one the issuing side signed with.
        self._secret = secret

    async def verify(self, token: str) -> JobClaims:
        """Decode *token*, validate its claims and return them as ``JobClaims``.

        Raises:
            UnauthorizedError: if decoding fails (bad signature, expired,
                missing ``exp``, ...), ``sub`` does not start with ``job:``,
                ``custom:tenant_id`` is missing or empty, or ``scope`` is not
                a list of strings / space-delimited string.
        """
        try:
            payload = jwt.decode(
                token,
                self._secret,
                algorithms=[_ALGORITHM],
                # Expiry is only validated when the claim is present, so
                # without this a signed token lacking ``exp`` never expires.
                options={"require_exp": True},
            )
        except JWTError as e:
            raise UnauthorizedError(f"Invalid token: {e}") from e

        sub = payload.get("sub", "")
        if not sub.startswith("job:"):
            raise UnauthorizedError("Not a job-scoped token")

        tenant_id = payload.get("custom:tenant_id")
        if not tenant_id:
            raise UnauthorizedError("Token missing tenant_id claim")

        return JobClaims(
            sub=sub,
            tenant_id=tenant_id,
            product=payload.get("product", ""),
            stage=payload.get("stage", ""),
            scope=self._normalize_scope(payload.get("scope", [])),
            orchestrator=payload.get("orchestrator", "prefect"),
            compute=payload.get("compute", "ecs"),
            raw=payload,
        )

    @staticmethod
    def _normalize_scope(value: Any) -> list[str]:
        """Coerce a decoded ``scope`` claim into a list of scope names.

        A bare string must never reach ``JobClaims.scope``: membership tests
        against a string are substring matches, so ``"kb" in "kb:write"``
        would wrongly succeed. Space-delimited strings are split into
        individual names; ``None`` becomes an empty list.

        Raises:
            UnauthorizedError: if the claim is neither a string nor a
                list/tuple made only of strings.
        """
        if value is None:
            return []
        if isinstance(value, str):
            return value.split()
        if isinstance(value, (list, tuple)) and all(
            isinstance(item, str) for item in value
        ):
            return list(value)
        raise UnauthorizedError("Token has malformed scope claim")
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
import structlog
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def setup_logging(log_level: str = "INFO", env: str = "development") -> None:
    """Configure structlog and the stdlib root logger to write to stdout.

    Args:
        log_level: Name of a ``logging`` level (case-insensitive). A name that
            is not an attribute of the ``logging`` module falls back to INFO.
        env: ``"development"`` selects structlog's console renderer; any other
            value selects JSON output.

    Every handler already attached to the root logger is removed and replaced
    by a single stdout stream handler.
    """
    pre_chain: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    # Only the renderer that is actually needed gets instantiated.
    final_renderer: structlog.types.Processor = (
        structlog.dev.ConsoleRenderer()
        if env == "development"
        else structlog.processors.JSONRenderer()
    )

    # structlog hands events to stdlib logging; the last processor wraps each
    # event so ProcessorFormatter can render it.
    structlog.configure(
        processors=pre_chain
        + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(
        structlog.stdlib.ProcessorFormatter(
            processors=[
                structlog.stdlib.ProcessorFormatter.remove_processors_meta,
                final_renderer,
            ],
        )
    )

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(stdout_handler)
    root.setLevel(getattr(logging, log_level.upper(), logging.INFO))
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Scope enforcement for job-scoped tokens."""
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from fastapi import Depends, Request
|
|
6
|
+
|
|
7
|
+
from pulse_engine.core.exceptions import ForbiddenError
|
|
8
|
+
from pulse_engine.core.job_token import JobClaims
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def require_scope(scope: str) -> Any:
    """Build a FastAPI dependency that demands *scope* from job-scoped tokens.

    The check applies only when ``request.state.user_claims`` is a
    ``JobClaims`` instance. Any other value (including a missing attribute)
    passes through unchecked, which is how Cognito-authenticated requests
    bypass scope enforcement.

    Raises (from the returned dependency):
        ForbiddenError: if the job token does not list *scope*.
    """

    def _check(request: Request) -> None:
        claims = getattr(request.state, "user_claims", None)
        if not isinstance(claims, JobClaims):
            return
        if scope in claims.scope:
            return
        raise ForbiddenError(f"Token missing required scope: {scope}")

    return Depends(_check)
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import time as _time
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Any, Protocol
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
from jose import JWTError, jwt
|
|
7
|
+
|
|
8
|
+
from pulse_engine.core.exceptions import UnauthorizedError
|
|
9
|
+
from pulse_engine.core.job_token import JobClaims
|
|
10
|
+
|
|
11
|
+
_JWKS_TTL_SECONDS = 3600 # Re-fetch JWKS every hour
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class CognitoClaims:
    """Claims of interest pulled from a verified user token."""

    # Subject identifier ("sub" claim; empty string when absent).
    sub: str
    # "email" claim; empty string when absent.
    email: str
    # Value of the "custom:tenant_id" claim.
    tenant_id: str
    # Complete decoded payload.
    raw: dict[str, Any]
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class TokenVerifier(Protocol):
    """Structural interface shared by every bearer-token verifier."""

    async def verify(self, token: str) -> CognitoClaims | JobClaims:
        """Validate *token* and return the claims extracted from it."""
        ...
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class CognitoTokenVerifier:
    """Verify RS256 JWTs against a JWKS document fetched over HTTP.

    The JWKS is cached on the instance and re-fetched when it is older than
    ``_JWKS_TTL_SECONDS`` or when a token names a ``kid`` the cache does not
    contain (key rotation).
    """

    # Minimum age (seconds) a cached JWKS must reach before it may be fetched
    # again. Tokens are untrusted input: without this floor every request
    # carrying a made-up ``kid`` would trigger an outbound JWKS download.
    # Must stay below _JWKS_TTL_SECONDS. Trade-off: a key published less than
    # this many seconds after a fetch is not recognised until the window ends.
    _FORCED_REFRESH_COOLDOWN_SECONDS = 30.0

    def __init__(self, jwks_url: str, issuer: str, audience: str) -> None:
        self._jwks_url = jwks_url
        self._issuer = issuer
        self._audience = audience
        self._jwks: dict[str, Any] | None = None
        self._jwks_fetched_at: float = 0.0

    async def _get_jwks(self, *, force_refresh: bool = False) -> dict[str, Any]:
        """Return the cached JWKS, re-fetching when stale or when forced.

        A copy fetched less than ``_FORCED_REFRESH_COOLDOWN_SECONDS`` ago is
        always served as-is, even when *force_refresh* is set.

        Raises whatever ``httpx`` raises for transport or HTTP-status errors.
        """
        now = _time.monotonic()
        age = now - self._jwks_fetched_at

        # Fresh enough that neither the TTL nor a forced refresh applies.
        if self._jwks is not None and age < self._FORCED_REFRESH_COOLDOWN_SECONDS:
            return self._jwks

        if self._jwks is None or force_refresh or age > _JWKS_TTL_SECONDS:
            async with httpx.AsyncClient() as client:
                resp = await client.get(self._jwks_url)
                resp.raise_for_status()
                self._jwks = resp.json()
                self._jwks_fetched_at = now
        assert self._jwks is not None  # guaranteed by the branch above
        return self._jwks

    async def verify(self, token: str) -> CognitoClaims:
        """Validate *token* and return its claims.

        Raises:
            UnauthorizedError: if the token is malformed, its signing key is
                unknown, signature/audience/issuer validation fails, or the
                ``custom:tenant_id`` claim is missing or empty.
        """
        try:
            # Parse the header first so a malformed token is rejected without
            # causing any network traffic.
            kid = jwt.get_unverified_header(token).get("kid")
            key = self._find_key(await self._get_jwks(), kid)

            # On key miss, attempt a forced refresh (handles key rotation);
            # _get_jwks rate-limits how often that reaches the network.
            if key is None:
                key = self._find_key(await self._get_jwks(force_refresh=True), kid)

            if key is None:
                raise UnauthorizedError("Invalid token: signing key not found")

            payload = jwt.decode(
                token,
                key,
                algorithms=["RS256"],
                audience=self._audience,
                issuer=self._issuer,
            )
        except JWTError as e:
            raise UnauthorizedError("Invalid token") from e

        tenant_id = payload.get("custom:tenant_id")
        if not tenant_id:
            raise UnauthorizedError("Token missing tenant_id claim")

        return CognitoClaims(
            sub=payload.get("sub", ""),
            email=payload.get("email", ""),
            tenant_id=tenant_id,
            raw=payload,
        )

    @staticmethod
    def _find_key(jwks: dict[str, Any], kid: str | None) -> dict[str, Any] | None:
        """Return the first JWKS entry whose ``kid`` equals *kid*, else ``None``."""
        for k in jwks.get("keys", []):
            if k.get("kid") == kid:
                return k  # type: ignore[no-any-return]
        return None
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class MockTokenVerifier:
    """Verifier for HS256 tokens signed with a shared secret.

    Produces the same ``CognitoClaims`` shape as ``CognitoTokenVerifier`` but
    needs no JWKS endpoint.
    """

    def __init__(self, secret: str, audience: str, issuer: str) -> None:
        self._secret = secret
        self._audience = audience
        self._issuer = issuer

    async def verify(self, token: str) -> CognitoClaims:
        """Decode *token* and return its claims.

        Raises:
            UnauthorizedError: if decoding/validation fails or the
                ``custom:tenant_id`` claim is missing or empty.
        """
        decode_kwargs = {
            "algorithms": ["HS256"],
            "audience": self._audience,
            "issuer": self._issuer,
        }
        try:
            decoded = jwt.decode(token, self._secret, **decode_kwargs)
        except JWTError as e:
            raise UnauthorizedError("Invalid token") from e

        tenant = decoded.get("custom:tenant_id")
        if tenant:
            return CognitoClaims(
                sub=decoded.get("sub", ""),
                email=decoded.get("email", ""),
                tenant_id=tenant,
                raw=decoded,
            )
        raise UnauthorizedError("Token missing tenant_id claim")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def extract_bearer_token(authorization: str | None) -> str:
    """Return the credential part of a ``Bearer <token>`` header value.

    The scheme is matched case-insensitively and any run of whitespace may
    separate the two parts.

    Raises:
        UnauthorizedError: if the header is missing/empty or is not exactly
            a ``bearer`` scheme followed by one token.
    """
    if not authorization:
        raise UnauthorizedError("Missing authorization header")

    pieces = authorization.split()
    well_formed = len(pieces) == 2 and pieces[0].lower() == "bearer"
    if not well_formed:
        raise UnauthorizedError("Invalid authorization header format")
    return pieces[1]
|
pulse_engine/database.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from sqlalchemy.ext.asyncio import (
|
|
2
|
+
AsyncEngine,
|
|
3
|
+
AsyncSession,
|
|
4
|
+
async_sessionmaker,
|
|
5
|
+
create_async_engine,
|
|
6
|
+
)
|
|
7
|
+
from sqlalchemy.orm import DeclarativeBase
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Base(DeclarativeBase):
    """Declarative base class for the package's SQLAlchemy ORM models."""
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def build_async_engine(database_url: str) -> AsyncEngine:
    """Create the async SQLAlchemy engine for *database_url*.

    SQL echo is off. Statement caching is disabled and prepared statements
    are given an empty name through driver connect arguments.
    """

    def _unnamed_statement() -> str:
        # Every prepared statement gets the empty name.
        return ""

    # NOTE(review): these options look like the asyncpg settings used behind a
    # transaction-pooling proxy such as PgBouncer — confirm against deployment.
    driver_args = {
        "statement_cache_size": 0,
        "prepared_statement_cache_size": 0,
        "prepared_statement_name_func": _unnamed_statement,
    }
    engine = create_async_engine(
        database_url,
        echo=False,
        future=True,
        connect_args=driver_args,
    )
    return engine
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def build_session_factory(
    engine: AsyncEngine,
) -> async_sessionmaker[AsyncSession]:
    """Return a factory of ``AsyncSession`` objects bound to *engine*.

    ``expire_on_commit`` is disabled, so loaded objects are not expired when
    the session commits.
    """
    factory = async_sessionmaker(
        bind=engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    return factory
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
from collections.abc import AsyncIterator
|
|
2
|
+
from typing import TYPE_CHECKING
|
|
3
|
+
|
|
4
|
+
from fastapi import Depends, Request
|
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
|
+
|
|
7
|
+
from pulse_engine.config import Settings, get_settings
|
|
8
|
+
from pulse_engine.core.job_token import JobClaims
|
|
9
|
+
from pulse_engine.core.security import (
|
|
10
|
+
CognitoClaims,
|
|
11
|
+
CognitoTokenVerifier,
|
|
12
|
+
TokenVerifier,
|
|
13
|
+
extract_bearer_token,
|
|
14
|
+
)
|
|
15
|
+
from pulse_engine.services.opensearch import OpenSearchService
|
|
16
|
+
from pulse_engine.storage.knowledge_base import KnowledgeBaseService
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from pulse_engine.deployment.backends.registry import BackendRegistry
|
|
20
|
+
from pulse_engine.deployment.job_launcher import JobLauncher
|
|
21
|
+
from pulse_engine.deployment.service import DeploymentService
|
|
22
|
+
from pulse_engine.extractor.orchestrator.base import BaseOrchestratorAdapter
|
|
23
|
+
from pulse_engine.extractor.repository import JobRepository
|
|
24
|
+
from pulse_engine.extractor.service import JobService
|
|
25
|
+
from pulse_engine.processor.pipeline import ProcessingPipeline
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Process-wide verifier instances keyed by (jwks_url, issuer, audience).
# CognitoTokenVerifier keeps its JWKS cache on the instance, so building a new
# verifier per request would discard that cache and re-download the JWKS for
# every authenticated call.
_TOKEN_VERIFIERS: dict[tuple[str, str, str], CognitoTokenVerifier] = {}


def get_token_verifier(
    settings: Settings = Depends(get_settings),
) -> TokenVerifier:
    """Return the shared Cognito verifier for the configured user pool.

    One verifier is created per distinct (JWKS URL, issuer, app client id)
    combination and reused afterwards; a different configuration yields a
    different instance.
    """
    key = (
        settings.cognito_jwks_url,
        settings.cognito_issuer,
        settings.cognito_app_client_id,
    )
    verifier = _TOKEN_VERIFIERS.get(key)
    if verifier is None:
        # setdefault keeps a single winner if two resolutions race here.
        verifier = _TOKEN_VERIFIERS.setdefault(
            key,
            CognitoTokenVerifier(
                jwks_url=settings.cognito_jwks_url,
                issuer=settings.cognito_issuer,
                audience=settings.cognito_app_client_id,
            ),
        )
    return verifier
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
async def get_current_user(
    request: Request,
    verifier: TokenVerifier = Depends(get_token_verifier),
) -> CognitoClaims | JobClaims:
    """Authenticate the request from its ``Authorization`` bearer token.

    Errors raised while extracting or verifying the token propagate to the
    caller unchanged.
    """
    auth_header = request.headers.get("authorization")
    bearer = extract_bearer_token(auth_header)
    claims = await verifier.verify(bearer)
    return claims
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_opensearch(request: Request) -> OpenSearchService:
    """Return the OpenSearch service stored on the application state."""
    app_state = request.app.state
    return app_state.opensearch  # type: ignore[no-any-return]
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_knowledge_base(request: Request) -> KnowledgeBaseService:
    """Return the knowledge-base service stored on the application state."""
    app_state = request.app.state
    return app_state.knowledge_base  # type: ignore[no-any-return]
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
async def get_db_session(request: Request) -> AsyncIterator[AsyncSession]:
    """Yield a database session wrapped in a commit-or-rollback unit of work.

    The session is committed once the consumer finishes normally. If the
    consumer (or the commit itself) raises an ``Exception``, the session is
    rolled back and the error re-raised.

    Raises:
        ServiceUnavailableError: if no session factory is configured on the
            application state.
    """
    session_factory = request.app.state.db_session_factory
    if session_factory is None:
        from pulse_engine.core.exceptions import ServiceUnavailableError

        raise ServiceUnavailableError("Database not configured")

    async with session_factory() as db:
        try:
            yield db
            # Reached only when the consumer finished without raising. Kept
            # inside the try so a failing commit is rolled back as well.
            await db.commit()
        except Exception:
            await db.rollback()
            raise
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_orchestrator(request: Request) -> "BaseOrchestratorAdapter":
    """Return the orchestrator adapter stored on the application state."""
    app_state = request.app.state
    return app_state.orchestrator_adapter  # type: ignore[no-any-return]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_job_repository(
    session: AsyncSession = Depends(get_db_session),
) -> "JobRepository":
    """Build a ``JobRepository`` on top of the request's database session."""
    # Imported lazily; at module level the name exists for type checking only.
    from pulse_engine.extractor.repository import JobRepository

    repository = JobRepository(session)
    return repository
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_backend_registry(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> "BackendRegistry":
    """Return the application-wide ``BackendRegistry``, creating it on first use.

    The instance is stored on the application state so later requests reuse
    it instead of building a new one.
    """
    app_state = request.app.state
    if hasattr(app_state, "backend_registry"):
        return app_state.backend_registry  # type: ignore[no-any-return]

    from pulse_engine.deployment.backends.registry import BackendRegistry

    app_state.backend_registry = BackendRegistry(settings)
    return app_state.backend_registry  # type: ignore[no-any-return]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def get_job_launcher(
    request: Request,
    session: AsyncSession = Depends(get_db_session),
    registry: "BackendRegistry" = Depends(get_backend_registry),
    settings: Settings = Depends(get_settings),
) -> "JobLauncher":
    """Assemble a ``JobLauncher`` with its repositories and token issuer.

    Raises:
        ServiceUnavailableError: if ``settings.pulse_job_token_secret`` is
            empty, because job tokens could not be signed.
    """
    from pulse_engine.core.exceptions import ServiceUnavailableError
    from pulse_engine.core.job_token import JobTokenIssuer
    from pulse_engine.deployment.backend_deployment_repository import (
        BackendDeploymentRepository,
    )
    from pulse_engine.deployment.job_launcher import JobLauncher
    from pulse_engine.deployment.repository import RegistrationRepository

    # Both repositories share the request's session.
    registration_repo = RegistrationRepository(session)
    backend_deployment_repo = BackendDeploymentRepository(session)

    signing_secret = settings.pulse_job_token_secret
    if not signing_secret:
        raise ServiceUnavailableError("PULSE_JOB_TOKEN_SECRET is not configured")

    return JobLauncher(
        registration_repo=registration_repo,
        backend_deployment_repo=backend_deployment_repo,
        registry=registry,
        token_issuer=JobTokenIssuer(secret=signing_secret),
        settings=settings,
    )
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def get_job_service(
    request: Request,
    repo: "JobRepository" = Depends(get_job_repository),
    orchestrator: "BaseOrchestratorAdapter" = Depends(get_orchestrator),
    settings: Settings = Depends(get_settings),
    session: AsyncSession = Depends(get_db_session),
    job_launcher: "JobLauncher" = Depends(get_job_launcher),
) -> "JobService":
    """Assemble the ``JobService`` used by request handlers."""
    from pulse_engine.extractor.service import JobService
    from pulse_engine.extractor.stage_repository import StageRepository

    # NOTE: deployment_repository and token_issuer are deliberately left out.
    # All triggering goes through JobLauncher; omitting these arguments keeps
    # the legacy path in _trigger_stage disabled in production.
    service_kwargs = {
        "repository": repo,
        "orchestrator": orchestrator,
        "settings": settings,
        "stage_repository": StageRepository(session),
        "job_launcher": job_launcher,
    }
    return JobService(**service_kwargs)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def get_processing_pipeline(request: Request) -> "ProcessingPipeline":
    """Build a ``ProcessingPipeline`` around the app's knowledge-base service."""
    from pulse_engine.processor.pipeline import ProcessingPipeline

    return ProcessingPipeline(kb_service=request.app.state.knowledge_base)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def get_deployment_service(
    session: AsyncSession = Depends(get_db_session),
) -> "DeploymentService":
    """Assemble a ``DeploymentService`` whose repositories share *session*."""
    from pulse_engine.deployment.backend_deployment_repository import (
        BackendDeploymentRepository,
    )
    from pulse_engine.deployment.repository import RegistrationRepository
    from pulse_engine.deployment.service import DeploymentService

    registration_repo = RegistrationRepository(session)
    backend_deployment_repo = BackendDeploymentRepository(session)
    service = DeploymentService(
        registration_repo=registration_repo,
        backend_deployment_repo=backend_deployment_repo,
    )
    return service
|
|
File without changes
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Data access layer for the product_deployments lazy cache table."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime
|
|
6
|
+
|
|
7
|
+
import sqlalchemy as sa
|
|
8
|
+
from sqlalchemy.dialects.postgresql import insert as pg_insert
|
|
9
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
10
|
+
|
|
11
|
+
from pulse_engine.deployment.models import ProductBackendDeploymentModel
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BackendDeploymentRepository:
    """Data access for product_deployments, the per-backend deployment ID cache.

    All reads, writes and deletes of that table go through this class.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def get(
        self,
        product: str,
        stage: str,
        orchestrator: str,
        compute: str,
    ) -> ProductBackendDeploymentModel | None:
        """Return the cached row for this exact backend combination, if any."""
        model = ProductBackendDeploymentModel
        query = sa.select(model).where(
            model.product == product,
            model.stage == stage,
            model.orchestrator == orchestrator,
            model.compute == compute,
        )
        rows = await self._session.execute(query)
        return rows.scalar_one_or_none()

    async def upsert(
        self,
        product: str,
        stage: str,
        orchestrator: str,
        compute: str,
        deployment_id: str,
    ) -> None:
        """Insert the row, or overwrite ``deployment_id`` if it already exists.

        Uses SQL-level ON CONFLICT DO UPDATE, so two concurrent callers both
        complete without IntegrityError and the last write wins. The session
        is committed before returning.
        """
        timestamp = datetime.now(UTC)
        insert_stmt = pg_insert(ProductBackendDeploymentModel).values(
            product=product,
            stage=stage,
            orchestrator=orchestrator,
            compute=compute,
            deployment_id=deployment_id,
            created_at=timestamp,
            updated_at=timestamp,
        )
        # On conflict only the handle and the modification time change;
        # created_at keeps its original value.
        upsert_stmt = insert_stmt.on_conflict_do_update(
            index_elements=["product", "stage", "orchestrator", "compute"],
            set_={"deployment_id": deployment_id, "updated_at": timestamp},
        )
        await self._session.execute(upsert_stmt)
        await self._session.commit()

    async def delete_by_product_stage(self, product: str, stage: str) -> None:
        """Remove every cached deployment ID for this product+stage.

        DeploymentService calls this when a product re-registers with a new
        image, so JobLauncher recreates the Prefect deployments on the next
        trigger. The session is committed before returning.
        """
        model = ProductBackendDeploymentModel
        purge = sa.delete(model).where(
            model.product == product,
            model.stage == stage,
        )
        await self._session.execute(purge)
        await self._session.commit()
|
|
File without changes
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Abstract base class for runner backends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from pulse_engine.extractor.orchestrator.base import OrchestratorRunStatus
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseRunnerBackend(ABC):
    """Contract implemented once per (orchestrator, compute) pair.

    A backend covers backend setup, run-unit registration, run triggering,
    status querying, and cancellation.
    """

    @abstractmethod
    async def prepare(self) -> None:
        """Perform one-time setup before triggering (e.g. work pool creation).

        Backends that need no setup implement this as a no-op.
        """

    @abstractmethod
    async def register(
        self,
        product: str,
        stage: str,
        image: str,
        entrypoint: str | None = None,
    ) -> str:
        """Register or update the runnable unit for this product+stage.

        Returns:
            A stable handle that the launcher caches:
            - Prefect backends: the Prefect deployment UUID
            - Native Lambda: "{product}-{stage}" (function name by convention)

        ``entrypoint`` is required for Prefect backends and ignored by native
        backends.
        """

    @abstractmethod
    async def trigger(self, handle: str, parameters: dict[str, Any]) -> str:
        """Start a run for *handle* and return a run ID for correlation."""

    @abstractmethod
    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Return the current status of the run identified by *run_id*."""

    @abstractmethod
    async def cancel_run(self, run_id: str) -> bool:
        """Request cancellation of *run_id*; return True if it was accepted."""
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Exceptions for runner backends."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BackendNotAvailableError(Exception):
    """Raised when a requested (orchestrator, compute) backend is not available.

    Attributes:
        orchestrator: Orchestrator name that was requested.
        compute: Compute target that was requested.
        available: Names of the backends that can be used instead.
    """

    def __init__(
        self,
        orchestrator: str,
        compute: str,
        available: list[str],
    ) -> None:
        self.orchestrator = orchestrator
        self.compute = compute
        self.available = available
        msg = (
            f"Backend not available: orchestrator={orchestrator!r}, "
            f"compute={compute!r}. Available: {', '.join(available)}"
        )
        super().__init__(msg)

    def __reduce__(self) -> tuple[type, tuple[str, str, list[str]]]:
        """Rebuild from the constructor arguments when pickled or copied.

        ``Exception`` reconstructs itself by calling the class with
        ``self.args``, which here holds only the formatted message, so the
        default behaviour fails with ``TypeError`` on the three-argument
        ``__init__``.
        """
        return (type(self), (self.orchestrator, self.compute, self.available))
|