pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,109 @@
1
+ # src/pulse_engine/core/job_token.py
2
+ """Job-scoped JWT issuance and verification for container callbacks."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import time
7
+ from dataclasses import dataclass, field
8
+ from typing import Any
9
+
10
+ from jose import JWTError, jwt
11
+
12
+ from pulse_engine.core.exceptions import UnauthorizedError
13
+
14
+ _ALGORITHM = "HS256"
15
+ _DEFAULT_TTL = 86400 # 24 hours
16
+
17
+
18
@dataclass
class JobClaims:
    """Decoded claim set carried by a job-scoped JWT.

    Attributes:
        sub: Token subject, formatted as ``"job:{job_id}"``.
        tenant_id: Tenant identifier taken from the token.
        product: Product name the token was issued for.
        stage: Stage name, e.g. "extraction" | "processing" | "storage".
        scope: Scopes granted to the token.
        orchestrator: Orchestrator name; defaults to ``"prefect"``.
        compute: Compute target; defaults to ``"ecs"``.
        raw: Full decoded payload; defaults to a fresh empty dict per instance.
    """

    # Required claims.
    sub: str
    tenant_id: str
    product: str
    stage: str
    scope: list[str]

    # Optional claims with defaults.
    orchestrator: str = "prefect"
    compute: str = "ecs"
    raw: dict[str, Any] = field(default_factory=dict)
30
+
31
+
32
class JobTokenIssuer:
    """Mints HMAC-signed JWTs that are bound to a single job stage."""

    def __init__(self, secret: str) -> None:
        """Keep the shared secret used to sign every token."""
        self._secret = secret

    def issue(
        self,
        job_id: str,
        tenant_id: str,
        product: str,
        stage: str,
        scope: list[str],
        orchestrator: str = "prefect",
        compute: str = "ecs",
        ttl_seconds: int = _DEFAULT_TTL,
    ) -> str:
        """Return a signed JWT describing one job stage.

        The subject is ``"job:{job_id}"`` and the tenant is stored under the
        ``custom:tenant_id`` claim. ``iat`` is the current Unix time and
        ``exp`` is ``iat + ttl_seconds``.
        """
        issued_at = int(time.time())
        expires_at = issued_at + ttl_seconds

        # Claims are inserted in the same order as before so the serialized
        # payload is unchanged.
        claims: dict[str, Any] = {
            "sub": f"job:{job_id}",
            "custom:tenant_id": tenant_id,
        }
        claims.update(
            product=product,
            stage=stage,
            scope=scope,
            orchestrator=orchestrator,
            compute=compute,
            iat=issued_at,
            exp=expires_at,
        )
        return jwt.encode(claims, self._secret, algorithm=_ALGORITHM)

    def issue_token(
        self,
        pipeline_run_id: str,
        tenant_id: str,
        ttl_seconds: int = _DEFAULT_TTL,
    ) -> str:
        """Issue a run-scoped token for pipeline orchestration.

        Delegates to ``issue`` with product and stage both set to
        ``"pipeline"`` and the scopes ``pipeline:run`` and ``kb:write``.
        """
        run_scopes = ["pipeline:run", "kb:write"]
        return self.issue(
            job_id=pipeline_run_id,
            tenant_id=tenant_id,
            product="pipeline",
            stage="pipeline",
            scope=run_scopes,
            ttl_seconds=ttl_seconds,
        )
78
+
79
+
80
class JobTokenVerifier:
    """Validates HMAC-signed job tokens and converts them to ``JobClaims``."""

    def __init__(self, secret: str) -> None:
        """Keep the shared secret used to check token signatures."""
        self._secret = secret

    async def verify(self, token: str) -> JobClaims:
        """Decode ``token`` and return its claims.

        Raises:
            UnauthorizedError: if decoding fails, the subject does not start
                with ``"job:"``, or the ``custom:tenant_id`` claim is missing
                or empty.
        """
        try:
            decoded = jwt.decode(token, self._secret, algorithms=[_ALGORITHM])
        except JWTError as exc:
            raise UnauthorizedError(f"Invalid token: {exc}") from exc

        subject = decoded.get("sub", "")
        is_job_token = subject.startswith("job:")
        if not is_job_token:
            raise UnauthorizedError("Not a job-scoped token")

        tenant = decoded.get("custom:tenant_id")
        if not tenant:
            raise UnauthorizedError("Token missing tenant_id claim")

        # Optional claims fall back to the same defaults JobClaims documents.
        optional_defaults = (
            ("product", ""),
            ("stage", ""),
            ("scope", []),
            ("orchestrator", "prefect"),
            ("compute", "ecs"),
        )
        optional = {
            claim: decoded.get(claim, fallback)
            for claim, fallback in optional_defaults
        }
        return JobClaims(sub=subject, tenant_id=tenant, raw=decoded, **optional)
@@ -0,0 +1,45 @@
1
+ import logging
2
+ import sys
3
+
4
+ import structlog
5
+
6
+
7
def setup_logging(log_level: str = "INFO", env: str = "development") -> None:
    """Configure structlog and the stdlib root logger to share one output format.

    Args:
        log_level: Name of a stdlib logging level (case-insensitive). Anything
            that does not resolve to a numeric level falls back to ``INFO``.
        env: ``"development"`` selects the console renderer; any other value
            selects JSON output.

    Side effects:
        Reconfigures structlog globally, removes every handler from the root
        logger and installs a single stdout ``StreamHandler``.
    """
    shared_processors: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_log_level,
        structlog.stdlib.add_logger_name,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.UnicodeDecoder(),
    ]

    renderer: structlog.types.Processor = (
        structlog.dev.ConsoleRenderer()
        if env == "development"
        else structlog.processors.JSONRenderer()
    )

    structlog.configure(
        processors=[
            *shared_processors,
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    formatter = structlog.stdlib.ProcessorFormatter(
        # Records that did not originate from structlog (plain ``logging``
        # calls made by libraries) skip the structlog chain above. Running the
        # shared processors here gives them the same timestamp, level and
        # logger-name fields.
        foreign_pre_chain=shared_processors,
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            renderer,
        ],
    )

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Only accept attributes that are real numeric levels. A name such as
    # "BASIC_FORMAT" resolves to a string on the logging module and would make
    # setLevel() raise.
    level = getattr(logging, log_level.upper(), None)
    if not isinstance(level, int):
        level = logging.INFO

    root_logger = logging.getLogger()
    root_logger.handlers.clear()
    root_logger.addHandler(handler)
    root_logger.setLevel(level)
@@ -0,0 +1,23 @@
1
+ """Scope enforcement for job-scoped tokens."""
2
+
3
+ from typing import Any
4
+
5
+ from fastapi import Depends, Request
6
+
7
+ from pulse_engine.core.exceptions import ForbiddenError
8
+ from pulse_engine.core.job_token import JobClaims
9
+
10
+
11
def require_scope(scope: str) -> Any:
    """Build a FastAPI dependency that demands ``scope`` from job tokens.

    Only claims of type ``JobClaims`` found on ``request.state.user_claims``
    are checked. Any other value, including a missing attribute, passes
    through, which is how Cognito-authenticated requests bypass the check.

    Raises (from the returned dependency):
        ForbiddenError: if the job token's scope list lacks ``scope``.
    """

    def _check(request: Request) -> None:
        user_claims = getattr(request.state, "user_claims", None)
        if not isinstance(user_claims, JobClaims):
            return
        if scope in user_claims.scope:
            return
        raise ForbiddenError(f"Token missing required scope: {scope}")

    return Depends(_check)
@@ -0,0 +1,130 @@
1
+ import time as _time
2
+ from dataclasses import dataclass
3
+ from typing import Any, Protocol
4
+
5
+ import httpx
6
+ from jose import JWTError, jwt
7
+
8
+ from pulse_engine.core.exceptions import UnauthorizedError
9
+ from pulse_engine.core.job_token import JobClaims
10
+
11
+ _JWKS_TTL_SECONDS = 3600 # Re-fetch JWKS every hour
12
+
13
+
14
@dataclass
class CognitoClaims:
    """Claims extracted from a verified user token.

    Attributes:
        sub: Subject claim of the token.
        email: Email claim of the token.
        tenant_id: Value of the ``custom:tenant_id`` claim.
        raw: The full decoded payload.
    """

    sub: str
    email: str
    tenant_id: str
    raw: dict[str, Any]
20
+
21
+
22
class TokenVerifier(Protocol):
    """Structural interface for objects that can validate a bearer token."""

    async def verify(self, token: str) -> CognitoClaims | JobClaims:
        """Validate ``token`` and return the claims it carries."""
        ...
24
+
25
+
26
class CognitoTokenVerifier:
    """Verifies RS256 JWTs against a remote JWKS document.

    The JWKS is cached on the instance. It is re-fetched once
    ``_JWKS_TTL_SECONDS`` has elapsed, or when a token names a ``kid`` that
    is absent from the cached key set (subject to a short cooldown).
    """

    # Minimum age of the cached JWKS before an unknown ``kid`` may trigger
    # another fetch. Tokens are untrusted input: without this floor every
    # request carrying a made-up ``kid`` would cause an outbound HTTP call.
    _FORCED_REFRESH_COOLDOWN_SECONDS = 30.0

    def __init__(self, jwks_url: str, issuer: str, audience: str) -> None:
        """Store the JWKS endpoint and the issuer/audience to validate against."""
        self._jwks_url = jwks_url
        self._issuer = issuer
        self._audience = audience
        self._jwks: dict[str, Any] | None = None
        self._jwks_fetched_at: float = 0.0

    async def _get_jwks(self, *, force_refresh: bool = False) -> dict[str, Any]:
        """Return the JWKS, fetching it when missing, expired, or forced.

        A forced refresh is ignored while the cached copy is younger than
        ``_FORCED_REFRESH_COOLDOWN_SECONDS``; the cached copy is returned
        instead.

        Raises:
            httpx.HTTPError: if the JWKS endpoint cannot be fetched.
        """
        now = _time.monotonic()
        cached = self._jwks
        if cached is not None:
            age = now - self._jwks_fetched_at
            if force_refresh:
                if age < self._FORCED_REFRESH_COOLDOWN_SECONDS:
                    return cached
            elif age <= _JWKS_TTL_SECONDS:
                return cached

        async with httpx.AsyncClient() as client:
            resp = await client.get(self._jwks_url)
            resp.raise_for_status()
            fetched: dict[str, Any] = resp.json()
        self._jwks = fetched
        self._jwks_fetched_at = now
        return fetched

    async def verify(self, token: str) -> CognitoClaims:
        """Validate ``token`` and return its claims.

        Raises:
            UnauthorizedError: if the token is malformed, its signing key is
                unknown, signature/audience/issuer validation fails, or the
                ``custom:tenant_id`` claim is missing or empty.
        """
        try:
            jwks = await self._get_jwks()
            unverified_header = jwt.get_unverified_header(token)
            kid = unverified_header.get("kid")
            key = self._find_key(jwks, kid)

            # On key miss, attempt a forced refresh (handles key rotation).
            if key is None:
                jwks = await self._get_jwks(force_refresh=True)
                key = self._find_key(jwks, kid)

            if key is None:
                raise UnauthorizedError("Invalid token: signing key not found")

            payload = jwt.decode(
                token,
                key,
                algorithms=["RS256"],
                audience=self._audience,
                issuer=self._issuer,
            )
        except JWTError as e:
            raise UnauthorizedError("Invalid token") from e

        tenant_id = payload.get("custom:tenant_id")
        if not tenant_id:
            raise UnauthorizedError("Token missing tenant_id claim")

        return CognitoClaims(
            sub=payload.get("sub", ""),
            email=payload.get("email", ""),
            tenant_id=tenant_id,
            raw=payload,
        )

    @staticmethod
    def _find_key(jwks: dict[str, Any], kid: str | None) -> dict[str, Any] | None:
        """Return the first JWKS entry whose ``kid`` equals ``kid``, else None."""
        for k in jwks.get("keys", []):
            if k.get("kid") == kid:
                return k  # type: ignore[no-any-return]
        return None
92
+
93
+
94
class MockTokenVerifier:
    """Shared-secret HS256 verifier that yields ``CognitoClaims``."""

    def __init__(self, secret: str, audience: str, issuer: str) -> None:
        """Store the signing secret and the audience/issuer to validate against."""
        self._secret = secret
        self._audience = audience
        self._issuer = issuer

    async def verify(self, token: str) -> CognitoClaims:
        """Decode ``token`` with the shared secret and return its claims.

        Raises:
            UnauthorizedError: if decoding fails or the ``custom:tenant_id``
                claim is missing or empty.
        """
        decode_options = {
            "algorithms": ["HS256"],
            "audience": self._audience,
            "issuer": self._issuer,
        }
        try:
            claims = jwt.decode(token, self._secret, **decode_options)
        except JWTError as exc:
            raise UnauthorizedError("Invalid token") from exc

        tenant = claims.get("custom:tenant_id")
        if tenant:
            return CognitoClaims(
                sub=claims.get("sub", ""),
                email=claims.get("email", ""),
                tenant_id=tenant,
                raw=claims,
            )
        raise UnauthorizedError("Token missing tenant_id claim")
122
+
123
+
124
+ def extract_bearer_token(authorization: str | None) -> str:
125
+ if not authorization:
126
+ raise UnauthorizedError("Missing authorization header")
127
+ parts = authorization.split()
128
+ if len(parts) != 2 or parts[0].lower() != "bearer":
129
+ raise UnauthorizedError("Invalid authorization header format")
130
+ return parts[1]
@@ -0,0 +1,30 @@
1
+ from sqlalchemy.ext.asyncio import (
2
+ AsyncEngine,
3
+ AsyncSession,
4
+ async_sessionmaker,
5
+ create_async_engine,
6
+ )
7
+ from sqlalchemy.orm import DeclarativeBase
8
+
9
+
10
class Base(DeclarativeBase):
    """Declarative base class for SQLAlchemy ORM models."""
12
+
13
+
14
def build_async_engine(database_url: str) -> AsyncEngine:
    """Create an async SQLAlchemy engine for ``database_url``.

    SQL echo is off. The driver is told to keep no statement cache, no
    prepared-statement cache, and to leave prepared statements unnamed.
    """
    # NOTE(review): these look like asyncpg-specific options, typically used
    # behind a transaction-pooling proxy — confirm against the deployment.
    driver_options = {
        "statement_cache_size": 0,
        "prepared_statement_cache_size": 0,
        "prepared_statement_name_func": lambda: "",
    }
    engine = create_async_engine(
        database_url,
        echo=False,
        future=True,
        connect_args=driver_options,
    )
    return engine
25
+
26
+
27
def build_session_factory(
    engine: AsyncEngine,
) -> async_sessionmaker[AsyncSession]:
    """Return a session factory bound to ``engine``.

    Sessions are ``AsyncSession`` instances and objects are not expired on
    commit.
    """
    session_factory = async_sessionmaker(
        bind=engine,
        class_=AsyncSession,
        expire_on_commit=False,
    )
    return session_factory
@@ -0,0 +1,166 @@
1
+ from collections.abc import AsyncIterator
2
+ from typing import TYPE_CHECKING
3
+
4
+ from fastapi import Depends, Request
5
+ from sqlalchemy.ext.asyncio import AsyncSession
6
+
7
+ from pulse_engine.config import Settings, get_settings
8
+ from pulse_engine.core.job_token import JobClaims
9
+ from pulse_engine.core.security import (
10
+ CognitoClaims,
11
+ CognitoTokenVerifier,
12
+ TokenVerifier,
13
+ extract_bearer_token,
14
+ )
15
+ from pulse_engine.services.opensearch import OpenSearchService
16
+ from pulse_engine.storage.knowledge_base import KnowledgeBaseService
17
+
18
+ if TYPE_CHECKING:
19
+ from pulse_engine.deployment.backends.registry import BackendRegistry
20
+ from pulse_engine.deployment.job_launcher import JobLauncher
21
+ from pulse_engine.deployment.service import DeploymentService
22
+ from pulse_engine.extractor.orchestrator.base import BaseOrchestratorAdapter
23
+ from pulse_engine.extractor.repository import JobRepository
24
+ from pulse_engine.extractor.service import JobService
25
+ from pulse_engine.processor.pipeline import ProcessingPipeline
26
+
27
+
28
# Verifiers keyed by (jwks_url, issuer, audience). CognitoTokenVerifier keeps
# its JWKS cache on the instance, and FastAPI calls this dependency once per
# request, so building a new verifier each time would discard that cache and
# re-download the JWKS for every authenticated request.
_TOKEN_VERIFIERS: dict[tuple[str, str, str], CognitoTokenVerifier] = {}


def get_token_verifier(
    settings: Settings = Depends(get_settings),
) -> TokenVerifier:
    """Return the Cognito verifier for the current settings.

    One verifier is created per distinct combination of JWKS URL, issuer and
    app client id, and reused on subsequent calls.
    """
    cache_key = (
        settings.cognito_jwks_url,
        settings.cognito_issuer,
        settings.cognito_app_client_id,
    )
    verifier = _TOKEN_VERIFIERS.get(cache_key)
    if verifier is None:
        jwks_url, issuer, audience = cache_key
        verifier = CognitoTokenVerifier(
            jwks_url=jwks_url,
            issuer=issuer,
            audience=audience,
        )
        _TOKEN_VERIFIERS[cache_key] = verifier
    return verifier
36
+
37
+
38
async def get_current_user(
    request: Request,
    verifier: TokenVerifier = Depends(get_token_verifier),
) -> CognitoClaims | JobClaims:
    """Authenticate the request from its ``authorization`` header.

    The bearer token is extracted from the header and handed to ``verifier``;
    whatever claims object the verifier returns is passed back unchanged.
    """
    auth_header = request.headers.get("authorization")
    bearer = extract_bearer_token(auth_header)
    claims = await verifier.verify(bearer)
    return claims
44
+
45
+
46
def get_opensearch(request: Request) -> OpenSearchService:
    """Return the object stored on ``request.app.state.opensearch``."""
    app_state = request.app.state
    return app_state.opensearch  # type: ignore[no-any-return]
48
+
49
+
50
def get_knowledge_base(request: Request) -> KnowledgeBaseService:
    """Return the object stored on ``request.app.state.knowledge_base``."""
    app_state = request.app.state
    return app_state.knowledge_base  # type: ignore[no-any-return]
52
+
53
+
54
async def get_db_session(request: Request) -> AsyncIterator[AsyncSession]:
    """Yield a database session scoped to the current request.

    The session comes from ``request.app.state.db_session_factory``. It is
    committed after the consumer finishes, and rolled back (then the error is
    re-raised) if the consumer or the commit raises an ``Exception``.

    Raises:
        ServiceUnavailableError: if the session factory is ``None``.
    """
    session_factory = request.app.state.db_session_factory
    if session_factory is None:
        from pulse_engine.core.exceptions import ServiceUnavailableError

        raise ServiceUnavailableError("Database not configured")

    async with session_factory() as db:
        try:
            yield db
            # Commit stays inside the try so a failed commit is rolled back.
            await db.commit()
        except Exception:
            await db.rollback()
            raise
67
+
68
+
69
def get_orchestrator(request: Request) -> "BaseOrchestratorAdapter":
    """Return the object stored on ``request.app.state.orchestrator_adapter``."""
    app_state = request.app.state
    return app_state.orchestrator_adapter  # type: ignore[no-any-return]
71
+
72
+
73
def get_job_repository(
    session: AsyncSession = Depends(get_db_session),
) -> "JobRepository":
    """Build a ``JobRepository`` on top of the request's database session."""
    # Imported here rather than at module level, matching the TYPE_CHECKING
    # guard used for this name at the top of the file.
    from pulse_engine.extractor.repository import JobRepository

    repository = JobRepository(session)
    return repository
79
+
80
+
81
def get_backend_registry(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> "BackendRegistry":
    """Return the application-wide ``BackendRegistry``, creating it on first use.

    The registry is stored on ``request.app.state.backend_registry`` so it is
    not rebuilt for each request.
    """
    app_state = request.app.state
    if hasattr(app_state, "backend_registry"):
        return app_state.backend_registry  # type: ignore[no-any-return]

    from pulse_engine.deployment.backends.registry import BackendRegistry

    app_state.backend_registry = BackendRegistry(settings)
    return app_state.backend_registry  # type: ignore[no-any-return]
91
+
92
+
93
def get_job_launcher(
    request: Request,
    session: AsyncSession = Depends(get_db_session),
    registry: "BackendRegistry" = Depends(get_backend_registry),
    settings: Settings = Depends(get_settings),
) -> "JobLauncher":
    """Assemble a ``JobLauncher`` for the current request.

    The launcher receives registration and backend-deployment repositories
    built on ``session``, the backend ``registry``, a ``JobTokenIssuer``
    created from ``settings.pulse_job_token_secret``, and ``settings``.

    Raises:
        ServiceUnavailableError: if the job token secret is empty or unset.
    """
    from pulse_engine.core.exceptions import ServiceUnavailableError
    from pulse_engine.core.job_token import JobTokenIssuer
    from pulse_engine.deployment.backend_deployment_repository import (
        BackendDeploymentRepository,
    )
    from pulse_engine.deployment.job_launcher import JobLauncher
    from pulse_engine.deployment.repository import RegistrationRepository

    registration_repo = RegistrationRepository(session)
    deployment_repo = BackendDeploymentRepository(session)

    if not settings.pulse_job_token_secret:
        raise ServiceUnavailableError("PULSE_JOB_TOKEN_SECRET is not configured")

    issuer = JobTokenIssuer(secret=settings.pulse_job_token_secret)
    launcher = JobLauncher(
        registration_repo=registration_repo,
        backend_deployment_repo=deployment_repo,
        registry=registry,
        token_issuer=issuer,
        settings=settings,
    )
    return launcher
119
+
120
+
121
def get_job_service(
    request: Request,
    repo: "JobRepository" = Depends(get_job_repository),
    orchestrator: "BaseOrchestratorAdapter" = Depends(get_orchestrator),
    settings: Settings = Depends(get_settings),
    session: AsyncSession = Depends(get_db_session),
    job_launcher: "JobLauncher" = Depends(get_job_launcher),
) -> "JobService":
    """Assemble a ``JobService`` from the request-scoped dependencies.

    A ``StageRepository`` is built on ``session`` and passed along with the
    job repository, orchestrator adapter, settings and job launcher.
    """
    from pulse_engine.extractor.service import JobService
    from pulse_engine.extractor.stage_repository import StageRepository

    # NOTE: deployment_repository and token_issuer are intentionally NOT passed here.
    # JobLauncher handles all triggering. The legacy path in _trigger_stage is
    # disabled in production by omitting these args.
    service_kwargs = {
        "repository": repo,
        "orchestrator": orchestrator,
        "settings": settings,
        "stage_repository": StageRepository(session),
        "job_launcher": job_launcher,
    }
    return JobService(**service_kwargs)
143
+
144
+
145
def get_processing_pipeline(request: Request) -> "ProcessingPipeline":
    """Build a ``ProcessingPipeline`` around ``app.state.knowledge_base``."""
    from pulse_engine.processor.pipeline import ProcessingPipeline

    knowledge_base = request.app.state.knowledge_base
    pipeline = ProcessingPipeline(kb_service=knowledge_base)
    return pipeline
150
+
151
+
152
def get_deployment_service(
    session: AsyncSession = Depends(get_db_session),
) -> "DeploymentService":
    """Assemble a ``DeploymentService`` for the current request.

    Both the registration repository and the backend-deployment repository
    are built on the same ``session``.
    """
    from pulse_engine.deployment.backend_deployment_repository import (
        BackendDeploymentRepository,
    )
    from pulse_engine.deployment.repository import RegistrationRepository
    from pulse_engine.deployment.service import DeploymentService

    registration_repo = RegistrationRepository(session)
    deployment_repo = BackendDeploymentRepository(session)
    service = DeploymentService(
        registration_repo=registration_repo,
        backend_deployment_repo=deployment_repo,
    )
    return service
File without changes
@@ -0,0 +1,83 @@
1
+ """Data access layer for the product_deployments lazy cache table."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import UTC, datetime
6
+
7
+ import sqlalchemy as sa
8
+ from sqlalchemy.dialects.postgresql import insert as pg_insert
9
+ from sqlalchemy.ext.asyncio import AsyncSession
10
+
11
+ from pulse_engine.deployment.models import ProductBackendDeploymentModel
12
+
13
+
14
+ class BackendDeploymentRepository:
15
+ """CRUD for product_deployments — the per-backend Prefect deployment ID cache.
16
+
17
+ Owns all reads, writes, and deletes for this table.
18
+ """
19
+
20
+ def __init__(self, session: AsyncSession) -> None:
21
+ self._session = session
22
+
23
+ async def get(
24
+ self,
25
+ product: str,
26
+ stage: str,
27
+ orchestrator: str,
28
+ compute: str,
29
+ ) -> ProductBackendDeploymentModel | None:
30
+ stmt = sa.select(ProductBackendDeploymentModel).where(
31
+ ProductBackendDeploymentModel.product == product,
32
+ ProductBackendDeploymentModel.stage == stage,
33
+ ProductBackendDeploymentModel.orchestrator == orchestrator,
34
+ ProductBackendDeploymentModel.compute == compute,
35
+ )
36
+ result = await self._session.execute(stmt)
37
+ return result.scalar_one_or_none()
38
+
39
+ async def upsert(
40
+ self,
41
+ product: str,
42
+ stage: str,
43
+ orchestrator: str,
44
+ compute: str,
45
+ deployment_id: str,
46
+ ) -> None:
47
+ """Insert or update deployment_id using SQL-level ON CONFLICT DO UPDATE.
48
+
49
+ Safe under concurrent triggers — two concurrent callers both complete
50
+ without IntegrityError; the last write wins.
51
+ """
52
+ now = datetime.now(UTC)
53
+ stmt = (
54
+ pg_insert(ProductBackendDeploymentModel)
55
+ .values(
56
+ product=product,
57
+ stage=stage,
58
+ orchestrator=orchestrator,
59
+ compute=compute,
60
+ deployment_id=deployment_id,
61
+ created_at=now,
62
+ updated_at=now,
63
+ )
64
+ .on_conflict_do_update(
65
+ index_elements=["product", "stage", "orchestrator", "compute"],
66
+ set_={"deployment_id": deployment_id, "updated_at": now},
67
+ )
68
+ )
69
+ await self._session.execute(stmt)
70
+ await self._session.commit()
71
+
72
+ async def delete_by_product_stage(self, product: str, stage: str) -> None:
73
+ """Delete all cached deployment IDs for this product+stage.
74
+
75
+ Called by DeploymentService when a product re-registers with a new image,
76
+ so JobLauncher recreates Prefect deployments on the next trigger.
77
+ """
78
+ stmt = sa.delete(ProductBackendDeploymentModel).where(
79
+ ProductBackendDeploymentModel.product == product,
80
+ ProductBackendDeploymentModel.stage == stage,
81
+ )
82
+ await self._session.execute(stmt)
83
+ await self._session.commit()
File without changes
@@ -0,0 +1,50 @@
1
+ """Abstract base class for runner backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Any
7
+
8
+ from pulse_engine.extractor.orchestrator.base import OrchestratorRunStatus
9
+
10
+
11
class BaseRunnerBackend(ABC):
    """Contract implemented once per (orchestrator, compute) pair.

    A backend covers five duties: backend setup, registering a runnable
    unit, triggering a run, querying run status, and cancelling a run.
    """

    @abstractmethod
    async def prepare(self) -> None:
        """Perform one-time setup before triggering (e.g. work pool creation).

        Backends that need no setup implement this as a no-op.
        """

    @abstractmethod
    async def register(
        self,
        product: str,
        stage: str,
        image: str,
        entrypoint: str | None = None,
    ) -> str:
        """Register or update the runnable unit for a product+stage.

        Returns:
            A stable handle that the launcher caches:
            - Prefect backends: the Prefect deployment UUID
            - Native Lambda: "{product}-{stage}" (function name by convention)

        ``entrypoint`` is required for Prefect backends and ignored by
        native backends.
        """

    @abstractmethod
    async def trigger(self, handle: str, parameters: dict[str, Any]) -> str:
        """Start a run for ``handle`` and return a run ID for correlation."""

    @abstractmethod
    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Return the current status of the run identified by ``run_id``."""

    @abstractmethod
    async def cancel_run(self, run_id: str) -> bool:
        """Request cancellation of a run; return True if it was accepted."""
@@ -0,0 +1,20 @@
1
+ """Exceptions for runner backends."""
2
+
3
+
4
class BackendNotAvailableError(Exception):
    """Raised when a requested (orchestrator, compute) backend is not available.

    Attributes:
        orchestrator: The orchestrator that was requested.
        compute: The compute target that was requested.
        available: Names of the backends that are available.
    """

    def __init__(
        self,
        orchestrator: str,
        compute: str,
        available: list[str],
    ) -> None:
        self.orchestrator = orchestrator
        self.compute = compute
        self.available = available
        msg = (
            f"Backend not available: orchestrator={orchestrator!r}, "
            f"compute={compute!r}. Available: {', '.join(available)}"
        )
        super().__init__(msg)

    def __reduce__(self) -> tuple[type, tuple[str, str, list[str]]]:
        """Rebuild the exception from its three constructor arguments.

        The default exception reduction replays ``self.args``, which holds
        only the formatted message. Unpickling or copying would then call the
        constructor with one argument and fail with ``TypeError``.
        """
        return (type(self), (self.orchestrator, self.compute, self.available))