pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,285 @@
1
+ """Dynamic AWS infrastructure provisioner for pipeline step execution.
2
+
3
+ The engine auto-provisions ECS task definitions, Lambda functions, and ECR
4
+ repositories so that products can execute immediately after registration
5
+ without requiring manual Terraform runs for each new product/module.
6
+
7
+ Prerequisites (created by Terraform once):
8
+ - ECS cluster, execution role, task role, security group (pipeline_cluster module)
9
+ - Lambda execution role, security group (lambda_pipeline module)
10
+ - VPC with private subnets (networking module)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ from typing import Any
17
+
18
+ import boto3
19
+ from botocore.exceptions import ClientError
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class InfraProvisioner:
    """Auto-provisions AWS infrastructure for pipeline step containers.

    Relies on the IAM roles and networking created once by Terraform and
    creates the dynamic resources (ECS task definitions, Lambda functions,
    ECR repositories) on demand.

    Results are memoised per instance so that repeated calls within the same
    process do not repeat AWS API calls.
    """

    # Lifecycle policy applied to every repository this class creates:
    # expire images once more than 30 are stored.
    _ECR_LIFECYCLE_POLICY = (
        '{"rules":[{"rulePriority":1,"description":"Keep last 30",'
        '"selection":{"tagStatus":"any","countType":"imageCountMoreThan",'
        '"countNumber":30},"action":{"type":"expire"}}]}'
    )

    def __init__(
        self,
        *,
        region: str,
        pipeline_cluster_name: str,
        pipeline_execution_role_arn: str,
        pipeline_task_role_arn: str,
        pipeline_log_group: str,
        pipeline_subnets: list[str],
        pipeline_security_groups: list[str],
        lambda_execution_role_arn: str,
        lambda_subnets: list[str],
        lambda_security_groups: list[str],
        lambda_log_group: str,
    ) -> None:
        """Store the Terraform-managed identifiers used by the ensure_* methods."""
        self._region = region
        self._cluster = pipeline_cluster_name
        self._exec_role_arn = pipeline_execution_role_arn
        self._task_role_arn = pipeline_task_role_arn
        self._log_group = pipeline_log_group
        self._subnets = pipeline_subnets
        self._security_groups = pipeline_security_groups
        self._lambda_role_arn = lambda_execution_role_arn
        self._lambda_subnets = lambda_subnets
        self._lambda_sgs = lambda_security_groups
        self._lambda_log_group = lambda_log_group

        # Caches to avoid redundant API calls within the same process lifetime.
        self._ecs_task_def_cache: dict[str, str] = {}
        # Function name -> image URI last deployed successfully. Keyed on the
        # image as well so that a new image for a known function is deployed
        # instead of being skipped.
        self._lambda_fn_cache: dict[str, str] = {}
        self._ecr_repo_cache: dict[str, str] = {}

    # ------------------------------------------------------------------
    # ECS Task Definition
    # ------------------------------------------------------------------

    def ensure_ecs_task_definition(
        self,
        family: str = "pulse-pipeline-step",
        cpu: str = "256",
        memory: str = "512",
        image: str | None = None,
    ) -> str:
        """Create or update a Fargate task definition for pipeline steps.

        When ``image`` is provided, a new revision with that image is
        registered. When ``image`` is None, the existing revision of
        ``family`` is reused (``cpu``/``memory`` are then not applied); if the
        family does not exist, one is registered with a placeholder image.

        Returns:
            The task definition ARN.

        Raises:
            botocore.exceptions.ClientError: for any AWS error other than the
                ``ClientException`` treated as "family does not exist".
        """
        cache_key = f"{family}:{cpu}:{memory}:{image or 'default'}"
        cached_arn = self._ecs_task_def_cache.get(cache_key)
        if cached_arn is not None:
            return cached_arn

        ecs = boto3.client("ecs", region_name=self._region)

        # If no specific image was requested, reuse the existing task def.
        if image is None:
            try:
                resp = ecs.describe_task_definition(taskDefinition=family)
            except ClientError as e:
                # ClientException is handled as "no such family"; fall through
                # and register one below. Anything else is a real failure.
                if e.response["Error"]["Code"] != "ClientException":
                    raise
            else:
                existing = resp["taskDefinition"]
                result_arn = str(existing["taskDefinitionArn"])
                logger.info(
                    "ECS task definition already exists: %s (rev %s)",
                    family,
                    existing["revision"],
                )
                self._ecs_task_def_cache[cache_key] = result_arn
                return result_arn

        # Register a new task definition with the specified (or placeholder) image.
        container_def: dict[str, Any] = {
            "name": "step",
            "image": image or "amazon/amazon-ecs-sample",
            "essential": True,
            # Set entryPoint to "python -c" so that the ECS
            # containerOverrides.command becomes the script body.
            # This overrides any image ENTRYPOINT (e.g. Lambda images
            # with awslambdaric) ensuring the pipeline runner script
            # executes instead of the Lambda runtime.
            "entryPoint": ["python", "-c"],
            "command": ["print('task-definition placeholder')"],
            "logConfiguration": {
                "logDriver": "awslogs",
                "options": {
                    "awslogs-group": self._log_group,
                    "awslogs-region": self._region,
                    "awslogs-stream-prefix": "step",
                },
            },
        }

        resp = ecs.register_task_definition(
            family=family,
            networkMode="awsvpc",
            requiresCompatibilities=["FARGATE"],
            cpu=cpu,
            memory=memory,
            executionRoleArn=self._exec_role_arn,
            taskRoleArn=self._task_role_arn,
            containerDefinitions=[container_def],
        )

        result_arn = str(resp["taskDefinition"]["taskDefinitionArn"])
        logger.info("Registered ECS task definition: %s (image=%s)", result_arn, image)
        self._ecs_task_def_cache[cache_key] = result_arn
        return result_arn

    # ------------------------------------------------------------------
    # Lambda Function
    # ------------------------------------------------------------------

    def ensure_lambda_function(
        self,
        function_name: str,
        image_uri: str,
        timeout: int = 900,
        memory_size: int = 3008,
    ) -> str:
        """Create or update a Lambda function backed by a container image.

        A missing function is created with ``timeout`` and ``memory_size``;
        for an existing function only the code image is updated (those two
        settings are not re-applied). The call blocks until the function is
        usable: a new function is waited on until it is active, a successful
        code update until the update has completed.

        The result is cached per ``(function_name, image_uri)``, and only when
        the image was actually deployed, so a failed update is retried on the
        next call and a changed image is never skipped.

        Returns:
            ``function_name``.

        Raises:
            botocore.exceptions.ClientError: if looking up or creating the
                function fails (update failures are logged, not raised).
            botocore.exceptions.WaiterError: if the function does not become
                ready within the waiter's attempts.
        """
        if self._lambda_fn_cache.get(function_name) == image_uri:
            return function_name

        lam = boto3.client("lambda", region_name=self._region)

        try:
            lam.get_function(FunctionName=function_name)
        except ClientError as e:
            if e.response["Error"]["Code"] != "ResourceNotFoundException":
                raise
            # Function does not exist yet: create it.
            lam.create_function(
                FunctionName=function_name,
                Role=self._lambda_role_arn,
                Code={"ImageUri": image_uri},
                PackageType="Image",
                Timeout=timeout,
                MemorySize=memory_size,
                VpcConfig={
                    "SubnetIds": self._lambda_subnets,
                    "SecurityGroupIds": self._lambda_sgs,
                },
                Environment={"Variables": {}},
            )
            logger.info("Created Lambda function: %s", function_name)
            image_deployed = True
            waiter_name = "function_active_v2"
        else:
            # Function exists: best-effort update to the requested image.
            image_deployed = self._update_lambda_code(function_name, image_uri)
            if image_deployed:
                logger.info("Lambda function exists, updated code: %s", function_name)
                # A code update leaves State "Active" while LastUpdateStatus
                # is "InProgress", so waiting on "active" would return too
                # early; wait for the update itself to finish.
                waiter_name = "function_updated_v2"
            else:
                waiter_name = "function_active_v2"

        waiter = lam.get_waiter(waiter_name)
        waiter.wait(FunctionName=function_name, WaiterConfig={"MaxAttempts": 60})
        logger.info("Lambda function active: %s", function_name)

        if image_deployed:
            self._lambda_fn_cache[function_name] = image_uri
        return function_name

    def _update_lambda_code(self, function_name: str, image_uri: str) -> bool:
        """Update Lambda function code to a new container image.

        Best-effort: a ``ClientError`` is logged as a warning and not raised.

        Returns:
            True if the update request was accepted, False if it failed.
        """
        lam = boto3.client("lambda", region_name=self._region)
        try:
            lam.update_function_code(FunctionName=function_name, ImageUri=image_uri)
        except ClientError:
            logger.warning(
                "Failed to update Lambda code for %s", function_name, exc_info=True
            )
            return False
        return True

    # ------------------------------------------------------------------
    # ECR Repository
    # ------------------------------------------------------------------

    def ensure_ecr_repository(self, repo_name: str) -> str:
        """Create an ECR repository if it doesn't exist.

        A newly created repository gets scan-on-push, mutable tags and the
        class lifecycle policy. An existing repository is left untouched.

        Returns:
            The repository URI.

        Raises:
            botocore.exceptions.ClientError: for any AWS error other than
                ``RepositoryNotFoundException`` on lookup.
        """
        cached_uri = self._ecr_repo_cache.get(repo_name)
        if cached_uri is not None:
            return cached_uri

        ecr = boto3.client("ecr", region_name=self._region)

        try:
            resp = ecr.describe_repositories(repositoryNames=[repo_name])
            uri = str(resp["repositories"][0]["repositoryUri"])
        except ClientError as e:
            if e.response["Error"]["Code"] != "RepositoryNotFoundException":
                raise
            resp = ecr.create_repository(
                repositoryName=repo_name,
                imageScanningConfiguration={"scanOnPush": True},
                imageTagMutability="MUTABLE",
            )
            uri = str(resp["repository"]["repositoryUri"])
            ecr.put_lifecycle_policy(
                repositoryName=repo_name,
                lifecyclePolicyText=self._ECR_LIFECYCLE_POLICY,
            )
            logger.info("Created ECR repository: %s (%s)", repo_name, uri)

        self._ecr_repo_cache[repo_name] = uri
        return uri

    # ------------------------------------------------------------------
    # Convenience: provision everything a module needs
    # ------------------------------------------------------------------

    def provision_for_ecs_module(
        self,
        product: str,
        module: str,
        task_def_family: str = "pulse-pipeline-step",
    ) -> dict[str, str]:
        """Ensure all ECS infrastructure exists for a pipeline module.

        Ensures the task definition family and an ECR repository named
        ``"{product}-{module}"``.

        Returns:
            Dict with ``cluster``, ``task_definition``, ``subnets`` and
            ``security_groups`` (both comma-joined) and ``ecr_repository``.
        """
        task_def_arn = self.ensure_ecs_task_definition(family=task_def_family)
        ecr_repo = self.ensure_ecr_repository(f"{product}-{module}")

        return {
            "cluster": self._cluster,
            "task_definition": task_def_arn,
            "subnets": ",".join(self._subnets),
            "security_groups": ",".join(self._security_groups),
            "ecr_repository": ecr_repo,
        }

    def provision_for_lambda_module(
        self,
        product: str,
        module: str,
        image_uri: str,
    ) -> dict[str, str]:
        """Ensure the Lambda function ``"{product}-{module}"`` exists.

        Returns:
            Dict with ``function_name``.
        """
        func_name = f"{product}-{module}"
        self.ensure_lambda_function(func_name, image_uri)

        return {"function_name": func_name}
@@ -0,0 +1,178 @@
1
+ """Central dispatch for job triggering across all runner backends."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ import structlog
9
+
10
+ from pulse_engine.core.exceptions import (
11
+ BadRequestError,
12
+ NotFoundError,
13
+ UnprocessableEntityError,
14
+ )
15
+ from pulse_engine.deployment.backends.exceptions import BackendNotAvailableError
16
+
17
+ if TYPE_CHECKING:
18
+ from pulse_engine.config import Settings
19
+ from pulse_engine.core.job_token import JobTokenIssuer
20
+ from pulse_engine.deployment.backend_deployment_repository import (
21
+ BackendDeploymentRepository,
22
+ )
23
+ from pulse_engine.deployment.backends.registry import BackendRegistry
24
+ from pulse_engine.deployment.repository import RegistrationRepository
25
+
26
+ logger = structlog.get_logger(__name__)
27
+
28
# Prefect flow entrypoint owned by the engine. Product containers only have to
# expose entrypoint:run as a plain function; the wrapper shipped with the
# engine applies @flow to it, which keeps product code free of Prefect.
PULSE_FLOW_ENTRYPOINT = "pulse_engine.runners.prefect_runner:main"

# Stage key as used in registrations and launch requests -> stage name.
# Key order is significant: it is echoed in the "unknown stage" error message.
_STAGE_NAME_MAP = {
    "extractor": "extraction",
    "processor": "processing",
    "storage": "storage",
}
38
+
39
+
40
@dataclass
class LaunchResult:
    """Outcome of a job launch: the backend run ID and the job-scoped token."""

    # Run identifier returned by the backend's trigger() call.
    flow_run_id: str
    # Job-scoped token that was issued for, and passed to, the triggered run.
    token: str
44
+
45
+
46
class JobLauncher:
    """Orchestrates the full job trigger sequence.

    Steps:
      1. Look up product registration -> image
      2. Resolve backend from registry
      3. Validate the stage (before any backend side effects)
      4. prepare() — work pool setup or no-op
      5. register() — upsert backend run unit (cached)
      6. Issue job-scoped JWT
      7. trigger() — fire the run, return run ID
      8. Return LaunchResult
    """

    def __init__(
        self,
        registration_repo: RegistrationRepository,
        backend_deployment_repo: BackendDeploymentRepository,
        registry: BackendRegistry,
        token_issuer: JobTokenIssuer,
        settings: Settings,
    ) -> None:
        """Store the collaborators used by launch()."""
        self._reg_repo = registration_repo
        self._backend_repo = backend_deployment_repo
        self._registry = registry
        self._token_issuer = token_issuer
        self._settings = settings

    async def launch(
        self,
        job_id: str,
        tenant_id: str,
        product: str,
        stage: str,
        orchestrator: str,
        compute: str,
        chain: bool,
        config: dict[str, Any],
    ) -> LaunchResult:
        """Trigger one job run for ``product``/``stage`` on the chosen backend.

        Args:
            job_id: Job identifier embedded in the token and run parameters.
            tenant_id: Tenant the job token is issued for.
            product: Registered product name.
            stage: Stage key; must be a key of ``_STAGE_NAME_MAP``.
            orchestrator: Orchestrator dimension used to resolve the backend.
            compute: Compute dimension used to resolve the backend.
            chain: Whether the run may trigger the next stage; adds the
                ``jobs:trigger_next`` scope to the token.
            config: Job configuration forwarded to the run unchanged.

        Returns:
            LaunchResult with the backend run ID and the issued token.

        Raises:
            NotFoundError: no registration exists for ``product``/``stage``.
            UnprocessableEntityError: the orchestrator/compute combination is
                not available in the registry.
            BadRequestError: ``stage`` is not a known stage key.
        """
        # 1. Look up registration
        registration = await self._reg_repo.get(product, stage)
        if registration is None:
            raise NotFoundError(
                f"No deployment registered for {product}/{stage}",
                product=product,
                stage=stage,
            )

        # 2. Resolve backend (raises 422 if unsupported)
        try:
            backend = self._registry.get(orchestrator, compute)
        except BackendNotAvailableError as exc:
            logger.warning(
                "backend_not_available",
                orchestrator=orchestrator,
                compute=compute,
                exc_info=True,
            )
            raise UnprocessableEntityError(
                "Unsupported orchestrator/compute combination"
            ) from exc

        # 3. Validate the stage before touching the backend. Registrations
        #    accept arbitrary stage keys, so an unknown stage can get this far;
        #    rejecting it here avoids creating and caching a backend deployment
        #    for a job that can never be launched.
        stage_name = _STAGE_NAME_MAP.get(stage)
        if stage_name is None:
            raise BadRequestError(
                f"Unknown stage: {stage!r}. Must be one of: {list(_STAGE_NAME_MAP)}"
            )

        # 4. prepare() — idempotent setup (work pool creation, no-op for native)
        await backend.prepare()

        # 5. register() — cached; Prefect gets image + entrypoint, native Lambda
        #    derives function name from product+stage convention.
        #    NOTE: Concurrent callers may both see a cache miss and both call
        #    register(). This is safe: upsert() uses ON CONFLICT DO UPDATE, and
        #    register() is idempotent.
        cached = await self._backend_repo.get(product, stage, orchestrator, compute)
        if cached:
            handle = cached.deployment_id
        else:
            handle = await backend.register(
                product=product,
                stage=stage,
                image=registration.image,
                entrypoint=PULSE_FLOW_ENTRYPOINT,
            )
            await self._backend_repo.upsert(
                product=product,
                stage=stage,
                orchestrator=orchestrator,
                compute=compute,
                deployment_id=handle,
            )

        # 6. Issue job-scoped JWT
        scope = ["jobs:status"]
        if stage == "storage":
            scope.append("kb:write")
        if chain:
            scope.append("jobs:trigger_next")

        token = self._token_issuer.issue(
            job_id=job_id,
            tenant_id=tenant_id,
            product=product,
            stage=stage_name,
            scope=scope,
            orchestrator=orchestrator,
            compute=compute,
        )

        # 7. trigger() — returns run ID (UUID for native Lambda, Prefect flow
        #    run UUID for Prefect)
        run_id = await backend.trigger(
            handle,
            {
                "job_id": job_id,
                "chain": chain,
                "config": config,
                "pulse_api_token": token,
                "pulse_engine_url": self._settings.pulse_engine_url,
            },
        )

        logger.info(
            "job_launched",
            job_id=job_id,
            product=product,
            stage=stage_name,
            orchestrator=orchestrator,
            compute=compute,
            run_id=run_id,
        )

        return LaunchResult(flow_run_id=run_id, token=token)
@@ -0,0 +1,48 @@
1
+ """ORM models for product registrations and backend deployment cache."""
2
+
3
+ from datetime import datetime
4
+
5
+ import sqlalchemy as sa
6
+ from sqlalchemy.orm import Mapped, mapped_column
7
+
8
+ from pulse_engine.database import Base
9
+
10
+
11
class ProductRegistrationModel(Base):
    """One registered image per (product, stage), as stored by POST /api/v1/deployments."""

    __tablename__ = "product_registrations"

    # Composite primary key: (product, stage).
    product: Mapped[str] = mapped_column(sa.String(100), primary_key=True)
    stage: Mapped[str] = mapped_column(sa.String(50), primary_key=True)

    # Container image registered for this product stage.
    image: Mapped[str] = mapped_column(sa.String(500), nullable=False)

    # Both timestamps default to the database clock; updated_at is also
    # refreshed on every UPDATE issued through the ORM.
    registered_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
        onupdate=sa.func.now(),
    )
28
+
29
+
30
class ProductBackendDeploymentModel(Base):
    """Lazily filled cache of backend deployment IDs, one per dimension tuple.

    A row is keyed by (product, stage, orchestrator, compute).
    """

    __tablename__ = "product_deployments"

    # Composite primary key: the four deployment dimensions.
    product: Mapped[str] = mapped_column(sa.String(100), primary_key=True)
    stage: Mapped[str] = mapped_column(sa.String(50), primary_key=True)
    orchestrator: Mapped[str] = mapped_column(sa.String(50), primary_key=True)
    compute: Mapped[str] = mapped_column(sa.String(50), primary_key=True)

    # Cached identifier of the deployment on the backend.
    deployment_id: Mapped[str] = mapped_column(sa.String(255), nullable=False)

    # Both timestamps default to the database clock; updated_at is also
    # refreshed on every UPDATE issued through the ORM.
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
        onupdate=sa.func.now(),
    )
@@ -0,0 +1,54 @@
1
+ """Data access layer for product registrations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sqlalchemy as sa
6
+ from sqlalchemy.ext.asyncio import AsyncSession
7
+
8
+ from pulse_engine.deployment.models import ProductRegistrationModel
9
+
10
+
11
class RegistrationRepository:
    """Read/write access to the product_registrations table."""

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def upsert(
        self,
        product: str,
        stage: str,
        image: str,
    ) -> ProductRegistrationModel:
        """Insert the (product, stage) registration or overwrite its image.

        Commits the session and returns the refreshed row.

        NOTE(review): this is a read-then-write, so two concurrent first-time
        registrations of the same key can both attempt the insert.
        """
        row = await self.get(product, stage)
        if row is None:
            row = ProductRegistrationModel(
                product=product,
                stage=stage,
                image=image,
            )
            self._session.add(row)
        else:
            row.image = image

        await self._session.commit()
        await self._session.refresh(row)
        return row

    async def get(self, product: str, stage: str) -> ProductRegistrationModel | None:
        """Return the registration for (product, stage), or None if absent."""
        model = ProductRegistrationModel
        query = sa.select(model).where(
            model.product == product,
            model.stage == stage,
        )
        rows = await self._session.execute(query)
        return rows.scalar_one_or_none()

    async def list_by_product(self, product: str) -> list[ProductRegistrationModel]:
        """Return every registration of ``product``, ordered by stage."""
        model = ProductRegistrationModel
        query = sa.select(model).where(model.product == product)
        query = query.order_by(model.stage)
        rows = await self._session.execute(query)
        return list(rows.scalars().all())
@@ -0,0 +1,22 @@
1
+ """Deployments API router."""
2
+
3
+ from fastapi import APIRouter, Depends
4
+
5
+ from pulse_engine.dependencies import get_deployment_service
6
+ from pulse_engine.deployment.schemas import (
7
+ RegisterDeploymentRequest,
8
+ RegisterDeploymentResponse,
9
+ )
10
+ from pulse_engine.deployment.service import DeploymentService
11
+ from pulse_engine.middleware.tenant import get_tenant_id
12
+
13
+ router = APIRouter(prefix="/deployments", tags=["Deployments"])
14
+
15
+
16
# Registers the stage images of a product; responds 201 with a per-stage status.
@router.post("/", response_model=RegisterDeploymentResponse, status_code=201)
async def register_deployment(
    body: RegisterDeploymentRequest,
    # Not read below; declared so the get_tenant_id dependency runs for the request.
    tenant_id: str = Depends(get_tenant_id),
    service: DeploymentService = Depends(get_deployment_service),
) -> RegisterDeploymentResponse:
    response = await service.register(body)
    return response
@@ -0,0 +1,18 @@
1
+ # src/pulse_engine/deployment/schemas.py
2
+ """Pydantic schemas for the Deployments API."""
3
+
4
+ from pydantic import BaseModel
5
+
6
+
7
class StageDefinition(BaseModel):
    # Container image for the stage, given as an ECR URI.
    image: str
9
+
10
+
11
class RegisterDeploymentRequest(BaseModel):
    # Product whose stages are being registered.
    product: str
    # Stage key -> definition. Expected keys: "extractor" | "processor" |
    # "storage" (not enforced by this schema).
    stages: dict[str, StageDefinition]
14
+
15
+
16
class RegisterDeploymentResponse(BaseModel):
    # Product the registration was stored for.
    product: str
    # Stage key -> result, e.g. {"extractor": {"status": "registered"}, ...}
    stages: dict[str, dict[str, str]]
@@ -0,0 +1,65 @@
1
+ # src/pulse_engine/deployment/service.py
2
+ """Business logic for registering product stage deployments."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import TYPE_CHECKING
7
+
8
+ import structlog
9
+
10
+ from pulse_engine.deployment.schemas import (
11
+ RegisterDeploymentRequest,
12
+ RegisterDeploymentResponse,
13
+ )
14
+
15
+ if TYPE_CHECKING:
16
+ from pulse_engine.deployment.backend_deployment_repository import (
17
+ BackendDeploymentRepository,
18
+ )
19
+ from pulse_engine.deployment.repository import RegistrationRepository
20
+
21
+ logger = structlog.get_logger(__name__)
22
+
23
+
24
class DeploymentService:
    """Persists product stage image registrations; creates no Prefect deployments.

    Prefect deployments are created lazily by JobLauncher on first trigger.
    Re-registering a product stage drops every cached backend deployment ID
    for that product+stage, so JobLauncher recreates them with the new image.
    """

    def __init__(
        self,
        registration_repo: RegistrationRepository,
        backend_deployment_repo: BackendDeploymentRepository,
    ) -> None:
        self._repo = registration_repo
        self._backend_repo = backend_deployment_repo

    async def register(
        self, request: RegisterDeploymentRequest
    ) -> RegisterDeploymentResponse:
        """Store every stage in ``request`` and report each as registered.

        Stages are handled one at a time, in request order: the image is
        upserted, then the cached backend deployment IDs are invalidated.
        """
        product = request.product
        statuses: dict[str, dict[str, str]] = {}

        for name, definition in request.stages.items():
            await self._repo.upsert(
                product=product,
                stage=name,
                image=definition.image,
            )
            # The image may have changed, so cached deployment IDs are stale.
            await self._backend_repo.delete_by_product_stage(product, name)
            statuses[name] = {"status": "registered"}
            logger.info(
                "stage_registration_stored",
                product=product,
                stage=name,
            )

        return RegisterDeploymentResponse(product=product, stages=statuses)
File without changes
File without changes