pulse-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse_engine/__init__.py +0 -0
- pulse_engine/adapters/__init__.py +58 -0
- pulse_engine/adapters/audio_transcription.py +167 -0
- pulse_engine/adapters/batcher.py +36 -0
- pulse_engine/adapters/digital_news.py +128 -0
- pulse_engine/adapters/digital_news_metadata.py +536 -0
- pulse_engine/adapters/exceptions.py +10 -0
- pulse_engine/adapters/models.py +134 -0
- pulse_engine/adapters/opensearch_storage.py +160 -0
- pulse_engine/adapters/speech_content.py +130 -0
- pulse_engine/adapters/speech_metadata.py +374 -0
- pulse_engine/adapters/twitter.py +423 -0
- pulse_engine/adapters/youtube_downloader.py +186 -0
- pulse_engine/adapters/youtube_metadata.py +261 -0
- pulse_engine/api/__init__.py +0 -0
- pulse_engine/api/v1/__init__.py +0 -0
- pulse_engine/api/v1/auth.py +91 -0
- pulse_engine/api/v1/health.py +62 -0
- pulse_engine/api/v1/router.py +16 -0
- pulse_engine/chain_recovery.py +131 -0
- pulse_engine/cli/__init__.py +0 -0
- pulse_engine/cli/main.py +169 -0
- pulse_engine/cli/templates/cookiecutter.json +4 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
- pulse_engine/client.py +95 -0
- pulse_engine/config.py +157 -0
- pulse_engine/core/__init__.py +0 -0
- pulse_engine/core/error_handlers.py +64 -0
- pulse_engine/core/exceptions.py +67 -0
- pulse_engine/core/job_token.py +109 -0
- pulse_engine/core/logging.py +45 -0
- pulse_engine/core/scope.py +23 -0
- pulse_engine/core/security.py +130 -0
- pulse_engine/database.py +30 -0
- pulse_engine/dependencies.py +166 -0
- pulse_engine/deployment/__init__.py +0 -0
- pulse_engine/deployment/backend_deployment_repository.py +83 -0
- pulse_engine/deployment/backends/__init__.py +0 -0
- pulse_engine/deployment/backends/base.py +50 -0
- pulse_engine/deployment/backends/exceptions.py +20 -0
- pulse_engine/deployment/backends/native_lambda.py +125 -0
- pulse_engine/deployment/backends/prefect_ecs.py +116 -0
- pulse_engine/deployment/backends/prefect_k8s.py +131 -0
- pulse_engine/deployment/backends/registry.py +50 -0
- pulse_engine/deployment/infra_provisioner.py +285 -0
- pulse_engine/deployment/job_launcher.py +178 -0
- pulse_engine/deployment/models.py +48 -0
- pulse_engine/deployment/repository.py +54 -0
- pulse_engine/deployment/router.py +22 -0
- pulse_engine/deployment/schemas.py +18 -0
- pulse_engine/deployment/service.py +65 -0
- pulse_engine/extractor/__init__.py +0 -0
- pulse_engine/extractor/adapters/__init__.py +0 -0
- pulse_engine/extractor/base.py +48 -0
- pulse_engine/extractor/models.py +50 -0
- pulse_engine/extractor/orchestrator/__init__.py +15 -0
- pulse_engine/extractor/orchestrator/base.py +34 -0
- pulse_engine/extractor/orchestrator/noop.py +37 -0
- pulse_engine/extractor/orchestrator/prefect.py +163 -0
- pulse_engine/extractor/repository.py +163 -0
- pulse_engine/extractor/router.py +102 -0
- pulse_engine/extractor/schemas.py +93 -0
- pulse_engine/extractor/service.py +431 -0
- pulse_engine/extractor/stage_models.py +36 -0
- pulse_engine/extractor/stage_repository.py +109 -0
- pulse_engine/main.py +195 -0
- pulse_engine/mcp/__init__.py +0 -0
- pulse_engine/mcp/__main__.py +5 -0
- pulse_engine/mcp/server.py +108 -0
- pulse_engine/mcp/tools_jobs.py +159 -0
- pulse_engine/mcp/tools_kb.py +88 -0
- pulse_engine/mcp/tools_modules.py +115 -0
- pulse_engine/mcp/tools_pipelines.py +215 -0
- pulse_engine/mcp/tools_processor.py +208 -0
- pulse_engine/middleware/__init__.py +0 -0
- pulse_engine/middleware/rate_limit.py +144 -0
- pulse_engine/middleware/request_id.py +16 -0
- pulse_engine/middleware/security_headers.py +25 -0
- pulse_engine/middleware/tenant.py +90 -0
- pulse_engine/pipeline/__init__.py +0 -0
- pulse_engine/pipeline/config_parser.py +148 -0
- pulse_engine/pipeline/expression.py +268 -0
- pulse_engine/pipeline/models.py +98 -0
- pulse_engine/pipeline/repositories.py +224 -0
- pulse_engine/pipeline/router_modules.py +66 -0
- pulse_engine/pipeline/router_pipelines.py +198 -0
- pulse_engine/pipeline/schemas.py +200 -0
- pulse_engine/pipeline/service.py +250 -0
- pulse_engine/pipeline/translators/__init__.py +44 -0
- pulse_engine/pipeline/translators/airflow_status.py +11 -0
- pulse_engine/pipeline/translators/airflow_translator.py +22 -0
- pulse_engine/pipeline/translators/base.py +42 -0
- pulse_engine/pipeline/translators/prefect_status.py +93 -0
- pulse_engine/pipeline/translators/prefect_translator.py +195 -0
- pulse_engine/processor/__init__.py +0 -0
- pulse_engine/processor/base.py +36 -0
- pulse_engine/processor/core/__init__.py +0 -0
- pulse_engine/processor/core/analysis.py +148 -0
- pulse_engine/processor/core/chunking.py +158 -0
- pulse_engine/processor/core/prompts.py +340 -0
- pulse_engine/processor/core/topic_splitter.py +105 -0
- pulse_engine/processor/defaults/__init__.py +11 -0
- pulse_engine/processor/defaults/core_processor.py +12 -0
- pulse_engine/processor/defaults/postprocessor.py +12 -0
- pulse_engine/processor/defaults/preprocessor.py +12 -0
- pulse_engine/processor/llm/__init__.py +0 -0
- pulse_engine/processor/llm/provider.py +58 -0
- pulse_engine/processor/ocr/gemini.py +52 -0
- pulse_engine/processor/pipeline.py +107 -0
- pulse_engine/processor/postprocessor/__init__.py +0 -0
- pulse_engine/processor/postprocessor/embeddings.py +34 -0
- pulse_engine/processor/postprocessor/tasks.py +180 -0
- pulse_engine/processor/preprocessor/__init__.py +0 -0
- pulse_engine/processor/preprocessor/tasks.py +71 -0
- pulse_engine/processor/router.py +192 -0
- pulse_engine/processor/schemas.py +167 -0
- pulse_engine/registry.py +117 -0
- pulse_engine/runners/__init__.py +0 -0
- pulse_engine/runners/lambda_runner.py +26 -0
- pulse_engine/runners/pipeline_runner.py +43 -0
- pulse_engine/runners/prefect_pipeline_flow.py +904 -0
- pulse_engine/runners/prefect_runner.py +33 -0
- pulse_engine/s3.py +72 -0
- pulse_engine/secrets.py +46 -0
- pulse_engine/services/__init__.py +0 -0
- pulse_engine/services/bootstrap.py +211 -0
- pulse_engine/services/opensearch.py +84 -0
- pulse_engine/storage/__init__.py +0 -0
- pulse_engine/storage/connectors/__init__.py +0 -0
- pulse_engine/storage/connectors/athena.py +226 -0
- pulse_engine/storage/connectors/base.py +32 -0
- pulse_engine/storage/connectors/opensearch.py +344 -0
- pulse_engine/storage/knowledge_base.py +68 -0
- pulse_engine/storage/router.py +78 -0
- pulse_engine/storage/schemas.py +93 -0
- pulse_engine/testing/__init__.py +13 -0
- pulse_engine/testing/fixtures.py +50 -0
- pulse_engine/testing/mocks.py +104 -0
- pulse_engine/worker.py +53 -0
- pulse_engine-0.2.0.dist-info/METADATA +654 -0
- pulse_engine-0.2.0.dist-info/RECORD +150 -0
- pulse_engine-0.2.0.dist-info/WHEEL +4 -0
- pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""Dynamic AWS infrastructure provisioner for pipeline step execution.
|
|
2
|
+
|
|
3
|
+
The engine auto-provisions ECS task definitions, Lambda functions, and ECR
|
|
4
|
+
repositories so that products can execute immediately after registration
|
|
5
|
+
without requiring manual Terraform runs for each new product/module.
|
|
6
|
+
|
|
7
|
+
Prerequisites (created by Terraform once):
|
|
8
|
+
- ECS cluster, execution role, task role, security group (pipeline_cluster module)
|
|
9
|
+
- Lambda execution role, security group (lambda_pipeline module)
|
|
10
|
+
- VPC with private subnets (networking module)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
import boto3
|
|
19
|
+
from botocore.exceptions import ClientError
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class InfraProvisioner:
    """Auto-provisions AWS infrastructure for pipeline step containers.

    Uses IAM roles and networking created by Terraform. Creates the dynamic
    resources (task definitions, Lambda functions, ECR repos) on demand.

    Each ``ensure_*`` method memoises its result on the instance so that
    repeated calls within the same process do not hit the AWS APIs again.
    """

    def __init__(
        self,
        *,
        region: str,
        pipeline_cluster_name: str,
        pipeline_execution_role_arn: str,
        pipeline_task_role_arn: str,
        pipeline_log_group: str,
        pipeline_subnets: list[str],
        pipeline_security_groups: list[str],
        lambda_execution_role_arn: str,
        lambda_subnets: list[str],
        lambda_security_groups: list[str],
        lambda_log_group: str,
    ) -> None:
        """Store the pre-provisioned identifiers and set up empty caches.

        All arguments are keyword-only and are kept verbatim; no AWS call is
        made here.
        """
        self._region = region
        self._cluster = pipeline_cluster_name
        self._exec_role_arn = pipeline_execution_role_arn
        self._task_role_arn = pipeline_task_role_arn
        self._log_group = pipeline_log_group
        self._subnets = pipeline_subnets
        self._security_groups = pipeline_security_groups
        self._lambda_role_arn = lambda_execution_role_arn
        self._lambda_subnets = lambda_subnets
        self._lambda_sgs = lambda_security_groups
        self._lambda_log_group = lambda_log_group

        # Caches to avoid redundant API calls within same process lifetime.
        # "family:cpu:memory:image" -> task definition ARN
        self._ecs_task_def_cache: dict[str, str] = {}
        # function name -> image URI last deployed by this instance.  Keyed
        # with the image so that a new image for a known function is rolled
        # out instead of being short-circuited by the cache.
        self._lambda_fn_cache: dict[str, str] = {}
        # repository name -> repository URI
        self._ecr_repo_cache: dict[str, str] = {}

    # ------------------------------------------------------------------
    # ECS Task Definition
    # ------------------------------------------------------------------

    def ensure_ecs_task_definition(
        self,
        family: str = "pulse-pipeline-step",
        cpu: str = "256",
        memory: str = "512",
        image: str | None = None,
    ) -> str:
        """Create or update a Fargate task definition for pipeline steps.

        When image is provided, registers a new revision with that image.
        When image is None, reuses the existing revision of ``family`` or,
        if describing it fails with ``ClientException``, registers one with
        a placeholder image.

        Returns the task definition ARN.

        Raises:
            ClientError: any ECS error other than the ``ClientException``
                handled on the describe call.
        """
        cache_key = f"{family}:{cpu}:{memory}:{image or 'default'}"
        if cache_key in self._ecs_task_def_cache:
            return self._ecs_task_def_cache[cache_key]

        ecs = boto3.client("ecs", region_name=self._region)

        # If no specific image requested, reuse existing task def
        if image is None:
            try:
                resp = ecs.describe_task_definition(taskDefinition=family)
            except ClientError as e:
                # Any other error code is unexpected; on ClientException we
                # fall through to registration below.
                if e.response["Error"]["Code"] != "ClientException":
                    raise
            else:
                existing = resp["taskDefinition"]
                result_arn = str(existing["taskDefinitionArn"])
                logger.info(
                    "ECS task definition already exists: %s (rev %s)",
                    family,
                    existing["revision"],
                )
                self._ecs_task_def_cache[cache_key] = result_arn
                return result_arn

        # Register new task definition with the specified (or placeholder) image
        container_def: dict[str, Any] = {
            "name": "step",
            "image": image or "amazon/amazon-ecs-sample",
            "essential": True,
            # Set entryPoint to "python -c" so that the ECS
            # containerOverrides.command becomes the script body.
            # This overrides any image ENTRYPOINT (e.g. Lambda images
            # with awslambdaric) ensuring the pipeline runner script
            # executes instead of the Lambda runtime.
            "entryPoint": ["python", "-c"],
            "command": ["print('task-definition placeholder')"],
            "logConfiguration": {
                "logDriver": "awslogs",
                "options": {
                    "awslogs-group": self._log_group,
                    "awslogs-region": self._region,
                    "awslogs-stream-prefix": "step",
                },
            },
        }

        resp = ecs.register_task_definition(
            family=family,
            networkMode="awsvpc",
            requiresCompatibilities=["FARGATE"],
            cpu=cpu,
            memory=memory,
            executionRoleArn=self._exec_role_arn,
            taskRoleArn=self._task_role_arn,
            containerDefinitions=[container_def],
        )

        result_arn = str(resp["taskDefinition"]["taskDefinitionArn"])
        logger.info("Registered ECS task definition: %s (image=%s)", result_arn, image)
        self._ecs_task_def_cache[cache_key] = result_arn
        return result_arn

    # ------------------------------------------------------------------
    # Lambda Function
    # ------------------------------------------------------------------

    def ensure_lambda_function(
        self,
        function_name: str,
        image_uri: str,
        timeout: int = 900,
        memory_size: int = 3008,
    ) -> str:
        """Create or update a Lambda function backed by a container image.

        If the function does not exist it is created with ``timeout`` and
        ``memory_size``; if it exists only its code is pointed at
        ``image_uri`` (``timeout``/``memory_size`` are not applied to an
        existing function).

        Waits for the function to be Active and, after a code update, for
        that update to finish before returning, so callers do not invoke the
        previous image.  A call is served from the cache only when this
        instance already deployed the same ``image_uri`` for
        ``function_name``.

        Returns the function name.

        Raises:
            ClientError: on any Lambda error other than the function not
                existing yet.  Failures of the code update itself are logged
                and tolerated (best effort).
        """
        if self._lambda_fn_cache.get(function_name) == image_uri:
            return function_name

        lam = boto3.client("lambda", region_name=self._region)
        wait_config = {"MaxAttempts": 60}

        code_updated = False
        try:
            lam.get_function(FunctionName=function_name)
        except ClientError as e:
            if e.response["Error"]["Code"] != "ResourceNotFoundException":
                raise
            # Create the function
            lam.create_function(
                FunctionName=function_name,
                Role=self._lambda_role_arn,
                Code={"ImageUri": image_uri},
                PackageType="Image",
                Timeout=timeout,
                MemorySize=memory_size,
                VpcConfig={
                    "SubnetIds": self._lambda_subnets,
                    "SecurityGroupIds": self._lambda_sgs,
                },
                Environment={"Variables": {}},
            )
            logger.info("Created Lambda function: %s", function_name)
            image_deployed = True
        else:
            # Function exists — update code to latest image
            code_updated = self._update_lambda_code(function_name, image_uri)
            image_deployed = code_updated
            if code_updated:
                logger.info("Lambda function exists, updated code: %s", function_name)

        # Wait for the function to be Active (covers the create path).
        lam.get_waiter("function_active_v2").wait(
            FunctionName=function_name, WaiterConfig=wait_config
        )
        if code_updated:
            # An existing function stays in State=Active while a code update
            # is in progress, so the waiter above returns immediately.  The
            # update's completion is tracked by LastUpdateStatus instead.
            lam.get_waiter("function_updated_v2").wait(
                FunctionName=function_name, WaiterConfig=wait_config
            )
        logger.info("Lambda function active: %s", function_name)

        # Only remember images that were actually rolled out, so a failed
        # update is retried on the next call.
        if image_deployed:
            self._lambda_fn_cache[function_name] = image_uri
        return function_name

    def _update_lambda_code(self, function_name: str, image_uri: str) -> bool:
        """Update Lambda function code to a new container image.

        Best effort: a ``ClientError`` is logged as a warning, not raised.

        Returns True when the update request was accepted, False otherwise.
        """
        lam = boto3.client("lambda", region_name=self._region)
        try:
            lam.update_function_code(FunctionName=function_name, ImageUri=image_uri)
        except ClientError:
            logger.warning(
                "Failed to update Lambda code for %s", function_name, exc_info=True
            )
            return False
        return True

    # ------------------------------------------------------------------
    # ECR Repository
    # ------------------------------------------------------------------

    def ensure_ecr_repository(self, repo_name: str) -> str:
        """Create an ECR repository if it doesn't exist.

        A newly created repository gets scan-on-push, mutable tags and a
        lifecycle policy that expires images beyond the most recent 30.
        An existing repository is left untouched.

        Returns the repository URI.

        Raises:
            ClientError: on any ECR error other than the repository not
                existing yet.
        """
        if repo_name in self._ecr_repo_cache:
            return self._ecr_repo_cache[repo_name]

        ecr = boto3.client("ecr", region_name=self._region)

        lifecycle_policy = (
            '{"rules":[{"rulePriority":1,"description":"Keep last 30",'
            '"selection":{"tagStatus":"any","countType":"imageCountMoreThan",'
            '"countNumber":30},"action":{"type":"expire"}}]}'
        )

        try:
            resp = ecr.describe_repositories(repositoryNames=[repo_name])
            uri = str(resp["repositories"][0]["repositoryUri"])
        except ClientError as e:
            if e.response["Error"]["Code"] != "RepositoryNotFoundException":
                raise
            resp = ecr.create_repository(
                repositoryName=repo_name,
                imageScanningConfiguration={"scanOnPush": True},
                imageTagMutability="MUTABLE",
            )
            uri = str(resp["repository"]["repositoryUri"])
            ecr.put_lifecycle_policy(
                repositoryName=repo_name,
                lifecyclePolicyText=lifecycle_policy,
            )
            logger.info("Created ECR repository: %s (%s)", repo_name, uri)

        self._ecr_repo_cache[repo_name] = uri
        return uri

    # ------------------------------------------------------------------
    # Convenience: provision everything a module needs
    # ------------------------------------------------------------------

    def provision_for_ecs_module(
        self,
        product: str,
        module: str,
        task_def_family: str = "pulse-pipeline-step",
    ) -> dict[str, str]:
        """Ensure all ECS infrastructure exists for a pipeline module.

        Ensures the task definition ``task_def_family`` and an ECR repository
        named ``"{product}-{module}"``.

        Returns dict with cluster, task_definition, subnets, security_groups
        (both comma-joined) and ecr_repository.
        """
        task_def_arn = self.ensure_ecs_task_definition(family=task_def_family)
        ecr_repo = self.ensure_ecr_repository(f"{product}-{module}")

        return {
            "cluster": self._cluster,
            "task_definition": task_def_arn,
            "subnets": ",".join(self._subnets),
            "security_groups": ",".join(self._security_groups),
            "ecr_repository": ecr_repo,
        }

    def provision_for_lambda_module(
        self,
        product: str,
        module: str,
        image_uri: str,
    ) -> dict[str, str]:
        """Ensure Lambda function exists for a pipeline module.

        The function is named ``"{product}-{module}"``.

        Returns dict with function_name.
        """
        func_name = f"{product}-{module}"
        self.ensure_lambda_function(func_name, image_uri)

        return {"function_name": func_name}
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""Central dispatch for job triggering across all runner backends."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from typing import TYPE_CHECKING, Any
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from pulse_engine.core.exceptions import (
|
|
11
|
+
BadRequestError,
|
|
12
|
+
NotFoundError,
|
|
13
|
+
UnprocessableEntityError,
|
|
14
|
+
)
|
|
15
|
+
from pulse_engine.deployment.backends.exceptions import BackendNotAvailableError
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from pulse_engine.config import Settings
|
|
19
|
+
from pulse_engine.core.job_token import JobTokenIssuer
|
|
20
|
+
from pulse_engine.deployment.backend_deployment_repository import (
|
|
21
|
+
BackendDeploymentRepository,
|
|
22
|
+
)
|
|
23
|
+
from pulse_engine.deployment.backends.registry import BackendRegistry
|
|
24
|
+
from pulse_engine.deployment.repository import RegistrationRepository
|
|
25
|
+
|
|
26
|
+
logger = structlog.get_logger(__name__)
|
|
27
|
+
|
|
28
|
+
# The engine-owned Prefect flow entrypoint. All product containers must expose
|
|
29
|
+
# entrypoint:run as a plain function; this wrapper (shipped with the engine)
|
|
30
|
+
# decorates it with @flow so product code stays Prefect-free.
|
|
31
|
+
PULSE_FLOW_ENTRYPOINT = "pulse_engine.runners.prefect_runner:main"
|
|
32
|
+
|
|
33
|
+
_STAGE_NAME_MAP = {
|
|
34
|
+
"extractor": "extraction",
|
|
35
|
+
"processor": "processing",
|
|
36
|
+
"storage": "storage",
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class LaunchResult:
    """Value returned by ``JobLauncher.launch``."""

    # Run identifier returned by the backend's trigger() call.
    flow_run_id: str
    # Job-scoped token issued for the launched run.
    token: str
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class JobLauncher:
    """Orchestrates the full job trigger sequence.

    Steps:
    1. Look up product registration -> image
    2. Resolve backend from registry
    3. Validate the stage key (before anything with side effects runs)
    4. prepare() — work pool setup or no-op
    5. register() — upsert backend run unit (cached)
    6. Issue job-scoped JWT
    7. trigger() — fire the run, return run ID
    8. Return LaunchResult
    """

    def __init__(
        self,
        registration_repo: RegistrationRepository,
        backend_deployment_repo: BackendDeploymentRepository,
        registry: BackendRegistry,
        token_issuer: JobTokenIssuer,
        settings: Settings,
    ) -> None:
        """Keep references to the collaborators used by :meth:`launch`."""
        self._reg_repo = registration_repo
        self._backend_repo = backend_deployment_repo
        self._registry = registry
        self._token_issuer = token_issuer
        self._settings = settings

    async def launch(
        self,
        job_id: str,
        tenant_id: str,
        product: str,
        stage: str,
        orchestrator: str,
        compute: str,
        chain: bool,
        config: dict[str, Any],
    ) -> LaunchResult:
        """Trigger one job run on the backend selected by orchestrator/compute.

        Args:
            job_id: Identifier embedded in the token and the trigger payload.
            tenant_id: Tenant the token is issued for.
            product: Product whose registered image is launched.
            stage: Stage key; must be a key of ``_STAGE_NAME_MAP``.
            orchestrator: Orchestrator part of the backend lookup key.
            compute: Compute part of the backend lookup key.
            chain: When true, the token also carries ``jobs:trigger_next``
                and the flag is forwarded in the trigger payload.
            config: Forwarded unchanged in the trigger payload.

        Returns:
            LaunchResult with the backend run ID and the issued token.

        Raises:
            NotFoundError: no registration exists for ``product``/``stage``.
            UnprocessableEntityError: the registry has no backend for the
                orchestrator/compute combination.
            BadRequestError: ``stage`` is not a known stage key.
        """
        # 1. Look up registration
        registration = await self._reg_repo.get(product, stage)
        if registration is None:
            raise NotFoundError(
                f"No deployment registered for {product}/{stage}",
                product=product,
                stage=stage,
            )

        # 2. Resolve backend (raises 422 if unsupported)
        try:
            backend = self._registry.get(orchestrator, compute)
        except BackendNotAvailableError as exc:
            logger.warning(
                "backend_not_available",
                orchestrator=orchestrator,
                compute=compute,
                exc_info=True,
            )
            raise UnprocessableEntityError(
                "Unsupported orchestrator/compute combination"
            ) from exc

        # 3. Validate the stage key now, while nothing has been changed yet.
        #    Doing this after prepare()/register() would leave a backend run
        #    unit and a cache row behind for a request that is then rejected.
        stage_name = _STAGE_NAME_MAP.get(stage)
        if stage_name is None:
            raise BadRequestError(
                f"Unknown stage: {stage!r}. Must be one of: {list(_STAGE_NAME_MAP)}"
            )

        # 4. prepare() — idempotent setup (work pool creation, no-op for native)
        await backend.prepare()

        # 5. register() — cached; Prefect gets image + entrypoint, native Lambda
        #    derives function name from product+stage convention.
        #    NOTE: Concurrent callers may both see a cache miss and both call register().
        #    This is safe: upsert() uses ON CONFLICT DO UPDATE, and register()
        #    is idempotent.
        cached = await self._backend_repo.get(product, stage, orchestrator, compute)
        if cached:
            handle = cached.deployment_id
        else:
            handle = await backend.register(
                product=product,
                stage=stage,
                image=registration.image,
                entrypoint=PULSE_FLOW_ENTRYPOINT,
            )
            await self._backend_repo.upsert(
                product=product,
                stage=stage,
                orchestrator=orchestrator,
                compute=compute,
                deployment_id=handle,
            )

        # 6. Issue job-scoped JWT. Every job may report status; storage jobs
        #    may additionally write to the KB, chained jobs may trigger the
        #    next stage.
        scope = ["jobs:status"]
        if stage == "storage":
            scope.append("kb:write")
        if chain:
            scope.append("jobs:trigger_next")

        token = self._token_issuer.issue(
            job_id=job_id,
            tenant_id=tenant_id,
            product=product,
            stage=stage_name,
            scope=scope,
            orchestrator=orchestrator,
            compute=compute,
        )

        # 7. trigger() — returns run ID (UUID for native Lambda, Prefect flow
        #    run UUID for Prefect)
        run_id = await backend.trigger(
            handle,
            {
                "job_id": job_id,
                "chain": chain,
                "config": config,
                "pulse_api_token": token,
                "pulse_engine_url": self._settings.pulse_engine_url,
            },
        )

        logger.info(
            "job_launched",
            job_id=job_id,
            product=product,
            stage=stage_name,
            orchestrator=orchestrator,
            compute=compute,
            run_id=run_id,
        )

        # 8. Hand the run ID and token back to the caller.
        return LaunchResult(flow_run_id=run_id, token=token)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""ORM models for product registrations and backend deployment cache."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sa
|
|
6
|
+
from sqlalchemy.orm import Mapped, mapped_column
|
|
7
|
+
|
|
8
|
+
from pulse_engine.database import Base
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ProductRegistrationModel(Base):
    """One registered container image per (product, stage) pair.

    Rows are written through POST /api/v1/deployments.
    """

    __tablename__ = "product_registrations"

    # Composite primary key: (product, stage).
    product: Mapped[str] = mapped_column(
        sa.String(100),
        primary_key=True,
    )
    stage: Mapped[str] = mapped_column(
        sa.String(50),
        primary_key=True,
    )

    # Image reference registered for this product stage.
    image: Mapped[str] = mapped_column(
        sa.String(500),
        nullable=False,
    )

    # Both timestamps default to the database clock on insert; updated_at is
    # additionally refreshed on every ORM-issued UPDATE.
    registered_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
        onupdate=sa.func.now(),
    )
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ProductBackendDeploymentModel(Base):
    """Lazily filled cache of backend deployment IDs.

    One row per (product, stage, orchestrator, compute) tuple.
    """

    __tablename__ = "product_deployments"

    # Composite primary key: the four deployment dimensions.
    product: Mapped[str] = mapped_column(
        sa.String(100),
        primary_key=True,
    )
    stage: Mapped[str] = mapped_column(
        sa.String(50),
        primary_key=True,
    )
    orchestrator: Mapped[str] = mapped_column(
        sa.String(50),
        primary_key=True,
    )
    compute: Mapped[str] = mapped_column(
        sa.String(50),
        primary_key=True,
    )

    # Identifier cached for this tuple.
    deployment_id: Mapped[str] = mapped_column(
        sa.String(255),
        nullable=False,
    )

    # Both timestamps default to the database clock on insert; updated_at is
    # additionally refreshed on every ORM-issued UPDATE.
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=False,
        server_default=sa.func.now(),
        onupdate=sa.func.now(),
    )
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Data access layer for product registrations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sqlalchemy as sa
|
|
6
|
+
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
+
|
|
8
|
+
from pulse_engine.deployment.models import ProductRegistrationModel
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RegistrationRepository:
    """Read/write access to the product_registrations table."""

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def upsert(
        self,
        product: str,
        stage: str,
        image: str,
    ) -> ProductRegistrationModel:
        """Insert or update the registration for ``product``/``stage``.

        Looks the row up first; an existing row gets its image replaced, a
        missing one is added. The session is committed and the row refreshed
        before it is returned.

        NOTE(review): lookup and insert are separate statements, so two
        concurrent first-time registrations of the same key could collide on
        the primary key — confirm whether callers can race here.
        """
        row = await self.get(product, stage)
        if row is None:
            row = ProductRegistrationModel(
                product=product,
                stage=stage,
                image=image,
            )
            self._session.add(row)
        else:
            row.image = image

        await self._session.commit()
        await self._session.refresh(row)
        return row

    async def get(self, product: str, stage: str) -> ProductRegistrationModel | None:
        """Return the registration for ``product``/``stage``, or None."""
        query = sa.select(ProductRegistrationModel).where(
            ProductRegistrationModel.product == product,
            ProductRegistrationModel.stage == stage,
        )
        return (await self._session.execute(query)).scalar_one_or_none()

    async def list_by_product(self, product: str) -> list[ProductRegistrationModel]:
        """Return every registration of ``product``, ordered by stage."""
        query = sa.select(ProductRegistrationModel)
        query = query.where(ProductRegistrationModel.product == product)
        query = query.order_by(ProductRegistrationModel.stage)
        rows = await self._session.execute(query)
        return list(rows.scalars().all())
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Deployments API router."""
|
|
2
|
+
|
|
3
|
+
from fastapi import APIRouter, Depends
|
|
4
|
+
|
|
5
|
+
from pulse_engine.dependencies import get_deployment_service
|
|
6
|
+
from pulse_engine.deployment.schemas import (
|
|
7
|
+
RegisterDeploymentRequest,
|
|
8
|
+
RegisterDeploymentResponse,
|
|
9
|
+
)
|
|
10
|
+
from pulse_engine.deployment.service import DeploymentService
|
|
11
|
+
from pulse_engine.middleware.tenant import get_tenant_id
|
|
12
|
+
|
|
13
|
+
router = APIRouter(prefix="/deployments", tags=["Deployments"])
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# POST /deployments/ — hands the request body to DeploymentService.register()
# and returns its result with status 201.
# (Deliberately a comment, not a docstring: FastAPI would publish a docstring
# as the operation description in the OpenAPI schema.)
@router.post("/", response_model=RegisterDeploymentResponse, status_code=201)
async def register_deployment(
    body: RegisterDeploymentRequest,
    # Not read in the body; declared so the get_tenant_id dependency is
    # resolved for every request to this endpoint.
    tenant_id: str = Depends(get_tenant_id),
    service: DeploymentService = Depends(get_deployment_service),
) -> RegisterDeploymentResponse:
    response = await service.register(body)
    return response
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# src/pulse_engine/deployment/schemas.py
|
|
2
|
+
"""Pydantic schemas for the Deployments API."""
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
# Definition of a single product stage inside a registration request.
class StageDefinition(BaseModel):
    # ECR URI
    image: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Request body for registering the stages of one product.
class RegisterDeploymentRequest(BaseModel):
    product: str
    # key: "extractor" | "processor" | "storage"
    stages: dict[str, StageDefinition]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Response body echoing the product and a per-stage status mapping.
class RegisterDeploymentResponse(BaseModel):
    product: str
    # {"extractor": {"status": "registered"}, ...}
    stages: dict[str, dict[str, str]]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# src/pulse_engine/deployment/service.py
|
|
2
|
+
"""Business logic for registering product stage deployments."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from pulse_engine.deployment.schemas import (
|
|
11
|
+
RegisterDeploymentRequest,
|
|
12
|
+
RegisterDeploymentResponse,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from pulse_engine.deployment.backend_deployment_repository import (
|
|
17
|
+
BackendDeploymentRepository,
|
|
18
|
+
)
|
|
19
|
+
from pulse_engine.deployment.repository import RegistrationRepository
|
|
20
|
+
|
|
21
|
+
logger = structlog.get_logger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DeploymentService:
    """Persists which image each product stage should run.

    No Prefect deployment is created here; JobLauncher creates those lazily
    on the first trigger. Re-registering a product+stage drops its cached
    backend deployment IDs so JobLauncher rebuilds them with the new image.
    """

    def __init__(
        self,
        registration_repo: RegistrationRepository,
        backend_deployment_repo: BackendDeploymentRepository,
    ) -> None:
        self._repo = registration_repo
        self._backend_repo = backend_deployment_repo

    async def register(
        self, request: RegisterDeploymentRequest
    ) -> RegisterDeploymentResponse:
        """Store every stage of ``request`` and report a status per stage.

        For each stage, in request order: upsert the registration, delete the
        cached backend deployment IDs for that product+stage, then log the
        event. Each stage is reported as ``{"status": "registered"}``.
        """
        product = request.product
        outcome: dict[str, dict[str, str]] = {}

        for stage_name, stage_def in request.stages.items():
            await self._repo.upsert(
                product=product,
                stage=stage_name,
                image=stage_def.image,
            )
            # The image may differ from the one the cached IDs were built
            # from, so those IDs must not be reused.
            await self._backend_repo.delete_by_product_stage(product, stage_name)

            outcome[stage_name] = {"status": "registered"}
            logger.info(
                "stage_registration_stored",
                product=product,
                stage=stage_name,
            )

        return RegisterDeploymentResponse(product=product, stages=outcome)
|
|
File without changes
|
|
File without changes
|