pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,250 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import UTC, datetime
4
+ from typing import Any
5
+
6
+ from pulse_engine.pipeline.models import PipelineRunModel
7
+ from pulse_engine.pipeline.repositories import (
8
+ ModuleRegistryRepository,
9
+ PipelineRunRepository,
10
+ PipelineStepRunRepository,
11
+ )
12
+ from pulse_engine.pipeline.schemas import PipelineConfig, StepStatus
13
+ from pulse_engine.pipeline.translators.base import (
14
+ BaseStatusProvider,
15
+ BaseTranslator,
16
+ PipelineStatus,
17
+ )
18
+
19
+
20
class PipelineServiceError(Exception):
    """Base class for errors raised by the pipeline service layer."""
22
+
23
+
24
class PipelineSubmissionError(PipelineServiceError):
    """Raised when submitting a pipeline run to the orchestrator fails."""
26
+
27
+
28
+ class PipelineService:
29
+ def __init__(
30
+ self,
31
+ module_repo: ModuleRegistryRepository,
32
+ run_repo: PipelineRunRepository,
33
+ translators: dict[str, BaseTranslator],
34
+ status_providers: dict[str, BaseStatusProvider],
35
+ settings: Any,
36
+ token_issuer: Any,
37
+ step_repo: PipelineStepRunRepository | None = None,
38
+ ) -> None:
39
+ self._module_repo = module_repo
40
+ self._run_repo = run_repo
41
+ self._translators = translators
42
+ self._status_providers = status_providers
43
+ self._settings = settings
44
+ self._token_issuer = token_issuer
45
+ self._step_repo = step_repo
46
+
47
+ async def trigger(
48
+ self,
49
+ tenant_id: str,
50
+ product: str,
51
+ orchestrator: str,
52
+ config: PipelineConfig,
53
+ global_config: dict[str, Any],
54
+ ) -> str:
55
+ """Trigger a pipeline. Returns the pipeline run ID."""
56
+ # Validate orchestrator
57
+ if orchestrator not in self._translators:
58
+ raise PipelineServiceError(f"Unsupported orchestrator: '{orchestrator}'")
59
+
60
+ # Resolve module images
61
+ images = await self._module_repo.get_images_map(tenant_id, product)
62
+ missing = [m.name for m in config.modules if m.module not in images]
63
+ if missing:
64
+ raise PipelineServiceError(
65
+ f"Modules not registered for product '{product}': {missing}"
66
+ )
67
+
68
+ # Create pipeline run record
69
+ run = await self._run_repo.create(
70
+ tenant_id=tenant_id,
71
+ product=product,
72
+ orchestrator=orchestrator,
73
+ config_snapshot=config.model_dump(mode="json", by_alias=True),
74
+ global_config=global_config,
75
+ )
76
+
77
+ # Issue run-scoped token
78
+ token = self._token_issuer.issue_token(
79
+ pipeline_run_id=run.id,
80
+ tenant_id=tenant_id,
81
+ )
82
+
83
+ # Submit to orchestrator
84
+ translator = self._translators[orchestrator]
85
+ try:
86
+ orchestrator_run_id = await translator.submit(
87
+ pipeline_run_id=run.id,
88
+ parsed_config=config,
89
+ module_images=images,
90
+ global_config=global_config,
91
+ tenant_id=tenant_id,
92
+ pulse_engine_url=self._settings.pulse_engine_url,
93
+ pulse_api_token=token,
94
+ )
95
+ except Exception as e:
96
+ await self._run_repo.update_status(run.id, "submission_failed")
97
+ raise PipelineSubmissionError(
98
+ f"Failed to submit pipeline to {orchestrator}: {e}"
99
+ ) from e
100
+
101
+ await self._run_repo.set_orchestrator_run_id(run.id, orchestrator_run_id)
102
+ await self._run_repo.update_status(run.id, "running")
103
+ return run.id
104
+
105
+ async def get_run(self, tenant_id: str, run_id: str) -> PipelineRunModel | None:
106
+ """Get a pipeline run record."""
107
+ return await self._run_repo.get(run_id, tenant_id)
108
+
109
+ async def get_status(self, tenant_id: str, run_id: str) -> PipelineStatus | None:
110
+ """Get normalized pipeline status, merging DB step records as fallback."""
111
+ run = await self._run_repo.get(run_id, tenant_id)
112
+ if run is None:
113
+ return None
114
+
115
+ # Load DB-persisted step records for merge / fallback
116
+ db_steps: list[StepStatus] = []
117
+ if self._step_repo is not None:
118
+ rows = await self._step_repo.list_by_run(run_id, tenant_id)
119
+ db_steps = [
120
+ StepStatus(
121
+ step=r.step_name,
122
+ module=r.module_type,
123
+ status=r.status,
124
+ started_at=r.started_at,
125
+ completed_at=r.completed_at,
126
+ error_message=r.error_message,
127
+ output_ref=r.output_ref,
128
+ )
129
+ for r in rows
130
+ ]
131
+
132
+ if run.orchestrator_run_id is None:
133
+ return PipelineStatus(status=run.status, steps=db_steps)
134
+
135
+ provider = self._status_providers.get(run.orchestrator)
136
+ if provider is None:
137
+ return PipelineStatus(status=run.status, steps=db_steps)
138
+
139
+ try:
140
+ status = await provider.get_status(run.orchestrator_run_id)
141
+ except Exception:
142
+ # Orchestrator unreachable — serve from DB
143
+ return PipelineStatus(status=run.status, steps=db_steps)
144
+
145
+ # Merge: enrich orchestrator steps with DB fields (error_message, output_ref)
146
+ db_map = {s.step: s for s in db_steps}
147
+ merged: list[StepStatus] = []
148
+ for s in status.steps:
149
+ db = db_map.get(s.step)
150
+ if db is not None:
151
+ merged.append(
152
+ StepStatus(
153
+ step=s.step,
154
+ module=s.module,
155
+ status=s.status,
156
+ fan_out_count=s.fan_out_count,
157
+ completed_count=s.completed_count,
158
+ failed_count=s.failed_count,
159
+ started_at=s.started_at or db.started_at,
160
+ completed_at=db.completed_at,
161
+ error_message=db.error_message,
162
+ output_ref=db.output_ref,
163
+ )
164
+ )
165
+ else:
166
+ merged.append(s)
167
+
168
+ # Add DB-only steps not returned by orchestrator (e.g. after purge)
169
+ orchestrator_step_names = {s.step for s in status.steps}
170
+ for s in db_steps:
171
+ if s.step not in orchestrator_step_names:
172
+ merged.append(s)
173
+
174
+ status.steps = merged
175
+
176
+ # Cache top-level status
177
+ if status.status != run.status:
178
+ await self._run_repo.update_status(run_id, status.status)
179
+
180
+ return status
181
+
182
+ async def record_step_status(
183
+ self,
184
+ tenant_id: str,
185
+ run_id: str,
186
+ step_name: str,
187
+ status: str,
188
+ output_ref: dict[str, Any] | None = None,
189
+ error_message: str | None = None,
190
+ ) -> None:
191
+ """Persist a step status callback from the orchestrator runner."""
192
+ run = await self._run_repo.get(run_id, tenant_id)
193
+ if run is None or self._step_repo is None:
194
+ return
195
+
196
+ now = datetime.now(tz=UTC)
197
+ started_at = now if status == "running" else None
198
+ completed_at = now if status in {"completed", "failed"} else None
199
+
200
+ # Derive module_type from config snapshot if possible
201
+ module_type = ""
202
+ dag = run.config_snapshot.get("dag", [])
203
+ for step in dag:
204
+ base_name = step_name.split("[")[0]
205
+ if step.get("step") == base_name:
206
+ module_type = step.get("module_type") or step.get("module", "")
207
+ break
208
+
209
+ await self._step_repo.upsert(
210
+ pipeline_run_id=run_id,
211
+ tenant_id=tenant_id,
212
+ step_name=step_name,
213
+ module_type=module_type,
214
+ status=status,
215
+ started_at=started_at,
216
+ completed_at=completed_at,
217
+ error_message=error_message,
218
+ output_ref=output_ref,
219
+ )
220
+
221
+ async def cancel(self, tenant_id: str, run_id: str) -> bool:
222
+ """Cancel a running pipeline."""
223
+ run = await self._run_repo.get(run_id, tenant_id)
224
+ if run is None:
225
+ return False
226
+
227
+ if run.orchestrator_run_id is None:
228
+ await self._run_repo.update_status(run_id, "cancelled")
229
+ return True
230
+
231
+ provider = self._status_providers.get(run.orchestrator)
232
+ if provider is None:
233
+ return False
234
+
235
+ result = await provider.cancel(run.orchestrator_run_id)
236
+ if result:
237
+ await self._run_repo.update_status(run_id, "cancelled")
238
+ return result
239
+
240
+ async def list_runs(
241
+ self,
242
+ tenant_id: str,
243
+ product: str | None = None,
244
+ status: str | None = None,
245
+ limit: int = 20,
246
+ offset: int = 0,
247
+ ) -> tuple[list[PipelineRunModel], int]:
248
+ return await self._run_repo.list_runs(
249
+ tenant_id, product=product, status=status, limit=limit, offset=offset
250
+ )
@@ -0,0 +1,44 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from pulse_engine.pipeline.translators.base import BaseStatusProvider, BaseTranslator
6
+
7
+ __all__ = [
8
+ "BaseTranslator",
9
+ "BaseStatusProvider",
10
+ "get_translator",
11
+ "get_status_provider",
12
+ ]
13
+
14
+
15
def get_translator(orchestrator: str, **kwargs: Any) -> BaseTranslator:
    """Build the translator for the named orchestrator.

    The concrete translator module is imported only once the orchestrator
    name has been recognised. Keyword arguments are forwarded to
    ``PrefectTranslator``; ``AirflowTranslator`` is constructed without
    arguments, so ``kwargs`` are ignored for it.

    Raises:
        ValueError: If ``orchestrator`` is neither ``"prefect"`` nor ``"airflow"``.
    """
    if orchestrator not in ("prefect", "airflow"):
        raise ValueError(f"Unknown orchestrator: {orchestrator}")

    if orchestrator == "airflow":
        from pulse_engine.pipeline.translators.airflow_translator import (
            AirflowTranslator,
        )

        return AirflowTranslator()

    from pulse_engine.pipeline.translators.prefect_translator import (
        PrefectTranslator,
    )

    return PrefectTranslator(**kwargs)
29
+
30
+
31
def get_status_provider(orchestrator: str, **kwargs: Any) -> BaseStatusProvider:
    """Build the status provider for the named orchestrator.

    The concrete provider module is imported only once the orchestrator name
    has been recognised. Keyword arguments are forwarded to
    ``PrefectStatusProvider``; ``AirflowStatusProvider`` is constructed
    without arguments, so ``kwargs`` are ignored for it.

    Raises:
        ValueError: If ``orchestrator`` is neither ``"prefect"`` nor ``"airflow"``.
    """
    if orchestrator not in ("prefect", "airflow"):
        raise ValueError(f"Unknown orchestrator: {orchestrator}")

    if orchestrator == "airflow":
        from pulse_engine.pipeline.translators.airflow_status import (
            AirflowStatusProvider,
        )

        return AirflowStatusProvider()

    from pulse_engine.pipeline.translators.prefect_status import (
        PrefectStatusProvider,
    )

    return PrefectStatusProvider(**kwargs)
@@ -0,0 +1,11 @@
1
+ from __future__ import annotations
2
+
3
+ from pulse_engine.pipeline.translators.base import BaseStatusProvider, PipelineStatus
4
+
5
+
6
class AirflowStatusProvider(BaseStatusProvider):
    """Placeholder Airflow provider; every operation raises ``NotImplementedError``."""

    async def get_status(self, orchestrator_run_id: str) -> PipelineStatus:
        """Always raises ``NotImplementedError``."""
        message = "Airflow status provider is not yet implemented."
        raise NotImplementedError(message)

    async def cancel(self, orchestrator_run_id: str) -> bool:
        """Always raises ``NotImplementedError``."""
        message = "Airflow cancel is not yet implemented."
        raise NotImplementedError(message)
@@ -0,0 +1,22 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from pulse_engine.pipeline.schemas import PipelineConfig
6
+ from pulse_engine.pipeline.translators.base import BaseTranslator
7
+
8
+
9
class AirflowTranslator(BaseTranslator):
    """Placeholder Airflow translator; submission raises ``NotImplementedError``."""

    async def submit(
        self,
        pipeline_run_id: str,
        parsed_config: PipelineConfig,
        module_images: dict[str, str],
        global_config: dict[str, Any],
        tenant_id: str,
        pulse_engine_url: str,
        pulse_api_token: str,
    ) -> str:
        """Always raises ``NotImplementedError``; all arguments are ignored."""
        message = (
            "Airflow translator is not yet implemented. Use orchestrator='prefect'."
        )
        raise NotImplementedError(message)
@@ -0,0 +1,42 @@
1
+ from __future__ import annotations
2
+
3
+ from abc import ABC, abstractmethod
4
+ from dataclasses import dataclass, field
5
+ from typing import Any
6
+
7
+ from pulse_engine.pipeline.schemas import PipelineConfig, StepStatus
8
+
9
+
10
@dataclass
class PipelineStatus:
    """Normalized status of a pipeline run together with its step statuses."""

    # Overall run state: pending, running, completed, failed, cancelled
    status: str
    # Start time of the run when the status source reports one.
    started_at: str | None = field(default=None)
    # Per-step statuses; every instance gets its own fresh list.
    steps: list[StepStatus] = field(default_factory=list)
15
+
16
+
17
class BaseTranslator(ABC):
    """Interface for components that submit a pipeline to an orchestrator."""

    @abstractmethod
    async def submit(
        self,
        pipeline_run_id: str,
        parsed_config: PipelineConfig,
        module_images: dict[str, str],
        global_config: dict[str, Any],
        tenant_id: str,
        pulse_engine_url: str,
        pulse_api_token: str,
    ) -> str:
        """Submit pipeline to orchestrator. Returns orchestrator's native run ID."""
31
+
32
+
33
class BaseStatusProvider(ABC):
    """Interface for components that query and cancel orchestrator runs."""

    @abstractmethod
    async def get_status(self, orchestrator_run_id: str) -> PipelineStatus:
        """Query orchestrator and return normalized status."""

    @abstractmethod
    async def cancel(self, orchestrator_run_id: str) -> bool:
        """Cancel a running pipeline. Returns True if successfully cancelled."""
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+
5
+ import httpx
6
+
7
+ from pulse_engine.pipeline.schemas import StepStatus
8
+ from pulse_engine.pipeline.translators.base import BaseStatusProvider, PipelineStatus
9
+
10
# Maps Prefect state types to the normalized status strings used by
# PipelineStatus / StepStatus. Lookups elsewhere in this module fall back to
# "pending" for any state type that is not listed here.
_PREFECT_STATE_MAP: dict[str, str] = {
    "COMPLETED": "completed",
    "FAILED": "failed",
    # A crashed run is terminal; leaving it unmapped made it fall through to
    # the "pending" default, so it never looked finished.
    "CRASHED": "failed",
    "CANCELLED": "cancelled",
    "CANCELLING": "cancelled",
    "RUNNING": "running",
    "PENDING": "pending",
    "SCHEDULED": "pending",
    # Explicit entry; same value the default fallback already produced.
    "PAUSED": "pending",
}
19
+
20
+
21
class PrefectStatusProvider(BaseStatusProvider):
    """Status provider that queries and cancels flow runs over the Prefect HTTP API."""

    def __init__(
        self,
        prefect_api_url: str,
        prefect_api_key: str,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store connection settings.

        Args:
            prefect_api_url: Base URL of the Prefect API; a trailing ``/`` is stripped.
            prefect_api_key: Key used to build the ``Authorization`` header
                for clients this provider creates itself.
            http_client: Optional shared client. When given it is used as-is
                and never closed by this provider.
        """
        self._api_url = prefect_api_url.rstrip("/")
        self._api_key = prefect_api_key
        self._client = http_client

    def _get_client(self) -> httpx.AsyncClient:
        """Return the injected client, or a new one with a Basic auth header.

        A newly created client is owned by the caller, which must close it.
        """
        if self._client is not None:
            return self._client
        auth_header = base64.b64encode(f":{self._api_key}".encode()).decode()
        return httpx.AsyncClient(
            headers={"Authorization": f"Basic {auth_header}"},
            timeout=30.0,
        )

    @staticmethod
    def _normalized_state(resource: dict) -> str:
        """Map the ``state.type`` of a flow/task run payload to a normalized status.

        A missing or null ``state`` is treated as ``PENDING``; an unknown state
        type maps to ``"pending"``.
        """
        # `.get("state", {})` would return None for an explicit null value and
        # crash on the chained `.get`, so coalesce with `or`.
        state = resource.get("state") or {}
        return _PREFECT_STATE_MAP.get(state.get("type", "PENDING"), "pending")

    async def get_status(self, orchestrator_run_id: str) -> PipelineStatus:
        """Fetch a flow run and its task runs and return the normalized status.

        Each task run becomes a step whose ``step`` and ``module`` are both
        the task run's ``name`` (``"unknown"`` if absent). HTTP error
        responses propagate via ``raise_for_status``.
        """
        client = self._get_client()
        try:
            resp = await client.get(f"{self._api_url}/flow_runs/{orchestrator_run_id}")
            resp.raise_for_status()
            flow_run = resp.json()

            status = self._normalized_state(flow_run)
            started_at = flow_run.get("start_time")

            task_resp = await client.post(
                f"{self._api_url}/task_runs/filter",
                json={"flow_runs": {"id": {"any_": [orchestrator_run_id]}}},
            )
            task_resp.raise_for_status()
            task_runs = task_resp.json()

            steps: list[StepStatus] = [
                StepStatus(
                    step=task.get("name", "unknown"),
                    module=task.get("name", "unknown"),
                    status=self._normalized_state(task),
                    started_at=task.get("start_time"),
                )
                for task in task_runs
            ]

            return PipelineStatus(
                status=status,
                started_at=started_at,
                steps=steps,
            )
        finally:
            # Only close clients created by this call, never an injected one.
            if self._client is None:
                await client.aclose()

    async def cancel(self, orchestrator_run_id: str) -> bool:
        """Request the ``CANCELLED`` state for a flow run.

        Returns ``True`` when the request succeeds and ``False`` when the API
        answers with an HTTP error status. Other exceptions propagate.
        """
        client = self._get_client()
        try:
            resp = await client.post(
                f"{self._api_url}/flow_runs/{orchestrator_run_id}/set_state",
                json={"state": {"type": "CANCELLED"}},
            )
            resp.raise_for_status()
            return True
        except httpx.HTTPStatusError:
            return False
        finally:
            # Only close clients created by this call, never an injected one.
            if self._client is None:
                await client.aclose()
@@ -0,0 +1,195 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ from typing import Any
5
+
6
+ import httpx
7
+
8
+ from pulse_engine.pipeline.schemas import PipelineConfig, SecretRef
9
+ from pulse_engine.pipeline.translators.base import BaseTranslator
10
+
11
+
12
class PrefectTranslator(BaseTranslator):
    """Translates a PipelineConfig into a Prefect flow run submission."""

    FLOW_ENTRYPOINT = "pulse_engine.runners.prefect_pipeline_flow:pipeline_flow"

    def __init__(
        self,
        prefect_api_url: str,
        prefect_api_key: str,
        work_pool_name: str,
        engine_image: str = "",
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store connection and deployment settings.

        Args:
            prefect_api_url: Base URL of the Prefect API; a trailing ``/`` is stripped.
            prefect_api_key: Key used to build the ``Authorization`` header
                for clients this translator creates itself.
            work_pool_name: Work pool name written into the deployment.
            engine_image: Value sent as the deployment's ``image`` job variable.
            http_client: Optional shared client. When given it is used as-is
                and never closed by this translator.
        """
        self._api_url = prefect_api_url.rstrip("/")
        self._api_key = prefect_api_key
        self._work_pool_name = work_pool_name
        self._engine_image = engine_image
        self._client = http_client

    def _get_client(self) -> httpx.AsyncClient:
        """Return the injected client, or a new one with a Basic auth header.

        A newly created client is owned by the caller, which must close it.
        """
        if self._client is not None:
            return self._client
        auth_header = base64.b64encode(f":{self._api_key}".encode()).decode()
        return httpx.AsyncClient(
            headers={"Authorization": f"Basic {auth_header}"},
            timeout=30.0,
        )

    def _serialize_dag(self, config: PipelineConfig) -> list[dict[str, Any]]:
        """Serialize v2 DAG into a JSON-friendly list for Prefect flow parameters.

        Each DAG step is merged with the settings of the module it references.
        Optional step attributes are emitted only when set.

        Raises:
            KeyError: If a step references a module name absent from ``config.modules``.
        """
        module_map = {m.name: m for m in config.modules}
        steps = []
        for s in config.dag:
            module = module_map[s.module]
            step_dict: dict[str, Any] = {
                "step": s.step,
                "module": s.module,
                "module_type": module.module,
                "retries": module.retries,
                "retry_delay": module.retry_delay,
                "timeout": module.timeout,
                "max_concurrency": module.max_concurrency,
                "args": module.args,
                # Secret references are dumped to plain data; other values pass through.
                "env": {
                    k: v.model_dump() if isinstance(v, SecretRef) else v
                    for k, v in module.env.items()
                },
                "resources": {
                    **module.resources.model_dump(),
                    # Step-level compute wins, then the pipeline default, then "ecs".
                    "compute": module.resources.compute
                    or config.infra.default_compute
                    or "ecs",
                    "cpu_units": module.resources.ecs_cpu_units,
                },
            }
            if s.for_each is not None:
                step_dict["for_each"] = s.for_each
            if s.collect_from is not None:
                # Always emit a list, whether one name or several were given.
                step_dict["collect_from"] = (
                    [s.collect_from]
                    if isinstance(s.collect_from, str)
                    else list(s.collect_from)
                )
            if s.when is not None:
                step_dict["when"] = s.when
            if s.trigger_rule is not None:
                step_dict["trigger_rule"] = s.trigger_rule
            if s.depends_on:
                step_dict["depends_on"] = [{"step": d.step} for d in s.depends_on]
            steps.append(step_dict)
        return steps

    async def _ensure_deployment(
        self,
        client: httpx.AsyncClient,
        pipeline_name: str,
        cron_schedule: str | None = None,
    ) -> str:
        """Create or update the Prefect deployment for this pipeline.

        Posts the flow first, then the deployment that references it. A cron
        schedule, when given, is attached as a single active UTC schedule.

        Returns the deployment ID.
        """
        deployment_name = self._get_deployment_name(pipeline_name)

        # Get or create the flow (idempotent — Prefect returns existing if name matches)
        flow_resp = await client.post(
            f"{self._api_url}/flows/",
            json={"name": deployment_name},
        )
        flow_resp.raise_for_status()
        flow_id = str(flow_resp.json()["id"])

        deploy_body: dict[str, Any] = {
            "name": deployment_name,
            "flow_id": flow_id,
            "entrypoint": self.FLOW_ENTRYPOINT,
            "work_pool_name": self._work_pool_name,
            "job_variables": {"image": self._engine_image},
            "path": "/app",
        }
        if cron_schedule:
            deploy_body["schedules"] = [
                {"schedule": {"cron": cron_schedule, "timezone": "UTC"}, "active": True}
            ]

        # Upsert deployment
        # path="/app" required for Prefect process worker to locate the flow
        # when the entrypoint is a package import (not a relative file path).
        deploy_resp = await client.post(
            f"{self._api_url}/deployments/",
            json=deploy_body,
        )
        deploy_resp.raise_for_status()
        return str(deploy_resp.json()["id"])

    async def submit(
        self,
        pipeline_run_id: str,
        parsed_config: PipelineConfig,
        module_images: dict[str, str],
        global_config: dict[str, Any],
        tenant_id: str,
        pulse_engine_url: str,
        pulse_api_token: str,
    ) -> str:
        """Submit a pipeline as a Prefect flow run, upserting the deployment first.

        Returns the ID of the created flow run. HTTP error responses propagate
        via ``raise_for_status``.
        """
        # Build the payload before acquiring a client, so a serialization
        # error cannot leave a newly created client unclosed.
        parameters = {
            "pipeline_run_id": pipeline_run_id,
            "tenant_id": tenant_id,
            "dag": self._serialize_dag(parsed_config),
            "module_images": module_images,
            "global_config": global_config,
            "pulse_engine_url": pulse_engine_url,
            "pulse_api_token": pulse_api_token,
            "results_backend": parsed_config.infra.results_backend.model_dump(),
        }

        body = {
            "parameters": parameters,
            "state": {"type": "SCHEDULED"},
            "tags": [f"product:{parsed_config.name}", f"pipeline:{pipeline_run_id}"],
        }

        client = self._get_client()
        try:
            deployment_id = await self._ensure_deployment(
                client, parsed_config.deployment_name, parsed_config.infra.schedule
            )

            response = await client.post(
                f"{self._api_url}/deployments/{deployment_id}/create_flow_run",
                json=body,
            )
            response.raise_for_status()
            data = response.json()
            return str(data["id"])
        finally:
            # Only close clients created by this call, never an injected one.
            if self._client is None:
                await client.aclose()

    async def _resolve_deployment_id(
        self, client: httpx.AsyncClient, deployment_name: str
    ) -> str:
        """Look up a Prefect deployment ID by name.

        Raises:
            RuntimeError: If no deployment with that name is returned.
        """
        response = await client.post(
            f"{self._api_url}/deployments/filter",
            json={
                "deployments": {"name": {"any_": [deployment_name]}},
                "limit": 1,
            },
        )
        response.raise_for_status()
        deployments = response.json()
        if not deployments:
            raise RuntimeError(
                f"Prefect deployment '{deployment_name}' not found. "
                f"Create it first with: prefect deployment build ..."
            )
        return str(deployments[0]["id"])

    def _get_deployment_name(self, pipeline_name: str) -> str:
        """Deployment name for the generic pipeline flow."""
        return f"pulse-pipeline-{pipeline_name}"
File without changes