pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,48 @@
1
+ """Base extractor ABC and supporting types for product extractors."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class ExtractorConfig:
    """Static settings describing a single product extractor.

    Attributes:
        name: Name of the extractor.
        job_type: Job type string associated with the extractor.
        schedule: Optional schedule string; ``None`` (the default) means no
            schedule is set.
        timeout_seconds: Timeout in seconds; defaults to 3600.
        max_retries: Retry limit; defaults to 3.
    """

    # Required fields come first so positional construction keeps working.
    name: str
    job_type: str

    # Optional settings and their defaults.
    schedule: str | None = None
    timeout_seconds: int = 3600
    max_retries: int = 3
19
+
20
+
21
@dataclass
class ExtractionResult:
    """One item produced by an extractor run.

    Attributes:
        raw_content: Raw content of the item, as a string.
        source_id: Identifier of the item at its source.
        source_type: Type label of the source the item came from.
        metadata: Free-form extra data; every instance gets its own empty
            dict unless one is passed in.
    """

    raw_content: str
    source_id: str
    source_type: str
    # default_factory (rather than a literal {}) avoids sharing one dict
    # between instances.
    metadata: dict[str, Any] = field(default_factory=dict)
29
+
30
+
31
class BaseExtractor(ABC):
    """Interface that every product extractor has to implement.

    Subclasses must provide ``get_config`` and ``extract``. The
    ``on_success`` and ``on_failure`` hooks are optional: the versions
    defined here do nothing and return ``None``.
    """

    @abstractmethod
    def get_config(self) -> ExtractorConfig:
        """Return the static configuration for this extractor."""

    @abstractmethod
    async def extract(
        self,
        tenant_id: str,
        parameters: dict[str, Any],
    ) -> list[ExtractionResult]:
        """Run the extraction for ``tenant_id`` and return the extracted items."""

    async def on_success(
        self,
        tenant_id: str,
        results: list[ExtractionResult],
    ) -> None:
        """Hook for a successful extraction; a no-op unless overridden."""
        return None

    async def on_failure(
        self,
        tenant_id: str,
        error: Exception,
    ) -> None:
        """Hook for a failed extraction; a no-op unless overridden."""
        return None
@@ -0,0 +1,50 @@
1
+ """Job record ORM model."""
2
+
3
+ import uuid
4
+ from datetime import datetime
5
+ from typing import Any
6
+
7
+ import sqlalchemy as sa
8
+ from sqlalchemy.orm import Mapped, mapped_column
9
+
10
+ from pulse_engine.database import Base
11
+
12
+
13
class JobRecordModel(Base):
    """ORM mapping for one row of the ``job_records`` table."""

    __tablename__ = "job_records"

    # Primary key; a random UUID4 string is generated when none is supplied.
    job_id: Mapped[str] = mapped_column(
        sa.String,
        primary_key=True,
        default=lambda: str(uuid.uuid4()),
    )

    # Required descriptive columns.
    job_type: Mapped[str] = mapped_column(sa.String, nullable=False)
    product: Mapped[str] = mapped_column(sa.String, nullable=False)
    tenant_id: Mapped[str] = mapped_column(sa.String, nullable=False, index=True)

    # Required columns with Python-side defaults.
    status: Mapped[str] = mapped_column(sa.String, nullable=False, default="pending")
    priority: Mapped[str] = mapped_column(sa.String, nullable=False, default="normal")
    parameters: Mapped[dict[str, Any]] = mapped_column(
        sa.JSON,
        nullable=False,
        default=dict,
    )

    # Optional references; NULL until set.
    orchestrator_run_id: Mapped[str | None] = mapped_column(
        sa.String,
        nullable=True,
        default=None,
    )
    callback_url: Mapped[str | None] = mapped_column(
        sa.String,
        nullable=True,
        default=None,
    )

    # Timestamps (timezone-aware). created_at is filled in by the database
    # via a server default; the other two stay NULL until written.
    created_at: Mapped[datetime] = mapped_column(
        sa.DateTime(timezone=True),
        server_default=sa.func.now(),
        nullable=False,
    )
    started_at: Mapped[datetime | None] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=True,
        default=None,
    )
    completed_at: Mapped[datetime | None] = mapped_column(
        sa.DateTime(timezone=True),
        nullable=True,
        default=None,
    )

    # Outcome columns; both nullable.
    result_summary: Mapped[dict[str, Any] | None] = mapped_column(
        sa.JSON,
        nullable=True,
        default=None,
    )
    error: Mapped[str | None] = mapped_column(sa.String, nullable=True, default=None)

    # Composite indexes on (tenant_id, status) and (tenant_id, product).
    __table_args__ = (
        sa.Index("ix_job_records_tenant_status", "tenant_id", "status"),
        sa.Index("ix_job_records_tenant_product", "tenant_id", "product"),
    )
@@ -0,0 +1,15 @@
1
+ from pulse_engine.config import Settings
2
+ from pulse_engine.extractor.orchestrator.base import BaseOrchestratorAdapter
3
+ from pulse_engine.extractor.orchestrator.noop import NoopAdapter
4
+
5
+
6
def get_orchestrator_adapter(settings: Settings) -> BaseOrchestratorAdapter:
    """Build the orchestrator adapter selected by ``settings``.

    ``settings.pulse_orchestrator_backend`` is compared case-insensitively:
    ``"prefect"`` yields a ``PrefectAdapter``; any other value yields a
    ``NoopAdapter``.
    """
    if settings.pulse_orchestrator_backend.lower() != "prefect":
        return NoopAdapter()

    # Imported here so the Prefect adapter module is only imported when the
    # Prefect backend is actually selected.
    from pulse_engine.extractor.orchestrator.prefect import PrefectAdapter

    return PrefectAdapter(
        api_url=settings.prefect_api_url,
        # A falsy key (e.g. an empty string) is passed on as None.
        api_key=settings.prefect_api_key or None,
    )
@@ -0,0 +1,34 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from typing import Any
4
+
5
+
6
+ @dataclass
7
+ class OrchestratorRunStatus:
8
+ run_id: str
9
+ status: str # pending, running, completed, failed, cancelled, unknown
10
+ raw_state: str | None = None
11
+
12
+
13
class BaseOrchestratorAdapter(ABC):
    """Common interface over an external job orchestrator (Prefect, Airflow, etc.).

    All four methods are abstract coroutines; concrete adapters must
    implement each of them.
    """

    @abstractmethod
    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Look up the current status of the run with external ID ``run_id``."""

    @abstractmethod
    async def cancel_run(self, run_id: str) -> bool:
        """Ask the orchestrator to cancel the run ``run_id``."""

    @abstractmethod
    async def health_check(self) -> bool:
        """Return True if the orchestrator is reachable."""

    @abstractmethod
    async def create_flow_run(
        self,
        deployment_id: str,
        parameters: dict[str, Any] | None = None,
    ) -> str:
        """Start a new flow run for ``deployment_id`` and return its run ID."""
@@ -0,0 +1,37 @@
1
+ import uuid
2
+ from typing import Any
3
+
4
+ from pulse_engine.extractor.orchestrator.base import (
5
+ BaseOrchestratorAdapter,
6
+ OrchestratorRunStatus,
7
+ )
8
+
9
+
10
class NoopAdapter(BaseOrchestratorAdapter):
    """Stand-in adapter used when no orchestrator is configured.

    No method contacts an external system; each returns a fixed or locally
    generated value.
    """

    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Report the run as ``"unknown"``."""
        return OrchestratorRunStatus(run_id=run_id, status="unknown")

    async def cancel_run(self, run_id: str) -> bool:
        """Always return False; there is nothing to cancel."""
        return False

    async def health_check(self) -> bool:
        """Always return True."""
        return True

    async def create_flow_run(
        self,
        deployment_id: str,
        parameters: dict[str, Any] | None = None,
    ) -> str:
        """Return an empty string as the run ID; no run is created."""
        return ""

    async def create_or_update_deployment(
        self,
        name: str,
        flow_entrypoint: str,
        image: str,
        work_pool_name: str = "products-worker-pool",
    ) -> tuple[str, str]:
        """Return a freshly generated UUID4 string and ``name`` unchanged.

        Nothing is registered with an orchestrator; ``flow_entrypoint``,
        ``image`` and ``work_pool_name`` are accepted but not used.
        """
        generated_id = str(uuid.uuid4())
        return generated_id, name
@@ -0,0 +1,163 @@
1
+ from __future__ import annotations
2
+
3
+ import base64
4
+ from typing import Any
5
+
6
+ import httpx
7
+ import structlog
8
+
9
+ from pulse_engine.extractor.base import BaseExtractor
10
+ from pulse_engine.extractor.orchestrator.base import (
11
+ BaseOrchestratorAdapter,
12
+ OrchestratorRunStatus,
13
+ )
14
+
15
# Module-level structured logger, named after this module.
logger = structlog.get_logger(__name__)
16
+
17
+ # Map Prefect flow-run state types to canonical job statuses
18
+ _PREFECT_STATE_MAP: dict[str, str] = {
19
+ "COMPLETED": "completed",
20
+ "FAILED": "failed",
21
+ "CRASHED": "failed",
22
+ "CANCELLED": "cancelled",
23
+ "CANCELLING": "cancelled",
24
+ "RUNNING": "running",
25
+ "PENDING": "pending",
26
+ "SCHEDULED": "pending",
27
+ }
28
+
29
+
30
class PrefectAdapter(BaseOrchestratorAdapter):
    """Orchestrator adapter that talks to a Prefect server over its HTTP API."""

    def __init__(self, api_url: str, api_key: str | None = None) -> None:
        """Create the HTTP client used for every Prefect API call.

        Args:
            api_url: Base URL of the Prefect API; trailing slashes are removed.
            api_key: Optional credential. When truthy it is base64-encoded and
                sent as an ``Authorization: Basic ...`` header.
        """
        self._api_url = api_url.rstrip("/")

        request_headers: dict[str, str] = {"Content-Type": "application/json"}
        if api_key:
            token = base64.b64encode(api_key.encode()).decode()
            request_headers["Authorization"] = f"Basic {token}"

        self._client = httpx.AsyncClient(
            base_url=self._api_url,
            headers=request_headers,
            timeout=10.0,
        )

    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Fetch a flow run and translate its state type to a canonical status.

        Never raises: any exception (HTTP error, bad payload, ...) is logged
        as a warning and reported as status ``"unknown"``.
        """
        try:
            response = await self._client.get(f"/flow_runs/{run_id}")
            response.raise_for_status()
            body = response.json()
            state_type = body.get("state", {}).get("type", "UNKNOWN")
            # State types missing from the map are reported as "unknown".
            status = _PREFECT_STATE_MAP.get(state_type.upper(), "unknown")
            return OrchestratorRunStatus(
                run_id=run_id,
                status=status,
                raw_state=state_type,
            )
        except Exception:
            logger.warning("prefect_status_fetch_failed", run_id=run_id, exc_info=True)
            return OrchestratorRunStatus(run_id=run_id, status="unknown")

    async def cancel_run(self, run_id: str) -> bool:
        """Ask Prefect to set the run's state to ``CANCELLED``.

        Returns True when the response has a success status; False otherwise,
        including when the request itself raises (which is logged).
        """
        try:
            response = await self._client.post(
                f"/flow_runs/{run_id}/set_state",
                json={"state": {"type": "CANCELLED"}},
            )
            return response.is_success
        except Exception:
            logger.warning("prefect_cancel_failed", run_id=run_id, exc_info=True)
            return False

    async def health_check(self) -> bool:
        """Return True when ``GET /health`` succeeds; False on any failure."""
        try:
            response = await self._client.get("/health")
            return response.is_success
        except Exception:
            return False

    async def register_extractors(
        self,
        extractors: list[type[BaseExtractor]],
        pipeline: Any = None,
    ) -> list[str]:
        """Create a Prefect deployment for each given extractor class.

        Each class is instantiated without arguments to read its config. A
        failed registration is logged and skipped; it does not stop the loop.
        ``pipeline`` is accepted but not used by this method.

        Returns:
            The IDs of the deployments that were created.
        """
        created_ids: list[str] = []

        for extractor_cls in extractors:
            cfg = extractor_cls().get_config()

            body: dict[str, Any] = {
                "name": cfg.name,
                "flow_name": f"extract_{cfg.job_type}",
                "parameters": {
                    "extractor_cls": (
                        f"{extractor_cls.__module__}.{extractor_cls.__qualname__}"
                    ),
                },
                "tags": ["pulse-engine", cfg.job_type],
            }
            # The schedule is only sent when the config defines one.
            if cfg.schedule:
                body["schedule"] = {"cron": cfg.schedule}

            try:
                response = await self._client.post("/deployments/", json=body)
                response.raise_for_status()
                deployment_id = response.json().get("id", "")
                created_ids.append(deployment_id)
                logger.info(
                    "extractor_registered",
                    extractor=cfg.name,
                    job_type=cfg.job_type,
                    deployment_id=deployment_id,
                )
            except Exception:
                logger.warning(
                    "extractor_registration_failed",
                    extractor=cfg.name,
                    exc_info=True,
                )

        return created_ids

    async def _get_or_create_flow_id(self, flow_name: str) -> str:
        """Return the ID Prefect reports for the flow named ``flow_name``.

        Issues ``POST /flows/`` with the name and returns the ``id`` from the
        response. NOTE(review): this relies on the endpoint returning the
        existing flow when the name is already taken - confirm against the
        Prefect API.

        Raises:
            httpx.HTTPStatusError: If the response has an error status.
        """
        response = await self._client.post("/flows/", json={"name": flow_name})
        response.raise_for_status()
        flow = response.json()
        return str(flow["id"])

    async def create_or_update_deployment(
        self,
        name: str,
        flow_entrypoint: str,
        image: str,
        work_pool_name: str = "products-worker-pool",
    ) -> tuple[str, str]:
        """Create or update a Prefect deployment.

        The flow is looked up (or created) under the same ``name`` as the
        deployment, and ``image`` is passed as a job variable.

        Returns:
            ``(deployment_id, deployment_name)``; the name falls back to
            ``name`` when the response does not include one.

        Raises:
            httpx.HTTPStatusError: If either request returns an error status.
        """
        body: dict[str, Any] = {
            "name": name,
            "flow_id": await self._get_or_create_flow_id(name),
            "entrypoint": flow_entrypoint,
            "work_pool_name": work_pool_name,
            "job_variables": {"image": image},
        }
        response = await self._client.post("/deployments/", json=body)
        response.raise_for_status()
        deployment: dict[str, Any] = response.json()
        return str(deployment["id"]), str(deployment.get("name", name))

    async def create_flow_run(
        self,
        deployment_id: str,
        parameters: dict[str, Any] | None = None,
    ) -> str:
        """Trigger a flow run from a deployment and return the flow run ID.

        ``parameters`` is only included in the request when it is non-empty.

        Raises:
            httpx.HTTPStatusError: If the response has an error status.
        """
        body: dict[str, Any] = {"parameters": parameters} if parameters else {}
        response = await self._client.post(
            f"/deployments/{deployment_id}/create_flow_run",
            json=body,
        )
        response.raise_for_status()
        return str(response.json()["id"])
@@ -0,0 +1,163 @@
1
+ """Job repository — data access layer for job records."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from datetime import UTC, datetime
7
+ from typing import Any
8
+
9
+ import sqlalchemy as sa
10
+ from sqlalchemy.engine import CursorResult
11
+ from sqlalchemy.ext.asyncio import AsyncSession
12
+
13
+ from pulse_engine.core.exceptions import BadRequestError
14
+ from pulse_engine.extractor.models import JobRecordModel
15
+
16
+ # Explicit allowlist of columns permitted for sort_by queries
17
+ _ALLOWED_SORT_FIELDS: set[str] = {
18
+ "created_at",
19
+ "status",
20
+ "product",
21
+ "job_type",
22
+ "priority",
23
+ }
24
+
25
+
26
class JobRepository:
    """Data-access helper for ``JobRecordModel`` rows, bound to one session.

    Every method except :meth:`get_by_id` filters by ``tenant_id``. Methods
    that write (``create``, ``update_status``, ``delete``) commit the session
    themselves.
    """

    def __init__(self, session: AsyncSession) -> None:
        self._session = session

    async def create(
        self,
        tenant_id: str,
        job_type: str,
        product: str,
        priority: str = "normal",
        parameters: dict[str, Any] | None = None,
        orchestrator_run_id: str | None = None,
        callback_url: str | None = None,
    ) -> JobRecordModel:
        """Insert a new job in status ``"pending"`` and return the stored row.

        A UUID4 ``job_id`` and a UTC ``created_at`` are assigned here;
        ``parameters`` defaults to an empty dict. The record is committed and
        refreshed before it is returned.
        """
        record = JobRecordModel(
            job_id=str(uuid.uuid4()),
            tenant_id=tenant_id,
            job_type=job_type,
            product=product,
            priority=priority,
            parameters=parameters or {},
            orchestrator_run_id=orchestrator_run_id,
            callback_url=callback_url,
            status="pending",
            created_at=datetime.now(UTC),
        )
        self._session.add(record)
        await self._session.commit()
        await self._session.refresh(record)
        return record

    async def get(self, job_id: str, tenant_id: str) -> JobRecordModel | None:
        """Return the job with ``job_id`` owned by ``tenant_id``, or ``None``."""
        stmt = sa.select(JobRecordModel).where(
            JobRecordModel.job_id == job_id,
            JobRecordModel.tenant_id == tenant_id,
        )
        result = await self._session.execute(stmt)
        return result.scalar_one_or_none()

    async def list_jobs(
        self,
        tenant_id: str,
        status: str | None = None,
        product: str | None = None,
        job_type: str | None = None,
        limit: int = 20,
        offset: int = 0,
        sort_by: str = "created_at",
        order: str = "desc",
    ) -> tuple[list[JobRecordModel], int]:
        """Return one page of a tenant's jobs plus the total match count.

        Args:
            tenant_id: Tenant whose jobs are listed.
            status: Optional exact-match filter; ignored when falsy.
            product: Optional exact-match filter; ignored when falsy.
            job_type: Optional exact-match filter; ignored when falsy.
            limit: Maximum number of rows in the page.
            offset: Number of rows to skip before the page starts.
            sort_by: Column to sort on; must be in ``_ALLOWED_SORT_FIELDS``.
            order: ``"asc"`` for ascending; any other value sorts descending.

        Returns:
            ``(jobs, total)`` where ``total`` counts every row matching the
            filters, ignoring ``limit`` and ``offset``.

        Raises:
            BadRequestError: If ``sort_by`` is not an allowed field.
        """
        # Validate against the allowlist before touching the model, to
        # prevent column-name injection through getattr().
        if sort_by not in _ALLOWED_SORT_FIELDS:
            raise BadRequestError(
                f"Invalid sort field: {sort_by!r}. "
                f"Allowed: {', '.join(sorted(_ALLOWED_SORT_FIELDS))}"
            )

        # One filter list feeds both statements, so the page and the total
        # can never be computed from different conditions.
        filters: list[sa.ColumnElement[bool]] = [
            JobRecordModel.tenant_id == tenant_id,
        ]
        if status:
            filters.append(JobRecordModel.status == status)
        if product:
            filters.append(JobRecordModel.product == product)
        if job_type:
            filters.append(JobRecordModel.job_type == job_type)

        sort_col = getattr(JobRecordModel, sort_by)
        primary_order = sa.asc(sort_col) if order == "asc" else sa.desc(sort_col)

        page_stmt = (
            sa.select(JobRecordModel)
            .where(*filters)
            # None of the sortable columns is unique, so rows that tie on the
            # primary key could otherwise come back in a different order for
            # each query and be repeated or skipped across pages. The primary
            # key makes the ordering total and the pagination stable.
            .order_by(primary_order, sa.asc(JobRecordModel.job_id))
            .offset(offset)
            .limit(limit)
        )
        count_stmt = (
            sa.select(sa.func.count()).select_from(JobRecordModel).where(*filters)
        )

        page_result = await self._session.execute(page_stmt)
        jobs = list(page_result.scalars().all())

        count_result = await self._session.execute(count_stmt)
        total = count_result.scalar_one()

        return jobs, total

    async def update_status(
        self,
        job_id: str,
        tenant_id: str,
        status: str,
        **fields: object,
    ) -> JobRecordModel | None:
        """Set a job's status (and optional extra fields) and commit.

        Each extra keyword is copied onto the record only when the record
        already has an attribute of that name; other keywords are silently
        ignored.

        Returns:
            The refreshed record, or ``None`` when no job matches
            ``job_id`` and ``tenant_id``.
        """
        record = await self.get(job_id, tenant_id)
        if record is None:
            return None

        record.status = status
        for key, value in fields.items():
            if hasattr(record, key):
                setattr(record, key, value)

        await self._session.commit()
        await self._session.refresh(record)
        return record

    async def delete(self, job_id: str, tenant_id: str) -> bool:
        """Delete the tenant's job and report whether a row was removed."""
        stmt = sa.delete(JobRecordModel).where(
            JobRecordModel.job_id == job_id,
            JobRecordModel.tenant_id == tenant_id,
        )
        result: CursorResult[Any] = await self._session.execute(stmt)  # type: ignore[assignment]
        await self._session.commit()
        return bool(result.rowcount > 0)

    async def count_active(self, tenant_id: str) -> int:
        """Count the tenant's jobs whose status is ``pending`` or ``running``."""
        stmt = (
            sa.select(sa.func.count())
            .select_from(JobRecordModel)
            .where(
                JobRecordModel.tenant_id == tenant_id,
                JobRecordModel.status.in_(["pending", "running"]),
            )
        )
        result = await self._session.execute(stmt)
        return result.scalar_one()

    async def get_by_id(self, job_id: str) -> JobRecordModel | None:
        """Return the job with ``job_id`` regardless of tenant, or ``None``.

        NOTE(review): unlike :meth:`get`, this applies no tenant filter;
        callers are presumably expected to authorise access themselves.
        """
        stmt = sa.select(JobRecordModel).where(JobRecordModel.job_id == job_id)
        result = await self._session.execute(stmt)
        return result.scalar_one_or_none()
@@ -0,0 +1,102 @@
1
+ """Jobs API router."""
2
+
3
from typing import Literal

from fastapi import APIRouter, Depends, Query
from starlette.responses import JSONResponse, Response

from pulse_engine.core.scope import require_scope
from pulse_engine.dependencies import get_job_service
from pulse_engine.extractor.schemas import (
    CreateJobRequest,
    CreateJobResponse,
    JobListResponse,
    JobResponse,
    StatusUpdateRequest,
    StatusUpdateResponse,
)
from pulse_engine.extractor.service import JobService
from pulse_engine.middleware.tenant import get_tenant_id
20
+
21
# Router for the job endpoints: all routes share the /jobs prefix and the
# "Jobs" tag.
router = APIRouter(
    prefix="/jobs",
    tags=["Jobs"],
)
22
+
23
+
24
# POST /jobs/ - register a job for the calling tenant. Responds with 202 and
# requires the "jobs:trigger_next" scope.
@router.post(
    "/",
    response_model=CreateJobResponse,
    status_code=202,
    dependencies=[require_scope("jobs:trigger_next")],
)
async def register_job(
    body: CreateJobRequest,
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
) -> CreateJobResponse:
    # All of the work is delegated to the job service.
    created = await service.register_job(tenant_id, body)
    return created
36
+
37
+
38
# GET /jobs/{job_id} - fetch a single job for the calling tenant.
@router.get(
    "/{job_id}",
    response_model=JobResponse,
    status_code=200,
)
async def get_job(
    job_id: str,
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
) -> JobResponse:
    # Note the argument order: the service takes the tenant first.
    job = await service.get_job(tenant_id, job_id)
    return job
45
+
46
+
47
# GET /jobs/ - list the calling tenant's jobs with optional filters, paging
# (limit 1-100, offset >= 0) and sorting.
@router.get(
    "/",
    response_model=JobListResponse,
    status_code=200,
)
async def list_jobs(
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
    status: str | None = Query(None),
    product: str | None = Query(None),
    job_type: str | None = Query(None),
    limit: int = Query(20, ge=1, le=100),
    offset: int = Query(0, ge=0),
    sort_by: str = Query("created_at"),
    order: Literal["asc", "desc"] = Query("desc"),
) -> JobListResponse:
    # Query parameters are forwarded unchanged to the service.
    page = await service.list_jobs(
        tenant_id,
        status=status,
        product=product,
        job_type=job_type,
        limit=limit,
        offset=offset,
        sort_by=sort_by,
        order=order,
    )
    return page
69
+
70
+
71
# POST /jobs/{job_id}/status - push a status update for a job. Requires the
# "jobs:status" scope.
@router.post(
    "/{job_id}/status",
    response_model=StatusUpdateResponse,
    status_code=200,
    dependencies=[require_scope("jobs:status")],
)
async def push_status(
    job_id: str,
    body: StatusUpdateRequest,
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
) -> StatusUpdateResponse:
    # All of the work is delegated to the job service.
    outcome = await service.push_status(tenant_id, job_id, body)
    return outcome
84
+
85
+
86
# POST /jobs/{job_id}/cancel - cancel a job for the calling tenant.
@router.post(
    "/{job_id}/cancel",
    response_model=JobResponse,
    status_code=200,
)
async def cancel_job(
    job_id: str,
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
) -> JobResponse:
    # All of the work is delegated to the job service.
    cancelled = await service.cancel_job(tenant_id, job_id)
    return cancelled
93
+
94
+
95
# DELETE /jobs/{job_id} - delete a job for the calling tenant; responds with
# 204 No Content.
@router.delete("/{job_id}", status_code=204)
async def delete_job(
    job_id: str,
    tenant_id: str = Depends(get_tenant_id),
    service: JobService = Depends(get_job_service),
) -> Response:
    await service.delete_job(tenant_id, job_id)
    # A 204 response must not carry a body. JSONResponse(content=None) would
    # serialise None to the four-byte body "null", so a plain Response with no
    # content is returned instead.
    return Response(status_code=204)