pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,169 @@
1
+ """Pulse Engine CLI — scaffold, validate, and run products."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import importlib
6
+ from pathlib import Path
7
+ from typing import Annotated
8
+
9
+ import typer
10
+
11
+ app = typer.Typer(name="pulse", help="Pulse Engine CLI", no_args_is_help=True)
12
+
13
+
14
+ # ---------------------------------------------------------------------------
15
+ # pulse init
16
+ # ---------------------------------------------------------------------------
17
+
18
+
19
@app.command()
def init(
    product_name: Annotated[
        str, typer.Argument(help="Name of the new product (e.g. 'political')")
    ],
    output_dir: Annotated[
        Path, typer.Option("--output-dir", "-o", help="Where to scaffold")
    ] = Path("."),
) -> None:
    """Scaffold a new Pulse product from the built-in template.

    The product name is normalised into two forms that are passed to the
    cookiecutter template as ``extra_context``:

    * ``product_slug`` -- lowercase with underscores; the template uses it for
      the Python package name ``pulse_<slug>``.
    * ``product_name`` -- the slug with underscores turned into hyphens.

    Raises:
        typer.BadParameter: if the normalised name cannot form a valid Python
            package name (empty, or containing characters other than letters,
            digits and underscores).
    """
    from cookiecutter.main import cookiecutter

    template_dir = str(Path(__file__).parent / "templates")

    # Normalise name: lowercase, underscores for Python
    slug = product_name.strip().lower().replace("-", "_").replace(" ", "_")

    # The template generates a package called ``pulse_<slug>``; reject names
    # that would scaffold a project whose package cannot be imported.
    if not slug or not f"pulse_{slug}".isidentifier():
        raise typer.BadParameter(
            f"{product_name!r} is not a valid product name: use only letters, "
            "digits, hyphens, underscores and spaces."
        )

    display = slug.replace("_", "-")

    result = cookiecutter(
        template_dir,
        output_dir=str(output_dir),
        no_input=True,
        extra_context={
            "product_name": display,
            "product_slug": slug,
        },
    )
    typer.echo(f"Scaffolded product at {result}")
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # pulse validate
51
+ # ---------------------------------------------------------------------------
52
+
53
+
54
@app.command()
def validate(
    module: Annotated[
        str,
        typer.Argument(
            help="Dotted import path to the manifest (e.g. 'pulse_political.manifest')"
        ),
    ] = "",
) -> None:
    """Import a product manifest and run validation checks.

    When ``module`` is given, that module is imported and its ``manifest``
    attribute is validated. Otherwise every manifest returned by
    ``discover_products()`` is validated.

    Exits with code 1 when the module cannot be imported, when it has no
    ``manifest`` attribute, when no products are discovered, or when any
    manifest reports validation errors.
    """
    from pulse_engine.registry import discover_products, validate_manifest

    if module:
        try:
            mod = importlib.import_module(module)
        except ImportError as exc:
            # Report a bad import path like every other failure in this
            # command instead of letting a raw traceback escape.
            typer.echo(f"Could not import {module}: {exc}", err=True)
            raise typer.Exit(code=1) from exc
        manifest = getattr(mod, "manifest", None)
        if manifest is None:
            typer.echo(f"No 'manifest' attribute found in {module}", err=True)
            raise typer.Exit(code=1)
        manifests = [manifest]
    else:
        manifests = discover_products()
        if not manifests:
            typer.echo("No products discovered via entry points.", err=True)
            raise typer.Exit(code=1)

    all_ok = True
    for m in manifests:
        errors = validate_manifest(m)
        if errors:
            all_ok = False
            typer.echo(f"[FAIL] {m.name} v{m.version}:")
            for e in errors:
                typer.echo(f" - {e}")
        else:
            typer.echo(f"[OK] {m.name} v{m.version}")

    # Every manifest is reported before the exit status is decided.
    if not all_ok:
        raise typer.Exit(code=1)
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # pulse run
98
+ # ---------------------------------------------------------------------------
99
+
100
+
101
@app.command()
def run(
    host: Annotated[str, typer.Option(help="Bind host")] = "0.0.0.0",
    port: Annotated[int, typer.Option(help="Bind port")] = 8000,
    reload: Annotated[bool, typer.Option(help="Enable auto-reload")] = False,
) -> None:
    """Discover the product manifest and start the FastAPI server.

    The first discovered product (if any) is only used to print a startup
    banner; the server itself is started from the ``create_app`` factory.
    """
    import uvicorn

    from pulse_engine.registry import discover_products

    discovered = discover_products()
    manifest = None
    if discovered:
        manifest = discovered[0]

    if manifest:
        typer.echo(f"Starting {manifest.name} v{manifest.version}")

    # Passing an import string plus factory=True lets uvicorn import the
    # module and call create_app itself.
    server_options = {
        "factory": True,
        "host": host,
        "port": port,
        "reload": reload,
    }
    uvicorn.run("pulse_engine.main:create_app", **server_options)
126
+
127
+
128
+ # ---------------------------------------------------------------------------
129
+ # pulse run-worker
130
+ # ---------------------------------------------------------------------------
131
+
132
+
133
@app.command()
def run_worker() -> None:
    """Discover the product manifest and start the Celery worker.

    The first discovered product, or ``None`` when nothing is discovered, is
    handed to ``create_celery_app`` together with the cached settings.
    """
    from pulse_engine.config import get_settings
    from pulse_engine.registry import discover_products
    from pulse_engine.worker import create_celery_app

    discovered = discover_products()
    if discovered:
        manifest = discovered[0]
    else:
        manifest = None

    worker_app = create_celery_app(get_settings(), manifest)
    worker_app.worker_main(["worker", "--loglevel=info"])
147
+
148
+
149
+ # ---------------------------------------------------------------------------
150
+ # pulse run-mcp
151
+ # ---------------------------------------------------------------------------
152
+
153
+
154
@app.command()
def run_mcp() -> None:
    """Discover the product manifest and start the MCP server.

    The first discovered product, or ``None`` when nothing is discovered, is
    passed to ``run_mcp_server``, which is driven to completion with
    ``asyncio.run``.
    """
    import asyncio

    from pulse_engine.mcp.server import run_mcp_server
    from pulse_engine.registry import discover_products

    discovered = discover_products()
    if discovered:
        manifest = discovered[0]
    else:
        manifest = None

    asyncio.run(run_mcp_server(manifest))
166
+
167
+
168
+ if __name__ == "__main__":
169
+ app()
@@ -0,0 +1,4 @@
1
+ {
2
+ "product_name": "my-product",
3
+ "product_slug": "my_product"
4
+ }
@@ -0,0 +1,13 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .venv/
7
+ .env
8
+ .mypy_cache/
9
+ .pytest_cache/
10
+ .ruff_cache/
11
+ .coverage
12
+ htmlcov/
13
+ *.egg
@@ -0,0 +1,32 @@
1
# --- Build stage: resolve dependencies and install the product package -----
FROM python:3.12-slim AS builder

# Build at the same path the runtime stage uses. A virtualenv is not
# relocatable: console scripts such as `pulse` embed the absolute path of the
# venv interpreter in their shebang line, so building under a different
# directory and copying the venv would leave those scripts unrunnable.
WORKDIR /app

RUN pip install --no-cache-dir poetry==1.8.5

# Install third-party dependencies first so this layer is cached until
# pyproject.toml changes. --no-root: the package sources are not copied yet.
COPY pyproject.toml ./
RUN poetry config virtualenvs.in-project true && \
    poetry install --only main --no-root --no-interaction --no-ansi

# Now install the product package itself so that it is importable and its
# `pulse_engine.products` entry point is registered for discovery.
COPY src/ src/
RUN poetry install --only-root --no-interaction --no-ansi

# --- Runtime stage ----------------------------------------------------------
FROM python:3.12-slim AS runtime

RUN groupadd -g 1001 appuser && \
    useradd -u 1001 -g appuser -s /bin/false -m appuser

WORKDIR /app

# The root package is installed in editable mode, so the venv refers to
# /app/src; both must be copied to the same locations they were built at.
COPY --from=builder /app/.venv .venv
COPY --from=builder /app/src src

ENV PATH="/app/.venv/bin:$PATH"

USER appuser

EXPOSE 8000

HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/v1/health')"

CMD ["pulse", "run", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,17 @@
1
+ name: {{cookiecutter.product_name}}
2
+ version: "1.0"
3
+ modules:
4
+ - name: extractor
5
+ compute: ecs
6
+ retries: 2
7
+ timeout: 900
8
+ - name: storage
9
+ compute: ecs
10
+
11
+ dag:
12
+ - step: extract
13
+ module: extractor
14
+ - step: store
15
+ module: storage
16
+ depends_on:
17
+ - step: extract
@@ -0,0 +1,25 @@
1
+ [tool.poetry]
2
+ name = "pulse-{{cookiecutter.product_name}}"
3
+ version = "0.1.0"
4
+ description = "Pulse product: {{cookiecutter.product_name}}"
5
+ authors = ["Pulse Team"]
6
+ packages = [{include = "pulse_{{cookiecutter.product_slug}}", from = "src"}]
7
+
8
+ [tool.poetry.dependencies]
9
+ python = ">=3.11,<3.13"
10
+ pulse-engine = "*"
11
+
12
+ [tool.poetry.group.dev.dependencies]
13
+ pytest = "^8.3.0"
14
+ pytest-asyncio = "^0.25.0"
15
+
16
+ [tool.poetry.plugins."pulse_engine.products"]
17
+ {{cookiecutter.product_slug}} = "pulse_{{cookiecutter.product_slug}}:manifest"
18
+
19
+ [build-system]
20
+ requires = ["poetry-core"]
21
+ build-backend = "poetry.core.masonry.api"
22
+
23
+ [tool.pytest.ini_options]
24
+ asyncio_mode = "auto"
25
+ testpaths = ["tests"]
@@ -0,0 +1,8 @@
1
+ """Pulse product: {{cookiecutter.product_name}}."""
2
+
3
+ from pulse_engine.registry import ProductManifest
4
+
5
+ manifest = ProductManifest(
6
+ name="{{cookiecutter.product_name}}",
7
+ version="0.1.0",
8
+ )
@@ -0,0 +1,15 @@
1
+ """Tests for the product manifest."""
2
+
3
+ from pulse_engine.registry import validate_manifest
4
+
5
+ from pulse_{{cookiecutter.product_slug}} import manifest
6
+
7
+
8
def test_manifest_is_valid() -> None:
    """The scaffolded manifest must pass validation with an empty error list."""
    problems = validate_manifest(manifest)
    assert problems == [], f"Manifest validation failed: {problems}"
11
+
12
+
13
def test_manifest_metadata() -> None:
    """The manifest must carry the name and version written by the template."""
    expected_name = "{{cookiecutter.product_name}}"
    expected_version = "0.1.0"
    assert manifest.name == expected_name
    assert manifest.version == expected_version
pulse_engine/client.py ADDED
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, cast
4
+
5
+ import httpx
6
+ import structlog
7
+ from jose import jwt as jose_jwt
8
+
9
+ logger = structlog.get_logger(__name__)
10
+
11
+
12
class PulseEngineClient:
    """Client used inside container stages to call Pulse Engine REST API.

    Every request carries the job token as a bearer credential. The client
    owns an ``httpx.AsyncClient``; release it with ``await client.close()``
    or by using the client as an async context manager::

        async with PulseEngineClient(url, token, job_id) as client:
            await client.report_status("extract", "completed")
    """

    def __init__(
        self,
        base_url: str,
        token: str,
        job_id: str,
        timeout: float = 30.0,
    ) -> None:
        """Create a client bound to one job.

        Args:
            base_url: Root URL of the Pulse Engine API; a trailing slash is
                stripped.
            token: Job token, sent as ``Authorization: Bearer <token>``.
            job_id: Identifier of the job this client reports on.
            timeout: Timeout passed to the underlying ``httpx.AsyncClient``.
        """
        self._base_url = base_url.rstrip("/")
        self._job_id = job_id
        # The claims are read WITHOUT verifying the signature and are only
        # used to pick which orchestrator/compute values to forward.
        # NOTE(review): the original comment states the signature was already
        # verified by Pulse Engine before the token was injected -- confirm.
        try:
            self._token_claims: dict[str, Any] = jose_jwt.get_unverified_claims(token)
        except Exception as exc:
            # Best effort: an unreadable token must not stop the stage, but
            # falling back to default backends silently is hard to debug.
            logger.warning(
                "job_token_claims_unreadable",
                job_id=job_id,
                error=str(exc),
            )
            self._token_claims = {}
        self._client = httpx.AsyncClient(
            base_url=self._base_url,
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            timeout=timeout,
        )

    async def __aenter__(self) -> PulseEngineClient:
        """Return the client itself for use in ``async with``."""
        return self

    async def __aexit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
        """Close the underlying HTTP client when the ``async with`` block exits."""
        await self.close()

    @property
    def job_id(self) -> str:
        """Identifier of the job this client was created for."""
        return self._job_id

    async def report_status(
        self,
        stage: str,
        status: str,
        error: str | None = None,
        result_summary: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Report stage status back to Pulse Engine.

        ``error`` and ``result_summary`` are included in the payload only
        when they are not ``None``.

        Returns:
            The decoded JSON response body.

        Raises:
            httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
        """
        payload: dict[str, Any] = {"stage": stage, "status": status}
        if error is not None:
            payload["error"] = error
        if result_summary is not None:
            payload["result_summary"] = result_summary
        resp = await self._client.post(
            f"/api/v1/jobs/{self._job_id}/status",
            json=payload,
        )
        resp.raise_for_status()
        return cast(dict[str, Any], resp.json())

    async def store_documents(self, documents: list[dict[str, Any]]) -> dict[str, Any]:
        """Store processed documents in the knowledge base.

        Returns:
            The decoded JSON response body.

        Raises:
            httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
        """
        resp = await self._client.post(
            "/api/v1/kb/documents",
            json={"documents": documents},
        )
        resp.raise_for_status()
        return cast(dict[str, Any], resp.json())

    async def trigger_next_stage(
        self,
        product: str,
        next_stage: str,
        chain: bool = True,
        config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Trigger the next stage, forwarding the same orchestrator+compute backend.

        The ``orchestrator`` and ``compute`` values come from the job token's
        claims and default to ``"prefect"`` and ``"ecs"`` when a claim is
        absent. ``config`` is sent only when it is non-empty.

        Returns:
            The decoded JSON response body.

        Raises:
            httpx.HTTPStatusError: if the API answers with a 4xx/5xx status.
        """
        payload: dict[str, Any] = {
            "product": product,
            "stage": next_stage,
            "chain": chain,
            "job_id": self._job_id,
            "orchestrator": self._token_claims.get("orchestrator", "prefect"),
            "compute": self._token_claims.get("compute", "ecs"),
        }
        if config:
            payload["config"] = config
        resp = await self._client.post("/api/v1/jobs/", json=payload)
        resp.raise_for_status()
        return cast(dict[str, Any], resp.json())

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()
pulse_engine/config.py ADDED
@@ -0,0 +1,157 @@
1
+ from functools import lru_cache
2
+
3
+ from pydantic_settings import BaseSettings
4
+
5
+
6
class Settings(BaseSettings):
    """Pulse Engine configuration.

    Values are declared as pydantic-settings fields; ``model_config`` points
    at a ``.env`` file and ignores unknown keys. The properties at the bottom
    derive convenience values from the raw fields.
    """

    # --- Application -------------------------------------------------------
    app_env: str = "development"
    app_version: str = "0.1.0"
    debug: bool = False
    log_level: str = "INFO"

    # --- AWS core credentials and Cognito ----------------------------------
    aws_access_key_id: str = ""
    aws_secret_access_key: str = ""
    aws_region: str = "ap-south-1"
    cognito_user_pool_id: str = ""
    cognito_app_client_id: str = ""
    cognito_app_client_secret: str = ""

    # --- OpenSearch (hosted AWS) -------------------------------------------
    opensearch_url: str = ""
    opensearch_username: str = ""
    opensearch_password: str = ""
    opensearch_use_ssl: bool = True
    opensearch_verify_certs: bool = True
    opensearch_index_prefix: str = "pulse_kb"

    # --- Prefect -----------------------------------------------------------
    prefect_api_url: str = ""
    prefect_database_url: str = ""

    # --- Athena (separate credentials; the product picks the database) ------
    athena_aws_access_key_id: str = ""
    athena_aws_secret_access_key: str = ""
    athena_output_location: str = ""
    athena_workgroup: str = "primary"
    athena_query_timeout_seconds: int = 60
    embedding_dimension: int = 1536

    # --- Application database ----------------------------------------------
    database_url: str = ""

    # --- Redis / Celery ----------------------------------------------------
    redis_url: str = "redis://localhost:6379/0"
    # Both Celery URLs fall back to redis_url when left empty (see the
    # effective_* properties below).
    celery_broker_url: str = ""
    celery_result_backend: str = ""

    # --- Orchestrator ------------------------------------------------------
    pulse_orchestrator_backend: str = "none"
    prefect_api_key: str = ""

    # --- Prefect ECS backend -----------------------------------------------
    prefect_ecs_work_pool_name: str = "pipeline-ecs"
    # ECR image used to run pipeline_flow in the ECS work pool.
    prefect_engine_image: str = ""
    # Overrides the compute backend for all pipeline steps ("ecs", "local", ...).
    pipeline_default_compute: str = ""

    # --- Prefect Lambda backend --------------------------------------------
    prefect_lambda_work_pool_name: str = "lambda-worker-pool"
    prefect_lambda_function_name_template: str = "{product}-{stage}"

    # --- Prefect Kubernetes backend ----------------------------------------
    prefect_k8s_work_pool_name: str = "k8s-worker-pool"
    prefect_k8s_namespace: str = "pulse-jobs"
    prefect_k8s_default_cpu: str = "500m"
    prefect_k8s_default_memory: str = "1Gi"
    pulse_max_concurrent_jobs_per_tenant: int = 10
    pulse_embedding_model: str = "text-embedding-3-small"
    pulse_embedding_provider: str = "openai"
    pulse_openai_embedding_model: str = "text-embedding-3-small"
    # Falls back to pulse_llm_api_key.
    pulse_openai_api_key: str = ""

    # --- LLM ---------------------------------------------------------------
    pulse_llm_provider: str = "openai"
    pulse_llm_model: str = "gpt-4o-mini"
    pulse_llm_api_key: str = ""
    pulse_llm_temperature: float = 0.0
    pulse_default_chunk_size: int = 512
    pulse_default_chunk_strategy: str = "token_count"
    pulse_job_callback_timeout: int = 10
    pulse_dedup_similarity_threshold: float = 0.95

    # --- Pipeline separation -----------------------------------------------
    # Public URL containers use to call back.
    pulse_engine_url: str = ""
    # HMAC secret for job-scoped JWTs.
    pulse_job_token_secret: str = ""
    # S3 bucket for inter-stage data.
    pulse_s3_bucket: str = ""
    # Seconds before auto-triggering the next stage.
    pulse_chain_grace_period_seconds: int = 300

    # --- Pipeline infrastructure (populated from Terraform outputs) ---------
    # ECS task definition family.
    pipeline_task_definition: str = "pulse-pipeline-step"
    # ECS cluster for pipeline steps.
    pipeline_cluster_name: str = ""
    # ECS task execution role.
    pipeline_execution_role_arn: str = ""
    # ECS task role (S3, Lambda, ECS permissions).
    pipeline_task_role_arn: str = ""
    # CloudWatch log group for ECS steps.
    pipeline_log_group: str = ""
    # Comma-separated private subnet IDs.
    pipeline_subnets: str = ""
    # Comma-separated security group IDs.
    pipeline_security_groups: str = ""
    # Lambda execution role.
    lambda_execution_role_arn: str = ""
    # Comma-separated subnet IDs for the Lambda VPC.
    lambda_subnets: str = ""
    # Comma-separated security group IDs for Lambda.
    lambda_security_groups: str = ""
    # CloudWatch log group for Lambda steps.
    lambda_log_group: str = ""

    # --- Local dev auth bypass ---------------------------------------------
    # Set this to use MockTokenVerifier instead of Cognito.
    mock_jwt_secret: str = ""

    # --- MCP server --------------------------------------------------------
    mcp_transport: str = "sse"
    mcp_sse_host: str = "127.0.0.1"
    mcp_sse_port: int = 8001

    @staticmethod
    def _split_csv(raw: str) -> list[str]:
        """Split a comma-separated string, trimming items and dropping blanks."""
        trimmed = (item.strip() for item in raw.split(","))
        return [item for item in trimmed if item]

    @property
    def effective_celery_broker_url(self) -> str:
        """Celery broker URL, or ``redis_url`` when none is configured."""
        if self.celery_broker_url:
            return self.celery_broker_url
        return self.redis_url

    @property
    def effective_celery_result_backend(self) -> str:
        """Celery result backend URL, or ``redis_url`` when none is configured."""
        if self.celery_result_backend:
            return self.celery_result_backend
        return self.redis_url

    @property
    def cognito_jwks_url(self) -> str:
        """URL of the user pool's JWKS document, built from region and pool ID."""
        return f"{self.cognito_issuer}/.well-known/jwks.json"

    @property
    def cognito_issuer(self) -> str:
        """Cognito issuer URL, built from the AWS region and user pool ID."""
        host = f"cognito-idp.{self.aws_region}.amazonaws.com"
        return f"https://{host}/{self.cognito_user_pool_id}"

    @property
    def pipeline_subnet_list(self) -> list[str]:
        """``pipeline_subnets`` parsed into a list of IDs."""
        return self._split_csv(self.pipeline_subnets)

    @property
    def pipeline_sg_list(self) -> list[str]:
        """``pipeline_security_groups`` parsed into a list of IDs."""
        return self._split_csv(self.pipeline_security_groups)

    @property
    def lambda_subnet_list(self) -> list[str]:
        """``lambda_subnets`` parsed into a list of IDs."""
        return self._split_csv(self.lambda_subnets)

    @property
    def lambda_sg_list(self) -> list[str]:
        """``lambda_security_groups`` parsed into a list of IDs."""
        return self._split_csv(self.lambda_security_groups)

    model_config = {"env_file": ".env", "extra": "ignore"}
153
+
154
+
155
@lru_cache
def get_settings() -> Settings:
    """Return the ``Settings`` instance, built on first call and cached afterwards."""
    settings = Settings()
    return settings
File without changes
@@ -0,0 +1,64 @@
1
+ import structlog
2
+ from fastapi import FastAPI, Request
3
+ from fastapi.exceptions import RequestValidationError
4
+ from fastapi.responses import JSONResponse
5
+
6
+ from pulse_engine.core.exceptions import AppError
7
+
8
+ logger = structlog.get_logger()
9
+
10
+
11
+ def _get_request_id(request: Request) -> str:
12
+ return getattr(request.state, "request_id", "unknown")
13
+
14
+
15
async def app_error_handler(request: Request, exc: AppError) -> JSONResponse:
    """Log an ``AppError`` and turn it into a JSON error response.

    The error's ``context`` is attached to the log entry only; the response
    body contains ``success``, ``error`` (the exception message) and
    ``request_id``, and uses the exception's ``status_code``.
    """
    request_id = _get_request_id(request)

    # Context keys are caller-supplied. A key that matches one of the
    # keywords passed to the logger below (or its positional ``event``)
    # would raise TypeError for a duplicate argument and turn this error
    # into a 500, so such keys are logged under a ``context_`` prefix.
    reserved = {"event", "error", "status_code", "request_id"}
    log_fields = {
        (f"context_{key}" if key in reserved else key): value
        for key, value in exc.context.items()
    }
    log_fields.update(
        error=exc.message,
        status_code=exc.status_code,
        request_id=request_id,
    )
    logger.warning("app_error", **log_fields)

    return JSONResponse(
        status_code=exc.status_code,
        content={
            "success": False,
            "error": exc.message,
            "request_id": request_id,
        },
    )
32
+
33
+
34
async def validation_error_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """Answer a request-validation failure with a generic 422 JSON body.

    The individual validation errors carried by ``exc`` are not included in
    the response.
    """
    body = {
        "success": False,
        "error": "Validation error",
        "request_id": _get_request_id(request),
    }
    return JSONResponse(status_code=422, content=body)
46
+
47
+
48
async def catch_all_handler(request: Request, exc: Exception) -> JSONResponse:
    """Log an unhandled exception and answer with a generic 500 JSON body.

    The exception text is not placed in the response.
    """
    request_id = _get_request_id(request)
    logger.exception("unhandled_exception", request_id=request_id)
    body = {
        "success": False,
        "error": "Internal server error",
        "request_id": request_id,
    }
    return JSONResponse(status_code=500, content=body)
59
+
60
+
61
def register_exception_handlers(app: FastAPI) -> None:
    """Register this module's three exception handlers on ``app``.

    ``AppError`` and ``RequestValidationError`` get their dedicated handlers;
    every other ``Exception`` goes to the catch-all handler.
    """
    handlers = (
        (AppError, app_error_handler),
        (RequestValidationError, validation_error_handler),
        (Exception, catch_all_handler),
    )
    for exc_type, handler in handlers:
        app.add_exception_handler(exc_type, handler)  # type: ignore[arg-type]
@@ -0,0 +1,67 @@
1
+ from typing import Any
2
+
3
+
4
class AppError(Exception):
    """Base class for application errors that map to an HTTP status code.

    Attributes set on each instance:
        message: Human-readable error text (also the exception's ``str()``).
        context: Extra keyword arguments supplied when the error was raised.
    """

    # HTTP status associated with this error type; subclasses override it.
    status_code: int = 500
    is_operational: bool = True

    def __init__(self, message: str = "Internal server error", **context: Any) -> None:
        super().__init__(message)
        self.context = context
        self.message = message


class BadRequestError(AppError):
    """HTTP 400 error; the message defaults to "Bad request"."""

    status_code = 400

    def __init__(self, message: str = "Bad request", **context: Any) -> None:
        super().__init__(message, **context)


class UnauthorizedError(AppError):
    """HTTP 401 error; the message defaults to "Unauthorized"."""

    status_code = 401

    def __init__(self, message: str = "Unauthorized", **context: Any) -> None:
        super().__init__(message, **context)


class ForbiddenError(AppError):
    """HTTP 403 error; the message defaults to "Forbidden"."""

    status_code = 403

    def __init__(self, message: str = "Forbidden", **context: Any) -> None:
        super().__init__(message, **context)


class NotFoundError(AppError):
    """HTTP 404 error; the message defaults to "Not found"."""

    status_code = 404

    def __init__(self, message: str = "Not found", **context: Any) -> None:
        super().__init__(message, **context)


class ConflictError(AppError):
    """HTTP 409 error; the message defaults to "Conflict"."""

    status_code = 409

    def __init__(self, message: str = "Conflict", **context: Any) -> None:
        super().__init__(message, **context)


class UnprocessableEntityError(AppError):
    """HTTP 422 error; the message defaults to "Unprocessable entity"."""

    status_code = 422

    def __init__(self, message: str = "Unprocessable entity", **context: Any) -> None:
        super().__init__(message, **context)


class TooManyRequestsError(AppError):
    """HTTP 429 error; the message defaults to "Too many requests"."""

    status_code = 429

    def __init__(self, message: str = "Too many requests", **context: Any) -> None:
        super().__init__(message, **context)


class ServiceUnavailableError(AppError):
    """HTTP 503 error; the message defaults to "Service unavailable"."""

    status_code = 503

    def __init__(self, message: str = "Service unavailable", **context: Any) -> None:
        super().__init__(message, **context)