pulse-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse_engine/__init__.py +0 -0
- pulse_engine/adapters/__init__.py +58 -0
- pulse_engine/adapters/audio_transcription.py +167 -0
- pulse_engine/adapters/batcher.py +36 -0
- pulse_engine/adapters/digital_news.py +128 -0
- pulse_engine/adapters/digital_news_metadata.py +536 -0
- pulse_engine/adapters/exceptions.py +10 -0
- pulse_engine/adapters/models.py +134 -0
- pulse_engine/adapters/opensearch_storage.py +160 -0
- pulse_engine/adapters/speech_content.py +130 -0
- pulse_engine/adapters/speech_metadata.py +374 -0
- pulse_engine/adapters/twitter.py +423 -0
- pulse_engine/adapters/youtube_downloader.py +186 -0
- pulse_engine/adapters/youtube_metadata.py +261 -0
- pulse_engine/api/__init__.py +0 -0
- pulse_engine/api/v1/__init__.py +0 -0
- pulse_engine/api/v1/auth.py +91 -0
- pulse_engine/api/v1/health.py +62 -0
- pulse_engine/api/v1/router.py +16 -0
- pulse_engine/chain_recovery.py +131 -0
- pulse_engine/cli/__init__.py +0 -0
- pulse_engine/cli/main.py +169 -0
- pulse_engine/cli/templates/cookiecutter.json +4 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
- pulse_engine/client.py +95 -0
- pulse_engine/config.py +157 -0
- pulse_engine/core/__init__.py +0 -0
- pulse_engine/core/error_handlers.py +64 -0
- pulse_engine/core/exceptions.py +67 -0
- pulse_engine/core/job_token.py +109 -0
- pulse_engine/core/logging.py +45 -0
- pulse_engine/core/scope.py +23 -0
- pulse_engine/core/security.py +130 -0
- pulse_engine/database.py +30 -0
- pulse_engine/dependencies.py +166 -0
- pulse_engine/deployment/__init__.py +0 -0
- pulse_engine/deployment/backend_deployment_repository.py +83 -0
- pulse_engine/deployment/backends/__init__.py +0 -0
- pulse_engine/deployment/backends/base.py +50 -0
- pulse_engine/deployment/backends/exceptions.py +20 -0
- pulse_engine/deployment/backends/native_lambda.py +125 -0
- pulse_engine/deployment/backends/prefect_ecs.py +116 -0
- pulse_engine/deployment/backends/prefect_k8s.py +131 -0
- pulse_engine/deployment/backends/registry.py +50 -0
- pulse_engine/deployment/infra_provisioner.py +285 -0
- pulse_engine/deployment/job_launcher.py +178 -0
- pulse_engine/deployment/models.py +48 -0
- pulse_engine/deployment/repository.py +54 -0
- pulse_engine/deployment/router.py +22 -0
- pulse_engine/deployment/schemas.py +18 -0
- pulse_engine/deployment/service.py +65 -0
- pulse_engine/extractor/__init__.py +0 -0
- pulse_engine/extractor/adapters/__init__.py +0 -0
- pulse_engine/extractor/base.py +48 -0
- pulse_engine/extractor/models.py +50 -0
- pulse_engine/extractor/orchestrator/__init__.py +15 -0
- pulse_engine/extractor/orchestrator/base.py +34 -0
- pulse_engine/extractor/orchestrator/noop.py +37 -0
- pulse_engine/extractor/orchestrator/prefect.py +163 -0
- pulse_engine/extractor/repository.py +163 -0
- pulse_engine/extractor/router.py +102 -0
- pulse_engine/extractor/schemas.py +93 -0
- pulse_engine/extractor/service.py +431 -0
- pulse_engine/extractor/stage_models.py +36 -0
- pulse_engine/extractor/stage_repository.py +109 -0
- pulse_engine/main.py +195 -0
- pulse_engine/mcp/__init__.py +0 -0
- pulse_engine/mcp/__main__.py +5 -0
- pulse_engine/mcp/server.py +108 -0
- pulse_engine/mcp/tools_jobs.py +159 -0
- pulse_engine/mcp/tools_kb.py +88 -0
- pulse_engine/mcp/tools_modules.py +115 -0
- pulse_engine/mcp/tools_pipelines.py +215 -0
- pulse_engine/mcp/tools_processor.py +208 -0
- pulse_engine/middleware/__init__.py +0 -0
- pulse_engine/middleware/rate_limit.py +144 -0
- pulse_engine/middleware/request_id.py +16 -0
- pulse_engine/middleware/security_headers.py +25 -0
- pulse_engine/middleware/tenant.py +90 -0
- pulse_engine/pipeline/__init__.py +0 -0
- pulse_engine/pipeline/config_parser.py +148 -0
- pulse_engine/pipeline/expression.py +268 -0
- pulse_engine/pipeline/models.py +98 -0
- pulse_engine/pipeline/repositories.py +224 -0
- pulse_engine/pipeline/router_modules.py +66 -0
- pulse_engine/pipeline/router_pipelines.py +198 -0
- pulse_engine/pipeline/schemas.py +200 -0
- pulse_engine/pipeline/service.py +250 -0
- pulse_engine/pipeline/translators/__init__.py +44 -0
- pulse_engine/pipeline/translators/airflow_status.py +11 -0
- pulse_engine/pipeline/translators/airflow_translator.py +22 -0
- pulse_engine/pipeline/translators/base.py +42 -0
- pulse_engine/pipeline/translators/prefect_status.py +93 -0
- pulse_engine/pipeline/translators/prefect_translator.py +195 -0
- pulse_engine/processor/__init__.py +0 -0
- pulse_engine/processor/base.py +36 -0
- pulse_engine/processor/core/__init__.py +0 -0
- pulse_engine/processor/core/analysis.py +148 -0
- pulse_engine/processor/core/chunking.py +158 -0
- pulse_engine/processor/core/prompts.py +340 -0
- pulse_engine/processor/core/topic_splitter.py +105 -0
- pulse_engine/processor/defaults/__init__.py +11 -0
- pulse_engine/processor/defaults/core_processor.py +12 -0
- pulse_engine/processor/defaults/postprocessor.py +12 -0
- pulse_engine/processor/defaults/preprocessor.py +12 -0
- pulse_engine/processor/llm/__init__.py +0 -0
- pulse_engine/processor/llm/provider.py +58 -0
- pulse_engine/processor/ocr/gemini.py +52 -0
- pulse_engine/processor/pipeline.py +107 -0
- pulse_engine/processor/postprocessor/__init__.py +0 -0
- pulse_engine/processor/postprocessor/embeddings.py +34 -0
- pulse_engine/processor/postprocessor/tasks.py +180 -0
- pulse_engine/processor/preprocessor/__init__.py +0 -0
- pulse_engine/processor/preprocessor/tasks.py +71 -0
- pulse_engine/processor/router.py +192 -0
- pulse_engine/processor/schemas.py +167 -0
- pulse_engine/registry.py +117 -0
- pulse_engine/runners/__init__.py +0 -0
- pulse_engine/runners/lambda_runner.py +26 -0
- pulse_engine/runners/pipeline_runner.py +43 -0
- pulse_engine/runners/prefect_pipeline_flow.py +904 -0
- pulse_engine/runners/prefect_runner.py +33 -0
- pulse_engine/s3.py +72 -0
- pulse_engine/secrets.py +46 -0
- pulse_engine/services/__init__.py +0 -0
- pulse_engine/services/bootstrap.py +211 -0
- pulse_engine/services/opensearch.py +84 -0
- pulse_engine/storage/__init__.py +0 -0
- pulse_engine/storage/connectors/__init__.py +0 -0
- pulse_engine/storage/connectors/athena.py +226 -0
- pulse_engine/storage/connectors/base.py +32 -0
- pulse_engine/storage/connectors/opensearch.py +344 -0
- pulse_engine/storage/knowledge_base.py +68 -0
- pulse_engine/storage/router.py +78 -0
- pulse_engine/storage/schemas.py +93 -0
- pulse_engine/testing/__init__.py +13 -0
- pulse_engine/testing/fixtures.py +50 -0
- pulse_engine/testing/mocks.py +104 -0
- pulse_engine/worker.py +53 -0
- pulse_engine-0.2.0.dist-info/METADATA +654 -0
- pulse_engine-0.2.0.dist-info/RECORD +150 -0
- pulse_engine-0.2.0.dist-info/WHEEL +4 -0
- pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
pulse_engine/cli/main.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""Pulse Engine CLI — scaffold, validate, and run products."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import importlib
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(name="pulse", help="Pulse Engine CLI", no_args_is_help=True)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# ---------------------------------------------------------------------------
|
|
15
|
+
# pulse init
|
|
16
|
+
# ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@app.command()
def init(
    product_name: Annotated[
        str, typer.Argument(help="Name of the new product (e.g. 'political')")
    ],
    output_dir: Annotated[
        Path, typer.Option("--output-dir", "-o", help="Where to scaffold")
    ] = Path("."),
) -> None:
    """Scaffold a new Pulse product from the built-in template.

    Args:
        product_name: Human-supplied product name. It is normalised into a
            Python-safe slug (lowercase, underscores) and a hyphenated display
            name, both of which are passed to the cookiecutter template.
        output_dir: Directory in which the project directory is created.

    Raises:
        typer.BadParameter: If the name cannot be turned into a valid Python
            package name (the template creates a ``pulse_<slug>`` package).
    """
    from cookiecutter.main import cookiecutter

    template_dir = str(Path(__file__).parent / "templates")

    # Normalise name: lowercase, underscores for Python. Surrounding whitespace
    # is dropped first so it does not turn into leading/trailing underscores.
    slug = product_name.strip().lower().replace("-", "_").replace(" ", "_")

    # The template generates a package called ``pulse_<slug>``; reject names
    # that would yield an unimportable package instead of scaffolding a
    # broken project.
    if not slug or not f"pulse_{slug}".isidentifier():
        raise typer.BadParameter(
            f"{product_name!r} cannot be turned into a valid Python package name; "
            "use letters, digits, spaces, hyphens or underscores only",
            param_hint="product_name",
        )

    display = slug.replace("_", "-")

    result = cookiecutter(
        template_dir,
        output_dir=str(output_dir),
        no_input=True,
        extra_context={
            "product_name": display,
            "product_slug": slug,
        },
    )
    typer.echo(f"Scaffolded product at {result}")
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# ---------------------------------------------------------------------------
|
|
50
|
+
# pulse validate
|
|
51
|
+
# ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _import_manifest(dotted_path: str) -> object | None:
    """Resolve *dotted_path* to a manifest object, or ``None`` if absent.

    Two spellings are accepted:

    * a module path, in which case the module's ``manifest`` attribute is used;
    * a ``module.attribute`` path (the form shown in the CLI help), used only
      when the full path is not itself an importable module.

    Raises:
        ImportError: If neither the path nor its parent module can be imported.
    """
    try:
        mod = importlib.import_module(dotted_path)
    except ModuleNotFoundError as exc:
        parent, _, attr = dotted_path.rpartition(".")
        # Only fall back when the *requested* path is what is missing; a
        # missing dependency inside the module must not be masked.
        if exc.name != dotted_path or not parent:
            raise
        return getattr(importlib.import_module(parent), attr, None)
    return getattr(mod, "manifest", None)


@app.command()
def validate(
    module: Annotated[
        str,
        typer.Argument(
            help="Dotted import path to the manifest (e.g. 'pulse_political.manifest')"
        ),
    ] = "",
) -> None:
    """Import a product manifest and run validation checks.

    With no argument, every product returned by ``discover_products()`` is
    validated. Exits with code 1 when a manifest cannot be loaded, when no
    products are discovered, or when any manifest reports validation errors.
    """
    from pulse_engine.registry import discover_products, validate_manifest

    if module:
        try:
            manifest = _import_manifest(module)
        except ImportError as exc:
            # Report a clean CLI error instead of an unhandled traceback.
            typer.echo(f"Could not import {module}: {exc}", err=True)
            raise typer.Exit(code=1) from exc
        if manifest is None:
            typer.echo(f"No 'manifest' attribute found in {module}", err=True)
            raise typer.Exit(code=1)
        manifests = [manifest]
    else:
        manifests = discover_products()
        if not manifests:
            typer.echo("No products discovered via entry points.", err=True)
            raise typer.Exit(code=1)

    all_ok = True
    for m in manifests:
        errors = validate_manifest(m)
        if errors:
            all_ok = False
            typer.echo(f"[FAIL] {m.name} v{m.version}:")
            for e in errors:
                typer.echo(f"  - {e}")
        else:
            typer.echo(f"[OK]   {m.name} v{m.version}")

    if not all_ok:
        raise typer.Exit(code=1)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# ---------------------------------------------------------------------------
|
|
97
|
+
# pulse run
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@app.command()
def run(
    host: Annotated[str, typer.Option(help="Bind host")] = "0.0.0.0",
    port: Annotated[int, typer.Option(help="Bind port")] = 8000,
    reload: Annotated[bool, typer.Option(help="Enable auto-reload")] = False,
) -> None:
    """Discover the product manifest and start the FastAPI server.

    The first discovered product (if any) is only announced on stdout; the
    server itself is started from the ``create_app`` factory.
    """
    import uvicorn

    from pulse_engine.registry import discover_products

    discovered = discover_products()
    if discovered and discovered[0]:
        primary = discovered[0]
        typer.echo(f"Starting {primary.name} v{primary.version}")

    # A factory import string (rather than an app object) lets uvicorn import
    # and call create_app itself.
    uvicorn.run(
        "pulse_engine.main:create_app",
        host=host,
        port=port,
        reload=reload,
        factory=True,
    )
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# ---------------------------------------------------------------------------
|
|
129
|
+
# pulse run-worker
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@app.command()
def run_worker() -> None:
    """Discover the product manifest and start the Celery worker.

    The first discovered product, or ``None`` when there is none, is handed to
    ``create_celery_app`` together with the application settings.
    """
    from pulse_engine.config import get_settings
    from pulse_engine.registry import discover_products
    from pulse_engine.worker import create_celery_app

    discovered = discover_products()
    selected = None
    if discovered:
        selected = discovered[0]

    worker_app = create_celery_app(get_settings(), selected)
    worker_app.worker_main(["worker", "--loglevel=info"])
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ---------------------------------------------------------------------------
|
|
150
|
+
# pulse run-mcp
|
|
151
|
+
# ---------------------------------------------------------------------------
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@app.command()
def run_mcp() -> None:
    """Discover the product manifest and start the MCP server.

    Runs ``run_mcp_server`` to completion on a fresh asyncio event loop,
    passing the first discovered product or ``None`` when there is none.
    """
    import asyncio

    from pulse_engine.mcp.server import run_mcp_server
    from pulse_engine.registry import discover_products

    discovered = discover_products()
    selected = None
    if discovered:
        selected = discovered[0]

    asyncio.run(run_mcp_server(selected))
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
# Invoke the Typer application when this file is executed as a script.
if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# ---- Build stage: resolve dependencies into an in-project virtualenv ----
FROM python:3.12-slim AS builder

# Build under the same path the runtime stage uses. Virtualenvs are not
# relocatable: console-script shebangs and editable-install .pth files embed
# absolute paths, so a venv built in /build would be broken once copied to /app.
WORKDIR /app

RUN pip install --no-cache-dir poetry==1.8.5

COPY pyproject.toml ./
# Install third-party dependencies only (--no-root): the project sources are
# not copied yet, and keeping this layer separate lets Docker cache it until
# pyproject.toml changes.
RUN poetry config virtualenvs.in-project true && \
    poetry install --only main --no-root --no-interaction --no-ansi

COPY src/ src/
# Install the product package itself so that its "pulse_engine.products"
# entry point is registered inside the virtualenv.
RUN poetry install --only-root --no-interaction --no-ansi

# ---- Runtime stage: minimal image with the prepared venv and sources ----
FROM python:3.12-slim AS runtime

# Run as an unprivileged user with a fixed UID/GID.
RUN groupadd -g 1001 appuser && \
    useradd -u 1001 -g appuser -s /bin/false -m appuser

WORKDIR /app

COPY --from=builder /app/.venv .venv
COPY --from=builder /app/src src

ENV PATH="/app/.venv/bin:$PATH"

USER appuser

EXPOSE 8000

HEALTHCHECK --interval=30s --timeout=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/v1/health')"

CMD ["pulse", "run", "--host", "0.0.0.0", "--port", "8000"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Pipeline definition scaffolded for the product.
# NOTE(review): field semantics below are inferred from the key names; confirm
# against the pipeline config parser before relying on them.
name: {{cookiecutter.product_name}}
version: "1.0"

# Modules that DAG steps can refer to by name.
modules:
  - name: extractor
    compute: ecs
    retries: 2
    timeout: 900  # presumably seconds — TODO confirm
  - name: storage
    compute: ecs

# Execution graph: "store" runs after "extract".
dag:
  - step: extract
    module: extractor
  - step: store
    module: storage
    depends_on:
      - step: extract
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Poetry project definition for the scaffolded product.
[tool.poetry]
name = "pulse-{{cookiecutter.product_name}}"
version = "0.1.0"
description = "Pulse product: {{cookiecutter.product_name}}"
authors = ["Pulse Team"]
# The importable package lives under src/ (src layout).
packages = [{include = "pulse_{{cookiecutter.product_slug}}", from = "src"}]

[tool.poetry.dependencies]
python = ">=3.11,<3.13"
# NOTE(review): unconstrained — consider pinning a compatible pulse-engine range.
pulse-engine = "*"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.0"
pytest-asyncio = "^0.25.0"

# Entry point exposing the product's `manifest` object under the
# "pulse_engine.products" group (presumably what the CLI's entry-point
# discovery reads — confirm against pulse_engine.registry).
[tool.poetry.plugins."pulse_engine.products"]
{{cookiecutter.product_slug}} = "pulse_{{cookiecutter.product_slug}}:manifest"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""Tests for the product manifest."""
|
|
2
|
+
|
|
3
|
+
from pulse_engine.registry import validate_manifest
|
|
4
|
+
|
|
5
|
+
from pulse_{{cookiecutter.product_slug}} import manifest
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_manifest_is_valid() -> None:
    """The scaffolded manifest must produce an empty validation error list."""
    problems = validate_manifest(manifest)
    assert problems == [], f"Manifest validation failed: {problems}"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_manifest_metadata() -> None:
    """Check the manifest's name and version against the scaffolded values."""
    # The expected name is filled in by cookiecutter when the template renders.
    assert manifest.name == "{{cookiecutter.product_name}}"
    assert manifest.version == "0.1.0"
|
pulse_engine/client.py
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any, cast
|
|
4
|
+
|
|
5
|
+
import httpx
|
|
6
|
+
import structlog
|
|
7
|
+
from jose import jwt as jose_jwt
|
|
8
|
+
|
|
9
|
+
logger = structlog.get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PulseEngineClient:
    """Client used inside container stages to call Pulse Engine REST API.

    Every request carries the job token as a bearer credential. The client
    owns an ``httpx.AsyncClient``; release it with :meth:`close` or by using
    the client as an async context manager::

        async with PulseEngineClient(url, token, job_id) as client:
            await client.report_status("extract", "running")
    """

    def __init__(
        self,
        base_url: str,
        token: str,
        job_id: str,
        timeout: float = 30.0,
    ) -> None:
        """Create a client bound to one job.

        Args:
            base_url: Root URL of the Pulse Engine API; a trailing slash is
                stripped.
            token: Job token sent as ``Authorization: Bearer <token>``.
            job_id: Identifier of the job this client reports on.
            timeout: Timeout handed to the underlying ``httpx.AsyncClient``.
        """
        self._base_url = base_url.rstrip("/")
        self._job_id = job_id
        # Decode token without verification to read backend claims
        # (signature was already verified by Pulse Engine before injecting the token)
        try:
            self._token_claims: dict[str, Any] = jose_jwt.get_unverified_claims(token)
        except Exception as exc:
            # Best effort: the claims only select defaults in
            # trigger_next_stage, so keep going — but leave a trace rather
            # than failing silently. The token itself is never logged.
            logger.warning(
                "job_token_claims_unreadable", job_id=job_id, error=str(exc)
            )
            self._token_claims = {}
        self._client = httpx.AsyncClient(
            base_url=self._base_url,
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            timeout=timeout,
        )

    async def __aenter__(self) -> PulseEngineClient:
        """Return the client itself for use in ``async with``."""
        return self

    async def __aexit__(self, *exc_info: object) -> None:
        """Close the underlying HTTP client on leaving ``async with``."""
        await self.close()

    @property
    def job_id(self) -> str:
        """Identifier of the job this client is bound to."""
        return self._job_id

    async def _post_json(self, path: str, payload: dict[str, Any]) -> dict[str, Any]:
        """POST *payload* as JSON to *path* and return the decoded JSON body.

        Raises:
            httpx.HTTPStatusError: If the response status is 4xx or 5xx.
        """
        resp = await self._client.post(path, json=payload)
        resp.raise_for_status()
        return cast(dict[str, Any], resp.json())

    async def report_status(
        self,
        stage: str,
        status: str,
        error: str | None = None,
        result_summary: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Report stage status back to Pulse Engine.

        ``error`` and ``result_summary`` are included in the request body only
        when they are not ``None``.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        payload: dict[str, Any] = {"stage": stage, "status": status}
        if error is not None:
            payload["error"] = error
        if result_summary is not None:
            payload["result_summary"] = result_summary
        return await self._post_json(f"/api/v1/jobs/{self._job_id}/status", payload)

    async def store_documents(self, documents: list[dict[str, Any]]) -> dict[str, Any]:
        """Store processed documents in the knowledge base.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        return await self._post_json("/api/v1/kb/documents", {"documents": documents})

    async def trigger_next_stage(
        self,
        product: str,
        next_stage: str,
        chain: bool = True,
        config: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Trigger the next stage, forwarding the same orchestrator+compute backend.

        The ``orchestrator`` and ``compute`` values come from the job token's
        claims, defaulting to ``"prefect"`` and ``"ecs"`` when a claim is
        missing. ``config`` is sent only when it is non-empty.

        Raises:
            httpx.HTTPStatusError: If the API answers with an error status.
        """
        payload: dict[str, Any] = {
            "product": product,
            "stage": next_stage,
            "chain": chain,
            "job_id": self._job_id,
            "orchestrator": self._token_claims.get("orchestrator", "prefect"),
            "compute": self._token_claims.get("compute", "ecs"),
        }
        if config:
            payload["config"] = config
        return await self._post_json("/api/v1/jobs/", payload)

    async def close(self) -> None:
        """Close the underlying HTTP client and its connections."""
        await self._client.aclose()
|
pulse_engine/config.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
|
|
3
|
+
from pydantic_settings import BaseSettings
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _split_csv(raw: str) -> list[str]:
    """Split a comma-separated string into stripped, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


class Settings(BaseSettings):
    """Application settings.

    Values are read by pydantic-settings from the environment and from a
    ``.env`` file; keys that do not match a field are ignored (see
    ``model_config``).
    """

    app_env: str = "development"
    app_version: str = "0.1.0"
    debug: bool = False
    log_level: str = "INFO"

    # AWS core credentials
    aws_access_key_id: str = ""
    aws_secret_access_key: str = ""
    aws_region: str = "ap-south-1"
    cognito_user_pool_id: str = ""
    cognito_app_client_id: str = ""
    cognito_app_client_secret: str = ""

    # OpenSearch (hosted AWS)
    opensearch_url: str = ""
    opensearch_username: str = ""
    opensearch_password: str = ""
    opensearch_use_ssl: bool = True
    opensearch_verify_certs: bool = True
    opensearch_index_prefix: str = "pulse_kb"

    # Prefect
    prefect_api_url: str = ""
    prefect_database_url: str = ""

    # Athena (separate credentials — product specifies which database)
    athena_aws_access_key_id: str = ""
    athena_aws_secret_access_key: str = ""
    athena_output_location: str = ""
    athena_workgroup: str = "primary"
    athena_query_timeout_seconds: int = 60
    embedding_dimension: int = 1536

    # Database (app)
    database_url: str = ""

    # Redis / Celery
    redis_url: str = "redis://localhost:6379/0"
    celery_broker_url: str = ""  # defaults to redis_url if empty
    celery_result_backend: str = ""  # defaults to redis_url if empty

    # Orchestrator
    pulse_orchestrator_backend: str = "none"
    prefect_api_key: str = ""

    # Prefect ECS backend
    prefect_ecs_work_pool_name: str = "pipeline-ecs"
    # ECR image used to run pipeline_flow in ECS work pool
    prefect_engine_image: str = ""
    # Override compute backend for all pipeline steps ("ecs", "local", etc.)
    pipeline_default_compute: str = ""

    # Prefect Lambda backend
    prefect_lambda_work_pool_name: str = "lambda-worker-pool"
    prefect_lambda_function_name_template: str = "{product}-{stage}"

    # Prefect Kubernetes backend
    prefect_k8s_work_pool_name: str = "k8s-worker-pool"
    prefect_k8s_namespace: str = "pulse-jobs"
    prefect_k8s_default_cpu: str = "500m"
    prefect_k8s_default_memory: str = "1Gi"
    pulse_max_concurrent_jobs_per_tenant: int = 10
    pulse_embedding_model: str = "text-embedding-3-small"
    pulse_embedding_provider: str = "openai"
    pulse_openai_embedding_model: str = "text-embedding-3-small"
    pulse_openai_api_key: str = ""  # falls back to pulse_llm_api_key

    # LLM
    pulse_llm_provider: str = "openai"
    pulse_llm_model: str = "gpt-4o-mini"
    pulse_llm_api_key: str = ""
    pulse_llm_temperature: float = 0.0
    pulse_default_chunk_size: int = 512
    pulse_default_chunk_strategy: str = "token_count"
    pulse_job_callback_timeout: int = 10
    pulse_dedup_similarity_threshold: float = 0.95

    # Pipeline separation
    pulse_engine_url: str = ""  # public URL containers use to call back
    pulse_job_token_secret: str = ""  # HMAC secret for job-scoped JWTs
    pulse_s3_bucket: str = ""  # S3 bucket for inter-stage data
    pulse_chain_grace_period_seconds: int = 300  # seconds before auto-triggering next stage

    # Pipeline infrastructure — populated from Terraform outputs
    pipeline_task_definition: str = "pulse-pipeline-step"  # ECS task def family
    pipeline_cluster_name: str = ""  # ECS cluster for pipeline steps
    pipeline_execution_role_arn: str = ""  # ECS task execution role
    pipeline_task_role_arn: str = ""  # ECS task role (S3, Lambda, ECS perms)
    pipeline_log_group: str = ""  # CloudWatch log group for ECS steps
    pipeline_subnets: str = ""  # comma-separated private subnet IDs
    pipeline_security_groups: str = ""  # comma-separated security group IDs
    lambda_execution_role_arn: str = ""  # Lambda execution role
    lambda_subnets: str = ""  # comma-separated subnet IDs for Lambda VPC
    lambda_security_groups: str = ""  # comma-separated SG IDs for Lambda
    lambda_log_group: str = ""  # CloudWatch log group for Lambda steps

    # Local dev auth bypass — set this to use MockTokenVerifier instead of Cognito
    mock_jwt_secret: str = ""

    mcp_transport: str = "sse"
    mcp_sse_host: str = "127.0.0.1"
    mcp_sse_port: int = 8001

    @property
    def effective_celery_broker_url(self) -> str:
        """Celery broker URL, falling back to ``redis_url`` when unset."""
        return self.celery_broker_url or self.redis_url

    @property
    def effective_celery_result_backend(self) -> str:
        """Celery result backend URL, falling back to ``redis_url`` when unset."""
        return self.celery_result_backend or self.redis_url

    @property
    def cognito_jwks_url(self) -> str:
        """JWKS URL built from ``aws_region`` and ``cognito_user_pool_id``."""
        return (
            f"https://cognito-idp.{self.aws_region}.amazonaws.com/"
            f"{self.cognito_user_pool_id}/.well-known/jwks.json"
        )

    @property
    def cognito_issuer(self) -> str:
        """Issuer URL built from ``aws_region`` and ``cognito_user_pool_id``."""
        return (
            f"https://cognito-idp.{self.aws_region}.amazonaws.com/"
            f"{self.cognito_user_pool_id}"
        )

    @property
    def pipeline_subnet_list(self) -> list[str]:
        """``pipeline_subnets`` parsed into a list of non-empty IDs."""
        return _split_csv(self.pipeline_subnets)

    @property
    def pipeline_sg_list(self) -> list[str]:
        """``pipeline_security_groups`` parsed into a list of non-empty IDs."""
        return _split_csv(self.pipeline_security_groups)

    @property
    def lambda_subnet_list(self) -> list[str]:
        """``lambda_subnets`` parsed into a list of non-empty IDs."""
        return _split_csv(self.lambda_subnets)

    @property
    def lambda_sg_list(self) -> list[str]:
        """``lambda_security_groups`` parsed into a list of non-empty IDs."""
        return _split_csv(self.lambda_security_groups)

    model_config = {"env_file": ".env", "extra": "ignore"}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@lru_cache
def get_settings() -> Settings:
    """Return the shared ``Settings`` instance, building it on first call.

    The result is memoised by ``lru_cache``, so every later call returns the
    same object.
    """
    return Settings()
|
|
File without changes
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import structlog
|
|
2
|
+
from fastapi import FastAPI, Request
|
|
3
|
+
from fastapi.exceptions import RequestValidationError
|
|
4
|
+
from fastapi.responses import JSONResponse
|
|
5
|
+
|
|
6
|
+
from pulse_engine.core.exceptions import AppError
|
|
7
|
+
|
|
8
|
+
logger = structlog.get_logger()
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _get_request_id(request: Request) -> str:
    """Return ``request.state.request_id``, or ``"unknown"`` when it is not set."""
    state = request.state
    return getattr(state, "request_id", "unknown")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
async def app_error_handler(request: Request, exc: AppError) -> JSONResponse:
    """Log an ``AppError`` and convert it into a JSON error response.

    The response uses the exception's ``status_code`` and ``message``. The
    exception's ``context`` is attached to the log entry only; it is never
    sent to the client.
    """
    request_id = _get_request_id(request)

    # ``AppError`` accepts arbitrary context keys. Spreading them straight
    # into the logger call raises ``TypeError`` when one collides with a
    # keyword used here (or with the positional ``event``), which would make
    # the error handler itself crash. Prefix colliding keys instead.
    reserved = {"event", "error", "status_code", "request_id"}
    extra = {
        (f"context_{key}" if key in reserved else key): value
        for key, value in exc.context.items()
    }

    logger.warning(
        "app_error",
        error=exc.message,
        status_code=exc.status_code,
        request_id=request_id,
        **extra,
    )
    return JSONResponse(
        status_code=exc.status_code,
        content={
            "success": False,
            "error": exc.message,
            "request_id": request_id,
        },
    )
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def validation_error_handler(
    request: Request, exc: RequestValidationError
) -> JSONResponse:
    """Answer a request-validation failure with a generic 422 JSON body.

    The validation details carried by ``exc`` are not included in the response.
    """
    body = {
        "success": False,
        "error": "Validation error",
        "request_id": _get_request_id(request),
    }
    return JSONResponse(status_code=422, content=body)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def catch_all_handler(request: Request, exc: Exception) -> JSONResponse:
    """Log an unhandled exception and answer with a generic 500 JSON body.

    The exception text is not exposed to the client; only the request id is
    returned.
    """
    request_id = _get_request_id(request)
    logger.exception("unhandled_exception", request_id=request_id)
    body = {
        "success": False,
        "error": "Internal server error",
        "request_id": request_id,
    }
    return JSONResponse(status_code=500, content=body)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def register_exception_handlers(app: FastAPI) -> None:
    """Attach this module's exception handlers to *app*.

    Handlers are registered for ``AppError``, ``RequestValidationError`` and,
    as a fallback, ``Exception``.
    """
    registrations = (
        (AppError, app_error_handler),
        (RequestValidationError, validation_error_handler),
        (Exception, catch_all_handler),
    )
    for exc_type, handler in registrations:
        app.add_exception_handler(exc_type, handler)  # type: ignore[arg-type]
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AppError(Exception):
    """Base application error carrying an HTTP status and free-form context.

    Subclasses override ``status_code``. Extra keyword arguments given to the
    constructor are kept verbatim in ``context``.
    """

    status_code: int = 500
    is_operational: bool = True

    def __init__(self, message: str = "Internal server error", **context: Any) -> None:
        """Store *message* and *context*; *message* also becomes ``str(exc)``."""
        super().__init__(message)
        self.context = context
        self.message = message
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class BadRequestError(AppError):
    """Application error with HTTP status 400; default message "Bad request"."""

    status_code = 400

    def __init__(self, message: str = "Bad request", **context: Any) -> None:
        super().__init__(message, **context)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class UnauthorizedError(AppError):
    """Application error with HTTP status 401; default message "Unauthorized"."""

    status_code = 401

    def __init__(self, message: str = "Unauthorized", **context: Any) -> None:
        super().__init__(message, **context)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class ForbiddenError(AppError):
    """Application error with HTTP status 403; default message "Forbidden"."""

    status_code = 403

    def __init__(self, message: str = "Forbidden", **context: Any) -> None:
        super().__init__(message, **context)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class NotFoundError(AppError):
    """Application error with HTTP status 404; default message "Not found"."""

    status_code = 404

    def __init__(self, message: str = "Not found", **context: Any) -> None:
        super().__init__(message, **context)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ConflictError(AppError):
    """Application error with HTTP status 409; default message "Conflict"."""

    status_code = 409

    def __init__(self, message: str = "Conflict", **context: Any) -> None:
        super().__init__(message, **context)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class UnprocessableEntityError(AppError):
    """Application error with HTTP status 422; default message "Unprocessable entity"."""

    status_code = 422

    def __init__(self, message: str = "Unprocessable entity", **context: Any) -> None:
        super().__init__(message, **context)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class TooManyRequestsError(AppError):
    """Application error with HTTP status 429; default message "Too many requests"."""

    status_code = 429

    def __init__(self, message: str = "Too many requests", **context: Any) -> None:
        super().__init__(message, **context)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class ServiceUnavailableError(AppError):
    """Application error with HTTP status 503; default message "Service unavailable"."""

    status_code = 503

    def __init__(self, message: str = "Service unavailable", **context: Any) -> None:
        super().__init__(message, **context)
|