pulse-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse_engine/__init__.py +0 -0
- pulse_engine/adapters/__init__.py +58 -0
- pulse_engine/adapters/audio_transcription.py +167 -0
- pulse_engine/adapters/batcher.py +36 -0
- pulse_engine/adapters/digital_news.py +128 -0
- pulse_engine/adapters/digital_news_metadata.py +536 -0
- pulse_engine/adapters/exceptions.py +10 -0
- pulse_engine/adapters/models.py +134 -0
- pulse_engine/adapters/opensearch_storage.py +160 -0
- pulse_engine/adapters/speech_content.py +130 -0
- pulse_engine/adapters/speech_metadata.py +374 -0
- pulse_engine/adapters/twitter.py +423 -0
- pulse_engine/adapters/youtube_downloader.py +186 -0
- pulse_engine/adapters/youtube_metadata.py +261 -0
- pulse_engine/api/__init__.py +0 -0
- pulse_engine/api/v1/__init__.py +0 -0
- pulse_engine/api/v1/auth.py +91 -0
- pulse_engine/api/v1/health.py +62 -0
- pulse_engine/api/v1/router.py +16 -0
- pulse_engine/chain_recovery.py +131 -0
- pulse_engine/cli/__init__.py +0 -0
- pulse_engine/cli/main.py +169 -0
- pulse_engine/cli/templates/cookiecutter.json +4 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
- pulse_engine/client.py +95 -0
- pulse_engine/config.py +157 -0
- pulse_engine/core/__init__.py +0 -0
- pulse_engine/core/error_handlers.py +64 -0
- pulse_engine/core/exceptions.py +67 -0
- pulse_engine/core/job_token.py +109 -0
- pulse_engine/core/logging.py +45 -0
- pulse_engine/core/scope.py +23 -0
- pulse_engine/core/security.py +130 -0
- pulse_engine/database.py +30 -0
- pulse_engine/dependencies.py +166 -0
- pulse_engine/deployment/__init__.py +0 -0
- pulse_engine/deployment/backend_deployment_repository.py +83 -0
- pulse_engine/deployment/backends/__init__.py +0 -0
- pulse_engine/deployment/backends/base.py +50 -0
- pulse_engine/deployment/backends/exceptions.py +20 -0
- pulse_engine/deployment/backends/native_lambda.py +125 -0
- pulse_engine/deployment/backends/prefect_ecs.py +116 -0
- pulse_engine/deployment/backends/prefect_k8s.py +131 -0
- pulse_engine/deployment/backends/registry.py +50 -0
- pulse_engine/deployment/infra_provisioner.py +285 -0
- pulse_engine/deployment/job_launcher.py +178 -0
- pulse_engine/deployment/models.py +48 -0
- pulse_engine/deployment/repository.py +54 -0
- pulse_engine/deployment/router.py +22 -0
- pulse_engine/deployment/schemas.py +18 -0
- pulse_engine/deployment/service.py +65 -0
- pulse_engine/extractor/__init__.py +0 -0
- pulse_engine/extractor/adapters/__init__.py +0 -0
- pulse_engine/extractor/base.py +48 -0
- pulse_engine/extractor/models.py +50 -0
- pulse_engine/extractor/orchestrator/__init__.py +15 -0
- pulse_engine/extractor/orchestrator/base.py +34 -0
- pulse_engine/extractor/orchestrator/noop.py +37 -0
- pulse_engine/extractor/orchestrator/prefect.py +163 -0
- pulse_engine/extractor/repository.py +163 -0
- pulse_engine/extractor/router.py +102 -0
- pulse_engine/extractor/schemas.py +93 -0
- pulse_engine/extractor/service.py +431 -0
- pulse_engine/extractor/stage_models.py +36 -0
- pulse_engine/extractor/stage_repository.py +109 -0
- pulse_engine/main.py +195 -0
- pulse_engine/mcp/__init__.py +0 -0
- pulse_engine/mcp/__main__.py +5 -0
- pulse_engine/mcp/server.py +108 -0
- pulse_engine/mcp/tools_jobs.py +159 -0
- pulse_engine/mcp/tools_kb.py +88 -0
- pulse_engine/mcp/tools_modules.py +115 -0
- pulse_engine/mcp/tools_pipelines.py +215 -0
- pulse_engine/mcp/tools_processor.py +208 -0
- pulse_engine/middleware/__init__.py +0 -0
- pulse_engine/middleware/rate_limit.py +144 -0
- pulse_engine/middleware/request_id.py +16 -0
- pulse_engine/middleware/security_headers.py +25 -0
- pulse_engine/middleware/tenant.py +90 -0
- pulse_engine/pipeline/__init__.py +0 -0
- pulse_engine/pipeline/config_parser.py +148 -0
- pulse_engine/pipeline/expression.py +268 -0
- pulse_engine/pipeline/models.py +98 -0
- pulse_engine/pipeline/repositories.py +224 -0
- pulse_engine/pipeline/router_modules.py +66 -0
- pulse_engine/pipeline/router_pipelines.py +198 -0
- pulse_engine/pipeline/schemas.py +200 -0
- pulse_engine/pipeline/service.py +250 -0
- pulse_engine/pipeline/translators/__init__.py +44 -0
- pulse_engine/pipeline/translators/airflow_status.py +11 -0
- pulse_engine/pipeline/translators/airflow_translator.py +22 -0
- pulse_engine/pipeline/translators/base.py +42 -0
- pulse_engine/pipeline/translators/prefect_status.py +93 -0
- pulse_engine/pipeline/translators/prefect_translator.py +195 -0
- pulse_engine/processor/__init__.py +0 -0
- pulse_engine/processor/base.py +36 -0
- pulse_engine/processor/core/__init__.py +0 -0
- pulse_engine/processor/core/analysis.py +148 -0
- pulse_engine/processor/core/chunking.py +158 -0
- pulse_engine/processor/core/prompts.py +340 -0
- pulse_engine/processor/core/topic_splitter.py +105 -0
- pulse_engine/processor/defaults/__init__.py +11 -0
- pulse_engine/processor/defaults/core_processor.py +12 -0
- pulse_engine/processor/defaults/postprocessor.py +12 -0
- pulse_engine/processor/defaults/preprocessor.py +12 -0
- pulse_engine/processor/llm/__init__.py +0 -0
- pulse_engine/processor/llm/provider.py +58 -0
- pulse_engine/processor/ocr/gemini.py +52 -0
- pulse_engine/processor/pipeline.py +107 -0
- pulse_engine/processor/postprocessor/__init__.py +0 -0
- pulse_engine/processor/postprocessor/embeddings.py +34 -0
- pulse_engine/processor/postprocessor/tasks.py +180 -0
- pulse_engine/processor/preprocessor/__init__.py +0 -0
- pulse_engine/processor/preprocessor/tasks.py +71 -0
- pulse_engine/processor/router.py +192 -0
- pulse_engine/processor/schemas.py +167 -0
- pulse_engine/registry.py +117 -0
- pulse_engine/runners/__init__.py +0 -0
- pulse_engine/runners/lambda_runner.py +26 -0
- pulse_engine/runners/pipeline_runner.py +43 -0
- pulse_engine/runners/prefect_pipeline_flow.py +904 -0
- pulse_engine/runners/prefect_runner.py +33 -0
- pulse_engine/s3.py +72 -0
- pulse_engine/secrets.py +46 -0
- pulse_engine/services/__init__.py +0 -0
- pulse_engine/services/bootstrap.py +211 -0
- pulse_engine/services/opensearch.py +84 -0
- pulse_engine/storage/__init__.py +0 -0
- pulse_engine/storage/connectors/__init__.py +0 -0
- pulse_engine/storage/connectors/athena.py +226 -0
- pulse_engine/storage/connectors/base.py +32 -0
- pulse_engine/storage/connectors/opensearch.py +344 -0
- pulse_engine/storage/knowledge_base.py +68 -0
- pulse_engine/storage/router.py +78 -0
- pulse_engine/storage/schemas.py +93 -0
- pulse_engine/testing/__init__.py +13 -0
- pulse_engine/testing/fixtures.py +50 -0
- pulse_engine/testing/mocks.py +104 -0
- pulse_engine/worker.py +53 -0
- pulse_engine-0.2.0.dist-info/METADATA +654 -0
- pulse_engine-0.2.0.dist-info/RECORD +150 -0
- pulse_engine-0.2.0.dist-info/WHEEL +4 -0
- pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
"""Mock implementations of engine ABCs for product testing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pulse_engine.extractor.base import BaseExtractor, ExtractionResult, ExtractorConfig
|
|
8
|
+
from pulse_engine.extractor.orchestrator.base import (
|
|
9
|
+
BaseOrchestratorAdapter,
|
|
10
|
+
OrchestratorRunStatus,
|
|
11
|
+
)
|
|
12
|
+
from pulse_engine.storage.connectors.base import BaseStorageConnector
|
|
13
|
+
from pulse_engine.storage.schemas import (
|
|
14
|
+
ConnectorHealth,
|
|
15
|
+
Document,
|
|
16
|
+
SearchQuery,
|
|
17
|
+
SearchResult,
|
|
18
|
+
StoreResult,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MockStorageConnector(BaseStorageConnector):
    """In-memory storage connector for testing.

    Documents are held per tenant in plain dicts. A document that carries a
    ``chunk_id`` is stored under ``"<doc_id>:<chunk_id>"``; a document without
    one is stored under its ``doc_id``. ``retrieve`` and ``delete`` accept
    either the exact storage key or a bare ``doc_id``, so chunked documents
    stay reachable through their document id.
    """

    def __init__(self) -> None:
        # tenant_id -> storage key -> document
        self._documents: dict[str, dict[str, Document]] = {}

    @staticmethod
    def _storage_key(doc: Document) -> str:
        """Return the dict key a document is stored under."""
        return f"{doc.doc_id}:{doc.chunk_id}" if doc.chunk_id else doc.doc_id

    async def initialize(self) -> None:
        """Do nothing; the in-memory store needs no setup."""

    async def store(self, tenant_id: str, documents: list[Document]) -> StoreResult:
        """Insert or overwrite ``documents`` in the tenant's bucket.

        Returns a ``StoreResult`` counting every document as stored; this
        mock never reports failures.
        """
        bucket = self._documents.setdefault(tenant_id, {})
        stored = 0
        for doc in documents:
            bucket[self._storage_key(doc)] = doc
            stored += 1
        return StoreResult(stored_count=stored, failed_count=0)

    async def retrieve(self, tenant_id: str, doc_id: str) -> Document | None:
        """Return the document stored for ``doc_id``, or ``None`` if absent.

        An exact storage-key match wins. Otherwise the first stored chunk
        whose ``doc_id`` equals the argument is returned.
        """
        bucket = self._documents.get(tenant_id, {})
        exact = bucket.get(doc_id)
        if exact is not None:
            return exact
        # Chunked documents are keyed "<doc_id>:<chunk_id>", so a bare doc_id
        # never matches a key directly; compare against the stored doc_id.
        return next((doc for doc in bucket.values() if doc.doc_id == doc_id), None)

    async def search(self, tenant_id: str, query: SearchQuery) -> SearchResult:
        """Return an empty result; this mock does not implement search."""
        return SearchResult(hits=[], total=0, took_ms=0)

    async def delete(self, tenant_id: str, doc_id: str) -> bool:
        """Remove the entry keyed ``doc_id`` and every chunk of that document.

        Returns ``True`` if at least one entry was removed.
        """
        bucket = self._documents.get(tenant_id, {})
        # Collect keys first: the dict must not change size while iterating.
        doomed = [
            key
            for key, doc in bucket.items()
            if key == doc_id or doc.doc_id == doc_id
        ]
        for key in doomed:
            del bucket[key]
        return bool(doomed)

    async def health_check(self) -> ConnectorHealth:
        """Report the mock connector as up with zero latency."""
        return ConnectorHealth(connector="mock", status="up", latency_ms=0.0)

    async def teardown(self) -> None:
        """Drop all stored documents for every tenant."""
        self._documents.clear()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class MockOrchestratorAdapter(BaseOrchestratorAdapter):
    """Orchestrator adapter stand-in for tests; it performs no real work.

    The only state kept is a mapping of run ids to the status string this
    mock has recorded for them (set by ``cancel_run``).
    """

    def __init__(self) -> None:
        # run_id -> status string recorded by this mock
        self._runs: dict[str, str] = {}

    async def create_flow_run(
        self,
        deployment_id: str,
        parameters: dict[str, Any] | None = None,
    ) -> str:
        """Accept a flow-run request and return an empty run id.

        Nothing is recorded for the request.
        """
        return ""

    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Return the recorded status for ``run_id``, or ``"unknown"``."""
        return OrchestratorRunStatus(
            run_id=run_id,
            status=self._runs.get(run_id, "unknown"),
        )

    async def cancel_run(self, run_id: str) -> bool:
        """Mark ``run_id`` as ``"cancelled"`` and report success."""
        self._runs.update({run_id: "cancelled"})
        return True

    async def health_check(self) -> bool:
        """Always report healthy."""
        return True
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class MockExtractor(BaseExtractor):
    """A configurable mock extractor for testing.

    Every ``extract`` call is appended to ``extract_calls`` as a
    ``(tenant_id, parameters)`` tuple and answered with the canned results
    supplied at construction time.
    """

    def __init__(
        self,
        name: str = "mock",
        job_type: str = "mock_extract",
        results: list[ExtractionResult] | None = None,
    ) -> None:
        """Create the mock.

        Args:
            name: Extractor name reported by ``get_config``.
            job_type: Job type reported by ``get_config``.
            results: Canned results returned by every ``extract`` call;
                ``None`` or an empty list yields no results.
        """
        self._name = name
        self._job_type = job_type
        # Copy so later mutation of the caller's list cannot change what the
        # mock returns.
        self._results: list[ExtractionResult] = list(results) if results else []
        self.extract_calls: list[tuple[str, dict[str, Any]]] = []

    def get_config(self) -> ExtractorConfig:
        """Return an ``ExtractorConfig`` built from the configured name and job type."""
        return ExtractorConfig(name=self._name, job_type=self._job_type)

    async def extract(
        self, tenant_id: str, parameters: dict[str, Any]
    ) -> list[ExtractionResult]:
        """Record the call and return the canned results.

        A fresh list is returned on each call so a caller that mutates the
        returned list does not affect subsequent calls.
        """
        self.extract_calls.append((tenant_id, parameters))
        return list(self._results)
|
pulse_engine/worker.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Celery application factory.
|
|
2
|
+
|
|
3
|
+
The engine provides the Celery infrastructure (broker, result backend, base
|
|
4
|
+
configuration). Individual *products* register their own tasks by importing
|
|
5
|
+
this app and using ``@celery_app.task``.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from celery import Celery
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from pulse_engine.config import Settings
|
|
16
|
+
from pulse_engine.registry import ProductManifest
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_celery_app(
    settings: Settings, manifest: ProductManifest | None = None
) -> Celery:
    """Build and configure the ``"pulse"`` Celery application.

    Args:
        settings: Supplies the broker URL and result backend via
            ``effective_celery_broker_url`` and
            ``effective_celery_result_backend``.
        manifest: Optional product manifest; when it lists
            ``celery_task_modules`` they are added to task auto-discovery.

    Returns:
        The configured ``Celery`` instance.
    """
    celery = Celery(
        "pulse",
        broker=settings.effective_celery_broker_url,
        backend=settings.effective_celery_result_backend,
    )

    # Base configuration shared by the engine and every product.
    base_config = {
        "task_serializer": "json",
        "accept_content": ["json"],
        "result_serializer": "json",
        "timezone": "UTC",
        "enable_utc": True,
        "task_track_started": True,
        "task_acks_late": True,
        "worker_prefetch_multiplier": 1,
        "broker_connection_retry_on_startup": True,
    }
    celery.conf.update(**base_config)

    # The engine package is always scanned for tasks; product modules are
    # appended only when a manifest provides them.
    product_modules = manifest.celery_task_modules if manifest else None
    discover = ["pulse_engine", *(product_modules or [])]
    celery.autodiscover_tasks(discover)

    return celery
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Module-level instance for Celery CLI (-A pulse_engine.worker) and for product
|
|
50
|
+
# packages to import as ``from pulse_engine.worker import celery_app``.
|
|
51
|
+
from pulse_engine.config import get_settings # noqa: E402
|
|
52
|
+
|
|
53
|
+
# NOTE: built at import time -- importing this module calls get_settings()
# and constructs the app without a product manifest.
celery_app = create_celery_app(get_settings())
|