pulse-engine 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. pulse_engine/__init__.py +0 -0
  2. pulse_engine/adapters/__init__.py +58 -0
  3. pulse_engine/adapters/audio_transcription.py +167 -0
  4. pulse_engine/adapters/batcher.py +36 -0
  5. pulse_engine/adapters/digital_news.py +128 -0
  6. pulse_engine/adapters/digital_news_metadata.py +536 -0
  7. pulse_engine/adapters/exceptions.py +10 -0
  8. pulse_engine/adapters/models.py +134 -0
  9. pulse_engine/adapters/opensearch_storage.py +160 -0
  10. pulse_engine/adapters/speech_content.py +130 -0
  11. pulse_engine/adapters/speech_metadata.py +374 -0
  12. pulse_engine/adapters/twitter.py +423 -0
  13. pulse_engine/adapters/youtube_downloader.py +186 -0
  14. pulse_engine/adapters/youtube_metadata.py +261 -0
  15. pulse_engine/api/__init__.py +0 -0
  16. pulse_engine/api/v1/__init__.py +0 -0
  17. pulse_engine/api/v1/auth.py +91 -0
  18. pulse_engine/api/v1/health.py +62 -0
  19. pulse_engine/api/v1/router.py +16 -0
  20. pulse_engine/chain_recovery.py +131 -0
  21. pulse_engine/cli/__init__.py +0 -0
  22. pulse_engine/cli/main.py +169 -0
  23. pulse_engine/cli/templates/cookiecutter.json +4 -0
  24. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
  25. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
  26. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
  27. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
  28. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
  29. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
  30. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
  31. pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
  32. pulse_engine/client.py +95 -0
  33. pulse_engine/config.py +157 -0
  34. pulse_engine/core/__init__.py +0 -0
  35. pulse_engine/core/error_handlers.py +64 -0
  36. pulse_engine/core/exceptions.py +67 -0
  37. pulse_engine/core/job_token.py +109 -0
  38. pulse_engine/core/logging.py +45 -0
  39. pulse_engine/core/scope.py +23 -0
  40. pulse_engine/core/security.py +130 -0
  41. pulse_engine/database.py +30 -0
  42. pulse_engine/dependencies.py +166 -0
  43. pulse_engine/deployment/__init__.py +0 -0
  44. pulse_engine/deployment/backend_deployment_repository.py +83 -0
  45. pulse_engine/deployment/backends/__init__.py +0 -0
  46. pulse_engine/deployment/backends/base.py +50 -0
  47. pulse_engine/deployment/backends/exceptions.py +20 -0
  48. pulse_engine/deployment/backends/native_lambda.py +125 -0
  49. pulse_engine/deployment/backends/prefect_ecs.py +116 -0
  50. pulse_engine/deployment/backends/prefect_k8s.py +131 -0
  51. pulse_engine/deployment/backends/registry.py +50 -0
  52. pulse_engine/deployment/infra_provisioner.py +285 -0
  53. pulse_engine/deployment/job_launcher.py +178 -0
  54. pulse_engine/deployment/models.py +48 -0
  55. pulse_engine/deployment/repository.py +54 -0
  56. pulse_engine/deployment/router.py +22 -0
  57. pulse_engine/deployment/schemas.py +18 -0
  58. pulse_engine/deployment/service.py +65 -0
  59. pulse_engine/extractor/__init__.py +0 -0
  60. pulse_engine/extractor/adapters/__init__.py +0 -0
  61. pulse_engine/extractor/base.py +48 -0
  62. pulse_engine/extractor/models.py +50 -0
  63. pulse_engine/extractor/orchestrator/__init__.py +15 -0
  64. pulse_engine/extractor/orchestrator/base.py +34 -0
  65. pulse_engine/extractor/orchestrator/noop.py +37 -0
  66. pulse_engine/extractor/orchestrator/prefect.py +163 -0
  67. pulse_engine/extractor/repository.py +163 -0
  68. pulse_engine/extractor/router.py +102 -0
  69. pulse_engine/extractor/schemas.py +93 -0
  70. pulse_engine/extractor/service.py +431 -0
  71. pulse_engine/extractor/stage_models.py +36 -0
  72. pulse_engine/extractor/stage_repository.py +109 -0
  73. pulse_engine/main.py +195 -0
  74. pulse_engine/mcp/__init__.py +0 -0
  75. pulse_engine/mcp/__main__.py +5 -0
  76. pulse_engine/mcp/server.py +108 -0
  77. pulse_engine/mcp/tools_jobs.py +159 -0
  78. pulse_engine/mcp/tools_kb.py +88 -0
  79. pulse_engine/mcp/tools_modules.py +115 -0
  80. pulse_engine/mcp/tools_pipelines.py +215 -0
  81. pulse_engine/mcp/tools_processor.py +208 -0
  82. pulse_engine/middleware/__init__.py +0 -0
  83. pulse_engine/middleware/rate_limit.py +144 -0
  84. pulse_engine/middleware/request_id.py +16 -0
  85. pulse_engine/middleware/security_headers.py +25 -0
  86. pulse_engine/middleware/tenant.py +90 -0
  87. pulse_engine/pipeline/__init__.py +0 -0
  88. pulse_engine/pipeline/config_parser.py +148 -0
  89. pulse_engine/pipeline/expression.py +268 -0
  90. pulse_engine/pipeline/models.py +98 -0
  91. pulse_engine/pipeline/repositories.py +224 -0
  92. pulse_engine/pipeline/router_modules.py +66 -0
  93. pulse_engine/pipeline/router_pipelines.py +198 -0
  94. pulse_engine/pipeline/schemas.py +200 -0
  95. pulse_engine/pipeline/service.py +250 -0
  96. pulse_engine/pipeline/translators/__init__.py +44 -0
  97. pulse_engine/pipeline/translators/airflow_status.py +11 -0
  98. pulse_engine/pipeline/translators/airflow_translator.py +22 -0
  99. pulse_engine/pipeline/translators/base.py +42 -0
  100. pulse_engine/pipeline/translators/prefect_status.py +93 -0
  101. pulse_engine/pipeline/translators/prefect_translator.py +195 -0
  102. pulse_engine/processor/__init__.py +0 -0
  103. pulse_engine/processor/base.py +36 -0
  104. pulse_engine/processor/core/__init__.py +0 -0
  105. pulse_engine/processor/core/analysis.py +148 -0
  106. pulse_engine/processor/core/chunking.py +158 -0
  107. pulse_engine/processor/core/prompts.py +340 -0
  108. pulse_engine/processor/core/topic_splitter.py +105 -0
  109. pulse_engine/processor/defaults/__init__.py +11 -0
  110. pulse_engine/processor/defaults/core_processor.py +12 -0
  111. pulse_engine/processor/defaults/postprocessor.py +12 -0
  112. pulse_engine/processor/defaults/preprocessor.py +12 -0
  113. pulse_engine/processor/llm/__init__.py +0 -0
  114. pulse_engine/processor/llm/provider.py +58 -0
  115. pulse_engine/processor/ocr/gemini.py +52 -0
  116. pulse_engine/processor/pipeline.py +107 -0
  117. pulse_engine/processor/postprocessor/__init__.py +0 -0
  118. pulse_engine/processor/postprocessor/embeddings.py +34 -0
  119. pulse_engine/processor/postprocessor/tasks.py +180 -0
  120. pulse_engine/processor/preprocessor/__init__.py +0 -0
  121. pulse_engine/processor/preprocessor/tasks.py +71 -0
  122. pulse_engine/processor/router.py +192 -0
  123. pulse_engine/processor/schemas.py +167 -0
  124. pulse_engine/registry.py +117 -0
  125. pulse_engine/runners/__init__.py +0 -0
  126. pulse_engine/runners/lambda_runner.py +26 -0
  127. pulse_engine/runners/pipeline_runner.py +43 -0
  128. pulse_engine/runners/prefect_pipeline_flow.py +904 -0
  129. pulse_engine/runners/prefect_runner.py +33 -0
  130. pulse_engine/s3.py +72 -0
  131. pulse_engine/secrets.py +46 -0
  132. pulse_engine/services/__init__.py +0 -0
  133. pulse_engine/services/bootstrap.py +211 -0
  134. pulse_engine/services/opensearch.py +84 -0
  135. pulse_engine/storage/__init__.py +0 -0
  136. pulse_engine/storage/connectors/__init__.py +0 -0
  137. pulse_engine/storage/connectors/athena.py +226 -0
  138. pulse_engine/storage/connectors/base.py +32 -0
  139. pulse_engine/storage/connectors/opensearch.py +344 -0
  140. pulse_engine/storage/knowledge_base.py +68 -0
  141. pulse_engine/storage/router.py +78 -0
  142. pulse_engine/storage/schemas.py +93 -0
  143. pulse_engine/testing/__init__.py +13 -0
  144. pulse_engine/testing/fixtures.py +50 -0
  145. pulse_engine/testing/mocks.py +104 -0
  146. pulse_engine/worker.py +53 -0
  147. pulse_engine-0.2.0.dist-info/METADATA +654 -0
  148. pulse_engine-0.2.0.dist-info/RECORD +150 -0
  149. pulse_engine-0.2.0.dist-info/WHEEL +4 -0
  150. pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,104 @@
1
+ """Mock implementations of engine ABCs for product testing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from pulse_engine.extractor.base import BaseExtractor, ExtractionResult, ExtractorConfig
8
+ from pulse_engine.extractor.orchestrator.base import (
9
+ BaseOrchestratorAdapter,
10
+ OrchestratorRunStatus,
11
+ )
12
+ from pulse_engine.storage.connectors.base import BaseStorageConnector
13
+ from pulse_engine.storage.schemas import (
14
+ ConnectorHealth,
15
+ Document,
16
+ SearchQuery,
17
+ SearchResult,
18
+ StoreResult,
19
+ )
20
+
21
+
22
class MockStorageConnector(BaseStorageConnector):
    """Storage connector test double that keeps documents in process memory.

    Documents are held in a two-level mapping: tenant id -> storage key ->
    document. Nothing is persisted and no external service is contacted.
    """

    def __init__(self) -> None:
        # tenant_id -> {storage key -> Document}
        self._documents: dict[str, dict[str, Document]] = {}

    async def initialize(self) -> None:
        """Do nothing; there is no backend to set up."""
        return None

    async def teardown(self) -> None:
        """Drop every stored document for every tenant."""
        self._documents.clear()

    async def health_check(self) -> ConnectorHealth:
        """Report a fixed healthy status for the ``"mock"`` connector."""
        return ConnectorHealth(connector="mock", status="up", latency_ms=0.0)

    async def store(self, tenant_id: str, documents: list[Document]) -> StoreResult:
        """Save ``documents`` under ``tenant_id``, overwriting same-key entries.

        A document with a truthy ``chunk_id`` is keyed as
        ``"<doc_id>:<chunk_id>"``; otherwise it is keyed by ``doc_id`` alone.

        Returns:
            A ``StoreResult`` whose ``stored_count`` is the number of documents
            processed and whose ``failed_count`` is always ``0``.
        """
        tenant_docs = self._documents.setdefault(tenant_id, {})
        saved = 0
        for saved, item in enumerate(documents, start=1):
            if item.chunk_id:
                storage_key = f"{item.doc_id}:{item.chunk_id}"
            else:
                storage_key = item.doc_id
            tenant_docs[storage_key] = item
        return StoreResult(stored_count=saved, failed_count=0)

    async def retrieve(self, tenant_id: str, doc_id: str) -> Document | None:
        """Return the document stored under key ``doc_id``, or ``None``.

        ``doc_id`` is used verbatim as the storage key, so a document that was
        stored with a ``chunk_id`` is only found via its composite
        ``"<doc_id>:<chunk_id>"`` key.
        """
        tenant_docs = self._documents.get(tenant_id)
        if tenant_docs is None:
            return None
        return tenant_docs.get(doc_id)

    async def search(self, tenant_id: str, query: SearchQuery) -> SearchResult:
        """Return an empty result set; the arguments are not inspected."""
        return SearchResult(hits=[], total=0, took_ms=0)

    async def delete(self, tenant_id: str, doc_id: str) -> bool:
        """Remove the entry keyed by ``doc_id``; return whether one was removed.

        As with ``retrieve``, ``doc_id`` is matched verbatim against the
        storage key.
        """
        tenant_docs = self._documents.get(tenant_id)
        if tenant_docs is None:
            return False
        removed = tenant_docs.pop(doc_id, None)
        return removed is not None
56
+
57
+
58
class MockOrchestratorAdapter(BaseOrchestratorAdapter):
    """Orchestrator adapter test double that only tracks state in memory."""

    def __init__(self) -> None:
        # run_id -> last status string recorded by this mock
        self._runs: dict[str, str] = {}

    async def create_flow_run(
        self,
        deployment_id: str,
        parameters: dict[str, Any] | None = None,
    ) -> str:
        """Return an empty run id; nothing is created or recorded."""
        return ""

    async def health_check(self) -> bool:
        """Always report healthy."""
        return True

    async def cancel_run(self, run_id: str) -> bool:
        """Record ``run_id`` as ``"cancelled"`` and report success.

        The run does not need to exist beforehand; any id is accepted.
        """
        self._runs[run_id] = "cancelled"
        return True

    async def get_run_status(self, run_id: str) -> OrchestratorRunStatus:
        """Return the recorded status for ``run_id``, or ``"unknown"`` if none."""
        if run_id in self._runs:
            recorded = self._runs[run_id]
        else:
            recorded = "unknown"
        return OrchestratorRunStatus(run_id=run_id, status=recorded)
81
+
82
+
83
class MockExtractor(BaseExtractor):
    """Extractor test double that returns canned results and records calls.

    Every ``extract`` invocation is appended to ``extract_calls`` as a
    ``(tenant_id, parameters)`` tuple so tests can assert on it.
    """

    def __init__(
        self,
        name: str = "mock",
        job_type: str = "mock_extract",
        results: list[ExtractionResult] | None = None,
    ) -> None:
        # Call log first; it is independent of the configuration below.
        self.extract_calls: list[tuple[str, dict[str, Any]]] = []
        # Any falsy ``results`` (None or an empty list) becomes a fresh list.
        self._results = results if results else []
        self._job_type = job_type
        self._name = name

    def get_config(self) -> ExtractorConfig:
        """Build an ``ExtractorConfig`` from the configured name and job type."""
        return ExtractorConfig(name=self._name, job_type=self._job_type)

    async def extract(
        self, tenant_id: str, parameters: dict[str, Any]
    ) -> list[ExtractionResult]:
        """Log the call and return the canned results list (not a copy)."""
        call_record = (tenant_id, parameters)
        self.extract_calls.append(call_record)
        return self._results
pulse_engine/worker.py ADDED
@@ -0,0 +1,53 @@
1
+ """Celery application factory.
2
+
3
+ The engine provides the Celery infrastructure (broker, result backend, base
4
+ configuration). Individual *products* register their own tasks by importing
5
+ this app and using ``@celery_app.task``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import TYPE_CHECKING
11
+
12
+ from celery import Celery
13
+
14
+ if TYPE_CHECKING:
15
+ from pulse_engine.config import Settings
16
+ from pulse_engine.registry import ProductManifest
17
+
18
+
19
def create_celery_app(
    settings: Settings, manifest: ProductManifest | None = None
) -> Celery:
    """Build the ``"pulse"`` Celery application.

    Args:
        settings: Supplies ``effective_celery_broker_url`` and
            ``effective_celery_result_backend`` for the broker and backend.
        manifest: Optional product manifest. When it is truthy and its
            ``celery_task_modules`` is non-empty, those entries are added to
            the task autodiscovery list after ``"pulse_engine"``.

    Returns:
        The configured ``Celery`` instance.
    """
    celery = Celery(
        "pulse",
        broker=settings.effective_celery_broker_url,
        backend=settings.effective_celery_result_backend,
    )

    # Base configuration shared by the engine and every product.
    base_config = {
        "task_serializer": "json",
        "accept_content": ["json"],
        "result_serializer": "json",
        "timezone": "UTC",
        "enable_utc": True,
        "task_track_started": True,
        "task_acks_late": True,
        "worker_prefetch_multiplier": 1,
        "broker_connection_retry_on_startup": True,
    }
    celery.conf.update(**base_config)

    # Auto-discover tasks in the engine first, then in any product modules.
    product_modules = (manifest.celery_task_modules if manifest else None) or []
    celery.autodiscover_tasks(["pulse_engine", *product_modules])

    return celery
47
+
48
+
49
+ # Module-level instance for Celery CLI (-A pulse_engine.worker) and for product
50
+ # packages to import as ``from pulse_engine.worker import celery_app``.
51
+ from pulse_engine.config import get_settings # noqa: E402
52
+
53
# NOTE: built at import time. Importing this module calls get_settings() and
# constructs the app without a product manifest, so only "pulse_engine" is
# passed to task autodiscovery for this instance.
celery_app = create_celery_app(get_settings())