pulse-engine 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pulse_engine/__init__.py +0 -0
- pulse_engine/adapters/__init__.py +58 -0
- pulse_engine/adapters/audio_transcription.py +167 -0
- pulse_engine/adapters/batcher.py +36 -0
- pulse_engine/adapters/digital_news.py +128 -0
- pulse_engine/adapters/digital_news_metadata.py +536 -0
- pulse_engine/adapters/exceptions.py +10 -0
- pulse_engine/adapters/models.py +134 -0
- pulse_engine/adapters/opensearch_storage.py +160 -0
- pulse_engine/adapters/speech_content.py +130 -0
- pulse_engine/adapters/speech_metadata.py +374 -0
- pulse_engine/adapters/twitter.py +423 -0
- pulse_engine/adapters/youtube_downloader.py +186 -0
- pulse_engine/adapters/youtube_metadata.py +261 -0
- pulse_engine/api/__init__.py +0 -0
- pulse_engine/api/v1/__init__.py +0 -0
- pulse_engine/api/v1/auth.py +91 -0
- pulse_engine/api/v1/health.py +62 -0
- pulse_engine/api/v1/router.py +16 -0
- pulse_engine/chain_recovery.py +131 -0
- pulse_engine/cli/__init__.py +0 -0
- pulse_engine/cli/main.py +169 -0
- pulse_engine/cli/templates/cookiecutter.json +4 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/.gitignore +13 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/Dockerfile +32 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pipeline.yaml +17 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/pyproject.toml +25 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/src/pulse_{{cookiecutter.product_slug}}/__init__.py +8 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/__init__.py +0 -0
- pulse_engine/cli/templates/pulse-{{cookiecutter.product_name}}/tests/unit/test_manifest.py +15 -0
- pulse_engine/client.py +95 -0
- pulse_engine/config.py +157 -0
- pulse_engine/core/__init__.py +0 -0
- pulse_engine/core/error_handlers.py +64 -0
- pulse_engine/core/exceptions.py +67 -0
- pulse_engine/core/job_token.py +109 -0
- pulse_engine/core/logging.py +45 -0
- pulse_engine/core/scope.py +23 -0
- pulse_engine/core/security.py +130 -0
- pulse_engine/database.py +30 -0
- pulse_engine/dependencies.py +166 -0
- pulse_engine/deployment/__init__.py +0 -0
- pulse_engine/deployment/backend_deployment_repository.py +83 -0
- pulse_engine/deployment/backends/__init__.py +0 -0
- pulse_engine/deployment/backends/base.py +50 -0
- pulse_engine/deployment/backends/exceptions.py +20 -0
- pulse_engine/deployment/backends/native_lambda.py +125 -0
- pulse_engine/deployment/backends/prefect_ecs.py +116 -0
- pulse_engine/deployment/backends/prefect_k8s.py +131 -0
- pulse_engine/deployment/backends/registry.py +50 -0
- pulse_engine/deployment/infra_provisioner.py +285 -0
- pulse_engine/deployment/job_launcher.py +178 -0
- pulse_engine/deployment/models.py +48 -0
- pulse_engine/deployment/repository.py +54 -0
- pulse_engine/deployment/router.py +22 -0
- pulse_engine/deployment/schemas.py +18 -0
- pulse_engine/deployment/service.py +65 -0
- pulse_engine/extractor/__init__.py +0 -0
- pulse_engine/extractor/adapters/__init__.py +0 -0
- pulse_engine/extractor/base.py +48 -0
- pulse_engine/extractor/models.py +50 -0
- pulse_engine/extractor/orchestrator/__init__.py +15 -0
- pulse_engine/extractor/orchestrator/base.py +34 -0
- pulse_engine/extractor/orchestrator/noop.py +37 -0
- pulse_engine/extractor/orchestrator/prefect.py +163 -0
- pulse_engine/extractor/repository.py +163 -0
- pulse_engine/extractor/router.py +102 -0
- pulse_engine/extractor/schemas.py +93 -0
- pulse_engine/extractor/service.py +431 -0
- pulse_engine/extractor/stage_models.py +36 -0
- pulse_engine/extractor/stage_repository.py +109 -0
- pulse_engine/main.py +195 -0
- pulse_engine/mcp/__init__.py +0 -0
- pulse_engine/mcp/__main__.py +5 -0
- pulse_engine/mcp/server.py +108 -0
- pulse_engine/mcp/tools_jobs.py +159 -0
- pulse_engine/mcp/tools_kb.py +88 -0
- pulse_engine/mcp/tools_modules.py +115 -0
- pulse_engine/mcp/tools_pipelines.py +215 -0
- pulse_engine/mcp/tools_processor.py +208 -0
- pulse_engine/middleware/__init__.py +0 -0
- pulse_engine/middleware/rate_limit.py +144 -0
- pulse_engine/middleware/request_id.py +16 -0
- pulse_engine/middleware/security_headers.py +25 -0
- pulse_engine/middleware/tenant.py +90 -0
- pulse_engine/pipeline/__init__.py +0 -0
- pulse_engine/pipeline/config_parser.py +148 -0
- pulse_engine/pipeline/expression.py +268 -0
- pulse_engine/pipeline/models.py +98 -0
- pulse_engine/pipeline/repositories.py +224 -0
- pulse_engine/pipeline/router_modules.py +66 -0
- pulse_engine/pipeline/router_pipelines.py +198 -0
- pulse_engine/pipeline/schemas.py +200 -0
- pulse_engine/pipeline/service.py +250 -0
- pulse_engine/pipeline/translators/__init__.py +44 -0
- pulse_engine/pipeline/translators/airflow_status.py +11 -0
- pulse_engine/pipeline/translators/airflow_translator.py +22 -0
- pulse_engine/pipeline/translators/base.py +42 -0
- pulse_engine/pipeline/translators/prefect_status.py +93 -0
- pulse_engine/pipeline/translators/prefect_translator.py +195 -0
- pulse_engine/processor/__init__.py +0 -0
- pulse_engine/processor/base.py +36 -0
- pulse_engine/processor/core/__init__.py +0 -0
- pulse_engine/processor/core/analysis.py +148 -0
- pulse_engine/processor/core/chunking.py +158 -0
- pulse_engine/processor/core/prompts.py +340 -0
- pulse_engine/processor/core/topic_splitter.py +105 -0
- pulse_engine/processor/defaults/__init__.py +11 -0
- pulse_engine/processor/defaults/core_processor.py +12 -0
- pulse_engine/processor/defaults/postprocessor.py +12 -0
- pulse_engine/processor/defaults/preprocessor.py +12 -0
- pulse_engine/processor/llm/__init__.py +0 -0
- pulse_engine/processor/llm/provider.py +58 -0
- pulse_engine/processor/ocr/gemini.py +52 -0
- pulse_engine/processor/pipeline.py +107 -0
- pulse_engine/processor/postprocessor/__init__.py +0 -0
- pulse_engine/processor/postprocessor/embeddings.py +34 -0
- pulse_engine/processor/postprocessor/tasks.py +180 -0
- pulse_engine/processor/preprocessor/__init__.py +0 -0
- pulse_engine/processor/preprocessor/tasks.py +71 -0
- pulse_engine/processor/router.py +192 -0
- pulse_engine/processor/schemas.py +167 -0
- pulse_engine/registry.py +117 -0
- pulse_engine/runners/__init__.py +0 -0
- pulse_engine/runners/lambda_runner.py +26 -0
- pulse_engine/runners/pipeline_runner.py +43 -0
- pulse_engine/runners/prefect_pipeline_flow.py +904 -0
- pulse_engine/runners/prefect_runner.py +33 -0
- pulse_engine/s3.py +72 -0
- pulse_engine/secrets.py +46 -0
- pulse_engine/services/__init__.py +0 -0
- pulse_engine/services/bootstrap.py +211 -0
- pulse_engine/services/opensearch.py +84 -0
- pulse_engine/storage/__init__.py +0 -0
- pulse_engine/storage/connectors/__init__.py +0 -0
- pulse_engine/storage/connectors/athena.py +226 -0
- pulse_engine/storage/connectors/base.py +32 -0
- pulse_engine/storage/connectors/opensearch.py +344 -0
- pulse_engine/storage/knowledge_base.py +68 -0
- pulse_engine/storage/router.py +78 -0
- pulse_engine/storage/schemas.py +93 -0
- pulse_engine/testing/__init__.py +13 -0
- pulse_engine/testing/fixtures.py +50 -0
- pulse_engine/testing/mocks.py +104 -0
- pulse_engine/worker.py +53 -0
- pulse_engine-0.2.0.dist-info/METADATA +654 -0
- pulse_engine-0.2.0.dist-info/RECORD +150 -0
- pulse_engine-0.2.0.dist-info/WHEEL +4 -0
- pulse_engine-0.2.0.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""YouTube Data API v3 channel metadata adapter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from datetime import UTC, datetime, timedelta
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import httpx
|
|
10
|
+
import structlog
|
|
11
|
+
|
|
12
|
+
from .models import VideoMetadata
|
|
13
|
+
|
|
14
|
+
logger = structlog.get_logger(__name__)
|
|
15
|
+
|
|
16
|
+
_YOUTUBE_API_BASE = "https://www.googleapis.com/youtube/v3"
|
|
17
|
+
_MAX_PAGE_SIZE = 50
|
|
18
|
+
|
|
19
|
+
_ISO8601_DURATION_RE = re.compile(r"PT(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?")
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _parse_duration(duration: str) -> int:
|
|
23
|
+
"""Parse an ISO 8601 duration string (e.g. ``PT15M33S``) to seconds."""
|
|
24
|
+
m = _ISO8601_DURATION_RE.search(duration)
|
|
25
|
+
if not m:
|
|
26
|
+
return 0
|
|
27
|
+
hours = int(m.group(1) or 0)
|
|
28
|
+
minutes = int(m.group(2) or 0)
|
|
29
|
+
seconds = int(m.group(3) or 0)
|
|
30
|
+
return hours * 3600 + minutes * 60 + seconds
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _is_live_or_upcoming(video: dict[str, Any]) -> bool:
|
|
34
|
+
lbc = (video.get("snippet") or {}).get("liveBroadcastContent") or "none"
|
|
35
|
+
return lbc in ("live", "upcoming")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class YouTubeMetadataAdapter:
|
|
39
|
+
"""Discovers video metadata from a YouTube channel via the Data API v3.
|
|
40
|
+
|
|
41
|
+
Mirrors the ``DigitalNewsMetadataAdapter`` interface: construct with
|
|
42
|
+
connection config, call ``fetch()`` to get a list of ``VideoMetadata``.
|
|
43
|
+
|
|
44
|
+
Usage::
|
|
45
|
+
|
|
46
|
+
adapter = YouTubeMetadataAdapter(
|
|
47
|
+
channel_name="@mkbhd",
|
|
48
|
+
api_key="YOUR_YT_API_KEY",
|
|
49
|
+
)
|
|
50
|
+
videos = await adapter.fetch(since=timedelta(days=7), max_videos=50)
|
|
51
|
+
"""
|
|
52
|
+
|
|
53
|
+
def __init__(self, channel_name: str, api_key: str) -> None:
|
|
54
|
+
self._channel_name = channel_name
|
|
55
|
+
self._api_key = api_key
|
|
56
|
+
|
|
57
|
+
async def fetch(
|
|
58
|
+
self,
|
|
59
|
+
since: timedelta | None = None,
|
|
60
|
+
max_videos: int = 50,
|
|
61
|
+
min_duration_seconds: int = 0,
|
|
62
|
+
skip_live_streams: bool = True,
|
|
63
|
+
) -> list[VideoMetadata]:
|
|
64
|
+
"""Fetch video metadata from the channel.
|
|
65
|
+
|
|
66
|
+
Args:
|
|
67
|
+
since: Only return videos published within this window.
|
|
68
|
+
max_videos: Upper bound on videos returned.
|
|
69
|
+
min_duration_seconds: Skip videos shorter than this threshold.
|
|
70
|
+
skip_live_streams: Drop live and scheduled broadcasts.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
List of ``VideoMetadata`` ordered newest-first from the API.
|
|
74
|
+
"""
|
|
75
|
+
published_after: str | None = None
|
|
76
|
+
if since is not None:
|
|
77
|
+
cutoff = datetime.now(UTC) - since
|
|
78
|
+
published_after = cutoff.isoformat()
|
|
79
|
+
|
|
80
|
+
logger.info(
|
|
81
|
+
"youtube_metadata_fetch_started",
|
|
82
|
+
channel=self._channel_name,
|
|
83
|
+
since=str(since),
|
|
84
|
+
max_videos=max_videos,
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
88
|
+
channel_id = await self._resolve_channel_id(client)
|
|
89
|
+
playlist_id = await self._get_uploads_playlist_id(client, channel_id)
|
|
90
|
+
video_ids = await self._list_playlist_video_ids(
|
|
91
|
+
client,
|
|
92
|
+
playlist_id,
|
|
93
|
+
max_videos=max_videos,
|
|
94
|
+
published_after=published_after,
|
|
95
|
+
)
|
|
96
|
+
raw_videos = await self._get_video_details(client, video_ids)
|
|
97
|
+
|
|
98
|
+
videos = self._apply_filters(
|
|
99
|
+
raw_videos,
|
|
100
|
+
min_duration_seconds=min_duration_seconds,
|
|
101
|
+
skip_live_streams=skip_live_streams,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
result = [self._to_model(v) for v in videos]
|
|
105
|
+
logger.info(
|
|
106
|
+
"youtube_metadata_fetch_completed",
|
|
107
|
+
channel=self._channel_name,
|
|
108
|
+
returned=len(result),
|
|
109
|
+
)
|
|
110
|
+
return result
|
|
111
|
+
|
|
112
|
+
# ------------------------------------------------------------------
|
|
113
|
+
# Private helpers
|
|
114
|
+
# ------------------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
async def _resolve_channel_id(self, client: httpx.AsyncClient) -> str:
|
|
117
|
+
"""Resolve a channel handle or display name to a channel ID."""
|
|
118
|
+
handle = self._channel_name.lstrip("@")
|
|
119
|
+
resp = await client.get(
|
|
120
|
+
f"{_YOUTUBE_API_BASE}/channels",
|
|
121
|
+
params={"part": "id", "forHandle": handle, "key": self._api_key},
|
|
122
|
+
)
|
|
123
|
+
resp.raise_for_status()
|
|
124
|
+
items = resp.json().get("items", [])
|
|
125
|
+
if items:
|
|
126
|
+
return str(items[0]["id"])
|
|
127
|
+
|
|
128
|
+
# Fall back to search by display name.
|
|
129
|
+
resp = await client.get(
|
|
130
|
+
f"{_YOUTUBE_API_BASE}/search",
|
|
131
|
+
params={
|
|
132
|
+
"part": "snippet",
|
|
133
|
+
"type": "channel",
|
|
134
|
+
"q": self._channel_name,
|
|
135
|
+
"maxResults": 1,
|
|
136
|
+
"key": self._api_key,
|
|
137
|
+
},
|
|
138
|
+
)
|
|
139
|
+
resp.raise_for_status()
|
|
140
|
+
items = resp.json().get("items", [])
|
|
141
|
+
if not items:
|
|
142
|
+
raise ValueError(f"YouTube channel not found: '{self._channel_name}'")
|
|
143
|
+
return str(items[0]["snippet"]["channelId"])
|
|
144
|
+
|
|
145
|
+
async def _get_uploads_playlist_id(
|
|
146
|
+
self, client: httpx.AsyncClient, channel_id: str
|
|
147
|
+
) -> str:
|
|
148
|
+
resp = await client.get(
|
|
149
|
+
f"{_YOUTUBE_API_BASE}/channels",
|
|
150
|
+
params={"part": "contentDetails", "id": channel_id, "key": self._api_key},
|
|
151
|
+
)
|
|
152
|
+
resp.raise_for_status()
|
|
153
|
+
items = resp.json().get("items", [])
|
|
154
|
+
if not items:
|
|
155
|
+
raise ValueError(f"Channel not found: {channel_id}")
|
|
156
|
+
return str(items[0]["contentDetails"]["relatedPlaylists"]["uploads"])
|
|
157
|
+
|
|
158
|
+
async def _list_playlist_video_ids(
|
|
159
|
+
self,
|
|
160
|
+
client: httpx.AsyncClient,
|
|
161
|
+
playlist_id: str,
|
|
162
|
+
max_videos: int,
|
|
163
|
+
published_after: str | None,
|
|
164
|
+
) -> list[str]:
|
|
165
|
+
video_ids: list[str] = []
|
|
166
|
+
page_token: str | None = None
|
|
167
|
+
|
|
168
|
+
while len(video_ids) < max_videos:
|
|
169
|
+
page_size = min(_MAX_PAGE_SIZE, max_videos - len(video_ids))
|
|
170
|
+
params: dict[str, Any] = {
|
|
171
|
+
"part": "contentDetails,snippet",
|
|
172
|
+
"playlistId": playlist_id,
|
|
173
|
+
"maxResults": page_size,
|
|
174
|
+
"key": self._api_key,
|
|
175
|
+
}
|
|
176
|
+
if page_token:
|
|
177
|
+
params["pageToken"] = page_token
|
|
178
|
+
|
|
179
|
+
resp = await client.get(f"{_YOUTUBE_API_BASE}/playlistItems", params=params)
|
|
180
|
+
resp.raise_for_status()
|
|
181
|
+
data = resp.json()
|
|
182
|
+
|
|
183
|
+
for item in data.get("items", []):
|
|
184
|
+
vid_id = item["contentDetails"]["videoId"]
|
|
185
|
+
if published_after:
|
|
186
|
+
published = item["snippet"].get("publishedAt", "")
|
|
187
|
+
if published < published_after:
|
|
188
|
+
# Playlist is newest-first; older video means we're done.
|
|
189
|
+
return video_ids
|
|
190
|
+
video_ids.append(vid_id)
|
|
191
|
+
|
|
192
|
+
page_token = data.get("nextPageToken")
|
|
193
|
+
if not page_token:
|
|
194
|
+
break
|
|
195
|
+
|
|
196
|
+
return video_ids
|
|
197
|
+
|
|
198
|
+
async def _get_video_details(
|
|
199
|
+
self, client: httpx.AsyncClient, video_ids: list[str]
|
|
200
|
+
) -> list[dict[str, Any]]:
|
|
201
|
+
if not video_ids:
|
|
202
|
+
return []
|
|
203
|
+
details: list[dict[str, Any]] = []
|
|
204
|
+
for i in range(0, len(video_ids), _MAX_PAGE_SIZE):
|
|
205
|
+
batch = video_ids[i : i + _MAX_PAGE_SIZE]
|
|
206
|
+
resp = await client.get(
|
|
207
|
+
f"{_YOUTUBE_API_BASE}/videos",
|
|
208
|
+
params={
|
|
209
|
+
"part": "snippet,contentDetails,statistics",
|
|
210
|
+
"id": ",".join(batch),
|
|
211
|
+
"key": self._api_key,
|
|
212
|
+
},
|
|
213
|
+
)
|
|
214
|
+
resp.raise_for_status()
|
|
215
|
+
details.extend(resp.json().get("items", []))
|
|
216
|
+
return details
|
|
217
|
+
|
|
218
|
+
def _apply_filters(
|
|
219
|
+
self,
|
|
220
|
+
videos: list[dict[str, Any]],
|
|
221
|
+
min_duration_seconds: int,
|
|
222
|
+
skip_live_streams: bool,
|
|
223
|
+
) -> list[dict[str, Any]]:
|
|
224
|
+
result: list[dict[str, Any]] = []
|
|
225
|
+
for v in videos:
|
|
226
|
+
if skip_live_streams and _is_live_or_upcoming(v):
|
|
227
|
+
continue
|
|
228
|
+
if min_duration_seconds > 0:
|
|
229
|
+
details = v.get("contentDetails", {})
|
|
230
|
+
dur = _parse_duration(details.get("duration", "PT0S"))
|
|
231
|
+
if dur < min_duration_seconds:
|
|
232
|
+
continue
|
|
233
|
+
result.append(v)
|
|
234
|
+
return result
|
|
235
|
+
|
|
236
|
+
@staticmethod
|
|
237
|
+
def _to_model(v: dict[str, Any]) -> VideoMetadata:
|
|
238
|
+
snippet = v["snippet"]
|
|
239
|
+
pub_at = datetime.fromisoformat(snippet["publishedAt"].replace("Z", "+00:00"))
|
|
240
|
+
thumbnails = snippet.get("thumbnails", {})
|
|
241
|
+
thumbnail_url = (
|
|
242
|
+
(thumbnails.get("maxres") or thumbnails.get("high") or thumbnails.get("medium") or {})
|
|
243
|
+
.get("url")
|
|
244
|
+
)
|
|
245
|
+
return VideoMetadata(
|
|
246
|
+
video_id=v["id"],
|
|
247
|
+
url=f"https://www.youtube.com/watch?v={v['id']}",
|
|
248
|
+
title=snippet["title"],
|
|
249
|
+
channel_id=snippet["channelId"],
|
|
250
|
+
channel_name=snippet["channelTitle"],
|
|
251
|
+
published_at=pub_at,
|
|
252
|
+
description=snippet.get("description", ""),
|
|
253
|
+
tags=snippet.get("tags", []),
|
|
254
|
+
duration_seconds=_parse_duration(
|
|
255
|
+
v.get("contentDetails", {}).get("duration", "PT0S")
|
|
256
|
+
),
|
|
257
|
+
view_count=v.get("statistics", {}).get("viewCount"),
|
|
258
|
+
like_count=v.get("statistics", {}).get("likeCount"),
|
|
259
|
+
comment_count=v.get("statistics", {}).get("commentCount"),
|
|
260
|
+
thumbnail_url=thumbnail_url,
|
|
261
|
+
)
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Authentication endpoint — exchanges email/password for Cognito tokens."""
|
|
2
|
+
|
|
3
|
+
import base64
|
|
4
|
+
import hashlib
|
|
5
|
+
import hmac
|
|
6
|
+
|
|
7
|
+
import boto3
|
|
8
|
+
import structlog
|
|
9
|
+
from botocore.exceptions import ClientError
|
|
10
|
+
from fastapi import APIRouter, HTTPException, Request
|
|
11
|
+
from jose import jwt as jose_jwt
|
|
12
|
+
from pydantic import BaseModel, EmailStr, Field
|
|
13
|
+
|
|
14
|
+
from pulse_engine.config import get_settings
|
|
15
|
+
from pulse_engine.middleware.rate_limit import check_rate_limit
|
|
16
|
+
|
|
17
|
+
logger = structlog.get_logger()
|
|
18
|
+
|
|
19
|
+
router = APIRouter(prefix="/auth", tags=["Auth"])
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class LoginRequest(BaseModel):
    """Request body accepted by the login endpoint."""

    # Validated as an e-mail address by pydantic's ``EmailStr``.
    email: EmailStr
    # Rejected by validation unless it is 8 to 128 characters long.
    password: str = Field(max_length=128, min_length=8)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LoginResponse(BaseModel):
    """Tokens and identity details returned after a successful login."""

    # The three tokens issued by the identity provider.
    id_token: str
    access_token: str
    refresh_token: str
    # Token lifetime as reported by the identity provider.
    expires_in: int
    token_type: str = "Bearer"
    # Identity details taken from the ID token's claims.
    tenant_id: str
    email: str
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _compute_secret_hash(email: str, client_id: str, client_secret: str) -> str:
|
|
38
|
+
message = email + client_id
|
|
39
|
+
digest = hmac.new(
|
|
40
|
+
client_secret.encode("utf-8"),
|
|
41
|
+
message.encode("utf-8"),
|
|
42
|
+
hashlib.sha256,
|
|
43
|
+
).digest()
|
|
44
|
+
return base64.b64encode(digest).decode("utf-8")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@router.post("/login", response_model=LoginResponse)
async def login(request: Request, body: LoginRequest) -> LoginResponse:
    """Authenticate with email and password, returns JWT tokens.

    Args:
        request: Incoming request, handed to the rate limiter.
        body: Validated credentials.

    Returns:
        The Cognito tokens plus the tenant ID and e-mail read from the ID
        token's claims.

    Raises:
        HTTPException: 401 for wrong credentials or an unknown user, 403
            when Cognito answers with a challenge instead of tokens, 500
            for any other Cognito client error.
    """
    # Imported locally so this handler does not depend on a module-level import.
    import asyncio

    # Strict per-IP rate limit: 5 attempts per 60 seconds
    check_rate_limit(request, scope="login", max_requests=5, window_seconds=60)
    settings = get_settings()

    client_id = settings.cognito_app_client_id
    client_secret = settings.cognito_app_client_secret

    auth_params: dict[str, str] = {
        "USERNAME": body.email,
        "PASSWORD": body.password,
    }

    # App clients configured with a secret require a SECRET_HASH parameter.
    if client_secret:
        auth_params["SECRET_HASH"] = _compute_secret_hash(
            body.email, client_id, client_secret
        )

    try:
        cognito = boto3.client("cognito-idp", region_name=settings.aws_region)
        # boto3 is synchronous; run the network call in a worker thread so
        # the event loop keeps serving other requests meanwhile.
        result = await asyncio.to_thread(
            cognito.initiate_auth,
            ClientId=client_id,
            AuthFlow="USER_PASSWORD_AUTH",
            AuthParameters=auth_params,
        )
    except ClientError as e:
        code = e.response["Error"]["Code"]
        # Same message for both codes so callers cannot probe for accounts.
        if code in ("NotAuthorizedException", "UserNotFoundException"):
            raise HTTPException(
                status_code=401, detail="Invalid email or password"
            ) from e
        logger.error("cognito_auth_error", error=str(e), code=code)
        raise HTTPException(
            status_code=500, detail="Authentication service error"
        ) from e

    # A challenge response (e.g. a forced password change or MFA) carries
    # ``ChallengeName`` and no ``AuthenticationResult``; answer explicitly
    # instead of failing with a KeyError.
    auth = result.get("AuthenticationResult")
    if not auth:
        challenge = result.get("ChallengeName", "unknown")
        logger.warning("cognito_auth_challenge", challenge=challenge)
        raise HTTPException(
            status_code=403,
            detail=f"Additional authentication step required: {challenge}",
        )

    # Decode id_token (without verification) to extract tenant_id
    claims = jose_jwt.get_unverified_claims(auth["IdToken"])
    return LoginResponse(
        id_token=auth["IdToken"],
        access_token=auth["AccessToken"],
        refresh_token=auth["RefreshToken"],
        expires_in=auth["ExpiresIn"],
        tenant_id=claims.get("custom:tenant_id", ""),
        email=claims.get("email", body.email),
    )
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import httpx
|
|
2
|
+
import structlog
|
|
3
|
+
from fastapi import APIRouter, Depends, Request
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
from pulse_engine.config import Settings, get_settings
|
|
7
|
+
from pulse_engine.services.opensearch import OpenSearchService
|
|
8
|
+
|
|
9
|
+
router = APIRouter()
|
|
10
|
+
logger = structlog.get_logger()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ServiceHealth(BaseModel):
    """Per-dependency status strings reported by the health endpoint."""

    # Each field holds the string produced by the matching check.
    opensearch: str
    prefect: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class HealthResponse(BaseModel):
    """Response body of the health endpoint."""

    # Overall verdict derived from the individual service checks.
    status: str
    # Application version and environment name taken from settings.
    version: str
    environment: str
    # Status of each backing service.
    services: ServiceHealth
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def _check_opensearch(opensearch: OpenSearchService) -> str:
|
|
26
|
+
if await opensearch.ping():
|
|
27
|
+
return "up"
|
|
28
|
+
return "down"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
async def _check_prefect(url: str) -> str:
    """Probe the Prefect API's ``/health`` endpoint.

    Args:
        url: Base URL of the Prefect API; a trailing slash is tolerated.

    Returns:
        ``"up"`` when the endpoint answers with a success status, ``"down"``
        on any failure. This check is best-effort and never raises.
    """
    try:
        # Built inside the ``try`` so a malformed ``url`` also yields "down".
        health_url = f"{url.rstrip('/')}/health"
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.get(health_url)
            resp.raise_for_status()
        return "up"
    except Exception as exc:
        # Record why the probe failed; otherwise a timeout, a refused
        # connection and an HTTP error status are indistinguishable.
        logger.warning("prefect_health_check_failed", url=url, error=repr(exc))
        return "down"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@router.get("/health", response_model=HealthResponse)
async def health_check(
    request: Request,
    settings: Settings = Depends(get_settings),
) -> HealthResponse:
    """Report overall service health along with each dependency's status.

    The overall status is ``"ok"`` only when both OpenSearch and Prefect
    report ``"up"``; otherwise it is ``"degraded"``.
    """
    search_service: OpenSearchService = request.app.state.opensearch
    search_state = await _check_opensearch(search_service)
    prefect_state = await _check_prefect(settings.prefect_api_url)

    if search_state == "up" and prefect_state == "up":
        verdict = "ok"
    else:
        verdict = "degraded"

    services = ServiceHealth(opensearch=search_state, prefect=prefect_state)
    return HealthResponse(
        status=verdict,
        version=settings.app_version,
        environment=settings.app_env,
        services=services,
    )
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from fastapi import APIRouter
|
|
2
|
+
|
|
3
|
+
from pulse_engine.api.v1.auth import router as auth_router
|
|
4
|
+
from pulse_engine.api.v1.health import router as health_router
|
|
5
|
+
from pulse_engine.deployment.router import router as deployments_router
|
|
6
|
+
from pulse_engine.extractor.router import router as jobs_router
|
|
7
|
+
from pulse_engine.processor.router import router as process_router
|
|
8
|
+
from pulse_engine.storage.router import router as kb_router
|
|
9
|
+
|
|
10
|
+
# Aggregate router: every v1 sub-router is mounted beneath ``/api/v1``.
v1_router = APIRouter(prefix="/api/v1")

# Registration order is kept explicit because it is the order routes are added.
for _sub_router in (
    auth_router,
    health_router,
    kb_router,
    jobs_router,
    process_router,
    deployments_router,
):
    v1_router.include_router(_sub_router)
del _sub_router
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Background task that monitors Prefect for stalled or failed chained flows."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from datetime import UTC, datetime, timedelta
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from pulse_engine.config import Settings
|
|
12
|
+
from pulse_engine.deployment.job_launcher import JobLauncher
|
|
13
|
+
from pulse_engine.extractor.orchestrator.base import BaseOrchestratorAdapter
|
|
14
|
+
from pulse_engine.extractor.repository import JobRepository
|
|
15
|
+
from pulse_engine.extractor.stage_repository import StageRepository
|
|
16
|
+
|
|
17
|
+
logger = structlog.get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
_NEXT_STAGE = {
|
|
20
|
+
"extraction": ("processor", "processing"),
|
|
21
|
+
"processing": ("storage", "storage"),
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ChainRecoveryTask:
|
|
26
|
+
"""Polls Prefect for stalled chained flows and auto-recovers."""
|
|
27
|
+
|
|
28
|
+
def __init__(
|
|
29
|
+
self,
|
|
30
|
+
stage_repo: StageRepository,
|
|
31
|
+
job_repo: JobRepository,
|
|
32
|
+
job_launcher: JobLauncher,
|
|
33
|
+
orchestrator: BaseOrchestratorAdapter,
|
|
34
|
+
settings: Settings,
|
|
35
|
+
) -> None:
|
|
36
|
+
self._stage_repo = stage_repo
|
|
37
|
+
self._job_repo = job_repo
|
|
38
|
+
self._job_launcher = job_launcher
|
|
39
|
+
self._orchestrator = orchestrator
|
|
40
|
+
self._grace_seconds = settings.pulse_chain_grace_period_seconds
|
|
41
|
+
|
|
42
|
+
async def check_once(self) -> None:
|
|
43
|
+
"""Check running stages against Prefect for failures."""
|
|
44
|
+
stages = await self._stage_repo.get_running_stages()
|
|
45
|
+
for stage in stages:
|
|
46
|
+
if not stage.prefect_flow_run_id:
|
|
47
|
+
continue
|
|
48
|
+
try:
|
|
49
|
+
run_status = await self._orchestrator.get_run_status(
|
|
50
|
+
stage.prefect_flow_run_id
|
|
51
|
+
)
|
|
52
|
+
if run_status.status in ("failed", "cancelled"):
|
|
53
|
+
await self._stage_repo.update_status(
|
|
54
|
+
stage.id,
|
|
55
|
+
run_status.status,
|
|
56
|
+
)
|
|
57
|
+
job = await self._job_repo.get_by_id(stage.job_id)
|
|
58
|
+
if job:
|
|
59
|
+
await self._job_repo.update_status(
|
|
60
|
+
stage.job_id, job.tenant_id, "failed"
|
|
61
|
+
)
|
|
62
|
+
logger.warning(
|
|
63
|
+
"chain_recovery_stage_failed",
|
|
64
|
+
job_id=stage.job_id,
|
|
65
|
+
stage=stage.stage,
|
|
66
|
+
flow_run_status=run_status.status,
|
|
67
|
+
)
|
|
68
|
+
except Exception:
|
|
69
|
+
logger.warning(
|
|
70
|
+
"chain_recovery_check_error",
|
|
71
|
+
stage_id=stage.id,
|
|
72
|
+
exc_info=True,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
async def check_stalled_chains(self) -> None:
|
|
76
|
+
"""Auto-trigger next stage for completed stages where chain stalled."""
|
|
77
|
+
cutoff = datetime.now(UTC) - timedelta(
|
|
78
|
+
seconds=self._grace_seconds,
|
|
79
|
+
)
|
|
80
|
+
stages = await self._stage_repo.get_completed_unchained_stages()
|
|
81
|
+
|
|
82
|
+
for stage in stages:
|
|
83
|
+
if stage.completed_at and stage.completed_at < cutoff:
|
|
84
|
+
next_info = _NEXT_STAGE.get(stage.stage)
|
|
85
|
+
if not next_info:
|
|
86
|
+
continue
|
|
87
|
+
|
|
88
|
+
next_stage_key, next_stage_name = next_info
|
|
89
|
+
job = await self._job_repo.get_by_id(stage.job_id)
|
|
90
|
+
if not job:
|
|
91
|
+
continue
|
|
92
|
+
|
|
93
|
+
params = getattr(job, "parameters", {}) or {}
|
|
94
|
+
if not params.get("chain", False):
|
|
95
|
+
continue
|
|
96
|
+
|
|
97
|
+
orchestrator = params.get("orchestrator", "prefect")
|
|
98
|
+
compute = params.get("compute", "ecs")
|
|
99
|
+
|
|
100
|
+
try:
|
|
101
|
+
result = await self._job_launcher.launch(
|
|
102
|
+
job_id=job.job_id,
|
|
103
|
+
tenant_id=job.tenant_id,
|
|
104
|
+
product=job.product,
|
|
105
|
+
stage=next_stage_key,
|
|
106
|
+
orchestrator=orchestrator,
|
|
107
|
+
compute=compute,
|
|
108
|
+
chain=True,
|
|
109
|
+
config=params.get("config", {}),
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
await self._stage_repo.create(
|
|
113
|
+
job_id=job.job_id,
|
|
114
|
+
stage=next_stage_name,
|
|
115
|
+
prefect_flow_run_id=result.flow_run_id,
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
logger.info(
|
|
119
|
+
"chain_recovery_triggered_next_stage",
|
|
120
|
+
job_id=job.job_id,
|
|
121
|
+
from_stage=stage.stage,
|
|
122
|
+
to_stage=next_stage_name,
|
|
123
|
+
flow_run_id=result.flow_run_id,
|
|
124
|
+
)
|
|
125
|
+
except Exception:
|
|
126
|
+
logger.warning(
|
|
127
|
+
"chain_recovery_trigger_error",
|
|
128
|
+
job_id=stage.job_id,
|
|
129
|
+
stage=stage.stage,
|
|
130
|
+
exc_info=True,
|
|
131
|
+
)
|
|
File without changes
|