heimdex-worker-sdk 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. heimdex_worker_sdk-0.1.0/.claude/CLAUDE.md +62 -0
  2. heimdex_worker_sdk-0.1.0/.github/workflows/ci.yml +24 -0
  3. heimdex_worker_sdk-0.1.0/.github/workflows/release.yml +62 -0
  4. heimdex_worker_sdk-0.1.0/.gitignore +41 -0
  5. heimdex_worker_sdk-0.1.0/PKG-INFO +10 -0
  6. heimdex_worker_sdk-0.1.0/README.md +46 -0
  7. heimdex_worker_sdk-0.1.0/pyproject.toml +24 -0
  8. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/__init__.py +76 -0
  9. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/aircloud_client.py +198 -0
  10. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/content_type.py +52 -0
  11. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/drive_keys.py +53 -0
  12. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/gpu_orchestrator.py +394 -0
  13. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/internal_api.py +545 -0
  14. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/message_adapters.py +139 -0
  15. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/s3.py +172 -0
  16. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/settings.py +152 -0
  17. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/sqs_client.py +133 -0
  18. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/sqs_consumer.py +305 -0
  19. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/youtube_api.py +191 -0
  20. heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/youtube_keys.py +62 -0
  21. heimdex_worker_sdk-0.1.0/tests/test_drive_keys.py +84 -0
  22. heimdex_worker_sdk-0.1.0/tests/test_gpu_orchestrator.py +415 -0
  23. heimdex_worker_sdk-0.1.0/tests/test_internal_api.py +749 -0
  24. heimdex_worker_sdk-0.1.0/tests/test_message_adapters.py +115 -0
  25. heimdex_worker_sdk-0.1.0/tests/test_s3.py +96 -0
  26. heimdex_worker_sdk-0.1.0/tests/test_settings.py +111 -0
  27. heimdex_worker_sdk-0.1.0/tests/test_sqs_client.py +209 -0
  28. heimdex_worker_sdk-0.1.0/tests/test_sqs_consumer.py +251 -0
  29. heimdex_worker_sdk-0.1.0/tests/test_youtube_api.py +142 -0
  30. heimdex_worker_sdk-0.1.0/tests/test_youtube_keys.py +115 -0
@@ -0,0 +1,62 @@
+ # Heimdex Worker SDK
+
+ Shared worker utilities for the Heimdex ecosystem.
+
+ ## Quick Reference
+
+ ```bash
+ pip install -e ".[dev]"
+ pytest -v
+ ```
+
+ ## Design Philosophy
+
+ This package provides shared infrastructure for Heimdex workers:
+ - S3/MinIO dual-mode client
+ - SQS consumer with heartbeat and visibility extension
+ - Aircloud GPU worker lifecycle management (start/stop/scale)
+ - Internal API client for livecommerce endpoints
+ - Deterministic S3 key generation helpers
+ - MIME type classification
+
+ Dependencies are intentionally minimal: `pydantic-settings`, `boto3`, `requests`.
+
+ ## Package Structure
+
+ ```
+ src/heimdex_worker_sdk/
+ ├── aircloud_client.py    # Aircloud External API (start/stop/scale/status)
+ ├── gpu_orchestrator.py   # Auto start/stop GPU workers based on SQS queue depth
+ ├── sqs_client.py         # Thin boto3 SQS wrapper
+ ├── sqs_consumer.py       # Background SQS polling loop with heartbeat
+ ├── s3.py                 # S3/MinIO dual-mode client
+ ├── settings.py           # WorkerSettings (pydantic-settings, 60+ env vars)
+ ├── internal_api.py       # HTTP client for livecommerce /internal/drive/* endpoints
+ ├── drive_keys.py         # Deterministic S3 key generation (Drive files)
+ ├── youtube_keys.py       # S3 key generation (YouTube files)
+ ├── youtube_api.py        # HTTP client for YouTube internal endpoints
+ ├── content_type.py       # MIME type classification
+ └── message_adapters.py   # SQS message → dataclass converters
+ ```
+
+ ## Consumers
+
+ Changes here affect ALL downstream repos. Always verify impact before merging.
+
+ | Consumer | What it uses |
+ |---------|-------------|
+ | dev-heimdex-for-livecommerce (8 workers + API) | Full SDK |
+ | dev-heimdex-playground | aircloud_client, gpu_orchestrator |
+
+ ## Release
+
+ - Trigger: Git tag `v*`
+ - Pipeline: Test -> Build -> Publish to PyPI -> GitHub Release
+ - Current version: 0.1.0
+
+ ## Rules
+
+ - Keep dependencies minimal (no torch, cv2, or heavy ML libs)
+ - All Aircloud calls must be fire-and-forget (never block ingest)
+ - GPU orchestrator settings are injected via `configure_settings_provider()`, not hardcoded imports (see the sketch below)
+ - Version bumps must consider downstream consumer compatibility
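The settings-injection rule can be illustrated with a small sketch. The exact signatures of `configure_settings_provider()` and `ensure_worker_running()` are not shown in this diff, so the callable-based wiring below is an assumption, not the SDK's documented API:

```python
# Hypothetical wiring sketch. Assumes configure_settings_provider() accepts a
# zero-argument callable and that ensure_worker_running() takes no arguments;
# neither signature is shown in this diff.
from heimdex_worker_sdk import (
    configure_settings_provider,
    ensure_worker_running,
    get_worker_settings,
)

# Inject settings lazily instead of importing a hardcoded settings module.
configure_settings_provider(get_worker_settings)

# Later (e.g. right before publishing an SQS job), wake GPU workers if needed.
# Fire-and-forget: orchestrator calls must never block ingest.
ensure_worker_running()
```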
@@ -0,0 +1,24 @@
+ name: CI
+
+ on:
+   push:
+     branches: [main]
+   pull_request:
+     branches: [main]
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: '3.11'
+
+       - name: Install dependencies
+         run: pip install -e ".[dev]"
+
+       - name: Run tests
+         run: pytest -v
@@ -0,0 +1,62 @@
+ name: Release
+
+ on:
+   push:
+     tags:
+       - 'v*'
+   workflow_dispatch:
+
+ jobs:
+   test:
+     runs-on: ubuntu-latest
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: '3.11'
+
+       - name: Install dependencies
+         run: pip install -e ".[dev]"
+
+       - name: Run tests
+         run: pytest
+
+   publish:
+     needs: test
+     runs-on: ubuntu-latest
+     if: startsWith(github.ref, 'refs/tags/v')
+     permissions:
+       contents: read
+       id-token: write
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Set up Python
+         uses: actions/setup-python@v5
+         with:
+           python-version: '3.11'
+
+       - name: Install build dependencies
+         run: pip install build
+
+       - name: Build distribution
+         run: python -m build
+
+       - name: Publish to PyPI
+         uses: pypa/gh-action-pypi-publish@release/v1
+
+   github-release:
+     needs: publish
+     runs-on: ubuntu-latest
+     if: startsWith(github.ref, 'refs/tags/v')
+     permissions:
+       contents: write
+     steps:
+       - uses: actions/checkout@v4
+
+       - name: Create GitHub Release
+         uses: softprops/action-gh-release@v2
+         with:
+           generate_release_notes: true
@@ -0,0 +1,41 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.egg-info/
+ *.egg
+ .eggs/
+ dist/
+ build/
+ *.whl
+
+ # Virtual environments
+ .venv/
+ venv/
+ env/
+
+ # Testing
+ .pytest_cache/
+ .coverage
+ htmlcov/
+ .tox/
+ .mypy_cache/
+
+ # Environment / secrets
+ .env
+ .env.*
+ !.env.example
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # oh-my-claudecode (local agent state)
+ .omc/
@@ -0,0 +1,10 @@
+ Metadata-Version: 2.4
+ Name: heimdex-worker-sdk
+ Version: 0.1.0
+ Summary: Shared S3 client, SQS consumer, Aircloud orchestrator, and worker utilities for Heimdex
+ Requires-Python: >=3.11
+ Requires-Dist: boto3>=1.28
+ Requires-Dist: pydantic-settings>=2.0
+ Requires-Dist: requests>=2.31
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7; extra == 'dev'
@@ -0,0 +1,46 @@
+ # heimdex-worker-sdk
+
+ Shared worker utilities for the Heimdex ecosystem: S3 client, SQS consumer, Aircloud GPU orchestrator, and internal API helpers.
+
+ ## Install
+
+ ```bash
+ pip install heimdex-worker-sdk
+ ```
+
+ ## Modules
+
+ | Module | Purpose |
+ |--------|---------|
+ | `aircloud_client` | HTTP client for Aircloud External API (start/stop/scale/status) |
+ | `gpu_orchestrator` | Auto start/stop GPU workers based on SQS queue depth |
+ | `sqs_client` | Thin boto3 SQS wrapper |
+ | `sqs_consumer` | Background SQS polling loop with heartbeat |
+ | `s3` | S3/MinIO dual-mode client |
+ | `settings` | WorkerSettings via pydantic-settings |
+ | `internal_api` | HTTP client for livecommerce internal endpoints |
+ | `drive_keys` | Deterministic S3 key generation for Drive files |
+ | `youtube_keys` | S3 key generation for YouTube files |
+ | `youtube_api` | HTTP client for YouTube internal endpoints |
+ | `content_type` | MIME type classification |
+ | `message_adapters` | SQS message to dataclass converters |
+
+ ## Consumers
+
+ | Product | What it uses |
+ |---------|-------------|
+ | dev-heimdex-for-livecommerce | Full SDK (all modules) |
+ | dev-heimdex-playground | `aircloud_client`, `gpu_orchestrator` |
+
+ ## Release
+
+ 1. Update `version` in `pyproject.toml`
+ 2. `git tag vX.Y.Z && git push origin vX.Y.Z`
+ 3. CI runs tests, builds wheel, publishes to PyPI, creates GitHub Release
+
+ ## Development
+
+ ```bash
+ pip install -e ".[dev]"
+ pytest -v
+ ```
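As a point of reference, the pure helpers exported from the package root behave as follows; the return values follow directly from the `content_type` implementation in this release, and the MIME strings are just examples:

```python
from heimdex_worker_sdk import classify_mime, is_image, is_supported_mime, is_video

classify_mime("video/mp4")        # 'video'   (any "video/*" type)
classify_mime("image/jpeg")       # 'image'   (allow-listed image types)
classify_mime("application/pdf")  # 'unknown'

is_supported_mime("image/webp")   # True, i.e. classify_mime() != "unknown"
is_image("image/heic")            # True
is_video("video/quicktime")       # True
```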
@@ -0,0 +1,24 @@
+ [build-system]
+ requires = ["hatchling"]
+ build-backend = "hatchling.build"
+
+ [project]
+ name = "heimdex-worker-sdk"
+ version = "0.1.0"
+ description = "Shared S3 client, SQS consumer, Aircloud orchestrator, and worker utilities for Heimdex"
+ requires-python = ">=3.11"
+ dependencies = [
+     "pydantic-settings>=2.0",
+     "boto3>=1.28",
+     "requests>=2.31",
+ ]
+
+ [project.optional-dependencies]
+ dev = ["pytest>=7"]
+
+ [tool.hatch.build.targets.wheel]
+ packages = ["src/heimdex_worker_sdk"]
+
+ [tool.pytest.ini_options]
+ testpaths = ["tests"]
+ python_files = "test_*.py"
@@ -0,0 +1,76 @@
+ """Heimdex Worker SDK — shared settings, S3 client, drive key helpers, and internal API client."""
+
+ from heimdex_worker_sdk.content_type import (
+     classify_mime,
+     is_image,
+     is_supported_mime,
+     is_video,
+ )
+ from heimdex_worker_sdk.drive_keys import (
+     audio_s3_key,
+     drive_video_id,
+     enrichment_keyframe_s3_key,
+     enrichment_keyframe_s3_prefix,
+     proxy_s3_key,
+     scene_manifest_s3_key,
+     thumbnail_s3_key,
+     thumbnail_s3_prefix,
+ )
+ from heimdex_worker_sdk.s3 import S3Client
+ from heimdex_worker_sdk.settings import WorkerSettings, get_worker_settings
+ from heimdex_worker_sdk.internal_api import (
+     AccessToken,
+     ClaimedConnection,
+     ClaimedFile,
+     ClaimedProcessingFile,
+     InternalAPIClient,
+     UpsertResult,
+ )
+
+ from heimdex_worker_sdk.sqs_client import SQSJobClient, SQSMessage
+ from heimdex_worker_sdk.sqs_consumer import (
+     InvalidMessageError,
+     SQSConsumerLoop,
+     VisibilityHeartbeat,
+ )
+ from heimdex_worker_sdk.message_adapters import (
+     sqs_to_claimed_file,
+     sqs_to_claimed_processing_file,
+ )
+ from heimdex_worker_sdk.gpu_orchestrator import (
+     configure_settings_provider,
+     ensure_worker_running,
+ )
+
+ __all__ = [
+     "WorkerSettings",
+     "get_worker_settings",
+     "S3Client",
+     "InternalAPIClient",
+     "ClaimedConnection",
+     "ClaimedFile",
+     "UpsertResult",
+     "AccessToken",
+     "ClaimedProcessingFile",
+     "SQSJobClient",
+     "SQSMessage",
+     "InvalidMessageError",
+     "SQSConsumerLoop",
+     "VisibilityHeartbeat",
+     "sqs_to_claimed_file",
+     "sqs_to_claimed_processing_file",
+     "configure_settings_provider",
+     "ensure_worker_running",
+     "audio_s3_key",
+     "classify_mime",
+     "drive_video_id",
+     "enrichment_keyframe_s3_key",
+     "enrichment_keyframe_s3_prefix",
+     "is_image",
+     "is_supported_mime",
+     "is_video",
+     "proxy_s3_key",
+     "scene_manifest_s3_key",
+     "thumbnail_s3_key",
+     "thumbnail_s3_prefix",
+ ]
@@ -0,0 +1,198 @@
+ """
+ Aircloud External API client for GPU worker lifecycle management.
+
+ Thin HTTP wrapper over the Aircloud External API (start/stop/scale/status).
+ Used by:
+ - sqs_producer.py (API): wake up workers when publishing SQS jobs
+ - gpu_orchestrator.py (drive-worker): shut down idle workers periodically
+
+ All methods are synchronous and fire-and-forget safe. Errors are logged,
+ never raised to callers (unless explicitly requested via raise_on_error).
+
+ API docs: https://external.aieev.cloud:5007/external/api/v1
+ Auth: Bearer token via API key generated in Aircloud web console.
+ """
+
+ import logging
+ import time
+ from dataclasses import dataclass
+ from typing import Any, Optional
+
+ import requests
+ from requests.adapters import HTTPAdapter
+ from urllib3.util.retry import Retry
+
+ logger = logging.getLogger(__name__)
+
+ _DEFAULT_BASE_URL = "https://external.aieev.cloud:5007/external/api/v1"
+ _DEFAULT_TIMEOUT = 10  # seconds
+
+
+ @dataclass(frozen=True)
+ class EndpointStatus:
+     """Parsed response from GET /endpoints/{endpoint_id}."""
+
+     endpoint_id: str
+     name: str
+     is_active: bool
+     num_replicas: int
+     replica_status_summary: dict[str, int]
+     enable_autoscaling: bool
+     instance_type_name: str
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "EndpointStatus":
+         return cls(
+             endpoint_id=data.get("endpoint_id", ""),
+             name=data.get("name", ""),
+             is_active=data.get("is_active", False),
+             num_replicas=data.get("num_replicas", 0),
+             replica_status_summary=data.get("replica_status_summary", {}),
+             enable_autoscaling=data.get("enable_autoscaling", False),
+             instance_type_name=data.get("instance_type_name", ""),
+         )
+
+
+ class AircloudClient:
+     """Synchronous HTTP client for the Aircloud External API.
+
+     Args:
+         api_key: Bearer token for authentication.
+         base_url: API base URL (default: production Aircloud endpoint).
+         timeout: Request timeout in seconds.
+         max_retries: Number of retries on transient failures (5xx, connection errors).
+     """
+
+     def __init__(
+         self,
+         api_key: str,
+         base_url: str = _DEFAULT_BASE_URL,
+         timeout: int = _DEFAULT_TIMEOUT,
+         max_retries: int = 2,
+     ) -> None:
+         self._api_key = api_key
+         self._base_url = base_url.rstrip("/")
+         self._timeout = timeout
+
+         self._session = requests.Session()
+         self._session.headers.update(
+             {
+                 "Authorization": f"Bearer {api_key}",
+                 "Content-Type": "application/json",
+             }
+         )
+
+         retry = Retry(
+             total=max_retries,
+             backoff_factor=0.5,
+             status_forcelist=[500, 502, 503, 504],
+             allowed_methods=["GET", "POST"],
+         )
+         adapter = HTTPAdapter(max_retries=retry)
+         self._session.mount("https://", adapter)
+         self._session.mount("http://", adapter)
+
+     # ── Public API ─────────────────────────────────────────────────
+
+     def get_status(self, endpoint_id: str) -> Optional[EndpointStatus]:
+         """Get current endpoint status. Returns None on failure."""
+         try:
+             resp = self._session.get(
+                 f"{self._base_url}/endpoints/{endpoint_id}",
+                 timeout=self._timeout,
+             )
+             resp.raise_for_status()
+             return EndpointStatus.from_dict(resp.json())
+         except Exception:
+             logger.exception(
+                 "aircloud_get_status_failed",
+                 extra={"endpoint_id": endpoint_id},
+             )
+             return None
+
+     def start(self, endpoint_id: str) -> bool:
+         """Start an inactive endpoint. Returns True on success.
+
+         Idempotent: starting an already-active endpoint is a no-op on
+         the Aircloud side (returns success).
+         """
+         try:
+             resp = self._session.post(
+                 f"{self._base_url}/endpoints/{endpoint_id}/start",
+                 timeout=self._timeout,
+             )
+             resp.raise_for_status()
+             data = resp.json()
+             logger.info(
+                 "aircloud_endpoint_started",
+                 extra={
+                     "endpoint_id": endpoint_id,
+                     "is_active": data.get("is_active"),
+                     "response_message": data.get("message", ""),
+                 },
+             )
+             return True
+         except Exception:
+             logger.exception(
+                 "aircloud_start_failed",
+                 extra={"endpoint_id": endpoint_id},
+             )
+             return False
+
+     def stop(self, endpoint_id: str) -> bool:
+         """Stop an active endpoint. Returns True on success."""
+         try:
+             resp = self._session.post(
+                 f"{self._base_url}/endpoints/{endpoint_id}/stop",
+                 timeout=self._timeout,
+             )
+             resp.raise_for_status()
+             data = resp.json()
+             logger.info(
+                 "aircloud_endpoint_stopped",
+                 extra={
+                     "endpoint_id": endpoint_id,
+                     "is_active": data.get("is_active"),
+                     "response_message": data.get("message", ""),
+                 },
+             )
+             return True
+         except Exception:
+             logger.exception(
+                 "aircloud_stop_failed",
+                 extra={"endpoint_id": endpoint_id},
+             )
+             return False
+
+     def scale(self, endpoint_id: str, num_replicas: int) -> bool:
+         """Scale replicas for an active endpoint. Returns True on success.
+
+         Requires autoscaling to be DISABLED on the Aircloud web console.
+         """
+         try:
+             resp = self._session.post(
+                 f"{self._base_url}/endpoints/{endpoint_id}/scale",
+                 json={"num_replicas": num_replicas},
+                 timeout=self._timeout,
+             )
+             resp.raise_for_status()
+             data = resp.json()
+             logger.info(
+                 "aircloud_endpoint_scaled",
+                 extra={
+                     "endpoint_id": endpoint_id,
+                     "previous_replicas": data.get("previous_replicas"),
+                     "current_replicas": data.get("current_replicas"),
+                     "response_message": data.get("message", ""),
+                 },
+             )
+             return True
+         except Exception:
+             logger.exception(
+                 "aircloud_scale_failed",
+                 extra={
+                     "endpoint_id": endpoint_id,
+                     "num_replicas": num_replicas,
+                 },
+             )
+             return False
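A short usage sketch of the client above; the API key and endpoint ID are placeholders, and every call returns a value (bool or None) instead of raising, per the fire-and-forget design:

```python
from heimdex_worker_sdk.aircloud_client import AircloudClient

client = AircloudClient(api_key="YOUR_AIRCLOUD_API_KEY")  # placeholder key

status = client.get_status("ep-123")      # EndpointStatus, or None on failure
if status is not None and not status.is_active:
    client.start("ep-123")                # True on success, False on failure (logged)

# Scaling requires autoscaling to be disabled in the Aircloud console.
client.scale("ep-123", num_replicas=2)
client.stop("ep-123")
```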
@@ -0,0 +1,52 @@
+ """Centralized MIME-type classification for the content pipeline.
+
+ Shared between API (via re-export at ``app.modules.content_type``) and
+ drive-worker (direct import from ``heimdex_worker_sdk.content_type``).
+
+ All MIME-type decisions must flow through these helpers — no scattered
+ ``startswith("video/")`` checks elsewhere in the codebase.
+ """
+
+ from __future__ import annotations
+
+ IMAGE_MIME_TYPES: frozenset[str] = frozenset({
+     "image/jpeg",
+     "image/png",
+     "image/webp",
+     "image/heic",
+     "image/heif",
+ })
+
+ VIDEO_MIME_PREFIX: str = "video/"
+
+
+ def classify_mime(mime_type: str) -> str:
+     """Classify a MIME type string as ``"video"``, ``"image"``, or ``"unknown"``.
+
+     >>> classify_mime("video/mp4")
+     'video'
+     >>> classify_mime("image/jpeg")
+     'image'
+     >>> classify_mime("application/pdf")
+     'unknown'
+     """
+     if mime_type in IMAGE_MIME_TYPES:
+         return "image"
+     if mime_type.startswith(VIDEO_MIME_PREFIX):
+         return "video"
+     return "unknown"
+
+
+ def is_supported_mime(mime_type: str) -> bool:
+     """Return ``True`` if *mime_type* is a supported video or image type."""
+     return classify_mime(mime_type) != "unknown"
+
+
+ def is_image(mime_type: str) -> bool:
+     """Return ``True`` if *mime_type* is a supported image type."""
+     return mime_type in IMAGE_MIME_TYPES
+
+
+ def is_video(mime_type: str) -> bool:
+     """Return ``True`` if *mime_type* is any video type."""
+     return mime_type.startswith(VIDEO_MIME_PREFIX)
@@ -0,0 +1,53 @@
+ """Drive S3 key helpers — standalone copy of ``app.modules.drive.keys``.
+
+ Pure functions, only ``hashlib`` dependency. Kept 1:1 with the API copy so
+ that workers and the API always generate identical keys.
+ """
+
+ import hashlib
+
+
+ def drive_video_id(org_id: str, google_file_id: str) -> str:
+     """Deterministic video_id for Drive files. Collision-resistant, idempotent."""
+     digest = hashlib.sha256(f"{org_id}:{google_file_id}".encode()).hexdigest()[:16]
+     return f"gd_{digest}"
+
+
+ def proxy_s3_key(org_id: str, drive_id: str, google_file_id: str) -> str:
+     return f"{org_id}/drive/{drive_id}/{google_file_id}/proxy.mp4"
+
+
+ def thumbnail_s3_key(org_id: str, video_id: str, scene_id: str) -> str:
+     return f"{org_id}/drive/thumbs/{video_id}/{scene_id}.jpg"
+
+
+ def thumbnail_s3_prefix(org_id: str, video_id: str) -> str:
+     return f"{org_id}/drive/thumbs/{video_id}/"
+
+
+ def audio_s3_key(org_id: str, video_id: str) -> str:
+     return f"{org_id}/drive/audio/{video_id}/audio.wav"
+
+
+ def enrichment_keyframe_s3_prefix(org_id: str, video_id: str) -> str:
+     return f"{org_id}/drive/keyframes/{video_id}/"
+
+
+ def enrichment_keyframe_s3_key(
+     org_id: str, video_id: str, scene_id: str,
+ ) -> str:
+     return f"{org_id}/drive/keyframes/{video_id}/{scene_id}.jpg"
+
+
+ def scene_manifest_s3_key(org_id: str, video_id: str) -> str:
+     return f"{org_id}/drive/manifests/{video_id}/scenes.json"
+
+
+ def original_s3_key(org_id: str, drive_id: str, google_file_id: str) -> str:
+     """S3 key for the original (pre-transcode) file uploaded by drive-worker."""
+     return f"{org_id}/drive/{drive_id}/{google_file_id}/original"
+
+
+ def stt_result_s3_key(org_id: str, video_id: str) -> str:
+     """S3 key for STT result JSON used by speech-aware scene splitting."""
+     return f"{org_id}/drive/stt/{video_id}/stt_result.json"
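For illustration, all artifacts for a single Drive video hang off deterministic, idempotent keys; the org/drive/file IDs below are placeholders:

```python
from heimdex_worker_sdk.drive_keys import (
    audio_s3_key,
    drive_video_id,
    proxy_s3_key,
    scene_manifest_s3_key,
)

video_id = drive_video_id("org-1", "gfileA")    # "gd_" + first 16 hex chars of sha256("org-1:gfileA")
drive_video_id("org-1", "gfileA") == video_id   # True: same inputs always give the same id

proxy_s3_key("org-1", "drv-7", "gfileA")        # 'org-1/drive/drv-7/gfileA/proxy.mp4'
audio_s3_key("org-1", video_id)                 # f'org-1/drive/audio/{video_id}/audio.wav'
scene_manifest_s3_key("org-1", video_id)        # f'org-1/drive/manifests/{video_id}/scenes.json'
```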