heimdex-worker-sdk 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- heimdex_worker_sdk-0.1.0/.claude/CLAUDE.md +62 -0
- heimdex_worker_sdk-0.1.0/.github/workflows/ci.yml +24 -0
- heimdex_worker_sdk-0.1.0/.github/workflows/release.yml +62 -0
- heimdex_worker_sdk-0.1.0/.gitignore +41 -0
- heimdex_worker_sdk-0.1.0/PKG-INFO +10 -0
- heimdex_worker_sdk-0.1.0/README.md +46 -0
- heimdex_worker_sdk-0.1.0/pyproject.toml +24 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/__init__.py +76 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/aircloud_client.py +198 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/content_type.py +52 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/drive_keys.py +53 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/gpu_orchestrator.py +394 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/internal_api.py +545 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/message_adapters.py +139 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/s3.py +172 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/settings.py +152 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/sqs_client.py +133 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/sqs_consumer.py +305 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/youtube_api.py +191 -0
- heimdex_worker_sdk-0.1.0/src/heimdex_worker_sdk/youtube_keys.py +62 -0
- heimdex_worker_sdk-0.1.0/tests/test_drive_keys.py +84 -0
- heimdex_worker_sdk-0.1.0/tests/test_gpu_orchestrator.py +415 -0
- heimdex_worker_sdk-0.1.0/tests/test_internal_api.py +749 -0
- heimdex_worker_sdk-0.1.0/tests/test_message_adapters.py +115 -0
- heimdex_worker_sdk-0.1.0/tests/test_s3.py +96 -0
- heimdex_worker_sdk-0.1.0/tests/test_settings.py +111 -0
- heimdex_worker_sdk-0.1.0/tests/test_sqs_client.py +209 -0
- heimdex_worker_sdk-0.1.0/tests/test_sqs_consumer.py +251 -0
- heimdex_worker_sdk-0.1.0/tests/test_youtube_api.py +142 -0
- heimdex_worker_sdk-0.1.0/tests/test_youtube_keys.py +115 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Heimdex Worker SDK
|
|
2
|
+
|
|
3
|
+
Shared worker utilities for the Heimdex ecosystem.
|
|
4
|
+
|
|
5
|
+
## Quick Reference
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install -e ".[dev]"
|
|
9
|
+
pytest -v
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
## Design Philosophy
|
|
13
|
+
|
|
14
|
+
This package provides shared infrastructure for Heimdex workers:
|
|
15
|
+
- S3/MinIO dual-mode client
|
|
16
|
+
- SQS consumer with heartbeat and visibility extension
|
|
17
|
+
- Aircloud GPU worker lifecycle management (start/stop/scale)
|
|
18
|
+
- Internal API client for livecommerce endpoints
|
|
19
|
+
- Deterministic S3 key generation helpers
|
|
20
|
+
- MIME type classification
|
|
21
|
+
|
|
22
|
+
Dependencies are intentionally minimal: `pydantic-settings`, `boto3`, `requests`.
|
|
23
|
+
|
|
24
|
+
## Package Structure
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
src/heimdex_worker_sdk/
|
|
28
|
+
├── aircloud_client.py # Aircloud External API (start/stop/scale/status)
|
|
29
|
+
├── gpu_orchestrator.py # Auto start/stop GPU workers based on SQS queue depth
|
|
30
|
+
├── sqs_client.py # Thin boto3 SQS wrapper
|
|
31
|
+
├── sqs_consumer.py # Background SQS polling loop with heartbeat
|
|
32
|
+
├── s3.py # S3/MinIO dual-mode client
|
|
33
|
+
├── settings.py # WorkerSettings (pydantic-settings, 60+ env vars)
|
|
34
|
+
├── internal_api.py # HTTP client for livecommerce /internal/drive/* endpoints
|
|
35
|
+
├── drive_keys.py # Deterministic S3 key generation (Drive files)
|
|
36
|
+
├── youtube_keys.py # S3 key generation (YouTube files)
|
|
37
|
+
├── youtube_api.py # HTTP client for YouTube internal endpoints
|
|
38
|
+
├── content_type.py # MIME type classification
|
|
39
|
+
└── message_adapters.py # SQS message → dataclass converters
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Consumers
|
|
43
|
+
|
|
44
|
+
Changes here affect ALL downstream repos. Always verify impact before merging.
|
|
45
|
+
|
|
46
|
+
| Consumer | What it uses |
|
|
47
|
+
|---------|-------------|
|
|
48
|
+
| dev-heimdex-for-livecommerce (8 workers + API) | Full SDK |
|
|
49
|
+
| dev-heimdex-playground | aircloud_client, gpu_orchestrator |
|
|
50
|
+
|
|
51
|
+
## Release
|
|
52
|
+
|
|
53
|
+
- Trigger: Git tag `v*`
|
|
54
|
+
- Pipeline: Test -> Build -> Publish to PyPI -> GitHub Release
|
|
55
|
+
- Current version: 0.1.0
|
|
56
|
+
|
|
57
|
+
## Rules
|
|
58
|
+
|
|
59
|
+
- Keep dependencies minimal (no torch, cv2, or heavy ML libs)
|
|
60
|
+
- All Aircloud calls must be fire-and-forget (never block ingest)
|
|
61
|
+
- GPU orchestrator settings are injected via `configure_settings_provider()`, not hardcoded imports
|
|
62
|
+
- Version bumps must consider downstream consumer compatibility
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: '3.11'
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: pip install -e ".[dev]"
|
|
22
|
+
|
|
23
|
+
- name: Run tests
|
|
24
|
+
run: pytest -v
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v*'
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: '3.11'
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: pip install -e ".[dev]"
|
|
22
|
+
|
|
23
|
+
- name: Run tests
|
|
24
|
+
run: pytest
|
|
25
|
+
|
|
26
|
+
publish:
|
|
27
|
+
needs: test
|
|
28
|
+
runs-on: ubuntu-latest
|
|
29
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
30
|
+
permissions:
|
|
31
|
+
contents: read
|
|
32
|
+
id-token: write
|
|
33
|
+
steps:
|
|
34
|
+
- uses: actions/checkout@v4
|
|
35
|
+
|
|
36
|
+
- name: Set up Python
|
|
37
|
+
uses: actions/setup-python@v5
|
|
38
|
+
with:
|
|
39
|
+
python-version: '3.11'
|
|
40
|
+
|
|
41
|
+
- name: Install build dependencies
|
|
42
|
+
run: pip install build
|
|
43
|
+
|
|
44
|
+
- name: Build distribution
|
|
45
|
+
run: python -m build
|
|
46
|
+
|
|
47
|
+
- name: Publish to PyPI
|
|
48
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
49
|
+
|
|
50
|
+
github-release:
|
|
51
|
+
needs: publish
|
|
52
|
+
runs-on: ubuntu-latest
|
|
53
|
+
if: startsWith(github.ref, 'refs/tags/v')
|
|
54
|
+
permissions:
|
|
55
|
+
contents: write
|
|
56
|
+
steps:
|
|
57
|
+
- uses: actions/checkout@v4
|
|
58
|
+
|
|
59
|
+
- name: Create GitHub Release
|
|
60
|
+
uses: softprops/action-gh-release@v2
|
|
61
|
+
with:
|
|
62
|
+
generate_release_notes: true
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.egg-info/
|
|
6
|
+
*.egg
|
|
7
|
+
.eggs/
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.whl
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
env/
|
|
16
|
+
|
|
17
|
+
# Testing
|
|
18
|
+
.pytest_cache/
|
|
19
|
+
.coverage
|
|
20
|
+
htmlcov/
|
|
21
|
+
.tox/
|
|
22
|
+
.mypy_cache/
|
|
23
|
+
|
|
24
|
+
# Environment / secrets
|
|
25
|
+
.env
|
|
26
|
+
.env.*
|
|
27
|
+
!.env.example
|
|
28
|
+
|
|
29
|
+
# IDE
|
|
30
|
+
.vscode/
|
|
31
|
+
.idea/
|
|
32
|
+
*.swp
|
|
33
|
+
*.swo
|
|
34
|
+
*~
|
|
35
|
+
|
|
36
|
+
# OS
|
|
37
|
+
.DS_Store
|
|
38
|
+
Thumbs.db
|
|
39
|
+
|
|
40
|
+
# oh-my-claudecode (local agent state)
|
|
41
|
+
.omc/
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: heimdex-worker-sdk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared S3 client, SQS consumer, Aircloud orchestrator, and worker utilities for Heimdex
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: boto3>=1.28
|
|
7
|
+
Requires-Dist: pydantic-settings>=2.0
|
|
8
|
+
Requires-Dist: requests>=2.31
|
|
9
|
+
Provides-Extra: dev
|
|
10
|
+
Requires-Dist: pytest>=7; extra == 'dev'
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# heimdex-worker-sdk
|
|
2
|
+
|
|
3
|
+
Shared worker utilities for the Heimdex ecosystem: S3 client, SQS consumer, Aircloud GPU orchestrator, and internal API helpers.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install heimdex-worker-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Modules
|
|
12
|
+
|
|
13
|
+
| Module | Purpose |
|
|
14
|
+
|--------|---------|
|
|
15
|
+
| `aircloud_client` | HTTP client for Aircloud External API (start/stop/scale/status) |
|
|
16
|
+
| `gpu_orchestrator` | Auto start/stop GPU workers based on SQS queue depth |
|
|
17
|
+
| `sqs_client` | Thin boto3 SQS wrapper |
|
|
18
|
+
| `sqs_consumer` | Background SQS polling loop with heartbeat |
|
|
19
|
+
| `s3` | S3/MinIO dual-mode client |
|
|
20
|
+
| `settings` | WorkerSettings via pydantic-settings |
|
|
21
|
+
| `internal_api` | HTTP client for livecommerce internal endpoints |
|
|
22
|
+
| `drive_keys` | Deterministic S3 key generation for Drive files |
|
|
23
|
+
| `youtube_keys` | S3 key generation for YouTube files |
|
|
24
|
+
| `youtube_api` | HTTP client for YouTube internal endpoints |
|
|
25
|
+
| `content_type` | MIME type classification |
|
|
26
|
+
| `message_adapters` | SQS message to dataclass converters |
|
|
27
|
+
|
|
28
|
+
## Consumers
|
|
29
|
+
|
|
30
|
+
| Product | What it uses |
|
|
31
|
+
|---------|-------------|
|
|
32
|
+
| dev-heimdex-for-livecommerce | Full SDK (all modules) |
|
|
33
|
+
| dev-heimdex-playground | `aircloud_client`, `gpu_orchestrator` |
|
|
34
|
+
|
|
35
|
+
## Release
|
|
36
|
+
|
|
37
|
+
1. Update `version` in `pyproject.toml`
|
|
38
|
+
2. `git tag vX.Y.Z && git push origin vX.Y.Z`
|
|
39
|
+
3. CI runs tests, builds wheel, publishes to PyPI, creates GitHub Release
|
|
40
|
+
|
|
41
|
+
## Development
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install -e ".[dev]"
|
|
45
|
+
pytest -v
|
|
46
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "heimdex-worker-sdk"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Shared S3 client, SQS consumer, Aircloud orchestrator, and worker utilities for Heimdex"
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"pydantic-settings>=2.0",
|
|
12
|
+
"boto3>=1.28",
|
|
13
|
+
"requests>=2.31",
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[project.optional-dependencies]
|
|
17
|
+
dev = ["pytest>=7"]
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["src/heimdex_worker_sdk"]
|
|
21
|
+
|
|
22
|
+
[tool.pytest.ini_options]
|
|
23
|
+
testpaths = ["tests"]
|
|
24
|
+
python_files = "test_*.py"
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Heimdex Worker SDK — shared settings, S3 client, drive key helpers, and internal API client."""

from heimdex_worker_sdk.content_type import (
    classify_mime,
    is_image,
    is_supported_mime,
    is_video,
)
from heimdex_worker_sdk.drive_keys import (
    audio_s3_key,
    drive_video_id,
    enrichment_keyframe_s3_key,
    enrichment_keyframe_s3_prefix,
    original_s3_key,
    proxy_s3_key,
    scene_manifest_s3_key,
    stt_result_s3_key,
    thumbnail_s3_key,
    thumbnail_s3_prefix,
)
from heimdex_worker_sdk.s3 import S3Client
from heimdex_worker_sdk.settings import WorkerSettings, get_worker_settings
from heimdex_worker_sdk.internal_api import (
    AccessToken,
    ClaimedConnection,
    ClaimedFile,
    ClaimedProcessingFile,
    InternalAPIClient,
    UpsertResult,
)

from heimdex_worker_sdk.sqs_client import SQSJobClient, SQSMessage
from heimdex_worker_sdk.sqs_consumer import (
    InvalidMessageError,
    SQSConsumerLoop,
    VisibilityHeartbeat,
)
from heimdex_worker_sdk.message_adapters import (
    sqs_to_claimed_file,
    sqs_to_claimed_processing_file,
)
from heimdex_worker_sdk.gpu_orchestrator import (
    configure_settings_provider,
    ensure_worker_running,
)

# Public API surface. NOTE: previously omitted ``original_s3_key`` and
# ``stt_result_s3_key`` even though drive_keys defines them and the other
# drive-key helpers are exported — added here for consistency.
__all__ = [
    "WorkerSettings",
    "get_worker_settings",
    "S3Client",
    "InternalAPIClient",
    "ClaimedConnection",
    "ClaimedFile",
    "UpsertResult",
    "AccessToken",
    "ClaimedProcessingFile",
    "SQSJobClient",
    "SQSMessage",
    "InvalidMessageError",
    "SQSConsumerLoop",
    "VisibilityHeartbeat",
    "sqs_to_claimed_file",
    "sqs_to_claimed_processing_file",
    "configure_settings_provider",
    "ensure_worker_running",
    "audio_s3_key",
    "classify_mime",
    "drive_video_id",
    "enrichment_keyframe_s3_key",
    "enrichment_keyframe_s3_prefix",
    "is_image",
    "is_supported_mime",
    "is_video",
    "original_s3_key",
    "proxy_s3_key",
    "scene_manifest_s3_key",
    "stt_result_s3_key",
    "thumbnail_s3_key",
    "thumbnail_s3_prefix",
]
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Aircloud External API client for GPU worker lifecycle management.
|
|
3
|
+
|
|
4
|
+
Thin HTTP wrapper over the Aircloud External API (start/stop/scale/status).
|
|
5
|
+
Used by:
|
|
6
|
+
- sqs_producer.py (API): wake up workers when publishing SQS jobs
|
|
7
|
+
- gpu_orchestrator.py (drive-worker): shut down idle workers periodically
|
|
8
|
+
|
|
9
|
+
All methods are synchronous and fire-and-forget safe. Errors are logged,
|
|
10
|
+
never raised to callers; failures surface as ``None``/``False`` return values.
|
|
11
|
+
|
|
12
|
+
API docs: https://external.aieev.cloud:5007/external/api/v1
|
|
13
|
+
Auth: Bearer token via API key generated in Aircloud web console.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import logging
|
|
17
|
+
import time
|
|
18
|
+
from dataclasses import dataclass
|
|
19
|
+
from typing import Any, Optional
|
|
20
|
+
|
|
21
|
+
import requests
|
|
22
|
+
from requests.adapters import HTTPAdapter
|
|
23
|
+
from urllib3.util.retry import Retry
|
|
24
|
+
|
|
25
|
+
logger = logging.getLogger(__name__)
|
|
26
|
+
|
|
27
|
+
_DEFAULT_BASE_URL = "https://external.aieev.cloud:5007/external/api/v1"
|
|
28
|
+
_DEFAULT_TIMEOUT = 10 # seconds
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class EndpointStatus:
    """Immutable snapshot of GET /endpoints/{endpoint_id}."""

    endpoint_id: str
    name: str
    is_active: bool
    num_replicas: int
    replica_status_summary: dict[str, int]
    enable_autoscaling: bool
    instance_type_name: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "EndpointStatus":
        """Build an EndpointStatus from a raw API payload, tolerating missing keys."""
        # Field-by-field fallbacks mirror the API's zero values.
        fallbacks: dict[str, Any] = {
            "endpoint_id": "",
            "name": "",
            "is_active": False,
            "num_replicas": 0,
            "replica_status_summary": {},
            "enable_autoscaling": False,
            "instance_type_name": "",
        }
        return cls(**{field: data.get(field, default) for field, default in fallbacks.items()})
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class AircloudClient:
    """Synchronous HTTP client for the Aircloud External API.

    Exposes the endpoint lifecycle operations (status / start / stop / scale).
    Every method catches its own errors, logs them, and reports failure via
    the return value, so callers can treat calls as fire-and-forget.

    Args:
        api_key: Bearer token for authentication.
        base_url: API base URL (default: production Aircloud endpoint).
        timeout: Request timeout in seconds.
        max_retries: Number of retries on transient failures (5xx, connection errors).
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = _DEFAULT_BASE_URL,
        timeout: int = _DEFAULT_TIMEOUT,
        max_retries: int = 2,
    ) -> None:
        self._api_key = api_key
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout

        session = requests.Session()
        session.headers.update(
            {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
        )

        # Transparent retry on transient server-side failures only.
        retry_policy = Retry(
            total=max_retries,
            backoff_factor=0.5,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=["GET", "POST"],
        )
        transport = HTTPAdapter(max_retries=retry_policy)
        for scheme in ("https://", "http://"):
            session.mount(scheme, transport)
        self._session = session

    # ── Public API ─────────────────────────────────────────────────

    def get_status(self, endpoint_id: str) -> Optional[EndpointStatus]:
        """Get current endpoint status. Returns None on failure."""
        url = f"{self._base_url}/endpoints/{endpoint_id}"
        try:
            response = self._session.get(url, timeout=self._timeout)
            response.raise_for_status()
            # JSON decoding stays inside the try: a malformed body also maps to None.
            return EndpointStatus.from_dict(response.json())
        except Exception:
            logger.exception(
                "aircloud_get_status_failed",
                extra={"endpoint_id": endpoint_id},
            )
            return None

    def start(self, endpoint_id: str) -> bool:
        """Start an inactive endpoint. Returns True on success.

        Idempotent: starting an already-active endpoint is a no-op on
        the Aircloud side (returns success).
        """
        url = f"{self._base_url}/endpoints/{endpoint_id}/start"
        try:
            response = self._session.post(url, timeout=self._timeout)
            response.raise_for_status()
            body = response.json()
            details = {
                "endpoint_id": endpoint_id,
                "is_active": body.get("is_active"),
                "response_message": body.get("message", ""),
            }
            logger.info("aircloud_endpoint_started", extra=details)
            return True
        except Exception:
            logger.exception(
                "aircloud_start_failed",
                extra={"endpoint_id": endpoint_id},
            )
            return False

    def stop(self, endpoint_id: str) -> bool:
        """Stop an active endpoint. Returns True on success."""
        url = f"{self._base_url}/endpoints/{endpoint_id}/stop"
        try:
            response = self._session.post(url, timeout=self._timeout)
            response.raise_for_status()
            body = response.json()
            details = {
                "endpoint_id": endpoint_id,
                "is_active": body.get("is_active"),
                "response_message": body.get("message", ""),
            }
            logger.info("aircloud_endpoint_stopped", extra=details)
            return True
        except Exception:
            logger.exception(
                "aircloud_stop_failed",
                extra={"endpoint_id": endpoint_id},
            )
            return False

    def scale(self, endpoint_id: str, num_replicas: int) -> bool:
        """Scale replicas for an active endpoint. Returns True on success.

        Requires autoscaling to be DISABLED on the Aircloud web console.
        """
        url = f"{self._base_url}/endpoints/{endpoint_id}/scale"
        try:
            response = self._session.post(
                url,
                json={"num_replicas": num_replicas},
                timeout=self._timeout,
            )
            response.raise_for_status()
            body = response.json()
            details = {
                "endpoint_id": endpoint_id,
                "previous_replicas": body.get("previous_replicas"),
                "current_replicas": body.get("current_replicas"),
                "response_message": body.get("message", ""),
            }
            logger.info("aircloud_endpoint_scaled", extra=details)
            return True
        except Exception:
            logger.exception(
                "aircloud_scale_failed",
                extra={
                    "endpoint_id": endpoint_id,
                    "num_replicas": num_replicas,
                },
            )
            return False
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Centralized MIME-type classification for the content pipeline.
|
|
2
|
+
|
|
3
|
+
Shared between API (via re-export at ``app.modules.content_type``) and
|
|
4
|
+
drive-worker (direct import from ``heimdex_worker_sdk.content_type``).
|
|
5
|
+
|
|
6
|
+
All MIME-type decisions must flow through these helpers — no scattered
|
|
7
|
+
``startswith("video/")`` checks elsewhere in the codebase.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
# Closed set of image types the pipeline accepts; video is prefix-matched.
IMAGE_MIME_TYPES: frozenset[str] = frozenset((
    "image/jpeg",
    "image/png",
    "image/webp",
    "image/heic",
    "image/heif",
))

VIDEO_MIME_PREFIX: str = "video/"


def classify_mime(mime_type: str) -> str:
    """Map a MIME type to one of ``"video"``, ``"image"``, or ``"unknown"``.

    >>> classify_mime("video/mp4")
    'video'
    >>> classify_mime("image/jpeg")
    'image'
    >>> classify_mime("application/pdf")
    'unknown'
    """
    # Order is irrelevant: no supported image type carries the video/ prefix.
    if mime_type.startswith(VIDEO_MIME_PREFIX):
        return "video"
    if mime_type in IMAGE_MIME_TYPES:
        return "image"
    return "unknown"


def is_supported_mime(mime_type: str) -> bool:
    """Return ``True`` when *mime_type* classifies as a video or supported image."""
    return classify_mime(mime_type) != "unknown"


def is_image(mime_type: str) -> bool:
    """Return ``True`` when *mime_type* is one of the supported image types."""
    return mime_type in IMAGE_MIME_TYPES


def is_video(mime_type: str) -> bool:
    """Return ``True`` when *mime_type* is any ``video/*`` type."""
    return mime_type.startswith(VIDEO_MIME_PREFIX)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Drive S3 key helpers — standalone copy of ``app.modules.drive.keys``.
|
|
2
|
+
|
|
3
|
+
Pure functions, only ``hashlib`` dependency. Kept 1:1 with the API copy so
|
|
4
|
+
that workers and the API always generate identical keys.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import hashlib
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def drive_video_id(org_id: str, google_file_id: str) -> str:
    """Return a stable, collision-resistant video_id for a Drive file.

    The same (org_id, google_file_id) pair always yields the same id,
    making downstream processing idempotent.
    """
    seed = f"{org_id}:{google_file_id}".encode()
    return "gd_" + hashlib.sha256(seed).hexdigest()[:16]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def proxy_s3_key(org_id: str, drive_id: str, google_file_id: str) -> str:
    """S3 key for the transcoded proxy MP4 of a Drive file."""
    return "/".join((org_id, "drive", drive_id, google_file_id, "proxy.mp4"))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def thumbnail_s3_key(org_id: str, video_id: str, scene_id: str) -> str:
    """S3 key for a single scene thumbnail JPEG of *video_id*."""
    folder = f"{org_id}/drive/thumbs/{video_id}"
    return f"{folder}/{scene_id}.jpg"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def thumbnail_s3_prefix(org_id: str, video_id: str) -> str:
    """S3 prefix (trailing slash) under which all thumbnails of *video_id* live."""
    return "/".join((org_id, "drive", "thumbs", video_id, ""))
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def audio_s3_key(org_id: str, video_id: str) -> str:
    """S3 key for the extracted WAV audio track of *video_id*."""
    folder = f"{org_id}/drive/audio/{video_id}"
    return folder + "/audio.wav"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def enrichment_keyframe_s3_prefix(org_id: str, video_id: str) -> str:
    """S3 prefix (trailing slash) for all enrichment keyframes of *video_id*."""
    return "/".join((org_id, "drive", "keyframes", video_id, ""))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def enrichment_keyframe_s3_key(
    org_id: str, video_id: str, scene_id: str,
) -> str:
    """S3 key for one enrichment keyframe JPEG of *video_id*."""
    return "/".join((org_id, "drive", "keyframes", video_id, f"{scene_id}.jpg"))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def scene_manifest_s3_key(org_id: str, video_id: str) -> str:
    """S3 key for the JSON scene manifest of *video_id*."""
    return "/".join((org_id, "drive", "manifests", video_id, "scenes.json"))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def original_s3_key(org_id: str, drive_id: str, google_file_id: str) -> str:
    """S3 key for the original (pre-transcode) file uploaded by drive-worker."""
    # No extension on purpose: the source file's format is not known up front.
    folder = f"{org_id}/drive/{drive_id}/{google_file_id}"
    return folder + "/original"
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def stt_result_s3_key(org_id: str, video_id: str) -> str:
    """S3 key for STT result JSON used by speech-aware scene splitting."""
    return "/".join((org_id, "drive", "stt", video_id, "stt_result.json"))
|