karaoke-gen 0.75.54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of karaoke-gen might be problematic. Click here for more details.
- karaoke_gen/__init__.py +38 -0
- karaoke_gen/audio_fetcher.py +1614 -0
- karaoke_gen/audio_processor.py +790 -0
- karaoke_gen/config.py +83 -0
- karaoke_gen/file_handler.py +387 -0
- karaoke_gen/instrumental_review/__init__.py +45 -0
- karaoke_gen/instrumental_review/analyzer.py +408 -0
- karaoke_gen/instrumental_review/editor.py +322 -0
- karaoke_gen/instrumental_review/models.py +171 -0
- karaoke_gen/instrumental_review/server.py +475 -0
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/instrumental_review/waveform.py +409 -0
- karaoke_gen/karaoke_finalise/__init__.py +1 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
- karaoke_gen/karaoke_gen.py +1026 -0
- karaoke_gen/lyrics_processor.py +474 -0
- karaoke_gen/metadata.py +160 -0
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
- karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-Bold.ttf +0 -0
- karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
- karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/__init__.py +18 -0
- karaoke_gen/utils/bulk_cli.py +492 -0
- karaoke_gen/utils/cli_args.py +432 -0
- karaoke_gen/utils/gen_cli.py +978 -0
- karaoke_gen/utils/remote_cli.py +3268 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen/video_generator.py +424 -0
- karaoke_gen-0.75.54.dist-info/METADATA +718 -0
- karaoke_gen-0.75.54.dist-info/RECORD +287 -0
- karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
- karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
- karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +594 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +919 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +214 -0
- lyrics_transcriber/frontend/src/api.ts +254 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +306 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +379 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +650 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Protocol, BinaryIO, Optional, List, Any
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import logging
|
|
6
|
+
import requests
|
|
7
|
+
from dropbox import Dropbox
|
|
8
|
+
from dropbox.files import WriteMode, FileMetadata
|
|
9
|
+
from dropbox.sharing import RequestedVisibility, SharedLinkSettings
|
|
10
|
+
from dropbox.exceptions import AuthError, ApiError
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class DropboxConfig:
    """Credentials used to construct a Dropbox client.

    All three fields default to ``None``; this class performs no validation
    of its own.
    """

    app_key: Optional[str] = None
    app_secret: Optional[str] = None
    refresh_token: Optional[str] = None

    @classmethod
    def from_env(cls) -> "DropboxConfig":
        """Build a config from the ``WHISPER_DROPBOX_*`` environment variables.

        A variable that is not set leaves the corresponding field as ``None``.
        """
        env = os.environ
        key = env.get("WHISPER_DROPBOX_APP_KEY")
        secret = env.get("WHISPER_DROPBOX_APP_SECRET")
        token = env.get("WHISPER_DROPBOX_REFRESH_TOKEN")
        return cls(app_key=key, app_secret=secret, refresh_token=token)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DropboxAPI(Protocol):
    """Structural type for the client object accepted by ``DropboxHandler``.

    Only signatures are declared here; any object providing these methods
    can stand in for a real Dropbox client.
    """

    def files_upload(self, f: bytes, path: str, mode: WriteMode) -> Any:
        # Called by the handler with raw bytes and WriteMode.overwrite.
        ...

    def files_list_folder(self, path: str, recursive: bool = False) -> Any:
        # The handler reads .entries, .has_more and .cursor from the result.
        ...

    def files_list_folder_continue(self, cursor: str) -> Any:
        # Called with the cursor of a previous listing result.
        ...

    def files_download(self, path: str) -> tuple[Any, Any]:
        # The handler reads .content from the second element of the tuple.
        ...

    def files_download_to_file(self, download_path: str, path: str) -> None:
        # download_path is the local destination, path the Dropbox source.
        ...

    def files_get_metadata(self, path: str) -> Any:
        # The handler only checks whether this call raises.
        ...

    def sharing_create_shared_link_with_settings(self, path: str, settings: SharedLinkSettings) -> Any:
        # The handler reads .url from the result.
        ...

    def sharing_list_shared_links(self, path: str) -> Any:
        # The handler reads .links (each with a .url) from the result.
        ...
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class DropboxHandler:
    """Handles Dropbox storage operations.

    Wraps a Dropbox client (or any object exposing the same methods) and
    provides retrying uploads, recursive folder listing/transfer and
    shared-link helpers. The default client is created from an app key,
    app secret and OAuth2 refresh token.
    """

    def __init__(
        self,
        config: Optional[DropboxConfig] = None,
        client: Optional[DropboxAPI] = None,
    ):
        """Initialize the Dropbox handler.

        Args:
            config: Credentials to use. When omitted they are read from the
                environment via ``DropboxConfig.from_env()``.
            client: Pre-built client to use instead of constructing a
                ``Dropbox`` instance.

        Raises:
            ValueError: If any credential is missing. The check runs even
                when ``client`` is supplied.
        """
        self.config = config or DropboxConfig.from_env()
        self._validate_config()

        self.client = client or Dropbox(
            app_key=self.config.app_key,
            app_secret=self.config.app_secret,
            oauth2_refresh_token=self.config.refresh_token,
        )

    def _validate_config(self) -> None:
        """Validate the configuration.

        Raises:
            ValueError: Listing every credential field that is empty or None.
        """
        required = ("app_key", "app_secret", "refresh_token")

        logger.debug("Validating DropboxConfig with values:")
        for name in required:
            # Report presence only: even a short prefix of a secret or
            # refresh token should not end up in log files.
            logger.debug(f"{name}: {'set' if getattr(self.config, name) else 'None'}")

        missing = [name for name in required if not getattr(self.config, name)]

        if missing:
            error_msg = f"Missing required Dropbox configuration: {', '.join(missing)}"
            logger.error(error_msg)
            raise ValueError(error_msg)

    def upload_with_retry(self, file: BinaryIO, path: str, max_retries: int = 3) -> None:
        """Upload a file object to Dropbox, retrying on ``ApiError``.

        The stream is rewound before every attempt and the target is
        overwritten. Waits ``attempt`` seconds between attempts (1s, 2s, ...).
        If ``max_retries`` is less than 1 no upload is attempted.

        Raises:
            ApiError: Re-raised from the final failed attempt.
        """
        for attempt in range(max_retries):
            try:
                logger.debug(f"Attempting file upload to {path} (attempt {attempt + 1}/{max_retries})")
                file.seek(0)
                self.client.files_upload(file.read(), path, mode=WriteMode.overwrite)
                logger.debug(f"Successfully uploaded file to {path}")
                return
            except ApiError as e:
                logger.warning(f"Upload attempt {attempt + 1} failed: {str(e)}")
                if attempt == max_retries - 1:
                    logger.error(f"All upload attempts failed for {path}")
                    raise
                time.sleep(1 * (attempt + 1))

    def upload_string_with_retry(self, content: str, path: str, max_retries: int = 3) -> None:
        """Upload string content to Dropbox, retrying on ``ApiError``.

        The string is encoded with ``str.encode()`` defaults and the target
        is overwritten. Waits ``attempt`` seconds between attempts.
        If ``max_retries`` is less than 1 no upload is attempted.

        Raises:
            ApiError: Re-raised from the final failed attempt.
        """
        for attempt in range(max_retries):
            try:
                logger.debug(f"Attempting string upload to {path} (attempt {attempt + 1}/{max_retries})")
                self.client.files_upload(content.encode(), path, mode=WriteMode.overwrite)
                logger.debug(f"Successfully uploaded string content to {path}")
                return
            except ApiError as e:
                logger.warning(f"Upload attempt {attempt + 1} failed: {str(e)}")
                if attempt == max_retries - 1:
                    logger.error(f"All upload attempts failed for {path}")
                    raise
                time.sleep(1 * (attempt + 1))

    def list_folder_recursive(self, path: str = "") -> List[FileMetadata]:
        """List all entries under a folder recursively.

        Follows the listing cursor until the client reports no more results.
        Entries are returned exactly as the client yields them; no filtering
        by entry type is done here.

        Raises:
            Exception: Any client error is logged and re-raised.
        """
        try:
            logger.debug(f"Listing files recursively from {path}")
            entries = []
            result = self.client.files_list_folder(path, recursive=True)

            while True:
                entries.extend(result.entries)
                if not result.has_more:
                    break
                result = self.client.files_list_folder_continue(result.cursor)

            return entries
        except Exception as e:
            logger.error(f"Error listing files: {str(e)}", exc_info=True)
            raise

    def download_file_content(self, path: str) -> bytes:
        """Download and return the content of a file.

        Raises:
            Exception: Any client error is logged and re-raised.
        """
        try:
            logger.debug(f"Downloading file content from {path}")
            # files_download returns a pair; the second element carries .content.
            return self.client.files_download(path)[1].content
        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}", exc_info=True)
            raise

    def download_folder(self, dropbox_path: str, local_path: str) -> None:
        """Download all files from a Dropbox folder to a local path.

        The folder structure below ``dropbox_path`` is recreated under
        ``local_path``; only ``FileMetadata`` entries are downloaded.

        Raises:
            Exception: Any listing, filesystem or client error is logged and
                re-raised.
        """
        try:
            logger.debug(f"Downloading folder {dropbox_path} to {local_path}")
            entries = self.list_folder_recursive(dropbox_path)

            for entry in entries:
                if isinstance(entry, FileMetadata):
                    # Strip the folder prefix by length to get the relative path.
                    rel_path = entry.path_display[len(dropbox_path) :].lstrip("/")
                    local_file_path = os.path.join(local_path, rel_path)

                    os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
                    logger.debug(f"Downloading {entry.path_display} to {local_file_path}")
                    self.client.files_download_to_file(local_file_path, entry.path_display)

            logger.debug(f"Successfully downloaded folder {dropbox_path}")
        except Exception as e:
            logger.error(f"Error downloading folder: {str(e)}", exc_info=True)
            raise

    def upload_folder(self, local_path: str, dropbox_path: str) -> None:
        """Upload all files from a local folder to a Dropbox path.

        Walks ``local_path`` recursively and uploads each file (overwriting)
        to the matching location under ``dropbox_path``. Uploads are not
        retried.

        Raises:
            Exception: Any filesystem or client error is logged and re-raised.
        """
        try:
            logger.debug(f"Uploading folder {local_path} to {dropbox_path}")
            # Avoid a doubled slash when the caller passes e.g. "/folder/".
            base_path = dropbox_path.rstrip("/")

            for root, _, files in os.walk(local_path):
                for filename in files:
                    local_file_path = os.path.join(root, filename)
                    rel_path = os.path.relpath(local_file_path, local_path)
                    # Dropbox paths use "/" regardless of the local OS
                    # separator (os.path.relpath yields "\\" on Windows).
                    remote_rel_path = rel_path.replace(os.sep, "/")
                    target_path = f"{base_path}/{remote_rel_path}"

                    logger.debug(f"Uploading {rel_path} to {target_path}")
                    with open(local_file_path, "rb") as f:
                        self.client.files_upload(f.read(), target_path, mode=WriteMode.overwrite)

            logger.debug(f"Successfully uploaded folder {local_path}")
        except Exception as e:
            logger.error(f"Error uploading folder: {str(e)}", exc_info=True)
            raise

    def create_shared_link(self, path: str) -> str:
        """Create a public shared link for a file and return its URL.

        The host ``www.dropbox.com`` in the returned URL is replaced with
        ``dl.dropboxusercontent.com``.

        Raises:
            Exception: Any client error is logged and re-raised.
        """
        try:
            logger.debug(f"Creating shared link for {path}")
            shared_link = self.client.sharing_create_shared_link_with_settings(
                path, settings=SharedLinkSettings(requested_visibility=RequestedVisibility.public)
            )
            return shared_link.url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
        except Exception as e:
            logger.error(f"Error creating shared link: {str(e)}", exc_info=True)
            raise

    def get_existing_shared_link(self, path: str) -> Optional[str]:
        """Get the first existing shared link for a file, if any.

        Returns:
            The link URL with its host rewritten to
            ``dl.dropboxusercontent.com``, or ``None`` when there is no link
            or the lookup fails (the failure is logged, not raised).
        """
        try:
            logger.debug(f"Getting existing shared link for {path}")
            shared_links = self.client.sharing_list_shared_links(path=path).links
            if shared_links:
                return shared_links[0].url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
            return None
        except Exception as e:
            logger.error(f"Error getting existing shared link: {str(e)}", exc_info=True)
            return None

    def create_or_get_shared_link(self, path: str) -> str:
        """Return an existing shared link for ``path`` or create a new one.

        Raises:
            Exception: Any error from link creation is logged and re-raised.
        """
        try:
            existing_link = self.get_existing_shared_link(path)
            if existing_link:
                logger.debug(f"Found existing shared link for {path}")
                return existing_link

            logger.debug(f"Creating new shared link for {path}")
            return self.create_shared_link(path)
        except Exception as e:
            logger.error(f"Error creating/getting shared link: {str(e)}", exc_info=True)
            raise

    def file_exists(self, path: str) -> bool:
        """Check if a file exists in Dropbox.

        Returns:
            ``True`` when the metadata lookup succeeds, ``False`` when it
            raises any ``Exception`` (so auth or network failures also report
            ``False``).
        """
        try:
            self.client.files_get_metadata(path)
            return True
        except Exception:
            # Deliberately not a bare ``except:`` so KeyboardInterrupt and
            # SystemExit still propagate instead of being reported as "missing".
            return False
|
|
File without changes
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
import requests
|
|
3
|
+
import time
|
|
4
|
+
import os
|
|
5
|
+
import tempfile
|
|
6
|
+
from typing import Dict, Optional, Any, Union, Tuple
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from pydub import AudioSegment
|
|
9
|
+
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
|
10
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
|
11
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
|
12
|
+
|
|
13
|
+
# Lossy formats that should be uploaded directly (transcoding would cause quality loss)
|
|
14
|
+
LOSSY_FORMATS = {'.mp3', '.aac', '.ogg', '.m4a', '.wma', '.opus'}
|
|
15
|
+
# Lossless formats that are already compressed and can be uploaded directly
|
|
16
|
+
LOSSLESS_COMPRESSED_FORMATS = {'.flac', '.alac'}
|
|
17
|
+
# Uncompressed formats that should be converted to FLAC for efficient upload
|
|
18
|
+
UNCOMPRESSED_FORMATS = {'.wav', '.aiff', '.aif', '.pcm'}
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class AudioUploadOptimizer:
|
|
22
|
+
"""Optimizes audio files for upload by converting uncompressed formats to FLAC."""
|
|
23
|
+
|
|
24
|
+
def __init__(self, logger):
|
|
25
|
+
self.logger = logger
|
|
26
|
+
|
|
27
|
+
def prepare_for_upload(self, filepath: str) -> Tuple[str, Optional[str]]:
|
|
28
|
+
"""
|
|
29
|
+
Prepare audio file for optimal upload.
|
|
30
|
+
|
|
31
|
+
Returns:
|
|
32
|
+
Tuple of (filepath_to_upload, temp_file_to_cleanup)
|
|
33
|
+
- If no conversion needed, returns (original_filepath, None)
|
|
34
|
+
- If converted, returns (temp_flac_filepath, temp_flac_filepath)
|
|
35
|
+
"""
|
|
36
|
+
ext = os.path.splitext(filepath)[1].lower()
|
|
37
|
+
|
|
38
|
+
# Lossy formats: upload directly (transcoding would lose quality)
|
|
39
|
+
if ext in LOSSY_FORMATS:
|
|
40
|
+
self.logger.info(f"Uploading lossy format ({ext}) directly to preserve quality")
|
|
41
|
+
return filepath, None
|
|
42
|
+
|
|
43
|
+
# Already compressed lossless: upload directly
|
|
44
|
+
if ext in LOSSLESS_COMPRESSED_FORMATS:
|
|
45
|
+
self.logger.info(f"Uploading lossless compressed format ({ext}) directly")
|
|
46
|
+
return filepath, None
|
|
47
|
+
|
|
48
|
+
# Uncompressed formats: convert to FLAC for smaller upload
|
|
49
|
+
if ext in UNCOMPRESSED_FORMATS:
|
|
50
|
+
self.logger.info(f"Converting uncompressed format ({ext}) to FLAC for efficient upload")
|
|
51
|
+
return self._convert_to_flac(filepath)
|
|
52
|
+
|
|
53
|
+
# Unknown format: try to upload directly
|
|
54
|
+
self.logger.warning(f"Unknown audio format ({ext}), uploading directly")
|
|
55
|
+
return filepath, None
|
|
56
|
+
|
|
57
|
+
def _convert_to_flac(self, filepath: str) -> Tuple[str, str]:
|
|
58
|
+
"""Convert audio file to FLAC format."""
|
|
59
|
+
ext = os.path.splitext(filepath)[1].lower()
|
|
60
|
+
|
|
61
|
+
# Load audio based on format
|
|
62
|
+
if ext == '.wav':
|
|
63
|
+
audio = AudioSegment.from_wav(filepath)
|
|
64
|
+
elif ext in {'.aiff', '.aif'}:
|
|
65
|
+
audio = AudioSegment.from_file(filepath, format='aiff')
|
|
66
|
+
else:
|
|
67
|
+
audio = AudioSegment.from_file(filepath)
|
|
68
|
+
|
|
69
|
+
# Create temp file for FLAC output
|
|
70
|
+
with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
|
|
71
|
+
flac_path = temp_flac.name
|
|
72
|
+
audio.export(flac_path, format="flac")
|
|
73
|
+
|
|
74
|
+
# Log size reduction
|
|
75
|
+
original_size = os.path.getsize(filepath)
|
|
76
|
+
flac_size = os.path.getsize(flac_path)
|
|
77
|
+
reduction_pct = (1 - flac_size / original_size) * 100
|
|
78
|
+
self.logger.info(f"Converted to FLAC: {original_size / 1024 / 1024:.1f}MB → {flac_size / 1024 / 1024:.1f}MB ({reduction_pct:.0f}% smaller)")
|
|
79
|
+
|
|
80
|
+
return flac_path, flac_path
|
|
81
|
+
|
|
82
|
+
def cleanup(self, temp_filepath: Optional[str]) -> None:
    """Delete a temporary file, if one was given and it still exists.

    Args:
        temp_filepath: Path of the file to remove, or ``None`` / empty when
            there is nothing to clean up.

    A failure to delete is logged as a warning and not raised.
    """
    # Nothing to do when no path was supplied or the file is already gone.
    if not temp_filepath or not os.path.exists(temp_filepath):
        return

    try:
        os.unlink(temp_filepath)
        self.logger.debug(f"Cleaned up temporary file: {temp_filepath}")
    except OSError as err:
        self.logger.warning(f"Failed to clean up temporary file {temp_filepath}: {err}")
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class AudioShakeConfig:
    """Settings consumed by the AudioShake transcription client.

    All fields have defaults, so an instance can be created with no
    arguments; the API token must nevertheless be set before any API call
    is made.
    """

    # API key sent to the service in the ``x-api-key`` request header.
    api_token: Optional[str] = None
    # Root URL to which endpoint paths such as ``/upload/`` and ``/tasks`` are appended.
    base_url: str = "https://api.audioshake.ai"
    # Text placed in front of the suffix when output filenames are generated.
    output_prefix: Optional[str] = None
    # Maximum time, in minutes, to keep polling for a task result.
    timeout_minutes: int = 20
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class AudioShakeAPI:
    """Handles direct API interactions with AudioShake.

    Wraps the three HTTP operations used by the transcriber: uploading an
    audio file, creating an alignment task for it, and polling the task list
    until every target of that task has completed.
    """

    # (connect, read) timeout in seconds passed to every HTTP call. Without
    # it a stalled connection blocks forever and ``config.timeout_minutes``
    # can never take effect.
    REQUEST_TIMEOUT_SECONDS = (30, 300)

    # Delay between two status polls while a task is still being processed.
    POLL_INTERVAL_SECONDS = 30

    def __init__(self, config: "AudioShakeConfig", logger):
        """Store the configuration and logger used by all API calls.

        Args:
            config: Provides ``api_token``, ``base_url`` and ``timeout_minutes``.
            logger: Logger-like object used for debug/info messages.
        """
        self.config = config
        self.logger = logger

    def _validate_config(self) -> None:
        """Validate API configuration.

        Raises:
            ValueError: If no API token is configured.
        """
        if not self.config.api_token:
            raise ValueError("AudioShake API token must be provided")

    def _get_headers(self) -> Dict[str, str]:
        """Return the headers for JSON API requests.

        Raises:
            ValueError: If no API token is configured.
        """
        self._validate_config()  # Validate before making any API calls
        return {"x-api-key": self.config.api_token, "Content-Type": "application/json"}

    def upload_file(self, filepath: str) -> str:
        """Upload an audio file and return the URL reported by the service.

        Args:
            filepath: Path of the local file to upload.

        Returns:
            The ``"link"`` value of the JSON upload response.

        Raises:
            ValueError: If no API token is configured.
            requests.exceptions.RequestException: On network errors, timeouts
                or a non-success HTTP status.
        """
        self.logger.info(f"Uploading {filepath} to AudioShake")
        self._validate_config()  # Validate before making API call

        url = f"{self.config.base_url}/upload/"
        with open(filepath, "rb") as file:
            files = {"file": (os.path.basename(filepath), file)}
            # Only the API key is sent here; the JSON Content-Type from
            # _get_headers() is not used because requests generates the
            # multipart header itself when ``files=`` is given.
            response = requests.post(
                url,
                headers={"x-api-key": self.config.api_token},
                files=files,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )

        self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
        response.raise_for_status()
        return response.json()["link"]

    def create_task(self, file_url: str) -> str:
        """Create an alignment task for an uploaded file and return its ID.

        Args:
            file_url: URL of the previously uploaded audio file.

        Returns:
            The ``"id"`` value of the JSON task-creation response.

        Raises:
            ValueError: If no API token is configured.
            requests.exceptions.RequestException: On network errors, timeouts
                or a non-success HTTP status.
        """
        self.logger.info(f"Creating task for file {file_url}")

        url = f"{self.config.base_url}/tasks"
        data = {
            "url": file_url,
            "targets": [
                {
                    "model": "alignment",
                    "formats": ["json"],
                    "language": "en",
                }
            ],
        }
        response = requests.post(
            url,
            headers=self._get_headers(),
            json=data,
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        return response.json()["id"]

    @staticmethod
    def _find_task(tasks_list, task_id: str):
        """Return the first entry of ``tasks_list`` whose ``"id"`` equals ``task_id``, else ``None``."""
        return next((task for task in tasks_list if task.get("id") == task_id), None)

    def _all_targets_completed(self, targets) -> bool:
        """Report whether every target has status ``"completed"``.

        Targets are inspected in order and each status is logged at debug
        level.

        Raises:
            TranscriptionError: As soon as a target with status ``"failed"``
                is encountered.
        """
        all_completed = True
        for target in targets:
            target_status = target.get("status")
            target_model = target.get("model")
            self.logger.debug(f"Target {target_model} status: {target_status}")

            if target_status == "failed":
                error_msg = target.get("error", "Unknown error")
                raise TranscriptionError(f"Target {target_model} failed: {error_msg}")
            if target_status != "completed":
                all_completed = False
        return all_completed

    def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
        """Poll the task list until the given task has completed.

        Args:
            task_id: ID returned by :meth:`create_task`.

        Returns:
            The task entry from the task list once all of its targets have
            status ``"completed"``.

        Raises:
            TranscriptionError: If polling exceeds ``config.timeout_minutes``,
                the task never appears in the list, the task has no targets,
                or any target reports ``"failed"``.
            requests.exceptions.RequestException: On network errors, timeouts
                or a non-success HTTP status; these are not retried.
        """
        self.logger.info(f"Getting task result for task {task_id}")

        # Use the list endpoint which has fresh data, not the individual task endpoint which caches
        url = f"{self.config.base_url}/tasks"
        start_time = time.time()
        last_status_log = start_time
        timeout_seconds = self.config.timeout_minutes * 60

        # Short, bounded retry for the window in which a freshly created task
        # is not yet present in the list.
        initial_retry_count = 0
        max_initial_retries = 5
        initial_retry_delay = 2  # seconds

        while True:
            current_time = time.time()
            elapsed_time = current_time - start_time

            # Check for timeout
            if elapsed_time > timeout_seconds:
                raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")

            # Log status every minute
            if current_time - last_status_log >= 60:
                self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
                last_status_log = current_time

            response = requests.get(
                url,
                headers=self._get_headers(),
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()

            # Find our specific task in the list
            task_data = self._find_task(response.json(), task_id)

            if not task_data:
                # Task not found in list yet
                if initial_retry_count >= max_initial_retries:
                    raise TranscriptionError(f"Task {task_id} not found in task list after {max_initial_retries} retries")
                initial_retry_count += 1
                self.logger.info(f"Task not found in list yet (attempt {initial_retry_count}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
                time.sleep(initial_retry_delay)
                continue

            # Log the full response for debugging
            self.logger.debug(f"Task status response: {task_data}")

            # Check status of targets (not the task itself)
            targets = task_data.get("targets", [])
            if not targets:
                raise TranscriptionError("No targets found in task response")

            if self._all_targets_completed(targets):
                self.logger.info("All targets completed successfully")
                return task_data

            # The task was found, so a later disappearance gets a fresh retry budget.
            initial_retry_count = 0

            time.sleep(self.POLL_INTERVAL_SECONDS)  # Wait before next poll
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class AudioShakeTranscriber(BaseTranscriber):
|
|
237
|
+
"""Transcription service using AudioShake's API."""
|
|
238
|
+
|
|
239
|
+
def __init__(
    self,
    cache_dir: Union[str, Path],
    config: Optional[AudioShakeConfig] = None,
    logger: Optional[Any] = None,
    api_client: Optional[AudioShakeAPI] = None,
    upload_optimizer: Optional[AudioUploadOptimizer] = None,
):
    """Set up the transcriber and its collaborators.

    Args:
        cache_dir: Forwarded to the base-class initializer.
        config: AudioShake settings. When omitted, a config is built with the
            token read from the ``AUDIOSHAKE_API_TOKEN`` environment variable.
        logger: Forwarded to the base-class initializer.
        api_client: API client to use. When omitted, an ``AudioShakeAPI`` is
            created from the effective config and ``self.logger``.
        upload_optimizer: Upload pre-processor to use. When omitted, an
            ``AudioUploadOptimizer`` is created with ``self.logger``.
    """
    super().__init__(cache_dir=cache_dir, logger=logger)

    # The config must be in place first: the default API client is built from it.
    if not config:
        config = AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
    self.config = config

    if not api_client:
        api_client = AudioShakeAPI(self.config, self.logger)
    self.api = api_client

    if not upload_optimizer:
        upload_optimizer = AudioUploadOptimizer(self.logger)
    self.upload_optimizer = upload_optimizer
|
|
252
|
+
|
|
253
|
+
def get_name(self) -> str:
    """Return the name identifying this transcription provider."""
    provider_name = "AudioShake"
    return provider_name
|
|
255
|
+
|
|
256
|
+
def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
    """Run a full transcription round-trip for one audio file.

    Starts a task via ``start_transcription`` and then fetches its result via
    ``get_transcription_result``. Any exception is logged at error level and
    re-raised unchanged.

    NOTE(review): the signature is annotated ``TranscriptionData``, but the
    value returned is whatever ``get_transcription_result`` yields (annotated
    ``Dict[str, Any]``) — confirm which is intended.

    Args:
        audio_filepath: Path of the audio file to transcribe.
    """
    self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
    self.logger.info(f"Starting transcription for {audio_filepath}")

    try:
        # Step 1: submit the audio and obtain a task handle.
        self.logger.debug("Calling start_transcription()")
        started_task = self.start_transcription(audio_filepath)
        self.logger.debug(f"Got task_id: {started_task}")

        # Step 2: block until the task's result is available.
        self.logger.debug("Calling get_transcription_result()")
        outcome = self.get_transcription_result(started_task)
        self.logger.debug("Got transcription result")

        return outcome
    except Exception as exc:
        self.logger.error(f"Error in _perform_transcription: {exc!s}")
        raise
|
|
275
|
+
|
|
276
|
+
def start_transcription(self, audio_filepath: str) -> str:
    """Upload the audio and create a transcription task for it.

    The file is first passed through the upload optimizer, which returns the
    path to upload and, optionally, a temporary file to delete afterwards.
    That temporary file is cleaned up whether or not the upload or the task
    creation succeeds.

    Args:
        audio_filepath: Path of the audio file to transcribe.

    Returns:
        The ID of the created task.
    """
    self.logger.debug(f"Entering start_transcription() for {audio_filepath}")

    # Optimize file format for upload (convert WAV to FLAC, etc.)
    prepared = self.upload_optimizer.prepare_for_upload(audio_filepath)
    path_to_send, disposable_path = prepared

    try:
        uploaded_url = self.api.upload_file(path_to_send)
        self.logger.debug(f"File uploaded successfully. File URL: {uploaded_url}")

        new_task_id = self.api.create_task(uploaded_url)
        self.logger.debug(f"Task created successfully. Task ID: {new_task_id}")
    finally:
        # Always remove whatever temporary file the optimizer produced.
        self.upload_optimizer.cleanup(disposable_path)

    return new_task_id
|
|
295
|
+
|
|
296
|
+
def get_transcription_result(self, task_id: str) -> Dict[str, Any]:
    """Wait for a task to finish and download its alignment output.

    Args:
        task_id: ID of a previously started task.

    Returns:
        A dict with two keys: ``"task_data"`` (the completed task entry) and
        ``"transcription"`` (the parsed JSON downloaded from the alignment
        target's first output link).

    Raises:
        TranscriptionError: If the task has no ``"alignment"`` target, the
            target has no output, or the first output has no link; also
            whatever ``wait_for_task_result`` raises.
        requests.exceptions.RequestException: If downloading the output fails,
            times out, or returns a non-success HTTP status.
    """
    self.logger.debug(f"Entering get_transcription_result() for task ID: {task_id}")

    # Wait for task completion
    task_data = self.api.wait_for_task_result(task_id)
    self.logger.debug("Task completed. Getting results...")

    # Find the alignment target output
    alignment_target = next(
        (target for target in task_data.get("targets", []) if target.get("model") == "alignment"),
        None,
    )
    if not alignment_target:
        raise TranscriptionError("Required output not found in task results")

    # Get the output file URL
    output = alignment_target.get("output", [])
    if not output:
        raise TranscriptionError("No output found in alignment target")

    output_url = output[0].get("link")
    if not output_url:
        raise TranscriptionError("Output link not found in alignment target")

    # Fetch transcription data. A (connect, read) timeout is required here:
    # without it a stalled download would block this call indefinitely.
    download_timeout = (30, 300)
    response = requests.get(output_url, timeout=download_timeout)
    response.raise_for_status()

    # Return combined raw data
    raw_data = {"task_data": task_data, "transcription": response.json()}

    self.logger.debug("Raw results retrieved successfully")
    return raw_data
|
|
332
|
+
|
|
333
|
+
def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
    """Translate the raw AudioShake payload into a ``TranscriptionData``.

    Args:
        raw_data: Dict with a ``"task_data"`` entry (must contain ``"id"``)
            and a ``"transcription"`` entry holding the service's JSON.

    Returns:
        A ``TranscriptionData`` containing one ``LyricsSegment`` per entry of
        the transcription's ``"lines"``, the flat list of all ``Word`` objects
        across those lines, and metadata with language, task ID and duration.
    """
    self.logger.debug(f"Processing result for task {raw_data['task_data']['id']}")

    transcription = raw_data["transcription"]
    task_info = raw_data["task_data"]

    flat_words = []  # every word, in order, regardless of line
    line_segments = []

    for line in transcription.get("lines", []):
        # Build the Word objects for this line; each gets a new generated ID.
        line_words = []
        for entry in line.get("words", []):
            line_words.append(
                Word(
                    id=WordUtils.generate_id(),
                    text=entry["text"].strip(" "),
                    start_time=entry.get("start", 0.0),
                    end_time=entry.get("end", 0.0),
                )
            )
        flat_words.extend(line_words)

        # Segment text falls back to the joined word texts when the line
        # carries no "text"; its time span is taken from the words' extremes
        # (0.0 for a line without words).
        segment_id = WordUtils.generate_id()
        segment_text = line.get("text", " ".join(w.text for w in line_words))
        segment_start = min((w.start_time for w in line_words), default=0.0)
        segment_end = max((w.end_time for w in line_words), default=0.0)

        segment = LyricsSegment(
            id=segment_id,
            text=segment_text,
            words=line_words,
            start_time=segment_start,
            end_time=segment_end,
        )
        line_segments.append(segment)

    return TranscriptionData(
        text=transcription.get("text", ""),
        words=flat_words,
        segments=line_segments,
        source=self.get_name(),
        metadata={
            "language": transcription.get("metadata", {}).get("language"),
            "task_id": task_info["id"],
            "duration": task_info.get("duration"),
        },
    )
|
|
376
|
+
|
|
377
|
+
def get_output_filename(self, suffix: str) -> str:
    """Generate consistent filename with (Purpose) suffix pattern.

    Args:
        suffix: Text appended after the configured output prefix.

    Returns:
        ``config.output_prefix`` followed by ``suffix``. When no prefix is
        configured (``None``), only ``suffix`` is returned, rather than a name
        starting with the literal text ``"None"``.
    """
    prefix = self.config.output_prefix
    if prefix is None:
        # Formatting None directly would yield "None<suffix>".
        prefix = ""
    return f"{prefix}{suffix}"
|