karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/audio_fetcher.py +461 -0
- karaoke_gen/audio_processor.py +407 -30
- karaoke_gen/config.py +62 -113
- karaoke_gen/file_handler.py +32 -59
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
- karaoke_gen/karaoke_gen.py +270 -61
- karaoke_gen/lyrics_processor.py +13 -1
- karaoke_gen/metadata.py +78 -73
- karaoke_gen/pipeline/__init__.py +87 -0
- karaoke_gen/pipeline/base.py +215 -0
- karaoke_gen/pipeline/context.py +230 -0
- karaoke_gen/pipeline/executors/__init__.py +21 -0
- karaoke_gen/pipeline/executors/local.py +159 -0
- karaoke_gen/pipeline/executors/remote.py +257 -0
- karaoke_gen/pipeline/stages/__init__.py +27 -0
- karaoke_gen/pipeline/stages/finalize.py +202 -0
- karaoke_gen/pipeline/stages/render.py +165 -0
- karaoke_gen/pipeline/stages/screens.py +139 -0
- karaoke_gen/pipeline/stages/separation.py +191 -0
- karaoke_gen/pipeline/stages/transcription.py +191 -0
- karaoke_gen/style_loader.py +531 -0
- karaoke_gen/utils/bulk_cli.py +6 -0
- karaoke_gen/utils/cli_args.py +424 -0
- karaoke_gen/utils/gen_cli.py +26 -261
- karaoke_gen/utils/remote_cli.py +1815 -0
- karaoke_gen/video_background_processor.py +351 -0
- karaoke_gen-0.71.23.dist-info/METADATA +610 -0
- karaoke_gen-0.71.23.dist-info/RECORD +275 -0
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/__init__.py +10 -0
- lyrics_transcriber/cli/__init__.py +0 -0
- lyrics_transcriber/cli/cli_main.py +285 -0
- lyrics_transcriber/core/__init__.py +0 -0
- lyrics_transcriber/core/config.py +50 -0
- lyrics_transcriber/core/controller.py +520 -0
- lyrics_transcriber/correction/__init__.py +0 -0
- lyrics_transcriber/correction/agentic/__init__.py +9 -0
- lyrics_transcriber/correction/agentic/adapter.py +71 -0
- lyrics_transcriber/correction/agentic/agent.py +313 -0
- lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
- lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
- lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
- lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
- lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
- lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
- lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
- lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
- lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
- lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
- lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
- lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
- lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
- lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
- lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
- lyrics_transcriber/correction/agentic/models/enums.py +38 -0
- lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
- lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
- lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
- lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
- lyrics_transcriber/correction/agentic/models/utils.py +19 -0
- lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
- lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
- lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
- lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
- lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
- lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
- lyrics_transcriber/correction/agentic/providers/base.py +36 -0
- lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
- lyrics_transcriber/correction/agentic/providers/config.py +73 -0
- lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
- lyrics_transcriber/correction/agentic/providers/health.py +28 -0
- lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
- lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
- lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
- lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
- lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
- lyrics_transcriber/correction/agentic/router.py +35 -0
- lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
- lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
- lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
- lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
- lyrics_transcriber/correction/anchor_sequence.py +1043 -0
- lyrics_transcriber/correction/corrector.py +760 -0
- lyrics_transcriber/correction/feedback/__init__.py +2 -0
- lyrics_transcriber/correction/feedback/schemas.py +107 -0
- lyrics_transcriber/correction/feedback/store.py +236 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +52 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
- lyrics_transcriber/correction/handlers/llm.py +293 -0
- lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
- lyrics_transcriber/correction/handlers/repeat.py +88 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
- lyrics_transcriber/correction/handlers/word_operations.py +187 -0
- lyrics_transcriber/correction/operations.py +352 -0
- lyrics_transcriber/correction/phrase_analyzer.py +435 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/frontend/.gitignore +23 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/README.md +50 -0
- lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
- lyrics_transcriber/frontend/__init__.py +25 -0
- lyrics_transcriber/frontend/eslint.config.js +28 -0
- lyrics_transcriber/frontend/index.html +18 -0
- lyrics_transcriber/frontend/package.json +42 -0
- lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/public/favicon.ico +0 -0
- lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/src/App.tsx +212 -0
- lyrics_transcriber/frontend/src/api.ts +239 -0
- lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
- lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
- lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
- lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
- lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
- lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
- lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
- lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
- lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
- lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
- lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
- lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
- lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
- lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
- lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
- lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
- lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
- lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
- lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
- lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
- lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
- lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
- lyrics_transcriber/frontend/src/main.tsx +17 -0
- lyrics_transcriber/frontend/src/theme.ts +177 -0
- lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +199 -0
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
- lyrics_transcriber/frontend/tsconfig.app.json +26 -0
- lyrics_transcriber/frontend/tsconfig.json +25 -0
- lyrics_transcriber/frontend/tsconfig.node.json +23 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
- lyrics_transcriber/frontend/update_version.js +11 -0
- lyrics_transcriber/frontend/vite.config.d.ts +2 -0
- lyrics_transcriber/frontend/vite.config.js +10 -0
- lyrics_transcriber/frontend/vite.config.ts +11 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
- lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
- lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
- lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
- lyrics_transcriber/frontend/web_assets/index.html +18 -0
- lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/__init__.py +0 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
- lyrics_transcriber/lyrics/file_provider.py +95 -0
- lyrics_transcriber/lyrics/genius.py +384 -0
- lyrics_transcriber/lyrics/lrclib.py +231 -0
- lyrics_transcriber/lyrics/musixmatch.py +156 -0
- lyrics_transcriber/lyrics/spotify.py +290 -0
- lyrics_transcriber/lyrics/user_input_provider.py +44 -0
- lyrics_transcriber/output/__init__.py +0 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/ass/ass.py +2088 -0
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +180 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +265 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +619 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/countdown_processor.py +267 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +257 -0
- lyrics_transcriber/output/lrc_to_cdg.py +61 -0
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +96 -0
- lyrics_transcriber/output/segment_resizer.py +431 -0
- lyrics_transcriber/output/subtitles.py +397 -0
- lyrics_transcriber/output/video.py +544 -0
- lyrics_transcriber/review/__init__.py +0 -0
- lyrics_transcriber/review/server.py +676 -0
- lyrics_transcriber/storage/__init__.py +0 -0
- lyrics_transcriber/storage/dropbox.py +225 -0
- lyrics_transcriber/transcribers/__init__.py +0 -0
- lyrics_transcriber/transcribers/audioshake.py +290 -0
- lyrics_transcriber/transcribers/base_transcriber.py +157 -0
- lyrics_transcriber/transcribers/whisper.py +330 -0
- lyrics_transcriber/types.py +648 -0
- lyrics_transcriber/utils/__init__.py +0 -0
- lyrics_transcriber/utils/word_utils.py +27 -0
- karaoke_gen-0.57.0.dist-info/METADATA +0 -167
- karaoke_gen-0.57.0.dist-info/RECORD +0 -23
- {karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from typing import Protocol, BinaryIO, Optional, List, Any
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
import logging
|
|
6
|
+
import requests
|
|
7
|
+
from dropbox import Dropbox
|
|
8
|
+
from dropbox.files import WriteMode, FileMetadata
|
|
9
|
+
from dropbox.sharing import RequestedVisibility, SharedLinkSettings
|
|
10
|
+
from dropbox.exceptions import AuthError, ApiError
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class DropboxConfig:
    """Credentials needed to build a Dropbox client.

    All three fields default to ``None``.
    """

    app_key: Optional[str] = None
    app_secret: Optional[str] = None
    refresh_token: Optional[str] = None

    @classmethod
    def from_env(cls) -> "DropboxConfig":
        """Build a config from the ``WHISPER_DROPBOX_*`` environment variables.

        A variable that is not set leaves the matching field as ``None``.
        """
        lookup = os.environ.get
        key = lookup("WHISPER_DROPBOX_APP_KEY")
        secret = lookup("WHISPER_DROPBOX_APP_SECRET")
        token = lookup("WHISPER_DROPBOX_REFRESH_TOKEN")
        return cls(app_key=key, app_secret=secret, refresh_token=token)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class DropboxAPI(Protocol):
    """Structural type listing the client methods a Dropbox client must offer.

    Any object exposing these methods can be used wherever a ``DropboxAPI``
    is expected; the stubs below only declare signatures.
    """

    def files_upload(self, f: bytes, path: str, mode: WriteMode) -> Any:
        """Signature stub for the client's ``files_upload`` method."""

    def files_list_folder(self, path: str, recursive: bool = False) -> Any:
        """Signature stub for the client's ``files_list_folder`` method."""

    def files_list_folder_continue(self, cursor: str) -> Any:
        """Signature stub for the client's ``files_list_folder_continue`` method."""

    def files_download(self, path: str) -> tuple[Any, Any]:
        """Signature stub for the client's ``files_download`` method."""

    def files_download_to_file(self, download_path: str, path: str) -> None:
        """Signature stub for the client's ``files_download_to_file`` method."""

    def files_get_metadata(self, path: str) -> Any:
        """Signature stub for the client's ``files_get_metadata`` method."""

    def sharing_create_shared_link_with_settings(self, path: str, settings: SharedLinkSettings) -> Any:
        """Signature stub for ``sharing_create_shared_link_with_settings``."""

    def sharing_list_shared_links(self, path: str) -> Any:
        """Signature stub for the client's ``sharing_list_shared_links`` method."""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class DropboxHandler:
    """Handles Dropbox storage operations with automatic token refresh.

    Unless a ready-made ``client`` is injected, a ``Dropbox`` client is built
    from the app key, app secret and OAuth2 refresh token held in ``config``.
    """

    def __init__(
        self,
        config: Optional[DropboxConfig] = None,
        client: Optional[DropboxAPI] = None,
    ):
        """Initialize the Dropbox handler.

        Args:
            config: Credentials to use; defaults to ``DropboxConfig.from_env()``.
            client: Pre-built client (for example a test double). When omitted,
                a ``Dropbox`` client is created from ``config``.

        Raises:
            ValueError: If any of the three credentials is missing. The check
                runs even when ``client`` is supplied.
        """
        self.config = config or DropboxConfig.from_env()
        self._validate_config()

        self.client = client or Dropbox(
            app_key=self.config.app_key,
            app_secret=self.config.app_secret,
            oauth2_refresh_token=self.config.refresh_token,
        )

    def _validate_config(self) -> None:
        """Check that all credentials are present.

        Raises:
            ValueError: Naming every missing field.
        """
        logger.debug("Validating DropboxConfig with values:")

        missing = []
        for name in ("app_key", "app_secret", "refresh_token"):
            value = getattr(self.config, name)
            # Only a 4-character prefix is ever written to the log.
            logger.debug(f"{name}: {value[:4] + '...' if value else 'None'}")
            if not value:
                missing.append(name)

        if missing:
            error_msg = f"Missing required Dropbox configuration: {', '.join(missing)}"
            logger.error(error_msg)
            raise ValueError(error_msg)

    @staticmethod
    def _direct_download_url(url: str) -> str:
        """Rewrite a ``www.dropbox.com`` URL to the ``dl.dropboxusercontent.com`` host."""
        return url.replace("www.dropbox.com", "dl.dropboxusercontent.com")

    def _upload_bytes_with_retry(self, data: bytes, path: str, max_retries: int, kind: str, noun: str) -> None:
        """Upload ``data`` to ``path``, retrying on ``ApiError``.

        Shared implementation behind the two public ``*_with_retry`` methods.
        ``kind`` and ``noun`` only vary the wording of the debug log lines.
        """
        if max_retries < 1:
            # range(0) would skip the loop and report success without uploading.
            raise ValueError(f"max_retries must be at least 1, got {max_retries}")

        for attempt in range(1, max_retries + 1):
            try:
                logger.debug(f"Attempting {kind} upload to {path} (attempt {attempt}/{max_retries})")
                self.client.files_upload(data, path, mode=WriteMode.overwrite)
            except ApiError as e:
                logger.warning(f"Upload attempt {attempt} failed: {str(e)}")
                if attempt == max_retries:
                    logger.error(f"All upload attempts failed for {path}")
                    raise
                # Linear back-off: 1s after the first failure, 2s after the second, ...
                time.sleep(attempt)
            else:
                logger.debug(f"Successfully uploaded {noun} to {path}")
                return

    def upload_with_retry(self, file: BinaryIO, path: str, max_retries: int = 3) -> None:
        """Upload a file to Dropbox with retries.

        The file is rewound and read once; the same bytes are sent on every
        attempt. Existing content at ``path`` is overwritten.

        Args:
            file: Seekable binary file object.
            path: Destination path in Dropbox.
            max_retries: Total number of attempts (must be >= 1).

        Raises:
            ValueError: If ``max_retries`` is less than 1.
            ApiError: If every attempt fails.
        """
        file.seek(0)
        self._upload_bytes_with_retry(file.read(), path, max_retries, "file", "file")

    def upload_string_with_retry(self, content: str, path: str, max_retries: int = 3) -> None:
        """Upload a string content to Dropbox with retries.

        The string is encoded with ``str.encode()`` defaults. Existing content
        at ``path`` is overwritten.

        Args:
            content: Text to upload.
            path: Destination path in Dropbox.
            max_retries: Total number of attempts (must be >= 1).

        Raises:
            ValueError: If ``max_retries`` is less than 1.
            ApiError: If every attempt fails.
        """
        self._upload_bytes_with_retry(content.encode(), path, max_retries, "string", "string content")

    def list_folder_recursive(self, path: str = "") -> List[FileMetadata]:
        """List all files in a folder recursively.

        Follows the listing cursor until the client reports no more pages.
        Errors are logged and re-raised.
        """
        try:
            logger.debug(f"Listing files recursively from {path}")
            entries = []
            result = self.client.files_list_folder(path, recursive=True)

            while True:
                entries.extend(result.entries)
                if not result.has_more:
                    break
                result = self.client.files_list_folder_continue(result.cursor)

            return entries
        except Exception as e:
            logger.error(f"Error listing files: {str(e)}", exc_info=True)
            raise

    def download_file_content(self, path: str) -> bytes:
        """Download and return the content of a file.

        Errors are logged and re-raised.
        """
        try:
            logger.debug(f"Downloading file content from {path}")
            # files_download returns a pair; the second item carries the body.
            return self.client.files_download(path)[1].content
        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}", exc_info=True)
            raise

    def download_folder(self, dropbox_path: str, local_path: str) -> None:
        """Download all files from a Dropbox folder to a local path.

        The folder structure below ``dropbox_path`` is recreated under
        ``local_path``. Errors are logged and re-raised.
        """
        try:
            logger.debug(f"Downloading folder {dropbox_path} to {local_path}")
            entries = self.list_folder_recursive(dropbox_path)

            for entry in entries:
                if isinstance(entry, FileMetadata):
                    rel_path = entry.path_display[len(dropbox_path) :].lstrip("/")
                    local_file_path = os.path.join(local_path, rel_path)

                    parent_dir = os.path.dirname(local_file_path)
                    if parent_dir:
                        # os.makedirs("") raises, so skip it when the file
                        # lands directly in the current directory.
                        os.makedirs(parent_dir, exist_ok=True)
                    logger.debug(f"Downloading {entry.path_display} to {local_file_path}")
                    self.client.files_download_to_file(local_file_path, entry.path_display)

            logger.debug(f"Successfully downloaded folder {dropbox_path}")
        except Exception as e:
            logger.error(f"Error downloading folder: {str(e)}", exc_info=True)
            raise

    def upload_folder(self, local_path: str, dropbox_path: str) -> None:
        """Upload all files from a local folder to a Dropbox path.

        Each file is uploaded once (no retry) and overwrites any existing
        file. Errors are logged and re-raised.
        """
        try:
            logger.debug(f"Uploading folder {local_path} to {dropbox_path}")
            # Avoid a doubled slash when the caller passes "/folder/".
            base = dropbox_path.rstrip("/")
            for root, _, files in os.walk(local_path):
                for filename in files:
                    local_file_path = os.path.join(root, filename)
                    # Dropbox paths always use "/", whatever the local OS uses.
                    rel_path = os.path.relpath(local_file_path, local_path).replace(os.sep, "/")
                    target_path = f"{base}/{rel_path}"

                    logger.debug(f"Uploading {rel_path} to {target_path}")
                    with open(local_file_path, "rb") as f:
                        self.client.files_upload(f.read(), target_path, mode=WriteMode.overwrite)

            logger.debug(f"Successfully uploaded folder {local_path}")
        except Exception as e:
            logger.error(f"Error uploading folder: {str(e)}", exc_info=True)
            raise

    def create_shared_link(self, path: str) -> str:
        """Create a shared link for a file that's accessible without login.

        Returns:
            The new link, rewritten to the ``dl.dropboxusercontent.com`` host.
        """
        try:
            logger.debug(f"Creating shared link for {path}")
            shared_link = self.client.sharing_create_shared_link_with_settings(
                path, settings=SharedLinkSettings(requested_visibility=RequestedVisibility.public)
            )
            return self._direct_download_url(shared_link.url)
        except Exception as e:
            logger.error(f"Error creating shared link: {str(e)}", exc_info=True)
            raise

    def get_existing_shared_link(self, path: str) -> Optional[str]:
        """Get existing shared link for a file if it exists.

        Returns:
            The first listed link, rewritten to the
            ``dl.dropboxusercontent.com`` host, or ``None`` when there is no
            link or the lookup fails (the failure is logged, not raised).
        """
        try:
            logger.debug(f"Getting existing shared link for {path}")
            # NOTE(review): the Dropbox API documents that listing by path can
            # also return links to parent folders unless direct_only=True is
            # passed - confirm links[0] always refers to this exact path.
            shared_links = self.client.sharing_list_shared_links(path=path).links
            if shared_links:
                return self._direct_download_url(shared_links[0].url)
            return None
        except Exception as e:
            logger.error(f"Error getting existing shared link: {str(e)}", exc_info=True)
            return None

    def create_or_get_shared_link(self, path: str) -> str:
        """Create a shared link or get existing one.

        An existing link is preferred; a new one is created only when none is
        found. Errors are logged and re-raised.
        """
        try:
            existing_link = self.get_existing_shared_link(path)
            if existing_link:
                logger.debug(f"Found existing shared link for {path}")
                return existing_link

            logger.debug(f"Creating new shared link for {path}")
            return self.create_shared_link(path)
        except Exception as e:
            logger.error(f"Error creating/getting shared link: {str(e)}", exc_info=True)
            raise

    def file_exists(self, path: str) -> bool:
        """Check if a file exists in Dropbox.

        Returns:
            ``True`` when the metadata lookup succeeds, ``False`` when it
            raises. Any lookup failure is reported as ``False``, not only
            "not found".
        """
        try:
            self.client.files_get_metadata(path)
            return True
        except Exception as e:
            # Not a bare ``except``: KeyboardInterrupt/SystemExit must propagate.
            logger.debug(f"Metadata lookup failed for {path}: {str(e)}")
            return False
|
|
File without changes
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
import requests
|
|
3
|
+
import time
|
|
4
|
+
import os
|
|
5
|
+
from typing import Dict, Optional, Any, Union
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
|
8
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
|
9
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class AudioShakeConfig:
    """Settings consumed by the AudioShake API client."""

    # Token sent as the "x-api-key" request header; must be set before any call.
    api_token: Optional[str] = None
    # Root URL that the endpoint paths are appended to.
    base_url: str = "https://api.audioshake.ai"
    output_prefix: Optional[str] = None
    # Upper bound, in minutes, on how long task polling may run.
    timeout_minutes: int = 20
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AudioShakeAPI:
|
|
23
|
+
"""Handles direct API interactions with AudioShake."""
|
|
24
|
+
|
|
25
|
+
def __init__(self, config: AudioShakeConfig, logger):
    """Keep the configuration and logger for later API calls.

    Args:
        config: AudioShake settings (token, base URL, timeout).
        logger: Logger-like object; the other methods call ``info`` and
            ``debug`` on it.
    """
    self.logger = logger
    self.config = config
|
|
28
|
+
|
|
29
|
+
def _validate_config(self) -> None:
|
|
30
|
+
"""Validate API configuration."""
|
|
31
|
+
if not self.config.api_token:
|
|
32
|
+
raise ValueError("AudioShake API token must be provided")
|
|
33
|
+
|
|
34
|
+
def _get_headers(self) -> Dict[str, str]:
|
|
35
|
+
"""Get headers for API requests."""
|
|
36
|
+
self._validate_config() # Validate before making any API calls
|
|
37
|
+
return {"x-api-key": self.config.api_token, "Content-Type": "application/json"}
|
|
38
|
+
|
|
39
|
+
def upload_file(self, filepath: str) -> str:
    """Send a local audio file to AudioShake and return its link.

    Args:
        filepath: Path of the file to upload.

    Returns:
        The ``"link"`` value from the JSON response.

    Raises:
        ValueError: If no API token is configured.
        requests.HTTPError: If the response status indicates an error.
    """
    self.logger.info(f"Uploading {filepath} to AudioShake")
    self._validate_config()

    endpoint = f"{self.config.base_url}/upload/"
    # Only the API key is sent here; the multipart Content-Type is left to
    # requests, so the JSON headers from _get_headers() are not reused.
    auth_header = {"x-api-key": self.config.api_token}
    with open(filepath, "rb") as audio:
        multipart = {"file": (os.path.basename(filepath), audio)}
        # NOTE(review): no timeout is passed, so this call can block
        # indefinitely - confirm whether that is intended.
        reply = requests.post(endpoint, headers=auth_header, files=multipart)

    self.logger.debug(f"Upload response: {reply.status_code} - {reply.text}")
    reply.raise_for_status()
    payload = reply.json()
    return payload["link"]
|
|
52
|
+
|
|
53
|
+
def create_task(self, file_url: str) -> str:
    """Submit an alignment task for an uploaded file and return its ID.

    Args:
        file_url: Link of the previously uploaded audio file.

    Returns:
        The ``"id"`` value from the JSON response.

    Raises:
        ValueError: If no API token is configured.
        requests.HTTPError: If the response status indicates an error.
    """
    self.logger.info(f"Creating task for file {file_url}")

    endpoint = f"{self.config.base_url}/tasks"
    # One target: the "alignment" model, JSON output, English.
    alignment_target = {
        "model": "alignment",
        "formats": ["json"],
        "language": "en"
    }
    payload = {"url": file_url, "targets": [alignment_target]}
    reply = requests.post(endpoint, headers=self._get_headers(), json=payload)
    reply.raise_for_status()
    return reply.json()["id"]
|
|
71
|
+
|
|
72
|
+
def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
    """Poll the task list until every target of ``task_id`` has completed.

    Args:
        task_id: Identifier returned by ``create_task``.

    Returns:
        The task's entry from the task list, once all of its targets report
        the status ``completed``.

    Raises:
        TranscriptionError: If the configured timeout elapses, the task is
            still missing from the list after the initial retries, the task
            has no targets, or any target reports ``failed``.
        ValueError: If no API token is configured.
        requests.exceptions.HTTPError: If a poll response carries an error status.
    """
    self.logger.info(f"Getting task result for task {task_id}")

    # Use the list endpoint which has fresh data, not the individual task endpoint which caches
    url = f"{self.config.base_url}/tasks"
    started_at = time.time()
    last_progress_log = started_at
    timeout_seconds = self.config.timeout_minutes * 60

    # The task may not be listed yet, so a few quick misses are tolerated
    # before giving up.
    misses = 0
    max_initial_retries = 5
    initial_retry_delay = 2  # seconds

    while True:
        now = time.time()
        elapsed = now - started_at

        # The overall deadline is checked once per poll cycle.
        if elapsed > timeout_seconds:
            raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")

        # Emit a progress message at most once a minute.
        if now - last_progress_log >= 60:
            self.logger.info(f"Still waiting for transcription... Elapsed time: {int(elapsed / 60)} minutes")
            last_progress_log = now

        # HTTP errors from the poll propagate to the caller unchanged.
        response = requests.get(url, headers=self._get_headers())
        response.raise_for_status()

        # Pick our task out of the full listing.
        task_data = next((entry for entry in response.json() if entry.get("id") == task_id), None)

        if not task_data:
            if misses >= max_initial_retries:
                raise TranscriptionError(f"Task {task_id} not found in task list after {max_initial_retries} retries")
            misses += 1
            self.logger.info(f"Task not found in list yet (attempt {misses}/{max_initial_retries}), retrying in {initial_retry_delay} seconds...")
            time.sleep(initial_retry_delay)
            continue

        self.logger.debug(f"Task status response: {task_data}")

        # Completion is tracked per target, not on the task itself.
        targets = task_data.get("targets", [])
        if not targets:
            raise TranscriptionError("No targets found in task response")

        unfinished = 0
        for target in targets:
            status = target.get("status")
            model = target.get("model")
            self.logger.debug(f"Target {model} status: {status}")

            if status == "failed":
                error_msg = target.get("error", "Unknown error")
                raise TranscriptionError(f"Target {model} failed: {error_msg}")
            if status != "completed":
                unfinished += 1

        if unfinished == 0:
            self.logger.info("All targets completed successfully")
            return task_data

        # The task was found, so later misses get the full retry budget again.
        misses = 0

        time.sleep(30)  # Wait before next poll
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class AudioShakeTranscriber(BaseTranscriber):
    """Transcription service using AudioShake's API.

    Uploads the audio, creates an alignment task, waits for it to finish and
    converts the downloaded result into ``TranscriptionData``.
    """

    def __init__(
        self,
        cache_dir: Union[str, Path],
        config: Optional[AudioShakeConfig] = None,
        logger: Optional[Any] = None,
        api_client: Optional[AudioShakeAPI] = None,
    ):
        """Initialize AudioShake transcriber.

        Args:
            cache_dir: Directory handed to ``BaseTranscriber`` for cache files.
            config: AudioShake settings. When omitted, a config is built from
                the ``AUDIOSHAKE_API_TOKEN`` environment variable.
            logger: Logger forwarded to ``BaseTranscriber``.
            api_client: Pre-built API client. When omitted, an
                ``AudioShakeAPI`` is created from the config and logger.
        """
        super().__init__(cache_dir=cache_dir, logger=logger)
        self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
        self.api = api_client or AudioShakeAPI(self.config, self.logger)

    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return "AudioShake"

    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """Actually perform the transcription using AudioShake API.

        Args:
            audio_filepath: Path to the audio file to transcribe.

        Returns:
            The raw dictionary built by ``get_transcription_result`` (keys
            ``task_data`` and ``transcription``). Conversion to
            ``TranscriptionData`` is done separately by
            ``_convert_result_format``.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
        self.logger.info(f"Starting transcription for {audio_filepath}")

        try:
            # Start task and get results
            self.logger.debug("Calling start_transcription()")
            task_id = self.start_transcription(audio_filepath)
            self.logger.debug(f"Got task_id: {task_id}")

            self.logger.debug("Calling get_transcription_result()")
            result = self.get_transcription_result(task_id)
            self.logger.debug("Got transcription result")

            return result
        except Exception as e:
            self.logger.error(f"Error in _perform_transcription: {str(e)}")
            raise

    def start_transcription(self, audio_filepath: str) -> str:
        """Starts the transcription task and returns the task ID.

        Args:
            audio_filepath: Path to the audio file to upload.

        Returns:
            The task ID returned by ``self.api.create_task``.
        """
        self.logger.debug(f"Entering start_transcription() for {audio_filepath}")

        # Upload file and create task
        file_url = self.api.upload_file(audio_filepath)
        self.logger.debug(f"File uploaded successfully. File URL: {file_url}")

        task_id = self.api.create_task(file_url)
        self.logger.debug(f"Task created successfully. Task ID: {task_id}")

        return task_id

    def get_transcription_result(self, task_id: str) -> Dict[str, Any]:
        """Gets the raw results for a previously started task.

        Args:
            task_id: ID of a task created by ``start_transcription``.

        Returns:
            A dictionary with ``task_data`` (the completed task as returned by
            ``self.api.wait_for_task_result``) and ``transcription`` (the JSON
            downloaded from the alignment target's first output link).

        Raises:
            TranscriptionError: If the task has no ``alignment`` target, the
                target has no output, or the first output has no link.
            requests.exceptions.HTTPError: If downloading the output fails
                with an error status.
        """
        self.logger.debug(f"Entering get_transcription_result() for task ID: {task_id}")

        # Wait for task completion
        task_data = self.api.wait_for_task_result(task_id)
        self.logger.debug("Task completed. Getting results...")

        # Find the alignment target output
        alignment_target = None
        for target in task_data.get("targets", []):
            if target.get("model") == "alignment":
                alignment_target = target
                break

        if not alignment_target:
            raise TranscriptionError("Required output not found in task results")

        # Get the output file URL
        output = alignment_target.get("output", [])
        if not output:
            raise TranscriptionError("No output found in alignment target")

        output_url = output[0].get("link")
        if not output_url:
            raise TranscriptionError("Output link not found in alignment target")

        # Fetch transcription data (no API headers are sent for this download)
        response = requests.get(output_url)
        response.raise_for_status()

        # Return combined raw data
        raw_data = {"task_data": task_data, "transcription": response.json()}

        self.logger.debug("Raw results retrieved successfully")
        return raw_data

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
        """Process raw Audioshake API response into standard format.

        Args:
            raw_data: Dictionary with ``task_data`` and ``transcription`` keys,
                as produced by ``get_transcription_result``.

        Returns:
            ``TranscriptionData`` holding one segment per transcription line,
            the flat list of all words, and metadata (language, task ID,
            duration).
        """
        self.logger.debug(f"Processing result for task {raw_data['task_data']['id']}")

        transcription_data = raw_data["transcription"]
        task_data = raw_data["task_data"]

        segments = []
        all_words = []  # Collect all words across segments

        for line in transcription_data.get("lines", []):
            words = [
                Word(
                    id=WordUtils.generate_id(),  # Generate unique ID for each word
                    text=word["text"].strip(" "),
                    # Missing timestamps fall back to 0.0.
                    start_time=word.get("start", 0.0),
                    end_time=word.get("end", 0.0),
                )
                for word in line.get("words", [])
            ]
            all_words.extend(words)  # Add words to flat list

            segments.append(
                LyricsSegment(
                    id=WordUtils.generate_id(),  # Generate unique ID for each segment
                    # Prefer the line's own text; otherwise rebuild it from its words.
                    text=line.get("text", " ".join(w.text for w in words)),
                    words=words,
                    # A line without words gets 0.0 for both bounds.
                    start_time=min((w.start_time for w in words), default=0.0),
                    end_time=max((w.end_time for w in words), default=0.0),
                )
            )

        return TranscriptionData(
            text=transcription_data.get("text", ""),
            words=all_words,
            segments=segments,
            source=self.get_name(),
            metadata={
                "language": transcription_data.get("metadata", {}).get("language"),
                "task_id": task_data["id"],
                "duration": task_data.get("duration"),
            },
        )

    def get_output_filename(self, suffix: str) -> str:
        """Generate consistent filename with (Purpose) suffix pattern.

        Args:
            suffix: Text appended to ``self.config.output_prefix``.

        Returns:
            The configured output prefix followed by ``suffix``.
        """
        # NOTE(review): relies on the config exposing ``output_prefix`` -
        # confirm that AudioShakeConfig defines it.
        return f"{self.config.output_prefix}{suffix}"
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Dict, Any, Optional, Union
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import logging
|
|
5
|
+
import os
|
|
6
|
+
import json
|
|
7
|
+
import hashlib
|
|
8
|
+
from lyrics_transcriber.types import TranscriptionData
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TranscriptionError(Exception):
    """Base exception for transcription errors.

    Carries a single, required, human-readable message.
    """

    def __init__(self, message: str):
        """Create the error.

        Args:
            message: Description of what went wrong; becomes ``str(error)``.
        """
        super().__init__(message)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaseTranscriber(ABC):
    """Base class for all transcription services.

    Provides audio-file validation and a two-level JSON cache, keyed by the
    audio file's MD5 hash: the raw service response and the converted
    ``TranscriptionData``. Subclasses implement ``get_name``,
    ``_perform_transcription`` and ``_convert_result_format``.
    """

    def __init__(self, cache_dir: Union[str, Path], logger: Optional[logging.Logger] = None):
        """
        Initialize transcriber with cache directory and logger.

        Args:
            cache_dir: Directory to store cache files. Must be provided.
                It is created (including parents) if it does not exist.
            logger: Logger instance to use. If None, creates a new logger.
        """
        self.cache_dir = Path(cache_dir)
        self.logger = logger or logging.getLogger(__name__)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def _get_file_hash(self, filepath: str) -> str:
        """Calculate MD5 hash of a file.

        The digest is only used to build cache file names.

        Args:
            filepath: Path of the file to hash.

        Returns:
            Hexadecimal MD5 digest of the file's contents.
        """
        self.logger.debug(f"Calculating hash for file: {filepath}")
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            # Read in small chunks so the whole file is never held in memory.
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        hash_result = md5_hash.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_cache_path(self, file_hash: str, suffix: str) -> str:
        """Get the cache file path for a given file hash.

        Args:
            file_hash: Digest returned by ``_get_file_hash``.
            suffix: Cache kind; this class uses ``"raw"`` and ``"converted"``.

        Returns:
            Path of the form ``<cache_dir>/<service name, lowercased>_<hash>_<suffix>.json``.
        """
        cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
        self.logger.debug(f"Cache path: {cache_path}")
        return cache_path

    def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
        """Save JSON-serializable data to a cache file.

        Args:
            cache_path: Destination file, overwritten if it exists.
            raw_data: Data to write as indented JSON.
        """
        self.logger.debug(f"Saving JSON to cache: {cache_path}")
        with open(cache_path, "w") as f:
            json.dump(raw_data, f, indent=2)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Load cached JSON data if it exists.

        Args:
            cache_path: Cache file to read.

        Returns:
            The decoded JSON, or None when the file is missing or does not
            contain valid JSON (a warning is logged in the latter case).
        """
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r") as f:
                data = json.load(f)
                self.logger.debug("Raw API response loaded from cache")
                return data
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

    def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> TranscriptionData:
        """Convert raw result to TranscriptionData, save to cache, and return.

        Args:
            file_hash: Digest of the audio file, used to name the cache entry.
            raw_result: Raw service response to convert.

        Returns:
            The converted result; its ``to_dict()`` form is written to the
            ``converted`` cache file.
        """
        converted_cache_path = self._get_cache_path(file_hash, "converted")
        converted_result = self._convert_result_format(raw_result)
        self._save_to_cache(converted_cache_path, converted_result.to_dict())
        return converted_result

    def transcribe(self, audio_filepath: str) -> TranscriptionData:
        """
        Transcribe an audio file, using cache if available.

        Lookup order: converted cache, then raw cache (re-converted and
        re-cached), then a fresh call to ``_perform_transcription``.

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            TranscriptionData containing segments, text, and metadata

        Raises:
            FileNotFoundError: If the audio file does not exist.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        self.logger.debug(f"Starting transcription for {audio_filepath}")

        try:
            self._validate_audio_file(audio_filepath)
            self.logger.debug("Audio file validation passed")

            # Check converted cache first
            file_hash = self._get_file_hash(audio_filepath)
            converted_cache_path = self._get_cache_path(file_hash, "converted")
            converted_data = self._load_from_cache(converted_cache_path)
            if converted_data:
                self.logger.info(f"Using cached converted data for {audio_filepath}")
                return TranscriptionData.from_dict(converted_data)

            # Check raw cache next
            raw_cache_path = self._get_cache_path(file_hash, "raw")
            raw_data = self._load_from_cache(raw_cache_path)
            if raw_data:
                self.logger.info(f"Using cached raw data for {audio_filepath}")
                # Same convert-and-cache step as for a fresh transcription.
                return self._save_and_convert_result(file_hash, raw_data)

            # If not in cache, perform transcription
            self.logger.info(f"No cache found, transcribing {audio_filepath}")
            raw_result = self._perform_transcription(audio_filepath)
            self.logger.debug("Transcription completed")

            # Save raw result to cache
            self._save_to_cache(raw_cache_path, raw_result)

            return self._save_and_convert_result(file_hash, raw_result)

        except Exception as e:
            self.logger.error(f"Error during transcription: {str(e)}")
            raise

    @abstractmethod
    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """
        Actually perform the transcription (implemented by subclasses).

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            The raw, JSON-serializable service result. ``transcribe`` writes
            it to the raw cache and passes it to ``_convert_result_format``.
        """
        pass  # pragma: no cover

    @abstractmethod
    def get_name(self) -> str:
        """Return the name of this transcription service."""
        pass  # pragma: no cover

    def _validate_audio_file(self, audio_filepath: str) -> None:
        """Validate that the audio file exists.

        Args:
            audio_filepath: Path to check.

        Raises:
            FileNotFoundError: If nothing exists at ``audio_filepath``.
        """
        self.logger.debug(f"Validating audio file: {audio_filepath}")
        if not os.path.exists(audio_filepath):
            self.logger.error(f"Audio file not found: {audio_filepath}")
            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
        self.logger.debug("Audio file validation successful")

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
        """Convert raw API response to TranscriptionData format.

        Args:
            raw_data: Raw result as returned by ``_perform_transcription`` or
                loaded from the raw cache.
        """
        pass  # pragma: no cover
|