karaoke-gen 0.75.54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of karaoke-gen might be problematic. Click here for more details.

Files changed (287)
  1. karaoke_gen/__init__.py +38 -0
  2. karaoke_gen/audio_fetcher.py +1614 -0
  3. karaoke_gen/audio_processor.py +790 -0
  4. karaoke_gen/config.py +83 -0
  5. karaoke_gen/file_handler.py +387 -0
  6. karaoke_gen/instrumental_review/__init__.py +45 -0
  7. karaoke_gen/instrumental_review/analyzer.py +408 -0
  8. karaoke_gen/instrumental_review/editor.py +322 -0
  9. karaoke_gen/instrumental_review/models.py +171 -0
  10. karaoke_gen/instrumental_review/server.py +475 -0
  11. karaoke_gen/instrumental_review/static/index.html +1529 -0
  12. karaoke_gen/instrumental_review/waveform.py +409 -0
  13. karaoke_gen/karaoke_finalise/__init__.py +1 -0
  14. karaoke_gen/karaoke_finalise/karaoke_finalise.py +1833 -0
  15. karaoke_gen/karaoke_gen.py +1026 -0
  16. karaoke_gen/lyrics_processor.py +474 -0
  17. karaoke_gen/metadata.py +160 -0
  18. karaoke_gen/pipeline/__init__.py +87 -0
  19. karaoke_gen/pipeline/base.py +215 -0
  20. karaoke_gen/pipeline/context.py +230 -0
  21. karaoke_gen/pipeline/executors/__init__.py +21 -0
  22. karaoke_gen/pipeline/executors/local.py +159 -0
  23. karaoke_gen/pipeline/executors/remote.py +257 -0
  24. karaoke_gen/pipeline/stages/__init__.py +27 -0
  25. karaoke_gen/pipeline/stages/finalize.py +202 -0
  26. karaoke_gen/pipeline/stages/render.py +165 -0
  27. karaoke_gen/pipeline/stages/screens.py +139 -0
  28. karaoke_gen/pipeline/stages/separation.py +191 -0
  29. karaoke_gen/pipeline/stages/transcription.py +191 -0
  30. karaoke_gen/resources/AvenirNext-Bold.ttf +0 -0
  31. karaoke_gen/resources/Montserrat-Bold.ttf +0 -0
  32. karaoke_gen/resources/Oswald-Bold.ttf +0 -0
  33. karaoke_gen/resources/Oswald-SemiBold.ttf +0 -0
  34. karaoke_gen/resources/Zurich_Cn_BT_Bold.ttf +0 -0
  35. karaoke_gen/style_loader.py +531 -0
  36. karaoke_gen/utils/__init__.py +18 -0
  37. karaoke_gen/utils/bulk_cli.py +492 -0
  38. karaoke_gen/utils/cli_args.py +432 -0
  39. karaoke_gen/utils/gen_cli.py +978 -0
  40. karaoke_gen/utils/remote_cli.py +3268 -0
  41. karaoke_gen/video_background_processor.py +351 -0
  42. karaoke_gen/video_generator.py +424 -0
  43. karaoke_gen-0.75.54.dist-info/METADATA +718 -0
  44. karaoke_gen-0.75.54.dist-info/RECORD +287 -0
  45. karaoke_gen-0.75.54.dist-info/WHEEL +4 -0
  46. karaoke_gen-0.75.54.dist-info/entry_points.txt +5 -0
  47. karaoke_gen-0.75.54.dist-info/licenses/LICENSE +21 -0
  48. lyrics_transcriber/__init__.py +10 -0
  49. lyrics_transcriber/cli/__init__.py +0 -0
  50. lyrics_transcriber/cli/cli_main.py +285 -0
  51. lyrics_transcriber/core/__init__.py +0 -0
  52. lyrics_transcriber/core/config.py +50 -0
  53. lyrics_transcriber/core/controller.py +594 -0
  54. lyrics_transcriber/correction/__init__.py +0 -0
  55. lyrics_transcriber/correction/agentic/__init__.py +9 -0
  56. lyrics_transcriber/correction/agentic/adapter.py +71 -0
  57. lyrics_transcriber/correction/agentic/agent.py +313 -0
  58. lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
  59. lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
  60. lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
  61. lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
  62. lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
  63. lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
  64. lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
  65. lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
  66. lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
  67. lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
  68. lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
  69. lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
  70. lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
  71. lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
  72. lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
  73. lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
  74. lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
  75. lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
  76. lyrics_transcriber/correction/agentic/models/enums.py +38 -0
  77. lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
  78. lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
  79. lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
  80. lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
  81. lyrics_transcriber/correction/agentic/models/utils.py +19 -0
  82. lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
  83. lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
  84. lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
  85. lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
  86. lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
  87. lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
  88. lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
  89. lyrics_transcriber/correction/agentic/providers/base.py +36 -0
  90. lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
  91. lyrics_transcriber/correction/agentic/providers/config.py +73 -0
  92. lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
  93. lyrics_transcriber/correction/agentic/providers/health.py +28 -0
  94. lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
  95. lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
  96. lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
  97. lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
  98. lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
  99. lyrics_transcriber/correction/agentic/router.py +35 -0
  100. lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
  101. lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
  102. lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
  103. lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
  104. lyrics_transcriber/correction/anchor_sequence.py +919 -0
  105. lyrics_transcriber/correction/corrector.py +760 -0
  106. lyrics_transcriber/correction/feedback/__init__.py +2 -0
  107. lyrics_transcriber/correction/feedback/schemas.py +107 -0
  108. lyrics_transcriber/correction/feedback/store.py +236 -0
  109. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  110. lyrics_transcriber/correction/handlers/base.py +52 -0
  111. lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
  112. lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
  113. lyrics_transcriber/correction/handlers/llm.py +293 -0
  114. lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
  115. lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
  116. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
  117. lyrics_transcriber/correction/handlers/repeat.py +88 -0
  118. lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
  119. lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
  120. lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
  121. lyrics_transcriber/correction/handlers/word_operations.py +187 -0
  122. lyrics_transcriber/correction/operations.py +352 -0
  123. lyrics_transcriber/correction/phrase_analyzer.py +435 -0
  124. lyrics_transcriber/correction/text_utils.py +30 -0
  125. lyrics_transcriber/frontend/.gitignore +23 -0
  126. lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
  127. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  128. lyrics_transcriber/frontend/README.md +50 -0
  129. lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
  130. lyrics_transcriber/frontend/__init__.py +25 -0
  131. lyrics_transcriber/frontend/eslint.config.js +28 -0
  132. lyrics_transcriber/frontend/index.html +18 -0
  133. lyrics_transcriber/frontend/package.json +42 -0
  134. lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
  135. lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
  136. lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
  137. lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
  138. lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
  139. lyrics_transcriber/frontend/public/favicon.ico +0 -0
  140. lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
  141. lyrics_transcriber/frontend/src/App.tsx +214 -0
  142. lyrics_transcriber/frontend/src/api.ts +254 -0
  143. lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
  144. lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
  145. lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
  146. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
  147. lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
  148. lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
  149. lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
  150. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
  151. lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
  152. lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
  153. lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
  154. lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
  155. lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
  156. lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
  157. lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
  158. lyrics_transcriber/frontend/src/components/Header.tsx +413 -0
  159. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1387 -0
  160. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  161. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  162. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  163. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  164. lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
  165. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  166. lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
  167. lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
  168. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
  169. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
  170. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +336 -0
  171. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
  172. lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
  173. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
  174. lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
  175. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
  176. lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
  177. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
  178. lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
  179. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
  180. lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
  181. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
  182. lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
  183. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  184. lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
  185. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
  186. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  187. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
  188. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
  189. lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
  190. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  191. lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
  192. lyrics_transcriber/frontend/src/main.tsx +17 -0
  193. lyrics_transcriber/frontend/src/theme.ts +177 -0
  194. lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
  195. lyrics_transcriber/frontend/src/types.js +2 -0
  196. lyrics_transcriber/frontend/src/types.ts +199 -0
  197. lyrics_transcriber/frontend/src/validation.ts +132 -0
  198. lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
  199. lyrics_transcriber/frontend/tsconfig.app.json +26 -0
  200. lyrics_transcriber/frontend/tsconfig.json +25 -0
  201. lyrics_transcriber/frontend/tsconfig.node.json +23 -0
  202. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
  203. lyrics_transcriber/frontend/update_version.js +11 -0
  204. lyrics_transcriber/frontend/vite.config.d.ts +2 -0
  205. lyrics_transcriber/frontend/vite.config.js +10 -0
  206. lyrics_transcriber/frontend/vite.config.ts +11 -0
  207. lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
  208. lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
  209. lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
  210. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js +43288 -0
  211. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  212. lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
  213. lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
  214. lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
  215. lyrics_transcriber/frontend/web_assets/index.html +18 -0
  216. lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
  217. lyrics_transcriber/frontend/yarn.lock +3752 -0
  218. lyrics_transcriber/lyrics/__init__.py +0 -0
  219. lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
  220. lyrics_transcriber/lyrics/file_provider.py +95 -0
  221. lyrics_transcriber/lyrics/genius.py +384 -0
  222. lyrics_transcriber/lyrics/lrclib.py +231 -0
  223. lyrics_transcriber/lyrics/musixmatch.py +156 -0
  224. lyrics_transcriber/lyrics/spotify.py +290 -0
  225. lyrics_transcriber/lyrics/user_input_provider.py +44 -0
  226. lyrics_transcriber/output/__init__.py +0 -0
  227. lyrics_transcriber/output/ass/__init__.py +21 -0
  228. lyrics_transcriber/output/ass/ass.py +2088 -0
  229. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  230. lyrics_transcriber/output/ass/config.py +180 -0
  231. lyrics_transcriber/output/ass/constants.py +23 -0
  232. lyrics_transcriber/output/ass/event.py +94 -0
  233. lyrics_transcriber/output/ass/formatters.py +132 -0
  234. lyrics_transcriber/output/ass/lyrics_line.py +265 -0
  235. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  236. lyrics_transcriber/output/ass/section_detector.py +89 -0
  237. lyrics_transcriber/output/ass/section_screen.py +106 -0
  238. lyrics_transcriber/output/ass/style.py +187 -0
  239. lyrics_transcriber/output/cdg.py +619 -0
  240. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  241. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  242. lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
  243. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  244. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  245. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  246. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  247. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  248. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  249. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  250. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  251. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  252. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  253. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  254. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  255. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  256. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  257. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  258. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  259. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  260. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  261. lyrics_transcriber/output/countdown_processor.py +306 -0
  262. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  263. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  264. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  265. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  266. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  267. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  268. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  269. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  270. lyrics_transcriber/output/generator.py +257 -0
  271. lyrics_transcriber/output/lrc_to_cdg.py +61 -0
  272. lyrics_transcriber/output/lyrics_file.py +102 -0
  273. lyrics_transcriber/output/plain_text.py +96 -0
  274. lyrics_transcriber/output/segment_resizer.py +431 -0
  275. lyrics_transcriber/output/subtitles.py +397 -0
  276. lyrics_transcriber/output/video.py +544 -0
  277. lyrics_transcriber/review/__init__.py +0 -0
  278. lyrics_transcriber/review/server.py +676 -0
  279. lyrics_transcriber/storage/__init__.py +0 -0
  280. lyrics_transcriber/storage/dropbox.py +225 -0
  281. lyrics_transcriber/transcribers/__init__.py +0 -0
  282. lyrics_transcriber/transcribers/audioshake.py +379 -0
  283. lyrics_transcriber/transcribers/base_transcriber.py +157 -0
  284. lyrics_transcriber/transcribers/whisper.py +330 -0
  285. lyrics_transcriber/types.py +650 -0
  286. lyrics_transcriber/utils/__init__.py +0 -0
  287. lyrics_transcriber/utils/word_utils.py +27 -0
@@ -0,0 +1,225 @@
1
+ from dataclasses import dataclass
2
+ from typing import Protocol, BinaryIO, Optional, List, Any
3
+ import os
4
+ import time
5
+ import logging
6
+ import requests
7
+ from dropbox import Dropbox
8
+ from dropbox.files import WriteMode, FileMetadata
9
+ from dropbox.sharing import RequestedVisibility, SharedLinkSettings
10
+ from dropbox.exceptions import AuthError, ApiError
11
+
12
# Module-level logger named after this module; every log call in this file goes through it.
logger = logging.getLogger(__name__)
13
+
14
+
15
@dataclass
class DropboxConfig:
    """Credentials bundle for the Dropbox client.

    All three fields default to ``None`` so an instance can be created
    before the values are known.
    """

    # Dropbox application key.
    app_key: Optional[str] = None
    # Dropbox application secret.
    app_secret: Optional[str] = None
    # Refresh token for the Dropbox account.
    refresh_token: Optional[str] = None

    @classmethod
    def from_env(cls) -> "DropboxConfig":
        """Build a config from the ``WHISPER_DROPBOX_*`` environment variables.

        Returns:
            A ``DropboxConfig`` whose fields hold the values of
            ``WHISPER_DROPBOX_APP_KEY``, ``WHISPER_DROPBOX_APP_SECRET`` and
            ``WHISPER_DROPBOX_REFRESH_TOKEN``; a variable that is not set
            yields ``None`` for the corresponding field.
        """
        env = os.environ
        key = env.get("WHISPER_DROPBOX_APP_KEY")
        secret = env.get("WHISPER_DROPBOX_APP_SECRET")
        token = env.get("WHISPER_DROPBOX_REFRESH_TOKEN")
        return cls(app_key=key, app_secret=secret, refresh_token=token)
31
+
32
+
33
class DropboxAPI(Protocol):
    """Structural type listing the Dropbox client methods this module relies on.

    Any object providing these methods can stand in for the real client.
    The method bodies are intentionally empty.
    """

    def files_upload(self, f: bytes, path: str, mode: WriteMode) -> Any:
        """Upload the bytes ``f`` to ``path`` using the given write ``mode``."""

    def files_list_folder(self, path: str, recursive: bool = False) -> Any:
        """Start listing the folder at ``path``, optionally recursively."""

    def files_list_folder_continue(self, cursor: str) -> Any:
        """Fetch the next page of a folder listing identified by ``cursor``."""

    def files_download(self, path: str) -> tuple[Any, Any]:
        """Download ``path`` and return a two-item tuple describing the result."""

    def files_download_to_file(self, download_path: str, path: str) -> None:
        """Download ``path`` and store it locally at ``download_path``."""

    def files_get_metadata(self, path: str) -> Any:
        """Return the metadata for the entry at ``path``."""

    def sharing_create_shared_link_with_settings(self, path: str, settings: SharedLinkSettings) -> Any:
        """Create a shared link for ``path`` using ``settings``."""

    def sharing_list_shared_links(self, path: str) -> Any:
        """List the shared links that exist for ``path``."""
44
+
45
+
46
class DropboxHandler:
    """Handles Dropbox storage operations with automatic token refresh.

    The handler wraps a Dropbox client (either supplied by the caller or
    built from a ``DropboxConfig``) and offers upload, download, listing and
    shared-link helpers. Failures are logged and, unless stated otherwise on
    the individual method, re-raised to the caller.
    """

    def __init__(
        self,
        config: Optional["DropboxConfig"] = None,
        client: Optional["DropboxAPI"] = None,
    ):
        """Initialize the Dropbox handler.

        Args:
            config: Credentials to use. When omitted (or falsy) they are read
                from the environment via ``DropboxConfig.from_env()``.
            client: Ready-made client object. When omitted (or falsy) a
                ``Dropbox`` client is created from the credentials.

        Raises:
            ValueError: If any of the three credentials is missing. The
                credentials are validated even when ``client`` is supplied.
        """
        self.config = config or DropboxConfig.from_env()
        self._validate_config()

        self.client = client or Dropbox(
            app_key=self.config.app_key,
            app_secret=self.config.app_secret,
            oauth2_refresh_token=self.config.refresh_token,
        )

    @staticmethod
    def _preview_secret(value: Optional[str]) -> str:
        """Return the first four characters of ``value`` plus ``...``, or ``'None'`` if unset."""
        return value[:4] + "..." if value else "None"

    def _validate_config(self) -> None:
        """Check that every credential is present.

        Raises:
            ValueError: Listing each of ``app_key``, ``app_secret`` and
                ``refresh_token`` that is empty or ``None``.
        """
        logger.debug("Validating DropboxConfig with values:")
        # Only a short prefix of each secret is written to the debug log.
        logger.debug(f"app_key: {self._preview_secret(self.config.app_key)}")
        logger.debug(f"app_secret: {self._preview_secret(self.config.app_secret)}")
        logger.debug(f"refresh_token: {self._preview_secret(self.config.refresh_token)}")

        missing = [
            field_name
            for field_name in ("app_key", "app_secret", "refresh_token")
            if not getattr(self.config, field_name)
        ]

        if missing:
            error_msg = f"Missing required Dropbox configuration: {', '.join(missing)}"
            logger.error(error_msg)
            raise ValueError(error_msg)

    def _upload_bytes_with_retry(
        self,
        data: bytes,
        path: str,
        max_retries: int,
        attempt_label: str,
        success_label: str,
    ) -> None:
        """Upload ``data`` to ``path``, retrying on ``ApiError`` with a growing delay.

        The delay between attempts is ``attempt_number`` seconds (1s, 2s, ...).
        ``attempt_label`` and ``success_label`` only affect the log messages.
        The last ``ApiError`` is re-raised once ``max_retries`` attempts failed.
        """
        for attempt in range(max_retries):
            try:
                logger.debug(f"Attempting {attempt_label} upload to {path} (attempt {attempt + 1}/{max_retries})")
                self.client.files_upload(data, path, mode=WriteMode.overwrite)
                logger.debug(f"Successfully uploaded {success_label} to {path}")
                return
            except ApiError as e:
                logger.warning(f"Upload attempt {attempt + 1} failed: {str(e)}")
                if attempt == max_retries - 1:
                    logger.error(f"All upload attempts failed for {path}")
                    raise
                time.sleep(1 * (attempt + 1))

    def upload_with_retry(self, file: BinaryIO, path: str, max_retries: int = 3) -> None:
        """Upload a file to Dropbox with retries.

        Args:
            file: Seekable binary file object; it is rewound and read in full.
            path: Destination path in Dropbox; an existing file is overwritten.
            max_retries: Maximum number of attempts. With a value below 1 no
                upload is attempted.

        Raises:
            ApiError: If the final attempt fails.
        """
        # The payload is identical for every attempt, so read it once up front.
        file.seek(0)
        self._upload_bytes_with_retry(file.read(), path, max_retries, "file", "file")

    def upload_string_with_retry(self, content: str, path: str, max_retries: int = 3) -> None:
        """Upload a string content to Dropbox with retries.

        Args:
            content: Text to upload; encoded with ``str.encode()`` defaults.
            path: Destination path in Dropbox; an existing file is overwritten.
            max_retries: Maximum number of attempts. With a value below 1 no
                upload is attempted.

        Raises:
            ApiError: If the final attempt fails.
        """
        self._upload_bytes_with_retry(content.encode(), path, max_retries, "string", "string content")

    def list_folder_recursive(self, path: str = "") -> List["FileMetadata"]:
        """List all entries below a folder recursively.

        Args:
            path: Dropbox folder to list; defaults to the empty string.

        Returns:
            Every entry reported by the client, across all result pages.

        Raises:
            Exception: Any client error, after it has been logged.
        """
        try:
            logger.debug(f"Listing files recursively from {path}")
            entries = []
            result = self.client.files_list_folder(path, recursive=True)

            # Follow the cursor until the client reports no further pages.
            while True:
                entries.extend(result.entries)
                if not result.has_more:
                    break
                result = self.client.files_list_folder_continue(result.cursor)

            return entries
        except Exception as e:
            logger.error(f"Error listing files: {str(e)}", exc_info=True)
            raise

    def download_file_content(self, path: str) -> bytes:
        """Download and return the content of a file.

        Args:
            path: Dropbox path of the file.

        Returns:
            The ``content`` attribute of the second item returned by the
            client's ``files_download`` call.

        Raises:
            Exception: Any client error, after it has been logged.
        """
        try:
            logger.debug(f"Downloading file content from {path}")
            return self.client.files_download(path)[1].content
        except Exception as e:
            logger.error(f"Error downloading file: {str(e)}", exc_info=True)
            raise

    def download_folder(self, dropbox_path: str, local_path: str) -> None:
        """Download all files from a Dropbox folder to a local path.

        The folder structure below ``dropbox_path`` is recreated below
        ``local_path``; entries that are not ``FileMetadata`` are skipped.

        Args:
            dropbox_path: Dropbox folder to download.
            local_path: Local directory that receives the files.

        Raises:
            Exception: Any listing, filesystem or download error, after it
                has been logged.
        """
        try:
            logger.debug(f"Downloading folder {dropbox_path} to {local_path}")
            entries = self.list_folder_recursive(dropbox_path)

            for entry in entries:
                if isinstance(entry, FileMetadata):
                    # Strip the folder prefix so the remainder is relative to local_path.
                    rel_path = entry.path_display[len(dropbox_path) :].lstrip("/")
                    local_file_path = os.path.join(local_path, rel_path)

                    # os.makedirs("") raises, so only create a parent when there is one
                    # (e.g. local_path == "" and the file sits directly in the folder).
                    parent_dir = os.path.dirname(local_file_path)
                    if parent_dir:
                        os.makedirs(parent_dir, exist_ok=True)
                    logger.debug(f"Downloading {entry.path_display} to {local_file_path}")
                    self.client.files_download_to_file(local_file_path, entry.path_display)

            logger.debug(f"Successfully downloaded folder {dropbox_path}")
        except Exception as e:
            logger.error(f"Error downloading folder: {str(e)}", exc_info=True)
            raise

    def upload_folder(self, local_path: str, dropbox_path: str) -> None:
        """Upload all files from a local folder to a Dropbox path.

        Each file is read fully into memory and uploaded in overwrite mode.

        Args:
            local_path: Local directory that is walked recursively.
            dropbox_path: Dropbox folder that receives the files; a trailing
                ``/`` is ignored.

        Raises:
            Exception: Any filesystem or upload error, after it has been logged.
        """
        try:
            logger.debug(f"Uploading folder {local_path} to {dropbox_path}")
            # Avoid a double slash when the caller passes e.g. "/backup/" or "/".
            remote_root = dropbox_path.rstrip("/")
            for root, _, files in os.walk(local_path):
                for filename in files:
                    local_file_path = os.path.join(root, filename)
                    rel_path = os.path.relpath(local_file_path, local_path)
                    # Dropbox paths always use "/", whereas rel_path uses the OS
                    # separator (a backslash on Windows).
                    target_path = f"{remote_root}/{rel_path.replace(os.sep, '/')}"

                    logger.debug(f"Uploading {rel_path} to {target_path}")
                    with open(local_file_path, "rb") as f:
                        self.client.files_upload(f.read(), target_path, mode=WriteMode.overwrite)

            logger.debug(f"Successfully uploaded folder {local_path}")
        except Exception as e:
            logger.error(f"Error uploading folder: {str(e)}", exc_info=True)
            raise

    def create_shared_link(self, path: str) -> str:
        """Create a shared link for a file that's accessible without login.

        Args:
            path: Dropbox path of the file.

        Returns:
            The link URL with ``www.dropbox.com`` replaced by
            ``dl.dropboxusercontent.com``.

        Raises:
            Exception: Any client error, after it has been logged.
        """
        try:
            logger.debug(f"Creating shared link for {path}")
            shared_link = self.client.sharing_create_shared_link_with_settings(
                path, settings=SharedLinkSettings(requested_visibility=RequestedVisibility.public)
            )
            return shared_link.url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
        except Exception as e:
            logger.error(f"Error creating shared link: {str(e)}", exc_info=True)
            raise

    def get_existing_shared_link(self, path: str) -> Optional[str]:
        """Get existing shared link for a file if it exists.

        Args:
            path: Dropbox path of the file.

        Returns:
            The first existing link with ``www.dropbox.com`` replaced by
            ``dl.dropboxusercontent.com``, or ``None`` when there is no link
            or the lookup failed (the failure is logged, not raised).
        """
        try:
            logger.debug(f"Getting existing shared link for {path}")
            shared_links = self.client.sharing_list_shared_links(path=path).links
            if shared_links:
                return shared_links[0].url.replace("www.dropbox.com", "dl.dropboxusercontent.com")
            return None
        except Exception as e:
            # Best effort: a failed lookup is treated the same as "no link yet".
            logger.error(f"Error getting existing shared link: {str(e)}", exc_info=True)
            return None

    def create_or_get_shared_link(self, path: str) -> str:
        """Create a shared link or get existing one.

        Args:
            path: Dropbox path of the file.

        Returns:
            The existing link when one is found, otherwise a newly created one.

        Raises:
            Exception: Any error from creating the link, after it has been logged.
        """
        try:
            existing_link = self.get_existing_shared_link(path)
            if existing_link:
                logger.debug(f"Found existing shared link for {path}")
                return existing_link

            logger.debug(f"Creating new shared link for {path}")
            return self.create_shared_link(path)
        except Exception as e:
            logger.error(f"Error creating/getting shared link: {str(e)}", exc_info=True)
            raise

    def file_exists(self, path: str) -> bool:
        """Check if a file exists in Dropbox.

        Args:
            path: Dropbox path to check.

        Returns:
            ``True`` when the client returns metadata for ``path``; ``False``
            when the metadata lookup raises any ordinary exception.
        """
        try:
            self.client.files_get_metadata(path)
            return True
        except Exception:
            # Deliberately broad, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return False
File without changes
@@ -0,0 +1,379 @@
1
+ from dataclasses import dataclass
2
+ import requests
3
+ import time
4
+ import os
5
+ import tempfile
6
+ from typing import Dict, Optional, Any, Union, Tuple
7
+ from pathlib import Path
8
+ from pydub import AudioSegment
9
+ from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
10
+ from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
11
+ from lyrics_transcriber.utils.word_utils import WordUtils
12
+
13
# File extensions (lower-case, with leading dot) grouped by upload strategy.

# Lossy codecs are uploaded as-is: transcoding them would cause quality loss.
LOSSY_FORMATS = {
    '.mp3',
    '.aac',
    '.ogg',
    '.m4a',
    '.wma',
    '.opus',
}

# Lossless formats that are already compressed are also uploaded as-is.
LOSSLESS_COMPRESSED_FORMATS = {
    '.flac',
    '.alac',
}

# Uncompressed formats are converted to FLAC first for a more efficient upload.
UNCOMPRESSED_FORMATS = {
    '.wav',
    '.aiff',
    '.aif',
    '.pcm',
}
19
+
20
+
21
class AudioUploadOptimizer:
    """Optimizes audio files for upload by converting uncompressed formats to FLAC."""

    def __init__(self, logger):
        """Store the logger used for progress and diagnostic messages."""
        self.logger = logger

    def prepare_for_upload(self, filepath: str) -> Tuple[str, Optional[str]]:
        """
        Prepare audio file for optimal upload.

        The decision is based solely on the (lower-cased) file extension.

        Returns:
            Tuple of (filepath_to_upload, temp_file_to_cleanup)
            - If no conversion needed, returns (original_filepath, None)
            - If converted, returns (temp_flac_filepath, temp_flac_filepath)
        """
        ext = os.path.splitext(filepath)[1].lower()

        # Lossy formats: upload directly (transcoding would lose quality)
        if ext in LOSSY_FORMATS:
            self.logger.info(f"Uploading lossy format ({ext}) directly to preserve quality")
            return filepath, None

        # Already compressed lossless: upload directly
        if ext in LOSSLESS_COMPRESSED_FORMATS:
            self.logger.info(f"Uploading lossless compressed format ({ext}) directly")
            return filepath, None

        # Uncompressed formats: convert to FLAC for smaller upload
        if ext in UNCOMPRESSED_FORMATS:
            self.logger.info(f"Converting uncompressed format ({ext}) to FLAC for efficient upload")
            return self._convert_to_flac(filepath)

        # Unknown format: try to upload directly
        self.logger.warning(f"Unknown audio format ({ext}), uploading directly")
        return filepath, None

    def _convert_to_flac(self, filepath: str) -> Tuple[str, str]:
        """Convert audio file to FLAC format.

        Returns:
            ``(flac_path, flac_path)``: the temporary FLAC file to upload,
            repeated as the path the caller must clean up afterwards.

        Raises:
            Whatever the audio loader/exporter or the size lookup raises. In
            that case the temporary FLAC file is removed before re-raising,
            so a failed conversion does not leave a stray file behind.
        """
        ext = os.path.splitext(filepath)[1].lower()

        # Load audio based on format
        if ext == '.wav':
            audio = AudioSegment.from_wav(filepath)
        elif ext in {'.aiff', '.aif'}:
            audio = AudioSegment.from_file(filepath, format='aiff')
        else:
            audio = AudioSegment.from_file(filepath)

        # Reserve a unique output path. The handle is closed before exporting
        # so the exporter can open the path itself on every platform.
        with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
            flac_path = temp_flac.name

        try:
            audio.export(flac_path, format="flac")

            # Log size reduction (guarding against a zero-byte source file)
            original_size = os.path.getsize(filepath)
            flac_size = os.path.getsize(flac_path)
            reduction_pct = (1 - flac_size / original_size) * 100 if original_size else 0.0
            self.logger.info(f"Converted to FLAC: {original_size / 1024 / 1024:.1f}MB → {flac_size / 1024 / 1024:.1f}MB ({reduction_pct:.0f}% smaller)")
        except Exception:
            # The temp file was created with delete=False, so nothing else
            # will remove it if the conversion fails part-way.
            self.cleanup(flac_path)
            raise

        return flac_path, flac_path

    def cleanup(self, temp_filepath: Optional[str]) -> None:
        """Clean up temporary file if it exists.

        ``None``, an empty path, or a path that no longer exists is ignored.
        A failure to delete is logged as a warning rather than raised.
        """
        if temp_filepath and os.path.exists(temp_filepath):
            try:
                os.unlink(temp_filepath)
                self.logger.debug(f"Cleaned up temporary file: {temp_filepath}")
            except OSError as e:
                self.logger.warning(f"Failed to clean up temporary file {temp_filepath}: {e}")
90
+
91
+
92
@dataclass
class AudioShakeConfig:
    """Settings for the AudioShake transcription service client."""

    # API key; AudioShakeAPI refuses to make requests while this is unset.
    api_token: Optional[str] = None
    # Root URL that the endpoint paths are appended to.
    base_url: str = "https://api.audioshake.ai"
    # Prefix placed in front of the suffix when building output filenames.
    output_prefix: Optional[str] = None
    # Upper bound, in minutes, on how long to poll for a finished task.
    timeout_minutes: int = 20
100
+
101
+
102
class AudioShakeAPI:
    """Handles direct API interactions with AudioShake."""

    # Per-request timeout (seconds) for the small JSON calls. Without it a
    # single stalled request could block forever and defeat the overall
    # ``timeout_minutes`` check in wait_for_task_result.
    REQUEST_TIMEOUT_SECONDS = 60
    # Pause between polls while a task is still being processed.
    POLL_INTERVAL_SECONDS = 30
    # How often, and how quickly, to re-check when the task is not listed yet.
    MAX_INITIAL_RETRIES = 5
    INITIAL_RETRY_DELAY_SECONDS = 2

    def __init__(self, config: "AudioShakeConfig", logger):
        """Store the service configuration and the logger."""
        self.config = config
        self.logger = logger

    def _validate_config(self) -> None:
        """Validate API configuration.

        Raises:
            ValueError: if no API token is configured.
        """
        if not self.config.api_token:
            raise ValueError("AudioShake API token must be provided")

    def _get_headers(self) -> Dict[str, str]:
        """Get headers for JSON API requests (validates the config first)."""
        self._validate_config()  # Validate before making any API calls
        return {"x-api-key": self.config.api_token, "Content-Type": "application/json"}

    def upload_file(self, filepath: str) -> str:
        """Upload audio file and return file URL.

        Raises:
            ValueError: if no API token is configured.
            requests.exceptions.HTTPError: if the service answers with an error status.
        """
        self.logger.info(f"Uploading {filepath} to AudioShake")
        self._validate_config()  # Validate before making API call

        url = f"{self.config.base_url}/upload/"
        with open(filepath, "rb") as file:
            files = {"file": (os.path.basename(filepath), file)}
            # Only the API key header is sent here (not the JSON headers), since
            # this is a multipart file upload. No request timeout is applied:
            # large audio uploads can legitimately take a long time.
            response = requests.post(url, headers={"x-api-key": self.config.api_token}, files=files)

        self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
        response.raise_for_status()
        return response.json()["link"]

    def create_task(self, file_url: str) -> str:
        """Create transcription task and return task ID.

        The task requests a single "alignment" target with JSON output and
        language "en".

        Raises:
            ValueError: if no API token is configured.
            requests.exceptions.RequestException: on HTTP error status,
                connection failure or request timeout.
        """
        self.logger.info(f"Creating task for file {file_url}")

        url = f"{self.config.base_url}/tasks"
        data = {
            "url": file_url,
            "targets": [
                {
                    "model": "alignment",
                    "formats": ["json"],
                    "language": "en",
                }
            ],
        }
        response = requests.post(url, headers=self._get_headers(), json=data, timeout=self.REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()
        return response.json()["id"]

    @staticmethod
    def _find_task(tasks_list, task_id):
        """Return the first entry of ``tasks_list`` whose "id" equals ``task_id``, else None."""
        return next((task for task in tasks_list if task.get("id") == task_id), None)

    def _all_targets_completed(self, targets) -> bool:
        """Return True when every target reports status "completed".

        Raises:
            TranscriptionError: as soon as a target with status "failed" is seen.
        """
        all_completed = True
        for target in targets:
            target_status = target.get("status")
            target_model = target.get("model")
            self.logger.debug(f"Target {target_model} status: {target_status}")

            if target_status == "failed":
                error_msg = target.get("error", "Unknown error")
                raise TranscriptionError(f"Target {target_model} failed: {error_msg}")
            if target_status != "completed":
                all_completed = False
        return all_completed

    def wait_for_task_result(self, task_id: str) -> Dict[str, Any]:
        """Poll for task completion and return results.

        Returns:
            The task entry (as returned by the task list endpoint) once all of
            its targets are completed.

        Raises:
            TranscriptionError: on overall timeout, if the task never shows up
                in the list, if it has no targets, or if a target failed.
            requests.exceptions.RequestException: on HTTP error status,
                connection failure or per-request timeout while polling.
        """
        self.logger.info(f"Getting task result for task {task_id}")

        # Use the list endpoint which has fresh data, not the individual task endpoint which caches
        url = f"{self.config.base_url}/tasks"
        start_time = time.time()
        last_status_log = start_time
        timeout_seconds = self.config.timeout_minutes * 60

        # Retry budget for when the task is not found in the list yet
        initial_retry_count = 0

        while True:
            current_time = time.time()
            elapsed_time = current_time - start_time

            # Check for timeout
            if elapsed_time > timeout_seconds:
                raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")

            # Log status every minute
            if current_time - last_status_log >= 60:
                self.logger.info(f"Still waiting for transcription... Elapsed time: {int(elapsed_time/60)} minutes")
                last_status_log = current_time

            # HTTP errors propagate to the caller unchanged.
            response = requests.get(url, headers=self._get_headers(), timeout=self.REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()

            # Find our specific task in the list
            task_data = self._find_task(response.json(), task_id)

            if not task_data:
                # Task not found in list yet
                if initial_retry_count >= self.MAX_INITIAL_RETRIES:
                    raise TranscriptionError(f"Task {task_id} not found in task list after {self.MAX_INITIAL_RETRIES} retries")
                initial_retry_count += 1
                self.logger.info(f"Task not found in list yet (attempt {initial_retry_count}/{self.MAX_INITIAL_RETRIES}), retrying in {self.INITIAL_RETRY_DELAY_SECONDS} seconds...")
                time.sleep(self.INITIAL_RETRY_DELAY_SECONDS)
                continue

            # Log the full response for debugging
            self.logger.debug(f"Task status response: {task_data}")

            # Check status of targets (not the task itself)
            targets = task_data.get("targets", [])
            if not targets:
                raise TranscriptionError("No targets found in task response")

            if self._all_targets_completed(targets):
                self.logger.info("All targets completed successfully")
                return task_data

            # Reset retry count on successful response
            initial_retry_count = 0

            time.sleep(self.POLL_INTERVAL_SECONDS)  # Wait before next poll
234
+
235
+
236
class AudioShakeTranscriber(BaseTranscriber):
    """Transcription service using AudioShake's API."""

    # Timeout (seconds) for downloading the finished transcription JSON, so a
    # stalled download cannot block the caller indefinitely.
    RESULT_DOWNLOAD_TIMEOUT_SECONDS = 60

    def __init__(
        self,
        cache_dir: Union[str, Path],
        config: Optional[AudioShakeConfig] = None,
        logger: Optional[Any] = None,
        api_client: Optional[AudioShakeAPI] = None,
        upload_optimizer: Optional[AudioUploadOptimizer] = None,
    ):
        """Initialize AudioShake transcriber.

        Collaborators that are not supplied are created here: the config reads
        its token from the ``AUDIOSHAKE_API_TOKEN`` environment variable, and
        the API client and upload optimizer share this transcriber's logger.
        """
        super().__init__(cache_dir=cache_dir, logger=logger)
        self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
        self.api = api_client or AudioShakeAPI(self.config, self.logger)
        self.upload_optimizer = upload_optimizer or AudioUploadOptimizer(self.logger)

    def get_name(self) -> str:
        """Return the display name of this transcription source."""
        return "AudioShake"

    def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
        """Actually perform the transcription using AudioShake API.

        Starts a task for the file and waits for its result. Errors are logged
        and re-raised.

        NOTE(review): despite the annotation, the value returned is whatever
        get_transcription_result() returns (the raw result dict), not a
        converted TranscriptionData. Confirm against the base class contract.
        """
        self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
        self.logger.info(f"Starting transcription for {audio_filepath}")

        try:
            # Start task and get results
            self.logger.debug("Calling start_transcription()")
            task_id = self.start_transcription(audio_filepath)
            self.logger.debug(f"Got task_id: {task_id}")

            self.logger.debug("Calling get_transcription_result()")
            result = self.get_transcription_result(task_id)
            self.logger.debug("Got transcription result")

            return result
        except Exception as e:
            self.logger.error(f"Error in _perform_transcription: {str(e)}")
            raise

    def start_transcription(self, audio_filepath: str) -> str:
        """Starts the transcription task and returns the task ID."""
        self.logger.debug(f"Entering start_transcription() for {audio_filepath}")

        # Optimize file format for upload (convert WAV to FLAC, etc.)
        upload_filepath, temp_filepath = self.upload_optimizer.prepare_for_upload(audio_filepath)

        try:
            # Upload file and create task
            file_url = self.api.upload_file(upload_filepath)
            self.logger.debug(f"File uploaded successfully. File URL: {file_url}")

            task_id = self.api.create_task(file_url)
            self.logger.debug(f"Task created successfully. Task ID: {task_id}")

            return task_id
        finally:
            # Clean up any temporary file created during optimization,
            # whether or not the upload / task creation succeeded
            self.upload_optimizer.cleanup(temp_filepath)

    def get_transcription_result(self, task_id: str) -> Dict[str, Any]:
        """Gets the raw results for a previously started task.

        Returns:
            ``{"task_data": <task entry>, "transcription": <downloaded JSON>}``.

        Raises:
            TranscriptionError: if the task has no "alignment" target, or that
                target has no output or no output link.
            requests.exceptions.RequestException: if downloading the output
                fails, times out or returns an error status.
        """
        self.logger.debug(f"Entering get_transcription_result() for task ID: {task_id}")

        # Wait for task completion
        task_data = self.api.wait_for_task_result(task_id)
        self.logger.debug("Task completed. Getting results...")

        # Find the alignment target output
        alignment_target = next(
            (target for target in task_data.get("targets", []) if target.get("model") == "alignment"),
            None,
        )
        if not alignment_target:
            raise TranscriptionError("Required output not found in task results")

        # Get the output file URL (first output entry of the alignment target)
        output = alignment_target.get("output", [])
        if not output:
            raise TranscriptionError("No output found in alignment target")

        output_url = output[0].get("link")
        if not output_url:
            raise TranscriptionError("Output link not found in alignment target")

        # Fetch transcription data
        response = requests.get(output_url, timeout=self.RESULT_DOWNLOAD_TIMEOUT_SECONDS)
        response.raise_for_status()

        # Return combined raw data
        raw_data = {"task_data": task_data, "transcription": response.json()}

        self.logger.debug("Raw results retrieved successfully")
        return raw_data

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
        """Process raw Audioshake API response into standard format.

        Each entry of the transcription's "lines" becomes one LyricsSegment.
        Its words are also collected into one flat list across all segments.
        Segment start/end times are the min/max over its words, or 0.0 for a
        line without words.
        """
        self.logger.debug(f"Processing result for task {raw_data['task_data']['id']}")

        transcription_data = raw_data["transcription"]
        task_data = raw_data["task_data"]

        segments = []
        all_words = []  # Collect all words across segments

        for line in transcription_data.get("lines", []):
            words = [
                Word(
                    id=WordUtils.generate_id(),  # Generate unique ID for each word
                    text=word["text"].strip(" "),
                    start_time=word.get("start", 0.0),
                    end_time=word.get("end", 0.0),
                )
                for word in line.get("words", [])
            ]
            all_words.extend(words)  # Add words to flat list

            segments.append(
                LyricsSegment(
                    id=WordUtils.generate_id(),  # Generate unique ID for each segment
                    # Fall back to the joined word texts when the line has no "text"
                    text=line.get("text", " ".join(w.text for w in words)),
                    words=words,
                    start_time=min((w.start_time for w in words), default=0.0),
                    end_time=max((w.end_time for w in words), default=0.0),
                )
            )

        return TranscriptionData(
            text=transcription_data.get("text", ""),
            words=all_words,
            segments=segments,
            source=self.get_name(),
            metadata={
                "language": transcription_data.get("metadata", {}).get("language"),
                "task_id": task_data["id"],
                "duration": task_data.get("duration"),
            },
        )

    def get_output_filename(self, suffix: str) -> str:
        """Generate consistent filename with (Purpose) suffix pattern.

        NOTE(review): when ``config.output_prefix`` is None the result starts
        with the literal text "None". Presumably callers always set a prefix;
        confirm.
        """
        return f"{self.config.output_prefix}{suffix}"