lyrics-transcriber 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. lyrics_transcriber/__init__.py +2 -1
  2. lyrics_transcriber/cli/{main.py → cli_main.py} +47 -14
  3. lyrics_transcriber/core/config.py +35 -0
  4. lyrics_transcriber/core/controller.py +164 -166
  5. lyrics_transcriber/correction/anchor_sequence.py +471 -0
  6. lyrics_transcriber/correction/corrector.py +256 -0
  7. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  8. lyrics_transcriber/correction/handlers/base.py +30 -0
  9. lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
  10. lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
  11. lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
  12. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
  13. lyrics_transcriber/correction/handlers/repeat.py +71 -0
  14. lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
  15. lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
  16. lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
  17. lyrics_transcriber/correction/handlers/word_operations.py +135 -0
  18. lyrics_transcriber/correction/phrase_analyzer.py +426 -0
  19. lyrics_transcriber/correction/text_utils.py +30 -0
  20. lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
  21. lyrics_transcriber/lyrics/genius.py +73 -0
  22. lyrics_transcriber/lyrics/spotify.py +82 -0
  23. lyrics_transcriber/output/ass/__init__.py +21 -0
  24. lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
  25. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  26. lyrics_transcriber/output/ass/config.py +37 -0
  27. lyrics_transcriber/output/ass/constants.py +23 -0
  28. lyrics_transcriber/output/ass/event.py +94 -0
  29. lyrics_transcriber/output/ass/formatters.py +132 -0
  30. lyrics_transcriber/output/ass/lyrics_line.py +219 -0
  31. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  32. lyrics_transcriber/output/ass/section_detector.py +89 -0
  33. lyrics_transcriber/output/ass/section_screen.py +106 -0
  34. lyrics_transcriber/output/ass/style.py +187 -0
  35. lyrics_transcriber/output/cdg.py +503 -0
  36. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  37. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  38. lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
  39. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  40. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  41. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  42. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  43. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  44. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  45. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  46. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  47. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  48. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  49. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  50. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  51. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  52. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  53. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  54. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  55. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  56. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  57. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  58. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  59. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  60. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  61. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  62. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  63. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  64. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  65. lyrics_transcriber/output/generator.py +140 -171
  66. lyrics_transcriber/output/lyrics_file.py +102 -0
  67. lyrics_transcriber/output/plain_text.py +91 -0
  68. lyrics_transcriber/output/segment_resizer.py +416 -0
  69. lyrics_transcriber/output/subtitles.py +328 -302
  70. lyrics_transcriber/output/video.py +219 -0
  71. lyrics_transcriber/review/__init__.py +1 -0
  72. lyrics_transcriber/review/server.py +138 -0
  73. lyrics_transcriber/storage/dropbox.py +110 -134
  74. lyrics_transcriber/transcribers/audioshake.py +171 -105
  75. lyrics_transcriber/transcribers/base_transcriber.py +149 -0
  76. lyrics_transcriber/transcribers/whisper.py +267 -133
  77. lyrics_transcriber/types.py +454 -0
  78. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/METADATA +14 -3
  79. lyrics_transcriber-0.32.1.dist-info/RECORD +86 -0
  80. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/WHEEL +1 -1
  81. lyrics_transcriber-0.32.1.dist-info/entry_points.txt +4 -0
  82. lyrics_transcriber/core/corrector.py +0 -56
  83. lyrics_transcriber/core/fetcher.py +0 -143
  84. lyrics_transcriber/storage/tokens.py +0 -116
  85. lyrics_transcriber/transcribers/base.py +0 -31
  86. lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
  87. lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
  88. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,125 @@
1
+ from dataclasses import dataclass
2
+ import logging
3
+ from typing import Optional, Dict, Any
4
+ import json
5
+ import hashlib
6
+ from pathlib import Path
7
+ import os
8
+ from abc import ABC, abstractmethod
9
+ from lyrics_transcriber.types import LyricsData
10
+
11
+
12
@dataclass
class LyricsProviderConfig:
    """Settings shared by every lyrics provider.

    All fields are optional and default to ``None``; each provider reads only
    the fields it needs.
    """

    # Genius API token; the Genius provider builds its client only when set.
    genius_api_token: Optional[str] = None
    # Spotify cookie; the Spotify provider builds its client only when set.
    spotify_cookie: Optional[str] = None
    # Directory for cached JSON responses; caching is disabled when unset.
    cache_dir: Optional[str] = None
    # Path of the audio file whose MD5 digest keys the cache entries.
    audio_filepath: Optional[str] = None
20
+
21
+
22
class BaseLyricsProvider(ABC):
    """Abstract base class for lyrics providers with optional JSON caching.

    Subclasses implement ``_fetch_data_from_source`` (raw provider response)
    and ``_convert_result_format`` (raw response -> ``LyricsData``). When a
    cache directory is configured, the raw response and its converted form
    are stored as ``<provider>_<key>_raw.json`` / ``<provider>_<key>_converted.json``.
    """

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Store the cache settings from *config* and create the cache directory if one is set."""
        self.logger = logger or logging.getLogger(__name__)
        self.cache_dir = Path(config.cache_dir) if config.cache_dir else None
        self.audio_filepath = config.audio_filepath
        if self.cache_dir:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def fetch_lyrics(self, artist: str, title: str) -> Optional[LyricsData]:
        """Fetch lyrics for a given artist and title, using cache if available.

        Returns the converted lyrics, or ``None`` when the source yields nothing.
        """
        if not self.cache_dir:
            return self._fetch_and_convert_result(artist, title)

        cache_key = self._get_cache_key(artist, title)
        raw_cache_path = self._get_cache_path(cache_key, "raw")

        # Try to load from cache first
        raw_data = self._load_from_cache(raw_cache_path)
        if raw_data is not None:
            self.logger.info(f"Using cached lyrics for {artist} - {title}")
            return self._save_and_convert_result(cache_key, raw_data)

        # If not in cache, fetch from source
        raw_result = self._fetch_data_from_source(artist, title)
        if raw_result:
            # Save raw API response
            self._save_to_cache(raw_cache_path, raw_result)
            return self._save_and_convert_result(cache_key, raw_result)

        return None

    def _get_cache_key(self, artist: str, title: str) -> str:
        """Return the key used to name cache files for this request.

        The audio file's digest is used when an audio path is configured.
        Without one (the config default), hashing the file would fail on
        ``open(None)``, so the artist/title digest is used instead.
        """
        if self.audio_filepath:
            return self._get_file_hash(self.audio_filepath)
        return self._get_artist_title_hash(artist, title)

    def _get_file_hash(self, filepath: str) -> str:
        """Calculate MD5 hash of a file, reading it in 4096-byte chunks."""
        self.logger.debug(f"Calculating hash for file: {filepath}")
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        hash_result = md5_hash.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_artist_title_hash(self, artist: str, title: str) -> str:
        """Calculate MD5 hash of the lower-cased artist and title."""
        combined = f"{artist.lower()}_{title.lower()}"
        return hashlib.md5(combined.encode()).hexdigest()

    def _get_cache_path(self, cache_key: str, suffix: str) -> str:
        """Get the cache file path for a given cache key and suffix."""
        return os.path.join(self.cache_dir, f"{self.get_name().lower()}_{cache_key}_{suffix}.json")

    def _save_to_cache(self, cache_path: str, data: Dict[str, Any]) -> None:
        """Write *data* to *cache_path* as UTF-8 JSON."""
        self.logger.debug(f"Saving lyrics to cache: {cache_path}")
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Load data from cache; return ``None`` if the file is missing or not valid JSON."""
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.logger.debug("Lyrics loaded from cache")
            return data
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

    def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert raw result to standardized format, save to cache, and return."""
        converted_cache_path = self._get_cache_path(cache_key, "converted")
        converted_result = self._convert_result_format(raw_data)
        # Convert to dictionary before saving to cache
        self._save_to_cache(converted_cache_path, converted_result.to_dict())
        return converted_result

    def _fetch_and_convert_result(self, artist: str, title: str) -> Optional[LyricsData]:
        """Fetch and convert result when caching is disabled."""
        raw_result = self._fetch_data_from_source(artist, title)
        if raw_result:
            return self._convert_result_format(raw_result)
        return None

    @abstractmethod
    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw data from the source (implemented by subclasses)."""
        raise NotImplementedError("Subclasses must implement _fetch_data_from_source")  # pragma: no cover

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert raw API response to standardized format (implemented by subclasses)."""
        raise NotImplementedError("Subclasses must implement _convert_result_format")  # pragma: no cover

    def get_name(self) -> str:
        """Return the provider name: the class name with "Provider" removed."""
        return self.__class__.__name__.replace("Provider", "")
@@ -0,0 +1,73 @@
1
+ import logging
2
+ from typing import Optional, Dict, Any
3
+ import lyricsgenius
4
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata
5
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
6
+
7
+
8
class GeniusProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Genius."""

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Create the lyricsgenius client when an API token is configured."""
        super().__init__(config, logger)
        self.api_token = config.genius_api_token
        self.client = None
        if self.api_token:
            self.client = lyricsgenius.Genius(self.api_token)
            self.client.verbose = False
            self.client.remove_section_headers = True

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from Genius API.

        Returns the song as a dict, or ``None`` when no client is configured,
        no song is found, or the lookup raises.
        """
        if not self.client:
            self.logger.warning("No Genius API token provided")
            return None

        self.logger.info(f"Searching Genius for {artist} - {title}")
        try:
            song = self.client.search_song(title, artist)
            if song:
                self.logger.info("Found lyrics on Genius")
                return song.to_dict()
        except Exception as e:
            # Best-effort lookup: a failed search is logged and reported as "no lyrics".
            self.logger.error(f"Error fetching from Genius: {str(e)}")
        return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Genius's raw API response to standardized format.

        Keys that are present but set to ``None`` are treated like missing
        keys, so a song without an album or annotations still converts.
        """
        # Extract release date components if available
        release_date = None
        if release_components := raw_data.get("release_date_components"):
            year = release_components.get("year")
            month = release_components.get("month")
            day = release_components.get("day")
            if all(x is not None for x in (year, month, day)):
                release_date = f"{year}-{month:02d}-{day:02d}"

        # dict.get's default only applies to a missing key; "or" also covers an explicit None value.
        album = raw_data.get("album") or {}
        verified_annotations_by = raw_data.get("verified_annotations_by") or []
        verified_contributors = raw_data.get("verified_contributors") or []

        # Create metadata object
        metadata = LyricsMetadata(
            source="genius",
            track_name=raw_data.get("title", ""),
            artist_names=raw_data.get("artist_names", ""),
            album_name=album.get("name"),
            lyrics_provider="genius",
            lyrics_provider_id=str(raw_data.get("id")),
            is_synced=False,  # Genius doesn't provide synced lyrics
            provider_metadata={
                "genius_id": raw_data.get("id"),
                "release_date": release_date,
                "page_url": raw_data.get("url"),
                "annotation_count": raw_data.get("annotation_count"),
                "lyrics_state": raw_data.get("lyrics_state"),
                "lyrics_owner_id": raw_data.get("lyrics_owner_id"),
                "pyongs_count": raw_data.get("pyongs_count"),
                "verified_annotations": len(verified_annotations_by),
                "verified_contributors": len(verified_contributors),
                "external_urls": {"genius": raw_data.get("url")},
            },
        )

        # Create result object
        return LyricsData(
            source="genius", lyrics=raw_data.get("lyrics", ""), segments=[], metadata=metadata
        )  # Genius doesn't provide timestamp data
@@ -0,0 +1,82 @@
1
+ import logging
2
+ from typing import Optional, Dict, Any
3
+ import syrics.api
4
+
5
+ from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
6
+ from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
7
+
8
+
9
class SpotifyProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Spotify."""

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Create the syrics Spotify client when a cookie is configured."""
        super().__init__(config, logger)
        self.cookie = config.spotify_cookie
        self.client = syrics.api.Spotify(self.cookie) if self.cookie else None

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw data from Spotify APIs using syrics library.

        Returns ``{"track_data": ..., "lyrics_data": ...}``, or ``None`` when
        no client is configured, no track matches, the track has no lyrics,
        or a request raises.
        """
        if not self.client:
            self.logger.warning("No Spotify cookie provided")
            return None

        try:
            # Search for track
            search_query = f"{title} - {artist}"
            search_results = self.client.search(search_query, type="track", limit=1)

            # An empty result set is an expected outcome, not an error: report it
            # explicitly rather than via an IndexError caught by the handler below.
            items = ((search_results or {}).get("tracks") or {}).get("items") or []
            if not items:
                self.logger.warning(f"No Spotify track found for {artist} - {title}")
                return None

            track_data = items[0]
            self.logger.debug(
                f"Found track: {track_data['artists'][0]['name']} - {track_data['name']} " f"({track_data['external_urls']['spotify']})"
            )

            # Get lyrics data
            lyrics_data = self.client.get_lyrics(track_data["id"])
            if not lyrics_data:
                return None

            return {"track_data": track_data, "lyrics_data": lyrics_data}
        except Exception as e:
            # Best-effort lookup: a failed request is logged and reported as "no lyrics".
            self.logger.error(f"Error fetching from Spotify: {str(e)}")
            return None

    @staticmethod
    def _ms_to_seconds(value: Any) -> Optional[float]:
        """Convert a millisecond timestamp to seconds; ``"0"`` or a missing value gives ``None``."""
        if value is None or value == "0":
            return None
        return float(value) / 1000

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Spotify's raw API response to standardized format."""
        track_data = raw_data["track_data"]
        lyrics_data = raw_data["lyrics_data"]["lyrics"]

        # Convert raw lines to LyricsSegment objects, skipping lines with no text
        segments = []
        for line in lyrics_data.get("lines", []):
            if not line.get("words"):
                continue

            segment = LyricsSegment(
                text=line["words"],
                words=[],  # TODO: Could potentially split words if needed
                start_time=self._ms_to_seconds(line.get("startTimeMs")),
                end_time=self._ms_to_seconds(line.get("endTimeMs")),
            )
            segments.append(segment)

        # dict.get's default only applies to a missing key; "or" also covers an explicit None value.
        album = track_data.get("album") or {}

        # Create metadata object
        metadata = LyricsMetadata(
            source="spotify",
            track_name=track_data.get("name"),
            artist_names=", ".join(artist.get("name", "") for artist in track_data.get("artists", [])),
            album_name=album.get("name"),
            duration_ms=track_data.get("duration_ms"),
            explicit=track_data.get("explicit"),
            language=lyrics_data.get("language"),
            is_synced=lyrics_data.get("syncType") == "LINE_SYNCED",
            lyrics_provider=lyrics_data.get("provider"),
            lyrics_provider_id=lyrics_data.get("providerLyricsId"),
            provider_metadata={
                "spotify_id": track_data.get("id"),
                "preview_url": track_data.get("preview_url"),
                "external_urls": track_data.get("external_urls"),
                "sync_type": lyrics_data.get("syncType"),
            },
        )

        return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
@@ -0,0 +1,21 @@
1
+ from lyrics_transcriber.output.ass.lyrics_screen import LyricsScreen
2
+ from lyrics_transcriber.output.ass.lyrics_line import LyricsLine
3
+ from lyrics_transcriber.output.ass.section_screen import SectionScreen
4
+ from lyrics_transcriber.output.ass.style import Style
5
+ from lyrics_transcriber.output.ass.event import Event
6
+ from lyrics_transcriber.output.ass.config import (
7
+ ScreenConfig,
8
+ LineTimingInfo,
9
+ LineState,
10
+ )
11
+
12
+ __all__ = [
13
+ 'LyricsScreen',
14
+ 'LyricsLine',
15
+ 'SectionScreen',
16
+ 'Style',
17
+ 'Event',
18
+ 'ScreenConfig',
19
+ 'LineTimingInfo',
20
+ 'LineState',
21
+ ]