lyrics-transcriber 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -1
- lyrics_transcriber/cli/{main.py → cli_main.py} +47 -14
- lyrics_transcriber/core/config.py +35 -0
- lyrics_transcriber/core/controller.py +164 -166
- lyrics_transcriber/correction/anchor_sequence.py +471 -0
- lyrics_transcriber/correction/corrector.py +256 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +30 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
- lyrics_transcriber/correction/handlers/repeat.py +71 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
- lyrics_transcriber/correction/handlers/word_operations.py +135 -0
- lyrics_transcriber/correction/phrase_analyzer.py +426 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
- lyrics_transcriber/lyrics/genius.py +73 -0
- lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +37 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +219 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +503 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +140 -171
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +91 -0
- lyrics_transcriber/output/segment_resizer.py +416 -0
- lyrics_transcriber/output/subtitles.py +328 -302
- lyrics_transcriber/output/video.py +219 -0
- lyrics_transcriber/review/__init__.py +1 -0
- lyrics_transcriber/review/server.py +138 -0
- lyrics_transcriber/storage/dropbox.py +110 -134
- lyrics_transcriber/transcribers/audioshake.py +171 -105
- lyrics_transcriber/transcribers/base_transcriber.py +149 -0
- lyrics_transcriber/transcribers/whisper.py +267 -133
- lyrics_transcriber/types.py +454 -0
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/METADATA +14 -3
- lyrics_transcriber-0.32.1.dist-info/RECORD +86 -0
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/WHEEL +1 -1
- lyrics_transcriber-0.32.1.dist-info/entry_points.txt +4 -0
- lyrics_transcriber/core/corrector.py +0 -56
- lyrics_transcriber/core/fetcher.py +0 -143
- lyrics_transcriber/storage/tokens.py +0 -116
- lyrics_transcriber/transcribers/base.py +0 -31
- lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
- lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/LICENSE +0 -0
@@ -0,0 +1,125 @@
|
|
1
|
+
from dataclasses import dataclass
|
2
|
+
import logging
|
3
|
+
from typing import Optional, Dict, Any
|
4
|
+
import json
|
5
|
+
import hashlib
|
6
|
+
from pathlib import Path
|
7
|
+
import os
|
8
|
+
from abc import ABC, abstractmethod
|
9
|
+
from lyrics_transcriber.types import LyricsData
|
10
|
+
|
11
|
+
|
12
|
+
@dataclass
class LyricsProviderConfig:
    """Settings bundle handed to every lyrics provider.

    Every field is optional and defaults to ``None``; a provider simply
    skips the feature whose setting is missing.
    """

    # Token for the Genius API; the Genius provider builds no client without it.
    genius_api_token: Optional[str] = None
    # Cookie used to authenticate against Spotify; no client is built without it.
    spotify_cookie: Optional[str] = None
    # Directory for cached JSON results; caching is disabled when unset.
    cache_dir: Optional[str] = None
    # Path of the audio file being processed; its content hash keys the cache.
    audio_filepath: Optional[str] = None
|
20
|
+
|
21
|
+
|
22
|
+
class BaseLyricsProvider(ABC):
    """Base class for lyrics providers.

    Subclasses implement ``_fetch_data_from_source`` (returns the provider's
    raw response) and ``_convert_result_format`` (turns that response into a
    ``LyricsData``). When the config supplies a ``cache_dir``, both the raw and
    the converted result are written there as JSON so that later calls can
    skip the fetch.
    """

    def __init__(self, config: "LyricsProviderConfig", logger: Optional[logging.Logger] = None):
        """Read cache settings from *config* and create the cache directory if configured.

        Args:
            config: Provider configuration; only ``cache_dir`` and
                ``audio_filepath`` are read here.
            logger: Logger to use; defaults to this module's logger.
        """
        self.logger = logger or logging.getLogger(__name__)
        self.cache_dir = Path(config.cache_dir) if config.cache_dir else None
        self.audio_filepath = config.audio_filepath
        if self.cache_dir:
            self.cache_dir.mkdir(parents=True, exist_ok=True)
            self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def fetch_lyrics(self, artist: str, title: str) -> Optional["LyricsData"]:
        """Fetch lyrics for a given artist and title, using cache if available.

        Returns:
            The converted lyrics, or ``None`` when the source returned nothing.
        """
        if not self.cache_dir:
            return self._fetch_and_convert_result(artist, title)

        cache_key = self._get_cache_key(artist, title)
        raw_cache_path = self._get_cache_path(cache_key, "raw")

        # Try to load from cache first
        raw_data = self._load_from_cache(raw_cache_path)
        if raw_data is not None:
            self.logger.info(f"Using cached lyrics for {artist} - {title}")
            return self._save_and_convert_result(cache_key, raw_data)

        # If not in cache, fetch from source
        raw_result = self._fetch_data_from_source(artist, title)
        if raw_result:
            # Save raw API response
            self._save_to_cache(raw_cache_path, raw_result)
            return self._save_and_convert_result(cache_key, raw_result)

        return None

    def _get_cache_key(self, artist: str, title: str) -> str:
        """Return the cache key: the audio file's hash, or the artist/title hash without a file.

        Falling back to artist/title keeps caching usable when no audio file
        path was configured, instead of failing while trying to open ``None``.
        """
        if self.audio_filepath:
            return self._get_file_hash(self.audio_filepath)
        return self._get_artist_title_hash(artist, title)

    def _get_file_hash(self, filepath: str) -> str:
        """Calculate MD5 hash of a file.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        self.logger.debug(f"Calculating hash for file: {filepath}")
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            # Read in fixed-size chunks so large audio files are never fully in memory.
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        hash_result = md5_hash.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_artist_title_hash(self, artist: str, title: str) -> str:
        """Calculate MD5 hash of the lower-cased artist and title."""
        combined = f"{artist.lower()}_{title.lower()}"
        return hashlib.md5(combined.encode()).hexdigest()

    def _get_cache_path(self, cache_key: str, suffix: str) -> str:
        """Get the cache file path for a given cache key and suffix.

        The file name is ``<provider name, lower-cased>_<cache_key>_<suffix>.json``.
        """
        return os.path.join(self.cache_dir, f"{self.get_name().lower()}_{cache_key}_{suffix}.json")

    def _save_to_cache(self, cache_path: str, data: Dict[str, Any]) -> None:
        """Write *data* to *cache_path* as UTF-8 JSON."""
        self.logger.debug(f"Saving lyrics to cache: {cache_path}")
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Load data from cache if it exists.

        Returns:
            The parsed JSON, or ``None`` when the file is missing or is not
            valid JSON (a warning is logged in the latter case).
        """
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.logger.debug("Lyrics loaded from cache")
            return data
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

    def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> "LyricsData":
        """Convert raw result to standardized format, save to cache, and return."""
        converted_cache_path = self._get_cache_path(cache_key, "converted")
        converted_result = self._convert_result_format(raw_data)
        # Convert to dictionary before saving to cache
        self._save_to_cache(converted_cache_path, converted_result.to_dict())
        return converted_result

    def _fetch_and_convert_result(self, artist: str, title: str) -> Optional["LyricsData"]:
        """Fetch and convert result when caching is disabled."""
        raw_result = self._fetch_data_from_source(artist, title)
        if raw_result:
            return self._convert_result_format(raw_result)
        return None

    @abstractmethod
    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw data from the source (implemented by subclasses)."""
        raise NotImplementedError("Subclasses must implement _fetch_data_from_source")  # pragma: no cover

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> "LyricsData":
        """Convert raw API response to standardized format (implemented by subclasses)."""
        raise NotImplementedError("Subclasses must implement _convert_result_format")  # pragma: no cover

    def get_name(self) -> str:
        """Return the name of this lyrics provider (class name without ``Provider``)."""
        return self.__class__.__name__.replace("Provider", "")
|
@@ -0,0 +1,73 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Optional, Dict, Any
|
3
|
+
import lyricsgenius
|
4
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata
|
5
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
6
|
+
|
7
|
+
|
8
|
+
class GeniusProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Genius."""

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Create the Genius client when an API token is configured; otherwise leave it unset."""
        super().__init__(config, logger)
        self.api_token = config.genius_api_token
        self.client = None
        if self.api_token:
            self.client = lyricsgenius.Genius(self.api_token)
            self.client.verbose = False
            self.client.remove_section_headers = True

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw song data from Genius API.

        Returns:
            The song as a dictionary, or ``None`` when no client is configured,
            nothing was found, or the lookup raised (the error is logged).
        """
        if not self.client:
            self.logger.warning("No Genius API token provided")
            return None

        self.logger.info(f"Searching Genius for {artist} - {title}")
        try:
            song = self.client.search_song(title, artist)
            if song:
                self.logger.info("Found lyrics on Genius")
                return song.to_dict()
        except Exception as e:
            # Best-effort lookup: a provider failure must not abort the caller.
            self.logger.error(f"Error fetching from Genius: {str(e)}")
        return None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Genius's raw API response to standardized format.

        Fields that are present but ``null`` in the response (``album``,
        ``verified_annotations_by``, ``verified_contributors``) are treated the
        same as missing fields.
        """
        # Extract release date components if available
        release_date = None
        if release_components := raw_data.get("release_date_components"):
            year = release_components.get("year")
            month = release_components.get("month")
            day = release_components.get("day")
            if all(x is not None for x in (year, month, day)):
                release_date = f"{year}-{month:02d}-{day:02d}"

        # ``.get(key, default)`` only covers a missing key; ``or`` also covers
        # an explicit null value, which would otherwise raise below.
        album = raw_data.get("album") or {}
        verified_annotations_by = raw_data.get("verified_annotations_by") or []
        verified_contributors = raw_data.get("verified_contributors") or []

        # Create metadata object
        metadata = LyricsMetadata(
            source="genius",
            track_name=raw_data.get("title", ""),
            artist_names=raw_data.get("artist_names", ""),
            album_name=album.get("name"),
            lyrics_provider="genius",
            lyrics_provider_id=str(raw_data.get("id")),
            is_synced=False,  # Genius doesn't provide synced lyrics
            provider_metadata={
                "genius_id": raw_data.get("id"),
                "release_date": release_date,
                "page_url": raw_data.get("url"),
                "annotation_count": raw_data.get("annotation_count"),
                "lyrics_state": raw_data.get("lyrics_state"),
                "lyrics_owner_id": raw_data.get("lyrics_owner_id"),
                "pyongs_count": raw_data.get("pyongs_count"),
                "verified_annotations": len(verified_annotations_by),
                "verified_contributors": len(verified_contributors),
                "external_urls": {"genius": raw_data.get("url")},
            },
        )

        # Create result object
        return LyricsData(
            source="genius", lyrics=raw_data.get("lyrics", ""), segments=[], metadata=metadata
        )  # Genius doesn't provide timestamp data
|
@@ -0,0 +1,82 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Optional, Dict, Any
|
3
|
+
import syrics.api
|
4
|
+
|
5
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
|
6
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
7
|
+
|
8
|
+
|
9
|
+
class SpotifyProvider(BaseLyricsProvider):
    """Handles fetching lyrics from Spotify."""

    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
        """Create the syrics Spotify client when a cookie is configured; otherwise leave it unset."""
        super().__init__(config, logger)
        self.cookie = config.spotify_cookie
        self.client = syrics.api.Spotify(self.cookie) if self.cookie else None

    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
        """Fetch raw data from Spotify APIs using syrics library.

        Returns:
            ``{"track_data": ..., "lyrics_data": ...}`` for the first search
            hit, or ``None`` when no client is configured, the search has no
            match, the track has no lyrics, or the lookup raised (logged).
        """
        if not self.client:
            self.logger.warning("No Spotify cookie provided")
            return None

        try:
            # Search for track
            search_query = f"{title} - {artist}"
            search_results = self.client.search(search_query, type="track", limit=1)

            # An empty result is an expected outcome; report it as such
            # instead of falling through to an IndexError.
            items = ((search_results or {}).get("tracks") or {}).get("items") or []
            if not items:
                self.logger.warning(f"No Spotify track found for {artist} - {title}")
                return None

            track_data = items[0]
            self.logger.debug(
                f"Found track: {track_data['artists'][0]['name']} - {track_data['name']} " f"({track_data['external_urls']['spotify']})"
            )

            # Get lyrics data
            lyrics_data = self.client.get_lyrics(track_data["id"])
            if not lyrics_data:
                return None

            return {"track_data": track_data, "lyrics_data": lyrics_data}
        except Exception as e:
            # Best-effort lookup: a provider failure must not abort the caller.
            self.logger.error(f"Error fetching from Spotify: {str(e)}")
            return None

    @staticmethod
    def _ms_to_seconds(value: Any) -> Optional[float]:
        """Convert a millisecond timestamp to seconds; the string ``"0"`` maps to ``None``."""
        return float(value) / 1000 if value != "0" else None

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
        """Convert Spotify's raw API response to standardized format.

        Lines without any ``words`` are skipped. The plain-text lyrics are the
        remaining lines joined with newlines.
        """
        track_data = raw_data["track_data"]
        lyrics_data = raw_data["lyrics_data"]["lyrics"]

        # Convert raw lines to LyricsSegment objects
        segments = []
        for line in lyrics_data.get("lines", []):
            if not line.get("words"):
                continue

            segment = LyricsSegment(
                text=line["words"],
                words=[],  # TODO: Could potentially split words if needed
                start_time=self._ms_to_seconds(line["startTimeMs"]),
                end_time=self._ms_to_seconds(line["endTimeMs"]),
            )
            segments.append(segment)

        # Create metadata object
        metadata = LyricsMetadata(
            source="spotify",
            track_name=track_data.get("name"),
            artist_names=", ".join(artist.get("name", "") for artist in track_data.get("artists", [])),
            # ``or {}`` also covers an explicit null album, not just a missing key.
            album_name=(track_data.get("album") or {}).get("name"),
            duration_ms=track_data.get("duration_ms"),
            explicit=track_data.get("explicit"),
            language=lyrics_data.get("language"),
            is_synced=lyrics_data.get("syncType") == "LINE_SYNCED",
            lyrics_provider=lyrics_data.get("provider"),
            lyrics_provider_id=lyrics_data.get("providerLyricsId"),
            provider_metadata={
                "spotify_id": track_data.get("id"),
                "preview_url": track_data.get("preview_url"),
                "external_urls": track_data.get("external_urls"),
                "sync_type": lyrics_data.get("syncType"),
            },
        )

        return LyricsData(source="spotify", lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
from lyrics_transcriber.output.ass.lyrics_screen import LyricsScreen
|
2
|
+
from lyrics_transcriber.output.ass.lyrics_line import LyricsLine
|
3
|
+
from lyrics_transcriber.output.ass.section_screen import SectionScreen
|
4
|
+
from lyrics_transcriber.output.ass.style import Style
|
5
|
+
from lyrics_transcriber.output.ass.event import Event
|
6
|
+
from lyrics_transcriber.output.ass.config import (
|
7
|
+
ScreenConfig,
|
8
|
+
LineTimingInfo,
|
9
|
+
LineState,
|
10
|
+
)
|
11
|
+
|
12
|
+
# Public names exported by a star-import of this package.
__all__ = [
    # Imported from lyrics_screen, lyrics_line and section_screen
    "LyricsScreen",
    "LyricsLine",
    "SectionScreen",
    # Imported from style and event
    "Style",
    "Event",
    # Imported from config
    "ScreenConfig",
    "LineTimingInfo",
    "LineState",
]
|