lyrics-transcriber 0.33.0__tar.gz → 0.34.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/PKG-INFO +1 -1
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/controller.py +10 -6
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/corrector.py +9 -1
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/base_lyrics_provider.py +28 -7
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/genius.py +33 -6
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/generator.py +32 -30
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/pyproject.toml +1 -1
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/LICENSE +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/README.md +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/cli/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/cli/cli_main.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/config.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/anchor_sequence.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/base.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/extend_anchor.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/levenshtein.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/no_space_punct_match.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/repeat.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/sound_alike.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/syllables_match.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/word_count_match.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/word_operations.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/phrase_analyzer.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/text_utils.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/spotify.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass_specs.txt +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/config.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/constants.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/event.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/formatters.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/lyrics_line.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/lyrics_screen.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/section_detector.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/section_screen.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/style.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdg.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/cdg.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/composer.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/config.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/pack.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/render.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/utils.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/arial.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/lyrics_file.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/plain_text.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/segment_resizer.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/subtitles.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/video.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/review/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/review/server.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/__init__.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/dropbox.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/audioshake.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/base_transcriber.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/whisper.py +0 -0
- {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/types.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: lyrics-transcriber
|
3
|
-
Version: 0.33.0
|
3
|
+
Version: 0.34.0
|
4
4
|
Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
|
5
5
|
License: MIT
|
6
6
|
Author: Andrew Beveridge
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/controller.py
RENAMED
@@ -1,10 +1,9 @@
|
|
1
1
|
import difflib
|
2
|
-
import json
|
3
2
|
import os
|
4
3
|
import logging
|
5
4
|
from dataclasses import dataclass, field
|
6
5
|
from typing import Dict, Optional, List
|
7
|
-
from lyrics_transcriber.types import LyricsData,
|
6
|
+
from lyrics_transcriber.types import LyricsData, TranscriptionResult, CorrectionResult
|
8
7
|
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
|
9
8
|
from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
|
10
9
|
from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
|
@@ -205,12 +204,17 @@ class LyricsTranscriber:
|
|
205
204
|
if self.output_config.run_transcription:
|
206
205
|
self.transcribe()
|
207
206
|
|
208
|
-
# Step 3: Process and correct lyrics if enabled
|
209
|
-
if self.output_config.run_correction:
|
207
|
+
# Step 3: Process and correct lyrics if enabled AND we have transcription results
|
208
|
+
if self.output_config.run_correction and self.results.transcription_results:
|
210
209
|
self.correct_lyrics()
|
210
|
+
elif self.output_config.run_correction:
|
211
|
+
self.logger.info("Skipping lyrics correction - no transcription results available")
|
211
212
|
|
212
|
-
# Step 4: Generate outputs based on what
|
213
|
-
self.
|
213
|
+
# Step 4: Generate outputs based on what we have
|
214
|
+
if self.results.transcription_corrected or self.results.lyrics_results:
|
215
|
+
self.generate_outputs()
|
216
|
+
else:
|
217
|
+
self.logger.warning("No corrected transcription or lyrics available. Skipping output generation.")
|
214
218
|
|
215
219
|
self.logger.info("Processing completed successfully")
|
216
220
|
return self.results
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/corrector.py
RENAMED
@@ -28,7 +28,8 @@ class LyricsCorrector:
|
|
28
28
|
logger: Optional[logging.Logger] = None,
|
29
29
|
):
|
30
30
|
self.logger = logger or logging.getLogger(__name__)
|
31
|
-
self.
|
31
|
+
self._anchor_finder = anchor_finder
|
32
|
+
self._cache_dir = cache_dir
|
32
33
|
|
33
34
|
# Default handlers in order of preference
|
34
35
|
self.handlers = handlers or [
|
@@ -42,6 +43,13 @@ class LyricsCorrector:
|
|
42
43
|
LevenshteinHandler(),
|
43
44
|
]
|
44
45
|
|
46
|
+
@property
|
47
|
+
def anchor_finder(self) -> AnchorSequenceFinder:
|
48
|
+
"""Lazy load the anchor finder instance, initializing it if not already set."""
|
49
|
+
if self._anchor_finder is None:
|
50
|
+
self._anchor_finder = AnchorSequenceFinder(cache_dir=self._cache_dir, logger=self.logger)
|
51
|
+
return self._anchor_finder
|
52
|
+
|
45
53
|
def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
|
46
54
|
"""Execute the correction process."""
|
47
55
|
if not transcription_results:
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
7
7
|
import os
|
8
8
|
from abc import ABC, abstractmethod
|
9
9
|
from lyrics_transcriber.types import LyricsData
|
10
|
+
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
10
11
|
|
11
12
|
|
12
13
|
@dataclass
|
@@ -17,6 +18,7 @@ class LyricsProviderConfig:
|
|
17
18
|
spotify_cookie: Optional[str] = None
|
18
19
|
cache_dir: Optional[str] = None
|
19
20
|
audio_filepath: Optional[str] = None
|
21
|
+
max_line_length: int = 36 # New config parameter for KaraokeLyricsProcessor
|
20
22
|
|
21
23
|
|
22
24
|
class BaseLyricsProvider(ABC):
|
@@ -26,6 +28,7 @@ class BaseLyricsProvider(ABC):
|
|
26
28
|
self.logger = logger or logging.getLogger(__name__)
|
27
29
|
self.cache_dir = Path(config.cache_dir) if config.cache_dir else None
|
28
30
|
self.audio_filepath = config.audio_filepath
|
31
|
+
self.max_line_length = config.max_line_length
|
29
32
|
if self.cache_dir:
|
30
33
|
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
31
34
|
self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
|
@@ -35,21 +38,22 @@ class BaseLyricsProvider(ABC):
|
|
35
38
|
if not self.cache_dir:
|
36
39
|
return self._fetch_and_convert_result(artist, title)
|
37
40
|
|
38
|
-
|
39
|
-
|
41
|
+
# Use artist and title for cache key instead of audio file hash
|
42
|
+
cache_key = self._get_artist_title_hash(artist, title)
|
43
|
+
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
40
44
|
|
41
45
|
# Try to load from cache first
|
42
46
|
raw_data = self._load_from_cache(raw_cache_path)
|
43
47
|
if raw_data is not None:
|
44
48
|
self.logger.info(f"Using cached lyrics for {artist} - {title}")
|
45
|
-
return self._save_and_convert_result(
|
49
|
+
return self._save_and_convert_result(cache_key, raw_data)
|
46
50
|
|
47
51
|
# If not in cache, fetch from source
|
48
52
|
raw_result = self._fetch_data_from_source(artist, title)
|
49
53
|
if raw_result:
|
50
54
|
# Save raw API response
|
51
55
|
self._save_to_cache(raw_cache_path, raw_result)
|
52
|
-
return self._save_and_convert_result(
|
56
|
+
return self._save_and_convert_result(cache_key, raw_result)
|
53
57
|
|
54
58
|
return None
|
55
59
|
|
@@ -95,13 +99,30 @@ class BaseLyricsProvider(ABC):
|
|
95
99
|
self.logger.warning(f"Cache file {cache_path} is corrupted")
|
96
100
|
return None
|
97
101
|
|
102
|
+
def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
|
103
|
+
"""Process lyrics using KaraokeLyricsProcessor."""
|
104
|
+
processor = KaraokeLyricsProcessor(
|
105
|
+
log_level=self.logger.getEffectiveLevel(),
|
106
|
+
log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
|
107
|
+
input_lyrics_text=lyrics_data.lyrics,
|
108
|
+
max_line_length=self.max_line_length,
|
109
|
+
)
|
110
|
+
processed_text = processor.process()
|
111
|
+
|
112
|
+
# Create new LyricsData with processed text
|
113
|
+
return LyricsData(source=lyrics_data.source, lyrics=processed_text, segments=lyrics_data.segments, metadata=lyrics_data.metadata)
|
114
|
+
|
98
115
|
def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
|
99
|
-
"""Convert raw result to standardized format, save to cache, and return."""
|
116
|
+
"""Convert raw result to standardized format, process lyrics, save to cache, and return."""
|
100
117
|
converted_cache_path = self._get_cache_path(cache_key, "converted")
|
101
118
|
converted_result = self._convert_result_format(raw_data)
|
119
|
+
|
120
|
+
# Process the lyrics
|
121
|
+
processed_result = self._process_lyrics(converted_result)
|
122
|
+
|
102
123
|
# Convert to dictionary before saving to cache
|
103
|
-
self._save_to_cache(converted_cache_path,
|
104
|
-
return
|
124
|
+
self._save_to_cache(converted_cache_path, processed_result.to_dict())
|
125
|
+
return processed_result
|
105
126
|
|
106
127
|
def _fetch_and_convert_result(self, artist: str, title: str) -> Optional[LyricsData]:
|
107
128
|
"""Fetch and convert result when caching is disabled."""
|
@@ -1,4 +1,5 @@
|
|
1
1
|
import logging
|
2
|
+
import re
|
2
3
|
from typing import Optional, Dict, Any
|
3
4
|
import lyricsgenius
|
4
5
|
from lyrics_transcriber.types import LyricsData, LyricsMetadata
|
@@ -13,9 +14,15 @@ class GeniusProvider(BaseLyricsProvider):
|
|
13
14
|
self.api_token = config.genius_api_token
|
14
15
|
self.client = None
|
15
16
|
if self.api_token:
|
16
|
-
self.client = lyricsgenius.Genius(
|
17
|
-
|
18
|
-
|
17
|
+
self.client = lyricsgenius.Genius(
|
18
|
+
self.api_token,
|
19
|
+
verbose=(logger.getEffectiveLevel() == logging.DEBUG if logger else False),
|
20
|
+
remove_section_headers=True, # Remove [Chorus], [Verse], etc.
|
21
|
+
skip_non_songs=True, # Skip track listings and other non-song results
|
22
|
+
timeout=10, # Reasonable timeout for requests
|
23
|
+
retries=3, # Number of retries for failed requests
|
24
|
+
sleep_time=1, # Small delay between requests to be nice to the API
|
25
|
+
)
|
19
26
|
|
20
27
|
def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
|
21
28
|
"""Fetch raw song data from Genius API."""
|
@@ -35,6 +42,9 @@ class GeniusProvider(BaseLyricsProvider):
|
|
35
42
|
|
36
43
|
def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
|
37
44
|
"""Convert Genius's raw API response to standardized format."""
|
45
|
+
# Clean the lyrics before processing
|
46
|
+
lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))
|
47
|
+
|
38
48
|
# Extract release date components if available
|
39
49
|
release_date = None
|
40
50
|
if release_components := raw_data.get("release_date_components"):
|
@@ -68,6 +78,23 @@ class GeniusProvider(BaseLyricsProvider):
|
|
68
78
|
)
|
69
79
|
|
70
80
|
# Create result object
|
71
|
-
return LyricsData(
|
72
|
-
|
73
|
-
|
81
|
+
return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
|
82
|
+
|
83
|
+
def _clean_lyrics(self, lyrics: str) -> str:
|
84
|
+
"""Clean and process lyrics from Genius to remove unwanted content."""
|
85
|
+
|
86
|
+
lyrics = lyrics.replace("\\n", "\n")
|
87
|
+
lyrics = re.sub(r"You might also like", "", lyrics)
|
88
|
+
lyrics = re.sub(
|
89
|
+
r".*?Lyrics([A-Z])", r"\1", lyrics
|
90
|
+
) # Remove the song name and word "Lyrics" if this has a non-newline char at the start
|
91
|
+
lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics) # Remove this example: 27 ContributorsSex Bomb Lyrics
|
92
|
+
lyrics = re.sub(
|
93
|
+
r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
|
94
|
+
) # Remove this example: See Tom Jones LiveGet tickets as low as $71
|
95
|
+
lyrics = re.sub(r"[0-9]+Embed$", "", lyrics) # Remove the word "Embed" at end of line with preceding numbers if found
|
96
|
+
lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
|
97
|
+
lyrics = re.sub(r"^Embed$", r"", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
|
98
|
+
lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics) # Remove lines containing square brackets
|
99
|
+
# add any additional cleaning rules here
|
100
|
+
return lyrics
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/generator.py
RENAMED
@@ -95,7 +95,7 @@ class OutputGenerator:
|
|
95
95
|
|
96
96
|
def generate_outputs(
|
97
97
|
self,
|
98
|
-
transcription_corrected: CorrectionResult,
|
98
|
+
transcription_corrected: Optional[CorrectionResult],
|
99
99
|
lyrics_results: List[LyricsData],
|
100
100
|
output_prefix: str,
|
101
101
|
audio_filepath: str,
|
@@ -110,35 +110,37 @@ class OutputGenerator:
|
|
110
110
|
for lyrics_data in lyrics_results:
|
111
111
|
self.plain_text.write_lyrics(lyrics_data, output_prefix)
|
112
112
|
|
113
|
-
#
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
# Generate
|
140
|
-
|
141
|
-
|
113
|
+
# Only process transcription-related outputs if we have transcription data
|
114
|
+
if transcription_corrected:
|
115
|
+
# Write original (uncorrected) transcription
|
116
|
+
outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
|
117
|
+
|
118
|
+
# Resize corrected segments to ensure none are longer than max_line_length
|
119
|
+
resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
|
120
|
+
transcription_corrected.resized_segments = resized_segments
|
121
|
+
outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
|
122
|
+
|
123
|
+
# Write corrected lyrics as plain text
|
124
|
+
outputs.corrected_txt = self.plain_text.write_corrected_lyrics(resized_segments, output_prefix)
|
125
|
+
|
126
|
+
# Generate LRC using LyricsFileGenerator
|
127
|
+
outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)
|
128
|
+
|
129
|
+
# Generate CDG file if requested
|
130
|
+
if self.config.generate_cdg:
|
131
|
+
outputs.cdg, outputs.mp3, outputs.cdg_zip = self.cdg.generate_cdg(
|
132
|
+
segments=resized_segments,
|
133
|
+
audio_file=audio_filepath,
|
134
|
+
title=title or output_prefix,
|
135
|
+
artist=artist or "",
|
136
|
+
cdg_styles=self.config.styles["cdg"],
|
137
|
+
)
|
138
|
+
|
139
|
+
# Generate video if requested
|
140
|
+
if self.config.render_video:
|
141
|
+
# Generate ASS subtitles
|
142
|
+
outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix, audio_filepath)
|
143
|
+
outputs.video = self.video.generate_video(outputs.ass, audio_filepath, output_prefix)
|
142
144
|
|
143
145
|
return outputs
|
144
146
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[tool.poetry]
|
2
2
|
name = "lyrics-transcriber"
|
3
|
-
version = "0.33.0"
|
3
|
+
version = "0.34.0"
|
4
4
|
description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
|
5
5
|
authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
|
6
6
|
license = "MIT"
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/text_utils.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/spotify.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/__init__.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/__init__.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass_specs.txt
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/config.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/constants.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/event.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/formatters.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/lyrics_line.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/style.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/cdg.py
RENAMED
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/config.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/pack.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/render.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/utils.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/arial.ttf
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/georgia.ttf
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/verdana.ttf
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/lyrics_file.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/plain_text.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/segment_resizer.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/subtitles.py
RENAMED
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/review/__init__.py
RENAMED
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/__init__.py
RENAMED
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/dropbox.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/whisper.py
RENAMED
File without changes
|
File without changes
|