lyrics-transcriber 0.40.0__py3-none-any.whl → 0.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/cli/cli_main.py +7 -0
- lyrics_transcriber/core/config.py +1 -0
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
- lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +8 -8
- lyrics_transcriber/output/generator.py +29 -14
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +56 -2
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +76 -63
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.40.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,14 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
2
|
import logging
|
3
|
-
from typing import Optional, Dict, Any
|
3
|
+
from typing import Optional, Dict, Any, List
|
4
4
|
import json
|
5
5
|
import hashlib
|
6
6
|
from pathlib import Path
|
7
7
|
import os
|
8
8
|
from abc import ABC, abstractmethod
|
9
|
-
from lyrics_transcriber.types import LyricsData
|
9
|
+
from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
|
10
10
|
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
11
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
11
12
|
|
12
13
|
|
13
14
|
@dataclass
|
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
|
|
41
42
|
|
42
43
|
# Use artist and title for cache key instead of audio file hash
|
43
44
|
cache_key = self._get_artist_title_hash(artist, title)
|
44
|
-
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
45
45
|
|
46
|
-
#
|
46
|
+
# Check converted cache first
|
47
|
+
converted_cache_path = self._get_cache_path(cache_key, "converted")
|
48
|
+
converted_data = self._load_from_cache(converted_cache_path)
|
49
|
+
if converted_data:
|
50
|
+
self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
|
51
|
+
return LyricsData.from_dict(converted_data)
|
52
|
+
|
53
|
+
# Check raw cache next
|
54
|
+
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
47
55
|
raw_data = self._load_from_cache(raw_cache_path)
|
48
|
-
if raw_data
|
49
|
-
self.logger.info(f"Using cached lyrics for {artist} - {title}")
|
50
|
-
|
56
|
+
if raw_data:
|
57
|
+
self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
|
58
|
+
converted_result = self._convert_result_format(raw_data)
|
59
|
+
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
60
|
+
return converted_result
|
51
61
|
|
52
62
|
# If not in cache, fetch from source
|
53
63
|
raw_result = self._fetch_data_from_source(artist, title)
|
54
64
|
if raw_result:
|
55
65
|
# Save raw API response
|
56
66
|
self._save_to_cache(raw_cache_path, raw_result)
|
57
|
-
|
67
|
+
converted_result = self._convert_result_format(raw_result)
|
68
|
+
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
69
|
+
return converted_result
|
58
70
|
|
59
71
|
return None
|
60
72
|
|
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
|
|
100
112
|
self.logger.warning(f"Cache file {cache_path} is corrupted")
|
101
113
|
return None
|
102
114
|
|
115
|
+
def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
|
116
|
+
"""Create LyricsSegment objects with properly formatted words from text.
|
117
|
+
|
118
|
+
Args:
|
119
|
+
text: Raw lyrics text
|
120
|
+
is_synced: Whether timing information is available
|
121
|
+
|
122
|
+
Returns:
|
123
|
+
List of LyricsSegment objects with unique IDs and Word objects
|
124
|
+
"""
|
125
|
+
segments = []
|
126
|
+
lines = text.strip().split("\n")
|
127
|
+
|
128
|
+
for line in lines:
|
129
|
+
if not line.strip():
|
130
|
+
continue
|
131
|
+
|
132
|
+
# Split line into words
|
133
|
+
word_texts = line.strip().split()
|
134
|
+
if not word_texts:
|
135
|
+
continue
|
136
|
+
|
137
|
+
words = []
|
138
|
+
for word_text in word_texts:
|
139
|
+
word = Word(
|
140
|
+
id=WordUtils.generate_id(),
|
141
|
+
text=word_text,
|
142
|
+
start_time=0.0 if is_synced else None,
|
143
|
+
end_time=0.0 if is_synced else None,
|
144
|
+
confidence=1.0, # Reference lyrics are considered ground truth
|
145
|
+
created_during_correction=False,
|
146
|
+
)
|
147
|
+
words.append(word)
|
148
|
+
|
149
|
+
segment = LyricsSegment(
|
150
|
+
id=WordUtils.generate_id(),
|
151
|
+
text=line.strip(),
|
152
|
+
words=words,
|
153
|
+
start_time=words[0].start_time if is_synced else None,
|
154
|
+
end_time=words[-1].end_time if is_synced else None,
|
155
|
+
)
|
156
|
+
segments.append(segment)
|
157
|
+
|
158
|
+
return segments
|
159
|
+
|
103
160
|
def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
|
104
|
-
"""Process lyrics using KaraokeLyricsProcessor."""
|
161
|
+
"""Process lyrics using KaraokeLyricsProcessor and create proper segments."""
|
162
|
+
# Concatenate all segment texts to get the full lyrics
|
163
|
+
full_lyrics = lyrics_data.get_full_text()
|
164
|
+
|
105
165
|
processor = KaraokeLyricsProcessor(
|
106
166
|
log_level=self.logger.getEffectiveLevel(),
|
107
167
|
log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
|
108
|
-
input_lyrics_text=
|
168
|
+
input_lyrics_text=full_lyrics,
|
109
169
|
max_line_length=self.max_line_length,
|
110
170
|
)
|
111
171
|
processed_text = processor.process()
|
112
172
|
|
113
|
-
# Create
|
114
|
-
|
173
|
+
# Create segments with words from processed text
|
174
|
+
segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
|
175
|
+
|
176
|
+
# Create new LyricsData with processed text and segments
|
177
|
+
return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
|
115
178
|
|
116
179
|
def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
|
117
180
|
"""Convert raw result to standardized format, process lyrics, save to cache, and return."""
|
@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
|
|
67
67
|
self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
|
68
68
|
|
69
69
|
try:
|
70
|
-
# Create metadata object
|
70
|
+
# Create metadata object
|
71
71
|
metadata = LyricsMetadata(
|
72
72
|
source="file",
|
73
73
|
track_name=self.title,
|
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
|
|
78
78
|
provider_metadata={"filepath": raw_data["filepath"]},
|
79
79
|
)
|
80
80
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
81
|
+
# Create segments with words from the processed text
|
82
|
+
segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
|
83
|
+
|
84
|
+
lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
|
85
|
+
self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
|
85
86
|
return lyrics_data
|
86
87
|
|
87
88
|
except Exception as e:
|
@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
|
|
77
77
|
},
|
78
78
|
)
|
79
79
|
|
80
|
-
# Create
|
81
|
-
|
80
|
+
# Create segments with words from cleaned lyrics
|
81
|
+
segments = self._create_segments_with_words(lyrics, is_synced=False)
|
82
|
+
|
83
|
+
# Create result object with segments
|
84
|
+
return LyricsData(source="genius", segments=segments, metadata=metadata)
|
82
85
|
|
83
86
|
def _clean_lyrics(self, lyrics: str) -> str:
|
84
87
|
"""Clean and process lyrics from Genius to remove unwanted content."""
|
@@ -1,9 +1,11 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import Optional, Dict, Any
|
3
3
|
import syrics.api
|
4
|
+
import time
|
4
5
|
|
5
|
-
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
|
6
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
|
6
7
|
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
8
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
7
9
|
|
8
10
|
|
9
11
|
class SpotifyProvider(BaseLyricsProvider):
|
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
12
14
|
def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
|
13
15
|
super().__init__(config, logger)
|
14
16
|
self.cookie = config.spotify_cookie
|
15
|
-
self.client =
|
17
|
+
self.client = None
|
18
|
+
|
19
|
+
if self.cookie:
|
20
|
+
max_retries = 5
|
21
|
+
retry_delay = 5 # seconds
|
22
|
+
|
23
|
+
for attempt in range(max_retries):
|
24
|
+
try:
|
25
|
+
self.client = syrics.api.Spotify(self.cookie)
|
26
|
+
break # Successfully initialized
|
27
|
+
except Exception as e:
|
28
|
+
if attempt == max_retries - 1: # Last attempt
|
29
|
+
self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
|
30
|
+
break
|
31
|
+
self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
|
32
|
+
time.sleep(retry_delay)
|
16
33
|
|
17
34
|
def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
|
18
35
|
"""Fetch raw data from Spotify APIs using syrics library."""
|
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
45
62
|
track_data = raw_data["track_data"]
|
46
63
|
lyrics_data = raw_data["lyrics_data"]["lyrics"]
|
47
64
|
|
48
|
-
# Convert raw lines to LyricsSegment objects
|
49
|
-
segments = []
|
50
|
-
for line in lyrics_data.get("lines", []):
|
51
|
-
if not line.get("words"):
|
52
|
-
continue
|
53
|
-
|
54
|
-
# Skip lines that are just musical notes
|
55
|
-
if not self._clean_lyrics(line["words"]):
|
56
|
-
continue
|
57
|
-
|
58
|
-
segment = LyricsSegment(
|
59
|
-
text=line["words"],
|
60
|
-
words=[], # TODO: Could potentially split words if needed
|
61
|
-
start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
|
62
|
-
end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
|
63
|
-
)
|
64
|
-
segments.append(segment)
|
65
|
-
|
66
65
|
# Create metadata object
|
67
66
|
metadata = LyricsMetadata(
|
68
67
|
source="spotify",
|
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
83
82
|
},
|
84
83
|
)
|
85
84
|
|
86
|
-
|
85
|
+
# Create segments with timing information
|
86
|
+
segments = []
|
87
|
+
for line in lyrics_data.get("lines", []):
|
88
|
+
if not line.get("words"):
|
89
|
+
continue
|
90
|
+
|
91
|
+
# Skip lines that are just musical notes
|
92
|
+
if not self._clean_lyrics(line["words"]):
|
93
|
+
continue
|
94
|
+
|
95
|
+
# Split line into words
|
96
|
+
word_texts = line["words"].strip().split()
|
97
|
+
if not word_texts:
|
98
|
+
continue
|
99
|
+
|
100
|
+
# Calculate approximate timing for each word
|
101
|
+
start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
|
102
|
+
end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
|
103
|
+
duration = end_time - start_time
|
104
|
+
word_duration = duration / len(word_texts)
|
105
|
+
|
106
|
+
words = []
|
107
|
+
for i, word_text in enumerate(word_texts):
|
108
|
+
word = Word(
|
109
|
+
id=WordUtils.generate_id(),
|
110
|
+
text=word_text,
|
111
|
+
start_time=start_time + (i * word_duration),
|
112
|
+
end_time=start_time + ((i + 1) * word_duration),
|
113
|
+
confidence=1.0,
|
114
|
+
created_during_correction=False,
|
115
|
+
)
|
116
|
+
words.append(word)
|
117
|
+
|
118
|
+
segment = LyricsSegment(
|
119
|
+
id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
|
120
|
+
)
|
121
|
+
segments.append(segment)
|
122
|
+
|
123
|
+
return LyricsData(source="spotify", segments=segments, metadata=metadata)
|
87
124
|
|
88
125
|
def _clean_lyrics(self, lyrics: str) -> str:
|
89
126
|
"""Clean and process lyrics from Spotify to remove unwanted content."""
|
@@ -4,7 +4,18 @@ from dataclasses import dataclass
|
|
4
4
|
class ScreenConfig:
|
5
5
|
"""Configuration for screen timing and layout."""
|
6
6
|
|
7
|
-
def __init__(
|
7
|
+
def __init__(
|
8
|
+
self,
|
9
|
+
line_height: int = 50,
|
10
|
+
max_visible_lines: int = 4,
|
11
|
+
top_padding: int = None,
|
12
|
+
video_width: int = 640,
|
13
|
+
video_height: int = 360,
|
14
|
+
screen_gap_threshold: float = 5.0,
|
15
|
+
post_roll_time: float = 1.0,
|
16
|
+
fade_in_ms: int = 200,
|
17
|
+
fade_out_ms: int = 300,
|
18
|
+
):
|
8
19
|
# Screen layout
|
9
20
|
self.max_visible_lines = max_visible_lines
|
10
21
|
self.line_height = line_height
|
@@ -12,10 +23,10 @@ class ScreenConfig:
|
|
12
23
|
self.video_height = video_height
|
13
24
|
self.video_width = video_width
|
14
25
|
# Timing configuration
|
15
|
-
self.screen_gap_threshold =
|
16
|
-
self.post_roll_time =
|
17
|
-
self.fade_in_ms =
|
18
|
-
self.fade_out_ms =
|
26
|
+
self.screen_gap_threshold = screen_gap_threshold
|
27
|
+
self.post_roll_time = post_roll_time
|
28
|
+
self.fade_in_ms = fade_in_ms
|
29
|
+
self.fade_out_ms = fade_out_ms
|
19
30
|
|
20
31
|
|
21
32
|
@dataclass
|
lyrics_transcriber/output/cdg.py
CHANGED
@@ -126,7 +126,7 @@ class CDGGenerator:
|
|
126
126
|
cdg_styles: dict,
|
127
127
|
) -> str:
|
128
128
|
"""Create TOML configuration file for CDG generation."""
|
129
|
-
safe_filename = self._get_safe_filename(artist, title, "Karaoke
|
129
|
+
safe_filename = self._get_safe_filename(artist, title, "Karaoke", "toml")
|
130
130
|
toml_file = os.path.join(self.output_dir, safe_filename)
|
131
131
|
self.logger.debug(f"Generating TOML file: {toml_file}")
|
132
132
|
|
@@ -161,7 +161,7 @@ class CDGGenerator:
|
|
161
161
|
title=title,
|
162
162
|
artist=artist,
|
163
163
|
audio_file=audio_file,
|
164
|
-
output_name=f"{artist} - {title} (Karaoke
|
164
|
+
output_name=f"{artist} - {title} (Karaoke)",
|
165
165
|
sync_times=sync_times,
|
166
166
|
instrumentals=instrumentals,
|
167
167
|
formatted_lyrics=formatted_lyrics,
|
@@ -190,11 +190,11 @@ class CDGGenerator:
|
|
190
190
|
"""Compose CDG using KaraokeComposer."""
|
191
191
|
kc = KaraokeComposer.from_file(toml_file)
|
192
192
|
kc.compose()
|
193
|
-
kc.create_mp4(height=1080, fps=30)
|
193
|
+
# kc.create_mp4(height=1080, fps=30)
|
194
194
|
|
195
195
|
def _find_cdg_zip(self, artist: str, title: str) -> str:
|
196
196
|
"""Find the generated CDG ZIP file."""
|
197
|
-
safe_filename = self._get_safe_filename(artist, title, "Karaoke
|
197
|
+
safe_filename = self._get_safe_filename(artist, title, "Karaoke", "zip")
|
198
198
|
output_zip = os.path.join(self.output_dir, safe_filename)
|
199
199
|
|
200
200
|
self.logger.info(f"Looking for CDG ZIP file in output directory: {output_zip}")
|
@@ -216,12 +216,12 @@ class CDGGenerator:
|
|
216
216
|
|
217
217
|
def _get_cdg_path(self, artist: str, title: str) -> str:
|
218
218
|
"""Get the path to the CDG file."""
|
219
|
-
safe_filename = self._get_safe_filename(artist, title, "Karaoke
|
219
|
+
safe_filename = self._get_safe_filename(artist, title, "Karaoke", "cdg")
|
220
220
|
return os.path.join(self.output_dir, safe_filename)
|
221
221
|
|
222
222
|
def _get_mp3_path(self, artist: str, title: str) -> str:
|
223
223
|
"""Get the path to the MP3 file."""
|
224
|
-
safe_filename = self._get_safe_filename(artist, title, "Karaoke
|
224
|
+
safe_filename = self._get_safe_filename(artist, title, "Karaoke", "mp3")
|
225
225
|
return os.path.join(self.output_dir, safe_filename)
|
226
226
|
|
227
227
|
def _verify_output_files(self, cdg_file: str, mp3_file: str) -> None:
|
@@ -376,7 +376,7 @@ class CDGGenerator:
|
|
376
376
|
cdg_styles: dict,
|
377
377
|
) -> dict:
|
378
378
|
"""Create TOML data structure."""
|
379
|
-
safe_output_name = self._get_safe_filename(artist, title, "Karaoke
|
379
|
+
safe_output_name = self._get_safe_filename(artist, title, "Karaoke")
|
380
380
|
return {
|
381
381
|
"title": title,
|
382
382
|
"artist": artist,
|
@@ -496,7 +496,7 @@ class CDGGenerator:
|
|
496
496
|
text = text[1:]
|
497
497
|
|
498
498
|
current_line += text + " "
|
499
|
-
self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
|
499
|
+
# self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
|
500
500
|
|
501
501
|
is_last_before_instrumental = any(
|
502
502
|
inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
|
@@ -78,6 +78,7 @@ class OutputGenerator:
|
|
78
78
|
font_size=self.font_size,
|
79
79
|
line_height=self.line_height,
|
80
80
|
styles=self.config.styles,
|
81
|
+
subtitle_offset_ms=self.config.subtitle_offset_ms,
|
81
82
|
logger=self.logger,
|
82
83
|
)
|
83
84
|
|
@@ -96,28 +97,42 @@ class OutputGenerator:
|
|
96
97
|
def generate_outputs(
|
97
98
|
self,
|
98
99
|
transcription_corrected: Optional[CorrectionResult],
|
99
|
-
lyrics_results:
|
100
|
+
lyrics_results: dict[str, LyricsData],
|
100
101
|
output_prefix: str,
|
101
102
|
audio_filepath: str,
|
102
103
|
artist: Optional[str] = None,
|
103
104
|
title: Optional[str] = None,
|
105
|
+
preview_mode: bool = False,
|
104
106
|
) -> OutputPaths:
|
105
107
|
"""Generate all requested output formats."""
|
106
108
|
outputs = OutputPaths()
|
107
109
|
|
108
110
|
try:
|
109
|
-
# Generate plain lyrics files for each provider
|
110
|
-
for lyrics_data in lyrics_results:
|
111
|
-
self.plain_text.write_lyrics(lyrics_data, output_prefix)
|
112
|
-
|
113
111
|
# Only process transcription-related outputs if we have transcription data
|
114
112
|
if transcription_corrected:
|
115
|
-
# Write original (uncorrected) transcription
|
116
|
-
outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
|
117
113
|
|
118
|
-
# Resize corrected segments
|
114
|
+
# Resize corrected segments
|
119
115
|
resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
|
120
116
|
transcription_corrected.resized_segments = resized_segments
|
117
|
+
|
118
|
+
# For preview, we only need to generate ASS and video
|
119
|
+
if preview_mode:
|
120
|
+
# Generate ASS subtitles for preview
|
121
|
+
outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
|
122
|
+
|
123
|
+
# Generate preview video
|
124
|
+
outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
|
125
|
+
|
126
|
+
return outputs
|
127
|
+
|
128
|
+
# Normal output generation (non-preview mode)
|
129
|
+
# Generate plain lyrics files for each provider
|
130
|
+
for name, lyrics_data in lyrics_results.items():
|
131
|
+
self.plain_text.write_lyrics(lyrics_data, output_prefix)
|
132
|
+
|
133
|
+
# Write original (uncorrected) transcription
|
134
|
+
outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
|
135
|
+
|
121
136
|
outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
|
122
137
|
|
123
138
|
# Write corrected lyrics as plain text
|
@@ -161,12 +176,12 @@ class OutputGenerator:
|
|
161
176
|
"720p": (1280, 720),
|
162
177
|
"360p": (640, 360),
|
163
178
|
}
|
164
|
-
|
179
|
+
|
165
180
|
if resolution not in resolution_map:
|
166
181
|
raise ValueError("Invalid video_resolution value. Must be one of: 4k, 1080p, 720p, 360p")
|
167
|
-
|
182
|
+
|
168
183
|
resolution_dims = resolution_map[resolution]
|
169
|
-
|
184
|
+
|
170
185
|
# Default font sizes for each resolution
|
171
186
|
default_font_sizes = {
|
172
187
|
"4k": 250,
|
@@ -174,13 +189,13 @@ class OutputGenerator:
|
|
174
189
|
"720p": 100,
|
175
190
|
"360p": 40,
|
176
191
|
}
|
177
|
-
|
192
|
+
|
178
193
|
# Get font size from styles if available, otherwise use default
|
179
194
|
font_size = self.config.styles.get("karaoke", {}).get("font_size", default_font_sizes[resolution])
|
180
|
-
|
195
|
+
|
181
196
|
# Line height matches font size for all except 360p
|
182
197
|
line_height = 50 if resolution == "360p" else font_size
|
183
|
-
|
198
|
+
|
184
199
|
return resolution_dims, font_size, line_height
|
185
200
|
|
186
201
|
def write_corrections_data(self, correction_result: CorrectionResult, output_prefix: str) -> str:
|
@@ -5,12 +5,13 @@ from typing import List, Optional
|
|
5
5
|
from lyrics_transcriber.types import LyricsData, LyricsSegment
|
6
6
|
from lyrics_transcriber.correction.corrector import CorrectionResult
|
7
7
|
|
8
|
+
|
8
9
|
class PlainTextGenerator:
|
9
10
|
"""Handles generation of plain text output files for lyrics and transcriptions."""
|
10
11
|
|
11
12
|
def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
|
12
13
|
"""Initialize PlainTextGenerator.
|
13
|
-
|
14
|
+
|
14
15
|
Args:
|
15
16
|
output_dir: Directory where output files will be written
|
16
17
|
logger: Optional logger instance
|
@@ -24,11 +25,11 @@ class PlainTextGenerator:
|
|
24
25
|
|
25
26
|
def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
|
26
27
|
"""Write plain text lyrics file from provider data.
|
27
|
-
|
28
|
+
|
28
29
|
Args:
|
29
30
|
lyrics_data: LyricsData from a lyrics provider
|
30
31
|
output_prefix: Prefix for output filename
|
31
|
-
|
32
|
+
|
32
33
|
Returns:
|
33
34
|
Path to generated file
|
34
35
|
"""
|
@@ -38,7 +39,9 @@ class PlainTextGenerator:
|
|
38
39
|
|
39
40
|
try:
|
40
41
|
with open(output_path, "w", encoding="utf-8") as f:
|
41
|
-
|
42
|
+
# Join segment texts with newlines
|
43
|
+
lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
|
44
|
+
f.write(lyrics_text)
|
42
45
|
self.logger.info(f"Plain lyrics file generated: {output_path}")
|
43
46
|
return output_path
|
44
47
|
except Exception as e:
|
@@ -47,11 +50,11 @@ class PlainTextGenerator:
|
|
47
50
|
|
48
51
|
def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
|
49
52
|
"""Write corrected lyrics as plain text file.
|
50
|
-
|
53
|
+
|
51
54
|
Args:
|
52
55
|
segments: List of corrected LyricsSegment objects
|
53
56
|
output_prefix: Prefix for output filename
|
54
|
-
|
57
|
+
|
55
58
|
Returns:
|
56
59
|
Path to generated file
|
57
60
|
"""
|
@@ -70,22 +73,24 @@ class PlainTextGenerator:
|
|
70
73
|
|
71
74
|
def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
|
72
75
|
"""Write original (uncorrected) transcription as plain text.
|
73
|
-
|
76
|
+
|
74
77
|
Args:
|
75
78
|
correction_result: CorrectionResult containing original transcription
|
76
79
|
output_prefix: Prefix for output filename
|
77
|
-
|
80
|
+
|
78
81
|
Returns:
|
79
82
|
Path to generated file
|
80
83
|
"""
|
81
84
|
self.logger.info("Writing original transcription file")
|
82
85
|
output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
|
83
86
|
|
87
|
+
transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
|
88
|
+
|
84
89
|
try:
|
85
90
|
with open(output_path, "w", encoding="utf-8") as f:
|
86
|
-
f.write(
|
91
|
+
f.write(transcribed_text)
|
87
92
|
self.logger.info(f"Original transcription file generated: {output_path}")
|
88
93
|
return output_path
|
89
94
|
except Exception as e:
|
90
95
|
self.logger.error(f"Failed to write original transcription file: {str(e)}")
|
91
|
-
raise
|
96
|
+
raise
|
@@ -1,8 +1,9 @@
|
|
1
1
|
import logging
|
2
2
|
import re
|
3
|
-
from typing import List, Optional
|
3
|
+
from typing import List, Optional
|
4
4
|
|
5
5
|
from lyrics_transcriber.types import LyricsSegment, Word
|
6
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
6
7
|
|
7
8
|
|
8
9
|
class SegmentResizer:
|
@@ -101,7 +102,13 @@ class SegmentResizer:
|
|
101
102
|
Output: LyricsSegment(text="Hello World", words=[...])
|
102
103
|
"""
|
103
104
|
cleaned_text = self._clean_text(segment.text)
|
104
|
-
return LyricsSegment(
|
105
|
+
return LyricsSegment(
|
106
|
+
id=segment.id, # Preserve the original segment ID
|
107
|
+
text=cleaned_text,
|
108
|
+
words=segment.words,
|
109
|
+
start_time=segment.start_time,
|
110
|
+
end_time=segment.end_time,
|
111
|
+
)
|
105
112
|
|
106
113
|
def _create_cleaned_word(self, word: Word) -> Word:
|
107
114
|
"""Create a new word with cleaned text."""
|
@@ -226,7 +233,13 @@ class SegmentResizer:
|
|
226
233
|
def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
|
227
234
|
"""Create a new segment from a list of words."""
|
228
235
|
cleaned_text = self._clean_text(line)
|
229
|
-
return LyricsSegment(
|
236
|
+
return LyricsSegment(
|
237
|
+
id=WordUtils.generate_id(), # Generate new ID for split segments
|
238
|
+
text=cleaned_text,
|
239
|
+
words=words,
|
240
|
+
start_time=words[0].start_time,
|
241
|
+
end_time=words[-1].end_time,
|
242
|
+
)
|
230
243
|
|
231
244
|
def _process_segment_text(self, text: str) -> List[str]:
|
232
245
|
"""Process segment text to determine optimal split points."""
|