lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/core/controller.py +30 -52
- lyrics_transcriber/correction/anchor_sequence.py +325 -150
- lyrics_transcriber/correction/corrector.py +224 -107
- lyrics_transcriber/correction/handlers/base.py +28 -10
- lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
- lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
- lyrics_transcriber/correction/handlers/llm.py +290 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
- lyrics_transcriber/correction/handlers/repeat.py +28 -11
- lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
- lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
- lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
- lyrics_transcriber/correction/handlers/word_operations.py +68 -22
- lyrics_transcriber/correction/text_utils.py +3 -7
- lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
- lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
- lyrics_transcriber/frontend/.yarnrc.yml +3 -0
- lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
- lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/package.json +6 -2
- lyrics_transcriber/frontend/src/App.tsx +18 -2
- lyrics_transcriber/frontend/src/api.ts +103 -6
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
- lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
- lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
- lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
- lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
- lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
- lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
- lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
- lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
- lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
- lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
- lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
- lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
- lyrics_transcriber/frontend/src/types.js +2 -0
- lyrics_transcriber/frontend/src/types.ts +70 -49
- lyrics_transcriber/frontend/src/validation.ts +132 -0
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/yarn.lock +3752 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
- lyrics_transcriber/lyrics/file_provider.py +6 -5
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +58 -21
- lyrics_transcriber/output/ass/config.py +16 -5
- lyrics_transcriber/output/cdg.py +1 -1
- lyrics_transcriber/output/generator.py +22 -8
- lyrics_transcriber/output/plain_text.py +15 -10
- lyrics_transcriber/output/segment_resizer.py +16 -3
- lyrics_transcriber/output/subtitles.py +27 -1
- lyrics_transcriber/output/video.py +107 -1
- lyrics_transcriber/review/__init__.py +0 -1
- lyrics_transcriber/review/server.py +337 -164
- lyrics_transcriber/transcribers/audioshake.py +3 -0
- lyrics_transcriber/transcribers/base_transcriber.py +11 -3
- lyrics_transcriber/transcribers/whisper.py +11 -1
- lyrics_transcriber/types.py +151 -105
- lyrics_transcriber/utils/word_utils.py +27 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +74 -61
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
- lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
- lyrics_transcriber/frontend/package-lock.json +0 -4260
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
@@ -1,13 +1,14 @@
|
|
1
1
|
from dataclasses import dataclass
|
2
2
|
import logging
|
3
|
-
from typing import Optional, Dict, Any
|
3
|
+
from typing import Optional, Dict, Any, List
|
4
4
|
import json
|
5
5
|
import hashlib
|
6
6
|
from pathlib import Path
|
7
7
|
import os
|
8
8
|
from abc import ABC, abstractmethod
|
9
|
-
from lyrics_transcriber.types import LyricsData
|
9
|
+
from lyrics_transcriber.types import LyricsData, LyricsSegment, Word
|
10
10
|
from karaoke_lyrics_processor import KaraokeLyricsProcessor
|
11
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
11
12
|
|
12
13
|
|
13
14
|
@dataclass
|
@@ -41,20 +42,31 @@ class BaseLyricsProvider(ABC):
|
|
41
42
|
|
42
43
|
# Use artist and title for cache key instead of audio file hash
|
43
44
|
cache_key = self._get_artist_title_hash(artist, title)
|
44
|
-
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
45
45
|
|
46
|
-
#
|
46
|
+
# Check converted cache first
|
47
|
+
converted_cache_path = self._get_cache_path(cache_key, "converted")
|
48
|
+
converted_data = self._load_from_cache(converted_cache_path)
|
49
|
+
if converted_data:
|
50
|
+
self.logger.info(f"Using cached converted lyrics for {artist} - {title}")
|
51
|
+
return LyricsData.from_dict(converted_data)
|
52
|
+
|
53
|
+
# Check raw cache next
|
54
|
+
raw_cache_path = self._get_cache_path(cache_key, "raw")
|
47
55
|
raw_data = self._load_from_cache(raw_cache_path)
|
48
|
-
if raw_data
|
49
|
-
self.logger.info(f"Using cached lyrics for {artist} - {title}")
|
50
|
-
|
56
|
+
if raw_data:
|
57
|
+
self.logger.info(f"Using cached raw lyrics for {artist} - {title}")
|
58
|
+
converted_result = self._convert_result_format(raw_data)
|
59
|
+
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
60
|
+
return converted_result
|
51
61
|
|
52
62
|
# If not in cache, fetch from source
|
53
63
|
raw_result = self._fetch_data_from_source(artist, title)
|
54
64
|
if raw_result:
|
55
65
|
# Save raw API response
|
56
66
|
self._save_to_cache(raw_cache_path, raw_result)
|
57
|
-
|
67
|
+
converted_result = self._convert_result_format(raw_result)
|
68
|
+
self._save_to_cache(converted_cache_path, converted_result.to_dict())
|
69
|
+
return converted_result
|
58
70
|
|
59
71
|
return None
|
60
72
|
|
@@ -100,18 +112,69 @@ class BaseLyricsProvider(ABC):
|
|
100
112
|
self.logger.warning(f"Cache file {cache_path} is corrupted")
|
101
113
|
return None
|
102
114
|
|
115
|
+
def _create_segments_with_words(self, text: str, is_synced: bool = False) -> List[LyricsSegment]:
|
116
|
+
"""Create LyricsSegment objects with properly formatted words from text.
|
117
|
+
|
118
|
+
Args:
|
119
|
+
text: Raw lyrics text
|
120
|
+
is_synced: Whether timing information is available
|
121
|
+
|
122
|
+
Returns:
|
123
|
+
List of LyricsSegment objects with unique IDs and Word objects
|
124
|
+
"""
|
125
|
+
segments = []
|
126
|
+
lines = text.strip().split("\n")
|
127
|
+
|
128
|
+
for line in lines:
|
129
|
+
if not line.strip():
|
130
|
+
continue
|
131
|
+
|
132
|
+
# Split line into words
|
133
|
+
word_texts = line.strip().split()
|
134
|
+
if not word_texts:
|
135
|
+
continue
|
136
|
+
|
137
|
+
words = []
|
138
|
+
for word_text in word_texts:
|
139
|
+
word = Word(
|
140
|
+
id=WordUtils.generate_id(),
|
141
|
+
text=word_text,
|
142
|
+
start_time=0.0 if is_synced else None,
|
143
|
+
end_time=0.0 if is_synced else None,
|
144
|
+
confidence=1.0, # Reference lyrics are considered ground truth
|
145
|
+
created_during_correction=False,
|
146
|
+
)
|
147
|
+
words.append(word)
|
148
|
+
|
149
|
+
segment = LyricsSegment(
|
150
|
+
id=WordUtils.generate_id(),
|
151
|
+
text=line.strip(),
|
152
|
+
words=words,
|
153
|
+
start_time=words[0].start_time if is_synced else None,
|
154
|
+
end_time=words[-1].end_time if is_synced else None,
|
155
|
+
)
|
156
|
+
segments.append(segment)
|
157
|
+
|
158
|
+
return segments
|
159
|
+
|
103
160
|
def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
|
104
|
-
"""Process lyrics using KaraokeLyricsProcessor."""
|
161
|
+
"""Process lyrics using KaraokeLyricsProcessor and create proper segments."""
|
162
|
+
# Concatenate all segment texts to get the full lyrics
|
163
|
+
full_lyrics = lyrics_data.get_full_text()
|
164
|
+
|
105
165
|
processor = KaraokeLyricsProcessor(
|
106
166
|
log_level=self.logger.getEffectiveLevel(),
|
107
167
|
log_formatter=self.logger.handlers[0].formatter if self.logger.handlers else None,
|
108
|
-
input_lyrics_text=
|
168
|
+
input_lyrics_text=full_lyrics,
|
109
169
|
max_line_length=self.max_line_length,
|
110
170
|
)
|
111
171
|
processed_text = processor.process()
|
112
172
|
|
113
|
-
# Create
|
114
|
-
|
173
|
+
# Create segments with words from processed text
|
174
|
+
segments = self._create_segments_with_words(processed_text, is_synced=lyrics_data.metadata.is_synced)
|
175
|
+
|
176
|
+
# Create new LyricsData with processed text and segments
|
177
|
+
return LyricsData(source=lyrics_data.source, segments=segments, metadata=lyrics_data.metadata)
|
115
178
|
|
116
179
|
def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
|
117
180
|
"""Convert raw result to standardized format, process lyrics, save to cache, and return."""
|
@@ -67,7 +67,7 @@ class FileProvider(BaseLyricsProvider):
|
|
67
67
|
self.logger.debug(f"Converting raw data to LyricsData format: {raw_data}")
|
68
68
|
|
69
69
|
try:
|
70
|
-
# Create metadata object
|
70
|
+
# Create metadata object
|
71
71
|
metadata = LyricsMetadata(
|
72
72
|
source="file",
|
73
73
|
track_name=self.title,
|
@@ -78,10 +78,11 @@ class FileProvider(BaseLyricsProvider):
|
|
78
78
|
provider_metadata={"filepath": raw_data["filepath"]},
|
79
79
|
)
|
80
80
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
81
|
+
# Create segments with words from the processed text
|
82
|
+
segments = self._create_segments_with_words(raw_data["text"], is_synced=False)
|
83
|
+
|
84
|
+
lyrics_data = LyricsData(source="file", segments=segments, metadata=metadata)
|
85
|
+
self.logger.debug(f"Created LyricsData object with {len(segments)} segments")
|
85
86
|
return lyrics_data
|
86
87
|
|
87
88
|
except Exception as e:
|
@@ -77,8 +77,11 @@ class GeniusProvider(BaseLyricsProvider):
|
|
77
77
|
},
|
78
78
|
)
|
79
79
|
|
80
|
-
# Create
|
81
|
-
|
80
|
+
# Create segments with words from cleaned lyrics
|
81
|
+
segments = self._create_segments_with_words(lyrics, is_synced=False)
|
82
|
+
|
83
|
+
# Create result object with segments
|
84
|
+
return LyricsData(source="genius", segments=segments, metadata=metadata)
|
82
85
|
|
83
86
|
def _clean_lyrics(self, lyrics: str) -> str:
|
84
87
|
"""Clean and process lyrics from Genius to remove unwanted content."""
|
@@ -1,9 +1,11 @@
|
|
1
1
|
import logging
|
2
2
|
from typing import Optional, Dict, Any
|
3
3
|
import syrics.api
|
4
|
+
import time
|
4
5
|
|
5
|
-
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment
|
6
|
+
from lyrics_transcriber.types import LyricsData, LyricsMetadata, LyricsSegment, Word
|
6
7
|
from lyrics_transcriber.lyrics.base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig
|
8
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
7
9
|
|
8
10
|
|
9
11
|
class SpotifyProvider(BaseLyricsProvider):
|
@@ -12,7 +14,22 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
12
14
|
def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
|
13
15
|
super().__init__(config, logger)
|
14
16
|
self.cookie = config.spotify_cookie
|
15
|
-
self.client =
|
17
|
+
self.client = None
|
18
|
+
|
19
|
+
if self.cookie:
|
20
|
+
max_retries = 5
|
21
|
+
retry_delay = 5 # seconds
|
22
|
+
|
23
|
+
for attempt in range(max_retries):
|
24
|
+
try:
|
25
|
+
self.client = syrics.api.Spotify(self.cookie)
|
26
|
+
break # Successfully initialized
|
27
|
+
except Exception as e:
|
28
|
+
if attempt == max_retries - 1: # Last attempt
|
29
|
+
self.logger.error(f"Failed to initialize Spotify client after {max_retries} attempts: {str(e)}")
|
30
|
+
break
|
31
|
+
self.logger.warning(f"Attempt {attempt + 1}/{max_retries} failed, retrying in {retry_delay} seconds...")
|
32
|
+
time.sleep(retry_delay)
|
16
33
|
|
17
34
|
def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
|
18
35
|
"""Fetch raw data from Spotify APIs using syrics library."""
|
@@ -45,24 +62,6 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
45
62
|
track_data = raw_data["track_data"]
|
46
63
|
lyrics_data = raw_data["lyrics_data"]["lyrics"]
|
47
64
|
|
48
|
-
# Convert raw lines to LyricsSegment objects
|
49
|
-
segments = []
|
50
|
-
for line in lyrics_data.get("lines", []):
|
51
|
-
if not line.get("words"):
|
52
|
-
continue
|
53
|
-
|
54
|
-
# Skip lines that are just musical notes
|
55
|
-
if not self._clean_lyrics(line["words"]):
|
56
|
-
continue
|
57
|
-
|
58
|
-
segment = LyricsSegment(
|
59
|
-
text=line["words"],
|
60
|
-
words=[], # TODO: Could potentially split words if needed
|
61
|
-
start_time=float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else None,
|
62
|
-
end_time=float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else None,
|
63
|
-
)
|
64
|
-
segments.append(segment)
|
65
|
-
|
66
65
|
# Create metadata object
|
67
66
|
metadata = LyricsMetadata(
|
68
67
|
source="spotify",
|
@@ -83,7 +82,45 @@ class SpotifyProvider(BaseLyricsProvider):
|
|
83
82
|
},
|
84
83
|
)
|
85
84
|
|
86
|
-
|
85
|
+
# Create segments with timing information
|
86
|
+
segments = []
|
87
|
+
for line in lyrics_data.get("lines", []):
|
88
|
+
if not line.get("words"):
|
89
|
+
continue
|
90
|
+
|
91
|
+
# Skip lines that are just musical notes
|
92
|
+
if not self._clean_lyrics(line["words"]):
|
93
|
+
continue
|
94
|
+
|
95
|
+
# Split line into words
|
96
|
+
word_texts = line["words"].strip().split()
|
97
|
+
if not word_texts:
|
98
|
+
continue
|
99
|
+
|
100
|
+
# Calculate approximate timing for each word
|
101
|
+
start_time = float(line["startTimeMs"]) / 1000 if line["startTimeMs"] != "0" else 0.0
|
102
|
+
end_time = float(line["endTimeMs"]) / 1000 if line["endTimeMs"] != "0" else 0.0
|
103
|
+
duration = end_time - start_time
|
104
|
+
word_duration = duration / len(word_texts)
|
105
|
+
|
106
|
+
words = []
|
107
|
+
for i, word_text in enumerate(word_texts):
|
108
|
+
word = Word(
|
109
|
+
id=WordUtils.generate_id(),
|
110
|
+
text=word_text,
|
111
|
+
start_time=start_time + (i * word_duration),
|
112
|
+
end_time=start_time + ((i + 1) * word_duration),
|
113
|
+
confidence=1.0,
|
114
|
+
created_during_correction=False,
|
115
|
+
)
|
116
|
+
words.append(word)
|
117
|
+
|
118
|
+
segment = LyricsSegment(
|
119
|
+
id=WordUtils.generate_id(), text=line["words"].strip(), words=words, start_time=start_time, end_time=end_time
|
120
|
+
)
|
121
|
+
segments.append(segment)
|
122
|
+
|
123
|
+
return LyricsData(source="spotify", segments=segments, metadata=metadata)
|
87
124
|
|
88
125
|
def _clean_lyrics(self, lyrics: str) -> str:
|
89
126
|
"""Clean and process lyrics from Spotify to remove unwanted content."""
|
@@ -4,7 +4,18 @@ from dataclasses import dataclass
|
|
4
4
|
class ScreenConfig:
|
5
5
|
"""Configuration for screen timing and layout."""
|
6
6
|
|
7
|
-
def __init__(
|
7
|
+
def __init__(
|
8
|
+
self,
|
9
|
+
line_height: int = 50,
|
10
|
+
max_visible_lines: int = 4,
|
11
|
+
top_padding: int = None,
|
12
|
+
video_width: int = 640,
|
13
|
+
video_height: int = 360,
|
14
|
+
screen_gap_threshold: float = 5.0,
|
15
|
+
post_roll_time: float = 1.0,
|
16
|
+
fade_in_ms: int = 200,
|
17
|
+
fade_out_ms: int = 300,
|
18
|
+
):
|
8
19
|
# Screen layout
|
9
20
|
self.max_visible_lines = max_visible_lines
|
10
21
|
self.line_height = line_height
|
@@ -12,10 +23,10 @@ class ScreenConfig:
|
|
12
23
|
self.video_height = video_height
|
13
24
|
self.video_width = video_width
|
14
25
|
# Timing configuration
|
15
|
-
self.screen_gap_threshold =
|
16
|
-
self.post_roll_time =
|
17
|
-
self.fade_in_ms =
|
18
|
-
self.fade_out_ms =
|
26
|
+
self.screen_gap_threshold = screen_gap_threshold
|
27
|
+
self.post_roll_time = post_roll_time
|
28
|
+
self.fade_in_ms = fade_in_ms
|
29
|
+
self.fade_out_ms = fade_out_ms
|
19
30
|
|
20
31
|
|
21
32
|
@dataclass
|
lyrics_transcriber/output/cdg.py
CHANGED
@@ -496,7 +496,7 @@ class CDGGenerator:
|
|
496
496
|
text = text[1:]
|
497
497
|
|
498
498
|
current_line += text + " "
|
499
|
-
self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
|
499
|
+
# self.logger.debug(f"format_lyrics: Current line: '{current_line}'")
|
500
500
|
|
501
501
|
is_last_before_instrumental = any(
|
502
502
|
inst["sync"] > sync_times[i] and (i == len(sync_times) - 1 or sync_times[i + 1] > inst["sync"]) for inst in instrumentals
|
@@ -97,28 +97,42 @@ class OutputGenerator:
|
|
97
97
|
def generate_outputs(
|
98
98
|
self,
|
99
99
|
transcription_corrected: Optional[CorrectionResult],
|
100
|
-
lyrics_results:
|
100
|
+
lyrics_results: dict[str, LyricsData],
|
101
101
|
output_prefix: str,
|
102
102
|
audio_filepath: str,
|
103
103
|
artist: Optional[str] = None,
|
104
104
|
title: Optional[str] = None,
|
105
|
+
preview_mode: bool = False,
|
105
106
|
) -> OutputPaths:
|
106
107
|
"""Generate all requested output formats."""
|
107
108
|
outputs = OutputPaths()
|
108
109
|
|
109
110
|
try:
|
110
|
-
# Generate plain lyrics files for each provider
|
111
|
-
for lyrics_data in lyrics_results:
|
112
|
-
self.plain_text.write_lyrics(lyrics_data, output_prefix)
|
113
|
-
|
114
111
|
# Only process transcription-related outputs if we have transcription data
|
115
112
|
if transcription_corrected:
|
116
|
-
# Write original (uncorrected) transcription
|
117
|
-
outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
|
118
113
|
|
119
|
-
# Resize corrected segments
|
114
|
+
# Resize corrected segments
|
120
115
|
resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
|
121
116
|
transcription_corrected.resized_segments = resized_segments
|
117
|
+
|
118
|
+
# For preview, we only need to generate ASS and video
|
119
|
+
if preview_mode:
|
120
|
+
# Generate ASS subtitles for preview
|
121
|
+
outputs.ass = self.subtitle.generate_ass(transcription_corrected.resized_segments, output_prefix, audio_filepath)
|
122
|
+
|
123
|
+
# Generate preview video
|
124
|
+
outputs.video = self.video.generate_preview_video(outputs.ass, audio_filepath, output_prefix)
|
125
|
+
|
126
|
+
return outputs
|
127
|
+
|
128
|
+
# Normal output generation (non-preview mode)
|
129
|
+
# Generate plain lyrics files for each provider
|
130
|
+
for name, lyrics_data in lyrics_results.items():
|
131
|
+
self.plain_text.write_lyrics(lyrics_data, output_prefix)
|
132
|
+
|
133
|
+
# Write original (uncorrected) transcription
|
134
|
+
outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
|
135
|
+
|
122
136
|
outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
|
123
137
|
|
124
138
|
# Write corrected lyrics as plain text
|
@@ -5,12 +5,13 @@ from typing import List, Optional
|
|
5
5
|
from lyrics_transcriber.types import LyricsData, LyricsSegment
|
6
6
|
from lyrics_transcriber.correction.corrector import CorrectionResult
|
7
7
|
|
8
|
+
|
8
9
|
class PlainTextGenerator:
|
9
10
|
"""Handles generation of plain text output files for lyrics and transcriptions."""
|
10
11
|
|
11
12
|
def __init__(self, output_dir: str, logger: Optional[logging.Logger] = None):
|
12
13
|
"""Initialize PlainTextGenerator.
|
13
|
-
|
14
|
+
|
14
15
|
Args:
|
15
16
|
output_dir: Directory where output files will be written
|
16
17
|
logger: Optional logger instance
|
@@ -24,11 +25,11 @@ class PlainTextGenerator:
|
|
24
25
|
|
25
26
|
def write_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
|
26
27
|
"""Write plain text lyrics file from provider data.
|
27
|
-
|
28
|
+
|
28
29
|
Args:
|
29
30
|
lyrics_data: LyricsData from a lyrics provider
|
30
31
|
output_prefix: Prefix for output filename
|
31
|
-
|
32
|
+
|
32
33
|
Returns:
|
33
34
|
Path to generated file
|
34
35
|
"""
|
@@ -38,7 +39,9 @@ class PlainTextGenerator:
|
|
38
39
|
|
39
40
|
try:
|
40
41
|
with open(output_path, "w", encoding="utf-8") as f:
|
41
|
-
|
42
|
+
# Join segment texts with newlines
|
43
|
+
lyrics_text = "\n".join(segment.text for segment in lyrics_data.segments)
|
44
|
+
f.write(lyrics_text)
|
42
45
|
self.logger.info(f"Plain lyrics file generated: {output_path}")
|
43
46
|
return output_path
|
44
47
|
except Exception as e:
|
@@ -47,11 +50,11 @@ class PlainTextGenerator:
|
|
47
50
|
|
48
51
|
def write_corrected_lyrics(self, segments: List[LyricsSegment], output_prefix: str) -> str:
|
49
52
|
"""Write corrected lyrics as plain text file.
|
50
|
-
|
53
|
+
|
51
54
|
Args:
|
52
55
|
segments: List of corrected LyricsSegment objects
|
53
56
|
output_prefix: Prefix for output filename
|
54
|
-
|
57
|
+
|
55
58
|
Returns:
|
56
59
|
Path to generated file
|
57
60
|
"""
|
@@ -70,22 +73,24 @@ class PlainTextGenerator:
|
|
70
73
|
|
71
74
|
def write_original_transcription(self, correction_result: CorrectionResult, output_prefix: str) -> str:
|
72
75
|
"""Write original (uncorrected) transcription as plain text.
|
73
|
-
|
76
|
+
|
74
77
|
Args:
|
75
78
|
correction_result: CorrectionResult containing original transcription
|
76
79
|
output_prefix: Prefix for output filename
|
77
|
-
|
80
|
+
|
78
81
|
Returns:
|
79
82
|
Path to generated file
|
80
83
|
"""
|
81
84
|
self.logger.info("Writing original transcription file")
|
82
85
|
output_path = self._get_output_path(f"{output_prefix} (Lyrics Uncorrected)", "txt")
|
83
86
|
|
87
|
+
transcribed_text = " ".join(" ".join(w.text for w in segment.words) for segment in correction_result.original_segments)
|
88
|
+
|
84
89
|
try:
|
85
90
|
with open(output_path, "w", encoding="utf-8") as f:
|
86
|
-
f.write(
|
91
|
+
f.write(transcribed_text)
|
87
92
|
self.logger.info(f"Original transcription file generated: {output_path}")
|
88
93
|
return output_path
|
89
94
|
except Exception as e:
|
90
95
|
self.logger.error(f"Failed to write original transcription file: {str(e)}")
|
91
|
-
raise
|
96
|
+
raise
|
@@ -1,8 +1,9 @@
|
|
1
1
|
import logging
|
2
2
|
import re
|
3
|
-
from typing import List, Optional
|
3
|
+
from typing import List, Optional
|
4
4
|
|
5
5
|
from lyrics_transcriber.types import LyricsSegment, Word
|
6
|
+
from lyrics_transcriber.utils.word_utils import WordUtils
|
6
7
|
|
7
8
|
|
8
9
|
class SegmentResizer:
|
@@ -101,7 +102,13 @@ class SegmentResizer:
|
|
101
102
|
Output: LyricsSegment(text="Hello World", words=[...])
|
102
103
|
"""
|
103
104
|
cleaned_text = self._clean_text(segment.text)
|
104
|
-
return LyricsSegment(
|
105
|
+
return LyricsSegment(
|
106
|
+
id=segment.id, # Preserve the original segment ID
|
107
|
+
text=cleaned_text,
|
108
|
+
words=segment.words,
|
109
|
+
start_time=segment.start_time,
|
110
|
+
end_time=segment.end_time,
|
111
|
+
)
|
105
112
|
|
106
113
|
def _create_cleaned_word(self, word: Word) -> Word:
|
107
114
|
"""Create a new word with cleaned text."""
|
@@ -226,7 +233,13 @@ class SegmentResizer:
|
|
226
233
|
def _create_segment_from_words(self, line: str, words: List[Word]) -> LyricsSegment:
|
227
234
|
"""Create a new segment from a list of words."""
|
228
235
|
cleaned_text = self._clean_text(line)
|
229
|
-
return LyricsSegment(
|
236
|
+
return LyricsSegment(
|
237
|
+
id=WordUtils.generate_id(), # Generate new ID for split segments
|
238
|
+
text=cleaned_text,
|
239
|
+
words=words,
|
240
|
+
start_time=words[0].start_time,
|
241
|
+
end_time=words[-1].end_time,
|
242
|
+
)
|
230
243
|
|
231
244
|
def _process_segment_text(self, text: str) -> List[str]:
|
232
245
|
"""Process segment text to determine optimal split points."""
|
@@ -44,7 +44,30 @@ class SubtitlesGenerator:
|
|
44
44
|
self.font_size = font_size
|
45
45
|
self.styles = styles
|
46
46
|
self.subtitle_offset_ms = subtitle_offset_ms
|
47
|
-
|
47
|
+
|
48
|
+
# Create ScreenConfig with potential overrides from styles
|
49
|
+
karaoke_styles = styles.get("karaoke", {})
|
50
|
+
config_params = {
|
51
|
+
"line_height": line_height,
|
52
|
+
"video_width": video_resolution[0],
|
53
|
+
"video_height": video_resolution[1]
|
54
|
+
}
|
55
|
+
|
56
|
+
# Add any overrides from styles
|
57
|
+
screen_config_props = [
|
58
|
+
"max_visible_lines",
|
59
|
+
"top_padding",
|
60
|
+
"screen_gap_threshold",
|
61
|
+
"post_roll_time",
|
62
|
+
"fade_in_ms",
|
63
|
+
"fade_out_ms"
|
64
|
+
]
|
65
|
+
|
66
|
+
for prop in screen_config_props:
|
67
|
+
if prop in karaoke_styles:
|
68
|
+
config_params[prop] = karaoke_styles[prop]
|
69
|
+
|
70
|
+
self.config = ScreenConfig(**config_params)
|
48
71
|
self.logger = logger or logging.getLogger(__name__)
|
49
72
|
|
50
73
|
def _get_output_path(self, output_prefix: str, extension: str) -> str:
|
@@ -102,13 +125,16 @@ class SubtitlesGenerator:
|
|
102
125
|
offset_seconds = self.subtitle_offset_ms / 1000.0
|
103
126
|
segments = [
|
104
127
|
LyricsSegment(
|
128
|
+
id=seg.id, # Preserve original segment ID
|
105
129
|
text=seg.text,
|
106
130
|
words=[
|
107
131
|
Word(
|
132
|
+
id=word.id, # Preserve original word ID
|
108
133
|
text=word.text,
|
109
134
|
start_time=max(0, word.start_time + offset_seconds),
|
110
135
|
end_time=word.end_time + offset_seconds,
|
111
136
|
confidence=word.confidence,
|
137
|
+
created_during_correction=getattr(word, "created_during_correction", False), # Preserve correction flag
|
112
138
|
)
|
113
139
|
for word in seg.words
|
114
140
|
],
|
@@ -88,6 +88,52 @@ class VideoGenerator:
|
|
88
88
|
pass
|
89
89
|
raise
|
90
90
|
|
91
|
+
def generate_preview_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
|
92
|
+
"""Generate lower resolution MP4 preview video with lyrics overlay.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
ass_path: Path to ASS subtitles file
|
96
|
+
audio_path: Path to audio file
|
97
|
+
output_prefix: Prefix for output filename
|
98
|
+
|
99
|
+
Returns:
|
100
|
+
Path to generated preview video file
|
101
|
+
"""
|
102
|
+
self.logger.info("Generating preview video with lyrics overlay")
|
103
|
+
output_path = os.path.join(self.cache_dir, f"{output_prefix}_preview.mp4")
|
104
|
+
|
105
|
+
# Check input files exist before running FFmpeg
|
106
|
+
if not os.path.isfile(ass_path):
|
107
|
+
raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
|
108
|
+
if not os.path.isfile(audio_path):
|
109
|
+
raise FileNotFoundError(f"Audio file not found: {audio_path}")
|
110
|
+
|
111
|
+
try:
|
112
|
+
# Create a temporary copy of the ASS file with a safe filename
|
113
|
+
temp_ass_path = os.path.join(self.cache_dir, "temp_preview_subtitles.ass")
|
114
|
+
import shutil
|
115
|
+
|
116
|
+
shutil.copy2(ass_path, temp_ass_path)
|
117
|
+
self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")
|
118
|
+
|
119
|
+
cmd = self._build_preview_ffmpeg_command(temp_ass_path, audio_path, output_path)
|
120
|
+
self._run_ffmpeg_command(cmd)
|
121
|
+
self.logger.info(f"Preview video generated: {output_path}")
|
122
|
+
|
123
|
+
# Clean up temporary file
|
124
|
+
os.remove(temp_ass_path)
|
125
|
+
return output_path
|
126
|
+
|
127
|
+
except Exception as e:
|
128
|
+
self.logger.error(f"Failed to generate preview video: {str(e)}")
|
129
|
+
# Clean up temporary file in case of error
|
130
|
+
if "temp_ass_path" in locals():
|
131
|
+
try:
|
132
|
+
os.remove(temp_ass_path)
|
133
|
+
except:
|
134
|
+
pass
|
135
|
+
raise
|
136
|
+
|
91
137
|
def _get_output_path(self, output_prefix: str, extension: str) -> str:
|
92
138
|
"""Generate full output path for a file."""
|
93
139
|
return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
|
@@ -181,7 +227,7 @@ class VideoGenerator:
|
|
181
227
|
"-vf", f"ass={ass_path}", # Add subtitles
|
182
228
|
"-c:v", self._get_video_codec(),
|
183
229
|
# Video quality settings
|
184
|
-
"-preset", "
|
230
|
+
"-preset", "fast", # Better compression efficiency
|
185
231
|
"-b:v", "5000k", # Base video bitrate
|
186
232
|
"-minrate", "5000k", # Minimum bitrate
|
187
233
|
"-maxrate", "20000k", # Maximum bitrate
|
@@ -196,6 +242,66 @@ class VideoGenerator:
|
|
196
242
|
|
197
243
|
return cmd
|
198
244
|
|
245
|
+
def _build_preview_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
|
246
|
+
"""Build FFmpeg command for preview video generation with optimized settings."""
|
247
|
+
# Use 360p resolution for preview
|
248
|
+
width, height = 640, 360
|
249
|
+
|
250
|
+
# fmt: off
|
251
|
+
cmd = [
|
252
|
+
"ffmpeg",
|
253
|
+
"-hide_banner",
|
254
|
+
"-loglevel", "error",
|
255
|
+
"-r", "30", # Set frame rate to 30 fps
|
256
|
+
]
|
257
|
+
|
258
|
+
# Input source (background)
|
259
|
+
if self.background_image:
|
260
|
+
# Resize background image first
|
261
|
+
resized_bg = self._resize_background_image(self.background_image)
|
262
|
+
self.logger.debug(f"Using resized background image: {resized_bg}")
|
263
|
+
cmd.extend([
|
264
|
+
"-loop", "1", # Loop the image
|
265
|
+
"-i", resized_bg,
|
266
|
+
])
|
267
|
+
else:
|
268
|
+
self.logger.debug(
|
269
|
+
f"Using solid {self.background_color} background "
|
270
|
+
f"with resolution: {width}x{height}"
|
271
|
+
)
|
272
|
+
cmd.extend([
|
273
|
+
"-f", "lavfi",
|
274
|
+
"-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
|
275
|
+
])
|
276
|
+
|
277
|
+
# Add audio input and subtitle overlay
|
278
|
+
cmd.extend([
|
279
|
+
"-i", audio_path,
|
280
|
+
"-c:a", "aac", # Use AAC for audio
|
281
|
+
"-b:a", "128k", # Audio bitrate
|
282
|
+
"-vf", f"ass={ass_path}", # Add subtitles
|
283
|
+
"-c:v", "libx264", # Use H.264 codec
|
284
|
+
"-profile:v", "baseline", # Most compatible H.264 profile
|
285
|
+
"-level", "3.0", # Compatibility level
|
286
|
+
"-pix_fmt", "yuv420p", # Required for browser compatibility
|
287
|
+
"-preset", "ultrafast",
|
288
|
+
"-b:v", "1000k", # Slightly higher bitrate
|
289
|
+
"-maxrate", "1500k",
|
290
|
+
"-bufsize", "2000k",
|
291
|
+
"-movflags", "+faststart+frag_keyframe+empty_moov", # Enhanced streaming flags
|
292
|
+
"-g", "30", # Keyframe every 30 frames (1 second)
|
293
|
+
"-keyint_min", "30", # Minimum keyframe interval
|
294
|
+
"-sc_threshold", "0", # Disable scene change detection
|
295
|
+
"-shortest",
|
296
|
+
"-y"
|
297
|
+
])
|
298
|
+
# fmt: on
|
299
|
+
|
300
|
+
# Add output path
|
301
|
+
cmd.append(output_path)
|
302
|
+
|
303
|
+
return cmd
|
304
|
+
|
199
305
|
def _get_video_codec(self) -> str:
|
200
306
|
"""Determine the best available video codec."""
|
201
307
|
# try:
|