lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/cli/{main.py → cli_main.py} +15 -3
- lyrics_transcriber/core/controller.py +129 -95
- lyrics_transcriber/correction/base_strategy.py +29 -0
- lyrics_transcriber/correction/corrector.py +52 -0
- lyrics_transcriber/correction/strategy_diff.py +263 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
- lyrics_transcriber/lyrics/genius.py +70 -0
- lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber/output/generator.py +158 -97
- lyrics_transcriber/output/subtitles.py +12 -12
- lyrics_transcriber/storage/dropbox.py +110 -134
- lyrics_transcriber/transcribers/audioshake.py +170 -105
- lyrics_transcriber/transcribers/base_transcriber.py +186 -0
- lyrics_transcriber/transcribers/whisper.py +268 -133
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +1 -1
- lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
- lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
- lyrics_transcriber/core/corrector.py +0 -56
- lyrics_transcriber/core/fetcher.py +0 -143
- lyrics_transcriber/storage/tokens.py +0 -116
- lyrics_transcriber/transcribers/base.py +0 -31
- lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
- lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
@@ -1,9 +1,42 @@
|
|
1
|
+
from dataclasses import dataclass
|
1
2
|
import os
|
2
3
|
import logging
|
3
|
-
from typing import Dict, Any, Optional
|
4
|
+
from typing import Dict, Any, List, Optional, Tuple
|
4
5
|
import subprocess
|
5
6
|
from datetime import timedelta
|
7
|
+
|
8
|
+
from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
|
6
9
|
from .subtitles import create_styled_subtitles, LyricsScreen, LyricsLine, LyricSegment
|
10
|
+
from ..correction.corrector import CorrectionResult
|
11
|
+
|
12
|
+
|
13
|
+
@dataclass
|
14
|
+
class OutputGeneratorConfig:
|
15
|
+
"""Configuration for output generation."""
|
16
|
+
|
17
|
+
output_dir: str
|
18
|
+
cache_dir: str
|
19
|
+
video_resolution: str = "360p"
|
20
|
+
video_background_image: Optional[str] = None
|
21
|
+
video_background_color: str = "black"
|
22
|
+
|
23
|
+
def __post_init__(self):
|
24
|
+
"""Validate configuration after initialization."""
|
25
|
+
if not self.output_dir:
|
26
|
+
raise ValueError("output_dir must be provided")
|
27
|
+
if not self.cache_dir:
|
28
|
+
raise ValueError("cache_dir must be provided")
|
29
|
+
if self.video_background_image and not os.path.isfile(self.video_background_image):
|
30
|
+
raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass
|
34
|
+
class OutputPaths:
|
35
|
+
"""Holds paths for generated output files."""
|
36
|
+
|
37
|
+
lrc: Optional[str] = None
|
38
|
+
ass: Optional[str] = None
|
39
|
+
video: Optional[str] = None
|
7
40
|
|
8
41
|
|
9
42
|
class OutputGenerator:
|
@@ -11,59 +44,56 @@ class OutputGenerator:
|
|
11
44
|
|
12
45
|
def __init__(
|
13
46
|
self,
|
47
|
+
config: OutputGeneratorConfig,
|
14
48
|
logger: Optional[logging.Logger] = None,
|
15
|
-
output_dir: Optional[str] = None,
|
16
|
-
cache_dir: str = "/tmp/lyrics-transcriber-cache/",
|
17
|
-
video_resolution: str = "360p",
|
18
|
-
video_background_image: Optional[str] = None,
|
19
|
-
video_background_color: str = "black",
|
20
49
|
):
|
50
|
+
"""
|
51
|
+
Initialize OutputGenerator with configuration.
|
52
|
+
|
53
|
+
Args:
|
54
|
+
config: OutputGeneratorConfig instance with required paths
|
55
|
+
logger: Optional logger instance
|
56
|
+
"""
|
57
|
+
self.config = config
|
21
58
|
self.logger = logger or logging.getLogger(__name__)
|
22
|
-
self.output_dir = output_dir
|
23
|
-
self.cache_dir = cache_dir
|
24
59
|
|
25
|
-
#
|
26
|
-
self.
|
27
|
-
self.
|
28
|
-
self.video_background_color = video_background_color
|
60
|
+
# Log the configured directories
|
61
|
+
self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
|
62
|
+
self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")
|
29
63
|
|
30
64
|
# Set video resolution parameters
|
31
|
-
self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(video_resolution)
|
32
|
-
|
33
|
-
# Validate video background if provided
|
34
|
-
if self.video_background_image and not os.path.isfile(self.video_background_image):
|
35
|
-
raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
|
65
|
+
self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
|
36
66
|
|
37
67
|
def generate_outputs(
|
38
|
-
self,
|
39
|
-
|
40
|
-
|
41
|
-
|
68
|
+
self,
|
69
|
+
transcription_corrected: CorrectionResult,
|
70
|
+
lyrics_results: List[LyricsData],
|
71
|
+
output_prefix: str,
|
72
|
+
audio_filepath: str,
|
73
|
+
render_video: bool = False,
|
74
|
+
) -> OutputPaths:
|
75
|
+
"""Generate all requested output formats."""
|
76
|
+
outputs = OutputPaths()
|
42
77
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
78
|
+
try:
|
79
|
+
# Generate plain lyrics files for each provider
|
80
|
+
for lyrics_data in lyrics_results:
|
81
|
+
provider_name = lyrics_data.metadata.source.title()
|
82
|
+
self.write_plain_lyrics(lyrics_data, f"{output_prefix} (Lyrics {provider_name})")
|
48
83
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
outputs = {}
|
84
|
+
if transcription_corrected:
|
85
|
+
# Write corrected lyrics as plain text
|
86
|
+
self.write_plain_lyrics_from_correction(transcription_corrected, f"{output_prefix} (Lyrics Corrected)")
|
53
87
|
|
54
|
-
|
55
|
-
|
56
|
-
lrc_path = self.generate_lrc(transcription_data, output_prefix)
|
57
|
-
outputs["lrc"] = lrc_path
|
88
|
+
# Generate LRC
|
89
|
+
outputs.lrc = self.generate_lrc(transcription_corrected, output_prefix)
|
58
90
|
|
59
|
-
|
60
|
-
|
61
|
-
outputs["ass"] = ass_path
|
91
|
+
# Generate ASS
|
92
|
+
outputs.ass = self.generate_ass(transcription_corrected, output_prefix)
|
62
93
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
outputs["video"] = video_path
|
94
|
+
# Generate video if requested
|
95
|
+
if render_video:
|
96
|
+
outputs.video = self.generate_video(outputs.ass, audio_filepath, output_prefix)
|
67
97
|
|
68
98
|
except Exception as e:
|
69
99
|
self.logger.error(f"Error generating outputs: {str(e)}")
|
@@ -71,19 +101,17 @@ class OutputGenerator:
|
|
71
101
|
|
72
102
|
return outputs
|
73
103
|
|
74
|
-
def
|
104
|
+
def _get_output_path(self, output_prefix: str, extension: str) -> str:
|
105
|
+
"""Generate full output path for a file."""
|
106
|
+
return os.path.join(self.config.output_dir or self.config.cache_dir, f"{output_prefix}.{extension}")
|
107
|
+
|
108
|
+
def generate_lrc(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
|
75
109
|
"""Generate LRC format lyrics file."""
|
76
110
|
self.logger.info("Generating LRC format lyrics")
|
77
|
-
|
78
|
-
output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.lrc")
|
111
|
+
output_path = self._get_output_path(output_prefix, "lrc")
|
79
112
|
|
80
113
|
try:
|
81
|
-
|
82
|
-
for segment in transcription_data["segments"]:
|
83
|
-
start_time = self._format_lrc_timestamp(segment["start"])
|
84
|
-
line = f"[{start_time}]{segment['text']}\n"
|
85
|
-
f.write(line)
|
86
|
-
|
114
|
+
self._write_lrc_file(output_path, transcription_data.segments)
|
87
115
|
self.logger.info(f"LRC file generated: {output_path}")
|
88
116
|
return output_path
|
89
117
|
|
@@ -91,24 +119,21 @@ class OutputGenerator:
|
|
91
119
|
self.logger.error(f"Failed to generate LRC file: {str(e)}")
|
92
120
|
raise
|
93
121
|
|
94
|
-
def
|
122
|
+
def _write_lrc_file(self, output_path: str, segments: list) -> None:
|
123
|
+
"""Write LRC file content."""
|
124
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
125
|
+
for segment in segments:
|
126
|
+
start_time = self._format_lrc_timestamp(segment.start_time)
|
127
|
+
line = f"[{start_time}]{segment.text}\n"
|
128
|
+
f.write(line)
|
129
|
+
|
130
|
+
def generate_ass(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
|
95
131
|
"""Generate ASS format subtitles file."""
|
96
132
|
self.logger.info("Generating ASS format subtitles")
|
97
|
-
|
98
|
-
output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.ass")
|
133
|
+
output_path = self._get_output_path(output_prefix, "ass")
|
99
134
|
|
100
135
|
try:
|
101
|
-
|
102
|
-
# Write ASS header
|
103
|
-
f.write(self._get_ass_header())
|
104
|
-
|
105
|
-
# Write events
|
106
|
-
for segment in transcription_data["segments"]:
|
107
|
-
start_time = self._format_ass_timestamp(segment["start"])
|
108
|
-
end_time = self._format_ass_timestamp(segment["end"])
|
109
|
-
line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment['text']}\n"
|
110
|
-
f.write(line)
|
111
|
-
|
136
|
+
self._write_ass_file(output_path, transcription_data.segments)
|
112
137
|
self.logger.info(f"ASS file generated: {output_path}")
|
113
138
|
return output_path
|
114
139
|
|
@@ -116,51 +141,57 @@ class OutputGenerator:
|
|
116
141
|
self.logger.error(f"Failed to generate ASS file: {str(e)}")
|
117
142
|
raise
|
118
143
|
|
144
|
+
def _write_ass_file(self, output_path: str, segments: list) -> None:
|
145
|
+
"""Write ASS file content."""
|
146
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
147
|
+
f.write(self._get_ass_header())
|
148
|
+
for segment in segments:
|
149
|
+
# Change from ts/end_ts to start_time/end_time
|
150
|
+
start_time = self._format_ass_timestamp(segment.start_time)
|
151
|
+
end_time = self._format_ass_timestamp(segment.end_time)
|
152
|
+
line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment.text}\n"
|
153
|
+
f.write(line)
|
154
|
+
|
119
155
|
def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
|
120
156
|
"""Generate MP4 video with lyrics overlay."""
|
121
157
|
self.logger.info("Generating video with lyrics overlay")
|
122
|
-
|
123
|
-
output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.mp4")
|
124
|
-
width, height = self.video_resolution_num
|
158
|
+
output_path = self._get_output_path(output_prefix, "mp4")
|
125
159
|
|
126
160
|
try:
|
127
|
-
|
128
|
-
cmd
|
129
|
-
"ffmpeg",
|
130
|
-
"-y",
|
131
|
-
"-f",
|
132
|
-
"lavfi",
|
133
|
-
"-i",
|
134
|
-
f"color=c={self.video_background_color}:s={width}x{height}",
|
135
|
-
"-i",
|
136
|
-
audio_path,
|
137
|
-
"-vf",
|
138
|
-
f"ass={ass_path}",
|
139
|
-
"-c:v",
|
140
|
-
"libx264",
|
141
|
-
"-c:a",
|
142
|
-
"aac",
|
143
|
-
"-shortest",
|
144
|
-
output_path,
|
145
|
-
]
|
146
|
-
|
147
|
-
# If background image provided, use it instead of solid color
|
148
|
-
if self.video_background_image:
|
149
|
-
cmd[3:6] = ["-i", self.video_background_image]
|
150
|
-
|
151
|
-
self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
|
152
|
-
subprocess.run(cmd, check=True)
|
153
|
-
|
161
|
+
cmd = self._build_ffmpeg_command(ass_path, audio_path, output_path)
|
162
|
+
self._run_ffmpeg_command(cmd)
|
154
163
|
self.logger.info(f"Video generated: {output_path}")
|
155
164
|
return output_path
|
156
165
|
|
157
|
-
except subprocess.CalledProcessError as e:
|
158
|
-
self.logger.error(f"FFmpeg error: {str(e)}")
|
159
|
-
raise
|
160
166
|
except Exception as e:
|
161
167
|
self.logger.error(f"Failed to generate video: {str(e)}")
|
162
168
|
raise
|
163
169
|
|
170
|
+
def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> list:
|
171
|
+
"""Build FFmpeg command for video generation."""
|
172
|
+
width, height = self.video_resolution_num
|
173
|
+
cmd = ["ffmpeg", "-y"]
|
174
|
+
|
175
|
+
# Input source (background)
|
176
|
+
if self.config.video_background_image:
|
177
|
+
cmd.extend(["-i", self.config.video_background_image])
|
178
|
+
else:
|
179
|
+
cmd.extend(["-f", "lavfi", "-i", f"color=c={self.config.video_background_color}:s={width}x{height}"])
|
180
|
+
|
181
|
+
# Add audio and subtitle inputs
|
182
|
+
cmd.extend(["-i", audio_path, "-vf", f"ass={ass_path}", "-c:v", "libx264", "-c:a", "aac", "-shortest", output_path])
|
183
|
+
|
184
|
+
return cmd
|
185
|
+
|
186
|
+
def _run_ffmpeg_command(self, cmd: list) -> None:
|
187
|
+
"""Execute FFmpeg command."""
|
188
|
+
self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
|
189
|
+
try:
|
190
|
+
subprocess.run(cmd, check=True)
|
191
|
+
except subprocess.CalledProcessError as e:
|
192
|
+
self.logger.error(f"FFmpeg error: {str(e)}")
|
193
|
+
raise
|
194
|
+
|
164
195
|
def _get_video_params(self, resolution: str) -> tuple:
|
165
196
|
"""Get video parameters based on resolution setting."""
|
166
197
|
match resolution:
|
@@ -208,3 +239,33 @@ Style: Default,Arial,{self.font_size},&H00FFFFFF,&H000000FF,&H00000000,&H0000000
|
|
208
239
|
[Events]
|
209
240
|
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
210
241
|
"""
|
242
|
+
|
243
|
+
def write_plain_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
|
244
|
+
"""Write plain text lyrics file."""
|
245
|
+
self.logger.info("Writing plain lyrics file")
|
246
|
+
output_path = self._get_output_path(output_prefix, "txt")
|
247
|
+
|
248
|
+
try:
|
249
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
250
|
+
f.write(lyrics_data.lyrics)
|
251
|
+
self.logger.info(f"Plain lyrics file generated: {output_path}")
|
252
|
+
return output_path
|
253
|
+
|
254
|
+
except Exception as e:
|
255
|
+
self.logger.error(f"Failed to write plain lyrics file: {str(e)}")
|
256
|
+
raise
|
257
|
+
|
258
|
+
def write_plain_lyrics_from_correction(self, correction_result: CorrectionResult, output_prefix: str) -> str:
|
259
|
+
"""Write corrected lyrics as plain text file."""
|
260
|
+
self.logger.info("Writing corrected lyrics file")
|
261
|
+
output_path = self._get_output_path(output_prefix, "txt")
|
262
|
+
|
263
|
+
try:
|
264
|
+
with open(output_path, "w", encoding="utf-8") as f:
|
265
|
+
f.write(correction_result.text)
|
266
|
+
self.logger.info(f"Corrected lyrics file generated: {output_path}")
|
267
|
+
return output_path
|
268
|
+
|
269
|
+
except Exception as e:
|
270
|
+
self.logger.error(f"Failed to write corrected lyrics file: {str(e)}")
|
271
|
+
raise
|
@@ -18,10 +18,10 @@ Functions for generating ASS subtitles from lyric data
|
|
18
18
|
class LyricSegmentIterator:
|
19
19
|
def __init__(self, lyrics_segments: List[str]):
|
20
20
|
self._segments = lyrics_segments
|
21
|
-
self._current_segment =
|
21
|
+
self._current_segment = 0
|
22
22
|
|
23
23
|
def __iter__(self):
|
24
|
-
self.
|
24
|
+
self._current_segment = 0
|
25
25
|
return self
|
26
26
|
|
27
27
|
def __next__(self):
|
@@ -49,17 +49,17 @@ class LyricSegment:
|
|
49
49
|
def to_ass(self) -> str:
|
50
50
|
"""Render this segment as part of an ASS event line"""
|
51
51
|
duration = (self.end_ts - self.ts).total_seconds() * 100
|
52
|
-
return
|
52
|
+
return rf"{{\kf{duration}}}{self.text}"
|
53
53
|
|
54
54
|
def to_dict(self) -> dict:
|
55
|
-
return {"text": self.text, "ts":
|
55
|
+
return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
|
56
56
|
|
57
57
|
@classmethod
|
58
58
|
def from_dict(cls, data: dict) -> "LyricSegment":
|
59
59
|
return cls(
|
60
60
|
text=data["text"],
|
61
|
-
ts=timedelta(seconds=
|
62
|
-
end_ts=timedelta(seconds=
|
61
|
+
ts=timedelta(seconds=data["ts"]),
|
62
|
+
end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
|
63
63
|
)
|
64
64
|
|
65
65
|
|
@@ -73,7 +73,7 @@ class LyricsLine:
|
|
73
73
|
|
74
74
|
@property
|
75
75
|
def end_ts(self) -> Optional[timedelta]:
|
76
|
-
return self.segments[-1].end_ts
|
76
|
+
return self.segments[-1].end_ts if self.segments else None
|
77
77
|
|
78
78
|
@ts.setter
|
79
79
|
def ts(self, value):
|
@@ -105,7 +105,7 @@ class LyricsLine:
|
|
105
105
|
"""Decorate line with karaoke tags"""
|
106
106
|
# Prefix the tag with centisecs prior to line in screen
|
107
107
|
start_time = (self.ts - screen_start_ts).total_seconds() * 100
|
108
|
-
line =
|
108
|
+
line = rf"{{\k{start_time}}}"
|
109
109
|
prev_end: Optional[timedelta] = None
|
110
110
|
for s in self.segments:
|
111
111
|
if prev_end is not None and prev_end < s.ts:
|
@@ -168,10 +168,10 @@ class LyricsScreen:
|
|
168
168
|
events = []
|
169
169
|
for i, line in enumerate(self.lines):
|
170
170
|
y_position = self.get_line_y(i)
|
171
|
-
|
171
|
+
|
172
172
|
# if self.logger:
|
173
173
|
# self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
|
174
|
-
|
174
|
+
|
175
175
|
event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
|
176
176
|
events.append(event)
|
177
177
|
return events
|
@@ -188,12 +188,12 @@ class LyricsScreen:
|
|
188
188
|
return LyricsScreen(new_lines, start_ts)
|
189
189
|
|
190
190
|
def to_dict(self) -> dict:
|
191
|
-
return {"lines": [line.to_dict() for line in self.lines], "start_ts":
|
191
|
+
return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
|
192
192
|
|
193
193
|
@classmethod
|
194
194
|
def from_dict(cls, data: dict) -> "LyricsScreen":
|
195
195
|
lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
|
196
|
-
start_ts = timedelta(seconds=
|
196
|
+
start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
|
197
197
|
return cls(lines=lines, start_ts=start_ts)
|
198
198
|
|
199
199
|
|