lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,42 @@
1
+ from dataclasses import dataclass
1
2
  import os
2
3
  import logging
3
- from typing import Dict, Any, Optional
4
+ from typing import Dict, Any, List, Optional, Tuple
4
5
  import subprocess
5
6
  from datetime import timedelta
7
+
8
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsData
6
9
  from .subtitles import create_styled_subtitles, LyricsScreen, LyricsLine, LyricSegment
10
+ from ..correction.corrector import CorrectionResult
11
+
12
+
13
+ @dataclass
14
+ class OutputGeneratorConfig:
15
+ """Configuration for output generation."""
16
+
17
+ output_dir: str
18
+ cache_dir: str
19
+ video_resolution: str = "360p"
20
+ video_background_image: Optional[str] = None
21
+ video_background_color: str = "black"
22
+
23
+ def __post_init__(self):
24
+ """Validate configuration after initialization."""
25
+ if not self.output_dir:
26
+ raise ValueError("output_dir must be provided")
27
+ if not self.cache_dir:
28
+ raise ValueError("cache_dir must be provided")
29
+ if self.video_background_image and not os.path.isfile(self.video_background_image):
30
+ raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
31
+
32
+
33
+ @dataclass
34
+ class OutputPaths:
35
+ """Holds paths for generated output files."""
36
+
37
+ lrc: Optional[str] = None
38
+ ass: Optional[str] = None
39
+ video: Optional[str] = None
7
40
 
8
41
 
9
42
  class OutputGenerator:
@@ -11,59 +44,56 @@ class OutputGenerator:
11
44
 
12
45
  def __init__(
13
46
  self,
47
+ config: OutputGeneratorConfig,
14
48
  logger: Optional[logging.Logger] = None,
15
- output_dir: Optional[str] = None,
16
- cache_dir: str = "/tmp/lyrics-transcriber-cache/",
17
- video_resolution: str = "360p",
18
- video_background_image: Optional[str] = None,
19
- video_background_color: str = "black",
20
49
  ):
50
+ """
51
+ Initialize OutputGenerator with configuration.
52
+
53
+ Args:
54
+ config: OutputGeneratorConfig instance with required paths
55
+ logger: Optional logger instance
56
+ """
57
+ self.config = config
21
58
  self.logger = logger or logging.getLogger(__name__)
22
- self.output_dir = output_dir
23
- self.cache_dir = cache_dir
24
59
 
25
- # Video settings
26
- self.video_resolution = video_resolution
27
- self.video_background_image = video_background_image
28
- self.video_background_color = video_background_color
60
+ # Log the configured directories
61
+ self.logger.debug(f"Initialized OutputGenerator with output_dir: {self.config.output_dir}")
62
+ self.logger.debug(f"Using cache_dir: {self.config.cache_dir}")
29
63
 
30
64
  # Set video resolution parameters
31
- self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(video_resolution)
32
-
33
- # Validate video background if provided
34
- if self.video_background_image and not os.path.isfile(self.video_background_image):
35
- raise FileNotFoundError(f"Video background image not found: {self.video_background_image}")
65
+ self.video_resolution_num, self.font_size, self.line_height = self._get_video_params(self.config.video_resolution)
36
66
 
37
67
  def generate_outputs(
38
- self, transcription_data: Dict[str, Any], output_prefix: str, audio_filepath: str, render_video: bool = False
39
- ) -> Dict[str, str]:
40
- """
41
- Generate all requested output formats.
68
+ self,
69
+ transcription_corrected: CorrectionResult,
70
+ lyrics_results: List[LyricsData],
71
+ output_prefix: str,
72
+ audio_filepath: str,
73
+ render_video: bool = False,
74
+ ) -> OutputPaths:
75
+ """Generate all requested output formats."""
76
+ outputs = OutputPaths()
42
77
 
43
- Args:
44
- transcription_data: Dictionary containing transcription segments with timing
45
- output_prefix: Prefix for output filenames
46
- audio_filepath: Path to the source audio file
47
- render_video: Whether to generate video output
78
+ try:
79
+ # Generate plain lyrics files for each provider
80
+ for lyrics_data in lyrics_results:
81
+ provider_name = lyrics_data.metadata.source.title()
82
+ self.write_plain_lyrics(lyrics_data, f"{output_prefix} (Lyrics {provider_name})")
48
83
 
49
- Returns:
50
- Dictionary of output paths for each format
51
- """
52
- outputs = {}
84
+ if transcription_corrected:
85
+ # Write corrected lyrics as plain text
86
+ self.write_plain_lyrics_from_correction(transcription_corrected, f"{output_prefix} (Lyrics Corrected)")
53
87
 
54
- try:
55
- # Generate LRC
56
- lrc_path = self.generate_lrc(transcription_data, output_prefix)
57
- outputs["lrc"] = lrc_path
88
+ # Generate LRC
89
+ outputs.lrc = self.generate_lrc(transcription_corrected, output_prefix)
58
90
 
59
- # Generate ASS
60
- ass_path = self.generate_ass(transcription_data, output_prefix)
61
- outputs["ass"] = ass_path
91
+ # Generate ASS
92
+ outputs.ass = self.generate_ass(transcription_corrected, output_prefix)
62
93
 
63
- # Generate video if requested
64
- if render_video:
65
- video_path = self.generate_video(ass_path, audio_filepath, output_prefix)
66
- outputs["video"] = video_path
94
+ # Generate video if requested
95
+ if render_video:
96
+ outputs.video = self.generate_video(outputs.ass, audio_filepath, output_prefix)
67
97
 
68
98
  except Exception as e:
69
99
  self.logger.error(f"Error generating outputs: {str(e)}")
@@ -71,19 +101,17 @@ class OutputGenerator:
71
101
 
72
102
  return outputs
73
103
 
74
- def generate_lrc(self, transcription_data: Dict[str, Any], output_prefix: str) -> str:
104
+ def _get_output_path(self, output_prefix: str, extension: str) -> str:
105
+ """Generate full output path for a file."""
106
+ return os.path.join(self.config.output_dir or self.config.cache_dir, f"{output_prefix}.{extension}")
107
+
108
+ def generate_lrc(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
75
109
  """Generate LRC format lyrics file."""
76
110
  self.logger.info("Generating LRC format lyrics")
77
-
78
- output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.lrc")
111
+ output_path = self._get_output_path(output_prefix, "lrc")
79
112
 
80
113
  try:
81
- with open(output_path, "w", encoding="utf-8") as f:
82
- for segment in transcription_data["segments"]:
83
- start_time = self._format_lrc_timestamp(segment["start"])
84
- line = f"[{start_time}]{segment['text']}\n"
85
- f.write(line)
86
-
114
+ self._write_lrc_file(output_path, transcription_data.segments)
87
115
  self.logger.info(f"LRC file generated: {output_path}")
88
116
  return output_path
89
117
 
@@ -91,24 +119,21 @@ class OutputGenerator:
91
119
  self.logger.error(f"Failed to generate LRC file: {str(e)}")
92
120
  raise
93
121
 
94
- def generate_ass(self, transcription_data: Dict[str, Any], output_prefix: str) -> str:
122
+ def _write_lrc_file(self, output_path: str, segments: list) -> None:
123
+ """Write LRC file content."""
124
+ with open(output_path, "w", encoding="utf-8") as f:
125
+ for segment in segments:
126
+ start_time = self._format_lrc_timestamp(segment.start_time)
127
+ line = f"[{start_time}]{segment.text}\n"
128
+ f.write(line)
129
+
130
+ def generate_ass(self, transcription_data: CorrectionResult, output_prefix: str) -> str:
95
131
  """Generate ASS format subtitles file."""
96
132
  self.logger.info("Generating ASS format subtitles")
97
-
98
- output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.ass")
133
+ output_path = self._get_output_path(output_prefix, "ass")
99
134
 
100
135
  try:
101
- with open(output_path, "w", encoding="utf-8") as f:
102
- # Write ASS header
103
- f.write(self._get_ass_header())
104
-
105
- # Write events
106
- for segment in transcription_data["segments"]:
107
- start_time = self._format_ass_timestamp(segment["start"])
108
- end_time = self._format_ass_timestamp(segment["end"])
109
- line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment['text']}\n"
110
- f.write(line)
111
-
136
+ self._write_ass_file(output_path, transcription_data.segments)
112
137
  self.logger.info(f"ASS file generated: {output_path}")
113
138
  return output_path
114
139
 
@@ -116,51 +141,57 @@ class OutputGenerator:
116
141
  self.logger.error(f"Failed to generate ASS file: {str(e)}")
117
142
  raise
118
143
 
144
+ def _write_ass_file(self, output_path: str, segments: list) -> None:
145
+ """Write ASS file content."""
146
+ with open(output_path, "w", encoding="utf-8") as f:
147
+ f.write(self._get_ass_header())
148
+ for segment in segments:
149
+ # Change from ts/end_ts to start_time/end_time
150
+ start_time = self._format_ass_timestamp(segment.start_time)
151
+ end_time = self._format_ass_timestamp(segment.end_time)
152
+ line = f"Dialogue: 0,{start_time},{end_time},Default,,0,0,0,,{segment.text}\n"
153
+ f.write(line)
154
+
119
155
  def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
120
156
  """Generate MP4 video with lyrics overlay."""
121
157
  self.logger.info("Generating video with lyrics overlay")
122
-
123
- output_path = os.path.join(self.output_dir or self.cache_dir, f"{output_prefix}.mp4")
124
- width, height = self.video_resolution_num
158
+ output_path = self._get_output_path(output_prefix, "mp4")
125
159
 
126
160
  try:
127
- # Prepare FFmpeg command
128
- cmd = [
129
- "ffmpeg",
130
- "-y",
131
- "-f",
132
- "lavfi",
133
- "-i",
134
- f"color=c={self.video_background_color}:s={width}x{height}",
135
- "-i",
136
- audio_path,
137
- "-vf",
138
- f"ass={ass_path}",
139
- "-c:v",
140
- "libx264",
141
- "-c:a",
142
- "aac",
143
- "-shortest",
144
- output_path,
145
- ]
146
-
147
- # If background image provided, use it instead of solid color
148
- if self.video_background_image:
149
- cmd[3:6] = ["-i", self.video_background_image]
150
-
151
- self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
152
- subprocess.run(cmd, check=True)
153
-
161
+ cmd = self._build_ffmpeg_command(ass_path, audio_path, output_path)
162
+ self._run_ffmpeg_command(cmd)
154
163
  self.logger.info(f"Video generated: {output_path}")
155
164
  return output_path
156
165
 
157
- except subprocess.CalledProcessError as e:
158
- self.logger.error(f"FFmpeg error: {str(e)}")
159
- raise
160
166
  except Exception as e:
161
167
  self.logger.error(f"Failed to generate video: {str(e)}")
162
168
  raise
163
169
 
170
+ def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> list:
171
+ """Build FFmpeg command for video generation."""
172
+ width, height = self.video_resolution_num
173
+ cmd = ["ffmpeg", "-y"]
174
+
175
+ # Input source (background)
176
+ if self.config.video_background_image:
177
+ cmd.extend(["-i", self.config.video_background_image])
178
+ else:
179
+ cmd.extend(["-f", "lavfi", "-i", f"color=c={self.config.video_background_color}:s={width}x{height}"])
180
+
181
+ # Add audio and subtitle inputs
182
+ cmd.extend(["-i", audio_path, "-vf", f"ass={ass_path}", "-c:v", "libx264", "-c:a", "aac", "-shortest", output_path])
183
+
184
+ return cmd
185
+
186
+ def _run_ffmpeg_command(self, cmd: list) -> None:
187
+ """Execute FFmpeg command."""
188
+ self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
189
+ try:
190
+ subprocess.run(cmd, check=True)
191
+ except subprocess.CalledProcessError as e:
192
+ self.logger.error(f"FFmpeg error: {str(e)}")
193
+ raise
194
+
164
195
  def _get_video_params(self, resolution: str) -> tuple:
165
196
  """Get video parameters based on resolution setting."""
166
197
  match resolution:
@@ -208,3 +239,33 @@ Style: Default,Arial,{self.font_size},&H00FFFFFF,&H000000FF,&H00000000,&H0000000
208
239
  [Events]
209
240
  Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
210
241
  """
242
+
243
+ def write_plain_lyrics(self, lyrics_data: LyricsData, output_prefix: str) -> str:
244
+ """Write plain text lyrics file."""
245
+ self.logger.info("Writing plain lyrics file")
246
+ output_path = self._get_output_path(output_prefix, "txt")
247
+
248
+ try:
249
+ with open(output_path, "w", encoding="utf-8") as f:
250
+ f.write(lyrics_data.lyrics)
251
+ self.logger.info(f"Plain lyrics file generated: {output_path}")
252
+ return output_path
253
+
254
+ except Exception as e:
255
+ self.logger.error(f"Failed to write plain lyrics file: {str(e)}")
256
+ raise
257
+
258
+ def write_plain_lyrics_from_correction(self, correction_result: CorrectionResult, output_prefix: str) -> str:
259
+ """Write corrected lyrics as plain text file."""
260
+ self.logger.info("Writing corrected lyrics file")
261
+ output_path = self._get_output_path(output_prefix, "txt")
262
+
263
+ try:
264
+ with open(output_path, "w", encoding="utf-8") as f:
265
+ f.write(correction_result.text)
266
+ self.logger.info(f"Corrected lyrics file generated: {output_path}")
267
+ return output_path
268
+
269
+ except Exception as e:
270
+ self.logger.error(f"Failed to write corrected lyrics file: {str(e)}")
271
+ raise
@@ -18,10 +18,10 @@ Functions for generating ASS subtitles from lyric data
18
18
  class LyricSegmentIterator:
19
19
  def __init__(self, lyrics_segments: List[str]):
20
20
  self._segments = lyrics_segments
21
- self._current_segment = None
21
+ self._current_segment = 0
22
22
 
23
23
  def __iter__(self):
24
- self._current_sement = 0
24
+ self._current_segment = 0
25
25
  return self
26
26
 
27
27
  def __next__(self):
@@ -49,17 +49,17 @@ class LyricSegment:
49
49
  def to_ass(self) -> str:
50
50
  """Render this segment as part of an ASS event line"""
51
51
  duration = (self.end_ts - self.ts).total_seconds() * 100
52
- return f"{{\kf{duration}}}{self.text}"
52
+ return rf"{{\kf{duration}}}{self.text}"
53
53
 
54
54
  def to_dict(self) -> dict:
55
- return {"text": self.text, "ts": str(self.ts), "end_ts": str(self.end_ts) if self.end_ts else None}
55
+ return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
56
56
 
57
57
  @classmethod
58
58
  def from_dict(cls, data: dict) -> "LyricSegment":
59
59
  return cls(
60
60
  text=data["text"],
61
- ts=timedelta(seconds=float(data["ts"])),
62
- end_ts=timedelta(seconds=float(data["end_ts"])) if data["end_ts"] else None,
61
+ ts=timedelta(seconds=data["ts"]),
62
+ end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
63
63
  )
64
64
 
65
65
 
@@ -73,7 +73,7 @@ class LyricsLine:
73
73
 
74
74
  @property
75
75
  def end_ts(self) -> Optional[timedelta]:
76
- return self.segments[-1].end_ts
76
+ return self.segments[-1].end_ts if self.segments else None
77
77
 
78
78
  @ts.setter
79
79
  def ts(self, value):
@@ -105,7 +105,7 @@ class LyricsLine:
105
105
  """Decorate line with karaoke tags"""
106
106
  # Prefix the tag with centisecs prior to line in screen
107
107
  start_time = (self.ts - screen_start_ts).total_seconds() * 100
108
- line = f"{{\k{start_time}}}"
108
+ line = rf"{{\k{start_time}}}"
109
109
  prev_end: Optional[timedelta] = None
110
110
  for s in self.segments:
111
111
  if prev_end is not None and prev_end < s.ts:
@@ -168,10 +168,10 @@ class LyricsScreen:
168
168
  events = []
169
169
  for i, line in enumerate(self.lines):
170
170
  y_position = self.get_line_y(i)
171
-
171
+
172
172
  # if self.logger:
173
173
  # self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
174
-
174
+
175
175
  event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
176
176
  events.append(event)
177
177
  return events
@@ -188,12 +188,12 @@ class LyricsScreen:
188
188
  return LyricsScreen(new_lines, start_ts)
189
189
 
190
190
  def to_dict(self) -> dict:
191
- return {"lines": [line.to_dict() for line in self.lines], "start_ts": str(self.start_ts) if self.start_ts else None}
191
+ return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
192
192
 
193
193
  @classmethod
194
194
  def from_dict(cls, data: dict) -> "LyricsScreen":
195
195
  lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
196
- start_ts = timedelta(seconds=float(data["start_ts"])) if data["start_ts"] else None
196
+ start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
197
197
  return cls(lines=lines, start_ts=start_ts)
198
198
 
199
199