lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. lyrics_transcriber/__init__.py +2 -1
  2. lyrics_transcriber/cli/cli_main.py +33 -12
  3. lyrics_transcriber/core/config.py +35 -0
  4. lyrics_transcriber/core/controller.py +85 -121
  5. lyrics_transcriber/correction/anchor_sequence.py +471 -0
  6. lyrics_transcriber/correction/corrector.py +237 -33
  7. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  8. lyrics_transcriber/correction/handlers/base.py +30 -0
  9. lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
  10. lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
  11. lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
  12. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
  13. lyrics_transcriber/correction/handlers/repeat.py +71 -0
  14. lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
  15. lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
  16. lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
  17. lyrics_transcriber/correction/handlers/word_operations.py +135 -0
  18. lyrics_transcriber/correction/phrase_analyzer.py +426 -0
  19. lyrics_transcriber/correction/text_utils.py +30 -0
  20. lyrics_transcriber/lyrics/base_lyrics_provider.py +5 -81
  21. lyrics_transcriber/lyrics/genius.py +5 -2
  22. lyrics_transcriber/lyrics/spotify.py +3 -3
  23. lyrics_transcriber/output/ass/__init__.py +21 -0
  24. lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
  25. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  26. lyrics_transcriber/output/ass/config.py +37 -0
  27. lyrics_transcriber/output/ass/constants.py +23 -0
  28. lyrics_transcriber/output/ass/event.py +94 -0
  29. lyrics_transcriber/output/ass/formatters.py +132 -0
  30. lyrics_transcriber/output/ass/lyrics_line.py +219 -0
  31. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  32. lyrics_transcriber/output/ass/section_detector.py +89 -0
  33. lyrics_transcriber/output/ass/section_screen.py +106 -0
  34. lyrics_transcriber/output/ass/style.py +187 -0
  35. lyrics_transcriber/output/cdg.py +503 -0
  36. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  37. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  38. lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
  39. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  40. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  41. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  42. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  43. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  44. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  45. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  46. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  47. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  48. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  49. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  50. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  51. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  52. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  53. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  54. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  55. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  56. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  57. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  58. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  59. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  60. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  61. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  62. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  63. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  64. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  65. lyrics_transcriber/output/generator.py +101 -193
  66. lyrics_transcriber/output/lyrics_file.py +102 -0
  67. lyrics_transcriber/output/plain_text.py +91 -0
  68. lyrics_transcriber/output/segment_resizer.py +416 -0
  69. lyrics_transcriber/output/subtitles.py +328 -302
  70. lyrics_transcriber/output/video.py +219 -0
  71. lyrics_transcriber/review/__init__.py +1 -0
  72. lyrics_transcriber/review/server.py +138 -0
  73. lyrics_transcriber/transcribers/audioshake.py +3 -2
  74. lyrics_transcriber/transcribers/base_transcriber.py +5 -42
  75. lyrics_transcriber/transcribers/whisper.py +3 -4
  76. lyrics_transcriber/types.py +454 -0
  77. {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/METADATA +14 -3
  78. lyrics_transcriber-0.32.2.dist-info/RECORD +86 -0
  79. {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/WHEEL +1 -1
  80. {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/entry_points.txt +1 -0
  81. lyrics_transcriber/correction/base_strategy.py +0 -29
  82. lyrics_transcriber/correction/strategy_diff.py +0 -263
  83. lyrics_transcriber-0.30.1.dist-info/RECORD +0 -25
  84. {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/LICENSE +0 -0
lyrics_transcriber/output/subtitles.py (entry 69 above: +328 -302)
@@ -1,305 +1,331 @@
1
- from dataclasses import dataclass, field
2
- from datetime import timedelta
3
- from typing import Dict, List, Optional, Tuple
4
- import json
5
- import itertools
6
- from pathlib import Path
7
- from enum import IntEnum
1
+ import os
8
2
  import logging
3
+ from typing import List, Optional, Tuple, Union
4
+ import subprocess
5
+ import json
9
6
 
10
- from . import ass
11
-
12
-
13
- """
14
- Functions for generating ASS subtitles from lyric data
15
- """
16
-
17
-
18
- class LyricSegmentIterator:
19
- def __init__(self, lyrics_segments: List[str]):
20
- self._segments = lyrics_segments
21
- self._current_segment = 0
22
-
23
- def __iter__(self):
24
- self._current_segment = 0
25
- return self
26
-
27
- def __next__(self):
28
- if self._current_segment >= len(self._segments):
29
- raise StopIteration
30
- val = self._segments[self._current_segment]
31
- self._current_segment += 1
32
- return val
33
-
34
- def __len__(self):
35
- return len(self._segments)
36
-
37
-
38
- @dataclass
39
- class LyricSegment:
40
- text: str
41
- ts: timedelta
42
- end_ts: Optional[timedelta] = None
43
-
44
- def adjust_timestamps(self, adjustment) -> "LyricSegment":
45
- ts = self.ts + adjustment
46
- end_ts = self.end_ts + adjustment if self.end_ts else None
47
- return LyricSegment(self.text, ts, end_ts)
48
-
49
- def to_ass(self) -> str:
50
- """Render this segment as part of an ASS event line"""
51
- duration = (self.end_ts - self.ts).total_seconds() * 100
52
- return rf"{{\kf{duration}}}{self.text}"
53
-
54
- def to_dict(self) -> dict:
55
- return {"text": self.text, "ts": self.ts.total_seconds(), "end_ts": self.end_ts.total_seconds() if self.end_ts else None}
56
-
57
- @classmethod
58
- def from_dict(cls, data: dict) -> "LyricSegment":
59
- return cls(
60
- text=data["text"],
61
- ts=timedelta(seconds=data["ts"]),
62
- end_ts=timedelta(seconds=data["end_ts"]) if data["end_ts"] is not None else None,
63
- )
64
-
65
-
66
- @dataclass
67
- class LyricsLine:
68
- segments: List[LyricSegment] = field(default_factory=list)
69
-
70
- @property
71
- def ts(self) -> Optional[timedelta]:
72
- return self.segments[0].ts if len(self.segments) else None
73
-
74
- @property
75
- def end_ts(self) -> Optional[timedelta]:
76
- return self.segments[-1].end_ts if self.segments else None
77
-
78
- @ts.setter
79
- def ts(self, value):
80
- self.segments[0].ts = value
81
-
82
- @end_ts.setter
83
- def end_ts(self, value):
84
- self.segments[-1].end_ts = value
85
-
86
- def __str__(self):
87
- return "".join([f"{{{s.text}}}" for s in self.segments])
88
-
89
- def as_ass_event(self, screen_start: timedelta, screen_end: timedelta, style: ass.ASS.Style, y_position: int):
90
- e = ass.ASS.Event()
91
- e.type = "Dialogue"
92
- e.Layer = 0
93
- e.Style = style
94
- e.Start = screen_start.total_seconds()
95
- e.End = screen_end.total_seconds()
96
- e.MarginV = y_position
97
- e.Text = self.decorate_ass_line(self.segments, screen_start)
98
-
99
- # Set alignment to top-center
100
- e.Text = "{\\an8}" + e.Text
101
-
102
- return e
103
-
104
- def decorate_ass_line(self, segments, screen_start_ts: timedelta):
105
- """Decorate line with karaoke tags"""
106
- # Prefix the tag with centisecs prior to line in screen
107
- start_time = (self.ts - screen_start_ts).total_seconds() * 100
108
- line = rf"{{\k{start_time}}}"
109
- prev_end: Optional[timedelta] = None
110
- for s in self.segments:
111
- if prev_end is not None and prev_end < s.ts:
112
- blank_segment = LyricSegment("", prev_end, s.ts)
113
- line += blank_segment.to_ass()
114
- line += s.to_ass()
115
- prev_end = s.end_ts
116
-
117
- return line
118
-
119
- def adjust_timestamps(self, adjustment) -> "LyricsLine":
120
- new_segments = [s.adjust_timestamps(adjustment) for s in self.segments]
121
- start_ts = self.ts + adjustment if self.ts else None
122
- return LyricsLine(new_segments)
123
-
124
- def to_dict(self) -> dict:
125
- return {"segments": [segment.to_dict() for segment in self.segments]}
126
-
127
- @classmethod
128
- def from_dict(cls, data: dict) -> "LyricsLine":
129
- segments = [LyricSegment.from_dict(segment_data) for segment_data in data["segments"]]
130
- return cls(segments=segments)
131
-
132
-
133
- @dataclass
134
- class LyricsScreen:
135
- lines: List[LyricsLine] = field(default_factory=list)
136
- start_ts: Optional[timedelta] = None
137
- video_size: Tuple[int, int] = None
138
- line_height: int = None
139
- logger: logging.Logger = None
140
-
141
- @property
142
- def end_ts(self) -> timedelta:
143
- return self.lines[-1].end_ts
144
-
145
- def get_line_y(self, line_num: int) -> int:
146
- _, h = self.video_size
147
- line_count = len(self.lines)
148
- total_height = line_count * self.line_height
149
-
150
- # Calculate the top margin to center the lyrics block
151
- top_margin = (h - total_height) / 2
152
-
153
- # Calculate the y-position for this specific line
154
- line_y = top_margin + (line_num * self.line_height)
155
-
156
- # if self.logger:
157
- # self.logger.debug(f"Line {line_num + 1} positioning:")
158
- # self.logger.debug(f" Video height: {h}")
159
- # self.logger.debug(f" Total lines: {line_count}")
160
- # self.logger.debug(f" Line height: {self.line_height}")
161
- # self.logger.debug(f" Total lyrics height: {total_height}")
162
- # self.logger.debug(f" Top margin: {top_margin}")
163
- # self.logger.debug(f" Line y: {line_y}")
164
-
165
- return int(line_y)
166
-
167
- def as_ass_events(self, style: ass.ASS.Style) -> List[ass.ASS.Event]:
168
- events = []
169
- for i, line in enumerate(self.lines):
170
- y_position = self.get_line_y(i)
171
-
172
- # if self.logger:
173
- # self.logger.debug(f"Creating ASS event for line {i + 1} at y-position: {y_position}")
174
-
175
- event = line.as_ass_event(self.start_ts, self.end_ts, style, y_position)
176
- events.append(event)
177
- return events
178
-
179
- def __str__(self):
180
- lines = [f"{self.start_ts} - {self.end_ts}:"]
181
- for line in self.lines:
182
- lines.append(f"\t{line}")
183
- return "\n".join(lines)
184
-
185
- def adjust_timestamps(self, adjustment: timedelta) -> "LyricsScreen":
186
- new_lines = [l.adjust_timestamps(adjustment) for l in self.lines]
187
- start_ts = self.start_ts + adjustment if self.start_ts else None
188
- return LyricsScreen(new_lines, start_ts)
189
-
190
- def to_dict(self) -> dict:
191
- return {"lines": [line.to_dict() for line in self.lines], "start_ts": self.start_ts.total_seconds() if self.start_ts else None}
192
-
193
- @classmethod
194
- def from_dict(cls, data: dict) -> "LyricsScreen":
195
- lines = [LyricsLine.from_dict(line_data) for line_data in data["lines"]]
196
- start_ts = timedelta(seconds=data["start_ts"]) if data["start_ts"] is not None else None
197
- return cls(lines=lines, start_ts=start_ts)
198
-
199
-
200
- class LyricsObjectJSONEncoder(json.JSONEncoder):
201
- def default(self, o):
202
- if isinstance(o, (LyricSegment, LyricsLine, LyricsScreen)):
203
- return o.to_dict()
204
- return super().default(o)
205
-
206
-
207
- def set_segment_end_times(screens: List[LyricsScreen], song_duration_seconds: int) -> List[LyricsScreen]:
208
- """
209
- Infer end times of lines for screens where they are not already set.
210
- """
211
- segments = list(itertools.chain.from_iterable([l.segments for s in screens for l in s.lines]))
212
- for i, segment in enumerate(segments):
213
- if not segment.end_ts:
214
- if i == len(segments) - 1:
215
- segment.end_ts = timedelta(seconds=song_duration_seconds)
7
+ from lyrics_transcriber.output.ass.section_screen import SectionScreen
8
+ from lyrics_transcriber.types import LyricsSegment
9
+ from lyrics_transcriber.output.ass import LyricsScreen, LyricsLine
10
+ from lyrics_transcriber.output.ass.ass import ASS
11
+ from lyrics_transcriber.output.ass.style import Style
12
+ from lyrics_transcriber.output.ass.constants import ALIGN_TOP_CENTER
13
+ from lyrics_transcriber.output.ass import LyricsScreen
14
+ from lyrics_transcriber.output.ass.section_detector import SectionDetector
15
+ from lyrics_transcriber.output.ass.config import ScreenConfig
16
+
17
+
18
+ class SubtitlesGenerator:
19
+ """Handles generation of subtitle files in various formats."""
20
+
21
+ def __init__(
22
+ self,
23
+ output_dir: str,
24
+ video_resolution: Tuple[int, int],
25
+ font_size: int,
26
+ line_height: int,
27
+ styles: dict,
28
+ logger: Optional[logging.Logger] = None,
29
+ ):
30
+ """Initialize SubtitleGenerator.
31
+
32
+ Args:
33
+ output_dir: Directory where output files will be written
34
+ video_resolution: Tuple of (width, height) for video resolution
35
+ font_size: Font size for subtitles
36
+ line_height: Line height for subtitle positioning
37
+ logger: Optional logger instance
38
+ """
39
+ self.output_dir = output_dir
40
+ self.video_resolution = video_resolution
41
+ self.font_size = font_size
42
+ self.styles = styles
43
+ self.config = ScreenConfig(line_height=line_height, video_width=video_resolution[0], video_height=video_resolution[1])
44
+ self.logger = logger or logging.getLogger(__name__)
45
+
46
+ def _get_output_path(self, output_prefix: str, extension: str) -> str:
47
+ """Generate full output path for a file."""
48
+ return os.path.join(self.output_dir, f"{output_prefix}.{extension}")
49
+
50
+ def _get_audio_duration(self, audio_filepath: str, segments: Optional[List[LyricsSegment]] = None) -> float:
51
+ """Get audio duration using ffprobe."""
52
+ try:
53
+ probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "json", audio_filepath]
54
+ probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
55
+ probe_data = json.loads(probe_output)
56
+ duration = float(probe_data["format"]["duration"])
57
+ self.logger.debug(f"Detected audio duration: {duration:.2f}s")
58
+ return duration
59
+ except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
60
+ self.logger.error(f"Failed to get audio duration: {e}")
61
+ # Fallback to last segment end time plus buffer
62
+ if segments:
63
+ duration = segments[-1].end_time + 30.0
64
+ self.logger.warning(f"Using fallback duration: {duration:.2f}s")
65
+ return duration
66
+ return 0.0
67
+
68
+ def generate_ass(self, segments: List[LyricsSegment], output_prefix: str, audio_filepath: str) -> str:
69
+ self.logger.info("Generating ASS format subtitles")
70
+ output_path = self._get_output_path(f"{output_prefix} (Karaoke)", "ass")
71
+
72
+ try:
73
+ self.logger.debug(f"Processing {len(segments)} segments")
74
+ song_duration = self._get_audio_duration(audio_filepath, segments)
75
+
76
+ screens = self._create_screens(segments, song_duration)
77
+ self.logger.debug(f"Created {len(screens)} initial screens")
78
+
79
+ lyric_subtitles_ass = self._create_styled_subtitles(screens, self.video_resolution, self.font_size)
80
+ self.logger.debug("Created styled subtitles")
81
+
82
+ lyric_subtitles_ass.write(output_path)
83
+ self.logger.info(f"ASS file generated: {output_path}")
84
+ return output_path
85
+
86
+ except Exception as e:
87
+ self.logger.error(f"Failed to generate ASS file: {str(e)}", exc_info=True)
88
+ raise
89
+
90
+ def _create_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[LyricsScreen]:
91
+ """Create screens from segments with detailed logging."""
92
+ self.logger.debug("Creating screens from segments")
93
+
94
+ # Create section screens and get instrumental boundaries
95
+ section_screens = self._create_section_screens(segments, song_duration)
96
+ instrumental_times = self._get_instrumental_times(section_screens)
97
+
98
+ # Create regular lyric screens
99
+ lyric_screens = self._create_lyric_screens(segments, instrumental_times)
100
+
101
+ # Merge and process all screens
102
+ all_screens = self._merge_and_process_screens(section_screens, lyric_screens)
103
+
104
+ # Log final results
105
+ self._log_final_screens(all_screens)
106
+
107
+ return all_screens
108
+
109
+ def _create_section_screens(self, segments: List[LyricsSegment], song_duration: float) -> List[SectionScreen]:
110
+ """Create section screens using SectionDetector."""
111
+ section_detector = SectionDetector(logger=self.logger)
112
+ return section_detector.process_segments(segments, self.video_resolution, self.config.line_height, song_duration)
113
+
114
+ def _get_instrumental_times(self, section_screens: List[SectionScreen]) -> List[Tuple[float, float]]:
115
+ """Extract instrumental section time boundaries."""
116
+ instrumental_times = [
117
+ (s.start_time, s.end_time) for s in section_screens if isinstance(s, SectionScreen) and s.section_type == "INSTRUMENTAL"
118
+ ]
119
+
120
+ self.logger.debug(f"Found {len(instrumental_times)} instrumental sections:")
121
+ for start, end in instrumental_times:
122
+ self.logger.debug(f" {start:.2f}s - {end:.2f}s")
123
+
124
+ return instrumental_times
125
+
126
+ def _create_lyric_screens(self, segments: List[LyricsSegment], instrumental_times: List[Tuple[float, float]]) -> List[LyricsScreen]:
127
+ """Create regular lyric screens, handling instrumental boundaries."""
128
+ screens: List[LyricsScreen] = []
129
+ current_screen: Optional[LyricsScreen] = None
130
+
131
+ for i, segment in enumerate(segments):
132
+ self.logger.debug(f"Processing segment {i}: {segment.start_time:.2f}s - {segment.end_time:.2f}s")
133
+
134
+ # Skip segments in instrumental sections
135
+ if self._is_in_instrumental_section(segment, instrumental_times):
136
+ continue
137
+
138
+ # Check if we need a new screen
139
+ if self._should_start_new_screen(current_screen, segment, instrumental_times):
140
+ # fmt: off
141
+ current_screen = LyricsScreen(
142
+ video_size=self.video_resolution,
143
+ line_height=self.config.line_height,
144
+ config=self.config,
145
+ logger=self.logger
146
+ )
147
+ # fmt: on
148
+ screens.append(current_screen)
149
+ self.logger.debug(" Created new screen")
150
+
151
+ # Add line to current screen
152
+ line = LyricsLine(logger=self.logger, segment=segment, screen_config=self.config)
153
+ current_screen.lines.append(line)
154
+ self.logger.debug(f" Added line to screen (now has {len(current_screen.lines)} lines)")
155
+
156
+ return screens
157
+
158
+ def _is_in_instrumental_section(self, segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]) -> bool:
159
+ """Check if a segment falls within any instrumental section."""
160
+ for inst_start, inst_end in instrumental_times:
161
+ if segment.start_time >= inst_start and segment.start_time < inst_end:
162
+ self.logger.debug(f" Skipping segment - falls within instrumental {inst_start:.2f}s - {inst_end:.2f}s")
163
+ return True
164
+ return False
165
+
166
+ def _should_start_new_screen(
167
+ self, current_screen: Optional[LyricsScreen], segment: LyricsSegment, instrumental_times: List[Tuple[float, float]]
168
+ ) -> bool:
169
+ """Determine if a new screen should be started."""
170
+ if current_screen is None:
171
+ return True
172
+
173
+ if len(current_screen.lines) >= self.config.max_visible_lines:
174
+ return True
175
+
176
+ # Check if this segment is first after any instrumental section
177
+ if current_screen.lines:
178
+ prev_segment = current_screen.lines[-1].segment
179
+ for inst_start, inst_end in instrumental_times:
180
+ if prev_segment.end_time <= inst_start and segment.start_time >= inst_end:
181
+ self.logger.debug(f" Forcing new screen - first segment after instrumental {inst_start:.2f}s - {inst_end:.2f}s")
182
+ return True
183
+
184
+ return False
185
+
186
+ def _merge_and_process_screens(
187
+ self, section_screens: List[SectionScreen], lyric_screens: List[LyricsScreen]
188
+ ) -> List[Union[SectionScreen, LyricsScreen]]:
189
+ """Merge section and lyric screens in chronological order."""
190
+ # Sort all screens by start time
191
+ return sorted(section_screens + lyric_screens, key=lambda s: s.start_ts)
192
+
193
+ def _log_final_screens(self, screens: List[Union[SectionScreen, LyricsScreen]]) -> None:
194
+ """Log details of all final screens."""
195
+ self.logger.debug("Final screens created:")
196
+ for i, screen in enumerate(screens):
197
+ self.logger.debug(f"Screen {i + 1}:")
198
+ if isinstance(screen, SectionScreen):
199
+ self.logger.debug(f" Section: {screen.section_type}")
200
+ self.logger.debug(f" Text: {screen.text}")
201
+ self.logger.debug(f" Time: {screen.start_time:.2f}s - {screen.end_time:.2f}s")
216
202
  else:
217
- next_segment = segments[i + 1]
218
- segment.end_ts = next_segment.ts
219
- return screens
220
-
221
-
222
- def set_screen_start_times(screens: List[LyricsScreen]) -> List[LyricsScreen]:
223
- """
224
- Set start times for screens to the end times of the previous screen.
225
- """
226
- prev_screen = None
227
- for screen in screens:
228
- if prev_screen is None:
229
- screen.start_ts = timedelta()
230
- else:
231
- screen.start_ts = prev_screen.end_ts + timedelta(seconds=0.1)
232
- prev_screen = screen
233
- return screens
234
-
235
-
236
- def create_styled_subtitles(
237
- lyric_screens: List[LyricsScreen],
238
- resolution,
239
- fontsize,
240
- ) -> ass.ASS:
241
- a = ass.ASS()
242
- a.set_resolution(resolution)
243
-
244
- a.styles_format = [
245
- "Name", # The name of the Style. Case sensitive. Cannot include commas.
246
- "Fontname", # The fontname as used by Windows. Case-sensitive.
247
- "Fontsize", # Font size
248
- "PrimaryColour", # This is the colour that a subtitle will normally appear in.
249
- "SecondaryColour", # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
250
- "OutlineColour", # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
251
- "BackColour", # This is the colour of the subtitle outline or shadow, if these are used
252
- "Bold", # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
253
- "Italic", # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
254
- "Underline", # [-1 or 0]
255
- "StrikeOut", # [-1 or 0]
256
- "ScaleX", # Modifies the width of the font. [percent]
257
- "ScaleY", # Modifies the height of the font. [percent]
258
- "Spacing", # Extra space between characters. [pixels]
259
- "Angle", # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
260
- "BorderStyle", # 1=Outline + drop shadow, 3=Opaque box
261
- "Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
262
- "Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
263
- "Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
264
- "MarginL", # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
265
- "MarginR", # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
266
- "MarginV", # MarginV. This defines the vertical Left Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred
267
- "Encoding", #
268
- ]
269
-
270
- style = ass.ASS.Style()
271
- style.type = "Style"
272
- style.Name = "Nomad"
273
- style.Fontname = "Avenir Next Bold"
274
- style.Fontsize = fontsize
275
-
276
- style.PrimaryColour = (112, 112, 247, 255)
277
- style.SecondaryColour = (255, 255, 255, 255)
278
- style.OutlineColour = (26, 58, 235, 255)
279
- style.BackColour = (0, 255, 0, 255) # (26, 58, 235, 255)
280
-
281
- style.Bold = False
282
- style.Italic = False
283
- style.Underline = False
284
- style.StrikeOut = False
285
-
286
- style.ScaleX = 100
287
- style.ScaleY = 100
288
- style.Spacing = 0
289
- style.Angle = 0.0
290
- style.BorderStyle = 1
291
- style.Outline = 1
292
- style.Shadow = 0
293
- style.Alignment = ass.ASS.ALIGN_TOP_CENTER
294
- style.MarginL = 0
295
- style.MarginR = 0
296
- style.MarginV = 0
297
- style.Encoding = 0
298
-
299
- a.add_style(style)
300
-
301
- a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
302
- for screen in lyric_screens:
303
- [a.add(event) for event in screen.as_ass_events(style)]
304
-
305
- return a
203
+ self.logger.debug(f" Number of lines: {len(screen.lines)}")
204
+ for j, line in enumerate(screen.lines):
205
+ self.logger.debug(f" Line {j + 1} ({line.segment.start_time:.2f}s - {line.segment.end_time:.2f}s): {line}")
206
+
207
+ def _create_styled_ass_instance(self, resolution, fontsize):
208
+ a = ASS()
209
+ a.set_resolution(resolution)
210
+
211
+ a.styles_format = [
212
+ "Name", # The name of the Style. Case sensitive. Cannot include commas.
213
+ "Fontname", # The fontname as used by Windows. Case-sensitive.
214
+ "Fontpath", # The path to the font file.
215
+ "Fontsize", # Font size
216
+ "PrimaryColour", # This is the colour that a subtitle will normally appear in.
217
+ "SecondaryColour", # This colour may be used instead of the Primary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
218
+ "OutlineColour", # This colour may be used instead of the Primary or Secondary colour when a subtitle is automatically shifted to prevent an onscreen collsion, to distinguish the different subtitles.
219
+ "BackColour", # This is the colour of the subtitle outline or shadow, if these are used
220
+ "Bold", # This defines whether text is bold (true) or not (false). -1 is True, 0 is False
221
+ "Italic", # This defines whether text is italic (true) or not (false). -1 is True, 0 is False
222
+ "Underline", # [-1 or 0]
223
+ "StrikeOut", # [-1 or 0]
224
+ "ScaleX", # Modifies the width of the font. [percent]
225
+ "ScaleY", # Modifies the height of the font. [percent]
226
+ "Spacing", # Extra space between characters. [pixels]
227
+ "Angle", # The origin of the rotation is defined by the alignment. Can be a floating point number. [degrees]
228
+ "BorderStyle", # 1=Outline + drop shadow, 3=Opaque box
229
+ "Outline", # If BorderStyle is 1, then this specifies the width of the outline around the text, in pixels. Values may be 0, 1, 2, 3 or 4.
230
+ "Shadow", # If BorderStyle is 1, then this specifies the depth of the drop shadow behind the text, in pixels. Values may be 0, 1, 2, 3 or 4. Drop shadow is always used in addition to an outline - SSA will force an outline of 1 pixel if no outline width is given.
231
+ "Alignment", # This sets how text is "justified" within the Left/Right onscreen margins, and also the vertical placing. Values may be 1=Left, 2=Centered, 3=Right. Add 4 to the value for a "Toptitle". Add 8 to the value for a "Midtitle". eg. 5 = left-justified toptitle
232
+ "MarginL", # This defines the Left Margin in pixels. It is the distance from the left-hand edge of the screen.The three onscreen margins (MarginL, MarginR, MarginV) define areas in which the subtitle text will be displayed.
233
+ "MarginR", # This defines the Right Margin in pixels. It is the distance from the right-hand edge of the screen.
234
+ "MarginV", # MarginV. This defines the vertical Left Margin in pixels. For a subtitle, it is the distance from the bottom of the screen. For a toptitle, it is the distance from the top of the screen. For a midtitle, the value is ignored - the text will be vertically centred
235
+ "Encoding", #
236
+ ]
237
+
238
+ # Get font settings from styles
239
+ karaoke_styles = self.styles.get("karaoke", {})
240
+ font_path = karaoke_styles.get("font_path")
241
+
242
+ style = Style()
243
+
244
+ style.type = "Style"
245
+ style.Name = self.styles["karaoke"]["ass_name"]
246
+ style.Fontname = self.styles["karaoke"]["font"]
247
+ style.Fontpath = font_path
248
+ style.Fontsize = fontsize
249
+
250
+ style.Alignment = ALIGN_TOP_CENTER
251
+
252
+ # Convert color strings to tuples of integers
253
+ def parse_color(color_str):
254
+ return tuple(int(x.strip()) for x in color_str.split(","))
255
+
256
+ style.PrimaryColour = parse_color(self.styles["karaoke"]["primary_color"])
257
+ style.SecondaryColour = parse_color(self.styles["karaoke"]["secondary_color"])
258
+ style.OutlineColour = parse_color(self.styles["karaoke"]["outline_color"])
259
+ style.BackColour = parse_color(self.styles["karaoke"]["back_color"])
260
+
261
+ # Convert boolean strings to integers (-1 for True, 0 for False)
262
+ def parse_bool(value):
263
+ return -1 if value else 0
264
+
265
+ style.Bold = parse_bool(self.styles["karaoke"]["bold"])
266
+ style.Italic = parse_bool(self.styles["karaoke"]["italic"])
267
+ style.Underline = parse_bool(self.styles["karaoke"]["underline"])
268
+ style.StrikeOut = parse_bool(self.styles["karaoke"]["strike_out"])
269
+
270
+ # Convert numeric strings to appropriate types
271
+ style.ScaleX = int(self.styles["karaoke"]["scale_x"])
272
+ style.ScaleY = int(self.styles["karaoke"]["scale_y"])
273
+ style.Spacing = int(self.styles["karaoke"]["spacing"])
274
+ style.Angle = float(self.styles["karaoke"]["angle"])
275
+ style.BorderStyle = int(self.styles["karaoke"]["border_style"])
276
+ style.Outline = int(self.styles["karaoke"]["outline"])
277
+ style.Shadow = int(self.styles["karaoke"]["shadow"])
278
+ style.MarginL = int(self.styles["karaoke"]["margin_l"])
279
+ style.MarginR = int(self.styles["karaoke"]["margin_r"])
280
+ style.MarginV = int(self.styles["karaoke"]["margin_v"])
281
+ style.Encoding = int(self.styles["karaoke"]["encoding"])
282
+
283
+ a.add_style(style)
284
+
285
+ a.events_format = ["Layer", "Style", "Start", "End", "MarginV", "Text"]
286
+ return a, style
287
+
288
+ def _create_styled_subtitles(
289
+ self,
290
+ screens: List[Union[SectionScreen, LyricsScreen]],
291
+ resolution: Tuple[int, int],
292
+ fontsize: int,
293
+ ) -> ASS:
294
+ """Create styled ASS subtitles from all screens."""
295
+ ass_file, style = self._create_styled_ass_instance(resolution, fontsize)
296
+
297
+ active_lines = []
298
+ previous_instrumental_end = None
299
+
300
+ for screen in screens:
301
+ if isinstance(screen, SectionScreen):
302
+ # Create section marker events (returns tuple of ([event], []))
303
+ section_events, _ = screen.as_ass_events(style=style)
304
+ for event in section_events: # Now we're iterating over the list of events
305
+ ass_file.add(event)
306
+
307
+ previous_instrumental_end = screen.end_time
308
+ active_lines = []
309
+ self.logger.debug(f"Found instrumental section ending at {screen.end_time:.2f}s")
310
+ continue
311
+
312
+ # Process screen and get its events
313
+ self.logger.debug(f"Processing screen with instrumental_end={previous_instrumental_end}")
314
+ # fmt: off
315
+ events, active_lines = screen.as_ass_events(
316
+ style=style,
317
+ previous_active_lines=active_lines,
318
+ previous_instrumental_end=previous_instrumental_end
319
+ )
320
+ # fmt: on
321
+
322
+ # Only reset instrumental end after we've processed the first post-instrumental screen
323
+ if previous_instrumental_end is not None:
324
+ self.logger.debug("Clearing instrumental end time after processing post-instrumental screen")
325
+ previous_instrumental_end = None
326
+
327
+ # Add all events to ASS file
328
+ for event in events:
329
+ ass_file.add(event)
330
+
331
+ return ass_file