lyrics-transcriber 0.33.0__tar.gz → 0.34.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/PKG-INFO +1 -1
  2. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/controller.py +10 -6
  3. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/corrector.py +9 -1
  4. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/base_lyrics_provider.py +28 -7
  5. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/genius.py +33 -6
  6. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/generator.py +32 -30
  7. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/pyproject.toml +1 -1
  8. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/LICENSE +0 -0
  9. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/README.md +0 -0
  10. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/__init__.py +0 -0
  11. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/cli/__init__.py +0 -0
  12. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/cli/cli_main.py +0 -0
  13. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/__init__.py +0 -0
  14. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/core/config.py +0 -0
  15. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/anchor_sequence.py +0 -0
  16. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/__init__.py +0 -0
  17. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/base.py +0 -0
  18. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/extend_anchor.py +0 -0
  19. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/levenshtein.py +0 -0
  20. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/no_space_punct_match.py +0 -0
  21. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +0 -0
  22. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/repeat.py +0 -0
  23. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/sound_alike.py +0 -0
  24. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/syllables_match.py +0 -0
  25. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/word_count_match.py +0 -0
  26. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/handlers/word_operations.py +0 -0
  27. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/phrase_analyzer.py +0 -0
  28. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/correction/text_utils.py +0 -0
  29. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/lyrics/spotify.py +0 -0
  30. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/__init__.py +0 -0
  31. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/__init__.py +0 -0
  32. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass.py +0 -0
  33. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/ass_specs.txt +0 -0
  34. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/config.py +0 -0
  35. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/constants.py +0 -0
  36. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/event.py +0 -0
  37. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/formatters.py +0 -0
  38. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/lyrics_line.py +0 -0
  39. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/lyrics_screen.py +0 -0
  40. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/section_detector.py +0 -0
  41. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/section_screen.py +0 -0
  42. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/ass/style.py +0 -0
  43. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdg.py +0 -0
  44. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  45. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/cdg.py +0 -0
  46. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/composer.py +0 -0
  47. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/config.py +0 -0
  48. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  49. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  50. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/pack.py +0 -0
  51. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/render.py +0 -0
  52. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  53. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  54. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  55. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  56. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  57. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  58. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  59. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  60. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  61. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  62. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  63. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  64. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/cdgmaker/utils.py +0 -0
  65. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  66. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  67. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  68. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  69. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  70. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/arial.ttf +0 -0
  71. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  72. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  73. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/lyrics_file.py +0 -0
  74. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/plain_text.py +0 -0
  75. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/segment_resizer.py +0 -0
  76. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/subtitles.py +0 -0
  77. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/output/video.py +0 -0
  78. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/review/__init__.py +0 -0
  79. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/review/server.py +0 -0
  80. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/__init__.py +0 -0
  81. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/storage/dropbox.py +0 -0
  82. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/audioshake.py +0 -0
  83. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/base_transcriber.py +0 -0
  84. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/transcribers/whisper.py +0 -0
  85. {lyrics_transcriber-0.33.0 → lyrics_transcriber-0.34.0}/lyrics_transcriber/types.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lyrics-transcriber
3
- Version: 0.33.0
3
+ Version: 0.34.0
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  License: MIT
6
6
  Author: Andrew Beveridge
@@ -1,10 +1,9 @@
1
1
  import difflib
2
- import json
3
2
  import os
4
3
  import logging
5
4
  from dataclasses import dataclass, field
6
5
  from typing import Dict, Optional, List
7
- from lyrics_transcriber.types import LyricsData, PhraseType, TranscriptionResult, CorrectionResult, AnchorSequence, GapSequence, PhraseScore
6
+ from lyrics_transcriber.types import LyricsData, TranscriptionResult, CorrectionResult
8
7
  from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber
9
8
  from lyrics_transcriber.transcribers.audioshake import AudioShakeTranscriber, AudioShakeConfig
10
9
  from lyrics_transcriber.transcribers.whisper import WhisperTranscriber, WhisperConfig
@@ -205,12 +204,17 @@ class LyricsTranscriber:
205
204
  if self.output_config.run_transcription:
206
205
  self.transcribe()
207
206
 
208
- # Step 3: Process and correct lyrics if enabled
209
- if self.output_config.run_correction:
207
+ # Step 3: Process and correct lyrics if enabled AND we have transcription results
208
+ if self.output_config.run_correction and self.results.transcription_results:
210
209
  self.correct_lyrics()
210
+ elif self.output_config.run_correction:
211
+ self.logger.info("Skipping lyrics correction - no transcription results available")
211
212
 
212
- # Step 4: Generate outputs based on what's enabled and available
213
- self.generate_outputs()
213
+ # Step 4: Generate outputs based on what we have
214
+ if self.results.transcription_corrected or self.results.lyrics_results:
215
+ self.generate_outputs()
216
+ else:
217
+ self.logger.warning("No corrected transcription or lyrics available. Skipping output generation.")
214
218
 
215
219
  self.logger.info("Processing completed successfully")
216
220
  return self.results
@@ -28,7 +28,8 @@ class LyricsCorrector:
28
28
  logger: Optional[logging.Logger] = None,
29
29
  ):
30
30
  self.logger = logger or logging.getLogger(__name__)
31
- self.anchor_finder = anchor_finder or AnchorSequenceFinder(cache_dir=cache_dir, logger=self.logger)
31
+ self._anchor_finder = anchor_finder
32
+ self._cache_dir = cache_dir
32
33
 
33
34
  # Default handlers in order of preference
34
35
  self.handlers = handlers or [
@@ -42,6 +43,13 @@ class LyricsCorrector:
42
43
  LevenshteinHandler(),
43
44
  ]
44
45
 
46
@property
def anchor_finder(self) -> AnchorSequenceFinder:
    """Return the anchor finder, building it on first access.

    If no finder was supplied to the constructor, an ``AnchorSequenceFinder``
    is created from the stored cache directory and this instance's logger,
    then remembered so later accesses return the same object.
    """
    finder = self._anchor_finder
    if finder is None:
        # Deferred construction: nothing is built until somebody asks for it.
        finder = AnchorSequenceFinder(cache_dir=self._cache_dir, logger=self.logger)
        self._anchor_finder = finder
    return finder
52
+
45
53
  def run(self, transcription_results: List[TranscriptionResult], lyrics_results: List[LyricsData]) -> CorrectionResult:
46
54
  """Execute the correction process."""
47
55
  if not transcription_results:
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
  import os
8
8
  from abc import ABC, abstractmethod
9
9
  from lyrics_transcriber.types import LyricsData
10
+ from karaoke_lyrics_processor import KaraokeLyricsProcessor
10
11
 
11
12
 
12
13
  @dataclass
@@ -17,6 +18,7 @@ class LyricsProviderConfig:
17
18
  spotify_cookie: Optional[str] = None
18
19
  cache_dir: Optional[str] = None
19
20
  audio_filepath: Optional[str] = None
21
+ max_line_length: int = 36 # New config parameter for KaraokeLyricsProcessor
20
22
 
21
23
 
22
24
  class BaseLyricsProvider(ABC):
@@ -26,6 +28,7 @@ class BaseLyricsProvider(ABC):
26
28
  self.logger = logger or logging.getLogger(__name__)
27
29
  self.cache_dir = Path(config.cache_dir) if config.cache_dir else None
28
30
  self.audio_filepath = config.audio_filepath
31
+ self.max_line_length = config.max_line_length
29
32
  if self.cache_dir:
30
33
  self.cache_dir.mkdir(parents=True, exist_ok=True)
31
34
  self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
@@ -35,21 +38,22 @@ class BaseLyricsProvider(ABC):
35
38
  if not self.cache_dir:
36
39
  return self._fetch_and_convert_result(artist, title)
37
40
 
38
- file_hash = self._get_file_hash(self.audio_filepath)
39
- raw_cache_path = self._get_cache_path(file_hash, "raw")
41
+ # Use artist and title for cache key instead of audio file hash
42
+ cache_key = self._get_artist_title_hash(artist, title)
43
+ raw_cache_path = self._get_cache_path(cache_key, "raw")
40
44
 
41
45
  # Try to load from cache first
42
46
  raw_data = self._load_from_cache(raw_cache_path)
43
47
  if raw_data is not None:
44
48
  self.logger.info(f"Using cached lyrics for {artist} - {title}")
45
- return self._save_and_convert_result(file_hash, raw_data)
49
+ return self._save_and_convert_result(cache_key, raw_data)
46
50
 
47
51
  # If not in cache, fetch from source
48
52
  raw_result = self._fetch_data_from_source(artist, title)
49
53
  if raw_result:
50
54
  # Save raw API response
51
55
  self._save_to_cache(raw_cache_path, raw_result)
52
- return self._save_and_convert_result(file_hash, raw_result)
56
+ return self._save_and_convert_result(cache_key, raw_result)
53
57
 
54
58
  return None
55
59
 
@@ -95,13 +99,30 @@ class BaseLyricsProvider(ABC):
95
99
  self.logger.warning(f"Cache file {cache_path} is corrupted")
96
100
  return None
97
101
 
102
def _process_lyrics(self, lyrics_data: LyricsData) -> LyricsData:
    """Run the lyrics text through KaraokeLyricsProcessor.

    Args:
        lyrics_data: Lyrics whose ``lyrics`` text is handed to the processor.

    Returns:
        A new ``LyricsData`` carrying the processor's output as ``lyrics``;
        ``source``, ``segments`` and ``metadata`` are copied over unchanged.
    """
    # Reuse the formatter of this logger's first handler when it has one.
    attached_handlers = self.logger.handlers
    formatter = attached_handlers[0].formatter if attached_handlers else None

    karaoke_processor = KaraokeLyricsProcessor(
        log_level=self.logger.getEffectiveLevel(),
        log_formatter=formatter,
        input_lyrics_text=lyrics_data.lyrics,
        max_line_length=self.max_line_length,
    )
    processed_text = karaoke_processor.process()

    # Build a fresh object rather than mutating the one passed in.
    return LyricsData(
        source=lyrics_data.source,
        lyrics=processed_text,
        segments=lyrics_data.segments,
        metadata=lyrics_data.metadata,
    )
114
+
98
115
  def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
99
- """Convert raw result to standardized format, save to cache, and return."""
116
+ """Convert raw result to standardized format, process lyrics, save to cache, and return."""
100
117
  converted_cache_path = self._get_cache_path(cache_key, "converted")
101
118
  converted_result = self._convert_result_format(raw_data)
119
+
120
+ # Process the lyrics
121
+ processed_result = self._process_lyrics(converted_result)
122
+
102
123
  # Convert to dictionary before saving to cache
103
- self._save_to_cache(converted_cache_path, converted_result.to_dict())
104
- return converted_result
124
+ self._save_to_cache(converted_cache_path, processed_result.to_dict())
125
+ return processed_result
105
126
 
106
127
  def _fetch_and_convert_result(self, artist: str, title: str) -> Optional[LyricsData]:
107
128
  """Fetch and convert result when caching is disabled."""
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import re
2
3
  from typing import Optional, Dict, Any
3
4
  import lyricsgenius
4
5
  from lyrics_transcriber.types import LyricsData, LyricsMetadata
@@ -13,9 +14,15 @@ class GeniusProvider(BaseLyricsProvider):
13
14
  self.api_token = config.genius_api_token
14
15
  self.client = None
15
16
  if self.api_token:
16
- self.client = lyricsgenius.Genius(self.api_token)
17
- self.client.verbose = False
18
- self.client.remove_section_headers = True
17
+ self.client = lyricsgenius.Genius(
18
+ self.api_token,
19
+ verbose=(logger.getEffectiveLevel() == logging.DEBUG if logger else False),
20
+ remove_section_headers=True, # Remove [Chorus], [Verse], etc.
21
+ skip_non_songs=True, # Skip track listings and other non-song results
22
+ timeout=10, # Reasonable timeout for requests
23
+ retries=3, # Number of retries for failed requests
24
+ sleep_time=1, # Small delay between requests to be nice to the API
25
+ )
19
26
 
20
27
  def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
21
28
  """Fetch raw song data from Genius API."""
@@ -35,6 +42,9 @@ class GeniusProvider(BaseLyricsProvider):
35
42
 
36
43
  def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
37
44
  """Convert Genius's raw API response to standardized format."""
45
+ # Clean the lyrics before processing
46
+ lyrics = self._clean_lyrics(raw_data.get("lyrics", ""))
47
+
38
48
  # Extract release date components if available
39
49
  release_date = None
40
50
  if release_components := raw_data.get("release_date_components"):
@@ -68,6 +78,23 @@ class GeniusProvider(BaseLyricsProvider):
68
78
  )
69
79
 
70
80
  # Create result object
71
- return LyricsData(
72
- source="genius", lyrics=raw_data.get("lyrics", ""), segments=[], metadata=metadata
73
- ) # Genius doesn't provide timestamp data
81
+ return LyricsData(source="genius", lyrics=lyrics, segments=[], metadata=metadata)
82
+
83
+ def _clean_lyrics(self, lyrics: str) -> str:
84
+ """Clean and process lyrics from Genius to remove unwanted content."""
85
+
86
+ lyrics = lyrics.replace("\\n", "\n")
87
+ lyrics = re.sub(r"You might also like", "", lyrics)
88
+ lyrics = re.sub(
89
+ r".*?Lyrics([A-Z])", r"\1", lyrics
90
+ ) # Remove the song name and word "Lyrics" if this has a non-newline char at the start
91
+ lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics) # Remove this example: 27 ContributorsSex Bomb Lyrics
92
+ lyrics = re.sub(
93
+ r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
94
+ ) # Remove this example: See Tom Jones LiveGet tickets as low as $71
95
+ lyrics = re.sub(r"[0-9]+Embed$", "", lyrics) # Remove the word "Embed" at end of line with preceding numbers if found
96
+ lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
97
+ lyrics = re.sub(r"^Embed$", r"", lyrics) # Remove the word "Embed" if it has been tacked onto a word at the end of a line
98
+ lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics) # Remove lines containing square brackets
99
+ # add any additional cleaning rules here
100
+ return lyrics
@@ -95,7 +95,7 @@ class OutputGenerator:
95
95
 
96
96
  def generate_outputs(
97
97
  self,
98
- transcription_corrected: CorrectionResult,
98
+ transcription_corrected: Optional[CorrectionResult],
99
99
  lyrics_results: List[LyricsData],
100
100
  output_prefix: str,
101
101
  audio_filepath: str,
@@ -110,35 +110,37 @@ class OutputGenerator:
110
110
  for lyrics_data in lyrics_results:
111
111
  self.plain_text.write_lyrics(lyrics_data, output_prefix)
112
112
 
113
- # Write original (uncorrected) transcription
114
- outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
115
-
116
- # Resize corrected segments to ensure none are longer than max_line_length
117
- resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
118
- transcription_corrected.resized_segments = resized_segments
119
- outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
120
-
121
- # Write corrected lyrics as plain text
122
- outputs.corrected_txt = self.plain_text.write_corrected_lyrics(resized_segments, output_prefix)
123
-
124
- # Generate LRC using LyricsFileGenerator
125
- outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)
126
-
127
- # Generate CDG file if requested
128
- if self.config.generate_cdg:
129
- outputs.cdg, outputs.mp3, outputs.cdg_zip = self.cdg.generate_cdg(
130
- segments=resized_segments,
131
- audio_file=audio_filepath,
132
- title=title or output_prefix,
133
- artist=artist or "",
134
- cdg_styles=self.config.styles["cdg"],
135
- )
136
-
137
- # Generate video if requested
138
- if self.config.render_video:
139
- # Generate ASS subtitles
140
- outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix, audio_filepath)
141
- outputs.video = self.video.generate_video(outputs.ass, audio_filepath, output_prefix)
113
+ # Only process transcription-related outputs if we have transcription data
114
+ if transcription_corrected:
115
+ # Write original (uncorrected) transcription
116
+ outputs.original_txt = self.plain_text.write_original_transcription(transcription_corrected, output_prefix)
117
+
118
+ # Resize corrected segments to ensure none are longer than max_line_length
119
+ resized_segments = self.segment_resizer.resize_segments(transcription_corrected.corrected_segments)
120
+ transcription_corrected.resized_segments = resized_segments
121
+ outputs.corrections_json = self.write_corrections_data(transcription_corrected, output_prefix)
122
+
123
+ # Write corrected lyrics as plain text
124
+ outputs.corrected_txt = self.plain_text.write_corrected_lyrics(resized_segments, output_prefix)
125
+
126
+ # Generate LRC using LyricsFileGenerator
127
+ outputs.lrc = self.lyrics_file.generate_lrc(resized_segments, output_prefix)
128
+
129
+ # Generate CDG file if requested
130
+ if self.config.generate_cdg:
131
+ outputs.cdg, outputs.mp3, outputs.cdg_zip = self.cdg.generate_cdg(
132
+ segments=resized_segments,
133
+ audio_file=audio_filepath,
134
+ title=title or output_prefix,
135
+ artist=artist or "",
136
+ cdg_styles=self.config.styles["cdg"],
137
+ )
138
+
139
+ # Generate video if requested
140
+ if self.config.render_video:
141
+ # Generate ASS subtitles
142
+ outputs.ass = self.subtitle.generate_ass(resized_segments, output_prefix, audio_filepath)
143
+ outputs.video = self.video.generate_video(outputs.ass, audio_filepath, output_prefix)
142
144
 
143
145
  return outputs
144
146
 
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "lyrics-transcriber"
3
- version = "0.33.0"
3
+ version = "0.34.0"
4
4
  description = "Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify"
5
5
  authors = ["Andrew Beveridge <andrew@beveridge.uk>"]
6
6
  license = "MIT"