lyrics-transcriber 0.36.1__py3-none-any.whl → 0.37.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. lyrics_transcriber/core/controller.py +22 -2
  2. lyrics_transcriber/correction/corrector.py +8 -8
  3. lyrics_transcriber/correction/handlers/base.py +4 -0
  4. lyrics_transcriber/correction/handlers/extend_anchor.py +9 -0
  5. lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
  6. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
  7. lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
  8. lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
  9. lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +182 -0
  10. lyrics_transcriber/frontend/dist/index.html +1 -1
  11. lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +1 -2
  12. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +76 -70
  13. lyrics_transcriber/frontend/src/components/EditModal.tsx +10 -2
  14. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +128 -125
  15. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +1 -3
  16. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +24 -12
  17. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +8 -15
  18. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
  19. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +34 -52
  20. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +39 -31
  21. lyrics_transcriber/frontend/src/components/shared/types.ts +3 -3
  22. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +146 -0
  23. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +23 -24
  24. lyrics_transcriber/frontend/src/types.ts +25 -15
  25. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  26. lyrics_transcriber/output/cdg.py +32 -6
  27. lyrics_transcriber/output/video.py +17 -7
  28. lyrics_transcriber/review/server.py +24 -8
  29. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/METADATA +1 -1
  30. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/RECORD +33 -33
  31. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/entry_points.txt +1 -0
  32. lyrics_transcriber/frontend/dist/assets/index-ztlAYPYT.js +0 -181
  33. lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
  34. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/LICENSE +0 -0
  35. {lyrics_transcriber-0.36.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/WHEEL +0 -0
@@ -91,11 +91,11 @@ class LyricsTranscriber:
91
91
  self.output_config.generate_cdg = False
92
92
  self.output_config.render_video = False
93
93
 
94
- # Basic settings
94
+ # Basic settings with sanitized filenames
95
95
  self.audio_filepath = audio_filepath
96
96
  self.artist = artist
97
97
  self.title = title
98
- self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]
98
+ self.output_prefix = self._create_sanitized_output_prefix(artist, title)
99
99
 
100
100
  # Add after creating necessary folders
101
101
  self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
@@ -126,6 +126,26 @@ class LyricsTranscriber:
126
126
  if self.output_config.render_video:
127
127
  self.logger.info(f" Video resolution: {self.output_config.video_resolution}")
128
128
 
129
def _sanitize_filename(self, filename: str) -> str:
    """Return *filename* with characters that are unsafe in file names replaced.

    Backslash, slash, colon, asterisk, question mark, double quote, angle
    brackets and pipe are each replaced by an underscore, then trailing
    spaces are stripped. Leading spaces and all other characters are kept.

    Args:
        filename: The raw name to clean up; may be empty or None.

    Returns:
        The cleaned name, or "" when *filename* is falsy.
    """
    if not filename:
        return ""
    # One pass over the string instead of nine chained .replace() calls.
    unsafe_to_underscore = str.maketrans({char: "_" for char in '\\/:*?"<>|'})
    return filename.translate(unsafe_to_underscore).rstrip(" ")
139
+
140
def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
    """Build the sanitized prefix used for output file names.

    Args:
        artist: Artist name, or None/empty when unknown.
        title: Track title, or None/empty when unknown.

    Returns:
        "<artist> - <title>" with both parts passed through
        ``_sanitize_filename`` when both are non-empty after sanitizing;
        otherwise the sanitized base name of ``self.audio_filepath`` without
        its extension. ``self.audio_filepath`` must therefore be assigned
        before this method is called.
    """
    sanitized_artist = self._sanitize_filename(artist)
    sanitized_title = self._sanitize_filename(title)
    # Test the sanitized values, not the raw ones: a whitespace-only artist or
    # title is truthy but sanitizes to "", which would otherwise produce a
    # prefix such as "Artist - " with a dangling separator and trailing space.
    if sanitized_artist and sanitized_title:
        return f"{sanitized_artist} - {sanitized_title}"
    audio_stem = os.path.splitext(os.path.basename(self.audio_filepath))[0]
    return self._sanitize_filename(audio_stem)
148
+
129
149
  def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
130
150
  """Initialize available transcription services."""
131
151
  transcribers = {}
@@ -33,14 +33,14 @@ class LyricsCorrector:
33
33
 
34
34
  # Default handlers in order of preference
35
35
  self.handlers = handlers or [
36
- WordCountMatchHandler(),
37
- RelaxedWordCountMatchHandler(),
38
- NoSpacePunctuationMatchHandler(),
39
- SyllablesMatchHandler(),
40
- ExtendAnchorHandler(),
41
- # RepeatCorrectionHandler(),
42
- # SoundAlikeHandler(),
43
- # LevenshteinHandler(),
36
+ # WordCountMatchHandler(logger=self.logger),
37
+ # RelaxedWordCountMatchHandler(logger=self.logger),
38
+ # NoSpacePunctuationMatchHandler(logger=self.logger),
39
+ # SyllablesMatchHandler(logger=self.logger),
40
+ ExtendAnchorHandler(logger=self.logger),
41
+ # RepeatCorrectionHandler(logger=self.logger),
42
+ # SoundAlikeHandler(logger=self.logger),
43
+ # LevenshteinHandler(logger=self.logger),
44
44
  ]
45
45
 
46
46
  @property
@@ -1,5 +1,6 @@
1
1
  from abc import ABC, abstractmethod
2
2
  from typing import List, Optional, Tuple, Dict, Any
3
+ import logging
3
4
 
4
5
  from lyrics_transcriber.types import GapSequence, WordCorrection
5
6
 
@@ -7,6 +8,9 @@ from lyrics_transcriber.types import GapSequence, WordCorrection
7
8
  class GapCorrectionHandler(ABC):
8
9
  """Base class for gap correction handlers."""
9
10
 
11
+ def __init__(self, logger: Optional[logging.Logger] = None):
12
+ self.logger = logger or logging.getLogger(__name__)
13
+
10
14
  @abstractmethod
11
15
  def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
12
16
  """Determine if this handler can process the given gap.
@@ -1,4 +1,5 @@
1
1
  from typing import List, Optional, Tuple, Dict, Any
2
+ import logging
2
3
 
3
4
  from lyrics_transcriber.types import GapSequence, WordCorrection
4
5
  from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -38,13 +39,19 @@ class ExtendAnchorHandler(GapCorrectionHandler):
38
39
  - Leave "youre" and "a" unchanged
39
40
  """
40
41
 
42
+ def __init__(self, logger: Optional[logging.Logger] = None):
43
+ super().__init__(logger)
44
+ self.logger = logger or logging.getLogger(__name__)
45
+
41
46
  def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
42
47
  # Must have reference words
43
48
  if not gap.reference_words:
49
+ self.logger.debug("No reference words available.")
44
50
  return False, {}
45
51
 
46
52
  # Gap must have words
47
53
  if not gap.words:
54
+ self.logger.debug("No words in the gap to process.")
48
55
  return False, {}
49
56
 
50
57
  # At least one word must match between gap and any reference source
@@ -55,6 +62,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
55
62
  for i in range(min(len(gap.words), len(ref_words)))
56
63
  )
57
64
 
65
+ self.logger.debug(f"Can handle gap: {has_match}")
58
66
  return has_match, {}
59
67
 
60
68
  def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -86,6 +94,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
86
94
  reference_positions=reference_positions,
87
95
  )
88
96
  )
97
+ self.logger.debug(f"Validated word '{word}' with confidence {confidence} from sources: {sources}")
89
98
  # No else clause - non-matching words are left unchanged
90
99
 
91
100
  return corrections
@@ -1,4 +1,5 @@
1
1
  from typing import List, Optional, Tuple, Dict, Any
2
+ import logging
2
3
  import re
3
4
 
4
5
  from lyrics_transcriber.types import GapSequence, WordCorrection
@@ -9,6 +10,10 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
9
10
  class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
10
11
  """Handles gaps where reference text matches when spaces and punctuation are removed."""
11
12
 
13
+ def __init__(self, logger: Optional[logging.Logger] = None):
14
+ super().__init__(logger)
15
+ self.logger = logger or logging.getLogger(__name__)
16
+
12
17
  def _remove_spaces_and_punct(self, words: List[str]) -> str:
13
18
  """Join words and remove all whitespace and punctuation."""
14
19
  text = "".join(words).lower()
@@ -18,6 +23,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
18
23
  def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
19
24
  # Must have reference words
20
25
  if not gap.reference_words:
26
+ self.logger.debug("No reference words available.")
21
27
  return False, {}
22
28
 
23
29
  # Get the gap text without spaces and punctuation
@@ -27,8 +33,10 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
27
33
  for words in gap.reference_words.values():
28
34
  ref_text = self._remove_spaces_and_punct(words)
29
35
  if gap_text == ref_text:
36
+ self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
30
37
  return True, {}
31
38
 
39
+ self.logger.debug("No matching reference source found with spaces and punctuation removed.")
32
40
  return False, {}
33
41
 
34
42
  def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -44,6 +52,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
44
52
  matching_source = source
45
53
  reference_words = words
46
54
  reference_words_original = gap.reference_words_original[source]
55
+ self.logger.debug(f"Using source '{source}' for corrections.")
47
56
  break
48
57
 
49
58
  # Calculate reference positions for the matching source
@@ -64,6 +73,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
64
73
  reference_positions=reference_positions,
65
74
  )
66
75
  )
76
+ self.logger.debug(f"Combined words into '{reference_words_original[0]}'.")
67
77
 
68
78
  elif len(gap.words) < len(reference_words):
69
79
  # Single transcribed word -> multiple reference words
@@ -78,21 +88,22 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
78
88
  reference_positions=reference_positions,
79
89
  )
80
90
  )
91
+ self.logger.debug(f"Split word '{gap.words[0]}' into {reference_words_original}.")
81
92
 
82
93
  else:
83
94
  # One-to-one replacement
84
95
  for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
85
96
  if orig_word.lower() != ref_word.lower():
86
- corrections.append(
87
- WordOperations.create_word_replacement_correction(
88
- original_word=orig_word,
89
- corrected_word=ref_word_original,
90
- original_position=gap.transcription_position + i,
91
- source=matching_source,
92
- confidence=1.0,
93
- reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
94
- reference_positions=reference_positions,
95
- )
97
+ correction = WordOperations.create_word_replacement_correction(
98
+ original_word=orig_word,
99
+ corrected_word=ref_word_original,
100
+ original_position=gap.transcription_position + i,
101
+ source=matching_source,
102
+ confidence=1.0,
103
+ reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
104
+ reference_positions=reference_positions,
96
105
  )
106
+ corrections.append(correction)
107
+ self.logger.debug(f"Correction made: {correction}")
97
108
 
98
109
  return corrections
@@ -1,4 +1,5 @@
1
1
  from typing import List, Tuple, Dict, Any, Optional
2
+ import logging
2
3
 
3
4
  from lyrics_transcriber.types import GapSequence, WordCorrection
4
5
  from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,16 +9,23 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
8
9
  class RelaxedWordCountMatchHandler(GapCorrectionHandler):
9
10
  """Handles gaps where at least one reference source has matching word count."""
10
11
 
12
def __init__(self, logger: Optional[logging.Logger] = None):
    """Initialize the handler.

    Args:
        logger: Logger used for debug output. When omitted, the logger
            returned by ``logging.getLogger(__name__)`` is used.
    """
    super().__init__(logger)
    # Keep the fallback here: assigning ``logger`` directly would overwrite
    # the logger with None when none is passed, and every
    # self.logger.debug(...) call in this handler would then raise
    # AttributeError.
    self.logger = logger or logging.getLogger(__name__)
15
+
11
16
  def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
12
17
  # Must have reference words
13
18
  if not gap.reference_words:
19
+ self.logger.debug("No reference words available.")
14
20
  return False, {}
15
21
 
16
22
  # Check if any source has matching word count
17
- for words in gap.reference_words.values():
23
+ for source, words in gap.reference_words.items():
18
24
  if len(words) == gap.length:
25
+ self.logger.debug(f"Source '{source}' has matching word count.")
19
26
  return True, {}
20
27
 
28
+ self.logger.debug("No source with matching word count found.")
21
29
  return False, {}
22
30
 
23
31
  def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -32,24 +40,26 @@ class RelaxedWordCountMatchHandler(GapCorrectionHandler):
32
40
  matching_source = source
33
41
  reference_words = words
34
42
  reference_words_original = gap.reference_words_original[source]
43
+ self.logger.debug(f"Using source '{source}' for corrections.")
35
44
  break
36
45
 
37
46
  # Use the centralized method to calculate reference positions for the matching source
38
47
  reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
48
+ self.logger.debug(f"Calculated reference positions: {reference_positions}")
39
49
 
40
50
  # Since we found a source with matching word count, we can correct using that source
41
51
  for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
42
52
  if orig_word.lower() != ref_word.lower():
43
- corrections.append(
44
- WordOperations.create_word_replacement_correction(
45
- original_word=orig_word,
46
- corrected_word=ref_word_original,
47
- original_position=gap.transcription_position + i,
48
- source=matching_source,
49
- confidence=1.0,
50
- reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
51
- reference_positions=reference_positions,
52
- )
53
+ correction = WordOperations.create_word_replacement_correction(
54
+ original_word=orig_word,
55
+ corrected_word=ref_word_original,
56
+ original_position=gap.transcription_position + i,
57
+ source=matching_source,
58
+ confidence=1.0,
59
+ reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
60
+ reference_positions=reference_positions,
53
61
  )
62
+ corrections.append(correction)
63
+ self.logger.debug(f"Correction made: {correction}")
54
64
 
55
65
  return corrections
@@ -1,4 +1,4 @@
1
- from typing import List, Tuple, Dict, Any
1
+ from typing import List, Tuple, Dict, Any, Optional
2
2
  import spacy
3
3
  import logging
4
4
  import pyphen
@@ -15,9 +15,9 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
15
15
  class SyllablesMatchHandler(GapCorrectionHandler):
16
16
  """Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
17
17
 
18
- def __init__(self):
19
- # Initialize logger first
20
- self.logger = logging.getLogger(__name__)
18
+ def __init__(self, logger: Optional[logging.Logger] = None):
19
+ super().__init__(logger)
20
+ self.logger = logger or logging.getLogger(__name__)
21
21
 
22
22
  # Marking SpacySyllables as used to prevent unused import warning
23
23
  _ = SpacySyllables
@@ -1,4 +1,5 @@
1
1
  from typing import List, Tuple, Dict, Any, Optional
2
+ import logging
2
3
 
3
4
  from lyrics_transcriber.types import GapSequence, WordCorrection
4
5
  from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
@@ -8,21 +9,29 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
8
9
  class WordCountMatchHandler(GapCorrectionHandler):
9
10
  """Handles gaps where reference sources agree and have matching word counts."""
10
11
 
12
+ def __init__(self, logger: Optional[logging.Logger] = None):
13
+ super().__init__(logger)
14
+ self.logger = logger or logging.getLogger(__name__)
15
+
11
16
  def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
12
17
  # Must have reference words
13
18
  if not gap.reference_words:
19
+ self.logger.debug("No reference words available.")
14
20
  return False, {}
15
21
 
16
22
  ref_words_lists = list(gap.reference_words.values())
17
23
 
18
24
  # All sources must have same number of words as gap
19
25
  if not all(len(words) == gap.length for words in ref_words_lists):
26
+ self.logger.debug("Not all sources have the same number of words as the gap.")
20
27
  return False, {}
21
28
 
22
29
  # If we have multiple sources, they must all agree
23
30
  if len(ref_words_lists) > 1 and not all(words == ref_words_lists[0] for words in ref_words_lists[1:]):
31
+ self.logger.debug("Not all sources agree on the words.")
24
32
  return False, {}
25
33
 
34
+ self.logger.debug("All sources agree and have matching word counts.")
26
35
  return True, {}
27
36
 
28
37
  def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
@@ -39,16 +48,16 @@ class WordCountMatchHandler(GapCorrectionHandler):
39
48
  # Since we know all reference sources agree, we can correct all words in the gap
40
49
  for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
41
50
  if orig_word.lower() != ref_word.lower():
42
- corrections.append(
43
- WordOperations.create_word_replacement_correction(
44
- original_word=orig_word,
45
- corrected_word=ref_word_original,
46
- original_position=gap.transcription_position + i,
47
- source=sources,
48
- confidence=1.0,
49
- reason="WordCountMatchHandler: Reference sources had same word count as gap",
50
- reference_positions=reference_positions,
51
- )
51
+ correction = WordOperations.create_word_replacement_correction(
52
+ original_word=orig_word,
53
+ corrected_word=ref_word_original,
54
+ original_position=gap.transcription_position + i,
55
+ source=sources,
56
+ confidence=1.0,
57
+ reason="WordCountMatchHandler: Reference sources had same word count as gap",
58
+ reference_positions=reference_positions,
52
59
  )
60
+ corrections.append(correction)
61
+ self.logger.debug(f"Correction made: {correction}")
53
62
 
54
63
  return corrections