lyrics-transcriber 0.35.1__py3-none-any.whl → 0.37.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/cli/cli_main.py +2 -0
- lyrics_transcriber/core/config.py +1 -1
- lyrics_transcriber/core/controller.py +35 -2
- lyrics_transcriber/correction/corrector.py +8 -8
- lyrics_transcriber/correction/handlers/base.py +4 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +9 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +21 -10
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +21 -11
- lyrics_transcriber/correction/handlers/syllables_match.py +4 -4
- lyrics_transcriber/correction/handlers/word_count_match.py +19 -10
- lyrics_transcriber/frontend/dist/assets/index-BNNbsbVN.js +182 -0
- lyrics_transcriber/frontend/dist/index.html +1 -1
- lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +18 -7
- lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +28 -27
- lyrics_transcriber/frontend/src/components/DetailsModal.tsx +108 -12
- lyrics_transcriber/frontend/src/components/EditModal.tsx +10 -2
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +145 -141
- lyrics_transcriber/frontend/src/components/ReferenceView.tsx +7 -2
- lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +24 -12
- lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +8 -15
- lyrics_transcriber/frontend/src/components/WordEditControls.tsx +3 -3
- lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +36 -51
- lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +17 -19
- lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +41 -33
- lyrics_transcriber/frontend/src/components/shared/types.ts +6 -6
- lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +146 -0
- lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +24 -25
- lyrics_transcriber/frontend/src/types.ts +24 -23
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/lyrics/base_lyrics_provider.py +1 -0
- lyrics_transcriber/lyrics/file_provider.py +89 -0
- lyrics_transcriber/output/cdg.py +32 -6
- lyrics_transcriber/output/video.py +17 -7
- lyrics_transcriber/review/server.py +24 -8
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/METADATA +1 -1
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/RECORD +39 -38
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/frontend/dist/assets/index-CQCER5Fo.js +0 -181
- lyrics_transcriber/frontend/src/components/shared/utils/newlineCalculator.ts +0 -37
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/LICENSE +0 -0
- {lyrics_transcriber-0.35.1.dist-info → lyrics_transcriber-0.37.0.dist-info}/WHEEL +0 -0
@@ -39,6 +39,7 @@ def create_arg_parser() -> argparse.ArgumentParser:
|
|
39
39
|
song_group = parser.add_argument_group("Song Identification")
|
40
40
|
song_group.add_argument("--artist", help="Song artist for lyrics lookup and auto-correction")
|
41
41
|
song_group.add_argument("--title", help="Song title for lyrics lookup and auto-correction")
|
42
|
+
song_group.add_argument("--lyrics_file", help="Path to file containing lyrics (txt, docx, or rtf format)")
|
42
43
|
|
43
44
|
# API Credentials
|
44
45
|
api_group = parser.add_argument_group("API Credentials")
|
@@ -134,6 +135,7 @@ def create_configs(args: argparse.Namespace, env_config: Dict[str, str]) -> tupl
|
|
134
135
|
lyrics_config = LyricsConfig(
|
135
136
|
genius_api_token=args.genius_api_token or env_config.get("genius_api_token"),
|
136
137
|
spotify_cookie=args.spotify_cookie or env_config.get("spotify_cookie"),
|
138
|
+
lyrics_file=args.lyrics_file,
|
137
139
|
)
|
138
140
|
|
139
141
|
output_config = OutputConfig(
|
@@ -13,6 +13,7 @@ from lyrics_transcriber.lyrics.spotify import SpotifyProvider
|
|
13
13
|
from lyrics_transcriber.output.generator import OutputGenerator
|
14
14
|
from lyrics_transcriber.correction.corrector import LyricsCorrector
|
15
15
|
from lyrics_transcriber.core.config import TranscriberConfig, LyricsConfig, OutputConfig
|
16
|
+
from lyrics_transcriber.lyrics.file_provider import FileProvider
|
16
17
|
|
17
18
|
|
18
19
|
@dataclass
|
@@ -90,11 +91,11 @@ class LyricsTranscriber:
|
|
90
91
|
self.output_config.generate_cdg = False
|
91
92
|
self.output_config.render_video = False
|
92
93
|
|
93
|
-
# Basic settings
|
94
|
+
# Basic settings with sanitized filenames
|
94
95
|
self.audio_filepath = audio_filepath
|
95
96
|
self.artist = artist
|
96
97
|
self.title = title
|
97
|
-
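self.output_prefix = f"{artist} - {title}" if artist and title else os.path.splitext(os.path.basename(audio_filepath))[0]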
|
98
|
+
self.output_prefix = self._create_sanitized_output_prefix(artist, title)
|
98
99
|
|
99
100
|
# Add after creating necessary folders
|
100
101
|
self.logger.debug(f"Using cache directory: {self.output_config.cache_dir}")
|
@@ -125,6 +126,26 @@ class LyricsTranscriber:
|
|
125
126
|
if self.output_config.render_video:
|
126
127
|
self.logger.info(f" Video resolution: {self.output_config.video_resolution}")
|
127
128
|
|
129
|
+
def _sanitize_filename(self, filename: str) -> str:
|
130
|
+
"""Replace or remove characters that are unsafe for filenames."""
|
131
|
+
if not filename:
|
132
|
+
return ""
|
133
|
+
# Replace problematic characters with underscores
|
134
|
+
for char in ["\\", "/", ":", "*", "?", '"', "<", ">", "|"]:
|
135
|
+
filename = filename.replace(char, "_")
|
136
|
+
# Remove any trailing spaces
|
137
|
+
filename = filename.rstrip(" ")
|
138
|
+
return filename
|
139
|
+
|
140
|
+
def _create_sanitized_output_prefix(self, artist: Optional[str], title: Optional[str]) -> str:
|
141
|
+
"""Create a sanitized output prefix from artist and title."""
|
142
|
+
if artist and title:
|
143
|
+
sanitized_artist = self._sanitize_filename(artist)
|
144
|
+
sanitized_title = self._sanitize_filename(title)
|
145
|
+
return f"{sanitized_artist} - {sanitized_title}"
|
146
|
+
else:
|
147
|
+
return self._sanitize_filename(os.path.splitext(os.path.basename(self.audio_filepath))[0])
|
148
|
+
|
128
149
|
def _initialize_transcribers(self) -> Dict[str, BaseTranscriber]:
|
129
150
|
"""Initialize available transcription services."""
|
130
151
|
transcribers = {}
|
@@ -171,10 +192,16 @@ class LyricsTranscriber:
|
|
171
192
|
provider_config = LyricsProviderConfig(
|
172
193
|
genius_api_token=self.lyrics_config.genius_api_token,
|
173
194
|
spotify_cookie=self.lyrics_config.spotify_cookie,
|
195
|
+
lyrics_file=self.lyrics_config.lyrics_file,
|
174
196
|
cache_dir=self.output_config.cache_dir,
|
175
197
|
audio_filepath=self.audio_filepath,
|
176
198
|
)
|
177
199
|
|
200
|
+
if provider_config.lyrics_file and os.path.exists(provider_config.lyrics_file):
|
201
|
+
self.logger.debug(f"Initializing File lyrics provider with file: {provider_config.lyrics_file}")
|
202
|
+
providers["file"] = FileProvider(config=provider_config, logger=self.logger)
|
203
|
+
return providers
|
204
|
+
|
178
205
|
if provider_config.genius_api_token:
|
179
206
|
self.logger.debug("Initializing Genius lyrics provider")
|
180
207
|
providers["genius"] = GeniusProvider(config=provider_config, logger=self.logger)
|
@@ -196,13 +223,19 @@ class LyricsTranscriber:
|
|
196
223
|
def process(self) -> LyricsControllerResult:
|
197
224
|
"""Main processing method that orchestrates the entire workflow."""
|
198
225
|
|
226
|
+
self.logger.info(f"LyricsTranscriber controller beginning processing for {self.artist} - {self.title}")
|
227
|
+
|
199
228
|
# Step 1: Fetch lyrics if enabled and artist/title are provided
|
200
229
|
if self.output_config.fetch_lyrics and self.artist and self.title:
|
201
230
|
self.fetch_lyrics()
|
231
|
+
else:
|
232
|
+
self.logger.info("Skipping lyrics fetching - no artist/title provided or fetching disabled")
|
202
233
|
|
203
234
|
# Step 2: Run transcription if enabled
|
204
235
|
if self.output_config.run_transcription:
|
205
236
|
self.transcribe()
|
237
|
+
else:
|
238
|
+
self.logger.info("Skipping transcription - transcription disabled")
|
206
239
|
|
207
240
|
# Step 3: Process and correct lyrics if enabled AND we have transcription results
|
208
241
|
if self.output_config.run_correction and self.results.transcription_results:
|
@@ -33,14 +33,14 @@ class LyricsCorrector:
|
|
33
33
|
|
34
34
|
# Default handlers in order of preference
|
35
35
|
self.handlers = handlers or [
|
36
|
-
WordCountMatchHandler(),
|
37
|
-
RelaxedWordCountMatchHandler(),
|
38
|
-
NoSpacePunctuationMatchHandler(),
|
39
|
-
SyllablesMatchHandler(),
|
40
|
-
ExtendAnchorHandler(),
|
41
|
-
# RepeatCorrectionHandler(),
|
42
|
-
# SoundAlikeHandler(),
|
43
|
-
# LevenshteinHandler(),
|
36
|
+
# WordCountMatchHandler(logger=self.logger),
|
37
|
+
# RelaxedWordCountMatchHandler(logger=self.logger),
|
38
|
+
# NoSpacePunctuationMatchHandler(logger=self.logger),
|
39
|
+
# SyllablesMatchHandler(logger=self.logger),
|
40
|
+
ExtendAnchorHandler(logger=self.logger),
|
41
|
+
# RepeatCorrectionHandler(logger=self.logger),
|
42
|
+
# SoundAlikeHandler(logger=self.logger),
|
43
|
+
# LevenshteinHandler(logger=self.logger),
|
44
44
|
]
|
45
45
|
|
46
46
|
@property
|
@@ -1,5 +1,6 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from typing import List, Optional, Tuple, Dict, Any
|
3
|
+
import logging
|
3
4
|
|
4
5
|
from lyrics_transcriber.types import GapSequence, WordCorrection
|
5
6
|
|
@@ -7,6 +8,9 @@ from lyrics_transcriber.types import GapSequence, WordCorrection
|
|
7
8
|
class GapCorrectionHandler(ABC):
|
8
9
|
"""Base class for gap correction handlers."""
|
9
10
|
|
11
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
12
|
+
self.logger = logger or logging.getLogger(__name__)
|
13
|
+
|
10
14
|
@abstractmethod
|
11
15
|
def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
|
12
16
|
"""Determine if this handler can process the given gap.
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from typing import List, Optional, Tuple, Dict, Any
|
2
|
+
import logging
|
2
3
|
|
3
4
|
from lyrics_transcriber.types import GapSequence, WordCorrection
|
4
5
|
from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
|
@@ -38,13 +39,19 @@ class ExtendAnchorHandler(GapCorrectionHandler):
|
|
38
39
|
- Leave "youre" and "a" unchanged
|
39
40
|
"""
|
40
41
|
|
42
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
43
|
+
super().__init__(logger)
|
44
|
+
self.logger = logger or logging.getLogger(__name__)
|
45
|
+
|
41
46
|
def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
|
42
47
|
# Must have reference words
|
43
48
|
if not gap.reference_words:
|
49
|
+
self.logger.debug("No reference words available.")
|
44
50
|
return False, {}
|
45
51
|
|
46
52
|
# Gap must have words
|
47
53
|
if not gap.words:
|
54
|
+
self.logger.debug("No words in the gap to process.")
|
48
55
|
return False, {}
|
49
56
|
|
50
57
|
# At least one word must match between gap and any reference source
|
@@ -55,6 +62,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
|
|
55
62
|
for i in range(min(len(gap.words), len(ref_words)))
|
56
63
|
)
|
57
64
|
|
65
|
+
self.logger.debug(f"Can handle gap: {has_match}")
|
58
66
|
return has_match, {}
|
59
67
|
|
60
68
|
def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
|
@@ -86,6 +94,7 @@ class ExtendAnchorHandler(GapCorrectionHandler):
|
|
86
94
|
reference_positions=reference_positions,
|
87
95
|
)
|
88
96
|
)
|
97
|
+
self.logger.debug(f"Validated word '{word}' with confidence {confidence} from sources: {sources}")
|
89
98
|
# No else clause - non-matching words are left unchanged
|
90
99
|
|
91
100
|
return corrections
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from typing import List, Optional, Tuple, Dict, Any
|
2
|
+
import logging
|
2
3
|
import re
|
3
4
|
|
4
5
|
from lyrics_transcriber.types import GapSequence, WordCorrection
|
@@ -9,6 +10,10 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
|
|
9
10
|
class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
10
11
|
"""Handles gaps where reference text matches when spaces and punctuation are removed."""
|
11
12
|
|
13
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
14
|
+
super().__init__(logger)
|
15
|
+
self.logger = logger or logging.getLogger(__name__)
|
16
|
+
|
12
17
|
def _remove_spaces_and_punct(self, words: List[str]) -> str:
|
13
18
|
"""Join words and remove all whitespace and punctuation."""
|
14
19
|
text = "".join(words).lower()
|
@@ -18,6 +23,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
|
18
23
|
def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
|
19
24
|
# Must have reference words
|
20
25
|
if not gap.reference_words:
|
26
|
+
self.logger.debug("No reference words available.")
|
21
27
|
return False, {}
|
22
28
|
|
23
29
|
# Get the gap text without spaces and punctuation
|
@@ -27,8 +33,10 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
|
27
33
|
for words in gap.reference_words.values():
|
28
34
|
ref_text = self._remove_spaces_and_punct(words)
|
29
35
|
if gap_text == ref_text:
|
36
|
+
self.logger.debug("Found a matching reference source with spaces and punctuation removed.")
|
30
37
|
return True, {}
|
31
38
|
|
39
|
+
self.logger.debug("No matching reference source found with spaces and punctuation removed.")
|
32
40
|
return False, {}
|
33
41
|
|
34
42
|
def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
|
@@ -44,6 +52,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
|
44
52
|
matching_source = source
|
45
53
|
reference_words = words
|
46
54
|
reference_words_original = gap.reference_words_original[source]
|
55
|
+
self.logger.debug(f"Using source '{source}' for corrections.")
|
47
56
|
break
|
48
57
|
|
49
58
|
# Calculate reference positions for the matching source
|
@@ -64,6 +73,7 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
|
64
73
|
reference_positions=reference_positions,
|
65
74
|
)
|
66
75
|
)
|
76
|
+
self.logger.debug(f"Combined words into '{reference_words_original[0]}'.")
|
67
77
|
|
68
78
|
elif len(gap.words) < len(reference_words):
|
69
79
|
# Single transcribed word -> multiple reference words
|
@@ -78,21 +88,22 @@ class NoSpacePunctuationMatchHandler(GapCorrectionHandler):
|
|
78
88
|
reference_positions=reference_positions,
|
79
89
|
)
|
80
90
|
)
|
91
|
+
self.logger.debug(f"Split word '{gap.words[0]}' into {reference_words_original}.")
|
81
92
|
|
82
93
|
else:
|
83
94
|
# One-to-one replacement
|
84
95
|
for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
|
85
96
|
if orig_word.lower() != ref_word.lower():
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
reference_positions=reference_positions,
|
95
|
-
)
|
97
|
+
correction = WordOperations.create_word_replacement_correction(
|
98
|
+
original_word=orig_word,
|
99
|
+
corrected_word=ref_word_original,
|
100
|
+
original_position=gap.transcription_position + i,
|
101
|
+
source=matching_source,
|
102
|
+
confidence=1.0,
|
103
|
+
reason=f"NoSpacePunctuationMatchHandler: Source '{matching_source}' matched when spaces and punctuation removed",
|
104
|
+
reference_positions=reference_positions,
|
96
105
|
)
|
106
|
+
corrections.append(correction)
|
107
|
+
self.logger.debug(f"Correction made: {correction}")
|
97
108
|
|
98
109
|
return corrections
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from typing import List, Tuple, Dict, Any, Optional
|
2
|
+
import logging
|
2
3
|
|
3
4
|
from lyrics_transcriber.types import GapSequence, WordCorrection
|
4
5
|
from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
|
@@ -8,16 +9,23 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
|
|
8
9
|
class RelaxedWordCountMatchHandler(GapCorrectionHandler):
|
9
10
|
"""Handles gaps where at least one reference source has matching word count."""
|
10
11
|
|
12
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
13
|
+
super().__init__(logger)
|
14
|
+
self.logger = logger
|
15
|
+
|
11
16
|
def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
|
12
17
|
# Must have reference words
|
13
18
|
if not gap.reference_words:
|
19
|
+
self.logger.debug("No reference words available.")
|
14
20
|
return False, {}
|
15
21
|
|
16
22
|
# Check if any source has matching word count
|
17
|
-
for words in gap.reference_words.values():
|
23
|
+
for source, words in gap.reference_words.items():
|
18
24
|
if len(words) == gap.length:
|
25
|
+
self.logger.debug(f"Source '{source}' has matching word count.")
|
19
26
|
return True, {}
|
20
27
|
|
28
|
+
self.logger.debug("No source with matching word count found.")
|
21
29
|
return False, {}
|
22
30
|
|
23
31
|
def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
|
@@ -32,24 +40,26 @@ class RelaxedWordCountMatchHandler(GapCorrectionHandler):
|
|
32
40
|
matching_source = source
|
33
41
|
reference_words = words
|
34
42
|
reference_words_original = gap.reference_words_original[source]
|
43
|
+
self.logger.debug(f"Using source '{source}' for corrections.")
|
35
44
|
break
|
36
45
|
|
37
46
|
# Use the centralized method to calculate reference positions for the matching source
|
38
47
|
reference_positions = WordOperations.calculate_reference_positions(gap, [matching_source])
|
48
|
+
self.logger.debug(f"Calculated reference positions: {reference_positions}")
|
39
49
|
|
40
50
|
# Since we found a source with matching word count, we can correct using that source
|
41
51
|
for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
|
42
52
|
if orig_word.lower() != ref_word.lower():
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
reference_positions=reference_positions,
|
52
|
-
)
|
53
|
+
correction = WordOperations.create_word_replacement_correction(
|
54
|
+
original_word=orig_word,
|
55
|
+
corrected_word=ref_word_original,
|
56
|
+
original_position=gap.transcription_position + i,
|
57
|
+
source=matching_source,
|
58
|
+
confidence=1.0,
|
59
|
+
reason=f"RelaxedWordCountMatchHandler: Source '{matching_source}' had matching word count",
|
60
|
+
reference_positions=reference_positions,
|
53
61
|
)
|
62
|
+
corrections.append(correction)
|
63
|
+
self.logger.debug(f"Correction made: {correction}")
|
54
64
|
|
55
65
|
return corrections
|
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List, Tuple, Dict, Any
|
1
|
+
from typing import List, Tuple, Dict, Any, Optional
|
2
2
|
import spacy
|
3
3
|
import logging
|
4
4
|
import pyphen
|
@@ -15,9 +15,9 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
|
|
15
15
|
class SyllablesMatchHandler(GapCorrectionHandler):
|
16
16
|
"""Handles gaps where number of syllables in reference text matches number of syllables in transcription."""
|
17
17
|
|
18
|
-
def __init__(self):
|
19
|
-
|
20
|
-
self.logger = logging.getLogger(__name__)
|
18
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
19
|
+
super().__init__(logger)
|
20
|
+
self.logger = logger or logging.getLogger(__name__)
|
21
21
|
|
22
22
|
# Marking SpacySyllables as used to prevent unused import warning
|
23
23
|
_ = SpacySyllables
|
@@ -1,4 +1,5 @@
|
|
1
1
|
from typing import List, Tuple, Dict, Any, Optional
|
2
|
+
import logging
|
2
3
|
|
3
4
|
from lyrics_transcriber.types import GapSequence, WordCorrection
|
4
5
|
from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
|
@@ -8,21 +9,29 @@ from lyrics_transcriber.correction.handlers.word_operations import WordOperation
|
|
8
9
|
class WordCountMatchHandler(GapCorrectionHandler):
|
9
10
|
"""Handles gaps where reference sources agree and have matching word counts."""
|
10
11
|
|
12
|
+
def __init__(self, logger: Optional[logging.Logger] = None):
|
13
|
+
super().__init__(logger)
|
14
|
+
self.logger = logger or logging.getLogger(__name__)
|
15
|
+
|
11
16
|
def can_handle(self, gap: GapSequence) -> Tuple[bool, Dict[str, Any]]:
|
12
17
|
# Must have reference words
|
13
18
|
if not gap.reference_words:
|
19
|
+
self.logger.debug("No reference words available.")
|
14
20
|
return False, {}
|
15
21
|
|
16
22
|
ref_words_lists = list(gap.reference_words.values())
|
17
23
|
|
18
24
|
# All sources must have same number of words as gap
|
19
25
|
if not all(len(words) == gap.length for words in ref_words_lists):
|
26
|
+
self.logger.debug("Not all sources have the same number of words as the gap.")
|
20
27
|
return False, {}
|
21
28
|
|
22
29
|
# If we have multiple sources, they must all agree
|
23
30
|
if len(ref_words_lists) > 1 and not all(words == ref_words_lists[0] for words in ref_words_lists[1:]):
|
31
|
+
self.logger.debug("Not all sources agree on the words.")
|
24
32
|
return False, {}
|
25
33
|
|
34
|
+
self.logger.debug("All sources agree and have matching word counts.")
|
26
35
|
return True, {}
|
27
36
|
|
28
37
|
def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
|
@@ -39,16 +48,16 @@ class WordCountMatchHandler(GapCorrectionHandler):
|
|
39
48
|
# Since we know all reference sources agree, we can correct all words in the gap
|
40
49
|
for i, (orig_word, ref_word, ref_word_original) in enumerate(zip(gap.words, reference_words, reference_words_original)):
|
41
50
|
if orig_word.lower() != ref_word.lower():
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
reference_positions=reference_positions,
|
51
|
-
)
|
51
|
+
correction = WordOperations.create_word_replacement_correction(
|
52
|
+
original_word=orig_word,
|
53
|
+
corrected_word=ref_word_original,
|
54
|
+
original_position=gap.transcription_position + i,
|
55
|
+
source=sources,
|
56
|
+
confidence=1.0,
|
57
|
+
reason="WordCountMatchHandler: Reference sources had same word count as gap",
|
58
|
+
reference_positions=reference_positions,
|
52
59
|
)
|
60
|
+
corrections.append(correction)
|
61
|
+
self.logger.debug(f"Correction made: {correction}")
|
53
62
|
|
54
63
|
return corrections
|