lyrics-transcriber 0.30.1__py3-none-any.whl → 0.32.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -1
- lyrics_transcriber/cli/cli_main.py +33 -12
- lyrics_transcriber/core/config.py +35 -0
- lyrics_transcriber/core/controller.py +85 -121
- lyrics_transcriber/correction/anchor_sequence.py +471 -0
- lyrics_transcriber/correction/corrector.py +237 -33
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +30 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
- lyrics_transcriber/correction/handlers/repeat.py +71 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
- lyrics_transcriber/correction/handlers/word_operations.py +135 -0
- lyrics_transcriber/correction/phrase_analyzer.py +426 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +5 -81
- lyrics_transcriber/lyrics/genius.py +5 -2
- lyrics_transcriber/lyrics/spotify.py +3 -3
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +37 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +219 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +503 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +101 -193
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +91 -0
- lyrics_transcriber/output/segment_resizer.py +416 -0
- lyrics_transcriber/output/subtitles.py +328 -302
- lyrics_transcriber/output/video.py +219 -0
- lyrics_transcriber/review/__init__.py +1 -0
- lyrics_transcriber/review/server.py +138 -0
- lyrics_transcriber/transcribers/audioshake.py +3 -2
- lyrics_transcriber/transcribers/base_transcriber.py +5 -42
- lyrics_transcriber/transcribers/whisper.py +3 -4
- lyrics_transcriber/types.py +454 -0
- {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/METADATA +14 -3
- lyrics_transcriber-0.32.2.dist-info/RECORD +86 -0
- {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/WHEEL +1 -1
- {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/entry_points.txt +1 -0
- lyrics_transcriber/correction/base_strategy.py +0 -29
- lyrics_transcriber/correction/strategy_diff.py +0 -263
- lyrics_transcriber-0.30.1.dist-info/RECORD +0 -25
- {lyrics_transcriber-0.30.1.dist-info → lyrics_transcriber-0.32.2.dist-info}/LICENSE +0 -0
@@ -0,0 +1,219 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
import json
|
4
|
+
import subprocess
|
5
|
+
from typing import List, Optional, Tuple
|
6
|
+
|
7
|
+
|
8
|
+
class VideoGenerator:
    """Handles generation of video files with lyrics overlay.

    The video is produced by FFmpeg: an ASS subtitles file is burned over a
    background (a still image, or a solid colour when no image is configured)
    and muxed with the supplied audio track.
    """

    def __init__(
        self,
        output_dir: str,
        cache_dir: str,
        video_resolution: Tuple[int, int],
        styles: dict,
        logger: Optional[logging.Logger] = None,
    ):
        """Initialize VideoGenerator.

        Args:
            output_dir: Directory where output files will be written
            cache_dir: Directory for temporary files
            video_resolution: Tuple of (width, height) for video resolution
            styles: Dictionary of output video & CDG styling configuration;
                only the "karaoke" section is read here
            logger: Optional logger instance

        Raises:
            ValueError: If any resolution dimension is not greater than 0
            FileNotFoundError: If a background image is configured but missing
        """
        if not all(x > 0 for x in video_resolution):
            raise ValueError("Video resolution dimensions must be greater than 0")

        self.output_dir = output_dir
        self.cache_dir = cache_dir
        self.video_resolution = video_resolution
        self.logger = logger or logging.getLogger(__name__)

        # Get background settings from styles, with defaults
        karaoke_styles = styles.get("karaoke", {})
        self.background_image = karaoke_styles.get("background_image")
        self.background_color = karaoke_styles.get("background_color", "black")

        # Validate background image if specified
        if self.background_image and not os.path.isfile(self.background_image):
            raise FileNotFoundError(f"Video background image not found: {self.background_image}")

    def generate_video(self, ass_path: str, audio_path: str, output_prefix: str) -> str:
        """Generate an MKV video with lyrics overlay.

        Args:
            ass_path: Path to ASS subtitles file
            audio_path: Path to audio file
            output_prefix: Prefix for output filename

        Returns:
            Path to generated video file ("<output_prefix> (With Vocals).mkv"
            inside the output directory)

        Raises:
            FileNotFoundError: If the subtitles or audio file does not exist
            subprocess.CalledProcessError: If FFmpeg exits with an error
        """
        self.logger.info("Generating video with lyrics overlay")
        output_path = self._get_output_path(f"{output_prefix} (With Vocals)", "mkv")

        # Check input files exist before running FFmpeg
        if not os.path.isfile(ass_path):
            raise FileNotFoundError(f"Subtitles file not found: {ass_path}")
        if not os.path.isfile(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")

        import shutil

        # The subtitles are copied to a fixed, simple filename because the path
        # is interpolated into the FFmpeg filter expression.
        temp_ass_path = os.path.join(self.cache_dir, "temp_subtitles.ass")

        try:
            # The cache directory may not have been created yet
            os.makedirs(self.cache_dir, exist_ok=True)
            shutil.copy2(ass_path, temp_ass_path)
            self.logger.debug(f"Created temporary ASS file: {temp_ass_path}")

            cmd = self._build_ffmpeg_command(temp_ass_path, audio_path, output_path)
            self._run_ffmpeg_command(cmd)
            self.logger.info(f"Video generated: {output_path}")
            return output_path

        except Exception as e:
            self.logger.error(f"Failed to generate video: {str(e)}")
            raise

        finally:
            # Clean up the temporary file on success and on failure alike; a
            # failed cleanup must not mask the real outcome.
            try:
                os.remove(temp_ass_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                self.logger.warning(f"Could not remove temporary ASS file {temp_ass_path}: {cleanup_error}")

    def _get_output_path(self, output_prefix: str, extension: str) -> str:
        """Generate full output path for a file."""
        return os.path.join(self.output_dir, f"{output_prefix}.{extension}")

    def _resize_background_image(self, input_path: str) -> str:
        """Resize background image to match target resolution and save to temp file.

        Args:
            input_path: Path to the source background image

        Returns:
            ``input_path`` unchanged when ffprobe reports it already has the
            target dimensions, otherwise the path of the resized copy
            ("resized_background.png" in the cache directory)

        Raises:
            subprocess.CalledProcessError: If the FFmpeg resize command fails
        """
        target_width, target_height = self.video_resolution

        # Get current image dimensions using ffprobe
        try:
            probe_cmd = [
                "ffprobe",
                "-v",
                "error",
                "-select_streams",
                "v:0",
                "-show_entries",
                "stream=width,height",
                "-of",
                "json",
                input_path,
            ]
            probe_output = subprocess.check_output(probe_cmd, universal_newlines=True)
            probe_data = json.loads(probe_output)
            current_width = probe_data["streams"][0]["width"]
            current_height = probe_data["streams"][0]["height"]

            # If dimensions already match, return original path
            if current_width == target_width and current_height == target_height:
                self.logger.debug("Background image already at target resolution")
                return input_path

        except (subprocess.CalledProcessError, OSError, json.JSONDecodeError, KeyError, IndexError) as e:
            # IndexError covers an empty "streams" list; OSError covers a
            # missing ffprobe binary. Either way the probe is only an
            # optimisation, so fall through to the resize attempt.
            self.logger.warning(f"Failed to get image dimensions: {e}")

        temp_path = os.path.join(self.cache_dir, "resized_background.png")
        # Scale down preserving aspect ratio, then pad to the exact target size
        cmd = [
            "ffmpeg",
            "-y",
            "-i",
            input_path,
            "-vf",
            f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease,"
            f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2",
            temp_path,
        ]

        try:
            subprocess.check_output(cmd, stderr=subprocess.STDOUT, universal_newlines=True)
            return temp_path
        except subprocess.CalledProcessError as e:
            self.logger.error(f"Failed to resize background image: {e.output}")
            raise

    def _build_ffmpeg_command(self, ass_path: str, audio_path: str, output_path: str) -> List[str]:
        """Build FFmpeg command for video generation with optimized settings.

        Args:
            ass_path: Path to the ASS subtitles file to burn in
            audio_path: Path to the audio input
            output_path: Path of the video file to write

        Returns:
            The FFmpeg argument list, ending with ``output_path``
        """
        width, height = self.video_resolution

        # fmt: off
        cmd = [
            "ffmpeg",
            "-hide_banner",
            "-loglevel", "error",
            "-r", "30",  # Set frame rate to 30 fps
        ]

        # Input source (background)
        if self.background_image:
            # Resize background image first
            resized_bg = self._resize_background_image(self.background_image)
            self.logger.debug(f"Using resized background image: {resized_bg}")
            cmd.extend([
                "-loop", "1",  # Loop the image
                "-i", resized_bg,
            ])
        else:
            self.logger.debug(
                f"Using solid {self.background_color} background "
                f"with resolution: {width}x{height}"
            )
            cmd.extend([
                "-f", "lavfi",
                "-i", f"color=c={self.background_color}:s={width}x{height}:r=30"
            ])

        # Add audio input and subtitle overlay
        # NOTE(review): ass_path is interpolated into the filter expression
        # without escaping - confirm cache paths never contain characters that
        # are special in FFmpeg filter syntax.
        cmd.extend([
            "-i", audio_path,
            "-c:a", "flac",  # Re-encode audio as FLAC
            "-vf", f"ass={ass_path}",  # Add subtitles
            "-c:v", self._get_video_codec(),
            # Video quality settings
            "-preset", "slow",  # Better compression efficiency
            "-b:v", "5000k",  # Base video bitrate
            "-minrate", "5000k",  # Minimum bitrate
            "-maxrate", "20000k",  # Maximum bitrate
            "-bufsize", "10000k",  # Buffer size (2x base rate)
            "-shortest",  # End encoding after shortest stream
            "-y",  # Overwrite output without asking
        ])
        # fmt: on

        # Add output path
        cmd.append(output_path)

        return cmd

    def _get_video_codec(self) -> str:
        """Determine the best available video codec.

        Returns:
            "h264_videotoolbox" when that name appears in the output of
            ``ffmpeg -codecs``, otherwise "libx264"
        """
        try:
            ffmpeg_codecs = subprocess.getoutput("ffmpeg -codecs")
            if "h264_videotoolbox" in ffmpeg_codecs:
                self.logger.info("Using hardware accelerated h264_videotoolbox")
                return "h264_videotoolbox"
        except Exception as e:
            self.logger.warning(f"Error checking for hardware acceleration: {e}")

        return "libx264"

    def _run_ffmpeg_command(self, cmd: List[str]) -> None:
        """Execute FFmpeg command with output handling.

        Args:
            cmd: FFmpeg argument list to execute

        Raises:
            subprocess.CalledProcessError: If FFmpeg exits with a non-zero
                status; its combined stdout/stderr is logged first
        """
        self.logger.debug(f"Running FFmpeg command: {' '.join(cmd)}")
        try:
            # Output is captured only so it can be reported on failure
            subprocess.check_output(cmd, universal_newlines=True, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            self.logger.error(f"FFmpeg error: {e.output}")
            raise
|
@@ -0,0 +1 @@
|
|
1
|
+
from .server import start_review_server, complete_review
|
@@ -0,0 +1,138 @@
|
|
1
|
+
import logging
|
2
|
+
from fastapi import FastAPI, Body
|
3
|
+
from fastapi.middleware.cors import CORSMiddleware
|
4
|
+
from typing import Optional, Dict, Any
|
5
|
+
from ..types import CorrectionResult
|
6
|
+
import time
|
7
|
+
import subprocess
|
8
|
+
import os
|
9
|
+
import atexit
|
10
|
+
import urllib.parse
|
11
|
+
|
12
|
+
# Module-wide logger for the review server
logger = logging.getLogger(__name__)

# FastAPI application that serves the review API endpoints
app = FastAPI()

# CORS is opened up for the local Vite dev server only (its default port is 5173)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Shared state of the review in progress:
#   current_review   - correction data being reviewed (None when idle)
#   review_completed - set once the reviewer has submitted the result
#   vite_process     - handle of the spawned Vite dev server, if any
current_review: Optional[CorrectionResult] = None
review_completed = False
vite_process: Optional[subprocess.Popen] = None
|
29
|
+
|
30
|
+
|
31
|
+
def start_vite_server():
    """Start the Vite development server.

    Runs ``npm run dev`` in the ``lyrics-analyzer`` directory located two
    levels above this file, stores the process handle in the module-level
    ``vite_process``, registers an ``atexit`` hook that terminates it, and
    then sleeps for two seconds to give the server time to start.

    Returns:
        The ``subprocess.Popen`` handle of the started dev server.
    """
    global vite_process

    # Get the path to the lyrics-analyzer directory relative to this file
    current_dir = os.path.dirname(os.path.abspath(__file__))
    vite_dir = os.path.abspath(os.path.join(current_dir, "../../lyrics-analyzer"))

    logger.info(f"Starting Vite dev server in {vite_dir}")

    # Start the Vite dev server. Its output is discarded instead of piped:
    # nothing ever reads these streams, and an unread pipe can fill up and
    # block the child process.
    process = subprocess.Popen(
        ["npm", "run", "dev"],
        cwd=vite_dir,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    vite_process = process

    def _terminate_vite() -> None:
        # Bound to this specific process so each registered hook cleans up
        # the server it was created for; skip if it has already exited.
        if process.poll() is None:
            process.terminate()

    # Register cleanup function to kill Vite server on exit
    atexit.register(_terminate_vite)

    # Wait a bit for the server to start
    time.sleep(2)  # Adjust this if needed

    return process
|
51
|
+
|
52
|
+
|
53
|
+
@app.get("/api/correction-data")
async def get_correction_data():
    """Return the correction data currently under review.

    Responds with an error payload when no review has been started,
    otherwise with the dictionary form of the current correction result.
    """
    review = current_review
    if review is not None:
        return review.to_dict()
    return {"error": "No review in progress"}
|
59
|
+
|
60
|
+
|
61
|
+
@app.post("/api/complete")
async def complete_review(updated_data: Dict[str, Any] = Body(...)):
    """
    Accept the reviewed correction data and flag the review as finished.

    Args:
        updated_data: The complete correction result data with any modifications

    Returns:
        ``{"status": "success"}`` once the data has been parsed and stored, or
        ``{"status": "error", "message": ...}`` if it could not be processed.
    """
    global review_completed, current_review

    logger.info("Received updated correction data")

    try:
        # Rebuild the result through from_dict so the payload is validated
        # into the proper structure before it replaces the current review
        current_review = CorrectionResult.from_dict(updated_data)
        correction_count = len(current_review.corrections)
        logger.info(f"Successfully updated correction data with {correction_count} corrections")

        review_completed = True
    except Exception as e:
        failure = str(e)
        logger.error(f"Failed to update correction data: {failure}")
        return {"status": "error", "message": failure}

    return {"status": "success"}
|
84
|
+
|
85
|
+
|
86
|
+
def start_review_server(correction_result: CorrectionResult) -> CorrectionResult:
    """
    Start the review server and wait for completion.

    Starts the Vite dev server, runs the FastAPI app with uvicorn on
    127.0.0.1:8000 in a daemon thread, opens the review UI in the browser,
    and blocks until the review is marked complete. There is no timeout.

    Args:
        correction_result: The correction result to review

    Returns:
        The potentially modified correction result after review
    """
    import uvicorn
    import webbrowser
    from threading import Thread

    global current_review, review_completed
    current_review = correction_result
    review_completed = False

    logger.info("Starting review server...")

    # Start Vite dev server
    vite_proc = start_vite_server()
    logger.info("Vite dev server started")

    try:
        # Start FastAPI server in a separate thread
        server_thread = Thread(
            target=uvicorn.run,
            args=(app,),
            kwargs={"host": "127.0.0.1", "port": 8000, "log_level": "info"},
            daemon=True,
        )
        server_thread.start()
        logger.info("Server thread started")

        # Open browser, telling the UI where the API lives
        base_api_url = "http://localhost:8000/api"
        encoded_api_url = urllib.parse.quote(base_api_url, safe="")
        webbrowser.open(f"http://localhost:5173?baseApiUrl={encoded_api_url}")
        logger.info("Opened browser for review")

        # Wait for review to complete (flag is set by the /api/complete handler)
        while not review_completed:
            time.sleep(0.1)
    finally:
        # Clean up Vite server even if start-up or the wait is interrupted
        if vite_proc:
            vite_proc.terminate()
            try:
                vite_proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                vite_proc.kill()

    logger.info("Review completed, returning results")
    return current_review
|
@@ -4,7 +4,8 @@ import time
|
|
4
4
|
import os
|
5
5
|
from typing import Dict, Optional, Any, Union
|
6
6
|
from pathlib import Path
|
7
|
-
from .
|
7
|
+
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
8
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
8
9
|
|
9
10
|
|
10
11
|
@dataclass
|
@@ -182,7 +183,7 @@ class AudioShakeTranscriber(BaseTranscriber):
|
|
182
183
|
for line in transcription_data.get("lines", []):
|
183
184
|
words = [
|
184
185
|
Word(
|
185
|
-
text=word["text"],
|
186
|
+
text=word["text"].strip(" "),
|
186
187
|
start_time=word.get("start", 0.0),
|
187
188
|
end_time=word.get("end", 0.0),
|
188
189
|
)
|
@@ -1,61 +1,24 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
|
-
from
|
3
|
-
from typing import Dict, Any, Optional, Protocol, List, Union
|
2
|
+
from typing import Dict, Any, Optional, Union
|
4
3
|
from pathlib import Path
|
5
4
|
import logging
|
6
5
|
import os
|
7
6
|
import json
|
8
7
|
import hashlib
|
9
|
-
from lyrics_transcriber.
|
10
|
-
|
11
|
-
|
12
|
-
@dataclass
|
13
|
-
class TranscriptionData:
|
14
|
-
"""Structured container for transcription results."""
|
15
|
-
|
16
|
-
segments: List[LyricsSegment]
|
17
|
-
words: List[Word]
|
18
|
-
text: str
|
19
|
-
source: str # e.g., "whisper", "audioshake"
|
20
|
-
metadata: Optional[Dict[str, Any]] = None
|
21
|
-
|
22
|
-
def to_dict(self) -> Dict[str, Any]:
|
23
|
-
"""Convert TranscriptionData to dictionary for JSON serialization."""
|
24
|
-
return {
|
25
|
-
"segments": [segment.to_dict() for segment in self.segments],
|
26
|
-
"words": [word.to_dict() for word in self.words],
|
27
|
-
"text": self.text,
|
28
|
-
"source": self.source,
|
29
|
-
"metadata": self.metadata,
|
30
|
-
}
|
31
|
-
|
32
|
-
|
33
|
-
@dataclass
|
34
|
-
class TranscriptionResult:
|
35
|
-
name: str
|
36
|
-
priority: int
|
37
|
-
result: TranscriptionData
|
38
|
-
|
39
|
-
|
40
|
-
class LoggerProtocol(Protocol):
|
41
|
-
"""Protocol for logger interface."""
|
42
|
-
|
43
|
-
def debug(self, msg: str) -> None: ...
|
44
|
-
def info(self, msg: str) -> None: ...
|
45
|
-
def warning(self, msg: str) -> None: ...
|
46
|
-
def error(self, msg: str) -> None: ...
|
8
|
+
from lyrics_transcriber.types import TranscriptionData
|
47
9
|
|
48
10
|
|
49
11
|
class TranscriptionError(Exception):
|
50
12
|
"""Base exception for transcription errors."""
|
51
13
|
|
52
|
-
|
14
|
+
def __init__(self, message: str):
|
15
|
+
super().__init__(message)
|
53
16
|
|
54
17
|
|
55
18
|
class BaseTranscriber(ABC):
|
56
19
|
"""Base class for all transcription services."""
|
57
20
|
|
58
|
-
def __init__(self, cache_dir: Union[str, Path], logger: Optional[
|
21
|
+
def __init__(self, cache_dir: Union[str, Path], logger: Optional[logging.Logger] = None):
|
59
22
|
"""
|
60
23
|
Initialize transcriber with cache directory and logger.
|
61
24
|
|
@@ -9,7 +9,8 @@ import time
|
|
9
9
|
from typing import Optional, Dict, Any, Protocol, Union
|
10
10
|
from pathlib import Path
|
11
11
|
from pydub import AudioSegment
|
12
|
-
from .
|
12
|
+
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
13
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
13
14
|
|
14
15
|
|
15
16
|
@dataclass
|
@@ -194,7 +195,7 @@ class WhisperTranscriber(BaseTranscriber):
|
|
194
195
|
|
195
196
|
def _initialize_storage(self) -> FileStorageProtocol:
|
196
197
|
"""Initialize storage client."""
|
197
|
-
from
|
198
|
+
from lyrics_transcriber.storage.dropbox import DropboxHandler, DropboxConfig
|
198
199
|
|
199
200
|
# Create config using os.getenv directly
|
200
201
|
config = DropboxConfig(
|
@@ -313,8 +314,6 @@ class WhisperTranscriber(BaseTranscriber):
|
|
313
314
|
|
314
315
|
def _validate_response(self, raw_data: Dict[str, Any]) -> None:
|
315
316
|
"""Validate the response contains required fields."""
|
316
|
-
if not isinstance(raw_data, dict):
|
317
|
-
raise TranscriptionError(f"Invalid response format: {raw_data}")
|
318
317
|
if "segments" not in raw_data:
|
319
318
|
raise TranscriptionError("Response missing required 'segments' field")
|
320
319
|
if "transcription" not in raw_data:
|