lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,134 +1,60 @@
1
1
  #! /usr/bin/env python3
2
+ from dataclasses import dataclass
2
3
  import os
3
- import sys
4
4
  import json
5
5
  import requests
6
6
  import hashlib
7
7
  import tempfile
8
- from time import sleep
8
+ import time
9
+ from typing import Optional, Dict, Any, Protocol, Union
10
+ from pathlib import Path
9
11
  from pydub import AudioSegment
10
- from .base import BaseTranscriber
11
- from ..storage.dropbox import DropboxHandler
12
+ from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError
12
13
 
13
14
 
14
- class WhisperTranscriber(BaseTranscriber):
15
- """Transcription service using Whisper API via RunPod."""
15
+ @dataclass
16
+ class WhisperConfig:
17
+ """Configuration for Whisper transcription service."""
16
18
 
17
- def __init__(
18
- self,
19
- logger=None,
20
- runpod_api_key=None,
21
- endpoint_id=None,
22
- dropbox_app_key=None,
23
- dropbox_app_secret=None,
24
- dropbox_refresh_token=None,
25
- dropbox_access_token=None,
26
- ):
27
- super().__init__(logger)
28
- self.runpod_api_key = runpod_api_key or os.getenv("RUNPOD_API_KEY")
29
- self.endpoint_id = endpoint_id or os.getenv("WHISPER_RUNPOD_ID")
30
-
31
- if not self.runpod_api_key or not self.endpoint_id:
32
- raise ValueError("RunPod API key and endpoint ID must be provided either directly or via environment variables")
33
-
34
- self.dbx = DropboxHandler(
35
- app_key=dropbox_app_key or os.getenv("WHISPER_DROPBOX_APP_KEY"),
36
- app_secret=dropbox_app_secret or os.getenv("WHISPER_DROPBOX_APP_SECRET"),
37
- refresh_token=dropbox_refresh_token or os.getenv("WHISPER_DROPBOX_REFRESH_TOKEN"),
38
- access_token=dropbox_access_token or os.getenv("WHISPER_DROPBOX_ACCESS_TOKEN"),
39
- )
19
+ runpod_api_key: Optional[str] = None
20
+ endpoint_id: Optional[str] = None
21
+ dropbox_app_key: Optional[str] = None
22
+ dropbox_app_secret: Optional[str] = None
23
+ dropbox_refresh_token: Optional[str] = None
24
+ timeout_minutes: int = 10
40
25
 
41
- def get_name(self) -> str:
42
- return "Whisper"
43
26
 
44
- def transcribe(self, audio_filepath: str) -> dict:
45
- """
46
- Transcribe an audio file using Whisper API via RunPod.
27
+ class FileStorageProtocol(Protocol):
28
+ """Protocol for file storage operations."""
47
29
 
48
- Args:
49
- audio_filepath: Path to the audio file to transcribe
30
+ def file_exists(self, path: str) -> bool: ... # pragma: no cover
31
+ def upload_with_retry(self, file: Any, path: str) -> None: ... # pragma: no cover
32
+ def create_or_get_shared_link(self, path: str) -> str: ... # pragma: no cover
50
33
 
51
- Returns:
52
- Dict containing:
53
- - segments: List of segments with start/end times and word-level data
54
- - text: Full text transcription
55
- - metadata: Dict of additional info
56
- """
57
- self.logger.info(f"Starting transcription for {audio_filepath} using Whisper API")
58
34
 
59
- # Calculate MD5 hash and prepare file
60
- file_hash = self._get_file_md5(audio_filepath)
61
- processed_filepath = self._convert_to_flac(audio_filepath)
35
+ class RunPodWhisperAPI:
36
+ """Handles interactions with RunPod API."""
62
37
 
63
- try:
64
- # Upload to Dropbox and get URL
65
- dropbox_path = f"/transcription_temp/{file_hash}{os.path.splitext(processed_filepath)[1]}"
66
- audio_url = self._upload_and_get_link(processed_filepath, dropbox_path)
67
-
68
- # Get transcription from API
69
- result = self._run_transcription(audio_url)
70
-
71
- # Add metadata
72
- result["metadata"] = {
73
- "service": self.get_name(),
74
- "model": "large-v2",
75
- "language": "en",
76
- }
38
+ def __init__(self, config: WhisperConfig, logger):
39
+ self.config = config
40
+ self.logger = logger
41
+ self._validate_config()
77
42
 
78
- return result
43
+ def _validate_config(self) -> None:
44
+ """Validate API configuration."""
45
+ if not self.config.runpod_api_key or not self.config.endpoint_id:
46
+ raise ValueError("RunPod API key and endpoint ID must be provided")
79
47
 
80
- finally:
81
- # Clean up temporary FLAC file if one was created
82
- if processed_filepath != audio_filepath:
83
- self.logger.debug(f"Cleaning up temporary file: {processed_filepath}")
84
- os.unlink(processed_filepath)
85
-
86
- def _convert_to_flac(self, filepath: str) -> str:
87
- """Convert WAV to FLAC if needed for faster upload."""
88
- if not filepath.lower().endswith(".wav"):
89
- return filepath
90
-
91
- self.logger.info("Converting WAV to FLAC for faster upload...")
92
- audio = AudioSegment.from_wav(filepath)
93
-
94
- with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
95
- flac_path = temp_flac.name
96
- audio.export(flac_path, format="flac")
97
-
98
- return flac_path
99
-
100
- def _get_file_md5(self, filepath: str) -> str:
101
- """Calculate MD5 hash of a file."""
102
- md5_hash = hashlib.md5()
103
- with open(filepath, "rb") as f:
104
- for chunk in iter(lambda: f.read(4096), b""):
105
- md5_hash.update(chunk)
106
- return md5_hash.hexdigest()
107
-
108
- def _upload_and_get_link(self, filepath: str, dropbox_path: str) -> str:
109
- """Upload file to Dropbox and return shared link."""
110
- if not self.dbx.file_exists(dropbox_path):
111
- self.logger.info("Uploading file to Dropbox...")
112
- with open(filepath, "rb") as f:
113
- self.dbx.upload_with_retry(f, dropbox_path)
114
- else:
115
- self.logger.info("File already exists in Dropbox, skipping upload...")
116
-
117
- audio_url = self.dbx.create_or_get_shared_link(dropbox_path)
118
- self.logger.debug(f"Using shared link: {audio_url}")
119
- return audio_url
120
-
121
- def _run_transcription(self, audio_url: str) -> dict:
122
- """Submit transcription job to RunPod and get results."""
123
- run_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/run"
124
- status_url = f"https://api.runpod.ai/v2/{self.endpoint_id}/status"
125
- headers = {"Authorization": f"Bearer {self.runpod_api_key}"}
48
+ def submit_job(self, audio_url: str) -> str:
49
+ """Submit transcription job and return job ID."""
50
+ run_url = f"https://api.runpod.ai/v2/{self.config.endpoint_id}/run"
51
+ headers = {"Authorization": f"Bearer {self.config.runpod_api_key}"}
126
52
 
127
53
  payload = {
128
54
  "input": {
129
55
  "audio": audio_url,
130
56
  "word_timestamps": True,
131
- "model": "large-v2",
57
+ "model": "medium",
132
58
  "temperature": 0.2,
133
59
  "best_of": 5,
134
60
  "compression_ratio_threshold": 2.8,
@@ -138,49 +64,258 @@ class WhisperTranscriber(BaseTranscriber):
138
64
  }
139
65
  }
140
66
 
141
- # Submit job
142
67
  self.logger.info("Submitting transcription job...")
143
68
  response = requests.post(run_url, json=payload, headers=headers)
144
69
 
145
70
  self.logger.debug(f"Response status code: {response.status_code}")
71
+
72
+ # Try to parse and log the JSON response
146
73
  try:
147
- self.logger.debug(f"Response content: {json.dumps(response.json(), indent=2)}")
148
- except:
74
+ response_json = response.json()
75
+ self.logger.debug(f"Response content: {json.dumps(response_json, indent=2)}")
76
+ except ValueError:
149
77
  self.logger.debug(f"Raw response content: {response.text}")
78
+ # Re-raise if we can't parse the response at all
79
+ raise TranscriptionError(f"Invalid JSON response: {response.text}")
80
+
81
+ response.raise_for_status()
82
+ return response_json["id"]
83
+
84
+ def get_job_status(self, job_id: str) -> Dict[str, Any]:
85
+ """Get job status and results."""
86
+ status_url = f"https://api.runpod.ai/v2/{self.config.endpoint_id}/status/{job_id}"
87
+ headers = {"Authorization": f"Bearer {self.config.runpod_api_key}"}
150
88
 
89
+ response = requests.get(status_url, headers=headers)
151
90
  response.raise_for_status()
152
- job_id = response.json()["id"]
91
+ return response.json()
92
+
93
+ def cancel_job(self, job_id: str) -> None:
94
+ """Cancel a running job."""
95
+ cancel_url = f"https://api.runpod.ai/v2/{self.config.endpoint_id}/cancel/{job_id}"
96
+ headers = {"Authorization": f"Bearer {self.config.runpod_api_key}"}
97
+
98
+ try:
99
+ response = requests.post(cancel_url, headers=headers)
100
+ response.raise_for_status()
101
+ except Exception as e:
102
+ self.logger.warning(f"Failed to cancel job {job_id}: {e}")
103
+
104
+ def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
105
+ """Poll for job completion and return results."""
106
+ self.logger.info(f"Getting job result for job {job_id}")
107
+
108
+ start_time = time.time()
109
+ last_status_log = start_time
110
+ timeout_seconds = self.config.timeout_minutes * 60
153
111
 
154
- # Poll for results
155
- self.logger.info("Waiting for results...")
156
112
  while True:
157
- status_response = requests.get(f"{status_url}/{job_id}", headers=headers)
158
- status_response.raise_for_status()
159
- status_data = status_response.json()
113
+ current_time = time.time()
114
+ elapsed_time = current_time - start_time
115
+
116
+ if elapsed_time > timeout_seconds:
117
+ self.cancel_job(job_id)
118
+ raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")
119
+
120
+ # Log status periodically
121
+ if current_time - last_status_log >= 60:
122
+ self.logger.info(f"Still waiting for transcription... Elapsed time: {int(elapsed_time/60)} minutes")
123
+ last_status_log = current_time
124
+
125
+ status_data = self.get_job_status(job_id)
160
126
 
161
127
  if status_data["status"] == "COMPLETED":
162
128
  return status_data["output"]
163
129
  elif status_data["status"] == "FAILED":
164
- raise Exception(f"Transcription failed: {status_data.get('error', 'Unknown error')}")
130
+ error_msg = status_data.get("error", "Unknown error")
131
+ self.logger.error(f"Job failed with error: {error_msg}")
132
+ raise TranscriptionError(f"Transcription failed: {error_msg}")
133
+
134
+ time.sleep(5)
135
+
136
+
137
+ class AudioProcessor:
138
+ """Handles audio file processing."""
139
+
140
+ def __init__(self, logger):
141
+ self.logger = logger
142
+
143
+ def get_file_md5(self, filepath: str) -> str:
144
+ """Calculate MD5 hash of a file."""
145
+ md5_hash = hashlib.md5()
146
+ with open(filepath, "rb") as f:
147
+ for chunk in iter(lambda: f.read(4096), b""):
148
+ md5_hash.update(chunk)
149
+ return md5_hash.hexdigest()
150
+
151
+ def convert_to_flac(self, filepath: str) -> str:
152
+ """Convert WAV to FLAC if needed for faster upload."""
153
+ if not filepath.lower().endswith(".wav"):
154
+ return filepath
155
+
156
+ self.logger.info("Converting WAV to FLAC for faster upload...")
157
+ audio = AudioSegment.from_wav(filepath)
158
+
159
+ with tempfile.NamedTemporaryFile(suffix=".flac", delete=False) as temp_flac:
160
+ flac_path = temp_flac.name
161
+ audio.export(flac_path, format="flac")
162
+
163
+ return flac_path
164
+
165
+
166
+ class WhisperTranscriber(BaseTranscriber):
167
+ """Transcription service using Whisper API via RunPod."""
168
+
169
+ def __init__(
170
+ self,
171
+ cache_dir: Union[str, Path],
172
+ config: Optional[WhisperConfig] = None,
173
+ logger: Optional[Any] = None,
174
+ runpod_client: Optional[RunPodWhisperAPI] = None,
175
+ storage_client: Optional[FileStorageProtocol] = None,
176
+ audio_processor: Optional[AudioProcessor] = None,
177
+ ):
178
+ """Initialize Whisper transcriber."""
179
+ super().__init__(cache_dir=cache_dir, logger=logger)
180
+
181
+ # Initialize configuration
182
+ self.config = config or WhisperConfig(
183
+ runpod_api_key=os.getenv("RUNPOD_API_KEY"),
184
+ endpoint_id=os.getenv("WHISPER_RUNPOD_ID"),
185
+ dropbox_app_key=os.getenv("WHISPER_DROPBOX_APP_KEY"),
186
+ dropbox_app_secret=os.getenv("WHISPER_DROPBOX_APP_SECRET"),
187
+ dropbox_refresh_token=os.getenv("WHISPER_DROPBOX_REFRESH_TOKEN"),
188
+ )
189
+
190
+ # Initialize components (with dependency injection)
191
+ self.runpod = runpod_client or RunPodWhisperAPI(self.config, self.logger)
192
+ self.storage = storage_client or self._initialize_storage()
193
+ self.audio_processor = audio_processor or AudioProcessor(self.logger)
194
+
195
+ def _initialize_storage(self) -> FileStorageProtocol:
196
+ """Initialize storage client."""
197
+ from ..storage.dropbox import DropboxHandler, DropboxConfig
165
198
 
166
- sleep(2) # Wait 2 seconds before checking again
199
+ # Create config using os.getenv directly
200
+ config = DropboxConfig(
201
+ app_key=os.getenv("WHISPER_DROPBOX_APP_KEY"),
202
+ app_secret=os.getenv("WHISPER_DROPBOX_APP_SECRET"),
203
+ refresh_token=os.getenv("WHISPER_DROPBOX_REFRESH_TOKEN"),
204
+ )
205
+
206
+ # Log the actual config values being used
207
+ self.logger.debug("Initializing DropboxHandler with config")
208
+ return DropboxHandler(config=config)
209
+
210
+ def get_name(self) -> str:
211
+ return "Whisper"
167
212
 
213
+ def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
214
+ """Actually perform the whisper transcription using Whisper API."""
215
+ self.logger.info(f"Starting transcription for {audio_filepath}")
168
216
 
169
- if __name__ == "__main__":
170
- # Example usage
171
- import logging
217
+ # Start transcription and get results
218
+ job_id = self.start_transcription(audio_filepath)
219
+ result = self.get_transcription_result(job_id)
220
+ return result
172
221
 
173
- logging.basicConfig(level=logging.INFO)
222
+ def start_transcription(self, audio_filepath: str) -> str:
223
+ """Prepare audio and start whisper transcription job."""
224
+ audio_url, temp_filepath = self._prepare_audio_url(audio_filepath)
225
+ try:
226
+ return self.runpod.submit_job(audio_url)
227
+ except Exception as e:
228
+ if temp_filepath:
229
+ self._cleanup_temporary_files(temp_filepath)
230
+ raise TranscriptionError(f"Failed to submit job: {str(e)}") from e
231
+
232
+ def _prepare_audio_url(self, audio_filepath: str) -> tuple[str, Optional[str]]:
233
+ """Process audio file and return URL for API and path to any temporary files."""
234
+ if audio_filepath.startswith(("http://", "https://")):
235
+ return audio_filepath, None
236
+
237
+ file_hash = self.audio_processor.get_file_md5(audio_filepath)
238
+ temp_flac_filepath = self.audio_processor.convert_to_flac(audio_filepath)
239
+
240
+ # Upload and get URL
241
+ dropbox_path = f"/transcription_temp/{file_hash}{os.path.splitext(temp_flac_filepath)[1]}"
242
+ url = self._upload_and_get_link(temp_flac_filepath, dropbox_path)
243
+ return url, temp_flac_filepath
244
+
245
+ def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
246
+ """Poll for whisper job completion and return raw results."""
247
+ raw_data = self.runpod.wait_for_job_result(job_id)
248
+
249
+ # Add job_id to raw data for later use
250
+ raw_data["job_id"] = job_id
251
+
252
+ return raw_data
253
+
254
+ def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
255
+ """Convert Whisper API response to standard format."""
256
+ self._validate_response(raw_data)
257
+
258
+ job_id = raw_data.get("job_id")
259
+ all_words = []
260
+
261
+ # First collect all words from word_timestamps
262
+ word_list = [
263
+ Word(
264
+ text=word["word"].strip(),
265
+ start_time=word["start"],
266
+ end_time=word["end"],
267
+ confidence=word.get("probability"), # Only set if provided
268
+ )
269
+ for word in raw_data.get("word_timestamps", [])
270
+ ]
271
+ all_words.extend(word_list)
272
+
273
+ # Then create segments, using the words that fall within each segment's time range
274
+ segments = []
275
+ for seg in raw_data["segments"]:
276
+ segment_words = [word for word in word_list if seg["start"] <= word.start_time < seg["end"]]
277
+ segments.append(LyricsSegment(text=seg["text"].strip(), words=segment_words, start_time=seg["start"], end_time=seg["end"]))
278
+
279
+ return TranscriptionData(
280
+ segments=segments,
281
+ words=all_words,
282
+ text=raw_data["transcription"],
283
+ source=self.get_name(),
284
+ metadata={
285
+ "language": raw_data.get("detected_language", "en"),
286
+ "model": raw_data.get("model"),
287
+ "job_id": job_id,
288
+ },
289
+ )
174
290
 
175
- if len(sys.argv) > 1:
176
- audio_file = sys.argv[1]
177
- else:
178
- audio_file = input("Enter the path to your audio file: ")
291
+ def _upload_and_get_link(self, filepath: str, dropbox_path: str) -> str:
292
+ """Upload file to storage and return shared link."""
293
+ if not self.storage.file_exists(dropbox_path):
294
+ self.logger.info("Uploading file to storage...")
295
+ with open(filepath, "rb") as f:
296
+ self.storage.upload_with_retry(f, dropbox_path)
297
+ else:
298
+ self.logger.info("File already exists in storage, skipping upload...")
179
299
 
180
- transcriber = WhisperTranscriber()
181
- results = transcriber.transcribe(audio_file)
300
+ audio_url = self.storage.create_or_get_shared_link(dropbox_path)
301
+ self.logger.debug(f"Using shared link: {audio_url}")
302
+ return audio_url
182
303
 
183
- output_file = f"transcription_results_{WhisperTranscriber._get_file_md5(audio_file)}.json"
184
- with open(output_file, "w", encoding="utf-8") as f:
185
- json.dump(results, f, indent=2)
186
- print(f"Transcription completed! Results saved to {output_file}")
304
+ def _cleanup_temporary_files(self, *filepaths: Optional[str]) -> None:
305
+ """Clean up any temporary files that were created during transcription."""
306
+ for filepath in filepaths:
307
+ if filepath and os.path.exists(filepath):
308
+ try:
309
+ os.remove(filepath)
310
+ self.logger.debug(f"Cleaned up temporary file: {filepath}")
311
+ except Exception as e:
312
+ self.logger.warning(f"Failed to clean up temporary file {filepath}: {e}")
313
+
314
+ def _validate_response(self, raw_data: Dict[str, Any]) -> None:
315
+ """Validate the response contains required fields."""
316
+ if not isinstance(raw_data, dict):
317
+ raise TranscriptionError(f"Invalid response format: {raw_data}")
318
+ if "segments" not in raw_data:
319
+ raise TranscriptionError("Response missing required 'segments' field")
320
+ if "transcription" not in raw_data:
321
+ raise TranscriptionError("Response missing required 'transcription' field")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lyrics-transcriber
3
- Version: 0.30.0
3
+ Version: 0.30.1
4
4
  Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
5
5
  Home-page: https://github.com/karaokenerds/python-lyrics-transcriber
6
6
  License: MIT
@@ -0,0 +1,25 @@
1
+ lyrics_transcriber/__init__.py,sha256=Hj2HdSBAl6kmiqa5s3MDo_RobkITadzuF-81-ON3awA,180
2
+ lyrics_transcriber/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ lyrics_transcriber/cli/cli_main.py,sha256=-h3W9E4P5lHEjIBWiDvY0v7avldhA-cfYoAVwMlv0Zo,8137
4
+ lyrics_transcriber/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ lyrics_transcriber/core/controller.py,sha256=k_moklU2NqpHOGxWTRVyImWgX6_dv1NES0j50-FRGxw,13057
6
+ lyrics_transcriber/correction/base_strategy.py,sha256=vEKsj19ZNZZkvHRP0J7cZamJWqjLZHbRJ9sN0AyHbAA,867
7
+ lyrics_transcriber/correction/corrector.py,sha256=lsXJ1l5sNoZjIU65A3yWTXkOcraz7QP9KU8OUzA_UTc,2147
8
+ lyrics_transcriber/correction/strategy_diff.py,sha256=xJTFnmVcuE18zZcitweVaRqB82jCMm9Ey29zAFB4LsI,10188
9
+ lyrics_transcriber/lyrics/base_lyrics_provider.py,sha256=s5IDrlT6OudAA_gIlAQzeD0bPqoUFsiYftSQQm7XxOE,7518
10
+ lyrics_transcriber/lyrics/genius.py,sha256=zDiv0t2f7wphnPdcyPH6tahXBfOnbE63Nu8eRG0nqg4,3195
11
+ lyrics_transcriber/lyrics/spotify.py,sha256=Sic3nPFcpSWW7lE-yr3stb6D5m5WFSQXCwzWj3lW0Ls,3584
12
+ lyrics_transcriber/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ lyrics_transcriber/output/ass.py,sha256=b8lnjgXGD1OD1ld_b1xxUmSOf4nSEfz9BpgSkh16R4g,90291
14
+ lyrics_transcriber/output/generator.py,sha256=idUsuS01bnaIB5spDFZlxE0wsvJ2I071SmJfXO9BCCk,10870
15
+ lyrics_transcriber/output/subtitles.py,sha256=JEehSPl81hxhK6cS6RK4XAC_OLentCxiMCE7UYI9B64,11851
16
+ lyrics_transcriber/storage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ lyrics_transcriber/storage/dropbox.py,sha256=Dyam1ULTkoxD1X5trkZ5dGp5XhBGCn998moC8IS9-68,9804
18
+ lyrics_transcriber/transcribers/audioshake.py,sha256=0sXvD1FJYXxISH72n5HaN9fnTxgmaQrqmY1W5Lb6Yu8,8631
19
+ lyrics_transcriber/transcribers/base_transcriber.py,sha256=9XWUlBSwBCjKvz7Gs1NT7EIysMyacS-YlvDjpwlqwgI,6985
20
+ lyrics_transcriber/transcribers/whisper.py,sha256=QE9Dsb6emGOaFcepJHrECjVdCfAJZRncGj7uXy-0mAk,12942
21
+ lyrics_transcriber-0.30.1.dist-info/LICENSE,sha256=BiPihPDxhxIPEx6yAxVfAljD5Bhm_XG2teCbPEj_m0Y,1069
22
+ lyrics_transcriber-0.30.1.dist-info/METADATA,sha256=c6P3R-KVxCJ10m-92bezeetdztdB7vvv5RMlTnF4Xbg,5485
23
+ lyrics_transcriber-0.30.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
24
+ lyrics_transcriber-0.30.1.dist-info/entry_points.txt,sha256=KHZMIwodpv7TQUN9z28G-0knEFsRta9ZBAcIbmBAT40,75
25
+ lyrics_transcriber-0.30.1.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ lyrics-transcriber=lyrics_transcriber.cli.cli_main:main
3
+
@@ -1,56 +0,0 @@
1
- import json
2
- import logging
3
- from typing import Dict, Optional
4
-
5
-
6
- class LyricsTranscriptionCorrector:
7
- def __init__(
8
- self,
9
- logger: Optional[logging.Logger] = None,
10
- ):
11
- self.logger = logger or logging.getLogger(__name__)
12
-
13
- # Initialize instance variables for input data
14
- self.spotify_lyrics_data_dict = None
15
- self.spotify_lyrics_text = None
16
- self.genius_lyrics_text = None
17
- self.transcription_data_dict_whisper = None
18
- self.transcription_data_dict_audioshake = None
19
-
20
- def set_input_data(
21
- self,
22
- spotify_lyrics_data_dict: Optional[Dict] = None,
23
- spotify_lyrics_text: Optional[str] = None,
24
- genius_lyrics_text: Optional[str] = None,
25
- transcription_data_dict_whisper: Optional[Dict] = None,
26
- transcription_data_dict_audioshake: Optional[Dict] = None,
27
- ) -> None:
28
- """Store the input data as instance variables"""
29
- self.spotify_lyrics_data_dict = spotify_lyrics_data_dict
30
- self.spotify_lyrics_text = spotify_lyrics_text
31
- self.genius_lyrics_text = genius_lyrics_text
32
- self.transcription_data_dict_whisper = transcription_data_dict_whisper
33
- self.transcription_data_dict_audioshake = transcription_data_dict_audioshake
34
-
35
- def run_corrector(self) -> Dict:
36
- """
37
- Test implementation that replaces every third word with 'YOLO' in the AudioShake transcription.
38
- """
39
- self.logger.info("Running corrector (test implementation - replacing every 3rd word with YOLO)")
40
-
41
- # Create a deep copy to avoid modifying the original
42
- modified_data = json.loads(json.dumps(self.transcription_data_dict_audioshake))
43
-
44
- # Process each segment
45
- for segment in modified_data["segments"]:
46
- # Replace every third word in the words list
47
- for i in range(2, len(segment["words"]), 3):
48
- segment["words"][i]["text"] = "YOLO"
49
-
50
- # Reconstruct the segment text from the modified words
51
- segment["text"] = " ".join(word["text"] for word in segment["words"])
52
-
53
- # Reconstruct the full text from all segments
54
- modified_data["text"] = "".join(segment["text"] for segment in modified_data["segments"])
55
-
56
- return modified_data