lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. lyrics_transcriber/__init__.py +2 -5
  2. lyrics_transcriber/cli/cli_main.py +206 -0
  3. lyrics_transcriber/core/__init__.py +0 -0
  4. lyrics_transcriber/core/controller.py +317 -0
  5. lyrics_transcriber/correction/base_strategy.py +29 -0
  6. lyrics_transcriber/correction/corrector.py +52 -0
  7. lyrics_transcriber/correction/strategy_diff.py +263 -0
  8. lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
  9. lyrics_transcriber/lyrics/genius.py +70 -0
  10. lyrics_transcriber/lyrics/spotify.py +82 -0
  11. lyrics_transcriber/output/__init__.py +0 -0
  12. lyrics_transcriber/output/generator.py +271 -0
  13. lyrics_transcriber/{utils → output}/subtitles.py +12 -12
  14. lyrics_transcriber/storage/__init__.py +0 -0
  15. lyrics_transcriber/storage/dropbox.py +225 -0
  16. lyrics_transcriber/transcribers/audioshake.py +216 -0
  17. lyrics_transcriber/transcribers/base_transcriber.py +186 -0
  18. lyrics_transcriber/transcribers/whisper.py +321 -0
  19. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
  20. lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
  21. lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
  22. lyrics_transcriber/audioshake_transcriber.py +0 -122
  23. lyrics_transcriber/corrector.py +0 -57
  24. lyrics_transcriber/llm_prompts/README.md +0 -10
  25. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
  26. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
  27. lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
  28. lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
  29. lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
  30. lyrics_transcriber/transcriber.py +0 -934
  31. lyrics_transcriber/utils/cli.py +0 -179
  32. lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
  33. lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
  34. /lyrics_transcriber/{utils → cli}/__init__.py +0 -0
  35. /lyrics_transcriber/{utils → output}/ass.py +0 -0
  36. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
  37. {lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,216 @@
1
+ from dataclasses import dataclass
2
+ import requests
3
+ import time
4
+ import os
5
+ from typing import Dict, Optional, Any, Union
6
+ from pathlib import Path
7
+ from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError
8
+
9
+
10
@dataclass
class AudioShakeConfig:
    """Settings used when talking to the AudioShake transcription service."""

    # Bearer token placed in the Authorization header of every API request.
    api_token: Optional[str] = None
    # Root URL that the endpoint paths ("/upload", "/job/") are appended to.
    base_url: str = "https://groovy.audioshake.ai"
    # Text placed in front of the suffix when output filenames are generated.
    output_prefix: Optional[str] = None
    # Upper bound, in minutes, on how long job polling is allowed to run.
    timeout_minutes: int = 10
18
+
19
+
20
class AudioShakeAPI:
    """Thin client for the AudioShake HTTP endpoints used by the transcriber.

    Every request carries an explicit timeout so that a stalled connection
    cannot block forever and defeat the overall ``timeout_minutes`` limit
    enforced while polling for a job result.
    """

    # Seconds allowed for establishing a connection to the API.
    CONNECT_TIMEOUT_SECONDS = 10
    # Seconds allowed to wait for response data on small JSON requests.
    READ_TIMEOUT_SECONDS = 60
    # Seconds to sleep between two job status polls.
    POLL_INTERVAL_SECONDS = 5

    def __init__(self, config: "AudioShakeConfig", logger):
        """Store the configuration and the logger used for all API calls."""
        self.config = config
        self.logger = logger

    def _validate_config(self) -> None:
        """Validate API configuration.

        Raises:
            ValueError: If no API token is configured.
        """
        if not self.config.api_token:
            raise ValueError("AudioShake API token must be provided")

    def _get_headers(self) -> Dict[str, str]:
        """Return the JSON request headers, validating the configuration first."""
        self._validate_config()  # Validate before making any API calls
        return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}

    def _request_timeout(self, read_seconds: Optional[float] = None) -> tuple:
        """Return a ``(connect, read)`` timeout pair for a requests call.

        Args:
            read_seconds: Read timeout to use instead of READ_TIMEOUT_SECONDS.
        """
        if read_seconds is None:
            read_seconds = self.READ_TIMEOUT_SECONDS
        return (self.CONNECT_TIMEOUT_SECONDS, read_seconds)

    def upload_file(self, filepath: str) -> str:
        """Upload an audio file and return the asset ID reported by the API.

        Raises:
            ValueError: If no API token is configured.
            requests.HTTPError: If the API answers with an error status.
        """
        self.logger.info(f"Uploading {filepath} to AudioShake")
        # _get_headers() validates the token before any file or network access.
        # Only the Authorization header is forwarded so that the JSON
        # Content-Type is not forced onto the multipart upload.
        auth_header = {"Authorization": self._get_headers()["Authorization"]}

        url = f"{self.config.base_url}/upload"
        # The server may need a while to answer after a large upload, so the
        # read timeout is at least as generous as the overall job timeout.
        read_timeout = max(self.READ_TIMEOUT_SECONDS, self.config.timeout_minutes * 60)
        with open(filepath, "rb") as file:
            files = {"file": (os.path.basename(filepath), file)}
            response = requests.post(url, headers=auth_header, files=files, timeout=self._request_timeout(read_timeout))

        self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
        response.raise_for_status()
        return response.json()["id"]

    def create_job(self, asset_id: str) -> str:
        """Create an alignment job for an uploaded asset and return the job ID.

        Raises:
            ValueError: If no API token is configured.
            requests.HTTPError: If the API answers with an error status.
        """
        self.logger.info(f"Creating job for asset {asset_id}")

        url = f"{self.config.base_url}/job/"
        data = {
            "metadata": {"format": "json", "name": "alignment", "language": "en"},
            "callbackUrl": "https://example.com/webhook/alignment",
            "assetId": asset_id,
        }
        response = requests.post(url, headers=self._get_headers(), json=data, timeout=self._request_timeout())
        response.raise_for_status()
        return response.json()["job"]["id"]

    def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
        """Poll the job endpoint until the job completes and return its data.

        Raises:
            ValueError: If no API token is configured.
            TranscriptionError: If the job fails or does not complete within
                ``config.timeout_minutes``.
            requests.HTTPError: If a poll answers with an error status.
        """
        self.logger.info(f"Getting job result for job {job_id}")

        url = f"{self.config.base_url}/job/{job_id}"
        start_time = time.time()
        last_status_log = start_time
        timeout_seconds = self.config.timeout_minutes * 60

        while True:
            current_time = time.time()
            elapsed_time = current_time - start_time

            # Check for timeout
            if elapsed_time > timeout_seconds:
                raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")

            # Log status every minute
            if current_time - last_status_log >= 60:
                self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
                last_status_log = current_time

            # Never wait on a single poll for longer than the time that is left.
            read_timeout = max(1.0, min(self.READ_TIMEOUT_SECONDS, timeout_seconds - elapsed_time))
            try:
                response = requests.get(url, headers=self._get_headers(), timeout=self._request_timeout(read_timeout))
            except requests.exceptions.Timeout:
                # A single slow poll is not fatal; the overall deadline above
                # still bounds the total waiting time.
                self.logger.warning(f"Status request for job {job_id} timed out, retrying")
            else:
                response.raise_for_status()
                job_data = response.json()["job"]
                status = job_data["status"]

                if status == "completed":
                    return job_data
                if status == "failed":
                    raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")

            time.sleep(self.POLL_INTERVAL_SECONDS)  # Wait before next poll
97
+
98
+
99
class AudioShakeTranscriber(BaseTranscriber):
    """Transcription service using AudioShake's API."""

    # (connect, read) timeout in seconds for downloading the alignment output.
    DOWNLOAD_TIMEOUT_SECONDS = (10, 60)

    def __init__(
        self,
        cache_dir: Union[str, Path],
        config: Optional[AudioShakeConfig] = None,
        logger: Optional[Any] = None,
        api_client: Optional[AudioShakeAPI] = None,
    ):
        """Initialize AudioShake transcriber.

        Args:
            cache_dir: Directory handed to the base class for cache files.
            config: Service configuration. When omitted, one is built with the
                token read from the AUDIOSHAKE_API_TOKEN environment variable.
            logger: Logger handed to the base class.
            api_client: API client to use. When omitted, an AudioShakeAPI is
                created from the configuration and the logger.
        """
        super().__init__(cache_dir=cache_dir, logger=logger)
        self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
        self.api = api_client or AudioShakeAPI(self.config, self.logger)

    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return "AudioShake"

    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """Run a transcription through the AudioShake API.

        Returns:
            The raw result dict produced by get_transcription_result(), i.e.
            the job data together with the downloaded transcription. It is not
            yet converted to TranscriptionData.
        """
        self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
        self.logger.info(f"Starting transcription for {audio_filepath}")

        try:
            # Start job and get results
            self.logger.debug("Calling start_transcription()")
            job_id = self.start_transcription(audio_filepath)
            self.logger.debug(f"Got job_id: {job_id}")

            self.logger.debug("Calling get_transcription_result()")
            result = self.get_transcription_result(job_id)
            self.logger.debug("Got transcription result")

            return result
        except Exception as e:
            self.logger.error(f"Error in _perform_transcription: {str(e)}")
            raise

    def start_transcription(self, audio_filepath: str) -> str:
        """Starts the transcription job and returns the job ID."""
        self.logger.debug(f"Entering start_transcription() for {audio_filepath}")

        # Upload file and create job
        asset_id = self.api.upload_file(audio_filepath)
        self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")

        job_id = self.api.create_job(asset_id)
        self.logger.debug(f"Job created successfully. Job ID: {job_id}")

        return job_id

    def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
        """Gets the raw results for a previously started job.

        Returns:
            A dict with the job data under "job_data" and the downloaded
            alignment JSON under "transcription".

        Raises:
            TranscriptionError: If the job has no "alignment.json" output asset.
        """
        self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")

        # Wait for job completion
        job_data = self.api.wait_for_job_result(job_id)
        self.logger.debug("Job completed. Getting results...")

        # Tolerate a missing or null asset list and assets without a name so
        # that the dedicated TranscriptionError below is raised instead of a
        # KeyError/TypeError.
        output_assets = job_data.get("outputAssets") or []
        output_asset = next((asset for asset in output_assets if asset.get("name") == "alignment.json"), None)
        if not output_asset:
            raise TranscriptionError("Required output not found in job results")

        # Fetch transcription data; the timeout keeps a stalled download from
        # blocking forever.
        response = requests.get(output_asset["link"], timeout=self.DOWNLOAD_TIMEOUT_SECONDS)
        response.raise_for_status()

        # Return combined raw data
        raw_data = {"job_data": job_data, "transcription": response.json()}

        self.logger.debug("Raw results retrieved successfully")
        return raw_data

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
        """Process raw Audioshake API response into standard format.

        Args:
            raw_data: Dict with "job_data" and "transcription" entries, as
                returned by get_transcription_result().
        """
        self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")

        transcription_data = raw_data["transcription"]
        job_data = raw_data["job_data"]

        segments = []
        all_words = []  # Collect all words across segments

        for line in transcription_data.get("lines", []):
            words = [
                Word(
                    text=word["text"],
                    start_time=word.get("start", 0.0),
                    end_time=word.get("end", 0.0),
                )
                for word in line.get("words", [])
            ]
            all_words.extend(words)  # Add words to flat list

            segments.append(
                LyricsSegment(
                    # Fall back to the joined word texts when the line has no text.
                    text=line.get("text", " ".join(w.text for w in words)),
                    words=words,
                    # A line without words gets a 0.0 start and end time.
                    start_time=min((w.start_time for w in words), default=0.0),
                    end_time=max((w.end_time for w in words), default=0.0),
                )
            )

        return TranscriptionData(
            text=transcription_data.get("text", ""),
            words=all_words,
            segments=segments,
            source=self.get_name(),
            metadata={
                "language": transcription_data.get("metadata", {}).get("language"),
                "job_id": job_data["id"],
                "duration": job_data.get("statusInfo", {}).get("duration"),
            },
        )

    def get_output_filename(self, suffix: str) -> str:
        """Return the configured output prefix followed by ``suffix``.

        An unset prefix is treated as empty so that the result never starts
        with the literal text "None".
        """
        prefix = self.config.output_prefix or ""
        return f"{prefix}{suffix}"
@@ -0,0 +1,186 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from typing import Dict, Any, Optional, Protocol, List, Union
4
+ from pathlib import Path
5
+ import logging
6
+ import os
7
+ import json
8
+ import hashlib
9
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsSegment, Word
10
+
11
+
12
@dataclass
class TranscriptionData:
    """Transcription output in the common shape shared by all transcribers."""

    segments: List[LyricsSegment]
    words: List[Word]
    text: str
    source: str  # e.g., "whisper", "audioshake"
    metadata: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary form of this object for JSON serialization.

        Segments and words are converted through their own ``to_dict()``;
        the remaining fields are copied as they are.
        """
        segment_dicts = [item.to_dict() for item in self.segments]
        word_dicts = [item.to_dict() for item in self.words]
        return dict(
            segments=segment_dicts,
            words=word_dicts,
            text=self.text,
            source=self.source,
            metadata=self.metadata,
        )
31
+
32
+
33
@dataclass
class TranscriptionResult:
    """A transcription together with the name and priority attached to it."""

    # Name associated with the result (presumably the transcriber's name -
    # confirm against the code that builds these objects).
    name: str
    # Integer priority; its ordering semantics are not visible here (TODO confirm).
    priority: int
    # The transcription itself.
    result: TranscriptionData
38
+
39
+
40
class LoggerProtocol(Protocol):
    """Structural type describing the logger methods the transcribers call."""

    def debug(self, msg: str) -> None:
        """Record a debug-level message."""
        ...

    def info(self, msg: str) -> None:
        """Record an info-level message."""
        ...

    def warning(self, msg: str) -> None:
        """Record a warning-level message."""
        ...

    def error(self, msg: str) -> None:
        """Record an error-level message."""
        ...
47
+
48
+
49
class TranscriptionError(Exception):
    """Raised when a transcription cannot be completed.

    Serves as the base exception for transcription errors.
    """
53
+
54
+
55
class BaseTranscriber(ABC):
    """Base class for all transcription services.

    Subclasses supply the service name, the raw transcription call and the
    conversion of the raw result; this class adds audio file validation and a
    JSON file cache keyed by the MD5 hash of the audio file.
    """

    def __init__(self, cache_dir: Union[str, Path], logger: Optional["LoggerProtocol"] = None):
        """
        Initialize transcriber with cache directory and logger.

        Args:
            cache_dir: Directory to store cache files. Must be provided; it is
                created (including parents) when it does not exist.
            logger: Logger instance to use. If None, the logger of this module
                is used.
        """
        self.cache_dir = Path(cache_dir)
        self.logger = logger or logging.getLogger(__name__)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def _get_file_hash(self, filepath: str) -> str:
        """Calculate the MD5 hash of a file, reading it in 4096-byte chunks."""
        self.logger.debug(f"Calculating hash for file: {filepath}")
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        hash_result = md5_hash.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_cache_path(self, file_hash: str, suffix: str) -> str:
        """Get the cache file path for a given file hash.

        The file name is "<lowercased service name>_<file_hash>_<suffix>.json"
        inside the cache directory.
        """
        cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
        self.logger.debug(f"Cache path: {cache_path}")
        return cache_path

    def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
        """Write JSON-serializable data to the given cache file."""
        self.logger.debug(f"Saving JSON to cache: {cache_path}")
        # Explicit encoding keeps the cache independent of the platform default.
        with open(cache_path, "w", encoding="utf-8") as f:
            json.dump(raw_data, f, indent=2)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Load cached JSON data.

        Returns:
            The parsed data, or None when the file does not exist or does not
            contain valid JSON.
        """
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            self.logger.debug("Raw API response loaded from cache")
            return data
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

    def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> "TranscriptionData":
        """Convert raw result to TranscriptionData, save to cache, and return."""
        converted_cache_path = self._get_cache_path(file_hash, "converted")
        converted_result = self._convert_result_format(raw_result)
        self._save_to_cache(converted_cache_path, converted_result.to_dict())
        return converted_result

    def transcribe(self, audio_filepath: str) -> "TranscriptionData":
        """
        Transcribe an audio file, using cache if available.

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            TranscriptionData containing segments, text, and metadata

        Raises:
            FileNotFoundError: If the audio file does not exist.
            Exception: Any error raised while transcribing is logged and
                re-raised unchanged.
        """
        self.logger.debug(f"Starting transcription for {audio_filepath}")

        try:
            self._validate_audio_file(audio_filepath)
            self.logger.debug("Audio file validation passed")

            # Check cache first
            file_hash = self._get_file_hash(audio_filepath)
            raw_cache_path = self._get_cache_path(file_hash, "raw")

            raw_data = self._load_from_cache(raw_cache_path)
            if raw_data:
                self.logger.info(f"Using cached raw data for {audio_filepath}")
                return self._save_and_convert_result(file_hash, raw_data)

            # If not in cache, perform transcription
            self.logger.info(f"No cache found, transcribing {audio_filepath}")
            raw_result = self._perform_transcription(audio_filepath)
            self.logger.debug("Transcription completed")

            # Save raw result to cache
            self._save_to_cache(raw_cache_path, raw_result)

            return self._save_and_convert_result(file_hash, raw_result)

        except Exception as e:
            self.logger.error(f"Error during transcription: {str(e)}")
            raise

    @abstractmethod
    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """
        Actually perform the transcription (implemented by subclasses).

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            The raw, JSON-serializable result of the service. transcribe()
            writes it to the raw cache and passes it to
            _convert_result_format() to obtain the TranscriptionData.
        """
        pass  # pragma: no cover

    @abstractmethod
    def get_name(self) -> str:
        """Return the name of this transcription service."""
        pass  # pragma: no cover

    def _validate_audio_file(self, audio_filepath: str) -> None:
        """Validate that the audio file exists.

        Raises:
            FileNotFoundError: If nothing exists at ``audio_filepath``.
        """
        self.logger.debug(f"Validating audio file: {audio_filepath}")
        if not os.path.exists(audio_filepath):
            self.logger.error(f"Audio file not found: {audio_filepath}")
            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
        self.logger.debug("Audio file validation successful")

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> "TranscriptionData":
        """Convert raw API response to TranscriptionData format."""
        pass  # pragma: no cover