lyrics-transcriber 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88)
  1. lyrics_transcriber/__init__.py +2 -1
  2. lyrics_transcriber/cli/{main.py → cli_main.py} +47 -14
  3. lyrics_transcriber/core/config.py +35 -0
  4. lyrics_transcriber/core/controller.py +164 -166
  5. lyrics_transcriber/correction/anchor_sequence.py +471 -0
  6. lyrics_transcriber/correction/corrector.py +256 -0
  7. lyrics_transcriber/correction/handlers/__init__.py +0 -0
  8. lyrics_transcriber/correction/handlers/base.py +30 -0
  9. lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
  10. lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
  11. lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
  12. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
  13. lyrics_transcriber/correction/handlers/repeat.py +71 -0
  14. lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
  15. lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
  16. lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
  17. lyrics_transcriber/correction/handlers/word_operations.py +135 -0
  18. lyrics_transcriber/correction/phrase_analyzer.py +426 -0
  19. lyrics_transcriber/correction/text_utils.py +30 -0
  20. lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
  21. lyrics_transcriber/lyrics/genius.py +73 -0
  22. lyrics_transcriber/lyrics/spotify.py +82 -0
  23. lyrics_transcriber/output/ass/__init__.py +21 -0
  24. lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
  25. lyrics_transcriber/output/ass/ass_specs.txt +732 -0
  26. lyrics_transcriber/output/ass/config.py +37 -0
  27. lyrics_transcriber/output/ass/constants.py +23 -0
  28. lyrics_transcriber/output/ass/event.py +94 -0
  29. lyrics_transcriber/output/ass/formatters.py +132 -0
  30. lyrics_transcriber/output/ass/lyrics_line.py +219 -0
  31. lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
  32. lyrics_transcriber/output/ass/section_detector.py +89 -0
  33. lyrics_transcriber/output/ass/section_screen.py +106 -0
  34. lyrics_transcriber/output/ass/style.py +187 -0
  35. lyrics_transcriber/output/cdg.py +503 -0
  36. lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
  37. lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
  38. lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
  39. lyrics_transcriber/output/cdgmaker/config.py +151 -0
  40. lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
  41. lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
  42. lyrics_transcriber/output/cdgmaker/pack.py +507 -0
  43. lyrics_transcriber/output/cdgmaker/render.py +346 -0
  44. lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
  45. lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
  46. lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
  47. lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
  48. lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
  49. lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
  50. lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
  51. lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
  52. lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
  53. lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
  54. lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
  55. lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
  56. lyrics_transcriber/output/cdgmaker/utils.py +132 -0
  57. lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
  58. lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
  59. lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
  60. lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
  61. lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
  62. lyrics_transcriber/output/fonts/arial.ttf +0 -0
  63. lyrics_transcriber/output/fonts/georgia.ttf +0 -0
  64. lyrics_transcriber/output/fonts/verdana.ttf +0 -0
  65. lyrics_transcriber/output/generator.py +140 -171
  66. lyrics_transcriber/output/lyrics_file.py +102 -0
  67. lyrics_transcriber/output/plain_text.py +91 -0
  68. lyrics_transcriber/output/segment_resizer.py +416 -0
  69. lyrics_transcriber/output/subtitles.py +328 -302
  70. lyrics_transcriber/output/video.py +219 -0
  71. lyrics_transcriber/review/__init__.py +1 -0
  72. lyrics_transcriber/review/server.py +138 -0
  73. lyrics_transcriber/storage/dropbox.py +110 -134
  74. lyrics_transcriber/transcribers/audioshake.py +171 -105
  75. lyrics_transcriber/transcribers/base_transcriber.py +149 -0
  76. lyrics_transcriber/transcribers/whisper.py +267 -133
  77. lyrics_transcriber/types.py +454 -0
  78. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/METADATA +14 -3
  79. lyrics_transcriber-0.32.1.dist-info/RECORD +86 -0
  80. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/WHEEL +1 -1
  81. lyrics_transcriber-0.32.1.dist-info/entry_points.txt +4 -0
  82. lyrics_transcriber/core/corrector.py +0 -56
  83. lyrics_transcriber/core/fetcher.py +0 -143
  84. lyrics_transcriber/storage/tokens.py +0 -116
  85. lyrics_transcriber/transcribers/base.py +0 -31
  86. lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
  87. lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
  88. {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/LICENSE +0 -0
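--- lyrics_transcriber/transcribers/audioshake.py
+++ lyrics_transcriber/transcribers/audioshake.py
@@ -1,151 +1,217 @@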
1
+ from dataclasses import dataclass
1
2
  import requests
2
3
  import time
3
4
  import os
4
- import json
5
- from .base import BaseTranscriber
5
+ from typing import Dict, Optional, Any, Union
6
+ from pathlib import Path
7
+ from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
8
+ from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
6
9
 
7
10
 
8
- class AudioShakeTranscriber(BaseTranscriber):
9
- """Transcription service using AudioShake's API."""
10
-
11
- def __init__(self, api_token=None, logger=None, output_prefix=None):
12
- super().__init__(logger)
13
- self.api_token = api_token or os.getenv("AUDIOSHAKE_API_TOKEN")
14
- self.base_url = "https://groovy.audioshake.ai"
15
- self.output_prefix = output_prefix
16
-
17
- if not self.api_token:
18
- raise ValueError("AudioShake API token must be provided either directly or via AUDIOSHAKE_API_TOKEN env var")
19
-
20
- def get_name(self) -> str:
21
- return "AudioShake"
22
-
23
- def transcribe(self, audio_filepath: str) -> dict:
24
- """
25
- Transcribe an audio file using AudioShake API.
26
-
27
- Args:
28
- audio_filepath: Path to the audio file to transcribe
29
-
30
- Returns:
31
- Dict containing:
32
- - segments: List of segments with start/end times and word-level data
33
- - text: Full text transcription
34
- - metadata: Dict of additional info
35
- """
36
- self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")
37
-
38
- # Start job and get results
39
- job_id = self.start_transcription(audio_filepath)
40
- result = self.get_transcription_result(job_id)
41
-
42
- # Add metadata to the result
43
- result["metadata"] = {
44
- "service": self.get_name(),
45
- "language": "en", # AudioShake currently only supports English
46
- }
47
-
48
- return result
11
+ @dataclass
12
+ class AudioShakeConfig:
13
+ """Configuration for AudioShake transcription service."""
49
14
 
50
- def start_transcription(self, audio_filepath: str) -> str:
51
- """Starts the transcription job and returns the job ID."""
52
- # Step 1: Upload the audio file
53
- asset_id = self._upload_file(audio_filepath)
54
- self.logger.info(f"File uploaded successfully. Asset ID: {asset_id}")
15
+ api_token: Optional[str] = None
16
+ base_url: str = "https://groovy.audioshake.ai"
17
+ output_prefix: Optional[str] = None
18
+ timeout_minutes: int = 10 # Added timeout configuration
55
19
 
56
- # Step 2: Create a job for transcription and alignment
57
- job_id = self._create_job(asset_id)
58
- self.logger.info(f"Job created successfully. Job ID: {job_id}")
59
20
 
60
- return job_id
21
+ class AudioShakeAPI:
22
+ """Handles direct API interactions with AudioShake."""
61
23
 
62
- def get_transcription_result(self, job_id: str) -> dict:
63
- """Gets the results for a previously started job."""
64
- self.logger.info(f"Getting results for job ID: {job_id}")
24
+ def __init__(self, config: AudioShakeConfig, logger):
25
+ self.config = config
26
+ self.logger = logger
65
27
 
66
- # Wait for job completion and get results
67
- result = self._get_job_result(job_id)
68
- self.logger.info(f"Job completed. Processing results...")
28
+ def _validate_config(self) -> None:
29
+ """Validate API configuration."""
30
+ if not self.config.api_token:
31
+ raise ValueError("AudioShake API token must be provided")
69
32
 
70
- # Process and return in standard format
71
- return self._process_result(result)
33
+ def _get_headers(self) -> Dict[str, str]:
34
+ """Get headers for API requests."""
35
+ self._validate_config() # Validate before making any API calls
36
+ return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}
72
37
 
73
- def _upload_file(self, filepath):
38
+ def upload_file(self, filepath: str) -> str:
39
+ """Upload audio file and return asset ID."""
74
40
  self.logger.info(f"Uploading {filepath} to AudioShake")
75
- url = f"{self.base_url}/upload"
76
- headers = {"Authorization": f"Bearer {self.api_token}"}
41
+ self._validate_config() # Validate before making API call
42
+
43
+ url = f"{self.config.base_url}/upload"
77
44
  with open(filepath, "rb") as file:
78
45
  files = {"file": (os.path.basename(filepath), file)}
79
- response = requests.post(url, headers=headers, files=files)
80
-
81
- self.logger.info(f"Upload response status code: {response.status_code}")
82
- self.logger.info(f"Upload response content: {response.text}")
46
+ response = requests.post(url, headers={"Authorization": self._get_headers()["Authorization"]}, files=files)
83
47
 
48
+ self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
84
49
  response.raise_for_status()
85
50
  return response.json()["id"]
86
51
 
87
- def _create_job(self, asset_id):
52
+ def create_job(self, asset_id: str) -> str:
53
+ """Create transcription job and return job ID."""
88
54
  self.logger.info(f"Creating job for asset {asset_id}")
89
- url = f"{self.base_url}/job/"
90
- headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
55
+
56
+ url = f"{self.config.base_url}/job/"
91
57
  data = {
92
58
  "metadata": {"format": "json", "name": "alignment", "language": "en"},
93
59
  "callbackUrl": "https://example.com/webhook/alignment",
94
60
  "assetId": asset_id,
95
61
  }
96
- response = requests.post(url, headers=headers, json=data)
62
+ response = requests.post(url, headers=self._get_headers(), json=data)
97
63
  response.raise_for_status()
98
64
  return response.json()["job"]["id"]
99
65
 
100
- def _get_job_result(self, job_id):
66
+ def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
67
+ """Poll for job completion and return results."""
101
68
  self.logger.info(f"Getting job result for job {job_id}")
102
- url = f"{self.base_url}/job/{job_id}"
103
- headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
69
+
70
+ url = f"{self.config.base_url}/job/{job_id}"
71
+ start_time = time.time()
72
+ last_status_log = start_time
73
+ timeout_seconds = self.config.timeout_minutes * 60
74
+
104
75
  while True:
105
- response = requests.get(url, headers=headers)
76
+ current_time = time.time()
77
+ elapsed_time = current_time - start_time
78
+
79
+ # Check for timeout
80
+ if elapsed_time > timeout_seconds:
81
+ raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")
82
+
83
+ # Log status every minute
84
+ if current_time - last_status_log >= 60:
85
+ self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
86
+ last_status_log = current_time
87
+
88
+ response = requests.get(url, headers=self._get_headers())
106
89
  response.raise_for_status()
107
90
  job_data = response.json()["job"]
91
+
108
92
  if job_data["status"] == "completed":
109
93
  return job_data
110
94
  elif job_data["status"] == "failed":
111
- raise Exception("Job failed")
112
- time.sleep(5) # Wait 5 seconds before checking again
95
+ raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")
113
96
 
114
- def _process_result(self, job_data):
115
- self.logger.debug(f"Processing result for job {job_data['id']}")
116
- self.logger.debug(f"Job data: {json.dumps(job_data, indent=2)}")
97
+ time.sleep(5) # Wait before next poll
117
98
 
118
- output_assets = job_data.get("outputAssets", [])
119
- self.logger.debug(f"Output assets: {output_assets}")
120
99
 
121
- output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
100
+ class AudioShakeTranscriber(BaseTranscriber):
101
+ """Transcription service using AudioShake's API."""
122
102
 
123
- if not output_asset:
124
- self.logger.error("'alignment.json' found in job results")
125
- self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
126
- raise Exception("Required output not found in job results")
103
+ def __init__(
104
+ self,
105
+ cache_dir: Union[str, Path],
106
+ config: Optional[AudioShakeConfig] = None,
107
+ logger: Optional[Any] = None,
108
+ api_client: Optional[AudioShakeAPI] = None,
109
+ ):
110
+ """Initialize AudioShake transcriber."""
111
+ super().__init__(cache_dir=cache_dir, logger=logger)
112
+ self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
113
+ self.api = api_client or AudioShakeAPI(self.config, self.logger)
127
114
 
128
- transcription_url = output_asset["link"]
129
- self.logger.debug(f"Output URL: {transcription_url}")
115
+ def get_name(self) -> str:
116
+ return "AudioShake"
130
117
 
131
- response = requests.get(transcription_url)
132
- response.raise_for_status()
133
- transcription_data = response.json()
134
- self.logger.debug(f"Output data: {json.dumps(transcription_data, indent=2)}")
118
+ def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
119
+ """Actually perform the transcription using AudioShake API."""
120
+ self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
121
+ self.logger.info(f"Starting transcription for {audio_filepath}")
122
+
123
+ try:
124
+ # Start job and get results
125
+ self.logger.debug("Calling start_transcription()")
126
+ job_id = self.start_transcription(audio_filepath)
127
+ self.logger.debug(f"Got job_id: {job_id}")
128
+
129
+ self.logger.debug("Calling get_transcription_result()")
130
+ result = self.get_transcription_result(job_id)
131
+ self.logger.debug("Got transcription result")
135
132
 
136
- transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}
133
+ return result
134
+ except Exception as e:
135
+ self.logger.error(f"Error in _perform_transcription: {str(e)}")
136
+ raise
137
137
 
138
- # Ensure each segment has the required fields
139
- for segment in transcription_data["segments"]:
140
- if "words" not in segment:
141
- segment["words"] = []
142
- if "text" not in segment:
143
- segment["text"] = " ".join(word["text"] for word in segment["words"])
138
+ def start_transcription(self, audio_filepath: str) -> str:
139
+ """Starts the transcription job and returns the job ID."""
140
+ self.logger.debug(f"Entering start_transcription() for {audio_filepath}")
141
+
142
+ # Upload file and create job
143
+ asset_id = self.api.upload_file(audio_filepath)
144
+ self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
145
+
146
+ job_id = self.api.create_job(asset_id)
147
+ self.logger.debug(f"Job created successfully. Job ID: {job_id}")
148
+
149
+ return job_id
150
+
151
+ def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
152
+ """Gets the raw results for a previously started job."""
153
+ self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")
144
154
 
145
- transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")
155
+ # Wait for job completion
156
+ job_data = self.api.wait_for_job_result(job_id)
157
+ self.logger.debug("Job completed. Getting results...")
146
158
 
147
- return transcription_data
159
+ output_asset = next((asset for asset in job_data.get("outputAssets", []) if asset["name"] == "alignment.json"), None)
160
+ if not output_asset:
161
+ raise TranscriptionError("Required output not found in job results")
162
+
163
+ # Fetch transcription data
164
+ response = requests.get(output_asset["link"])
165
+ response.raise_for_status()
148
166
 
149
- def get_output_filename(self, suffix):
150
- """Generate consistent filename with (Purpose) suffix pattern"""
151
- return f"{self.output_prefix}{suffix}"
167
+ # Return combined raw data
168
+ raw_data = {"job_data": job_data, "transcription": response.json()}
169
+
170
+ self.logger.debug("Raw results retrieved successfully")
171
+ return raw_data
172
+
173
+ def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
174
+ """Process raw Audioshake API response into standard format."""
175
+ self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")
176
+
177
+ transcription_data = raw_data["transcription"]
178
+ job_data = raw_data["job_data"]
179
+
180
+ segments = []
181
+ all_words = [] # Collect all words across segments
182
+
183
+ for line in transcription_data.get("lines", []):
184
+ words = [
185
+ Word(
186
+ text=word["text"].strip(" "),
187
+ start_time=word.get("start", 0.0),
188
+ end_time=word.get("end", 0.0),
189
+ )
190
+ for word in line.get("words", [])
191
+ ]
192
+ all_words.extend(words) # Add words to flat list
193
+
194
+ segments.append(
195
+ LyricsSegment(
196
+ text=line.get("text", " ".join(w.text for w in words)),
197
+ words=words,
198
+ start_time=min((w.start_time for w in words), default=0.0),
199
+ end_time=max((w.end_time for w in words), default=0.0),
200
+ )
201
+ )
202
+
203
+ return TranscriptionData(
204
+ text=transcription_data.get("text", ""),
205
+ words=all_words,
206
+ segments=segments,
207
+ source=self.get_name(),
208
+ metadata={
209
+ "language": transcription_data.get("metadata", {}).get("language"),
210
+ "job_id": job_data["id"],
211
+ "duration": job_data.get("statusInfo", {}).get("duration"),
212
+ },
213
+ )
214
+
215
+ def get_output_filename(self, suffix: str) -> str:
216
+ """Generate consistent filename with (Purpose) suffix pattern."""
217
+ return f"{self.config.output_prefix}{suffix}"
--- /dev/null
+++ lyrics_transcriber/transcribers/base_transcriber.py
@@ -0,0 +1,149 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, Any, Optional, Union
3
+ from pathlib import Path
4
+ import logging
5
+ import os
6
+ import json
7
+ import hashlib
8
+ from lyrics_transcriber.types import TranscriptionData
9
+
10
+
11
+ class TranscriptionError(Exception):
12
+ """Base exception for transcription errors."""
13
+
14
+ def __init__(self, message: str):
15
+ super().__init__(message)
16
+
17
+
18
+ class BaseTranscriber(ABC):
19
+ """Base class for all transcription services."""
20
+
21
+ def __init__(self, cache_dir: Union[str, Path], logger: Optional[logging.Logger] = None):
22
+ """
23
+ Initialize transcriber with cache directory and logger.
24
+
25
+ Args:
26
+ cache_dir: Directory to store cache files. Must be provided.
27
+ logger: Logger instance to use. If None, creates a new logger.
28
+ """
29
+ self.cache_dir = Path(cache_dir)
30
+ self.logger = logger or logging.getLogger(__name__)
31
+
32
+ self.cache_dir.mkdir(parents=True, exist_ok=True)
33
+ self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
34
+
35
+ def _get_file_hash(self, filepath: str) -> str:
36
+ """Calculate MD5 hash of a file."""
37
+ self.logger.debug(f"Calculating hash for file: {filepath}")
38
+ md5_hash = hashlib.md5()
39
+ with open(filepath, "rb") as f:
40
+ for chunk in iter(lambda: f.read(4096), b""):
41
+ md5_hash.update(chunk)
42
+ hash_result = md5_hash.hexdigest()
43
+ self.logger.debug(f"File hash: {hash_result}")
44
+ return hash_result
45
+
46
+ def _get_cache_path(self, file_hash: str, suffix: str) -> str:
47
+ """Get the cache file path for a given file hash."""
48
+ cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
49
+ self.logger.debug(f"Cache path: {cache_path}")
50
+ return cache_path
51
+
52
+ def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
53
+ """Save raw API response data to cache."""
54
+ self.logger.debug(f"Saving JSON to cache: {cache_path}")
55
+ with open(cache_path, "w") as f:
56
+ json.dump(raw_data, f, indent=2)
57
+ self.logger.debug("Cache save completed")
58
+
59
+ def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
60
+ """Load raw API response data from cache if it exists."""
61
+ self.logger.debug(f"Attempting to load from cache: {cache_path}")
62
+ try:
63
+ with open(cache_path, "r") as f:
64
+ data = json.load(f)
65
+ self.logger.debug("Raw API response loaded from cache")
66
+ return data
67
+ except FileNotFoundError:
68
+ self.logger.debug("Cache file not found")
69
+ return None
70
+ except json.JSONDecodeError:
71
+ self.logger.warning(f"Cache file {cache_path} is corrupted")
72
+ return None
73
+
74
+ def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> TranscriptionData:
75
+ """Convert raw result to TranscriptionData, save to cache, and return."""
76
+ converted_cache_path = self._get_cache_path(file_hash, "converted")
77
+ converted_result = self._convert_result_format(raw_result)
78
+ self._save_to_cache(converted_cache_path, converted_result.to_dict())
79
+ return converted_result
80
+
81
+ def transcribe(self, audio_filepath: str) -> TranscriptionData:
82
+ """
83
+ Transcribe an audio file, using cache if available.
84
+
85
+ Args:
86
+ audio_filepath: Path to the audio file to transcribe
87
+
88
+ Returns:
89
+ TranscriptionData containing segments, text, and metadata
90
+ """
91
+ self.logger.debug(f"Starting transcription for {audio_filepath}")
92
+
93
+ try:
94
+ self._validate_audio_file(audio_filepath)
95
+ self.logger.debug("Audio file validation passed")
96
+
97
+ # Check cache first
98
+ file_hash = self._get_file_hash(audio_filepath)
99
+ raw_cache_path = self._get_cache_path(file_hash, "raw")
100
+
101
+ raw_data = self._load_from_cache(raw_cache_path)
102
+ if raw_data:
103
+ self.logger.info(f"Using cached raw data for {audio_filepath}")
104
+ return self._save_and_convert_result(file_hash, raw_data)
105
+
106
+ # If not in cache, perform transcription
107
+ self.logger.info(f"No cache found, transcribing {audio_filepath}")
108
+ raw_result = self._perform_transcription(audio_filepath)
109
+ self.logger.debug("Transcription completed")
110
+
111
+ # Save raw result to cache
112
+ self._save_to_cache(raw_cache_path, raw_result)
113
+
114
+ return self._save_and_convert_result(file_hash, raw_result)
115
+
116
+ except Exception as e:
117
+ self.logger.error(f"Error during transcription: {str(e)}")
118
+ raise
119
+
120
+ @abstractmethod
121
+ def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
122
+ """
123
+ Actually perform the transcription (implemented by subclasses).
124
+
125
+ Args:
126
+ audio_filepath: Path to the audio file to transcribe
127
+
128
+ Returns:
129
+ TranscriptionData containing segments, text, and metadata
130
+ """
131
+ pass # pragma: no cover
132
+
133
+ @abstractmethod
134
+ def get_name(self) -> str:
135
+ """Return the name of this transcription service."""
136
+ pass # pragma: no cover
137
+
138
+ def _validate_audio_file(self, audio_filepath: str) -> None:
139
+ """Validate that the audio file exists and is accessible."""
140
+ self.logger.debug(f"Validating audio file: {audio_filepath}")
141
+ if not os.path.exists(audio_filepath):
142
+ self.logger.error(f"Audio file not found: {audio_filepath}")
143
+ raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
144
+ self.logger.debug("Audio file validation successful")
145
+
146
+ @abstractmethod
147
+ def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
148
+ """Convert raw API response to TranscriptionData format."""
149
+ pass # pragma: no cover