lyrics-transcriber 0.30.0__py3-none-any.whl → 0.32.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lyrics_transcriber/__init__.py +2 -1
- lyrics_transcriber/cli/{main.py → cli_main.py} +47 -14
- lyrics_transcriber/core/config.py +35 -0
- lyrics_transcriber/core/controller.py +164 -166
- lyrics_transcriber/correction/anchor_sequence.py +471 -0
- lyrics_transcriber/correction/corrector.py +256 -0
- lyrics_transcriber/correction/handlers/__init__.py +0 -0
- lyrics_transcriber/correction/handlers/base.py +30 -0
- lyrics_transcriber/correction/handlers/extend_anchor.py +91 -0
- lyrics_transcriber/correction/handlers/levenshtein.py +147 -0
- lyrics_transcriber/correction/handlers/no_space_punct_match.py +98 -0
- lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +55 -0
- lyrics_transcriber/correction/handlers/repeat.py +71 -0
- lyrics_transcriber/correction/handlers/sound_alike.py +223 -0
- lyrics_transcriber/correction/handlers/syllables_match.py +182 -0
- lyrics_transcriber/correction/handlers/word_count_match.py +54 -0
- lyrics_transcriber/correction/handlers/word_operations.py +135 -0
- lyrics_transcriber/correction/phrase_analyzer.py +426 -0
- lyrics_transcriber/correction/text_utils.py +30 -0
- lyrics_transcriber/lyrics/base_lyrics_provider.py +125 -0
- lyrics_transcriber/lyrics/genius.py +73 -0
- lyrics_transcriber/lyrics/spotify.py +82 -0
- lyrics_transcriber/output/ass/__init__.py +21 -0
- lyrics_transcriber/output/{ass.py → ass/ass.py} +150 -690
- lyrics_transcriber/output/ass/ass_specs.txt +732 -0
- lyrics_transcriber/output/ass/config.py +37 -0
- lyrics_transcriber/output/ass/constants.py +23 -0
- lyrics_transcriber/output/ass/event.py +94 -0
- lyrics_transcriber/output/ass/formatters.py +132 -0
- lyrics_transcriber/output/ass/lyrics_line.py +219 -0
- lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
- lyrics_transcriber/output/ass/section_detector.py +89 -0
- lyrics_transcriber/output/ass/section_screen.py +106 -0
- lyrics_transcriber/output/ass/style.py +187 -0
- lyrics_transcriber/output/cdg.py +503 -0
- lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
- lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
- lyrics_transcriber/output/cdgmaker/composer.py +1919 -0
- lyrics_transcriber/output/cdgmaker/config.py +151 -0
- lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
- lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
- lyrics_transcriber/output/cdgmaker/pack.py +507 -0
- lyrics_transcriber/output/cdgmaker/render.py +346 -0
- lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
- lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
- lyrics_transcriber/output/cdgmaker/utils.py +132 -0
- lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
- lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
- lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
- lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
- lyrics_transcriber/output/fonts/arial.ttf +0 -0
- lyrics_transcriber/output/fonts/georgia.ttf +0 -0
- lyrics_transcriber/output/fonts/verdana.ttf +0 -0
- lyrics_transcriber/output/generator.py +140 -171
- lyrics_transcriber/output/lyrics_file.py +102 -0
- lyrics_transcriber/output/plain_text.py +91 -0
- lyrics_transcriber/output/segment_resizer.py +416 -0
- lyrics_transcriber/output/subtitles.py +328 -302
- lyrics_transcriber/output/video.py +219 -0
- lyrics_transcriber/review/__init__.py +1 -0
- lyrics_transcriber/review/server.py +138 -0
- lyrics_transcriber/storage/dropbox.py +110 -134
- lyrics_transcriber/transcribers/audioshake.py +171 -105
- lyrics_transcriber/transcribers/base_transcriber.py +149 -0
- lyrics_transcriber/transcribers/whisper.py +267 -133
- lyrics_transcriber/types.py +454 -0
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/METADATA +14 -3
- lyrics_transcriber-0.32.1.dist-info/RECORD +86 -0
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/WHEEL +1 -1
- lyrics_transcriber-0.32.1.dist-info/entry_points.txt +4 -0
- lyrics_transcriber/core/corrector.py +0 -56
- lyrics_transcriber/core/fetcher.py +0 -143
- lyrics_transcriber/storage/tokens.py +0 -116
- lyrics_transcriber/transcribers/base.py +0 -31
- lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
- lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
- {lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.32.1.dist-info}/LICENSE +0 -0
@@ -1,151 +1,217 @@
|
|
1
|
+
from dataclasses import dataclass
|
1
2
|
import requests
|
2
3
|
import time
|
3
4
|
import os
|
4
|
-
import
|
5
|
-
from
|
5
|
+
from typing import Dict, Optional, Any, Union
|
6
|
+
from pathlib import Path
|
7
|
+
from lyrics_transcriber.types import TranscriptionData, LyricsSegment, Word
|
8
|
+
from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionError
|
6
9
|
|
7
10
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def __init__(self, api_token=None, logger=None, output_prefix=None):
|
12
|
-
super().__init__(logger)
|
13
|
-
self.api_token = api_token or os.getenv("AUDIOSHAKE_API_TOKEN")
|
14
|
-
self.base_url = "https://groovy.audioshake.ai"
|
15
|
-
self.output_prefix = output_prefix
|
16
|
-
|
17
|
-
if not self.api_token:
|
18
|
-
raise ValueError("AudioShake API token must be provided either directly or via AUDIOSHAKE_API_TOKEN env var")
|
19
|
-
|
20
|
-
def get_name(self) -> str:
|
21
|
-
return "AudioShake"
|
22
|
-
|
23
|
-
def transcribe(self, audio_filepath: str) -> dict:
|
24
|
-
"""
|
25
|
-
Transcribe an audio file using AudioShake API.
|
26
|
-
|
27
|
-
Args:
|
28
|
-
audio_filepath: Path to the audio file to transcribe
|
29
|
-
|
30
|
-
Returns:
|
31
|
-
Dict containing:
|
32
|
-
- segments: List of segments with start/end times and word-level data
|
33
|
-
- text: Full text transcription
|
34
|
-
- metadata: Dict of additional info
|
35
|
-
"""
|
36
|
-
self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")
|
37
|
-
|
38
|
-
# Start job and get results
|
39
|
-
job_id = self.start_transcription(audio_filepath)
|
40
|
-
result = self.get_transcription_result(job_id)
|
41
|
-
|
42
|
-
# Add metadata to the result
|
43
|
-
result["metadata"] = {
|
44
|
-
"service": self.get_name(),
|
45
|
-
"language": "en", # AudioShake currently only supports English
|
46
|
-
}
|
47
|
-
|
48
|
-
return result
|
11
|
+
@dataclass
class AudioShakeConfig:
    """Configuration for AudioShake transcription service."""

    # Token sent as "Bearer <token>" in the Authorization header. AudioShakeAPI
    # raises ValueError before any request while this is unset/empty.
    api_token: Optional[str] = None
    # Root URL that the "/upload" and "/job/" endpoint paths are appended to.
    base_url: str = "https://groovy.audioshake.ai"
    # Prefix used by AudioShakeTranscriber.get_output_filename().
    output_prefix: Optional[str] = None
    # Upper bound, in minutes, on how long job-status polling may run before
    # a TranscriptionError is raised.
    timeout_minutes: int = 10
|
55
19
|
|
56
|
-
# Step 2: Create a job for transcription and alignment
|
57
|
-
job_id = self._create_job(asset_id)
|
58
|
-
self.logger.info(f"Job created successfully. Job ID: {job_id}")
|
59
20
|
|
60
|
-
|
21
|
+
class AudioShakeAPI:
    """Handles direct API interactions with AudioShake.

    Wraps the three HTTP steps used for a transcription: uploading the audio
    file, creating a job for the uploaded asset, and polling the job until it
    completes, fails, or the configured timeout elapses.
    """

    def __init__(self, config: "AudioShakeConfig", logger):
        """Store the configuration and logger used by every request.

        Args:
            config: Supplies ``api_token``, ``base_url`` and ``timeout_minutes``.
            logger: Object with ``info``/``debug`` logging methods.
        """
        self.config = config
        self.logger = logger

    def _validate_config(self) -> None:
        """Validate API configuration.

        Raises:
            ValueError: If no API token is configured.
        """
        if not self.config.api_token:
            raise ValueError("AudioShake API token must be provided")

    def _get_headers(self) -> Dict[str, str]:
        """Return the bearer-auth JSON headers for API requests.

        Raises:
            ValueError: If no API token is configured.
        """
        self._validate_config()  # Validate before making any API calls
        return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}

    def upload_file(self, filepath: str) -> str:
        """Upload an audio file and return the asset ID from the response.

        Args:
            filepath: Path of the file to upload; sent under its base name.

        Raises:
            ValueError: If no API token is configured (checked before the
                file is opened).
            requests.HTTPError: If the API answers with an error status.
        """
        self.logger.info(f"Uploading {filepath} to AudioShake")
        self._validate_config()  # Fail fast, before touching the file

        url = f"{self.config.base_url}/upload"
        with open(filepath, "rb") as file:
            files = {"file": (os.path.basename(filepath), file)}
            # Only the Authorization header is forwarded: the JSON
            # Content-Type from _get_headers() is deliberately left out of
            # this file upload.
            response = requests.post(url, headers={"Authorization": self._get_headers()["Authorization"]}, files=files)

        self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
        response.raise_for_status()
        return response.json()["id"]

    def create_job(self, asset_id: str) -> str:
        """Create an alignment job for an uploaded asset and return its job ID.

        Args:
            asset_id: ID returned by ``upload_file``.

        Raises:
            ValueError: If no API token is configured.
            requests.HTTPError: If the API answers with an error status.
        """
        self.logger.info(f"Creating job for asset {asset_id}")

        url = f"{self.config.base_url}/job/"
        data = {
            "metadata": {"format": "json", "name": "alignment", "language": "en"},
            "callbackUrl": "https://example.com/webhook/alignment",
            "assetId": asset_id,
        }
        response = requests.post(url, headers=self._get_headers(), json=data)
        response.raise_for_status()
        return response.json()["job"]["id"]

    def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
        """Poll a job every 5 seconds until it completes, then return its data.

        Args:
            job_id: ID returned by ``create_job``.

        Returns:
            The ``"job"`` object from the status response once its status is
            ``"completed"``.

        Raises:
            TranscriptionError: If the job reports ``"failed"`` or polling
                exceeds ``config.timeout_minutes``.
            requests.HTTPError: If a status request answers with an error status.
        """
        self.logger.info(f"Getting job result for job {job_id}")

        url = f"{self.config.base_url}/job/{job_id}"
        start_time = time.time()
        last_status_log = start_time
        timeout_seconds = self.config.timeout_minutes * 60

        while True:
            current_time = time.time()
            elapsed_time = current_time - start_time

            # Check for timeout
            if elapsed_time > timeout_seconds:
                raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")

            # Log status every minute
            if current_time - last_status_log >= 60:
                self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
                last_status_log = current_time

            response = requests.get(url, headers=self._get_headers())
            response.raise_for_status()
            job_data = response.json()["job"]

            if job_data["status"] == "completed":
                return job_data
            elif job_data["status"] == "failed":
                raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")

            # Any other status is treated as "still running".
            time.sleep(5)  # Wait before next poll
|
117
98
|
|
118
|
-
output_assets = job_data.get("outputAssets", [])
|
119
|
-
self.logger.debug(f"Output assets: {output_assets}")
|
120
99
|
|
121
|
-
|
100
|
+
class AudioShakeTranscriber(BaseTranscriber):
    """Transcription service using AudioShake's API."""

    def __init__(
        self,
        cache_dir: Union[str, Path],
        config: Optional[AudioShakeConfig] = None,
        logger: Optional[Any] = None,
        api_client: Optional[AudioShakeAPI] = None,
    ):
        """Initialize AudioShake transcriber.

        Args:
            cache_dir: Cache directory, forwarded to ``BaseTranscriber``.
            config: Service configuration. When omitted, one is built with the
                token read from the ``AUDIOSHAKE_API_TOKEN`` environment variable.
            logger: Logger forwarded to ``BaseTranscriber``.
            api_client: Pre-built API client; when omitted an ``AudioShakeAPI``
                is created from ``config`` and the transcriber's logger.
        """
        super().__init__(cache_dir=cache_dir, logger=logger)
        self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
        self.api = api_client or AudioShakeAPI(self.config, self.logger)

    def get_name(self) -> str:
        """Return the name of this transcription service."""
        return "AudioShake"

    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """Run an AudioShake job for the file and return the raw result.

        Returns:
            The dict produced by ``get_transcription_result`` (raw job data
            plus the downloaded transcription); it is not yet converted to
            ``TranscriptionData``.

        Raises:
            Exception: Any error from the upload/job/poll steps is logged and
                re-raised unchanged.
        """
        self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
        self.logger.info(f"Starting transcription for {audio_filepath}")

        try:
            # Start job and get results
            self.logger.debug("Calling start_transcription()")
            job_id = self.start_transcription(audio_filepath)
            self.logger.debug(f"Got job_id: {job_id}")

            self.logger.debug("Calling get_transcription_result()")
            result = self.get_transcription_result(job_id)
            self.logger.debug("Got transcription result")

            return result
        except Exception as e:
            self.logger.error(f"Error in _perform_transcription: {str(e)}")
            raise

    def start_transcription(self, audio_filepath: str) -> str:
        """Starts the transcription job and returns the job ID."""
        self.logger.debug(f"Entering start_transcription() for {audio_filepath}")

        # Upload file and create job
        asset_id = self.api.upload_file(audio_filepath)
        self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")

        job_id = self.api.create_job(asset_id)
        self.logger.debug(f"Job created successfully. Job ID: {job_id}")

        return job_id

    def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
        """Gets the raw results for a previously started job.

        Returns:
            ``{"job_data": <completed job object>, "transcription": <parsed
            JSON of the "alignment.json" output asset>}``.

        Raises:
            TranscriptionError: If the completed job has no output asset named
                ``"alignment.json"``.
            requests.HTTPError: If downloading the asset fails.
        """
        self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")

        # Wait for job completion
        job_data = self.api.wait_for_job_result(job_id)
        self.logger.debug("Job completed. Getting results...")

        # .get() so an asset entry without a "name" key is skipped instead of
        # aborting the search with a KeyError.
        output_asset = next(
            (asset for asset in job_data.get("outputAssets", []) if asset.get("name") == "alignment.json"),
            None,
        )
        if not output_asset:
            raise TranscriptionError("Required output not found in job results")

        # Fetch transcription data
        response = requests.get(output_asset["link"])
        response.raise_for_status()

        # Return combined raw data
        raw_data = {"job_data": job_data, "transcription": response.json()}

        self.logger.debug("Raw results retrieved successfully")
        return raw_data

    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
        """Process raw Audioshake API response into standard format.

        Args:
            raw_data: Dict with ``"job_data"`` and ``"transcription"`` keys, as
                returned by ``get_transcription_result``.

        Returns:
            ``TranscriptionData`` with one ``LyricsSegment`` per entry of the
            transcription's ``"lines"`` and a flat list of every ``Word``.
        """
        self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")

        transcription_data = raw_data["transcription"]
        job_data = raw_data["job_data"]

        segments = []
        all_words = []  # Collect all words across segments

        for line in transcription_data.get("lines", []):
            words = [
                Word(
                    text=word["text"].strip(" "),
                    start_time=word.get("start", 0.0),
                    end_time=word.get("end", 0.0),
                )
                for word in line.get("words", [])
            ]
            all_words.extend(words)  # Add words to flat list

            segments.append(
                LyricsSegment(
                    # Fall back to joining the word texts when the line has no "text".
                    text=line.get("text", " ".join(w.text for w in words)),
                    words=words,
                    # A line without words gets 0.0 for both bounds.
                    start_time=min((w.start_time for w in words), default=0.0),
                    end_time=max((w.end_time for w in words), default=0.0),
                )
            )

        return TranscriptionData(
            text=transcription_data.get("text", ""),
            words=all_words,
            segments=segments,
            source=self.get_name(),
            metadata={
                "language": transcription_data.get("metadata", {}).get("language"),
                "job_id": job_data["id"],
                "duration": job_data.get("statusInfo", {}).get("duration"),
            },
        )

    def get_output_filename(self, suffix: str) -> str:
        """Generate consistent filename with (Purpose) suffix pattern.

        An unset ``output_prefix`` is treated as an empty prefix so the result
        never contains the literal text "None".
        """
        prefix = self.config.output_prefix or ""
        return f"{prefix}{suffix}"
|
@@ -0,0 +1,149 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import Dict, Any, Optional, Union
|
3
|
+
from pathlib import Path
|
4
|
+
import logging
|
5
|
+
import os
|
6
|
+
import json
|
7
|
+
import hashlib
|
8
|
+
from lyrics_transcriber.types import TranscriptionData
|
9
|
+
|
10
|
+
|
11
|
+
class TranscriptionError(Exception):
    """Base exception for transcription errors."""

    def __init__(self, message: str):
        """Create the error with a single human-readable message."""
        super().__init__(message)
|
16
|
+
|
17
|
+
|
18
|
+
class BaseTranscriber(ABC):
    """Base class for all transcription services.

    Provides the caching ``transcribe()`` workflow; subclasses supply the
    service name, the raw transcription call, and the raw-to-standard
    conversion.
    """

    def __init__(self, cache_dir: Union[str, Path], logger: Optional[logging.Logger] = None):
        """
        Initialize transcriber with cache directory and logger.

        Args:
            cache_dir: Directory to store cache files. Must be provided; it is
                created (including parents) if it does not exist.
            logger: Logger instance to use. If None, the module logger is used.
        """
        self.cache_dir = Path(cache_dir)
        self.logger = logger or logging.getLogger(__name__)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")

    def _get_file_hash(self, filepath: str) -> str:
        """Calculate MD5 hash of a file, reading it in 4096-byte chunks."""
        self.logger.debug(f"Calculating hash for file: {filepath}")
        md5_hash = hashlib.md5()
        with open(filepath, "rb") as f:
            for chunk in iter(lambda: f.read(4096), b""):
                md5_hash.update(chunk)
        hash_result = md5_hash.hexdigest()
        self.logger.debug(f"File hash: {hash_result}")
        return hash_result

    def _get_cache_path(self, file_hash: str, suffix: str) -> str:
        """Get the cache file path for a given file hash.

        The file name is ``<lowercased service name>_<file_hash>_<suffix>.json``
        inside the cache directory.
        """
        cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
        self.logger.debug(f"Cache path: {cache_path}")
        return cache_path

    def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
        """Write ``raw_data`` to ``cache_path`` as indented JSON."""
        self.logger.debug(f"Saving JSON to cache: {cache_path}")
        with open(cache_path, "w") as f:
            json.dump(raw_data, f, indent=2)
        self.logger.debug("Cache save completed")

    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
        """Load JSON data from cache if it exists.

        Returns:
            The parsed JSON, or None when the file is missing or is not valid
            JSON (the latter is logged as a warning).
        """
        self.logger.debug(f"Attempting to load from cache: {cache_path}")
        try:
            with open(cache_path, "r") as f:
                data = json.load(f)
                self.logger.debug("Raw API response loaded from cache")
                return data
        except FileNotFoundError:
            self.logger.debug("Cache file not found")
            return None
        except json.JSONDecodeError:
            self.logger.warning(f"Cache file {cache_path} is corrupted")
            return None

    def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> "TranscriptionData":
        """Convert raw result to TranscriptionData, save to cache, and return.

        The converted result's ``to_dict()`` output is written to the
        ``"converted"`` cache file for this hash.
        """
        converted_cache_path = self._get_cache_path(file_hash, "converted")
        converted_result = self._convert_result_format(raw_result)
        self._save_to_cache(converted_cache_path, converted_result.to_dict())
        return converted_result

    def transcribe(self, audio_filepath: str) -> "TranscriptionData":
        """
        Transcribe an audio file, using cache if available.

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            TranscriptionData containing segments, text, and metadata

        Raises:
            FileNotFoundError: If the audio file does not exist.
            Exception: Any other error is logged and re-raised unchanged.
        """
        self.logger.debug(f"Starting transcription for {audio_filepath}")

        try:
            self._validate_audio_file(audio_filepath)
            self.logger.debug("Audio file validation passed")

            # Check cache first
            file_hash = self._get_file_hash(audio_filepath)
            raw_cache_path = self._get_cache_path(file_hash, "raw")

            raw_data = self._load_from_cache(raw_cache_path)
            if raw_data:
                # Cached raw data is re-converted (and the converted cache
                # rewritten) on every hit.
                self.logger.info(f"Using cached raw data for {audio_filepath}")
                return self._save_and_convert_result(file_hash, raw_data)

            # If not in cache, perform transcription
            self.logger.info(f"No cache found, transcribing {audio_filepath}")
            raw_result = self._perform_transcription(audio_filepath)
            self.logger.debug("Transcription completed")

            # Save raw result to cache
            self._save_to_cache(raw_cache_path, raw_result)

            return self._save_and_convert_result(file_hash, raw_result)

        except Exception as e:
            self.logger.error(f"Error during transcription: {str(e)}")
            raise

    @abstractmethod
    def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
        """
        Actually perform the transcription (implemented by subclasses).

        Args:
            audio_filepath: Path to the audio file to transcribe

        Returns:
            The raw, JSON-serializable result. ``transcribe()`` writes it to
            the raw cache and then passes it to ``_convert_result_format``.
        """
        pass  # pragma: no cover

    @abstractmethod
    def get_name(self) -> str:
        """Return the name of this transcription service."""
        pass  # pragma: no cover

    def _validate_audio_file(self, audio_filepath: str) -> None:
        """Validate that the audio file exists.

        Raises:
            FileNotFoundError: If ``audio_filepath`` does not exist.
        """
        self.logger.debug(f"Validating audio file: {audio_filepath}")
        if not os.path.exists(audio_filepath):
            self.logger.error(f"Audio file not found: {audio_filepath}")
            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
        self.logger.debug("Audio file validation successful")

    @abstractmethod
    def _convert_result_format(self, raw_data: Dict[str, Any]) -> "TranscriptionData":
        """Convert raw API response to TranscriptionData format."""
        pass  # pragma: no cover
|