lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,151 +1,216 @@
+ from dataclasses import dataclass
  import requests
  import time
  import os
- import json
- from .base import BaseTranscriber
+ from typing import Dict, Optional, Any, Union
+ from pathlib import Path
+ from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError


- class AudioShakeTranscriber(BaseTranscriber):
-     """Transcription service using AudioShake's API."""
-
-     def __init__(self, api_token=None, logger=None, output_prefix=None):
-         super().__init__(logger)
-         self.api_token = api_token or os.getenv("AUDIOSHAKE_API_TOKEN")
-         self.base_url = "https://groovy.audioshake.ai"
-         self.output_prefix = output_prefix
-
-         if not self.api_token:
-             raise ValueError("AudioShake API token must be provided either directly or via AUDIOSHAKE_API_TOKEN env var")
-
-     def get_name(self) -> str:
-         return "AudioShake"
-
-     def transcribe(self, audio_filepath: str) -> dict:
-         """
-         Transcribe an audio file using AudioShake API.
-
-         Args:
-             audio_filepath: Path to the audio file to transcribe
-
-         Returns:
-             Dict containing:
-             - segments: List of segments with start/end times and word-level data
-             - text: Full text transcription
-             - metadata: Dict of additional info
-         """
-         self.logger.info(f"Starting transcription for {audio_filepath} using AudioShake API")
-
-         # Start job and get results
-         job_id = self.start_transcription(audio_filepath)
-         result = self.get_transcription_result(job_id)
-
-         # Add metadata to the result
-         result["metadata"] = {
-             "service": self.get_name(),
-             "language": "en",  # AudioShake currently only supports English
-         }
-
-         return result
+ @dataclass
+ class AudioShakeConfig:
+     """Configuration for AudioShake transcription service."""

-     def start_transcription(self, audio_filepath: str) -> str:
-         """Starts the transcription job and returns the job ID."""
-         # Step 1: Upload the audio file
-         asset_id = self._upload_file(audio_filepath)
-         self.logger.info(f"File uploaded successfully. Asset ID: {asset_id}")
+     api_token: Optional[str] = None
+     base_url: str = "https://groovy.audioshake.ai"
+     output_prefix: Optional[str] = None
+     timeout_minutes: int = 10  # Added timeout configuration

-         # Step 2: Create a job for transcription and alignment
-         job_id = self._create_job(asset_id)
-         self.logger.info(f"Job created successfully. Job ID: {job_id}")

-         return job_id
+ class AudioShakeAPI:
+     """Handles direct API interactions with AudioShake."""

-     def get_transcription_result(self, job_id: str) -> dict:
-         """Gets the results for a previously started job."""
-         self.logger.info(f"Getting results for job ID: {job_id}")
+     def __init__(self, config: AudioShakeConfig, logger):
+         self.config = config
+         self.logger = logger

-         # Wait for job completion and get results
-         result = self._get_job_result(job_id)
-         self.logger.info(f"Job completed. Processing results...")
+     def _validate_config(self) -> None:
+         """Validate API configuration."""
+         if not self.config.api_token:
+             raise ValueError("AudioShake API token must be provided")

-         # Process and return in standard format
-         return self._process_result(result)
+     def _get_headers(self) -> Dict[str, str]:
+         """Get headers for API requests."""
+         self._validate_config()  # Validate before making any API calls
+         return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}

-     def _upload_file(self, filepath):
+     def upload_file(self, filepath: str) -> str:
+         """Upload audio file and return asset ID."""
          self.logger.info(f"Uploading {filepath} to AudioShake")
-         url = f"{self.base_url}/upload"
-         headers = {"Authorization": f"Bearer {self.api_token}"}
+         self._validate_config()  # Validate before making API call
+
+         url = f"{self.config.base_url}/upload"
          with open(filepath, "rb") as file:
              files = {"file": (os.path.basename(filepath), file)}
-             response = requests.post(url, headers=headers, files=files)
-
-         self.logger.info(f"Upload response status code: {response.status_code}")
-         self.logger.info(f"Upload response content: {response.text}")
+             response = requests.post(url, headers={"Authorization": self._get_headers()["Authorization"]}, files=files)

+         self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
          response.raise_for_status()
          return response.json()["id"]

-     def _create_job(self, asset_id):
+     def create_job(self, asset_id: str) -> str:
+         """Create transcription job and return job ID."""
          self.logger.info(f"Creating job for asset {asset_id}")
-         url = f"{self.base_url}/job/"
-         headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+
+         url = f"{self.config.base_url}/job/"
          data = {
              "metadata": {"format": "json", "name": "alignment", "language": "en"},
              "callbackUrl": "https://example.com/webhook/alignment",
              "assetId": asset_id,
          }
-         response = requests.post(url, headers=headers, json=data)
+         response = requests.post(url, headers=self._get_headers(), json=data)
          response.raise_for_status()
          return response.json()["job"]["id"]

-     def _get_job_result(self, job_id):
+     def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
+         """Poll for job completion and return results."""
          self.logger.info(f"Getting job result for job {job_id}")
-         url = f"{self.base_url}/job/{job_id}"
-         headers = {"Authorization": f"Bearer {self.api_token}", "Content-Type": "application/json"}
+
+         url = f"{self.config.base_url}/job/{job_id}"
+         start_time = time.time()
+         last_status_log = start_time
+         timeout_seconds = self.config.timeout_minutes * 60
+
          while True:
-             response = requests.get(url, headers=headers)
+             current_time = time.time()
+             elapsed_time = current_time - start_time
+
+             # Check for timeout
+             if elapsed_time > timeout_seconds:
+                 raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")
+
+             # Log status every minute
+             if current_time - last_status_log >= 60:
+                 self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
+                 last_status_log = current_time
+
+             response = requests.get(url, headers=self._get_headers())
              response.raise_for_status()
              job_data = response.json()["job"]
+
             if job_data["status"] == "completed":
                  return job_data
             elif job_data["status"] == "failed":
-                 raise Exception("Job failed")
-             time.sleep(5)  # Wait 5 seconds before checking again
+                 raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")

-     def _process_result(self, job_data):
-         self.logger.debug(f"Processing result for job {job_data['id']}")
-         self.logger.debug(f"Job data: {json.dumps(job_data, indent=2)}")
+             time.sleep(5)  # Wait before next poll

-         output_assets = job_data.get("outputAssets", [])
-         self.logger.debug(f"Output assets: {output_assets}")

-         output_asset = next((asset for asset in output_assets if asset["name"] == "alignment.json"), None)
+ class AudioShakeTranscriber(BaseTranscriber):
+     """Transcription service using AudioShake's API."""

-         if not output_asset:
-             self.logger.error("'alignment.json' found in job results")
-             self.logger.error(f"Available output assets: {[asset['name'] for asset in output_assets]}")
-             raise Exception("Required output not found in job results")
+     def __init__(
+         self,
+         cache_dir: Union[str, Path],
+         config: Optional[AudioShakeConfig] = None,
+         logger: Optional[Any] = None,
+         api_client: Optional[AudioShakeAPI] = None,
+     ):
+         """Initialize AudioShake transcriber."""
+         super().__init__(cache_dir=cache_dir, logger=logger)
+         self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
+         self.api = api_client or AudioShakeAPI(self.config, self.logger)

-         transcription_url = output_asset["link"]
-         self.logger.debug(f"Output URL: {transcription_url}")
+     def get_name(self) -> str:
+         return "AudioShake"

-         response = requests.get(transcription_url)
-         response.raise_for_status()
-         transcription_data = response.json()
-         self.logger.debug(f"Output data: {json.dumps(transcription_data, indent=2)}")
+     def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
+         """Actually perform the transcription using AudioShake API."""
+         self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
+         self.logger.info(f"Starting transcription for {audio_filepath}")
+
+         try:
+             # Start job and get results
+             self.logger.debug("Calling start_transcription()")
+             job_id = self.start_transcription(audio_filepath)
+             self.logger.debug(f"Got job_id: {job_id}")
+
+             self.logger.debug("Calling get_transcription_result()")
+             result = self.get_transcription_result(job_id)
+             self.logger.debug("Got transcription result")

-         transcription_data = {"segments": transcription_data.get("lines", []), "text": transcription_data.get("text", "")}
+             return result
+         except Exception as e:
+             self.logger.error(f"Error in _perform_transcription: {str(e)}")
+             raise

-         # Ensure each segment has the required fields
-         for segment in transcription_data["segments"]:
-             if "words" not in segment:
-                 segment["words"] = []
-             if "text" not in segment:
-                 segment["text"] = " ".join(word["text"] for word in segment["words"])
+     def start_transcription(self, audio_filepath: str) -> str:
+         """Starts the transcription job and returns the job ID."""
+         self.logger.debug(f"Entering start_transcription() for {audio_filepath}")
+
+         # Upload file and create job
+         asset_id = self.api.upload_file(audio_filepath)
+         self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
+
+         job_id = self.api.create_job(asset_id)
+         self.logger.debug(f"Job created successfully. Job ID: {job_id}")
+
+         return job_id
+
+     def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
+         """Gets the raw results for a previously started job."""
+         self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")

-         transcription_data["output_filename"] = self.get_output_filename(" (AudioShake)")
+         # Wait for job completion
+         job_data = self.api.wait_for_job_result(job_id)
+         self.logger.debug("Job completed. Getting results...")

-         return transcription_data
+         output_asset = next((asset for asset in job_data.get("outputAssets", []) if asset["name"] == "alignment.json"), None)
+         if not output_asset:
+             raise TranscriptionError("Required output not found in job results")
+
+         # Fetch transcription data
+         response = requests.get(output_asset["link"])
+         response.raise_for_status()

-     def get_output_filename(self, suffix):
-         """Generate consistent filename with (Purpose) suffix pattern"""
-         return f"{self.output_prefix}{suffix}"
+         # Return combined raw data
+         raw_data = {"job_data": job_data, "transcription": response.json()}
+
+         self.logger.debug("Raw results retrieved successfully")
+         return raw_data
+
+     def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
+         """Process raw Audioshake API response into standard format."""
+         self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")
+
+         transcription_data = raw_data["transcription"]
+         job_data = raw_data["job_data"]
+
+         segments = []
+         all_words = []  # Collect all words across segments
+
+         for line in transcription_data.get("lines", []):
+             words = [
+                 Word(
+                     text=word["text"],
+                     start_time=word.get("start", 0.0),
+                     end_time=word.get("end", 0.0),
+                 )
+                 for word in line.get("words", [])
+             ]
+             all_words.extend(words)  # Add words to flat list
+
+             segments.append(
+                 LyricsSegment(
+                     text=line.get("text", " ".join(w.text for w in words)),
+                     words=words,
+                     start_time=min((w.start_time for w in words), default=0.0),
+                     end_time=max((w.end_time for w in words), default=0.0),
+                 )
+             )
+
+         return TranscriptionData(
+             text=transcription_data.get("text", ""),
+             words=all_words,
+             segments=segments,
+             source=self.get_name(),
+             metadata={
+                 "language": transcription_data.get("metadata", {}).get("language"),
+                 "job_id": job_data["id"],
+                 "duration": job_data.get("statusInfo", {}).get("duration"),
+             },
+         )
+
+     def get_output_filename(self, suffix: str) -> str:
+         """Generate consistent filename with (Purpose) suffix pattern."""
+         return f"{self.config.output_prefix}{suffix}"
@@ -0,0 +1,186 @@
+ from abc import ABC, abstractmethod
+ from dataclasses import dataclass
+ from typing import Dict, Any, Optional, Protocol, List, Union
+ from pathlib import Path
+ import logging
+ import os
+ import json
+ import hashlib
+ from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsSegment, Word
+
+
+ @dataclass
+ class TranscriptionData:
+     """Structured container for transcription results."""
+
+     segments: List[LyricsSegment]
+     words: List[Word]
+     text: str
+     source: str  # e.g., "whisper", "audioshake"
+     metadata: Optional[Dict[str, Any]] = None
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert TranscriptionData to dictionary for JSON serialization."""
+         return {
+             "segments": [segment.to_dict() for segment in self.segments],
+             "words": [word.to_dict() for word in self.words],
+             "text": self.text,
+             "source": self.source,
+             "metadata": self.metadata,
+         }
+
+
+ @dataclass
+ class TranscriptionResult:
+     name: str
+     priority: int
+     result: TranscriptionData
+
+
+ class LoggerProtocol(Protocol):
+     """Protocol for logger interface."""
+
+     def debug(self, msg: str) -> None: ...
+     def info(self, msg: str) -> None: ...
+     def warning(self, msg: str) -> None: ...
+     def error(self, msg: str) -> None: ...
+
+
+ class TranscriptionError(Exception):
+     """Base exception for transcription errors."""
+
+     pass
+
+
+ class BaseTranscriber(ABC):
+     """Base class for all transcription services."""
+
+     def __init__(self, cache_dir: Union[str, Path], logger: Optional[LoggerProtocol] = None):
+         """
+         Initialize transcriber with cache directory and logger.
+
+         Args:
+             cache_dir: Directory to store cache files. Must be provided.
+             logger: Logger instance to use. If None, creates a new logger.
+         """
+         self.cache_dir = Path(cache_dir)
+         self.logger = logger or logging.getLogger(__name__)
+
+         self.cache_dir.mkdir(parents=True, exist_ok=True)
+         self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
+
+     def _get_file_hash(self, filepath: str) -> str:
+         """Calculate MD5 hash of a file."""
+         self.logger.debug(f"Calculating hash for file: {filepath}")
+         md5_hash = hashlib.md5()
+         with open(filepath, "rb") as f:
+             for chunk in iter(lambda: f.read(4096), b""):
+                 md5_hash.update(chunk)
+         hash_result = md5_hash.hexdigest()
+         self.logger.debug(f"File hash: {hash_result}")
+         return hash_result
+
+     def _get_cache_path(self, file_hash: str, suffix: str) -> str:
+         """Get the cache file path for a given file hash."""
+         cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
+         self.logger.debug(f"Cache path: {cache_path}")
+         return cache_path
+
+     def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
+         """Save raw API response data to cache."""
+         self.logger.debug(f"Saving JSON to cache: {cache_path}")
+         with open(cache_path, "w") as f:
+             json.dump(raw_data, f, indent=2)
+         self.logger.debug("Cache save completed")
+
+     def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
+         """Load raw API response data from cache if it exists."""
+         self.logger.debug(f"Attempting to load from cache: {cache_path}")
+         try:
+             with open(cache_path, "r") as f:
+                 data = json.load(f)
+             self.logger.debug("Raw API response loaded from cache")
+             return data
+         except FileNotFoundError:
+             self.logger.debug("Cache file not found")
+             return None
+         except json.JSONDecodeError:
+             self.logger.warning(f"Cache file {cache_path} is corrupted")
+             return None
+
+     def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> TranscriptionData:
+         """Convert raw result to TranscriptionData, save to cache, and return."""
+         converted_cache_path = self._get_cache_path(file_hash, "converted")
+         converted_result = self._convert_result_format(raw_result)
+         self._save_to_cache(converted_cache_path, converted_result.to_dict())
+         return converted_result
+
+     def transcribe(self, audio_filepath: str) -> TranscriptionData:
+         """
+         Transcribe an audio file, using cache if available.
+
+         Args:
+             audio_filepath: Path to the audio file to transcribe
+
+         Returns:
+             TranscriptionData containing segments, text, and metadata
+         """
+         self.logger.debug(f"Starting transcription for {audio_filepath}")
+
+         try:
+             self._validate_audio_file(audio_filepath)
+             self.logger.debug("Audio file validation passed")
+
+             # Check cache first
+             file_hash = self._get_file_hash(audio_filepath)
+             raw_cache_path = self._get_cache_path(file_hash, "raw")
+
+             raw_data = self._load_from_cache(raw_cache_path)
+             if raw_data:
+                 self.logger.info(f"Using cached raw data for {audio_filepath}")
+                 return self._save_and_convert_result(file_hash, raw_data)
+
+             # If not in cache, perform transcription
+             self.logger.info(f"No cache found, transcribing {audio_filepath}")
+             raw_result = self._perform_transcription(audio_filepath)
+             self.logger.debug("Transcription completed")
+
+             # Save raw result to cache
+             self._save_to_cache(raw_cache_path, raw_result)
+
+             return self._save_and_convert_result(file_hash, raw_result)
+
+         except Exception as e:
+             self.logger.error(f"Error during transcription: {str(e)}")
+             raise
+
+     @abstractmethod
+     def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
+         """
+         Actually perform the transcription (implemented by subclasses).
+
+         Args:
+             audio_filepath: Path to the audio file to transcribe
+
+         Returns:
+             TranscriptionData containing segments, text, and metadata
+         """
+         pass  # pragma: no cover
+
+     @abstractmethod
+     def get_name(self) -> str:
+         """Return the name of this transcription service."""
+         pass  # pragma: no cover
+
+     def _validate_audio_file(self, audio_filepath: str) -> None:
+         """Validate that the audio file exists and is accessible."""
+         self.logger.debug(f"Validating audio file: {audio_filepath}")
+         if not os.path.exists(audio_filepath):
+             self.logger.error(f"Audio file not found: {audio_filepath}")
+             raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
+         self.logger.debug("Audio file validation successful")
+
+     @abstractmethod
+     def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
+         """Convert raw API response to TranscriptionData format."""
+         pass  # pragma: no cover
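The new base class in this hunk owns the whole cache workflow: transcribe() validates the audio file, hashes it with MD5, looks for a cached raw response, otherwise calls _perform_transcription(), writes the raw payload to "<name>_<hash>_raw.json", and then always runs _convert_result_format() before saving the converted output alongside it. A hedged sketch of how a custom subclass plugs into that contract follows; StubTranscriber, its fake payload, the import path, and the file paths are invented for illustration and are not part of the package.

    # Hypothetical subclass for illustration only. Note that whatever
    # _perform_transcription() returns must be JSON-serializable, because
    # transcribe() writes it straight to the raw cache before conversion.
    from typing import Any, Dict

    from lyrics_transcriber.transcribers.base_transcriber import BaseTranscriber, TranscriptionData  # assumed path


    class StubTranscriber(BaseTranscriber):
        def get_name(self) -> str:
            return "Stub"

        def _perform_transcription(self, audio_filepath: str) -> Dict[str, Any]:
            # Pretend API call; a real implementation would contact a transcription service here.
            return {"text": "la la la", "lines": []}

        def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
            return TranscriptionData(segments=[], words=[], text=raw_data["text"], source=self.get_name())


    # "song.flac" must be an existing file; a second call with the same file reuses the raw cache.
    transcriber = StubTranscriber(cache_dir="/tmp/lyrics_cache")
    data = transcriber.transcribe("song.flac")

Because conversion is re-run even on a cache hit, improvements to a provider's _convert_result_format() take effect without re-calling the remote API.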