PyPI - lyrics-transcriber - Versions diffs - 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl - Mend

lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

lyrics_transcriber/__init__.py +2 -5
lyrics_transcriber/cli/cli_main.py +206 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/controller.py +317 -0
lyrics_transcriber/correction/base_strategy.py +29 -0
lyrics_transcriber/correction/corrector.py +52 -0
lyrics_transcriber/correction/strategy_diff.py +263 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
lyrics_transcriber/lyrics/genius.py +70 -0
lyrics_transcriber/lyrics/spotify.py +82 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/generator.py +271 -0
lyrics_transcriber/{utils → output}/subtitles.py +12 -12
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/audioshake.py +216 -0
lyrics_transcriber/transcribers/base_transcriber.py +186 -0
lyrics_transcriber/transcribers/whisper.py +321 -0
{lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +5 -16
lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
lyrics_transcriber/audioshake_transcriber.py +0 -122
lyrics_transcriber/corrector.py +0 -57
lyrics_transcriber/llm_prompts/README.md +0 -10
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_andrew_handwritten_20231118.txt +0 -55
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_correction_gpt_optimised_20231119.txt +0 -36
lyrics_transcriber/llm_prompts/llm_prompt_lyrics_matching_andrew_handwritten_20231118.txt +0 -19
lyrics_transcriber/llm_prompts/promptfooconfig.yaml +0 -61
lyrics_transcriber/llm_prompts/test_data/ABBA-UnderAttack-Genius.txt +0 -48
lyrics_transcriber/transcriber.py +0 -934
lyrics_transcriber/utils/cli.py +0 -179
lyrics_transcriber-0.20.0.dist-info/RECORD +0 -19
lyrics_transcriber-0.20.0.dist-info/entry_points.txt +0 -3
/lyrics_transcriber/{utils → cli}/__init__.py +0 -0
/lyrics_transcriber/{utils → output}/ass.py +0 -0
{lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
{lyrics_transcriber-0.20.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0

lyrics_transcriber/transcribers/audioshake.py ADDED Viewed

@@ -0,0 +1,216 @@
+from dataclasses import dataclass
+import requests
+import time
+import os
+from typing import Dict, Optional, Any, Union
+from pathlib import Path
+from .base_transcriber import BaseTranscriber, TranscriptionData, LyricsSegment, Word, TranscriptionError
+@dataclass
+class AudioShakeConfig:
+    """Configuration for AudioShake transcription service."""
+    api_token: Optional[str] = None
+    base_url: str = "https://groovy.audioshake.ai"
+    output_prefix: Optional[str] = None
+    timeout_minutes: int = 10  # Added timeout configuration
+class AudioShakeAPI:
+    """Handles direct API interactions with AudioShake."""
+    def __init__(self, config: AudioShakeConfig, logger):
+        self.config = config
+        self.logger = logger
+    def _validate_config(self) -> None:
+        """Validate API configuration."""
+        if not self.config.api_token:
+            raise ValueError("AudioShake API token must be provided")
+    def _get_headers(self) -> Dict[str, str]:
+        """Get headers for API requests."""
+        self._validate_config()  # Validate before making any API calls
+        return {"Authorization": f"Bearer {self.config.api_token}", "Content-Type": "application/json"}
+    def upload_file(self, filepath: str) -> str:
+        """Upload audio file and return asset ID."""
+        self.logger.info(f"Uploading {filepath} to AudioShake")
+        self._validate_config()  # Validate before making API call
+        url = f"{self.config.base_url}/upload"
+        with open(filepath, "rb") as file:
+            files = {"file": (os.path.basename(filepath), file)}
+            response = requests.post(url, headers={"Authorization": self._get_headers()["Authorization"]}, files=files)
+        self.logger.debug(f"Upload response: {response.status_code} - {response.text}")
+        response.raise_for_status()
+        return response.json()["id"]
+    def create_job(self, asset_id: str) -> str:
+        """Create transcription job and return job ID."""
+        self.logger.info(f"Creating job for asset {asset_id}")
+        url = f"{self.config.base_url}/job/"
+        data = {
+            "metadata": {"format": "json", "name": "alignment", "language": "en"},
+            "callbackUrl": "https://example.com/webhook/alignment",
+            "assetId": asset_id,
+        }
+        response = requests.post(url, headers=self._get_headers(), json=data)
+        response.raise_for_status()
+        return response.json()["job"]["id"]
+    def wait_for_job_result(self, job_id: str) -> Dict[str, Any]:
+        """Poll for job completion and return results."""
+        self.logger.info(f"Getting job result for job {job_id}")
+        url = f"{self.config.base_url}/job/{job_id}"
+        start_time = time.time()
+        last_status_log = start_time
+        timeout_seconds = self.config.timeout_minutes * 60
+        while True:
+            current_time = time.time()
+            elapsed_time = current_time - start_time
+            # Check for timeout
+            if elapsed_time > timeout_seconds:
+                raise TranscriptionError(f"Transcription timed out after {self.config.timeout_minutes} minutes")
+            # Log status every minute
+            if current_time - last_status_log >= 60:
+                self.logger.info(f"Still waiting for transcription... " f"Elapsed time: {int(elapsed_time/60)} minutes")
+                last_status_log = current_time
+            response = requests.get(url, headers=self._get_headers())
+            response.raise_for_status()
+            job_data = response.json()["job"]
+            if job_data["status"] == "completed":
+                return job_data
+            elif job_data["status"] == "failed":
+                raise TranscriptionError(f"Job failed: {job_data.get('error', 'Unknown error')}")
+            time.sleep(5)  # Wait before next poll
+class AudioShakeTranscriber(BaseTranscriber):
+    """Transcription service using AudioShake's API."""
+    def __init__(
+        self,
+        cache_dir: Union[str, Path],
+        config: Optional[AudioShakeConfig] = None,
+        logger: Optional[Any] = None,
+        api_client: Optional[AudioShakeAPI] = None,
+    ):
+        """Initialize AudioShake transcriber."""
+        super().__init__(cache_dir=cache_dir, logger=logger)
+        self.config = config or AudioShakeConfig(api_token=os.getenv("AUDIOSHAKE_API_TOKEN"))
+        self.api = api_client or AudioShakeAPI(self.config, self.logger)
+    def get_name(self) -> str:
+        return "AudioShake"
+    def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
+        """Actually perform the transcription using AudioShake API."""
+        self.logger.debug(f"Entering _perform_transcription() for {audio_filepath}")
+        self.logger.info(f"Starting transcription for {audio_filepath}")
+        try:
+            # Start job and get results
+            self.logger.debug("Calling start_transcription()")
+            job_id = self.start_transcription(audio_filepath)
+            self.logger.debug(f"Got job_id: {job_id}")
+            self.logger.debug("Calling get_transcription_result()")
+            result = self.get_transcription_result(job_id)
+            self.logger.debug("Got transcription result")
+            return result
+        except Exception as e:
+            self.logger.error(f"Error in _perform_transcription: {str(e)}")
+            raise
+    def start_transcription(self, audio_filepath: str) -> str:
+        """Starts the transcription job and returns the job ID."""
+        self.logger.debug(f"Entering start_transcription() for {audio_filepath}")
+        # Upload file and create job
+        asset_id = self.api.upload_file(audio_filepath)
+        self.logger.debug(f"File uploaded successfully. Asset ID: {asset_id}")
+        job_id = self.api.create_job(asset_id)
+        self.logger.debug(f"Job created successfully. Job ID: {job_id}")
+        return job_id
+    def get_transcription_result(self, job_id: str) -> Dict[str, Any]:
+        """Gets the raw results for a previously started job."""
+        self.logger.debug(f"Entering get_transcription_result() for job ID: {job_id}")
+        # Wait for job completion
+        job_data = self.api.wait_for_job_result(job_id)
+        self.logger.debug("Job completed. Getting results...")
+        output_asset = next((asset for asset in job_data.get("outputAssets", []) if asset["name"] == "alignment.json"), None)
+        if not output_asset:
+            raise TranscriptionError("Required output not found in job results")
+        # Fetch transcription data
+        response = requests.get(output_asset["link"])
+        response.raise_for_status()
+        # Return combined raw data
+        raw_data = {"job_data": job_data, "transcription": response.json()}
+        self.logger.debug("Raw results retrieved successfully")
+        return raw_data
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
+        """Process raw Audioshake API response into standard format."""
+        self.logger.debug(f"Processing result for job {raw_data['job_data']['id']}")
+        transcription_data = raw_data["transcription"]
+        job_data = raw_data["job_data"]
+        segments = []
+        all_words = []  # Collect all words across segments
+        for line in transcription_data.get("lines", []):
+            words = [
+                Word(
+                    text=word["text"],
+                    start_time=word.get("start", 0.0),
+                    end_time=word.get("end", 0.0),
+                )
+                for word in line.get("words", [])
+            ]
+            all_words.extend(words)  # Add words to flat list
+            segments.append(
+                LyricsSegment(
+                    text=line.get("text", " ".join(w.text for w in words)),
+                    words=words,
+                    start_time=min((w.start_time for w in words), default=0.0),
+                    end_time=max((w.end_time for w in words), default=0.0),
+                )
+            )
+        return TranscriptionData(
+            text=transcription_data.get("text", ""),
+            words=all_words,
+            segments=segments,
+            source=self.get_name(),
+            metadata={
+                "language": transcription_data.get("metadata", {}).get("language"),
+                "job_id": job_data["id"],
+                "duration": job_data.get("statusInfo", {}).get("duration"),
+            },
+        )
+    def get_output_filename(self, suffix: str) -> str:
+        """Generate consistent filename with (Purpose) suffix pattern."""
+        return f"{self.config.output_prefix}{suffix}"

lyrics_transcriber/transcribers/base_transcriber.py ADDED Viewed

@@ -0,0 +1,186 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Dict, Any, Optional, Protocol, List, Union
+from pathlib import Path
+import logging
+import os
+import json
+import hashlib
+from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsSegment, Word
+@dataclass
+class TranscriptionData:
+    """Structured container for transcription results."""
+    segments: List[LyricsSegment]
+    words: List[Word]
+    text: str
+    source: str  # e.g., "whisper", "audioshake"
+    metadata: Optional[Dict[str, Any]] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert TranscriptionData to dictionary for JSON serialization."""
+        return {
+            "segments": [segment.to_dict() for segment in self.segments],
+            "words": [word.to_dict() for word in self.words],
+            "text": self.text,
+            "source": self.source,
+            "metadata": self.metadata,
+        }
+@dataclass
+class TranscriptionResult:
+    name: str
+    priority: int
+    result: TranscriptionData
+class LoggerProtocol(Protocol):
+    """Protocol for logger interface."""
+    def debug(self, msg: str) -> None: ...
+    def info(self, msg: str) -> None: ...
+    def warning(self, msg: str) -> None: ...
+    def error(self, msg: str) -> None: ...
+class TranscriptionError(Exception):
+    """Base exception for transcription errors."""
+    pass
+class BaseTranscriber(ABC):
+    """Base class for all transcription services."""
+    def __init__(self, cache_dir: Union[str, Path], logger: Optional[LoggerProtocol] = None):
+        """
+        Initialize transcriber with cache directory and logger.
+        Args:
+            cache_dir: Directory to store cache files. Must be provided.
+            logger: Logger instance to use. If None, creates a new logger.
+        """
+        self.cache_dir = Path(cache_dir)
+        self.logger = logger or logging.getLogger(__name__)
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
+    def _get_file_hash(self, filepath: str) -> str:
+        """Calculate MD5 hash of a file."""
+        self.logger.debug(f"Calculating hash for file: {filepath}")
+        md5_hash = hashlib.md5()
+        with open(filepath, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                md5_hash.update(chunk)
+        hash_result = md5_hash.hexdigest()
+        self.logger.debug(f"File hash: {hash_result}")
+        return hash_result
+    def _get_cache_path(self, file_hash: str, suffix: str) -> str:
+        """Get the cache file path for a given file hash."""
+        cache_path = os.path.join(self.cache_dir, f"{self.get_name().lower()}_{file_hash}_{suffix}.json")
+        self.logger.debug(f"Cache path: {cache_path}")
+        return cache_path
+    def _save_to_cache(self, cache_path: str, raw_data: Dict[str, Any]) -> None:
+        """Save raw API response data to cache."""
+        self.logger.debug(f"Saving JSON to cache: {cache_path}")
+        with open(cache_path, "w") as f:
+            json.dump(raw_data, f, indent=2)
+        self.logger.debug("Cache save completed")
+    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
+        """Load raw API response data from cache if it exists."""
+        self.logger.debug(f"Attempting to load from cache: {cache_path}")
+        try:
+            with open(cache_path, "r") as f:
+                data = json.load(f)
+                self.logger.debug("Raw API response loaded from cache")
+                return data
+        except FileNotFoundError:
+            self.logger.debug("Cache file not found")
+            return None
+        except json.JSONDecodeError:
+            self.logger.warning(f"Cache file {cache_path} is corrupted")
+            return None
+    def _save_and_convert_result(self, file_hash: str, raw_result: Dict[str, Any]) -> TranscriptionData:
+        """Convert raw result to TranscriptionData, save to cache, and return."""
+        converted_cache_path = self._get_cache_path(file_hash, "converted")
+        converted_result = self._convert_result_format(raw_result)
+        self._save_to_cache(converted_cache_path, converted_result.to_dict())
+        return converted_result
+    def transcribe(self, audio_filepath: str) -> TranscriptionData:
+        """
+        Transcribe an audio file, using cache if available.
+        Args:
+            audio_filepath: Path to the audio file to transcribe
+        Returns:
+            TranscriptionData containing segments, text, and metadata
+        """
+        self.logger.debug(f"Starting transcription for {audio_filepath}")
+        try:
+            self._validate_audio_file(audio_filepath)
+            self.logger.debug("Audio file validation passed")
+            # Check cache first
+            file_hash = self._get_file_hash(audio_filepath)
+            raw_cache_path = self._get_cache_path(file_hash, "raw")
+            raw_data = self._load_from_cache(raw_cache_path)
+            if raw_data:
+                self.logger.info(f"Using cached raw data for {audio_filepath}")
+                return self._save_and_convert_result(file_hash, raw_data)
+            # If not in cache, perform transcription
+            self.logger.info(f"No cache found, transcribing {audio_filepath}")
+            raw_result = self._perform_transcription(audio_filepath)
+            self.logger.debug("Transcription completed")
+            # Save raw result to cache
+            self._save_to_cache(raw_cache_path, raw_result)
+            return self._save_and_convert_result(file_hash, raw_result)
+        except Exception as e:
+            self.logger.error(f"Error during transcription: {str(e)}")
+            raise
+    @abstractmethod
+    def _perform_transcription(self, audio_filepath: str) -> TranscriptionData:
+        """
+        Actually perform the transcription (implemented by subclasses).
+        Args:
+            audio_filepath: Path to the audio file to transcribe
+        Returns:
+            TranscriptionData containing segments, text, and metadata
+        """
+        pass  # pragma: no cover
+    @abstractmethod
+    def get_name(self) -> str:
+        """Return the name of this transcription service."""
+        pass  # pragma: no cover
+    def _validate_audio_file(self, audio_filepath: str) -> None:
+        """Validate that the audio file exists and is accessible."""
+        self.logger.debug(f"Validating audio file: {audio_filepath}")
+        if not os.path.exists(audio_filepath):
+            self.logger.error(f"Audio file not found: {audio_filepath}")
+            raise FileNotFoundError(f"Audio file not found: {audio_filepath}")
+        self.logger.debug("Audio file validation successful")
+    @abstractmethod
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> TranscriptionData:
+        """Convert raw API response to TranscriptionData format."""
+        pass  # pragma: no cover

lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

lyrics-transcriber 0.20.0__py3-none-any.whl → 0.30.1__py3-none-any.whl