PyPI - lyrics-transcriber - Versions diffs - 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl - Mend

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

lyrics_transcriber/cli/{main.py → cli_main.py} +15 -3
lyrics_transcriber/core/controller.py +129 -95
lyrics_transcriber/correction/base_strategy.py +29 -0
lyrics_transcriber/correction/corrector.py +52 -0
lyrics_transcriber/correction/strategy_diff.py +263 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +201 -0
lyrics_transcriber/lyrics/genius.py +70 -0
lyrics_transcriber/lyrics/spotify.py +82 -0
lyrics_transcriber/output/generator.py +158 -97
lyrics_transcriber/output/subtitles.py +12 -12
lyrics_transcriber/storage/dropbox.py +110 -134
lyrics_transcriber/transcribers/audioshake.py +170 -105
lyrics_transcriber/transcribers/base_transcriber.py +186 -0
lyrics_transcriber/transcribers/whisper.py +268 -133
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/METADATA +1 -1
lyrics_transcriber-0.30.1.dist-info/RECORD +25 -0
lyrics_transcriber-0.30.1.dist-info/entry_points.txt +3 -0
lyrics_transcriber/core/corrector.py +0 -56
lyrics_transcriber/core/fetcher.py +0 -143
lyrics_transcriber/storage/tokens.py +0 -116
lyrics_transcriber/transcribers/base.py +0 -31
lyrics_transcriber-0.30.0.dist-info/RECORD +0 -22
lyrics_transcriber-0.30.0.dist-info/entry_points.txt +0 -3
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/LICENSE +0 -0
{lyrics_transcriber-0.30.0.dist-info → lyrics_transcriber-0.30.1.dist-info}/WHEEL +0 -0

lyrics_transcriber/correction/strategy_diff.py ADDED Viewed

@@ -0,0 +1,263 @@
+import logging
+import difflib
+from typing import Any, Dict, List, Optional, Set, Tuple
+from ..transcribers.base_transcriber import TranscriptionData, LyricsSegment, Word, TranscriptionResult
+from ..lyrics.base_lyrics_provider import LyricsData
+from .base_strategy import CorrectionResult, CorrectionStrategy
+class DiffBasedCorrector(CorrectionStrategy):
+    """
+    Implements a word-diff based correction strategy that aligns transcribed
+    lyrics against reference lyrics and swaps in the reference wording.
+    Key Features:
+    - Accepts several reference lyrics sources and votes across them
+    - Preserves timing information from original transcription
+    - Provides detailed metadata about corrections made
+    - Falls back to original words when no correction was observed
+    Known limitations:
+    - Anchor words are collected and counted in the result metadata, but the
+      alignment itself does not use them yet.
+    - Corrections are keyed by word, so every occurrence of a transcribed
+      word receives the same replacement.
+    Potential Improvements:
+    1. Add phonetic matching for better word alignment (e.g., Soundex or Metaphone)
+    2. Implement context-aware corrections using surrounding words
+    3. Use more sophisticated alignment algorithms (e.g., Smith-Waterman)
+    4. Add validation using language models to ensure semantic consistency
+    5. Implement word normalization (e.g., handling contractions, punctuation)
+    """
+    # Common English function words that are never treated as anchors.
+    # Built once at class creation instead of on every call.
+    _STOP_WORDS = frozenset(
+        {
+            "a", "an", "and", "are", "as", "at", "be", "by", "for", "from",
+            "has", "he", "in", "is", "it", "its", "of", "on", "that", "the",
+            "to", "was", "were", "will", "with",
+        }
+    )
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        """Store the given logger, or fall back to this module's logger."""
+        self.logger = logger or logging.getLogger(__name__)
+    def _find_anchor_words(self, segments: List[LyricsSegment]) -> Set[str]:
+        """
+        Collect candidate anchor words from transcribed segments.
+        Words are lower-cased and stripped. A word is skipped when it has two
+        or fewer characters or is a stop word. A remaining word becomes an
+        anchor when it has four or more characters, or when it has been seen
+        at least twice so far.
+        Args:
+            segments: Segments whose ``words`` are scanned.
+        Returns:
+            The set of lower-cased anchor words.
+        """
+        anchors: Set[str] = set()
+        seen_positions: Dict[str, List[int]] = {}  # word -> in-segment indices
+        for segment in segments:
+            for index, word in enumerate(segment.words):
+                token = word.text.lower().strip()
+                if len(token) <= 2 or token in self._STOP_WORDS:
+                    continue
+                positions = seen_positions.setdefault(token, [])
+                positions.append(index)
+                if len(positions) >= 2 or len(token) >= 4:
+                    anchors.add(token)
+        return anchors
+    def _align_texts(self, source_text: str, target_text: str) -> List[Tuple[str, str]]:
+        """
+        Align two texts word by word using difflib.
+        Both texts are lower-cased and split on whitespace. Words inside a
+        matching block are paired with their identical counterpart. Words in
+        the gap between two matching blocks are paired positionally; when the
+        gap is longer on one side, the surplus words get no partner.
+        Args:
+            source_text: Transcribed text.
+            target_text: Reference lyrics text.
+        Returns:
+            ``(source_word, target_word)`` pairs in source order.
+        """
+        source_words = source_text.lower().split()
+        target_words = target_text.lower().split()
+        matcher = difflib.SequenceMatcher(None, source_words, target_words)
+        alignments: List[Tuple[str, str]] = []
+        i = j = 0
+        # get_matching_blocks() always ends with a zero-size sentinel block at
+        # (len(a), len(b)), so trailing unmatched words go through the same
+        # gap handling as every other gap.
+        for block in matcher.get_matching_blocks():
+            # Unmatched words before this block: pair positionally. zip()
+            # stops at the shorter side.
+            alignments.extend(zip(source_words[i : block.a], target_words[j : block.b]))
+            # Matched words: always start from the block's own offsets. The
+            # previous implementation continued from wherever the gap loop
+            # stopped, which shifted every later pair after an uneven gap.
+            alignments.extend(
+                zip(
+                    source_words[block.a : block.a + block.size],
+                    target_words[block.b : block.b + block.size],
+                )
+            )
+            i = block.a + block.size
+            j = block.b + block.size
+        return alignments
+    def _create_correction_mapping(
+        self, transcription: TranscriptionData, lyrics_results: List[LyricsData], anchor_words: Set[str]
+    ) -> Dict[str, Dict[str, int]]:
+        """
+        Count, per transcribed word, which reference words it was aligned to.
+        Args:
+            transcription: Primary transcription; its ``text`` is aligned.
+            lyrics_results: Reference lyrics; each ``lyrics`` text is aligned
+                against the transcription separately.
+            anchor_words: Currently unused; kept for interface stability.
+        Returns:
+            ``{transcribed_word: {reference_word: count}}`` covering only
+            pairs whose two words differ.
+        """
+        correction_counts: Dict[str, Dict[str, int]] = {}
+        for lyrics in lyrics_results:
+            # Aligned words come from str.split(), so they carry no
+            # surrounding whitespace.
+            for trans_word, lyrics_word in self._align_texts(transcription.text, lyrics.lyrics):
+                if trans_word == lyrics_word:
+                    continue
+                candidates = correction_counts.setdefault(trans_word, {})
+                candidates[lyrics_word] = candidates.get(lyrics_word, 0) + 1
+        return correction_counts
+    def correct(
+        self,
+        transcription_results: List[TranscriptionResult],
+        lyrics_results: List[LyricsData],
+    ) -> CorrectionResult:
+        """
+        Apply the diff-based correction algorithm.
+        Args:
+            transcription_results: Candidate transcriptions; the one that
+                sorts first by ``priority`` becomes the primary source.
+            lyrics_results: Reference lyrics used to derive corrections.
+        Returns:
+            A CorrectionResult with corrected segments, joined text, the
+            number of corrections, and strategy metadata.
+        Raises:
+            ValueError: If ``transcription_results`` is empty.
+        """
+        self.logger.info("Starting diff-based correction")
+        sorted_results = sorted(transcription_results, key=lambda x: x.priority)
+        if not sorted_results:
+            raise ValueError("No transcription results available")
+        primary_transcription = sorted_results[0].result
+        # Anchor words are gathered from every transcription; they currently
+        # only feed the metadata count.
+        anchor_words = self._find_anchor_words(primary_transcription.segments)
+        for result in sorted_results[1:]:
+            anchor_words.update(self._find_anchor_words(result.result.segments))
+        corrections = self._create_correction_mapping(primary_transcription, lyrics_results, anchor_words)
+        corrected_segments = []
+        corrections_made = 0
+        source_mapping = {}
+        for segment in primary_transcription.segments:
+            corrected_words = []
+            for word in segment.words:
+                candidates = corrections.get(word.text.lower().strip())
+                if not candidates:
+                    # No correction observed: keep the original word object.
+                    corrected_words.append(word)
+                    continue
+                # Most frequently aligned reference word wins; ties go to the
+                # candidate recorded first.
+                best_correction = max(candidates.items(), key=lambda x: x[1])[0]
+                corrected_words.append(
+                    Word(
+                        text=best_correction,
+                        start_time=word.start_time,
+                        end_time=word.end_time,
+                        confidence=None,  # We don't have confidence values
+                    )
+                )
+                corrections_made += 1
+                source_mapping[best_correction] = "internet_lyrics"
+            corrected_segments.append(
+                LyricsSegment(
+                    text=" ".join(w.text for w in corrected_words),
+                    words=corrected_words,
+                    start_time=segment.start_time,
+                    end_time=segment.end_time,
+                )
+            )
+        # Without per-word confidence values, report the share of words left
+        # untouched as the confidence figure.
+        total_words = sum(len(segment.words) for segment in corrected_segments)
+        correction_ratio = 1 - (corrections_made / total_words if total_words > 0 else 0)
+        return CorrectionResult(
+            segments=corrected_segments,
+            text=" ".join(segment.text for segment in corrected_segments),
+            confidence=correction_ratio,  # Use correction ratio as confidence
+            corrections_made=corrections_made,
+            source_mapping=source_mapping,
+            metadata={
+                "correction_strategy": "diff_based",
+                "anchor_words_count": len(anchor_words),
+                "total_words": total_words,
+                "correction_ratio": correction_ratio,
+                "primary_source": sorted_results[0].name,
+            },
+        )

lyrics_transcriber/lyrics/base_lyrics_provider.py ADDED Viewed

@@ -0,0 +1,201 @@
+from dataclasses import dataclass, asdict
+import logging
+from typing import Optional, Dict, Any, List
+import json
+import hashlib
+from pathlib import Path
+import os
+from abc import ABC, abstractmethod
+@dataclass
+class Word:
+    """A single word together with its timing and optional confidence score."""
+    text: str
+    start_time: float
+    end_time: float
+    confidence: Optional[float] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict; the confidence key is omitted when unset."""
+        payload = asdict(self)
+        if self.confidence is None:
+            payload.pop("confidence")
+        return payload
+@dataclass
+class LyricsSegment:
+    """One segment (line) of lyrics with its words and start/end times."""
+    text: str
+    words: List[Word]
+    start_time: float
+    end_time: float
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict, serializing each word via its to_dict()."""
+        serialized_words = []
+        for item in self.words:
+            serialized_words.append(item.to_dict())
+        return dict(
+            text=self.text,
+            words=serialized_words,
+            start_time=self.start_time,
+            end_time=self.end_time,
+        )
+@dataclass
+class LyricsProviderConfig:
+    """Configuration for lyrics providers."""
+    # API token for the Genius provider.
+    genius_api_token: Optional[str] = None
+    # Cookie value for the Spotify provider.
+    spotify_cookie: Optional[str] = None
+    # Directory for cached provider responses; caching is skipped when unset.
+    cache_dir: Optional[str] = None
+    # Path of the audio file; providers hash its contents to build cache keys.
+    audio_filepath: Optional[str] = None
+@dataclass
+class LyricsMetadata:
+    """Standardized metadata for lyrics results."""
+    # Required identification fields
+    source: str
+    track_name: str
+    artist_names: str
+    # Common metadata fields
+    album_name: Optional[str] = None
+    duration_ms: Optional[int] = None
+    explicit: Optional[bool] = None
+    language: Optional[str] = None
+    is_synced: bool = False
+    # Lyrics provider details
+    lyrics_provider: Optional[str] = None
+    lyrics_provider_id: Optional[str] = None
+    # Provider-specific metadata. The default is None, so the annotation is
+    # Optional; the runtime default is unchanged.
+    provider_metadata: Optional[Dict[str, Any]] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert metadata to dictionary for JSON serialization."""
+        return asdict(self)
+@dataclass
+class LyricsData:
+    """Common result shape returned by every lyrics provider."""
+    lyrics: str
+    segments: List[LyricsSegment]
+    metadata: LyricsMetadata
+    def to_dict(self) -> Dict[str, Any]:
+        """Return a JSON-ready dict of the lyrics text, segments and metadata."""
+        segment_dicts = [segment.to_dict() for segment in self.segments]
+        metadata_dict = self.metadata.to_dict()
+        return {
+            "lyrics": self.lyrics,
+            "segments": segment_dicts,
+            "metadata": metadata_dict,
+        }
+class BaseLyricsProvider(ABC):
+    """
+    Base class for lyrics providers.
+    Subclasses implement fetching raw data and converting it to LyricsData;
+    this class adds optional JSON file caching of both forms.
+    """
+    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
+        """
+        Store configuration and create the cache directory when one is set.
+        Args:
+            config: Provider configuration; ``cache_dir`` and
+                ``audio_filepath`` are read here.
+            logger: Logger to use; defaults to this module's logger.
+        """
+        self.logger = logger or logging.getLogger(__name__)
+        self.cache_dir = Path(config.cache_dir) if config.cache_dir else None
+        self.audio_filepath = config.audio_filepath
+        if self.cache_dir:
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+            self.logger.debug(f"Initialized {self.__class__.__name__} with cache dir: {self.cache_dir}")
+    def fetch_lyrics(self, artist: str, title: str) -> Optional[LyricsData]:
+        """
+        Fetch lyrics for a given artist and title, using cache if available.
+        Args:
+            artist: Artist name passed to the provider.
+            title: Track title passed to the provider.
+        Returns:
+            Converted lyrics, or None when the source returned nothing.
+        """
+        if not self.cache_dir:
+            return self._fetch_and_convert_result(artist, title)
+        # Prefer the audio file's content hash as cache key. Without an audio
+        # file, key on artist/title instead of failing in open(None).
+        if self.audio_filepath:
+            cache_key = self._get_file_hash(self.audio_filepath)
+        else:
+            cache_key = self._get_artist_title_hash(artist, title)
+        raw_cache_path = self._get_cache_path(cache_key, "raw")
+        # Try to load from cache first
+        raw_data = self._load_from_cache(raw_cache_path)
+        if raw_data is not None:
+            self.logger.info(f"Using cached lyrics for {artist} - {title}")
+            return self._save_and_convert_result(cache_key, raw_data)
+        # If not in cache, fetch from source
+        raw_result = self._fetch_data_from_source(artist, title)
+        if raw_result:
+            # Save raw API response
+            self._save_to_cache(raw_cache_path, raw_result)
+            return self._save_and_convert_result(cache_key, raw_result)
+        return None
+    def _get_file_hash(self, filepath: str) -> str:
+        """Return the MD5 hex digest of a file's contents, read in chunks."""
+        self.logger.debug(f"Calculating hash for file: {filepath}")
+        md5_hash = hashlib.md5()
+        with open(filepath, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                md5_hash.update(chunk)
+        hash_result = md5_hash.hexdigest()
+        self.logger.debug(f"File hash: {hash_result}")
+        return hash_result
+    def _get_artist_title_hash(self, artist: str, title: str) -> str:
+        """Return the MD5 hex digest of the lower-cased ``artist_title`` string."""
+        combined = f"{artist.lower()}_{title.lower()}"
+        return hashlib.md5(combined.encode()).hexdigest()
+    def _get_cache_path(self, cache_key: str, suffix: str) -> str:
+        """Return ``<cache_dir>/<provider>_<cache_key>_<suffix>.json``."""
+        return os.path.join(self.cache_dir, f"{self.get_name().lower()}_{cache_key}_{suffix}.json")
+    def _save_to_cache(self, cache_path: str, data: Dict[str, Any]) -> None:
+        """Write data to the cache file as indented UTF-8 JSON."""
+        self.logger.debug(f"Saving lyrics to cache: {cache_path}")
+        with open(cache_path, "w", encoding="utf-8") as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+        self.logger.debug("Cache save completed")
+    def _load_from_cache(self, cache_path: str) -> Optional[Dict[str, Any]]:
+        """Load data from cache; return None if the file is missing or not valid JSON."""
+        self.logger.debug(f"Attempting to load from cache: {cache_path}")
+        try:
+            with open(cache_path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+                self.logger.debug("Lyrics loaded from cache")
+                return data
+        except FileNotFoundError:
+            self.logger.debug("Cache file not found")
+            return None
+        except json.JSONDecodeError:
+            self.logger.warning(f"Cache file {cache_path} is corrupted")
+            return None
+    def _save_and_convert_result(self, cache_key: str, raw_data: Dict[str, Any]) -> LyricsData:
+        """Convert raw result to standardized format, save to cache, and return."""
+        converted_cache_path = self._get_cache_path(cache_key, "converted")
+        converted_result = self._convert_result_format(raw_data)
+        # Convert to dictionary before saving to cache
+        self._save_to_cache(converted_cache_path, converted_result.to_dict())
+        return converted_result
+    def _fetch_and_convert_result(self, artist: str, title: str) -> Optional[LyricsData]:
+        """Fetch and convert result when caching is disabled."""
+        raw_result = self._fetch_data_from_source(artist, title)
+        if raw_result:
+            return self._convert_result_format(raw_result)
+        return None
+    @abstractmethod
+    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """Fetch raw data from the source (implemented by subclasses)."""
+        raise NotImplementedError("Subclasses must implement _fetch_data_from_source")
+    @abstractmethod
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """Convert raw API response to standardized format (implemented by subclasses)."""
+        raise NotImplementedError("Subclasses must implement _convert_result_format")
+    def get_name(self) -> str:
+        """Return the class name with every "Provider" occurrence removed."""
+        return self.__class__.__name__.replace("Provider", "")

lyrics_transcriber/lyrics/genius.py ADDED Viewed

@@ -0,0 +1,70 @@
+import logging
+from typing import Optional, Dict, Any
+import lyricsgenius
+from .base_lyrics_provider import BaseLyricsProvider, LyricsMetadata, LyricsProviderConfig, LyricsData
+class GeniusProvider(BaseLyricsProvider):
+    """Handles fetching lyrics from Genius."""
+    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
+        """
+        Create the Genius client when an API token is configured.
+        Args:
+            config: Provider configuration; ``genius_api_token`` is read here.
+            logger: Logger to use; defaults to the base class's choice.
+        """
+        super().__init__(config, logger)
+        self.api_token = config.genius_api_token
+        self.client = None
+        if self.api_token:
+            self.client = lyricsgenius.Genius(self.api_token)
+            self.client.verbose = False
+            self.client.remove_section_headers = True
+    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """
+        Fetch raw song data from Genius API.
+        Returns:
+            The song as a dict, or None when no client is configured, no song
+            is found, or the lookup raises.
+        """
+        if not self.client:
+            self.logger.warning("No Genius API token provided")
+            return None
+        self.logger.info(f"Searching Genius for {artist} - {title}")
+        try:
+            song = self.client.search_song(title, artist)
+            if song:
+                self.logger.info("Found lyrics on Genius")
+                return song.to_dict()
+        except Exception as e:
+            # Best-effort lookup: any client failure is logged and reported
+            # to the caller as "no lyrics".
+            self.logger.error(f"Error fetching from Genius: {str(e)}")
+        return None
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """
+        Convert Genius's raw API response to standardized format.
+        Keys that are absent or explicitly null in ``raw_data`` are treated
+        the same way, so a payload with ``"album": null`` no longer raises.
+        """
+        # Extract release date components if available
+        release_date = None
+        if release_components := raw_data.get("release_date_components"):
+            year = release_components.get("year")
+            month = release_components.get("month")
+            day = release_components.get("day")
+            if all(x is not None for x in (year, month, day)):
+                release_date = f"{year}-{month:02d}-{day:02d}"
+        # `.get(key, default)` only covers a missing key; `or` also covers a
+        # key that is present with a null value.
+        album = raw_data.get("album") or {}
+        verified_annotations_by = raw_data.get("verified_annotations_by") or []
+        verified_contributors = raw_data.get("verified_contributors") or []
+        # Create metadata object
+        metadata = LyricsMetadata(
+            source="genius",
+            track_name=raw_data.get("title", ""),
+            artist_names=raw_data.get("artist_names", ""),
+            album_name=album.get("name"),
+            lyrics_provider="genius",
+            lyrics_provider_id=str(raw_data.get("id")),
+            is_synced=False,  # Genius doesn't provide synced lyrics
+            provider_metadata={
+                "genius_id": raw_data.get("id"),
+                "release_date": release_date,
+                "page_url": raw_data.get("url"),
+                "annotation_count": raw_data.get("annotation_count"),
+                "lyrics_state": raw_data.get("lyrics_state"),
+                "lyrics_owner_id": raw_data.get("lyrics_owner_id"),
+                "pyongs_count": raw_data.get("pyongs_count"),
+                "verified_annotations": len(verified_annotations_by),
+                "verified_contributors": len(verified_contributors),
+                "external_urls": {"genius": raw_data.get("url")},
+            },
+        )
+        # Genius doesn't provide timestamp data, so there are no segments
+        return LyricsData(lyrics=raw_data.get("lyrics") or "", segments=[], metadata=metadata)

lyrics_transcriber/lyrics/spotify.py ADDED Viewed

@@ -0,0 +1,82 @@
+import logging
+from typing import Optional, Dict, Any
+import syrics.api
+from lyrics_transcriber.lyrics.base_lyrics_provider import LyricsSegment, Word
+from .base_lyrics_provider import BaseLyricsProvider, LyricsProviderConfig, LyricsMetadata, LyricsData
+class SpotifyProvider(BaseLyricsProvider):
+    """Handles fetching lyrics from Spotify."""
+    def __init__(self, config: LyricsProviderConfig, logger: Optional[logging.Logger] = None):
+        """
+        Create the syrics client when a Spotify cookie is configured.
+        Args:
+            config: Provider configuration; ``spotify_cookie`` is read here.
+            logger: Logger to use; defaults to the base class's choice.
+        """
+        super().__init__(config, logger)
+        self.cookie = config.spotify_cookie
+        self.client = syrics.api.Spotify(self.cookie) if self.cookie else None
+    @staticmethod
+    def _ms_to_seconds(value: Any) -> Optional[float]:
+        """Convert a millisecond timestamp to seconds; a missing value or "0" yields None."""
+        if value is None or value == "0":
+            return None
+        return float(value) / 1000
+    def _fetch_data_from_source(self, artist: str, title: str) -> Optional[Dict[str, Any]]:
+        """
+        Fetch raw data from Spotify APIs using syrics library.
+        Returns:
+            ``{"track_data": ..., "lyrics_data": ...}``, or None when no
+            client is configured, no track or lyrics are found, or the
+            lookup raises.
+        """
+        if not self.client:
+            self.logger.warning("No Spotify cookie provided")
+            return None
+        try:
+            # Search for track
+            search_query = f"{title} - {artist}"
+            search_results = self.client.search(search_query, type="track", limit=1)
+            items = search_results["tracks"]["items"]
+            if not items:
+                # An empty result is a normal outcome, not an error.
+                self.logger.warning(f"No Spotify track found for {artist} - {title}")
+                return None
+            track_data = items[0]
+            self.logger.debug(
+                f"Found track: {track_data['artists'][0]['name']} - {track_data['name']} " f"({track_data['external_urls']['spotify']})"
+            )
+            # Get lyrics data
+            lyrics_data = self.client.get_lyrics(track_data["id"])
+            if not lyrics_data:
+                return None
+            return {"track_data": track_data, "lyrics_data": lyrics_data}
+        except Exception as e:
+            # Best-effort lookup: any client failure is logged and reported
+            # to the caller as "no lyrics".
+            self.logger.error(f"Error fetching from Spotify: {str(e)}")
+            return None
+    def _convert_result_format(self, raw_data: Dict[str, Any]) -> LyricsData:
+        """
+        Convert Spotify's raw API response to standardized format.
+        Lines without text are skipped. Each remaining line becomes one
+        segment with no per-word data.
+        """
+        track_data = raw_data["track_data"]
+        lyrics_data = raw_data["lyrics_data"]["lyrics"]
+        # Convert raw lines to LyricsSegment objects
+        segments = []
+        for line in lyrics_data.get("lines", []):
+            if not line.get("words"):
+                continue
+            segment = LyricsSegment(
+                text=line["words"],
+                words=[],  # TODO: Could potentially split words if needed
+                start_time=self._ms_to_seconds(line.get("startTimeMs")),
+                end_time=self._ms_to_seconds(line.get("endTimeMs")),
+            )
+            segments.append(segment)
+        # Create metadata object
+        metadata = LyricsMetadata(
+            source="spotify",
+            track_name=track_data.get("name"),
+            artist_names=", ".join(artist.get("name", "") for artist in track_data.get("artists", [])),
+            album_name=track_data.get("album", {}).get("name"),
+            duration_ms=track_data.get("duration_ms"),
+            explicit=track_data.get("explicit"),
+            language=lyrics_data.get("language"),
+            is_synced=lyrics_data.get("syncType") == "LINE_SYNCED",
+            lyrics_provider=lyrics_data.get("provider"),
+            lyrics_provider_id=lyrics_data.get("providerLyricsId"),
+            provider_metadata={
+                "spotify_id": track_data.get("id"),
+                "preview_url": track_data.get("preview_url"),
+                "external_urls": track_data.get("external_urls"),
+                "sync_type": lyrics_data.get("syncType"),
+            },
+        )
+        return LyricsData(lyrics="\n".join(segment.text for segment in segments), segments=segments, metadata=metadata)

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl

lyrics-transcriber 0.30.0__py3-none-any.whl → 0.30.1__py3-none-any.whl