lyrics-transcriber 0.41.0__py3-none-any.whl → 0.42.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. lyrics_transcriber/core/controller.py +30 -52
  2. lyrics_transcriber/correction/anchor_sequence.py +325 -150
  3. lyrics_transcriber/correction/corrector.py +224 -107
  4. lyrics_transcriber/correction/handlers/base.py +28 -10
  5. lyrics_transcriber/correction/handlers/extend_anchor.py +47 -24
  6. lyrics_transcriber/correction/handlers/levenshtein.py +75 -33
  7. lyrics_transcriber/correction/handlers/llm.py +290 -0
  8. lyrics_transcriber/correction/handlers/no_space_punct_match.py +81 -36
  9. lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +46 -26
  10. lyrics_transcriber/correction/handlers/repeat.py +28 -11
  11. lyrics_transcriber/correction/handlers/sound_alike.py +68 -32
  12. lyrics_transcriber/correction/handlers/syllables_match.py +80 -30
  13. lyrics_transcriber/correction/handlers/word_count_match.py +36 -19
  14. lyrics_transcriber/correction/handlers/word_operations.py +68 -22
  15. lyrics_transcriber/correction/text_utils.py +3 -7
  16. lyrics_transcriber/frontend/.yarn/install-state.gz +0 -0
  17. lyrics_transcriber/frontend/.yarn/releases/yarn-4.6.0.cjs +934 -0
  18. lyrics_transcriber/frontend/.yarnrc.yml +3 -0
  19. lyrics_transcriber/frontend/dist/assets/{index-DKnNJHRK.js → index-coH8y7gV.js} +16284 -9032
  20. lyrics_transcriber/frontend/dist/assets/index-coH8y7gV.js.map +1 -0
  21. lyrics_transcriber/frontend/dist/index.html +1 -1
  22. lyrics_transcriber/frontend/package.json +6 -2
  23. lyrics_transcriber/frontend/src/App.tsx +18 -2
  24. lyrics_transcriber/frontend/src/api.ts +103 -6
  25. lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +7 -6
  26. lyrics_transcriber/frontend/src/components/DetailsModal.tsx +86 -59
  27. lyrics_transcriber/frontend/src/components/EditModal.tsx +93 -43
  28. lyrics_transcriber/frontend/src/components/FileUpload.tsx +2 -2
  29. lyrics_transcriber/frontend/src/components/Header.tsx +251 -0
  30. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +303 -265
  31. lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +117 -0
  32. lyrics_transcriber/frontend/src/components/ReferenceView.tsx +125 -40
  33. lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +129 -115
  34. lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +59 -78
  35. lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +40 -16
  36. lyrics_transcriber/frontend/src/components/WordEditControls.tsx +4 -10
  37. lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +137 -68
  38. lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +1 -1
  39. lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +85 -115
  40. lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
  41. lyrics_transcriber/frontend/src/components/shared/types.ts +15 -7
  42. lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +35 -0
  43. lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
  44. lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +7 -7
  45. lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +121 -0
  46. lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
  47. lyrics_transcriber/frontend/src/types.js +2 -0
  48. lyrics_transcriber/frontend/src/types.ts +70 -49
  49. lyrics_transcriber/frontend/src/validation.ts +132 -0
  50. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  51. lyrics_transcriber/frontend/yarn.lock +3752 -0
  52. lyrics_transcriber/lyrics/base_lyrics_provider.py +75 -12
  53. lyrics_transcriber/lyrics/file_provider.py +6 -5
  54. lyrics_transcriber/lyrics/genius.py +5 -2
  55. lyrics_transcriber/lyrics/spotify.py +58 -21
  56. lyrics_transcriber/output/ass/config.py +16 -5
  57. lyrics_transcriber/output/cdg.py +1 -1
  58. lyrics_transcriber/output/generator.py +22 -8
  59. lyrics_transcriber/output/plain_text.py +15 -10
  60. lyrics_transcriber/output/segment_resizer.py +16 -3
  61. lyrics_transcriber/output/subtitles.py +27 -1
  62. lyrics_transcriber/output/video.py +107 -1
  63. lyrics_transcriber/review/__init__.py +0 -1
  64. lyrics_transcriber/review/server.py +337 -164
  65. lyrics_transcriber/transcribers/audioshake.py +3 -0
  66. lyrics_transcriber/transcribers/base_transcriber.py +11 -3
  67. lyrics_transcriber/transcribers/whisper.py +11 -1
  68. lyrics_transcriber/types.py +151 -105
  69. lyrics_transcriber/utils/word_utils.py +27 -0
  70. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA +3 -1
  71. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/RECORD +74 -61
  72. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/WHEEL +1 -1
  73. lyrics_transcriber/frontend/dist/assets/index-DKnNJHRK.js.map +0 -1
  74. lyrics_transcriber/frontend/package-lock.json +0 -4260
  75. lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx +0 -202
  76. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/LICENSE +0 -0
  77. {lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/entry_points.txt +0 -0
lyrics_transcriber/types.py
@@ -1,16 +1,20 @@
-from dataclasses import dataclass, asdict, field
-from typing import Any, Dict, List, Optional, Set, Protocol, Tuple
+from dataclasses import dataclass, asdict, field, fields
+from typing import Any, Dict, List, Optional, Set, Tuple
 from enum import Enum
+from lyrics_transcriber.utils.word_utils import WordUtils


 @dataclass
 class Word:
     """Represents a single word with its timing (in seconds) and confidence information."""

+    id: str  # New: Unique identifier for each word
     text: str
     start_time: float
     end_time: float
     confidence: Optional[float] = None
+    # New: Track if this word was created during correction
+    created_during_correction: bool = False

     def to_dict(self) -> Dict[str, Any]:
         """Convert Word to dictionary for JSON serialization."""
@@ -24,10 +28,12 @@ class Word:
     def from_dict(cls, data: Dict[str, Any]) -> "Word":
         """Create Word from dictionary."""
         return cls(
+            id=data["id"],
             text=data["text"],
             start_time=data["start_time"],
             end_time=data["end_time"],
             confidence=data.get("confidence"),  # Use get() since confidence is optional
+            created_during_correction=data.get("created_during_correction", False),
         )
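Word now carries a required id plus a created_during_correction flag. A minimal usage sketch (the values are illustrative, and it assumes to_dict() emits the same keys that from_dict() reads):

    from lyrics_transcriber.types import Word
    from lyrics_transcriber.utils.word_utils import WordUtils

    # Words now need an explicit id; created_during_correction defaults to False
    # for words that came straight from the transcription.
    word = Word(
        id=WordUtils.generate_id(),
        text="hello",
        start_time=12.34,
        end_time=12.78,
        confidence=0.97,
    )

    restored = Word.from_dict(word.to_dict())
    assert restored.id == word.id
    assert restored.created_during_correction is False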
@@ -35,6 +41,7 @@ class Word:
 class LyricsSegment:
     """Represents a segment/line of lyrics with timing information in seconds."""

+    id: str  # New: Unique identifier for each segment
     text: str
     words: List[Word]
     start_time: float
@@ -43,6 +50,7 @@ class LyricsSegment:
     def to_dict(self) -> Dict[str, Any]:
         """Convert LyricsSegment to dictionary for JSON serialization."""
         return {
+            "id": self.id,
             "text": self.text,
             "words": [word.to_dict() for word in self.words],
             "start_time": self.start_time,
@@ -53,6 +61,7 @@ class LyricsSegment:
     def from_dict(cls, data: Dict[str, Any]) -> "LyricsSegment":
         """Create LyricsSegment from dictionary."""
         return cls(
+            id=data["id"],
             text=data["text"],
             words=[Word.from_dict(w) for w in data["words"]],
             start_time=data["start_time"],
@@ -80,31 +89,59 @@ class LyricsMetadata:
     lyrics_provider_id: Optional[str] = None

     # Provider-specific metadata
-    provider_metadata: Dict[str, Any] = None
+    provider_metadata: Dict[str, Any] = field(default_factory=dict)

     def to_dict(self) -> Dict[str, Any]:
         """Convert metadata to dictionary for JSON serialization."""
         return asdict(self)

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsMetadata":
+        """Create LyricsMetadata from dictionary."""
+        return cls(
+            source=data["source"],
+            track_name=data["track_name"],
+            artist_names=data["artist_names"],
+            album_name=data.get("album_name"),
+            duration_ms=data.get("duration_ms"),
+            explicit=data.get("explicit"),
+            language=data.get("language"),
+            is_synced=data.get("is_synced", False),
+            lyrics_provider=data.get("lyrics_provider"),
+            lyrics_provider_id=data.get("lyrics_provider_id"),
+            provider_metadata=data.get("provider_metadata", {}),
+        )
+

 @dataclass
 class LyricsData:
     """Standardized response format for all lyrics providers."""

-    lyrics: str
     segments: List[LyricsSegment]
     metadata: LyricsMetadata
     source: str  # e.g., "genius", "spotify", etc.

+    def get_full_text(self) -> str:
+        """Get the full lyrics text by joining all segment texts."""
+        return "\n".join(segment.text for segment in self.segments)
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert result to dictionary for JSON serialization."""
         return {
-            "lyrics": self.lyrics,
             "segments": [segment.to_dict() for segment in self.segments],
             "metadata": self.metadata.to_dict(),
             "source": self.source,
         }

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsData":
+        """Create LyricsData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            metadata=LyricsMetadata.from_dict(data["metadata"]),
+            source=data["source"],
+        )
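With the lyrics string removed from LyricsData, the full text is now derived from the segments via get_full_text(). A small sketch of how a caller that previously read .lyrics might adapt (the helper name is hypothetical):

    from lyrics_transcriber.types import LyricsData

    def first_line(lyrics: LyricsData) -> str:
        """Return the first line of the lyrics, replacing reads of the removed .lyrics string."""
        full_text = lyrics.get_full_text()  # segment texts joined with newlines
        return full_text.splitlines()[0] if full_text else ""

    # Round-tripping through the new serialization pair also works:
    # LyricsData.from_dict(lyrics.to_dict())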
@@ -112,12 +149,12 @@ class WordCorrection:

     original_word: str
     corrected_word: str  # Empty string indicates word should be deleted
-    segment_index: int
     original_position: int
     source: str  # e.g., "spotify", "genius"
-    confidence: Optional[float]
     reason: str  # e.g., "matched_in_3_sources", "high_confidence_match"
-    alternatives: Dict[str, int]  # Other possible corrections and their occurrence counts
+    segment_index: int = 0  # Default to 0 since it's often not needed
+    confidence: Optional[float] = None
+    alternatives: Dict[str, int] = field(default_factory=dict)  # Other possible corrections and their occurrence counts
     is_deletion: bool = False  # New field to explicitly mark deletions
     # New fields for handling word splits
     split_index: Optional[int] = None  # Position in the split sequence (0-based)
@@ -127,14 +164,22 @@ class WordCorrection:
     # New fields to match TypeScript interface
     reference_positions: Optional[Dict[str, int]] = None  # Maps source to position in reference text
     length: int = 1  # Default to 1 for single-word corrections
+    handler: Optional[str] = None  # Name of the correction handler that created this correction
+    # New ID fields for tracking word identity through corrections
+    word_id: Optional[str] = None  # ID of the original word being corrected
+    corrected_word_id: Optional[str] = None  # ID of the new word after correction

     def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary representation."""
         return asdict(self)

     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "WordCorrection":
         """Create WordCorrection from dictionary."""
-        return cls(**data)
+        # Filter out any keys that aren't part of the dataclass
+        valid_fields = {f.name for f in fields(cls)}
+        filtered_data = {k: v for k, v in data.items() if k in valid_fields}
+        return cls(**filtered_data)
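The rewritten WordCorrection.from_dict() filters the incoming dict against the dataclass fields, so payloads saved by older versions no longer break construction. A hedged sketch; the legacy key shown is hypothetical:

    from lyrics_transcriber.types import WordCorrection

    data = {
        "original_word": "wold",
        "corrected_word": "world",
        "original_position": 3,
        "source": "genius",
        "reason": "high_confidence_match",
        "some_removed_field": "ignored",  # hypothetical key from an older schema
    }

    # 0.41.0's cls(**data) would raise TypeError on the unknown key;
    # the new from_dict() drops it and applies the new defaults.
    correction = WordCorrection.from_dict(data)
    assert correction.segment_index == 0
    assert correction.alternatives == {}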
@@ -157,6 +202,17 @@ class TranscriptionData:
             "metadata": self.metadata,
         }

+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TranscriptionData":
+        """Create TranscriptionData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            words=[Word.from_dict(w) for w in data["words"]],
+            text=data["text"],
+            source=data["source"],
+            metadata=data.get("metadata"),
+        )
+

 @dataclass
 class TranscriptionResult:
@@ -207,29 +263,33 @@ class PhraseScore:
 class AnchorSequence:
     """Represents a sequence of words that appears in both transcribed and reference lyrics."""

-    words: List[str]
+    id: str  # Unique identifier for this anchor sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
     transcription_position: int  # Starting position in transcribed text
     reference_positions: Dict[str, int]  # Source -> position mapping
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
     confidence: float

     @property
     def text(self) -> str:
         """Get the sequence as a space-separated string."""
+        # This property might need to be updated to look up words from parent object
+        # For now, keeping it for backwards compatibility
         return " ".join(self.words)

     @property
     def length(self) -> int:
         """Get the number of words in the sequence."""
-        return len(self.words)
+        return len(self.transcribed_word_ids)

     def to_dict(self) -> Dict[str, Any]:
         """Convert the anchor sequence to a JSON-serializable dictionary."""
         return {
-            "words": self.words,
-            "text": self.text,
-            "length": self.length,
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
             "transcription_position": self.transcription_position,
             "reference_positions": self.reference_positions,
+            "reference_word_ids": self.reference_word_ids,
             "confidence": self.confidence,
         }

@@ -237,9 +297,11 @@ class AnchorSequence:
     def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
         """Create AnchorSequence from dictionary."""
         return cls(
-            words=data["words"],
+            id=data.get("id", WordUtils.generate_id()),  # Generate ID if not present in old data
+            transcribed_word_ids=data["transcribed_word_ids"],
             transcription_position=data["transcription_position"],
             reference_positions=data["reference_positions"],
+            reference_word_ids=data["reference_word_ids"],
            confidence=data["confidence"],
         )
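AnchorSequence (and GapSequence below) now stores word IDs rather than the words themselves, so rendering an anchor's text means resolving those IDs against the transcription's Word objects. A rough helper sketch; the comments in the diff suggest the package may grow its own parent-object lookup, so this is illustrative only:

    from typing import Dict
    from lyrics_transcriber.types import AnchorSequence, Word

    def anchor_text(anchor: AnchorSequence, words_by_id: Dict[str, Word]) -> str:
        """Resolve an anchor's word IDs back to text using an id -> Word index."""
        return " ".join(words_by_id[word_id].text for word_id in anchor.transcribed_word_ids)

    # The index can be built from any segment list, e.g.:
    # words_by_id = {w.id: w for segment in segments for w in segment.words}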
@@ -284,115 +346,94 @@ class ScoredAnchor:
 class GapSequence:
     """Represents a sequence of words between anchor sequences in transcribed lyrics."""

-    words: Tuple[str, ...]
+    id: str  # Unique identifier for this gap sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
     transcription_position: int  # Original starting position in transcription
-    preceding_anchor: Optional[AnchorSequence]
-    following_anchor: Optional[AnchorSequence]
-    reference_words: Dict[str, List[str]]
-    reference_words_original: Dict[str, List[str]]
-    corrections: List[WordCorrection] = field(default_factory=list)
+    preceding_anchor_id: Optional[str]  # ID of preceding AnchorSequence
+    following_anchor_id: Optional[str]  # ID of following AnchorSequence
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
     _corrected_positions: Set[int] = field(default_factory=set, repr=False)
     _position_offset: int = field(default=0, repr=False)  # Track cumulative position changes

-    def add_correction(self, correction: WordCorrection) -> None:
-        """Add a correction and mark its position as corrected."""
-        self.corrections.append(correction)
-        relative_pos = correction.original_position - self.transcription_position
-        self._corrected_positions.add(relative_pos)
-
-        # Update position offset based on correction type
-        if correction.is_deletion:
-            self._position_offset -= 1
-        elif correction.split_total:
-            self._position_offset += correction.split_total - 1
-
-        # Update corrected position for the correction
-        correction.corrected_position = correction.original_position + self._position_offset
-
-    def get_corrected_position(self, original_position: int) -> int:
-        """Convert an original position to its corrected position."""
-        offset = sum(
-            -1 if c.is_deletion else (c.split_total - 1 if c.split_total else 0)
-            for c in self.corrections
-            if c.original_position < original_position
-        )
-        return original_position + offset
-
-    @property
-    def corrected_length(self) -> int:
-        """Get the length after applying all corrections."""
-        return self.length + self._position_offset
-
-    def is_word_corrected(self, relative_position: int) -> bool:
-        """Check if a word at the given position (relative to gap start) has been corrected."""
-        return relative_position in self._corrected_positions
-
-    @property
-    def uncorrected_words(self) -> List[Tuple[int, str]]:
-        """Get list of (position, word) tuples for words that haven't been corrected yet."""
-        return [(i, word) for i, word in enumerate(self.words) if i not in self._corrected_positions]
-
-    @property
-    def is_fully_corrected(self) -> bool:
-        """Check if all words in the gap have been corrected."""
-        return len(self._corrected_positions) == self.length
-
-    def __hash__(self):
-        # Hash based on words and position
-        return hash((self.words, self.transcription_position))
-
-    def __eq__(self, other):
-        if not isinstance(other, GapSequence):
-            return NotImplemented
-        return self.words == other.words and self.transcription_position == other.transcription_position
-
     @property
     def text(self) -> str:
         """Get the sequence as a space-separated string."""
+        # This property might need to be updated to look up words from parent object
         return " ".join(self.words)

     @property
     def length(self) -> int:
         """Get the number of words in the sequence."""
-        return len(self.words)
-
-    @property
-    def was_corrected(self) -> bool:
-        """Check if this gap has any corrections."""
-        return len(self.corrections) > 0
+        return len(self.transcribed_word_ids)

     def to_dict(self) -> Dict[str, Any]:
         """Convert the gap sequence to a JSON-serializable dictionary."""
         return {
-            "words": self.words,
-            "text": self.text,
-            "length": self.length,
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
             "transcription_position": self.transcription_position,
-            "preceding_anchor": self.preceding_anchor.to_dict() if self.preceding_anchor else None,
-            "following_anchor": self.following_anchor.to_dict() if self.following_anchor else None,
-            "reference_words": self.reference_words,
-            "reference_words_original": self.reference_words_original,
-            "corrections": [c.to_dict() for c in self.corrections],
+            "preceding_anchor_id": self.preceding_anchor_id,
+            "following_anchor_id": self.following_anchor_id,
+            "reference_word_ids": self.reference_word_ids,
         }

     @classmethod
     def from_dict(cls, data: Dict[str, Any]) -> "GapSequence":
         """Create GapSequence from dictionary."""
         gap = cls(
-            words=tuple(data["words"]),
+            id=data.get("id", WordUtils.generate_id()),  # Generate ID if not present in old data
+            transcribed_word_ids=data["transcribed_word_ids"],
             transcription_position=data["transcription_position"],
-            preceding_anchor=AnchorSequence.from_dict(data["preceding_anchor"]) if data["preceding_anchor"] else None,
-            following_anchor=AnchorSequence.from_dict(data["following_anchor"]) if data["following_anchor"] else None,
-            reference_words=data["reference_words"],
-            reference_words_original=data.get("reference_words_original", {}),
+            preceding_anchor_id=data["preceding_anchor_id"],
+            following_anchor_id=data["following_anchor_id"],
+            reference_word_ids=data["reference_word_ids"],
         )
-        # Add any corrections from the data
-        if "corrections" in data:
-            for correction_data in data["corrections"]:
-                gap.add_correction(WordCorrection.from_dict(correction_data))
         return gap


+@dataclass
+class CorrectionStep:
+    """Represents a single correction operation with enough info to replay/undo."""
+
+    handler_name: str
+    affected_word_ids: List[str]  # IDs of words modified/deleted
+    affected_segment_ids: List[str]  # IDs of segments modified
+    corrections: List[WordCorrection]
+    # State before and after for affected segments
+    segments_before: List[LyricsSegment]
+    segments_after: List[LyricsSegment]
+    # For splits/merges
+    created_word_ids: List[str] = field(default_factory=list)  # New words created
+    deleted_word_ids: List[str] = field(default_factory=list)  # Words removed
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert CorrectionStep to dictionary for JSON serialization."""
+        return {
+            "handler_name": self.handler_name,
+            "affected_word_ids": self.affected_word_ids,
+            "affected_segment_ids": self.affected_segment_ids,
+            "corrections": [c.to_dict() for c in self.corrections],
+            "segments_before": [s.to_dict() for s in self.segments_before],
+            "segments_after": [s.to_dict() for s in self.segments_after],
+            "created_word_ids": self.created_word_ids,
+            "deleted_word_ids": self.deleted_word_ids,
+        }
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "CorrectionStep":
+        """Create CorrectionStep from dictionary."""
+        return cls(
+            handler_name=data["handler_name"],
+            affected_word_ids=data["affected_word_ids"],
+            affected_segment_ids=data["affected_segment_ids"],
+            corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
+            segments_before=[LyricsSegment.from_dict(s) for s in data["segments_before"]],
+            segments_after=[LyricsSegment.from_dict(s) for s in data["segments_after"]],
+            created_word_ids=data["created_word_ids"],
+            deleted_word_ids=data["deleted_word_ids"],
+        )
@@ -402,7 +443,6 @@ class CorrectionResult:

     # Corrected data
     corrected_segments: List[LyricsSegment]
-    corrected_text: str

     # Correction details
     corrections: List[WordCorrection]
@@ -410,29 +450,34 @@ class CorrectionResult:
     confidence: float

     # Debug/analysis information
-    transcribed_text: str
-    reference_texts: Dict[str, str]
+    reference_lyrics: Dict[str, LyricsData]  # Maps source to LyricsData
     anchor_sequences: List[AnchorSequence]
     gap_sequences: List[GapSequence]
     resized_segments: List[LyricsSegment]

     metadata: Dict[str, Any]

+    # Correction history
+    correction_steps: List[CorrectionStep]
+    word_id_map: Dict[str, str]  # Maps original word IDs to corrected word IDs
+    segment_id_map: Dict[str, str]  # Maps original segment IDs to corrected segment IDs
+
     def to_dict(self) -> Dict[str, Any]:
         """Convert the correction result to a JSON-serializable dictionary."""
         return {
-            "transcribed_text": self.transcribed_text,
             "original_segments": [s.to_dict() for s in self.original_segments],
-            "reference_texts": self.reference_texts,
+            "reference_lyrics": {source: lyrics.to_dict() for source, lyrics in self.reference_lyrics.items()},
             "anchor_sequences": [a.to_dict() for a in self.anchor_sequences],
             "gap_sequences": [g.to_dict() for g in self.gap_sequences],
             "resized_segments": [s.to_dict() for s in self.resized_segments],
-            "corrected_text": self.corrected_text,
             "corrections_made": self.corrections_made,
             "confidence": self.confidence,
             "corrections": [c.to_dict() for c in self.corrections],
             "corrected_segments": [s.to_dict() for s in self.corrected_segments],
             "metadata": self.metadata,
+            "correction_steps": [step.to_dict() for step in self.correction_steps],
+            "word_id_map": self.word_id_map,
+            "segment_id_map": self.segment_id_map,
         }

     @classmethod
@@ -441,14 +486,15 @@ class CorrectionResult:
         return cls(
             original_segments=[LyricsSegment.from_dict(s) for s in data["original_segments"]],
             corrected_segments=[LyricsSegment.from_dict(s) for s in data["corrected_segments"]],
-            corrected_text=data["corrected_text"],
             corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
             corrections_made=data["corrections_made"],
             confidence=data["confidence"],
-            transcribed_text=data["transcribed_text"],
-            reference_texts=data["reference_texts"],
+            reference_lyrics={source: LyricsData.from_dict(lyrics) for source, lyrics in data["reference_lyrics"].items()},
             anchor_sequences=[AnchorSequence.from_dict(a) for a in data["anchor_sequences"]],
             gap_sequences=[GapSequence.from_dict(g) for g in data["gap_sequences"]],
             resized_segments=[LyricsSegment.from_dict(s) for s in data["resized_segments"]],
             metadata=data["metadata"],
+            correction_steps=[CorrectionStep.from_dict(step) for step in data["correction_steps"]],
+            word_id_map=data["word_id_map"],
+            segment_id_map=data["segment_id_map"],
         )
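Because every nested type now has a from_dict() counterpart, a saved CorrectionResult can be reloaded wholesale. A sketch assuming a hypothetical corrections.json previously written from to_dict():

    import json
    from lyrics_transcriber.types import CorrectionResult

    # Hypothetical file written earlier via json.dump(result.to_dict(), f).
    with open("corrections.json") as f:
        result = CorrectionResult.from_dict(json.load(f))

    print(f"{result.corrections_made} corrections, confidence {result.confidence:.2f}")
    for source, lyrics in result.reference_lyrics.items():
        # reference_texts (plain strings) became reference_lyrics (LyricsData objects).
        print(source, len(lyrics.get_full_text().splitlines()), "lines")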
lyrics_transcriber/utils/word_utils.py (new file)
@@ -0,0 +1,27 @@
+import random
+import string
+
+
+class WordUtils:
+    """Utility class for word-related operations."""
+
+    _used_ids = set()  # Keep track of used IDs
+    _id_length = 6  # Length of generated IDs
+
+    @classmethod
+    def generate_id(cls) -> str:
+        """Generate a unique ID for words/segments.
+
+        Uses a combination of letters and numbers to create an 8-character ID.
+        With 36 possible characters (26 letters + 10 digits), this gives us
+        36^8 = ~2.8 trillion possible combinations, which is more than enough
+        for our use case while being much shorter than UUID.
+        """
+        while True:
+            # Generate random string of letters and numbers
+            new_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=cls._id_length))
+
+            # Make sure it's unique for this session
+            if new_id not in cls._used_ids:
+                cls._used_ids.add(new_id)
+                return new_id
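A small usage sketch for the new ID helper; uniqueness is only tracked per process via the class-level _used_ids set:

    from lyrics_transcriber.utils.word_utils import WordUtils

    # Each ID is _id_length (currently 6) characters drawn from a-z and 0-9,
    # deduplicated against the session-wide _used_ids set.
    ids = [WordUtils.generate_id() for _ in range(1000)]
    assert len(set(ids)) == 1000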
{lyrics_transcriber-0.41.0.dist-info → lyrics_transcriber-0.42.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: lyrics-transcriber
-Version: 0.41.0
+Version: 0.42.0
 Summary: Automatically create synchronised lyrics files in ASS and MidiCo LRC formats with word-level timestamps, using Whisper and lyrics from Genius and Spotify
 License: MIT
 Author: Andrew Beveridge
@@ -19,10 +19,12 @@ Requires-Dist: karaoke-lyrics-processor (>=0.4)
 Requires-Dist: lyricsgenius (>=3)
 Requires-Dist: metaphone (>=0.6)
 Requires-Dist: nltk (>=3.9)
+Requires-Dist: ollama (>=0.4.7,<0.5.0)
 Requires-Dist: pydub (>=0.25)
 Requires-Dist: python-dotenv (>=1)
 Requires-Dist: python-levenshtein (>=0.26)
 Requires-Dist: python-slugify (>=8)
+Requires-Dist: shortuuid (>=1.0.13,<2.0.0)
 Requires-Dist: spacy (>=3.8)
 Requires-Dist: spacy-syllables (>=3)
 Requires-Dist: syllables (>=1)