PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl - Mend

karaoke-gen 0.57.0-py3-none-any.whl → 0.71.23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1815 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.23.dist-info/METADATA +610 -0
karaoke_gen-0.71.23.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.23.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/types.py ADDED Viewed

@@ -0,0 +1,648 @@
+from dataclasses import dataclass, asdict, field, fields
+from typing import Any, Dict, List, Optional, Set, Tuple
+from enum import Enum
+from lyrics_transcriber.utils.word_utils import WordUtils
+@dataclass
+class Word:
+    """Represents a single word with its timing (in seconds) and confidence information."""
+    id: str  # New: Unique identifier for each word
+    text: str
+    start_time: float
+    end_time: float
+    confidence: Optional[float] = None
+    # New: Track if this word was created during correction
+    created_during_correction: bool = False
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert Word to dictionary for JSON serialization."""
+        d = asdict(self)
+        # Remove confidence from output if it's None
+        if d["confidence"] is None:
+            del d["confidence"]
+        return d
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "Word":
+        """Create Word from dictionary."""
+        return cls(
+            id=data["id"],
+            text=data["text"],
+            start_time=data["start_time"],
+            end_time=data["end_time"],
+            confidence=data.get("confidence"),  # Use get() since confidence is optional
+            created_during_correction=data.get("created_during_correction", False),
+        )
+@dataclass
+class LyricsSegment:
+    """Represents a segment/line of lyrics with timing information in seconds."""
+    id: str  # New: Unique identifier for each segment
+    text: str
+    words: List[Word]
+    start_time: float
+    end_time: float
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert LyricsSegment to dictionary for JSON serialization."""
+        return {
+            "id": self.id,
+            "text": self.text,
+            "words": [word.to_dict() for word in self.words],
+            "start_time": self.start_time,
+            "end_time": self.end_time,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsSegment":
+        """Create LyricsSegment from dictionary."""
+        return cls(
+            id=data["id"],
+            text=data["text"],
+            words=[Word.from_dict(w) for w in data["words"]],
+            start_time=data["start_time"],
+            end_time=data["end_time"],
+        )
+@dataclass
+class LyricsMetadata:
+    """Standardized metadata for lyrics results."""
+    source: str
+    track_name: str
+    artist_names: str
+    # Common metadata fields
+    album_name: Optional[str] = None
+    duration_ms: Optional[int] = None
+    explicit: Optional[bool] = None
+    language: Optional[str] = None
+    is_synced: bool = False
+    # Lyrics provider details
+    lyrics_provider: Optional[str] = None
+    lyrics_provider_id: Optional[str] = None
+    # Provider-specific metadata
+    provider_metadata: Dict[str, Any] = field(default_factory=dict)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert metadata to dictionary for JSON serialization."""
+        return asdict(self)
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsMetadata":
+        """Create LyricsMetadata from dictionary."""
+        return cls(
+            source=data["source"],
+            track_name=data["track_name"],
+            artist_names=data["artist_names"],
+            album_name=data.get("album_name"),
+            duration_ms=data.get("duration_ms"),
+            explicit=data.get("explicit"),
+            language=data.get("language"),
+            is_synced=data.get("is_synced", False),
+            lyrics_provider=data.get("lyrics_provider"),
+            lyrics_provider_id=data.get("lyrics_provider_id"),
+            provider_metadata=data.get("provider_metadata", {}),
+        )
+@dataclass
+class LyricsData:
+    """Standardized response format for all lyrics providers."""
+    segments: List[LyricsSegment]
+    metadata: LyricsMetadata
+    source: str  # e.g., "genius", "spotify", etc.
+    def get_full_text(self) -> str:
+        """Get the full lyrics text by joining all segment texts."""
+        return "\n".join(segment.text for segment in self.segments)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert result to dictionary for JSON serialization."""
+        return {
+            "segments": [segment.to_dict() for segment in self.segments],
+            "metadata": self.metadata.to_dict(),
+            "source": self.source,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "LyricsData":
+        """Create LyricsData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            metadata=LyricsMetadata.from_dict(data["metadata"]),
+            source=data["source"],
+        )
+@dataclass
+class WordCorrection:
+    """Details about a single word correction."""
+    original_word: str
+    corrected_word: str  # Empty string indicates word should be deleted
+    source: str  # e.g., "spotify", "genius"
+    reason: str  # e.g., "matched_in_3_sources", "high_confidence_match"
+    original_position: int = 0  # Default to 0 for backwards compatibility with frontend
+    segment_index: int = 0  # Default to 0 since it's often not needed
+    confidence: Optional[float] = None
+    alternatives: Dict[str, int] = field(default_factory=dict)  # Other possible corrections and their occurrence counts
+    is_deletion: bool = False  # New field to explicitly mark deletions
+    # New fields for handling word splits
+    split_index: Optional[int] = None  # Position in the split sequence (0-based)
+    split_total: Optional[int] = None  # Total number of words in split
+    # New field to track position after corrections
+    corrected_position: Optional[int] = None
+    # New fields to match TypeScript interface
+    reference_positions: Optional[Dict[str, int]] = None  # Maps source to position in reference text
+    length: int = 1  # Default to 1 for single-word corrections
+    handler: Optional[str] = None  # Name of the correction handler that created this correction
+    # New ID fields for tracking word identity through corrections
+    word_id: Optional[str] = None  # ID of the original word being corrected
+    corrected_word_id: Optional[str] = None  # ID of the new word after correction
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary representation."""
+        return asdict(self)
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "WordCorrection":
+        """Create WordCorrection from dictionary."""
+        # Filter out any keys that aren't part of the dataclass
+        valid_fields = {f.name for f in fields(cls)}
+        filtered_data = {k: v for k, v in data.items() if k in valid_fields}
+        return cls(**filtered_data)
+@dataclass
+class TranscriptionData:
+    """Structured container for transcription results."""
+    segments: List[LyricsSegment]
+    words: List[Word]
+    text: str
+    source: str  # e.g., "whisper", "audioshake"
+    metadata: Optional[Dict[str, Any]] = None
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert TranscriptionData to dictionary for JSON serialization."""
+        return {
+            "segments": [segment.to_dict() for segment in self.segments],
+            "words": [word.to_dict() for word in self.words],
+            "text": self.text,
+            "source": self.source,
+            "metadata": self.metadata,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "TranscriptionData":
+        """Create TranscriptionData from dictionary."""
+        return cls(
+            segments=[LyricsSegment.from_dict(s) for s in data["segments"]],
+            words=[Word.from_dict(w) for w in data["words"]],
+            text=data["text"],
+            source=data["source"],
+            metadata=data.get("metadata"),
+        )
+@dataclass
+class TranscriptionResult:
+    """Bundles a TranscriptionData with a name and an integer priority."""
+    name: str  # presumably identifies which transcriber produced the result — confirm against callers
+    priority: int  # NOTE(review): whether a higher or lower value wins is not visible here — confirm
+    result: TranscriptionData
+class PhraseType(Enum):
+    """Categories a candidate phrase can be classified as; each member's value is a short string."""
+    COMPLETE = "complete"  # Grammatically complete unit
+    PARTIAL = "partial"  # Incomplete but valid fragment
+    CROSS_BOUNDARY = "cross"  # Crosses natural boundaries
+@dataclass
+class PhraseScore:
+    """Scores for a potential phrase"""
+    phrase_type: PhraseType
+    natural_break_score: float  # 0-1, how well it respects natural breaks
+    length_score: float  # 0-1, how appropriate the length is
+    @property
+    def total_score(self) -> float:
+        """Calculate total score with weights"""
+        weights = {PhraseType.COMPLETE: 1.0, PhraseType.PARTIAL: 0.7, PhraseType.CROSS_BOUNDARY: 0.3}
+        return weights[self.phrase_type] * 0.5 + self.natural_break_score * 0.3 + self.length_score * 0.2
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert PhraseScore to dictionary for JSON serialization."""
+        return {
+            "phrase_type": self.phrase_type.value,  # Convert enum to value for JSON
+            "natural_break_score": self.natural_break_score,
+            "length_score": self.length_score,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "PhraseScore":
+        """Create PhraseScore from dictionary."""
+        return cls(
+            phrase_type=PhraseType(data["phrase_type"]), natural_break_score=data["natural_break_score"], length_score=data["length_score"]
+        )
+@dataclass
+class AnchorSequence:
+    """Represents a sequence of words that appears in both transcribed and reference lyrics."""
+    id: str  # Unique identifier for this anchor sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
+    transcription_position: int  # Starting position in transcribed text
+    reference_positions: Dict[str, int]  # Source -> position mapping
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
+    confidence: float
+    # Backwards compatibility: store original words as text for tests
+    _words: Optional[List[str]] = field(default=None, repr=False)
+    def __init__(self, *args, **kwargs):
+        """Backwards-compatible constructor supporting both old and new APIs."""
+        # Check for old API usage (either positional args or 'words' keyword)
+        if (len(args) >= 3 and isinstance(args[0], list)) or 'words' in kwargs:
+            # Old API: either AnchorSequence(words, ...) or AnchorSequence(words=..., ...)
+            if 'words' in kwargs:
+                # Keyword argument version
+                words = kwargs.pop('words')
+                transcription_position = kwargs.pop('transcription_position', 0)
+                reference_positions = kwargs.pop('reference_positions', {})
+                confidence = kwargs.pop('confidence', 0.0)
+            else:
+                # Positional argument version (may have confidence as keyword)
+                words = args[0]
+                transcription_position = args[1] if len(args) > 1 else 0
+                reference_positions = args[2] if len(args) > 2 else {}
+                # Handle confidence - could be positional or keyword
+                if len(args) > 3:
+                    confidence = args[3]
+                else:
+                    confidence = kwargs.pop('confidence', 0.0)
+            # Store words for backwards compatibility
+            self._words = words
+            # Create new API fields
+            self.id = kwargs.get('id', WordUtils.generate_id())
+            self.transcribed_word_ids = [WordUtils.generate_id() for _ in words]
+            self.transcription_position = transcription_position
+            self.reference_positions = reference_positions
+            # Create reference_word_ids with same structure as reference_positions
+            self.reference_word_ids = {source: [WordUtils.generate_id() for _ in words]
+                                     for source in reference_positions.keys()}
+            self.confidence = confidence
+        else:
+            # New API: use keyword arguments
+            self.id = kwargs.get('id', args[0] if len(args) > 0 else WordUtils.generate_id())
+            self.transcribed_word_ids = kwargs.get('transcribed_word_ids', args[1] if len(args) > 1 else [])
+            self.transcription_position = kwargs.get('transcription_position', args[2] if len(args) > 2 else 0)
+            self.reference_positions = kwargs.get('reference_positions', args[3] if len(args) > 3 else {})
+            self.reference_word_ids = kwargs.get('reference_word_ids', args[4] if len(args) > 4 else {})
+            self.confidence = kwargs.get('confidence', args[5] if len(args) > 5 else 0.0)
+            self._words = kwargs.get('_words', None)
+    @property
+    def words(self) -> List[str]:
+        """Get the words as a list of strings (backwards compatibility)."""
+        if self._words is not None:
+            return self._words
+        # If we don't have stored words, we can't resolve IDs without a word map
+        # This is a limitation of the backwards compatibility
+        return [f"word_{i}" for i in range(len(self.transcribed_word_ids))]
+    @property
+    def text(self) -> str:
+        """Get the sequence as a space-separated string."""
+        return " ".join(self.words)
+    @property
+    def length(self) -> int:
+        """Get the number of words in the sequence."""
+        return len(self.transcribed_word_ids)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the anchor sequence to a JSON-serializable dictionary."""
+        # Always return the new format that includes all required fields
+        result = {
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
+            "transcription_position": self.transcription_position,
+            "reference_positions": self.reference_positions,
+            "reference_word_ids": self.reference_word_ids,
+            "confidence": self.confidence,
+        }
+        # For backwards compatibility, include words and text fields if _words is present
+        if self._words is not None:
+            result.update({
+                "words": self._words,
+                "text": self.text,
+                "length": self.length,
+            })
+        return result
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "AnchorSequence":
+        """Create AnchorSequence from dictionary."""
+        # Handle both old and new dictionary formats
+        if "words" in data:
+            # Old format - convert to new format without setting _words
+            # This ensures to_dict() always returns the new format
+            words = data["words"]
+            return cls(
+                id=data.get("id", WordUtils.generate_id()),
+                transcribed_word_ids=[WordUtils.generate_id() for _ in words],
+                transcription_position=data["transcription_position"],
+                reference_positions=data["reference_positions"],
+                reference_word_ids={source: [WordUtils.generate_id() for _ in words]
+                                   for source in data["reference_positions"].keys()},
+                confidence=data["confidence"],
+                # Don't set _words - this ensures we always use the new format
+            )
+        else:
+            # New format
+            return cls(
+                id=data.get("id", WordUtils.generate_id()),
+                transcribed_word_ids=data["transcribed_word_ids"],
+                transcription_position=data["transcription_position"],
+                reference_positions=data["reference_positions"],
+                reference_word_ids=data["reference_word_ids"],
+                confidence=data["confidence"],
+            )
+@dataclass
+class ScoredAnchor:
+    """An anchor sequence with its quality score"""
+    anchor: AnchorSequence
+    phrase_score: PhraseScore
+    @property
+    def total_score(self) -> float:
+        """Combine confidence, phrase quality, and length"""
+        # Length bonus: (length - 1) * 0.1 gives 0.1 per extra word
+        length_bonus = (self.anchor.length - 1) * 0.1
+        # Base score heavily weighted towards confidence
+        base_score = self.anchor.confidence * 0.8 + self.phrase_score.total_score * 0.2
+        # Combine scores
+        return base_score + length_bonus
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the scored anchor to a JSON-serializable dictionary."""
+        return {
+            **self.anchor.to_dict(),
+            "phrase_score": {
+                "phrase_type": self.phrase_score.phrase_type.value,
+                "natural_break_score": self.phrase_score.natural_break_score,
+                "length_score": self.phrase_score.length_score,
+                "total_score": self.phrase_score.total_score,
+            },
+            "total_score": self.total_score,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "ScoredAnchor":
+        """Create ScoredAnchor from dictionary."""
+        return cls(anchor=AnchorSequence.from_dict(data["anchor"]), phrase_score=PhraseScore.from_dict(data["phrase_score"]))
+@dataclass
+class GapSequence:
+    """Represents a sequence of words between anchor sequences in transcribed lyrics."""
+    id: str  # Unique identifier for this gap sequence
+    transcribed_word_ids: List[str]  # IDs of Word objects from the transcription
+    transcription_position: int  # Original starting position in transcription
+    preceding_anchor_id: Optional[str]  # ID of preceding AnchorSequence
+    following_anchor_id: Optional[str]  # ID of following AnchorSequence
+    reference_word_ids: Dict[str, List[str]]  # Source -> list of Word IDs from reference
+    _corrected_positions: Set[int] = field(default_factory=set, repr=False)
+    _position_offset: int = field(default=0, repr=False)  # Track cumulative position changes
+    # Backwards compatibility: store original words as text for tests
+    _words: Optional[List[str]] = field(default=None, repr=False)
+    def __init__(self, *args, **kwargs):
+        """Backwards-compatible constructor supporting both old and new APIs."""
+        if len(args) >= 5 and isinstance(args[0], (list, tuple)):
+            # Old API: GapSequence(words, transcription_position, preceding_anchor, following_anchor, reference_words)
+            words, transcription_position, preceding_anchor, following_anchor, reference_words = args[:5]
+            # Store words for backwards compatibility
+            self._words = list(words) if isinstance(words, tuple) else words
+            # Create new API fields
+            self.id = kwargs.get('id', WordUtils.generate_id())
+            self.transcribed_word_ids = [WordUtils.generate_id() for _ in self._words]
+            self.transcription_position = transcription_position
+            self.preceding_anchor_id = getattr(preceding_anchor, 'id', None) if preceding_anchor else None
+            self.following_anchor_id = getattr(following_anchor, 'id', None) if following_anchor else None
+            # Convert reference_words to reference_word_ids
+            self.reference_word_ids = {source: [WordUtils.generate_id() for _ in ref_words]
+                                     for source, ref_words in reference_words.items()}
+            self._corrected_positions = set()
+            self._position_offset = 0
+        else:
+            # New API: use keyword arguments
+            self.id = kwargs.get('id', args[0] if len(args) > 0 else WordUtils.generate_id())
+            self.transcribed_word_ids = kwargs.get('transcribed_word_ids', args[1] if len(args) > 1 else [])
+            self.transcription_position = kwargs.get('transcription_position', args[2] if len(args) > 2 else 0)
+            self.preceding_anchor_id = kwargs.get('preceding_anchor_id', args[3] if len(args) > 3 else None)
+            self.following_anchor_id = kwargs.get('following_anchor_id', args[4] if len(args) > 4 else None)
+            self.reference_word_ids = kwargs.get('reference_word_ids', args[5] if len(args) > 5 else {})
+            self._corrected_positions = kwargs.get('_corrected_positions', set())
+            self._position_offset = kwargs.get('_position_offset', 0)
+            self._words = kwargs.get('_words', None)
+    @property
+    def words(self) -> List[str]:
+        """Get the words as a list of strings (backwards compatibility)."""
+        if self._words is not None:
+            return self._words
+        # If we don't have stored words, we can't resolve IDs without a word map
+        return [f"word_{i}" for i in range(len(self.transcribed_word_ids))]
+    @property
+    def text(self) -> str:
+        """Get the sequence as a space-separated string."""
+        return " ".join(self.words)
+    @property
+    def length(self) -> int:
+        """Get the number of words in the sequence."""
+        return len(self.transcribed_word_ids)
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the gap sequence to a JSON-serializable dictionary."""
+        result = {
+            "id": self.id,
+            "transcribed_word_ids": self.transcribed_word_ids,
+            "transcription_position": self.transcription_position,
+            "preceding_anchor_id": self.preceding_anchor_id,
+            "following_anchor_id": self.following_anchor_id,
+            "reference_word_ids": self.reference_word_ids,
+        }
+        # For backwards compatibility, include words and text in dict
+        if self._words is not None:
+            result.update({
+                "words": self._words,
+                "text": self.text,
+                "length": self.length,
+            })
+        return result
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "GapSequence":
+        """Create GapSequence from dictionary."""
+        # Handle both old and new dictionary formats
+        if "words" in data:
+            # Old format - use backwards compatible constructor
+            return cls(
+                data["words"],
+                data["transcription_position"],
+                None,  # preceding_anchor
+                None,  # following_anchor
+                data.get("reference_words", {}),
+                id=data.get("id", WordUtils.generate_id())
+            )
+        else:
+            # New format
+            gap = cls(
+                id=data.get("id", WordUtils.generate_id()),
+                transcribed_word_ids=data["transcribed_word_ids"],
+                transcription_position=data["transcription_position"],
+                preceding_anchor_id=data["preceding_anchor_id"],
+                following_anchor_id=data["following_anchor_id"],
+                reference_word_ids=data["reference_word_ids"],
+            )
+            return gap
+@dataclass
+class CorrectionStep:
+    """Represents a single correction operation with enough info to replay/undo."""
+    handler_name: str
+    affected_word_ids: List[str]  # IDs of words modified/deleted
+    affected_segment_ids: List[str]  # IDs of segments modified
+    corrections: List[WordCorrection]
+    # State before and after for affected segments
+    segments_before: List[LyricsSegment]
+    segments_after: List[LyricsSegment]
+    # For splits/merges
+    created_word_ids: List[str] = field(default_factory=list)  # New words created
+    deleted_word_ids: List[str] = field(default_factory=list)  # Words removed
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert CorrectionStep to dictionary for JSON serialization."""
+        return {
+            "handler_name": self.handler_name,
+            "affected_word_ids": self.affected_word_ids,
+            "affected_segment_ids": self.affected_segment_ids,
+            "corrections": [c.to_dict() for c in self.corrections],
+            "segments_before": [s.to_dict() for s in self.segments_before],
+            "segments_after": [s.to_dict() for s in self.segments_after],
+            "created_word_ids": self.created_word_ids,
+            "deleted_word_ids": self.deleted_word_ids,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "CorrectionStep":
+        """Create CorrectionStep from dictionary."""
+        return cls(
+            handler_name=data["handler_name"],
+            affected_word_ids=data["affected_word_ids"],
+            affected_segment_ids=data["affected_segment_ids"],
+            corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
+            segments_before=[LyricsSegment.from_dict(s) for s in data["segments_before"]],
+            segments_after=[LyricsSegment.from_dict(s) for s in data["segments_after"]],
+            created_word_ids=data["created_word_ids"],
+            deleted_word_ids=data["deleted_word_ids"],
+        )
+@dataclass
+class CorrectionResult:
+    """Container for correction results with detailed correction information."""
+    # Original (uncorrected) data
+    original_segments: List[LyricsSegment]
+    # Corrected data
+    corrected_segments: List[LyricsSegment]
+    # Correction details
+    corrections: List[WordCorrection]
+    corrections_made: int
+    confidence: float
+    # Debug/analysis information
+    reference_lyrics: Dict[str, LyricsData]  # Maps source to LyricsData
+    anchor_sequences: List[AnchorSequence]
+    gap_sequences: List[GapSequence]
+    resized_segments: List[LyricsSegment]
+    metadata: Dict[str, Any]
+    # Correction history
+    correction_steps: List[CorrectionStep]
+    word_id_map: Dict[str, str]  # Maps original word IDs to corrected word IDs
+    segment_id_map: Dict[str, str]  # Maps original segment IDs to corrected segment IDs
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert the correction result to a JSON-serializable dictionary."""
+        return {
+            "original_segments": [s.to_dict() for s in self.original_segments],
+            "reference_lyrics": {source: lyrics.to_dict() for source, lyrics in self.reference_lyrics.items()},
+            "anchor_sequences": [a.to_dict() for a in self.anchor_sequences],
+            "gap_sequences": [g.to_dict() for g in self.gap_sequences],
+            "resized_segments": [s.to_dict() for s in self.resized_segments],
+            "corrections_made": self.corrections_made,
+            "confidence": self.confidence,
+            "corrections": [c.to_dict() for c in self.corrections],
+            "corrected_segments": [s.to_dict() for s in self.corrected_segments],
+            "metadata": self.metadata,
+            "correction_steps": [step.to_dict() for step in self.correction_steps],
+            "word_id_map": self.word_id_map,
+            "segment_id_map": self.segment_id_map,
+        }
+    @classmethod
+    def from_dict(cls, data: Dict[str, Any]) -> "CorrectionResult":
+        """Create CorrectionResult from dictionary."""
+        return cls(
+            original_segments=[LyricsSegment.from_dict(s) for s in data["original_segments"]],
+            corrected_segments=[LyricsSegment.from_dict(s) for s in data["corrected_segments"]],
+            corrections=[WordCorrection.from_dict(c) for c in data["corrections"]],
+            corrections_made=data["corrections_made"],
+            confidence=data["confidence"],
+            reference_lyrics={source: LyricsData.from_dict(lyrics) for source, lyrics in data["reference_lyrics"].items()},
+            anchor_sequences=[AnchorSequence.from_dict(a) for a in data["anchor_sequences"]],
+            gap_sequences=[GapSequence.from_dict(g) for g in data["gap_sequences"]],
+            resized_segments=[LyricsSegment.from_dict(s) for s in data["resized_segments"]],
+            metadata=data["metadata"],
+            correction_steps=[CorrectionStep.from_dict(step) for step in data["correction_steps"]],
+            word_id_map=data["word_id_map"],
+            segment_id_map=data["segment_id_map"],
+        )

lyrics_transcriber/utils/__init__.py ADDED Viewed

File without changes

lyrics_transcriber/utils/word_utils.py ADDED Viewed

@@ -0,0 +1,27 @@
+import random
+import string
+class WordUtils:
+    """Utility class for word-related operations."""
+    _used_ids = set()  # Keep track of used IDs
+    _id_length = 6  # Length of generated IDs
+    @classmethod
+    def generate_id(cls) -> str:
+        """Generate a unique ID for words/segments.
+        Uses a combination of letters and numbers to create an 8-character ID.
+        With 36 possible characters (26 letters + 10 digits), this gives us
+        36^8 = ~2.8 trillion possible combinations, which is more than enough
+        for our use case while being much shorter than UUID.
+        """
+        while True:
+            # Generate random string of letters and numbers
+            new_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=cls._id_length))
+            # Make sure it's unique for this session
+            if new_id not in cls._used_ids:
+                cls._used_ids.add(new_id)
+                return new_id

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.23__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.23py3-none-any.whl