PyPI - karaoke-gen - Versions diffs - 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl - Mend

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (268) hide show

karaoke_gen/audio_fetcher.py +461 -0
karaoke_gen/audio_processor.py +407 -30
karaoke_gen/config.py +62 -113
karaoke_gen/file_handler.py +32 -59
karaoke_gen/karaoke_finalise/karaoke_finalise.py +148 -67
karaoke_gen/karaoke_gen.py +270 -61
karaoke_gen/lyrics_processor.py +13 -1
karaoke_gen/metadata.py +78 -73
karaoke_gen/pipeline/__init__.py +87 -0
karaoke_gen/pipeline/base.py +215 -0
karaoke_gen/pipeline/context.py +230 -0
karaoke_gen/pipeline/executors/__init__.py +21 -0
karaoke_gen/pipeline/executors/local.py +159 -0
karaoke_gen/pipeline/executors/remote.py +257 -0
karaoke_gen/pipeline/stages/__init__.py +27 -0
karaoke_gen/pipeline/stages/finalize.py +202 -0
karaoke_gen/pipeline/stages/render.py +165 -0
karaoke_gen/pipeline/stages/screens.py +139 -0
karaoke_gen/pipeline/stages/separation.py +191 -0
karaoke_gen/pipeline/stages/transcription.py +191 -0
karaoke_gen/style_loader.py +531 -0
karaoke_gen/utils/bulk_cli.py +6 -0
karaoke_gen/utils/cli_args.py +424 -0
karaoke_gen/utils/gen_cli.py +26 -261
karaoke_gen/utils/remote_cli.py +1965 -0
karaoke_gen/video_background_processor.py +351 -0
karaoke_gen-0.71.27.dist-info/METADATA +610 -0
karaoke_gen-0.71.27.dist-info/RECORD +275 -0
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/WHEEL +1 -1
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info}/entry_points.txt +1 -0
lyrics_transcriber/__init__.py +10 -0
lyrics_transcriber/cli/__init__.py +0 -0
lyrics_transcriber/cli/cli_main.py +285 -0
lyrics_transcriber/core/__init__.py +0 -0
lyrics_transcriber/core/config.py +50 -0
lyrics_transcriber/core/controller.py +520 -0
lyrics_transcriber/correction/__init__.py +0 -0
lyrics_transcriber/correction/agentic/__init__.py +9 -0
lyrics_transcriber/correction/agentic/adapter.py +71 -0
lyrics_transcriber/correction/agentic/agent.py +313 -0
lyrics_transcriber/correction/agentic/feedback/aggregator.py +12 -0
lyrics_transcriber/correction/agentic/feedback/collector.py +17 -0
lyrics_transcriber/correction/agentic/feedback/retention.py +24 -0
lyrics_transcriber/correction/agentic/feedback/store.py +76 -0
lyrics_transcriber/correction/agentic/handlers/__init__.py +24 -0
lyrics_transcriber/correction/agentic/handlers/ambiguous.py +44 -0
lyrics_transcriber/correction/agentic/handlers/background_vocals.py +68 -0
lyrics_transcriber/correction/agentic/handlers/base.py +51 -0
lyrics_transcriber/correction/agentic/handlers/complex_multi_error.py +46 -0
lyrics_transcriber/correction/agentic/handlers/extra_words.py +74 -0
lyrics_transcriber/correction/agentic/handlers/no_error.py +42 -0
lyrics_transcriber/correction/agentic/handlers/punctuation.py +44 -0
lyrics_transcriber/correction/agentic/handlers/registry.py +60 -0
lyrics_transcriber/correction/agentic/handlers/repeated_section.py +44 -0
lyrics_transcriber/correction/agentic/handlers/sound_alike.py +126 -0
lyrics_transcriber/correction/agentic/models/__init__.py +5 -0
lyrics_transcriber/correction/agentic/models/ai_correction.py +31 -0
lyrics_transcriber/correction/agentic/models/correction_session.py +30 -0
lyrics_transcriber/correction/agentic/models/enums.py +38 -0
lyrics_transcriber/correction/agentic/models/human_feedback.py +30 -0
lyrics_transcriber/correction/agentic/models/learning_data.py +26 -0
lyrics_transcriber/correction/agentic/models/observability_metrics.py +28 -0
lyrics_transcriber/correction/agentic/models/schemas.py +46 -0
lyrics_transcriber/correction/agentic/models/utils.py +19 -0
lyrics_transcriber/correction/agentic/observability/__init__.py +5 -0
lyrics_transcriber/correction/agentic/observability/langfuse_integration.py +35 -0
lyrics_transcriber/correction/agentic/observability/metrics.py +46 -0
lyrics_transcriber/correction/agentic/observability/performance.py +19 -0
lyrics_transcriber/correction/agentic/prompts/__init__.py +2 -0
lyrics_transcriber/correction/agentic/prompts/classifier.py +227 -0
lyrics_transcriber/correction/agentic/providers/__init__.py +6 -0
lyrics_transcriber/correction/agentic/providers/base.py +36 -0
lyrics_transcriber/correction/agentic/providers/circuit_breaker.py +145 -0
lyrics_transcriber/correction/agentic/providers/config.py +73 -0
lyrics_transcriber/correction/agentic/providers/constants.py +24 -0
lyrics_transcriber/correction/agentic/providers/health.py +28 -0
lyrics_transcriber/correction/agentic/providers/langchain_bridge.py +212 -0
lyrics_transcriber/correction/agentic/providers/model_factory.py +209 -0
lyrics_transcriber/correction/agentic/providers/response_cache.py +218 -0
lyrics_transcriber/correction/agentic/providers/response_parser.py +111 -0
lyrics_transcriber/correction/agentic/providers/retry_executor.py +127 -0
lyrics_transcriber/correction/agentic/router.py +35 -0
lyrics_transcriber/correction/agentic/workflows/__init__.py +5 -0
lyrics_transcriber/correction/agentic/workflows/consensus_workflow.py +24 -0
lyrics_transcriber/correction/agentic/workflows/correction_graph.py +59 -0
lyrics_transcriber/correction/agentic/workflows/feedback_workflow.py +24 -0
lyrics_transcriber/correction/anchor_sequence.py +1043 -0
lyrics_transcriber/correction/corrector.py +760 -0
lyrics_transcriber/correction/feedback/__init__.py +2 -0
lyrics_transcriber/correction/feedback/schemas.py +107 -0
lyrics_transcriber/correction/feedback/store.py +236 -0
lyrics_transcriber/correction/handlers/__init__.py +0 -0
lyrics_transcriber/correction/handlers/base.py +52 -0
lyrics_transcriber/correction/handlers/extend_anchor.py +149 -0
lyrics_transcriber/correction/handlers/levenshtein.py +189 -0
lyrics_transcriber/correction/handlers/llm.py +293 -0
lyrics_transcriber/correction/handlers/llm_providers.py +60 -0
lyrics_transcriber/correction/handlers/no_space_punct_match.py +154 -0
lyrics_transcriber/correction/handlers/relaxed_word_count_match.py +85 -0
lyrics_transcriber/correction/handlers/repeat.py +88 -0
lyrics_transcriber/correction/handlers/sound_alike.py +259 -0
lyrics_transcriber/correction/handlers/syllables_match.py +252 -0
lyrics_transcriber/correction/handlers/word_count_match.py +80 -0
lyrics_transcriber/correction/handlers/word_operations.py +187 -0
lyrics_transcriber/correction/operations.py +352 -0
lyrics_transcriber/correction/phrase_analyzer.py +435 -0
lyrics_transcriber/correction/text_utils.py +30 -0
lyrics_transcriber/frontend/.gitignore +23 -0
lyrics_transcriber/frontend/.yarn/releases/yarn-4.7.0.cjs +935 -0
lyrics_transcriber/frontend/.yarnrc.yml +3 -0
lyrics_transcriber/frontend/README.md +50 -0
lyrics_transcriber/frontend/REPLACE_ALL_FUNCTIONALITY.md +210 -0
lyrics_transcriber/frontend/__init__.py +25 -0
lyrics_transcriber/frontend/eslint.config.js +28 -0
lyrics_transcriber/frontend/index.html +18 -0
lyrics_transcriber/frontend/package.json +42 -0
lyrics_transcriber/frontend/public/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/public/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/public/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/public/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/public/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/public/favicon.ico +0 -0
lyrics_transcriber/frontend/public/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/src/App.tsx +212 -0
lyrics_transcriber/frontend/src/api.ts +239 -0
lyrics_transcriber/frontend/src/components/AIFeedbackModal.tsx +77 -0
lyrics_transcriber/frontend/src/components/AddLyricsModal.tsx +114 -0
lyrics_transcriber/frontend/src/components/AgenticCorrectionMetrics.tsx +204 -0
lyrics_transcriber/frontend/src/components/AudioPlayer.tsx +180 -0
lyrics_transcriber/frontend/src/components/CorrectedWordWithActions.tsx +167 -0
lyrics_transcriber/frontend/src/components/CorrectionAnnotationModal.tsx +359 -0
lyrics_transcriber/frontend/src/components/CorrectionDetailCard.tsx +281 -0
lyrics_transcriber/frontend/src/components/CorrectionMetrics.tsx +162 -0
lyrics_transcriber/frontend/src/components/DurationTimelineView.tsx +257 -0
lyrics_transcriber/frontend/src/components/EditActionBar.tsx +68 -0
lyrics_transcriber/frontend/src/components/EditModal.tsx +702 -0
lyrics_transcriber/frontend/src/components/EditTimelineSection.tsx +496 -0
lyrics_transcriber/frontend/src/components/EditWordList.tsx +379 -0
lyrics_transcriber/frontend/src/components/FileUpload.tsx +77 -0
lyrics_transcriber/frontend/src/components/FindReplaceModal.tsx +467 -0
lyrics_transcriber/frontend/src/components/Header.tsx +387 -0
lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +1373 -0
lyrics_transcriber/frontend/src/components/MetricsDashboard.tsx +51 -0
lyrics_transcriber/frontend/src/components/ModeSelector.tsx +67 -0
lyrics_transcriber/frontend/src/components/ModelSelector.tsx +23 -0
lyrics_transcriber/frontend/src/components/PreviewVideoSection.tsx +144 -0
lyrics_transcriber/frontend/src/components/ReferenceView.tsx +268 -0
lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +688 -0
lyrics_transcriber/frontend/src/components/ReviewChangesModal.tsx +354 -0
lyrics_transcriber/frontend/src/components/SegmentDetailsModal.tsx +64 -0
lyrics_transcriber/frontend/src/components/TimelineEditor.tsx +376 -0
lyrics_transcriber/frontend/src/components/TimingOffsetModal.tsx +131 -0
lyrics_transcriber/frontend/src/components/TranscriptionView.tsx +256 -0
lyrics_transcriber/frontend/src/components/WordDivider.tsx +187 -0
lyrics_transcriber/frontend/src/components/shared/components/HighlightedText.tsx +379 -0
lyrics_transcriber/frontend/src/components/shared/components/SourceSelector.tsx +56 -0
lyrics_transcriber/frontend/src/components/shared/components/Word.tsx +87 -0
lyrics_transcriber/frontend/src/components/shared/constants.ts +20 -0
lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts +180 -0
lyrics_transcriber/frontend/src/components/shared/styles.ts +13 -0
lyrics_transcriber/frontend/src/components/shared/types.js +2 -0
lyrics_transcriber/frontend/src/components/shared/types.ts +129 -0
lyrics_transcriber/frontend/src/components/shared/utils/keyboardHandlers.ts +177 -0
lyrics_transcriber/frontend/src/components/shared/utils/localStorage.ts +78 -0
lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts +75 -0
lyrics_transcriber/frontend/src/components/shared/utils/segmentOperations.ts +360 -0
lyrics_transcriber/frontend/src/components/shared/utils/timingUtils.ts +110 -0
lyrics_transcriber/frontend/src/components/shared/utils/wordUtils.ts +22 -0
lyrics_transcriber/frontend/src/hooks/useManualSync.ts +435 -0
lyrics_transcriber/frontend/src/main.tsx +17 -0
lyrics_transcriber/frontend/src/theme.ts +177 -0
lyrics_transcriber/frontend/src/types/global.d.ts +9 -0
lyrics_transcriber/frontend/src/types.js +2 -0
lyrics_transcriber/frontend/src/types.ts +199 -0
lyrics_transcriber/frontend/src/validation.ts +132 -0
lyrics_transcriber/frontend/src/vite-env.d.ts +1 -0
lyrics_transcriber/frontend/tsconfig.app.json +26 -0
lyrics_transcriber/frontend/tsconfig.json +25 -0
lyrics_transcriber/frontend/tsconfig.node.json +23 -0
lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -0
lyrics_transcriber/frontend/update_version.js +11 -0
lyrics_transcriber/frontend/vite.config.d.ts +2 -0
lyrics_transcriber/frontend/vite.config.js +10 -0
lyrics_transcriber/frontend/vite.config.ts +11 -0
lyrics_transcriber/frontend/web_assets/android-chrome-192x192.png +0 -0
lyrics_transcriber/frontend/web_assets/android-chrome-512x512.png +0 -0
lyrics_transcriber/frontend/web_assets/apple-touch-icon.png +0 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js +42039 -0
lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +1 -0
lyrics_transcriber/frontend/web_assets/favicon-16x16.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon-32x32.png +0 -0
lyrics_transcriber/frontend/web_assets/favicon.ico +0 -0
lyrics_transcriber/frontend/web_assets/index.html +18 -0
lyrics_transcriber/frontend/web_assets/nomad-karaoke-logo.png +0 -0
lyrics_transcriber/frontend/yarn.lock +3752 -0
lyrics_transcriber/lyrics/__init__.py +0 -0
lyrics_transcriber/lyrics/base_lyrics_provider.py +211 -0
lyrics_transcriber/lyrics/file_provider.py +95 -0
lyrics_transcriber/lyrics/genius.py +384 -0
lyrics_transcriber/lyrics/lrclib.py +231 -0
lyrics_transcriber/lyrics/musixmatch.py +156 -0
lyrics_transcriber/lyrics/spotify.py +290 -0
lyrics_transcriber/lyrics/user_input_provider.py +44 -0
lyrics_transcriber/output/__init__.py +0 -0
lyrics_transcriber/output/ass/__init__.py +21 -0
lyrics_transcriber/output/ass/ass.py +2088 -0
lyrics_transcriber/output/ass/ass_specs.txt +732 -0
lyrics_transcriber/output/ass/config.py +180 -0
lyrics_transcriber/output/ass/constants.py +23 -0
lyrics_transcriber/output/ass/event.py +94 -0
lyrics_transcriber/output/ass/formatters.py +132 -0
lyrics_transcriber/output/ass/lyrics_line.py +265 -0
lyrics_transcriber/output/ass/lyrics_screen.py +252 -0
lyrics_transcriber/output/ass/section_detector.py +89 -0
lyrics_transcriber/output/ass/section_screen.py +106 -0
lyrics_transcriber/output/ass/style.py +187 -0
lyrics_transcriber/output/cdg.py +619 -0
lyrics_transcriber/output/cdgmaker/__init__.py +0 -0
lyrics_transcriber/output/cdgmaker/cdg.py +262 -0
lyrics_transcriber/output/cdgmaker/composer.py +2260 -0
lyrics_transcriber/output/cdgmaker/config.py +151 -0
lyrics_transcriber/output/cdgmaker/images/instrumental.png +0 -0
lyrics_transcriber/output/cdgmaker/images/intro.png +0 -0
lyrics_transcriber/output/cdgmaker/pack.py +507 -0
lyrics_transcriber/output/cdgmaker/render.py +346 -0
lyrics_transcriber/output/cdgmaker/transitions/centertexttoplogobottomtext.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circlein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/circleout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/fizzle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/largecentertexttoplogo.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/rectangle.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/spiral.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/topleftmusicalnotes.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipein.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeleft.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wipeout.png +0 -0
lyrics_transcriber/output/cdgmaker/transitions/wiperight.png +0 -0
lyrics_transcriber/output/cdgmaker/utils.py +132 -0
lyrics_transcriber/output/countdown_processor.py +267 -0
lyrics_transcriber/output/fonts/AvenirNext-Bold.ttf +0 -0
lyrics_transcriber/output/fonts/DMSans-VariableFont_opsz,wght.ttf +0 -0
lyrics_transcriber/output/fonts/DMSerifDisplay-Regular.ttf +0 -0
lyrics_transcriber/output/fonts/Oswald-SemiBold.ttf +0 -0
lyrics_transcriber/output/fonts/Zurich_Cn_BT_Bold.ttf +0 -0
lyrics_transcriber/output/fonts/arial.ttf +0 -0
lyrics_transcriber/output/fonts/georgia.ttf +0 -0
lyrics_transcriber/output/fonts/verdana.ttf +0 -0
lyrics_transcriber/output/generator.py +257 -0
lyrics_transcriber/output/lrc_to_cdg.py +61 -0
lyrics_transcriber/output/lyrics_file.py +102 -0
lyrics_transcriber/output/plain_text.py +96 -0
lyrics_transcriber/output/segment_resizer.py +431 -0
lyrics_transcriber/output/subtitles.py +397 -0
lyrics_transcriber/output/video.py +544 -0
lyrics_transcriber/review/__init__.py +0 -0
lyrics_transcriber/review/server.py +676 -0
lyrics_transcriber/storage/__init__.py +0 -0
lyrics_transcriber/storage/dropbox.py +225 -0
lyrics_transcriber/transcribers/__init__.py +0 -0
lyrics_transcriber/transcribers/audioshake.py +290 -0
lyrics_transcriber/transcribers/base_transcriber.py +157 -0
lyrics_transcriber/transcribers/whisper.py +330 -0
lyrics_transcriber/types.py +648 -0
lyrics_transcriber/utils/__init__.py +0 -0
lyrics_transcriber/utils/word_utils.py +27 -0
karaoke_gen-0.57.0.dist-info/METADATA +0 -167
karaoke_gen-0.57.0.dist-info/RECORD +0 -23
{karaoke_gen-0.57.0.dist-info → karaoke_gen-0.71.27.dist-info/licenses}/LICENSE +0 -0

lyrics_transcriber/correction/feedback/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ """Human feedback collection system for continuous improvement."""
2	+

lyrics_transcriber/correction/feedback/schemas.py ADDED Viewed

@@ -0,0 +1,107 @@
+"""Schemas for correction annotations and human feedback."""
+from __future__ import annotations
+from typing import Optional, List, Dict, Any
+from pydantic import BaseModel, Field
+from enum import Enum
+from datetime import datetime
+import uuid
+class CorrectionAnnotationType(str, Enum):
+    """Types of corrections that can be annotated.
+    The ``str`` mix-in makes each member compare equal to its string value;
+    every value is spelled exactly like its member name (upper-case).
+    """
+    PUNCTUATION_ONLY = "PUNCTUATION_ONLY"
+    SOUND_ALIKE = "SOUND_ALIKE"
+    BACKGROUND_VOCALS = "BACKGROUND_VOCALS"
+    EXTRA_WORDS = "EXTRA_WORDS"
+    REPEATED_SECTION = "REPEATED_SECTION"
+    COMPLEX_MULTI_ERROR = "COMPLEX_MULTI_ERROR"
+    AMBIGUOUS = "AMBIGUOUS"
+    NO_ERROR = "NO_ERROR"
+    MANUAL_EDIT = "MANUAL_EDIT"  # Human-initiated edit not from gap
+class CorrectionAction(str, Enum):
+    """Actions that can be taken for corrections.
+    The ``str`` mix-in makes each member compare equal to its string value,
+    so a member can be tested against a plain literal such as "NO_ACTION".
+    """
+    NO_ACTION = "NO_ACTION"
+    REPLACE = "REPLACE"
+    DELETE = "DELETE"
+    INSERT = "INSERT"
+    MERGE = "MERGE"
+    SPLIT = "SPLIT"
+    FLAG = "FLAG"
+class CorrectionAnnotation(BaseModel):
+    """Annotation for a manual correction made by a human."""
+    annotation_id: str = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique identifier")
+    audio_hash: str = Field(..., description="Hash of the audio file")
+    gap_id: Optional[str] = Field(None, description="Gap ID if this correction is for a gap")
+    # Classification
+    annotation_type: CorrectionAnnotationType = Field(..., description="Type of correction")
+    action_taken: CorrectionAction = Field(..., description="Action that was taken")
+    # Content
+    original_text: str = Field(..., description="Original transcribed text")
+    corrected_text: str = Field(..., description="Corrected text after human edit")
+    # Metadata
+    confidence: float = Field(..., ge=1.0, le=5.0, description="Human confidence rating (1-5)")
+    reasoning: str = Field(..., min_length=10, description="Human explanation for the correction")
+    word_ids_affected: List[str] = Field(default_factory=list, description="Word IDs involved in correction")
+    # Agentic AI comparison
+    agentic_proposal: Optional[Dict[str, Any]] = Field(None, description="What the AI suggested (if applicable)")
+    agentic_category: Optional[str] = Field(None, description="Category the AI classified this as")
+    agentic_agreed: bool = Field(False, description="Whether human agreed with AI proposal")
+    # Reference lyrics
+    reference_sources_consulted: List[str] = Field(default_factory=list, description="Which reference sources were used")
+    # Song metadata
+    artist: str = Field(..., description="Song artist")
+    title: str = Field(..., description="Song title")
+    session_id: str = Field(..., description="Correction session ID")
+    # Timestamp
+    timestamp: datetime = Field(default_factory=datetime.utcnow, description="When annotation was created")
+    class Config:
+        json_schema_extra = {
+            "example": {
+                "annotation_id": "550e8400-e29b-41d4-a716-446655440000",
+                "audio_hash": "abc123",
+                "gap_id": "gap_1",
+                "annotation_type": "sound_alike",
+                "action_taken": "REPLACE",
+                "original_text": "out I'm starting over",
+                "corrected_text": "now I'm starting over",
+                "confidence": 5.0,
+                "reasoning": "The word 'out' sounds like 'now' but the reference lyrics and context make it clear it should be 'now'",
+                "word_ids_affected": ["word_123"],
+                "agentic_proposal": {"action": "ReplaceWord", "replacement_text": "now"},
+                "agentic_category": "sound_alike",
+                "agentic_agreed": True,
+                "reference_sources_consulted": ["genius", "spotify"],
+                "artist": "Rancid",
+                "title": "Time Bomb",
+                "session_id": "session_abc",
+                "timestamp": "2025-01-01T12:00:00"
+            }
+        }
+class AnnotationStatistics(BaseModel):
+    """Aggregated statistics from annotations.
+    Every field has a default, so a bare ``AnnotationStatistics()`` is a
+    valid empty result (FeedbackStore.get_statistics returns exactly that
+    when there are no annotations).
+    """
+    # Number of annotations the statistics were computed from
+    total_annotations: int = 0
+    # Annotation type -> number of annotations of that type
+    annotations_by_type: Dict[str, int] = Field(default_factory=dict)
+    # Action taken -> number of annotations with that action
+    annotations_by_action: Dict[str, int] = Field(default_factory=dict)
+    # Mean of the per-annotation confidence ratings
+    average_confidence: float = 0.0
+    # Share of annotations carrying an AI proposal where the human agreed with it
+    agentic_agreement_rate: float = 0.0
+    # Entries as built by FeedbackStore.get_statistics: "pattern", "count", "annotation_type"
+    most_common_errors: List[Dict[str, Any]] = Field(default_factory=list)
+    # Number of distinct audio hashes among the annotations
+    songs_annotated: int = 0

lyrics_transcriber/correction/feedback/store.py ADDED Viewed

@@ -0,0 +1,236 @@
+"""Storage backend for correction annotations."""
+from __future__ import annotations
+import json
+import logging
+from pathlib import Path
+from typing import List, Dict, Any, Optional
+from datetime import datetime
+from collections import Counter, defaultdict
+from .schemas import CorrectionAnnotation, AnnotationStatistics
+logger = logging.getLogger(__name__)
+class FeedbackStore:
+    """Stores correction annotations in JSONL format."""
+    def __init__(self, storage_dir: str = "cache"):
+        """Initialize feedback store.
+        Args:
+            storage_dir: Directory to store annotations file
+        """
+        self.storage_dir = Path(storage_dir)
+        self.storage_dir.mkdir(parents=True, exist_ok=True)
+        self.annotations_file = self.storage_dir / "correction_annotations.jsonl"
+        # Ensure file exists
+        if not self.annotations_file.exists():
+            self.annotations_file.touch()
+            logger.info(f"Created annotations file: {self.annotations_file}")
+    def save_annotation(self, annotation: CorrectionAnnotation) -> bool:
+        """Save a single annotation to the JSONL file.
+        Args:
+            annotation: CorrectionAnnotation to save
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # Convert to dict and handle datetime serialization
+            data = annotation.model_dump()
+            data['timestamp'] = data['timestamp'].isoformat()
+            # Append to JSONL file
+            with open(self.annotations_file, 'a', encoding='utf-8') as f:
+                f.write(json.dumps(data, ensure_ascii=False) + '\n')
+            logger.debug(f"Saved annotation {annotation.annotation_id}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to save annotation: {e}")
+            return False
+    def save_annotations(self, annotations: List[CorrectionAnnotation]) -> int:
+        """Save multiple annotations.
+        Args:
+            annotations: List of annotations to save
+        Returns:
+            Number of annotations successfully saved
+        """
+        saved = 0
+        for annotation in annotations:
+            if self.save_annotation(annotation):
+                saved += 1
+        return saved
+    def get_all_annotations(self) -> List[CorrectionAnnotation]:
+        """Load all annotations from the JSONL file.
+        Returns:
+            List of CorrectionAnnotation objects
+        """
+        annotations = []
+        if not self.annotations_file.exists():
+            return annotations
+        try:
+            with open(self.annotations_file, 'r', encoding='utf-8') as f:
+                for line_num, line in enumerate(f, 1):
+                    line = line.strip()
+                    if not line:
+                        continue
+                    try:
+                        data = json.loads(line)
+                        # Parse timestamp if string
+                        if isinstance(data.get('timestamp'), str):
+                            data['timestamp'] = datetime.fromisoformat(data['timestamp'])
+                        annotation = CorrectionAnnotation.model_validate(data)
+                        annotations.append(annotation)
+                    except Exception as e:
+                        logger.warning(f"Failed to parse annotation on line {line_num}: {e}")
+                        continue
+            logger.debug(f"Loaded {len(annotations)} annotations")
+            return annotations
+        except Exception as e:
+            logger.error(f"Failed to load annotations: {e}")
+            return []
+    def get_annotations_by_song(self, audio_hash: str) -> List[CorrectionAnnotation]:
+        """Get all annotations for a specific song.
+        Args:
+            audio_hash: Hash of the audio file
+        Returns:
+            List of annotations for that song
+        """
+        all_annotations = self.get_all_annotations()
+        return [a for a in all_annotations if a.audio_hash == audio_hash]
+    def get_annotations_by_category(self, category: str) -> List[CorrectionAnnotation]:
+        """Get all annotations of a specific type.
+        Args:
+            category: Annotation type category
+        Returns:
+            List of annotations of that type
+        """
+        all_annotations = self.get_all_annotations()
+        return [a for a in all_annotations if a.annotation_type == category]
+    def get_statistics(self) -> AnnotationStatistics:
+        """Generate aggregated statistics from all annotations.
+        Returns:
+            AnnotationStatistics object with aggregated data
+        """
+        annotations = self.get_all_annotations()
+        if not annotations:
+            return AnnotationStatistics()
+        # Count by type
+        type_counts = Counter(a.annotation_type for a in annotations)
+        # Count by action
+        action_counts = Counter(a.action_taken for a in annotations)
+        # Average confidence
+        avg_confidence = sum(a.confidence for a in annotations) / len(annotations)
+        # Agentic agreement rate
+        agentic_proposals = [a for a in annotations if a.agentic_proposal is not None]
+        if agentic_proposals:
+            agentic_agreement_rate = sum(1 for a in agentic_proposals if a.agentic_agreed) / len(agentic_proposals)
+        else:
+            agentic_agreement_rate = 0.0
+        # Most common error patterns
+        error_patterns = defaultdict(list)
+        for a in annotations:
+            if a.action_taken != "NO_ACTION":
+                pattern = f"{a.original_text} -> {a.corrected_text}"
+                error_patterns[pattern].append(a)
+        most_common = [
+            {
+                "pattern": pattern,
+                "count": len(anns),
+                "annotation_type": anns[0].annotation_type
+            }
+            for pattern, anns in sorted(error_patterns.items(), key=lambda x: len(x[1]), reverse=True)[:10]
+        ]
+        # Unique songs
+        unique_hashes = set(a.audio_hash for a in annotations)
+        return AnnotationStatistics(
+            total_annotations=len(annotations),
+            annotations_by_type={k: v for k, v in type_counts.items()},
+            annotations_by_action={k: v for k, v in action_counts.items()},
+            average_confidence=avg_confidence,
+            agentic_agreement_rate=agentic_agreement_rate,
+            most_common_errors=most_common,
+            songs_annotated=len(unique_hashes)
+        )
+    def export_to_training_data(self, output_file: Optional[Path] = None) -> Path:
+        """Export annotations in a format suitable for model fine-tuning.
+        Args:
+            output_file: Optional path for output file
+        Returns:
+            Path to the exported file
+        """
+        if output_file is None:
+            output_file = self.storage_dir / "training_data.jsonl"
+        annotations = self.get_all_annotations()
+        # Filter to high-confidence annotations (4-5 rating)
+        high_confidence = [a for a in annotations if a.confidence >= 4.0]
+        with open(output_file, 'w', encoding='utf-8') as f:
+            for annotation in high_confidence:
+                # Create a training example with input/output format
+                training_example = {
+                    "input": {
+                        "original_text": annotation.original_text,
+                        "annotation_type": annotation.annotation_type,
+                        "artist": annotation.artist,
+                        "title": annotation.title,
+                        "reference_sources": annotation.reference_sources_consulted
+                    },
+                    "output": {
+                        "action": annotation.action_taken,
+                        "corrected_text": annotation.corrected_text,
+                        "reasoning": annotation.reasoning
+                    },
+                    "metadata": {
+                        "confidence": annotation.confidence,
+                        "annotation_id": annotation.annotation_id,
+                        "timestamp": annotation.timestamp.isoformat()
+                    }
+                }
+                f.write(json.dumps(training_example, ensure_ascii=False) + '\n')
+        logger.info(f"Exported {len(high_confidence)} training examples to {output_file}")
+        return output_file

lyrics_transcriber/correction/handlers/__init__.py ADDED Viewed

File without changes

lyrics_transcriber/correction/handlers/base.py ADDED Viewed

@@ -0,0 +1,52 @@
+from abc import ABC, abstractmethod
+from typing import List, Optional, Tuple, Dict, Any
+import logging
+from lyrics_transcriber.types import GapSequence, WordCorrection
+class GapCorrectionHandler(ABC):
+    """Base class for gap correction handlers."""
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        self.logger = logger or logging.getLogger(__name__)
+    @abstractmethod
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Check if this handler can process the given gap.
+        Args:
+            gap: The gap sequence to check
+            data: Optional dictionary containing additional data like word_map
+        Returns:
+            Tuple of (can_handle, handler_data)
+        """
+        pass
+    @abstractmethod
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        """Process the gap and return any corrections.
+        Args:
+            gap: The gap sequence to process
+            data: Optional dictionary containing additional data like word_map
+        Returns:
+            List of corrections to apply
+        """
+        pass
+    def _validate_data(self, data: Optional[Dict[str, Any]]) -> bool:
+        """Validate that required data is present.
+        Args:
+            data: The data dictionary to validate
+        Returns:
+            True if data is valid, False otherwise
+        """
+        if not data or "word_map" not in data:
+            self.logger.error("No word_map provided in data")
+            return False
+        return True

lyrics_transcriber/correction/handlers/extend_anchor.py ADDED Viewed

@@ -0,0 +1,149 @@
+from typing import List, Optional, Tuple, Dict, Any
+import logging
+from lyrics_transcriber.types import GapSequence, WordCorrection, Word
+from lyrics_transcriber.correction.handlers.base import GapCorrectionHandler
+from lyrics_transcriber.correction.handlers.word_operations import WordOperations
+class ExtendAnchorHandler(GapCorrectionHandler):
+    """Handles gaps where some words match reference text but there are extra words.
+    This handler looks for cases where:
+    1. One or more words in the gap match words in the same position in at least one reference source
+    2. The gap may contain additional words that aren't in the reference
+    When such matches are found, it:
+    1. Validates all matching words (creates corrections that keep the same words)
+    2. Leaves all non-matching words unchanged for other handlers to process
+    The confidence of validations is based on the ratio of reference sources that agree.
+    For example, if 2 out of 4 sources have the matching word, confidence will be 0.5.
+    Examples:
+        Gap: "hello world extra words"
+        References:
+            genius: ["hello", "world"]
+            spotify: ["hello", "world"]
+        Result:
+            - Validate "hello" (confidence=1.0)
+            - Validate "world" (confidence=1.0)
+            - Leave "extra" and "words" unchanged
+        Gap: "martyr youre a"
+        References:
+            genius: ["martyr"]
+            spotify: ["mother"]
+        Result:
+            - Validate "martyr" (confidence=0.5, source="genius")
+            - Leave "youre" and "a" unchanged
+    """
+    def __init__(self, logger: Optional[logging.Logger] = None):
+        self.logger = logger or logging.getLogger(__name__)
+    def can_handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> Tuple[bool, Dict[str, Any]]:
+        """Check if this gap can be handled by extending anchor sequences."""
+        # Must have reference word IDs
+        if not gap.reference_word_ids:
+            self.logger.debug("No reference word IDs available.")
+            return False, {}
+        # Gap must have word IDs
+        if not gap.transcribed_word_ids:
+            self.logger.debug("No word IDs in the gap to process.")
+            return False, {}
+        # Must have word map to resolve IDs to actual words
+        if not self._validate_data(data):
+            return False, {}
+        word_map = data["word_map"]
+        # At least one word must match between gap and any reference source by text content
+        has_match = False
+        for i, trans_word_id in enumerate(gap.transcribed_word_ids):
+            if trans_word_id not in word_map:
+                continue
+            trans_word = word_map[trans_word_id]
+            # Check if this word matches any reference word at the same position
+            for ref_word_ids in gap.reference_word_ids.values():
+                if i < len(ref_word_ids):
+                    ref_word_id = ref_word_ids[i]
+                    if ref_word_id in word_map:
+                        ref_word = word_map[ref_word_id]
+                        if trans_word.text.lower() == ref_word.text.lower():
+                            has_match = True
+                            break
+            if has_match:
+                break
+        self.logger.debug(f"Can handle gap: {has_match}")
+        return has_match, {"word_map": word_map}
+    def handle(self, gap: GapSequence, data: Optional[Dict[str, Any]] = None) -> List[WordCorrection]:
+        corrections = []
+        # Get word lookup map from data
+        if not self._validate_data(data):
+            return []
+        word_map = data["word_map"]
+        # Process each word in the gap that has a corresponding reference position
+        for i, word_id in enumerate(gap.transcribed_word_ids):
+            # Get the actual word object
+            if word_id not in word_map:
+                self.logger.error(f"Word ID {word_id} not found in word_map")
+                continue
+            word = word_map[word_id]
+            # Find reference sources that have a matching word (by text) at this position
+            matching_sources = []
+            corrected_word_id = None
+            for source, ref_word_ids in gap.reference_word_ids.items():
+                if i < len(ref_word_ids):
+                    ref_word_id = ref_word_ids[i]
+                    if ref_word_id in word_map:
+                        ref_word = word_map[ref_word_id]
+                        if word.text.lower() == ref_word.text.lower():
+                            matching_sources.append(source)
+                            if corrected_word_id is None:
+                                corrected_word_id = ref_word_id
+            if not matching_sources:
+                self.logger.debug(f"Skipping word '{word.text}' at position {i} - no matching references")
+                continue
+            # Word matches reference(s) at this position - validate it
+            confidence = len(matching_sources) / len(gap.reference_word_ids)
+            sources = ", ".join(matching_sources)
+            # Get base reference positions
+            base_reference_positions = WordOperations.calculate_reference_positions(gap, matching_sources)
+            # Adjust reference positions based on the word's position in the reference text
+            reference_positions = {}
+            for source in matching_sources:
+                if source in base_reference_positions:
+                    reference_positions[source] = base_reference_positions[source] + i
+            corrections.append(
+                WordOperations.create_word_replacement_correction(
+                    original_word=word.text,
+                    corrected_word=word.text,
+                    original_position=gap.transcription_position + i,
+                    source=sources,
+                    confidence=confidence,
+                    reason="Matched reference source(s)",
+                    reference_positions=reference_positions,
+                    handler="ExtendAnchorHandler",
+                    original_word_id=word_id,
+                    corrected_word_id=corrected_word_id,
+                )
+            )
+            self.logger.debug(f"Validated word '{word.text}' with confidence {confidence} from sources: {sources}")
+        return corrections

karaoke-gen 0.57.0__py3-none-any.whl → 0.71.27__py3-none-any.whl

karaoke-gen 0.57.0py3-none-any.whl → 0.71.27py3-none-any.whl