PyPI - lyrics-transcriber - Versions diffs - 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl - Mend

lyrics-transcriber 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts CHANGED Viewed

@@ -19,6 +19,7 @@ export interface UseWordClickProps {
     onWordClick?: (info: WordClickInfo) => void
     isReference?: boolean
     currentSource?: string
+    gaps?: GapSequence[]
 }
 export function useWordClick({
@@ -26,7 +27,8 @@ export function useWordClick({
     onElementClick,
     onWordClick,
     isReference,
-    currentSource
+    currentSource,
+    gaps = []
 }: UseWordClickProps) {
     const handleWordClick = useCallback((
         word: string,
@@ -47,21 +49,20 @@ export function useWordClick({
                     wordIds: anchor.word_ids,
                     length: anchor.length,
                     words: anchor.words,
-                    referenceWordIds: anchor.reference_word_ids
+                    referenceWordIds: anchor.reference_word_ids,
+                    matchesWordId: isReference
+                        ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
+                        : anchor.word_ids.includes(wordId)
                 },
                 gapInfo: gap && {
                     wordIds: gap.word_ids,
                     length: gap.length,
                     words: gap.words,
-                    corrections: gap.corrections.map(c => ({
-                        original_word: c.original_word,
-                        corrected_word: c.corrected_word,
-                        word_id: c.word_id,
-                        length: c.length,
-                        is_deletion: c.is_deletion,
-                        split_index: c.split_index,
-                        split_total: c.split_total
-                    }))
+                    referenceWords: gap.reference_words,
+                    corrections: gap.corrections,
+                    matchesWordId: isReference
+                        ? gap.reference_words[currentSource!]?.includes(wordId)
+                        : gap.word_ids.includes(wordId)
                 },
                 belongsToAnchor: anchor && (
                     isReference
@@ -78,6 +79,28 @@ export function useWordClick({
             }
         }, null, 2))
+        // For reference view clicks, find the corresponding gap
+        if (isReference && currentSource) {
+            // Extract position from wordId (e.g., "genius-word-3" -> 3)
+            const position = parseInt(wordId.split('-').pop() || '', 10);
+            // Find gap that has a correction matching this reference position
+            const matchingGap = gaps?.find(g =>
+                g.corrections.some(c => {
+                    const refPosition = c.reference_positions?.[currentSource];
+                    return typeof refPosition === 'number' && refPosition === position;
+                })
+            );
+            if (matchingGap) {
+                console.log('Found matching gap for reference click:', {
+                    position,
+                    gap: matchingGap
+                });
+                gap = matchingGap;
+            }
+        }
         const belongsToAnchor = anchor && (
             isReference
                 ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
@@ -86,17 +109,58 @@ export function useWordClick({
         const belongsToGap = gap && (
             isReference
-                ? gap.corrections.some(c => c.word_id === wordId)
+                ? gap.corrections.some(c => {
+                    const refPosition = c.reference_positions?.[currentSource!];
+                    const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
+                    return typeof refPosition === 'number' && refPosition === clickedPosition;
+                })
                 : gap.word_ids.includes(wordId)
         )
         if (mode === 'highlight' || mode === 'edit') {
-            onWordClick?.({
-                word_id: wordId,
-                type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
-                anchor: belongsToAnchor ? anchor : undefined,
-                gap: belongsToGap ? gap : undefined
-            })
+            if (belongsToAnchor && anchor) {
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'anchor',
+                    anchor,
+                    gap: undefined
+                })
+            } else if (belongsToGap && gap) {
+                // Create highlight info that includes both transcription and reference IDs
+                const referenceWords: Record<string, string[]> = {};
+                // For each correction in the gap, add its reference positions
+                gap.corrections.forEach(correction => {
+                    Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
+                        if (typeof position === 'number') {
+                            const refId = `${source}-word-${position}`;
+                            if (!referenceWords[source]) {
+                                referenceWords[source] = [];
+                            }
+                            if (!referenceWords[source].includes(refId)) {
+                                referenceWords[source].push(refId);
+                            }
+                        }
+                    });
+                });
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'gap',
+                    anchor: undefined,
+                    gap: {
+                        ...gap,
+                        reference_words: referenceWords // Use reference_words instead of reference_word_ids
+                    }
+                })
+            } else {
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'other',
+                    anchor: undefined,
+                    gap: undefined
+                })
+            }
         } else if (mode === 'details') {
             if (belongsToAnchor && anchor) {
                 onElementClick({
@@ -139,7 +203,7 @@ export function useWordClick({
                 })
             }
         }
-    }, [mode, onWordClick, onElementClick, isReference, currentSource])
+    }, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
     return { handleWordClick }
 }

lyrics_transcriber/frontend/src/components/shared/types.ts CHANGED Viewed

@@ -35,7 +35,10 @@ export interface TranscriptionWordPosition extends BaseWordPosition {
         start_time?: number
         end_time?: number
     }
+    type: 'anchor' | 'gap' | 'other'
+    sequence?: AnchorSequence | GapSequence
     isInRange: boolean
+    isCorrected?: boolean
 }
 // Reference-specific word position with simple string word

lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx CHANGED Viewed

@@ -54,16 +54,57 @@ function findWordIdsForSequence(
     return allWords.slice(startIndex, endIndex).map(word => word.id);
 }
-// Helper function to find word ID for a correction
+// Add this at the top of the file
+const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
+    const allWords = segments.flatMap(s => s.words);
+    console.log('Word ID Assignment:', {
+        searchingFor: correction.original_word,
+        allWordsWithIds: allWords.map(w => ({
+            text: w.text,
+            id: w.id
+        })),
+        matchedId: foundId,
+        matchedWord: foundId ? allWords.find(w => w.id === foundId)?.text : null
+    });
+};
+// Modify findWordIdForCorrection to include logging
 function findWordIdForCorrection(
     segments: LyricsSegment[],
-    correction: { original_word: string; }
+    correction: {
+        original_word: string;
+        original_position?: number;
+    }
 ): string {
+    const allWords = segments.flatMap(s => s.words);
+    // If we have position information, use it to find the exact word
+    if (typeof correction.original_position === 'number') {
+        const word = allWords[correction.original_position];
+        if (word && word.text === correction.original_word) {
+            logWordMatching(segments, correction, word.id);
+            return word.id;
+        }
+    }
+    // Fallback to finding by text (but log a warning)
     for (const segment of segments) {
         const word = segment.words.find(w => w.text === correction.original_word);
-        if (word) return word.id;
+        if (word) {
+            console.warn(
+                'Warning: Had to find word by text match rather than position.',
+                correction.original_word,
+                'Consider using position information for more accurate matching.'
+            );
+            logWordMatching(segments, correction, word.id);
+            return word.id;
+        }
     }
-    return nanoid(); // Fallback if word not found
+    const newId = nanoid();
+    logWordMatching(segments, correction, null);
+    console.log('Generated new ID:', newId, 'for word:', correction.original_word);
+    return newId;
 }
 // Helper function to find word IDs in reference text
@@ -130,15 +171,30 @@ export function initializeDataWithIds(data: CorrectionData): CorrectionData {
     // Update gap sequences to use word IDs
     newData.gap_sequences = newData.gap_sequences.map((gap) => {
         const serverGap = gap as unknown as ServerData;
+        console.log('Processing gap sequence:', {
+            words: gap.words,
+            word_ids: gap.word_ids,
+            corrections: gap.corrections,
+            foundWordIds: findWordIdsForSequence(newData.corrected_segments, serverGap)
+        });
         return {
             ...gap,
             id: gap.id || nanoid(),
             word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
-            corrections: gap.corrections.map((correction: WordCorrection) => ({
-                ...correction,
-                id: correction.id || nanoid(),
-                word_id: correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction)
-            }))
+            corrections: gap.corrections.map((correction: WordCorrection) => {
+                const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
+                console.log('Correction word ID assignment:', {
+                    original_word: correction.original_word,
+                    corrected_word: correction.corrected_word,
+                    assigned_id: wordId
+                });
+                return {
+                    ...correction,
+                    id: correction.id || nanoid(),
+                    word_id: wordId
+                };
+            })
         } as GapSequence;
     });

lyrics_transcriber/frontend/vite.config.js CHANGED Viewed

@@ -3,4 +3,8 @@ import react from '@vitejs/plugin-react';
 // https://vite.dev/config/
 export default defineConfig({
     plugins: [react()],
+    build: {
+        minify: false,
+        sourcemap: true,
+    }
 });

lyrics_transcriber/frontend/vite.config.ts CHANGED Viewed

@@ -4,4 +4,8 @@ import react from '@vitejs/plugin-react'
 // https://vite.dev/config/
 export default defineConfig({
   plugins: [react()],
+  build: {
+    minify: false,
+    sourcemap: true,
+  }
 })

lyrics_transcriber/lyrics/genius.py CHANGED Viewed

@@ -82,19 +82,48 @@ class GeniusProvider(BaseLyricsProvider):
     def _clean_lyrics(self, lyrics: str) -> str:
         """Clean and process lyrics from Genius to remove unwanted content."""
+        self.logger.debug("Starting lyrics cleaning process")
+        original = lyrics
         lyrics = lyrics.replace("\\n", "\n")
         lyrics = re.sub(r"You might also like", "", lyrics)
-        lyrics = re.sub(
-            r".*?Lyrics([A-Z])", r"\1", lyrics
-        )  # Remove the song name and word "Lyrics" if this has a non-newline char at the start
-        lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)  # Remove this example: 27 ContributorsSex Bomb Lyrics
-        lyrics = re.sub(
-            r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics
-        )  # Remove this example: See Tom Jones LiveGet tickets as low as $71
-        lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)  # Remove the word "Embed" at end of line with preceding numbers if found
-        lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)  # Remove the word "Embed" if it has been tacked onto a word at the end of a line
-        lyrics = re.sub(r"^Embed$", r"", lyrics)  # Remove the word "Embed" if it has been tacked onto a word at the end of a line
-        lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)  # Remove lines containing square brackets
-        # add any additional cleaning rules here
+        if original != lyrics:
+            self.logger.debug("Removed 'You might also like' text")
+        original = lyrics
+        lyrics = re.sub(r".*?Lyrics([A-Z])", r"\1", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed song name and 'Lyrics' prefix")
+        original = lyrics
+        lyrics = re.sub(r"^[0-9]* Contributors.*Lyrics", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed contributors count and 'Lyrics' text")
+        original = lyrics
+        lyrics = re.sub(r"See.*Live.*Get tickets as low as \$[0-9]+", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed ticket sales text")
+        original = lyrics
+        lyrics = re.sub(r"[0-9]+Embed$", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed numbered embed marker")
+        original = lyrics
+        lyrics = re.sub(r"(\S)Embed$", r"\1", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed 'Embed' suffix from word")
+        original = lyrics
+        lyrics = re.sub(r"^Embed$", r"", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed standalone 'Embed' text")
+        original = lyrics
+        lyrics = re.sub(r".*?\[.*?\].*?", "", lyrics)
+        if original != lyrics:
+            self.logger.debug("Removed lines containing square brackets")
+        self.logger.debug("Completed lyrics cleaning process")
         return lyrics

lyrics_transcriber/output/cdg.py CHANGED Viewed

@@ -189,6 +189,7 @@ class CDGGenerator:
         """Compose CDG using KaraokeComposer."""
         kc = KaraokeComposer.from_file(toml_file)
         kc.compose()
+        kc.create_mp4(height=1080, fps=30)
     def _find_cdg_zip(self, artist: str, title: str) -> str:
         """Find the generated CDG ZIP file."""

lyrics-transcriber 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl

lyrics-transcriber 0.37.0__py3-none-any.whl → 0.39.0__py3-none-any.whl