PyPI - lyrics-transcriber - Versions diffs - 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl - Mend

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

lyrics_transcriber/frontend/src/components/shared/hooks/useWordClick.ts CHANGED Viewed

@@ -3,12 +3,23 @@ import { AnchorSequence, GapSequence, InteractionMode } from '../../../types'
 import { ModalContent } from '../../LyricsAnalyzer'
 import { WordClickInfo } from '../types'
+// Define debug info type
+interface WordDebugInfo {
+    wordSplitInfo?: {
+        text: string
+        startIndex: number
+        endIndex: number
+    }
+    nearbyAnchors?: AnchorSequence[]
+}
 export interface UseWordClickProps {
     mode: InteractionMode
     onElementClick: (content: ModalContent) => void
     onWordClick?: (info: WordClickInfo) => void
     isReference?: boolean
     currentSource?: string
+    gaps?: GapSequence[]
 }
 export function useWordClick({
@@ -16,87 +27,147 @@ export function useWordClick({
     onElementClick,
     onWordClick,
     isReference,
-    currentSource
+    currentSource,
+    gaps = []
 }: UseWordClickProps) {
     const handleWordClick = useCallback((
         word: string,
-        position: number,
+        wordId: string,
         anchor?: AnchorSequence,
         gap?: GapSequence,
-        debugInfo?: any
+        debugInfo?: WordDebugInfo
     ) => {
         console.log(JSON.stringify({
             debug: {
                 clickedWord: word,
-                position,
+                wordId,
                 isReference,
                 currentSource,
                 wordInfo: debugInfo?.wordSplitInfo,
                 nearbyAnchors: debugInfo?.nearbyAnchors,
                 anchorInfo: anchor && {
-                    transcriptionPos: anchor.transcription_position,
+                    wordIds: anchor.word_ids,
                     length: anchor.length,
                     words: anchor.words,
-                    refPositions: anchor.reference_positions
+                    referenceWordIds: anchor.reference_word_ids,
+                    matchesWordId: isReference
+                        ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
+                        : anchor.word_ids.includes(wordId)
                 },
                 gapInfo: gap && {
-                    transcriptionPos: gap.transcription_position,
+                    wordIds: gap.word_ids,
                     length: gap.length,
                     words: gap.words,
-                    corrections: gap.corrections.map(c => ({
-                        length: c.length,
-                        refPositions: c.reference_positions
-                    }))
+                    referenceWords: gap.reference_words,
+                    corrections: gap.corrections,
+                    matchesWordId: isReference
+                        ? gap.reference_words[currentSource!]?.includes(wordId)
+                        : gap.word_ids.includes(wordId)
                 },
                 belongsToAnchor: anchor && (
                     isReference
-                        ? position >= (anchor.reference_positions[currentSource!] ?? -1) &&
-                        position < ((anchor.reference_positions[currentSource!] ?? -1) + anchor.length)
-                        : position >= anchor.transcription_position &&
-                        position < (anchor.transcription_position + anchor.length)
+                        ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
+                        : anchor.word_ids.includes(wordId)
                 ),
                 belongsToGap: gap && (
                     isReference
-                        ? gap.corrections[0]?.reference_positions?.[currentSource!] !== undefined &&
-                        position >= (gap.corrections[0].reference_positions![currentSource!]) &&
-                        position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
-                        : position >= gap.transcription_position &&
-                        position < (gap.transcription_position + gap.length)
-                )
+                        ? gap.corrections.some(c => c.word_id === wordId)
+                        : gap.word_ids.includes(wordId)
+                ),
+                wordIndexInGap: gap && gap.words.indexOf(word),
+                hasMatchingCorrection: gap && gap.corrections.some(c => c.word_id === wordId)
             }
         }, null, 2))
+        // For reference view clicks, find the corresponding gap
+        if (isReference && currentSource) {
+            // Extract position from wordId (e.g., "genius-word-3" -> 3)
+            const position = parseInt(wordId.split('-').pop() || '', 10);
+            // Find gap that has a correction matching this reference position
+            const matchingGap = gaps?.find(g =>
+                g.corrections.some(c => {
+                    const refPosition = c.reference_positions?.[currentSource];
+                    return typeof refPosition === 'number' && refPosition === position;
+                })
+            );
+            if (matchingGap) {
+                console.log('Found matching gap for reference click:', {
+                    position,
+                    gap: matchingGap
+                });
+                gap = matchingGap;
+            }
+        }
         const belongsToAnchor = anchor && (
             isReference
-                ? position >= (anchor.reference_positions[currentSource!] ?? -1) &&
-                position < ((anchor.reference_positions[currentSource!] ?? -1) + anchor.length)
-                : position >= anchor.transcription_position &&
-                position < (anchor.transcription_position + anchor.length)
+                ? anchor.reference_word_ids[currentSource!]?.includes(wordId)
+                : anchor.word_ids.includes(wordId)
         )
         const belongsToGap = gap && (
             isReference
-                ? gap.corrections[0]?.reference_positions?.[currentSource!] !== undefined &&
-                position >= (gap.corrections[0].reference_positions![currentSource!]) &&
-                position < (gap.corrections[0].reference_positions![currentSource!] + gap.corrections[0].length)
-                : position >= gap.transcription_position &&
-                position < (gap.transcription_position + gap.length)
+                ? gap.corrections.some(c => {
+                    const refPosition = c.reference_positions?.[currentSource!];
+                    const clickedPosition = parseInt(wordId.split('-').pop() || '', 10);
+                    return typeof refPosition === 'number' && refPosition === clickedPosition;
+                })
+                : gap.word_ids.includes(wordId)
         )
         if (mode === 'highlight' || mode === 'edit') {
-            onWordClick?.({
-                wordIndex: position,
-                type: belongsToAnchor ? 'anchor' : belongsToGap ? 'gap' : 'other',
-                anchor: belongsToAnchor ? anchor : undefined,
-                gap: belongsToGap ? gap : undefined
-            })
+            if (belongsToAnchor && anchor) {
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'anchor',
+                    anchor,
+                    gap: undefined
+                })
+            } else if (belongsToGap && gap) {
+                // Create highlight info that includes both transcription and reference IDs
+                const referenceWords: Record<string, string[]> = {};
+                // For each correction in the gap, add its reference positions
+                gap.corrections.forEach(correction => {
+                    Object.entries(correction.reference_positions || {}).forEach(([source, position]) => {
+                        if (typeof position === 'number') {
+                            const refId = `${source}-word-${position}`;
+                            if (!referenceWords[source]) {
+                                referenceWords[source] = [];
+                            }
+                            if (!referenceWords[source].includes(refId)) {
+                                referenceWords[source].push(refId);
+                            }
+                        }
+                    });
+                });
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'gap',
+                    anchor: undefined,
+                    gap: {
+                        ...gap,
+                        reference_words: referenceWords // Use reference_words instead of reference_word_ids
+                    }
+                })
+            } else {
+                onWordClick?.({
+                    word_id: wordId,
+                    type: 'other',
+                    anchor: undefined,
+                    gap: undefined
+                })
+            }
         } else if (mode === 'details') {
             if (belongsToAnchor && anchor) {
                 onElementClick({
                     type: 'anchor',
                     data: {
                         ...anchor,
-                        position,
+                        wordId,
                         word
                     }
                 })
@@ -105,16 +176,17 @@ export function useWordClick({
                     type: 'gap',
                     data: {
                         ...gap,
-                        position,
+                        wordId,
                         word
                     }
                 })
             } else if (!isReference) {
                 // Create synthetic gap for non-sequence words (transcription view only)
                 const syntheticGap: GapSequence = {
+                    id: `synthetic-${wordId}`,
                     text: word,
                     words: [word],
-                    transcription_position: position,
+                    word_ids: [wordId],
                     length: 1,
                     corrections: [],
                     preceding_anchor: null,
@@ -125,13 +197,13 @@ export function useWordClick({
                     type: 'gap',
                     data: {
                         ...syntheticGap,
-                        position: 0,
+                        wordId,
                         word
                     }
                 })
             }
         }
-    }, [mode, onWordClick, onElementClick, isReference, currentSource])
+    }, [mode, onWordClick, onElementClick, isReference, currentSource, gaps])
     return { handleWordClick }
 }

lyrics_transcriber/frontend/src/components/shared/types.ts CHANGED Viewed

@@ -6,7 +6,7 @@ export type FlashType = 'anchor' | 'corrected' | 'uncorrected' | 'word' | null
 // Common word click handling
 export interface WordClickInfo {
-    wordIndex: number
+    word_id: string
     type: 'anchor' | 'gap' | 'other'
     anchor?: AnchorSequence
     gap?: GapSequence
@@ -29,13 +29,16 @@ export interface BaseWordPosition {
 // Transcription-specific word position with timing info
 export interface TranscriptionWordPosition extends BaseWordPosition {
-    position: number
-    isInRange: boolean
     word: {
+        id: string
         text: string
         start_time?: number
         end_time?: number
     }
+    type: 'anchor' | 'gap' | 'other'
+    sequence?: AnchorSequence | GapSequence
+    isInRange: boolean
+    isCorrected?: boolean
 }
 // Reference-specific word position with simple string word

lyrics_transcriber/frontend/src/components/shared/utils/initializeDataWithIds.tsx ADDED Viewed

@@ -0,0 +1,202 @@
+import { CorrectionData, LyricsSegment, Word, AnchorSequence, GapSequence, WordCorrection } from '@/types';
+import { nanoid } from 'nanoid';
+// Define server-side types just for this file
+interface ServerData {
+    transcription_position: number;
+    length: number;
+    words: string[];
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    [key: string]: any;
+}
+export function normalizeDataForSubmission(data: CorrectionData): CorrectionData {
+    // Create a deep clone to avoid modifying the original
+    const normalized = JSON.parse(JSON.stringify(data));
+    // Preserve floating point numbers with original precision
+    const preserveFloats = (obj: Record<string, unknown>): void => {
+        for (const key in obj) {
+            const value = obj[key];
+            if (typeof value === 'number') {
+                // Handle integers and floats differently
+                let formatted: string;
+                if (Number.isInteger(value)) {
+                    formatted = value.toFixed(1); // Force decimal point for integers
+                } else {
+                    formatted = value.toString(); // Keep original precision for floats
+                }
+                obj[key] = parseFloat(formatted);
+            } else if (typeof value === 'object' && value !== null) {
+                preserveFloats(value as Record<string, unknown>);
+            }
+        }
+    };
+    preserveFloats(normalized);
+    return normalized;
+}
+// Helper function to find word IDs for a sequence based on original positions
+function findWordIdsForSequence(
+    segments: LyricsSegment[],
+    sequence: ServerData
+): string[] {
+    const allWords = segments.flatMap(s => s.words);
+    const startIndex = sequence.transcription_position;
+    const endIndex = startIndex + sequence.length;
+    console.log('Finding word IDs for sequence:', JSON.stringify({
+        position: sequence.transcription_position,
+        length: sequence.length,
+        words: allWords.slice(startIndex, endIndex).map(w => w.text)
+    }));
+    return allWords.slice(startIndex, endIndex).map(word => word.id);
+}
+// Add this at the top of the file
+const logWordMatching = (segments: LyricsSegment[], correction: { original_word: string }, foundId: string | null) => {
+    const allWords = segments.flatMap(s => s.words);
+    console.log('Word ID Assignment:', {
+        searchingFor: correction.original_word,
+        allWordsWithIds: allWords.map(w => ({
+            text: w.text,
+            id: w.id
+        })),
+        matchedId: foundId,
+        matchedWord: foundId ? allWords.find(w => w.id === foundId)?.text : null
+    });
+};
+// Modify findWordIdForCorrection to include logging
+function findWordIdForCorrection(
+    segments: LyricsSegment[],
+    correction: {
+        original_word: string;
+        original_position?: number;
+    }
+): string {
+    const allWords = segments.flatMap(s => s.words);
+    // If we have position information, use it to find the exact word
+    if (typeof correction.original_position === 'number') {
+        const word = allWords[correction.original_position];
+        if (word && word.text === correction.original_word) {
+            logWordMatching(segments, correction, word.id);
+            return word.id;
+        }
+    }
+    // Fallback to finding by text (but log a warning)
+    for (const segment of segments) {
+        const word = segment.words.find(w => w.text === correction.original_word);
+        if (word) {
+            console.warn(
+                'Warning: Had to find word by text match rather than position.',
+                correction.original_word,
+                'Consider using position information for more accurate matching.'
+            );
+            logWordMatching(segments, correction, word.id);
+            return word.id;
+        }
+    }
+    const newId = nanoid();
+    logWordMatching(segments, correction, null);
+    console.log('Generated new ID:', newId, 'for word:', correction.original_word);
+    return newId;
+}
+// Helper function to find word IDs in reference text
+function findReferenceWordIds(
+    referenceSource: string,
+    sequence: ServerData
+): string[] {
+    const referencePosition = sequence.reference_positions?.[referenceSource];
+    if (typeof referencePosition !== 'number') {
+        return [];
+    }
+    // Generate IDs in the same format as HighlightedText
+    const wordIds = Array.from({ length: sequence.length },
+        (_, i) => `${referenceSource}-word-${referencePosition + i}`
+    );
+    return wordIds;
+}
+export function initializeDataWithIds(data: CorrectionData): CorrectionData {
+    const newData = JSON.parse(JSON.stringify(data)) as CorrectionData;
+    // Initialize segment and word IDs
+    newData.corrected_segments = newData.corrected_segments.map((segment: LyricsSegment) => ({
+        ...segment,
+        id: segment.id || nanoid(),
+        words: segment.words.map((word: Word) => ({
+            ...word,
+            id: word.id || nanoid()
+        }))
+    }));
+    console.log('Segments after ID initialization:', JSON.stringify({
+        segmentCount: newData.corrected_segments.length,
+        totalWords: newData.corrected_segments.reduce((sum, seg) => sum + seg.words.length, 0),
+        sampleWords: newData.corrected_segments[0].words.map(w => ({ id: w.id, text: w.text }))
+    }));
+    // Update anchor sequences with word IDs based on positions
+    newData.anchor_sequences = newData.anchor_sequences.map((anchor) => {
+        const serverAnchor = anchor as unknown as ServerData;
+        // Get reference word IDs for each source
+        const referenceWordIds: Record<string, string[]> = {};
+        Object.keys(data.reference_texts || {}).forEach(source => {
+            referenceWordIds[source] = findReferenceWordIds(source, serverAnchor);
+        });
+        console.log('Processing anchor with references:', JSON.stringify({
+            words: anchor.words,
+            reference_positions: serverAnchor.reference_positions,
+            reference_word_ids: referenceWordIds
+        }));
+        return {
+            ...anchor,
+            id: anchor.id || nanoid(),
+            word_ids: findWordIdsForSequence(newData.corrected_segments, serverAnchor),
+            reference_word_ids: referenceWordIds
+        } as AnchorSequence;
+    });
+    // Update gap sequences to use word IDs
+    newData.gap_sequences = newData.gap_sequences.map((gap) => {
+        const serverGap = gap as unknown as ServerData;
+        console.log('Processing gap sequence:', {
+            words: gap.words,
+            word_ids: gap.word_ids,
+            corrections: gap.corrections,
+            foundWordIds: findWordIdsForSequence(newData.corrected_segments, serverGap)
+        });
+        return {
+            ...gap,
+            id: gap.id || nanoid(),
+            word_ids: gap.word_ids || findWordIdsForSequence(newData.corrected_segments, serverGap),
+            corrections: gap.corrections.map((correction: WordCorrection) => {
+                const wordId = correction.word_id || findWordIdForCorrection(newData.corrected_segments, correction);
+                console.log('Correction word ID assignment:', {
+                    original_word: correction.original_word,
+                    corrected_word: correction.corrected_word,
+                    assigned_id: wordId
+                });
+                return {
+                    ...correction,
+                    id: correction.id || nanoid(),
+                    word_id: wordId
+                };
+            })
+        } as GapSequence;
+    });
+    return newData;
+}

lyrics_transcriber/frontend/src/components/shared/utils/referenceLineCalculator.ts CHANGED Viewed

@@ -10,34 +10,31 @@ export function calculateReferenceLinePositions(
     let currentReferencePosition = 0
     // First, find all anchor sequences that cover entire lines
-    const fullLineAnchors = anchors.map(anchor => {
-        const referencePos = anchor.reference_positions[currentSource]
-        if (referencePos === undefined) return null
+    const fullLineAnchors = anchors?.map(anchor => {
+        // Add null checks for anchor and reference_word_ids
+        if (!anchor?.reference_word_ids?.[currentSource]) return null
-        return {
-            referenceStart: referencePos,
-            referenceLength: anchor.length,
-            transcriptionLine: corrected_segments.findIndex((segment, segmentIndex) => {
-                const words = segment.words
-                if (!words.length) return false
-                // Calculate the absolute position of the first and last words in this segment
-                let absolutePosition = 0
-                for (let i = 0; i < segmentIndex; i++) {
-                    absolutePosition += corrected_segments[i].words.length
-                }
+        const referenceWordIds = anchor.reference_word_ids[currentSource]
+        if (!referenceWordIds?.length) return null
-                const firstWordPosition = absolutePosition
-                const lastWordPosition = absolutePosition + words.length - 1
+        return {
+            referenceWordIds,
+            transcriptionLine: corrected_segments.findIndex((segment) => {
+                const wordIds = segment.words.map(w => w.id)
+                if (!wordIds.length) return false
-                return firstWordPosition >= anchor.transcription_position &&
-                    lastWordPosition < anchor.transcription_position + anchor.length
+                // Check if all word IDs in this segment are part of the anchor
+                return wordIds.every(id => anchor.word_ids?.includes(id))
             })
         }
-    }).filter((a): a is NonNullable<typeof a> => a !== null)
+    })?.filter((a): a is NonNullable<typeof a> => a !== null) ?? []
-    // Sort by reference position to process in order
-    fullLineAnchors.sort((a, b) => a.referenceStart - b.referenceStart)
+    // Sort by first reference word ID to process in order
+    fullLineAnchors.sort((a, b) => {
+        const firstIdA = a.referenceWordIds[0]
+        const firstIdB = b.referenceWordIds[0]
+        return firstIdA.localeCompare(firstIdB)
+    })
     // Add line positions with padding
     let currentLine = 0
@@ -55,10 +52,12 @@ export function calculateReferenceLinePositions(
         // Add the actual line position
         linePositions.push({
-            position: anchor.referenceStart,
-            lineNumber: currentLine
+            position: currentReferencePosition,
+            lineNumber: currentLine,
+            isEmpty: false
         })
         currentLine++
+        currentReferencePosition++
     })
     // Add any remaining lines after the last anchor

lyrics_transcriber/frontend/src/types.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 export interface Word {
+    id: string
     text: string
     start_time: number
     end_time: number
@@ -6,6 +7,7 @@ export interface Word {
 }
 export interface LyricsSegment {
+    id: string
     text: string
     words: Word[]
     start_time: number
@@ -13,10 +15,11 @@ export interface LyricsSegment {
 }
 export interface WordCorrection {
+    id: string
     original_word: string
     corrected_word: string
-    segment_index: number
-    original_position: number
+    segment_id: string
+    word_id: string
     source: string
     confidence: number
     reason: string
@@ -24,7 +27,7 @@ export interface WordCorrection {
     is_deletion: boolean
     split_index?: number
     split_total?: number
-    reference_positions?: Record<string, number>
+    reference_positions?: Record<string, string>
     length: number
 }
@@ -36,26 +39,35 @@ export interface PhraseScore {
 }
 export interface AnchorSequence {
+    id: string
     words: string[]
     text: string
     length: number
-    transcription_position: number
-    reference_positions: Record<string, number>
+    word_ids: string[]
+    reference_word_ids: Record<string, string[]>
     confidence: number
     phrase_score: PhraseScore
     total_score: number
 }
+export interface AnchorReference {
+    text: string
+    word_ids: string[]
+    confidence: number
+}
 export interface GapSequence {
-    words: string[]
+    id: string
     text: string
+    words: string[]
+    word_ids: string[]
     length: number
-    transcription_position: number
-    preceding_anchor: AnchorSequence | null
-    following_anchor: AnchorSequence | null
-    reference_words: Record<string, string[]>
-    reference_words_original?: Record<string, string[]>
     corrections: WordCorrection[]
+    preceding_anchor: AnchorReference | null
+    following_anchor: AnchorReference | null
+    reference_words: {
+        [source: string]: string[]
+    }
 }
 export interface LyricsData {
@@ -98,10 +110,8 @@ export interface CorrectionData {
 }
 export interface HighlightInfo {
-    transcriptionIndex?: number
-    transcriptionLength?: number
-    referenceIndices: Record<string, number>
-    referenceLength?: number
+    word_ids?: string[]
+    reference_word_ids?: Record<string, string[]>
     type: 'single' | 'gap' | 'anchor'
 }

lyrics_transcriber/frontend/tsconfig.tsbuildinfo CHANGED Viewed

	@@ -1 +1 @@
1	- {"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/newlinecalculator.ts","./src/components/shared/utils/referencelinecalculator.ts"],"version":"5.6.3"}
1	+ {"root":["./src/app.tsx","./src/api.ts","./src/main.tsx","./src/types.ts","./src/vite-env.d.ts","./src/components/audioplayer.tsx","./src/components/correctionmetrics.tsx","./src/components/detailsmodal.tsx","./src/components/editmodal.tsx","./src/components/fileupload.tsx","./src/components/lyricsanalyzer.tsx","./src/components/modeselector.tsx","./src/components/referenceview.tsx","./src/components/reviewchangesmodal.tsx","./src/components/segmentdetailsmodal.tsx","./src/components/timelineeditor.tsx","./src/components/transcriptionview.tsx","./src/components/wordeditcontrols.tsx","./src/components/shared/constants.ts","./src/components/shared/styles.ts","./src/components/shared/types.ts","./src/components/shared/components/highlightedtext.tsx","./src/components/shared/components/sourceselector.tsx","./src/components/shared/components/word.tsx","./src/components/shared/hooks/usewordclick.ts","./src/components/shared/utils/initializedatawithids.tsx","./src/components/shared/utils/referencelinecalculator.ts"],"version":"5.6.3"}

lyrics_transcriber/frontend/vite.config.js CHANGED Viewed

@@ -3,4 +3,8 @@ import react from '@vitejs/plugin-react';
 // https://vite.dev/config/
 export default defineConfig({
     plugins: [react()],
+    build: {
+        minify: false,
+        sourcemap: true,
+    }
 });

lyrics_transcriber/frontend/vite.config.ts CHANGED Viewed

@@ -4,4 +4,8 @@ import react from '@vitejs/plugin-react'
 // https://vite.dev/config/
 export default defineConfig({
   plugins: [react()],
+  build: {
+    minify: false,
+    sourcemap: true,
+  }
 })

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl

lyrics-transcriber 0.36.1__py3-none-any.whl → 0.39.0__py3-none-any.whl