karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. karaoke_gen/__init__.py +32 -1
  2. karaoke_gen/audio_fetcher.py +1220 -67
  3. karaoke_gen/audio_processor.py +15 -3
  4. karaoke_gen/instrumental_review/server.py +154 -860
  5. karaoke_gen/instrumental_review/static/index.html +1529 -0
  6. karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
  7. karaoke_gen/karaoke_gen.py +131 -14
  8. karaoke_gen/lyrics_processor.py +172 -4
  9. karaoke_gen/utils/bulk_cli.py +3 -0
  10. karaoke_gen/utils/cli_args.py +7 -4
  11. karaoke_gen/utils/gen_cli.py +221 -5
  12. karaoke_gen/utils/remote_cli.py +786 -43
  13. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
  14. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
  15. lyrics_transcriber/core/controller.py +76 -2
  16. lyrics_transcriber/frontend/package.json +1 -1
  17. lyrics_transcriber/frontend/src/App.tsx +6 -4
  18. lyrics_transcriber/frontend/src/api.ts +25 -10
  19. lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
  20. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
  21. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  22. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  23. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  24. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  25. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  26. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
  27. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  28. lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
  29. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  30. lyrics_transcriber/frontend/web_assets/index.html +1 -1
  31. lyrics_transcriber/output/countdown_processor.py +39 -0
  32. lyrics_transcriber/review/server.py +5 -5
  33. lyrics_transcriber/transcribers/audioshake.py +96 -7
  34. lyrics_transcriber/types.py +14 -12
  35. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
  36. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
  37. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
  38. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,905 @@
1
+ import { useState, useCallback, useEffect, useRef, useMemo, memo } from 'react'
2
+ import {
3
+ Box,
4
+ Typography,
5
+ Slider,
6
+ Dialog,
7
+ DialogTitle,
8
+ DialogContent,
9
+ DialogActions,
10
+ TextField,
11
+ Button,
12
+ Paper,
13
+ Alert
14
+ } from '@mui/material'
15
+ import ZoomInIcon from '@mui/icons-material/ZoomIn'
16
+ import ZoomOutIcon from '@mui/icons-material/ZoomOut'
17
+ import { Word, LyricsSegment } from '../../types'
18
+ import TimelineCanvas from './TimelineCanvas'
19
+ import UpcomingWordsBar from './UpcomingWordsBar'
20
+ import SyncControls from './SyncControls'
21
+
22
// Optional audio-player hooks this component looks for on `window`
declare global {
  interface Window {
    /** Polled by the component to mirror the player's playing state. */
    isAudioPlaying?: boolean
    /** Called by the component to stop playback while `isAudioPlaying` is truthy. */
    toggleAudioPlayback?: () => void
    /** Result is used as the audio duration (seconds) when it is greater than 0. */
    getAudioDuration?: () => number
  }
}
30
+
31
/** Props accepted by the LyricsSynchronizer component. */
interface LyricsSynchronizerProps {
  /** Segments to edit; the component deep-copies them and edits the copy. */
  segments: LyricsSegment[]
  /** Current playback position, compared against word times and the visible window. */
  currentTime: number
  /** Called with a start time whenever the component wants playback (or a seek). */
  onPlaySegment?: (startTime: number) => void
  /** Receives the edited working copy when "Apply" is clicked. */
  onSave: (segments: LyricsSegment[]) => void
  /** Called when "Cancel" is clicked. */
  onCancel: () => void
  /** Registers a factory returning the spacebar key handler; `undefined` clears it. */
  setModalSpacebarHandler: (handler: (() => (e: KeyboardEvent) => void) | undefined) => void
}
39
+
40
// Zoom limits, expressed as the number of seconds visible in the timeline
const ZOOM_STEPS = 50 // Number of discrete slider positions
const MIN_ZOOM_SECONDS = 4.5 // Tightest zoom: 4.5 seconds visible
const MAX_ZOOM_SECONDS = 24 // Widest zoom: 24 seconds visible
44
+
45
/** Concatenates every segment's `words` array into one flat list, in segment order. */
function getAllWords(segments: LyricsSegment[]): Word[] {
  const perSegment = segments.map(segment => segment.words)
  return ([] as Word[]).concat(...perSegment)
}
49
+
50
/** Deep-copies segments via a JSON round trip, so only JSON-representable data survives. */
function cloneSegments(segments: LyricsSegment[]): LyricsSegment[] {
  const serialized = JSON.stringify(segments)
  const copy: LyricsSegment[] = JSON.parse(serialized)
  return copy
}
54
+
55
+ const LyricsSynchronizer = memo(function LyricsSynchronizer({
56
+ segments: initialSegments,
57
+ currentTime,
58
+ onPlaySegment,
59
+ onSave,
60
+ onCancel,
61
+ setModalSpacebarHandler
62
+ }: LyricsSynchronizerProps) {
63
// Editable deep copy of the incoming segments (made once, on first render)
const [workingSegments, setWorkingSegments] = useState<LyricsSegment[]>(
  () => cloneSegments(initialSegments)
)

// Flat word list derived from the working copy
const allWords = useMemo(() => getAllWords(workingSegments), [workingSegments])

// Audio duration, read once on mount; 300 (5 minutes) when unavailable or not positive
const audioDuration = useMemo(() => {
  const FALLBACK_DURATION = 300
  if (typeof window.getAudioDuration !== 'function') {
    return FALLBACK_DURATION
  }
  const reported = window.getAudioDuration()
  return reported > 0 ? reported : FALLBACK_DURATION
}, [])

// Width of the visible time window, in seconds (starts at 12)
const [zoomSeconds, setZoomSeconds] = useState(12)

// Visible time range: start is state, end is derived and capped at the audio duration
const [visibleStartTime, setVisibleStartTime] = useState(0)
const visibleEndTime = useMemo(
  () => Math.min(visibleStartTime + zoomSeconds, audioDuration),
  [visibleStartTime, zoomSeconds, audioDuration]
)

// Manual (spacebar) sync state
const [isManualSyncing, setIsManualSyncing] = useState(false)
const [isPaused, setIsPaused] = useState(false)
const [syncWordIndex, setSyncWordIndex] = useState(-1) // index into allWords; -1 = none
const [isSpacebarPressed, setIsSpacebarPressed] = useState(false)
const wordStartTimeRef = useRef<number | null>(null) // playback time at spacebar press
const spacebarPressTimeRef = useRef<number | null>(null) // Date.now() at spacebar press
const currentTimeRef = useRef(currentTime) // latest currentTime, readable from callbacks

// Ids of the words currently selected in the timeline
const [selectedWordIds, setSelectedWordIds] = useState<Set<string>>(new Set())

// "Edit Lyrics" dialog state
const [showEditLyricsModal, setShowEditLyricsModal] = useState(false)
const [editLyricsText, setEditLyricsText] = useState('')

// "Edit Word" dialog state
const [showEditWordModal, setShowEditWordModal] = useState(false)
const [editWordText, setEditWordText] = useState('')
const [editWordId, setEditWordId] = useState<string | null>(null)
110
+
111
// Mirror the latest currentTime into the ref after each change, so callbacks
// can read the playback position without depending on currentTime
useEffect(() => {
  currentTimeRef.current = currentTime
}, [currentTime])
115
+
116
// While actively syncing, keep the playhead inside the visible window
useEffect(() => {
  const isFollowing = isManualSyncing && !isPaused && currentTime > 0
  if (!isFollowing) return

  const edgeMargin = zoomSeconds * 0.1

  if (currentTime > visibleEndTime - edgeMargin) {
    // Playhead entered the last 10% of the window: jump so it sits 10% from the left
    setVisibleStartTime(Math.max(0, currentTime - edgeMargin))
    return
  }

  if (currentTime < visibleStartTime) {
    // Playhead is left of the window: scroll back to one second before it
    setVisibleStartTime(Math.max(0, currentTime - 1))
  }
}, [currentTime, isManualSyncing, isPaused, visibleStartTime, visibleEndTime, zoomSeconds])
130
+
131
// Translate a slider position (0..ZOOM_STEPS) into a visible window width in seconds
const handleZoomChange = useCallback((_: Event, value: number | number[]) => {
  const sliderPosition = value as number
  const zoomRange = MAX_ZOOM_SECONDS - MIN_ZOOM_SECONDS
  setZoomSeconds(MIN_ZOOM_SECONDS + (sliderPosition / ZOOM_STEPS) * zoomRange)
}, [])
138
+
139
// Inverse of handleZoomChange: slider position for the current window width
const sliderValue = useMemo(() => {
  const zoomRange = MAX_ZOOM_SECONDS - MIN_ZOOM_SECONDS
  const fraction = (zoomSeconds - MIN_ZOOM_SECONDS) / zoomRange
  return fraction * ZOOM_STEPS
}, [zoomSeconds])
143
+
144
// The timeline reports a new scroll position; store it as the visible start
const handleScrollChange = useCallback(
  (newStartTime: number) => {
    setVisibleStartTime(newStartTime)
  },
  []
)
148
+
149
// Replace words (matched by id) in the working copy and refresh segment timing.
// Words whose id is not present in `newWords` are kept as they are.
const updateWords = useCallback((newWords: Word[]) => {
  setWorkingSegments(prevSegments => {
    const replacementById = new Map(newWords.map(word => [word.id, word]))
    const nextSegments = cloneSegments(prevSegments)

    for (const segment of nextSegments) {
      segment.words = segment.words.map(word => replacementById.get(word.id) ?? word)

      // Segment bounds span all fully timed words, or are null when there are none
      const fullyTimed = segment.words.filter(
        word => word.start_time !== null && word.end_time !== null
      )
      if (fullyTimed.length === 0) {
        segment.start_time = null
        segment.end_time = null
        continue
      }
      segment.start_time = Math.min(...fullyTimed.map(word => word.start_time!))
      segment.end_time = Math.max(...fullyTimed.map(word => word.end_time!))
    }

    return nextSegments
  })
}, [])
178
+
179
// Local mirror of window.isAudioPlaying
const [isPlaying, setIsPlaying] = useState(false)

// Poll the window flag every 100ms; anything other than boolean `true` counts as stopped
useEffect(() => {
  const syncPlayingFlag = () => {
    setIsPlaying(window.isAudioPlaying === true)
  }

  syncPlayingFlag()
  const pollTimer = setInterval(syncPlayingFlag, 100)

  return () => {
    clearInterval(pollTimer)
  }
}, [])
191
+
192
// Ask the parent to start playback at the current position (no-op without onPlaySegment)
const handlePlayAudio = useCallback(() => {
  if (!onPlaySegment) return
  onPlaySegment(currentTimeRef.current)
}, [onPlaySegment])
198
+
199
// Stop playback if it is running, and leave manual sync mode if it is active
const handleStopAudio = useCallback(() => {
  const canToggle = typeof window.toggleAudioPlayback === 'function'
  if (canToggle && window.isAudioPlaying) {
    window.toggleAudioPlayback!()
  }

  if (!isManualSyncing) return

  // Stopping also ends the sync session
  setIsManualSyncing(false)
  setIsPaused(false)
  setIsSpacebarPressed(false)
}, [isManualSyncing])
211
+
212
// Toggle manual sync: stops a running session, otherwise starts one
const handleStartSync = useCallback(() => {
  if (isManualSyncing) {
    // A session is running: reset its state and stop the audio
    setIsManualSyncing(false)
    setIsPaused(false)
    setSyncWordIndex(-1)
    setIsSpacebarPressed(false)
    handleStopAudio()
    return
  }

  // Begin at the first word missing a start or end time, or at word 0 if all are timed
  const isUnsynced = (word: Word) => word.start_time === null || word.end_time === null
  const firstUnsyncedIndex = allWords.findIndex(isUnsynced)
  const startIndex = firstUnsyncedIndex === -1 ? 0 : firstUnsyncedIndex

  setIsManualSyncing(true)
  setIsPaused(false)
  setSyncWordIndex(startIndex)
  setIsSpacebarPressed(false)

  // Start playback one second before the current position (never below 0)
  if (onPlaySegment) {
    const leadInStart = Math.max(0, currentTimeRef.current - 1)
    onPlaySegment(leadInStart)
  }
}, [isManualSyncing, allWords, onPlaySegment, handleStopAudio])
243
+
244
// Mark the session as paused, then run the shared stop routine
const handlePauseSync = useCallback(
  () => {
    setIsPaused(true)
    handleStopAudio()
  },
  [handleStopAudio]
)
249
+
250
// Resume a paused session: re-target the first unsynced word and restart playback
const handleResumeSync = useCallback(() => {
  setIsPaused(false)

  const nextUnsynced = allWords.findIndex(
    word => word.start_time === null || word.end_time === null
  )
  const shouldRetarget = nextUnsynced !== -1 && nextUnsynced !== syncWordIndex
  if (shouldRetarget) {
    setSyncWordIndex(nextUnsynced)
  }

  // Continue playback from the current position
  if (onPlaySegment) {
    onPlaySegment(currentTimeRef.current)
  }
}, [allWords, syncWordIndex, onPlaySegment])
268
+
269
// Wipe every word and segment timing and forget the current sync position
const handleClearSync = useCallback(() => {
  setWorkingSegments(prevSegments => {
    const cleared = cloneSegments(prevSegments)

    cleared.forEach(segment => {
      segment.words.forEach(word => {
        word.start_time = null
        word.end_time = null
      })
      segment.start_time = null
      segment.end_time = null
    })

    return cleared
  })

  setSyncWordIndex(-1)
}, [])
288
+
289
// Clear timing for every word that starts after the current playback position
const handleUnsyncFromCursor = useCallback(() => {
  const cursorTime = currentTimeRef.current

  setWorkingSegments(prevSegments => {
    const nextSegments = cloneSegments(prevSegments)

    for (const segment of nextSegments) {
      for (const word of segment.words) {
        const startsAfterCursor = word.start_time !== null && word.start_time > cursorTime
        if (startsAfterCursor) {
          word.start_time = null
          word.end_time = null
        }
      }

      // Segment bounds span the remaining fully timed words, or are null when none remain
      const fullyTimed = segment.words.filter(
        word => word.start_time !== null && word.end_time !== null
      )
      if (fullyTimed.length === 0) {
        segment.start_time = null
        segment.end_time = null
        continue
      }
      segment.start_time = Math.min(...fullyTimed.map(word => word.start_time!))
      segment.end_time = Math.max(...fullyTimed.map(word => word.end_time!))
    }

    return nextSegments
  })
}, [])
322
+
323
// True when at least one word starts after the current playback position,
// i.e. "Unsync from Cursor" would change something.
// Uses the `currentTime` prop directly: currentTimeRef is only refreshed in an
// effect (after render), so reading it here would lag one render behind the
// value this memo depends on.
const canUnsyncFromCursor = useMemo(() => {
  return allWords.some(w =>
    w.start_time !== null && w.start_time > currentTime
  )
}, [allWords, currentTime])
330
+
331
// Open the "Edit Lyrics" dialog pre-filled with one line per segment's text
const handleEditLyrics = useCallback(() => {
  const lines = workingSegments.map(segment => segment.text)
  setEditLyricsText(lines.join('\n'))
  setShowEditLyricsModal(true)
}, [workingSegments])
337
+
338
// Replace the whole working copy with segments rebuilt from the edited text.
// Each non-blank line becomes one segment and each whitespace-separated token
// becomes one word. All timing is reset to null and every id is new.
const handleSaveEditedLyrics = useCallback(() => {
  const lines = editLyricsText.split('\n').filter(l => l.trim())

  // One timestamp for the whole batch; idx/wIdx keep ids unique within it
  const stamp = Date.now()

  const newSegments: LyricsSegment[] = lines.map((line, idx) => {
    const trimmed = line.trim()
    const words = trimmed.split(/\s+/).map((text, wIdx) => ({
      id: `word-${idx}-${wIdx}-${stamp}`,
      text,
      start_time: null,
      end_time: null,
      confidence: 1.0
    }))

    return {
      id: `segment-${idx}-${stamp}`,
      text: trimmed,
      words,
      start_time: null,
      end_time: null
    }
  })

  setWorkingSegments(newSegments)
  setShowEditLyricsModal(false)
  setSyncWordIndex(-1)
  // Every previous word id is gone, so drop the selection as well; otherwise
  // the selection-dependent controls stay enabled for words that no longer exist.
  setSelectedWordIds(new Set())
}, [editLyricsText])
364
+
365
// Open the "Edit Word" dialog for the selected word (only when exactly one is selected)
const handleEditSelectedWord = useCallback(() => {
  if (selectedWordIds.size !== 1) return

  const [wordId] = Array.from(selectedWordIds)
  const target = allWords.find(candidate => candidate.id === wordId)
  if (!target) return

  setEditWordId(wordId)
  setEditWordText(target.text)
  setShowEditWordModal(true)
}, [selectedWordIds, allWords])
378
+
379
// Apply the "Edit Word" dialog.
// - One token: the word is renamed.
// - Several tokens: the word is split. The first piece keeps the original id
//   and timing; the remaining pieces are new, untimed words.
// In both cases the owning segment's text is rebuilt from its words, so a
// rename is also visible wherever `segment.text` is read (e.g. Edit Lyrics).
// Does nothing when no word is being edited or the new text is blank.
const handleSaveEditedWord = useCallback(() => {
  if (!editWordId) return

  const newText = editWordText.trim()
  if (!newText) return

  const pieces = newText.split(/\s+/)
  const isSplit = pieces.length > 1
  // Timestamp suffix keeps split ids unique when the same word is split again
  const splitStamp = Date.now()

  setWorkingSegments(prevSegments => {
    const newSegments = cloneSegments(prevSegments)

    for (const segment of newSegments) {
      const wordIndex = segment.words.findIndex(w => w.id === editWordId)
      if (wordIndex === -1) continue

      const original = segment.words[wordIndex]
      const replacements: Word[] = pieces.map((text, idx) =>
        idx === 0
          // First piece: keep every field of the original word (id, timing, ...);
          // a split additionally resets confidence to 1.0, as before
          ? { ...original, text, ...(isSplit ? { confidence: 1.0 } : {}) }
          : {
              id: `${editWordId}-split-${idx}-${splitStamp}`,
              text,
              start_time: null,
              end_time: null,
              confidence: 1.0
            }
      )

      segment.words.splice(wordIndex, 1, ...replacements)
      segment.text = segment.words.map(w => w.text).join(' ')
      break
    }

    return newSegments
  })

  setShowEditWordModal(false)
  setEditWordId(null)
  setEditWordText('')
  setSelectedWordIds(new Set())
}, [editWordId, editWordText])
429
+
430
// Remove the selected words, rebuild affected segments, and drop emptied segments
const handleDeleteSelected = useCallback(() => {
  if (selectedWordIds.size === 0) return

  setWorkingSegments(prevSegments => {
    const survivors: LyricsSegment[] = []

    for (const segment of cloneSegments(prevSegments)) {
      segment.words = segment.words.filter(word => !selectedWordIds.has(word.id))
      segment.text = segment.words.map(word => word.text).join(' ')

      // Segment bounds span the remaining fully timed words, or are null when none remain
      const fullyTimed = segment.words.filter(
        word => word.start_time !== null && word.end_time !== null
      )
      if (fullyTimed.length > 0) {
        segment.start_time = Math.min(...fullyTimed.map(word => word.start_time!))
        segment.end_time = Math.max(...fullyTimed.map(word => word.end_time!))
      } else {
        segment.start_time = null
        segment.end_time = null
      }

      // Segments left without any word are discarded
      if (segment.words.length > 0) {
        survivors.push(segment)
      }
    }

    return survivors
  })

  setSelectedWordIds(new Set())
}, [selectedWordIds])
461
+
462
// Click selects a single word; with Shift/Ctrl/Meta held it toggles that word
// in the existing selection instead
const handleWordClick = useCallback((wordId: string, event: React.MouseEvent) => {
  const isMultiSelect = event.shiftKey || event.ctrlKey || event.metaKey

  if (!isMultiSelect) {
    setSelectedWordIds(new Set([wordId]))
    return
  }

  setSelectedWordIds(prev => {
    const next = new Set(prev)
    // Set.delete returns false when the id was absent, in which case we add it
    if (!next.delete(wordId)) {
      next.add(wordId)
    }
    return next
  })
}, [])
480
+
481
// Clicking the timeline background clears the selection
const handleBackgroundClick = useCallback(
  () => {
    setSelectedWordIds(new Set())
  },
  []
)
485
+
486
// Apply new start/end times to one word (resize) and refresh its segment's bounds.
// Start is clamped to >= 0 and end to at least 0.05 after the start.
const handleWordTimingChange = useCallback((wordId: string, newStartTime: number, newEndTime: number) => {
  setWorkingSegments(prevSegments => {
    const nextSegments = cloneSegments(prevSegments)

    // Only the first segment containing the word is touched
    const owner = nextSegments.find(segment => segment.words.some(w => w.id === wordId))
    if (owner) {
      const target = owner.words.find(w => w.id === wordId)!
      const clampedStart = Math.max(0, newStartTime)
      target.start_time = clampedStart
      target.end_time = Math.max(clampedStart + 0.05, newEndTime)

      const fullyTimed = owner.words.filter(
        w => w.start_time !== null && w.end_time !== null
      )
      if (fullyTimed.length > 0) {
        owner.start_time = Math.min(...fullyTimed.map(w => w.start_time!))
        owner.end_time = Math.max(...fullyTimed.map(w => w.end_time!))
      }
    }

    return nextSegments
  })
}, [])
512
+
513
// Apply a batch of new start/end times (drag) and refresh segment bounds.
// The supplied times are stored as given, without clamping.
const handleWordsMove = useCallback((updates: Array<{ wordId: string; newStartTime: number; newEndTime: number }>) => {
  setWorkingSegments(prevSegments => {
    const updateById = new Map(updates.map(update => [update.wordId, update]))
    const nextSegments = cloneSegments(prevSegments)

    nextSegments.forEach(segment => {
      segment.words.forEach(word => {
        const pending = updateById.get(word.id)
        if (!pending) return
        word.start_time = pending.newStartTime
        word.end_time = pending.newEndTime
      })

      // Bounds are only overwritten when the segment has at least one fully timed word
      const fullyTimed = segment.words.filter(
        word => word.start_time !== null && word.end_time !== null
      )
      if (fullyTimed.length > 0) {
        segment.start_time = Math.min(...fullyTimed.map(word => word.start_time!))
        segment.end_time = Math.max(...fullyTimed.map(word => word.end_time!))
      }
    })

    return nextSegments
  })
}, [])
543
+
544
// Time bar click: centre the view on the clicked time and seek there without
// leaving the audio playing
const handleTimeBarClick = useCallback((time: number) => {
  // Centre the window on `time`, kept within [0, audioDuration - zoomSeconds]
  const centredStart = Math.max(0, time - zoomSeconds / 2)
  const latestStart = Math.max(0, audioDuration - zoomSeconds)
  setVisibleStartTime(Math.min(centredStart, latestStart))

  if (!onPlaySegment) return

  // onPlaySegment is the only way to seek here and it starts playback,
  // so playback is toggled off again 50ms later if it is running
  onPlaySegment(time)
  setTimeout(() => {
    const canToggle = typeof window.toggleAudioPlayback === 'function'
    if (canToggle && window.isAudioPlaying) {
      window.toggleAudioPlayback!()
    }
  }, 50)
}, [zoomSeconds, audioDuration, onPlaySegment])
561
+
562
// A drag selection finished: it replaces the current selection
const handleSelectionComplete = useCallback(
  (wordIds: string[]) => {
    setSelectedWordIds(new Set(wordIds))
  },
  []
)
566
+
567
// Spacebar pressed during manual sync: stamp the current word's start time.
// Ignored unless a sync session is running (not paused) and syncWordIndex
// points at an existing word. Changed words are passed to updateWords as
// copies; the Word objects held in React state are never mutated in place.
const handleKeyDown = useCallback((e: KeyboardEvent) => {
  if (e.code !== 'Space') return
  if (!isManualSyncing || isPaused) return
  if (syncWordIndex < 0 || syncWordIndex >= allWords.length) return

  e.preventDefault()
  e.stopPropagation()

  // Already holding: nothing more to do until the key is released
  if (isSpacebarPressed) return

  const pressTime = currentTimeRef.current

  setIsSpacebarPressed(true)
  wordStartTimeRef.current = pressTime
  spacebarPressTimeRef.current = Date.now()

  // Start time for the current word
  const changedWords: Word[] = [{ ...allWords[syncWordIndex], start_time: pressTime }]

  // If the previous word was started but never ended, close it now
  if (syncWordIndex > 0) {
    const prevWord = allWords[syncWordIndex - 1]
    if (prevWord.start_time !== null && prevWord.end_time === null) {
      const gap = pressTime - prevWord.start_time
      // More than a second apart: give it a 0.5s length; otherwise end it
      // just (0.005) before the new word starts
      const prevEnd = gap > 1.0 ? prevWord.start_time + 0.5 : pressTime - 0.005
      changedWords.push({ ...prevWord, end_time: prevEnd })
    }
  }

  // updateWords matches by id and leaves every other word untouched
  updateWords(changedWords)
}, [isManualSyncing, isPaused, syncWordIndex, allWords, isSpacebarPressed, updateWords])
602
+
603
// Spacebar released during manual sync: stamp the current word's end time and
// advance to the next word, or finish the session after the last word.
// A press shorter than 200ms counts as a tap and gets a fixed 0.5 length;
// a longer hold ends at the playback position at release.
const handleKeyUp = useCallback((e: KeyboardEvent) => {
  if (e.code !== 'Space') return
  if (!isManualSyncing || isPaused) return
  if (!isSpacebarPressed) return

  e.preventDefault()
  e.stopPropagation()

  setIsSpacebarPressed(false)

  const pressedAt = spacebarPressTimeRef.current
  const startedAt = wordStartTimeRef.current
  wordStartTimeRef.current = null
  spacebarPressTimeRef.current = null

  // The index can be invalid if the word list or sync position changed while
  // the key was held; release the key state but do not touch any word
  if (syncWordIndex < 0 || syncWordIndex >= allWords.length) return

  const pressDuration = pressedAt ? Date.now() - pressedAt : 0
  const isTap = pressDuration < 200 // 200ms threshold

  const releaseTime = currentTimeRef.current
  // `??` rather than `||`: a start time of 0 is valid and must not fall back
  const endTime = isTap ? (startedAt ?? releaseTime) + 0.5 : releaseTime

  // Pass a copy; Word objects held in React state are never mutated in place
  updateWords([{ ...allWords[syncWordIndex], end_time: endTime }])

  if (syncWordIndex < allWords.length - 1) {
    // Move to next word
    setSyncWordIndex(syncWordIndex + 1)
  } else {
    // Last word done: end the session and stop the audio
    setIsManualSyncing(false)
    setSyncWordIndex(-1)
    handleStopAudio()
  }
}, [isManualSyncing, isPaused, isSpacebarPressed, syncWordIndex, allWords, updateWords, handleStopAudio])
644
+
645
// Single entry point that routes a keyboard event to the keydown/keyup logic
const handleSpacebar = useCallback((e: KeyboardEvent) => {
  switch (e.type) {
    case 'keydown':
      handleKeyDown(e)
      break
    case 'keyup':
      handleKeyUp(e)
      break
    default:
      // Other event types are ignored
      break
  }
}, [handleKeyDown, handleKeyUp])
653
+
654
// Always-current pointer to handleSpacebar, refreshed on every render, so the
// registered handler below never has to be re-registered
const spacebarHandlerRef = useRef(handleSpacebar)
spacebarHandlerRef.current = handleSpacebar

// Register one stable handler with the parent on mount and clear it on unmount.
// NOTE(review): every Space event passed to this handler is preventDefault()ed,
// including while the edit dialogs are open — confirm how the parent dispatches.
useEffect(() => {
  const stableHandler = (e: KeyboardEvent) => {
    if (e.code !== 'Space') return
    e.preventDefault()
    e.stopPropagation()
    spacebarHandlerRef.current(e)
  }

  // The setter expects a factory, hence the wrapping arrow function
  setModalSpacebarHandler(() => stableHandler)

  return () => {
    setModalSpacebarHandler(undefined)
  }
}, [setModalSpacebarHandler])
674
+
675
// "Apply": hand the edited working copy to the parent
const handleSave = useCallback(
  () => {
    onSave(workingSegments)
  },
  [workingSegments, onSave]
)
679
+
680
// Sync progress: a word counts as synced when both its start and end time are set
const stats = useMemo(() => {
  let synced = 0
  for (const word of allWords) {
    if (word.start_time !== null && word.end_time !== null) {
      synced += 1
    }
  }
  const total = allWords.length
  return { total, synced, remaining: total - synced }
}, [allWords])
688
+
689
// Banner text for the current state. Both lines are always present so the
// fixed-height banner never changes layout.
const getInstructionText = useCallback(() => {
  const banner = (primary: string, secondary: string) => ({ primary, secondary })

  if (isManualSyncing && isSpacebarPressed) {
    return banner('⏱️ Holding... release when word ends', 'Release spacebar when the word finishes')
  }
  if (isManualSyncing && stats.remaining === 0) {
    return banner('✅ All words synced!', 'Click "Stop Sync" then "Apply" to save')
  }
  if (isManualSyncing) {
    return banner('👆 Press SPACEBAR when you hear each word', 'Tap for short words, hold for longer words')
  }

  // Not syncing from here on
  if (stats.synced === 0) {
    return banner('Click "Start Sync" to begin timing words', 'Audio will play and you\'ll tap spacebar for each word')
  }
  if (stats.remaining > 0) {
    return banner(`${stats.remaining} words remaining to sync`, 'Click "Start Sync" to continue, or "Unsync from Cursor" to re-sync from a point')
  }
  return banner('✅ All words synced!', 'Click "Apply" to save changes, or make adjustments first')
}, [isManualSyncing, isSpacebarPressed, stats.synced, stats.remaining])

const instruction = getInstructionText()
711
+
712
// Values shared by several elements below
const isSyncRunning = isManualSyncing && !isPaused // zoom and Apply are locked meanwhile
const bannerBackground = isManualSyncing ? 'info.main' : 'grey.100'
const bannerTextColor = isManualSyncing ? 'info.contrastText' : 'text.primary'
const closeEditLyricsModal = () => setShowEditLyricsModal(false)
const closeEditWordModal = () => setShowEditWordModal(false)

return (
  <Box sx={{ display: 'flex', flexDirection: 'column', height: '100%', gap: 1 }}>
    {/* Progress summary (fixed height) */}
    <Box sx={{ display: 'flex', justifyContent: 'flex-end', alignItems: 'center', height: 24 }}>
      <Typography variant="body2" color="text.secondary">
        {stats.synced} / {stats.total} words synced
        {stats.remaining > 0 && ` (${stats.remaining} remaining)`}
      </Typography>
    </Box>

    {/* Instruction banner (fixed height so the layout never shifts) */}
    <Box sx={{ height: 56, flexShrink: 0 }}>
      <Paper
        sx={{
          p: 1.5,
          height: '100%',
          bgcolor: bannerBackground,
          color: bannerTextColor,
          display: 'flex',
          flexDirection: 'column',
          justifyContent: 'center',
          overflow: 'hidden',
          boxSizing: 'border-box'
        }}
      >
        <Typography variant="body2" sx={{ fontWeight: 500, lineHeight: 1.3 }}>
          {instruction.primary}
        </Typography>
        <Typography variant="caption" sx={{ opacity: 0.85, display: 'block', lineHeight: 1.3 }}>
          {instruction.secondary}
        </Typography>
      </Paper>
    </Box>

    {/* Sync / playback / selection controls (fixed height) */}
    <Box sx={{ height: 88, flexShrink: 0 }}>
      <SyncControls
        isManualSyncing={isManualSyncing}
        isPaused={isPaused}
        onStartSync={handleStartSync}
        onPauseSync={handlePauseSync}
        onResumeSync={handleResumeSync}
        onClearSync={handleClearSync}
        onEditLyrics={handleEditLyrics}
        onPlay={handlePlayAudio}
        onStop={handleStopAudio}
        isPlaying={isPlaying}
        hasSelectedWords={selectedWordIds.size > 0}
        selectedWordCount={selectedWordIds.size}
        onUnsyncFromCursor={handleUnsyncFromCursor}
        onEditSelectedWord={handleEditSelectedWord}
        onDeleteSelected={handleDeleteSelected}
        canUnsyncFromCursor={canUnsyncFromCursor}
      />
    </Box>

    {/* Upcoming words: the slot is always reserved, the bar only shows while syncing */}
    <Box sx={{ height: 44, flexShrink: 0 }}>
      {isManualSyncing && (
        <UpcomingWordsBar
          words={allWords}
          syncWordIndex={syncWordIndex}
          isManualSyncing={isManualSyncing}
        />
      )}
    </Box>

    {/* Timeline */}
    <Box sx={{ flexGrow: 1, minHeight: 200 }}>
      <TimelineCanvas
        words={allWords}
        segments={workingSegments}
        visibleStartTime={visibleStartTime}
        visibleEndTime={visibleEndTime}
        currentTime={currentTime}
        selectedWordIds={selectedWordIds}
        onWordClick={handleWordClick}
        onBackgroundClick={handleBackgroundClick}
        onTimeBarClick={handleTimeBarClick}
        onSelectionComplete={handleSelectionComplete}
        onWordTimingChange={handleWordTimingChange}
        onWordsMove={handleWordsMove}
        syncWordIndex={syncWordIndex}
        isManualSyncing={isManualSyncing}
        onScrollChange={handleScrollChange}
        audioDuration={audioDuration}
        zoomSeconds={zoomSeconds}
        height={200}
      />
    </Box>

    {/* Zoom slider: left end is the smallest window (most zoomed in) */}
    <Box sx={{ display: 'flex', alignItems: 'center', gap: 2, px: 2 }}>
      <ZoomInIcon color="action" fontSize="small" />
      <Slider
        value={sliderValue}
        onChange={handleZoomChange}
        min={0}
        max={ZOOM_STEPS}
        step={1}
        sx={{ flexGrow: 1 }}
        disabled={isSyncRunning}
      />
      <ZoomOutIcon color="action" fontSize="small" />
      <Typography variant="caption" color="text.secondary" sx={{ minWidth: 60 }}>
        {zoomSeconds.toFixed(1)}s view
      </Typography>
    </Box>

    {/* Cancel / Apply */}
    <Box sx={{ display: 'flex', justifyContent: 'flex-end', gap: 2, pt: 2, borderTop: 1, borderColor: 'divider' }}>
      <Button onClick={onCancel} color="inherit">
        Cancel
      </Button>
      <Button
        onClick={handleSave}
        variant="contained"
        color="primary"
        disabled={isSyncRunning}
      >
        Apply
      </Button>
    </Box>

    {/* "Edit Lyrics" dialog */}
    <Dialog
      open={showEditLyricsModal}
      onClose={closeEditLyricsModal}
      maxWidth="md"
      fullWidth
    >
      <DialogTitle>Edit Lyrics</DialogTitle>
      <DialogContent>
        <Alert severity="warning" sx={{ mb: 2 }}>
          Editing lyrics will reset all timing data. You will need to re-sync the entire song.
        </Alert>
        <TextField
          multiline
          rows={15}
          fullWidth
          value={editLyricsText}
          onChange={(e) => setEditLyricsText(e.target.value)}
          placeholder="Enter lyrics, one line per segment..."
        />
      </DialogContent>
      <DialogActions>
        <Button onClick={closeEditLyricsModal}>Cancel</Button>
        <Button onClick={handleSaveEditedLyrics} variant="contained" color="warning">
          Save & Reset Timing
        </Button>
      </DialogActions>
    </Dialog>

    {/* "Edit Word" dialog; Enter in the text field saves */}
    <Dialog
      open={showEditWordModal}
      onClose={closeEditWordModal}
      maxWidth="xs"
      fullWidth
    >
      <DialogTitle>Edit Word</DialogTitle>
      <DialogContent>
        <Typography variant="body2" color="text.secondary" sx={{ mb: 2 }}>
          Edit the word text. Enter multiple words separated by spaces to split.
        </Typography>
        <TextField
          fullWidth
          value={editWordText}
          onChange={(e) => setEditWordText(e.target.value)}
          autoFocus
          onKeyDown={(e) => {
            if (e.key === 'Enter') {
              handleSaveEditedWord()
            }
          }}
        />
      </DialogContent>
      <DialogActions>
        <Button onClick={closeEditWordModal}>Cancel</Button>
        <Button onClick={handleSaveEditedWord} variant="contained">
          Save
        </Button>
      </DialogActions>
    </Dialog>
  </Box>
)
903
+ })
904
+
905
+ export default LyricsSynchronizer