karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. karaoke_gen/__init__.py +32 -1
  2. karaoke_gen/audio_fetcher.py +1220 -67
  3. karaoke_gen/audio_processor.py +15 -3
  4. karaoke_gen/instrumental_review/server.py +154 -860
  5. karaoke_gen/instrumental_review/static/index.html +1529 -0
  6. karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
  7. karaoke_gen/karaoke_gen.py +131 -14
  8. karaoke_gen/lyrics_processor.py +172 -4
  9. karaoke_gen/utils/bulk_cli.py +3 -0
  10. karaoke_gen/utils/cli_args.py +7 -4
  11. karaoke_gen/utils/gen_cli.py +221 -5
  12. karaoke_gen/utils/remote_cli.py +786 -43
  13. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
  14. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
  15. lyrics_transcriber/core/controller.py +76 -2
  16. lyrics_transcriber/frontend/package.json +1 -1
  17. lyrics_transcriber/frontend/src/App.tsx +6 -4
  18. lyrics_transcriber/frontend/src/api.ts +25 -10
  19. lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
  20. lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
  21. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
  22. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
  23. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
  24. lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
  25. lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
  26. lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
  27. lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
  28. lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
  29. lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
  30. lyrics_transcriber/frontend/web_assets/index.html +1 -1
  31. lyrics_transcriber/output/countdown_processor.py +39 -0
  32. lyrics_transcriber/review/server.py +5 -5
  33. lyrics_transcriber/transcribers/audioshake.py +96 -7
  34. lyrics_transcriber/types.py +14 -12
  35. lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
  36. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
  37. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
  38. {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,704 @@
1
+ import { useRef, useEffect, useCallback, useState, memo } from 'react'
2
+ import { Box, IconButton, Tooltip } from '@mui/material'
3
+ import ArrowBackIcon from '@mui/icons-material/ArrowBack'
4
+ import ArrowForwardIcon from '@mui/icons-material/ArrowForward'
5
+ import { Word, LyricsSegment } from '../../types'
6
+
7
/** Props accepted by the TimelineCanvas component. */
interface TimelineCanvasProps {
  // ---- Data -------------------------------------------------------------

  /**
   * All words known to the timeline. Words whose start and end times are both
   * non-null are drawn as blocks; words without a start time are only shown
   * as "upcoming" chips while manual syncing is active.
   */
  words: Array<Word>
  /** Segments owning the words; used to group words and assign display rows. */
  segments: Array<LyricsSegment>
  /** Ids of the words currently drawn as selected (and draggable). */
  selectedWordIds: Set<string>

  // ---- Viewport ---------------------------------------------------------

  /** Time mapped to the left edge of the drawable area. */
  visibleStartTime: number
  /** Time mapped to the right edge of the drawable area. */
  visibleEndTime: number
  /** Playhead position; the word spanning this time is highlighted. */
  currentTime: number
  /** Upper bound used when clamping scroll positions. */
  audioDuration: number
  /** Width of the visible window; scrolling moves by a quarter of it. */
  zoomSeconds: number
  /** Canvas height in CSS pixels (the component defaults it to 200). */
  height?: number

  // ---- Manual sync ------------------------------------------------------

  /** Index into `words` from which upcoming (untimed) words are listed. */
  syncWordIndex: number
  /** When true (and `syncWordIndex >= 0`) upcoming words are drawn. */
  isManualSyncing: boolean

  // ---- Callbacks --------------------------------------------------------

  /** Invoked on mouse-down over a word that is not currently selected. */
  onWordClick: (wordId: string, event: React.MouseEvent) => void
  /** Invoked when a press on empty space is released without a real drag. */
  onBackgroundClick: () => void
  /** Invoked on mouse-down inside the time bar with the (non-negative) time. */
  onTimeBarClick: (time: number) => void
  /** Invoked with the ids of the words hit by a finished rectangle drag. */
  onSelectionComplete: (wordIds: string[]) => void
  /** Invoked while dragging the resize handle of a selected word. */
  onWordTimingChange: (wordId: string, newStartTime: number, newEndTime: number) => void
  /** Invoked while dragging a selection, with new timings for every moved word. */
  onWordsMove: (updates: Array<{ wordId: string; newStartTime: number; newEndTime: number }>) => void
  /** Invoked by the wheel and arrow buttons with the new visible start time. */
  onScrollChange: (newStartTime: number) => void
}
27
+
28
// ---- Layout metrics (CSS pixels) -----------------------------------------
const TIME_BAR_HEIGHT = 28        // height of the ruler strip at the top
const WORD_BLOCK_HEIGHT = 24      // height of a single word block
const WORD_LEVEL_SPACING = 50     // vertical distance between the word rows
const CANVAS_PADDING = 8          // inset applied on the left/right and below the ruler
const TEXT_ABOVE_BLOCK = 14       // room reserved above a block for its label
const RESIZE_HANDLE_SIZE = 8      // diameter of the drawn resize handle
const RESIZE_HANDLE_HITAREA = 12  // side of the square that counts as "on the handle"

// ---- Colours ---------------------------------------------------------------
const PLAYHEAD_COLOR = '#ffffff'
const WORD_BLOCK_COLOR = '#d32f2f'
const WORD_BLOCK_SELECTED_COLOR = '#b71c1c'
const WORD_BLOCK_CURRENT_COLOR = '#f44336'
const WORD_TEXT_CURRENT_COLOR = '#d32f2f'
const UPCOMING_WORD_BG = '#fff9c4'
const UPCOMING_WORD_TEXT = '#000000'
const TIME_BAR_BG = '#f5f5f5'
const TIME_BAR_TEXT = '#666666'
const TIMELINE_BG = '#e0e0e0'

/**
 * What the current mouse drag is doing:
 * nothing, rubber-band selecting, resizing one word, or moving the selection.
 */
type DragMode =
  | 'none'
  | 'selection'
  | 'resize'
  | 'move'
49
+
50
/**
 * Index every word id by the position of the segment that contains it.
 *
 * @param segments Segments in display order.
 * @returns Map from word id to segment index. If the same id occurs in more
 *          than one segment, the index of the last such segment is kept.
 */
function buildWordToSegmentMap(segments: LyricsSegment[]): Map<string, number> {
  // The Map constructor applies entries in order, so a repeated id keeps its
  // first insertion position but ends up with the latest segment index.
  const entries = segments.flatMap((segment, segmentIndex) =>
    segment.words.map((word): [string, number] => [word.id, segmentIndex])
  )
  return new Map<string, number>(entries)
}
60
+
61
/**
 * Decide which display row (0 or 1) each word is assigned to.
 *
 * Segments that contain at least one timed word are ordered by their earliest
 * start time and assigned rows alternately (0, 1, 0, 1, ...). Every word
 * inherits the row of its segment; words that belong to no segment, or to a
 * segment without any timed word, are placed on row 0.
 *
 * @param words    Words to assign a row to.
 * @param segments Segments that own the words.
 * @returns Map from word id to row index.
 */
function calculateWordLevels(words: Word[], segments: LyricsSegment[]): Map<string, number> {
  const segmentOfWord = new Map<string, number>()
  const timedSegments: Array<{ idx: number; minStart: number }> = []

  // One pass over the segments: remember each word's owner and the earliest
  // start time found inside the segment.
  segments.forEach((segment, idx) => {
    let minStart = Infinity
    segment.words.forEach(w => {
      segmentOfWord.set(w.id, idx)
      if (w.start_time !== null) {
        minStart = Math.min(minStart, w.start_time)
      }
    })
    // Segments with no timed word take no part in the row alternation.
    if (minStart !== Infinity) {
      timedSegments.push({ idx, minStart })
    }
  })

  timedSegments.sort((left, right) => left.minStart - right.minStart)

  const rowOfSegment = new Map<number, number>()
  timedSegments.forEach((entry, order) => {
    rowOfSegment.set(entry.idx, order % 2)
  })

  const rows = new Map<string, number>()
  for (const w of words) {
    const owner = segmentOfWord.get(w.id)
    const row = owner === undefined ? undefined : rowOfSegment.get(owner)
    rows.set(w.id, row ?? 0)
  }
  return rows
}
93
+
94
/**
 * Format a time in seconds as a zero-padded `MM:SS` label.
 *
 * Fractions of a second are dropped. Minutes are not capped, so values of an
 * hour or more yield three or more minute digits (e.g. `100:00`).
 * Negative, NaN and infinite inputs are rendered as `00:00` instead of
 * producing malformed labels such as `-1:-1` or `NaN:NaN`.
 *
 * @param seconds Time in seconds.
 * @returns The formatted label.
 */
function formatTime(seconds: number): string {
  // Clamp anything that is not a positive finite number to zero.
  const wholeSeconds = Number.isFinite(seconds) && seconds > 0 ? Math.floor(seconds) : 0
  const mins = Math.floor(wholeSeconds / 60)
  const secs = wholeSeconds % 60
  return `${String(mins).padStart(2, '0')}:${String(secs).padStart(2, '0')}`
}
99
+
100
/**
 * Canvas-based timeline of lyric words.
 *
 * Renders a time ruler, one block per timed word (on two alternating rows),
 * the playhead, an optional strip of upcoming words during manual sync, and a
 * rubber-band selection rectangle. Mouse interaction supports seeking via the
 * ruler, selecting words (click or rectangle), resizing a selected word from
 * its right edge and moving the whole selection. The component never mutates
 * word timings itself; every change is reported through the callback props.
 */
const TimelineCanvas = memo(function TimelineCanvas({
  words,
  segments,
  visibleStartTime,
  visibleEndTime,
  currentTime,
  selectedWordIds,
  onWordClick,
  onBackgroundClick,
  onTimeBarClick,
  onSelectionComplete,
  onWordTimingChange,
  onWordsMove,
  syncWordIndex,
  isManualSyncing,
  onScrollChange,
  audioDuration,
  zoomSeconds,
  height = 200
}: TimelineCanvasProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null)
  const containerRef = useRef<HTMLDivElement>(null)
  const [canvasWidth, setCanvasWidth] = useState(800)
  const animationFrameRef = useRef<number | undefined>(undefined)
  const wordLevelsRef = useRef<Map<string, number>>(new Map())

  // Drag state. The mode is React state because it affects rendering and
  // cursor updates; the remaining bookkeeping lives in refs.
  const [dragMode, setDragMode] = useState<DragMode>('none')
  const dragStartRef = useRef<{ x: number; y: number; time: number } | null>(null)
  const dragWordIdRef = useRef<string | null>(null)
  // Timings captured at mouse-down, so every drag update is computed from the
  // original values rather than from already-shifted ones.
  const dragOriginalTimesRef = useRef<Map<string, { start: number; end: number }>>(new Map())

  // Rubber-band selection rectangle in canvas CSS pixels
  const [selectionRect, setSelectionRect] = useState<{
    startX: number; startY: number; endX: number; endY: number
  } | null>(null)

  // Hover state for showing the resize handle
  const [hoveredWordId, setHoveredWordId] = useState<string | null>(null)
  const [cursorStyle, setCursorStyle] = useState<string>('default')

  // Track the container width so the canvas always fills it
  useEffect(() => {
    const updateWidth = () => {
      if (containerRef.current) {
        setCanvasWidth(containerRef.current.clientWidth)
      }
    }

    updateWidth()
    const resizeObserver = new ResizeObserver(updateWidth)
    if (containerRef.current) {
      resizeObserver.observe(containerRef.current)
    }

    return () => resizeObserver.disconnect()
  }, [])

  // Recalculate word rows when words or segments change
  useEffect(() => {
    wordLevelsRef.current = calculateWordLevels(words, segments)
  }, [words, segments])

  // Convert a time to an x position (CSS pixels)
  const timeToX = useCallback((time: number): number => {
    const duration = visibleEndTime - visibleStartTime
    if (duration <= 0) return 0
    return CANVAS_PADDING + ((time - visibleStartTime) / duration) * (canvasWidth - CANVAS_PADDING * 2)
  }, [visibleStartTime, visibleEndTime, canvasWidth])

  // Convert an x position (CSS pixels) to a time
  const xToTime = useCallback((x: number): number => {
    const drawableWidth = canvasWidth - CANVAS_PADDING * 2
    // A container narrower than the padding has no drawable area; avoid
    // dividing by zero (or a negative width) and handing NaN/Infinity or
    // mirrored times to the callbacks.
    if (drawableWidth <= 0) return visibleStartTime
    const duration = visibleEndTime - visibleStartTime
    return visibleStartTime + ((x - CANVAS_PADDING) / drawableWidth) * duration
  }, [visibleStartTime, visibleEndTime, canvasWidth])

  // Geometry of a word block, or null for words without complete timing
  const getWordBounds = useCallback((word: Word) => {
    if (word.start_time === null || word.end_time === null) return null

    const level = wordLevelsRef.current.get(word.id) || 0
    const startX = timeToX(word.start_time)
    const endX = timeToX(word.end_time)
    // Very short words still get a visible (and clickable) 4px block
    const blockWidth = Math.max(endX - startX, 4)
    const y = TIME_BAR_HEIGHT + CANVAS_PADDING + TEXT_ABOVE_BLOCK + level * WORD_LEVEL_SPACING

    return { startX, endX, blockWidth, y, level }
  }, [timeToX])

  // Check whether a position is on the resize handle at the block's right edge
  const isNearResizeHandlePos = useCallback((word: Word, x: number, y: number): boolean => {
    const bounds = getWordBounds(word)
    if (!bounds) return false

    const handleX = bounds.startX + bounds.blockWidth - RESIZE_HANDLE_SIZE / 2
    const handleY = bounds.y + WORD_BLOCK_HEIGHT / 2

    return Math.abs(x - handleX) < RESIZE_HANDLE_HITAREA / 2 &&
      Math.abs(y - handleY) < RESIZE_HANDLE_HITAREA / 2
  }, [getWordBounds])

  // Find the first word (in `words` order) whose block contains the position
  const findWordAtPosition = useCallback((x: number, y: number): Word | null => {
    for (const word of words) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      if (x >= bounds.startX && x <= bounds.startX + bounds.blockWidth &&
        y >= bounds.y && y <= bounds.y + WORD_BLOCK_HEIGHT) {
        return word
      }
    }
    return null
  }, [words, getWordBounds])

  // Find the ids of all words whose block intersects the rectangle
  const findWordsInRect = useCallback((rect: { startX: number; startY: number; endX: number; endY: number }): string[] => {
    // The rectangle may have been dragged in any direction; normalise it first
    const rectLeft = Math.min(rect.startX, rect.endX)
    const rectRight = Math.max(rect.startX, rect.endX)
    const rectTop = Math.min(rect.startY, rect.endY)
    const rectBottom = Math.max(rect.startY, rect.endY)

    const selectedIds: string[] = []

    for (const word of words) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      if (bounds.startX + bounds.blockWidth >= rectLeft && bounds.startX <= rectRight &&
        bounds.y + WORD_BLOCK_HEIGHT >= rectTop && bounds.y <= rectBottom) {
        selectedIds.push(word.id)
      }
    }

    return selectedIds
  }, [words, getWordBounds])

  // Draw the whole timeline
  const draw = useCallback(() => {
    const canvas = canvasRef.current
    if (!canvas) return

    const ctx = canvas.getContext('2d')
    if (!ctx) return

    const dpr = window.devicePixelRatio || 1
    // Assigning canvas.width/height reallocates and clears the backing store,
    // so only do it when the size really changed instead of on every frame.
    const pixelWidth = Math.floor(canvasWidth * dpr)
    const pixelHeight = Math.floor(height * dpr)
    if (canvas.width !== pixelWidth) canvas.width = pixelWidth
    if (canvas.height !== pixelHeight) canvas.height = pixelHeight
    // Reset (rather than accumulate) the transform so drawing uses CSS pixels
    ctx.setTransform(dpr, 0, 0, dpr, 0, 0)

    // Clear canvas
    ctx.fillStyle = TIMELINE_BG
    ctx.fillRect(0, 0, canvasWidth, height)

    // Draw time bar background
    ctx.fillStyle = TIME_BAR_BG
    ctx.fillRect(0, 0, canvasWidth, TIME_BAR_HEIGHT)

    // Draw time markers; tick density depends on how much time is visible
    const duration = visibleEndTime - visibleStartTime
    const secondsPerTick = duration > 15 ? 2 : duration > 8 ? 1 : 0.5
    const startSecond = Math.ceil(visibleStartTime / secondsPerTick) * secondsPerTick

    ctx.fillStyle = TIME_BAR_TEXT
    ctx.font = '11px system-ui, -apple-system, sans-serif'
    ctx.textAlign = 'center'

    for (let t = startSecond; t <= visibleEndTime; t += secondsPerTick) {
      const x = timeToX(t)

      ctx.beginPath()
      ctx.strokeStyle = '#999999'
      ctx.lineWidth = 1
      ctx.moveTo(x, TIME_BAR_HEIGHT - 6)
      ctx.lineTo(x, TIME_BAR_HEIGHT)
      ctx.stroke()

      // Only whole seconds get a label; half-second ticks stay unlabelled
      if (t % 1 === 0) {
        ctx.fillText(formatTime(t), x, TIME_BAR_HEIGHT - 10)
      }
    }

    // Separator line under the time bar
    ctx.beginPath()
    ctx.strokeStyle = '#cccccc'
    ctx.lineWidth = 1
    ctx.moveTo(0, TIME_BAR_HEIGHT)
    ctx.lineTo(canvasWidth, TIME_BAR_HEIGHT)
    ctx.stroke()

    const wordToSegment = buildWordToSegmentMap(segments)
    const syncedWords = words.filter(w => w.start_time !== null && w.end_time !== null)

    const currentWordId = syncedWords.find(w =>
      currentTime >= w.start_time! && currentTime <= w.end_time!
    )?.id || null

    // First pass: draw all blocks
    for (const word of syncedWords) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      const isSelected = selectedWordIds.has(word.id)
      const isCurrent = word.id === currentWordId
      const isHovered = word.id === hoveredWordId

      // Draw word block background (selection takes precedence over "current")
      if (isSelected) {
        ctx.fillStyle = WORD_BLOCK_SELECTED_COLOR
      } else if (isCurrent) {
        ctx.fillStyle = WORD_BLOCK_CURRENT_COLOR
      } else {
        ctx.fillStyle = WORD_BLOCK_COLOR
      }
      ctx.fillRect(bounds.startX, bounds.y, bounds.blockWidth, WORD_BLOCK_HEIGHT)

      // Draw selection border
      if (isSelected) {
        ctx.strokeStyle = '#ffffff'
        ctx.lineWidth = 2
        ctx.strokeRect(bounds.startX, bounds.y, bounds.blockWidth, WORD_BLOCK_HEIGHT)

        // Draw the resize handle (white dot on the right edge) when the word
        // is hovered or is the only selected word
        if (isHovered || selectedWordIds.size === 1) {
          const handleX = bounds.startX + bounds.blockWidth - RESIZE_HANDLE_SIZE / 2
          const handleY = bounds.y + WORD_BLOCK_HEIGHT / 2

          ctx.beginPath()
          ctx.fillStyle = '#ffffff'
          ctx.arc(handleX, handleY, RESIZE_HANDLE_SIZE / 2, 0, Math.PI * 2)
          ctx.fill()
          ctx.strokeStyle = '#666666'
          ctx.lineWidth = 1
          ctx.stroke()
        }
      }
    }

    // Second pass: draw text, grouped by segment so labels of the same
    // segment can be pushed right instead of overlapping each other
    const wordsBySegment = new Map<number, Word[]>()
    for (const word of syncedWords) {
      const segIdx = wordToSegment.get(word.id)
      if (segIdx !== undefined) {
        if (!wordsBySegment.has(segIdx)) {
          wordsBySegment.set(segIdx, [])
        }
        wordsBySegment.get(segIdx)!.push(word)
      }
    }

    ctx.font = '11px system-ui, -apple-system, sans-serif'
    ctx.textAlign = 'left'

    for (const [, segmentWords] of wordsBySegment) {
      const sortedWords = [...segmentWords].sort((a, b) =>
        (a.start_time || 0) - (b.start_time || 0)
      )

      if (sortedWords.length === 0) continue

      const level = wordLevelsRef.current.get(sortedWords[0].id) || 0
      const textY = TIME_BAR_HEIGHT + CANVAS_PADDING + TEXT_ABOVE_BLOCK + level * WORD_LEVEL_SPACING - 3

      let rightmostTextEnd = -Infinity

      for (const word of sortedWords) {
        const blockStartX = timeToX(word.start_time!)
        const textWidth = ctx.measureText(word.text).width
        // Start at the block, or just after the previous label if that is further right
        const textStartX = Math.max(blockStartX, rightmostTextEnd + 3)

        if (textStartX < canvasWidth - 10) {
          const isCurrent = word.id === currentWordId
          ctx.fillStyle = isCurrent ? WORD_TEXT_CURRENT_COLOR : '#333333'
          ctx.fillText(word.text, textStartX, textY)
          rightmostTextEnd = textStartX + textWidth
        }
      }
    }

    // Draw upcoming (not yet timed) words to the right of the playhead during sync
    if (isManualSyncing && syncWordIndex >= 0) {
      const upcomingWords = words.slice(syncWordIndex).filter(w => w.start_time === null)
      const playheadX = timeToX(currentTime)
      let offsetX = playheadX + 10

      ctx.font = '11px system-ui, -apple-system, sans-serif'

      // At most 12 chips, and stop early once the right edge is reached
      for (let i = 0; i < Math.min(upcomingWords.length, 12); i++) {
        const word = upcomingWords[i]
        const textWidth = ctx.measureText(word.text).width + 10

        ctx.fillStyle = UPCOMING_WORD_BG
        ctx.fillRect(offsetX, TIME_BAR_HEIGHT + CANVAS_PADDING + WORD_LEVEL_SPACING + 60, textWidth, 20)

        ctx.fillStyle = UPCOMING_WORD_TEXT
        ctx.textAlign = 'left'
        ctx.fillText(word.text, offsetX + 5, TIME_BAR_HEIGHT + CANVAS_PADDING + WORD_LEVEL_SPACING + 74)

        offsetX += textWidth + 3
        if (offsetX > canvasWidth - 20) break
      }
    }

    // Draw playhead (only while it is inside the visible window)
    if (currentTime >= visibleStartTime && currentTime <= visibleEndTime) {
      const playheadX = timeToX(currentTime)

      // Triangle marker in the time bar
      ctx.beginPath()
      ctx.fillStyle = PLAYHEAD_COLOR
      ctx.strokeStyle = '#333333'
      ctx.lineWidth = 1
      ctx.moveTo(playheadX - 6, 2)
      ctx.lineTo(playheadX + 6, 2)
      ctx.lineTo(playheadX, TIME_BAR_HEIGHT - 4)
      ctx.closePath()
      ctx.fill()
      ctx.stroke()

      // Vertical line through the word area
      ctx.beginPath()
      ctx.strokeStyle = PLAYHEAD_COLOR
      ctx.lineWidth = 2
      ctx.moveTo(playheadX, TIME_BAR_HEIGHT)
      ctx.lineTo(playheadX, height)
      ctx.stroke()

      // Thin dark line next to it so the white playhead stays visible
      ctx.beginPath()
      ctx.strokeStyle = 'rgba(0,0,0,0.4)'
      ctx.lineWidth = 1
      ctx.moveTo(playheadX + 1, TIME_BAR_HEIGHT)
      ctx.lineTo(playheadX + 1, height)
      ctx.stroke()
    }

    // Draw selection rectangle
    if (selectionRect) {
      ctx.fillStyle = 'rgba(25, 118, 210, 0.2)'
      ctx.strokeStyle = 'rgba(25, 118, 210, 0.8)'
      ctx.lineWidth = 1

      const rectX = Math.min(selectionRect.startX, selectionRect.endX)
      const rectY = Math.min(selectionRect.startY, selectionRect.endY)
      const rectW = Math.abs(selectionRect.endX - selectionRect.startX)
      const rectH = Math.abs(selectionRect.endY - selectionRect.startY)

      ctx.fillRect(rectX, rectY, rectW, rectH)
      ctx.strokeRect(rectX, rectY, rectW, rectH)
    }
  }, [
    canvasWidth, height, visibleStartTime, visibleEndTime, currentTime,
    words, segments, selectedWordIds, selectionRect, hoveredWordId,
    syncWordIndex, isManualSyncing, timeToX, getWordBounds
  ])

  // Animation loop: redraw on every frame; restarted whenever `draw` changes
  useEffect(() => {
    const animate = () => {
      draw()
      animationFrameRef.current = requestAnimationFrame(animate)
    }
    animate()

    return () => {
      if (animationFrameRef.current !== undefined) {
        cancelAnimationFrame(animationFrameRef.current)
      }
    }
  }, [draw])

  // Mouse handlers
  const handleMouseDown = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()
    if (!rect) return

    const x = e.clientX - rect.left
    const y = e.clientY - rect.top
    const time = xToTime(x)

    // Time bar click: seek, never to a negative time
    if (y < TIME_BAR_HEIGHT) {
      onTimeBarClick(Math.max(0, time))
      return
    }

    const clickedWord = findWordAtPosition(x, y)

    if (clickedWord && selectedWordIds.has(clickedWord.id)) {
      // Check if clicking on resize handle
      if (isNearResizeHandlePos(clickedWord, x, y)) {
        // Start resize (only the clicked word is resized)
        setDragMode('resize')
        dragStartRef.current = { x, y, time }
        dragWordIdRef.current = clickedWord.id
        dragOriginalTimesRef.current = new Map([[clickedWord.id, {
          start: clickedWord.start_time!,
          end: clickedWord.end_time!
        }]])
        return
      }

      // Start move (for all selected words)
      setDragMode('move')
      dragStartRef.current = { x, y, time }
      dragWordIdRef.current = clickedWord.id

      // Store original times for all selected words that have complete timing
      const originalTimes = new Map<string, { start: number; end: number }>()
      for (const wordId of selectedWordIds) {
        const word = words.find(w => w.id === wordId)
        if (word && word.start_time !== null && word.end_time !== null) {
          originalTimes.set(wordId, { start: word.start_time, end: word.end_time })
        }
      }
      dragOriginalTimesRef.current = originalTimes
      return
    }

    if (clickedWord) {
      // Click on unselected word - let the parent select it
      onWordClick(clickedWord.id, e)
      return
    }

    // Background - start a rubber-band selection
    setDragMode('selection')
    dragStartRef.current = { x, y, time }
    setSelectionRect({ startX: x, startY: y, endX: x, endY: y })
  }, [xToTime, onTimeBarClick, findWordAtPosition, selectedWordIds, isNearResizeHandlePos, onWordClick, words])

  const handleMouseMove = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()
    if (!rect) return

    const x = e.clientX - rect.left
    const y = e.clientY - rect.top
    const time = xToTime(x)

    // Update hover state and cursor (only while no drag is in progress)
    if (dragMode === 'none') {
      const hoveredWord = findWordAtPosition(x, y)
      setHoveredWordId(hoveredWord?.id || null)

      if (hoveredWord && selectedWordIds.has(hoveredWord.id)) {
        const nearHandle = isNearResizeHandlePos(hoveredWord, x, y)
        setCursorStyle(nearHandle ? 'ew-resize' : 'grab')
      } else if (hoveredWord) {
        setCursorStyle('pointer')
      } else if (y < TIME_BAR_HEIGHT) {
        setCursorStyle('pointer')
      } else {
        setCursorStyle('default')
      }
    }

    if (!dragStartRef.current) return

    if (dragMode === 'selection') {
      setSelectionRect({
        startX: dragStartRef.current.x,
        startY: dragStartRef.current.y,
        endX: x,
        endY: y
      })
    } else if (dragMode === 'resize' && dragWordIdRef.current) {
      // Resize the word: the start stays fixed, the end follows the mouse but
      // never gets closer than 0.05s to the start
      const originalTimes = dragOriginalTimesRef.current.get(dragWordIdRef.current)
      if (originalTimes) {
        const deltaTime = time - dragStartRef.current.time
        const newEndTime = Math.max(originalTimes.start + 0.05, originalTimes.end + deltaTime)
        onWordTimingChange(dragWordIdRef.current, originalTimes.start, newEndTime)
      }
      setCursorStyle('ew-resize')
    } else if (dragMode === 'move') {
      // Move all selected words by one shared delta. The delta is limited so
      // that the earliest word stops at t=0; clamping each start separately
      // would squash durations and change the spacing between words when the
      // selection is dragged past the beginning.
      const requestedDelta = time - dragStartRef.current.time
      let earliestStart = Infinity
      for (const original of dragOriginalTimesRef.current.values()) {
        earliestStart = Math.min(earliestStart, original.start)
      }
      const deltaTime = Math.max(requestedDelta, -Math.max(0, earliestStart))

      const updates: Array<{ wordId: string; newStartTime: number; newEndTime: number }> = []

      for (const [wordId, originalTimes] of dragOriginalTimesRef.current) {
        // Ensure end time is always after start time (at least 0.05s duration)
        const newStartTime = Math.max(0, originalTimes.start + deltaTime)
        const newEndTime = Math.max(newStartTime + 0.05, originalTimes.end + deltaTime)
        updates.push({
          wordId,
          newStartTime,
          newEndTime
        })
      }

      if (updates.length > 0) {
        onWordsMove(updates)
      }
      setCursorStyle('grabbing')
    }
  }, [dragMode, xToTime, findWordAtPosition, selectedWordIds, isNearResizeHandlePos, onWordTimingChange, onWordsMove])

  // Also used for mouse-leave, so a drag ends when the pointer exits the canvas
  const handleMouseUp = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()

    if (dragMode === 'selection' && dragStartRef.current && rect) {
      const endX = e.clientX - rect.left
      const endY = e.clientY - rect.top

      const dragDistance = Math.sqrt(
        Math.pow(endX - dragStartRef.current.x, 2) +
        Math.pow(endY - dragStartRef.current.y, 2)
      )

      if (dragDistance < 5) {
        // Too short to be a drag: treat it as a click on the background
        onBackgroundClick()
      } else {
        const finalRect = {
          startX: dragStartRef.current.x,
          startY: dragStartRef.current.y,
          endX,
          endY
        }
        const selectedIds = findWordsInRect(finalRect)
        if (selectedIds.length > 0) {
          onSelectionComplete(selectedIds)
        }
      }
    }

    // Reset drag state
    setDragMode('none')
    dragStartRef.current = null
    dragWordIdRef.current = null
    dragOriginalTimesRef.current = new Map()
    setSelectionRect(null)
    setCursorStyle('default')
  }, [dragMode, onBackgroundClick, findWordsInRect, onSelectionComplete])

  // Wheel handler: horizontal delta wins, vertical delta is used otherwise
  const handleWheel = useCallback((e: React.WheelEvent<HTMLCanvasElement>) => {
    const delta = e.deltaX !== 0 ? e.deltaX : e.deltaY
    const scrollAmount = (delta / 100) * (zoomSeconds / 4)
    const newStart = Math.max(0, Math.min(audioDuration - zoomSeconds, visibleStartTime + scrollAmount))

    if (newStart !== visibleStartTime) {
      onScrollChange(newStart)
    }
  }, [visibleStartTime, zoomSeconds, audioDuration, onScrollChange])

  // Arrow buttons scroll by a quarter of the visible window
  const handleScrollLeft = useCallback(() => {
    const newStart = Math.max(0, visibleStartTime - zoomSeconds * 0.25)
    onScrollChange(newStart)
  }, [visibleStartTime, zoomSeconds, onScrollChange])

  const handleScrollRight = useCallback(() => {
    const newStart = Math.min(audioDuration - zoomSeconds, visibleStartTime + zoomSeconds * 0.25)
    onScrollChange(Math.max(0, newStart))
  }, [visibleStartTime, zoomSeconds, audioDuration, onScrollChange])

  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 0.5 }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
        <Tooltip title="Scroll Left">
          <IconButton
            size="small"
            onClick={handleScrollLeft}
            disabled={visibleStartTime <= 0}
          >
            <ArrowBackIcon fontSize="small" />
          </IconButton>
        </Tooltip>

        <Box
          ref={containerRef}
          sx={{
            flexGrow: 1,
            height,
            cursor: cursorStyle,
            borderRadius: 1,
            overflow: 'hidden'
          }}
        >
          <canvas
            ref={canvasRef}
            style={{
              width: '100%',
              height: '100%',
              display: 'block',
              cursor: cursorStyle
            }}
            onMouseDown={handleMouseDown}
            onMouseMove={handleMouseMove}
            onMouseUp={handleMouseUp}
            onMouseLeave={handleMouseUp}
            onWheel={handleWheel}
          />
        </Box>

        <Tooltip title="Scroll Right">
          <IconButton
            size="small"
            onClick={handleScrollRight}
            disabled={visibleStartTime >= audioDuration - zoomSeconds}
          >
            <ArrowForwardIcon fontSize="small" />
          </IconButton>
        </Tooltip>
      </Box>
    </Box>
  )
})

export default TimelineCanvas