karaoke-gen 0.71.42__py3-none-any.whl → 0.75.53__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- karaoke_gen/__init__.py +32 -1
- karaoke_gen/audio_fetcher.py +1220 -67
- karaoke_gen/audio_processor.py +15 -3
- karaoke_gen/instrumental_review/server.py +154 -860
- karaoke_gen/instrumental_review/static/index.html +1529 -0
- karaoke_gen/karaoke_finalise/karaoke_finalise.py +87 -2
- karaoke_gen/karaoke_gen.py +131 -14
- karaoke_gen/lyrics_processor.py +172 -4
- karaoke_gen/utils/bulk_cli.py +3 -0
- karaoke_gen/utils/cli_args.py +7 -4
- karaoke_gen/utils/gen_cli.py +221 -5
- karaoke_gen/utils/remote_cli.py +786 -43
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/METADATA +109 -4
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/RECORD +37 -31
- lyrics_transcriber/core/controller.py +76 -2
- lyrics_transcriber/frontend/package.json +1 -1
- lyrics_transcriber/frontend/src/App.tsx +6 -4
- lyrics_transcriber/frontend/src/api.ts +25 -10
- lyrics_transcriber/frontend/src/components/Header.tsx +38 -12
- lyrics_transcriber/frontend/src/components/LyricsAnalyzer.tsx +17 -3
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/SyncControls.tsx +185 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/TimelineCanvas.tsx +704 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/UpcomingWordsBar.tsx +80 -0
- lyrics_transcriber/frontend/src/components/LyricsSynchronizer/index.tsx +905 -0
- lyrics_transcriber/frontend/src/components/ModeSelectionModal.tsx +127 -0
- lyrics_transcriber/frontend/src/components/ReplaceAllLyricsModal.tsx +190 -542
- lyrics_transcriber/frontend/tsconfig.tsbuildinfo +1 -1
- lyrics_transcriber/frontend/web_assets/assets/{index-DdJTDWH3.js → index-BECn1o8Q.js} +1802 -553
- lyrics_transcriber/frontend/web_assets/assets/index-BECn1o8Q.js.map +1 -0
- lyrics_transcriber/frontend/web_assets/index.html +1 -1
- lyrics_transcriber/output/countdown_processor.py +39 -0
- lyrics_transcriber/review/server.py +5 -5
- lyrics_transcriber/transcribers/audioshake.py +96 -7
- lyrics_transcriber/types.py +14 -12
- lyrics_transcriber/frontend/web_assets/assets/index-DdJTDWH3.js.map +0 -1
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/WHEEL +0 -0
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/entry_points.txt +0 -0
- {karaoke_gen-0.71.42.dist-info → karaoke_gen-0.75.53.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import { useRef, useEffect, useCallback, useState, memo } from 'react'
|
|
2
|
+
import { Box, IconButton, Tooltip } from '@mui/material'
|
|
3
|
+
import ArrowBackIcon from '@mui/icons-material/ArrowBack'
|
|
4
|
+
import ArrowForwardIcon from '@mui/icons-material/ArrowForward'
|
|
5
|
+
import { Word, LyricsSegment } from '../../types'
|
|
6
|
+
|
|
7
|
+
/** One entry of the batch passed to `onWordsMove` while a selection is being dragged. */
type WordTimingUpdate = { wordId: string; newStartTime: number; newEndTime: number }

/** Props accepted by the TimelineCanvas component. */
interface TimelineCanvasProps {
  // --- Data to render ---

  /** All words; only those with non-null start and end times are drawn as blocks. */
  words: Word[]
  /** Segments owning the words; used to alternate the vertical level of consecutive segments. */
  segments: LyricsSegment[]
  /** Ids of the words drawn as selected; selected words can be moved or resized by dragging. */
  selectedWordIds: Set<string>

  // --- Time window and playback ---

  /** Time (seconds) mapped to the left edge of the drawable area. */
  visibleStartTime: number
  /** Time (seconds) mapped to the right edge of the drawable area. */
  visibleEndTime: number
  /** Playhead position; the word whose range contains it is highlighted. */
  currentTime: number
  /** Upper bound used when clamping scroll positions. */
  audioDuration: number
  /** Used to size scroll steps and clamp scrolling (presumably the visible window length — confirm with caller). */
  zoomSeconds: number

  // --- Manual sync preview ---

  /** Index into `words` from which not-yet-timed words are previewed next to the playhead. */
  syncWordIndex: number
  /** When true (and `syncWordIndex >= 0`), upcoming untimed words are drawn after the playhead. */
  isManualSyncing: boolean

  // --- Callbacks ---

  /** Fired on mouse-down over a word that is not currently selected. */
  onWordClick: (wordId: string, event: React.MouseEvent) => void
  /** Fired when a press on empty background is released after moving less than 5px. */
  onBackgroundClick: () => void
  /** Fired on mouse-down inside the time bar with the (non-negative) time under the pointer. */
  onTimeBarClick: (time: number) => void
  /** Fired after a rubber-band drag with the ids of the words it intersects (only when non-empty). */
  onSelectionComplete: (wordIds: string[]) => void
  /** Fired while dragging a word's resize handle; the start time is passed through unchanged. */
  onWordTimingChange: (wordId: string, newStartTime: number, newEndTime: number) => void
  /** Fired while dragging selected words, with new timings for every selected timed word. */
  onWordsMove: (updates: Array<WordTimingUpdate>) => void
  /** Fired with the new visible start time on wheel scroll or arrow-button click. */
  onScrollChange: (newStartTime: number) => void

  // --- Layout ---

  /** Canvas height in CSS pixels; the component defaults this to 200. */
  height?: number
}
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
// Rendering constants (all sizes are in CSS pixels)
// ---------------------------------------------------------------------------

// Layout
const CANVAS_PADDING = 8          // horizontal inset of the time axis; also the gap under the time bar
const TIME_BAR_HEIGHT = 28        // strip at the top holding tick marks and time labels
const TEXT_ABOVE_BLOCK = 14       // room reserved above each block for the word's text
const WORD_BLOCK_HEIGHT = 24      // height of a word's timing block
const WORD_LEVEL_SPACING = 50     // vertical distance between two word levels

// Resize handle: the drawn dot is smaller than the area that reacts to the pointer
const RESIZE_HANDLE_SIZE = 8
const RESIZE_HANDLE_HITAREA = 12

// Colours: time bar and background
const TIMELINE_BG = '#e0e0e0'
const TIME_BAR_BG = '#f5f5f5'
const TIME_BAR_TEXT = '#666666'
const PLAYHEAD_COLOR = '#ffffff'

// Colours: word blocks and labels
const WORD_BLOCK_COLOR = '#d32f2f'
const WORD_BLOCK_SELECTED_COLOR = '#b71c1c'
const WORD_BLOCK_CURRENT_COLOR = '#f44336'
const WORD_TEXT_CURRENT_COLOR = '#d32f2f'

// Colours: preview of upcoming words during manual sync
const UPCOMING_WORD_BG = '#fff9c4'
const UPCOMING_WORD_TEXT = '#000000'

// Which pointer gesture, if any, is currently in progress
type DragMode = 'none' | 'selection' | 'resize' | 'move'
|
|
49
|
+
|
|
50
|
+
/**
 * Index every word by the position of the segment that contains it.
 *
 * @param segments Segments whose `words` are scanned in order.
 * @returns Map from word id to segment index; if an id occurs in several
 *          segments, the last segment wins.
 */
function buildWordToSegmentMap(segments: LyricsSegment[]): Map<string, number> {
  const segmentIndexByWordId = new Map<string, number>()
  for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) {
    for (const { id } of segments[segmentIndex].words) {
      segmentIndexByWordId.set(id, segmentIndex)
    }
  }
  return segmentIndexByWordId
}
|
|
60
|
+
|
|
61
|
+
/**
 * Decide which vertical level (0 or 1) each word is drawn on.
 *
 * Segments that contain at least one timed word are ordered by their earliest
 * start time and assigned levels 0, 1, 0, 1, ... so that consecutive segments
 * sit on different rows. Every word inherits its segment's level; words with
 * no segment, or whose segment has no timed word, get level 0.
 *
 * @param words    Words to assign a level to.
 * @param segments Segments used to group the words.
 * @returns Map from word id to level.
 */
function calculateWordLevels(words: Word[], segments: LyricsSegment[]): Map<string, number> {
  const segmentIndexByWordId = buildWordToSegmentMap(segments)

  // Earliest start time of each segment that has at least one timed word.
  const timedSegments: Array<{ idx: number; minStart: number }> = []
  segments.forEach((segment, idx) => {
    let minStart = Infinity
    for (const w of segment.words) {
      if (w.start_time !== null) {
        minStart = Math.min(minStart, w.start_time)
      }
    }
    if (minStart !== Infinity) {
      timedSegments.push({ idx, minStart })
    }
  })
  timedSegments.sort((left, right) => left.minStart - right.minStart)

  // Alternate levels in chronological order.
  const levelBySegment = new Map<number, number>()
  timedSegments.forEach((entry, rank) => {
    levelBySegment.set(entry.idx, rank % 2)
  })

  const levels = new Map<string, number>()
  for (const { id } of words) {
    const owningSegment = segmentIndexByWordId.get(id)
    const level = owningSegment === undefined ? undefined : levelBySegment.get(owningSegment)
    levels.set(id, level ?? 0)
  }
  return levels
}
|
|
93
|
+
|
|
94
|
+
/**
 * Format a time offset as a zero-padded `MM:SS` label.
 *
 * Fractions of a second are truncated. Minutes are not wrapped at 60, so one
 * hour is rendered as `60:00`. Negative or non-finite input is rendered as
 * `00:00` instead of producing labels such as `-1:-1` or `NaN:NaN`.
 *
 * @param seconds Time offset in seconds.
 * @returns The formatted label.
 */
function formatTime(seconds: number): string {
  const safeSeconds = Number.isFinite(seconds) && seconds > 0 ? seconds : 0
  const mins = Math.floor(safeSeconds / 60)
  const secs = Math.floor(safeSeconds % 60)
  return `${mins.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`
}
|
|
99
|
+
|
|
100
|
+
/**
 * Canvas timeline showing timed words as blocks on a scrollable time axis.
 *
 * Rendering: a time bar with tick marks, one block per timed word (two
 * alternating rows by segment), word text above the blocks, an optional
 * preview of upcoming untimed words during manual sync, the playhead, and the
 * rubber-band selection rectangle.
 *
 * Interaction: pressing the time bar reports a time; pressing an unselected
 * word reports a click; dragging a selected word moves every selected word;
 * dragging the handle on a selected word's right edge resizes it; dragging on
 * the background rubber-band selects; the wheel and arrow buttons scroll.
 *
 * All state changes are reported through the callback props; the component
 * never mutates `words` itself.
 */
const TimelineCanvas = memo(function TimelineCanvas({
  words,
  segments,
  visibleStartTime,
  visibleEndTime,
  currentTime,
  selectedWordIds,
  onWordClick,
  onBackgroundClick,
  onTimeBarClick,
  onSelectionComplete,
  onWordTimingChange,
  onWordsMove,
  syncWordIndex,
  isManualSyncing,
  onScrollChange,
  audioDuration,
  zoomSeconds,
  height = 200
}: TimelineCanvasProps) {
  const canvasRef = useRef<HTMLCanvasElement>(null)
  const containerRef = useRef<HTMLDivElement>(null)
  const [canvasWidth, setCanvasWidth] = useState(800)
  const wordLevelsRef = useRef<Map<string, number>>(new Map())

  // Drag state. The mode is React state because it affects rendering and
  // handler identity; the rest lives in refs so pointer moves do not re-render.
  const [dragMode, setDragMode] = useState<DragMode>('none')
  const dragStartRef = useRef<{ x: number; y: number; time: number } | null>(null)
  const dragWordIdRef = useRef<string | null>(null)
  const dragOriginalTimesRef = useRef<Map<string, { start: number; end: number }>>(new Map())

  // Rubber-band selection rectangle, in canvas CSS pixels
  const [selectionRect, setSelectionRect] = useState<{
    startX: number; startY: number; endX: number; endY: number
  } | null>(null)

  // Hover state for showing the resize handle, and the cursor to display
  const [hoveredWordId, setHoveredWordId] = useState<string | null>(null)
  const [cursorStyle, setCursorStyle] = useState<string>('default')

  // Track the container's width so the canvas always fills it
  useEffect(() => {
    const updateWidth = () => {
      if (containerRef.current) {
        setCanvasWidth(containerRef.current.clientWidth)
      }
    }

    updateWidth()
    const resizeObserver = new ResizeObserver(updateWidth)
    if (containerRef.current) {
      resizeObserver.observe(containerRef.current)
    }

    return () => resizeObserver.disconnect()
  }, [])

  // Recalculate word levels when words or segments change
  useEffect(() => {
    wordLevelsRef.current = calculateWordLevels(words, segments)
  }, [words, segments])

  // Convert time to x position
  const timeToX = useCallback((time: number): number => {
    const duration = visibleEndTime - visibleStartTime
    if (duration <= 0) return 0
    return CANVAS_PADDING + ((time - visibleStartTime) / duration) * (canvasWidth - CANVAS_PADDING * 2)
  }, [visibleStartTime, visibleEndTime, canvasWidth])

  // Convert x position to time
  const xToTime = useCallback((x: number): number => {
    const drawableWidth = canvasWidth - CANVAS_PADDING * 2
    // A collapsed container would otherwise divide by zero and leak
    // NaN/Infinity into the timing callbacks.
    if (drawableWidth <= 0) return visibleStartTime
    const duration = visibleEndTime - visibleStartTime
    return visibleStartTime + ((x - CANVAS_PADDING) / drawableWidth) * duration
  }, [visibleStartTime, visibleEndTime, canvasWidth])

  // Pixel geometry of a word's block, or null when the word is not fully timed
  const getWordBounds = useCallback((word: Word) => {
    if (word.start_time === null || word.end_time === null) return null

    const level = wordLevelsRef.current.get(word.id) || 0
    const startX = timeToX(word.start_time)
    const endX = timeToX(word.end_time)
    // Keep very short words visible and clickable
    const blockWidth = Math.max(endX - startX, 4)
    const y = TIME_BAR_HEIGHT + CANVAS_PADDING + TEXT_ABOVE_BLOCK + level * WORD_LEVEL_SPACING

    return { startX, endX, blockWidth, y, level }
  }, [timeToX])

  // Check if a position is within the hit area of a word's resize handle
  const isNearResizeHandlePos = useCallback((word: Word, x: number, y: number): boolean => {
    const bounds = getWordBounds(word)
    if (!bounds) return false

    const handleX = bounds.startX + bounds.blockWidth - RESIZE_HANDLE_SIZE / 2
    const handleY = bounds.y + WORD_BLOCK_HEIGHT / 2

    return Math.abs(x - handleX) < RESIZE_HANDLE_HITAREA / 2 &&
      Math.abs(y - handleY) < RESIZE_HANDLE_HITAREA / 2
  }, [getWordBounds])

  // Find the first word (in `words` order) whose block contains the position
  const findWordAtPosition = useCallback((x: number, y: number): Word | null => {
    for (const word of words) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      if (x >= bounds.startX && x <= bounds.startX + bounds.blockWidth &&
          y >= bounds.y && y <= bounds.y + WORD_BLOCK_HEIGHT) {
        return word
      }
    }
    return null
  }, [words, getWordBounds])

  // Find the ids of all words whose block intersects the selection rectangle
  const findWordsInRect = useCallback((rect: { startX: number; startY: number; endX: number; endY: number }): string[] => {
    // The rectangle may have been dragged in any direction; normalise it first
    const rectLeft = Math.min(rect.startX, rect.endX)
    const rectRight = Math.max(rect.startX, rect.endX)
    const rectTop = Math.min(rect.startY, rect.endY)
    const rectBottom = Math.max(rect.startY, rect.endY)

    const selectedIds: string[] = []

    for (const word of words) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      if (bounds.startX + bounds.blockWidth >= rectLeft && bounds.startX <= rectRight &&
          bounds.y + WORD_BLOCK_HEIGHT >= rectTop && bounds.y <= rectBottom) {
        selectedIds.push(word.id)
      }
    }

    return selectedIds
  }, [words, getWordBounds])

  // Draw the timeline
  const draw = useCallback(() => {
    const canvas = canvasRef.current
    if (!canvas) return

    const ctx = canvas.getContext('2d')
    if (!ctx) return

    // Size the backing store for the device pixel ratio. Assigning
    // width/height reallocates and clears the canvas, so only do it when the
    // size actually changed.
    const dpr = window.devicePixelRatio || 1
    const pixelWidth = Math.floor(canvasWidth * dpr)
    const pixelHeight = Math.floor(height * dpr)
    if (canvas.width !== pixelWidth) canvas.width = pixelWidth
    if (canvas.height !== pixelHeight) canvas.height = pixelHeight
    // setTransform replaces the current matrix, so the scale does not
    // accumulate across draws now that the canvas is not reset every time.
    ctx.setTransform(dpr, 0, 0, dpr, 0, 0)

    // Clear canvas (the opaque background covers the previous frame)
    ctx.fillStyle = TIMELINE_BG
    ctx.fillRect(0, 0, canvasWidth, height)

    // Draw time bar background
    ctx.fillStyle = TIME_BAR_BG
    ctx.fillRect(0, 0, canvasWidth, TIME_BAR_HEIGHT)

    // Draw time markers; tick density depends on how much time is visible
    const duration = visibleEndTime - visibleStartTime
    const secondsPerTick = duration > 15 ? 2 : duration > 8 ? 1 : 0.5
    const startSecond = Math.ceil(visibleStartTime / secondsPerTick) * secondsPerTick

    ctx.fillStyle = TIME_BAR_TEXT
    ctx.font = '11px system-ui, -apple-system, sans-serif'
    ctx.textAlign = 'center'

    for (let t = startSecond; t <= visibleEndTime; t += secondsPerTick) {
      const x = timeToX(t)

      ctx.beginPath()
      ctx.strokeStyle = '#999999'
      ctx.lineWidth = 1
      ctx.moveTo(x, TIME_BAR_HEIGHT - 6)
      ctx.lineTo(x, TIME_BAR_HEIGHT)
      ctx.stroke()

      // Only whole seconds get a label; half-second ticks are marks only
      if (t % 1 === 0) {
        ctx.fillText(formatTime(t), x, TIME_BAR_HEIGHT - 10)
      }
    }

    // Separator line under the time bar
    ctx.beginPath()
    ctx.strokeStyle = '#cccccc'
    ctx.lineWidth = 1
    ctx.moveTo(0, TIME_BAR_HEIGHT)
    ctx.lineTo(canvasWidth, TIME_BAR_HEIGHT)
    ctx.stroke()

    const wordToSegment = buildWordToSegmentMap(segments)
    const syncedWords = words.filter(w => w.start_time !== null && w.end_time !== null)

    // The word under the playhead, if any
    const currentWordId = syncedWords.find(w =>
      currentTime >= w.start_time! && currentTime <= w.end_time!
    )?.id || null

    // First pass: draw all blocks
    for (const word of syncedWords) {
      const bounds = getWordBounds(word)
      if (!bounds) continue

      const isSelected = selectedWordIds.has(word.id)
      const isCurrent = word.id === currentWordId
      const isHovered = word.id === hoveredWordId

      // Draw word block background (selection takes precedence over "current")
      if (isSelected) {
        ctx.fillStyle = WORD_BLOCK_SELECTED_COLOR
      } else if (isCurrent) {
        ctx.fillStyle = WORD_BLOCK_CURRENT_COLOR
      } else {
        ctx.fillStyle = WORD_BLOCK_COLOR
      }
      ctx.fillRect(bounds.startX, bounds.y, bounds.blockWidth, WORD_BLOCK_HEIGHT)

      // Draw selection border
      if (isSelected) {
        ctx.strokeStyle = '#ffffff'
        ctx.lineWidth = 2
        ctx.strokeRect(bounds.startX, bounds.y, bounds.blockWidth, WORD_BLOCK_HEIGHT)

        // Draw resize handle (white dot on the right edge) when the selected
        // word is hovered, or always when it is the only selected word
        if (isHovered || selectedWordIds.size === 1) {
          const handleX = bounds.startX + bounds.blockWidth - RESIZE_HANDLE_SIZE / 2
          const handleY = bounds.y + WORD_BLOCK_HEIGHT / 2

          ctx.beginPath()
          ctx.fillStyle = '#ffffff'
          ctx.arc(handleX, handleY, RESIZE_HANDLE_SIZE / 2, 0, Math.PI * 2)
          ctx.fill()
          ctx.strokeStyle = '#666666'
          ctx.lineWidth = 1
          ctx.stroke()
        }
      }
    }

    // Second pass: draw text, grouped by segment so labels within a segment
    // can be pushed right instead of overlapping each other
    const wordsBySegment = new Map<number, Word[]>()
    for (const word of syncedWords) {
      const segIdx = wordToSegment.get(word.id)
      if (segIdx !== undefined) {
        if (!wordsBySegment.has(segIdx)) {
          wordsBySegment.set(segIdx, [])
        }
        wordsBySegment.get(segIdx)!.push(word)
      }
    }

    ctx.font = '11px system-ui, -apple-system, sans-serif'
    ctx.textAlign = 'left'

    for (const [, segmentWords] of wordsBySegment) {
      const sortedWords = [...segmentWords].sort((a, b) =>
        (a.start_time || 0) - (b.start_time || 0)
      )

      if (sortedWords.length === 0) continue

      const level = wordLevelsRef.current.get(sortedWords[0].id) || 0
      const textY = TIME_BAR_HEIGHT + CANVAS_PADDING + TEXT_ABOVE_BLOCK + level * WORD_LEVEL_SPACING - 3

      let rightmostTextEnd = -Infinity

      for (const word of sortedWords) {
        const blockStartX = timeToX(word.start_time!)
        const textWidth = ctx.measureText(word.text).width
        // Start at the block, or just after the previous label if that would overlap
        const textStartX = Math.max(blockStartX, rightmostTextEnd + 3)

        if (textStartX < canvasWidth - 10) {
          const isCurrent = word.id === currentWordId
          ctx.fillStyle = isCurrent ? WORD_TEXT_CURRENT_COLOR : '#333333'
          ctx.fillText(word.text, textStartX, textY)
          rightmostTextEnd = textStartX + textWidth
        }
      }
    }

    // Draw upcoming (not yet timed) words to the right of the playhead during sync
    if (isManualSyncing && syncWordIndex >= 0) {
      const upcomingWords = words.slice(syncWordIndex).filter(w => w.start_time === null)
      const playheadX = timeToX(currentTime)
      let offsetX = playheadX + 10

      ctx.font = '11px system-ui, -apple-system, sans-serif'

      // Show at most 12 words, and stop early once the right edge is reached
      for (let i = 0; i < Math.min(upcomingWords.length, 12); i++) {
        const word = upcomingWords[i]
        const textWidth = ctx.measureText(word.text).width + 10

        ctx.fillStyle = UPCOMING_WORD_BG
        ctx.fillRect(offsetX, TIME_BAR_HEIGHT + CANVAS_PADDING + WORD_LEVEL_SPACING + 60, textWidth, 20)

        ctx.fillStyle = UPCOMING_WORD_TEXT
        ctx.textAlign = 'left'
        ctx.fillText(word.text, offsetX + 5, TIME_BAR_HEIGHT + CANVAS_PADDING + WORD_LEVEL_SPACING + 74)

        offsetX += textWidth + 3
        if (offsetX > canvasWidth - 20) break
      }
    }

    // Draw playhead (only when it falls inside the visible window)
    if (currentTime >= visibleStartTime && currentTime <= visibleEndTime) {
      const playheadX = timeToX(currentTime)

      // Triangle marker in the time bar
      ctx.beginPath()
      ctx.fillStyle = PLAYHEAD_COLOR
      ctx.strokeStyle = '#333333'
      ctx.lineWidth = 1
      ctx.moveTo(playheadX - 6, 2)
      ctx.lineTo(playheadX + 6, 2)
      ctx.lineTo(playheadX, TIME_BAR_HEIGHT - 4)
      ctx.closePath()
      ctx.fill()
      ctx.stroke()

      // Vertical line through the word area
      ctx.beginPath()
      ctx.strokeStyle = PLAYHEAD_COLOR
      ctx.lineWidth = 2
      ctx.moveTo(playheadX, TIME_BAR_HEIGHT)
      ctx.lineTo(playheadX, height)
      ctx.stroke()

      // Thin dark line beside it so the white playhead stays visible
      ctx.beginPath()
      ctx.strokeStyle = 'rgba(0,0,0,0.4)'
      ctx.lineWidth = 1
      ctx.moveTo(playheadX + 1, TIME_BAR_HEIGHT)
      ctx.lineTo(playheadX + 1, height)
      ctx.stroke()
    }

    // Draw selection rectangle
    if (selectionRect) {
      ctx.fillStyle = 'rgba(25, 118, 210, 0.2)'
      ctx.strokeStyle = 'rgba(25, 118, 210, 0.8)'
      ctx.lineWidth = 1

      const rectX = Math.min(selectionRect.startX, selectionRect.endX)
      const rectY = Math.min(selectionRect.startY, selectionRect.endY)
      const rectW = Math.abs(selectionRect.endX - selectionRect.startX)
      const rectH = Math.abs(selectionRect.endY - selectionRect.startY)

      ctx.fillRect(rectX, rectY, rectW, rectH)
      ctx.strokeRect(rectX, rectY, rectW, rectH)
    }
  }, [
    canvasWidth, height, visibleStartTime, visibleEndTime, currentTime,
    words, segments, selectedWordIds, selectionRect, hoveredWordId,
    syncWordIndex, isManualSyncing, timeToX, getWordBounds
  ])

  // Redraw once per change. `draw` gets a new identity whenever any of its
  // inputs changes, so one scheduled frame per identity is enough; a pending
  // frame is cancelled if another change (or unmount) arrives first.
  // NOTE(review): a devicePixelRatio change with no prop or size change is
  // only picked up on the next redraw.
  useEffect(() => {
    const frameId = requestAnimationFrame(() => draw())
    return () => cancelAnimationFrame(frameId)
  }, [draw])

  // Mouse handlers
  const handleMouseDown = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()
    if (!rect) return

    const x = e.clientX - rect.left
    const y = e.clientY - rect.top
    const time = xToTime(x)

    // Time bar click
    if (y < TIME_BAR_HEIGHT) {
      onTimeBarClick(Math.max(0, time))
      return
    }

    const clickedWord = findWordAtPosition(x, y)

    if (clickedWord && selectedWordIds.has(clickedWord.id)) {
      // Check if clicking on resize handle
      if (isNearResizeHandlePos(clickedWord, x, y)) {
        // Start resize
        setDragMode('resize')
        dragStartRef.current = { x, y, time }
        dragWordIdRef.current = clickedWord.id
        dragOriginalTimesRef.current = new Map([[clickedWord.id, {
          start: clickedWord.start_time!,
          end: clickedWord.end_time!
        }]])
        return
      }

      // Start move (for all selected words)
      setDragMode('move')
      dragStartRef.current = { x, y, time }
      dragWordIdRef.current = clickedWord.id

      // Store original times for all selected words, so every move update is
      // computed from the drag's starting point rather than accumulated.
      // Index words once instead of scanning the array per selected id.
      const wordsById = new Map<string, Word>()
      for (const word of words) {
        if (!wordsById.has(word.id)) wordsById.set(word.id, word)
      }
      const originalTimes = new Map<string, { start: number; end: number }>()
      for (const wordId of selectedWordIds) {
        const word = wordsById.get(wordId)
        if (word && word.start_time !== null && word.end_time !== null) {
          originalTimes.set(wordId, { start: word.start_time, end: word.end_time })
        }
      }
      dragOriginalTimesRef.current = originalTimes
      return
    }

    if (clickedWord) {
      // Click on unselected word - let the parent select it
      onWordClick(clickedWord.id, e)
      return
    }

    // Background - start rubber-band selection
    setDragMode('selection')
    dragStartRef.current = { x, y, time }
    setSelectionRect({ startX: x, startY: y, endX: x, endY: y })
  }, [xToTime, onTimeBarClick, findWordAtPosition, selectedWordIds, isNearResizeHandlePos, onWordClick, words])

  const handleMouseMove = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()
    if (!rect) return

    const x = e.clientX - rect.left
    const y = e.clientY - rect.top
    const time = xToTime(x)

    // Update hover state and cursor (only while no drag is in progress)
    if (dragMode === 'none') {
      const hoveredWord = findWordAtPosition(x, y)
      setHoveredWordId(hoveredWord?.id || null)

      if (hoveredWord && selectedWordIds.has(hoveredWord.id)) {
        const nearHandle = isNearResizeHandlePos(hoveredWord, x, y)
        setCursorStyle(nearHandle ? 'ew-resize' : 'grab')
      } else if (hoveredWord) {
        setCursorStyle('pointer')
      } else if (y < TIME_BAR_HEIGHT) {
        setCursorStyle('pointer')
      } else {
        setCursorStyle('default')
      }
    }

    const dragStart = dragStartRef.current
    if (!dragStart) return

    if (dragMode === 'selection') {
      setSelectionRect({
        startX: dragStart.x,
        startY: dragStart.y,
        endX: x,
        endY: y
      })
    } else if (dragMode === 'resize' && dragWordIdRef.current) {
      // Resize the word: the start stays fixed, the end follows the pointer
      const originalTimes = dragOriginalTimesRef.current.get(dragWordIdRef.current)
      if (originalTimes) {
        const deltaTime = time - dragStart.time
        // Never let the word shrink below 0.05s
        const newEndTime = Math.max(originalTimes.start + 0.05, originalTimes.end + deltaTime)
        onWordTimingChange(dragWordIdRef.current, originalTimes.start, newEndTime)
      }
      setCursorStyle('ew-resize')
    } else if (dragMode === 'move') {
      // Move all selected words by the same offset
      const originals = dragOriginalTimesRef.current

      if (originals.size > 0) {
        // Clamp the offset once for the whole group so the earliest word
        // stops at t=0. Clamping each word separately would pile the words up
        // at zero and lose their spacing and durations.
        let earliestStart = Infinity
        for (const { start } of originals.values()) {
          earliestStart = Math.min(earliestStart, start)
        }
        const deltaTime = Math.max(time - dragStart.time, -earliestStart)

        const updates: Array<{ wordId: string; newStartTime: number; newEndTime: number }> = []
        for (const [wordId, originalTimes] of originals) {
          // Ensure end time is always after start time (at least 0.05s duration)
          const newStartTime = Math.max(0, originalTimes.start + deltaTime)
          const newEndTime = Math.max(newStartTime + 0.05, originalTimes.end + deltaTime)
          updates.push({
            wordId,
            newStartTime,
            newEndTime
          })
        }

        onWordsMove(updates)
      }
      setCursorStyle('grabbing')
    }
  }, [dragMode, xToTime, findWordAtPosition, selectedWordIds, isNearResizeHandlePos, onWordTimingChange, onWordsMove])

  const handleMouseUp = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    const rect = canvasRef.current?.getBoundingClientRect()

    if (dragMode === 'selection' && dragStartRef.current && rect) {
      const endX = e.clientX - rect.left
      const endY = e.clientY - rect.top

      const dragDistance = Math.sqrt(
        Math.pow(endX - dragStartRef.current.x, 2) +
        Math.pow(endY - dragStartRef.current.y, 2)
      )

      // A press that barely moved is a background click, not a selection
      if (dragDistance < 5) {
        onBackgroundClick()
      } else {
        const finalRect = {
          startX: dragStartRef.current.x,
          startY: dragStartRef.current.y,
          endX,
          endY
        }
        const selectedIds = findWordsInRect(finalRect)
        if (selectedIds.length > 0) {
          onSelectionComplete(selectedIds)
        }
      }
    }

    // Reset drag state
    setDragMode('none')
    dragStartRef.current = null
    dragWordIdRef.current = null
    dragOriginalTimesRef.current = new Map()
    setSelectionRect(null)
    setCursorStyle('default')
  }, [dragMode, onBackgroundClick, findWordsInRect, onSelectionComplete])

  // Leaving the canvas ends any drag exactly like releasing the button, and
  // also clears the hover so no hover-only resize handle stays drawn.
  const handleMouseLeave = useCallback((e: React.MouseEvent<HTMLCanvasElement>) => {
    handleMouseUp(e)
    setHoveredWordId(null)
  }, [handleMouseUp])

  // Wheel handler: horizontal wheel movement wins, otherwise vertical is used
  const handleWheel = useCallback((e: React.WheelEvent<HTMLCanvasElement>) => {
    const delta = e.deltaX !== 0 ? e.deltaX : e.deltaY
    const scrollAmount = (delta / 100) * (zoomSeconds / 4)
    const newStart = Math.max(0, Math.min(audioDuration - zoomSeconds, visibleStartTime + scrollAmount))

    if (newStart !== visibleStartTime) {
      onScrollChange(newStart)
    }
  }, [visibleStartTime, zoomSeconds, audioDuration, onScrollChange])

  // Arrow buttons scroll by a quarter of `zoomSeconds`
  const handleScrollLeft = useCallback(() => {
    const newStart = Math.max(0, visibleStartTime - zoomSeconds * 0.25)
    onScrollChange(newStart)
  }, [visibleStartTime, zoomSeconds, onScrollChange])

  const handleScrollRight = useCallback(() => {
    const newStart = Math.min(audioDuration - zoomSeconds, visibleStartTime + zoomSeconds * 0.25)
    onScrollChange(Math.max(0, newStart))
  }, [visibleStartTime, zoomSeconds, audioDuration, onScrollChange])

  return (
    <Box sx={{ display: 'flex', flexDirection: 'column', gap: 0.5 }}>
      <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
        <Tooltip title="Scroll Left">
          {/* A disabled button fires no events; the span keeps the tooltip working */}
          <span>
            <IconButton
              size="small"
              onClick={handleScrollLeft}
              disabled={visibleStartTime <= 0}
            >
              <ArrowBackIcon fontSize="small" />
            </IconButton>
          </span>
        </Tooltip>

        <Box
          ref={containerRef}
          sx={{
            flexGrow: 1,
            height,
            cursor: cursorStyle,
            borderRadius: 1,
            overflow: 'hidden'
          }}
        >
          <canvas
            ref={canvasRef}
            style={{
              width: '100%',
              height: '100%',
              display: 'block',
              cursor: cursorStyle
            }}
            onMouseDown={handleMouseDown}
            onMouseMove={handleMouseMove}
            onMouseUp={handleMouseUp}
            onMouseLeave={handleMouseLeave}
            onWheel={handleWheel}
          />
        </Box>

        <Tooltip title="Scroll Right">
          {/* A disabled button fires no events; the span keeps the tooltip working */}
          <span>
            <IconButton
              size="small"
              onClick={handleScrollRight}
              disabled={visibleStartTime >= audioDuration - zoomSeconds}
            >
              <ArrowForwardIcon fontSize="small" />
            </IconButton>
          </span>
        </Tooltip>
      </Box>
    </Box>
  )
})
|
|
703
|
+
|
|
704
|
+
export default TimelineCanvas
|