eprec 0.0.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/LICENSE +21 -0
  2. package/README.md +52 -29
  3. package/cli.ts +150 -0
  4. package/package.json +39 -7
  5. package/process-course/chapter-processor.ts +1037 -0
  6. package/process-course/cli.ts +236 -0
  7. package/process-course/config.ts +50 -0
  8. package/process-course/edits/cli.ts +167 -0
  9. package/process-course/edits/combined-video-editor.ts +316 -0
  10. package/process-course/edits/edit-workspace.ts +90 -0
  11. package/process-course/edits/index.ts +20 -0
  12. package/process-course/edits/regenerate-transcript.ts +84 -0
  13. package/process-course/edits/remove-ranges.test.ts +36 -0
  14. package/process-course/edits/remove-ranges.ts +287 -0
  15. package/process-course/edits/timestamp-refinement.test.ts +25 -0
  16. package/process-course/edits/timestamp-refinement.ts +172 -0
  17. package/process-course/edits/transcript-diff.test.ts +105 -0
  18. package/process-course/edits/transcript-diff.ts +214 -0
  19. package/process-course/edits/transcript-output.test.ts +50 -0
  20. package/process-course/edits/transcript-output.ts +36 -0
  21. package/process-course/edits/types.ts +26 -0
  22. package/process-course/edits/video-editor.ts +246 -0
  23. package/process-course/errors.test.ts +63 -0
  24. package/process-course/errors.ts +82 -0
  25. package/process-course/ffmpeg.ts +449 -0
  26. package/process-course/jarvis-commands/handlers.ts +71 -0
  27. package/process-course/jarvis-commands/index.ts +14 -0
  28. package/process-course/jarvis-commands/parser.test.ts +348 -0
  29. package/process-course/jarvis-commands/parser.ts +257 -0
  30. package/process-course/jarvis-commands/types.ts +46 -0
  31. package/process-course/jarvis-commands/windows.ts +254 -0
  32. package/process-course/logging.ts +24 -0
  33. package/process-course/paths.test.ts +59 -0
  34. package/process-course/paths.ts +53 -0
  35. package/process-course/summary.test.ts +209 -0
  36. package/process-course/summary.ts +210 -0
  37. package/process-course/types.ts +85 -0
  38. package/process-course/utils/audio-analysis.test.ts +348 -0
  39. package/process-course/utils/audio-analysis.ts +463 -0
  40. package/process-course/utils/chapter-selection.test.ts +307 -0
  41. package/process-course/utils/chapter-selection.ts +136 -0
  42. package/process-course/utils/file-utils.test.ts +83 -0
  43. package/process-course/utils/file-utils.ts +57 -0
  44. package/process-course/utils/filename.test.ts +27 -0
  45. package/process-course/utils/filename.ts +12 -0
  46. package/process-course/utils/time-ranges.test.ts +221 -0
  47. package/process-course/utils/time-ranges.ts +86 -0
  48. package/process-course/utils/transcript.test.ts +257 -0
  49. package/process-course/utils/transcript.ts +86 -0
  50. package/process-course/utils/video-editing.ts +44 -0
  51. package/process-course-video.ts +389 -0
  52. package/speech-detection.ts +355 -0
  53. package/utils.ts +138 -0
  54. package/whispercpp-transcribe.ts +345 -0
package/process-course/chapter-processor.ts
@@ -0,0 +1,1037 @@
import path from 'node:path'
import { detectSpeechBounds, checkSegmentHasSpeech } from '../speech-detection'
import { transcribeAudio } from '../whispercpp-transcribe'
import { clamp, formatSeconds } from '../utils'
import {
  COMMAND_CLOSE_WORD,
  COMMAND_WAKE_WORD,
  CONFIG,
  EDIT_CONFIG,
} from './config'
import {
  analyzeLoudness,
  concatSegments,
  extractChapterSegment,
  extractChapterSegmentAccurate,
  extractTranscriptionAudio,
  renderChapter,
} from './ffmpeg'
import {
  buildIntermediateAudioPath,
  buildIntermediatePath,
  buildJarvisOutputBase,
  buildTranscriptionOutputBase,
} from './paths'
import { logInfo, logWarn, writeChapterLog } from './logging'
import { mergeTimeRanges, buildKeepRanges } from './utils/time-ranges'
import {
  findSpeechEndWithRmsFallback,
  findSpeechStartWithRmsFallback,
} from './utils/audio-analysis'
import { safeUnlink } from './utils/file-utils'
import { formatChapterFilename } from './utils/filename'
import { findWordTimings, transcriptIncludesWord } from './utils/transcript'
import { allocateJoinPadding } from './utils/video-editing'
import {
  extractTranscriptCommands,
  scaleTranscriptSegments,
  buildCommandWindows,
  refineCommandWindows,
  analyzeCommands,
  formatCommandTypes,
} from './jarvis-commands'
import type {
  Chapter,
  TimeRange,
  JarvisWarning,
  JarvisEdit,
  JarvisNote,
  ProcessedChapterInfo,
  EditWorkspaceInfo,
} from './types'
import { createEditWorkspace } from './edits'

export interface ChapterProcessingOptions {
  inputPath: string
  outputDir: string
  tmpDir: string
  minChapterDurationSeconds: number
  enableTranscription: boolean
  whisperModelPath: string
  whisperLanguage: string
  whisperBinaryPath: string | undefined
  keepIntermediates: boolean
  writeLogs: boolean
  dryRun: boolean
  previousProcessedChapter?: ProcessedChapterInfo | null
}

export interface ChapterProcessingResult {
  status: 'processed' | 'skipped'
  skipReason?:
    | 'short-initial'
    | 'short-trimmed'
    | 'transcript'
    | 'bad-take'
    | 'dry-run'
  jarvisWarning?: JarvisWarning
  jarvisEdit?: JarvisEdit
  jarvisNotes?: JarvisNote[]
  fallbackNote?: string
  logWritten: boolean
  processedInfo?: ProcessedChapterInfo
  editWorkspace?: EditWorkspaceInfo
}
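
// Usage sketch. Illustrative only: the real wiring lives in
// process-course/cli.ts, and every option value below is invented.
//
//   const result = await processChapter(chapter, {
//     inputPath: '/courses/raw/lesson-03.mp4',
//     outputDir: '/courses/processed',
//     tmpDir: '/tmp/eprec',
//     minChapterDurationSeconds: 10,
//     enableTranscription: true,
//     whisperModelPath: '/models/ggml-base.en.bin',
//     whisperLanguage: 'en',
//     whisperBinaryPath: undefined,
//     keepIntermediates: false,
//     writeLogs: true,
//     dryRun: false,
//   })
//   if (result.status === 'skipped') logInfo(`skipped: ${result.skipReason}`)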

/**
 * Process a single chapter: extract, normalize, detect commands, splice, trim, and output.
 */
export async function processChapter(
  chapter: Chapter,
  options: ChapterProcessingOptions,
): Promise<ChapterProcessingResult> {
  const duration = chapter.end - chapter.start
  if (duration <= 0) {
    throw new Error(
      `Invalid chapter duration for "${chapter.title}" (${duration}s)`,
    )
  }

  const outputBasePath = path.join(
    options.outputDir,
    `${formatChapterFilename(chapter)}${path.extname(options.inputPath)}`,
  )

  // Check minimum duration before processing
  if (duration < options.minChapterDurationSeconds) {
    logInfo(
      `Skipping chapter ${chapter.index + 1}: ${chapter.title} (${formatSeconds(duration)})`,
    )
    let logWritten = false
    if (options.writeLogs && !options.dryRun) {
      await writeChapterLog(options.tmpDir, outputBasePath, [
        `Chapter: ${chapter.index + 1} - ${chapter.title}`,
        `Input: ${options.inputPath}`,
        `Duration: ${formatSeconds(duration)}`,
        `Skip threshold: ${formatSeconds(options.minChapterDurationSeconds)}`,
        'Reason: Chapter shorter than minimum duration threshold.',
      ])
      logWritten = true
    }
    return { status: 'skipped', skipReason: 'short-initial', logWritten }
  }

  // Dry run - don't actually process
  if (options.dryRun) {
    logInfo(
      `[dry-run] Would process chapter ${chapter.index + 1}: ${chapter.title}`,
    )
    return { status: 'processed', skipReason: 'dry-run', logWritten: false }
  }

  logInfo(`Processing chapter ${chapter.index + 1}: ${chapter.title}`)

  // Build all intermediate paths
  const paths = buildIntermediatePaths(options.tmpDir, outputBasePath)

  try {
    // Step 1: Extract raw segment with padding trimmed
    const rawTrimStart = chapter.start + CONFIG.rawTrimPaddingSeconds
    const rawTrimEnd = chapter.end - CONFIG.rawTrimPaddingSeconds
    const rawDuration = rawTrimEnd - rawTrimStart
    if (rawDuration <= 0) {
      throw new Error(
        `Chapter too short to trim ${CONFIG.rawTrimPaddingSeconds}s from both ends (${formatSeconds(duration)}).`,
      )
    }

    await extractChapterSegment({
      inputPath: options.inputPath,
      outputPath: paths.rawPath,
      start: rawTrimStart,
      end: rawTrimEnd,
    })
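
    // ./ffmpeg exposes both extractChapterSegment and
    // extractChapterSegmentAccurate. Judging by the names (an assumption,
    // not verified against ffmpeg.ts), the former does fast keyframe-aligned
    // seeking, good enough for this rough first cut, while the accurate
    // variant pays for frame-exact cut points; the splice and combine paths
    // below use the accurate variant where cut precision matters.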

    // Step 2: Normalize audio
    const analysis = await analyzeLoudness(paths.rawPath, 0, rawDuration)
    await renderChapter({
      inputPath: paths.rawPath,
      outputPath: paths.normalizedPath,
      absoluteStart: 0,
      absoluteEnd: rawDuration,
      analysis,
    })
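
    // The analyze-then-render split looks like a two-pass loudness
    // normalization workflow (measure stats first, then normalize with those
    // stats while rendering), in the spirit of ffmpeg's two-pass loudnorm.
    // This is inferred from the API shape; the actual filter chain lives in
    // ./ffmpeg.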

    // Step 3: Transcribe and analyze commands
    let commandWindows: TimeRange[] = []
    let commandFilenameOverride: string | null = null
    let hasEditCommand = false
    let commandNotes: Array<{ value: string; window: TimeRange }> = []

    if (options.enableTranscription) {
      const transcriptionResult = await transcribeAndAnalyze({
        normalizedPath: paths.normalizedPath,
        transcriptionAudioPath: paths.transcriptionAudioPath,
        transcriptionOutputBase: paths.transcriptionOutputBase,
        rawDuration,
        options,
      })

      if (transcriptionResult.shouldSkip) {
        let logWritten = false
        if (options.writeLogs) {
          await writeChapterLog(options.tmpDir, outputBasePath, [
            `Chapter: ${chapter.index + 1} - ${chapter.title}`,
            `Input: ${options.inputPath}`,
            `Duration: ${formatSeconds(duration)}`,
            `Reason: ${transcriptionResult.skipReason}`,
          ])
          logWritten = true
        }
        await safeUnlink(outputBasePath)
        return {
          status: 'skipped',
          skipReason: transcriptionResult.hasBadTake
            ? 'bad-take'
            : 'transcript',
          logWritten,
        }
      }

      commandWindows = transcriptionResult.commandWindows
      commandFilenameOverride = transcriptionResult.filenameOverride
      hasEditCommand = transcriptionResult.hasEdit
      commandNotes = transcriptionResult.notes

      // Handle combine-previous command
      if (transcriptionResult.hasCombinePrevious) {
        if (!options.previousProcessedChapter) {
          logWarn(
            `Combine previous command detected for chapter ${chapter.index + 1}, but no previous chapter available. Processing normally.`,
          )
        } else {
          const combineResult = await handleCombinePrevious({
            chapter,
            previousProcessedChapter: options.previousProcessedChapter,
            commandWindows,
            commandNotes,
            normalizedPath: paths.normalizedPath,
            rawDuration,
            tmpDir: options.tmpDir,
            outputBasePath,
            paths,
            options,
          })
          // If combine succeeded, return its result; if it failed (returned
          // null), fall through to normal processing.
          if (combineResult !== null) {
            return combineResult
          }
        }
      }
    }

    // Step 4: Determine final output path
    const outputTitle = commandFilenameOverride ?? chapter.title
    const finalOutputPath = path.join(
      options.outputDir,
      `${formatChapterFilename({ ...chapter, title: outputTitle })}${path.extname(options.inputPath)}`,
    )

    // Step 5: Handle command splicing
    const spliceResult = await handleCommandSplicing({
      commandWindows,
      normalizedPath: paths.normalizedPath,
      rawDuration,
      tmpDir: options.tmpDir,
      outputBasePath,
      paths,
    })

    // Step 6: Detect speech bounds
    const speechBounds = await detectSpeechBounds(
      spliceResult.sourcePath,
      0,
      spliceResult.sourceDuration,
      spliceResult.sourceDuration,
    )

    let fallbackNote: string | undefined
    let logWritten = false
    if (speechBounds.note) {
      fallbackNote = speechBounds.note
      logInfo(`Speech detection fallback: ${speechBounds.note}`)
      if (options.writeLogs) {
        await writeChapterLog(options.tmpDir, outputBasePath, [
          `Chapter: ${chapter.index + 1} - ${chapter.title}`,
          `Input: ${options.inputPath}`,
          `Reason: ${speechBounds.note}`,
        ])
        logWritten = true
      }
    }

    // Step 7: Apply speech padding
    const paddedStart = clamp(
      speechBounds.start - CONFIG.preSpeechPaddingSeconds,
      0,
      spliceResult.sourceDuration,
    )
    const paddedEnd = clamp(
      speechBounds.end + CONFIG.postSpeechPaddingSeconds,
      0,
      spliceResult.sourceDuration,
    )
    const trimmedDuration = paddedEnd - paddedStart
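
    // Worked example with invented numbers: if speech is detected from 2.3s
    // to 57.8s in a 60s source and the configured paddings are 0.5s, the
    // kept window becomes 1.8s -> 58.3s (trimmed duration 56.5s). The clamps
    // cover chapters where speech starts or ends at the media boundary.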

    if (paddedEnd <= paddedStart + CONFIG.minTrimWindowSeconds) {
      throw new Error(
        `Trim window too small for "${chapter.title}" (${paddedStart}s -> ${paddedEnd}s)`,
      )
    }

    logInfo(
      `Speech bounds: ${formatSeconds(speechBounds.start)} -> ${formatSeconds(speechBounds.end)}, padded to ${formatSeconds(paddedStart)} -> ${formatSeconds(paddedEnd)}`,
    )

    // Step 8: Check trimmed duration
    if (trimmedDuration < options.minChapterDurationSeconds) {
      logInfo(
        `Skipping chapter ${chapter.index + 1}: trimmed ${formatSeconds(trimmedDuration)} < ${formatSeconds(options.minChapterDurationSeconds)}.`,
      )
      if (options.writeLogs) {
        await writeChapterLog(options.tmpDir, outputBasePath, [
          `Chapter: ${chapter.index + 1} - ${chapter.title}`,
          `Input: ${options.inputPath}`,
          `Duration: ${formatSeconds(duration)}`,
          `Trimmed duration: ${formatSeconds(trimmedDuration)}`,
          `Skip threshold: ${formatSeconds(options.minChapterDurationSeconds)}`,
          'Reason: Trimmed duration shorter than minimum duration threshold.',
        ])
        logWritten = true
      }
      await safeUnlink(outputBasePath)
      return { status: 'skipped', skipReason: 'short-trimmed', logWritten }
    }

    // Step 9: Write final output
    await extractChapterSegment({
      inputPath: spliceResult.sourcePath,
      outputPath: finalOutputPath,
      start: paddedStart,
      end: paddedEnd,
    })

    // Step 10: Verify no jarvis in final output
    let jarvisWarning: JarvisWarning | undefined
    await extractTranscriptionAudio({
      inputPath: finalOutputPath,
      outputPath: paths.jarvisTranscriptionAudioPath,
      start: 0,
      end: trimmedDuration,
    })
    const jarvisTranscription = await transcribeAudio(
      paths.jarvisTranscriptionAudioPath,
      {
        modelPath: options.whisperModelPath,
        language: options.whisperLanguage,
        binaryPath: options.whisperBinaryPath,
        outputBasePath: paths.jarvisTranscriptionOutputBase,
      },
    )
    const jarvisSegments =
      jarvisTranscription.segmentsSource === 'tokens'
        ? jarvisTranscription.segments
        : scaleTranscriptSegments(jarvisTranscription.segments, trimmedDuration)
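
    // Assumed rationale for the scaling above: when whisper.cpp cannot emit
    // token-level timestamps, segment times are rescaled proportionally so
    // they span the known clip duration rather than whisper's internal
    // timeline.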
    const jarvisWordTimings = findWordTimings(jarvisSegments, 'jarvis')
    if (transcriptIncludesWord(jarvisTranscription.text, 'jarvis')) {
      jarvisWarning = {
        chapter,
        outputPath: finalOutputPath,
        timestamps: jarvisWordTimings,
      }
      logWarn(
        `Jarvis detected in chapter ${chapter.index + 1}: ${path.basename(finalOutputPath)}`,
      )
    }

    // Step 11: Track edit commands
    let jarvisEdit: JarvisEdit | undefined
    if (hasEditCommand) {
      jarvisEdit = { chapter, outputPath: finalOutputPath }
      logInfo(
        `Edit command detected for chapter ${chapter.index + 1}: ${path.basename(finalOutputPath)}`,
      )
    }

    // Step 12: Track note commands
    const jarvisNotes: JarvisNote[] = commandNotes.map((note) => ({
      chapter,
      outputPath: finalOutputPath,
      note: note.value,
      timestamp: note.window.start,
    }))
    if (jarvisNotes.length > 0) {
      logInfo(
        `Note command${jarvisNotes.length > 1 ? 's' : ''} detected for chapter ${chapter.index + 1}: ${jarvisNotes.map((n) => n.note).join(', ')}`,
      )
    }

    // Step 13: Create edit workspace when needed
    let editWorkspace: EditWorkspaceInfo | undefined
    if (
      EDIT_CONFIG.autoCreateEditsDirectory &&
      (hasEditCommand || jarvisWarning)
    ) {
      const reason = hasEditCommand ? 'edit-command' : 'jarvis-warning'
      const workspace = await createEditWorkspace({
        outputDir: options.outputDir,
        sourceVideoPath: finalOutputPath,
        sourceDuration: trimmedDuration,
        segments: jarvisSegments,
      })
      editWorkspace = {
        chapter,
        outputPath: finalOutputPath,
        reason,
        editsDirectory: workspace.editsDirectory,
        transcriptTextPath: workspace.transcriptTextPath,
        transcriptJsonPath: workspace.transcriptJsonPath,
        originalVideoPath: workspace.originalVideoPath,
        instructionsPath: workspace.instructionsPath,
      }
    }

    const processedInfo: ProcessedChapterInfo = {
      chapter,
      outputPath: finalOutputPath,
      processedPath: finalOutputPath, // Use output path as processed path (intermediates may be cleaned up)
      processedDuration: trimmedDuration,
    }

    return {
      status: 'processed',
      jarvisWarning,
      jarvisEdit,
      jarvisNotes: jarvisNotes.length > 0 ? jarvisNotes : undefined,
      fallbackNote,
      logWritten,
      processedInfo,
      editWorkspace,
    }
  } finally {
    // Cleanup intermediate files
    if (!options.keepIntermediates) {
      await cleanupIntermediateFiles(paths)
    }
  }
}

interface IntermediatePaths {
  rawPath: string
  normalizedPath: string
  transcriptionAudioPath: string
  transcriptionOutputBase: string
  transcriptionTextPath: string
  transcriptionJsonPath: string
  jarvisTranscriptionAudioPath: string
  jarvisTranscriptionOutputBase: string
  jarvisTranscriptionTextPath: string
  jarvisTranscriptionJsonPath: string
  spliceSegmentPaths: string[]
  splicedPath: string | null
}

function buildIntermediatePaths(
  tmpDir: string,
  outputBasePath: string,
): IntermediatePaths {
  const transcriptionOutputBase = buildTranscriptionOutputBase(
    tmpDir,
    outputBasePath,
  )
  const jarvisTranscriptionOutputBase = buildJarvisOutputBase(
    tmpDir,
    outputBasePath,
  )

  return {
    rawPath: buildIntermediatePath(tmpDir, outputBasePath, 'raw'),
    normalizedPath: buildIntermediatePath(tmpDir, outputBasePath, 'normalized'),
    transcriptionAudioPath: buildIntermediateAudioPath(
      tmpDir,
      outputBasePath,
      'transcribe',
    ),
    transcriptionOutputBase,
    transcriptionTextPath: `${transcriptionOutputBase}.txt`,
    transcriptionJsonPath: `${transcriptionOutputBase}.json`,
    jarvisTranscriptionAudioPath: buildIntermediateAudioPath(
      tmpDir,
      outputBasePath,
      'jarvis',
    ),
    jarvisTranscriptionOutputBase,
    jarvisTranscriptionTextPath: `${jarvisTranscriptionOutputBase}.txt`,
    jarvisTranscriptionJsonPath: `${jarvisTranscriptionOutputBase}.json`,
    spliceSegmentPaths: [],
    splicedPath: null,
  }
}
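
// Design note: spliceSegmentPaths and splicedPath start out empty and are
// filled in by handleCommandSplicing as intermediates are produced, so this
// one IntermediatePaths object doubles as the registry that
// cleanupIntermediateFiles walks from processChapter's finally block.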

async function cleanupIntermediateFiles(paths: IntermediatePaths) {
  await safeUnlink(paths.rawPath)
  await safeUnlink(paths.normalizedPath)
  await safeUnlink(paths.transcriptionAudioPath)
  await safeUnlink(paths.transcriptionTextPath)
  await safeUnlink(paths.transcriptionJsonPath)
  await safeUnlink(paths.jarvisTranscriptionAudioPath)
  await safeUnlink(paths.jarvisTranscriptionTextPath)
  await safeUnlink(paths.jarvisTranscriptionJsonPath)
  if (paths.splicedPath) {
    await safeUnlink(paths.splicedPath)
  }
  for (const segmentPath of paths.spliceSegmentPaths) {
    await safeUnlink(segmentPath)
  }
}

interface TranscriptionAnalysisResult {
  commandWindows: TimeRange[]
  filenameOverride: string | null
  hasEdit: boolean
  hasBadTake: boolean
  hasCombinePrevious: boolean
  notes: Array<{ value: string; window: TimeRange }>
  shouldSkip: boolean
  skipReason?: string
}

async function transcribeAndAnalyze(params: {
  normalizedPath: string
  transcriptionAudioPath: string
  transcriptionOutputBase: string
  rawDuration: number
  options: ChapterProcessingOptions
}): Promise<TranscriptionAnalysisResult> {
  await extractTranscriptionAudio({
    inputPath: params.normalizedPath,
    outputPath: params.transcriptionAudioPath,
    start: 0,
    end: params.rawDuration,
  })

  const transcriptionResult = await transcribeAudio(
    params.transcriptionAudioPath,
    {
      modelPath: params.options.whisperModelPath,
      language: params.options.whisperLanguage,
      binaryPath: params.options.whisperBinaryPath,
      outputBasePath: params.transcriptionOutputBase,
    },
  )

  const transcript = transcriptionResult.text
  const scaledSegments =
    transcriptionResult.segmentsSource === 'tokens'
      ? transcriptionResult.segments
      : scaleTranscriptSegments(
          transcriptionResult.segments,
          params.rawDuration,
        )

  const commands = extractTranscriptCommands(scaledSegments, {
    wakeWord: COMMAND_WAKE_WORD,
    closeWord: COMMAND_CLOSE_WORD,
  })

  if (commands.length > 0) {
    logInfo(`Commands detected: ${formatCommandTypes(commands)}`)
  }

  const analysis = analyzeCommands(commands, transcript)

  if (analysis.filenameOverride) {
    logInfo(`Filename command: ${analysis.filenameOverride}`)
  }

  if (analysis.shouldSkip) {
    logInfo(`Skipping: ${analysis.skipReason}`)
    return {
      commandWindows: [],
      filenameOverride: analysis.filenameOverride,
      hasEdit: analysis.hasEdit,
      hasBadTake: analysis.hasBadTake,
      hasCombinePrevious: analysis.hasCombinePrevious,
      notes: analysis.notes,
      shouldSkip: true,
      skipReason: analysis.skipReason,
    }
  }

  let commandWindows = buildCommandWindows(commands, {
    offset: 0,
    min: 0,
    max: params.rawDuration,
    paddingSeconds: CONFIG.commandTrimPaddingSeconds,
  })

  if (commandWindows.length > 0) {
    commandWindows = await refineCommandWindows({
      commandWindows,
      inputPath: params.normalizedPath,
      duration: params.rawDuration,
    })
  }

  return {
    commandWindows,
    filenameOverride: analysis.filenameOverride,
    hasEdit: analysis.hasEdit,
    hasBadTake: analysis.hasBadTake,
    hasCombinePrevious: analysis.hasCombinePrevious,
    notes: analysis.notes,
    shouldSkip: false,
  }
}

interface SpliceResult {
  sourcePath: string
  sourceDuration: number
}

async function handleCommandSplicing(params: {
  commandWindows: TimeRange[]
  normalizedPath: string
  rawDuration: number
  tmpDir: string
  outputBasePath: string
  paths: IntermediatePaths
}): Promise<SpliceResult> {
  let sourcePath = params.normalizedPath
  let sourceDuration = params.rawDuration

  if (params.commandWindows.length === 0) {
    return { sourcePath, sourceDuration }
  }

  const mergedCommandWindows = mergeTimeRanges(params.commandWindows)
  const keepRanges = buildKeepRanges(
    0,
    params.rawDuration,
    mergedCommandWindows,
  )
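
  // Example, assuming buildKeepRanges returns the complement of the command
  // windows within [0, rawDuration]: for a 300s chapter with merged windows
  // [{ start: 120, end: 124 }], the kept ranges would be
  // [{ start: 0, end: 120 }, { start: 124, end: 300 }].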

  if (keepRanges.length === 0) {
    throw new Error('Command windows removed entire chapter.')
  }

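  // A 1ms tolerance on each end treats "the keep range covers everything"
  // as a no-op, absorbing floating-point jitter from the window math rather
  // than splicing for nothing.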
  const isFullRange =
    keepRanges.length === 1 &&
    keepRanges[0] &&
    keepRanges[0].start <= 0.001 &&
    keepRanges[0].end >= params.rawDuration - 0.001

  if (isFullRange) {
    return { sourcePath, sourceDuration }
  }

  // Check if command is at end - just trim instead of splicing
  const isCommandAtEnd =
    keepRanges.length === 1 && keepRanges[0] && keepRanges[0].start <= 0.001

  if (isCommandAtEnd && keepRanges[0]) {
    sourceDuration = keepRanges[0].end
    logInfo(`Command at end - trimming to ${formatSeconds(sourceDuration)}`)
    return { sourcePath, sourceDuration }
  }

  // Command mid-video - need to splice
  const splicedPath = buildIntermediatePath(
    params.tmpDir,
    params.outputBasePath,
    'spliced',
  )
  params.paths.splicedPath = splicedPath

  const segmentsWithSpeech: { path: string; range: TimeRange }[] = []

  for (const [index, range] of keepRanges.entries()) {
    const segmentPath = buildIntermediatePath(
      params.tmpDir,
      params.outputBasePath,
      `splice-${index + 1}`,
    )
    params.paths.spliceSegmentPaths.push(segmentPath)

    await extractChapterSegmentAccurate({
      inputPath: params.normalizedPath,
      outputPath: segmentPath,
      start: range.start,
      end: range.end,
    })

    // Check if segment has speech using VAD
    const segmentDuration = range.end - range.start
    const hasSpeech = await checkSegmentHasSpeech(segmentPath, segmentDuration)

    if (hasSpeech) {
      segmentsWithSpeech.push({ path: segmentPath, range })
    } else {
      logInfo(
        `Splice segment ${index + 1} has no speech, excluding from combined output`,
      )
    }
  }

  if (segmentsWithSpeech.length === 0) {
    throw new Error('All splice segments have no speech.')
  }

  if (segmentsWithSpeech.length === 1 && segmentsWithSpeech[0]) {
    // Only one segment with speech - use it directly without concat
    sourcePath = segmentsWithSpeech[0].path
    sourceDuration =
      segmentsWithSpeech[0].range.end - segmentsWithSpeech[0].range.start
    params.paths.splicedPath = null // Don't delete the segment we're using
    logInfo(
      `Using single segment with speech, duration: ${formatSeconds(sourceDuration)}`,
    )
  } else {
    await concatSegments({
      segmentPaths: segmentsWithSpeech.map((s) => s.path),
      outputPath: splicedPath,
    })
    sourcePath = splicedPath
    sourceDuration = segmentsWithSpeech.reduce(
      (total, s) => total + (s.range.end - s.range.start),
      0,
    )
    logInfo(
      `Spliced ${segmentsWithSpeech.length} segments (of ${keepRanges.length}), combined duration: ${formatSeconds(sourceDuration)}`,
    )
  }

  return { sourcePath, sourceDuration }
}

async function handleCombinePrevious(params: {
  chapter: Chapter
  previousProcessedChapter: ProcessedChapterInfo
  commandWindows: TimeRange[]
  commandNotes: Array<{ value: string; window: TimeRange }>
  normalizedPath: string
  rawDuration: number
  tmpDir: string
  outputBasePath: string
  paths: IntermediatePaths
  options: ChapterProcessingOptions
}): Promise<ChapterProcessingResult | null> {
  const {
    chapter,
    previousProcessedChapter,
    commandWindows,
    commandNotes,
    normalizedPath,
    rawDuration,
    tmpDir,
    outputBasePath,
    paths,
    options,
  } = params

  // Check if the previous chapter has speech before attempting to combine.
  // If it doesn't, return null so the caller falls back to normal processing.
  const previousHasSpeech = await checkSegmentHasSpeech(
    previousProcessedChapter.outputPath,
    previousProcessedChapter.processedDuration,
  )

  if (!previousHasSpeech) {
    logInfo(
      `Previous chapter ${previousProcessedChapter.chapter.index + 1} has no speech. Cannot combine with chapter ${chapter.index + 1}.`,
    )
    return null
  }

  logInfo(
    `Combining chapter ${chapter.index + 1} with previous chapter ${previousProcessedChapter.chapter.index + 1}`,
  )

  // Step 1: Remove combine-previous command window from current chapter
  const spliceResult = await handleCommandSplicing({
    commandWindows,
    normalizedPath,
    rawDuration,
    tmpDir,
    outputBasePath,
    paths,
  })

  // Step 2: Detect speech bounds on current chapter (after splicing)
  const currentSpeechBounds = await detectSpeechBounds(
    spliceResult.sourcePath,
    0,
    spliceResult.sourceDuration,
    spliceResult.sourceDuration,
  )

  // Step 3: Trim end of previous chapter's output
  // Load the previous chapter's output and detect speech bounds on the end portion
  const previousOutputDuration = previousProcessedChapter.processedDuration
  const endSearchWindow = Math.min(
    previousOutputDuration * 0.3, // Search last 30% of previous chapter
    EDIT_CONFIG.speechSearchWindowSeconds * 2, // Or up to 2x the silence search window
  )
  const previousEndSearchStart = Math.max(
    0,
    previousOutputDuration - endSearchWindow,
  )

  // Detect speech bounds on the end portion
  const previousEndSpeechBounds = await detectSpeechBounds(
    previousProcessedChapter.outputPath,
    previousEndSearchStart,
    previousOutputDuration,
    previousOutputDuration,
  )

  // Convert relative bounds to absolute times (detectSpeechBounds returns bounds relative to chapterStart)
  // However, when VAD fails and uses speechFallback, the returned end value is already absolute (duration)
  const absoluteSpeechEnd = previousEndSpeechBounds.note
    ? previousEndSpeechBounds.end // Fallback case: already absolute
    : previousEndSearchStart + previousEndSpeechBounds.end // Normal case: convert relative to absolute
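
  // If the VAD fell back (note set), or it claims speech runs to within
  // 50ms of the end of the clip and so likely never found a clean boundary,
  // retry with the RMS-based detector and prefer its answer when it
  // returns one.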
  let effectiveSpeechEnd = absoluteSpeechEnd
  if (
    previousEndSpeechBounds.note ||
    previousOutputDuration - absoluteSpeechEnd < 0.05
  ) {
    const rmsSpeechEnd = await findSpeechEndWithRmsFallback({
      inputPath: previousProcessedChapter.outputPath,
      start: previousEndSearchStart,
      duration: previousOutputDuration - previousEndSearchStart,
    })
    if (rmsSpeechEnd !== null) {
      effectiveSpeechEnd = previousEndSearchStart + rmsSpeechEnd
    }
  }

  const finalPreviousEnd = effectiveSpeechEnd

  let effectiveSpeechStart = currentSpeechBounds.start
  if (currentSpeechBounds.note || currentSpeechBounds.start <= 0.05) {
    const rmsSpeechStart = await findSpeechStartWithRmsFallback({
      inputPath: spliceResult.sourcePath,
      start: 0,
      duration: spliceResult.sourceDuration,
    })
    if (rmsSpeechStart !== null) {
      effectiveSpeechStart = rmsSpeechStart
    }
  }
  const finalCurrentStart = effectiveSpeechStart

  let currentEffectiveSpeechEnd = currentSpeechBounds.end
  if (
    currentSpeechBounds.note ||
    spliceResult.sourceDuration - currentSpeechBounds.end < 0.05
  ) {
    const rmsSpeechEnd = await findSpeechEndWithRmsFallback({
      inputPath: spliceResult.sourcePath,
      start: 0,
      duration: spliceResult.sourceDuration,
    })
    if (rmsSpeechEnd !== null) {
      currentEffectiveSpeechEnd = rmsSpeechEnd
    }
  }
  const finalCurrentEnd = currentEffectiveSpeechEnd

  // Step 4: Apply padding (maximize total gap if one side lacks silence)
  const speechPaddingSeconds = EDIT_CONFIG.speechBoundaryPaddingMs / 1000
  const previousAvailableSilence = Math.max(
    0,
    previousOutputDuration - finalPreviousEnd,
  )
  const currentAvailableSilence = Math.max(0, finalCurrentStart)
  const { previousPaddingSeconds, currentPaddingSeconds } = allocateJoinPadding(
    {
      paddingSeconds: speechPaddingSeconds,
      previousAvailableSeconds: previousAvailableSilence,
      currentAvailableSeconds: currentAvailableSilence,
    },
  )
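
  // Illustrative only, assuming allocateJoinPadding splits the requested gap
  // according to how much silence each side can actually absorb: with
  // paddingSeconds = 0.3, 0.05s of silence left after the previous chapter's
  // speech, and plenty before the current chapter's, it would allocate
  // roughly 0.05s to the previous side and the remaining 0.25s to the
  // current side, preserving the total gap at the join.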
  const previousPaddedEnd = clamp(
    finalPreviousEnd + previousPaddingSeconds,
    0,
    previousOutputDuration,
  )
  const currentPaddedStart = clamp(
    finalCurrentStart - currentPaddingSeconds,
    0,
    spliceResult.sourceDuration,
  )
  const currentPaddedEnd = clamp(
    finalCurrentEnd + speechPaddingSeconds,
    0,
    spliceResult.sourceDuration,
  )

  logInfo(
    `Previous chapter trim: ${formatSeconds(previousPaddedEnd)} (from ${formatSeconds(previousOutputDuration)})`,
  )
  logInfo(
    `Current chapter trim: ${formatSeconds(currentPaddedStart)} -> ${formatSeconds(currentPaddedEnd)}`,
  )

  // Step 5: Extract trimmed segments
  const previousTrimmedPath = buildIntermediatePath(
    tmpDir,
    outputBasePath,
    'previous-trimmed',
  )
  await extractChapterSegmentAccurate({
    inputPath: previousProcessedChapter.outputPath,
    outputPath: previousTrimmedPath,
    start: 0,
    end: previousPaddedEnd,
  })

  const currentTrimmedPath = buildIntermediatePath(
    tmpDir,
    outputBasePath,
    'current-trimmed',
  )
  if (currentPaddedEnd <= currentPaddedStart + 0.005) {
    throw new Error(
      `Invalid trim bounds for current segment: start (${currentPaddedStart.toFixed(3)}s) >= end (${currentPaddedEnd.toFixed(3)}s)`,
    )
  }
  await extractChapterSegmentAccurate({
    inputPath: spliceResult.sourcePath,
    outputPath: currentTrimmedPath,
    start: currentPaddedStart,
    end: currentPaddedEnd,
  })

  // Step 6: Check if current segment has speech
  // Note: We already verified previous chapter has speech at the start of this function
  const previousDuration = previousPaddedEnd
  const currentDuration = currentPaddedEnd - currentPaddedStart
  const currentHasSpeech = await checkSegmentHasSpeech(
    currentTrimmedPath,
    currentDuration,
  )

  if (!currentHasSpeech) {
    throw new Error('Cannot combine: current segment has no speech.')
  }

  // Step 7: Delete old previous chapter output and concatenate segments to final path
  const finalOutputPath = previousProcessedChapter.outputPath
  await safeUnlink(finalOutputPath)

  const combinedDuration = previousDuration + currentDuration
  await concatSegments({
    segmentPaths: [previousTrimmedPath, currentTrimmedPath],
    outputPath: finalOutputPath,
  })

  logInfo(
    `Combined output written to ${path.basename(finalOutputPath)} (${formatSeconds(combinedDuration)})`,
  )

  // Step 8: Cleanup intermediate files
  if (!options.keepIntermediates) {
    await safeUnlink(previousTrimmedPath)
    await safeUnlink(currentTrimmedPath)
  }

  // Step 9: Verify no jarvis in final output
  let jarvisWarning: JarvisWarning | undefined
  const jarvisTranscriptionAudioPath = buildIntermediateAudioPath(
    tmpDir,
    outputBasePath,
    'jarvis-combined',
  )
  await extractTranscriptionAudio({
    inputPath: finalOutputPath,
    outputPath: jarvisTranscriptionAudioPath,
    start: 0,
    end: combinedDuration,
  })
  const jarvisTranscription = await transcribeAudio(
    jarvisTranscriptionAudioPath,
    {
      modelPath: options.whisperModelPath,
      language: options.whisperLanguage,
      binaryPath: options.whisperBinaryPath,
      outputBasePath: buildJarvisOutputBase(tmpDir, outputBasePath),
    },
  )
  const jarvisSegments =
    jarvisTranscription.segmentsSource === 'tokens'
      ? jarvisTranscription.segments
      : scaleTranscriptSegments(jarvisTranscription.segments, combinedDuration)
  const jarvisWordTimings = findWordTimings(jarvisSegments, 'jarvis')
  if (transcriptIncludesWord(jarvisTranscription.text, 'jarvis')) {
    jarvisWarning = {
      chapter: previousProcessedChapter.chapter,
      outputPath: finalOutputPath,
      timestamps: jarvisWordTimings,
    }
    logWarn(
      `Jarvis detected in combined chapter: ${path.basename(finalOutputPath)}`,
    )
  }

  if (!options.keepIntermediates) {
    await safeUnlink(jarvisTranscriptionAudioPath)
  }

  // Step 10: Create edit workspace for combined output
  let editWorkspace: EditWorkspaceInfo | undefined
  if (EDIT_CONFIG.autoCreateEditsDirectory) {
    const workspace = await createEditWorkspace({
      outputDir: options.outputDir,
      sourceVideoPath: finalOutputPath,
      sourceDuration: combinedDuration,
      segments: jarvisSegments,
    })
    editWorkspace = {
      chapter: previousProcessedChapter.chapter,
      outputPath: finalOutputPath,
      reason: 'combine-previous',
      editsDirectory: workspace.editsDirectory,
      transcriptTextPath: workspace.transcriptTextPath,
      transcriptJsonPath: workspace.transcriptJsonPath,
      originalVideoPath: workspace.originalVideoPath,
      instructionsPath: workspace.instructionsPath,
    }
  }

  // Step 11: Track note commands from current chapter
  const jarvisNotes: JarvisNote[] = commandNotes.map((note) => ({
    chapter: previousProcessedChapter.chapter,
    outputPath: finalOutputPath,
    note: note.value,
    timestamp: note.window.start,
  }))

  // Return combined chapter info (using previous chapter's info but with updated duration)
  const processedInfo: ProcessedChapterInfo = {
    chapter: previousProcessedChapter.chapter,
    outputPath: finalOutputPath,
    processedPath: finalOutputPath,
    processedDuration: combinedDuration,
  }

  return {
    status: 'processed',
    jarvisWarning,
    jarvisNotes: jarvisNotes.length > 0 ? jarvisNotes : undefined,
    logWritten: false,
    processedInfo,
    editWorkspace,
  }
}