eprec 0.0.1 → 1.0.1
This diff shows the changes between publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only.
- package/LICENSE +21 -0
- package/README.md +52 -29
- package/cli.ts +150 -0
- package/package.json +39 -7
- package/process-course/chapter-processor.ts +1037 -0
- package/process-course/cli.ts +236 -0
- package/process-course/config.ts +50 -0
- package/process-course/edits/cli.ts +167 -0
- package/process-course/edits/combined-video-editor.ts +316 -0
- package/process-course/edits/edit-workspace.ts +90 -0
- package/process-course/edits/index.ts +20 -0
- package/process-course/edits/regenerate-transcript.ts +84 -0
- package/process-course/edits/remove-ranges.test.ts +36 -0
- package/process-course/edits/remove-ranges.ts +287 -0
- package/process-course/edits/timestamp-refinement.test.ts +25 -0
- package/process-course/edits/timestamp-refinement.ts +172 -0
- package/process-course/edits/transcript-diff.test.ts +105 -0
- package/process-course/edits/transcript-diff.ts +214 -0
- package/process-course/edits/transcript-output.test.ts +50 -0
- package/process-course/edits/transcript-output.ts +36 -0
- package/process-course/edits/types.ts +26 -0
- package/process-course/edits/video-editor.ts +246 -0
- package/process-course/errors.test.ts +63 -0
- package/process-course/errors.ts +82 -0
- package/process-course/ffmpeg.ts +449 -0
- package/process-course/jarvis-commands/handlers.ts +71 -0
- package/process-course/jarvis-commands/index.ts +14 -0
- package/process-course/jarvis-commands/parser.test.ts +348 -0
- package/process-course/jarvis-commands/parser.ts +257 -0
- package/process-course/jarvis-commands/types.ts +46 -0
- package/process-course/jarvis-commands/windows.ts +254 -0
- package/process-course/logging.ts +24 -0
- package/process-course/paths.test.ts +59 -0
- package/process-course/paths.ts +53 -0
- package/process-course/summary.test.ts +209 -0
- package/process-course/summary.ts +210 -0
- package/process-course/types.ts +85 -0
- package/process-course/utils/audio-analysis.test.ts +348 -0
- package/process-course/utils/audio-analysis.ts +463 -0
- package/process-course/utils/chapter-selection.test.ts +307 -0
- package/process-course/utils/chapter-selection.ts +136 -0
- package/process-course/utils/file-utils.test.ts +83 -0
- package/process-course/utils/file-utils.ts +57 -0
- package/process-course/utils/filename.test.ts +27 -0
- package/process-course/utils/filename.ts +12 -0
- package/process-course/utils/time-ranges.test.ts +221 -0
- package/process-course/utils/time-ranges.ts +86 -0
- package/process-course/utils/transcript.test.ts +257 -0
- package/process-course/utils/transcript.ts +86 -0
- package/process-course/utils/video-editing.ts +44 -0
- package/process-course-video.ts +389 -0
- package/speech-detection.ts +355 -0
- package/utils.ts +138 -0
- package/whispercpp-transcribe.ts +345 -0
package/process-course/chapter-processor.ts
@@ -0,0 +1,1037 @@
import path from 'node:path'
import { detectSpeechBounds, checkSegmentHasSpeech } from '../speech-detection'
import { transcribeAudio } from '../whispercpp-transcribe'
import { clamp, formatSeconds } from '../utils'
import {
  COMMAND_CLOSE_WORD,
  COMMAND_WAKE_WORD,
  CONFIG,
  EDIT_CONFIG,
} from './config'
import {
  analyzeLoudness,
  concatSegments,
  extractChapterSegment,
  extractChapterSegmentAccurate,
  extractTranscriptionAudio,
  renderChapter,
} from './ffmpeg'
import {
  buildIntermediateAudioPath,
  buildIntermediatePath,
  buildJarvisOutputBase,
  buildTranscriptionOutputBase,
} from './paths'
import { logInfo, logWarn, writeChapterLog } from './logging'
import { mergeTimeRanges, buildKeepRanges } from './utils/time-ranges'
import {
  findSpeechEndWithRmsFallback,
  findSpeechStartWithRmsFallback,
} from './utils/audio-analysis'
import { safeUnlink } from './utils/file-utils'
import { formatChapterFilename } from './utils/filename'
import { findWordTimings, transcriptIncludesWord } from './utils/transcript'
import { allocateJoinPadding } from './utils/video-editing'
import {
  extractTranscriptCommands,
  scaleTranscriptSegments,
  buildCommandWindows,
  refineCommandWindows,
  analyzeCommands,
  formatCommandTypes,
} from './jarvis-commands'
import type {
  Chapter,
  TimeRange,
  JarvisWarning,
  JarvisEdit,
  JarvisNote,
  ProcessedChapterInfo,
  EditWorkspaceInfo,
} from './types'
import { createEditWorkspace } from './edits'

export interface ChapterProcessingOptions {
  inputPath: string
  outputDir: string
  tmpDir: string
  minChapterDurationSeconds: number
  enableTranscription: boolean
  whisperModelPath: string
  whisperLanguage: string
  whisperBinaryPath: string | undefined
  keepIntermediates: boolean
  writeLogs: boolean
  dryRun: boolean
  previousProcessedChapter?: ProcessedChapterInfo | null
}

export interface ChapterProcessingResult {
  status: 'processed' | 'skipped'
  skipReason?:
    | 'short-initial'
    | 'short-trimmed'
    | 'transcript'
    | 'bad-take'
    | 'dry-run'
  jarvisWarning?: JarvisWarning
  jarvisEdit?: JarvisEdit
  jarvisNotes?: JarvisNote[]
  fallbackNote?: string
  logWritten: boolean
  processedInfo?: ProcessedChapterInfo
  editWorkspace?: EditWorkspaceInfo
}

/**
 * Process a single chapter: extract, normalize, detect commands, splice, trim, and output.
 */
export async function processChapter(
  chapter: Chapter,
  options: ChapterProcessingOptions,
): Promise<ChapterProcessingResult> {
  const duration = chapter.end - chapter.start
  if (duration <= 0) {
    throw new Error(
      `Invalid chapter duration for "${chapter.title}" (${duration}s)`,
    )
  }

  const outputBasePath = path.join(
    options.outputDir,
    `${formatChapterFilename(chapter)}${path.extname(options.inputPath)}`,
  )

  // Check minimum duration before processing
  if (duration < options.minChapterDurationSeconds) {
    logInfo(
      `Skipping chapter ${chapter.index + 1}: ${chapter.title} (${formatSeconds(duration)})`,
    )
    let logWritten = false
    if (options.writeLogs && !options.dryRun) {
      await writeChapterLog(options.tmpDir, outputBasePath, [
        `Chapter: ${chapter.index + 1} - ${chapter.title}`,
        `Input: ${options.inputPath}`,
        `Duration: ${formatSeconds(duration)}`,
        `Skip threshold: ${formatSeconds(options.minChapterDurationSeconds)}`,
        'Reason: Chapter shorter than minimum duration threshold.',
      ])
      logWritten = true
    }
    return { status: 'skipped', skipReason: 'short-initial', logWritten }
  }

  // Dry run - don't actually process
  if (options.dryRun) {
    logInfo(
      `[dry-run] Would process chapter ${chapter.index + 1}: ${chapter.title}`,
    )
    return { status: 'processed', skipReason: 'dry-run', logWritten: false }
  }

  logInfo(`Processing chapter ${chapter.index + 1}: ${chapter.title}`)

  // Build all intermediate paths
  const paths = buildIntermediatePaths(options.tmpDir, outputBasePath)

  try {
    // Step 1: Extract raw segment with padding trimmed
    const rawTrimStart = chapter.start + CONFIG.rawTrimPaddingSeconds
    const rawTrimEnd = chapter.end - CONFIG.rawTrimPaddingSeconds
    const rawDuration = rawTrimEnd - rawTrimStart
    if (rawDuration <= 0) {
      throw new Error(
        `Chapter too short to trim ${CONFIG.rawTrimPaddingSeconds}s from both ends (${formatSeconds(duration)}).`,
      )
    }

    await extractChapterSegment({
      inputPath: options.inputPath,
      outputPath: paths.rawPath,
      start: rawTrimStart,
      end: rawTrimEnd,
    })

    // Step 2: Normalize audio
    const analysis = await analyzeLoudness(paths.rawPath, 0, rawDuration)
    await renderChapter({
      inputPath: paths.rawPath,
      outputPath: paths.normalizedPath,
      absoluteStart: 0,
      absoluteEnd: rawDuration,
      analysis,
    })

    // Step 3: Transcribe and analyze commands
    let commandWindows: TimeRange[] = []
    let commandFilenameOverride: string | null = null
    let hasEditCommand = false
    let commandNotes: Array<{ value: string; window: TimeRange }> = []

    if (options.enableTranscription) {
      const transcriptionResult = await transcribeAndAnalyze({
        normalizedPath: paths.normalizedPath,
        transcriptionAudioPath: paths.transcriptionAudioPath,
        transcriptionOutputBase: paths.transcriptionOutputBase,
        rawDuration,
        options,
      })

      if (transcriptionResult.shouldSkip) {
        let logWritten = false
        if (options.writeLogs) {
          await writeChapterLog(options.tmpDir, outputBasePath, [
            `Chapter: ${chapter.index + 1} - ${chapter.title}`,
            `Input: ${options.inputPath}`,
            `Duration: ${formatSeconds(duration)}`,
            `Reason: ${transcriptionResult.skipReason}`,
          ])
          logWritten = true
        }
        await safeUnlink(outputBasePath)
        return {
          status: 'skipped',
          skipReason: transcriptionResult.hasBadTake
            ? 'bad-take'
            : 'transcript',
          logWritten,
        }
      }

      commandWindows = transcriptionResult.commandWindows
      commandFilenameOverride = transcriptionResult.filenameOverride
      hasEditCommand = transcriptionResult.hasEdit
      commandNotes = transcriptionResult.notes

      // Handle combine-previous command
      if (transcriptionResult.hasCombinePrevious) {
        if (!options.previousProcessedChapter) {
          logWarn(
            `Combine previous command detected for chapter ${chapter.index + 1}, but no previous chapter available. Processing normally.`,
          )
        } else {
          const combineResult = await handleCombinePrevious({
            chapter,
            previousProcessedChapter: options.previousProcessedChapter,
            commandWindows,
            commandNotes,
            normalizedPath: paths.normalizedPath,
            rawDuration,
            tmpDir: options.tmpDir,
            outputBasePath,
            paths,
            options,
          })
          // If combine failed (returned null), continue with normal processing
          if (combineResult !== null) {
            return combineResult
          }
          // Otherwise, fall through to normal processing
        }
      }
    }

    // Step 4: Determine final output path
    const outputTitle = commandFilenameOverride ?? chapter.title
    const finalOutputPath = path.join(
      options.outputDir,
      `${formatChapterFilename({ ...chapter, title: outputTitle })}${path.extname(options.inputPath)}`,
    )

    // Step 5: Handle command splicing
    const spliceResult = await handleCommandSplicing({
      commandWindows,
      normalizedPath: paths.normalizedPath,
      rawDuration,
      tmpDir: options.tmpDir,
      outputBasePath,
      paths,
    })

    // Step 6: Detect speech bounds
    const speechBounds = await detectSpeechBounds(
      spliceResult.sourcePath,
      0,
      spliceResult.sourceDuration,
      spliceResult.sourceDuration,
    )

    let fallbackNote: string | undefined
    let logWritten = false
    if (speechBounds.note) {
      fallbackNote = speechBounds.note
      logInfo(`Speech detection fallback: ${speechBounds.note}`)
      if (options.writeLogs) {
        await writeChapterLog(options.tmpDir, outputBasePath, [
          `Chapter: ${chapter.index + 1} - ${chapter.title}`,
          `Input: ${options.inputPath}`,
          `Reason: ${speechBounds.note}`,
        ])
        logWritten = true
      }
    }

    // Step 7: Apply speech padding
    const paddedStart = clamp(
      speechBounds.start - CONFIG.preSpeechPaddingSeconds,
      0,
      spliceResult.sourceDuration,
    )
    const paddedEnd = clamp(
      speechBounds.end + CONFIG.postSpeechPaddingSeconds,
      0,
      spliceResult.sourceDuration,
    )
    const trimmedDuration = paddedEnd - paddedStart

    if (paddedEnd <= paddedStart + CONFIG.minTrimWindowSeconds) {
      throw new Error(
        `Trim window too small for "${chapter.title}" (${paddedStart}s -> ${paddedEnd}s)`,
      )
    }

    logInfo(
      `Speech bounds: ${formatSeconds(speechBounds.start)} -> ${formatSeconds(speechBounds.end)}, padded to ${formatSeconds(paddedStart)} -> ${formatSeconds(paddedEnd)}`,
    )

    // Step 8: Check trimmed duration
    if (trimmedDuration < options.minChapterDurationSeconds) {
      logInfo(
        `Skipping chapter ${chapter.index + 1}: trimmed ${formatSeconds(trimmedDuration)} < ${formatSeconds(options.minChapterDurationSeconds)}.`,
      )
      if (options.writeLogs) {
        await writeChapterLog(options.tmpDir, outputBasePath, [
          `Chapter: ${chapter.index + 1} - ${chapter.title}`,
          `Input: ${options.inputPath}`,
          `Duration: ${formatSeconds(duration)}`,
          `Trimmed duration: ${formatSeconds(trimmedDuration)}`,
          `Skip threshold: ${formatSeconds(options.minChapterDurationSeconds)}`,
          'Reason: Trimmed duration shorter than minimum duration threshold.',
        ])
        logWritten = true
      }
      await safeUnlink(outputBasePath)
      return { status: 'skipped', skipReason: 'short-trimmed', logWritten }
    }

    // Step 9: Write final output
    await extractChapterSegment({
      inputPath: spliceResult.sourcePath,
      outputPath: finalOutputPath,
      start: paddedStart,
      end: paddedEnd,
    })

    // Step 10: Verify no jarvis in final output
    let jarvisWarning: JarvisWarning | undefined
    await extractTranscriptionAudio({
      inputPath: finalOutputPath,
      outputPath: paths.jarvisTranscriptionAudioPath,
      start: 0,
      end: trimmedDuration,
    })
    const jarvisTranscription = await transcribeAudio(
      paths.jarvisTranscriptionAudioPath,
      {
        modelPath: options.whisperModelPath,
        language: options.whisperLanguage,
        binaryPath: options.whisperBinaryPath,
        outputBasePath: paths.jarvisTranscriptionOutputBase,
      },
    )
    const jarvisSegments =
      jarvisTranscription.segmentsSource === 'tokens'
        ? jarvisTranscription.segments
        : scaleTranscriptSegments(jarvisTranscription.segments, trimmedDuration)
    const jarvisWordTimings = findWordTimings(jarvisSegments, 'jarvis')
    if (transcriptIncludesWord(jarvisTranscription.text, 'jarvis')) {
      jarvisWarning = {
        chapter,
        outputPath: finalOutputPath,
        timestamps: jarvisWordTimings,
      }
      logWarn(
        `Jarvis detected in chapter ${chapter.index + 1}: ${path.basename(finalOutputPath)}`,
      )
    }

    // Step 11: Track edit commands
    let jarvisEdit: JarvisEdit | undefined
    if (hasEditCommand) {
      jarvisEdit = { chapter, outputPath: finalOutputPath }
      logInfo(
        `Edit command detected for chapter ${chapter.index + 1}: ${path.basename(finalOutputPath)}`,
      )
    }

    // Step 12: Track note commands
    const jarvisNotes: JarvisNote[] = commandNotes.map((note) => ({
      chapter,
      outputPath: finalOutputPath,
      note: note.value,
      timestamp: note.window.start,
    }))
    if (jarvisNotes.length > 0) {
      logInfo(
        `Note command${jarvisNotes.length > 1 ? 's' : ''} detected for chapter ${chapter.index + 1}: ${jarvisNotes.map((n) => n.note).join(', ')}`,
      )
    }

    // Step 13: Create edit workspace when needed
    let editWorkspace: EditWorkspaceInfo | undefined
    if (
      EDIT_CONFIG.autoCreateEditsDirectory &&
      (hasEditCommand || jarvisWarning)
    ) {
      const reason = hasEditCommand ? 'edit-command' : 'jarvis-warning'
      const workspace = await createEditWorkspace({
        outputDir: options.outputDir,
        sourceVideoPath: finalOutputPath,
        sourceDuration: trimmedDuration,
        segments: jarvisSegments,
      })
      editWorkspace = {
        chapter,
        outputPath: finalOutputPath,
        reason,
        editsDirectory: workspace.editsDirectory,
        transcriptTextPath: workspace.transcriptTextPath,
        transcriptJsonPath: workspace.transcriptJsonPath,
        originalVideoPath: workspace.originalVideoPath,
        instructionsPath: workspace.instructionsPath,
      }
    }

    const processedInfo: ProcessedChapterInfo = {
      chapter,
      outputPath: finalOutputPath,
      processedPath: finalOutputPath, // Use output path as processed path (intermediates may be cleaned up)
      processedDuration: trimmedDuration,
    }

    return {
      status: 'processed',
      jarvisWarning,
      jarvisEdit,
      jarvisNotes: jarvisNotes.length > 0 ? jarvisNotes : undefined,
      fallbackNote,
      logWritten,
      processedInfo,
      editWorkspace,
    }
  } finally {
    // Cleanup intermediate files
    if (!options.keepIntermediates) {
      await cleanupIntermediateFiles(paths)
    }
  }
}

interface IntermediatePaths {
  rawPath: string
  normalizedPath: string
  transcriptionAudioPath: string
  transcriptionOutputBase: string
  transcriptionTextPath: string
  transcriptionJsonPath: string
  jarvisTranscriptionAudioPath: string
  jarvisTranscriptionOutputBase: string
  jarvisTranscriptionTextPath: string
  jarvisTranscriptionJsonPath: string
  spliceSegmentPaths: string[]
  splicedPath: string | null
}

function buildIntermediatePaths(
  tmpDir: string,
  outputBasePath: string,
): IntermediatePaths {
  const transcriptionOutputBase = buildTranscriptionOutputBase(
    tmpDir,
    outputBasePath,
  )
  const jarvisTranscriptionOutputBase = buildJarvisOutputBase(
    tmpDir,
    outputBasePath,
  )

  return {
    rawPath: buildIntermediatePath(tmpDir, outputBasePath, 'raw'),
    normalizedPath: buildIntermediatePath(tmpDir, outputBasePath, 'normalized'),
    transcriptionAudioPath: buildIntermediateAudioPath(
      tmpDir,
      outputBasePath,
      'transcribe',
    ),
    transcriptionOutputBase,
    transcriptionTextPath: `${transcriptionOutputBase}.txt`,
    transcriptionJsonPath: `${transcriptionOutputBase}.json`,
    jarvisTranscriptionAudioPath: buildIntermediateAudioPath(
      tmpDir,
      outputBasePath,
      'jarvis',
    ),
    jarvisTranscriptionOutputBase,
    jarvisTranscriptionTextPath: `${jarvisTranscriptionOutputBase}.txt`,
    jarvisTranscriptionJsonPath: `${jarvisTranscriptionOutputBase}.json`,
    spliceSegmentPaths: [],
    splicedPath: null,
  }
}

async function cleanupIntermediateFiles(paths: IntermediatePaths) {
  await safeUnlink(paths.rawPath)
  await safeUnlink(paths.normalizedPath)
  await safeUnlink(paths.transcriptionAudioPath)
  await safeUnlink(paths.transcriptionTextPath)
  await safeUnlink(paths.transcriptionJsonPath)
  await safeUnlink(paths.jarvisTranscriptionAudioPath)
  await safeUnlink(paths.jarvisTranscriptionTextPath)
  await safeUnlink(paths.jarvisTranscriptionJsonPath)
  if (paths.splicedPath) {
    await safeUnlink(paths.splicedPath)
  }
  for (const segmentPath of paths.spliceSegmentPaths) {
    await safeUnlink(segmentPath)
  }
}

interface TranscriptionAnalysisResult {
  commandWindows: TimeRange[]
  filenameOverride: string | null
  hasEdit: boolean
  hasBadTake: boolean
  hasCombinePrevious: boolean
  notes: Array<{ value: string; window: TimeRange }>
  shouldSkip: boolean
  skipReason?: string
}

async function transcribeAndAnalyze(params: {
  normalizedPath: string
  transcriptionAudioPath: string
  transcriptionOutputBase: string
  rawDuration: number
  options: ChapterProcessingOptions
}): Promise<TranscriptionAnalysisResult> {
  await extractTranscriptionAudio({
    inputPath: params.normalizedPath,
    outputPath: params.transcriptionAudioPath,
    start: 0,
    end: params.rawDuration,
  })

  const transcriptionResult = await transcribeAudio(
    params.transcriptionAudioPath,
    {
      modelPath: params.options.whisperModelPath,
      language: params.options.whisperLanguage,
      binaryPath: params.options.whisperBinaryPath,
      outputBasePath: params.transcriptionOutputBase,
    },
  )

  const transcript = transcriptionResult.text
  const scaledSegments =
    transcriptionResult.segmentsSource === 'tokens'
      ? transcriptionResult.segments
      : scaleTranscriptSegments(
          transcriptionResult.segments,
          params.rawDuration,
        )

  const commands = extractTranscriptCommands(scaledSegments, {
    wakeWord: COMMAND_WAKE_WORD,
    closeWord: COMMAND_CLOSE_WORD,
  })

  if (commands.length > 0) {
    logInfo(`Commands detected: ${formatCommandTypes(commands)}`)
  }

  const analysis = analyzeCommands(commands, transcript)

  if (analysis.filenameOverride) {
    logInfo(`Filename command: ${analysis.filenameOverride}`)
  }

  if (analysis.shouldSkip) {
    logInfo(`Skipping: ${analysis.skipReason}`)
    return {
      commandWindows: [],
      filenameOverride: analysis.filenameOverride,
      hasEdit: analysis.hasEdit,
      hasBadTake: analysis.hasBadTake,
      hasCombinePrevious: analysis.hasCombinePrevious,
      notes: analysis.notes,
      shouldSkip: true,
      skipReason: analysis.skipReason,
    }
  }

  let commandWindows = buildCommandWindows(commands, {
    offset: 0,
    min: 0,
    max: params.rawDuration,
    paddingSeconds: CONFIG.commandTrimPaddingSeconds,
  })

  if (commandWindows.length > 0) {
    commandWindows = await refineCommandWindows({
      commandWindows,
      inputPath: params.normalizedPath,
      duration: params.rawDuration,
    })
  }

  return {
    commandWindows,
    filenameOverride: analysis.filenameOverride,
    hasEdit: analysis.hasEdit,
    hasBadTake: analysis.hasBadTake,
    hasCombinePrevious: analysis.hasCombinePrevious,
    notes: analysis.notes,
    shouldSkip: false,
  }
}

interface SpliceResult {
  sourcePath: string
  sourceDuration: number
}

async function handleCommandSplicing(params: {
  commandWindows: TimeRange[]
  normalizedPath: string
  rawDuration: number
  tmpDir: string
  outputBasePath: string
  paths: IntermediatePaths
}): Promise<SpliceResult> {
  let sourcePath = params.normalizedPath
  let sourceDuration = params.rawDuration

  if (params.commandWindows.length === 0) {
    return { sourcePath, sourceDuration }
  }

  const mergedCommandWindows = mergeTimeRanges(params.commandWindows)
  const keepRanges = buildKeepRanges(
    0,
    params.rawDuration,
    mergedCommandWindows,
  )

  if (keepRanges.length === 0) {
    throw new Error('Command windows removed entire chapter.')
  }

  const isFullRange =
    keepRanges.length === 1 &&
    keepRanges[0] &&
    keepRanges[0].start <= 0.001 &&
    keepRanges[0].end >= params.rawDuration - 0.001

  if (isFullRange) {
    return { sourcePath, sourceDuration }
  }

  // Check if command is at end - just trim instead of splicing
  const isCommandAtEnd =
    keepRanges.length === 1 && keepRanges[0] && keepRanges[0].start <= 0.001

  if (isCommandAtEnd && keepRanges[0]) {
    sourceDuration = keepRanges[0].end
    logInfo(`Command at end - trimming to ${formatSeconds(sourceDuration)}`)
    return { sourcePath, sourceDuration }
  }

  // Command mid-video - need to splice
  const splicedPath = buildIntermediatePath(
    params.tmpDir,
    params.outputBasePath,
    'spliced',
  )
  params.paths.splicedPath = splicedPath

  const segmentsWithSpeech: { path: string; range: TimeRange }[] = []

  for (const [index, range] of keepRanges.entries()) {
    const segmentPath = buildIntermediatePath(
      params.tmpDir,
      params.outputBasePath,
      `splice-${index + 1}`,
    )
    params.paths.spliceSegmentPaths.push(segmentPath)

    await extractChapterSegmentAccurate({
      inputPath: params.normalizedPath,
      outputPath: segmentPath,
      start: range.start,
      end: range.end,
    })

    // Check if segment has speech using VAD
    const segmentDuration = range.end - range.start
    const hasSpeech = await checkSegmentHasSpeech(segmentPath, segmentDuration)

    if (hasSpeech) {
      segmentsWithSpeech.push({ path: segmentPath, range })
    } else {
      logInfo(
        `Splice segment ${index + 1} has no speech, excluding from combined output`,
      )
    }
  }

  if (segmentsWithSpeech.length === 0) {
    throw new Error('All splice segments have no speech.')
  }

  if (segmentsWithSpeech.length === 1 && segmentsWithSpeech[0]) {
    // Only one segment with speech - use it directly without concat
    sourcePath = segmentsWithSpeech[0].path
    sourceDuration =
      segmentsWithSpeech[0].range.end - segmentsWithSpeech[0].range.start
    params.paths.splicedPath = null // Don't delete the segment we're using
    logInfo(
      `Using single segment with speech, duration: ${formatSeconds(sourceDuration)}`,
    )
  } else {
    await concatSegments({
      segmentPaths: segmentsWithSpeech.map((s) => s.path),
      outputPath: splicedPath,
    })
    sourcePath = splicedPath
    sourceDuration = segmentsWithSpeech.reduce(
      (total, s) => total + (s.range.end - s.range.start),
      0,
    )
    logInfo(
      `Spliced ${segmentsWithSpeech.length} segments (of ${keepRanges.length}), combined duration: ${formatSeconds(sourceDuration)}`,
    )
  }

  return { sourcePath, sourceDuration }
}

async function handleCombinePrevious(params: {
  chapter: Chapter
  previousProcessedChapter: ProcessedChapterInfo
  commandWindows: TimeRange[]
  commandNotes: Array<{ value: string; window: TimeRange }>
  normalizedPath: string
  rawDuration: number
  tmpDir: string
  outputBasePath: string
  paths: IntermediatePaths
  options: ChapterProcessingOptions
}): Promise<ChapterProcessingResult | null> {
  const {
    chapter,
    previousProcessedChapter,
    commandWindows,
    commandNotes,
    normalizedPath,
    rawDuration,
    tmpDir,
    outputBasePath,
    paths,
    options,
  } = params

  // Check if previous chapter has speech before attempting to combine
  // If it doesn't, return null to signal caller should try with an earlier chapter
  const previousHasSpeech = await checkSegmentHasSpeech(
    previousProcessedChapter.outputPath,
    previousProcessedChapter.processedDuration,
  )

  if (!previousHasSpeech) {
    logInfo(
      `Previous chapter ${previousProcessedChapter.chapter.index + 1} has no speech. Cannot combine with chapter ${chapter.index + 1}.`,
    )
    return null
  }

  logInfo(
    `Combining chapter ${chapter.index + 1} with previous chapter ${previousProcessedChapter.chapter.index + 1}`,
  )

  // Step 1: Remove combine-previous command window from current chapter
  const spliceResult = await handleCommandSplicing({
    commandWindows,
    normalizedPath,
    rawDuration,
    tmpDir,
    outputBasePath,
    paths,
  })

  // Step 2: Detect speech bounds on current chapter (after splicing)
  const currentSpeechBounds = await detectSpeechBounds(
    spliceResult.sourcePath,
    0,
    spliceResult.sourceDuration,
    spliceResult.sourceDuration,
  )

  // Step 3: Trim end of previous chapter's output
  // Load the previous chapter's output and detect speech bounds on the end portion
  const previousOutputDuration = previousProcessedChapter.processedDuration
  const endSearchWindow = Math.min(
    previousOutputDuration * 0.3, // Search last 30% of previous chapter
    EDIT_CONFIG.speechSearchWindowSeconds * 2, // Or up to 2x the silence search window
  )
  const previousEndSearchStart = Math.max(
    0,
    previousOutputDuration - endSearchWindow,
  )

  // Detect speech bounds on the end portion
  const previousEndSpeechBounds = await detectSpeechBounds(
    previousProcessedChapter.outputPath,
    previousEndSearchStart,
    previousOutputDuration,
    previousOutputDuration,
  )

  // Convert relative bounds to absolute times (detectSpeechBounds returns bounds relative to chapterStart)
  // However, when VAD fails and uses speechFallback, the returned end value is already absolute (duration)
  const absoluteSpeechEnd = previousEndSpeechBounds.note
    ? previousEndSpeechBounds.end // Fallback case: already absolute
    : previousEndSearchStart + previousEndSpeechBounds.end // Normal case: convert relative to absolute
  let effectiveSpeechEnd = absoluteSpeechEnd
  if (
    previousEndSpeechBounds.note ||
    previousOutputDuration - absoluteSpeechEnd < 0.05
  ) {
    const rmsSpeechEnd = await findSpeechEndWithRmsFallback({
      inputPath: previousProcessedChapter.outputPath,
      start: previousEndSearchStart,
      duration: previousOutputDuration - previousEndSearchStart,
    })
    if (rmsSpeechEnd !== null) {
      effectiveSpeechEnd = previousEndSearchStart + rmsSpeechEnd
    }
  }

  const finalPreviousEnd = effectiveSpeechEnd

  let effectiveSpeechStart = currentSpeechBounds.start
  if (currentSpeechBounds.note || currentSpeechBounds.start <= 0.05) {
    const rmsSpeechStart = await findSpeechStartWithRmsFallback({
      inputPath: spliceResult.sourcePath,
      start: 0,
      duration: spliceResult.sourceDuration,
    })
    if (rmsSpeechStart !== null) {
      effectiveSpeechStart = rmsSpeechStart
    }
  }
  const finalCurrentStart = effectiveSpeechStart

  let currentEffectiveSpeechEnd = currentSpeechBounds.end
  if (
    currentSpeechBounds.note ||
    spliceResult.sourceDuration - currentSpeechBounds.end < 0.05
  ) {
    const rmsSpeechEnd = await findSpeechEndWithRmsFallback({
      inputPath: spliceResult.sourcePath,
      start: 0,
      duration: spliceResult.sourceDuration,
    })
    if (rmsSpeechEnd !== null) {
      currentEffectiveSpeechEnd = rmsSpeechEnd
    }
  }
  const finalCurrentEnd = currentEffectiveSpeechEnd

  // Apply padding (maximize total gap if one side lacks silence)
  const speechPaddingSeconds = EDIT_CONFIG.speechBoundaryPaddingMs / 1000
  const previousAvailableSilence = Math.max(
    0,
    previousOutputDuration - finalPreviousEnd,
  )
  const currentAvailableSilence = Math.max(0, finalCurrentStart)
  const { previousPaddingSeconds, currentPaddingSeconds } = allocateJoinPadding(
    {
      paddingSeconds: speechPaddingSeconds,
      previousAvailableSeconds: previousAvailableSilence,
      currentAvailableSeconds: currentAvailableSilence,
    },
  )
  const previousPaddedEnd = clamp(
    finalPreviousEnd + previousPaddingSeconds,
    0,
    previousOutputDuration,
  )
  const currentPaddedStart = clamp(
    finalCurrentStart - currentPaddingSeconds,
    0,
    spliceResult.sourceDuration,
  )
  const currentPaddedEnd = clamp(
    finalCurrentEnd + speechPaddingSeconds,
    0,
    spliceResult.sourceDuration,
  )

  logInfo(
    `Previous chapter trim: ${formatSeconds(previousPaddedEnd)} (from ${formatSeconds(previousOutputDuration)})`,
  )
  logInfo(
    `Current chapter trim: ${formatSeconds(currentPaddedStart)} -> ${formatSeconds(currentPaddedEnd)}`,
  )

  // Step 5: Extract trimmed segments
  const previousTrimmedPath = buildIntermediatePath(
    tmpDir,
    outputBasePath,
    'previous-trimmed',
  )
  await extractChapterSegmentAccurate({
    inputPath: previousProcessedChapter.outputPath,
    outputPath: previousTrimmedPath,
    start: 0,
    end: previousPaddedEnd,
  })

  const currentTrimmedPath = buildIntermediatePath(
    tmpDir,
    outputBasePath,
    'current-trimmed',
  )
  if (currentPaddedEnd <= currentPaddedStart + 0.005) {
    throw new Error(
      `Invalid trim bounds for current segment: start (${currentPaddedStart.toFixed(3)}s) >= end (${currentPaddedEnd.toFixed(3)}s)`,
    )
  }
  await extractChapterSegmentAccurate({
    inputPath: spliceResult.sourcePath,
    outputPath: currentTrimmedPath,
    start: currentPaddedStart,
    end: currentPaddedEnd,
  })

  // Step 6: Check if current segment has speech
  // Note: We already verified previous chapter has speech at the start of this function
  const previousDuration = previousPaddedEnd
  const currentDuration = currentPaddedEnd - currentPaddedStart
  const currentHasSpeech = await checkSegmentHasSpeech(
    currentTrimmedPath,
    currentDuration,
  )

  if (!currentHasSpeech) {
    throw new Error(`Cannot combine: current segment has no speech.`)
  }

  // Step 7: Delete old previous chapter output and concatenate segments to final path
  const finalOutputPath = previousProcessedChapter.outputPath
  await safeUnlink(finalOutputPath)

  const combinedDuration = previousDuration + currentDuration
  await concatSegments({
    segmentPaths: [previousTrimmedPath, currentTrimmedPath],
    outputPath: finalOutputPath,
  })

  logInfo(
    `Combined output written to ${path.basename(finalOutputPath)} (${formatSeconds(combinedDuration)})`,
  )

  // Step 9: Cleanup intermediate files
  if (!options.keepIntermediates) {
    await safeUnlink(previousTrimmedPath)
    await safeUnlink(currentTrimmedPath)
  }

  // Step 10: Verify no jarvis in final output
  let jarvisWarning: JarvisWarning | undefined
  const jarvisTranscriptionAudioPath = buildIntermediateAudioPath(
    tmpDir,
    outputBasePath,
    'jarvis-combined',
  )
  await extractTranscriptionAudio({
    inputPath: finalOutputPath,
    outputPath: jarvisTranscriptionAudioPath,
    start: 0,
    end: combinedDuration,
  })
  const jarvisTranscription = await transcribeAudio(
    jarvisTranscriptionAudioPath,
    {
      modelPath: options.whisperModelPath,
      language: options.whisperLanguage,
      binaryPath: options.whisperBinaryPath,
      outputBasePath: buildJarvisOutputBase(tmpDir, outputBasePath),
    },
  )
  const jarvisSegments =
    jarvisTranscription.segmentsSource === 'tokens'
      ? jarvisTranscription.segments
      : scaleTranscriptSegments(jarvisTranscription.segments, combinedDuration)
  const jarvisWordTimings = findWordTimings(jarvisSegments, 'jarvis')
  if (transcriptIncludesWord(jarvisTranscription.text, 'jarvis')) {
    jarvisWarning = {
      chapter: previousProcessedChapter.chapter,
      outputPath: finalOutputPath,
      timestamps: jarvisWordTimings,
    }
    logWarn(
      `Jarvis detected in combined chapter: ${path.basename(finalOutputPath)}`,
    )
  }

  if (!options.keepIntermediates) {
    await safeUnlink(jarvisTranscriptionAudioPath)
  }

  // Step 11: Create edit workspace for combined output
  let editWorkspace: EditWorkspaceInfo | undefined
  if (EDIT_CONFIG.autoCreateEditsDirectory) {
    const workspace = await createEditWorkspace({
      outputDir: options.outputDir,
      sourceVideoPath: finalOutputPath,
      sourceDuration: combinedDuration,
      segments: jarvisSegments,
    })
    editWorkspace = {
      chapter: previousProcessedChapter.chapter,
      outputPath: finalOutputPath,
      reason: 'combine-previous',
      editsDirectory: workspace.editsDirectory,
      transcriptTextPath: workspace.transcriptTextPath,
      transcriptJsonPath: workspace.transcriptJsonPath,
      originalVideoPath: workspace.originalVideoPath,
      instructionsPath: workspace.instructionsPath,
    }
  }

  // Step 12: Track note commands from current chapter
  const jarvisNotes: JarvisNote[] = commandNotes.map((note) => ({
    chapter: previousProcessedChapter.chapter,
    outputPath: finalOutputPath,
    note: note.value,
    timestamp: note.window.start,
  }))

  // Return combined chapter info (using previous chapter's info but with updated duration)
  const processedInfo: ProcessedChapterInfo = {
    chapter: previousProcessedChapter.chapter,
    outputPath: finalOutputPath,
    processedPath: finalOutputPath,
    processedDuration: combinedDuration,
  }

  return {
    status: 'processed',
    jarvisWarning,
    jarvisNotes: jarvisNotes.length > 0 ? jarvisNotes : undefined,
    logWritten: false,
    processedInfo,
    editWorkspace,
  }
}