eprec 1.10.2 → 1.11.0
package/package.json
CHANGED
@@ -7,14 +7,18 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
+  pauseActiveSpinner,
   resolveOptionalString,
+  resumeActiveSpinner,
   type PathPicker,
   type Prompter,
   withSpinner,
 } from '../../cli-ux'
 import { editVideo, buildEditedOutputPath } from './video-editor'
 import { combineVideos } from './combined-video-editor'
+import { setLogHooks } from '../logging'

 export type EditVideoCommandArgs = {
   input: string
@@ -176,21 +180,33 @@ function resolvePaddingMs(value: unknown) {
 export function createEditVideoHandler(options: CliUxOptions): CommandHandler {
   return async (argv) => {
     const args = await resolveEditVideoArgs(argv, options)
+    const progress = options.interactive
+      ? createStepProgressReporter({ action: 'Editing video' })
+      : undefined
     await withSpinner(
       'Editing video',
       async () => {
-        const result = await editVideo({
-          inputPath: String(args.input),
-          transcriptJsonPath: String(args.transcript),
-          editedTextPath: String(args.edited),
-          outputPath: String(args.output),
-          paddingMs: args['padding-ms'],
+        setLogHooks({
+          beforeLog: pauseActiveSpinner,
+          afterLog: resumeActiveSpinner,
         })
-        if (!result.success) {
-          throw new Error(result.error ?? 'Edit failed.')
+        try {
+          const result = await editVideo({
+            inputPath: String(args.input),
+            transcriptJsonPath: String(args.transcript),
+            editedTextPath: String(args.edited),
+            outputPath: String(args.output),
+            paddingMs: args['padding-ms'],
+            progress,
+          })
+          if (!result.success) {
+            throw new Error(result.error ?? 'Edit failed.')
+          }
+        } finally {
+          setLogHooks({})
         }
       },
-      { successText: 'Edit complete' },
+      { successText: 'Edit complete', enabled: options.interactive },
     )
     console.log(`Edited video written to ${args.output}`)
   }
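The pattern established in this hunk repeats throughout the release: while a spinner owns the terminal line, every log write must pause it first and resume it afterwards, or the output interleaves with the spinner's redraws. The handler installs pauseActiveSpinner/resumeActiveSpinner as global log hooks for the duration of the operation and clears them in a finally block; the spinner itself is also gated on options.interactive via the new enabled flag. A minimal sketch of the logging seam this implies follows; the hook shape comes from the calls above, while the log wrapper itself is an assumption about what '../logging' exports:

// Hypothetical sketch of '../logging', inferred from the setLogHooks calls above.
type LogHooks = {
  beforeLog?: () => void
  afterLog?: () => void
}

let hooks: LogHooks = {}

// setLogHooks({ beforeLog, afterLog }) installs the hooks;
// setLogHooks({}) clears them again, as the finally blocks above do.
export function setLogHooks(next: LogHooks): void {
  hooks = next
}

// Each log line is bracketed by the hooks, so a live spinner is
// paused before the write and resumed after it.
export function log(message: string): void {
  hooks.beforeLog?.()
  console.log(message)
  hooks.afterLog?.()
}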
@@ -201,26 +217,47 @@ export function createCombineVideosHandler(
 ): CommandHandler {
   return async (argv) => {
     const args = await resolveCombineVideosArgs(argv, options)
+    const progress = options.interactive
+      ? createStepProgressReporter({ action: 'Combining videos' })
+      : undefined
+    const editProgressFactory = options.interactive
+      ? (detail: string) =>
+          createStepProgressReporter({
+            action: 'Combining videos',
+            detail,
+            maxLabelLength: 28,
+          })
+      : undefined
     let outputPath = ''
     await withSpinner(
       'Combining videos',
       async () => {
-        const result = await combineVideos({
-          video1Path: String(args.video1),
-          video1TranscriptJsonPath: args.transcript1,
-          video1EditedTextPath: args.edited1,
-          video2Path: String(args.video2),
-          video2TranscriptJsonPath: args.transcript2,
-          video2EditedTextPath: args.edited2,
-          outputPath: String(args.output),
-          overlapPaddingMs: args['padding-ms'],
+        setLogHooks({
+          beforeLog: pauseActiveSpinner,
+          afterLog: resumeActiveSpinner,
         })
-        if (!result.success) {
-          throw new Error(result.error ?? 'Combine failed.')
+        try {
+          const result = await combineVideos({
+            video1Path: String(args.video1),
+            video1TranscriptJsonPath: args.transcript1,
+            video1EditedTextPath: args.edited1,
+            video2Path: String(args.video2),
+            video2TranscriptJsonPath: args.transcript2,
+            video2EditedTextPath: args.edited2,
+            outputPath: String(args.output),
+            overlapPaddingMs: args['padding-ms'],
+            progress,
+            editProgressFactory,
+          })
+          if (!result.success) {
+            throw new Error(result.error ?? 'Combine failed.')
+          }
+          outputPath = result.outputPath
+        } finally {
+          setLogHooks({})
         }
-        outputPath = result.outputPath
       },
-      { successText: 'Combine complete' },
+      { successText: 'Combine complete', enabled: options.interactive },
     )
     console.log(`Combined video written to ${outputPath}`)
   }
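The combine handler adds one wrinkle on top of the same pattern: besides the top-level reporter it builds an editProgressFactory, so each optional editVideo pass inside combineVideos can get its own short-lived reporter labelled with the video it is processing (see the applyOptionalEdits hunks below); maxLabelLength: 28 is presumably there to keep those longer detail labels from overflowing the spinner line.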
@@ -14,6 +14,7 @@ import {
   findSpeechStartWithRmsFallback,
 } from '../utils/audio-analysis'
 import { allocateJoinPadding } from '../utils/video-editing'
+import type { StepProgressReporter } from '../../progress-reporter'

 export interface CombineVideosOptions {
   video1Path: string
@@ -26,6 +27,8 @@ export interface CombineVideosOptions {
   video2Duration?: number
   outputPath: string
   overlapPaddingMs?: number
+  progress?: StepProgressReporter
+  editProgressFactory?: (detail: string) => StepProgressReporter | undefined
 }

 export interface CombineVideosResult {
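Everything this diff does with a StepProgressReporter goes through five call sites: createStepProgressReporter({ action, detail?, maxLabelLength? }), then start({ stepCount, label }), step(label), setLabel(label), and finish(label). A sketch of the interface those call sites imply, with a toy console-backed factory for illustration; the signatures are inferred from usage in this diff, not taken from the actual progress-reporter module:

// Inferred from the call sites in this diff; not the real module.
export interface StepProgressReporter {
  // Begin reporting; renders the first step immediately (the diff's
  // stepCount arithmetic only balances if start() counts as step 1).
  start(options: { stepCount: number; label: string }): void
  // Advance to the next step with a new label.
  step(label: string): void
  // Relabel the current step without advancing the count.
  setLabel(label: string): void
  // Close out the reporter with a final status label.
  finish(label: string): void
}

export function createStepProgressReporter(options: {
  action: string // e.g. 'Combining videos'
  detail?: string // e.g. 'Edit first video'
  maxLabelLength?: number // assumed: a label truncation width
}): StepProgressReporter {
  let current = 0
  let total = 0
  const prefix = options.detail
    ? `${options.action} (${options.detail})`
    : options.action
  const clip = (label: string) =>
    options.maxLabelLength ? label.slice(0, options.maxLabelLength) : label
  const render = (label: string) =>
    console.log(`${prefix}: step ${current}/${total}: ${clip(label)}`)
  return {
    start({ stepCount, label }) {
      total = stepCount
      current = 1
      render(label)
    },
    step(label) {
      current += 1
      render(label)
    },
    setLabel(label) {
      render(label)
    },
    finish(label) {
      current = total
      render(label)
    },
  }
}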
@@ -39,12 +42,17 @@ export interface CombineVideosResult {
 export async function combineVideos(
   options: CombineVideosOptions,
 ): Promise<CombineVideosResult> {
+  const progress = options.progress
+  const totalSteps = 5
+  progress?.start({ stepCount: totalSteps, label: 'Preparing edits' })
+
   const tempDir = await mkdtemp(path.join(os.tmpdir(), 'video-combine-'))
   try {
     const { video1Path, video2Path } = await applyOptionalEdits(
       options,
       tempDir,
     )
+    progress?.step('Measuring durations')
     const editsApplied =
       options.video1EditedTextPath || options.video2EditedTextPath
     const video1Duration = editsApplied
@@ -54,6 +62,8 @@ export async function combineVideos(
       ? await getMediaDurationSeconds(video2Path)
       : (options.video2Duration ?? (await getMediaDurationSeconds(video2Path)))

+    progress?.step('Detecting speech')
+    progress?.setLabel('Checking first video')
     const video1HasSpeech = await checkSegmentHasSpeech(
       video1Path,
       video1Duration,
@@ -67,6 +77,7 @@ export async function combineVideos(
       }
     }

+    progress?.setLabel('Finding first video speech end')
     const paddingSeconds =
       (options.overlapPaddingMs ?? EDIT_CONFIG.speechBoundaryPaddingMs) / 1000

@@ -74,6 +85,7 @@ export async function combineVideos(
       inputPath: video1Path,
       duration: video1Duration,
     })
+    progress?.setLabel('Finding second video speech bounds')
     const { speechStart: video2SpeechStart, speechEnd: video2SpeechEnd } =
       await findVideo2SpeechBounds({
         inputPath: video2Path,
@@ -103,8 +115,10 @@ export async function combineVideos(
       video2Duration,
     )

+    progress?.step('Trimming segments')
     const segment1Path = path.join(tempDir, 'segment-1.mp4')
     const segment2Path = path.join(tempDir, 'segment-2.mp4')
+    progress?.setLabel('Extracting segment 1/2')
     await extractChapterSegmentAccurate({
       inputPath: video1Path,
       outputPath: segment1Path,
@@ -119,6 +133,7 @@ export async function combineVideos(
         video2TrimStart,
       }
     }
+    progress?.setLabel('Extracting segment 2/2')
     await extractChapterSegmentAccurate({
       inputPath: video2Path,
       outputPath: segment2Path,
@@ -126,6 +141,7 @@ export async function combineVideos(
       end: video2TrimEnd,
     })

+    progress?.setLabel('Validating trimmed speech')
     const segment2HasSpeech = await checkSegmentHasSpeech(
       segment2Path,
       video2TrimEnd - video2TrimStart,
@@ -139,6 +155,7 @@ export async function combineVideos(
       }
     }

+    progress?.step('Combining output')
     const resolvedOutputPath = await resolveOutputPath(
       options.outputPath,
       video1Path,
@@ -151,6 +168,7 @@ export async function combineVideos(
       outputPath: resolvedOutputPath,
     })
     await finalizeOutput(resolvedOutputPath, options.outputPath)
+    progress?.finish('Complete')

     return {
       success: true,
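A note on the step arithmetic in combineVideos above: stepCount is 5 but only four step() calls follow ('Measuring durations', 'Detecting speech', 'Trimming segments', 'Combining output'), which balances only if start() itself renders the first step ('Preparing edits'); the interleaved setLabel() calls relabel whichever step is current without advancing the count. The same accounting holds for editVideo below ('Loading transcript' plus four steps for its stepCount of 5).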
@@ -176,18 +194,21 @@ async function applyOptionalEdits(
 ): Promise<{ video1Path: string; video2Path: string }> {
   let video1Path = options.video1Path
   let video2Path = options.video2Path
+  const editProgressFactory = options.editProgressFactory

   if (options.video1EditedTextPath) {
     if (!options.video1TranscriptJsonPath) {
       throw new Error('Missing transcript JSON for first video edits.')
     }
     const editedPath = path.join(tempDir, 'video1-edited.mp4')
+    const progress = editProgressFactory?.('Edit first video')
    const result = await editVideo({
       inputPath: options.video1Path,
       transcriptJsonPath: options.video1TranscriptJsonPath,
       editedTextPath: options.video1EditedTextPath,
       outputPath: editedPath,
       paddingMs: options.overlapPaddingMs,
+      progress,
     })
     if (!result.success) {
       throw new Error(result.error ?? 'Failed to edit first video.')
@@ -200,12 +221,14 @@ async function applyOptionalEdits(
       throw new Error('Missing transcript JSON for second video edits.')
     }
     const editedPath = path.join(tempDir, 'video2-edited.mp4')
+    const progress = editProgressFactory?.('Edit second video')
     const result = await editVideo({
       inputPath: options.video2Path,
       transcriptJsonPath: options.video2TranscriptJsonPath,
       editedTextPath: options.video2EditedTextPath,
       outputPath: editedPath,
       paddingMs: options.overlapPaddingMs,
+      progress,
     })
     if (!result.success) {
       throw new Error(result.error ?? 'Failed to edit second video.')
@@ -11,6 +11,7 @@ import {
 } from './timestamp-refinement'
 import type { TimeRange } from '../types'
 import type { TranscriptJson, TranscriptWordWithIndex } from './types'
+import type { StepProgressReporter } from '../../progress-reporter'

 export interface EditVideoOptions {
   inputPath: string
@@ -18,6 +19,7 @@ export interface EditVideoOptions {
   editedTextPath: string
   outputPath: string
   paddingMs?: number
+  progress?: StepProgressReporter
 }

 export interface EditVideoResult {
@@ -37,8 +39,13 @@ export async function editVideo(
   options: EditVideoOptions,
 ): Promise<EditVideoResult> {
   try {
+    const progress = options.progress
+    const totalSteps = 5
+    progress?.start({ stepCount: totalSteps, label: 'Loading transcript' })
+
     const transcript = await readTranscriptJson(options.transcriptJsonPath)
     const editedText = await Bun.file(options.editedTextPath).text()
+    progress?.step('Validating edits')
     const validation = validateEditedTranscript({
       originalWords: transcript.words,
       editedText,
@@ -51,6 +58,7 @@ export async function editVideo(
         removedRanges: [],
       }
     }
+    progress?.step('Diffing transcript')
     const diffResult = diffTranscripts({
       originalWords: transcript.words,
       editedText,
@@ -64,9 +72,12 @@ export async function editVideo(
       }
     }

+    progress?.step('Planning edits')
     const removedWords = diffResult.removedWords
     if (removedWords.length === 0) {
+      progress?.step('Rendering output')
       await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('No edits')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -77,7 +88,9 @@ export async function editVideo(

     const removalRanges = wordsToTimeRanges(removedWords)
     if (removalRanges.length === 0) {
+      progress?.step('Rendering output')
       await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('No ranges')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -86,6 +99,7 @@ export async function editVideo(
       }
     }

+    progress?.setLabel('Refining ranges')
     const refinedRanges = await refineAllRemovalRanges({
       inputPath: options.inputPath,
       duration: transcript.source_duration,
@@ -111,6 +125,7 @@ export async function editVideo(
       }
     }

+    progress?.step('Rendering output')
     await mkdir(path.dirname(options.outputPath), { recursive: true })

     const isFullRange =
@@ -120,6 +135,7 @@ export async function editVideo(
       keepRanges[0].end >= transcript.source_duration - 0.001
     if (isFullRange) {
       await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -129,12 +145,14 @@ export async function editVideo(
     }

     if (keepRanges.length === 1 && keepRanges[0]) {
+      progress?.setLabel('Extracting segment')
       await extractChapterSegmentAccurate({
         inputPath: options.inputPath,
         outputPath: options.outputPath,
         start: keepRanges[0].start,
         end: keepRanges[0].end,
       })
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -147,6 +165,9 @@ export async function editVideo(
     try {
       const segmentPaths: string[] = []
       for (const [index, range] of keepRanges.entries()) {
+        progress?.setLabel(
+          `Extracting segment ${index + 1}/${keepRanges.length}`,
+        )
         const segmentPath = path.join(tempDir, `segment-${index + 1}.mp4`)
         await extractChapterSegmentAccurate({
           inputPath: options.inputPath,
@@ -156,10 +177,12 @@ export async function editVideo(
         })
         segmentPaths.push(segmentPath)
       }
+      progress?.setLabel('Concatenating segments')
       await concatSegments({
         segmentPaths,
         outputPath: options.outputPath,
       })
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
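Two details worth noticing across the editVideo hunks: the reporter is threaded through as an optional and only ever invoked with progress?., so non-interactive callers simply pass undefined and no null-object is needed; and every return path (no removed words, no removal ranges, a full-range keep, a single-segment keep, and the multi-segment concat) ends by calling progress?.finish(...), so the reporter is closed no matter which branch produces the output file.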
package/src/cli.ts
CHANGED
@@ -27,6 +27,7 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
   pauseActiveSpinner,
   resumeActiveSpinner,
@@ -156,18 +157,33 @@ async function main(rawArgs = hideBin(process.argv)) {
         }),
         async (argv) => {
           const transcribeArgs = await resolveTranscribeArgs(argv, context)
+          const progress = context.interactive
+            ? createStepProgressReporter({ action: 'Transcribing audio' })
+            : undefined
           let resultText = ''
           await withSpinner(
             'Transcribing audio',
             async () => {
-              const result = await transcribeAudio(transcribeArgs.inputPath, {
-                modelPath: transcribeArgs.modelPath,
-                language: transcribeArgs.language,
-                threads: transcribeArgs.threads,
-                binaryPath: transcribeArgs.binaryPath,
-                outputBasePath: transcribeArgs.outputBasePath,
+              setLogHooks({
+                beforeLog: pauseActiveSpinner,
+                afterLog: resumeActiveSpinner,
               })
-              resultText = result.text
+              try {
+                const result = await transcribeAudio(
+                  transcribeArgs.inputPath,
+                  {
+                    modelPath: transcribeArgs.modelPath,
+                    language: transcribeArgs.language,
+                    threads: transcribeArgs.threads,
+                    binaryPath: transcribeArgs.binaryPath,
+                    outputBasePath: transcribeArgs.outputBasePath,
+                    progress,
+                  },
+                )
+                resultText = result.text
+              } finally {
+                setLogHooks({})
+              }
             },
             {
               successText: 'Transcription complete',
@@ -203,16 +219,28 @@ async function main(rawArgs = hideBin(process.argv)) {
             argv,
             context,
           )
+          const progress = context.interactive
+            ? createStepProgressReporter({ action: 'Detecting speech' })
+            : undefined
          let segments: unknown = []
           await withSpinner(
             'Detecting speech',
             async () => {
-              await ensureFfmpegAvailable()
-              segments = await detectSpeechSegmentsForFile({
-                inputPath,
-                start,
-                end,
+              setLogHooks({
+                beforeLog: pauseActiveSpinner,
+                afterLog: resumeActiveSpinner,
               })
+              try {
+                await ensureFfmpegAvailable()
+                segments = await detectSpeechSegmentsForFile({
+                  inputPath,
+                  start,
+                  end,
+                  progress,
+                })
+              } finally {
+                setLogHooks({})
+              }
             },
             {
               successText: 'Speech detection complete',
package/src/speech-detection.ts
CHANGED
@@ -6,6 +6,7 @@ import { CONFIG } from '../process-course/config'
 import { formatSeconds, getMediaDurationSeconds } from './utils'
 import { speechFallback } from '../process-course/utils/audio-analysis'
 import type { SpeechBounds } from '../process-course/types'
+import type { StepProgressReporter } from '../progress-reporter'

 export type VadConfig = {
   vadWindowSamples: number
@@ -27,6 +28,7 @@ export async function detectSpeechSegmentsWithVad(
   samples: Float32Array,
   sampleRate: number,
   config: VadConfig,
+  options?: { onProgress?: () => void; updateStride?: number },
 ): Promise<VadSegment[]> {
   const vadSession = await getVadSession(config)
   const probabilities = await getVadProbabilities(
@@ -34,6 +36,7 @@ export async function detectSpeechSegmentsWithVad(
     sampleRate,
     config,
     vadSession,
+    options,
   )
   return probabilitiesToSegments(
     samples.length,
@@ -47,7 +50,10 @@ export async function detectSpeechSegmentsForFile(options: {
   inputPath: string
   start?: number
   end?: number
+  progress?: StepProgressReporter
 }): Promise<SpeechSegment[]> {
+  const progress = options.progress
+  progress?.start({ stepCount: 1, label: 'Loading audio' })
   const start = options.start ?? 0
   if (!Number.isFinite(start) || start < 0) {
     throw new Error('Start time must be a non-negative number.')
@@ -66,13 +72,31 @@ export async function detectSpeechSegmentsForFile(options: {
     sampleRate: CONFIG.vadSampleRate,
   })
   if (samples.length === 0) {
+    progress?.finish('No audio')
     return []
   }
+  const windowSamples = CONFIG.vadWindowSamples
+  const totalWindows = Math.ceil(samples.length / windowSamples)
+  const updateStride = Math.max(1, Math.floor(totalWindows / 50))
+  const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride))
+  progress?.start({ stepCount: updateCount, label: 'Running VAD' })
+  let progressUpdates = 0
   const segments = await detectSpeechSegmentsWithVad(
     samples,
     CONFIG.vadSampleRate,
     CONFIG,
+    {
+      onProgress: () => {
+        progressUpdates += 1
+        if (progressUpdates <= updateCount) {
+          progress?.step('Running VAD')
+        }
+      },
+      updateStride,
+    },
   )
+  progress?.setLabel('Building segments')
+  progress?.finish('Complete')
   return segments.map((segment) => ({
     start: segment.start + start,
     end: segment.end + start,
@@ -116,6 +140,7 @@ async function getVadProbabilities(
   sampleRate: number,
   config: VadConfig,
   session: ort.InferenceSession,
+  options?: { onProgress?: () => void; updateStride?: number },
 ) {
   const windowSamples = config.vadWindowSamples
   const srTensor = new ort.Tensor(
@@ -126,6 +151,8 @@ async function getVadProbabilities(
   const probabilities: number[] = []
   let stateH = new Float32Array(2 * 1 * 64)
   let stateC = new Float32Array(2 * 1 * 64)
+  const updateStride = Math.max(1, Math.floor(options?.updateStride ?? 1))
+  let updateIndex = 0

   for (let offset = 0; offset < samples.length; offset += windowSamples) {
     const chunk = samples.subarray(offset, offset + windowSamples)
@@ -154,6 +181,10 @@ async function getVadProbabilities(
     probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
     stateH = new Float32Array(nextH.data as Float32Array)
     stateC = new Float32Array(nextC.data as Float32Array)
+    if (updateIndex % updateStride === 0) {
+      options?.onProgress?.()
+    }
+    updateIndex += 1
   }

   return probabilities
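The stride arithmetic in detectSpeechSegmentsForFile caps progress updates at roughly fifty regardless of input length. As a worked example, assuming 16 kHz audio and 512-sample VAD windows (typical Silero-style values; the real numbers come from CONFIG), ten minutes of audio is 9,600,000 samples, so totalWindows = ceil(9,600,000 / 512) = 18,750, updateStride = floor(18,750 / 50) = 375, and updateCount = ceil(18,750 / 375) = 50. getVadProbabilities then fires onProgress on every 375th window (exactly 50 times here), and the progressUpdates <= updateCount guard drops any extras so the reporter never steps past its declared stepCount.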
@@ -1,6 +1,7 @@
 import path from 'node:path'
 import { mkdir } from 'node:fs/promises'
 import { runCommand } from './utils'
+import type { StepProgressReporter } from '../progress-reporter'

 const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
 const DEFAULT_MODEL_URL =
@@ -14,6 +15,7 @@ type TranscribeOptions = {
   threads?: number
   binaryPath?: string
   outputBasePath?: string
+  progress?: StepProgressReporter
 }

 export type TranscriptSegment = {
@@ -36,6 +38,7 @@ export async function transcribeAudio(
   audioPath: string,
   options: TranscribeOptions = {},
 ): Promise<TranscriptionResult> {
+  const progress = options.progress
   const resolvedAudioPath = path.resolve(audioPath)
   const resolvedModelPath = path.resolve(
     options.modelPath ?? getDefaultWhisperModelPath(),
@@ -49,7 +52,9 @@ export async function transcribeAudio(
     `${path.parse(resolvedAudioPath).name}-transcript`,
   )

-  await ensureModelFile(resolvedModelPath)
+  const totalSteps = 3
+  progress?.start({ stepCount: totalSteps, label: 'Checking model' })
+  await ensureModelFile(resolvedModelPath, progress)

   const args = [
     binaryPath,
@@ -69,17 +74,23 @@ export async function transcribeAudio(
     args.push('-t', String(options.threads))
   }

+  progress?.step('Transcribing audio')
   const result = await runCommand(args)
+  progress?.step('Reading output')
   const transcriptPath = `${outputBasePath}.txt`
   const transcript = await readTranscriptText(transcriptPath, result.stdout)
   const { segments, source } = await readTranscriptSegments(
     `${outputBasePath}.json`,
   )
   const normalized = normalizeTranscriptText(transcript)
+  progress?.finish('Complete')
   return { text: normalized, segments, segmentsSource: source }
 }

-async function ensureModelFile(modelPath: string) {
+async function ensureModelFile(
+  modelPath: string,
+  progress?: StepProgressReporter,
+) {
   const file = Bun.file(modelPath)
   if (await file.exists()) {
     return
@@ -90,6 +101,7 @@ async function ensureModelFile(modelPath: string) {
     throw new Error(`Whisper model not found at ${modelPath}.`)
   }

+  progress?.setLabel('Downloading model')
   await mkdir(path.dirname(modelPath), { recursive: true })
   const response = await fetch(DEFAULT_MODEL_URL)
   if (!response.ok) {