eprec 1.10.2 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.ts CHANGED
@@ -27,6 +27,7 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
   pauseActiveSpinner,
   resumeActiveSpinner,
@@ -156,18 +157,30 @@ async function main(rawArgs = hideBin(process.argv)) {
       }),
       async (argv) => {
         const transcribeArgs = await resolveTranscribeArgs(argv, context)
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Transcribing audio' })
+          : undefined
         let resultText = ''
         await withSpinner(
           'Transcribing audio',
           async () => {
-            const result = await transcribeAudio(transcribeArgs.inputPath, {
-              modelPath: transcribeArgs.modelPath,
-              language: transcribeArgs.language,
-              threads: transcribeArgs.threads,
-              binaryPath: transcribeArgs.binaryPath,
-              outputBasePath: transcribeArgs.outputBasePath,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
             })
-            resultText = result.text
+            try {
+              const result = await transcribeAudio(transcribeArgs.inputPath, {
+                modelPath: transcribeArgs.modelPath,
+                language: transcribeArgs.language,
+                threads: transcribeArgs.threads,
+                binaryPath: transcribeArgs.binaryPath,
+                outputBasePath: transcribeArgs.outputBasePath,
+                progress,
+              })
+              resultText = result.text
+            } finally {
+              setLogHooks({})
+            }
           },
           {
             successText: 'Transcription complete',
@@ -203,16 +216,28 @@ async function main(rawArgs = hideBin(process.argv)) {
           argv,
           context,
         )
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Detecting speech' })
+          : undefined
         let segments: unknown = []
         await withSpinner(
           'Detecting speech',
           async () => {
-            await ensureFfmpegAvailable()
-            segments = await detectSpeechSegmentsForFile({
-              inputPath,
-              start,
-              end,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
             })
+            try {
+              await ensureFfmpegAvailable()
+              segments = await detectSpeechSegmentsForFile({
+                inputPath,
+                start,
+                end,
+                progress,
+              })
+            } finally {
+              setLogHooks({})
+            }
           },
           {
             successText: 'Speech detection complete',
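This diff does not include the progress-reporter module itself, so the reporter's shape is only implied. Inferred from the call sites above and in the hunks below, the contract would look roughly like the following sketch; the method names and argument shapes come straight from the diff, while the exact typings and factory signature are assumptions:

    // Sketch of the StepProgressReporter contract implied by the call sites.
    // Only the method names and argument shapes are grounded in this diff;
    // everything else is an assumption, not the package's actual code.
    export type StepProgressReporter = {
      // Begin (or restart) a phase with a known number of steps.
      start(options: { stepCount: number; label: string }): void
      // Mark one step done, optionally updating the label.
      step(label?: string): void
      // Update the label without advancing the counter.
      setLabel(label: string): void
      // Stop reporting and show a final status message.
      finish(message: string): void
    }

    export declare function createStepProgressReporter(options: {
      action: string
    }): StepProgressReporter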
@@ -6,6 +6,7 @@ import { CONFIG } from '../process-course/config'
 import { formatSeconds, getMediaDurationSeconds } from './utils'
 import { speechFallback } from '../process-course/utils/audio-analysis'
 import type { SpeechBounds } from '../process-course/types'
+import type { StepProgressReporter } from '../progress-reporter'
 
 export type VadConfig = {
   vadWindowSamples: number
@@ -27,6 +28,7 @@ export async function detectSpeechSegmentsWithVad(
   samples: Float32Array,
   sampleRate: number,
   config: VadConfig,
+  options?: { onProgress?: () => void; updateStride?: number },
 ): Promise<VadSegment[]> {
   const vadSession = await getVadSession(config)
   const probabilities = await getVadProbabilities(
@@ -34,6 +36,7 @@ export async function detectSpeechSegmentsWithVad(
     sampleRate,
     config,
     vadSession,
+    options,
   )
   return probabilitiesToSegments(
     samples.length,
@@ -47,7 +50,10 @@ export async function detectSpeechSegmentsForFile(options: {
   inputPath: string
   start?: number
   end?: number
+  progress?: StepProgressReporter
 }): Promise<SpeechSegment[]> {
+  const progress = options.progress
+  progress?.start({ stepCount: 1, label: 'Loading audio' })
   const start = options.start ?? 0
   if (!Number.isFinite(start) || start < 0) {
     throw new Error('Start time must be a non-negative number.')
@@ -66,13 +72,31 @@ export async function detectSpeechSegmentsForFile(options: {
     sampleRate: CONFIG.vadSampleRate,
   })
   if (samples.length === 0) {
+    progress?.finish('No audio')
     return []
   }
+  const windowSamples = CONFIG.vadWindowSamples
+  const totalWindows = Math.ceil(samples.length / windowSamples)
+  const updateStride = Math.max(1, Math.floor(totalWindows / 50))
+  const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride))
+  progress?.start({ stepCount: updateCount, label: 'Running VAD' })
+  let progressUpdates = 0
   const segments = await detectSpeechSegmentsWithVad(
     samples,
     CONFIG.vadSampleRate,
     CONFIG,
+    {
+      onProgress: () => {
+        progressUpdates += 1
+        if (progressUpdates <= updateCount) {
+          progress?.step('Running VAD')
+        }
+      },
+      updateStride,
+    },
   )
+  progress?.setLabel('Building segments')
+  progress?.finish('Complete')
   return segments.map((segment) => ({
     start: segment.start + start,
     end: segment.end + start,
@@ -116,6 +140,7 @@ async function getVadProbabilities(
   sampleRate: number,
   config: VadConfig,
   session: ort.InferenceSession,
+  options?: { onProgress?: () => void; updateStride?: number },
 ) {
   const windowSamples = config.vadWindowSamples
   const srTensor = new ort.Tensor(
@@ -126,6 +151,8 @@ async function getVadProbabilities(
   const probabilities: number[] = []
   let stateH = new Float32Array(2 * 1 * 64)
   let stateC = new Float32Array(2 * 1 * 64)
+  const updateStride = Math.max(1, Math.floor(options?.updateStride ?? 1))
+  let updateIndex = 0
 
   for (let offset = 0; offset < samples.length; offset += windowSamples) {
     const chunk = samples.subarray(offset, offset + windowSamples)
@@ -154,6 +181,10 @@ async function getVadProbabilities(
     probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
     stateH = new Float32Array(nextH.data as Float32Array)
     stateC = new Float32Array(nextC.data as Float32Array)
+    if (updateIndex % updateStride === 0) {
+      options?.onProgress?.()
+    }
+    updateIndex += 1
   }
 
   return probabilities
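The stride math above caps reporter updates at roughly 50 per run, however long the audio is. A worked example with assumed numbers (CONFIG.vadSampleRate and CONFIG.vadWindowSamples are not shown in this diff; 16 kHz audio and 512-sample windows are typical for Silero-style VAD):

    // Hypothetical input: 10 minutes of 16 kHz audio, 512-sample windows.
    const totalWindows = Math.ceil((10 * 60 * 16_000) / 512) // 18750
    const updateStride = Math.max(1, Math.floor(totalWindows / 50)) // 375
    const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride)) // 50
    // getVadProbabilities invokes onProgress once per updateStride windows,
    // so the reporter steps about 50 times instead of 18750.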
@@ -1,6 +1,7 @@
 import path from 'node:path'
 import { mkdir } from 'node:fs/promises'
 import { runCommand } from './utils'
+import type { StepProgressReporter } from '../progress-reporter'
 
 const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
 const DEFAULT_MODEL_URL =
@@ -14,6 +15,7 @@ type TranscribeOptions = {
   threads?: number
   binaryPath?: string
   outputBasePath?: string
+  progress?: StepProgressReporter
 }
 
 export type TranscriptSegment = {
@@ -36,6 +38,7 @@ export async function transcribeAudio(
   audioPath: string,
   options: TranscribeOptions = {},
 ): Promise<TranscriptionResult> {
+  const progress = options.progress
   const resolvedAudioPath = path.resolve(audioPath)
   const resolvedModelPath = path.resolve(
     options.modelPath ?? getDefaultWhisperModelPath(),
@@ -49,7 +52,9 @@ export async function transcribeAudio(
     `${path.parse(resolvedAudioPath).name}-transcript`,
   )
 
-  await ensureModelFile(resolvedModelPath)
+  const totalSteps = 3
+  progress?.start({ stepCount: totalSteps, label: 'Checking model' })
+  await ensureModelFile(resolvedModelPath, progress)
 
   const args = [
     binaryPath,
@@ -69,17 +74,23 @@ export async function transcribeAudio(
     args.push('-t', String(options.threads))
   }
 
+  progress?.step('Transcribing audio')
   const result = await runCommand(args)
+  progress?.step('Reading output')
   const transcriptPath = `${outputBasePath}.txt`
   const transcript = await readTranscriptText(transcriptPath, result.stdout)
   const { segments, source } = await readTranscriptSegments(
     `${outputBasePath}.json`,
   )
   const normalized = normalizeTranscriptText(transcript)
+  progress?.finish('Complete')
   return { text: normalized, segments, segmentsSource: source }
 }
 
-async function ensureModelFile(modelPath: string) {
+async function ensureModelFile(
+  modelPath: string,
+  progress?: StepProgressReporter,
+) {
   const file = Bun.file(modelPath)
   if (await file.exists()) {
     return
@@ -90,6 +101,7 @@ async function ensureModelFile(modelPath: string) {
     throw new Error(`Whisper model not found at ${modelPath}.`)
   }
 
+  progress?.setLabel('Downloading model')
   await mkdir(path.dirname(modelPath), { recursive: true })
   const response = await fetch(DEFAULT_MODEL_URL)
   if (!response.ok) {
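Taken together, callers thread a reporter through transcribeAudio the same way cli.ts now does. A minimal usage sketch; the factory and its options come from the diff, while the import paths and the TTY check stand in for details this diff does not show:

    // Hypothetical usage, mirroring the cli.ts changes above.
    import { createStepProgressReporter } from './src/progress-reporter'
    import { transcribeAudio } from './src/transcribe'

    const progress = process.stdout.isTTY // stand-in for context.interactive
      ? createStepProgressReporter({ action: 'Transcribing audio' })
      : undefined

    const result = await transcribeAudio('lecture.wav', { progress })
    console.log(result.text) // normalized transcript text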