eprec 0.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/LICENSE +21 -0
  2. package/README.md +122 -29
  3. package/app/assets/styles.css +129 -0
  4. package/app/client/app.tsx +37 -0
  5. package/app/client/counter.tsx +22 -0
  6. package/app/client/entry.tsx +8 -0
  7. package/app/components/layout.tsx +37 -0
  8. package/app/config/env.ts +31 -0
  9. package/app/config/import-map.ts +9 -0
  10. package/app/config/init-env.ts +3 -0
  11. package/app/config/routes.ts +5 -0
  12. package/app/helpers/render.ts +6 -0
  13. package/app/router.tsx +102 -0
  14. package/app/routes/index.tsx +50 -0
  15. package/app-server.ts +60 -0
  16. package/cli.ts +173 -0
  17. package/package.json +46 -7
  18. package/process-course/chapter-processor.ts +1037 -0
  19. package/process-course/cli.ts +236 -0
  20. package/process-course/config.ts +50 -0
  21. package/process-course/edits/cli.ts +167 -0
  22. package/process-course/edits/combined-video-editor.ts +316 -0
  23. package/process-course/edits/edit-workspace.ts +90 -0
  24. package/process-course/edits/index.ts +20 -0
  25. package/process-course/edits/regenerate-transcript.ts +84 -0
  26. package/process-course/edits/remove-ranges.test.ts +36 -0
  27. package/process-course/edits/remove-ranges.ts +287 -0
  28. package/process-course/edits/timestamp-refinement.test.ts +25 -0
  29. package/process-course/edits/timestamp-refinement.ts +172 -0
  30. package/process-course/edits/transcript-diff.test.ts +105 -0
  31. package/process-course/edits/transcript-diff.ts +214 -0
  32. package/process-course/edits/transcript-output.test.ts +50 -0
  33. package/process-course/edits/transcript-output.ts +36 -0
  34. package/process-course/edits/types.ts +26 -0
  35. package/process-course/edits/video-editor.ts +246 -0
  36. package/process-course/errors.test.ts +63 -0
  37. package/process-course/errors.ts +82 -0
  38. package/process-course/ffmpeg.ts +449 -0
  39. package/process-course/jarvis-commands/handlers.ts +71 -0
  40. package/process-course/jarvis-commands/index.ts +14 -0
  41. package/process-course/jarvis-commands/parser.test.ts +348 -0
  42. package/process-course/jarvis-commands/parser.ts +257 -0
  43. package/process-course/jarvis-commands/types.ts +46 -0
  44. package/process-course/jarvis-commands/windows.ts +254 -0
  45. package/process-course/logging.ts +24 -0
  46. package/process-course/paths.test.ts +59 -0
  47. package/process-course/paths.ts +53 -0
  48. package/process-course/summary.test.ts +209 -0
  49. package/process-course/summary.ts +210 -0
  50. package/process-course/types.ts +85 -0
  51. package/process-course/utils/audio-analysis.test.ts +348 -0
  52. package/process-course/utils/audio-analysis.ts +463 -0
  53. package/process-course/utils/chapter-selection.test.ts +307 -0
  54. package/process-course/utils/chapter-selection.ts +136 -0
  55. package/process-course/utils/file-utils.test.ts +83 -0
  56. package/process-course/utils/file-utils.ts +57 -0
  57. package/process-course/utils/filename.test.ts +27 -0
  58. package/process-course/utils/filename.ts +12 -0
  59. package/process-course/utils/time-ranges.test.ts +221 -0
  60. package/process-course/utils/time-ranges.ts +86 -0
  61. package/process-course/utils/transcript.test.ts +257 -0
  62. package/process-course/utils/transcript.ts +86 -0
  63. package/process-course/utils/video-editing.ts +44 -0
  64. package/process-course-video.ts +389 -0
  65. package/public/robots.txt +2 -0
  66. package/server/bundling.ts +210 -0
  67. package/speech-detection.ts +355 -0
  68. package/utils.ts +138 -0
  69. package/whispercpp-transcribe.ts +343 -0
@@ -0,0 +1,355 @@
1
import path from 'node:path'
import { mkdir, rename } from 'node:fs/promises'

import * as ort from 'onnxruntime-node'

import { CONFIG } from './process-course/config'
import { readAudioSamples } from './process-course/ffmpeg'
import type { SpeechBounds } from './process-course/types'
import { speechFallback } from './process-course/utils/audio-analysis'
import { formatSeconds, getMediaDurationSeconds } from './utils'
9
+
10
// Tunable parameters for Silero VAD inference and segment post-processing.
export type VadConfig = {
  // Number of audio samples fed to the model per inference window.
  vadWindowSamples: number
  // Probability at/above which a window counts as speech.
  vadSpeechThreshold: number
  // Probability below which an active speech run may end (hysteresis band).
  vadNegThreshold: number
  // Speech runs shorter than this are discarded.
  vadMinSpeechDurationMs: number
  // Silence gaps shorter than this do not split a speech run.
  vadMinSilenceDurationMs: number
  // Padding added around each detected segment.
  vadSpeechPadMs: number
  // URL the ONNX model is downloaded from when not cached locally.
  vadModelUrl: string
}

// A speech segment in seconds, relative to the start of the analyzed audio.
export type SpeechSegment = { start: number; end: number }

type VadSegment = SpeechSegment

// Process-wide lazy cache for the ONNX inference session (see getVadSession).
let vadSessionPromise: Promise<ort.InferenceSession> | null = null
25
+
26
+ export async function detectSpeechSegmentsWithVad(
27
+ samples: Float32Array,
28
+ sampleRate: number,
29
+ config: VadConfig,
30
+ ): Promise<VadSegment[]> {
31
+ const vadSession = await getVadSession(config)
32
+ const probabilities = await getVadProbabilities(
33
+ samples,
34
+ sampleRate,
35
+ config,
36
+ vadSession,
37
+ )
38
+ return probabilitiesToSegments(
39
+ samples.length,
40
+ probabilities,
41
+ sampleRate,
42
+ config,
43
+ )
44
+ }
45
+
46
+ export async function detectSpeechSegmentsForFile(options: {
47
+ inputPath: string
48
+ start?: number
49
+ end?: number
50
+ }): Promise<SpeechSegment[]> {
51
+ const start = options.start ?? 0
52
+ if (!Number.isFinite(start) || start < 0) {
53
+ throw new Error('Start time must be a non-negative number.')
54
+ }
55
+ const durationSeconds = await getMediaDurationSeconds(options.inputPath)
56
+ const end = options.end ?? durationSeconds
57
+ if (!Number.isFinite(end) || end <= start) {
58
+ throw new Error('End time must be greater than start time.')
59
+ }
60
+ const duration = end - start
61
+
62
+ const samples = await readAudioSamples({
63
+ inputPath: options.inputPath,
64
+ start,
65
+ duration,
66
+ sampleRate: CONFIG.vadSampleRate,
67
+ })
68
+ if (samples.length === 0) {
69
+ return []
70
+ }
71
+ const segments = await detectSpeechSegmentsWithVad(
72
+ samples,
73
+ CONFIG.vadSampleRate,
74
+ CONFIG,
75
+ )
76
+ return segments.map((segment) => ({
77
+ start: segment.start + start,
78
+ end: segment.end + start,
79
+ }))
80
+ }
81
+
82
+ async function getVadSession(config: VadConfig) {
83
+ if (!vadSessionPromise) {
84
+ vadSessionPromise = (async () => {
85
+ const modelPath = await ensureVadModel(config)
86
+ return ort.InferenceSession.create(modelPath, {
87
+ executionProviders: ['cpu'],
88
+ })
89
+ })()
90
+ }
91
+ return vadSessionPromise
92
+ }
93
+
94
+ async function ensureVadModel(config: VadConfig) {
95
+ const cacheDir = path.join(process.cwd(), '.cache')
96
+ const modelPath = path.join(cacheDir, 'silero-vad.onnx')
97
+ const file = Bun.file(modelPath)
98
+ if (await file.exists()) {
99
+ return modelPath
100
+ }
101
+
102
+ await mkdir(cacheDir, { recursive: true })
103
+ const response = await fetch(config.vadModelUrl)
104
+ if (!response.ok) {
105
+ throw new Error(
106
+ `Failed to download VAD model (${response.status} ${response.statusText}).`,
107
+ )
108
+ }
109
+ const buffer = await response.arrayBuffer()
110
+ await Bun.write(modelPath, new Uint8Array(buffer))
111
+ return modelPath
112
+ }
113
+
114
+ async function getVadProbabilities(
115
+ samples: Float32Array,
116
+ sampleRate: number,
117
+ config: VadConfig,
118
+ session: ort.InferenceSession,
119
+ ) {
120
+ const windowSamples = config.vadWindowSamples
121
+ const srTensor = new ort.Tensor(
122
+ 'int64',
123
+ new BigInt64Array([BigInt(sampleRate)]),
124
+ [],
125
+ )
126
+ const probabilities: number[] = []
127
+ let stateH = new Float32Array(2 * 1 * 64)
128
+ let stateC = new Float32Array(2 * 1 * 64)
129
+
130
+ for (let offset = 0; offset < samples.length; offset += windowSamples) {
131
+ const chunk = samples.subarray(offset, offset + windowSamples)
132
+ const paddedChunk = new Float32Array(windowSamples)
133
+ paddedChunk.set(chunk)
134
+
135
+ const inputTensor = new ort.Tensor('float32', paddedChunk, [
136
+ 1,
137
+ windowSamples,
138
+ ])
139
+ const hTensor = new ort.Tensor('float32', stateH, [2, 1, 64])
140
+ const cTensor = new ort.Tensor('float32', stateC, [2, 1, 64])
141
+
142
+ const outputs = await session.run({
143
+ input: inputTensor,
144
+ sr: srTensor,
145
+ h: hTensor,
146
+ c: cTensor,
147
+ })
148
+
149
+ const {
150
+ probTensor,
151
+ hTensor: nextH,
152
+ cTensor: nextC,
153
+ } = pickVadOutputs(outputs, session.outputNames)
154
+ probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
155
+ stateH = new Float32Array(nextH.data as Float32Array)
156
+ stateC = new Float32Array(nextC.data as Float32Array)
157
+ }
158
+
159
+ return probabilities
160
+ }
161
+
162
+ function pickVadOutputs(
163
+ outputs: Record<string, ort.Tensor>,
164
+ outputNames: readonly string[],
165
+ ) {
166
+ let probTensor: ort.Tensor | null = null
167
+ let hTensor: ort.Tensor | null = null
168
+ let cTensor: ort.Tensor | null = null
169
+
170
+ for (const name of outputNames) {
171
+ const tensor = outputs[name]
172
+ if (!tensor) {
173
+ continue
174
+ }
175
+ if (name === 'output') {
176
+ probTensor = tensor
177
+ } else if (name === 'hn') {
178
+ hTensor = tensor
179
+ } else if (name === 'cn') {
180
+ cTensor = tensor
181
+ }
182
+ }
183
+
184
+ if (!probTensor || !hTensor || !cTensor) {
185
+ throw new Error(
186
+ 'Unexpected VAD outputs; unable to read speech probabilities.',
187
+ )
188
+ }
189
+
190
+ return { probTensor, hTensor, cTensor }
191
+ }
192
+
193
+ function probabilitiesToSegments(
194
+ totalSamples: number,
195
+ probabilities: number[],
196
+ sampleRate: number,
197
+ config: VadConfig,
198
+ ): VadSegment[] {
199
+ const windowSamples = config.vadWindowSamples
200
+ const threshold = config.vadSpeechThreshold
201
+ const negThreshold = config.vadNegThreshold
202
+ const minSpeechSamples = (sampleRate * config.vadMinSpeechDurationMs) / 1000
203
+ const minSilenceSamples = (sampleRate * config.vadMinSilenceDurationMs) / 1000
204
+ const speechPadSamples = (sampleRate * config.vadSpeechPadMs) / 1000
205
+
206
+ let triggered = false
207
+ let tempEnd = 0
208
+ let currentSpeechStart = 0
209
+ const speeches: VadSegment[] = []
210
+
211
+ for (let index = 0; index < probabilities.length; index += 1) {
212
+ const prob = probabilities[index] ?? 0
213
+ const currentSample = index * windowSamples
214
+
215
+ if (prob >= threshold && tempEnd) {
216
+ tempEnd = 0
217
+ }
218
+
219
+ if (prob >= threshold && !triggered) {
220
+ triggered = true
221
+ currentSpeechStart = currentSample
222
+ continue
223
+ }
224
+
225
+ if (prob < negThreshold && triggered) {
226
+ if (!tempEnd) {
227
+ tempEnd = currentSample
228
+ }
229
+ if (currentSample - tempEnd < minSilenceSamples) {
230
+ continue
231
+ }
232
+ const speechEnd = tempEnd
233
+ if (speechEnd - currentSpeechStart >= minSpeechSamples) {
234
+ speeches.push({ start: currentSpeechStart, end: speechEnd })
235
+ }
236
+ triggered = false
237
+ tempEnd = 0
238
+ currentSpeechStart = 0
239
+ }
240
+ }
241
+
242
+ if (triggered) {
243
+ const speechEnd = totalSamples
244
+ if (speechEnd - currentSpeechStart >= minSpeechSamples) {
245
+ speeches.push({ start: currentSpeechStart, end: speechEnd })
246
+ }
247
+ }
248
+
249
+ if (speeches.length === 0) {
250
+ return []
251
+ }
252
+
253
+ for (let index = 0; index < speeches.length; index += 1) {
254
+ const speech = speeches[index]
255
+ if (!speech) {
256
+ continue
257
+ }
258
+ const nextSpeech = speeches[index + 1]
259
+ if (index === 0) {
260
+ speech.start = Math.max(0, speech.start - speechPadSamples)
261
+ }
262
+ if (nextSpeech) {
263
+ const silence = nextSpeech.start - speech.end
264
+ if (silence < speechPadSamples * 2) {
265
+ const adjustment = silence / 2
266
+ speech.end += adjustment
267
+ nextSpeech.start = Math.max(0, nextSpeech.start - adjustment)
268
+ } else {
269
+ speech.end = Math.min(totalSamples, speech.end + speechPadSamples)
270
+ nextSpeech.start = Math.max(0, nextSpeech.start - speechPadSamples)
271
+ }
272
+ } else {
273
+ speech.end = Math.min(totalSamples, speech.end + speechPadSamples)
274
+ }
275
+ }
276
+
277
+ return speeches.map((speech) => ({
278
+ start: speech.start / sampleRate,
279
+ end: speech.end / sampleRate,
280
+ }))
281
+ }
282
+
283
+ export async function detectSpeechBounds(
284
+ inputPath: string,
285
+ chapterStart: number,
286
+ chapterEnd: number,
287
+ duration: number,
288
+ ): Promise<SpeechBounds> {
289
+ const clipDuration = chapterEnd - chapterStart
290
+ if (clipDuration <= 0) {
291
+ return speechFallback(
292
+ duration,
293
+ 'Invalid chapter boundaries; using full chapter.',
294
+ )
295
+ }
296
+
297
+ const samples = await readAudioSamples({
298
+ inputPath,
299
+ start: chapterStart,
300
+ duration: clipDuration,
301
+ sampleRate: CONFIG.vadSampleRate,
302
+ })
303
+ const fallbackNote = `Speech detection failed near ${formatSeconds(chapterStart)}; using full chapter.`
304
+ if (samples.length === 0) {
305
+ return speechFallback(duration, fallbackNote)
306
+ }
307
+
308
+ const vadSegments = await detectSpeechSegmentsWithVad(
309
+ samples,
310
+ CONFIG.vadSampleRate,
311
+ CONFIG,
312
+ )
313
+ if (vadSegments.length === 0) {
314
+ return speechFallback(duration, fallbackNote)
315
+ }
316
+ const firstSegment = vadSegments[0]
317
+ const lastSegment = vadSegments[vadSegments.length - 1]
318
+ if (!firstSegment || !lastSegment) {
319
+ return speechFallback(duration, fallbackNote)
320
+ }
321
+ const speechStart = firstSegment.start
322
+ const speechEnd = lastSegment.end
323
+
324
+ if (speechEnd <= speechStart + 0.1) {
325
+ return speechFallback(duration, fallbackNote)
326
+ }
327
+
328
+ return { start: speechStart, end: speechEnd }
329
+ }
330
+
331
+ export async function checkSegmentHasSpeech(
332
+ inputPath: string,
333
+ duration: number,
334
+ ): Promise<boolean> {
335
+ if (duration <= 0) {
336
+ return false
337
+ }
338
+
339
+ const samples = await readAudioSamples({
340
+ inputPath,
341
+ start: 0,
342
+ duration,
343
+ sampleRate: CONFIG.vadSampleRate,
344
+ })
345
+ if (samples.length === 0) {
346
+ return false
347
+ }
348
+
349
+ const vadSegments = await detectSpeechSegmentsWithVad(
350
+ samples,
351
+ CONFIG.vadSampleRate,
352
+ CONFIG,
353
+ )
354
+ return vadSegments.length > 0
355
+ }
package/utils.ts ADDED
@@ -0,0 +1,138 @@
1
// Options shared by runCommand / runCommandBinary.
type RunCommandOptions = {
  // When true, a non-zero exit code is returned instead of thrown.
  allowFailure?: boolean
  // Called with the argv array before spawning (hook for logging).
  logCommand?: (command: string[]) => void
}
5
+
6
+ export function formatCommand(command: string[]) {
7
+ return command
8
+ .map((part) => (part.includes(' ') ? `"${part}"` : part))
9
+ .join(' ')
10
+ }
11
+
12
+ export async function runCommand(
13
+ command: string[],
14
+ options: RunCommandOptions = {},
15
+ ) {
16
+ options.logCommand?.(command)
17
+ const proc = Bun.spawn(command, {
18
+ stdout: 'pipe',
19
+ stderr: 'pipe',
20
+ })
21
+ const [stdout, stderr, exitCode] = await Promise.all([
22
+ new Response(proc.stdout).text(),
23
+ new Response(proc.stderr).text(),
24
+ proc.exited,
25
+ ])
26
+
27
+ if (exitCode !== 0 && !options.allowFailure) {
28
+ throw new Error(
29
+ `Command failed (${exitCode}): ${formatCommand(command)}\n${stderr}`,
30
+ )
31
+ }
32
+
33
+ return { stdout, stderr, exitCode }
34
+ }
35
+
36
+ export async function runCommandBinary(
37
+ command: string[],
38
+ options: RunCommandOptions = {},
39
+ ) {
40
+ options.logCommand?.(command)
41
+ const proc = Bun.spawn(command, {
42
+ stdout: 'pipe',
43
+ stderr: 'pipe',
44
+ })
45
+ const [stdout, stderr, exitCode] = await Promise.all([
46
+ new Response(proc.stdout).arrayBuffer(),
47
+ new Response(proc.stderr).text(),
48
+ proc.exited,
49
+ ])
50
+
51
+ if (exitCode !== 0 && !options.allowFailure) {
52
+ throw new Error(
53
+ `Command failed (${exitCode}): ${formatCommand(command)}\n${stderr}`,
54
+ )
55
+ }
56
+
57
+ return { stdout: new Uint8Array(stdout), stderr, exitCode }
58
+ }
59
+
60
+ export function formatSeconds(value: number) {
61
+ return `${value.toFixed(2)}s`
62
+ }
63
+
64
+ export function clamp(value: number, min: number, max: number) {
65
+ return Math.min(Math.max(value, min), max)
66
+ }
67
+
68
+ export function toKebabCase(value: string) {
69
+ return (
70
+ value
71
+ .trim()
72
+ .toLowerCase()
73
+ .replace(/['".,]/g, '')
74
+ .replace(/[^a-z0-9]+/g, '-')
75
+ .replace(/^-+|-+$/g, '')
76
+ .replace(/-+/g, '-') || 'untitled'
77
+ )
78
+ }
79
+
80
+ export function normalizeFilename(value: string) {
81
+ const numberWords: Record<string, number> = {
82
+ zero: 0,
83
+ one: 1,
84
+ two: 2,
85
+ three: 3,
86
+ four: 4,
87
+ five: 5,
88
+ six: 6,
89
+ seven: 7,
90
+ eight: 8,
91
+ nine: 9,
92
+ ten: 10,
93
+ eleven: 11,
94
+ twelve: 12,
95
+ thirteen: 13,
96
+ fourteen: 14,
97
+ fifteen: 15,
98
+ sixteen: 16,
99
+ seventeen: 17,
100
+ eighteen: 18,
101
+ nineteen: 19,
102
+ twenty: 20,
103
+ }
104
+
105
+ const numberWordPattern = new RegExp(
106
+ `\\b(${Object.keys(numberWords).join('|')})\\b`,
107
+ 'g',
108
+ )
109
+
110
+ return value
111
+ .trim()
112
+ .toLowerCase()
113
+ .replace(/\b(point|dot)\b/g, '.')
114
+ .replace(/\s*\.\s*/g, '.')
115
+ .replace(numberWordPattern, (word) =>
116
+ String(numberWords[word] ?? word).padStart(2, '0'),
117
+ )
118
+ }
119
+
120
+ export async function getMediaDurationSeconds(
121
+ filePath: string,
122
+ ): Promise<number> {
123
+ const result = await runCommand([
124
+ 'ffprobe',
125
+ '-v',
126
+ 'error',
127
+ '-show_entries',
128
+ 'format=duration',
129
+ '-of',
130
+ 'default=noprint_wrappers=1:nokey=1',
131
+ filePath,
132
+ ])
133
+ const duration = Number.parseFloat(result.stdout.trim())
134
+ if (!Number.isFinite(duration) || duration <= 0) {
135
+ throw new Error(`Invalid duration for ${filePath}: ${result.stdout}`)
136
+ }
137
+ return duration
138
+ }