eprec 0.0.1 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/LICENSE +21 -0
  2. package/README.md +122 -29
  3. package/app/assets/styles.css +129 -0
  4. package/app/client/app.tsx +37 -0
  5. package/app/client/counter.tsx +22 -0
  6. package/app/client/entry.tsx +8 -0
  7. package/app/components/layout.tsx +37 -0
  8. package/app/config/env.ts +31 -0
  9. package/app/config/import-map.ts +9 -0
  10. package/app/config/init-env.ts +3 -0
  11. package/app/config/routes.ts +5 -0
  12. package/app/helpers/render.ts +6 -0
  13. package/app/router.tsx +102 -0
  14. package/app/routes/index.tsx +50 -0
  15. package/app-server.ts +60 -0
  16. package/cli.ts +173 -0
  17. package/package.json +46 -7
  18. package/process-course/chapter-processor.ts +1037 -0
  19. package/process-course/cli.ts +236 -0
  20. package/process-course/config.ts +50 -0
  21. package/process-course/edits/cli.ts +167 -0
  22. package/process-course/edits/combined-video-editor.ts +316 -0
  23. package/process-course/edits/edit-workspace.ts +90 -0
  24. package/process-course/edits/index.ts +20 -0
  25. package/process-course/edits/regenerate-transcript.ts +84 -0
  26. package/process-course/edits/remove-ranges.test.ts +36 -0
  27. package/process-course/edits/remove-ranges.ts +287 -0
  28. package/process-course/edits/timestamp-refinement.test.ts +25 -0
  29. package/process-course/edits/timestamp-refinement.ts +172 -0
  30. package/process-course/edits/transcript-diff.test.ts +105 -0
  31. package/process-course/edits/transcript-diff.ts +214 -0
  32. package/process-course/edits/transcript-output.test.ts +50 -0
  33. package/process-course/edits/transcript-output.ts +36 -0
  34. package/process-course/edits/types.ts +26 -0
  35. package/process-course/edits/video-editor.ts +246 -0
  36. package/process-course/errors.test.ts +63 -0
  37. package/process-course/errors.ts +82 -0
  38. package/process-course/ffmpeg.ts +449 -0
  39. package/process-course/jarvis-commands/handlers.ts +71 -0
  40. package/process-course/jarvis-commands/index.ts +14 -0
  41. package/process-course/jarvis-commands/parser.test.ts +348 -0
  42. package/process-course/jarvis-commands/parser.ts +257 -0
  43. package/process-course/jarvis-commands/types.ts +46 -0
  44. package/process-course/jarvis-commands/windows.ts +254 -0
  45. package/process-course/logging.ts +24 -0
  46. package/process-course/paths.test.ts +59 -0
  47. package/process-course/paths.ts +53 -0
  48. package/process-course/summary.test.ts +209 -0
  49. package/process-course/summary.ts +210 -0
  50. package/process-course/types.ts +85 -0
  51. package/process-course/utils/audio-analysis.test.ts +348 -0
  52. package/process-course/utils/audio-analysis.ts +463 -0
  53. package/process-course/utils/chapter-selection.test.ts +307 -0
  54. package/process-course/utils/chapter-selection.ts +136 -0
  55. package/process-course/utils/file-utils.test.ts +83 -0
  56. package/process-course/utils/file-utils.ts +57 -0
  57. package/process-course/utils/filename.test.ts +27 -0
  58. package/process-course/utils/filename.ts +12 -0
  59. package/process-course/utils/time-ranges.test.ts +221 -0
  60. package/process-course/utils/time-ranges.ts +86 -0
  61. package/process-course/utils/transcript.test.ts +257 -0
  62. package/process-course/utils/transcript.ts +86 -0
  63. package/process-course/utils/video-editing.ts +44 -0
  64. package/process-course-video.ts +389 -0
  65. package/public/robots.txt +2 -0
  66. package/server/bundling.ts +210 -0
  67. package/speech-detection.ts +355 -0
  68. package/utils.ts +138 -0
  69. package/whispercpp-transcribe.ts +343 -0
@@ -0,0 +1,355 @@
1
import path from 'node:path'
import { mkdir, rename } from 'node:fs/promises'

import * as ort from 'onnxruntime-node'

import { CONFIG } from './process-course/config'
import { readAudioSamples } from './process-course/ffmpeg'
import type { SpeechBounds } from './process-course/types'
import { speechFallback } from './process-course/utils/audio-analysis'
import { formatSeconds, getMediaDurationSeconds } from './utils'
9
+
10
// Tunable parameters for Silero VAD inference and segment post-processing.
export type VadConfig = {
  // Number of audio samples fed to the model per inference window.
  vadWindowSamples: number
  // Probability at/above which a window counts as speech.
  vadSpeechThreshold: number
  // Probability below which an active speech run may end (hysteresis band).
  vadNegThreshold: number
  // Speech runs shorter than this are discarded.
  vadMinSpeechDurationMs: number
  // Silence gaps shorter than this do not split a speech run.
  vadMinSilenceDurationMs: number
  // Padding added around each detected segment.
  vadSpeechPadMs: number
  // URL the ONNX model is downloaded from when not cached locally.
  vadModelUrl: string
}

// A speech segment in seconds, relative to the start of the analyzed audio.
export type SpeechSegment = { start: number; end: number }

type VadSegment = SpeechSegment

// Process-wide lazy cache for the ONNX inference session (see getVadSession).
let vadSessionPromise: Promise<ort.InferenceSession> | null = null
25
+
26
+ export async function detectSpeechSegmentsWithVad(
27
+ samples: Float32Array,
28
+ sampleRate: number,
29
+ config: VadConfig,
30
+ ): Promise<VadSegment[]> {
31
+ const vadSession = await getVadSession(config)
32
+ const probabilities = await getVadProbabilities(
33
+ samples,
34
+ sampleRate,
35
+ config,
36
+ vadSession,
37
+ )
38
+ return probabilitiesToSegments(
39
+ samples.length,
40
+ probabilities,
41
+ sampleRate,
42
+ config,
43
+ )
44
+ }
45
+
46
+ export async function detectSpeechSegmentsForFile(options: {
47
+ inputPath: string
48
+ start?: number
49
+ end?: number
50
+ }): Promise<SpeechSegment[]> {
51
+ const start = options.start ?? 0
52
+ if (!Number.isFinite(start) || start < 0) {
53
+ throw new Error('Start time must be a non-negative number.')
54
+ }
55
+ const durationSeconds = await getMediaDurationSeconds(options.inputPath)
56
+ const end = options.end ?? durationSeconds
57
+ if (!Number.isFinite(end) || end <= start) {
58
+ throw new Error('End time must be greater than start time.')
59
+ }
60
+ const duration = end - start
61
+
62
+ const samples = await readAudioSamples({
63
+ inputPath: options.inputPath,
64
+ start,
65
+ duration,
66
+ sampleRate: CONFIG.vadSampleRate,
67
+ })
68
+ if (samples.length === 0) {
69
+ return []
70
+ }
71
+ const segments = await detectSpeechSegmentsWithVad(
72
+ samples,
73
+ CONFIG.vadSampleRate,
74
+ CONFIG,
75
+ )
76
+ return segments.map((segment) => ({
77
+ start: segment.start + start,
78
+ end: segment.end + start,
79
+ }))
80
+ }
81
+
82
+ async function getVadSession(config: VadConfig) {
83
+ if (!vadSessionPromise) {
84
+ vadSessionPromise = (async () => {
85
+ const modelPath = await ensureVadModel(config)
86
+ return ort.InferenceSession.create(modelPath, {
87
+ executionProviders: ['cpu'],
88
+ })
89
+ })()
90
+ }
91
+ return vadSessionPromise
92
+ }
93
+
94
+ async function ensureVadModel(config: VadConfig) {
95
+ const cacheDir = path.join(process.cwd(), '.cache')
96
+ const modelPath = path.join(cacheDir, 'silero-vad.onnx')
97
+ const file = Bun.file(modelPath)
98
+ if (await file.exists()) {
99
+ return modelPath
100
+ }
101
+
102
+ await mkdir(cacheDir, { recursive: true })
103
+ const response = await fetch(config.vadModelUrl)
104
+ if (!response.ok) {
105
+ throw new Error(
106
+ `Failed to download VAD model (${response.status} ${response.statusText}).`,
107
+ )
108
+ }
109
+ const buffer = await response.arrayBuffer()
110
+ await Bun.write(modelPath, new Uint8Array(buffer))
111
+ return modelPath
112
+ }
113
+
114
+ async function getVadProbabilities(
115
+ samples: Float32Array,
116
+ sampleRate: number,
117
+ config: VadConfig,
118
+ session: ort.InferenceSession,
119
+ ) {
120
+ const windowSamples = config.vadWindowSamples
121
+ const srTensor = new ort.Tensor(
122
+ 'int64',
123
+ new BigInt64Array([BigInt(sampleRate)]),
124
+ [],
125
+ )
126
+ const probabilities: number[] = []
127
+ let stateH = new Float32Array(2 * 1 * 64)
128
+ let stateC = new Float32Array(2 * 1 * 64)
129
+
130
+ for (let offset = 0; offset < samples.length; offset += windowSamples) {
131
+ const chunk = samples.subarray(offset, offset + windowSamples)
132
+ const paddedChunk = new Float32Array(windowSamples)
133
+ paddedChunk.set(chunk)
134
+
135
+ const inputTensor = new ort.Tensor('float32', paddedChunk, [
136
+ 1,
137
+ windowSamples,
138
+ ])
139
+ const hTensor = new ort.Tensor('float32', stateH, [2, 1, 64])
140
+ const cTensor = new ort.Tensor('float32', stateC, [2, 1, 64])
141
+
142
+ const outputs = await session.run({
143
+ input: inputTensor,
144
+ sr: srTensor,
145
+ h: hTensor,
146
+ c: cTensor,
147
+ })
148
+
149
+ const {
150
+ probTensor,
151
+ hTensor: nextH,
152
+ cTensor: nextC,
153
+ } = pickVadOutputs(outputs, session.outputNames)
154
+ probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
155
+ stateH = new Float32Array(nextH.data as Float32Array)
156
+ stateC = new Float32Array(nextC.data as Float32Array)
157
+ }
158
+
159
+ return probabilities
160
+ }
161
+
162
+ function pickVadOutputs(
163
+ outputs: Record<string, ort.Tensor>,
164
+ outputNames: readonly string[],
165
+ ) {
166
+ let probTensor: ort.Tensor | null = null
167
+ let hTensor: ort.Tensor | null = null
168
+ let cTensor: ort.Tensor | null = null
169
+
170
+ for (const name of outputNames) {
171
+ const tensor = outputs[name]
172
+ if (!tensor) {
173
+ continue
174
+ }
175
+ if (name === 'output') {
176
+ probTensor = tensor
177
+ } else if (name === 'hn') {
178
+ hTensor = tensor
179
+ } else if (name === 'cn') {
180
+ cTensor = tensor
181
+ }
182
+ }
183
+
184
+ if (!probTensor || !hTensor || !cTensor) {
185
+ throw new Error(
186
+ 'Unexpected VAD outputs; unable to read speech probabilities.',
187
+ )
188
+ }
189
+
190
+ return { probTensor, hTensor, cTensor }
191
+ }
192
+
193
+ function probabilitiesToSegments(
194
+ totalSamples: number,
195
+ probabilities: number[],
196
+ sampleRate: number,
197
+ config: VadConfig,
198
+ ): VadSegment[] {
199
+ const windowSamples = config.vadWindowSamples
200
+ const threshold = config.vadSpeechThreshold
201
+ const negThreshold = config.vadNegThreshold
202
+ const minSpeechSamples = (sampleRate * config.vadMinSpeechDurationMs) / 1000
203
+ const minSilenceSamples = (sampleRate * config.vadMinSilenceDurationMs) / 1000
204
+ const speechPadSamples = (sampleRate * config.vadSpeechPadMs) / 1000
205
+
206
+ let triggered = false
207
+ let tempEnd = 0
208
+ let currentSpeechStart = 0
209
+ const speeches: VadSegment[] = []
210
+
211
+ for (let index = 0; index < probabilities.length; index += 1) {
212
+ const prob = probabilities[index] ?? 0
213
+ const currentSample = index * windowSamples
214
+
215
+ if (prob >= threshold && tempEnd) {
216
+ tempEnd = 0
217
+ }
218
+
219
+ if (prob >= threshold && !triggered) {
220
+ triggered = true
221
+ currentSpeechStart = currentSample
222
+ continue
223
+ }
224
+
225
+ if (prob < negThreshold && triggered) {
226
+ if (!tempEnd) {
227
+ tempEnd = currentSample
228
+ }
229
+ if (currentSample - tempEnd < minSilenceSamples) {
230
+ continue
231
+ }
232
+ const speechEnd = tempEnd
233
+ if (speechEnd - currentSpeechStart >= minSpeechSamples) {
234
+ speeches.push({ start: currentSpeechStart, end: speechEnd })
235
+ }
236
+ triggered = false
237
+ tempEnd = 0
238
+ currentSpeechStart = 0
239
+ }
240
+ }
241
+
242
+ if (triggered) {
243
+ const speechEnd = totalSamples
244
+ if (speechEnd - currentSpeechStart >= minSpeechSamples) {
245
+ speeches.push({ start: currentSpeechStart, end: speechEnd })
246
+ }
247
+ }
248
+
249
+ if (speeches.length === 0) {
250
+ return []
251
+ }
252
+
253
+ for (let index = 0; index < speeches.length; index += 1) {
254
+ const speech = speeches[index]
255
+ if (!speech) {
256
+ continue
257
+ }
258
+ const nextSpeech = speeches[index + 1]
259
+ if (index === 0) {
260
+ speech.start = Math.max(0, speech.start - speechPadSamples)
261
+ }
262
+ if (nextSpeech) {
263
+ const silence = nextSpeech.start - speech.end
264
+ if (silence < speechPadSamples * 2) {
265
+ const adjustment = silence / 2
266
+ speech.end += adjustment
267
+ nextSpeech.start = Math.max(0, nextSpeech.start - adjustment)
268
+ } else {
269
+ speech.end = Math.min(totalSamples, speech.end + speechPadSamples)
270
+ nextSpeech.start = Math.max(0, nextSpeech.start - speechPadSamples)
271
+ }
272
+ } else {
273
+ speech.end = Math.min(totalSamples, speech.end + speechPadSamples)
274
+ }
275
+ }
276
+
277
+ return speeches.map((speech) => ({
278
+ start: speech.start / sampleRate,
279
+ end: speech.end / sampleRate,
280
+ }))
281
+ }
282
+
283
+ export async function detectSpeechBounds(
284
+ inputPath: string,
285
+ chapterStart: number,
286
+ chapterEnd: number,
287
+ duration: number,
288
+ ): Promise<SpeechBounds> {
289
+ const clipDuration = chapterEnd - chapterStart
290
+ if (clipDuration <= 0) {
291
+ return speechFallback(
292
+ duration,
293
+ 'Invalid chapter boundaries; using full chapter.',
294
+ )
295
+ }
296
+
297
+ const samples = await readAudioSamples({
298
+ inputPath,
299
+ start: chapterStart,
300
+ duration: clipDuration,
301
+ sampleRate: CONFIG.vadSampleRate,
302
+ })
303
+ const fallbackNote = `Speech detection failed near ${formatSeconds(chapterStart)}; using full chapter.`
304
+ if (samples.length === 0) {
305
+ return speechFallback(duration, fallbackNote)
306
+ }
307
+
308
+ const vadSegments = await detectSpeechSegmentsWithVad(
309
+ samples,
310
+ CONFIG.vadSampleRate,
311
+ CONFIG,
312
+ )
313
+ if (vadSegments.length === 0) {
314
+ return speechFallback(duration, fallbackNote)
315
+ }
316
+ const firstSegment = vadSegments[0]
317
+ const lastSegment = vadSegments[vadSegments.length - 1]
318
+ if (!firstSegment || !lastSegment) {
319
+ return speechFallback(duration, fallbackNote)
320
+ }
321
+ const speechStart = firstSegment.start
322
+ const speechEnd = lastSegment.end
323
+
324
+ if (speechEnd <= speechStart + 0.1) {
325
+ return speechFallback(duration, fallbackNote)
326
+ }
327
+
328
+ return { start: speechStart, end: speechEnd }
329
+ }
330
+
331
+ export async function checkSegmentHasSpeech(
332
+ inputPath: string,
333
+ duration: number,
334
+ ): Promise<boolean> {
335
+ if (duration <= 0) {
336
+ return false
337
+ }
338
+
339
+ const samples = await readAudioSamples({
340
+ inputPath,
341
+ start: 0,
342
+ duration,
343
+ sampleRate: CONFIG.vadSampleRate,
344
+ })
345
+ if (samples.length === 0) {
346
+ return false
347
+ }
348
+
349
+ const vadSegments = await detectSpeechSegmentsWithVad(
350
+ samples,
351
+ CONFIG.vadSampleRate,
352
+ CONFIG,
353
+ )
354
+ return vadSegments.length > 0
355
+ }
package/utils.ts ADDED
@@ -0,0 +1,138 @@
1
// Options shared by runCommand / runCommandBinary.
type RunCommandOptions = {
  // When true, a non-zero exit code is returned instead of thrown.
  allowFailure?: boolean
  // Called with the argv array before spawning (hook for logging).
  logCommand?: (command: string[]) => void
}
5
+
6
+ export function formatCommand(command: string[]) {
7
+ return command
8
+ .map((part) => (part.includes(' ') ? `"${part}"` : part))
9
+ .join(' ')
10
+ }
11
+
12
+ export async function runCommand(
13
+ command: string[],
14
+ options: RunCommandOptions = {},
15
+ ) {
16
+ options.logCommand?.(command)
17
+ const proc = Bun.spawn(command, {
18
+ stdout: 'pipe',
19
+ stderr: 'pipe',
20
+ })
21
+ const [stdout, stderr, exitCode] = await Promise.all([
22
+ new Response(proc.stdout).text(),
23
+ new Response(proc.stderr).text(),
24
+ proc.exited,
25
+ ])
26
+
27
+ if (exitCode !== 0 && !options.allowFailure) {
28
+ throw new Error(
29
+ `Command failed (${exitCode}): ${formatCommand(command)}\n${stderr}`,
30
+ )
31
+ }
32
+
33
+ return { stdout, stderr, exitCode }
34
+ }
35
+
36
+ export async function runCommandBinary(
37
+ command: string[],
38
+ options: RunCommandOptions = {},
39
+ ) {
40
+ options.logCommand?.(command)
41
+ const proc = Bun.spawn(command, {
42
+ stdout: 'pipe',
43
+ stderr: 'pipe',
44
+ })
45
+ const [stdout, stderr, exitCode] = await Promise.all([
46
+ new Response(proc.stdout).arrayBuffer(),
47
+ new Response(proc.stderr).text(),
48
+ proc.exited,
49
+ ])
50
+
51
+ if (exitCode !== 0 && !options.allowFailure) {
52
+ throw new Error(
53
+ `Command failed (${exitCode}): ${formatCommand(command)}\n${stderr}`,
54
+ )
55
+ }
56
+
57
+ return { stdout: new Uint8Array(stdout), stderr, exitCode }
58
+ }
59
+
60
+ export function formatSeconds(value: number) {
61
+ return `${value.toFixed(2)}s`
62
+ }
63
+
64
+ export function clamp(value: number, min: number, max: number) {
65
+ return Math.min(Math.max(value, min), max)
66
+ }
67
+
68
+ export function toKebabCase(value: string) {
69
+ return (
70
+ value
71
+ .trim()
72
+ .toLowerCase()
73
+ .replace(/['".,]/g, '')
74
+ .replace(/[^a-z0-9]+/g, '-')
75
+ .replace(/^-+|-+$/g, '')
76
+ .replace(/-+/g, '-') || 'untitled'
77
+ )
78
+ }
79
+
80
+ export function normalizeFilename(value: string) {
81
+ const numberWords: Record<string, number> = {
82
+ zero: 0,
83
+ one: 1,
84
+ two: 2,
85
+ three: 3,
86
+ four: 4,
87
+ five: 5,
88
+ six: 6,
89
+ seven: 7,
90
+ eight: 8,
91
+ nine: 9,
92
+ ten: 10,
93
+ eleven: 11,
94
+ twelve: 12,
95
+ thirteen: 13,
96
+ fourteen: 14,
97
+ fifteen: 15,
98
+ sixteen: 16,
99
+ seventeen: 17,
100
+ eighteen: 18,
101
+ nineteen: 19,
102
+ twenty: 20,
103
+ }
104
+
105
+ const numberWordPattern = new RegExp(
106
+ `\\b(${Object.keys(numberWords).join('|')})\\b`,
107
+ 'g',
108
+ )
109
+
110
+ return value
111
+ .trim()
112
+ .toLowerCase()
113
+ .replace(/\b(point|dot)\b/g, '.')
114
+ .replace(/\s*\.\s*/g, '.')
115
+ .replace(numberWordPattern, (word) =>
116
+ String(numberWords[word] ?? word).padStart(2, '0'),
117
+ )
118
+ }
119
+
120
+ export async function getMediaDurationSeconds(
121
+ filePath: string,
122
+ ): Promise<number> {
123
+ const result = await runCommand([
124
+ 'ffprobe',
125
+ '-v',
126
+ 'error',
127
+ '-show_entries',
128
+ 'format=duration',
129
+ '-of',
130
+ 'default=noprint_wrappers=1:nokey=1',
131
+ filePath,
132
+ ])
133
+ const duration = Number.parseFloat(result.stdout.trim())
134
+ if (!Number.isFinite(duration) || duration <= 0) {
135
+ throw new Error(`Invalid duration for ${filePath}: ${result.stdout}`)
136
+ }
137
+ return duration
138
+ }