eprec 0.0.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +52 -29
  3. package/cli.ts +150 -0
  4. package/package.json +39 -7
  5. package/process-course/chapter-processor.ts +1037 -0
  6. package/process-course/cli.ts +236 -0
  7. package/process-course/config.ts +50 -0
  8. package/process-course/edits/cli.ts +167 -0
  9. package/process-course/edits/combined-video-editor.ts +316 -0
  10. package/process-course/edits/edit-workspace.ts +90 -0
  11. package/process-course/edits/index.ts +20 -0
  12. package/process-course/edits/regenerate-transcript.ts +84 -0
  13. package/process-course/edits/remove-ranges.test.ts +36 -0
  14. package/process-course/edits/remove-ranges.ts +287 -0
  15. package/process-course/edits/timestamp-refinement.test.ts +25 -0
  16. package/process-course/edits/timestamp-refinement.ts +172 -0
  17. package/process-course/edits/transcript-diff.test.ts +105 -0
  18. package/process-course/edits/transcript-diff.ts +214 -0
  19. package/process-course/edits/transcript-output.test.ts +50 -0
  20. package/process-course/edits/transcript-output.ts +36 -0
  21. package/process-course/edits/types.ts +26 -0
  22. package/process-course/edits/video-editor.ts +246 -0
  23. package/process-course/errors.test.ts +63 -0
  24. package/process-course/errors.ts +82 -0
  25. package/process-course/ffmpeg.ts +449 -0
  26. package/process-course/jarvis-commands/handlers.ts +71 -0
  27. package/process-course/jarvis-commands/index.ts +14 -0
  28. package/process-course/jarvis-commands/parser.test.ts +348 -0
  29. package/process-course/jarvis-commands/parser.ts +257 -0
  30. package/process-course/jarvis-commands/types.ts +46 -0
  31. package/process-course/jarvis-commands/windows.ts +254 -0
  32. package/process-course/logging.ts +24 -0
  33. package/process-course/paths.test.ts +59 -0
  34. package/process-course/paths.ts +53 -0
  35. package/process-course/summary.test.ts +209 -0
  36. package/process-course/summary.ts +210 -0
  37. package/process-course/types.ts +85 -0
  38. package/process-course/utils/audio-analysis.test.ts +348 -0
  39. package/process-course/utils/audio-analysis.ts +463 -0
  40. package/process-course/utils/chapter-selection.test.ts +307 -0
  41. package/process-course/utils/chapter-selection.ts +136 -0
  42. package/process-course/utils/file-utils.test.ts +83 -0
  43. package/process-course/utils/file-utils.ts +57 -0
  44. package/process-course/utils/filename.test.ts +27 -0
  45. package/process-course/utils/filename.ts +12 -0
  46. package/process-course/utils/time-ranges.test.ts +221 -0
  47. package/process-course/utils/time-ranges.ts +86 -0
  48. package/process-course/utils/transcript.test.ts +257 -0
  49. package/process-course/utils/transcript.ts +86 -0
  50. package/process-course/utils/video-editing.ts +44 -0
  51. package/process-course-video.ts +389 -0
  52. package/speech-detection.ts +355 -0
  53. package/utils.ts +138 -0
  54. package/whispercpp-transcribe.ts +345 -0
@@ -0,0 +1,463 @@
1
+ import type {
2
+ SilenceBoundaryDirection,
3
+ SpeechBounds,
4
+ TimeRange,
5
+ } from '../types'
6
+ import { readAudioSamples } from '../ffmpeg'
7
+ import { CONFIG } from '../config'
8
+
9
/**
 * Root-mean-square level of a sample buffer.
 *
 * @param samples - Audio samples to measure.
 * @returns The square root of the mean of the squared samples, or `0` when the
 * buffer is empty.
 */
export function computeRms(samples: Float32Array): number {
  const count = samples.length
  if (count === 0) {
    return 0
  }
  // Accumulate the squared samples front to back, then normalise by the count.
  const energy = samples.reduce((total, value) => total + value * value, 0)
  return Math.sqrt(energy / count)
}
22
+
23
/**
 * Compute the minimum RMS value across all sliding windows of a given size.
 *
 * The window advances one sample at a time. Rather than re-summing every
 * window from scratch, a running sum of squares is maintained (add the sample
 * entering the window, drop the one leaving it), so the scan is linear in the
 * number of samples. Results can differ from a per-window sum by
 * floating-point rounding only.
 *
 * @param samples - Audio samples to scan.
 * @param windowSamples - Window length in samples; fractional values are
 * rounded down.
 * @returns The smallest window RMS. Returns `0` when `samples` is empty, when
 * the window length is not at least one sample, or when no window has a
 * finite RMS. When the buffer is no longer than one window, the RMS of the
 * whole buffer is returned.
 */
export function computeMinWindowRms(
  samples: Float32Array,
  windowSamples: number,
): number {
  const size = Math.floor(windowSamples)
  // `!(size >= 1)` also rejects NaN.
  if (samples.length === 0 || !(size >= 1)) {
    return 0
  }

  // Exact sum of squares over [from, from + length).
  const sumSquaresAt = (from: number, length: number): number => {
    let total = 0
    for (let i = 0; i < length; i += 1) {
      const sample = samples[from + i] ?? 0
      total += sample * sample
    }
    return total
  }

  if (samples.length <= size) {
    return Math.sqrt(sumSquaresAt(0, samples.length) / samples.length)
  }

  // sqrt and division by a constant are monotonic, so it is enough to track
  // the smallest sum of squares and convert it to RMS once at the end.
  let energy = sumSquaresAt(0, size)
  let minEnergy = Number.POSITIVE_INFINITY
  const lastOffset = samples.length - size
  for (let offset = 0; offset <= lastOffset; offset += 1) {
    if (offset > 0) {
      const entering = samples[offset + size - 1] ?? 0
      const leaving = samples[offset - 1] ?? 0
      energy += entering * entering - leaving * leaving
      // Rounding can push the running sum slightly below zero, and a
      // non-finite sample would poison it for every later window. In either
      // case fall back to an exact sum for this window.
      if (!(energy >= 0) || !Number.isFinite(energy)) {
        energy = sumSquaresAt(offset, size)
      }
    }
    // Windows whose energy is NaN fail this comparison and are skipped.
    if (energy < minEnergy) {
      minEnergy = energy
    }
  }
  return Number.isFinite(minEnergy) ? Math.sqrt(minEnergy / size) : 0
}
50
+
51
/**
 * Find where speech begins by scanning consecutive, non-overlapping RMS
 * windows from the start of the buffer.
 *
 * @param options.samples - Audio samples to scan.
 * @param options.sampleRate - Sample rate of `samples` in Hz.
 * @param options.rmsWindowMs - Window length in milliseconds; at least one
 * sample per window is always used.
 * @param options.rmsThreshold - A window counts as speech when its RMS is at
 * or above this value.
 * @returns Offset in seconds, measured from the start of `samples`, of the
 * first window that reaches the threshold. Returns `null` when no window does,
 * when the buffer is shorter than one window, or when `sampleRate` is not a
 * positive number. Samples after the last complete window are not inspected.
 */
export function findSpeechStartWithRms(options: {
  samples: Float32Array
  sampleRate: number
  rmsWindowMs: number
  rmsThreshold: number
}): number | null {
  const { samples, sampleRate, rmsWindowMs, rmsThreshold } = options
  // Without a positive sample rate the returned offset would be NaN or
  // negative; `!(x > 0)` also rejects NaN.
  if (!(sampleRate > 0)) {
    return null
  }
  const windowSamples = Math.max(
    1,
    Math.round((sampleRate * rmsWindowMs) / 1000),
  )
  const totalWindows = Math.floor(samples.length / windowSamples)
  for (let index = 0; index < totalWindows; index += 1) {
    const offset = index * windowSamples
    let sumSquares = 0
    for (let i = 0; i < windowSamples; i += 1) {
      const sample = samples[offset + i] ?? 0
      sumSquares += sample * sample
    }
    if (Math.sqrt(sumSquares / windowSamples) >= rmsThreshold) {
      return offset / sampleRate
    }
  }
  return null
}
79
+
80
/**
 * Find where speech ends by scanning consecutive, non-overlapping RMS windows
 * from the last complete window back towards the start of the buffer.
 *
 * @param options.samples - Audio samples to scan.
 * @param options.sampleRate - Sample rate of `samples` in Hz.
 * @param options.rmsWindowMs - Window length in milliseconds; at least one
 * sample per window is always used.
 * @param options.rmsThreshold - A window counts as speech when its RMS is at
 * or above this value.
 * @returns Offset in seconds, measured from the start of `samples`, of the end
 * of the last window that reaches the threshold. Returns `null` when no window
 * does, when the buffer is shorter than one window, or when `sampleRate` is
 * not a positive number. Samples after the last complete window are not
 * inspected.
 */
export function findSpeechEndWithRms(options: {
  samples: Float32Array
  sampleRate: number
  rmsWindowMs: number
  rmsThreshold: number
}): number | null {
  const { samples, sampleRate, rmsWindowMs, rmsThreshold } = options
  // Without a positive sample rate the returned offset would be Infinity,
  // NaN or negative; `!(x > 0)` also rejects NaN.
  if (!(sampleRate > 0)) {
    return null
  }
  const windowSamples = Math.max(
    1,
    Math.round((sampleRate * rmsWindowMs) / 1000),
  )
  const totalWindows = Math.floor(samples.length / windowSamples)
  for (let index = totalWindows - 1; index >= 0; index -= 1) {
    const offset = index * windowSamples
    let sumSquares = 0
    for (let i = 0; i < windowSamples; i += 1) {
      const sample = samples[offset + i] ?? 0
      sumSquares += sample * sample
    }
    if (Math.sqrt(sumSquares / windowSamples) >= rmsThreshold) {
      // Report the end of the window, i.e. the start of the next one.
      return ((index + 1) * windowSamples) / sampleRate
    }
  }
  return null
}
108
+
109
/**
 * Build silence gaps from speech segments and total duration.
 *
 * The gaps are the parts of `[0, duration]` not covered by any speech segment.
 * Segments may be given in any order and may overlap; the input array is not
 * modified.
 *
 * @param speechSegments - Time ranges that contain speech.
 * @param duration - Total length of the timeline; no gap extends past it.
 * @returns Gaps in ascending order. Gaps of 0.001 or shorter are omitted.
 */
export function buildSilenceGapsFromSpeech(
  speechSegments: TimeRange[],
  duration: number,
): TimeRange[] {
  // Sweep over a sorted copy so the result is correct for any input order and
  // the caller's array is left untouched.
  const ordered = [...speechSegments].sort((a, b) => a.start - b.start)
  const gaps: TimeRange[] = []
  let cursor = 0
  for (const segment of ordered) {
    if (cursor >= duration) {
      break
    }
    // A segment that starts beyond the timeline must not stretch the gap.
    const gapEnd = Math.min(segment.start, duration)
    if (gapEnd > cursor + 0.001) {
      gaps.push({ start: cursor, end: gapEnd })
    }
    // `max` keeps the cursor from moving backwards on overlapping segments.
    cursor = Math.max(cursor, segment.end)
  }
  if (duration > cursor + 0.001) {
    gaps.push({ start: cursor, end: duration })
  }
  return gaps
}
129
+
130
/**
 * Pick a silence boundary near `targetOffset` from pre-computed gaps.
 *
 * - If the target lies inside any gap (bounds inclusive) it is returned as is.
 * - For `'before'`, the end of the last gap in list order that ends at or
 *   before the target (0.001 tolerance) is returned.
 * - Otherwise, the start of the first gap in list order that starts at or
 *   after the target (0.001 tolerance) is returned.
 *
 * @returns The chosen boundary, or `null` when no gap qualifies.
 */
export function findSilenceBoundaryFromGaps(
  gaps: TimeRange[],
  targetOffset: number,
  direction: SilenceBoundaryDirection,
): number | null {
  const targetIsSilent = gaps.some(
    (gap) => gap.start <= targetOffset && gap.end >= targetOffset,
  )
  if (targetIsSilent) {
    return targetOffset
  }

  if (direction === 'before') {
    // Walking the list backwards, the first match is the last match in list
    // order.
    for (let position = gaps.length - 1; position >= 0; position -= 1) {
      const candidate = gaps[position]
      if (candidate !== undefined && candidate.end <= targetOffset + 0.001) {
        return candidate.end
      }
    }
    return null
  }

  const following = gaps.find((gap) => gap.start >= targetOffset - 0.001)
  return following === undefined ? null : following.start
}
159
+
160
/**
 * Build speech bounds that span the entire clip, tagged with an explanatory
 * note.
 *
 * @param duration - Used as the `end` of the returned bounds; `start` is `0`.
 * @param note - Stored unchanged on the returned bounds.
 */
export function speechFallback(duration: number, note: string): SpeechBounds {
  const wholeClip: SpeechBounds = { start: 0, end: duration, note }
  return wholeClip
}
166
+
167
/**
 * Find a silence boundary using RMS analysis.
 *
 * The buffer is split into consecutive, non-overlapping windows; a window is
 * silent when its RMS is below `rmsThreshold`. A boundary needs a run of at
 * least `round(minSilenceMs / rmsWindowMs)` (minimum 1) silent windows.
 *
 * - `'before'`: windows are scanned from the last one backwards, and the
 *   result is the end time of the run found (the qualifying run closest to
 *   the end of the buffer).
 * - any other direction: windows are scanned forwards, and the result is the
 *   start time of the first qualifying run.
 *
 * Windows are classified on demand, so the scan stops as soon as a boundary
 * is found. Samples after the last complete window are not inspected.
 *
 * @returns The boundary in seconds from the start of `samples`, or `null`
 * when no qualifying run exists, when the buffer is shorter than one window,
 * or when `sampleRate` is not a positive number.
 */
export function findSilenceBoundaryWithRms(options: {
  samples: Float32Array
  sampleRate: number
  direction: SilenceBoundaryDirection
  rmsWindowMs: number
  rmsThreshold: number
  minSilenceMs: number
}): number | null {
  const { samples, sampleRate, rmsWindowMs, rmsThreshold, minSilenceMs } =
    options
  // Without a positive sample rate the window duration (and so the result)
  // would be Infinity or NaN; `!(x > 0)` also rejects NaN.
  if (!(sampleRate > 0)) {
    return null
  }
  const windowSamples = Math.max(
    1,
    Math.round((sampleRate * rmsWindowMs) / 1000),
  )
  const minSilentWindows = Math.max(1, Math.round(minSilenceMs / rmsWindowMs))
  const totalWindows = Math.floor(samples.length / windowSamples)
  const windowSeconds = windowSamples / sampleRate

  const isSilentWindow = (index: number): boolean => {
    const offset = index * windowSamples
    let sumSquares = 0
    for (let i = 0; i < windowSamples; i += 1) {
      const sample = samples[offset + i] ?? 0
      sumSquares += sample * sample
    }
    return Math.sqrt(sumSquares / windowSamples) < rmsThreshold
  }

  const scanBackwards = options.direction === 'before'
  let run = 0
  for (let step = 0; step < totalWindows; step += 1) {
    const index = scanBackwards ? totalWindows - 1 - step : step
    if (!isSilentWindow(index)) {
      run = 0
      continue
    }
    run += 1
    if (run >= minSilentWindows) {
      // Backwards: `index` is the earliest window of the run, so `index + run`
      // is one past its latest window (the end of the silence).
      // Forwards: `index` is the latest window of the run, so
      // `index - run + 1` is its first window (the start of the silence).
      const boundaryIndex = scanBackwards ? index + run : index - run + 1
      return boundaryIndex * windowSeconds
    }
  }
  return null
}
233
+
234
/**
 * Search for a silence boundary in a progressively wider region of the
 * buffer.
 *
 * The region starts at `startWindowSeconds` (at least 0.01 s) and grows by
 * `stepSeconds` (at least 0.01 s) per attempt, up to `maxWindowSeconds` capped
 * to the buffer duration. For `'before'` the region is anchored at the end of
 * the buffer; otherwise it is anchored at the start. Each attempt delegates
 * to `findSilenceBoundaryWithRms` and the first boundary found wins.
 *
 * @returns The boundary in seconds from the start of `samples`, or `null`
 * when the buffer is empty, `sampleRate` is not positive, the widest region
 * is 0.01 s or shorter, or no attempt finds a boundary.
 */
export function findSilenceBoundaryProgressive(options: {
  samples: Float32Array
  sampleRate: number
  direction: SilenceBoundaryDirection
  startWindowSeconds: number
  stepSeconds: number
  maxWindowSeconds: number
  rmsWindowMs: number
  rmsThreshold: number
  minSilenceMs: number
}): number | null {
  const { samples, sampleRate, direction } = options
  const totalSamples = samples.length
  if (totalSamples === 0 || sampleRate <= 0) {
    return null
  }

  const totalSeconds = totalSamples / sampleRate
  const widestSeconds = Math.min(options.maxWindowSeconds, totalSeconds)
  if (widestSeconds <= 0.01) {
    return null
  }
  const narrowestSeconds = Math.min(
    Math.max(options.startWindowSeconds, 0.01),
    widestSeconds,
  )
  const growthSeconds = Math.max(options.stepSeconds, 0.01)
  const anchoredAtEnd = direction === 'before'

  let spanSeconds = narrowestSeconds
  // The condition also stops the loop if any of the bounds is NaN.
  while (spanSeconds <= widestSeconds + 1e-6) {
    const spanSamples = Math.max(1, Math.round(spanSeconds * sampleRate))
    const region = anchoredAtEnd
      ? samples.subarray(Math.max(0, totalSamples - spanSamples), totalSamples)
      : samples.subarray(0, spanSamples)

    const boundary = findSilenceBoundaryWithRms({
      samples: region,
      sampleRate,
      direction,
      rmsWindowMs: options.rmsWindowMs,
      rmsThreshold: options.rmsThreshold,
      minSilenceMs: options.minSilenceMs,
    })
    if (boundary !== null) {
      // The boundary is relative to the region; shift it back to buffer time
      // when the region does not start at the beginning of the buffer.
      return anchoredAtEnd ? totalSeconds - spanSeconds + boundary : boundary
    }

    if (spanSeconds >= widestSeconds) {
      break
    }
    spanSeconds = Math.min(widestSeconds, spanSeconds + growthSeconds)
  }

  return null
}
306
+
307
/**
 * Locate the quietest of the consecutive, non-overlapping RMS windows in a
 * buffer.
 *
 * @param options.samples - Audio samples to scan.
 * @param options.sampleRate - Sample rate of `samples` in Hz.
 * @param options.rmsWindowMs - Window length in milliseconds; at least one
 * sample per window is always used.
 * @returns The centre of the quietest window (seconds from the start of
 * `samples`) together with its RMS; the earliest window wins ties. When the
 * buffer is no longer than one window, the centre and RMS of the whole buffer
 * are returned. Returns `null` for an empty buffer or when no window has a
 * finite RMS. Samples after the last complete window are not inspected.
 */
export function findLowestAmplitudeOffset(options: {
  samples: Float32Array
  sampleRate: number
  rmsWindowMs: number
}): { offsetSeconds: number; rms: number } | null {
  const { samples, sampleRate, rmsWindowMs } = options
  const sampleCount = samples.length
  const frameLength = Math.max(
    1,
    Math.round((sampleRate * rmsWindowMs) / 1000),
  )
  if (sampleCount === 0 || frameLength <= 0) {
    return null
  }
  if (sampleCount <= frameLength) {
    const middleSeconds = sampleCount / 2 / sampleRate
    return { offsetSeconds: middleSeconds, rms: computeRms(samples) }
  }

  let quietestRms = Number.POSITIVE_INFINITY
  let quietestStart = 0
  let frameStart = 0
  while (frameStart + frameLength <= sampleCount) {
    const frameEnd = frameStart + frameLength
    let energy = 0
    for (let position = frameStart; position < frameEnd; position += 1) {
      const value = samples[position] ?? 0
      energy += value * value
    }
    const frameRms = Math.sqrt(energy / frameLength)
    // Strict comparison keeps the earliest frame on ties.
    if (frameRms < quietestRms) {
      quietestRms = frameRms
      quietestStart = frameStart
    }
    frameStart = frameEnd
  }

  if (!Number.isFinite(quietestRms)) {
    return null
  }
  return {
    offsetSeconds: (quietestStart + frameLength / 2) / sampleRate,
    rms: quietestRms,
  }
}
349
+
350
/**
 * Search a progressively wider region of the buffer for a window whose RMS is
 * below `rmsThreshold`, and return the centre of the quietest one.
 *
 * The region starts at `startWindowSeconds` (at least 0.01 s) and grows by
 * `stepSeconds` (at least 0.01 s) per attempt, up to `maxWindowSeconds` capped
 * to the buffer duration. For `'before'` the region is anchored at the end of
 * the buffer; otherwise it is anchored at the start. Each attempt delegates
 * to `findLowestAmplitudeOffset`; the first attempt whose quietest window is
 * below the threshold wins.
 *
 * @returns The offset in seconds from the start of `samples`, or `null` when
 * the buffer is empty, `sampleRate` is not positive, the widest region is
 * 0.01 s or shorter, or no attempt finds a window below the threshold.
 */
export function findLowestAmplitudeBoundaryProgressive(options: {
  samples: Float32Array
  sampleRate: number
  direction: SilenceBoundaryDirection
  startWindowSeconds: number
  stepSeconds: number
  maxWindowSeconds: number
  rmsWindowMs: number
  rmsThreshold: number
}): number | null {
  const { samples, sampleRate } = options
  const totalSamples = samples.length
  if (totalSamples === 0 || sampleRate <= 0) {
    return null
  }

  const totalSeconds = totalSamples / sampleRate
  const widestSeconds = Math.min(options.maxWindowSeconds, totalSeconds)
  if (widestSeconds <= 0.01) {
    return null
  }
  const narrowestSeconds = Math.min(
    Math.max(options.startWindowSeconds, 0.01),
    widestSeconds,
  )
  const growthSeconds = Math.max(options.stepSeconds, 0.01)
  const anchoredAtEnd = options.direction === 'before'

  let spanSeconds = narrowestSeconds
  // The condition also stops the loop if any of the bounds is NaN.
  while (spanSeconds <= widestSeconds + 1e-6) {
    const spanSamples = Math.max(1, Math.round(spanSeconds * sampleRate))
    const regionStart = anchoredAtEnd
      ? Math.max(0, totalSamples - spanSamples)
      : 0
    const regionEnd = anchoredAtEnd ? totalSamples : spanSamples
    // Seconds to add to a region-relative offset to get buffer time.
    const regionBaseSeconds = anchoredAtEnd
      ? (totalSamples - spanSamples) / sampleRate
      : 0

    const quietest = findLowestAmplitudeOffset({
      samples: samples.subarray(regionStart, regionEnd),
      sampleRate,
      rmsWindowMs: options.rmsWindowMs,
    })
    if (quietest && quietest.rms < options.rmsThreshold) {
      return regionBaseSeconds + quietest.offsetSeconds
    }

    if (spanSeconds >= widestSeconds) {
      break
    }
    spanSeconds = Math.min(widestSeconds, spanSeconds + growthSeconds)
  }

  return null
}
408
+
409
/**
 * Load audio samples for a span of a file and locate the end of speech in
 * them with `findSpeechEndWithRms`.
 *
 * Samples are requested from `readAudioSamples` at `CONFIG.vadSampleRate` and
 * analysed with `CONFIG.commandSilenceRmsWindowMs` and
 * `CONFIG.commandSilenceRmsThreshold`.
 *
 * @param options.inputPath - Forwarded to `readAudioSamples`.
 * @param options.start - Forwarded to `readAudioSamples`; presumably the span
 * start in seconds (confirm against `../ffmpeg`).
 * @param options.duration - Forwarded to `readAudioSamples`; spans of 0.05 or
 * shorter are not analysed.
 * @returns The speech end in seconds relative to the loaded samples, or `null`
 * when the span is too short, no samples were read, or no speech was found.
 */
export async function findSpeechEndWithRmsFallback(options: {
  inputPath: string
  start: number
  duration: number
}): Promise<number | null> {
  const { inputPath, start, duration } = options
  const spanTooShort = duration <= 0.05
  if (spanTooShort) {
    return null
  }

  const samples = await readAudioSamples({
    inputPath,
    start,
    duration,
    sampleRate: CONFIG.vadSampleRate,
  })

  return samples.length === 0
    ? null
    : findSpeechEndWithRms({
        samples,
        sampleRate: CONFIG.vadSampleRate,
        rmsWindowMs: CONFIG.commandSilenceRmsWindowMs,
        rmsThreshold: CONFIG.commandSilenceRmsThreshold,
      })
}
436
+
437
/**
 * Load audio samples for a span of a file and locate the start of speech in
 * them with `findSpeechStartWithRms`.
 *
 * Samples are requested from `readAudioSamples` at `CONFIG.vadSampleRate` and
 * analysed with `CONFIG.commandSilenceRmsWindowMs` and
 * `CONFIG.commandSilenceRmsThreshold`.
 *
 * @param options.inputPath - Forwarded to `readAudioSamples`.
 * @param options.start - Forwarded to `readAudioSamples`; presumably the span
 * start in seconds (confirm against `../ffmpeg`).
 * @param options.duration - Forwarded to `readAudioSamples`; spans of 0.05 or
 * shorter are not analysed.
 * @returns The speech start in seconds relative to the loaded samples, or
 * `null` when the span is too short, no samples were read, or no speech was
 * found.
 */
export async function findSpeechStartWithRmsFallback(options: {
  inputPath: string
  start: number
  duration: number
}): Promise<number | null> {
  const { inputPath, start, duration } = options
  const spanTooShort = duration <= 0.05
  if (spanTooShort) {
    return null
  }

  const samples = await readAudioSamples({
    inputPath,
    start,
    duration,
    sampleRate: CONFIG.vadSampleRate,
  })

  return samples.length === 0
    ? null
    : findSpeechStartWithRms({
        samples,
        sampleRate: CONFIG.vadSampleRate,
        rmsWindowMs: CONFIG.commandSilenceRmsWindowMs,
        rmsThreshold: CONFIG.commandSilenceRmsThreshold,
      })
}