eprec 1.10.2 → 1.12.0
- package/app/assets/styles.css +43 -0
- package/app/client/editing-workspace.tsx +260 -78
- package/package.json +1 -1
- package/process-course/edits/cli.ts +59 -22
- package/process-course/edits/combined-video-editor.ts +23 -0
- package/process-course/edits/video-editor.ts +23 -0
- package/server/processing-queue.ts +441 -0
- package/src/app-server.ts +4 -0
- package/src/cli.ts +37 -12
- package/src/speech-detection.ts +31 -0
- package/src/whispercpp-transcribe.ts +14 -2
package/src/cli.ts
CHANGED
@@ -27,6 +27,7 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
   pauseActiveSpinner,
   resumeActiveSpinner,
@@ -156,18 +157,30 @@ async function main(rawArgs = hideBin(process.argv)) {
       }),
       async (argv) => {
         const transcribeArgs = await resolveTranscribeArgs(argv, context)
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Transcribing audio' })
+          : undefined
         let resultText = ''
         await withSpinner(
           'Transcribing audio',
           async () => {
-
-
-
-                threads: transcribeArgs.threads,
-                binaryPath: transcribeArgs.binaryPath,
-                outputBasePath: transcribeArgs.outputBasePath,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
             })
-
+            try {
+              const result = await transcribeAudio(transcribeArgs.inputPath, {
+                modelPath: transcribeArgs.modelPath,
+                language: transcribeArgs.language,
+                threads: transcribeArgs.threads,
+                binaryPath: transcribeArgs.binaryPath,
+                outputBasePath: transcribeArgs.outputBasePath,
+                progress,
+              })
+              resultText = result.text
+            } finally {
+              setLogHooks({})
+            }
           },
           {
             successText: 'Transcription complete',
@@ -203,16 +216,28 @@ async function main(rawArgs = hideBin(process.argv)) {
           argv,
          context,
        )
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Detecting speech' })
+          : undefined
        let segments: unknown = []
        await withSpinner(
          'Detecting speech',
          async () => {
-
-
-
-                start,
-                end,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
            })
+            try {
+              await ensureFfmpegAvailable()
+              segments = await detectSpeechSegmentsForFile({
+                inputPath,
+                start,
+                end,
+                progress,
+              })
+            } finally {
+              setLogHooks({})
+            }
          },
          {
            successText: 'Speech detection complete',
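
Note the pattern in both commands above: a progress reporter is created only for interactive sessions, and each long-running call is wrapped in setLogHooks with pauseActiveSpinner/resumeActiveSpinner (cleared in a finally block) so log output doesn't corrupt the spinner line. The progress-reporter module itself is not part of this diff; the following is a hypothetical minimal sketch of the shape createStepProgressReporter appears to return, inferred from the call sites (start, step, setLabel, finish) — an assumption, not the package's actual implementation.

// Hypothetical sketch inferred from call sites in this diff; the real
// createStepProgressReporter lives in src/progress-reporter.ts, which is
// not included in the published diff.
type StepProgressReporter = {
  start(options: { stepCount: number; label: string }): void
  step(label?: string): void
  setLabel(label: string): void
  finish(text?: string): void
}

function createStepProgressReporter(options: {
  action: string
}): StepProgressReporter {
  let done = 0
  let total = 0
  // Redraw a single status line on stderr,
  // e.g. "Transcribing audio: Checking model (1/3)".
  const render = (label: string) =>
    process.stderr.write(`\r${options.action}: ${label} (${done}/${total})`)
  return {
    start({ stepCount, label }) {
      // start() may be called again to begin a new phase with a new step count,
      // as detectSpeechSegmentsForFile does below.
      total = stepCount
      done = 0
      render(label)
    },
    step(label = '') {
      done = Math.min(total, done + 1)
      render(label)
    },
    setLabel(label) {
      render(label)
    },
    finish(text = 'Done') {
      process.stderr.write(`\r${options.action}: ${text} (${total}/${total})\n`)
    },
  }
}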
package/src/speech-detection.ts
CHANGED
@@ -6,6 +6,7 @@ import { CONFIG } from '../process-course/config'
 import { formatSeconds, getMediaDurationSeconds } from './utils'
 import { speechFallback } from '../process-course/utils/audio-analysis'
 import type { SpeechBounds } from '../process-course/types'
+import type { StepProgressReporter } from '../progress-reporter'
 
 export type VadConfig = {
   vadWindowSamples: number
@@ -27,6 +28,7 @@ export async function detectSpeechSegmentsWithVad(
   samples: Float32Array,
   sampleRate: number,
   config: VadConfig,
+  options?: { onProgress?: () => void; updateStride?: number },
 ): Promise<VadSegment[]> {
   const vadSession = await getVadSession(config)
   const probabilities = await getVadProbabilities(
@@ -34,6 +36,7 @@ export async function detectSpeechSegmentsWithVad(
     sampleRate,
     config,
     vadSession,
+    options,
   )
   return probabilitiesToSegments(
     samples.length,
@@ -47,7 +50,10 @@ export async function detectSpeechSegmentsForFile(options: {
   inputPath: string
   start?: number
   end?: number
+  progress?: StepProgressReporter
 }): Promise<SpeechSegment[]> {
+  const progress = options.progress
+  progress?.start({ stepCount: 1, label: 'Loading audio' })
   const start = options.start ?? 0
   if (!Number.isFinite(start) || start < 0) {
     throw new Error('Start time must be a non-negative number.')
@@ -66,13 +72,31 @@ export async function detectSpeechSegmentsForFile(options: {
     sampleRate: CONFIG.vadSampleRate,
   })
   if (samples.length === 0) {
+    progress?.finish('No audio')
     return []
   }
+  const windowSamples = CONFIG.vadWindowSamples
+  const totalWindows = Math.ceil(samples.length / windowSamples)
+  const updateStride = Math.max(1, Math.floor(totalWindows / 50))
+  const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride))
+  progress?.start({ stepCount: updateCount, label: 'Running VAD' })
+  let progressUpdates = 0
   const segments = await detectSpeechSegmentsWithVad(
     samples,
     CONFIG.vadSampleRate,
     CONFIG,
+    {
+      onProgress: () => {
+        progressUpdates += 1
+        if (progressUpdates <= updateCount) {
+          progress?.step('Running VAD')
+        }
+      },
+      updateStride,
+    },
   )
+  progress?.setLabel('Building segments')
+  progress?.finish('Complete')
   return segments.map((segment) => ({
     start: segment.start + start,
     end: segment.end + start,
@@ -116,6 +140,7 @@ async function getVadProbabilities(
   sampleRate: number,
   config: VadConfig,
   session: ort.InferenceSession,
+  options?: { onProgress?: () => void; updateStride?: number },
 ) {
   const windowSamples = config.vadWindowSamples
   const srTensor = new ort.Tensor(
@@ -126,6 +151,8 @@ async function getVadProbabilities(
   const probabilities: number[] = []
   let stateH = new Float32Array(2 * 1 * 64)
   let stateC = new Float32Array(2 * 1 * 64)
+  const updateStride = Math.max(1, Math.floor(options?.updateStride ?? 1))
+  let updateIndex = 0
 
   for (let offset = 0; offset < samples.length; offset += windowSamples) {
     const chunk = samples.subarray(offset, offset + windowSamples)
@@ -154,6 +181,10 @@ async function getVadProbabilities(
     probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
     stateH = new Float32Array(nextH.data as Float32Array)
     stateC = new Float32Array(nextC.data as Float32Array)
+    if (updateIndex % updateStride === 0) {
+      options?.onProgress?.()
+    }
+    updateIndex += 1
   }
 
   return probabilities
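
The stride logic added to detectSpeechSegmentsForFile keeps progress reporting cheap: instead of stepping the reporter once per VAD window, onProgress fires only every updateStride windows, capping redraws at roughly 50 regardless of input length. A worked example with illustrative numbers (the real window size comes from CONFIG.vadWindowSamples, which this diff does not show):

// Illustrative numbers only: ten minutes of 16 kHz audio with an assumed
// 512-sample VAD window (Silero-style); CONFIG's actual values may differ.
const samplesLength = 10 * 60 * 16_000 // 9,600,000 samples
const windowSamples = 512 // assumed
const totalWindows = Math.ceil(samplesLength / windowSamples) // 18,750
const updateStride = Math.max(1, Math.floor(totalWindows / 50)) // 375
const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride)) // 50
// getVadProbabilities invokes onProgress at window indices 0, 375, 750, ...
// so the reporter steps 50 times instead of 18,750.
console.log({ totalWindows, updateStride, updateCount })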
package/src/whispercpp-transcribe.ts
CHANGED
@@ -1,6 +1,7 @@
 import path from 'node:path'
 import { mkdir } from 'node:fs/promises'
 import { runCommand } from './utils'
+import type { StepProgressReporter } from '../progress-reporter'
 
 const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
 const DEFAULT_MODEL_URL =
@@ -14,6 +15,7 @@ type TranscribeOptions = {
   threads?: number
   binaryPath?: string
   outputBasePath?: string
+  progress?: StepProgressReporter
 }
 
 export type TranscriptSegment = {
@@ -36,6 +38,7 @@ export async function transcribeAudio(
   audioPath: string,
   options: TranscribeOptions = {},
 ): Promise<TranscriptionResult> {
+  const progress = options.progress
   const resolvedAudioPath = path.resolve(audioPath)
   const resolvedModelPath = path.resolve(
     options.modelPath ?? getDefaultWhisperModelPath(),
@@ -49,7 +52,9 @@ export async function transcribeAudio(
     `${path.parse(resolvedAudioPath).name}-transcript`,
   )
 
-
+  const totalSteps = 3
+  progress?.start({ stepCount: totalSteps, label: 'Checking model' })
+  await ensureModelFile(resolvedModelPath, progress)
 
   const args = [
     binaryPath,
@@ -69,17 +74,23 @@ export async function transcribeAudio(
     args.push('-t', String(options.threads))
   }
 
+  progress?.step('Transcribing audio')
   const result = await runCommand(args)
+  progress?.step('Reading output')
   const transcriptPath = `${outputBasePath}.txt`
   const transcript = await readTranscriptText(transcriptPath, result.stdout)
   const { segments, source } = await readTranscriptSegments(
     `${outputBasePath}.json`,
   )
   const normalized = normalizeTranscriptText(transcript)
+  progress?.finish('Complete')
   return { text: normalized, segments, segmentsSource: source }
 }
 
-async function ensureModelFile(modelPath: string) {
+async function ensureModelFile(
+  modelPath: string,
+  progress?: StepProgressReporter,
+) {
   const file = Bun.file(modelPath)
   if (await file.exists()) {
     return
@@ -90,6 +101,7 @@ async function ensureModelFile(modelPath: string) {
     throw new Error(`Whisper model not found at ${modelPath}.`)
   }
 
+  progress?.setLabel('Downloading model')
   await mkdir(path.dirname(modelPath), { recursive: true })
   const response = await fetch(DEFAULT_MODEL_URL)
   if (!response.ok) {
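
With these changes, transcribeAudio reports three steps: checking (and, if needed, downloading) the model, running the whisper.cpp binary, and reading the output files. A hypothetical usage sketch — the import specifier, audio path, and inline reporter below are assumptions for illustration, not taken from the package's documentation:

// Hypothetical caller; the module specifier and reporter object are assumed.
import { transcribeAudio } from './src/whispercpp-transcribe'

const result = await transcribeAudio('./lesson-01.wav', {
  language: 'en',
  threads: 4,
  progress: {
    start: (opts: { stepCount: number; label: string }) =>
      console.log(`[0/${opts.stepCount}] ${opts.label}`),
    step: (label?: string) => console.log(`step: ${label ?? ''}`),
    setLabel: (label: string) => console.log(`label: ${label}`),
    finish: (text?: string) => console.log(`done: ${text ?? ''}`),
  },
})
console.log(result.text)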