eprec 1.10.2 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cli.ts CHANGED
@@ -27,6 +27,7 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
   pauseActiveSpinner,
   resumeActiveSpinner,
@@ -156,18 +157,30 @@ async function main(rawArgs = hideBin(process.argv)) {
       }),
       async (argv) => {
         const transcribeArgs = await resolveTranscribeArgs(argv, context)
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Transcribing audio' })
+          : undefined
         let resultText = ''
         await withSpinner(
           'Transcribing audio',
           async () => {
-            const result = await transcribeAudio(transcribeArgs.inputPath, {
-              modelPath: transcribeArgs.modelPath,
-              language: transcribeArgs.language,
-              threads: transcribeArgs.threads,
-              binaryPath: transcribeArgs.binaryPath,
-              outputBasePath: transcribeArgs.outputBasePath,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
             })
-            resultText = result.text
+            try {
+              const result = await transcribeAudio(transcribeArgs.inputPath, {
+                modelPath: transcribeArgs.modelPath,
+                language: transcribeArgs.language,
+                threads: transcribeArgs.threads,
+                binaryPath: transcribeArgs.binaryPath,
+                outputBasePath: transcribeArgs.outputBasePath,
+                progress,
+              })
+              resultText = result.text
+            } finally {
+              setLogHooks({})
+            }
           },
           {
             successText: 'Transcription complete',
@@ -203,16 +216,28 @@ async function main(rawArgs = hideBin(process.argv)) {
           argv,
           context,
         )
+        const progress = context.interactive
+          ? createStepProgressReporter({ action: 'Detecting speech' })
+          : undefined
         let segments: unknown = []
         await withSpinner(
           'Detecting speech',
           async () => {
-            await ensureFfmpegAvailable()
-            segments = await detectSpeechSegmentsForFile({
-              inputPath,
-              start,
-              end,
+            setLogHooks({
+              beforeLog: pauseActiveSpinner,
+              afterLog: resumeActiveSpinner,
             })
+            try {
+              await ensureFfmpegAvailable()
+              segments = await detectSpeechSegmentsForFile({
+                inputPath,
+                start,
+                end,
+                progress,
+              })
+            } finally {
+              setLogHooks({})
+            }
           },
           {
             successText: 'Speech detection complete',
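This diff does not include the progress-reporter module itself, so the reporter's shape is only implied. Inferred from the call sites above and in the hunks below, the contract would look roughly like the following sketch; the method names and argument shapes come straight from the diff, while the exact typings and factory signature are assumptions:

    // Sketch of the StepProgressReporter contract implied by the call sites.
    // Only the method names and argument shapes are grounded in this diff;
    // everything else is an assumption, not the package's actual code.
    export type StepProgressReporter = {
      // Begin (or restart) a phase with a known number of steps.
      start(options: { stepCount: number; label: string }): void
      // Mark one step done, optionally updating the label.
      step(label?: string): void
      // Update the label without advancing the counter.
      setLabel(label: string): void
      // Stop reporting and show a final status message.
      finish(message: string): void
    }

    export declare function createStepProgressReporter(options: {
      action: string
    }): StepProgressReporter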
@@ -6,6 +6,7 @@ import { CONFIG } from '../process-course/config'
 import { formatSeconds, getMediaDurationSeconds } from './utils'
 import { speechFallback } from '../process-course/utils/audio-analysis'
 import type { SpeechBounds } from '../process-course/types'
+import type { StepProgressReporter } from '../progress-reporter'
 
 export type VadConfig = {
   vadWindowSamples: number
@@ -27,6 +28,7 @@ export async function detectSpeechSegmentsWithVad(
   samples: Float32Array,
   sampleRate: number,
   config: VadConfig,
+  options?: { onProgress?: () => void; updateStride?: number },
 ): Promise<VadSegment[]> {
   const vadSession = await getVadSession(config)
   const probabilities = await getVadProbabilities(
@@ -34,6 +36,7 @@ export async function detectSpeechSegmentsWithVad(
     sampleRate,
     config,
     vadSession,
+    options,
   )
   return probabilitiesToSegments(
     samples.length,
@@ -47,7 +50,10 @@ export async function detectSpeechSegmentsForFile(options: {
   inputPath: string
   start?: number
   end?: number
+  progress?: StepProgressReporter
 }): Promise<SpeechSegment[]> {
+  const progress = options.progress
+  progress?.start({ stepCount: 1, label: 'Loading audio' })
   const start = options.start ?? 0
   if (!Number.isFinite(start) || start < 0) {
     throw new Error('Start time must be a non-negative number.')
@@ -66,13 +72,31 @@ export async function detectSpeechSegmentsForFile(options: {
     sampleRate: CONFIG.vadSampleRate,
   })
   if (samples.length === 0) {
+    progress?.finish('No audio')
     return []
   }
+  const windowSamples = CONFIG.vadWindowSamples
+  const totalWindows = Math.ceil(samples.length / windowSamples)
+  const updateStride = Math.max(1, Math.floor(totalWindows / 50))
+  const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride))
+  progress?.start({ stepCount: updateCount, label: 'Running VAD' })
+  let progressUpdates = 0
   const segments = await detectSpeechSegmentsWithVad(
     samples,
     CONFIG.vadSampleRate,
     CONFIG,
+    {
+      onProgress: () => {
+        progressUpdates += 1
+        if (progressUpdates <= updateCount) {
+          progress?.step('Running VAD')
+        }
+      },
+      updateStride,
+    },
   )
+  progress?.setLabel('Building segments')
+  progress?.finish('Complete')
   return segments.map((segment) => ({
     start: segment.start + start,
     end: segment.end + start,
@@ -116,6 +140,7 @@ async function getVadProbabilities(
   sampleRate: number,
   config: VadConfig,
   session: ort.InferenceSession,
+  options?: { onProgress?: () => void; updateStride?: number },
 ) {
   const windowSamples = config.vadWindowSamples
   const srTensor = new ort.Tensor(
@@ -126,6 +151,8 @@ async function getVadProbabilities(
   const probabilities: number[] = []
   let stateH = new Float32Array(2 * 1 * 64)
   let stateC = new Float32Array(2 * 1 * 64)
+  const updateStride = Math.max(1, Math.floor(options?.updateStride ?? 1))
+  let updateIndex = 0
 
   for (let offset = 0; offset < samples.length; offset += windowSamples) {
     const chunk = samples.subarray(offset, offset + windowSamples)
@@ -154,6 +181,10 @@ async function getVadProbabilities(
     probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
     stateH = new Float32Array(nextH.data as Float32Array)
     stateC = new Float32Array(nextC.data as Float32Array)
+    if (updateIndex % updateStride === 0) {
+      options?.onProgress?.()
+    }
+    updateIndex += 1
   }
 
   return probabilities
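The stride math above caps reporter updates at roughly 50 per run, however long the audio is. A worked example with assumed numbers (CONFIG.vadSampleRate and CONFIG.vadWindowSamples are not shown in this diff; 16 kHz audio and 512-sample windows are typical for Silero-style VAD):

    // Hypothetical input: 10 minutes of 16 kHz audio, 512-sample windows.
    const totalWindows = Math.ceil((10 * 60 * 16_000) / 512) // 18750
    const updateStride = Math.max(1, Math.floor(totalWindows / 50)) // 375
    const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride)) // 50
    // getVadProbabilities invokes onProgress once per updateStride windows,
    // so the reporter steps about 50 times instead of 18750.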
@@ -1,6 +1,7 @@
 import path from 'node:path'
 import { mkdir } from 'node:fs/promises'
 import { runCommand } from './utils'
+import type { StepProgressReporter } from '../progress-reporter'
 
 const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
 const DEFAULT_MODEL_URL =
@@ -14,6 +15,7 @@ type TranscribeOptions = {
   threads?: number
   binaryPath?: string
   outputBasePath?: string
+  progress?: StepProgressReporter
 }
 
 export type TranscriptSegment = {
@@ -36,6 +38,7 @@ export async function transcribeAudio(
   audioPath: string,
   options: TranscribeOptions = {},
 ): Promise<TranscriptionResult> {
+  const progress = options.progress
   const resolvedAudioPath = path.resolve(audioPath)
   const resolvedModelPath = path.resolve(
     options.modelPath ?? getDefaultWhisperModelPath(),
@@ -49,7 +52,9 @@ export async function transcribeAudio(
     `${path.parse(resolvedAudioPath).name}-transcript`,
   )
 
-  await ensureModelFile(resolvedModelPath)
+  const totalSteps = 3
+  progress?.start({ stepCount: totalSteps, label: 'Checking model' })
+  await ensureModelFile(resolvedModelPath, progress)
 
   const args = [
     binaryPath,
@@ -69,17 +74,23 @@ export async function transcribeAudio(
     args.push('-t', String(options.threads))
   }
 
+  progress?.step('Transcribing audio')
   const result = await runCommand(args)
+  progress?.step('Reading output')
   const transcriptPath = `${outputBasePath}.txt`
   const transcript = await readTranscriptText(transcriptPath, result.stdout)
   const { segments, source } = await readTranscriptSegments(
     `${outputBasePath}.json`,
   )
   const normalized = normalizeTranscriptText(transcript)
+  progress?.finish('Complete')
   return { text: normalized, segments, segmentsSource: source }
 }
 
-async function ensureModelFile(modelPath: string) {
+async function ensureModelFile(
+  modelPath: string,
+  progress?: StepProgressReporter,
+) {
   const file = Bun.file(modelPath)
   if (await file.exists()) {
     return
@@ -90,6 +101,7 @@ async function ensureModelFile(modelPath: string) {
     throw new Error(`Whisper model not found at ${modelPath}.`)
   }
 
+  progress?.setLabel('Downloading model')
   await mkdir(path.dirname(modelPath), { recursive: true })
   const response = await fetch(DEFAULT_MODEL_URL)
   if (!response.ok) {
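Taken together, callers thread a reporter through transcribeAudio the same way cli.ts now does. A minimal usage sketch; the factory and its options come from the diff, while the import paths and the TTY check stand in for details this diff does not show:

    // Hypothetical usage, mirroring the cli.ts changes above.
    import { createStepProgressReporter } from './src/progress-reporter'
    import { transcribeAudio } from './src/transcribe'

    const progress = process.stdout.isTTY // stand-in for context.interactive
      ? createStepProgressReporter({ action: 'Transcribing audio' })
      : undefined

    const result = await transcribeAudio('lecture.wav', { progress })
    console.log(result.text) // normalized transcript text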