eprec 1.10.2 → 1.11.0

This diff shows the published contents of the two package versions as they appear in their public registry, and is provided for informational purposes only.
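The 1.11.0 minor release threads an optional step-progress reporter through the transcription, speech-detection, and video-editing pipelines, and installs log hooks so that log output no longer collides with the active CLI spinner. The reporter's own module is not included in this diff; inferred from its call sites below, its shape is roughly the following sketch (a reconstruction, not the package's actual source):

// Inferred from call sites in this diff; the real 'progress-reporter'
// module may differ.
interface StepProgressReporter {
  // Begin (or restart) a phase with a known number of steps.
  start(options: { stepCount: number; label: string }): void
  // Advance one step and label it.
  step(label: string): void
  // Relabel the current step without advancing.
  setLabel(label: string): void
  // Resolve the reporter with a final label.
  finish(label: string): void
}

// Factory options as used below; maxLabelLength presumably truncates labels.
type CreateStepProgressReporterOptions = {
  action: string
  detail?: string
  maxLabelLength?: number
}
declare function createStepProgressReporter(
  options: CreateStepProgressReporterOptions,
): StepProgressReporter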
package/package.json CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "eprec",
   "type": "module",
-  "version": "1.10.2",
+  "version": "1.11.0",
   "license": "MIT",
   "repository": {
     "type": "git",
@@ -7,14 +7,18 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
+  pauseActiveSpinner,
   resolveOptionalString,
+  resumeActiveSpinner,
   type PathPicker,
   type Prompter,
   withSpinner,
 } from '../../cli-ux'
 import { editVideo, buildEditedOutputPath } from './video-editor'
 import { combineVideos } from './combined-video-editor'
+import { setLogHooks } from '../logging'
 
 export type EditVideoCommandArgs = {
   input: string
@@ -176,21 +180,33 @@ function resolvePaddingMs(value: unknown)
 export function createEditVideoHandler(options: CliUxOptions): CommandHandler {
   return async (argv) => {
     const args = await resolveEditVideoArgs(argv, options)
+    const progress = options.interactive
+      ? createStepProgressReporter({ action: 'Editing video' })
+      : undefined
     await withSpinner(
       'Editing video',
       async () => {
-        const result = await editVideo({
-          inputPath: String(args.input),
-          transcriptJsonPath: String(args.transcript),
-          editedTextPath: String(args.edited),
-          outputPath: String(args.output),
-          paddingMs: args['padding-ms'],
+        setLogHooks({
+          beforeLog: pauseActiveSpinner,
+          afterLog: resumeActiveSpinner,
         })
-        if (!result.success) {
-          throw new Error(result.error ?? 'Edit failed.')
+        try {
+          const result = await editVideo({
+            inputPath: String(args.input),
+            transcriptJsonPath: String(args.transcript),
+            editedTextPath: String(args.edited),
+            outputPath: String(args.output),
+            paddingMs: args['padding-ms'],
+            progress,
+          })
+          if (!result.success) {
+            throw new Error(result.error ?? 'Edit failed.')
+          }
+        } finally {
+          setLogHooks({})
         }
       },
-      { successText: 'Edit complete' },
+      { successText: 'Edit complete', enabled: options.interactive },
     )
     console.log(`Edited video written to ${args.output}`)
   }
@@ -201,26 +217,47 @@ export function createCombineVideosHandler(
 ): CommandHandler {
   return async (argv) => {
     const args = await resolveCombineVideosArgs(argv, options)
+    const progress = options.interactive
+      ? createStepProgressReporter({ action: 'Combining videos' })
+      : undefined
+    const editProgressFactory = options.interactive
+      ? (detail: string) =>
+          createStepProgressReporter({
+            action: 'Combining videos',
+            detail,
+            maxLabelLength: 28,
+          })
+      : undefined
     let outputPath = ''
     await withSpinner(
       'Combining videos',
       async () => {
-        const result = await combineVideos({
-          video1Path: String(args.video1),
-          video1TranscriptJsonPath: args.transcript1,
-          video1EditedTextPath: args.edited1,
-          video2Path: String(args.video2),
-          video2TranscriptJsonPath: args.transcript2,
-          video2EditedTextPath: args.edited2,
-          outputPath: String(args.output),
-          overlapPaddingMs: args['padding-ms'],
+        setLogHooks({
+          beforeLog: pauseActiveSpinner,
+          afterLog: resumeActiveSpinner,
         })
-        if (!result.success) {
-          throw new Error(result.error ?? 'Combine failed.')
+        try {
+          const result = await combineVideos({
+            video1Path: String(args.video1),
+            video1TranscriptJsonPath: args.transcript1,
+            video1EditedTextPath: args.edited1,
+            video2Path: String(args.video2),
+            video2TranscriptJsonPath: args.transcript2,
+            video2EditedTextPath: args.edited2,
+            outputPath: String(args.output),
+            overlapPaddingMs: args['padding-ms'],
+            progress,
+            editProgressFactory,
+          })
+          if (!result.success) {
+            throw new Error(result.error ?? 'Combine failed.')
+          }
+          outputPath = result.outputPath
+        } finally {
+          setLogHooks({})
         }
-        outputPath = result.outputPath
       },
-      { successText: 'Combine complete' },
+      { successText: 'Combine complete', enabled: options.interactive },
     )
     console.log(`Combined video written to ${outputPath}`)
  }
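Both handlers install the log hooks for the duration of the task, clear them in a finally block, and now pass enabled: options.interactive so the spinner is suppressed in non-interactive runs. The '../logging' module is not shown here, but the contract implied by setLogHooks, pauseActiveSpinner, and resumeActiveSpinner is small; a minimal sketch, assuming a logger that brackets each write with the hooks:

// Sketch of the implied contract; the package's actual '../logging'
// module may differ.
type LogHooks = {
  beforeLog?: () => void
  afterLog?: () => void
}

let hooks: LogHooks = {}

export function setLogHooks(next: LogHooks): void {
  hooks = next
}

export function log(message: string): void {
  hooks.beforeLog?.() // e.g. pauseActiveSpinner clears the spinner line
  console.log(message)
  hooks.afterLog?.() // e.g. resumeActiveSpinner redraws it
}

Resetting with setLogHooks({}) in the finally block scopes the hooks to the spinner's lifetime.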
@@ -14,6 +14,7 @@ import {
   findSpeechStartWithRmsFallback,
 } from '../utils/audio-analysis'
 import { allocateJoinPadding } from '../utils/video-editing'
+import type { StepProgressReporter } from '../../progress-reporter'
 
 export interface CombineVideosOptions {
   video1Path: string
@@ -26,6 +27,8 @@ export interface CombineVideosOptions {
   video2Duration?: number
   outputPath: string
   overlapPaddingMs?: number
+  progress?: StepProgressReporter
+  editProgressFactory?: (detail: string) => StepProgressReporter | undefined
 }
 
 export interface CombineVideosResult {
@@ -39,12 +42,17 @@ export interface CombineVideosResult {
 export async function combineVideos(
   options: CombineVideosOptions,
 ): Promise<CombineVideosResult> {
+  const progress = options.progress
+  const totalSteps = 5
+  progress?.start({ stepCount: totalSteps, label: 'Preparing edits' })
+
   const tempDir = await mkdtemp(path.join(os.tmpdir(), 'video-combine-'))
   try {
     const { video1Path, video2Path } = await applyOptionalEdits(
       options,
       tempDir,
     )
+    progress?.step('Measuring durations')
     const editsApplied =
       options.video1EditedTextPath || options.video2EditedTextPath
     const video1Duration = editsApplied
@@ -54,6 +62,8 @@ export async function combineVideos(
       ? await getMediaDurationSeconds(video2Path)
       : (options.video2Duration ?? (await getMediaDurationSeconds(video2Path)))
 
+    progress?.step('Detecting speech')
+    progress?.setLabel('Checking first video')
     const video1HasSpeech = await checkSegmentHasSpeech(
       video1Path,
       video1Duration,
@@ -67,6 +77,7 @@
       }
     }
 
+    progress?.setLabel('Finding first video speech end')
     const paddingSeconds =
       (options.overlapPaddingMs ?? EDIT_CONFIG.speechBoundaryPaddingMs) / 1000
 
@@ -74,6 +85,7 @@
       inputPath: video1Path,
       duration: video1Duration,
     })
+    progress?.setLabel('Finding second video speech bounds')
     const { speechStart: video2SpeechStart, speechEnd: video2SpeechEnd } =
       await findVideo2SpeechBounds({
        inputPath: video2Path,
@@ -103,8 +115,10 @@
       video2Duration,
     )
 
+    progress?.step('Trimming segments')
     const segment1Path = path.join(tempDir, 'segment-1.mp4')
     const segment2Path = path.join(tempDir, 'segment-2.mp4')
+    progress?.setLabel('Extracting segment 1/2')
     await extractChapterSegmentAccurate({
       inputPath: video1Path,
       outputPath: segment1Path,
@@ -119,6 +133,7 @@
         video2TrimStart,
       }
     }
+    progress?.setLabel('Extracting segment 2/2')
     await extractChapterSegmentAccurate({
       inputPath: video2Path,
       outputPath: segment2Path,
@@ -126,6 +141,7 @@
       end: video2TrimEnd,
     })
 
+    progress?.setLabel('Validating trimmed speech')
     const segment2HasSpeech = await checkSegmentHasSpeech(
       segment2Path,
       video2TrimEnd - video2TrimStart,
@@ -139,6 +155,7 @@
       }
     }
 
+    progress?.step('Combining output')
     const resolvedOutputPath = await resolveOutputPath(
       options.outputPath,
       video1Path,
@@ -151,6 +168,7 @@
       outputPath: resolvedOutputPath,
     })
     await finalizeOutput(resolvedOutputPath, options.outputPath)
+    progress?.finish('Complete')
 
     return {
       success: true,
@@ -176,18 +194,21 @@ async function applyOptionalEdits(
 ): Promise<{ video1Path: string; video2Path: string }> {
   let video1Path = options.video1Path
   let video2Path = options.video2Path
+  const editProgressFactory = options.editProgressFactory
 
   if (options.video1EditedTextPath) {
     if (!options.video1TranscriptJsonPath) {
       throw new Error('Missing transcript JSON for first video edits.')
     }
     const editedPath = path.join(tempDir, 'video1-edited.mp4')
+    const progress = editProgressFactory?.('Edit first video')
     const result = await editVideo({
       inputPath: options.video1Path,
       transcriptJsonPath: options.video1TranscriptJsonPath,
       editedTextPath: options.video1EditedTextPath,
       outputPath: editedPath,
       paddingMs: options.overlapPaddingMs,
+      progress,
     })
     if (!result.success) {
       throw new Error(result.error ?? 'Failed to edit first video.')
@@ -200,12 +221,14 @@
       throw new Error('Missing transcript JSON for second video edits.')
     }
     const editedPath = path.join(tempDir, 'video2-edited.mp4')
+    const progress = editProgressFactory?.('Edit second video')
     const result = await editVideo({
       inputPath: options.video2Path,
       transcriptJsonPath: options.video2TranscriptJsonPath,
       editedTextPath: options.video2EditedTextPath,
       outputPath: editedPath,
       paddingMs: options.overlapPaddingMs,
+      progress,
     })
     if (!result.success) {
       throw new Error(result.error ?? 'Failed to edit second video.')
@@ -11,6 +11,7 @@ import {
 } from './timestamp-refinement'
 import type { TimeRange } from '../types'
 import type { TranscriptJson, TranscriptWordWithIndex } from './types'
+import type { StepProgressReporter } from '../../progress-reporter'
 
 export interface EditVideoOptions {
   inputPath: string
@@ -18,6 +19,7 @@ export interface EditVideoOptions {
   editedTextPath: string
   outputPath: string
   paddingMs?: number
+  progress?: StepProgressReporter
 }
 
 export interface EditVideoResult {
@@ -37,8 +39,13 @@ export async function editVideo(
   options: EditVideoOptions,
 ): Promise<EditVideoResult> {
   try {
+    const progress = options.progress
+    const totalSteps = 5
+    progress?.start({ stepCount: totalSteps, label: 'Loading transcript' })
+
     const transcript = await readTranscriptJson(options.transcriptJsonPath)
     const editedText = await Bun.file(options.editedTextPath).text()
+    progress?.step('Validating edits')
     const validation = validateEditedTranscript({
       originalWords: transcript.words,
       editedText,
@@ -51,6 +58,7 @@
         removedRanges: [],
       }
     }
+    progress?.step('Diffing transcript')
    const diffResult = diffTranscripts({
       originalWords: transcript.words,
       editedText,
@@ -64,9 +72,12 @@
       }
     }
 
+    progress?.step('Planning edits')
     const removedWords = diffResult.removedWords
     if (removedWords.length === 0) {
+      progress?.step('Rendering output')
       await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('No edits')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -77,7 +88,9 @@
 
     const removalRanges = wordsToTimeRanges(removedWords)
     if (removalRanges.length === 0) {
+      progress?.step('Rendering output')
      await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('No ranges')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -86,6 +99,7 @@
       }
     }
 
+    progress?.setLabel('Refining ranges')
     const refinedRanges = await refineAllRemovalRanges({
       inputPath: options.inputPath,
       duration: transcript.source_duration,
@@ -111,6 +125,7 @@
       }
     }
 
+    progress?.step('Rendering output')
     await mkdir(path.dirname(options.outputPath), { recursive: true })
 
     const isFullRange =
@@ -120,6 +135,7 @@
       keepRanges[0].end >= transcript.source_duration - 0.001
     if (isFullRange) {
       await ensureOutputCopy(options.inputPath, options.outputPath)
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -129,12 +145,14 @@
     }
 
     if (keepRanges.length === 1 && keepRanges[0]) {
+      progress?.setLabel('Extracting segment')
       await extractChapterSegmentAccurate({
         inputPath: options.inputPath,
         outputPath: options.outputPath,
         start: keepRanges[0].start,
         end: keepRanges[0].end,
       })
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
@@ -147,6 +165,9 @@
     try {
       const segmentPaths: string[] = []
       for (const [index, range] of keepRanges.entries()) {
+        progress?.setLabel(
+          `Extracting segment ${index + 1}/${keepRanges.length}`,
+        )
        const segmentPath = path.join(tempDir, `segment-${index + 1}.mp4`)
         await extractChapterSegmentAccurate({
           inputPath: options.inputPath,
@@ -156,10 +177,12 @@
         })
         segmentPaths.push(segmentPath)
       }
+      progress?.setLabel('Concatenating segments')
       await concatSegments({
         segmentPaths,
         outputPath: options.outputPath,
       })
+      progress?.finish('Complete')
       return {
         success: true,
         outputPath: options.outputPath,
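editVideo declares five steps up front and calls progress?.finish(...) on every successful exit path (no edits, no ranges, full-range copy, single segment, concatenated segments), so an interactive bar always resolves. Because the reporter is optional-chained throughout, non-interactive callers simply omit it; a hypothetical invocation (paths are placeholders):

// Hypothetical call; file paths are placeholders.
const result = await editVideo({
  inputPath: 'talk.mp4',
  transcriptJsonPath: 'talk-transcript.json',
  editedTextPath: 'talk-edited.txt',
  outputPath: 'talk-cut.mp4',
  // Interactive runs pass a reporter; leave it undefined otherwise and
  // every `progress?.` call becomes a no-op.
  progress: createStepProgressReporter({ action: 'Editing video' }),
})
if (!result.success) {
  throw new Error(result.error ?? 'Edit failed.')
}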
package/src/cli.ts CHANGED
@@ -27,6 +27,7 @@ import {
   PromptCancelled,
   createInquirerPrompter,
   createPathPicker,
+  createStepProgressReporter,
   isInteractive,
   pauseActiveSpinner,
   resumeActiveSpinner,
@@ -156,18 +157,33 @@ async function main(rawArgs = hideBin(process.argv)) {
         }),
         async (argv) => {
           const transcribeArgs = await resolveTranscribeArgs(argv, context)
+          const progress = context.interactive
+            ? createStepProgressReporter({ action: 'Transcribing audio' })
+            : undefined
           let resultText = ''
           await withSpinner(
             'Transcribing audio',
             async () => {
-              const result = await transcribeAudio(transcribeArgs.inputPath, {
-                modelPath: transcribeArgs.modelPath,
-                language: transcribeArgs.language,
-                threads: transcribeArgs.threads,
-                binaryPath: transcribeArgs.binaryPath,
-                outputBasePath: transcribeArgs.outputBasePath,
+              setLogHooks({
+                beforeLog: pauseActiveSpinner,
+                afterLog: resumeActiveSpinner,
               })
-              resultText = result.text
+              try {
+                const result = await transcribeAudio(
+                  transcribeArgs.inputPath,
+                  {
+                    modelPath: transcribeArgs.modelPath,
+                    language: transcribeArgs.language,
+                    threads: transcribeArgs.threads,
+                    binaryPath: transcribeArgs.binaryPath,
+                    outputBasePath: transcribeArgs.outputBasePath,
+                    progress,
+                  },
+                )
+                resultText = result.text
+              } finally {
+                setLogHooks({})
+              }
             },
             {
               successText: 'Transcription complete',
@@ -203,16 +219,28 @@ async function main(rawArgs = hideBin(process.argv)) {
             argv,
             context,
           )
+          const progress = context.interactive
+            ? createStepProgressReporter({ action: 'Detecting speech' })
+            : undefined
           let segments: unknown = []
           await withSpinner(
             'Detecting speech',
             async () => {
-              await ensureFfmpegAvailable()
-              segments = await detectSpeechSegmentsForFile({
-                inputPath,
-                start,
-                end,
+              setLogHooks({
+                beforeLog: pauseActiveSpinner,
+                afterLog: resumeActiveSpinner,
               })
+              try {
+                await ensureFfmpegAvailable()
+                segments = await detectSpeechSegmentsForFile({
+                  inputPath,
+                  start,
+                  end,
+                  progress,
+                })
+              } finally {
+                setLogHooks({})
+              }
             },
             {
               successText: 'Speech detection complete',
@@ -6,6 +6,7 @@ import { CONFIG } from '../process-course/config'
 import { formatSeconds, getMediaDurationSeconds } from './utils'
 import { speechFallback } from '../process-course/utils/audio-analysis'
 import type { SpeechBounds } from '../process-course/types'
+import type { StepProgressReporter } from '../progress-reporter'
 
 export type VadConfig = {
   vadWindowSamples: number
@@ -27,6 +28,7 @@ export async function detectSpeechSegmentsWithVad(
   samples: Float32Array,
   sampleRate: number,
   config: VadConfig,
+  options?: { onProgress?: () => void; updateStride?: number },
 ): Promise<VadSegment[]> {
   const vadSession = await getVadSession(config)
   const probabilities = await getVadProbabilities(
@@ -34,6 +36,7 @@ export async function detectSpeechSegmentsWithVad(
     sampleRate,
     config,
     vadSession,
+    options,
   )
   return probabilitiesToSegments(
     samples.length,
@@ -47,7 +50,10 @@ export async function detectSpeechSegmentsForFile(options: {
   inputPath: string
   start?: number
   end?: number
+  progress?: StepProgressReporter
 }): Promise<SpeechSegment[]> {
+  const progress = options.progress
+  progress?.start({ stepCount: 1, label: 'Loading audio' })
   const start = options.start ?? 0
   if (!Number.isFinite(start) || start < 0) {
     throw new Error('Start time must be a non-negative number.')
@@ -66,13 +72,31 @@
     sampleRate: CONFIG.vadSampleRate,
   })
   if (samples.length === 0) {
+    progress?.finish('No audio')
     return []
   }
+  const windowSamples = CONFIG.vadWindowSamples
+  const totalWindows = Math.ceil(samples.length / windowSamples)
+  const updateStride = Math.max(1, Math.floor(totalWindows / 50))
+  const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride))
+  progress?.start({ stepCount: updateCount, label: 'Running VAD' })
+  let progressUpdates = 0
   const segments = await detectSpeechSegmentsWithVad(
     samples,
     CONFIG.vadSampleRate,
     CONFIG,
+    {
+      onProgress: () => {
+        progressUpdates += 1
+        if (progressUpdates <= updateCount) {
+          progress?.step('Running VAD')
+        }
+      },
+      updateStride,
+    },
   )
+  progress?.setLabel('Building segments')
+  progress?.finish('Complete')
   return segments.map((segment) => ({
     start: segment.start + start,
     end: segment.end + start,
@@ -116,6 +140,7 @@ async function getVadProbabilities(
   sampleRate: number,
   config: VadConfig,
   session: ort.InferenceSession,
+  options?: { onProgress?: () => void; updateStride?: number },
 ) {
   const windowSamples = config.vadWindowSamples
   const srTensor = new ort.Tensor(
@@ -126,6 +151,8 @@
   const probabilities: number[] = []
   let stateH = new Float32Array(2 * 1 * 64)
   let stateC = new Float32Array(2 * 1 * 64)
+  const updateStride = Math.max(1, Math.floor(options?.updateStride ?? 1))
+  let updateIndex = 0
 
   for (let offset = 0; offset < samples.length; offset += windowSamples) {
     const chunk = samples.subarray(offset, offset + windowSamples)
@@ -154,6 +181,10 @@
     probabilities.push((probTensor.data as Float32Array)[0] ?? 0)
     stateH = new Float32Array(nextH.data as Float32Array)
     stateC = new Float32Array(nextC.data as Float32Array)
+    if (updateIndex % updateStride === 0) {
+      options?.onProgress?.()
+    }
+    updateIndex += 1
   }
 
   return probabilities
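The stride arithmetic above caps progress updates at roughly fifty per file, regardless of audio length. A worked example with assumed values (Silero-style VAD commonly runs on 16 kHz audio with 512-sample windows; the actual CONFIG values are not shown in this diff):

// Worked example of the throttling arithmetic, using assumed CONFIG values.
const sampleRate = 16_000 // assumed CONFIG.vadSampleRate
const windowSamples = 512 // assumed CONFIG.vadWindowSamples
const samplesLength = 10 * 60 * sampleRate // ten minutes of audio

const totalWindows = Math.ceil(samplesLength / windowSamples) // 18750
const updateStride = Math.max(1, Math.floor(totalWindows / 50)) // 375
const updateCount = Math.max(1, Math.ceil(totalWindows / updateStride)) // 50

// getVadProbabilities fires onProgress only on every updateStride-th
// window, so the reporter steps about 50 times instead of 18750.

The progressUpdates <= updateCount guard in detectSpeechSegmentsForFile keeps rounding from pushing the bar past its declared stepCount.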
@@ -1,6 +1,7 @@
 import path from 'node:path'
 import { mkdir } from 'node:fs/promises'
 import { runCommand } from './utils'
+import type { StepProgressReporter } from '../progress-reporter'
 
 const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
 const DEFAULT_MODEL_URL =
@@ -14,6 +15,7 @@ type TranscribeOptions = {
   threads?: number
   binaryPath?: string
   outputBasePath?: string
+  progress?: StepProgressReporter
 }
 
 export type TranscriptSegment = {
@@ -36,6 +38,7 @@ export async function transcribeAudio(
   audioPath: string,
   options: TranscribeOptions = {},
 ): Promise<TranscriptionResult> {
+  const progress = options.progress
   const resolvedAudioPath = path.resolve(audioPath)
   const resolvedModelPath = path.resolve(
     options.modelPath ?? getDefaultWhisperModelPath(),
@@ -49,7 +52,9 @@
     `${path.parse(resolvedAudioPath).name}-transcript`,
   )
 
-  await ensureModelFile(resolvedModelPath)
+  const totalSteps = 3
+  progress?.start({ stepCount: totalSteps, label: 'Checking model' })
+  await ensureModelFile(resolvedModelPath, progress)
 
   const args = [
     binaryPath,
@@ -69,17 +74,23 @@
     args.push('-t', String(options.threads))
   }
 
+  progress?.step('Transcribing audio')
   const result = await runCommand(args)
+  progress?.step('Reading output')
   const transcriptPath = `${outputBasePath}.txt`
   const transcript = await readTranscriptText(transcriptPath, result.stdout)
   const { segments, source } = await readTranscriptSegments(
     `${outputBasePath}.json`,
   )
   const normalized = normalizeTranscriptText(transcript)
+  progress?.finish('Complete')
   return { text: normalized, segments, segmentsSource: source }
 }
 
-async function ensureModelFile(modelPath: string) {
+async function ensureModelFile(
+  modelPath: string,
+  progress?: StepProgressReporter,
+) {
   const file = Bun.file(modelPath)
   if (await file.exists()) {
     return
@@ -90,6 +101,7 @@ async function ensureModelFile(modelPath: string) {
     throw new Error(`Whisper model not found at ${modelPath}.`)
   }
 
+  progress?.setLabel('Downloading model')
   await mkdir(path.dirname(modelPath), { recursive: true })
   const response = await fetch(DEFAULT_MODEL_URL)
   if (!response.ok) {
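transcribeAudio follows the same pattern with three declared steps: checking (and, if needed, downloading) the model, running the whisper binary, and reading its output. A hypothetical interactive call (the file name is a placeholder):

// Hypothetical call; 'lecture.wav' is a placeholder.
const progress = createStepProgressReporter({ action: 'Transcribing audio' })
const { text } = await transcribeAudio('lecture.wav', { progress })
// Reporter sequence: start 'Checking model' (relabeled 'Downloading model'
// when the model must be fetched), step 'Transcribing audio',
// step 'Reading output', then finish 'Complete'.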