@wovin/tranz 0.1.36 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,198 @@
1
+ /**
2
+ * Utilities for merging transcription results from split audio segments
3
+ */
4
+
5
+ import type { TranscriptionResult, TranscriptSegment } from '../transcription/providers.ts'
6
+ import type { AudioSegment } from './split.ts'
7
+
/**
 * One transcribed word together with its timing.
 *
 * In merged results, `start` and `end` are expressed in seconds.
 */
export interface WordData {
  /** The word text. */
  word: string
  /** Start time of the word, in seconds. */
  start: number
  /** End time of the word, in seconds. */
  end: number
  /** Confidence reported for the word, when available. */
  confidence?: number
  /** Diarization label of the speaker, when available. */
  speaker?: string | number
}
18
+
/**
 * Describes one audio chunk of an auto-split + merge run.
 */
export interface AudioChunk {
  /** Zero-based position of the chunk. */
  index: number
  /** Where the chunk starts in the original audio, in seconds. */
  startSec: number
  /** Where the chunk ends in the original audio, in seconds. */
  endSec: number
  /** Transcribed text of this chunk alone. */
  text: string
}
28
+
/**
 * A transcription result that may additionally carry per-chunk metadata.
 */
export interface MergedTranscriptionResult extends TranscriptionResult {
  /**
   * The audio chunks that were transcribed independently and then merged.
   * Absent when no split happened.
   */
  audioChunks?: AudioChunk[]
}
36
+
/**
 * Namespace a per-chunk diarization label with the index of the chunk it came from.
 *
 * Chunk-scoped labels (Mistral's `speaker_1`, Deepgram's `0`) are NOT comparable across
 * chunks — `chunk0/speaker_1` and `chunk1/speaker_1` are almost always different humans —
 * so the prefix keeps labels from different chunks from colliding.
 *
 * @param chunkIndex - Index of the chunk the label belongs to
 * @param value - The provider's label, or `undefined` when there is none
 * @returns `chunk<index>/<label>`, or `undefined` when `value` is `undefined`
 */
function prefixChunkLabel(chunkIndex: number, value: string | number | undefined): string | number | undefined {
  if (value !== undefined) {
    const label = String(value)
    return 'chunk' + chunkIndex + '/' + label
  }
  return undefined
}
46
+
/**
 * Merge the transcription results of split audio chunks into a single result.
 *
 * `results[i]` is treated as the transcription of `segments[i]`. Word timestamps (seconds)
 * and transcript-segment timestamps (integer ms) are shifted by the chunk's `startSec` so
 * they are relative to the original audio, and diarization labels are prefixed with the
 * chunk index because labels from different chunks are not comparable.
 *
 * Problems are reported through the returned `error` field (with an empty `text`):
 * - there are no results,
 * - at least one chunk result carries an `error`,
 * - `segments` has fewer entries than `results`.
 *
 * A single result is returned unchanged: no offsetting and no `audioChunks`.
 *
 * @param results - Array of transcription results from each segment, in chunk order
 * @param segments - Array of audio segment metadata, parallel to `results`
 * @returns Merged transcription result; `language` and `model` come from the first result
 */
export function mergeTranscriptionResults(
  results: TranscriptionResult[],
  segments: AudioSegment[]
): MergedTranscriptionResult {
  if (results.length === 0) {
    return { text: '', error: 'No results to merge' }
  }

  if (results.length === 1) {
    // Single result, no merging needed — preserve native diarization types verbatim.
    return results[0]
  }

  // Any failed chunk fails the whole merge.
  const errors = results
    .map((r, i) => (r.error ? `Segment ${i}: ${r.error}` : null))
    .filter(Boolean)
  if (errors.length > 0) {
    return { text: '', error: `Errors in segments: ${errors.join('; ')}` }
  }

  // Every result needs its segment metadata for the time offsets below; without this
  // guard a short `segments` array crashes on `segments[i].startSec`.
  if (segments.length < results.length) {
    return {
      text: '',
      error: `Segment metadata missing: got ${segments.length} segments for ${results.length} results`,
    }
  }

  // Chunk texts are separated by a blank line.
  const mergedText = results.map((r) => r.text.trim()).join('\n\n')

  const mergedWords: WordData[] = []
  const mergedSegments: TranscriptSegment[] = []

  results.forEach((result, i) => {
    const chunkStartSec = segments[i].startSec

    // Words use seconds (legacy WordData shape); fall back to the raw provider response.
    const words = result.words || result.rawResponse?.words || []
    for (const word of words) {
      mergedWords.push({
        word: word.word || word.text,
        start: (word.start || 0) + chunkStartSec,
        end: (word.end || 0) + chunkStartSec,
        confidence: word.confidence,
        speaker: prefixChunkLabel(i, word.speaker) as string | number | undefined,
      })
    }

    // Transcript segments use integer ms (TranscriptSegment shape).
    const chunkOffsetMs = Math.round(chunkStartSec * 1000)
    for (const seg of result.segments || []) {
      mergedSegments.push({
        startMs: seg.startMs + chunkOffsetMs,
        endMs: seg.endMs + chunkOffsetMs,
        text: seg.text,
        // Only emit `diarization` when the source segment had one.
        ...(seg.diarization !== undefined
          ? { diarization: prefixChunkLabel(i, seg.diarization) as string | number }
          : {}),
      })
    }
  })

  // Total duration is the sum of the chunk durations.
  const totalDuration = segments.reduce((sum, seg) => sum + seg.durationSec, 0)

  // Per-chunk metadata
  const audioChunks: AudioChunk[] = results.map((r, i) => ({
    index: i,
    startSec: segments[i].startSec,
    endSec: segments[i].endSec,
    text: r.text.trim(),
  }))

  // Keep every chunk's raw provider response, tagged with its position and offset.
  const mergedRawResponse = {
    merged: true,
    chunkCount: results.length,
    chunks: results.map((r, i) => ({
      index: i,
      startSec: segments[i].startSec,
      rawResponse: r.rawResponse,
    })),
  }

  const firstResult = results[0]

  return {
    text: mergedText,
    duration: totalDuration,
    language: firstResult.language,
    model: firstResult.model,
    rawResponse: mergedRawResponse,
    audioChunks,
    // `words` / `segments` are omitted entirely when no chunk provided any.
    ...(mergedWords.length > 0 ? { words: mergedWords } : {}),
    ...(mergedSegments.length > 0 ? { segments: mergedSegments } : {}),
  }
}
158
+
/**
 * Render the text of a merged result, optionally with a marker line before each chunk.
 *
 * Markers are only produced when `includeMarkers` is set AND the result carries more than
 * one audio chunk; in every other case the result's `text` is returned as-is.
 *
 * @param result - Merged transcription result
 * @param includeMarkers - Whether to include [Chunk N] markers
 * @returns Formatted text
 */
export function formatMergedText(
  result: MergedTranscriptionResult,
  includeMarkers: boolean = false
): string {
  const chunks = result.audioChunks
  if (!includeMarkers || !chunks || chunks.length <= 1) {
    return result.text
  }

  const parts: string[] = []
  chunks.forEach((chunk, position) => {
    // Marker numbers are 1-based and follow array order.
    const header = `[Chunk ${position + 1} @ ${formatTimestamp(chunk.startSec)}]`
    parts.push(`${header}\n${chunk.text}`)
  })
  return parts.join('\n\n')
}
185
+
/**
 * Render a number of seconds as `H:MM:SS` when it is at least one hour, else as `M:SS`.
 * Fractions of a second are dropped.
 */
function formatTimestamp(seconds: number): string {
  const twoDigits = (n: number): string => n.toString().padStart(2, '0')

  const h = Math.floor(seconds / 3600)
  const m = Math.floor((seconds % 3600) / 60)
  const s = Math.floor(seconds % 60)

  return h > 0
    ? `${h}:${twoDigits(m)}:${twoDigits(s)}`
    : `${m}:${twoDigits(s)}`
}
@@ -0,0 +1,504 @@
1
+ /**
2
+ * Audio splitting utilities for tranz-cli
3
+ * Provides silence detection and optimal split point calculation
4
+ */
5
+
6
+ import { execa } from 'execa'
7
+ import * as fs from 'node:fs'
8
+ import path from 'node:path'
9
+ import { spawn } from 'node:child_process'
10
+
/**
 * Options that control how audio is split.
 */
export interface SplitConfig {
  /** Maximum segment duration in seconds (default: 600 = 10min) */
  maxDurationSec: number
  /** Minimum silence duration, in seconds, to consider for a split (default: 1.0s) */
  minSilenceDurSec: number
  /** FFmpeg silence threshold (default: '-35dB') */
  silenceThreshold: string
  /** Prefer longer silences for splits (default: true) */
  preferLongerSilence: boolean
  /** Buffer to leave at silence edges, in seconds (default: 0.2s) */
  silenceBuffer: number
}
26
+
/**
 * A stretch of silence detected in the audio. All values are in seconds.
 */
export interface SilenceRegion {
  /** Where the silence begins. */
  startSec: number
  /** Where the silence ends. */
  endSec: number
  /** Length of the silence. */
  durationSec: number
}
35
+
/**
 * A position at which the audio is to be cut.
 */
export interface SplitPoint {
  /** Time in seconds where to split (middle of silence) */
  timeSec: number
  /** Duration, in seconds, of the silence at this split point (0 when not cut at a silence) */
  silenceDuration: number
}
45
+
/**
 * One piece of audio produced by splitting. Times are in seconds of the source audio.
 */
export interface AudioSegment {
  /** Zero-based position of the segment. */
  index: number
  /** Where the segment starts in the source audio. */
  startSec: number
  /** Where the segment ends in the source audio. */
  endSec: number
  /** Length of the segment. */
  durationSec: number
  /** Path of the audio file holding this segment. */
  outputPath: string
}
56
+
/**
 * Values used for every `SplitConfig` option the caller does not override.
 */
export const DEFAULT_SPLIT_CONFIG: SplitConfig = {
  // Ten minutes per segment.
  maxDurationSec: 10 * 60,
  // Silences shorter than one second are not considered.
  minSilenceDurSec: 1.0,
  silenceThreshold: '-35dB',
  preferLongerSilence: true,
  silenceBuffer: 0.2,
}
67
+
/**
 * Run `ffprobe` on a file and return its parsed JSON report.
 *
 * Both `-show_format` and `-show_streams` are requested so the caller can read the
 * duration from either the container or a stream.
 *
 * @param audioPath - File to probe
 * @returns The parsed ffprobe JSON output
 * @throws Error prefixed with "Failed to probe audio:" when ffprobe fails or its output
 *   is not valid JSON
 */
async function execFFprobe(audioPath: string): Promise<{
  format?: { duration?: string | number }
  streams?: Array<{ duration?: string | number }>
}> {
  const probeArgs = [
    '-v', 'error',
    '-print_format', 'json',
    '-show_format',
    '-show_streams',
    audioPath,
  ]

  try {
    const probe = await execa('ffprobe', probeArgs)
    return JSON.parse(probe.stdout)
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new Error(`Failed to probe audio: ${reason}`)
  }
}
89
+
/**
 * Cut one time range out of an audio file with ffmpeg and write it as 16 kHz mono
 * 16-bit PCM, overwriting `outputPath` if it already exists.
 *
 * @param inputPath - Source audio file
 * @param outputPath - File to write
 * @param startSec - Start of the range in the source, in seconds
 * @param durationSec - Length of the range, in seconds
 * @throws Error prefixed with "Failed to extract segment:" when ffmpeg fails
 */
async function extractAudioSegment(
  inputPath: string,
  outputPath: string,
  startSec: number,
  durationSec: number
): Promise<void> {
  // -ss / -t are placed before -i, so they apply to the input.
  const range = ['-ss', startSec.toString(), '-t', durationSec.toString()]
  const encoding = [
    '-ar', '16000', // 16kHz sample rate (Whisper-compatible)
    '-ac', '1', // mono
    '-c:a', 'pcm_s16le', // 16-bit PCM codec
  ]
  const ffmpegArgs = [...range, '-i', inputPath, ...encoding, '-y', outputPath]

  try {
    await execa('ffmpeg', ffmpegArgs)
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new Error(`Failed to extract segment: ${reason}`)
  }
}
114
+
/**
 * Determine a file's duration by letting ffmpeg decode it to the null muxer and reading
 * ffmpeg's log output. Slower than probing; used as a fallback when ffprobe reports no
 * duration.
 *
 * The `Duration: HH:MM:SS.ss` header is used when present; otherwise the last
 * `time=HH:MM:SS.ss` progress value is used.
 *
 * @param audioPath - File to measure
 * @returns Duration in seconds, or `undefined` when none could be read (errors are
 *   deliberately swallowed — this is a best-effort fallback)
 */
async function getDurationViaFfmpeg(audioPath: string): Promise<number | undefined> {
  // Groups 1..3 of a clock match are hours, minutes and (possibly fractional) seconds.
  const clockToSeconds = (clock: ArrayLike<string>): number =>
    parseFloat(clock[1]) * 3600 + parseFloat(clock[2]) * 60 + parseFloat(clock[3])

  try {
    // `reject: false`: a non-zero exit still leaves stderr available for parsing.
    const run = await execa('ffmpeg', ['-i', audioPath, '-f', 'null', '-'], { reject: false })
    const log = run.stderr

    const header = log.match(/Duration:\s*(\d+):(\d+):(\d+(?:\.\d+)?)/)
    if (header) {
      return clockToSeconds(header)
    }

    const progress = [...log.matchAll(/time=(\d+):(\d+):(\d+(?:\.\d+)?)/g)]
    if (progress.length > 0) {
      return clockToSeconds(progress[progress.length - 1])
    }
  } catch {
    // Ignore errors, return undefined
  }
  return undefined
}
151
+
/**
 * Get the duration of an audio file in seconds.
 *
 * Sources are tried in this order, and the first positive, numeric value wins:
 * 1. the container (`format.duration`) reported by ffprobe,
 * 2. each stream's `duration` reported by ffprobe, in stream order,
 * 3. a full decode with ffmpeg.
 *
 * @param audioPath - File to measure
 * @returns Duration in seconds (always > 0)
 * @throws Error when probing fails or no source yields a usable duration
 */
export async function getAudioDuration(audioPath: string): Promise<number> {
  const metadata = await execFFprobe(audioPath)

  // Container duration first, then the per-stream durations.
  const reported = [
    metadata.format?.duration,
    ...(metadata.streams?.length ? metadata.streams.map((stream) => stream.duration) : []),
  ]
  for (const raw of reported) {
    if (!raw) continue
    const parsed = parseFloat(String(raw))
    if (!isNaN(parsed) && parsed > 0) {
      return parsed
    }
  }

  // Last resort: decode with ffmpeg to get duration
  const decoded = await getDurationViaFfmpeg(audioPath)
  if (decoded !== undefined && decoded > 0) {
    return decoded
  }

  // Say which parts of the probe report were present to help debugging.
  const hasFormat = !!metadata.format
  const hasStreams = !!metadata.streams?.length
  throw new Error(
    `Could not determine audio duration (format: ${hasFormat}, streams: ${hasStreams}). ` +
    `File may be corrupted or in an unsupported format.`
  )
}
194
+
/**
 * Detect silence regions in an audio file using FFmpeg's `silencedetect` filter.
 *
 * ffmpeg is started with `spawn`; its stdout (the re-encoded audio) is discarded and its
 * stderr is scanned for `silence_end: <t> | silence_duration: <d>` reports. stderr is
 * consumed line by line across `data` events: a stream chunk can end in the middle of a
 * line, so the unfinished tail is kept and completed by the next chunk instead of being
 * parsed (and missed) as a fragment.
 *
 * @param audioPath - File to analyse
 * @param config - Overrides for `minSilenceDurSec` and `silenceThreshold`
 * @returns The detected regions in the order ffmpeg reported them. Also resolves when
 *   ffmpeg exits non-zero but at least one region was reported (best effort).
 * @throws Rejects when ffmpeg cannot be started, or exits non-zero without reporting any
 *   silence
 */
export async function detectSilenceRegions(
  audioPath: string,
  config: Partial<SplitConfig> = {}
): Promise<SilenceRegion[]> {
  const { minSilenceDurSec, silenceThreshold } = { ...DEFAULT_SPLIT_CONFIG, ...config }

  return new Promise((resolve, reject) => {
    const silenceRegions: SilenceRegion[] = []

    // Use spawn directly for better ffmpeg compatibility
    const args = [
      '-i', audioPath,
      '-af', `silencedetect=n=${silenceThreshold}:d=${minSilenceDurSec}`,
      '-f', 'wav',
      '-ac', '1',
      '-ar', '8000',
      'pipe:1'
    ]

    const proc = spawn('ffmpeg', args)

    // Discard stdout (audio data); it still has to be drained so ffmpeg is not blocked.
    proc.stdout.on('data', () => {})

    // Record the silence reported on one complete log line, if any.
    const parseLine = (line: string): void => {
      const match = line.match(/silence_end:\s*([\d.]+)\s*\|\s*silence_duration:\s*([\d.]+)/)
      if (!match) return
      const endSec = parseFloat(match[1])
      const durationSec = parseFloat(match[2])
      if (isNaN(endSec) || isNaN(durationSec)) return
      // ffmpeg reports end + duration on this line; the start is derived from them.
      silenceRegions.push({ startSec: endSec - durationSec, endSec, durationSec })
    }

    // Text received after the last line terminator, waiting for the rest of its line.
    let pending = ''

    // Decode as UTF-8 at the stream level so multi-byte characters split across chunks
    // are reassembled before we see them.
    proc.stderr.setEncoding('utf8')
    proc.stderr.on('data', (data: string) => {
      // ffmpeg ends progress lines with '\r', so treat it as a terminator as well.
      const lines = (pending + data).split(/\r?\n|\r/)
      pending = lines.pop() ?? ''
      lines.forEach(parseLine)
    })

    proc.on('close', (code: number | null) => {
      // Flush a final line that was not newline-terminated.
      parseLine(pending)
      pending = ''

      if (code === 0 || silenceRegions.length > 0) {
        resolve(silenceRegions)
      } else {
        reject(new Error(`FFmpeg exited with code ${code}`))
      }
    })

    proc.on('error', (err: Error) => {
      reject(new Error(`Silence detection failed: ${err.message}`))
    })
  })
}
257
+
/**
 * Find optimal split points in audio based on silence regions.
 *
 * The audio is divided into `ceil(totalDuration / maxDurationSec)` segments of equal
 * ("ideal") length. For each ideal cut, the silences whose midpoint lies within ±30% of
 * the ideal segment length are scored and the cut is placed in the middle of the best one:
 * - with `preferLongerSilence`: score = silence duration × proximity to the ideal cut,
 * - otherwise: score = proximity only.
 * When no silence falls in the window the cut is placed at the ideal time (possibly
 * mid-speech) with `silenceDuration: 0`.
 *
 * Because a cut may move up to 30% of the ideal segment length away from its ideal
 * position, individual segments can end up longer than `maxDurationSec`.
 *
 * `silenceBuffer` is not used by this function.
 *
 * @param silenceRegions - Detected silence regions
 * @param totalDuration - Total audio duration in seconds
 * @param config - Split configuration
 * @returns Split points in ascending time order; empty when the audio already fits in
 *   `maxDurationSec`
 * @throws Error when `maxDurationSec` is zero or negative (the segment count would be
 *   unbounded)
 */
export function findOptimalSplitPoints(
  silenceRegions: SilenceRegion[],
  totalDuration: number,
  config: Partial<SplitConfig> = {}
): SplitPoint[] {
  const { maxDurationSec, preferLongerSilence } = {
    ...DEFAULT_SPLIT_CONFIG,
    ...config,
  }

  // A non-positive maximum would make the segment count below infinite (or negative)
  // and the loop would never terminate.
  if (maxDurationSec <= 0) {
    throw new Error(`maxDurationSec must be greater than 0, got ${maxDurationSec}`)
  }

  // No splits needed if audio is short enough
  if (totalDuration <= maxDurationSec) {
    return []
  }

  const numSegments = Math.ceil(totalDuration / maxDurationSec)
  const idealSegmentDuration = totalDuration / numSegments
  // Search window half-width: 30% of the ideal segment length (same for every cut).
  const windowSize = idealSegmentDuration * 0.3

  const midpoint = (silence: SilenceRegion): number => (silence.startSec + silence.endSec) / 2

  const splitPoints: SplitPoint[] = []

  // numSegments segments need numSegments - 1 cuts.
  for (let i = 1; i < numSegments; i++) {
    const idealSplitTime = idealSegmentDuration * i
    const windowStart = idealSplitTime - windowSize
    const windowEnd = idealSplitTime + windowSize

    const candidateSilences = silenceRegions.filter((silence) => {
      const silenceMid = midpoint(silence)
      return silenceMid >= windowStart && silenceMid <= windowEnd
    })

    if (candidateSilences.length === 0) {
      // No silence found in window - split at ideal point (may split mid-speech).
      splitPoints.push({ timeSec: idealSplitTime, silenceDuration: 0 })
      continue
    }

    // Score each silence and keep the first one with the highest score.
    let bestScore = -Infinity
    let bestSilence = candidateSilences[0]
    for (const silence of candidateSilences) {
      // 1 at the ideal time, falling linearly to 0 at the window edges.
      const proximityScore = 1 - Math.abs(midpoint(silence) - idealSplitTime) / windowSize
      const score = preferLongerSilence
        ? silence.durationSec * proximityScore
        : proximityScore

      if (score > bestScore) {
        bestScore = score
        bestSilence = silence
      }
    }

    // Split at the middle of the best silence
    splitPoints.push({
      timeSec: midpoint(bestSilence),
      silenceDuration: bestSilence.durationSec,
    })
  }

  return splitPoints.sort((a, b) => a.timeSec - b.timeSec)
}
344
+
/**
 * Split audio file at specified points using FFmpeg.
 *
 * The split points are sorted by time (the caller's array is not modified) and combined
 * with 0 and `totalDuration` into segment boundaries. The boundaries are validated before
 * anything is written or any ffmpeg process is started: each split point must lie strictly
 * between 0 and `totalDuration` and no two may coincide, since that would produce a
 * segment of zero or negative length.
 *
 * All segments are extracted concurrently, each to
 * `<outputDir>/<baseName>-segment-NNN.wav` (NNN = zero-padded segment index).
 *
 * @param audioPath - Path to source audio file
 * @param splitPoints - Where to split the audio
 * @param totalDuration - Total duration of source audio
 * @param outputDir - Directory to write segments (created if missing)
 * @param baseName - Base name for output files
 * @returns Array of created audio segments, in time order
 * @throws Error when the split points are invalid, or when extracting any segment fails
 */
export async function splitAudioAtPoints(
  audioPath: string,
  splitPoints: SplitPoint[],
  totalDuration: number,
  outputDir: string,
  baseName: string
): Promise<AudioSegment[]> {
  // Build segment boundaries from a sorted copy of the cut times.
  const cutTimes = splitPoints.map((sp) => sp.timeSec).sort((a, b) => a - b)
  const boundaries = [0, ...cutTimes, totalDuration]

  // Written as !(a > b) so that NaN boundaries are rejected too.
  for (let i = 1; i < boundaries.length; i++) {
    if (!(boundaries[i] > boundaries[i - 1])) {
      throw new Error(
        `Invalid split points: each must lie strictly between 0 and the total duration ` +
        `(${totalDuration}s) and be distinct; boundary ${boundaries[i]}s does not follow ${boundaries[i - 1]}s`
      )
    }
  }

  // Ensure output directory exists
  fs.mkdirSync(outputDir, { recursive: true })

  const segments: AudioSegment[] = []
  const splitPromises: Promise<void>[] = []

  for (let i = 0; i < boundaries.length - 1; i++) {
    const startSec = boundaries[i]
    const endSec = boundaries[i + 1]
    const durationSec = endSec - startSec
    const outputPath = path.join(outputDir, `${baseName}-segment-${i.toString().padStart(3, '0')}.wav`)

    segments.push({ index: i, startSec, endSec, durationSec, outputPath })

    // Start the extraction now; failures are tagged with the segment index.
    splitPromises.push(
      extractAudioSegment(audioPath, outputPath, startSec, durationSec).catch((err) => {
        throw new Error(`Failed to extract segment ${i}: ${err instanceof Error ? err.message : String(err)}`)
      })
    )
  }

  // Wait for all segments to be extracted
  await Promise.all(splitPromises)

  return segments
}
405
+
/**
 * Split an audio file into segments when it is longer than `maxDurationSec`.
 *
 * When the file is short enough nothing is written: the result is a single segment that
 * spans the whole file and whose `outputPath` is the original `audioPath`. Otherwise
 * silences are detected, split points are chosen from them, and the file is cut into
 * segment files inside `outputDir`, named after the source file's base name.
 *
 * @param audioPath - Path to source audio file
 * @param outputDir - Directory for split segments
 * @param config - Split configuration (missing options fall back to the defaults)
 * @returns The audio segments (a single element if no split was needed)
 */
export async function autoSplitAudio(
  audioPath: string,
  outputDir: string,
  config: Partial<SplitConfig> = {}
): Promise<AudioSegment[]> {
  const effectiveConfig = { ...DEFAULT_SPLIT_CONFIG, ...config }
  const totalDuration = await getAudioDuration(audioPath)

  const fitsInOneSegment = totalDuration <= effectiveConfig.maxDurationSec
  if (fitsInOneSegment) {
    // No split needed - the original file is the one and only segment.
    const whole: AudioSegment = {
      index: 0,
      startSec: 0,
      endSec: totalDuration,
      durationSec: totalDuration,
      outputPath: audioPath,
    }
    return [whole]
  }

  const silences = await detectSilenceRegions(audioPath, effectiveConfig)
  const cuts = findOptimalSplitPoints(silences, totalDuration, effectiveConfig)

  // Segment files are named after the source file without its extension.
  const stem = path.basename(audioPath, path.extname(audioPath))

  return await splitAudioAtPoints(audioPath, cuts, totalDuration, outputDir, stem)
}
459
+
/**
 * Result of a split analysis, for logging/debugging.
 */
export interface SplitAnalysis {
  /** Duration of the analysed audio, in seconds. */
  totalDuration: number
  /** Number of segments a split would produce (1 when no split is needed). */
  numSegments: number
  /** Chosen split points (empty when no split is needed). */
  splitPoints: SplitPoint[]
  /** Silence regions that were detected (empty when no split is needed). */
  silenceRegions: SilenceRegion[]
  /** Whether the audio is longer than the configured maximum segment duration. */
  needsSplit: boolean
}
470
+
/**
 * Work out how an audio file would be split, without writing any segment files.
 * Useful for preview/dry-run functionality.
 *
 * Silence detection only runs when the file is longer than `maxDurationSec`; otherwise
 * the analysis reports a single segment with no split points and no silence regions.
 *
 * @param audioPath - File to analyse
 * @param config - Split configuration (missing options fall back to the defaults)
 * @returns The duration, the split points that would be used and the detected silences
 */
export async function analyzeSplitPoints(
  audioPath: string,
  config: Partial<SplitConfig> = {}
): Promise<SplitAnalysis> {
  const effectiveConfig = { ...DEFAULT_SPLIT_CONFIG, ...config }
  const totalDuration = await getAudioDuration(audioPath)

  if (totalDuration > effectiveConfig.maxDurationSec) {
    const silenceRegions = await detectSilenceRegions(audioPath, effectiveConfig)
    const splitPoints = findOptimalSplitPoints(silenceRegions, totalDuration, effectiveConfig)
    return {
      totalDuration,
      // n cuts produce n + 1 segments.
      numSegments: splitPoints.length + 1,
      splitPoints,
      silenceRegions,
      needsSplit: true,
    }
  }

  return {
    totalDuration,
    numSegments: 1,
    splitPoints: [],
    silenceRegions: [],
    needsSplit: false,
  }
}
@@ -0,0 +1,16 @@
1
+ import path from 'node:path'
2
+
/**
 * Normalize a path and break it into its components (`root`, `dir`, `base`, `name`, `ext`).
 */
export const getFileInfo = (filePath: string) => {
  const normed = path.normalize(filePath)
  return path.parse(normed)
}

/**
 * Extension of the file, INCLUDING the leading dot (e.g. `.wav`); empty string when the
 * file has no extension.
 */
export const getExt = (filePath: string) => {
  return getFileInfo(filePath).ext
}

/**
 * File name without directory and without extension (e.g. `talk` for `/a/talk.wav`).
 */
export const getName = (filePath: string) => {
  return getFileInfo(filePath).name
}

/**
 * File name without directory but with its extension (e.g. `talk.wav` for `/a/talk.wav`).
 */
export const getNameWithExt = (filePath: string) => {
  const { name, ext } = getFileInfo(filePath)
  // `ext` already carries its leading dot, so the parts are joined without adding one.
  return `${name}${ext}`
}