eprec 0.0.1 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/LICENSE +21 -0
  2. package/README.md +52 -29
  3. package/cli.ts +150 -0
  4. package/package.json +39 -7
  5. package/process-course/chapter-processor.ts +1037 -0
  6. package/process-course/cli.ts +236 -0
  7. package/process-course/config.ts +50 -0
  8. package/process-course/edits/cli.ts +167 -0
  9. package/process-course/edits/combined-video-editor.ts +316 -0
  10. package/process-course/edits/edit-workspace.ts +90 -0
  11. package/process-course/edits/index.ts +20 -0
  12. package/process-course/edits/regenerate-transcript.ts +84 -0
  13. package/process-course/edits/remove-ranges.test.ts +36 -0
  14. package/process-course/edits/remove-ranges.ts +287 -0
  15. package/process-course/edits/timestamp-refinement.test.ts +25 -0
  16. package/process-course/edits/timestamp-refinement.ts +172 -0
  17. package/process-course/edits/transcript-diff.test.ts +105 -0
  18. package/process-course/edits/transcript-diff.ts +214 -0
  19. package/process-course/edits/transcript-output.test.ts +50 -0
  20. package/process-course/edits/transcript-output.ts +36 -0
  21. package/process-course/edits/types.ts +26 -0
  22. package/process-course/edits/video-editor.ts +246 -0
  23. package/process-course/errors.test.ts +63 -0
  24. package/process-course/errors.ts +82 -0
  25. package/process-course/ffmpeg.ts +449 -0
  26. package/process-course/jarvis-commands/handlers.ts +71 -0
  27. package/process-course/jarvis-commands/index.ts +14 -0
  28. package/process-course/jarvis-commands/parser.test.ts +348 -0
  29. package/process-course/jarvis-commands/parser.ts +257 -0
  30. package/process-course/jarvis-commands/types.ts +46 -0
  31. package/process-course/jarvis-commands/windows.ts +254 -0
  32. package/process-course/logging.ts +24 -0
  33. package/process-course/paths.test.ts +59 -0
  34. package/process-course/paths.ts +53 -0
  35. package/process-course/summary.test.ts +209 -0
  36. package/process-course/summary.ts +210 -0
  37. package/process-course/types.ts +85 -0
  38. package/process-course/utils/audio-analysis.test.ts +348 -0
  39. package/process-course/utils/audio-analysis.ts +463 -0
  40. package/process-course/utils/chapter-selection.test.ts +307 -0
  41. package/process-course/utils/chapter-selection.ts +136 -0
  42. package/process-course/utils/file-utils.test.ts +83 -0
  43. package/process-course/utils/file-utils.ts +57 -0
  44. package/process-course/utils/filename.test.ts +27 -0
  45. package/process-course/utils/filename.ts +12 -0
  46. package/process-course/utils/time-ranges.test.ts +221 -0
  47. package/process-course/utils/time-ranges.ts +86 -0
  48. package/process-course/utils/transcript.test.ts +257 -0
  49. package/process-course/utils/transcript.ts +86 -0
  50. package/process-course/utils/video-editing.ts +44 -0
  51. package/process-course-video.ts +389 -0
  52. package/speech-detection.ts +355 -0
  53. package/utils.ts +138 -0
  54. package/whispercpp-transcribe.ts +345 -0
@@ -0,0 +1,345 @@
1
+ import path from 'node:path'
2
+ import { mkdir } from 'node:fs/promises'
3
+ import { runCommand } from './utils'
4
+
5
// Filename of the default whisper.cpp model (small, English-only GGML build).
const DEFAULT_MODEL_FILENAME = 'ggml-small.en.bin'
// Upstream location the default model is fetched from when it is missing locally.
const DEFAULT_MODEL_URL =
  'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin'
// Language passed to whisper-cli via -l when the caller does not specify one.
const DEFAULT_LANGUAGE = 'en'
// Binary name resolved from PATH when no explicit binaryPath is provided.
const DEFAULT_BINARY = 'whisper-cli'
10
+
11
/** Optional settings for transcribeAudio; every field falls back to a module default. */
type TranscribeOptions = {
  /** Path to a ggml model file; defaults to the cached small.en model. */
  modelPath?: string
  /** Spoken language hint forwarded to whisper-cli via -l. */
  language?: string
  /** Thread count forwarded via -t when provided. */
  threads?: number
  /** Path or name of the whisper-cli executable. */
  binaryPath?: string
  /** Base path (no extension) for the .txt/.json outputs; defaults to `<audio>-transcript` beside the audio file. */
  outputBasePath?: string
}

/** One timed span of transcript text; start/end are in seconds. */
export type TranscriptSegment = {
  start: number
  end: number
  text: string
}

/** Result of a transcription run. */
export type TranscriptionResult = {
  /** Normalized (lowercased, alphanumeric-only) transcript text. */
  text: string
  /** Timed segments, sorted by start time. */
  segments: TranscriptSegment[]
  /** Which part of the whisper.cpp JSON the segments were derived from. */
  segmentsSource: 'tokens' | 'segments' | 'transcription' | 'none'
}
30
+
31
+ export function getDefaultWhisperModelPath() {
32
+ return path.resolve('.cache', 'whispercpp', DEFAULT_MODEL_FILENAME)
33
+ }
34
+
35
+ export async function transcribeAudio(
36
+ audioPath: string,
37
+ options: TranscribeOptions = {},
38
+ ): Promise<TranscriptionResult> {
39
+ const resolvedAudioPath = path.resolve(audioPath)
40
+ const resolvedModelPath = path.resolve(
41
+ options.modelPath ?? getDefaultWhisperModelPath(),
42
+ )
43
+ const language = (options.language ?? DEFAULT_LANGUAGE).trim() || 'en'
44
+ const binaryPath = options.binaryPath ?? DEFAULT_BINARY
45
+ const outputBasePath =
46
+ options.outputBasePath ??
47
+ path.join(
48
+ path.dirname(resolvedAudioPath),
49
+ `${path.parse(resolvedAudioPath).name}-transcript`,
50
+ )
51
+
52
+ await ensureModelFile(resolvedModelPath)
53
+
54
+ const args = [
55
+ binaryPath,
56
+ '-m',
57
+ resolvedModelPath,
58
+ '-f',
59
+ resolvedAudioPath,
60
+ '-l',
61
+ language,
62
+ '-ojf',
63
+ '-otxt',
64
+ '-of',
65
+ outputBasePath,
66
+ ]
67
+
68
+ if (options.threads && Number.isFinite(options.threads)) {
69
+ args.push('-t', String(options.threads))
70
+ }
71
+
72
+ const result = await runCommand(args)
73
+ const transcriptPath = `${outputBasePath}.txt`
74
+ const transcript = await readTranscriptText(transcriptPath, result.stdout)
75
+ const { segments, source } = await readTranscriptSegments(
76
+ `${outputBasePath}.json`,
77
+ )
78
+ const normalized = normalizeTranscriptText(transcript)
79
+ return { text: normalized, segments, segmentsSource: source }
80
+ }
81
+
82
+ async function ensureModelFile(modelPath: string) {
83
+ const file = Bun.file(modelPath)
84
+ if (await file.exists()) {
85
+ return
86
+ }
87
+
88
+ const defaultPath = getDefaultWhisperModelPath()
89
+ if (path.resolve(modelPath) !== path.resolve(defaultPath)) {
90
+ throw new Error(`Whisper model not found at ${modelPath}.`)
91
+ }
92
+
93
+ await mkdir(path.dirname(modelPath), { recursive: true })
94
+ const response = await fetch(DEFAULT_MODEL_URL)
95
+ if (!response.ok) {
96
+ throw new Error(
97
+ `Failed to download whisper.cpp model (${response.status} ${response.statusText}).`,
98
+ )
99
+ }
100
+
101
+ const bytes = await response.arrayBuffer()
102
+ await Bun.write(modelPath, bytes)
103
+ }
104
+
105
+ async function readTranscriptText(transcriptPath: string, fallback: string) {
106
+ const transcriptFile = Bun.file(transcriptPath)
107
+ if (await transcriptFile.exists()) {
108
+ return transcriptFile.text()
109
+ }
110
+ if (fallback.trim().length > 0) {
111
+ return fallback
112
+ }
113
+ throw new Error('Whisper.cpp transcript output was empty.')
114
+ }
115
+
116
+ async function readTranscriptSegments(
117
+ transcriptPath: string,
118
+ ): Promise<{
119
+ segments: TranscriptSegment[]
120
+ source: TranscriptionResult['segmentsSource']
121
+ }> {
122
+ const transcriptFile = Bun.file(transcriptPath)
123
+ if (!(await transcriptFile.exists())) {
124
+ return { segments: [], source: 'none' }
125
+ }
126
+ const raw = await transcriptFile.text()
127
+ try {
128
+ const payload = JSON.parse(raw)
129
+ return parseTranscriptSegments(payload)
130
+ } catch (error) {
131
+ throw new Error(
132
+ `Failed to parse whisper.cpp JSON transcript: ${error instanceof Error ? error.message : error}`,
133
+ )
134
+ }
135
+ }
136
+
137
+ function parseTranscriptSegments(payload: unknown): {
138
+ segments: TranscriptSegment[]
139
+ source: TranscriptionResult['segmentsSource']
140
+ } {
141
+ if (!payload || typeof payload !== 'object') {
142
+ return { segments: [], source: 'none' }
143
+ }
144
+ const transcription = (payload as any).transcription
145
+ const tokenSegments = parseTokenSegments(transcription)
146
+ if (tokenSegments.length > 0) {
147
+ return {
148
+ segments: tokenSegments.sort((a, b) => a.start - b.start),
149
+ source: 'tokens',
150
+ }
151
+ }
152
+ const segments = parseSegmentsArray((payload as any).segments)
153
+ if (segments.length > 0) {
154
+ return {
155
+ segments: segments.sort((a, b) => a.start - b.start),
156
+ source: 'segments',
157
+ }
158
+ }
159
+ const transcriptionSegments = parseTranscriptionArray(transcription)
160
+ return {
161
+ segments: transcriptionSegments.sort((a, b) => a.start - b.start),
162
+ source: transcriptionSegments.length > 0 ? 'transcription' : 'none',
163
+ }
164
+ }
165
+
166
// Millisecond offsets attached to a whisper.cpp token.
type TokenOffsets = { from: number; to: number }

/**
 * Build word-level segments by merging whisper.cpp sub-word tokens.
 *
 * A token whose text begins with a space starts a new word; punctuation-only
 * tokens extend the current word's end time but are not appended to it.
 * Special control tokens (text starting with `[_`, e.g. `[_BEG_]`) are
 * skipped. Offsets are converted from milliseconds to seconds.
 */
function parseTokenSegments(rawTranscription: unknown): TranscriptSegment[] {
  if (!Array.isArray(rawTranscription)) {
    return []
  }
  // Flatten every segment's token list into one stream.
  const tokens = rawTranscription.flatMap((segment: any) =>
    Array.isArray(segment?.tokens) ? segment.tokens : [],
  )
  if (tokens.length === 0) {
    return []
  }

  // Accumulator state for the word currently being assembled.
  const segments: TranscriptSegment[] = []
  let currentWord = ''
  let currentStart: number | null = null
  let currentEnd: number | null = null

  // Emit the accumulated word (if non-empty) and reset the accumulator.
  const flush = () => {
    if (currentWord.trim() && currentStart !== null && currentEnd !== null) {
      segments.push({
        start: currentStart,
        end: currentEnd,
        text: currentWord.trim(),
      })
    }
    currentWord = ''
    currentStart = null
    currentEnd = null
  }

  for (const token of tokens) {
    if (!token || typeof token !== 'object') {
      continue
    }
    const text = typeof token.text === 'string' ? token.text : ''
    // Skip empty text and whisper.cpp control tokens such as [_BEG_].
    if (!text || text.startsWith('[_')) {
      continue
    }
    const offsets = getTokenOffsets(token)
    if (!offsets) {
      continue
    }

    // A leading space marks a word boundary in whisper.cpp token output.
    const hasLeadingSpace = /^\s/.test(text)
    const cleaned = text.replace(/^\s+/, '')
    if (!cleaned) {
      continue
    }
    // No letters or digits at all → treat the token as punctuation.
    const isPunctuation = !/[a-z0-9]/i.test(cleaned)

    if (hasLeadingSpace && currentWord) {
      flush()
    }
    if (isPunctuation) {
      // Punctuation stretches the current word's end time but is dropped
      // from its text.
      if (currentWord) {
        currentEnd = offsets.to / 1000
      }
      continue
    }

    if (!currentWord) {
      currentStart = offsets.from / 1000 // ms → s
    }
    currentWord += cleaned
    currentEnd = offsets.to / 1000 // ms → s
  }
  flush() // emit the trailing word, if any
  return segments
}
236
+
237
+ function getTokenOffsets(token: any): TokenOffsets | null {
238
+ const offsets = token?.offsets
239
+ const startMs = Number(offsets?.from)
240
+ const endMs = Number(offsets?.to)
241
+ if (!Number.isFinite(startMs) || !Number.isFinite(endMs)) {
242
+ return null
243
+ }
244
+ if (endMs < startMs) {
245
+ return null
246
+ }
247
+ return { from: startMs, to: endMs }
248
+ }
249
+
250
+ function parseSegmentsArray(rawSegments: unknown): TranscriptSegment[] {
251
+ if (!Array.isArray(rawSegments)) {
252
+ return []
253
+ }
254
+ return rawSegments
255
+ .map((segment: any) => {
256
+ const times = getSegmentTimes(segment)
257
+ if (!times) {
258
+ return null
259
+ }
260
+ const text =
261
+ typeof segment.text === 'string'
262
+ ? segment.text
263
+ : typeof segment.transcript === 'string'
264
+ ? segment.transcript
265
+ : ''
266
+ if (!text.trim()) {
267
+ return null
268
+ }
269
+ return {
270
+ start: times.start,
271
+ end: times.end,
272
+ text: text.trim(),
273
+ } satisfies TranscriptSegment
274
+ })
275
+ .filter((segment): segment is TranscriptSegment => Boolean(segment))
276
+ }
277
+
278
+ function parseTranscriptionArray(
279
+ rawTranscription: unknown,
280
+ ): TranscriptSegment[] {
281
+ if (!Array.isArray(rawTranscription)) {
282
+ return []
283
+ }
284
+ return rawTranscription
285
+ .map((segment: any) => {
286
+ if (!segment || typeof segment !== 'object') {
287
+ return null
288
+ }
289
+ const offsets = (segment as any).offsets
290
+ if (!offsets || typeof offsets !== 'object') {
291
+ return null
292
+ }
293
+ const startMs = Number((offsets as any).from)
294
+ const endMs = Number((offsets as any).to)
295
+ if (!Number.isFinite(startMs) || !Number.isFinite(endMs)) {
296
+ return null
297
+ }
298
+ if (endMs <= startMs) {
299
+ return null
300
+ }
301
+ const text =
302
+ typeof (segment as any).text === 'string' ? (segment as any).text : ''
303
+ if (!text.trim()) {
304
+ return null
305
+ }
306
+ return {
307
+ start: startMs / 1000,
308
+ end: endMs / 1000,
309
+ text: text.trim(),
310
+ } satisfies TranscriptSegment
311
+ })
312
+ .filter((segment): segment is TranscriptSegment => Boolean(segment))
313
+ }
314
+
315
+ function getSegmentTimes(segment: any): { start: number; end: number } | null {
316
+ if (
317
+ segment &&
318
+ typeof segment.start === 'number' &&
319
+ typeof segment.end === 'number'
320
+ ) {
321
+ if (segment.end > segment.start) {
322
+ return { start: segment.start, end: segment.end }
323
+ }
324
+ }
325
+ if (
326
+ segment &&
327
+ typeof segment.t0 === 'number' &&
328
+ typeof segment.t1 === 'number'
329
+ ) {
330
+ const start = segment.t0 * 0.01
331
+ const end = segment.t1 * 0.01
332
+ if (end > start) {
333
+ return { start, end }
334
+ }
335
+ }
336
+ return null
337
+ }
338
+
339
+ function normalizeTranscriptText(text: string) {
340
+ return text
341
+ .toLowerCase()
342
+ .replace(/[^a-z0-9]+/g, ' ')
343
+ .replace(/\s+/g, ' ')
344
+ .trim()
345
+ }