@remotion/install-whisper-cpp 4.0.215 → 4.0.216

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,11 @@ export type Caption = {
3
3
  text: string;
4
4
  startInSeconds: number;
5
5
  };
6
+ /**
7
+ *
8
+ * @deprecated Use the `toCaptions()` function from `@remotion/install-whisper-cpp` instead
9
+ * and then process the captions using `createTikTokStyleCaptions()` from `@remotion/captions`.
10
+ */
6
11
  export declare function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }: {
7
12
  transcription: TranscriptionJson<true>['transcription'];
8
13
  combineTokensWithinMilliseconds: number;
@@ -1,6 +1,11 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.convertToCaptions = convertToCaptions;
4
+ /**
5
+ *
6
+ * @deprecated Use the `toCaptions()` function from `@remotion/install-whisper-cpp` instead
7
+ * and then process the captions using `createTikTokStyleCaptions()` from `@remotion/captions`.
8
+ */
4
9
  function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }) {
5
10
  const merged = [];
6
11
  let currentText = '';
@@ -1,5 +1,5 @@
1
1
  import { type OnProgress } from './download';
2
- declare const models: readonly ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3"];
2
+ declare const models: readonly ["tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo"];
3
3
  export type WhisperModel = (typeof models)[number];
4
4
  export declare const getModelPath: (folder: string, model: WhisperModel) => string;
5
5
  export declare const downloadWhisperModel: ({ model, folder, printOutput, onProgress, signal, }: {
@@ -42,6 +42,7 @@ const models = [
42
42
  'large-v1',
43
43
  'large-v2',
44
44
  'large-v3',
45
+ 'large-v3-turbo',
45
46
  ];
46
47
  const modelSizes = {
47
48
  'medium.en': 1533774781,
@@ -49,6 +50,7 @@ const modelSizes = {
49
50
  'large-v1': 3094623691,
50
51
  'large-v2': 3094623691,
51
52
  'large-v3': 3095033483,
53
+ 'large-v3-turbo': 1624555275,
52
54
  small: 487601967,
53
55
  tiny: 77691713,
54
56
  'small.en': 487614201,
package/dist/index.d.ts CHANGED
@@ -3,4 +3,5 @@ export type { OnProgress } from './download';
3
3
  export { WhisperModel, downloadWhisperModel } from './download-whisper-model';
4
4
  export { installWhisperCpp } from './install-whisper-cpp';
5
5
  export type { Language } from './languages';
6
+ export { toCaptions } from './to-captions';
6
7
  export { TranscribeOnProgress, TranscriptionJson, transcribe, } from './transcribe';
package/dist/index.js CHANGED
@@ -1,11 +1,13 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.transcribe = exports.installWhisperCpp = exports.downloadWhisperModel = exports.convertToCaptions = void 0;
3
+ exports.transcribe = exports.toCaptions = exports.installWhisperCpp = exports.downloadWhisperModel = exports.convertToCaptions = void 0;
4
4
  var convert_to_captions_1 = require("./convert-to-captions");
5
5
  Object.defineProperty(exports, "convertToCaptions", { enumerable: true, get: function () { return convert_to_captions_1.convertToCaptions; } });
6
6
  var download_whisper_model_1 = require("./download-whisper-model");
7
7
  Object.defineProperty(exports, "downloadWhisperModel", { enumerable: true, get: function () { return download_whisper_model_1.downloadWhisperModel; } });
8
8
  var install_whisper_cpp_1 = require("./install-whisper-cpp");
9
9
  Object.defineProperty(exports, "installWhisperCpp", { enumerable: true, get: function () { return install_whisper_cpp_1.installWhisperCpp; } });
10
+ var to_captions_1 = require("./to-captions");
11
+ Object.defineProperty(exports, "toCaptions", { enumerable: true, get: function () { return to_captions_1.toCaptions; } });
10
12
  var transcribe_1 = require("./transcribe");
11
13
  Object.defineProperty(exports, "transcribe", { enumerable: true, get: function () { return transcribe_1.transcribe; } });
@@ -1,123 +1,680 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
+ const captions_1 = require("@remotion/captions");
3
4
  const bun_test_1 = require("bun:test");
4
5
  const convert_to_captions_1 = require("../convert-to-captions");
6
+ const to_captions_1 = require("../to-captions");
5
7
  const example_payload_1 = require("./example-payload");
6
8
  (0, bun_test_1.test)('Convert to captions - 200ms together', () => {
7
- const { captions: transcript } = (0, convert_to_captions_1.convertToCaptions)({
8
- transcription: example_payload_1.examplePayload.transcription,
9
+ const { captions } = (0, to_captions_1.toCaptions)({
10
+ whisperCppOutput: example_payload_1.examplePayload,
11
+ });
12
+ const { pages } = (0, captions_1.createTikTokStyleCaptions)({
13
+ captions,
9
14
  combineTokensWithinMilliseconds: 200,
10
15
  });
11
- (0, bun_test_1.expect)(transcript).toEqual([
12
- { text: 'William', startInSeconds: 0.24 },
13
- { text: 'just', startInSeconds: 0.48 },
14
- { text: 'hit 100,000', startInSeconds: 0.7 },
15
- { text: 'YouTube', startInSeconds: 2.22 },
16
- { text: 'subscribers', startInSeconds: 2.94 },
17
- { text: 'And we', startInSeconds: 3.24 },
18
- { text: 'are going', startInSeconds: 3.42 },
19
- { text: 'to celebrate', startInSeconds: 3.76 },
20
- { text: 'that', startInSeconds: 4.34 },
21
- { text: 'We thought', startInSeconds: 4.5 },
22
- { text: 'about', startInSeconds: 5.1 },
23
- { text: 'to bake', startInSeconds: 5.42 },
24
- { text: 'a cake', startInSeconds: 6.14 },
25
- { text: 'We found', startInSeconds: 6.56 },
26
- { text: 'this', startInSeconds: 7.12 },
27
- { text: 'and it', startInSeconds: 7.36 },
28
- { text: 'reminded', startInSeconds: 7.78 },
29
- { text: 'us of', startInSeconds: 8.04 },
30
- { text: 'William', startInSeconds: 8.52 },
31
- { text: 'We hope', startInSeconds: 8.94 },
32
- { text: 'he will', startInSeconds: 9.42 },
33
- { text: 'like', startInSeconds: 9.68 },
34
- { text: 'the cake', startInSeconds: 9.86 },
35
- { text: "Let's start", startInSeconds: 10.28 },
36
- { text: 'with the', startInSeconds: 10.58 },
37
- { text: 'dough', startInSeconds: 10.96 },
38
- { text: 'By putting', startInSeconds: 11.2 },
39
- { text: 'some', startInSeconds: 11.64 },
40
- { text: 'butter', startInSeconds: 12.06 },
41
- { text: 'Some', startInSeconds: 12.86 },
42
- { text: 'sugar', startInSeconds: 13.3 },
43
- { text: 'Eggs', startInSeconds: 14.36 },
16
+ (0, bun_test_1.expect)(pages).toEqual([
17
+ {
18
+ text: 'William',
19
+ startMs: 40,
20
+ tokens: [{ text: 'William', fromMs: 40, toMs: 420 }],
21
+ },
22
+ {
23
+ text: 'just',
24
+ startMs: 420,
25
+ tokens: [{ text: 'just', fromMs: 420, toMs: 650 }],
26
+ },
27
+ {
28
+ text: 'hit 100,000',
29
+ startMs: 650,
30
+ tokens: [
31
+ { text: 'hit', fromMs: 650, toMs: 810 },
32
+ { text: ' 100', fromMs: 810, toMs: 1330 },
33
+ { text: ',', fromMs: 1330, toMs: 1440 },
34
+ { text: '000', fromMs: 1440, toMs: 1950 },
35
+ ],
36
+ },
37
+ {
38
+ text: 'YouTube',
39
+ startMs: 1950,
40
+ tokens: [{ text: 'YouTube', fromMs: 1950, toMs: 2370 }],
41
+ },
42
+ {
43
+ text: 'subscribers',
44
+ startMs: 2370,
45
+ tokens: [{ text: 'subscribers', fromMs: 2370, toMs: 3060 }],
46
+ },
47
+ {
48
+ text: 'And we',
49
+ startMs: 3060,
50
+ tokens: [
51
+ { text: 'And', fromMs: 3060, toMs: 3190 },
52
+ { text: ' we', fromMs: 3190, toMs: 3280 },
53
+ ],
54
+ },
55
+ {
56
+ text: 'are going',
57
+ startMs: 3280,
58
+ tokens: [
59
+ { text: 'are', fromMs: 3280, toMs: 3410 },
60
+ { text: ' going', fromMs: 3410, toMs: 3630 },
61
+ ],
62
+ },
63
+ {
64
+ text: 'to celebrate',
65
+ startMs: 3630,
66
+ tokens: [
67
+ { text: 'to', fromMs: 3630, toMs: 3710 },
68
+ { text: ' celebrate', fromMs: 3710, toMs: 4130 },
69
+ ],
70
+ },
71
+ {
72
+ text: 'that',
73
+ startMs: 4130,
74
+ tokens: [{ text: 'that', fromMs: 4130, toMs: 4340 }],
75
+ },
76
+ {
77
+ text: 'We thought',
78
+ startMs: 4340,
79
+ tokens: [
80
+ { text: 'We', fromMs: 4340, toMs: 4500 },
81
+ { text: ' thought', fromMs: 4500, toMs: 5140 },
82
+ ],
83
+ },
84
+ {
85
+ text: 'about',
86
+ startMs: 5140,
87
+ tokens: [{ text: 'about', fromMs: 5140, toMs: 5480 }],
88
+ },
89
+ {
90
+ text: 'to bake',
91
+ startMs: 5480,
92
+ tokens: [
93
+ { text: 'to', fromMs: 5480, toMs: 5660 },
94
+ { text: ' bake', fromMs: 5660, toMs: 5980 },
95
+ ],
96
+ },
97
+ {
98
+ text: 'a cake',
99
+ startMs: 5980,
100
+ tokens: [
101
+ { text: 'a', fromMs: 5980, toMs: 6080 },
102
+ { text: ' cake', fromMs: 6080, toMs: 6400 },
103
+ ],
104
+ },
105
+ {
106
+ text: 'We found',
107
+ startMs: 6400,
108
+ tokens: [
109
+ { text: 'We', fromMs: 6400, toMs: 6540 },
110
+ { text: ' found', fromMs: 6540, toMs: 6900 },
111
+ ],
112
+ },
113
+ {
114
+ text: 'this',
115
+ startMs: 6900,
116
+ tokens: [{ text: 'this', fromMs: 6900, toMs: 7200 }],
117
+ },
118
+ {
119
+ text: 'and it',
120
+ startMs: 7200,
121
+ tokens: [
122
+ { text: 'and', fromMs: 7200, toMs: 7390 },
123
+ { text: ' it', fromMs: 7390, toMs: 7510 },
124
+ ],
125
+ },
126
+ {
127
+ text: 'reminded',
128
+ startMs: 7510,
129
+ tokens: [{ text: 'reminded', fromMs: 7510, toMs: 8030 }],
130
+ },
131
+ {
132
+ text: 'us of',
133
+ startMs: 8030,
134
+ tokens: [
135
+ { text: 'us', fromMs: 8030, toMs: 8170 },
136
+ { text: ' of', fromMs: 8170, toMs: 8260 },
137
+ ],
138
+ },
139
+ {
140
+ text: 'William',
141
+ startMs: 8260,
142
+ tokens: [{ text: 'William', fromMs: 8260, toMs: 8740 }],
143
+ },
144
+ {
145
+ text: 'We hope',
146
+ startMs: 8740,
147
+ tokens: [
148
+ { text: 'We', fromMs: 8740, toMs: 8850 },
149
+ { text: ' hope', fromMs: 8850, toMs: 9080 },
150
+ ],
151
+ },
152
+ {
153
+ text: 'he will',
154
+ startMs: 9080,
155
+ tokens: [
156
+ { text: 'he', fromMs: 9080, toMs: 9190 },
157
+ { text: ' will', fromMs: 9190, toMs: 9420 },
158
+ ],
159
+ },
160
+ {
161
+ text: 'like',
162
+ startMs: 9420,
163
+ tokens: [{ text: 'like', fromMs: 9420, toMs: 9650 }],
164
+ },
165
+ {
166
+ text: 'the cake',
167
+ startMs: 9650,
168
+ tokens: [
169
+ { text: 'the', fromMs: 9650, toMs: 9820 },
170
+ { text: ' cake', fromMs: 9820, toMs: 10100 },
171
+ ],
172
+ },
173
+ {
174
+ text: "Let's start",
175
+ startMs: 10100,
176
+ tokens: [
177
+ { text: 'Let', fromMs: 10100, toMs: 10220 },
178
+ { text: "'s", fromMs: 10220, toMs: 10300 },
179
+ { text: ' start', fromMs: 10300, toMs: 10570 },
180
+ ],
181
+ },
182
+ {
183
+ text: 'with the',
184
+ startMs: 10570,
185
+ tokens: [
186
+ { text: 'with', fromMs: 10570, toMs: 10700 },
187
+ { text: ' the', fromMs: 10700, toMs: 10780 },
188
+ ],
189
+ },
190
+ {
191
+ text: 'dough',
192
+ startMs: 10780,
193
+ tokens: [{ text: 'dough', fromMs: 10780, toMs: 11000 }],
194
+ },
195
+ {
196
+ text: 'By putting',
197
+ startMs: 11000,
198
+ tokens: [
199
+ { text: 'By', fromMs: 11000, toMs: 11120 },
200
+ { text: ' putting', fromMs: 11120, toMs: 11550 },
201
+ ],
202
+ },
203
+ {
204
+ text: 'some',
205
+ startMs: 11550,
206
+ tokens: [{ text: 'some', fromMs: 11550, toMs: 11790 }],
207
+ },
208
+ {
209
+ text: 'butter',
210
+ startMs: 11790,
211
+ tokens: [{ text: 'butter', fromMs: 11790, toMs: 12180 }],
212
+ },
213
+ {
214
+ text: 'Some',
215
+ startMs: 12180,
216
+ tokens: [{ text: 'Some', fromMs: 12180, toMs: 12750 }],
217
+ },
218
+ {
219
+ text: 'sugar',
220
+ startMs: 12750,
221
+ tokens: [{ text: 'sugar', fromMs: 12750, toMs: 13380 }],
222
+ },
223
+ {
224
+ text: 'Eggs',
225
+ startMs: 13380,
226
+ tokens: [{ text: 'Eggs', fromMs: 13380, toMs: 14580 }],
227
+ },
44
228
  {
45
229
  text: 'No frameworks,',
46
- startInSeconds: 14.78,
230
+ startMs: 14580,
231
+ tokens: [
232
+ { text: 'No', fromMs: 14580, toMs: 14720 },
233
+ { text: ' frameworks', fromMs: 14720, toMs: 15440 },
234
+ { text: ',', fromMs: 15440, toMs: 15550 },
235
+ ],
236
+ },
237
+ {
238
+ text: 'just',
239
+ startMs: 15550,
240
+ tokens: [{ text: 'just', fromMs: 15550, toMs: 15790 }],
241
+ },
242
+ {
243
+ text: 'vanilla',
244
+ startMs: 15790,
245
+ tokens: [{ text: 'vanilla', fromMs: 15790, toMs: 16220 }],
246
+ },
247
+ {
248
+ text: 'Pinch',
249
+ startMs: 16220,
250
+ tokens: [
251
+ { text: 'P', fromMs: 16220, toMs: 16300 },
252
+ { text: 'inch', fromMs: 16300, toMs: 16640 },
253
+ ],
254
+ },
255
+ {
256
+ text: 'of salt',
257
+ startMs: 16640,
258
+ tokens: [
259
+ { text: 'of', fromMs: 16640, toMs: 16810 },
260
+ { text: ' salt', fromMs: 16810, toMs: 17180 },
261
+ ],
262
+ },
263
+ {
264
+ text: 'Some',
265
+ startMs: 17180,
266
+ tokens: [{ text: 'Some', fromMs: 17180, toMs: 17480 }],
267
+ },
268
+ {
269
+ text: 'Nutella',
270
+ startMs: 17480,
271
+ tokens: [
272
+ { text: 'Nut', fromMs: 17480, toMs: 17700 },
273
+ { text: 'ella', fromMs: 17700, toMs: 18020 },
274
+ ],
275
+ },
276
+ {
277
+ text: 'Some',
278
+ startMs: 18020,
279
+ tokens: [{ text: 'Some', fromMs: 18020, toMs: 18280 }],
280
+ },
281
+ {
282
+ text: 'chocolate',
283
+ startMs: 18280,
284
+ tokens: [{ text: 'chocolate', fromMs: 18280, toMs: 18880 }],
285
+ },
286
+ {
287
+ text: 'Baking',
288
+ startMs: 18880,
289
+ tokens: [
290
+ { text: 'B', fromMs: 18880, toMs: 18980 },
291
+ { text: 'aking', fromMs: 18980, toMs: 19420 },
292
+ ],
293
+ },
294
+ {
295
+ text: 'powder',
296
+ startMs: 19420,
297
+ tokens: [{ text: 'powder', fromMs: 19420, toMs: 19980 }],
298
+ },
299
+ {
300
+ text: 'And',
301
+ startMs: 19980,
302
+ tokens: [{ text: 'And', fromMs: 19980, toMs: 20500 }],
303
+ },
304
+ {
305
+ text: 'flour',
306
+ startMs: 20500,
307
+ tokens: [{ text: 'flour', fromMs: 20500, toMs: 21380 }],
308
+ },
309
+ {
310
+ text: 'Just',
311
+ startMs: 21380,
312
+ tokens: [{ text: 'Just', fromMs: 21380, toMs: 21690 }],
313
+ },
314
+ {
315
+ text: 'massage',
316
+ startMs: 21690,
317
+ tokens: [{ text: 'massage', fromMs: 21690, toMs: 22120 }],
318
+ },
319
+ {
320
+ text: 'in the',
321
+ startMs: 22120,
322
+ tokens: [
323
+ { text: 'in', fromMs: 22120, toMs: 22250 },
324
+ { text: ' the', fromMs: 22250, toMs: 22450 },
325
+ ],
326
+ },
327
+ {
328
+ text: 'butter',
329
+ startMs: 22450,
330
+ tokens: [{ text: 'butter', fromMs: 22450, toMs: 22880 }],
331
+ },
332
+ {
333
+ text: 'to give',
334
+ startMs: 22880,
335
+ tokens: [
336
+ { text: 'to', fromMs: 22880, toMs: 23000 },
337
+ { text: ' give', fromMs: 23000, toMs: 23250 },
338
+ ],
339
+ },
340
+ {
341
+ text: 'it the',
342
+ startMs: 23250,
343
+ tokens: [
344
+ { text: 'it', fromMs: 23250, toMs: 23380 },
345
+ { text: ' the', fromMs: 23380, toMs: 23590 },
346
+ ],
347
+ },
348
+ {
349
+ text: 'full',
350
+ startMs: 23590,
351
+ tokens: [{ text: 'full', fromMs: 23590, toMs: 23890 }],
352
+ },
353
+ {
354
+ text: 'treatment',
355
+ startMs: 23890,
356
+ tokens: [{ text: 'treatment', fromMs: 23890, toMs: 24380 }],
357
+ },
358
+ {
359
+ text: 'Fill',
360
+ startMs: 24380,
361
+ tokens: [{ text: 'Fill', fromMs: 24380, toMs: 24740 }],
362
+ },
363
+ {
364
+ text: 'it in',
365
+ startMs: 24740,
366
+ tokens: [
367
+ { text: 'it', fromMs: 24740, toMs: 24920 },
368
+ { text: ' in', fromMs: 24920, toMs: 25120 },
369
+ ],
370
+ },
371
+ {
372
+ text: 'Bake',
373
+ startMs: 25120,
374
+ tokens: [{ text: 'Bake', fromMs: 25120, toMs: 25570 }],
375
+ },
376
+ {
377
+ text: 'it for',
378
+ startMs: 25570,
379
+ tokens: [
380
+ { text: 'it', fromMs: 25570, toMs: 25770 },
381
+ { text: ' for', fromMs: 25770, toMs: 26060 },
382
+ ],
383
+ },
384
+ {
385
+ text: 'half',
386
+ startMs: 26060,
387
+ tokens: [{ text: 'half', fromMs: 26060, toMs: 26480 }],
388
+ },
389
+ {
390
+ text: 'an',
391
+ startMs: 26480,
392
+ tokens: [{ text: 'an', fromMs: 26480, toMs: 26690 }],
393
+ },
394
+ {
395
+ text: 'hour',
396
+ startMs: 26690,
397
+ tokens: [{ text: 'hour', fromMs: 26690, toMs: 27110 }],
398
+ },
399
+ {
400
+ text: 'at 170',
401
+ startMs: 27110,
402
+ tokens: [
403
+ { text: 'at', fromMs: 27110, toMs: 27300 },
404
+ { text: ' 170', fromMs: 27300, toMs: 28170 },
405
+ ],
406
+ },
407
+ {
408
+ text: 'degrees',
409
+ startMs: 28170,
410
+ tokens: [{ text: 'degrees', fromMs: 28170, toMs: 28800 }],
411
+ },
412
+ {
413
+ text: "It's time",
414
+ startMs: 28850,
415
+ tokens: [
416
+ { text: 'It', fromMs: 28850, toMs: 28900 },
417
+ { text: "'s", fromMs: 28900, toMs: 29000 },
418
+ { text: ' time', fromMs: 29000, toMs: 29210 },
419
+ ],
420
+ },
421
+ {
422
+ text: 'for the',
423
+ startMs: 29210,
424
+ tokens: [
425
+ { text: 'for', fromMs: 29210, toMs: 29400 },
426
+ { text: ' the', fromMs: 29400, toMs: 29530 },
427
+ ],
428
+ },
429
+ {
430
+ text: 'icing',
431
+ startMs: 29530,
432
+ tokens: [{ text: 'icing', fromMs: 29530, toMs: 29800 }],
433
+ },
434
+ {
435
+ text: 'on the',
436
+ startMs: 29800,
437
+ tokens: [
438
+ { text: 'on', fromMs: 29800, toMs: 29900 },
439
+ { text: ' the', fromMs: 29900, toMs: 30060 },
440
+ ],
441
+ },
442
+ {
443
+ text: 'cake',
444
+ startMs: 30060,
445
+ tokens: [{ text: 'cake', fromMs: 30060, toMs: 30320 }],
446
+ },
447
+ {
448
+ text: 'Time',
449
+ startMs: 30320,
450
+ tokens: [{ text: 'Time', fromMs: 30320, toMs: 30940 }],
451
+ },
452
+ {
453
+ text: 'for',
454
+ startMs: 30940,
455
+ tokens: [{ text: 'for', fromMs: 30940, toMs: 31410 }],
456
+ },
457
+ {
458
+ text: 'the',
459
+ startMs: 31410,
460
+ tokens: [{ text: 'the', fromMs: 31410, toMs: 31880 }],
461
+ },
462
+ {
463
+ text: 'most',
464
+ startMs: 31880,
465
+ tokens: [{ text: 'most', fromMs: 31880, toMs: 32500 }],
466
+ },
467
+ {
468
+ text: 'critical',
469
+ startMs: 32500,
470
+ tokens: [{ text: 'critical', fromMs: 32500, toMs: 33750 }],
471
+ },
472
+ {
473
+ text: 'part',
474
+ startMs: 33750,
475
+ tokens: [{ text: 'part', fromMs: 33750, toMs: 34400 }],
476
+ },
477
+ {
478
+ text: 'This',
479
+ startMs: 34400,
480
+ tokens: [{ text: 'This', fromMs: 34400, toMs: 35840 }],
481
+ },
482
+ {
483
+ text: 'is',
484
+ startMs: 35840,
485
+ tokens: [{ text: 'is', fromMs: 35840, toMs: 36580 }],
486
+ },
487
+ {
488
+ text: 'how',
489
+ startMs: 36580,
490
+ tokens: [{ text: 'how', fromMs: 36580, toMs: 37670 }],
491
+ },
492
+ {
493
+ text: 'it',
494
+ startMs: 37670,
495
+ tokens: [{ text: 'it', fromMs: 37670, toMs: 38420 }],
496
+ },
497
+ {
498
+ text: 'turned',
499
+ startMs: 38420,
500
+ tokens: [{ text: 'turned', fromMs: 38420, toMs: 40580 }],
501
+ },
502
+ {
503
+ text: 'out',
504
+ startMs: 40580,
505
+ tokens: [{ text: 'out', fromMs: 40580, toMs: 41700 }],
506
+ },
507
+ {
508
+ text: 'Stupid',
509
+ startMs: 41700,
510
+ tokens: [{ text: 'Stupid', fromMs: 41700, toMs: 42750 }],
511
+ },
512
+ {
513
+ text: 'idea,',
514
+ startMs: 42750,
515
+ tokens: [
516
+ { text: 'idea', fromMs: 42750, toMs: 43440 },
517
+ { text: ',', fromMs: 43440, toMs: 43590 },
518
+ ],
519
+ },
520
+ {
521
+ text: 'pretty',
522
+ startMs: 43590,
523
+ tokens: [{ text: 'pretty', fromMs: 43590, toMs: 44070 }],
524
+ },
525
+ {
526
+ text: 'bad',
527
+ startMs: 44070,
528
+ tokens: [{ text: 'bad', fromMs: 44070, toMs: 44310 }],
529
+ },
530
+ {
531
+ text: 'execution',
532
+ startMs: 44310,
533
+ tokens: [{ text: 'execution', fromMs: 44310, toMs: 45040 }],
534
+ },
535
+ {
536
+ text: 'I hope',
537
+ startMs: 45040,
538
+ tokens: [
539
+ { text: 'I', fromMs: 45040, toMs: 45140 },
540
+ { text: ' hope', fromMs: 45140, toMs: 45530 },
541
+ ],
542
+ },
543
+ {
544
+ text: 'he likes',
545
+ startMs: 45530,
546
+ tokens: [
547
+ { text: 'he', fromMs: 45530, toMs: 45730 },
548
+ { text: ' likes', fromMs: 45730, toMs: 46220 },
549
+ ],
550
+ },
551
+ {
552
+ text: 'it anyway',
553
+ startMs: 46220,
554
+ tokens: [
555
+ { text: 'it', fromMs: 46220, toMs: 46420 },
556
+ { text: ' anyway', fromMs: 46420, toMs: 47040 },
557
+ ],
558
+ },
559
+ {
560
+ text: 'Hey',
561
+ startMs: 47040,
562
+ tokens: [{ text: 'Hey', fromMs: 47040, toMs: 48030 }],
563
+ },
564
+ {
565
+ text: 'William',
566
+ startMs: 48030,
567
+ tokens: [{ text: 'William', fromMs: 48030, toMs: 50340 }],
568
+ },
569
+ {
570
+ text: 'Congrats',
571
+ startMs: 50340,
572
+ tokens: [
573
+ { text: 'Cong', fromMs: 50340, toMs: 51970 },
574
+ { text: 'rats', fromMs: 51970, toMs: 53580 },
575
+ ],
576
+ },
577
+ {
578
+ text: 'We',
579
+ startMs: 53580,
580
+ tokens: [{ text: 'We', fromMs: 53580, toMs: 53790 }],
581
+ },
582
+ {
583
+ text: 'wanted',
584
+ startMs: 53790,
585
+ tokens: [{ text: 'wanted', fromMs: 53790, toMs: 54440 }],
586
+ },
587
+ {
588
+ text: 'to congratulate',
589
+ startMs: 54440,
590
+ tokens: [
591
+ { text: 'to', fromMs: 54440, toMs: 54640 },
592
+ { text: ' congratulate', fromMs: 54640, toMs: 55930 },
593
+ ],
594
+ },
595
+ {
596
+ text: 'you',
597
+ startMs: 55930,
598
+ tokens: [{ text: 'you', fromMs: 55930, toMs: 56280 }],
599
+ },
600
+ {
601
+ text: 'on',
602
+ startMs: 56280,
603
+ tokens: [{ text: 'on', fromMs: 56280, toMs: 56520 }],
604
+ },
605
+ {
606
+ text: 'the',
607
+ startMs: 56520,
608
+ tokens: [{ text: 'the', fromMs: 56520, toMs: 56880 }],
609
+ },
610
+ {
611
+ text: '100,000',
612
+ startMs: 56880,
613
+ tokens: [
614
+ { text: '100', fromMs: 56880, toMs: 57980 },
615
+ { text: ',', fromMs: 57980, toMs: 58010 },
616
+ { text: '000', fromMs: 58010, toMs: 58180 },
617
+ ],
618
+ },
619
+ {
620
+ text: 'You hear',
621
+ startMs: 58690,
622
+ tokens: [
623
+ { text: 'You', fromMs: 58690, toMs: 58870 },
624
+ { text: ' hear', fromMs: 58870, toMs: 59150 },
625
+ ],
626
+ },
627
+ {
628
+ text: 'Joseph',
629
+ startMs: 59150,
630
+ tokens: [{ text: 'Joseph', fromMs: 59150, toMs: 59570 }],
631
+ },
632
+ {
633
+ text: 'crying?',
634
+ startMs: 59570,
635
+ tokens: [
636
+ { text: 'crying', fromMs: 59570, toMs: 59990 },
637
+ { text: '?', fromMs: 59990, toMs: 60220 },
638
+ ],
639
+ },
640
+ {
641
+ text: 'Thank',
642
+ startMs: 60220,
643
+ tokens: [{ text: 'Thank', fromMs: 60220, toMs: 60820 }],
644
+ },
645
+ {
646
+ text: 'you',
647
+ startMs: 60820,
648
+ tokens: [{ text: 'you', fromMs: 60820, toMs: 61180 }],
649
+ },
650
+ {
651
+ text: 'so',
652
+ startMs: 61180,
653
+ tokens: [{ text: 'so', fromMs: 61180, toMs: 61420 }],
654
+ },
655
+ {
656
+ text: 'much',
657
+ startMs: 61420,
658
+ tokens: [{ text: 'much', fromMs: 61420, toMs: 61920 }],
659
+ },
660
+ {
661
+ text: '(electronic',
662
+ startMs: 61920,
663
+ tokens: [
664
+ { text: '(', fromMs: 61920, toMs: 62080 },
665
+ { text: 'elect', fromMs: 62080, toMs: 62880 },
666
+ { text: 'ronic', fromMs: 62880, toMs: 63680 },
667
+ ],
668
+ },
669
+ {
670
+ text: 'beeping)',
671
+ startMs: 63680,
672
+ tokens: [
673
+ { text: 'be', fromMs: 63680, toMs: 63890 },
674
+ { text: 'eping', fromMs: 63890, toMs: 64800 },
675
+ { text: ')', fromMs: 64800, toMs: 65000 },
676
+ ],
47
677
  },
48
- { text: 'just', startInSeconds: 15.68 },
49
- { text: 'vanilla', startInSeconds: 16.1 },
50
- { text: 'Pinch', startInSeconds: 16.38 },
51
- { text: 'of salt', startInSeconds: 16.58 },
52
- { text: 'Some', startInSeconds: 17.44 },
53
- { text: 'Nutella', startInSeconds: 17.78 },
54
- { text: 'Some', startInSeconds: 18.3 },
55
- { text: 'chocolate', startInSeconds: 18.68 },
56
- { text: 'Baking', startInSeconds: 19.12 },
57
- { text: 'powder', startInSeconds: 19.76 },
58
- { text: 'And', startInSeconds: 20.68 },
59
- { text: 'flour', startInSeconds: 21.14 },
60
- { text: 'Just', startInSeconds: 21.66 },
61
- { text: 'massage', startInSeconds: 22.06 },
62
- { text: 'in the', startInSeconds: 22.38 },
63
- { text: 'butter', startInSeconds: 22.84 },
64
- { text: 'to give', startInSeconds: 23.08 },
65
- { text: 'it the', startInSeconds: 23.32 },
66
- { text: 'full', startInSeconds: 23.76 },
67
- { text: 'treatment', startInSeconds: 24.28 },
68
- { text: 'Fill', startInSeconds: 24.54 },
69
- { text: 'it in', startInSeconds: 24.7 },
70
- { text: 'Bake', startInSeconds: 26.1 },
71
- { text: 'it for', startInSeconds: 26.3 },
72
- { text: 'half', startInSeconds: 26.64 },
73
- { text: 'an', startInSeconds: 26.86 },
74
- { text: 'hour', startInSeconds: 27.06 },
75
- { text: 'at 170', startInSeconds: 27.4 },
76
- { text: 'degrees', startInSeconds: 28.6 },
77
- { text: "It's time", startInSeconds: 28.84 },
78
- { text: 'for the', startInSeconds: 29.28 },
79
- { text: 'icing', startInSeconds: 29.7 },
80
- { text: 'on the', startInSeconds: 29.88 },
81
- { text: 'cake', startInSeconds: 30.56 },
82
- { text: 'Time', startInSeconds: 33.14 },
83
- { text: 'for', startInSeconds: 33.34 },
84
- { text: 'the', startInSeconds: 33.46 },
85
- { text: 'most', startInSeconds: 33.62 },
86
- { text: 'critical', startInSeconds: 34 },
87
- { text: 'part', startInSeconds: 34.76 },
88
- { text: 'This', startInSeconds: 40.74 },
89
- { text: 'is', startInSeconds: 40.86 },
90
- { text: 'how', startInSeconds: 41 },
91
- { text: 'it', startInSeconds: 41.1 },
92
- { text: 'turned', startInSeconds: 41.32 },
93
- { text: 'out', startInSeconds: 42.02 },
94
- { text: 'Stupid', startInSeconds: 42.78 },
95
- { text: 'idea,', startInSeconds: 43.36 },
96
- { text: 'pretty', startInSeconds: 43.7 },
97
- { text: 'bad', startInSeconds: 44.02 },
98
- { text: 'execution', startInSeconds: 44.72 },
99
- { text: 'I hope', startInSeconds: 45.88 },
100
- { text: 'he likes', startInSeconds: 46.2 },
101
- { text: 'it anyway', startInSeconds: 46.58 },
102
- { text: 'Hey', startInSeconds: 49.52 },
103
- { text: 'William', startInSeconds: 50.06 },
104
- { text: 'Congrats', startInSeconds: 52.32 },
105
- { text: 'We', startInSeconds: 54.56 },
106
- { text: 'wanted', startInSeconds: 54.86 },
107
- { text: 'to congratulate', startInSeconds: 55 },
108
- { text: 'you', startInSeconds: 56.22 },
109
- { text: 'on', startInSeconds: 56.54 },
110
- { text: 'the', startInSeconds: 56.68 },
111
- { text: '100,000', startInSeconds: 57.14 },
112
- { text: 'You hear', startInSeconds: 58.86 },
113
- { text: 'Joseph', startInSeconds: 59.4 },
114
- { text: 'crying?', startInSeconds: 59.74 },
115
- { text: 'Thank', startInSeconds: 61.04 },
116
- { text: 'you', startInSeconds: 61.18 },
117
- { text: 'so', startInSeconds: 61.44 },
118
- { text: 'much', startInSeconds: 61.76 },
119
- { text: '(electronic', startInSeconds: 62.88 },
120
- { text: 'beeping)', startInSeconds: 63.02 },
121
678
  ]);
122
679
  });
123
680
  (0, bun_test_1.test)('Convert to captions - 0ms together', () => {
@@ -0,0 +1,10 @@
1
+ import type { Caption } from '@remotion/captions';
2
+ import type { TranscriptionJson } from './transcribe';
3
+ type ToCaptionsInput = {
4
+ whisperCppOutput: TranscriptionJson<true>;
5
+ };
6
+ type ToCaptionsOutput = {
7
+ captions: Caption[];
8
+ };
9
+ export declare const toCaptions: (input: ToCaptionsInput) => ToCaptionsOutput;
10
+ export {};
@@ -0,0 +1,21 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.toCaptions = void 0;
4
+ const toCaptions = (input) => {
5
+ const { transcription } = input.whisperCppOutput;
6
+ const captions = [];
7
+ for (const item of transcription) {
8
+ if (item.text === '') {
9
+ continue;
10
+ }
11
+ captions.push({
12
+ text: captions.length === 0 ? item.text.trimStart() : item.text,
13
+ startMs: item.offsets.from,
14
+ endMs: item.offsets.to,
15
+ timestampMs: item.tokens[0].t_dtw === -1 ? null : item.tokens[0].t_dtw * 10,
16
+ confidence: item.tokens[0].p,
17
+ });
18
+ }
19
+ return { captions };
20
+ };
21
+ exports.toCaptions = toCaptions;
@@ -46,7 +46,7 @@ const readJson = async (jsonPath) => {
46
46
  // https://github.com/ggerganov/whisper.cpp/blob/fe36c909715e6751277ddb020e7892c7670b61d4/examples/main/main.cpp#L989-L999
47
47
  // https://github.com/remotion-dev/remotion/issues/4168
48
48
  const modelToDtw = (model) => {
49
- if (model === 'large-v3') {
49
+ if (model === 'large-v3' || model === 'large-v3-turbo') {
50
50
  return 'large.v3';
51
51
  }
52
52
  if (model === 'large-v2') {
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "url": "https://github.com/remotion-dev/remotion/tree/main/packages/install-whisper-cpp"
4
4
  },
5
5
  "name": "@remotion/install-whisper-cpp",
6
- "version": "4.0.215",
6
+ "version": "4.0.216",
7
7
  "description": "Helpers for installing and using Whisper.cpp",
8
8
  "main": "dist/index.js",
9
9
  "sideEffects": false,
@@ -15,11 +15,10 @@
15
15
  ],
16
16
  "author": "Jonny Burger <jonny@remotion.dev>",
17
17
  "license": "SEE LICENSE IN LICENSE.md",
18
- "dependencies": {},
19
- "peerDependencies": {
20
- "react": ">=16.8.0",
21
- "react-dom": ">=16.8.0"
18
+ "dependencies": {
19
+ "@remotion/captions": "4.0.216"
22
20
  },
21
+ "peerDependencies": {},
23
22
  "devDependencies": {},
24
23
  "keywords": [
25
24
  "remotion",