@remotion/install-whisper-cpp 4.0.130 → 4.0.132

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ import type { TranscriptionJson } from './transcribe';
2
/**
 * A single caption produced by `convertToCaptions`.
 */
+ export type Caption = {
3
/** Caption text; one or more transcription items concatenated together. */
+ text: string;
4
/** Time, in seconds, at which the caption starts. */
+ startInSeconds: number;
5
+ };
6
/**
 * Groups the items of a token-level transcription into captions.
 *
 * In the bundled implementation, an item whose text starts with a space
 * begins a new caption only once the caption being built spans more than
 * `combineTokensWithinMilliseconds` (measured with the items' `offsets`);
 * otherwise the item is appended to the current caption.
 *
 * @param transcription - The `transcription` array of a `TranscriptionJson<true>`.
 * @param combineTokensWithinMilliseconds - Span threshold, in milliseconds,
 * below which following items keep being merged into the current caption.
 * @returns An object whose `captions` array holds the grouped captions in order.
 */
+ export declare function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }: {
7
+ transcription: TranscriptionJson<true>['transcription'];
8
+ combineTokensWithinMilliseconds: number;
9
+ }): {
10
+ captions: Caption[];
11
+ };
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.convertToCaptions = void 0;
4
+ function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }) {
5
+ const merged = [];
6
+ let currentText = '';
7
+ let currentFrom = 0;
8
+ let currentTo = 0;
9
+ let currentTokenLevelTimestamp = 0;
10
+ transcription.forEach((item, index) => {
11
+ const { text } = item;
12
+ // If text starts with a space, push the currentText (if it exists) and start a new one
13
+ if (text.startsWith(' ') &&
14
+ currentTo - currentFrom > combineTokensWithinMilliseconds) {
15
+ if (currentText !== '') {
16
+ merged.push({
17
+ text: currentText,
18
+ startInSeconds: currentTokenLevelTimestamp / 100,
19
+ });
20
+ }
21
+ // Start a new sentence
22
+ currentText = text.trimStart();
23
+ currentFrom = item.offsets.from;
24
+ currentTo = item.offsets.to;
25
+ currentTokenLevelTimestamp = item.tokens[0].t_dtw;
26
+ }
27
+ else {
28
+ // Continuation or start of a new sentence without leading space
29
+ if (currentText === '') {
30
+ // It's the start of the document or after a sentence that started with a space
31
+ currentFrom = item.offsets.from;
32
+ currentTokenLevelTimestamp = item.tokens[0].t_dtw;
33
+ }
34
+ currentText += text;
35
+ currentText = currentText.trimStart();
36
+ currentTo = item.offsets.to;
37
+ }
38
+ // Ensure the last sentence is added
39
+ if (index === transcription.length - 1 && currentText !== '') {
40
+ merged.push({
41
+ text: currentText,
42
+ startInSeconds: currentTokenLevelTimestamp / 100,
43
+ });
44
+ }
45
+ });
46
+ return { captions: merged };
47
+ }
48
+ exports.convertToCaptions = convertToCaptions;
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ export { Caption, convertToCaptions } from './convert-to-captions';
1
2
  export { downloadWhisperModel, OnProgress, WhisperModel, } from './download-whisper-model';
2
3
  export { installWhisperCpp } from './install-whisper-cpp';
3
- export { transcribe } from './transcribe';
4
+ export { transcribe, TranscriptionJson } from './transcribe';
package/dist/index.js CHANGED
@@ -1,6 +1,8 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.transcribe = exports.installWhisperCpp = exports.downloadWhisperModel = void 0;
3
+ exports.transcribe = exports.installWhisperCpp = exports.downloadWhisperModel = exports.convertToCaptions = void 0;
4
+ var convert_to_captions_1 = require("./convert-to-captions");
5
+ Object.defineProperty(exports, "convertToCaptions", { enumerable: true, get: function () { return convert_to_captions_1.convertToCaptions; } });
4
6
  var download_whisper_model_1 = require("./download-whisper-model");
5
7
  Object.defineProperty(exports, "downloadWhisperModel", { enumerable: true, get: function () { return download_whisper_model_1.downloadWhisperModel; } });
6
8
  var install_whisper_cpp_1 = require("./install-whisper-cpp");
@@ -1,3 +1,4 @@
1
/**
 * Returns the path of the Whisper executable inside the folder `whisperPath`
 * (`main.exe` on Windows, `main` on other platforms).
 */
+ export declare const getWhisperExecutablePath: (whisperPath: string) => string;
1
2
  export declare const installWhisperCpp: ({ version, to, printOutput, }: {
2
3
  version: string;
3
4
  to: string;
@@ -26,11 +26,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
26
26
  return (mod && mod.__esModule) ? mod : { "default": mod };
27
27
  };
28
28
  Object.defineProperty(exports, "__esModule", { value: true });
29
- exports.installWhisperCpp = void 0;
29
+ exports.installWhisperCpp = exports.getWhisperExecutablePath = void 0;
30
30
  const fs_1 = __importStar(require("fs"));
31
31
  const node_child_process_1 = require("node:child_process");
32
32
  const node_stream_1 = require("node:stream");
33
33
  const promises_1 = require("node:stream/promises");
34
+ const os_1 = __importDefault(require("os"));
34
35
  const path_1 = __importDefault(require("path"));
35
36
  const installForWindows = async ({ version, to, printOutput, }) => {
36
37
  const url = `https://github.com/ggerganov/whisper.cpp/releases/download/v${version}/whisper-bin-x64.zip`;
@@ -52,7 +53,9 @@ const installWhisperForUnix = ({ version, to, printOutput, }) => {
52
53
  (0, node_child_process_1.execSync)(`git clone https://github.com/ggerganov/whisper.cpp.git ${to}`, {
53
54
  stdio,
54
55
  });
55
- (0, node_child_process_1.execSync)(`git checkout v${version}`, {
56
+ const isSemVer = /^[\d]{1}\.[\d]{1,2}\.+/;
57
+ const ref = isSemVer.test(version) ? `v${version}` : version;
58
+ (0, node_child_process_1.execSync)(`git checkout ${ref}`, {
56
59
  stdio,
57
60
  cwd: to,
58
61
  });
@@ -61,8 +64,20 @@ const installWhisperForUnix = ({ version, to, printOutput, }) => {
61
64
  stdio,
62
65
  });
63
66
  };
67
/**
 * Builds the path of the Whisper executable inside `whisperPath`.
 * The binary is named `main.exe` on Windows (`win32`) and `main` elsewhere.
 */
const getWhisperExecutablePath = (whisperPath) => {
    const isWindows = os_1.default.platform() === 'win32';
    const binaryName = isWindows ? 'main.exe' : './main';
    return path_1.default.join(whisperPath, binaryName);
};
72
+ exports.getWhisperExecutablePath = getWhisperExecutablePath;
64
73
  const installWhisperCpp = async ({ version, to, printOutput = true, }) => {
65
74
  if ((0, fs_1.existsSync)(to)) {
75
+ if (!(0, fs_1.existsSync)((0, exports.getWhisperExecutablePath)(to))) {
76
+ if (printOutput) {
77
+ console.log(`Whisper folder exists but the executable (${to}) is missing. Delete ${to} and try again.`);
78
+ }
79
+ return Promise.resolve({ alreadyExisted: false });
80
+ }
66
81
  if (printOutput) {
67
82
  console.log(`Whisper already exists at ${to}`);
68
83
  }
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,268 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ const vitest_1 = require("vitest");
4
+ const convert_to_captions_1 = require("../convert-to-captions");
5
+ const example_payload_1 = require("./example-payload");
6
// Verifies the grouping of the example payload with a 200 ms combine threshold.
(0, vitest_1.test)('Convert to captions - 200ms together', () => {
    // Expected captions as [text, startInSeconds] rows.
    const expectedRows = [
        ['William', 0.24],
        ['just', 0.48],
        ['hit 100,000', 0.7],
        ['YouTube', 2.22],
        ['subscribers', 2.94],
        ['And we', 3.24],
        ['are going', 3.42],
        ['to celebrate', 3.76],
        ['that', 4.34],
        ['We thought', 4.5],
        ['about', 5.1],
        ['to bake', 5.42],
        ['a cake', 6.14],
        ['We found', 6.56],
        ['this', 7.12],
        ['and it', 7.36],
        ['reminded', 7.78],
        ['us of', 8.04],
        ['William', 8.52],
        ['We hope', 8.94],
        ['he will', 9.42],
        ['like', 9.68],
        ['the cake', 9.86],
        ["Let's start", 10.28],
        ['with the', 10.58],
        ['dough', 10.96],
        ['By putting', 11.2],
        ['some', 11.64],
        ['butter', 12.06],
        ['Some', 12.86],
        ['sugar', 13.3],
        ['Eggs', 14.36],
        ['No frameworks,', 14.78],
        ['just', 15.68],
        ['vanilla', 16.1],
        ['Pinch', 16.38],
        ['of salt', 16.58],
        ['Some', 17.44],
        ['Nutella', 17.78],
        ['Some', 18.3],
        ['chocolate', 18.68],
        ['Baking', 19.12],
        ['powder', 19.76],
        ['And', 20.68],
        ['flour', 21.14],
        ['Just', 21.66],
        ['massage', 22.06],
        ['in the', 22.38],
        ['butter', 22.84],
        ['to give', 23.08],
        ['it the', 23.32],
        ['full', 23.76],
        ['treatment', 24.28],
        ['Fill', 24.54],
        ['it in', 24.7],
        ['Bake', 26.1],
        ['it for', 26.3],
        ['half', 26.64],
        ['an', 26.86],
        ['hour', 27.06],
        ['at 170', 27.4],
        ['degrees', 28.6],
        ["It's time", 28.84],
        ['for the', 29.28],
        ['icing', 29.7],
        ['on the', 29.88],
        ['cake', 30.56],
        ['Time', 33.14],
        ['for', 33.34],
        ['the', 33.46],
        ['most', 33.62],
        ['critical', 34],
        ['part', 34.76],
        ['This', 40.74],
        ['is', 40.86],
        ['how', 41],
        ['it', 41.1],
        ['turned', 41.32],
        ['out', 42.02],
        ['Stupid', 42.78],
        ['idea,', 43.36],
        ['pretty', 43.7],
        ['bad', 44.02],
        ['execution', 44.72],
        ['I hope', 45.88],
        ['he likes', 46.2],
        ['it anyway', 46.58],
        ['Hey', 49.52],
        ['William', 50.06],
        ['Congrats', 52.32],
        ['We', 54.56],
        ['wanted', 54.86],
        ['to congratulate', 55],
        ['you', 56.22],
        ['on', 56.54],
        ['the', 56.68],
        ['100,000', 57.14],
        ['You hear', 58.86],
        ['Joseph', 59.4],
        ['crying?', 59.74],
        ['Thank', 61.04],
        ['you', 61.18],
        ['so', 61.44],
        ['much', 61.76],
        ['(electronic', 62.88],
        ['beeping)', 63.02],
    ];
    const expected = expectedRows.map(([text, startInSeconds]) => ({
        text,
        startInSeconds,
    }));
    const result = (0, convert_to_captions_1.convertToCaptions)({
        transcription: example_payload_1.examplePayload.transcription,
        combineTokensWithinMilliseconds: 200,
    });
    (0, vitest_1.expect)(result.captions).toEqual(expected);
});
123
// Verifies the grouping of the example payload with a 0 ms combine threshold.
(0, vitest_1.test)('Convert to captions - 0ms together', () => {
    // Expected captions as [text, startInSeconds] rows.
    const expectedRows = [
        ['William', 0.24],
        ['just', 0.48],
        ['hit', 0.7],
        ['100,000', 1.3],
        ['YouTube', 2.22],
        ['subscribers', 2.94],
        ['And', 3.24],
        ['we', 3.32],
        ['are', 3.42],
        ['going', 3.58],
        ['to', 3.76],
        ['celebrate', 4.1],
        ['that', 4.34],
        ['We', 4.5],
        ['thought', 4.7],
        ['about', 5.1],
        ['to', 5.42],
        ['bake', 5.92],
        ['a', 6.14],
        ['cake', 6.4],
        ['We', 6.56],
        ['found', 6.8],
        ['this', 7.12],
        ['and', 7.36],
        ['it', 7.5],
        ['reminded', 7.78],
        ['us', 8.04],
        ['of', 8.24],
        ['William', 8.52],
        ['We', 8.94],
        ['hope', 9.22],
        ['he', 9.42],
        ['will', 9.56],
        ['like', 9.68],
        ['the', 9.86],
        ['cake', 10.08],
        ["Let's", 10.28],
        ['start', 10.46],
        ['with', 10.58],
        ['the', 10.68],
        ['dough', 10.96],
        ['By', 11.2],
        ['putting', 11.44],
        ['some', 11.64],
        ['butter', 12.06],
        ['Some', 12.86],
        ['sugar', 13.3],
        ['Eggs', 14.36],
        ['No', 14.78],
        ['frameworks,', 15.28],
        ['just', 15.68],
        ['vanilla', 16.1],
        ['Pinch', 16.38],
        ['of', 16.58],
        ['salt', 17.08],
        ['Some', 17.44],
        ['Nutella', 17.78],
        ['Some', 18.3],
        ['chocolate', 18.68],
        ['Baking', 19.12],
        ['powder', 19.76],
        ['And', 20.68],
        ['flour', 21.14],
        ['Just', 21.66],
        ['massage', 22.06],
        ['in', 22.38],
        ['the', 22.5],
        ['butter', 22.84],
        ['to', 23.08],
        ['give', 23.18],
        ['it', 23.32],
        ['the', 23.42],
        ['full', 23.76],
        ['treatment', 24.28],
        ['Fill', 24.54],
        ['it', 24.7],
        ['in', 25.32],
        ['Bake', 26.1],
        ['it', 26.3],
        ['for', 26.48],
        ['half', 26.64],
        ['an', 26.86],
        ['hour', 27.06],
        ['at', 27.4],
        ['170', 28.14],
        ['degrees', 28.6],
        ["It's", 28.84],
        ['time', 29.04],
        ['for', 29.28],
        ['the', 29.42],
        ['icing', 29.7],
        ['on', 29.88],
        ['the', 30.0],
        ['cake', 30.56],
        ['Time', 33.14],
        ['for', 33.34],
        ['the', 33.46],
        ['most', 33.62],
        ['critical', 34.0],
        ['part', 34.76],
        ['This', 40.74],
        ['is', 40.86],
        ['how', 41.0],
        ['it', 41.1],
        ['turned', 41.32],
        ['out', 42.02],
        ['Stupid', 42.78],
        ['idea,', 43.36],
        ['pretty', 43.7],
        ['bad', 44.02],
        ['execution', 44.72],
        ['I', 45.88],
        ['hope', 46.06],
        ['he', 46.2],
        ['likes', 46.38],
        ['it', 46.58],
        ['anyway', 47.56],
        ['Hey', 49.52],
        ['William', 50.06],
        ['Congrats', 52.32],
        ['We', 54.56],
        ['wanted', 54.86],
        ['to', 55.0],
        ['congratulate', 55.58],
        ['you', 56.22],
        ['on', 56.54],
        ['the', 56.68],
        ['100,000', 57.14],
        ['You', 58.86],
        ['hear', 59.06],
        ['Joseph', 59.4],
        ['crying?', 59.74],
        ['Thank', 61.04],
        ['you', 61.18],
        ['so', 61.44],
        ['much', 61.76],
        ['(electronic', 62.88],
        ['beeping)', 63.02],
    ];
    const expected = expectedRows.map(([text, startInSeconds]) => ({
        text,
        startInSeconds,
    }));
    const result = (0, convert_to_captions_1.convertToCaptions)({
        transcription: example_payload_1.examplePayload.transcription,
        combineTokensWithinMilliseconds: 0,
    });
    (0, vitest_1.expect)(result.captions).toEqual(expected);
});
@@ -0,0 +1,2 @@
1
+ import type { TranscriptionJson } from '../transcribe';
2
/** Example transcription payload; its `transcription` array is the input of the `convertToCaptions` tests. */
+ export declare const examplePayload: TranscriptionJson<true>;