@remotion/whisper-web 4.0.364 → 4.0.366

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -3,7 +3,7 @@
3
3
  "url": "https://github.com/remotion-dev/remotion/tree/main/packages/whisper-web"
4
4
  },
5
5
  "name": "@remotion/whisper-web",
6
- "version": "4.0.364",
6
+ "version": "4.0.366",
7
7
  "main": "dist/index.js",
8
8
  "sideEffects": false,
9
9
  "scripts": {
@@ -14,10 +14,10 @@
14
14
  "author": "Hunain Ahmed <junaidhunain6@gmail.com>",
15
15
  "license": "UNLICENSED",
16
16
  "dependencies": {
17
- "@remotion/captions": "4.0.364"
17
+ "@remotion/captions": "4.0.366"
18
18
  },
19
19
  "devDependencies": {
20
- "@remotion/eslint-config-internal": "4.0.364",
20
+ "@remotion/eslint-config-internal": "4.0.366",
21
21
  "eslint": "9.19.0"
22
22
  },
23
23
  "publishConfig": {
@@ -1,4 +0,0 @@
1
-
2
- $ prettier --experimental-cli src --check
3
- [?25l- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ Checking formatting...- Checking formatting...\ Checking formatting...| Checking formatting.../ src/get-loaded-models.ts- src/download-whisper-model.ts\ src/download-whisper-model.ts| src/download-whisper-model.ts/ src/simulate-progress.ts- src/download-model.ts\ src/download-model.ts| src/db/open-db.ts/ src/get-model-url.ts- src/db/get-object-from-db.ts\ src/db/get-object-from-db.ts| src/index.module.ts/ src/get-available-models.ts- src/can-use-whisper-web.ts\ src/resample-to-16khz.ts| src/delete-model.ts/ src/delete-model.ts[?25hChecking formatting...
4
- [?25hAll matched files use Prettier code style!
@@ -1,25 +0,0 @@
1
-
2
- $ eslint src
3
- =============
4
-
5
- WARNING: You are currently running a version of TypeScript which is not officially supported by @typescript-eslint/typescript-estree.
6
-
7
- You may find that it works just fine, or you may not.
8
-
9
- SUPPORTED TYPESCRIPT VERSIONS: >=4.7.4 <5.7.0
10
-
11
- YOUR TYPESCRIPT VERSION: 5.8.2
12
-
13
- Please only submit bug reports when using the officially supported version.
14
-
15
- =============
16
- 
17
- /Users/jonathanburger/remotion/packages/whisper-web/src/resample-to-16khz.ts
18
-  47:50 warning Async arrow function has no 'await' expression require-await
19
- 
20
- /Users/jonathanburger/remotion/packages/whisper-web/src/transcribe.ts
21
-  18:60 warning Unexpected any. Specify a different type @typescript-eslint/no-explicit-any
22
-  51:55 warning Unexpected any. Specify a different type @typescript-eslint/no-explicit-any
23
- 
24
- ✖ 3 problems (0 errors, 3 warnings)
25
- 
@@ -1,3 +0,0 @@
1
-
2
- $ tsc -d && bun --env-file=../.env.bundle bundle.ts
3
- [16.38ms] Generated.
package/build-wasm.ts DELETED
@@ -1,90 +0,0 @@
1
- // TODO: there's no indication of an error in case worker.js fails to dynamically import
2
-
3
- import {$} from 'bun';
4
- import fs from 'fs';
5
- import os from 'os';
6
- import path from 'path';
7
-
8
- const randomDir = os.tmpdir();
9
-
10
- const wasmDir = path.join(randomDir, 'whisper-web');
11
-
12
- if (fs.existsSync(wasmDir)) {
13
- fs.rmSync(wasmDir, {recursive: true});
14
- }
15
-
16
- fs.mkdirSync(wasmDir, {recursive: true});
17
-
18
- const cwd = path.join(wasmDir, 'build-em');
19
-
20
- await $`git clone https://github.com/ggerganov/whisper.cpp ${wasmDir}`;
21
- await $`git checkout v1.7.5`.cwd(wasmDir);
22
-
23
- fs.mkdirSync(cwd);
24
-
25
- const cmakeListsFile = path.join(
26
- wasmDir,
27
- 'examples',
28
- 'whisper.wasm',
29
- 'CMakeLists.txt',
30
- );
31
-
32
- const file = fs.readFileSync(cmakeListsFile, 'utf8');
33
-
34
- // Disable Node.JS target, compile with assertions to get stack trace
35
- const lines = file.split('\n').map((line) => {
36
- if (line.includes('-s FORCE_FILESYSTEM=1 \\')) {
37
- // output ES6 module so we can import it dynamically without injecting the script tag
38
- return `-s FORCE_FILESYSTEM=1 -s ENVIRONMENT='web,worker' -s EXPORT_ES6=1 -s MODULARIZE=1 -s EXPORT_NAME=\\"createModule\\" \\`;
39
- }
40
-
41
- if (line.includes('-s EXPORTED_RUNTIME_METHODS')) {
42
- return `-s EXPORTED_RUNTIME_METHODS=\\"['print', 'printErr', 'ccall', 'cwrap', 'HEAPU8']\\" \\`;
43
- }
44
-
45
- return line;
46
- });
47
-
48
- fs.writeFileSync(
49
- cmakeListsFile,
50
- [
51
- ...lines,
52
- // Generate a .d.ts file
53
- `set(CMAKE_CXX_FLAGS "$\{CMAKE_CXX_FLAGS\} -lembind --emit-tsd $\{CMAKE_CURRENT_BINARY_DIR\}/interface.d.ts")`,
54
- ].join('\n'),
55
- );
56
-
57
- const emscriptenFilePath = path.join(
58
- wasmDir,
59
- 'examples',
60
- 'whisper.wasm',
61
- 'emscripten.cpp',
62
- );
63
- //now get our version
64
- const modifiedVersion = fs.readFileSync('./emscripten.cpp', 'utf8');
65
- fs.writeFileSync(emscriptenFilePath, modifiedVersion);
66
-
67
- // brew install emscripten if necessary
68
- await $`emcmake cmake ..`.cwd(cwd);
69
- await $`make -j`.cwd(cwd);
70
-
71
- const mainJsFile = path.join(cwd, 'bin', 'whisper.wasm', 'main.js');
72
-
73
- let content = fs
74
- .readFileSync(mainJsFile, 'utf8')
75
- .replace('libmain.js', './main.js');
76
-
77
- // Write the modified content directly to the destination
78
- fs.writeFileSync(path.join(__dirname, 'main.js'), content, 'utf8');
79
-
80
- const dTsFile = path.join(
81
- wasmDir,
82
- 'build-em',
83
- 'examples',
84
- 'whisper.wasm',
85
- 'interface.d.ts',
86
- );
87
-
88
- fs.copyFileSync(dTsFile, path.join(__dirname, 'main.d.ts'));
89
-
90
- fs.rmSync(wasmDir, {recursive: true});
package/bundle.ts DELETED
@@ -1,15 +0,0 @@
1
- import {buildPackage} from '../.monorepo/builder';
2
-
3
- await buildPackage({
4
- formats: {
5
- cjs: 'use-tsc',
6
- esm: 'build',
7
- },
8
- external: ['./main.js', './worker.js'],
9
- entrypoints: [
10
- {
11
- path: 'src/index.module.ts',
12
- target: 'browser',
13
- },
14
- ],
15
- });
package/emscripten.cpp DELETED
@@ -1,303 +0,0 @@
1
- #include "whisper.h"
2
-
3
- #include <emscripten.h>
4
- #include <emscripten/bind.h>
5
- #include <iostream>
6
- #include <vector>
7
- #include <thread>
8
-
9
- std::thread g_worker;
10
-
11
- static inline int mpow2(int n) {
12
- int p = 1;
13
- while (p <= n) p *= 2;
14
- return p / 2;
15
- }
16
-
17
- static char * escape_double_quotes_and_backslashes(const char * str) {
18
- if (str == NULL) {
19
- return NULL;
20
- }
21
-
22
- size_t escaped_length = strlen(str) + 1;
23
-
24
- for (size_t i = 0; str[i] != '\0'; i++) {
25
- if (str[i] == '"' || str[i] == '\\') {
26
- escaped_length++;
27
- }
28
- }
29
-
30
- char * escaped = (char *)calloc(escaped_length, 1); // pre-zeroed
31
- if (escaped == NULL) {
32
- return NULL;
33
- }
34
-
35
- size_t pos = 0;
36
- for (size_t i = 0; str[i] != '\0'; i++) {
37
- if (str[i] == '"' || str[i] == '\\') {
38
- escaped[pos++] = '\\';
39
- }
40
- escaped[pos++] = str[i];
41
- }
42
-
43
- // no need to set zero due to calloc() being used prior
44
-
45
- return escaped;
46
- }
47
-
48
- // 500 -> 00:05.000
49
- // 6000 -> 01:00.000
50
- std::string to_timestamp(int64_t t, bool comma) {
51
- int64_t msec = t * 10;
52
- int64_t hr = msec / (1000 * 60 * 60);
53
- msec = msec - hr * (1000 * 60 * 60);
54
- int64_t min = msec / (1000 * 60);
55
- msec = msec - min * (1000 * 60);
56
- int64_t sec = msec / 1000;
57
- msec = msec - sec * 1000;
58
-
59
- char buf[32];
60
- snprintf(buf, sizeof(buf), "%02d:%02d:%02d%s%03d", (int) hr, (int) min, (int) sec, comma ? "," : ".", (int) msec);
61
-
62
- return std::string(buf);
63
- }
64
-
65
- static bool output_json(
66
- struct whisper_context * ctx,
67
- bool final,
68
- int first_segment,
69
- int n_segments) {
70
- int indent = 0;
71
- std::string output;
72
-
73
-
74
- auto doindent = [&]() {
75
- for (int i = 0; i < indent; i++) output += "\t";
76
- };
77
-
78
- auto start_arr = [&](const char *name) {
79
- doindent();
80
- output += "\"" + std::string(name) + "\": [";
81
- indent++;
82
- };
83
-
84
- auto end_arr = [&](bool end) {
85
- indent--;
86
- doindent();
87
- output += (end ? "]" : "],");
88
- };
89
-
90
- auto start_obj = [&](const char *name) {
91
- doindent();
92
- if (name) {
93
- output += "\"" + std::string(name) + "\": {";
94
- } else {
95
- output += "{";
96
- }
97
- indent++;
98
- };
99
-
100
- auto end_obj = [&](bool end) {
101
- indent--;
102
- doindent();
103
- output += (end ? "}" : "},");
104
- };
105
-
106
- auto start_value = [&](const char *name) {
107
- doindent();
108
- output += "\"" + std::string(name) + "\": ";
109
- };
110
-
111
- auto value_s = [&](const char *name, const char *val, bool end) {
112
- start_value(name);
113
- char * val_escaped = escape_double_quotes_and_backslashes(val);
114
- output += "\"" + std::string(val_escaped) + (end ? "\"" : "\",");
115
- free(val_escaped);
116
- };
117
-
118
- auto end_value = [&](bool end) {
119
- output += (end ? "" : ",");
120
- };
121
-
122
- auto value_i = [&](const char *name, const int64_t val, bool end) {
123
- start_value(name);
124
- output += std::to_string(val);
125
- end_value(end);
126
- };
127
-
128
- auto value_f = [&](const char *name, const float val, bool end) {
129
- start_value(name);
130
- output += std::to_string(val);
131
- end_value(end);
132
- };
133
-
134
- auto value_b = [&](const char *name, const bool val, bool end) {
135
- start_value(name);
136
- output += (val ? "true" : "false");
137
- end_value(end);
138
- };
139
-
140
- auto times_o = [&](int64_t t0, int64_t t1, bool end) {
141
- start_obj("timestamps");
142
- value_s("from", to_timestamp(t0, true).c_str(), false);
143
- value_s("to", to_timestamp(t1, true).c_str(), true);
144
- end_obj(false);
145
- start_obj("offsets");
146
- value_i("from", t0 * 10, false);
147
- value_i("to", t1 * 10, true);
148
- end_obj(end);
149
- };
150
-
151
- start_obj(nullptr);
152
- value_s("systeminfo", whisper_print_system_info(), false);
153
- start_obj("model");
154
- value_s("type", whisper_model_type_readable(ctx), false);
155
- value_b("multilingual", whisper_is_multilingual(ctx), false);
156
- value_i("vocab", whisper_model_n_vocab(ctx), false);
157
- start_obj("audio");
158
- value_i("ctx", whisper_model_n_audio_ctx(ctx), false);
159
- value_i("state", whisper_model_n_audio_state(ctx), false);
160
- value_i("head", whisper_model_n_audio_head(ctx), false);
161
- value_i("layer", whisper_model_n_audio_layer(ctx), true);
162
- end_obj(false);
163
- start_obj("text");
164
- value_i("ctx", whisper_model_n_text_ctx(ctx), false);
165
- value_i("state", whisper_model_n_text_state(ctx), false);
166
- value_i("head", whisper_model_n_text_head(ctx), false);
167
- value_i("layer", whisper_model_n_text_layer(ctx), true);
168
- end_obj(false);
169
- value_i("mels", whisper_model_n_mels(ctx), false);
170
- value_i("ftype", whisper_model_ftype(ctx), true);
171
- end_obj(false);
172
- start_obj("result");
173
- value_s("language", whisper_lang_str(whisper_full_lang_id(ctx)), true);
174
- end_obj(false);
175
- start_arr("transcription");
176
-
177
- for (int i = first_segment; i < n_segments; ++i) {
178
- const char * text = whisper_full_get_segment_text(ctx, i);
179
-
180
- const int64_t t0 = whisper_full_get_segment_t0(ctx, i);
181
- const int64_t t1 = whisper_full_get_segment_t1(ctx, i);
182
-
183
- start_obj(nullptr);
184
- times_o(t0, t1, false);
185
- value_s("text", text, false);
186
-
187
- start_arr("tokens");
188
- const int n = whisper_full_n_tokens(ctx, i);
189
- for (int j = 0; j < n; ++j) {
190
- auto token = whisper_full_get_token_data(ctx, i, j);
191
- start_obj(nullptr);
192
- value_s("text", whisper_token_to_str(ctx, token.id), false);
193
- if(token.t0 > -1 && token.t1 > -1) {
194
- // If we have per-token timestamps, write them out
195
- times_o(token.t0, token.t1, false);
196
- }
197
- value_i("id", token.id, false);
198
- value_f("p", token.p, false);
199
- value_f("t_dtw", token.t_dtw, true);
200
- end_obj(j == (n - 1));
201
- }
202
- end_arr(true);
203
-
204
- end_obj(i == (n_segments - 1));
205
- }
206
-
207
- end_arr(true);
208
- end_obj(true);
209
-
210
- if (final) {
211
- printf("remotion_final:%s\n", output.c_str());
212
- } else {
213
- printf("remotion_update:%s\n", output.c_str());
214
- }
215
-
216
- return true;
217
- }
218
-
219
-
220
- void whisper_print_segment_callback(struct whisper_context * ctx, struct whisper_state * /*state*/, int n_new, void * user_data) {
221
- const int n_segments = whisper_full_n_segments(ctx);
222
- const int s0 = n_segments - n_new;
223
-
224
- if (s0 == 0) {
225
- printf("\n");
226
- }
227
-
228
- output_json(ctx, false, s0, n_segments);
229
- }
230
-
231
- // Define the progress callback function
232
- void progress_callback(struct whisper_context * ctx, struct whisper_state * state, int progress, void * user_data) {
233
- printf("remotion_progress:%d%%\n", progress);
234
- }
235
-
236
- std::vector<struct whisper_context *> g_contexts(1, nullptr);
237
-
238
-
239
- EMSCRIPTEN_BINDINGS(whisper) {
240
- emscripten::function("full_default", emscripten::optional_override([](const std::string & path_model, const emscripten::val & audio, const std::string & model, const std::string & lang, int nthreads, bool translate) {
241
- if (g_contexts[0] != nullptr) {
242
- printf("remotion_busy:\n");
243
- return 0;
244
- }
245
-
246
- g_contexts[0] = whisper_init_from_file_with_params(path_model.c_str(), whisper_context_default_params());
247
-
248
- struct whisper_full_params params = whisper_full_default_params(whisper_sampling_strategy::WHISPER_SAMPLING_GREEDY);
249
-
250
- std::vector<float> pcmf32;
251
-
252
- params.print_realtime = false;
253
- params.new_segment_callback = whisper_print_segment_callback;
254
- params.print_progress = false;
255
- params.print_timestamps = false;
256
- params.print_special = false;
257
- params.translate = translate;
258
- params.token_timestamps = true;
259
- params.language = lang.c_str(); // Convert std::string to const char*
260
- params.n_threads = std::min(nthreads, std::min(16, mpow2(std::thread::hardware_concurrency())));
261
- params.offset_ms = 0;
262
- params.progress_callback = progress_callback; // Assigning the callback
263
-
264
- const int n = audio["length"].as<int>();
265
-
266
- emscripten::val heap = emscripten::val::module_property("HEAPU8");
267
- emscripten::val memory = heap["buffer"];
268
-
269
- pcmf32.resize(n);
270
-
271
- emscripten::val memoryView = audio["constructor"].new_(memory, reinterpret_cast<uintptr_t>(pcmf32.data()), n);
272
- memoryView.call<void>("set", audio);
273
-
274
- // Print system information
275
- {
276
- printf("system_info: n_threads = %d / %d | %s\n",
277
- params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
278
-
279
- printf("%s: processing %d samples, %.1f sec, %d threads, lang = %s, task = %s ...\n",
280
- __func__, int(pcmf32.size()), float(pcmf32.size()) / WHISPER_SAMPLE_RATE,
281
- params.n_threads,
282
- params.language,
283
- params.translate ? "translate" : "transcribe");
284
-
285
- printf("\n");
286
- }
287
-
288
-
289
- // Run the worker
290
- {
291
- g_worker = std::thread([params, pcm = std::move(pcmf32)]() {
292
- whisper_reset_timings(g_contexts[0]);
293
- whisper_full(g_contexts[0], params, pcm.data(), pcm.size());
294
- const int n_segments = whisper_full_n_segments(g_contexts[0]);
295
- output_json(g_contexts[0], true, 0, n_segments);
296
- whisper_free(g_contexts[0]);
297
- g_contexts[0] = nullptr;
298
- });
299
- }
300
-
301
- return 0;
302
- }));
303
- }
package/eslint.config.mjs DELETED
@@ -1,5 +0,0 @@
1
- import {remotionFlatConfig} from '@remotion/eslint-config-internal';
2
-
3
- const config = remotionFlatConfig({react: false});
4
-
5
- export default [config];
@@ -1,103 +0,0 @@
1
- import type {WhisperWebModel} from './constants';
2
- import {SIZES} from './constants';
3
-
4
- export enum WhisperWebUnsupportedReason {
5
- WindowUndefined = 'window-undefined',
6
- IndexedDbUnavailable = 'indexed-db-unavailable',
7
- NavigatorStorageUnavailable = 'navigator-storage-unavailable',
8
- StorageEstimationApiUnavailable = 'storage-estimation-api-unavailable',
9
- QuotaUndefined = 'quota-undefined',
10
- UsageUndefined = 'usage-undefined',
11
- NotEnoughSpace = 'not-enough-space',
12
- ErrorEstimatingStorage = 'error-estimating-storage',
13
- NotCrossOriginIsolated = 'not-cross-origin-isolated',
14
- }
15
-
16
- export interface CanUseWhisperWebResult {
17
- supported: boolean;
18
- reason?: WhisperWebUnsupportedReason;
19
- detailedReason?: string;
20
- }
21
-
22
- export const canUseWhisperWeb = async (
23
- model: WhisperWebModel,
24
- ): Promise<CanUseWhisperWebResult> => {
25
- if (typeof window === 'undefined') {
26
- return {
27
- supported: false,
28
- reason: WhisperWebUnsupportedReason.WindowUndefined,
29
- detailedReason:
30
- '`window` is not defined. This module can only be used in a browser environment.',
31
- };
32
- }
33
-
34
- if (!window.crossOriginIsolated) {
35
- return {
36
- supported: false,
37
- reason: WhisperWebUnsupportedReason.NotCrossOriginIsolated,
38
- detailedReason:
39
- 'The document is not cross-origin isolated (window.crossOriginIsolated = false). This prevents the usage of SharedArrayBuffer, which is required by `@remotion/whisper-web`. Make sure the document is served with the HTTP header `Cross-Origin-Opener-Policy: same-origin` and `Cross-Origin-Embedder-Policy: require-corp`: https://remotion.dev/docs/miscellaneous/cross-origin-isolation',
40
- };
41
- }
42
-
43
- if (!window.indexedDB) {
44
- return {
45
- supported: false,
46
- reason: WhisperWebUnsupportedReason.IndexedDbUnavailable,
47
- detailedReason: 'IndexedDB is not available in this environment.',
48
- };
49
- }
50
-
51
- if (!navigator?.storage || !navigator?.storage.estimate) {
52
- return {
53
- supported: false,
54
- reason: WhisperWebUnsupportedReason.NavigatorStorageUnavailable,
55
- detailedReason:
56
- '`navigator.storage.estimate()` API is not available in this environment.',
57
- };
58
- }
59
-
60
- try {
61
- const estimate = await navigator.storage.estimate();
62
-
63
- if (estimate.quota === undefined) {
64
- return {
65
- supported: false,
66
- reason: WhisperWebUnsupportedReason.QuotaUndefined,
67
- detailedReason:
68
- 'navigator.storage.estimate() API returned undefined quota.',
69
- };
70
- }
71
-
72
- if (estimate.usage === undefined) {
73
- return {
74
- supported: false,
75
- reason: WhisperWebUnsupportedReason.UsageUndefined,
76
- detailedReason:
77
- 'navigator.storage.estimate() API returned undefined usage.',
78
- };
79
- }
80
-
81
- const remaining = estimate.quota - estimate.usage;
82
- const modelSize = SIZES[model];
83
-
84
- if (remaining < modelSize) {
85
- return {
86
- supported: false,
87
- reason: WhisperWebUnsupportedReason.NotEnoughSpace,
88
- detailedReason: `Not enough space to download the model. Required: ${modelSize} bytes, Available: ${remaining} bytes.`,
89
- };
90
- }
91
- } catch (error) {
92
- const errorMessage = error instanceof Error ? error.message : String(error);
93
- return {
94
- supported: false,
95
- reason: WhisperWebUnsupportedReason.ErrorEstimatingStorage,
96
- detailedReason: `Error estimating storage: ${errorMessage}`,
97
- };
98
- }
99
-
100
- return {
101
- supported: true,
102
- };
103
- };