@remotion/whisper-web 4.0.364 → 4.0.366

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +0,0 @@
1
- // ⚠️⚠️⚠️⚠️⚠️!! Intentionally putting this in a subdirectory, so it is 2 directories deep
2
- // That way it can be imported when the output is dist/esm/index.js
3
-
4
- import type {printHandler} from '../print-handler.js';
5
-
6
/**
 * Dynamically imports the module factory exported as the default export of
 * `main.js` and instantiates it.
 *
 * @param handler Callback produced by `printHandler`; it is registered as both
 *   the `print` and the `printErr` hook of the created module.
 * @returns The module instance produced by the factory.
 */
export const loadMod = async ({
	handler,
}: {
	handler: ReturnType<typeof printHandler>;
}) => {
	// The specifier is relative to this file's location, so it only resolves
	// while this file stays at its current directory depth.
	const {default: createModule} = await import('../../main.js');

	return createModule({
		print: handler,
		printErr: handler,
	});
};
package/src/log.ts DELETED
@@ -1,41 +0,0 @@
1
- /* eslint-disable no-console */
2
/** Known log levels, ordered from the most verbose to the least verbose. */
export const logLevels = ['trace', 'verbose', 'info', 'warn', 'error'] as const;

/** Union of the level names contained in `logLevels`. */
export type LogLevel = (typeof logLevels)[number];

/** Index of `level` inside `logLevels`. */
const getNumberForLogLevel = (level: LogLevel) => logLevels.indexOf(level);

/**
 * Tells whether `currentLevel` is positioned at or before `level` in
 * `logLevels`, i.e. whether a message of severity `level` should be printed
 * when the configured level is `currentLevel`.
 */
export const isEqualOrBelowLogLevel = (
	currentLevel: LogLevel,
	level: LogLevel,
) => getNumberForLogLevel(currentLevel) <= getNumberForLogLevel(level);

type ConsoleArgs = Parameters<typeof console.log>;

/**
 * Builds a logger that forwards to `console[method]` only when the configured
 * level passed by the caller allows messages of severity `severity`.
 */
const createGatedLogger =
	(severity: LogLevel, method: 'log' | 'warn') =>
	(logLevel: LogLevel, ...args: ConsoleArgs) => {
		if (!isEqualOrBelowLogLevel(logLevel, severity)) {
			return;
		}

		return console[method](...args);
	};

/**
 * Level-aware console wrappers. `trace`, `verbose`, `info` and `warn` take the
 * configured log level as their first argument. `error` takes NO log level:
 * every argument it receives is forwarded to `console.error` unconditionally.
 */
export const Log = {
	trace: createGatedLogger('trace', 'log'),
	verbose: createGatedLogger('verbose', 'log'),
	info: createGatedLogger('info', 'log'),
	warn: createGatedLogger('warn', 'warn'),
	error: (...args: ConsoleArgs) => console.error(...args),
};
package/src/mod.ts DELETED
@@ -1,13 +0,0 @@
1
- import type {TranscriptionJson} from './result';
2
-
3
/** Callback receiving a numeric progress value. */
type TranscriptionProgressPlayback = (progress: number) => void;

/** Callback receiving the start/end timestamp strings and the text of a chunk. */
type TranscriptionChunkPlayback = (
	timestampStart: string,
	timestampEnd: string,
	textOnly: string,
) => void;

/** Callback receiving a complete transcript. */
type TranscriptResolver = (transcript: TranscriptionJson) => void;

/**
 * Set of optional callbacks; each slot is `null` while no callback is
 * registered.
 */
export type ModelState = {
	transcriptionProgressPlayback: TranscriptionProgressPlayback | null;
	transcriptionChunkPlayback: TranscriptionChunkPlayback | null;
	resolver: TranscriptResolver | null;
};
@@ -1,39 +0,0 @@
1
- import type {LogLevel} from './log';
2
- import {Log} from './log';
3
- import type {TranscriptionJson} from './result';
4
-
5
// Prefixes that mark a printed line as a structured message.
const RESULT_TOKEN = 'remotion_final:';
const PROGRESS_TOKEN = 'remotion_progress:';
const UPDATE_TOKEN = 'remotion_update:';
const BUSY_TOKEN = 'remotion_busy:';

/**
 * Creates a line handler that logs every line at `verbose` level and routes
 * lines starting with one of the known tokens to the matching callback.
 *
 * - `remotion_progress:` → `onProgress` with the remainder parsed by `parseInt(…, 10)`
 * - `remotion_final:`    → `onDone` with the remainder parsed as JSON
 * - `remotion_update:`   → `onUpdate` with the remainder parsed as JSON
 * - `remotion_busy:`     → `onBusy`
 *
 * Lines without a known prefix are only logged. The JSON payloads are not
 * validated, and `JSON.parse` throws on a malformed payload.
 */
export const printHandler = ({
	onProgress,
	onDone,
	onBusy,
	onUpdate,
	logLevel,
}: {
	onProgress: (value: number) => void;
	onBusy: () => void;
	onDone: (value: TranscriptionJson) => void;
	onUpdate: (value: TranscriptionJson) => void;
	logLevel: LogLevel;
}) => {
	// Everything following the token prefix of a line.
	const payloadOf = (line: string, token: string) => line.slice(token.length);

	return (text: string) => {
		Log.verbose(logLevel, text);

		if (text.startsWith(PROGRESS_TOKEN)) {
			onProgress(parseInt(payloadOf(text, PROGRESS_TOKEN), 10));
			return;
		}

		if (text.startsWith(RESULT_TOKEN)) {
			onDone(JSON.parse(payloadOf(text, RESULT_TOKEN)));
			return;
		}

		if (text.startsWith(UPDATE_TOKEN)) {
			onUpdate(JSON.parse(payloadOf(text, UPDATE_TOKEN)));
			return;
		}

		if (text.startsWith(BUSY_TOKEN)) {
			onBusy();
		}
	};
};
@@ -1,105 +0,0 @@
1
- import type {LogLevel} from './log';
2
- import {Log} from './log';
3
-
4
// this is a requirement of whisper.cpp
export const EXPECTED_SAMPLE_RATE = 16000;

// Module-wide AudioContext, created on first use and reused afterwards.
let context: AudioContext | undefined;

/**
 * Returns the shared `AudioContext`, creating it with
 * `EXPECTED_SAMPLE_RATE` as its sample rate on the first call.
 */
const getAudioContext = (): AudioContext => {
	context =
		context ??
		new AudioContext({
			sampleRate: EXPECTED_SAMPLE_RATE,
		});

	return context;
};
18
-
19
/**
 * Renders `audioBuffer` through an `OfflineAudioContext` that has the same
 * channel count, length and sample rate as the buffer.
 *
 * @returns The samples of channel 0 of the rendered buffer. Any further
 *   channels of the rendered buffer are not returned.
 */
const audioDecoder = async (
	audioBuffer: AudioBuffer,
): Promise<Float32Array> => {
	const {numberOfChannels, length, sampleRate} = audioBuffer;
	const offlineContext = new OfflineAudioContext(
		numberOfChannels,
		length,
		sampleRate,
	);

	// Play the whole buffer into the offline destination from time 0.
	const bufferSource = offlineContext.createBufferSource();
	bufferSource.buffer = audioBuffer;
	bufferSource.connect(offlineContext.destination);
	bufferSource.start(0);

	const rendered = await offlineContext.startRendering();
	return rendered.getChannelData(0);
};
36
-
37
/** Options accepted by `resampleTo16Khz`. */
export type ResampleTo16KhzParams = {
	/** Audio file to read and decode. */
	file: Blob;
	/** Receives progress values between 0 and 1. */
	onProgress?: (p: number) => void;
	/** Verbosity of informational logging; defaults to `'info'`. */
	logLevel?: LogLevel;
};

/**
 * Reads `file`, decodes it with the shared `AudioContext` (created with
 * `EXPECTED_SAMPLE_RATE`) and returns the samples produced by `audioDecoder`.
 *
 * Progress reporting: file reading covers 0–0.5, decoding completes at 0.75
 * and rendering completes at 1.
 *
 * @throws Error `'File is empty'` when `file` is missing.
 * @throws Error when no `window` object exists (non-browser environment).
 * @throws Whatever reading, decoding or rendering rejects with.
 */
export const resampleTo16Khz = async ({
	file,
	onProgress,
	logLevel = 'info',
}: ResampleTo16KhzParams): Promise<Float32Array> => {
	// Validate before anything dereferences `file` (the start-up log line reads
	// `file.size`), so a missing file yields the intended error and not a
	// TypeError.
	if (!file) {
		// `Log.error` has no log-level parameter; pass only the message.
		Log.error('File is empty.');
		throw new Error('File is empty');
	}

	Log.info(logLevel, `Starting resampling for file, size: ${file.size}`);
	onProgress?.(0);

	if (typeof window === 'undefined') {
		Log.error(
			'Window object not found. Resampling can only be done in a browser environment.',
		);
		throw new Error(
			'Window object not found. Resampling requires a browser environment.',
		);
	}

	const innerContext = getAudioContext();
	const reader = new FileReader();

	return new Promise<Float32Array>((resolve, reject) => {
		reader.onprogress = (event) => {
			if (event.lengthComputable) {
				const percentage = (event.loaded / event.total) * 0.5; // File reading up to 50%
				onProgress?.(Math.min(0.5, percentage));
			}
		};

		reader.onload = async () => {
			try {
				Log.info(logLevel, 'File reading complete. Decoding audio data...');
				onProgress?.(0.5);
				// `readAsArrayBuffer` produces an ArrayBuffer, which can be handed
				// to the decoder directly.
				const audioBuffer = await innerContext.decodeAudioData(
					reader.result as ArrayBuffer,
				);
				Log.info(logLevel, 'Audio decoding complete. Starting rendering...');
				onProgress?.(0.75);

				const processedAudio = await audioDecoder(audioBuffer);
				Log.info(logLevel, 'Audio resampling and processing complete.');
				onProgress?.(1);
				resolve(processedAudio);
			} catch (error) {
				Log.error('Error during audio processing:', error);
				reject(error);
			}
		};

		reader.onerror = () => {
			Log.error('File reading failed.');
			reject(new Error('File reading failed'));
		};

		reader.readAsArrayBuffer(file);
	});
};
package/src/result.ts DELETED
@@ -1,59 +0,0 @@
1
/** `result` section of the transcription output. */
type Result = {
	language: string;
};

/** Numeric parameters reported for both the `audio` and the `text` part of a model. */
type ModelPartDimensions = {
	ctx: number;
	state: number;
	head: number;
	layer: number;
};

/** `model` section of the transcription output. */
type Model = {
	type: string;
	multilingual: boolean;
	vocab: number;
	audio: ModelPartDimensions;
	text: ModelPartDimensions;
	mels: number;
	ftype: number;
};

/** A `from`/`to` pair whose bounds share the type `T`. */
type Span<T> = {
	from: T;
	to: T;
};

/** Bounds expressed as strings. */
type Timestamps = Span<string>;

/** Bounds expressed as numbers (`toCaptions` uses them as `startMs`/`endMs`). */
type Offsets = Span<number>;

/** One transcribed segment without token details. */
type TranscriptionItem = {
	timestamps: Timestamps;
	offsets: Offsets;
	text: string;
};

/** A single token of a transcribed segment. */
export type WordLevelToken = {
	/** `toCaptions` treats `-1` as "no timestamp" and otherwise multiplies the value by 10. */
	t_dtw: number;
	text: string;
	timestamps: Timestamps;
	offsets: Offsets;
	id: number;
	/** Exposed by `toCaptions` as the caption's `confidence`. */
	p: number;
};

/** A transcribed segment together with its tokens. */
export type TranscriptionItemWithTimestamp = TranscriptionItem & {
	tokens: WordLevelToken[];
};

/** Complete transcription output. */
export type TranscriptionJson = {
	systeminfo: string;
	model: Model;
	result: Result;
	transcription: TranscriptionItemWithTimestamp[];
};
@@ -1,74 +0,0 @@
1
- import {
2
- getActualTranscriptionSpeedInMilliseconds,
3
- NEW_PROGRESS_EVENT_EVERY_N_SECONDS,
4
- storeActualTranscriptionSpeed,
5
- } from './transcription-speed';
6
-
7
/**
 * Emits interpolated progress values between real progress events.
 *
 * Each real event is assumed to cover `NEW_PROGRESS_EVENT_EVERY_N_SECONDS`
 * seconds of audio. Between two events an interval timer raises the reported
 * progress in `NEW_PROGRESS_EVENT_EVERY_N_SECONDS` small steps, paced by the
 * last measured chunk duration (`getActualTranscriptionSpeedInMilliseconds`).
 * Interpolated values are capped at 0.99; only `onDone` reports 1.
 *
 * @param audioDurationInSeconds Length of the audio being transcribed.
 * @param onProgress Receives every simulated progress value.
 * @returns Controls: `start`, `progressStepReceived`, `onDone`, `abort`.
 */
export const simulateProgress = ({
	audioDurationInSeconds,
	onProgress,
}: {
	audioDurationInSeconds: number;
	onProgress: (num: number) => void;
}) => {
	let progress = 0;

	const progressSteps =
		audioDurationInSeconds / NEW_PROGRESS_EVENT_EVERY_N_SECONDS;

	let progressStepsReceived = 0;
	// `ReturnType<typeof setInterval>` is correct in browsers and in Node alike.
	let timer: ReturnType<typeof setInterval> | null = null;
	let lastTimerStart: number | null = null;

	// Stops the running interval, if any.
	const stopTimer = () => {
		if (timer !== null) {
			clearInterval(timer);
			timer = null;
		}
	};

	const start = () => {
		// A second `start()` must not orphan a still-running interval: it would
		// keep emitting progress after `abort()`/`onDone()`.
		stopTimer();

		const speed = getActualTranscriptionSpeedInMilliseconds();

		let iterations = 0;
		lastTimerStart = Date.now();

		timer = setInterval(() => {
			progress += 1 / NEW_PROGRESS_EVENT_EVERY_N_SECONDS / (progressSteps + 1);
			progress = Math.min(progress, 0.99);
			onProgress(progress);
			iterations += 1;
			// One chunk's worth of steps has been emitted; wait for the next real event.
			if (iterations > NEW_PROGRESS_EVENT_EVERY_N_SECONDS - 1) {
				stopTimer();
			}
		}, speed / NEW_PROGRESS_EVENT_EVERY_N_SECONDS);
	};

	return {
		start,
		progressStepReceived: () => {
			progressStepsReceived += 1;
			// Snap to the real progress before interpolating again.
			progress = progressStepsReceived / progressSteps;
			stopTimer();

			if (lastTimerStart !== null) {
				// Remember how long this chunk took to pace the next interpolation.
				storeActualTranscriptionSpeed(Date.now() - lastTimerStart);
			}

			start();
		},
		onDone: () => {
			stopTimer();

			progress = 1;
			onProgress(1);
		},
		abort: () => {
			stopTimer();
		},
	};
};
@@ -1,37 +0,0 @@
1
- import type {Caption} from '@remotion/captions';
2
- import type {TranscriptionItemWithTimestamp, TranscriptionJson} from './result';
3
-
4
/** Input of `toCaptions`: either the full output object or just its `transcription` array. */
type ToCaptionsInput = {
	whisperWebOutput: TranscriptionJson | TranscriptionItemWithTimestamp[];
};

/** Output of `toCaptions`. */
type ToCaptionsOutput = {
	captions: Caption[];
};

/**
 * Flattens the tokens of a transcription into one `Caption` per token.
 *
 * - Items whose `text` is the empty string are skipped together with their tokens.
 * - Leading whitespace is trimmed from the text of the very first caption only.
 * - `timestampMs` is `null` when the token's `t_dtw` is `-1`, else `t_dtw * 10`.
 */
export const toCaptions = (input: ToCaptionsInput): ToCaptionsOutput => {
	const {whisperWebOutput} = input;
	const items =
		'transcription' in whisperWebOutput
			? whisperWebOutput.transcription
			: whisperWebOutput;

	const tokens = items
		.filter((item) => item.text !== '')
		.flatMap((item) => item.tokens);

	const captions = tokens.map(
		(token, index): Caption => ({
			text: index === 0 ? token.text.trimStart() : token.text,
			startMs: token.offsets.from,
			endMs: token.offsets.to,
			timestampMs: token.t_dtw === -1 ? null : token.t_dtw * 10,
			confidence: token.p,
		}),
	);

	return {captions};
};
package/src/transcribe.ts DELETED
@@ -1,173 +0,0 @@
1
- /* eslint-disable new-cap */
2
- import type {MainModule} from '../main';
3
- import type {WhisperWebLanguage, WhisperWebModel} from './constants';
4
- import {getObject} from './db/get-object-from-db';
5
- import {getModelUrl} from './get-model-url';
6
- import {loadMod} from './load-mod/load-mod';
7
- import type {LogLevel} from './log';
8
- import {Log} from './log';
9
- import {printHandler} from './print-handler';
10
- import {EXPECTED_SAMPLE_RATE} from './resample-to-16khz';
11
- import type {TranscriptionItemWithTimestamp, TranscriptionJson} from './result';
12
- import {simulateProgress} from './simulate-progress';
13
/** Largest `threads` value `transcribe()` accepts; higher values are rejected. */
const MAX_THREADS_ALLOWED = 16;

/** Thread count `transcribe()` uses when the caller passes no `threads`. */
const DEFAULT_THREADS = 4;

declare global {
	interface Window {
		/** Optional map of override functions attached to the global `window`. */
		// eslint-disable-next-line @typescript-eslint/no-explicit-any
		remotion_wasm_moduleOverrides?: Record<string, (...args: any[]) => void>;
	}
}
21
-
22
/** A promise bundled with the functions that settle it. */
interface WithResolvers<T> {
	promise: Promise<T>;
	resolve: (value: T | PromiseLike<T>) => void;
	// eslint-disable-next-line @typescript-eslint/no-explicit-any
	reject: (reason?: any) => void;
}

/**
 * Creates a pending promise and exposes its `resolve`/`reject` functions so it
 * can be settled from outside the executor.
 */
const withResolvers = <T>(): WithResolvers<T> => {
	// The executor runs synchronously, so both are assigned before the return.
	let settle!: WithResolvers<T>['resolve'];
	let fail!: WithResolvers<T>['reject'];

	const promise = new Promise<T>((onResolve, onReject) => {
		settle = onResolve;
		fail = onReject;
	});

	return {promise, resolve: settle, reject: fail};
};
38
-
39
/** Receives the `transcription` array of each intermediate update. */
type TranscriptionChunkCallback = (
	transcription: TranscriptionItemWithTimestamp[],
) => void;

/** Options accepted by `transcribe()`. */
export type TranscribeParams = {
	/**
	 * Audio samples to transcribe. `transcribe()` derives the duration as
	 * `length / EXPECTED_SAMPLE_RATE` and rejects a missing or empty array.
	 */
	channelWaveform: Float32Array;
	/** Model to use; it must already be stored locally. */
	model: WhisperWebModel;
	/** Language passed to the transcription; defaults to `'auto'`. */
	language?: WhisperWebLanguage;
	/** Receives simulated and final progress values. */
	onProgress?: (p: number) => void;
	/** Called for every intermediate update. */
	onTranscriptionChunk?: TranscriptionChunkCallback;
	/** Thread count; defaults to `DEFAULT_THREADS`, capped by `MAX_THREADS_ALLOWED`. */
	threads?: number;
	/** Verbosity of logging; defaults to `'info'`. */
	logLevel?: LogLevel;
};
50
-
51
/**
 * Writes `buf` as file `fname` into the root directory of the module's file
 * system, replacing a file of the same name.
 */
const storeFS = (mod: MainModule, fname: string, buf: any) => {
	// Best-effort removal of a previous file; any error from unlink is
	// deliberately ignored.
	const unlinkQuietly = () => {
		try {
			mod.FS_unlink(fname);
		} catch {
			// ignore
		}
	};

	unlinkQuietly();
	mod.FS_createDataFile('/', fname, buf, true, true, undefined);
};
60
-
61
/**
 * Transcribes `channelWaveform` with the given, previously stored model.
 *
 * Flow: validate the input → set up simulated progress → load the module with
 * a print handler that turns its output lines into progress/update/done/busy
 * events → write the model to the module's file system → start the
 * transcription. The returned promise settles from the print handler: it
 * resolves on the final result and rejects when the module reports it is busy.
 *
 * @throws Error when the audio data is missing or empty.
 * @throws Error when the requested thread count exceeds `MAX_THREADS_ALLOWED`.
 * @throws Error when the model is not found in local storage.
 */
export const transcribe = async ({
	channelWaveform,
	model,
	language = 'auto',
	onProgress,
	threads,
	onTranscriptionChunk,
	logLevel = 'info',
}: TranscribeParams): Promise<TranscriptionJson> => {
	if (!channelWaveform || channelWaveform.length === 0) {
		// `Log.error` has no log-level parameter; pass only the message so the
		// level string is not printed as part of the error.
		Log.error('No audio data provided or audio data is empty.');
		throw new Error('No audio data provided or audio data is empty.');
	}

	const threadCount = threads ?? DEFAULT_THREADS;

	Log.info(
		logLevel,
		`Starting transcription with model: ${model}, language: ${language}, threads: ${threadCount}`,
	);

	if (threadCount > MAX_THREADS_ALLOWED) {
		Log.warn(
			logLevel,
			`Thread limit exceeded: Used ${threadCount}, max ${MAX_THREADS_ALLOWED} allowed.`,
		);
		throw new Error(`Thread limit exceeded: max ${MAX_THREADS_ALLOWED} allowed.`);
	}

	const audioDurationInSeconds = channelWaveform.length / EXPECTED_SAMPLE_RATE;

	const {
		abort: abortProgress,
		onDone: onProgressDone,
		progressStepReceived,
		start: startProgress,
	} = simulateProgress({
		audioDurationInSeconds,
		onProgress: (p) => {
			onProgress?.(p);
		},
	});

	const {
		promise,
		resolve: _resolve,
		reject: _reject,
	} = withResolvers<TranscriptionJson>();

	// Settling the promise also stops the simulated progress timer.
	const resolve = (value: TranscriptionJson) => {
		_resolve(value);
		abortProgress();
		Log.info(logLevel, 'Transcription completed successfully.');
	};

	const reject = (reason: Error) => {
		_reject(reason);
		abortProgress();
		Log.error('Transcription failed:', reason);
	};

	const handler = printHandler({
		logLevel,
		onProgress: (p: number) => {
			// 0 starts the simulation, 100 finishes it, anything else is a
			// completed intermediate step.
			if (p === 0) {
				startProgress();
			} else if (p === 100) {
				onProgressDone();
			} else {
				progressStepReceived();
			}
		},
		onDone: resolve,
		onBusy: () => {
			reject(new Error('Another transcription is already in progress'));
		},
		onUpdate: (json: TranscriptionJson) => {
			onTranscriptionChunk?.(json.transcription);
		},
	});

	const Mod = await loadMod({handler});

	const url = getModelUrl(model);
	const result = await getObject({key: url});
	if (!result) {
		throw new Error(
			`Model ${model} is not loaded. Call downloadWhisperModel() first.`,
		);
	}

	Log.info(logLevel, `Model ${model} loaded successfully.`);

	const fileName = `${model}.bin`;

	storeFS(Mod, fileName, result);

	Log.info(logLevel, 'Starting main transcription process...');
	Mod.full_default(
		fileName,
		channelWaveform,
		model,
		language,
		threadCount,
		false,
	);

	return promise;
};
@@ -1,21 +0,0 @@
1
// localStorage key under which the last measured chunk duration is kept.
const TRANSCRIPTION_SPEED_STORAGE_KEY =
	'remotion-whisper-web-transcription-speed';

/**
 * Persists the measured time (in milliseconds) it took to process one chunk.
 */
export const storeActualTranscriptionSpeed = (speed: number) => {
	window.localStorage.setItem(
		TRANSCRIPTION_SPEED_STORAGE_KEY,
		speed.toString(),
	);
};

// conservative estimate that 30 seconds of audio takes 30 second to process
const DEFAULT_ASSUMED_SPEED = 1;
export const NEW_PROGRESS_EVENT_EVERY_N_SECONDS = 30;

/**
 * Returns the stored chunk duration in milliseconds.
 *
 * Falls back to the default estimate when nothing is stored, or when the
 * stored value is not a positive finite number. The result is used as a timer
 * delay, where NaN, 0 or a negative value would make the timer fire as fast as
 * possible.
 */
export const getActualTranscriptionSpeedInMilliseconds = () => {
	const fallback =
		DEFAULT_ASSUMED_SPEED * NEW_PROGRESS_EVENT_EVERY_N_SECONDS * 1000;

	const stored = window.localStorage.getItem(TRANSCRIPTION_SPEED_STORAGE_KEY);
	if (!stored) {
		return fallback;
	}

	const parsed = parseFloat(stored);
	return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
};
package/tsconfig.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "extends": "../tsconfig.settings.json",
3
- "compilerOptions": {
4
- "rootDir": "src",
5
- "outDir": "dist"
6
- },
7
- "include": ["src", "main.d.ts"],
8
- "exclude": ["src/index.module.ts"],
9
- "references": [{"path": "../captions"}]
10
- }