npm - @remotion/install-whisper-cpp - Versions diffs - 4.0.130 → 4.0.132 - Mend

@remotion/install-whisper-cpp 4.0.130 → 4.0.132

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/dist/convert-to-captions.d.ts +11 -0
package/dist/convert-to-captions.js +48 -0
package/dist/index.d.ts +2 -1
package/dist/index.js +3 -1
package/dist/install-whisper-cpp.d.ts +1 -0
package/dist/install-whisper-cpp.js +17 -2
package/dist/test/convert-to-captions.test.d.ts +1 -0
package/dist/test/convert-to-captions.test.js +268 -0
package/dist/test/example-payload.d.ts +2 -0
package/dist/test/example-payload.js +4734 -0
package/dist/transcribe.d.ts +17 -4
package/dist/transcribe.js +62 -19
package/package.json +3 -2

package/dist/convert-to-captions.d.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import type { TranscriptionJson } from './transcribe';
+export type Caption = { // One entry of the `captions` array returned by convertToCaptions
+    text: string; // Caption text
+    startInSeconds: number; // Start time of the caption, in seconds (per the field name)
+};
+export declare function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }: { // Converts transcription items into a list of captions
+    transcription: TranscriptionJson<true>['transcription']; // The `transcription` member of a TranscriptionJson<true>
+    combineTokensWithinMilliseconds: number; // Threshold, in milliseconds per the name, that controls how items are combined into one caption
+}): {
+    captions: Caption[]; // The resulting captions
+};

package/dist/convert-to-captions.js ADDED Viewed

@@ -0,0 +1,48 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.convertToCaptions = void 0;
+function convertToCaptions({ transcription, combineTokensWithinMilliseconds, }) { // Groups consecutive transcription items into captions; returns { captions: [{ text, startInSeconds }, ...] }
+    const merged = []; // Finished captions, in input order
+    let currentText = ''; // Text of the caption being built
+    let currentFrom = 0; // offsets.from of the first item in the caption being built
+    let currentTo = 0; // offsets.to of the most recent item added to it
+    let currentTokenLevelTimestamp = 0; // tokens[0].t_dtw of the first item in the caption being built
+    transcription.forEach((item, index) => {
+        const { text } = item;
+        // Split before this item only if its text has a leading space AND the caption built so far spans more than combineTokensWithinMilliseconds
+        if (text.startsWith(' ') &&
+            currentTo - currentFrom > combineTokensWithinMilliseconds) {
+            if (currentText !== '') {
+                merged.push({
+                    text: currentText,
+                    startInSeconds: currentTokenLevelTimestamp / 100, // NOTE(review): assumes t_dtw is in 1/100 s units - confirm
+                });
+            }
+            // Begin a new caption with this item
+            currentText = text.trimStart();
+            currentFrom = item.offsets.from;
+            currentTo = item.offsets.to;
+            currentTokenLevelTimestamp = item.tokens[0].t_dtw; // NOTE(review): throws if item.tokens is empty - confirm it never is
+        }
+        else {
+            // Otherwise append to the caption in progress (also taken for a leading-space item when the threshold is not exceeded)
+            if (currentText === '') {
+                // Nothing accumulated yet (e.g. the very first item): record where this caption starts
+                currentFrom = item.offsets.from;
+                currentTokenLevelTimestamp = item.tokens[0].t_dtw; // NOTE(review): same empty-tokens caveat as above
+            }
+            currentText += text;
+            currentText = currentText.trimStart(); // Strips only the leading whitespace of the whole caption; spaces between joined items are kept
+            currentTo = item.offsets.to;
+        }
+        // After the final item, flush whatever caption is still pending
+        if (index === transcription.length - 1 && currentText !== '') {
+            merged.push({
+                text: currentText,
+                startInSeconds: currentTokenLevelTimestamp / 100,
+            });
+        }
+    });
+    return { captions: merged };
+}
+exports.convertToCaptions = convertToCaptions;

package/dist/index.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+export { Caption, convertToCaptions } from './convert-to-captions';
 export { downloadWhisperModel, OnProgress, WhisperModel, } from './download-whisper-model';
 export { installWhisperCpp } from './install-whisper-cpp';
-export { transcribe } from './transcribe';
+export { transcribe, TranscriptionJson } from './transcribe';

package/dist/index.js CHANGED Viewed

@@ -1,6 +1,8 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.transcribe = exports.installWhisperCpp = exports.downloadWhisperModel = void 0;
+exports.transcribe = exports.installWhisperCpp = exports.downloadWhisperModel = exports.convertToCaptions = void 0;
+var convert_to_captions_1 = require("./convert-to-captions");
+Object.defineProperty(exports, "convertToCaptions", { enumerable: true, get: function () { return convert_to_captions_1.convertToCaptions; } });
 var download_whisper_model_1 = require("./download-whisper-model");
 Object.defineProperty(exports, "downloadWhisperModel", { enumerable: true, get: function () { return download_whisper_model_1.downloadWhisperModel; } });
 var install_whisper_cpp_1 = require("./install-whisper-cpp");

package/dist/install-whisper-cpp.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+export declare const getWhisperExecutablePath: (whisperPath: string) => string; // Maps a Whisper folder path to the path of the executable expected inside it
 export declare const installWhisperCpp: ({ version, to, printOutput, }: {
     version: string;
     to: string;

package/dist/install-whisper-cpp.js CHANGED Viewed

@@ -26,11 +26,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.installWhisperCpp = void 0;
+exports.installWhisperCpp = exports.getWhisperExecutablePath = void 0;
 const fs_1 = __importStar(require("fs"));
 const node_child_process_1 = require("node:child_process");
 const node_stream_1 = require("node:stream");
 const promises_1 = require("node:stream/promises");
+const os_1 = __importDefault(require("os"));
 const path_1 = __importDefault(require("path"));
 const installForWindows = async ({ version, to, printOutput, }) => {
     const url = `https://github.com/ggerganov/whisper.cpp/releases/download/v${version}/whisper-bin-x64.zip`;
@@ -52,7 +53,9 @@ const installWhisperForUnix = ({ version, to, printOutput, }) => {
     (0, node_child_process_1.execSync)(`git clone https://github.com/ggerganov/whisper.cpp.git ${to}`, {
         stdio,
     });
-    (0, node_child_process_1.execSync)(`git checkout v${version}`, {
+    const isSemVer = /^[\d]{1}\.[\d]{1,2}\.+/;
+    const ref = isSemVer.test(version) ? `v${version}` : version;
+    (0, node_child_process_1.execSync)(`git checkout ${ref}`, {
         stdio,
         cwd: to,
     });
@@ -61,8 +64,20 @@ const installWhisperForUnix = ({ version, to, printOutput, }) => {
         stdio,
     });
 };
+const getWhisperExecutablePath = (whisperPath) => { // Builds the expected path of the Whisper executable inside `whisperPath`; does not check that it exists
+    return os_1.default.platform() === 'win32'
+        ? path_1.default.join(whisperPath, 'main.exe') // Windows: <whisperPath>/main.exe
+        : path_1.default.join(whisperPath, './main'); // Other platforms: path.join normalizes './main', giving <whisperPath>/main
+};
+exports.getWhisperExecutablePath = getWhisperExecutablePath;
 const installWhisperCpp = async ({ version, to, printOutput = true, }) => {
     if ((0, fs_1.existsSync)(to)) {
+        if (!(0, fs_1.existsSync)((0, exports.getWhisperExecutablePath)(to))) {
+            if (printOutput) {
+                console.log(`Whisper folder exists but the executable (${to}) is missing. Delete ${to} and try again.`);
+            }
+            return Promise.resolve({ alreadyExisted: false });
+        }
         if (printOutput) {
             console.log(`Whisper already exists at ${to}`);
         }

package/dist/test/convert-to-captions.test.d.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export {};

package/dist/test/convert-to-captions.test.js ADDED Viewed

@@ -0,0 +1,268 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+const vitest_1 = require("vitest");
+const convert_to_captions_1 = require("../convert-to-captions");
+const example_payload_1 = require("./example-payload");
+(0, vitest_1.test)('Convert to captions - 200ms together', () => { // Example payload with a threshold of 200: several expected captions join more than one word (e.g. 'hit 100,000')
+    const { captions: transcript } = (0, convert_to_captions_1.convertToCaptions)({
+        transcription: example_payload_1.examplePayload.transcription,
+        combineTokensWithinMilliseconds: 200,
+    });
+    (0, vitest_1.expect)(transcript).toEqual([
+        { text: 'William', startInSeconds: 0.24 },
+        { text: 'just', startInSeconds: 0.48 },
+        { text: 'hit 100,000', startInSeconds: 0.7 },
+        { text: 'YouTube', startInSeconds: 2.22 },
+        { text: 'subscribers', startInSeconds: 2.94 },
+        { text: 'And we', startInSeconds: 3.24 },
+        { text: 'are going', startInSeconds: 3.42 },
+        { text: 'to celebrate', startInSeconds: 3.76 },
+        { text: 'that', startInSeconds: 4.34 },
+        { text: 'We thought', startInSeconds: 4.5 },
+        { text: 'about', startInSeconds: 5.1 },
+        { text: 'to bake', startInSeconds: 5.42 },
+        { text: 'a cake', startInSeconds: 6.14 },
+        { text: 'We found', startInSeconds: 6.56 },
+        { text: 'this', startInSeconds: 7.12 },
+        { text: 'and it', startInSeconds: 7.36 },
+        { text: 'reminded', startInSeconds: 7.78 },
+        { text: 'us of', startInSeconds: 8.04 },
+        { text: 'William', startInSeconds: 8.52 },
+        { text: 'We hope', startInSeconds: 8.94 },
+        { text: 'he will', startInSeconds: 9.42 },
+        { text: 'like', startInSeconds: 9.68 },
+        { text: 'the cake', startInSeconds: 9.86 },
+        { text: "Let's start", startInSeconds: 10.28 },
+        { text: 'with the', startInSeconds: 10.58 },
+        { text: 'dough', startInSeconds: 10.96 },
+        { text: 'By putting', startInSeconds: 11.2 },
+        { text: 'some', startInSeconds: 11.64 },
+        { text: 'butter', startInSeconds: 12.06 },
+        { text: 'Some', startInSeconds: 12.86 },
+        { text: 'sugar', startInSeconds: 13.3 },
+        { text: 'Eggs', startInSeconds: 14.36 },
+        {
+            text: 'No frameworks,',
+            startInSeconds: 14.78,
+        },
+        { text: 'just', startInSeconds: 15.68 },
+        { text: 'vanilla', startInSeconds: 16.1 },
+        { text: 'Pinch', startInSeconds: 16.38 },
+        { text: 'of salt', startInSeconds: 16.58 },
+        { text: 'Some', startInSeconds: 17.44 },
+        { text: 'Nutella', startInSeconds: 17.78 },
+        { text: 'Some', startInSeconds: 18.3 },
+        { text: 'chocolate', startInSeconds: 18.68 },
+        { text: 'Baking', startInSeconds: 19.12 },
+        { text: 'powder', startInSeconds: 19.76 },
+        { text: 'And', startInSeconds: 20.68 },
+        { text: 'flour', startInSeconds: 21.14 },
+        { text: 'Just', startInSeconds: 21.66 },
+        { text: 'massage', startInSeconds: 22.06 },
+        { text: 'in the', startInSeconds: 22.38 },
+        { text: 'butter', startInSeconds: 22.84 },
+        { text: 'to give', startInSeconds: 23.08 },
+        { text: 'it the', startInSeconds: 23.32 },
+        { text: 'full', startInSeconds: 23.76 },
+        { text: 'treatment', startInSeconds: 24.28 },
+        { text: 'Fill', startInSeconds: 24.54 },
+        { text: 'it in', startInSeconds: 24.7 },
+        { text: 'Bake', startInSeconds: 26.1 },
+        { text: 'it for', startInSeconds: 26.3 },
+        { text: 'half', startInSeconds: 26.64 },
+        { text: 'an', startInSeconds: 26.86 },
+        { text: 'hour', startInSeconds: 27.06 },
+        { text: 'at 170', startInSeconds: 27.4 },
+        { text: 'degrees', startInSeconds: 28.6 },
+        { text: "It's time", startInSeconds: 28.84 },
+        { text: 'for the', startInSeconds: 29.28 },
+        { text: 'icing', startInSeconds: 29.7 },
+        { text: 'on the', startInSeconds: 29.88 },
+        { text: 'cake', startInSeconds: 30.56 },
+        { text: 'Time', startInSeconds: 33.14 },
+        { text: 'for', startInSeconds: 33.34 },
+        { text: 'the', startInSeconds: 33.46 },
+        { text: 'most', startInSeconds: 33.62 },
+        { text: 'critical', startInSeconds: 34 },
+        { text: 'part', startInSeconds: 34.76 },
+        { text: 'This', startInSeconds: 40.74 },
+        { text: 'is', startInSeconds: 40.86 },
+        { text: 'how', startInSeconds: 41 },
+        { text: 'it', startInSeconds: 41.1 },
+        { text: 'turned', startInSeconds: 41.32 },
+        { text: 'out', startInSeconds: 42.02 },
+        { text: 'Stupid', startInSeconds: 42.78 },
+        { text: 'idea,', startInSeconds: 43.36 },
+        { text: 'pretty', startInSeconds: 43.7 },
+        { text: 'bad', startInSeconds: 44.02 },
+        { text: 'execution', startInSeconds: 44.72 },
+        { text: 'I hope', startInSeconds: 45.88 },
+        { text: 'he likes', startInSeconds: 46.2 },
+        { text: 'it anyway', startInSeconds: 46.58 },
+        { text: 'Hey', startInSeconds: 49.52 },
+        { text: 'William', startInSeconds: 50.06 },
+        { text: 'Congrats', startInSeconds: 52.32 },
+        { text: 'We', startInSeconds: 54.56 },
+        { text: 'wanted', startInSeconds: 54.86 },
+        { text: 'to congratulate', startInSeconds: 55 },
+        { text: 'you', startInSeconds: 56.22 },
+        { text: 'on', startInSeconds: 56.54 },
+        { text: 'the', startInSeconds: 56.68 },
+        { text: '100,000', startInSeconds: 57.14 },
+        { text: 'You hear', startInSeconds: 58.86 },
+        { text: 'Joseph', startInSeconds: 59.4 },
+        { text: 'crying?', startInSeconds: 59.74 },
+        { text: 'Thank', startInSeconds: 61.04 },
+        { text: 'you', startInSeconds: 61.18 },
+        { text: 'so', startInSeconds: 61.44 },
+        { text: 'much', startInSeconds: 61.76 },
+        { text: '(electronic', startInSeconds: 62.88 },
+        { text: 'beeping)', startInSeconds: 63.02 },
+    ]);
+});
+(0, vitest_1.test)('Convert to captions - 0ms together', () => { // Same example payload with a threshold of 0: every expected caption is a single word
+    const { captions: transcript } = (0, convert_to_captions_1.convertToCaptions)({
+        transcription: example_payload_1.examplePayload.transcription,
+        combineTokensWithinMilliseconds: 0,
+    });
+    (0, vitest_1.expect)(transcript).toEqual([
+        { text: 'William', startInSeconds: 0.24 },
+        { text: 'just', startInSeconds: 0.48 },
+        { text: 'hit', startInSeconds: 0.7 },
+        { text: '100,000', startInSeconds: 1.3 },
+        { text: 'YouTube', startInSeconds: 2.22 },
+        { text: 'subscribers', startInSeconds: 2.94 },
+        { text: 'And', startInSeconds: 3.24 },
+        { text: 'we', startInSeconds: 3.32 },
+        { text: 'are', startInSeconds: 3.42 },
+        { text: 'going', startInSeconds: 3.58 },
+        { text: 'to', startInSeconds: 3.76 },
+        { text: 'celebrate', startInSeconds: 4.1 },
+        { text: 'that', startInSeconds: 4.34 },
+        { text: 'We', startInSeconds: 4.5 },
+        { text: 'thought', startInSeconds: 4.7 },
+        { text: 'about', startInSeconds: 5.1 },
+        { text: 'to', startInSeconds: 5.42 },
+        { text: 'bake', startInSeconds: 5.92 },
+        { text: 'a', startInSeconds: 6.14 },
+        { text: 'cake', startInSeconds: 6.4 },
+        { text: 'We', startInSeconds: 6.56 },
+        { text: 'found', startInSeconds: 6.8 },
+        { text: 'this', startInSeconds: 7.12 },
+        { text: 'and', startInSeconds: 7.36 },
+        { text: 'it', startInSeconds: 7.5 },
+        { text: 'reminded', startInSeconds: 7.78 },
+        { text: 'us', startInSeconds: 8.04 },
+        { text: 'of', startInSeconds: 8.24 },
+        { text: 'William', startInSeconds: 8.52 },
+        { text: 'We', startInSeconds: 8.94 },
+        { text: 'hope', startInSeconds: 9.22 },
+        { text: 'he', startInSeconds: 9.42 },
+        { text: 'will', startInSeconds: 9.56 },
+        { text: 'like', startInSeconds: 9.68 },
+        { text: 'the', startInSeconds: 9.86 },
+        { text: 'cake', startInSeconds: 10.08 },
+        { text: "Let's", startInSeconds: 10.28 },
+        { text: 'start', startInSeconds: 10.46 },
+        { text: 'with', startInSeconds: 10.58 },
+        { text: 'the', startInSeconds: 10.68 },
+        { text: 'dough', startInSeconds: 10.96 },
+        { text: 'By', startInSeconds: 11.2 },
+        { text: 'putting', startInSeconds: 11.44 },
+        { text: 'some', startInSeconds: 11.64 },
+        { text: 'butter', startInSeconds: 12.06 },
+        { text: 'Some', startInSeconds: 12.86 },
+        { text: 'sugar', startInSeconds: 13.3 },
+        { text: 'Eggs', startInSeconds: 14.36 },
+        { text: 'No', startInSeconds: 14.78 },
+        { text: 'frameworks,', startInSeconds: 15.28 },
+        { text: 'just', startInSeconds: 15.68 },
+        { text: 'vanilla', startInSeconds: 16.1 },
+        { text: 'Pinch', startInSeconds: 16.38 },
+        { text: 'of', startInSeconds: 16.58 },
+        { text: 'salt', startInSeconds: 17.08 },
+        { text: 'Some', startInSeconds: 17.44 },
+        { text: 'Nutella', startInSeconds: 17.78 },
+        { text: 'Some', startInSeconds: 18.3 },
+        { text: 'chocolate', startInSeconds: 18.68 },
+        { text: 'Baking', startInSeconds: 19.12 },
+        { text: 'powder', startInSeconds: 19.76 },
+        { text: 'And', startInSeconds: 20.68 },
+        { text: 'flour', startInSeconds: 21.14 },
+        { text: 'Just', startInSeconds: 21.66 },
+        { text: 'massage', startInSeconds: 22.06 },
+        { text: 'in', startInSeconds: 22.38 },
+        { text: 'the', startInSeconds: 22.5 },
+        { text: 'butter', startInSeconds: 22.84 },
+        { text: 'to', startInSeconds: 23.08 },
+        { text: 'give', startInSeconds: 23.18 },
+        { text: 'it', startInSeconds: 23.32 },
+        { text: 'the', startInSeconds: 23.42 },
+        { text: 'full', startInSeconds: 23.76 },
+        { text: 'treatment', startInSeconds: 24.28 },
+        { text: 'Fill', startInSeconds: 24.54 },
+        { text: 'it', startInSeconds: 24.7 },
+        { text: 'in', startInSeconds: 25.32 },
+        { text: 'Bake', startInSeconds: 26.1 },
+        { text: 'it', startInSeconds: 26.3 },
+        { text: 'for', startInSeconds: 26.48 },
+        { text: 'half', startInSeconds: 26.64 },
+        { text: 'an', startInSeconds: 26.86 },
+        { text: 'hour', startInSeconds: 27.06 },
+        { text: 'at', startInSeconds: 27.4 },
+        { text: '170', startInSeconds: 28.14 },
+        { text: 'degrees', startInSeconds: 28.6 },
+        { text: "It's", startInSeconds: 28.84 },
+        { text: 'time', startInSeconds: 29.04 },
+        { text: 'for', startInSeconds: 29.28 },
+        { text: 'the', startInSeconds: 29.42 },
+        { text: 'icing', startInSeconds: 29.7 },
+        { text: 'on', startInSeconds: 29.88 },
+        { text: 'the', startInSeconds: 30.0 },
+        { text: 'cake', startInSeconds: 30.56 },
+        { text: 'Time', startInSeconds: 33.14 },
+        { text: 'for', startInSeconds: 33.34 },
+        { text: 'the', startInSeconds: 33.46 },
+        { text: 'most', startInSeconds: 33.62 },
+        { text: 'critical', startInSeconds: 34.0 },
+        { text: 'part', startInSeconds: 34.76 },
+        { text: 'This', startInSeconds: 40.74 },
+        { text: 'is', startInSeconds: 40.86 },
+        { text: 'how', startInSeconds: 41.0 },
+        { text: 'it', startInSeconds: 41.1 },
+        { text: 'turned', startInSeconds: 41.32 },
+        { text: 'out', startInSeconds: 42.02 },
+        { text: 'Stupid', startInSeconds: 42.78 },
+        { text: 'idea,', startInSeconds: 43.36 },
+        { text: 'pretty', startInSeconds: 43.7 },
+        { text: 'bad', startInSeconds: 44.02 },
+        { text: 'execution', startInSeconds: 44.72 },
+        { text: 'I', startInSeconds: 45.88 },
+        { text: 'hope', startInSeconds: 46.06 },
+        { text: 'he', startInSeconds: 46.2 },
+        { text: 'likes', startInSeconds: 46.38 },
+        { text: 'it', startInSeconds: 46.58 },
+        { text: 'anyway', startInSeconds: 47.56 },
+        { text: 'Hey', startInSeconds: 49.52 },
+        { text: 'William', startInSeconds: 50.06 },
+        { text: 'Congrats', startInSeconds: 52.32 },
+        { text: 'We', startInSeconds: 54.56 },
+        { text: 'wanted', startInSeconds: 54.86 },
+        { text: 'to', startInSeconds: 55.0 },
+        { text: 'congratulate', startInSeconds: 55.58 },
+        { text: 'you', startInSeconds: 56.22 },
+        { text: 'on', startInSeconds: 56.54 },
+        { text: 'the', startInSeconds: 56.68 },
+        { text: '100,000', startInSeconds: 57.14 },
+        { text: 'You', startInSeconds: 58.86 },
+        { text: 'hear', startInSeconds: 59.06 },
+        { text: 'Joseph', startInSeconds: 59.4 },
+        { text: 'crying?', startInSeconds: 59.74 },
+        { text: 'Thank', startInSeconds: 61.04 },
+        { text: 'you', startInSeconds: 61.18 },
+        { text: 'so', startInSeconds: 61.44 },
+        { text: 'much', startInSeconds: 61.76 },
+        { text: '(electronic', startInSeconds: 62.88 },
+        { text: 'beeping)', startInSeconds: 63.02 },
+    ]);
+});

package/dist/test/example-payload.d.ts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ import type { TranscriptionJson } from '../transcribe';
2	+ export declare const examplePayload: TranscriptionJson<true>;