@codearcade/subtitle-generator 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,291 @@
1
+ # Subtitle Generator
2
+
3
+ Generate subtitle files (SRT, VTT, TXT) from audio files using OpenAI's Whisper model. Automatically downloads the binary and model files for your operating system.
4
+
5
+ ## Features
6
+
7
+ - 🎬 Supports multiple subtitle formats (SRT, VTT, TXT)
8
+ - 🤖 Uses OpenAI's Whisper for accurate transcription
9
+ - 🔧 Multiple model sizes (small, medium, large)
10
+ - 🖥️ Cross-platform support (Windows, macOS, Linux)
11
+ - ⚡ Configurable thread count for performance tuning
12
+ - 🌍 Multi-language support with auto-detection
13
+
14
+ ## Installation
15
+
16
+ ```bash
17
+ npm install @codearcade/subtitle-generator
18
+ ```
19
+
20
+ ## Quick Start
21
+
22
+ ### 1. Initialize the Package
23
+
24
+ Run the initialization command to download the Whisper binary and model:
25
+
26
+ ```bash
27
+ npx subtitle-generator init
28
+ ```
29
+
30
+ The init command will:
31
+
32
+ - Detect your operating system
33
+ - Download the appropriate Whisper binary
34
+ - Prompt you to choose a model size:
35
+ - **small** - Fast transcription with good accuracy
36
+ - **medium** - Balanced performance and accuracy
37
+ - **large** - Highest accuracy (requires more resources)
38
+
39
+ ### 2. Use in Your Code
40
+
41
+ Create a Node.js script to transcribe audio files:
42
+
43
+ ```javascript
44
+ import path from "path";
45
+ import { Whisper } from "@codearcade/subtitle-generator";
46
+
47
+ const whisper = new Whisper({
48
+ modelPath: path.join(process.cwd(), "models", "ggml-small.bin"),
49
+ threads: 8,
50
+ outputFormat: "srt", // "srt" | "txt" | "vtt"
51
+ srtLang: "English",
52
+ // audioLang: "English" // leave undefined for auto-detection
53
+ });
54
+
55
+ const inputFile = path.join(process.cwd(), "input", "audio.mp3");
56
+ const outputFile = path.join(process.cwd(), "output", "audio.srt");
57
+
58
+ async function main() {
59
+ await whisper.transcribe(inputFile, outputFile);
60
+ console.log("Transcription finished!");
61
+ }
62
+
63
+ main();
64
+ ```
65
+
66
+ ## API Reference
67
+
68
+ ### Whisper Constructor
69
+
70
+ Create a new Whisper instance with configuration options:
71
+
72
+ ```javascript
73
+ const whisper = new Whisper(options);
74
+ ```
75
+
76
+ #### Options
77
+
78
+ | Option | Type | Default | Description |
79
+ | -------------- | ------ | --------- | -------------------------------------------------------------------------------- |
80
+ | `modelPath` | string | required | Absolute path to the Whisper model binary file |
81
+ | `threads` | number | 8 | Number of threads to use for transcription |
82
+ | `outputFormat` | string | "srt" | Output subtitle format: `"srt"`, `"vtt"`, or `"txt"` |
83
+ | `srtLang` | string | undefined | Target subtitle language; setting `"English"` enables translation to English when the audio language differs |
84
+ | `audioLang` | string | undefined | Audio language code (e.g., "en", "es", "fr"). Leave undefined for auto-detection |
85
+
86
+ ### Methods
87
+
88
+ #### `transcribe(inputPath, outputPath)`
89
+
90
+ Transcribe an audio file and save the output to a subtitle file.
91
+
92
+ **Parameters:**
93
+
94
+ - `inputPath` (string): Path to the audio file (supports mp3, wav, m4a, flac, etc.)
95
+ - `outputPath` (string): Path where the subtitle file will be saved
96
+
97
+ **Returns:** `Promise<void>`
98
+
99
+ **Example:**
100
+
101
+ ```javascript
102
+ await whisper.transcribe("./audio/interview.mp3", "./subtitles/interview.srt");
103
+ ```
104
+
105
+ ## Supported Audio Formats
106
+
107
+ - MP3
108
+ - WAV
109
+ - M4A
110
+ - FLAC
111
+ - OGG
112
+ - WEBM
113
+ - And other common audio formats
114
+
115
+ ## Supported Output Formats
116
+
117
+ ### SRT (SubRip)
118
+
119
+ Standard subtitle format with timecodes and subtitle index:
120
+
121
+ ```
122
+ 1
123
+ 00:00:00,000 --> 00:00:05,000
124
+ First subtitle line
125
+
126
+ 2
127
+ 00:00:05,000 --> 00:00:10,000
128
+ Second subtitle line
129
+ ```
130
+
131
+ ### VTT (WebVTT)
132
+
133
+ Web video text track format:
134
+
135
+ ```
136
+ WEBVTT
137
+
138
+ 00:00:00.000 --> 00:00:05.000
139
+ First subtitle line
140
+
141
+ 00:00:05.000 --> 00:00:10.000
142
+ Second subtitle line
143
+ ```
144
+
145
+ ### TXT (Plain Text)
146
+
147
+ Simple text transcription with timestamps:
148
+
149
+ ```
150
+ [00:00:00] First subtitle line
151
+ [00:00:05] Second subtitle line
152
+ ```
153
+
154
+ ## Configuration
155
+
156
+ ### Model Selection
157
+
158
+ Models are stored in a `models` directory. After running `init`, choose your model size:
159
+
160
+ - **small** (~140MB) - Good for fast transcription with reasonable accuracy
161
+ - **medium** (~380MB) - Better accuracy with moderate performance impact
162
+ - **large** (~1.4GB) - Highest accuracy, requires more RAM and processing time
163
+
164
+ ### Thread Count
165
+
166
+ Adjust the `threads` option based on your CPU:
167
+
168
+ ```javascript
169
+ const whisper = new Whisper({
170
+ modelPath: "./models/ggml-small.bin",
171
+ threads: 16, // Use more threads on high-core-count CPUs
172
+ outputFormat: "srt",
173
+ });
174
+ ```
175
+
176
+ ## Examples
177
+
178
+ ### Batch Processing Multiple Files
179
+
180
+ ```javascript
181
+ import path from "path";
182
+ import { Whisper } from "@codearcade/subtitle-generator";
183
+ import fs from "fs";
184
+
185
+ const whisper = new Whisper({
186
+ modelPath: path.join(process.cwd(), "models", "ggml-small.bin"),
187
+ threads: 8,
188
+ outputFormat: "srt",
189
+ });
190
+
191
+ async function processAudioFiles() {
192
+ const inputDir = "./input";
193
+ const outputDir = "./output";
194
+
195
+ const files = fs.readdirSync(inputDir);
196
+
197
+ for (const file of files) {
198
+ if (!file.endsWith(".mp3")) continue;
199
+
200
+ const inputPath = path.join(inputDir, file);
201
+ const outputPath = path.join(outputDir, file.replace(".mp3", ".srt"));
202
+
203
+ console.log(`Processing ${file}...`);
204
+ await whisper.transcribe(inputPath, outputPath);
205
+ console.log(`Completed: ${file}`);
206
+ }
207
+ }
208
+
209
+ processAudioFiles();
210
+ ```
211
+
212
+ ### Auto-Detect Language
213
+
214
+ ```javascript
215
+ const whisper = new Whisper({
216
+ modelPath: "./models/ggml-small.bin",
217
+ threads: 8,
218
+ outputFormat: "srt",
219
+ // audioLang is undefined - language will be auto-detected
220
+ });
221
+
222
+ await whisper.transcribe("./audio/unknown-language.mp3", "./output/result.srt");
223
+ ```
224
+
225
+ ### Specify Language
226
+
227
+ ```javascript
228
+ const whisper = new Whisper({
229
+ modelPath: "./models/ggml-small.bin",
230
+ threads: 8,
231
+ outputFormat: "srt",
232
+ audioLang: "es", // Spanish
233
+ });
234
+
235
+ await whisper.transcribe("./audio/spanish.mp3", "./output/spanish.srt");
236
+ ```
237
+
238
+ ## Project Structure
239
+
240
+ After using the package, your project structure might look like:
241
+
242
+ ```
243
+ project/
244
+ ├── models/
245
+ │ └── ggml-small.bin # Downloaded Whisper model
246
+ ├── input/
247
+ │ ├── audio1.mp3
248
+ │ └── audio2.mp3
249
+ ├── output/
250
+ │ ├── audio1.srt
251
+ │ └── audio2.srt
252
+ ├── transcribe.js # Your transcription script
253
+ └── package.json
254
+ ```
255
+
256
+ ## Troubleshooting
257
+
258
+ ### "Model file not found"
259
+
260
+ Make sure you ran `npx subtitle-generator init` first to download the model.
261
+
262
+ ### "Binary not found for your OS"
263
+
264
+ The package only supports Windows, macOS, and Linux. Check that you're on a supported operating system.
265
+
266
+ ### Slow transcription
267
+
268
+ - Increase the `threads` option (if your CPU has more cores)
269
+ - Use a smaller model (small instead of large)
270
+ - Ensure your system has adequate RAM available
271
+
272
+ ### Out of memory errors
273
+
274
+ - Use a smaller model size
275
+ - Reduce the `threads` count
276
+ - Process files one at a time instead of in parallel
277
+
278
+ ## Performance Tips
279
+
280
+ 1. **Model Size**: Start with `small` model for speed, upgrade to `medium` or `large` if accuracy isn't sufficient
281
+ 2. **Thread Count**: Set to number of CPU cores for optimal performance
282
+ 3. **Batch Processing**: Process multiple files sequentially to avoid memory issues
283
+ 4. **Audio Quality**: Clear audio produces better results than noisy recordings
284
+
285
+ ## License
286
+
287
+ MIT © [CodeArcade](https://github.com/codearcade-io)
288
+
289
+ ## Support
290
+
291
+ For issues, questions, or contributions, visit the [GitHub repository](https://github.com/codearcade-io/subtitle-generator).
package/cli/index.js ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env node

// CLI entry point. The only supported command is `init`, which downloads
// the whisper.cpp binary and a model into the current working directory.
const args = process.argv.slice(2);

if (args[0] === 'init') {
  // Lazy-load the init module so the CLI starts fast for the usage path.
  // Fix: attach a rejection handler so a failed init doesn't surface as an
  // unhandled promise rejection.
  import('../init/index.js')
    .then((mod) => mod.default())
    .catch((err) => {
      console.error('init failed:', err);
      process.exitCode = 1;
    });
} else {
  // Fix: the bin declared in package.json is "subtitle-generator", not
  // "@codearcade/ai-subtitle-generator".
  console.log('Usage: npx subtitle-generator init');
}
package/dist/index.d.ts ADDED
@@ -0,0 +1,39 @@
1
/** Maps human-readable language names to the ISO-639-1 codes passed to whisper.cpp. */
declare const audioLang: {
    readonly English: "en";
    readonly Spanish: "es";
    readonly French: "fr";
    readonly German: "de";
    readonly Chinese: "zh";
    readonly Japanese: "ja";
    readonly Korean: "ko";
    readonly Russian: "ru";
    readonly Portuguese: "pt";
    readonly Italian: "it";
    readonly Dutch: "nl";
    readonly Arabic: "ar";
    readonly Hindi: "hi";
    readonly Turkish: "tr";
};
/** Union of supported language names, e.g. "English" | "Spanish" | ... */
type AudioLangKey = keyof typeof audioLang;
/** Configuration accepted by the {@link Whisper} constructor. */
interface WhisperOptions {
    /** Path to the ggml model file downloaded by `init` (required). */
    modelPath: string;
    /** Thread count forwarded to whisper.cpp (`-t`). */
    threads?: number;
    /** Subtitle output format; one of "srt", "txt", "vtt". */
    outputFormat?: "srt" | "txt" | "vtt";
    /** Source audio language name; omit to let whisper.cpp auto-detect. */
    audioLang?: AudioLangKey;
    /** Target subtitle language name; "English" with a non-English audio language enables translation. */
    srtLang?: AudioLangKey;
}
/** Thin wrapper that spawns the whisper.cpp CLI to transcribe audio files. */
export declare class Whisper {
    private modelPath;
    private threads;
    private outputFormat;
    private audioLang?;
    private srtLang?;
    constructor(options: WhisperOptions);
    /**
     * Transcribe a single audio file
     * @param inputFile - path to input audio
     * @param outputFile - path to output (without extension)
     */
    transcribe(inputFile: string, outputFile: string): Promise<void>;
}
export {};
package/dist/index.js ADDED
@@ -0,0 +1,95 @@
1
+ // src/index.ts
2
+ import { spawn } from "child_process";
3
+ import os from "os";
4
+ import path from "path";
5
// Paths are resolved relative to the consumer's working directory, where
// `init` places the `whisper/` binary folder and `models/` directory.
const __dirname2 = process.cwd();

// Platform detection drives the executable name chosen below.
const isWindows = os.platform() === "win32";
const isMac = os.platform() === "darwin";
// Windows builds ship as whisper-cli.exe; macOS and Linux both use "whisper".
const binaryName = isWindows ? "whisper-cli.exe" : "whisper";

// Human-readable language name -> ISO-639-1 code understood by whisper.cpp.
const audioLang = {
  English: "en",
  Spanish: "es",
  French: "fr",
  German: "de",
  Chinese: "zh",
  Japanese: "ja",
  Korean: "ko",
  Russian: "ru",
  Portuguese: "pt",
  Italian: "it",
  Dutch: "nl",
  Arabic: "ar",
  Hindi: "hi",
  Turkish: "tr"
};
25
+
26
/**
 * Thin wrapper around the whisper.cpp CLI binary downloaded by `init`.
 */
class Whisper {
  modelPath;
  threads;
  outputFormat;
  audioLang;
  srtLang;
  /**
   * @param {object} options
   * @param {string} options.modelPath - path to the ggml model file.
   * @param {number} [options.threads=8] - thread count passed via `-t`.
   * @param {"srt"|"txt"|"vtt"} [options.outputFormat="srt"] - subtitle format.
   * @param {string} [options.audioLang] - language NAME (e.g. "Spanish"),
   *   mapped to an ISO code via the module-level `audioLang` table; omit for
   *   auto-detection.
   * @param {string} [options.srtLang] - target language NAME; "English" with
   *   non-English audio enables whisper.cpp translation (`-tr`).
   */
  constructor(options) {
    this.modelPath = options.modelPath;
    this.threads = options.threads || 8;
    this.outputFormat = options.outputFormat || "srt";
    this.audioLang = options.audioLang ? audioLang[options.audioLang] : undefined;
    this.srtLang = options.srtLang ? audioLang[options.srtLang] : undefined;
  }
  /**
   * Transcribe one audio file by spawning the whisper.cpp binary.
   * @param {string} inputFile - path to the input audio file.
   * @param {string} outputFile - output path; whisper.cpp's `-of` takes the
   *   path without extension and appends .srt/.txt/.vtt itself.
   * @returns {Promise<void>} resolves on exit code 0, rejects otherwise.
   */
  transcribe(inputFile, outputFile) {
    return new Promise((resolve, reject) => {
      // "srt" and any unrecognized value both fall back to -osrt.
      const outputFlag =
        this.outputFormat === "txt" ? "-otxt"
          : this.outputFormat === "vtt" ? "-ovtt"
            : "-osrt";
      // NOTE(review): the tuning flags below (-bs, -bo, -ml, -sow, -nt, -et,
      // -lpt) mirror the original build; verify them against the installed
      // whisper.cpp CLI version, in particular whether -sow takes a value.
      const args = [
        "-m", this.modelPath,
        "-f", inputFile,
        outputFlag,
        "-of", outputFile,
        "-t", String(this.threads),
        "-bs", "5",
        "-bo", "1",
        "-ml", "30",
        "-sow", "false",
        "-nt",
        "-et", "2.4",
        "-lpt", "-1.0"
      ];
      if (this.audioLang) {
        args.push("-l", this.audioLang);
      }
      // Translate to English when the target is English but the source isn't.
      if (this.srtLang === "en" && this.audioLang !== "en") {
        args.push("-tr");
      }
      const binaryPath = path.join(__dirname2, "whisper", binaryName);
      console.log("Running:", binaryPath, args.join(" "));
      const process2 = spawn(binaryPath, args);
      process2.stdout.on("data", (data) => {
        console.log(`[stdout]: ${data}`);
      });
      // Fix: forward the child's stderr to stderr (was console.log), so
      // callers can separate progress/diagnostics from normal output.
      process2.stderr.on("data", (data) => {
        console.error(`[stderr]: ${data}`);
      });
      process2.on("close", (code) => {
        if (code !== 0) {
          return reject(new Error(`Process exited with code ${code}`));
        }
        resolve();
      });
      process2.on("error", (err) => {
        reject(err);
      });
    });
  }
}
export {
  Whisper
};
package/init/index.js ADDED
@@ -0,0 +1,179 @@
1
+ import inquirer from "inquirer";
2
+ import path from "path";
3
+ import fs from "fs";
4
+ import os from "os";
5
+ import { execSync } from "child_process";
6
+
7
/**
 * Interactive setup: prompts for a Whisper model size, downloads the model
 * into ./models, and installs the whisper.cpp binary into ./whisper
 * (pre-built zip on Windows; built from source on macOS/Linux).
 */
const init = async () => {
  const { modelSize } = await inquirer.prompt([
    {
      type: "list",
      name: "modelSize",
      message: "Which Whisper model do you want to download?",
      choices: [
        { name: "Small — fast, lower accuracy", value: "small" },
        { name: "Medium — balanced", value: "medium" },
        { name: "Large — slow, highest accuracy", value: "large" },
      ],
      default: "small",
    },
  ]);

  const modelUrls = {
    small:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
    medium:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin",
    large:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
  };

  const modelUrl = modelUrls[modelSize];

  /**
   * Download `url` to `dest` with a console progress indicator.
   * Skips the download when `dest` already exists.
   * @throws on HTTP or filesystem errors; a partial file is removed so a
   *   rerun does not mistake it for a completed download.
   */
  async function downloadFile(url, dest, label) {
    if (fs.existsSync(dest)) {
      console.log(`${label} already exists at ${dest}`);
      return;
    }

    console.log(`Downloading ${label}...`);
    const res = await fetch(url);

    if (!res.ok) throw new Error(`Failed to download: ${res.statusText}`);

    const total = Number(res.headers.get("content-length")) || 0;
    let downloaded = 0;

    const file = fs.createWriteStream(dest);

    try {
      await res.body.pipeTo(
        new WritableStream({
          write(chunk) {
            downloaded += chunk.length;
            if (total) {
              const percent = ((downloaded / total) * 100).toFixed(2);
              process.stdout.write(`Downloading ${label}... ${percent}%\r`);
            } else {
              process.stdout.write(`Downloaded ${downloaded} bytes\r`);
            }
            // Fix: honor backpressure — resolve only once the chunk has been
            // accepted/flushed by the file stream instead of firing blind.
            return new Promise((resolve, reject) => {
              file.write(chunk, (err) => (err ? reject(err) : resolve()));
            });
          },
          close() {
            // Fix: wait for the file to be fully flushed before pipeTo
            // resolves; callers immediately extract/execute the file, which
            // previously raced an unfinished write.
            return new Promise((resolve, reject) => {
              file.end((err) => (err ? reject(err) : resolve()));
            });
          },
          abort(err) {
            file.destroy(err);
          },
        }),
      );
      console.log(`\n${label} download complete.`);
    } catch (err) {
      // Fix: clean up the partial file; the existsSync fast-path above would
      // otherwise treat it as a finished download on the next run.
      file.destroy();
      if (fs.existsSync(dest)) fs.unlinkSync(dest);
      throw err;
    }
  }

  // 1. Download the AI model into ./models.
  const modelsDir = path.join(process.cwd(), "models");
  if (!fs.existsSync(modelsDir)) {
    fs.mkdirSync(modelsDir, { recursive: true });
  }
  const destModelPath = path.join(modelsDir, path.basename(modelUrl));
  await downloadFile(modelUrl, destModelPath, `${modelSize} model`);

  // 2. Set up the whisper.cpp binary for this OS in ./whisper.
  console.log("\nSetting up whisper.cpp binary...");
  const platform = os.platform();
  const whisperDir = path.join(process.cwd(), "whisper");
  const isWindows = platform === "win32";

  if (!fs.existsSync(whisperDir)) {
    fs.mkdirSync(whisperDir, { recursive: true });
  }

  if (isWindows) {
    // Windows: download a pre-built zip and extract it.
    const binaryDest = path.join(whisperDir, "whisper-cli.exe");
    if (!fs.existsSync(binaryDest)) {
      const zipUrl =
        "https://github.com/ggerganov/whisper.cpp/releases/latest/download/whisper-bin-x64.zip";
      const zipPath = path.join(whisperDir, "whisper.zip");

      await downloadFile(zipUrl, zipPath, "Whisper Windows Binary");

      console.log("Extracting binary...");
      // Windows 10+ ships a native tar capable of unzipping.
      execSync(`tar -xf "${zipPath}" -C "${whisperDir}"`);

      // Flatten a "Release" subfolder if the zip uses one.
      const possibleReleaseDir = path.join(whisperDir, "Release");

      if (fs.existsSync(possibleReleaseDir)) {
        const files = fs.readdirSync(possibleReleaseDir);
        for (const file of files) {
          fs.renameSync(
            path.join(possibleReleaseDir, file),
            path.join(whisperDir, file),
          );
        }
        fs.rmdirSync(possibleReleaseDir);
      }

      // Rename the extracted main.exe to the name the wrapper expects.
      // NOTE(review): newer whisper.cpp releases ship whisper-cli.exe
      // instead of main.exe — verify against the current release layout.
      const extractedMain = path.join(whisperDir, "main.exe");
      if (fs.existsSync(extractedMain)) {
        fs.renameSync(extractedMain, binaryDest);
      } else {
        console.error("Warning: Could not find main.exe after extraction.");
      }

      // Remove the zip so it doesn't clutter the folder.
      if (fs.existsSync(zipPath)) {
        fs.unlinkSync(zipPath);
      }

      console.log("Whisper binary setup complete for Windows.");
    } else {
      console.log("Whisper binary already exists for Windows.");
    }
  } else {
    // macOS / Linux: build whisper.cpp from source.
    const binaryDest = path.join(whisperDir, "whisper");
    if (!fs.existsSync(binaryDest)) {
      console.log("Mac/Linux detected. Building whisper.cpp from source...");
      console.log(
        "Note: This requires 'git', 'make', and a C/C++ compiler (gcc/clang).",
      );

      const repoDir = path.join(whisperDir, "source");
      if (!fs.existsSync(repoDir)) {
        // Shallow clone to save time and bandwidth.
        execSync(
          `git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git "${repoDir}"`,
          { stdio: "inherit" },
        );
      }

      console.log("Compiling... This might take a minute.");
      // Compile only the main CLI tool.
      // NOTE(review): recent whisper.cpp versions moved to CMake; confirm
      // `make main` still produces a ./main binary on current checkouts.
      execSync(`cd "${repoDir}" && make main`, { stdio: "inherit" });

      const compiledPath = path.join(repoDir, "main");
      if (fs.existsSync(compiledPath)) {
        fs.copyFileSync(compiledPath, binaryDest);
        execSync(`chmod +x "${binaryDest}"`);
        console.log("Whisper binary compiled and configured successfully.");
      } else {
        console.error(
          "Error: Could not find the compiled binary. Compilation might have failed.",
        );
      }
    } else {
      console.log("Whisper binary already exists.");
    }
  }
};

export default init;
package/package.json ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "name": "@codearcade/subtitle-generator",
3
+ "version": "1.0.0",
4
+ "main": "index.js",
5
+ "type": "module",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "scripts": {
15
+ "build": "bun ./builder.ts && bun run generate-types",
16
+ "prepublishOnly": "bun run build",
17
+ "generate-types": "bunx tsc --emitDeclarationOnly --declaration --outDir dist",
18
+ "clear": "rm -rf dist"
19
+ },
20
+ "bin": {
21
+ "subtitle-generator": "./cli/index.js"
22
+ },
23
+ "files": [
24
+ "dist",
25
+ "cli",
26
+ "init"
27
+ ],
28
+ "homepage": "https://github.com/codearcade-io/subtitle-generator#readme",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "git+https://github.com/codearcade-io/subtitle-generator.git"
32
+ },
33
+ "bugs": "https://github.com/codearcade-io/subtitle-generator/issues",
34
+ "author": "Abhishek Singh <official.6packprogrammer@gmail.com>",
35
+ "license": "MIT",
36
+ "publishConfig": {
37
+ "access": "public"
38
+ },
39
+ "description": "Generate subtitles from audio files using OpenAI Whisper with support for SRT, VTT, and TXT formats. Automatically downloads required binaries and models, with cross-platform support and configurable performance options.",
40
+ "keywords": [
41
+ "whisper",
42
+ "subtitle",
43
+ "subtitles",
44
+ "speech-to-text",
45
+ "transcription",
46
+ "audio",
47
+ "srt",
48
+ "vtt",
49
+ "nodejs",
50
+ "npm",
51
+ "openai",
52
+ "ai",
53
+ "speech-recognition",
54
+ "cli",
55
+ "cross-platform"
56
+ ],
57
+ "devDependencies": {
58
+ "@types/bun": "^1.3.11"
59
+ },
60
+ "dependencies": {
61
+ "inquirer": "^13.3.2"
62
+ }
63
+ }