@codearcade/subtitle-generator 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +291 -0
- package/cli/index.js +9 -0
- package/dist/index.d.ts +39 -0
- package/dist/index.js +95 -0
- package/init/index.js +179 -0
- package/package.json +63 -0
package/README.md
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
# Subtitle Generator
|
|
2
|
+
|
|
3
|
+
Generate subtitle files (SRT, VTT, TXT) from audio files using OpenAI's Whisper model. Automatically downloads the binary and model files for your operating system.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- 🎬 Supports multiple subtitle formats (SRT, VTT, TXT)
|
|
8
|
+
- 🤖 Uses OpenAI's Whisper for accurate transcription
|
|
9
|
+
- 🔧 Multiple model sizes (small, medium, large)
|
|
10
|
+
- 🖥️ Cross-platform support (Windows, macOS, Linux)
|
|
11
|
+
- ⚡ Configurable thread count for performance tuning
|
|
12
|
+
- 🌍 Multi-language support with auto-detection
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install @codearcade/subtitle-generator
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### 1. Initialize the Package
|
|
23
|
+
|
|
24
|
+
Run the initialization command to download the Whisper binary and model:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
npx subtitle-generator init
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
The init command will:
|
|
31
|
+
|
|
32
|
+
- Detect your operating system
|
|
33
|
+
- Download the appropriate Whisper binary
|
|
34
|
+
- Prompt you to choose a model size:
|
|
35
|
+
- **small** - Fast transcription with good accuracy
|
|
36
|
+
- **medium** - Balanced performance and accuracy
|
|
37
|
+
- **large** - Highest accuracy (requires more resources)
|
|
38
|
+
|
|
39
|
+
### 2. Use in Your Code
|
|
40
|
+
|
|
41
|
+
Create a Node.js script to transcribe audio files:
|
|
42
|
+
|
|
43
|
+
```javascript
|
|
44
|
+
import path from "path";
|
|
45
|
+
import { Whisper } from "@codearcade/subtitle-generator";
|
|
46
|
+
|
|
47
|
+
const whisper = new Whisper({
|
|
48
|
+
modelPath: path.join(process.cwd(), "models", "ggml-small.bin"),
|
|
49
|
+
threads: 8,
|
|
50
|
+
outputFormat: "srt", // "srt" | "txt" | "vtt"
|
|
51
|
+
srtLang: "English",
|
|
52
|
+
// audioLang: "English" // leave undefined for auto-detection
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
const inputFile = path.join(process.cwd(), "input", "audio.mp3");
|
|
56
|
+
const outputFile = path.join(process.cwd(), "output", "audio"); // no extension — ".srt" is appended automatically
|
|
57
|
+
|
|
58
|
+
async function main() {
|
|
59
|
+
await whisper.transcribe(inputFile, outputFile);
|
|
60
|
+
console.log("Transcription finished!");
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
main();
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## API Reference
|
|
67
|
+
|
|
68
|
+
### Whisper Constructor
|
|
69
|
+
|
|
70
|
+
Create a new Whisper instance with configuration options:
|
|
71
|
+
|
|
72
|
+
```javascript
|
|
73
|
+
const whisper = new Whisper(options);
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
#### Options
|
|
77
|
+
|
|
78
|
+
| Option | Type | Default | Description |
|
|
79
|
+
| -------------- | ------ | --------- | -------------------------------------------------------------------------------- |
|
|
80
|
+
| `modelPath` | string | required | Absolute path to the Whisper model binary file |
|
|
81
|
+
| `threads`      | number | 8         | Number of threads to use for transcription                                       |
|
|
82
|
+
| `outputFormat` | string | "srt" | Output subtitle format: `"srt"`, `"vtt"`, or `"txt"` |
|
|
83
|
+
| `srtLang` | string | "English" | Language for SRT metadata |
|
|
84
|
+
| `audioLang` | string | undefined | Audio language code (e.g., "en", "es", "fr"). Leave undefined for auto-detection |
|
|
85
|
+
|
|
86
|
+
### Methods
|
|
87
|
+
|
|
88
|
+
#### `transcribe(inputPath, outputPath)`
|
|
89
|
+
|
|
90
|
+
Transcribe an audio file and save the output to a subtitle file.
|
|
91
|
+
|
|
92
|
+
**Parameters:**
|
|
93
|
+
|
|
94
|
+
- `inputPath` (string): Path to the audio file (supports mp3, wav, m4a, flac, etc.)
|
|
95
|
+
- `outputPath` (string): Path where the subtitle file will be saved, without an extension (the format's extension is appended automatically)
|
|
96
|
+
|
|
97
|
+
**Returns:** Promise<void>
|
|
98
|
+
|
|
99
|
+
**Example:**
|
|
100
|
+
|
|
101
|
+
```javascript
|
|
102
|
+
await whisper.transcribe("./audio/interview.mp3", "./subtitles/interview");
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## Supported Audio Formats
|
|
106
|
+
|
|
107
|
+
- MP3
|
|
108
|
+
- WAV
|
|
109
|
+
- M4A
|
|
110
|
+
- FLAC
|
|
111
|
+
- OGG
|
|
112
|
+
- WEBM
|
|
113
|
+
- And other common audio formats
|
|
114
|
+
|
|
115
|
+
## Supported Output Formats
|
|
116
|
+
|
|
117
|
+
### SRT (SubRip)
|
|
118
|
+
|
|
119
|
+
Standard subtitle format with timecodes and subtitle index:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
1
|
|
123
|
+
00:00:00,000 --> 00:00:05,000
|
|
124
|
+
First subtitle line
|
|
125
|
+
|
|
126
|
+
2
|
|
127
|
+
00:00:05,000 --> 00:00:10,000
|
|
128
|
+
Second subtitle line
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### VTT (WebVTT)
|
|
132
|
+
|
|
133
|
+
Web video text track format:
|
|
134
|
+
|
|
135
|
+
```
|
|
136
|
+
WEBVTT
|
|
137
|
+
|
|
138
|
+
00:00:00.000 --> 00:00:05.000
|
|
139
|
+
First subtitle line
|
|
140
|
+
|
|
141
|
+
00:00:05.000 --> 00:00:10.000
|
|
142
|
+
Second subtitle line
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
### TXT (Plain Text)
|
|
146
|
+
|
|
147
|
+
Simple text transcription with timestamps:
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
[00:00:00] First subtitle line
|
|
151
|
+
[00:00:05] Second subtitle line
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
## Configuration
|
|
155
|
+
|
|
156
|
+
### Model Selection
|
|
157
|
+
|
|
158
|
+
Models are stored in a `models` directory. After running `init`, choose your model size:
|
|
159
|
+
|
|
160
|
+
- **small** (~140MB) - Good for fast transcription with reasonable accuracy
|
|
161
|
+
- **medium** (~380MB) - Better accuracy with moderate performance impact
|
|
162
|
+
- **large** (~1.4GB) - Highest accuracy, requires more RAM and processing time
|
|
163
|
+
|
|
164
|
+
### Thread Count
|
|
165
|
+
|
|
166
|
+
Adjust the `threads` option based on your CPU:
|
|
167
|
+
|
|
168
|
+
```javascript
|
|
169
|
+
const whisper = new Whisper({
|
|
170
|
+
modelPath: "./models/ggml-small.bin",
|
|
171
|
+
threads: 16, // Use more threads on high-core-count CPUs
|
|
172
|
+
outputFormat: "srt",
|
|
173
|
+
});
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Examples
|
|
177
|
+
|
|
178
|
+
### Batch Processing Multiple Files
|
|
179
|
+
|
|
180
|
+
```javascript
|
|
181
|
+
import path from "path";
|
|
182
|
+
import { Whisper } from "@codearcade/subtitle-generator";
|
|
183
|
+
import fs from "fs";
|
|
184
|
+
|
|
185
|
+
const whisper = new Whisper({
|
|
186
|
+
modelPath: path.join(process.cwd(), "models", "ggml-small.bin"),
|
|
187
|
+
threads: 8,
|
|
188
|
+
outputFormat: "srt",
|
|
189
|
+
});
|
|
190
|
+
|
|
191
|
+
async function processAudioFiles() {
|
|
192
|
+
const inputDir = "./input";
|
|
193
|
+
const outputDir = "./output";
|
|
194
|
+
|
|
195
|
+
const files = fs.readdirSync(inputDir);
|
|
196
|
+
|
|
197
|
+
for (const file of files) {
|
|
198
|
+
if (!file.endsWith(".mp3")) continue;
|
|
199
|
+
|
|
200
|
+
const inputPath = path.join(inputDir, file);
|
|
201
|
+
const outputPath = path.join(outputDir, file.replace(".mp3", "")); // extension appended automatically
|
|
202
|
+
|
|
203
|
+
console.log(`Processing ${file}...`);
|
|
204
|
+
await whisper.transcribe(inputPath, outputPath);
|
|
205
|
+
console.log(`Completed: ${file}`);
|
|
206
|
+
}
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
processAudioFiles();
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### Auto-Detect Language
|
|
213
|
+
|
|
214
|
+
```javascript
|
|
215
|
+
const whisper = new Whisper({
|
|
216
|
+
modelPath: "./models/ggml-small.bin",
|
|
217
|
+
threads: 8,
|
|
218
|
+
outputFormat: "srt",
|
|
219
|
+
// audioLang is undefined - language will be auto-detected
|
|
220
|
+
});
|
|
221
|
+
|
|
222
|
+
await whisper.transcribe("./audio/unknown-language.mp3", "./output/result");
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### Specify Language
|
|
226
|
+
|
|
227
|
+
```javascript
|
|
228
|
+
const whisper = new Whisper({
|
|
229
|
+
modelPath: "./models/ggml-small.bin",
|
|
230
|
+
threads: 8,
|
|
231
|
+
outputFormat: "srt",
|
|
232
|
+
audioLang: "es", // Spanish
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
await whisper.transcribe("./audio/spanish.mp3", "./output/spanish");
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Project Structure
|
|
239
|
+
|
|
240
|
+
After using the package, your project structure might look like:
|
|
241
|
+
|
|
242
|
+
```
|
|
243
|
+
project/
|
|
244
|
+
├── models/
|
|
245
|
+
│ └── ggml-small.bin # Downloaded Whisper model
|
|
246
|
+
├── input/
|
|
247
|
+
│ ├── audio1.mp3
|
|
248
|
+
│ └── audio2.mp3
|
|
249
|
+
├── output/
|
|
250
|
+
│ ├── audio1.srt
|
|
251
|
+
│ └── audio2.srt
|
|
252
|
+
├── transcribe.js # Your transcription script
|
|
253
|
+
└── package.json
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Troubleshooting
|
|
257
|
+
|
|
258
|
+
### "Model file not found"
|
|
259
|
+
|
|
260
|
+
Make sure you ran `npx subtitle-generator init` first to download the model.
|
|
261
|
+
|
|
262
|
+
### "Binary not found for your OS"
|
|
263
|
+
|
|
264
|
+
The package only supports Windows, macOS, and Linux. Check that you're on a supported operating system.
|
|
265
|
+
|
|
266
|
+
### Slow transcription
|
|
267
|
+
|
|
268
|
+
- Increase the `threads` option (if your CPU has more cores)
|
|
269
|
+
- Use a smaller model (small instead of large)
|
|
270
|
+
- Ensure your system has adequate RAM available
|
|
271
|
+
|
|
272
|
+
### Out of memory errors
|
|
273
|
+
|
|
274
|
+
- Use a smaller model size
|
|
275
|
+
- Reduce the `threads` count
|
|
276
|
+
- Process files one at a time instead of in parallel
|
|
277
|
+
|
|
278
|
+
## Performance Tips
|
|
279
|
+
|
|
280
|
+
1. **Model Size**: Start with `small` model for speed, upgrade to `medium` or `large` if accuracy isn't sufficient
|
|
281
|
+
2. **Thread Count**: Set to number of CPU cores for optimal performance
|
|
282
|
+
3. **Batch Processing**: Process multiple files sequentially to avoid memory issues
|
|
283
|
+
4. **Audio Quality**: Clear audio produces better results than noisy recordings
|
|
284
|
+
|
|
285
|
+
## License
|
|
286
|
+
|
|
287
|
+
MIT © [CodeArcade](https://github.com/codearcade-io)
|
|
288
|
+
|
|
289
|
+
## Support
|
|
290
|
+
|
|
291
|
+
For issues, questions, or contributions, visit the [GitHub repository](https://github.com/codearcade-io/subtitle-generator).
|
package/cli/index.js
ADDED
package/dist/index.d.ts
ADDED
|
/** Supported language display names mapped to their ISO 639-1 codes. */
declare const audioLang: {
    readonly English: "en";
    readonly Spanish: "es";
    readonly French: "fr";
    readonly German: "de";
    readonly Chinese: "zh";
    readonly Japanese: "ja";
    readonly Korean: "ko";
    readonly Russian: "ru";
    readonly Portuguese: "pt";
    readonly Italian: "it";
    readonly Dutch: "nl";
    readonly Arabic: "ar";
    readonly Hindi: "hi";
    readonly Turkish: "tr";
};
/** Union of the supported language display names (keys of `audioLang`). */
type AudioLangKey = keyof typeof audioLang;
/** Options accepted by the {@link Whisper} constructor. */
interface WhisperOptions {
    /** Path to the ggml Whisper model file (e.g. models/ggml-small.bin). */
    modelPath: string;
    /** CPU threads used for transcription; the implementation defaults to 8 when omitted. */
    threads?: number;
    /** Subtitle output format; the implementation defaults to "srt" when omitted. */
    outputFormat?: "srt" | "txt" | "vtt";
    /** Spoken language of the audio; omit to let whisper.cpp auto-detect. */
    audioLang?: AudioLangKey;
    /** Target subtitle language; "English" with non-English audio enables translation. */
    srtLang?: AudioLangKey;
}
/**
 * Wrapper around the whisper.cpp CLI: spawns the bundled binary to
 * transcribe audio files into subtitle files.
 */
export declare class Whisper {
    private modelPath;
    private threads;
    private outputFormat;
    private audioLang?;
    private srtLang?;
    constructor(options: WhisperOptions);
    /**
     * Transcribe a single audio file
     * @param inputFile - path to input audio
     * @param outputFile - path to output (without extension)
     */
    transcribe(inputFile: string, outputFile: string): Promise<void>;
}
export {};
package/dist/index.js
ADDED
|
// src/index.ts
import { spawn } from "child_process";
import os from "os";
import path from "path";

// Binaries are resolved relative to the consumer's working directory,
// where `subtitle-generator init` places the `whisper/` folder.
var __dirname2 = process.cwd();
var isWindows = os.platform() === "win32";
// Windows builds ship as `whisper-cli.exe`; every other platform uses `whisper`
// (the original mac/linux ternary branches were identical, so one test suffices).
var binaryName = isWindows ? "whisper-cli.exe" : "whisper";
// Language display names mapped to the ISO 639-1 codes whisper.cpp expects.
var audioLang = {
  English: "en",
  Spanish: "es",
  French: "fr",
  German: "de",
  Chinese: "zh",
  Japanese: "ja",
  Korean: "ko",
  Russian: "ru",
  Portuguese: "pt",
  Italian: "it",
  Dutch: "nl",
  Arabic: "ar",
  Hindi: "hi",
  Turkish: "tr"
};

/**
 * Thin wrapper around the whisper.cpp CLI.
 *
 * Construction only stores configuration; all work happens in
 * {@link Whisper#transcribe}, which spawns the binary installed by
 * `subtitle-generator init`.
 */
class Whisper {
  modelPath;
  threads;
  outputFormat;
  audioLang;
  srtLang;
  /**
   * @param {object} options
   * @param {string} options.modelPath - path to the ggml model file
   * @param {number} [options.threads=8] - CPU threads for transcription
   * @param {"srt"|"txt"|"vtt"} [options.outputFormat="srt"] - subtitle format
   * @param {string} [options.audioLang] - display name (e.g. "Spanish"); omit for auto-detect
   * @param {string} [options.srtLang] - target subtitle language display name
   */
  constructor(options) {
    this.modelPath = options.modelPath;
    // `??` instead of `||` so only null/undefined fall back to the default
    // (`||` would also clobber any explicit falsy value).
    this.threads = options.threads ?? 8;
    this.outputFormat = options.outputFormat ?? "srt";
    // Map display names to ISO codes; undefined means "auto-detect".
    this.audioLang = options.audioLang ? audioLang[options.audioLang] : undefined;
    this.srtLang = options.srtLang ? audioLang[options.srtLang] : undefined;
  }
  /**
   * Transcribe one audio file by spawning the whisper.cpp binary.
   * @param {string} inputFile - path to the input audio file
   * @param {string} outputFile - output path WITHOUT extension; whisper.cpp's
   *   `-of` flag appends the extension matching the chosen format
   * @returns {Promise<void>} resolves on exit code 0, rejects otherwise
   */
  transcribe(inputFile, outputFile) {
    return new Promise((resolve, reject) => {
      // Lookup table replaces the original nested ternary; unknown formats
      // still fall back to SRT exactly as before.
      const formatFlags = { srt: "-osrt", txt: "-otxt", vtt: "-ovtt" };
      const outputFlag = formatFlags[this.outputFormat] ?? "-osrt";
      const args = [
        "-m",
        this.modelPath,
        "-f",
        inputFile,
        outputFlag,
        "-of",
        outputFile,
        "-t",
        String(this.threads),
        "-bs",
        "5",
        "-bo",
        "1",
        "-ml",
        "30",
        "-sow",
        "false",
        "-nt",
        "-et",
        "2.4",
        "-lpt",
        "-1.0"
      ];
      if (this.audioLang) {
        args.push("-l", this.audioLang);
      }
      // Ask whisper.cpp to translate when the target subtitle language is
      // English but the audio language is not.
      if (this.srtLang === "en" && this.audioLang !== "en") {
        args.push("-tr");
      }
      const binaryPath = path.join(__dirname2, "whisper", binaryName);
      console.log("Running:", binaryPath, args.join(" "));
      const process2 = spawn(binaryPath, args);
      process2.stdout.on("data", (data) => {
        console.log(`[stdout]: ${data}`);
      });
      process2.stderr.on("data", (data) => {
        console.log(`[stderr]: ${data}`);
      });
      process2.on("close", (code) => {
        if (code !== 0) {
          return reject(new Error(`Process exited with code ${code}`));
        }
        resolve();
      });
      process2.on("error", (err) => {
        reject(err);
      });
    });
  }
}
export {
  Whisper
};
package/init/index.js
ADDED
|
import inquirer from "inquirer";
import path from "path";
import fs from "fs";
import os from "os";
import { execSync } from "child_process";

/**
 * Interactive project setup:
 *   1. prompts for a Whisper model size and downloads it into ./models,
 *   2. installs a whisper.cpp binary into ./whisper — pre-built release zip
 *      on Windows, compiled from source on macOS/Linux.
 * Re-running is idempotent: existing model/binary files are skipped.
 */
const init = async () => {
  const { modelSize } = await inquirer.prompt([
    {
      type: "list",
      name: "modelSize",
      message: "Which Whisper model do you want to download?",
      choices: [
        { name: "Small — fast, lower accuracy", value: "small" },
        { name: "Medium — balanced", value: "medium" },
        { name: "Large — slow, highest accuracy", value: "large" },
      ],
      default: "small",
    },
  ]);

  const modelUrls = {
    small:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.bin",
    medium:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.bin",
    large:
      "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
  };

  const modelUrl = modelUrls[modelSize];

  /**
   * Stream `url` to `dest` with a console progress readout.
   * Skips the download when `dest` already exists. Honors file-stream
   * backpressure so multi-GB models are never buffered fully in memory,
   * flushes the file before reporting completion, and removes the partial
   * file on failure so a re-run does not mistake it for a complete download.
   */
  async function downloadFile(url, dest, label) {
    if (fs.existsSync(dest)) {
      console.log(`${label} already exists at ${dest}`);
      return;
    }

    console.log(`Downloading ${label}...`);
    const res = await fetch(url);

    if (!res.ok) throw new Error(`Failed to download: ${res.statusText}`);

    const total = Number(res.headers.get("content-length")) || 0;
    let downloaded = 0;

    const file = fs.createWriteStream(dest);

    try {
      await res.body.pipeTo(
        new WritableStream({
          write(chunk) {
            downloaded += chunk.length;
            if (total) {
              const percent = ((downloaded / total) * 100).toFixed(2);
              process.stdout.write(`Downloading ${label}... ${percent}%\r`);
            } else {
              process.stdout.write(`Downloaded ${downloaded} bytes\r`);
            }
            // Respect backpressure: returning a pending Promise pauses the
            // web stream until the file stream drains, instead of letting
            // the whole download pile up in the write buffer.
            if (!file.write(chunk)) {
              return new Promise((resolve) => file.once("drain", resolve));
            }
          },
          close() {
            // Resolve only once the file is fully flushed to disk.
            return new Promise((resolve) => {
              file.end(() => {
                console.log(`\n${label} download complete.`);
                resolve();
              });
            });
          },
          abort(err) {
            file.destroy(err);
          },
        }),
      );
    } catch (err) {
      // Remove the partial file so the existsSync guard above does not
      // treat a truncated download as complete on the next run.
      if (fs.existsSync(dest)) fs.unlinkSync(dest);
      throw err;
    }
  }

  // 1. Download the AI Model
  const modelsDir = path.join(process.cwd(), "models");
  if (!fs.existsSync(modelsDir)) {
    fs.mkdirSync(modelsDir, { recursive: true });
  }
  const destModelPath = path.join(modelsDir, path.basename(modelUrl));
  await downloadFile(modelUrl, destModelPath, `${modelSize} model`);

  // 2. Setup the Binary based on OS
  console.log("\nSetting up whisper.cpp binary...");
  const platform = os.platform();
  const whisperDir = path.join(process.cwd(), "whisper");
  const isWindows = platform === "win32";

  if (!fs.existsSync(whisperDir)) {
    fs.mkdirSync(whisperDir, { recursive: true });
  }

  if (isWindows) {
    // Windows: download the pre-built x64 release zip.
    const binaryDest = path.join(whisperDir, "whisper-cli.exe");
    if (!fs.existsSync(binaryDest)) {
      const zipUrl =
        "https://github.com/ggerganov/whisper.cpp/releases/latest/download/whisper-bin-x64.zip";
      const zipPath = path.join(whisperDir, "whisper.zip");

      await downloadFile(zipUrl, zipPath, "Whisper Windows Binary");

      console.log("Extracting binary...");
      // Windows 10+ ships a native tar capable of extracting zip archives.
      execSync(`tar -xf "${zipPath}" -C "${whisperDir}"`);

      // Flatten the "Release" folder (or similar) the zip may contain:
      // move its contents up into whisperDir, then delete it.
      const possibleReleaseDir = path.join(whisperDir, "Release");
      if (fs.existsSync(possibleReleaseDir)) {
        for (const file of fs.readdirSync(possibleReleaseDir)) {
          fs.renameSync(
            path.join(possibleReleaseDir, file),
            path.join(whisperDir, file),
          );
        }
        // rmSync tolerates non-empty leftovers, unlike bare rmdirSync.
        fs.rmSync(possibleReleaseDir, { recursive: true, force: true });
      }

      // Rename the extracted main.exe to the name the wrapper expects.
      const extractedMain = path.join(whisperDir, "main.exe");
      if (fs.existsSync(extractedMain)) {
        fs.renameSync(extractedMain, binaryDest);
      } else {
        console.error("Warning: Could not find main.exe after extraction.");
      }

      // Clean up the zip file so it doesn't clutter the folder.
      if (fs.existsSync(zipPath)) {
        fs.unlinkSync(zipPath);
      }

      console.log("Whisper binary setup complete for Windows.");
    } else {
      console.log("Whisper binary already exists for Windows.");
    }
  } else {
    // macOS / Linux: build whisper.cpp from source.
    const binaryDest = path.join(whisperDir, "whisper");
    if (!fs.existsSync(binaryDest)) {
      console.log("Mac/Linux detected. Building whisper.cpp from source...");
      console.log(
        "Note: This requires 'git', 'make', and a C/C++ compiler (gcc/clang).",
      );

      const repoDir = path.join(whisperDir, "source");
      if (!fs.existsSync(repoDir)) {
        // Shallow clone to save time and bandwidth
        execSync(
          `git clone --depth 1 https://github.com/ggerganov/whisper.cpp.git "${repoDir}"`,
          { stdio: "inherit" },
        );
      }

      console.log("Compiling... This might take a minute.");
      // NOTE(review): recent whisper.cpp moved to a CMake build and renamed
      // the CLI to "whisper-cli"; confirm `make main` still works on HEAD.
      execSync(`cd "${repoDir}" && make main`, { stdio: "inherit" });

      const compiledPath = path.join(repoDir, "main");
      if (fs.existsSync(compiledPath)) {
        fs.copyFileSync(compiledPath, binaryDest);
        execSync(`chmod +x "${binaryDest}"`);
        console.log("Whisper binary compiled and configured successfully.");
      } else {
        console.error(
          "Error: Could not find the compiled binary. Compilation might have failed.",
        );
      }
    } else {
      console.log("Whisper binary already exists.");
    }
  }
};

export default init;
|
package/package.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@codearcade/subtitle-generator",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"main": "index.js",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"module": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "bun ./builder.ts && bun run generate-types",
|
|
16
|
+
"prepublishOnly": "bun run build",
|
|
17
|
+
"generate-types": "bunx tsc --emitDeclarationOnly --declaration --outDir dist",
|
|
18
|
+
"clear": "rm -rf dist"
|
|
19
|
+
},
|
|
20
|
+
"bin": {
|
|
21
|
+
"subtitle-generator": "./cli/index.js"
|
|
22
|
+
},
|
|
23
|
+
"files": [
|
|
24
|
+
"dist",
|
|
25
|
+
"cli",
|
|
26
|
+
"init"
|
|
27
|
+
],
|
|
28
|
+
"homepage": "https://github.com/codearcade-io/subtitle-generator#readme",
|
|
29
|
+
"repository": {
|
|
30
|
+
"type": "git",
|
|
31
|
+
"url": "git+https://github.com/codearcade-io/subtitle-generator.git"
|
|
32
|
+
},
|
|
33
|
+
"bugs": "https://github.com/codearcade-io/subtitle-generator/issues",
|
|
34
|
+
"author": "Abhishek Singh <official.6packprogrammer@gmail.com>",
|
|
35
|
+
"license": "MIT",
|
|
36
|
+
"publishConfig": {
|
|
37
|
+
"access": "public"
|
|
38
|
+
},
|
|
39
|
+
"description": "Generate subtitles from audio files using OpenAI Whisper with support for SRT, VTT, and TXT formats. Automatically downloads required binaries and models, with cross-platform support and configurable performance options.",
|
|
40
|
+
"keywords": [
|
|
41
|
+
"whisper",
|
|
42
|
+
"subtitle",
|
|
43
|
+
"subtitles",
|
|
44
|
+
"speech-to-text",
|
|
45
|
+
"transcription",
|
|
46
|
+
"audio",
|
|
47
|
+
"srt",
|
|
48
|
+
"vtt",
|
|
49
|
+
"nodejs",
|
|
50
|
+
"npm",
|
|
51
|
+
"openai",
|
|
52
|
+
"ai",
|
|
53
|
+
"speech-recognition",
|
|
54
|
+
"cli",
|
|
55
|
+
"cross-platform"
|
|
56
|
+
],
|
|
57
|
+
"devDependencies": {
|
|
58
|
+
"@types/bun": "^1.3.11"
|
|
59
|
+
},
|
|
60
|
+
"dependencies": {
|
|
61
|
+
"inquirer": "^13.3.2"
|
|
62
|
+
}
|
|
63
|
+
}
|