@derogab/stt-proxy 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -21
- package/dist/cjs/index.js +133 -0
- package/dist/cjs/index.js.map +1 -0
- package/dist/cjs/package.json +1 -0
- package/dist/esm/index.js +130 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/types/index.d.ts +10 -0
- package/dist/types/index.d.ts.map +1 -0
- package/package.json +4 -1
- package/.github/workflows/release.yml +0 -131
- package/.github/workflows/tests.yml +0 -42
- package/CLAUDE.md +0 -47
- package/src/index.ts +0 -176
- package/test/index.test.ts +0 -172
- package/test/whisper-cpp.integration.test.ts +0 -135
- package/tsconfig.cjs.json +0 -14
- package/tsconfig.esm.json +0 -14
- package/tsconfig.json +0 -20
- package/tsconfig.types.json +0 -15
- package/vitest.config.ts +0 -13
package/README.md
CHANGED
|
@@ -42,10 +42,10 @@ curl -L -o ggml-base.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/ma
|
|
|
42
42
|
|
|
43
43
|
### `transcribe(audio: string | Buffer, options?): Promise<TranscribeOutput>`
|
|
44
44
|
|
|
45
|
-
Transcribes audio to text using the configured STT provider.
|
|
45
|
+
Transcribes audio to text using the configured STT provider. The package automatically manages provider initialization and cleanup.
|
|
46
46
|
|
|
47
47
|
**Parameters:**
|
|
48
|
-
- `audio`: Path to audio file or audio Buffer
|
|
48
|
+
- `audio`: Path to audio file (string) or audio Buffer
|
|
49
49
|
- `options` (optional): Transcription options
|
|
50
50
|
|
|
51
51
|
**Returns:**
|
|
@@ -66,25 +66,24 @@ type TranscribeOutput = {
|
|
|
66
66
|
};
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
Get HuggingFace download URL for a model.
|
|
69
|
+
**Example:**
|
|
70
|
+
```typescript
|
|
71
|
+
// Transcribe from file path
|
|
72
|
+
const result1 = await transcribe('/path/to/audio.wav');
|
|
73
|
+
console.log(result1.text);
|
|
74
|
+
|
|
75
|
+
// Transcribe from Buffer
|
|
76
|
+
const audioBuffer = fs.readFileSync('/path/to/audio.wav');
|
|
77
|
+
const result2 = await transcribe(audioBuffer);
|
|
78
|
+
console.log(result2.text);
|
|
79
|
+
|
|
80
|
+
// With options
|
|
81
|
+
const result3 = await transcribe('/path/to/audio.wav', {
|
|
82
|
+
language: 'en',
|
|
83
|
+
translate: false
|
|
84
|
+
});
|
|
85
|
+
console.log(result3.text);
|
|
86
|
+
```
|
|
88
87
|
|
|
89
88
|
## Provider Priority
|
|
90
89
|
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.transcribe = transcribe;
|
|
4
|
+
require("dotenv/config");
|
|
5
|
+
const fs = require("fs");
|
|
6
|
+
const path = require("path");
|
|
7
|
+
const os = require("os");
|
|
8
|
+
const child_process_1 = require("child_process");
|
|
9
|
+
let whisperInstance = null;
|
|
10
|
+
let currentModelPath = null;
|
|
11
|
+
function getWhisperModelPath() {
|
|
12
|
+
return process.env['WHISPER_CPP_MODEL_PATH'];
|
|
13
|
+
}
|
|
14
|
+
function isWhisperConfigured() {
|
|
15
|
+
const modelPath = getWhisperModelPath();
|
|
16
|
+
return modelPath !== undefined && fs.existsSync(modelPath);
|
|
17
|
+
}
|
|
18
|
+
async function getWhisperInstance() {
|
|
19
|
+
const modelPath = getWhisperModelPath();
|
|
20
|
+
if (!modelPath) {
|
|
21
|
+
throw new Error('WHISPER_CPP_MODEL_PATH environment variable is not set');
|
|
22
|
+
}
|
|
23
|
+
if (!fs.existsSync(modelPath)) {
|
|
24
|
+
throw new Error(`Whisper model not found at path: ${modelPath}`);
|
|
25
|
+
}
|
|
26
|
+
if (whisperInstance && currentModelPath === modelPath) {
|
|
27
|
+
return whisperInstance;
|
|
28
|
+
}
|
|
29
|
+
if (whisperInstance) {
|
|
30
|
+
await whisperInstance.free();
|
|
31
|
+
whisperInstance = null;
|
|
32
|
+
}
|
|
33
|
+
const { Whisper } = await Promise.resolve().then(() => require('smart-whisper'));
|
|
34
|
+
whisperInstance = new Whisper(modelPath, { gpu: true });
|
|
35
|
+
currentModelPath = modelPath;
|
|
36
|
+
return whisperInstance;
|
|
37
|
+
}
|
|
38
|
+
function audioToPcm(audioPath) {
|
|
39
|
+
const tempDir = os.tmpdir();
|
|
40
|
+
const tempPcmPath = path.join(tempDir, `whisper_${Date.now()}_${Math.random().toString(36).substring(7)}.pcm`);
|
|
41
|
+
try {
|
|
42
|
+
(0, child_process_1.execSync)(`ffmpeg -y -i "${audioPath}" -ar 16000 -ac 1 -f f32le "${tempPcmPath}"`, { stdio: 'pipe' });
|
|
43
|
+
const pcmBuffer = fs.readFileSync(tempPcmPath);
|
|
44
|
+
return new Float32Array(pcmBuffer.buffer, pcmBuffer.byteOffset, pcmBuffer.length / 4);
|
|
45
|
+
}
|
|
46
|
+
finally {
|
|
47
|
+
if (fs.existsSync(tempPcmPath)) {
|
|
48
|
+
fs.unlinkSync(tempPcmPath);
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
function cleanTranscription(text) {
|
|
53
|
+
return text
|
|
54
|
+
.replace(/[\x00-\x1F\x7F]/g, '')
|
|
55
|
+
.trim();
|
|
56
|
+
}
|
|
57
|
+
function resultsToText(results) {
|
|
58
|
+
return results.map((r) => r.text).join(' ');
|
|
59
|
+
}
|
|
60
|
+
async function transcribe_whispercpp(audioPath, options = {}) {
|
|
61
|
+
if (!fs.existsSync(audioPath)) {
|
|
62
|
+
throw new Error(`Audio file not found: ${audioPath}`);
|
|
63
|
+
}
|
|
64
|
+
const whisper = await getWhisperInstance();
|
|
65
|
+
const pcmData = audioToPcm(audioPath);
|
|
66
|
+
const transcribeParams = {
|
|
67
|
+
format: 'simple',
|
|
68
|
+
};
|
|
69
|
+
if (options.language !== undefined) {
|
|
70
|
+
transcribeParams.language = options.language;
|
|
71
|
+
}
|
|
72
|
+
if (options.translate !== undefined) {
|
|
73
|
+
transcribeParams.translate = options.translate;
|
|
74
|
+
}
|
|
75
|
+
const task = await whisper.transcribe(pcmData, transcribeParams);
|
|
76
|
+
const results = await task.result;
|
|
77
|
+
const text = resultsToText(results);
|
|
78
|
+
return {
|
|
79
|
+
text: cleanTranscription(text),
|
|
80
|
+
};
|
|
81
|
+
}
|
|
82
|
+
async function transcribe(audio, options = {}) {
|
|
83
|
+
const modelPath = getWhisperModelPath();
|
|
84
|
+
if (modelPath) {
|
|
85
|
+
if (Buffer.isBuffer(audio)) {
|
|
86
|
+
return transcribeBuffer(audio, options);
|
|
87
|
+
}
|
|
88
|
+
return transcribe_whispercpp(audio, options);
|
|
89
|
+
}
|
|
90
|
+
throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
|
|
91
|
+
}
|
|
92
|
+
async function transcribeBuffer(audioBuffer, options = {}) {
|
|
93
|
+
const modelPath = getWhisperModelPath();
|
|
94
|
+
if (!modelPath) {
|
|
95
|
+
throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
|
|
96
|
+
}
|
|
97
|
+
const tempDir = os.tmpdir();
|
|
98
|
+
const tempPath = path.join(tempDir, `whisper_input_${Date.now()}_${Math.random().toString(36).substring(7)}.audio`);
|
|
99
|
+
fs.writeFileSync(tempPath, audioBuffer);
|
|
100
|
+
try {
|
|
101
|
+
const result = await transcribe_whispercpp(tempPath, options);
|
|
102
|
+
return result;
|
|
103
|
+
}
|
|
104
|
+
finally {
|
|
105
|
+
if (fs.existsSync(tempPath)) {
|
|
106
|
+
fs.unlinkSync(tempPath);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
async function freeWhisper() {
|
|
111
|
+
if (whisperInstance) {
|
|
112
|
+
await whisperInstance.free();
|
|
113
|
+
whisperInstance = null;
|
|
114
|
+
currentModelPath = null;
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
// Automatically clean up Whisper instance on process exit
|
|
118
|
+
process.on('exit', () => {
|
|
119
|
+
if (whisperInstance) {
|
|
120
|
+
// Note: Cannot use async operations in 'exit' handler
|
|
121
|
+
// The instance will be cleaned up by the process termination
|
|
122
|
+
whisperInstance = null;
|
|
123
|
+
currentModelPath = null;
|
|
124
|
+
}
|
|
125
|
+
});
|
|
126
|
+
// Handle graceful shutdown signals
|
|
127
|
+
const shutdownHandler = async () => {
|
|
128
|
+
await freeWhisper();
|
|
129
|
+
process.exit(0);
|
|
130
|
+
};
|
|
131
|
+
process.on('SIGINT', shutdownHandler);
|
|
132
|
+
process.on('SIGTERM', shutdownHandler);
|
|
133
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;AAiHA,gCAWC;AA5HD,yBAAuB;AACvB,yBAAyB;AACzB,6BAA6B;AAC7B,yBAAyB;AACzB,iDAAyC;AAYzC,IAAI,eAAe,GAAmB,IAAI,CAAC;AAC3C,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAE3C,SAAS,mBAAmB;IAC1B,OAAO,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,mBAAmB;IAC1B,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IACxC,OAAO,SAAS,KAAK,SAAS,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;AAC7D,CAAC;AAED,KAAK,UAAU,kBAAkB;IAC/B,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,IAAI,eAAe,IAAI,gBAAgB,KAAK,SAAS,EAAE,CAAC;QACtD,OAAO,eAAe,CAAC;IACzB,CAAC;IAED,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,IAAI,EAAE,CAAC;QAC7B,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,GAAG,2CAAa,eAAe,EAAC,CAAC;IAClD,eAAe,GAAG,IAAI,OAAO,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,gBAAgB,GAAG,SAAS,CAAC;IAE7B,OAAO,eAAe,CAAC;AACzB,CAAC;AAED,SAAS,UAAU,CAAC,SAAiB;IACnC,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE/G,IAAI,CAAC;QACH,IAAA,wBAAQ,EACN,iBAAiB,SAAS,+BAA+B,WAAW,GAAG,EACvE,EAAE,KAAK,EAAE,MAAM,EAAE,CAClB,CAAC;QAEF,MAAM,SAAS,GAAG,EAAE,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;QAC/C,OAAO,IAAI,YAAY,CAAC,SAAS,CAAC,MAAM,EAAE,SAAS,CAAC,UAAU,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACxF,CAAC;YAAS,CAAC;QACT,IAAI,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,IAAI;SACR,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;SAC/B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,OAAqC;IAC1D,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9C,CAAC;AAED,KAAK,UAAU,qB
AAqB,CAAC,SAAiB,EAAE,UAA6B,EAAE;IACrF,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,yBAAyB,SAAS,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC3C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IAEtC,MAAM,gBAAgB,GAAiE;QACrF,MAAM,EAAE,QAAQ;KACjB,CAAC;IAEF,IAAI,OAAO,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnC,gBAAgB,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;QACpC,gBAAgB,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACjD,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC;IAClC,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,IAAI,CAAC;KAC/B,CAAC;AACJ,CAAC;AAEM,KAAK,UAAU,UAAU,CAAC,KAAsB,EAAE,UAA6B,EAAE;IACtF,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,gBAAgB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;QACD,OAAO,qBAAqB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;AAClG,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,WAAmB,EAAE,UAA6B,EAAE;IAClF,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IAClG,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAEpH,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;IAExC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC9D,OAAO,MAAM,CAAC;IAChB,CAAC;YAAS,CAAC;QACT,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,WAAW;IACxB,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,IAAI,EAAE,CAAC;QAC7B,eAAe,GAAG,IAAI,CAAC;QACvB,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;AACH,CAAC;AAED,0DAA0D;AAC1D,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACtB,IAAI,eAAe,EAAE,CAAC;QACpB,sDAAsD;Q
ACtD,6DAA6D;QAC7D,eAAe,GAAG,IAAI,CAAC;QACvB,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,mCAAmC;AACnC,MAAM,eAAe,GAAG,KAAK,IAAI,EAAE;IACjC,MAAM,WAAW,EAAE,CAAC;IACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC;AAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;AACtC,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"type":"commonjs"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import 'dotenv/config';
|
|
2
|
+
import * as fs from 'fs';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
import * as os from 'os';
|
|
5
|
+
import { execSync } from 'child_process';
|
|
6
|
+
let whisperInstance = null;
|
|
7
|
+
let currentModelPath = null;
|
|
8
|
+
function getWhisperModelPath() {
|
|
9
|
+
return process.env['WHISPER_CPP_MODEL_PATH'];
|
|
10
|
+
}
|
|
11
|
+
function isWhisperConfigured() {
|
|
12
|
+
const modelPath = getWhisperModelPath();
|
|
13
|
+
return modelPath !== undefined && fs.existsSync(modelPath);
|
|
14
|
+
}
|
|
15
|
+
async function getWhisperInstance() {
|
|
16
|
+
const modelPath = getWhisperModelPath();
|
|
17
|
+
if (!modelPath) {
|
|
18
|
+
throw new Error('WHISPER_CPP_MODEL_PATH environment variable is not set');
|
|
19
|
+
}
|
|
20
|
+
if (!fs.existsSync(modelPath)) {
|
|
21
|
+
throw new Error(`Whisper model not found at path: ${modelPath}`);
|
|
22
|
+
}
|
|
23
|
+
if (whisperInstance && currentModelPath === modelPath) {
|
|
24
|
+
return whisperInstance;
|
|
25
|
+
}
|
|
26
|
+
if (whisperInstance) {
|
|
27
|
+
await whisperInstance.free();
|
|
28
|
+
whisperInstance = null;
|
|
29
|
+
}
|
|
30
|
+
const { Whisper } = await import('smart-whisper');
|
|
31
|
+
whisperInstance = new Whisper(modelPath, { gpu: true });
|
|
32
|
+
currentModelPath = modelPath;
|
|
33
|
+
return whisperInstance;
|
|
34
|
+
}
|
|
35
|
+
function audioToPcm(audioPath) {
|
|
36
|
+
const tempDir = os.tmpdir();
|
|
37
|
+
const tempPcmPath = path.join(tempDir, `whisper_${Date.now()}_${Math.random().toString(36).substring(7)}.pcm`);
|
|
38
|
+
try {
|
|
39
|
+
execSync(`ffmpeg -y -i "${audioPath}" -ar 16000 -ac 1 -f f32le "${tempPcmPath}"`, { stdio: 'pipe' });
|
|
40
|
+
const pcmBuffer = fs.readFileSync(tempPcmPath);
|
|
41
|
+
return new Float32Array(pcmBuffer.buffer, pcmBuffer.byteOffset, pcmBuffer.length / 4);
|
|
42
|
+
}
|
|
43
|
+
finally {
|
|
44
|
+
if (fs.existsSync(tempPcmPath)) {
|
|
45
|
+
fs.unlinkSync(tempPcmPath);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
function cleanTranscription(text) {
|
|
50
|
+
return text
|
|
51
|
+
.replace(/[\x00-\x1F\x7F]/g, '')
|
|
52
|
+
.trim();
|
|
53
|
+
}
|
|
54
|
+
function resultsToText(results) {
|
|
55
|
+
return results.map((r) => r.text).join(' ');
|
|
56
|
+
}
|
|
57
|
+
async function transcribe_whispercpp(audioPath, options = {}) {
|
|
58
|
+
if (!fs.existsSync(audioPath)) {
|
|
59
|
+
throw new Error(`Audio file not found: ${audioPath}`);
|
|
60
|
+
}
|
|
61
|
+
const whisper = await getWhisperInstance();
|
|
62
|
+
const pcmData = audioToPcm(audioPath);
|
|
63
|
+
const transcribeParams = {
|
|
64
|
+
format: 'simple',
|
|
65
|
+
};
|
|
66
|
+
if (options.language !== undefined) {
|
|
67
|
+
transcribeParams.language = options.language;
|
|
68
|
+
}
|
|
69
|
+
if (options.translate !== undefined) {
|
|
70
|
+
transcribeParams.translate = options.translate;
|
|
71
|
+
}
|
|
72
|
+
const task = await whisper.transcribe(pcmData, transcribeParams);
|
|
73
|
+
const results = await task.result;
|
|
74
|
+
const text = resultsToText(results);
|
|
75
|
+
return {
|
|
76
|
+
text: cleanTranscription(text),
|
|
77
|
+
};
|
|
78
|
+
}
|
|
79
|
+
export async function transcribe(audio, options = {}) {
|
|
80
|
+
const modelPath = getWhisperModelPath();
|
|
81
|
+
if (modelPath) {
|
|
82
|
+
if (Buffer.isBuffer(audio)) {
|
|
83
|
+
return transcribeBuffer(audio, options);
|
|
84
|
+
}
|
|
85
|
+
return transcribe_whispercpp(audio, options);
|
|
86
|
+
}
|
|
87
|
+
throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
|
|
88
|
+
}
|
|
89
|
+
async function transcribeBuffer(audioBuffer, options = {}) {
|
|
90
|
+
const modelPath = getWhisperModelPath();
|
|
91
|
+
if (!modelPath) {
|
|
92
|
+
throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
|
|
93
|
+
}
|
|
94
|
+
const tempDir = os.tmpdir();
|
|
95
|
+
const tempPath = path.join(tempDir, `whisper_input_${Date.now()}_${Math.random().toString(36).substring(7)}.audio`);
|
|
96
|
+
fs.writeFileSync(tempPath, audioBuffer);
|
|
97
|
+
try {
|
|
98
|
+
const result = await transcribe_whispercpp(tempPath, options);
|
|
99
|
+
return result;
|
|
100
|
+
}
|
|
101
|
+
finally {
|
|
102
|
+
if (fs.existsSync(tempPath)) {
|
|
103
|
+
fs.unlinkSync(tempPath);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
async function freeWhisper() {
|
|
108
|
+
if (whisperInstance) {
|
|
109
|
+
await whisperInstance.free();
|
|
110
|
+
whisperInstance = null;
|
|
111
|
+
currentModelPath = null;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
// Automatically clean up Whisper instance on process exit
|
|
115
|
+
process.on('exit', () => {
|
|
116
|
+
if (whisperInstance) {
|
|
117
|
+
// Note: Cannot use async operations in 'exit' handler
|
|
118
|
+
// The instance will be cleaned up by the process termination
|
|
119
|
+
whisperInstance = null;
|
|
120
|
+
currentModelPath = null;
|
|
121
|
+
}
|
|
122
|
+
});
|
|
123
|
+
// Handle graceful shutdown signals
|
|
124
|
+
const shutdownHandler = async () => {
|
|
125
|
+
await freeWhisper();
|
|
126
|
+
process.exit(0);
|
|
127
|
+
};
|
|
128
|
+
process.on('SIGINT', shutdownHandler);
|
|
129
|
+
process.on('SIGTERM', shutdownHandler);
|
|
130
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AAC7B,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AACzB,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAYzC,IAAI,eAAe,GAAmB,IAAI,CAAC;AAC3C,IAAI,gBAAgB,GAAkB,IAAI,CAAC;AAE3C,SAAS,mBAAmB;IAC1B,OAAO,OAAO,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,mBAAmB;IAC1B,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IACxC,OAAO,SAAS,KAAK,SAAS,IAAI,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;AAC7D,CAAC;AAED,KAAK,UAAU,kBAAkB;IAC/B,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;IAC5E,CAAC;IAED,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,oCAAoC,SAAS,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,IAAI,eAAe,IAAI,gBAAgB,KAAK,SAAS,EAAE,CAAC;QACtD,OAAO,eAAe,CAAC;IACzB,CAAC;IAED,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,IAAI,EAAE,CAAC;QAC7B,eAAe,GAAG,IAAI,CAAC;IACzB,CAAC;IAED,MAAM,EAAE,OAAO,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;IAClD,eAAe,GAAG,IAAI,OAAO,CAAC,SAAS,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACxD,gBAAgB,GAAG,SAAS,CAAC;IAE7B,OAAO,eAAe,CAAC;AACzB,CAAC;AAED,SAAS,UAAU,CAAC,SAAiB;IACnC,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,WAAW,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;IAE/G,IAAI,CAAC;QACH,QAAQ,CACN,iBAAiB,SAAS,+BAA+B,WAAW,GAAG,EACvE,EAAE,KAAK,EAAE,MAAM,EAAE,CAClB,CAAC;QAEF,MAAM,SAAS,GAAG,EAAE,CAAC,YAAY,CAAC,WAAW,CAAC,CAAC;QAC/C,OAAO,IAAI,YAAY,CAAC,SAAS,CAAC,MAAM,EAAE,SAAS,CAAC,UAAU,EAAE,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACxF,CAAC;YAAS,CAAC;QACT,IAAI,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;AACH,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACtC,OAAO,IAAI;SACR,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;SAC/B,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,OAAqC;IAC1D,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,
EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9C,CAAC;AAED,KAAK,UAAU,qBAAqB,CAAC,SAAiB,EAAE,UAA6B,EAAE;IACrF,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,yBAAyB,SAAS,EAAE,CAAC,CAAC;IACxD,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC3C,MAAM,OAAO,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC;IAEtC,MAAM,gBAAgB,GAAiE;QACrF,MAAM,EAAE,QAAQ;KACjB,CAAC;IAEF,IAAI,OAAO,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QACnC,gBAAgB,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;IAC/C,CAAC;IAED,IAAI,OAAO,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;QACpC,gBAAgB,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACjD,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC;IAClC,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAEpC,OAAO;QACL,IAAI,EAAE,kBAAkB,CAAC,IAAI,CAAC;KAC/B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,KAAsB,EAAE,UAA6B,EAAE;IACtF,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,SAAS,EAAE,CAAC;QACd,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,gBAAgB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAC1C,CAAC;QACD,OAAO,qBAAqB,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;AAClG,CAAC;AAED,KAAK,UAAU,gBAAgB,CAAC,WAAmB,EAAE,UAA6B,EAAE;IAClF,MAAM,SAAS,GAAG,mBAAmB,EAAE,CAAC;IAExC,IAAI,CAAC,SAAS,EAAE,CAAC;QACf,MAAM,IAAI,KAAK,CAAC,8EAA8E,CAAC,CAAC;IAClG,CAAC;IAED,MAAM,OAAO,GAAG,EAAE,CAAC,MAAM,EAAE,CAAC;IAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,iBAAiB,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAEpH,EAAE,CAAC,aAAa,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;IAExC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,MAAM,qBAAqB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QAC9D,OAAO,MAAM,CAAC;IAChB,CAAC;YAAS,CAAC;QACT,IAAI,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC5B,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,KAAK,UAAU,WAAW;IACxB,IAAI,eAAe,EAAE,CAAC;QACpB,MAAM,eAAe,CAAC,IAAI,EAAE,CAAC;QAC7B,eAAe,GAAG,IAAI,CAAC;QACvB,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;AACH,
CAAC;AAED,0DAA0D;AAC1D,OAAO,CAAC,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE;IACtB,IAAI,eAAe,EAAE,CAAC;QACpB,sDAAsD;QACtD,6DAA6D;QAC7D,eAAe,GAAG,IAAI,CAAC;QACvB,gBAAgB,GAAG,IAAI,CAAC;IAC1B,CAAC;AACH,CAAC,CAAC,CAAC;AAEH,mCAAmC;AACnC,MAAM,eAAe,GAAG,KAAK,IAAI,EAAE;IACjC,MAAM,WAAW,EAAE,CAAC;IACpB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC;AAEF,OAAO,CAAC,EAAE,CAAC,QAAQ,EAAE,eAAe,CAAC,CAAC;AACtC,OAAO,CAAC,EAAE,CAAC,SAAS,EAAE,eAAe,CAAC,CAAC"}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import 'dotenv/config';
|
|
2
|
+
export interface TranscribeOptions {
|
|
3
|
+
language?: string;
|
|
4
|
+
translate?: boolean;
|
|
5
|
+
}
|
|
6
|
+
export interface TranscribeOutput {
|
|
7
|
+
text: string;
|
|
8
|
+
}
|
|
9
|
+
export declare function transcribe(audio: string | Buffer, options?: TranscribeOptions): Promise<TranscribeOutput>;
|
|
10
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,CAAC;AAOvB,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,OAAO,CAAC;CACrB;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAmGD,wBAAsB,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,MAAM,EAAE,OAAO,GAAE,iBAAsB,GAAG,OAAO,CAAC,gBAAgB,CAAC,CAWnH"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@derogab/stt-proxy",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "A simple and lightweight proxy for seamless integration with multiple STT (Speech-to-Text) providers including Whisper.cpp",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/cjs/index.js",
|
|
@@ -18,6 +18,9 @@
|
|
|
18
18
|
}
|
|
19
19
|
}
|
|
20
20
|
},
|
|
21
|
+
"files": [
|
|
22
|
+
"dist"
|
|
23
|
+
],
|
|
21
24
|
"scripts": {
|
|
22
25
|
"build": "npm run build:cjs && npm run build:esm && npm run build:types",
|
|
23
26
|
"build:cjs": "tsc -p tsconfig.cjs.json && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json",
|
|
@@ -1,131 +0,0 @@
|
|
|
1
|
-
name: Release and publish package to NPM
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
# Publish `v1.2.3` tags as releases.
|
|
6
|
-
tags:
|
|
7
|
-
- v*
|
|
8
|
-
|
|
9
|
-
jobs:
|
|
10
|
-
# Release the TAG to GitHub.
|
|
11
|
-
release:
|
|
12
|
-
name: Release pushed tag
|
|
13
|
-
if: startsWith(github.ref, 'refs/tags/')
|
|
14
|
-
permissions:
|
|
15
|
-
contents: write
|
|
16
|
-
runs-on: ubuntu-latest
|
|
17
|
-
steps:
|
|
18
|
-
- name: Create release
|
|
19
|
-
env:
|
|
20
|
-
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
21
|
-
tag: ${{ github.ref_name }}
|
|
22
|
-
run: |
|
|
23
|
-
gh release create "$tag" \
|
|
24
|
-
--repo="$GITHUB_REPOSITORY" \
|
|
25
|
-
--title="v${tag#v}" \
|
|
26
|
-
--generate-notes
|
|
27
|
-
# Publish the package.
|
|
28
|
-
publish-npm:
|
|
29
|
-
name: Publish Package on NPM
|
|
30
|
-
needs: release
|
|
31
|
-
runs-on: ubuntu-latest
|
|
32
|
-
permissions:
|
|
33
|
-
contents: read
|
|
34
|
-
id-token: write
|
|
35
|
-
steps:
|
|
36
|
-
- name: Checkout
|
|
37
|
-
uses: actions/checkout@v6
|
|
38
|
-
- name: Setup Node
|
|
39
|
-
uses: actions/setup-node@v6
|
|
40
|
-
with:
|
|
41
|
-
node-version: '20.x'
|
|
42
|
-
cache: 'npm'
|
|
43
|
-
registry-url: 'https://registry.npmjs.org'
|
|
44
|
-
- name: Install FFmpeg
|
|
45
|
-
run: sudo apt-get update && sudo apt-get install -y ffmpeg
|
|
46
|
-
- name: Cache Whisper model
|
|
47
|
-
uses: actions/cache@v4
|
|
48
|
-
with:
|
|
49
|
-
path: test/models
|
|
50
|
-
key: whisper-model-tiny-v1
|
|
51
|
-
- name: Cache test audio
|
|
52
|
-
uses: actions/cache@v4
|
|
53
|
-
with:
|
|
54
|
-
path: test/audio
|
|
55
|
-
key: test-audio-jfk-v1
|
|
56
|
-
- name: Install dependencies (clean)
|
|
57
|
-
run: npm ci
|
|
58
|
-
- name: Type check
|
|
59
|
-
run: npm run typecheck
|
|
60
|
-
- name: Run tests
|
|
61
|
-
run: npm test --if-present
|
|
62
|
-
- name: Build
|
|
63
|
-
run: npm run build
|
|
64
|
-
- name: Verify tag matches package.json version
|
|
65
|
-
run: |
|
|
66
|
-
PKG_VERSION="$(node -p "require('./package.json').version")"
|
|
67
|
-
TAG_VERSION="${GITHUB_REF_NAME#v}" # supports tags like v1.2.3
|
|
68
|
-
echo "package.json: $PKG_VERSION"
|
|
69
|
-
echo "release tag: $TAG_VERSION"
|
|
70
|
-
if [ "$PKG_VERSION" != "$TAG_VERSION" ]; then
|
|
71
|
-
echo "Release tag ($TAG_VERSION) does not match package.json version ($PKG_VERSION)."
|
|
72
|
-
exit 1
|
|
73
|
-
fi
|
|
74
|
-
- name: Show publish contents (dry run)
|
|
75
|
-
run: npm pack --dry-run
|
|
76
|
-
- name: Publish to npm (with provenance)
|
|
77
|
-
env:
|
|
78
|
-
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
79
|
-
run: npm publish --provenance --access public
|
|
80
|
-
publish-github:
|
|
81
|
-
name: Publish Package on GitHub
|
|
82
|
-
needs: release
|
|
83
|
-
runs-on: ubuntu-latest
|
|
84
|
-
permissions:
|
|
85
|
-
contents: read
|
|
86
|
-
id-token: write
|
|
87
|
-
steps:
|
|
88
|
-
- name: Checkout
|
|
89
|
-
uses: actions/checkout@v6
|
|
90
|
-
- name: Setup Node
|
|
91
|
-
uses: actions/setup-node@v6
|
|
92
|
-
with:
|
|
93
|
-
node-version: '20.x'
|
|
94
|
-
cache: 'npm'
|
|
95
|
-
registry-url: 'https://npm.pkg.github.com'
|
|
96
|
-
- name: Install FFmpeg
|
|
97
|
-
run: sudo apt-get update && sudo apt-get install -y ffmpeg
|
|
98
|
-
- name: Cache Whisper model
|
|
99
|
-
uses: actions/cache@v4
|
|
100
|
-
with:
|
|
101
|
-
path: test/models
|
|
102
|
-
key: whisper-model-tiny-v1
|
|
103
|
-
- name: Cache test audio
|
|
104
|
-
uses: actions/cache@v4
|
|
105
|
-
with:
|
|
106
|
-
path: test/audio
|
|
107
|
-
key: test-audio-jfk-v1
|
|
108
|
-
- name: Install dependencies (clean)
|
|
109
|
-
run: npm ci
|
|
110
|
-
- name: Type check
|
|
111
|
-
run: npm run typecheck
|
|
112
|
-
- name: Run tests
|
|
113
|
-
run: npm test --if-present
|
|
114
|
-
- name: Build
|
|
115
|
-
run: npm run build
|
|
116
|
-
- name: Verify tag matches package.json version
|
|
117
|
-
run: |
|
|
118
|
-
PKG_VERSION="$(node -p "require('./package.json').version")"
|
|
119
|
-
TAG_VERSION="${GITHUB_REF_NAME#v}" # supports tags like v1.2.3
|
|
120
|
-
echo "package.json: $PKG_VERSION"
|
|
121
|
-
echo "release tag: $TAG_VERSION"
|
|
122
|
-
if [ "$PKG_VERSION" != "$TAG_VERSION" ]; then
|
|
123
|
-
echo "Release tag ($TAG_VERSION) does not match package.json version ($PKG_VERSION)."
|
|
124
|
-
exit 1
|
|
125
|
-
fi
|
|
126
|
-
- name: Show publish contents (dry run)
|
|
127
|
-
run: npm pack --dry-run
|
|
128
|
-
- name: Publish to GitHub Packages (with provenance)
|
|
129
|
-
env:
|
|
130
|
-
NODE_AUTH_TOKEN: ${{ secrets.NPM_GITHUB_TOKEN }}
|
|
131
|
-
run: npm publish --provenance --access public
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
name: Tests
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches:
|
|
6
|
-
- master
|
|
7
|
-
pull_request:
|
|
8
|
-
branches:
|
|
9
|
-
- master
|
|
10
|
-
|
|
11
|
-
jobs:
|
|
12
|
-
tests:
|
|
13
|
-
name: Run tests
|
|
14
|
-
runs-on: ubuntu-latest
|
|
15
|
-
steps:
|
|
16
|
-
- name: Checkout
|
|
17
|
-
uses: actions/checkout@v6
|
|
18
|
-
- name: Setup Node
|
|
19
|
-
uses: actions/setup-node@v6
|
|
20
|
-
with:
|
|
21
|
-
node-version: '20.x'
|
|
22
|
-
cache: 'npm'
|
|
23
|
-
- name: Install FFmpeg
|
|
24
|
-
run: sudo apt-get update && sudo apt-get install -y ffmpeg
|
|
25
|
-
- name: Cache Whisper model
|
|
26
|
-
uses: actions/cache@v4
|
|
27
|
-
with:
|
|
28
|
-
path: test/models
|
|
29
|
-
key: whisper-model-tiny-v1
|
|
30
|
-
- name: Cache test audio
|
|
31
|
-
uses: actions/cache@v4
|
|
32
|
-
with:
|
|
33
|
-
path: test/audio
|
|
34
|
-
key: test-audio-jfk-v1
|
|
35
|
-
- name: Install dependencies
|
|
36
|
-
run: npm ci
|
|
37
|
-
- name: Type check
|
|
38
|
-
run: npm run typecheck
|
|
39
|
-
- name: Build project
|
|
40
|
-
run: npm run build
|
|
41
|
-
- name: Run all tests
|
|
42
|
-
run: npm test
|
package/CLAUDE.md
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
# CLAUDE.md
|
|
2
|
-
|
|
3
|
-
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
-
|
|
5
|
-
## Build Commands
|
|
6
|
-
|
|
7
|
-
```bash
|
|
8
|
-
npm install # Install dependencies
|
|
9
|
-
npm run build # Build all outputs (CJS, ESM, and types)
|
|
10
|
-
npm run build:cjs # Build CommonJS output only
|
|
11
|
-
npm run build:esm # Build ESM output only
|
|
12
|
-
npm run build:types # Build type declarations only
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
## Test Commands
|
|
16
|
-
|
|
17
|
-
```bash
|
|
18
|
-
npm test # Run all tests (unit + integration)
|
|
19
|
-
npm run test:unit # Run unit tests only
|
|
20
|
-
npm run test:whisper # Run Whisper.cpp integration tests only
|
|
21
|
-
npm run test:watch # Run tests in watch mode
|
|
22
|
-
npm run test:coverage # Run tests with coverage report
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
**Important**: Always run `npm test` after making changes to verify nothing is broken. Tests are located in the `test/` folder.
|
|
26
|
-
|
|
27
|
-
Tests are written using Vitest and cover:
|
|
28
|
-
- Provider selection logic (Whisper.cpp priority)
|
|
29
|
-
- Error handling for all providers
|
|
30
|
-
- Audio transcription functionality
|
|
31
|
-
- API request formatting
|
|
32
|
-
|
|
33
|
-
## Architecture
|
|
34
|
-
|
|
35
|
-
This is a TypeScript npm package (`@derogab/stt-proxy`) that provides a unified interface for multiple STT providers. The entire implementation is in a single file: `src/index.ts`.
|
|
36
|
-
|
|
37
|
-
### Provider Selection
|
|
38
|
-
|
|
39
|
-
The `transcribe()` function automatically selects a provider based on environment variables in this priority order:
|
|
40
|
-
1. **Whisper.cpp** - if `WHISPER_CPP_MODEL_PATH` is set
|
|
41
|
-
|
|
42
|
-
### Build Output
|
|
43
|
-
|
|
44
|
-
The package builds to three output formats:
|
|
45
|
-
- `dist/cjs/` - CommonJS (for `require()`)
|
|
46
|
-
- `dist/esm/` - ES Modules (for `import`)
|
|
47
|
-
- `dist/types/` - TypeScript declarations
|
package/src/index.ts
DELETED
|
@@ -1,176 +0,0 @@
|
|
|
1
|
-
import 'dotenv/config';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { execFileSync, execSync } from 'child_process';
import type { Whisper, TranscribeResult } from 'smart-whisper';
|
|
7
|
-
|
|
8
|
-
/**
 * Optional settings accepted by the transcription functions.
 * Each field is forwarded to the Whisper transcription call only when it is defined.
 */
export interface TranscribeOptions {
  /** Language value passed through to the transcription parameters. */
  language?: string;
  /** Translate flag passed through to the transcription parameters. */
  translate?: boolean;
}
|
|
12
|
-
|
|
13
|
-
/** Result returned by the transcription functions. */
export interface TranscribeOutput {
  /** Transcribed text with control characters removed and surrounding whitespace trimmed. */
  text: string;
}
|
|
16
|
-
|
|
17
|
-
// Module-level cache: the Whisper instance created by getWhisperInstance(), or null when none is loaded.
let whisperInstance: Whisper | null = null;
// Model path the cached instance was created with; compared against the environment to detect a model change.
let currentModelPath: string | null = null;
|
|
19
|
-
|
|
20
|
-
/**
 * Reads the Whisper.cpp model location from the environment.
 *
 * @returns The value of `WHISPER_CPP_MODEL_PATH`, or `undefined` when the variable is not set.
 */
function getWhisperModelPath(): string | undefined {
  const { WHISPER_CPP_MODEL_PATH: configuredPath } = process.env;
  return configuredPath;
}
|
|
23
|
-
|
|
24
|
-
/**
 * Tells whether the Whisper.cpp provider can be used.
 *
 * @returns `true` only when `WHISPER_CPP_MODEL_PATH` is set and the path exists on disk.
 */
export function isWhisperConfigured(): boolean {
  const candidate = getWhisperModelPath();
  if (candidate === undefined) {
    return false;
  }
  return fs.existsSync(candidate);
}
|
|
28
|
-
|
|
29
|
-
/**
 * Returns a Whisper instance for the currently configured model, reusing the cached one when possible.
 *
 * If the configured model path differs from the one the cached instance was created with,
 * the cached instance is freed and a new one is created.
 *
 * @returns The cached or newly created Whisper instance.
 * @throws Error when `WHISPER_CPP_MODEL_PATH` is not set or the model file does not exist.
 */
async function getWhisperInstance(): Promise<Whisper> {
  const modelPath = getWhisperModelPath();

  if (!modelPath) {
    throw new Error('WHISPER_CPP_MODEL_PATH environment variable is not set');
  }
  if (!fs.existsSync(modelPath)) {
    throw new Error(`Whisper model not found at path: ${modelPath}`);
  }

  const cached = whisperInstance;
  if (cached) {
    if (currentModelPath === modelPath) {
      return cached;
    }
    // The configured model changed: release the old instance before loading the new one.
    await cached.free();
    whisperInstance = null;
  }

  // The library is loaded lazily, only once an instance is actually needed.
  const smartWhisper = await import('smart-whisper');
  const created = new smartWhisper.Whisper(modelPath, { gpu: true });
  whisperInstance = created;
  currentModelPath = modelPath;

  return created;
}
|
|
55
|
-
|
|
56
|
-
/**
 * Decodes an audio file into mono, 16 kHz, 32-bit float PCM samples by invoking `ffmpeg`.
 *
 * ffmpeg writes the raw samples to a temporary file in the OS temp directory; the file is
 * read back and always removed afterwards, whether or not the conversion succeeded.
 *
 * @param audioPath Path of the audio file to decode.
 * @returns The decoded samples in a freshly allocated `Float32Array`.
 * @throws Error when ffmpeg cannot be started or exits with a non-zero status, or when the
 *         temporary output cannot be read.
 */
function audioToPcm(audioPath: string): Float32Array {
  const tempPcmPath = path.join(
    os.tmpdir(),
    `whisper_${Date.now()}_${Math.random().toString(36).substring(7)}.pcm`,
  );

  try {
    // Arguments are passed as an array and no shell is spawned, so quotes, `$()` or `;`
    // inside audioPath are treated as part of the file name instead of being executed.
    // NOTE(review): without a shell, Windows `.cmd`/`.bat` ffmpeg shims are not resolved —
    // confirm a real ffmpeg executable is on PATH there.
    execFileSync(
      'ffmpeg',
      ['-y', '-i', audioPath, '-ar', '16000', '-ac', '1', '-f', 'f32le', tempPcmPath],
      { stdio: 'pipe' },
    );

    const pcmBuffer = fs.readFileSync(tempPcmPath);

    // Copy the bytes into a fresh ArrayBuffer: a Float32Array view requires a 4-byte aligned
    // offset, which a Buffer's byteOffset does not guarantee. Trailing bytes that do not
    // form a whole sample are dropped.
    const usableBytes = pcmBuffer.byteLength - (pcmBuffer.byteLength % Float32Array.BYTES_PER_ELEMENT);
    const start = pcmBuffer.byteOffset;
    return new Float32Array(pcmBuffer.buffer.slice(start, start + usableBytes));
  } finally {
    if (fs.existsSync(tempPcmPath)) {
      fs.unlinkSync(tempPcmPath);
    }
  }
}
|
|
74
|
-
|
|
75
|
-
/**
 * Normalizes raw transcription text.
 *
 * @param text Text produced by the transcription step.
 * @returns The text with ASCII control characters (0x00-0x1F and 0x7F) removed and
 *          leading/trailing whitespace trimmed.
 */
function cleanTranscription(text: string): string {
  const withoutControlChars = text.replace(/[\x00-\x1F\x7F]/g, '');
  return withoutControlChars.trim();
}
|
|
80
|
-
|
|
81
|
-
/**
 * Joins the text of all transcription segments into one string.
 *
 * @param results Segments returned by the Whisper task.
 * @returns The segment texts in order, separated by a single space.
 */
function resultsToText(results: TranscribeResult<'simple'>[]): string {
  const segmentTexts = results.map(({ text }) => text);
  return segmentTexts.join(' ');
}
|
|
84
|
-
|
|
85
|
-
/**
 * Transcribes an audio file with the Whisper.cpp provider.
 *
 * @param audioPath Path of the audio file to transcribe.
 * @param options Optional `language` / `translate` settings; only defined values are forwarded.
 * @returns The cleaned transcription text.
 * @throws Error when the audio file does not exist, or when obtaining the Whisper instance,
 *         decoding the audio, or the transcription itself fails.
 */
async function transcribe_whispercpp(audioPath: string, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  if (!fs.existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  const whisper = await getWhisperInstance();
  const pcmData = audioToPcm(audioPath);

  // Undefined options are left out entirely rather than passed as explicit `undefined`.
  const { language, translate } = options;
  const transcribeParams: { language?: string; translate?: boolean; format: 'simple' } = {
    format: 'simple',
    ...(language !== undefined ? { language } : {}),
    ...(translate !== undefined ? { translate } : {}),
  };

  const task = await whisper.transcribe(pcmData, transcribeParams);
  const segments = await task.result;

  return { text: cleanTranscription(resultsToText(segments)) };
}
|
|
113
|
-
|
|
114
|
-
/**
 * Transcribes audio with the configured STT provider.
 *
 * @param audio Path to an audio file, or a Buffer holding the audio data.
 * @param options Optional transcription settings.
 * @returns The transcription result.
 * @throws Error when `WHISPER_CPP_MODEL_PATH` is not set (no provider configured).
 */
export async function transcribe(audio: string | Buffer, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  if (!getWhisperModelPath()) {
    throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
  }

  // Buffers go through the temp-file path; strings are treated as file paths.
  return Buffer.isBuffer(audio)
    ? transcribeBuffer(audio, options)
    : transcribe_whispercpp(audio, options);
}
|
|
126
|
-
|
|
127
|
-
/**
 * Transcribes audio held in memory.
 *
 * The buffer is written to a temporary file in the OS temp directory, transcribed from
 * there, and the temporary file is removed afterwards in every case.
 *
 * @param audioBuffer Audio data to transcribe.
 * @param options Optional transcription settings.
 * @returns The transcription result.
 * @throws Error when `WHISPER_CPP_MODEL_PATH` is not set, when the temporary file cannot be
 *         written, or when the transcription fails.
 */
export async function transcribeBuffer(audioBuffer: Buffer, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  const modelPath = getWhisperModelPath();

  if (!modelPath) {
    throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
  }

  const tempPath = path.join(
    os.tmpdir(),
    `whisper_input_${Date.now()}_${Math.random().toString(36).substring(7)}.audio`,
  );

  try {
    // The write happens inside the try so that a failed or partial write (e.g. disk full)
    // still gets its leftover file removed by the finally block.
    fs.writeFileSync(tempPath, audioBuffer);
    // `return await` keeps the temp file alive until the transcription has settled.
    return await transcribe_whispercpp(tempPath, options);
  } finally {
    if (fs.existsSync(tempPath)) {
      fs.unlinkSync(tempPath);
    }
  }
}
|
|
148
|
-
|
|
149
|
-
/**
 * Releases the cached Whisper instance, if any, and clears the cached model path.
 * Does nothing when no instance is loaded.
 */
export async function freeWhisper(): Promise<void> {
  const active = whisperInstance;
  if (!active) {
    return;
  }

  await active.free();
  whisperInstance = null;
  currentModelPath = null;
}
|
|
156
|
-
|
|
157
|
-
/**
 * Lists the Whisper model names known to this package.
 *
 * @returns A new array with the model names: each base size followed by its `.en`
 *          variant, then the `large` family.
 */
export function getAvailableModels(): string[] {
  const sizesWithEnglishVariant = ['tiny', 'base', 'small', 'medium'];
  const largeFamily = ['large', 'large-v2', 'large-v3', 'large-v3-turbo'];

  return [
    ...sizesWithEnglishVariant.flatMap((size) => [size, `${size}.en`]),
    ...largeFamily,
  ];
}
|
|
173
|
-
|
|
174
|
-
/**
 * Builds the HuggingFace download URL for a Whisper.cpp ggml model.
 *
 * @param model Model name; it is inserted into the file name as-is.
 * @returns The URL of `ggml-<model>.bin` in the `ggerganov/whisper.cpp` repository.
 */
export function getModelUrl(model: string): string {
  const repositoryBase = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main';
  const fileName = `ggml-${model}.bin`;
  return `${repositoryBase}/${fileName}`;
}
|
package/test/index.test.ts
DELETED
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
|
|
2
|
-
import * as fs from 'fs';
|
|
3
|
-
|
|
4
|
-
// Keep the real `fs` module but replace the four calls below with mock functions,
// so tests can control file-existence checks without touching the disk.
vi.mock('fs', async () => {
  const actual = await vi.importActual<typeof import('fs')>('fs');
  return {
    ...actual,
    existsSync: vi.fn(),
    readFileSync: vi.fn(),
    writeFileSync: vi.fn(),
    unlinkSync: vi.fn(),
  };
});

// `child_process` is replaced entirely; only `execSync` is provided by the mock.
vi.mock('child_process', () => ({
  execSync: vi.fn(),
}));

// Fake `smart-whisper`: every Whisper instance resolves to a single fixed segment.
vi.mock('smart-whisper', () => ({
  Whisper: vi.fn().mockImplementation(() => ({
    transcribe: vi.fn().mockResolvedValue({
      result: Promise.resolve([{ text: 'Hello, world!', from: 0, to: 1000 }]),
    }),
    free: vi.fn().mockResolvedValue(undefined),
  })),
}));
|
|
27
|
-
|
|
28
|
-
// Unit tests for the public API of src/index.ts, run against the mocked modules.
describe('stt-proxy', () => {
  const originalEnv = process.env;

  beforeEach(() => {
    vi.clearAllMocks();
    // Each test starts from a copy of the environment with no model path configured.
    process.env = { ...originalEnv };
    delete process.env['WHISPER_CPP_MODEL_PATH'];
  });

  afterEach(() => {
    process.env = originalEnv;
    // Reset the module registry so the next dynamic import re-evaluates src/index.
    vi.resetModules();
  });

  describe('isWhisperConfigured', () => {
    it('should return false when WHISPER_CPP_MODEL_PATH is not set', async () => {
      const { isWhisperConfigured } = await import('../src/index.js');
      expect(isWhisperConfigured()).toBe(false);
    });

    it('should return false when WHISPER_CPP_MODEL_PATH is set but file does not exist', async () => {
      process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
      vi.mocked(fs.existsSync).mockReturnValue(false);
      const { isWhisperConfigured } = await import('../src/index.js');
      expect(isWhisperConfigured()).toBe(false);
    });

    it('should return true when WHISPER_CPP_MODEL_PATH is set and file exists', async () => {
      process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
      vi.mocked(fs.existsSync).mockReturnValue(true);
      const { isWhisperConfigured } = await import('../src/index.js');
      expect(isWhisperConfigured()).toBe(true);
    });
  });

  describe('transcribe', () => {
    it('should throw error when no provider is configured', async () => {
      const { transcribe } = await import('../src/index.js');
      await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
        'No STT provider configured'
      );
    });

    it('should throw error when audio file does not exist', async () => {
      process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
      // Only the model file "exists"; the audio path is reported as missing.
      vi.mocked(fs.existsSync).mockImplementation((path) => {
        if (path === '/path/to/model.bin') return true;
        return false;
      });
      const { transcribe } = await import('../src/index.js');
      await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
        'Audio file not found'
      );
    });

    it('should throw error when model file does not exist', async () => {
      process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
      // Only the audio file "exists"; the model path is reported as missing.
      vi.mocked(fs.existsSync).mockImplementation((path) => {
        if (path === '/path/to/audio.wav') return true;
        return false;
      });
      const { transcribe } = await import('../src/index.js');
      await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
        'Whisper model not found at path'
      );
    });
  });

  describe('getAvailableModels', () => {
    it('should return list of available models', async () => {
      const { getAvailableModels } = await import('../src/index.js');
      const models = getAvailableModels();
      expect(models).toContain('tiny');
      expect(models).toContain('base');
      expect(models).toContain('small');
      expect(models).toContain('medium');
      expect(models).toContain('large');
      expect(models).toContain('large-v3-turbo');
      expect(models.length).toBe(12);
    });
  });

  describe('getModelUrl', () => {
    it('should return correct HuggingFace URL for model', async () => {
      const { getModelUrl } = await import('../src/index.js');
      const url = getModelUrl('base');
      expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin');
    });

    it('should return correct URL for large-v3-turbo model', async () => {
      const { getModelUrl } = await import('../src/index.js');
      const url = getModelUrl('large-v3-turbo');
      expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin');
    });
  });

  describe('freeWhisper', () => {
    it('should not throw when called without active instance', async () => {
      const { freeWhisper } = await import('../src/index.js');
      await expect(freeWhisper()).resolves.not.toThrow();
    });
  });

  describe('transcribeBuffer', () => {
    it('should throw error when no provider is configured', async () => {
      const { transcribeBuffer } = await import('../src/index.js');
      const buffer = Buffer.from('test');
      await expect(transcribeBuffer(buffer)).rejects.toThrow(
        'No STT provider configured'
      );
    });
  });

  // Smoke checks that every public function is exported from the module.
  describe('type exports', () => {
    it('should export transcribe function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.transcribe).toBe('function');
    });

    it('should export transcribeBuffer function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.transcribeBuffer).toBe('function');
    });

    it('should export isWhisperConfigured function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.isWhisperConfigured).toBe('function');
    });

    it('should export freeWhisper function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.freeWhisper).toBe('function');
    });

    it('should export getAvailableModels function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.getAvailableModels).toBe('function');
    });

    it('should export getModelUrl function', async () => {
      const module = await import('../src/index.js');
      expect(typeof module.getModelUrl).toBe('function');
    });
  });
});
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
|
2
|
-
import * as fs from 'fs';
|
|
3
|
-
import * as path from 'path';
|
|
4
|
-
import * as https from 'https';
|
|
5
|
-
import * as http from 'http';
|
|
6
|
-
|
|
7
|
-
// Directory of this test file, derived from the module URL.
// NOTE(review): `URL.pathname` is percent-encoded and not a native path on Windows —
// `fileURLToPath` may be needed if this has to run there; confirm.
const __dirname = path.dirname(new URL(import.meta.url).pathname);

// Local locations where the model and sample audio are stored next to the tests.
const TEST_MODEL_DIR = path.join(__dirname, 'models');
const TEST_AUDIO_DIR = path.join(__dirname, 'audio');
const MODEL_NAME = 'ggml-tiny.bin';
const MODEL_PATH = path.join(TEST_MODEL_DIR, MODEL_NAME);
const AUDIO_FILE = path.join(TEST_AUDIO_DIR, 'jfk.wav');

// Remote sources used to download the fixtures when they are missing locally.
const MODEL_URL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin';
const JFK_AUDIO_URL = 'https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav';
|
|
17
|
-
|
|
18
|
-
/**
 * Downloads `url` to `destPath`, following HTTP redirects.
 *
 * The destination directory is created when missing. Redirect targets are resolved against
 * the current URL, so absolute, root-relative and path-relative `Location` headers all work.
 * A partially written file is removed when the transfer fails.
 *
 * @param url Source URL (`http` or `https`).
 * @param destPath File path the response body is written to.
 * @param maxRedirects Remaining number of redirects that may be followed (default 10).
 * @returns A promise that resolves once the file has been fully written and closed.
 * @throws Rejects with `Too many redirects`, `HTTP <status>` for non-200 responses, or the
 *         underlying network / file-system error.
 */
async function downloadFile(url: string, destPath: string, maxRedirects = 10): Promise<void> {
  return new Promise((resolve, reject) => {
    if (maxRedirects <= 0) {
      reject(new Error('Too many redirects'));
      return;
    }

    fs.mkdirSync(path.dirname(destPath), { recursive: true });

    const protocol = url.startsWith('https') ? https : http;

    protocol.get(url, (response) => {
      const { statusCode, headers } = response;

      if (statusCode !== undefined && statusCode >= 300 && statusCode < 400 && headers.location) {
        // Drain the unused body so the socket is released.
        response.resume();

        let redirectUrl: string;
        try {
          redirectUrl = new URL(headers.location, url).toString();
        } catch (err) {
          // A malformed Location header must reject, not throw inside the callback.
          reject(err);
          return;
        }

        downloadFile(redirectUrl, destPath, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        reject(new Error(`HTTP ${statusCode}`));
        return;
      }

      const file = fs.createWriteStream(destPath);

      // Shared failure path: stop both streams and remove the incomplete file.
      // `force: true` keeps the cleanup from throwing when the file was never created.
      const fail = (err: Error): void => {
        response.destroy();
        file.destroy();
        fs.rmSync(destPath, { force: true });
        reject(err);
      };

      response.on('error', fail);
      file.on('error', fail);
      // Resolve only after the file descriptor has actually been closed.
      file.on('finish', () => {
        file.close(() => resolve());
      });

      response.pipe(file);
    }).on('error', reject);
  });
}
|
|
57
|
-
|
|
58
|
-
/**
 * Prepares transcription text for loose comparison in assertions.
 *
 * @param text Raw transcription text.
 * @returns The text lower-cased, with `.`, `,`, `!` and `?` removed, and trimmed.
 */
function normalizeTranscription(text: string): string {
  const lowered = text.toLowerCase();
  const withoutPunctuation = lowered.replace(/[.,!?]/g, '');
  return withoutPunctuation.trim();
}
|
|
61
|
-
|
|
62
|
-
// End-to-end tests that run the real Whisper.cpp provider against the JFK sample audio.
describe('whisper.cpp integration tests', () => {
  // Bound in beforeAll, after the model path has been placed in the environment.
  let transcribe: typeof import('../src/index.js').transcribe;
  let transcribeBuffer: typeof import('../src/index.js').transcribeBuffer;
  let isWhisperConfigured: typeof import('../src/index.js').isWhisperConfigured;
  let freeWhisper: typeof import('../src/index.js').freeWhisper;

  beforeAll(async () => {
    // Download model if needed (a zero-byte file is treated as missing and replaced)
    if (!fs.existsSync(MODEL_PATH) || fs.statSync(MODEL_PATH).size === 0) {
      if (fs.existsSync(MODEL_PATH)) fs.unlinkSync(MODEL_PATH);
      console.log(`Downloading Whisper tiny model to ${MODEL_PATH}...`);
      console.log('This may take a few minutes on first run.');
      await downloadFile(MODEL_URL, MODEL_PATH);
      console.log('Model downloaded successfully.');
    }

    // Download audio if needed (same zero-byte handling as the model)
    if (!fs.existsSync(AUDIO_FILE) || fs.statSync(AUDIO_FILE).size === 0) {
      if (fs.existsSync(AUDIO_FILE)) fs.unlinkSync(AUDIO_FILE);
      console.log(`Downloading JFK test audio to ${AUDIO_FILE}...`);
      await downloadFile(JFK_AUDIO_URL, AUDIO_FILE);
      console.log('Audio downloaded successfully.');
    }

    // Set model path
    process.env['WHISPER_CPP_MODEL_PATH'] = MODEL_PATH;

    // Import module
    const stt = await import('../src/index.js');
    transcribe = stt.transcribe;
    transcribeBuffer = stt.transcribeBuffer;
    isWhisperConfigured = stt.isWhisperConfigured;
    freeWhisper = stt.freeWhisper;
  }, 600000); // 10 minute timeout for model download

  afterAll(async () => {
    // freeWhisper is unset if beforeAll failed before the import.
    if (freeWhisper) {
      await freeWhisper();
    }
  });

  it('should transcribe JFK speech audio file', async () => {
    const result = await transcribe(AUDIO_FILE);

    expect(result).toBeDefined();
    expect(result.text).toBeDefined();
    expect(typeof result.text).toBe('string');
    expect(result.text.length).toBeGreaterThan(0);

    // Compare on normalized text so casing and punctuation differences do not matter.
    const normalizedResult = normalizeTranscription(result.text);
    expect(normalizedResult).toContain('ask not what your country can do for you');
  }, 300000); // 5 minute timeout

  it('should transcribe audio from buffer', async () => {
    const audioBuffer = fs.readFileSync(AUDIO_FILE);
    const result = await transcribeBuffer(audioBuffer);

    expect(result).toBeDefined();
    expect(result.text).toBeDefined();
    expect(typeof result.text).toBe('string');
    expect(result.text.length).toBeGreaterThan(0);

    const normalizedResult = normalizeTranscription(result.text);
    expect(normalizedResult).toContain('ask not what your country can do for you');
  }, 300000); // 5 minute timeout

  it('should return true for isWhisperConfigured', () => {
    expect(isWhisperConfigured()).toBe(true);
  });

  it('should throw error for non-existent audio file', async () => {
    await expect(transcribe('/non/existent/audio.wav')).rejects.toThrow('Audio file not found');
  });
});
|
package/tsconfig.cjs.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"extends": "./tsconfig.json",
|
|
3
|
-
"compilerOptions": {
|
|
4
|
-
"module": "commonjs",
|
|
5
|
-
"moduleResolution": "node",
|
|
6
|
-
"outDir": "./dist/cjs",
|
|
7
|
-
"declaration": false,
|
|
8
|
-
"declarationMap": false,
|
|
9
|
-
"verbatimModuleSyntax": false,
|
|
10
|
-
"types": ["node"]
|
|
11
|
-
},
|
|
12
|
-
"include": ["src/**/*.ts"],
|
|
13
|
-
"exclude": ["**/*.test.ts", "vitest.config.ts"]
|
|
14
|
-
}
|
package/tsconfig.esm.json
DELETED
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"extends": "./tsconfig.json",
|
|
3
|
-
"compilerOptions": {
|
|
4
|
-
"module": "nodenext",
|
|
5
|
-
"moduleResolution": "nodenext",
|
|
6
|
-
"outDir": "./dist/esm",
|
|
7
|
-
"declaration": false,
|
|
8
|
-
"declarationMap": false,
|
|
9
|
-
"verbatimModuleSyntax": false,
|
|
10
|
-
"types": ["node"]
|
|
11
|
-
},
|
|
12
|
-
"include": ["src/**/*.ts"],
|
|
13
|
-
"exclude": ["**/*.test.ts", "vitest.config.ts"]
|
|
14
|
-
}
|
package/tsconfig.json
DELETED
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"rootDir": "./src",
|
|
4
|
-
"outDir": "./dist",
|
|
5
|
-
"module": "nodenext",
|
|
6
|
-
"moduleResolution": "nodenext",
|
|
7
|
-
"target": "esnext",
|
|
8
|
-
"sourceMap": true,
|
|
9
|
-
"declaration": true,
|
|
10
|
-
"declarationMap": true,
|
|
11
|
-
"strict": true,
|
|
12
|
-
"noUncheckedIndexedAccess": true,
|
|
13
|
-
"exactOptionalPropertyTypes": true,
|
|
14
|
-
"verbatimModuleSyntax": true,
|
|
15
|
-
"isolatedModules": true,
|
|
16
|
-
"noUncheckedSideEffectImports": true,
|
|
17
|
-
"moduleDetection": "force",
|
|
18
|
-
"skipLibCheck": true
|
|
19
|
-
}
|
|
20
|
-
}
|
package/tsconfig.types.json
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"extends": "./tsconfig.json",
|
|
3
|
-
"compilerOptions": {
|
|
4
|
-
"module": "nodenext",
|
|
5
|
-
"moduleResolution": "nodenext",
|
|
6
|
-
"outDir": "./dist/types",
|
|
7
|
-
"declaration": true,
|
|
8
|
-
"declarationMap": true,
|
|
9
|
-
"emitDeclarationOnly": true,
|
|
10
|
-
"verbatimModuleSyntax": false,
|
|
11
|
-
"types": ["node"]
|
|
12
|
-
},
|
|
13
|
-
"include": ["src/**/*.ts"],
|
|
14
|
-
"exclude": ["**/*.test.ts", "vitest.config.ts"]
|
|
15
|
-
}
|
package/vitest.config.ts
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { defineConfig } from 'vitest/config';
|
|
2
|
-
|
|
3
|
-
// Vitest configuration: run the test files under test/ in a Node environment and
// collect V8 coverage for the sources under src/.
export default defineConfig({
  test: {
    environment: 'node',
    include: ['test/**/*.test.ts'],
    coverage: {
      provider: 'v8',
      // Coverage is printed to the terminal and also written as an HTML report.
      reporter: ['text', 'html'],
      include: ['src/**/*.ts'],
    },
  },
});
|