@derogab/stt-proxy 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -21
- package/package.json +1 -1
- package/src/index.ts +20 -22
- package/test/index.test.ts +38 -82
- package/test/whisper-cpp.integration.test.ts +1 -17
package/README.md
CHANGED
|
@@ -42,10 +42,10 @@ curl -L -o ggml-base.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/ma
|
|
|
42
42
|
|
|
43
43
|
### `transcribe(audio: string | Buffer, options?): Promise<TranscribeOutput>`
|
|
44
44
|
|
|
45
|
-
Transcribes audio to text using the configured STT provider.
|
|
45
|
+
Transcribes audio to text using the configured STT provider. The package automatically manages provider initialization and cleanup.
|
|
46
46
|
|
|
47
47
|
**Parameters:**
|
|
48
|
-
- `audio`: Path to audio file or audio Buffer
|
|
48
|
+
- `audio`: Path to audio file (string) or audio Buffer
|
|
49
49
|
- `options` (optional): Transcription options
|
|
50
50
|
|
|
51
51
|
**Returns:**
|
|
@@ -66,25 +66,24 @@ type TranscribeOutput = {
|
|
|
66
66
|
};
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
Get HuggingFace download URL for a model.
|
|
69
|
+
**Example:**
|
|
70
|
+
```typescript
|
|
71
|
+
// Transcribe from file path
|
|
72
|
+
const result1 = await transcribe('/path/to/audio.wav');
|
|
73
|
+
console.log(result1.text);
|
|
74
|
+
|
|
75
|
+
// Transcribe from Buffer
|
|
76
|
+
const audioBuffer = fs.readFileSync('/path/to/audio.wav');
|
|
77
|
+
const result2 = await transcribe(audioBuffer);
|
|
78
|
+
console.log(result2.text);
|
|
79
|
+
|
|
80
|
+
// With options
|
|
81
|
+
const result3 = await transcribe('/path/to/audio.wav', {
|
|
82
|
+
language: 'en',
|
|
83
|
+
translate: false
|
|
84
|
+
});
|
|
85
|
+
console.log(result3.text);
|
|
86
|
+
```
|
|
88
87
|
|
|
89
88
|
## Provider Priority
|
|
90
89
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@derogab/stt-proxy",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "A simple and lightweight proxy for seamless integration with multiple STT (Speech-to-Text) providers including Whisper.cpp",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/cjs/index.js",
|
package/src/index.ts
CHANGED
|
@@ -21,7 +21,7 @@ function getWhisperModelPath(): string | undefined {
|
|
|
21
21
|
return process.env['WHISPER_CPP_MODEL_PATH'];
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
|
|
24
|
+
function isWhisperConfigured(): boolean {
|
|
25
25
|
const modelPath = getWhisperModelPath();
|
|
26
26
|
return modelPath !== undefined && fs.existsSync(modelPath);
|
|
27
27
|
}
|
|
@@ -124,7 +124,7 @@ export async function transcribe(audio: string | Buffer, options: TranscribeOpti
|
|
|
124
124
|
throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
-
|
|
127
|
+
async function transcribeBuffer(audioBuffer: Buffer, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
|
|
128
128
|
const modelPath = getWhisperModelPath();
|
|
129
129
|
|
|
130
130
|
if (!modelPath) {
|
|
@@ -146,7 +146,7 @@ export async function transcribeBuffer(audioBuffer: Buffer, options: TranscribeO
|
|
|
146
146
|
}
|
|
147
147
|
}
|
|
148
148
|
|
|
149
|
-
|
|
149
|
+
async function freeWhisper(): Promise<void> {
|
|
150
150
|
if (whisperInstance) {
|
|
151
151
|
await whisperInstance.free();
|
|
152
152
|
whisperInstance = null;
|
|
@@ -154,23 +154,21 @@ export async function freeWhisper(): Promise<void> {
|
|
|
154
154
|
}
|
|
155
155
|
}
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
'tiny.en',
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
'medium.en',
|
|
167
|
-
'large',
|
|
168
|
-
'large-v2',
|
|
169
|
-
'large-v3',
|
|
170
|
-
'large-v3-turbo',
|
|
171
|
-
];
|
|
172
|
-
}
|
|
157
|
+
// Automatically clean up Whisper instance on process exit
|
|
158
|
+
process.on('exit', () => {
|
|
159
|
+
if (whisperInstance) {
|
|
160
|
+
// Note: Cannot use async operations in 'exit' handler
|
|
161
|
+
// The instance will be cleaned up by the process termination
|
|
162
|
+
whisperInstance = null;
|
|
163
|
+
currentModelPath = null;
|
|
164
|
+
}
|
|
165
|
+
});
|
|
173
166
|
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
167
|
+
// Handle graceful shutdown signals
|
|
168
|
+
const shutdownHandler = async () => {
|
|
169
|
+
await freeWhisper();
|
|
170
|
+
process.exit(0);
|
|
171
|
+
};
|
|
172
|
+
|
|
173
|
+
process.on('SIGINT', shutdownHandler);
|
|
174
|
+
process.on('SIGTERM', shutdownHandler);
|
package/test/index.test.ts
CHANGED
|
@@ -39,35 +39,23 @@ describe('stt-proxy', () => {
|
|
|
39
39
|
vi.resetModules();
|
|
40
40
|
});
|
|
41
41
|
|
|
42
|
-
describe('isWhisperConfigured', () => {
|
|
43
|
-
it('should return false when WHISPER_CPP_MODEL_PATH is not set', async () => {
|
|
44
|
-
const { isWhisperConfigured } = await import('../src/index.js');
|
|
45
|
-
expect(isWhisperConfigured()).toBe(false);
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
it('should return false when WHISPER_CPP_MODEL_PATH is set but file does not exist', async () => {
|
|
49
|
-
process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
|
|
50
|
-
vi.mocked(fs.existsSync).mockReturnValue(false);
|
|
51
|
-
const { isWhisperConfigured } = await import('../src/index.js');
|
|
52
|
-
expect(isWhisperConfigured()).toBe(false);
|
|
53
|
-
});
|
|
54
|
-
|
|
55
|
-
it('should return true when WHISPER_CPP_MODEL_PATH is set and file exists', async () => {
|
|
56
|
-
process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
|
|
57
|
-
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
58
|
-
const { isWhisperConfigured } = await import('../src/index.js');
|
|
59
|
-
expect(isWhisperConfigured()).toBe(true);
|
|
60
|
-
});
|
|
61
|
-
});
|
|
62
42
|
|
|
63
43
|
describe('transcribe', () => {
|
|
64
|
-
it('should throw error when no provider is configured', async () => {
|
|
44
|
+
it('should throw error when no provider is configured (string path)', async () => {
|
|
65
45
|
const { transcribe } = await import('../src/index.js');
|
|
66
46
|
await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
|
|
67
47
|
'No STT provider configured'
|
|
68
48
|
);
|
|
69
49
|
});
|
|
70
50
|
|
|
51
|
+
it('should throw error when no provider is configured (Buffer)', async () => {
|
|
52
|
+
const { transcribe } = await import('../src/index.js');
|
|
53
|
+
const buffer = Buffer.from('test');
|
|
54
|
+
await expect(transcribe(buffer)).rejects.toThrow(
|
|
55
|
+
'No STT provider configured'
|
|
56
|
+
);
|
|
57
|
+
});
|
|
58
|
+
|
|
71
59
|
it('should throw error when audio file does not exist', async () => {
|
|
72
60
|
process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
|
|
73
61
|
vi.mocked(fs.existsSync).mockImplementation((path) => {
|
|
@@ -91,82 +79,50 @@ describe('stt-proxy', () => {
|
|
|
91
79
|
'Whisper model not found at path'
|
|
92
80
|
);
|
|
93
81
|
});
|
|
94
|
-
});
|
|
95
82
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
expect(models).toContain('medium');
|
|
104
|
-
expect(models).toContain('large');
|
|
105
|
-
expect(models).toContain('large-v3-turbo');
|
|
106
|
-
expect(models.length).toBe(12);
|
|
107
|
-
});
|
|
108
|
-
});
|
|
83
|
+
it('should successfully transcribe audio file', async () => {
|
|
84
|
+
process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
|
|
85
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
86
|
+
// Mock readFileSync to return a valid PCM buffer (Float32Array requires 4-byte aligned buffer)
|
|
87
|
+
const pcmData = new Float32Array([0.1, 0.2, 0.3]);
|
|
88
|
+
vi.mocked(fs.readFileSync).mockReturnValue(Buffer.from(pcmData.buffer));
|
|
89
|
+
const { transcribe } = await import('../src/index.js');
|
|
109
90
|
|
|
110
|
-
|
|
111
|
-
it('should return correct HuggingFace URL for model', async () => {
|
|
112
|
-
const { getModelUrl } = await import('../src/index.js');
|
|
113
|
-
const url = getModelUrl('base');
|
|
114
|
-
expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin');
|
|
115
|
-
});
|
|
91
|
+
const result = await transcribe('/path/to/audio.wav');
|
|
116
92
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
const url = getModelUrl('large-v3-turbo');
|
|
120
|
-
expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin');
|
|
93
|
+
expect(result).toBeDefined();
|
|
94
|
+
expect(result.text).toBe('Hello, world!');
|
|
121
95
|
});
|
|
122
|
-
});
|
|
123
96
|
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
97
|
+
it('should successfully transcribe audio from buffer', async () => {
|
|
98
|
+
process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
|
|
99
|
+
vi.mocked(fs.existsSync).mockReturnValue(true);
|
|
100
|
+
// Mock readFileSync to return a valid PCM buffer (Float32Array requires 4-byte aligned buffer)
|
|
101
|
+
const pcmData = new Float32Array([0.1, 0.2, 0.3]);
|
|
102
|
+
vi.mocked(fs.readFileSync).mockReturnValue(Buffer.from(pcmData.buffer));
|
|
103
|
+
const { transcribe } = await import('../src/index.js');
|
|
130
104
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
'No STT provider configured'
|
|
137
|
-
);
|
|
105
|
+
const audioBuffer = Buffer.from('fake audio data');
|
|
106
|
+
const result = await transcribe(audioBuffer);
|
|
107
|
+
|
|
108
|
+
expect(result).toBeDefined();
|
|
109
|
+
expect(result.text).toBe('Hello, world!');
|
|
138
110
|
});
|
|
139
111
|
});
|
|
140
112
|
|
|
141
|
-
|
|
113
|
+
|
|
114
|
+
describe('API exports', () => {
|
|
142
115
|
it('should export transcribe function', async () => {
|
|
143
116
|
const module = await import('../src/index.js');
|
|
144
117
|
expect(typeof module.transcribe).toBe('function');
|
|
145
118
|
});
|
|
146
119
|
|
|
147
|
-
it('should export transcribeBuffer function', async () => {
|
|
120
|
+
it('should only export transcribe function (no other functions)', async () => {
|
|
148
121
|
const module = await import('../src/index.js');
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
const module = await import('../src/index.js');
|
|
154
|
-
expect(typeof module.isWhisperConfigured).toBe('function');
|
|
155
|
-
});
|
|
156
|
-
|
|
157
|
-
it('should export freeWhisper function', async () => {
|
|
158
|
-
const module = await import('../src/index.js');
|
|
159
|
-
expect(typeof module.freeWhisper).toBe('function');
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
it('should export getAvailableModels function', async () => {
|
|
163
|
-
const module = await import('../src/index.js');
|
|
164
|
-
expect(typeof module.getAvailableModels).toBe('function');
|
|
165
|
-
});
|
|
166
|
-
|
|
167
|
-
it('should export getModelUrl function', async () => {
|
|
168
|
-
const module = await import('../src/index.js');
|
|
169
|
-
expect(typeof module.getModelUrl).toBe('function');
|
|
122
|
+
const exportedFunctions = Object.keys(module).filter(
|
|
123
|
+
key => typeof module[key as keyof typeof module] === 'function'
|
|
124
|
+
);
|
|
125
|
+
expect(exportedFunctions).toEqual(['transcribe']);
|
|
170
126
|
});
|
|
171
127
|
});
|
|
172
128
|
});
|
|
@@ -61,9 +61,6 @@ function normalizeTranscription(text: string): string {
|
|
|
61
61
|
|
|
62
62
|
describe('whisper.cpp integration tests', () => {
|
|
63
63
|
let transcribe: typeof import('../src/index.js').transcribe;
|
|
64
|
-
let transcribeBuffer: typeof import('../src/index.js').transcribeBuffer;
|
|
65
|
-
let isWhisperConfigured: typeof import('../src/index.js').isWhisperConfigured;
|
|
66
|
-
let freeWhisper: typeof import('../src/index.js').freeWhisper;
|
|
67
64
|
|
|
68
65
|
beforeAll(async () => {
|
|
69
66
|
// Download model if needed
|
|
@@ -89,17 +86,8 @@ describe('whisper.cpp integration tests', () => {
|
|
|
89
86
|
// Import module
|
|
90
87
|
const stt = await import('../src/index.js');
|
|
91
88
|
transcribe = stt.transcribe;
|
|
92
|
-
transcribeBuffer = stt.transcribeBuffer;
|
|
93
|
-
isWhisperConfigured = stt.isWhisperConfigured;
|
|
94
|
-
freeWhisper = stt.freeWhisper;
|
|
95
89
|
}, 600000); // 10 minute timeout for model download
|
|
96
90
|
|
|
97
|
-
afterAll(async () => {
|
|
98
|
-
if (freeWhisper) {
|
|
99
|
-
await freeWhisper();
|
|
100
|
-
}
|
|
101
|
-
});
|
|
102
|
-
|
|
103
91
|
it('should transcribe JFK speech audio file', async () => {
|
|
104
92
|
const result = await transcribe(AUDIO_FILE);
|
|
105
93
|
|
|
@@ -114,7 +102,7 @@ describe('whisper.cpp integration tests', () => {
|
|
|
114
102
|
|
|
115
103
|
it('should transcribe audio from buffer', async () => {
|
|
116
104
|
const audioBuffer = fs.readFileSync(AUDIO_FILE);
|
|
117
|
-
const result = await transcribeBuffer(audioBuffer);
|
|
105
|
+
const result = await transcribe(audioBuffer);
|
|
118
106
|
|
|
119
107
|
expect(result).toBeDefined();
|
|
120
108
|
expect(result.text).toBeDefined();
|
|
@@ -125,10 +113,6 @@ describe('whisper.cpp integration tests', () => {
|
|
|
125
113
|
expect(normalizedResult).toContain('ask not what your country can do for you');
|
|
126
114
|
}, 300000); // 5 minute timeout
|
|
127
115
|
|
|
128
|
-
it('should return true for isWhisperConfigured', () => {
|
|
129
|
-
expect(isWhisperConfigured()).toBe(true);
|
|
130
|
-
});
|
|
131
|
-
|
|
132
116
|
it('should throw error for non-existent audio file', async () => {
|
|
133
117
|
await expect(transcribe('/non/existent/audio.wav')).rejects.toThrow('Audio file not found');
|
|
134
118
|
});
|