@derogab/stt-proxy 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
+ name: Release and publish package to NPM
2
+
3
+ on:
4
+ push:
5
+ # Publish `v1.2.3` tags as releases.
6
+ tags:
7
+ - v*
8
+
9
+ jobs:
10
+ # Release the TAG to GitHub.
11
+ release:
12
+ name: Release pushed tag
13
+ if: startsWith(github.ref, 'refs/tags/')
14
+ permissions:
15
+ contents: write
16
+ runs-on: ubuntu-latest
17
+ steps:
18
+ - name: Create release
19
+ env:
20
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
21
+ tag: ${{ github.ref_name }}
22
+ run: |
23
+ gh release create "$tag" \
24
+ --repo="$GITHUB_REPOSITORY" \
25
+ --title="v${tag#v}" \
26
+ --generate-notes
27
+ # Publish the package.
28
+ publish-npm:
29
+ name: Publish Package on NPM
30
+ needs: release
31
+ runs-on: ubuntu-latest
32
+ permissions:
33
+ contents: read
34
+ id-token: write
35
+ steps:
36
+ - name: Checkout
37
+ uses: actions/checkout@v6
38
+ - name: Setup Node
39
+ uses: actions/setup-node@v6
40
+ with:
41
+ node-version: '20.x'
42
+ cache: 'npm'
43
+ registry-url: 'https://registry.npmjs.org'
44
+ - name: Install FFmpeg
45
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg
46
+ - name: Cache Whisper model
47
+ uses: actions/cache@v4
48
+ with:
49
+ path: test/models
50
+ key: whisper-model-tiny-v1
51
+ - name: Cache test audio
52
+ uses: actions/cache@v4
53
+ with:
54
+ path: test/audio
55
+ key: test-audio-jfk-v1
56
+ - name: Install dependencies (clean)
57
+ run: npm ci
58
+ - name: Type check
59
+ run: npm run typecheck
60
+ - name: Run tests
61
+ run: npm test --if-present
62
+ - name: Build
63
+ run: npm run build
64
+ - name: Verify tag matches package.json version
65
+ run: |
66
+ PKG_VERSION="$(node -p "require('./package.json').version")"
67
+ TAG_VERSION="${GITHUB_REF_NAME#v}" # supports tags like v1.2.3
68
+ echo "package.json: $PKG_VERSION"
69
+ echo "release tag: $TAG_VERSION"
70
+ if [ "$PKG_VERSION" != "$TAG_VERSION" ]; then
71
+ echo "Release tag ($TAG_VERSION) does not match package.json version ($PKG_VERSION)."
72
+ exit 1
73
+ fi
74
+ - name: Show publish contents (dry run)
75
+ run: npm pack --dry-run
76
+ - name: Publish to npm (with provenance)
77
+ env:
78
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
79
+ run: npm publish --provenance --access public
80
+ publish-github:
81
+ name: Publish Package on GitHub
82
+ needs: release
83
+ runs-on: ubuntu-latest
84
+ permissions:
85
+ contents: read
86
+ id-token: write
87
+ steps:
88
+ - name: Checkout
89
+ uses: actions/checkout@v6
90
+ - name: Setup Node
91
+ uses: actions/setup-node@v6
92
+ with:
93
+ node-version: '20.x'
94
+ cache: 'npm'
95
+ registry-url: 'https://npm.pkg.github.com'
96
+ - name: Install FFmpeg
97
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg
98
+ - name: Cache Whisper model
99
+ uses: actions/cache@v4
100
+ with:
101
+ path: test/models
102
+ key: whisper-model-tiny-v1
103
+ - name: Cache test audio
104
+ uses: actions/cache@v4
105
+ with:
106
+ path: test/audio
107
+ key: test-audio-jfk-v1
108
+ - name: Install dependencies (clean)
109
+ run: npm ci
110
+ - name: Type check
111
+ run: npm run typecheck
112
+ - name: Run tests
113
+ run: npm test --if-present
114
+ - name: Build
115
+ run: npm run build
116
+ - name: Verify tag matches package.json version
117
+ run: |
118
+ PKG_VERSION="$(node -p "require('./package.json').version")"
119
+ TAG_VERSION="${GITHUB_REF_NAME#v}" # supports tags like v1.2.3
120
+ echo "package.json: $PKG_VERSION"
121
+ echo "release tag: $TAG_VERSION"
122
+ if [ "$PKG_VERSION" != "$TAG_VERSION" ]; then
123
+ echo "Release tag ($TAG_VERSION) does not match package.json version ($PKG_VERSION)."
124
+ exit 1
125
+ fi
126
+ - name: Show publish contents (dry run)
127
+ run: npm pack --dry-run
128
+ - name: Publish to GitHub Packages (with provenance)
129
+ env:
130
+ NODE_AUTH_TOKEN: ${{ secrets.NPM_GITHUB_TOKEN }}
131
+ run: npm publish --provenance --access public
@@ -0,0 +1,42 @@
1
+ name: Tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ pull_request:
8
+ branches:
9
+ - master
10
+
11
+ jobs:
12
+ tests:
13
+ name: Run tests
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - name: Checkout
17
+ uses: actions/checkout@v6
18
+ - name: Setup Node
19
+ uses: actions/setup-node@v6
20
+ with:
21
+ node-version: '20.x'
22
+ cache: 'npm'
23
+ - name: Install FFmpeg
24
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg
25
+ - name: Cache Whisper model
26
+ uses: actions/cache@v4
27
+ with:
28
+ path: test/models
29
+ key: whisper-model-tiny-v1
30
+ - name: Cache test audio
31
+ uses: actions/cache@v4
32
+ with:
33
+ path: test/audio
34
+ key: test-audio-jfk-v1
35
+ - name: Install dependencies
36
+ run: npm ci
37
+ - name: Type check
38
+ run: npm run typecheck
39
+ - name: Build project
40
+ run: npm run build
41
+ - name: Run all tests
42
+ run: npm test
package/CLAUDE.md ADDED
@@ -0,0 +1,47 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Build Commands
6
+
7
+ ```bash
8
+ npm install # Install dependencies
9
+ npm run build # Build all outputs (CJS, ESM, and types)
10
+ npm run build:cjs # Build CommonJS output only
11
+ npm run build:esm # Build ESM output only
12
+ npm run build:types # Build type declarations only
13
+ ```
14
+
15
+ ## Test Commands
16
+
17
+ ```bash
18
+ npm test # Run all tests (unit + integration)
19
+ npm run test:unit # Run unit tests only
20
+ npm run test:whisper # Run Whisper.cpp integration tests only
21
+ npm run test:watch # Run tests in watch mode
22
+ npm run test:coverage # Run tests with coverage report
23
+ ```
24
+
25
+ **Important**: Always run `npm test` after making changes to verify nothing is broken. Tests are located in the `test/` folder.
26
+
27
+ Tests are written using Vitest and cover:
28
+ - Provider selection logic (Whisper.cpp priority)
29
+ - Error handling for all providers
30
+ - Audio transcription functionality
31
+ - API request formatting
32
+
33
+ ## Architecture
34
+
35
+ This is a TypeScript npm package (`@derogab/stt-proxy`) that provides a unified interface for multiple STT providers. The entire implementation is in a single file: `src/index.ts`.
36
+
37
+ ### Provider Selection
38
+
39
+ The `transcribe()` function automatically selects a provider based on environment variables in this priority order:
40
+ 1. **Whisper.cpp** - if `WHISPER_CPP_MODEL_PATH` is set
41
+
42
+ ### Build Output
43
+
44
+ The package builds to three output formats:
45
+ - `dist/cjs/` - CommonJS (for `require()`)
46
+ - `dist/esm/` - ES Modules (for `import`)
47
+ - `dist/types/` - TypeScript declarations
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Gabriele De Rosa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,136 @@
1
+ # stt-proxy
2
+ A simple and lightweight proxy for seamless integration with multiple STT providers including Whisper.cpp.
3
+
4
+ ## Features
5
+
6
+ - **Multi-provider support**: Switch between STT providers with environment variables.
7
+ - **TypeScript support**: Full TypeScript definitions included.
8
+ - **Simple API**: Single function interface for all providers.
9
+ - **Automatic provider detection**: Automatically selects the best available provider based on environment variables.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ npm install @derogab/stt-proxy
15
+ ```
16
+
17
+ ## Quick Start
18
+
19
+ ```typescript
20
+ import { transcribe } from '@derogab/stt-proxy';
21
+
22
+ const result = await transcribe('/path/to/audio.wav');
23
+ console.log(result.text);
24
+ ```
25
+
26
+ ## Configuration
27
+
28
+ The package automatically detects which STT provider to use based on your environment variables.
29
+ Configure one or more providers:
30
+
31
+ ### Whisper.cpp (Local)
32
+ ```bash
33
+ WHISPER_CPP_MODEL_PATH=/path/to/ggml-base.bin # Required, path to your GGML model file
34
+ ```
35
+
36
+ Download models from [HuggingFace](https://huggingface.co/ggerganov/whisper.cpp/tree/main):
37
+ ```bash
38
+ curl -L -o ggml-base.bin https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin
39
+ ```
40
+
41
+ ## API Reference
42
+
43
+ ### `transcribe(audio: string | Buffer, options?): Promise<TranscribeOutput>`
44
+
45
+ Transcribes audio to text using the configured STT provider.
46
+
47
+ **Parameters:**
48
+ - `audio`: Path to audio file or audio Buffer
49
+ - `options` (optional): Transcription options
50
+
51
+ **Returns:**
52
+ - Promise that resolves to an object with `text` property
53
+
54
+ **Options Format:**
55
+ ```typescript
56
+ type TranscribeOptions = {
57
+ language?: string; // Language code (e.g., 'en', 'es', 'fr')
58
+ translate?: boolean; // Translate to English
59
+ };
60
+ ```
61
+
62
+ **Output Format:**
63
+ ```typescript
64
+ type TranscribeOutput = {
65
+ text: string;
66
+ };
67
+ ```
68
+
69
+ ### `transcribeBuffer(buffer: Buffer, options?): Promise<TranscribeOutput>`
70
+
71
+ Transcribes audio from a Buffer.
72
+
73
+ ### `isWhisperConfigured(): boolean`
74
+
75
+ Returns `true` when `WHISPER_CPP_MODEL_PATH` is set and the model file exists at that path.
76
+
77
+ ### `freeWhisper(): Promise<void>`
78
+
79
+ Release Whisper instance and free memory.
80
+
81
+ ### `getAvailableModels(): string[]`
82
+
83
+ Get list of available Whisper model names.
84
+
85
+ ### `getModelUrl(model: string): string`
86
+
87
+ Get HuggingFace download URL for a model.
88
+
89
+ ## Provider Priority
90
+
91
+ The package selects providers in the following order:
92
+ 1. **Whisper.cpp** (if `WHISPER_CPP_MODEL_PATH` is set)
93
+
94
+ If no provider is configured, `transcribe()` rejects with an error.
95
+
96
+ ## Requirements
97
+
98
+ - **FFmpeg**: Required for audio conversion.
99
+ ```bash
100
+ # macOS
101
+ brew install ffmpeg
102
+
103
+ # Ubuntu/Debian
104
+ sudo apt install ffmpeg
105
+
106
+ # Windows (with Chocolatey)
107
+ choco install ffmpeg
108
+ ```
109
+
110
+ ## Development
111
+
112
+ ```bash
113
+ # Install dependencies
114
+ npm install
115
+
116
+ # Build the package
117
+ npm run build
118
+ ```
119
+
120
+ ## Credits
121
+ _STT Proxy_ is made with ♥ by [derogab](https://github.com/derogab) and is released under the [MIT license](./LICENSE).
122
+
123
+ ## Contributors
124
+
125
+ <a href="https://github.com/derogab/stt-proxy/graphs/contributors">
126
+ <img src="https://contrib.rocks/image?repo=derogab/stt-proxy" />
127
+ </a>
128
+
129
+ ## Tip
130
+ If you like this project or directly benefit from it, please consider buying me a coffee:
131
+ 🔗 `bc1qd0qatgz8h62uvnr74utwncc6j5ckfz2v2g4lef`
132
+ ⚡️ `derogab@sats.mobi`
133
+ 💶 [Sponsor on GitHub](https://github.com/sponsors/derogab)
134
+
135
+ ## Stargazers over time
136
+ [![Stargazers over time](https://starchart.cc/derogab/stt-proxy.svg?variant=adaptive)](https://starchart.cc/derogab/stt-proxy)
package/package.json ADDED
@@ -0,0 +1,62 @@
1
+ {
2
+ "name": "@derogab/stt-proxy",
3
+ "version": "0.1.0",
4
+ "description": "A simple and lightweight proxy for seamless integration with multiple STT (Speech-to-Text) providers including Whisper.cpp",
5
+ "type": "module",
6
+ "main": "./dist/cjs/index.js",
7
+ "module": "./dist/esm/index.js",
8
+ "types": "./dist/types/index.d.ts",
9
+ "exports": {
10
+ ".": {
11
+ "import": {
12
+ "types": "./dist/types/index.d.ts",
13
+ "default": "./dist/esm/index.js"
14
+ },
15
+ "require": {
16
+ "types": "./dist/types/index.d.ts",
17
+ "default": "./dist/cjs/index.js"
18
+ }
19
+ }
20
+ },
21
+ "scripts": {
22
+ "build": "npm run build:cjs && npm run build:esm && npm run build:types",
23
+ "build:cjs": "tsc -p tsconfig.cjs.json && echo '{\"type\":\"commonjs\"}' > dist/cjs/package.json",
24
+ "build:esm": "tsc -p tsconfig.esm.json",
25
+ "build:types": "tsc -p tsconfig.types.json",
26
+ "test": "vitest run",
27
+ "test:unit": "vitest run --exclude='**/*.integration.test.*'",
28
+ "test:whisper": "vitest run --testTimeout=300000 --hookTimeout=600000 test/whisper-cpp.integration.test.ts",
29
+ "test:watch": "vitest watch",
30
+ "test:coverage": "vitest run --coverage",
31
+ "typecheck": "tsc --noEmit -p tsconfig.esm.json"
32
+ },
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "git+https://github.com/derogab/stt-proxy.git"
36
+ },
37
+ "keywords": [
38
+ "STT",
39
+ "speech-to-text",
40
+ "transcription",
41
+ "whisper",
42
+ "whisper.cpp",
43
+ "proxy",
44
+ "gateway"
45
+ ],
46
+ "author": "derogab",
47
+ "license": "MIT",
48
+ "bugs": {
49
+ "url": "https://github.com/derogab/stt-proxy/issues"
50
+ },
51
+ "homepage": "https://github.com/derogab/stt-proxy#readme",
52
+ "dependencies": {
53
+ "dotenv": "16.5.0",
54
+ "smart-whisper": "0.4.2"
55
+ },
56
+ "devDependencies": {
57
+ "@types/node": "^22.15.21",
58
+ "@vitest/coverage-v8": "^3.1.4",
59
+ "typescript": "^5.8.3",
60
+ "vitest": "^3.1.4"
61
+ }
62
+ }
package/src/index.ts ADDED
@@ -0,0 +1,176 @@
import 'dotenv/config';
import * as fs from 'fs';
import * as path from 'path';
import * as os from 'os';
import { execFileSync, execSync } from 'child_process';
import type { Whisper, TranscribeResult } from 'smart-whisper';
7
+
/**
 * Optional settings accepted by the transcription functions.
 *
 * Fields that are left `undefined` are not forwarded to the provider.
 */
export interface TranscribeOptions {
  /** Language code forwarded to the provider as `language` (e.g. 'en', 'es', 'fr'). */
  language?: string;
  /** Forwarded to the provider as `translate`. */
  translate?: boolean;
}

/**
 * Value resolved by the transcription functions.
 */
export interface TranscribeOutput {
  /** The transcribed text. */
  text: string;
}
16
+
17
+ let whisperInstance: Whisper | null = null;
18
+ let currentModelPath: string | null = null;
19
+
/**
 * Reads the Whisper.cpp model location from the environment.
 *
 * @returns The value of `WHISPER_CPP_MODEL_PATH`, or `undefined` when it is not set.
 */
function getWhisperModelPath(): string | undefined {
  const { WHISPER_CPP_MODEL_PATH: configuredPath } = process.env;
  return configuredPath;
}
23
+
/**
 * Reports whether the Whisper.cpp provider can be used.
 *
 * @returns `true` only when `WHISPER_CPP_MODEL_PATH` is set and a file exists at that path.
 */
export function isWhisperConfigured(): boolean {
  const candidate = getWhisperModelPath();
  if (candidate === undefined) {
    return false;
  }
  return fs.existsSync(candidate);
}
28
+
/**
 * Returns the shared Whisper instance for the currently configured model,
 * creating it on first use.
 *
 * The instance is cached in module state together with the model path it was
 * created for. When the configured path changes, the previous instance is
 * freed before a new one is constructed.
 *
 * @returns The cached or newly created Whisper instance.
 * @throws Error when `WHISPER_CPP_MODEL_PATH` is not set or the model file does not exist.
 */
async function getWhisperInstance(): Promise<Whisper> {
  const requestedPath = getWhisperModelPath();

  if (!requestedPath) {
    throw new Error('WHISPER_CPP_MODEL_PATH environment variable is not set');
  }
  if (!fs.existsSync(requestedPath)) {
    throw new Error(`Whisper model not found at path: ${requestedPath}`);
  }

  if (whisperInstance !== null) {
    // Same model as last time: reuse the cached instance.
    if (currentModelPath === requestedPath) {
      return whisperInstance;
    }
    // Different model requested: release the old instance first.
    await whisperInstance.free();
    whisperInstance = null;
  }

  // smart-whisper is imported lazily, only when an instance actually has to be created.
  const smartWhisper = await import('smart-whisper');
  const loaded = new smartWhisper.Whisper(requestedPath, { gpu: true });
  whisperInstance = loaded;
  currentModelPath = requestedPath;

  return loaded;
}
55
+
/**
 * Decodes an audio file into raw PCM samples by running the `ffmpeg` executable.
 *
 * ffmpeg is asked for 16 kHz, mono, 32-bit little-endian float output
 * (`-ar 16000 -ac 1 -f f32le`). The output goes to a temporary file in the OS
 * temp directory, which is read back and always removed afterwards.
 *
 * ffmpeg is started directly with an argument array instead of a shell command
 * string, so a path containing quotes, `$`, backticks or other shell
 * metacharacters is passed through verbatim and cannot alter the command.
 *
 * @param audioPath - Path of the audio file to decode.
 * @returns The decoded samples as a `Float32Array` view over the bytes read from the temp file.
 * @throws When ffmpeg cannot be started, exits with a non-zero status, or its output cannot be read.
 */
function audioToPcm(audioPath: string): Float32Array {
  const tempPcmPath = path.join(
    os.tmpdir(),
    `whisper_${Date.now()}_${Math.random().toString(36).substring(7)}.pcm`
  );

  try {
    execFileSync(
      'ffmpeg',
      ['-y', '-i', audioPath, '-ar', '16000', '-ac', '1', '-f', 'f32le', tempPcmPath],
      { stdio: 'pipe' }
    );

    const pcmBuffer = fs.readFileSync(tempPcmPath);
    // Four bytes per float sample; ignore a trailing partial sample rather than
    // letting the Float32Array constructor throw on a fractional length.
    const sampleCount = Math.floor(pcmBuffer.length / 4);
    return new Float32Array(pcmBuffer.buffer, pcmBuffer.byteOffset, sampleCount);
  } finally {
    // Remove the temp file whether or not the conversion succeeded.
    if (fs.existsSync(tempPcmPath)) {
      fs.unlinkSync(tempPcmPath);
    }
  }
}
74
+
/**
 * Tidies provider output for callers.
 *
 * Removes every ASCII control character (0x00-0x1F and 0x7F, which includes
 * tabs and line breaks) and then trims surrounding whitespace.
 *
 * @param text - Raw transcription text.
 * @returns The cleaned text.
 */
function cleanTranscription(text: string): string {
  const controlChars = /[\x00-\x1F\x7F]/g;
  const withoutControls = text.replace(controlChars, '');
  return withoutControls.trim();
}
80
+
/**
 * Concatenates the `text` of each result segment, separated by single spaces.
 *
 * @param results - Segments produced by a `'simple'` format transcription.
 * @returns The joined text; an empty string when there are no segments.
 */
function resultsToText(results: TranscribeResult<'simple'>[]): string {
  const pieces: string[] = [];
  for (const segment of results) {
    pieces.push(segment.text);
  }
  return pieces.join(' ');
}
84
+
/**
 * Transcribes an audio file on disk with the Whisper.cpp provider.
 *
 * Steps: verify the file exists, obtain the shared Whisper instance, decode
 * the audio to PCM, run the transcription in `'simple'` format and clean the
 * joined text.
 *
 * @param audioPath - Path of the audio file to transcribe.
 * @param options - Optional `language` / `translate` settings; only defined values are forwarded.
 * @returns The cleaned transcription text.
 * @throws Error when the audio file does not exist, plus any error raised while loading the model, decoding or transcribing.
 */
async function transcribe_whispercpp(audioPath: string, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  const inputExists = fs.existsSync(audioPath);
  if (!inputExists) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  const engine = await getWhisperInstance();
  const samples = audioToPcm(audioPath);

  // Only include the optional keys when the caller actually supplied them.
  const { language, translate } = options;
  const params: { language?: string; translate?: boolean; format: 'simple' } = {
    format: 'simple',
    ...(language !== undefined ? { language } : {}),
    ...(translate !== undefined ? { translate } : {}),
  };

  const job = await engine.transcribe(samples, params);
  const segments = await job.result;

  return { text: cleanTranscription(resultsToText(segments)) };
}
113
+
/**
 * Transcribes audio with the configured STT provider.
 *
 * Whisper.cpp is used when `WHISPER_CPP_MODEL_PATH` is set. Buffers are routed
 * through `transcribeBuffer`, string inputs are treated as file paths.
 *
 * @param audio - Path to an audio file, or a Buffer holding the audio data.
 * @param options - Optional transcription settings.
 * @returns The transcription result.
 * @throws Error when no provider is configured.
 */
export async function transcribe(audio: string | Buffer, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  if (!getWhisperModelPath()) {
    throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
  }

  return Buffer.isBuffer(audio)
    ? transcribeBuffer(audio, options)
    : transcribe_whispercpp(audio, options);
}
126
+
/**
 * Transcribes audio held in memory.
 *
 * The buffer is written to a temporary file in the OS temp directory, that
 * file is transcribed with the Whisper.cpp provider, and the file is removed
 * afterwards whether or not transcription succeeded.
 *
 * @param audioBuffer - The audio data.
 * @param options - Optional transcription settings.
 * @returns The transcription result.
 * @throws Error when no provider is configured, plus any error raised while writing the temp file or transcribing.
 */
export async function transcribeBuffer(audioBuffer: Buffer, options: TranscribeOptions = {}): Promise<TranscribeOutput> {
  const modelPath = getWhisperModelPath();

  if (!modelPath) {
    throw new Error('No STT provider configured. Set WHISPER_CPP_MODEL_PATH environment variable.');
  }

  const tempPath = path.join(
    os.tmpdir(),
    `whisper_input_${Date.now()}_${Math.random().toString(36).substring(7)}.audio`
  );

  try {
    // The write happens inside the try so that a failed or partial write is
    // also cleaned up by the finally block instead of leaving a stray file.
    fs.writeFileSync(tempPath, audioBuffer);
    // `return await` keeps the temp file alive until transcription has finished.
    return await transcribe_whispercpp(tempPath, options);
  } finally {
    if (fs.existsSync(tempPath)) {
      fs.unlinkSync(tempPath);
    }
  }
}
148
+
/**
 * Releases the cached Whisper instance, if there is one, and clears the
 * cached model path. Does nothing when no instance is held.
 */
export async function freeWhisper(): Promise<void> {
  if (whisperInstance === null) {
    return;
  }

  await whisperInstance.free();
  whisperInstance = null;
  currentModelPath = null;
}
156
+
/**
 * Lists the Whisper model names known to this package.
 *
 * @returns A new array on every call: each of tiny/base/small/medium followed
 * by its `.en` variant, then the `large` family.
 */
export function getAvailableModels(): string[] {
  const withEnglishVariant = ['tiny', 'base', 'small', 'medium'];
  const largeFamily = ['large', 'large-v2', 'large-v3', 'large-v3-turbo'];

  return [
    ...withEnglishVariant.flatMap((size) => [size, `${size}.en`]),
    ...largeFamily,
  ];
}
173
+
/**
 * Builds the HuggingFace download URL for a GGML Whisper model.
 *
 * The name is inserted as-is; it is not validated against `getAvailableModels()`.
 *
 * @param model - Model name, e.g. `'base'` or `'large-v3-turbo'`.
 * @returns URL of the `ggml-<model>.bin` file in the `ggerganov/whisper.cpp` repository.
 */
export function getModelUrl(model: string): string {
  const urlPrefix = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-';
  return `${urlPrefix}${model}.bin`;
}
@@ -0,0 +1,172 @@
1
+ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
2
+ import * as fs from 'fs';
3
+
4
+ vi.mock('fs', async () => {
5
+ const actual = await vi.importActual<typeof import('fs')>('fs');
6
+ return {
7
+ ...actual,
8
+ existsSync: vi.fn(),
9
+ readFileSync: vi.fn(),
10
+ writeFileSync: vi.fn(),
11
+ unlinkSync: vi.fn(),
12
+ };
13
+ });
14
+
15
+ vi.mock('child_process', () => ({
16
+ execSync: vi.fn(),
17
+ }));
18
+
19
+ vi.mock('smart-whisper', () => ({
20
+ Whisper: vi.fn().mockImplementation(() => ({
21
+ transcribe: vi.fn().mockResolvedValue({
22
+ result: Promise.resolve([{ text: 'Hello, world!', from: 0, to: 1000 }]),
23
+ }),
24
+ free: vi.fn().mockResolvedValue(undefined),
25
+ })),
26
+ }));
27
+
28
+ describe('stt-proxy', () => {
29
+ const originalEnv = process.env;
30
+
31
+ beforeEach(() => {
32
+ vi.clearAllMocks();
33
+ process.env = { ...originalEnv };
34
+ delete process.env['WHISPER_CPP_MODEL_PATH'];
35
+ });
36
+
37
+ afterEach(() => {
38
+ process.env = originalEnv;
39
+ vi.resetModules();
40
+ });
41
+
42
+ describe('isWhisperConfigured', () => {
43
+ it('should return false when WHISPER_CPP_MODEL_PATH is not set', async () => {
44
+ const { isWhisperConfigured } = await import('../src/index.js');
45
+ expect(isWhisperConfigured()).toBe(false);
46
+ });
47
+
48
+ it('should return false when WHISPER_CPP_MODEL_PATH is set but file does not exist', async () => {
49
+ process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
50
+ vi.mocked(fs.existsSync).mockReturnValue(false);
51
+ const { isWhisperConfigured } = await import('../src/index.js');
52
+ expect(isWhisperConfigured()).toBe(false);
53
+ });
54
+
55
+ it('should return true when WHISPER_CPP_MODEL_PATH is set and file exists', async () => {
56
+ process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
57
+ vi.mocked(fs.existsSync).mockReturnValue(true);
58
+ const { isWhisperConfigured } = await import('../src/index.js');
59
+ expect(isWhisperConfigured()).toBe(true);
60
+ });
61
+ });
62
+
63
+ describe('transcribe', () => {
64
+ it('should throw error when no provider is configured', async () => {
65
+ const { transcribe } = await import('../src/index.js');
66
+ await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
67
+ 'No STT provider configured'
68
+ );
69
+ });
70
+
71
+ it('should throw error when audio file does not exist', async () => {
72
+ process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
73
+ vi.mocked(fs.existsSync).mockImplementation((path) => {
74
+ if (path === '/path/to/model.bin') return true;
75
+ return false;
76
+ });
77
+ const { transcribe } = await import('../src/index.js');
78
+ await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
79
+ 'Audio file not found'
80
+ );
81
+ });
82
+
83
+ it('should throw error when model file does not exist', async () => {
84
+ process.env['WHISPER_CPP_MODEL_PATH'] = '/path/to/model.bin';
85
+ vi.mocked(fs.existsSync).mockImplementation((path) => {
86
+ if (path === '/path/to/audio.wav') return true;
87
+ return false;
88
+ });
89
+ const { transcribe } = await import('../src/index.js');
90
+ await expect(transcribe('/path/to/audio.wav')).rejects.toThrow(
91
+ 'Whisper model not found at path'
92
+ );
93
+ });
94
+ });
95
+
96
+ describe('getAvailableModels', () => {
97
+ it('should return list of available models', async () => {
98
+ const { getAvailableModels } = await import('../src/index.js');
99
+ const models = getAvailableModels();
100
+ expect(models).toContain('tiny');
101
+ expect(models).toContain('base');
102
+ expect(models).toContain('small');
103
+ expect(models).toContain('medium');
104
+ expect(models).toContain('large');
105
+ expect(models).toContain('large-v3-turbo');
106
+ expect(models.length).toBe(12);
107
+ });
108
+ });
109
+
110
+ describe('getModelUrl', () => {
111
+ it('should return correct HuggingFace URL for model', async () => {
112
+ const { getModelUrl } = await import('../src/index.js');
113
+ const url = getModelUrl('base');
114
+ expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.bin');
115
+ });
116
+
117
+ it('should return correct URL for large-v3-turbo model', async () => {
118
+ const { getModelUrl } = await import('../src/index.js');
119
+ const url = getModelUrl('large-v3-turbo');
120
+ expect(url).toBe('https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3-turbo.bin');
121
+ });
122
+ });
123
+
124
+ describe('freeWhisper', () => {
125
+ it('should not throw when called without active instance', async () => {
126
+ const { freeWhisper } = await import('../src/index.js');
127
+ await expect(freeWhisper()).resolves.not.toThrow();
128
+ });
129
+ });
130
+
131
+ describe('transcribeBuffer', () => {
132
+ it('should throw error when no provider is configured', async () => {
133
+ const { transcribeBuffer } = await import('../src/index.js');
134
+ const buffer = Buffer.from('test');
135
+ await expect(transcribeBuffer(buffer)).rejects.toThrow(
136
+ 'No STT provider configured'
137
+ );
138
+ });
139
+ });
140
+
141
+ describe('type exports', () => {
142
+ it('should export transcribe function', async () => {
143
+ const module = await import('../src/index.js');
144
+ expect(typeof module.transcribe).toBe('function');
145
+ });
146
+
147
+ it('should export transcribeBuffer function', async () => {
148
+ const module = await import('../src/index.js');
149
+ expect(typeof module.transcribeBuffer).toBe('function');
150
+ });
151
+
152
+ it('should export isWhisperConfigured function', async () => {
153
+ const module = await import('../src/index.js');
154
+ expect(typeof module.isWhisperConfigured).toBe('function');
155
+ });
156
+
157
+ it('should export freeWhisper function', async () => {
158
+ const module = await import('../src/index.js');
159
+ expect(typeof module.freeWhisper).toBe('function');
160
+ });
161
+
162
+ it('should export getAvailableModels function', async () => {
163
+ const module = await import('../src/index.js');
164
+ expect(typeof module.getAvailableModels).toBe('function');
165
+ });
166
+
167
+ it('should export getModelUrl function', async () => {
168
+ const module = await import('../src/index.js');
169
+ expect(typeof module.getModelUrl).toBe('function');
170
+ });
171
+ });
172
+ });
@@ -0,0 +1,135 @@
1
+ import { describe, it, expect, beforeAll, afterAll } from 'vitest';
2
+ import * as fs from 'fs';
3
+ import * as path from 'path';
4
+ import * as https from 'https';
5
+ import * as http from 'http';
6
+
7
+ const __dirname = path.dirname(new URL(import.meta.url).pathname);
8
+
9
+ const TEST_MODEL_DIR = path.join(__dirname, 'models');
10
+ const TEST_AUDIO_DIR = path.join(__dirname, 'audio');
11
+ const MODEL_NAME = 'ggml-tiny.bin';
12
+ const MODEL_PATH = path.join(TEST_MODEL_DIR, MODEL_NAME);
13
+ const AUDIO_FILE = path.join(TEST_AUDIO_DIR, 'jfk.wav');
14
+
15
+ const MODEL_URL = 'https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.bin';
16
+ const JFK_AUDIO_URL = 'https://github.com/ggerganov/whisper.cpp/raw/master/samples/jfk.wav';
17
+
/**
 * Downloads `url` to `destPath`, following HTTP redirects.
 *
 * The destination directory is created when missing. Redirect targets are
 * resolved against the URL that produced them, so absolute, root-relative and
 * path-relative `Location` headers are all handled. Responses that are not
 * written to disk are drained so their sockets are released. On a stream
 * error the partially written file is removed and the promise rejects.
 *
 * @param url - HTTP or HTTPS URL to fetch.
 * @param destPath - File path the body is written to.
 * @param maxRedirects - Remaining redirect budget; rejects with 'Too many redirects' once it reaches zero.
 * @returns A promise that resolves once the file has been fully written.
 */
async function downloadFile(url: string, destPath: string, maxRedirects = 10): Promise<void> {
  return new Promise((resolve, reject) => {
    if (maxRedirects <= 0) {
      return reject(new Error('Too many redirects'));
    }

    const dir = path.dirname(destPath);
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }

    const protocol = url.startsWith('https') ? https : http;

    protocol.get(url, (response) => {
      const status = response.statusCode;
      const location = response.headers.location;

      if (status !== undefined && status >= 300 && status < 400 && location) {
        // The redirect body is not needed; drain it so the socket is freed.
        response.resume();
        let redirectUrl: string;
        try {
          redirectUrl = new URL(location, url).toString();
        } catch (err) {
          reject(err);
          return;
        }
        downloadFile(redirectUrl, destPath, maxRedirects - 1).then(resolve, reject);
        return;
      }

      if (status !== 200) {
        response.resume();
        reject(new Error(`HTTP ${status}`));
        return;
      }

      const file = fs.createWriteStream(destPath);
      const fail = (err: Error): void => {
        file.destroy();
        // force: true makes this a no-op when the file was never created.
        fs.rmSync(destPath, { force: true });
        reject(err);
      };

      // pipe() does not forward source errors to the destination, so both
      // streams need their own handler or the promise could stay pending.
      response.on('error', fail);
      file.on('error', fail);
      file.on('finish', () => {
        file.close();
        resolve();
      });
      response.pipe(file);
    }).on('error', reject);
  });
}
57
+
/**
 * Normalizes a transcription for loose comparison in assertions:
 * lower-cases it, removes `.`, `,`, `!` and `?`, and trims surrounding whitespace.
 *
 * @param text - Transcription text to normalize.
 * @returns The normalized text.
 */
function normalizeTranscription(text: string): string {
  const lowered = text.toLowerCase();
  const withoutPunctuation = lowered.replace(/[.,!?]/g, '');
  return withoutPunctuation.trim();
}
61
+
62
+ describe('whisper.cpp integration tests', () => {
63
+ let transcribe: typeof import('../src/index.js').transcribe;
64
+ let transcribeBuffer: typeof import('../src/index.js').transcribeBuffer;
65
+ let isWhisperConfigured: typeof import('../src/index.js').isWhisperConfigured;
66
+ let freeWhisper: typeof import('../src/index.js').freeWhisper;
67
+
68
+ beforeAll(async () => {
69
+ // Download model if needed
70
+ if (!fs.existsSync(MODEL_PATH) || fs.statSync(MODEL_PATH).size === 0) {
71
+ if (fs.existsSync(MODEL_PATH)) fs.unlinkSync(MODEL_PATH);
72
+ console.log(`Downloading Whisper tiny model to ${MODEL_PATH}...`);
73
+ console.log('This may take a few minutes on first run.');
74
+ await downloadFile(MODEL_URL, MODEL_PATH);
75
+ console.log('Model downloaded successfully.');
76
+ }
77
+
78
+ // Download audio if needed
79
+ if (!fs.existsSync(AUDIO_FILE) || fs.statSync(AUDIO_FILE).size === 0) {
80
+ if (fs.existsSync(AUDIO_FILE)) fs.unlinkSync(AUDIO_FILE);
81
+ console.log(`Downloading JFK test audio to ${AUDIO_FILE}...`);
82
+ await downloadFile(JFK_AUDIO_URL, AUDIO_FILE);
83
+ console.log('Audio downloaded successfully.');
84
+ }
85
+
86
+ // Set model path
87
+ process.env['WHISPER_CPP_MODEL_PATH'] = MODEL_PATH;
88
+
89
+ // Import module
90
+ const stt = await import('../src/index.js');
91
+ transcribe = stt.transcribe;
92
+ transcribeBuffer = stt.transcribeBuffer;
93
+ isWhisperConfigured = stt.isWhisperConfigured;
94
+ freeWhisper = stt.freeWhisper;
95
+ }, 600000); // 10 minute timeout for model download
96
+
97
+ afterAll(async () => {
98
+ if (freeWhisper) {
99
+ await freeWhisper();
100
+ }
101
+ });
102
+
103
+ it('should transcribe JFK speech audio file', async () => {
104
+ const result = await transcribe(AUDIO_FILE);
105
+
106
+ expect(result).toBeDefined();
107
+ expect(result.text).toBeDefined();
108
+ expect(typeof result.text).toBe('string');
109
+ expect(result.text.length).toBeGreaterThan(0);
110
+
111
+ const normalizedResult = normalizeTranscription(result.text);
112
+ expect(normalizedResult).toContain('ask not what your country can do for you');
113
+ }, 300000); // 5 minute timeout
114
+
115
+ it('should transcribe audio from buffer', async () => {
116
+ const audioBuffer = fs.readFileSync(AUDIO_FILE);
117
+ const result = await transcribeBuffer(audioBuffer);
118
+
119
+ expect(result).toBeDefined();
120
+ expect(result.text).toBeDefined();
121
+ expect(typeof result.text).toBe('string');
122
+ expect(result.text.length).toBeGreaterThan(0);
123
+
124
+ const normalizedResult = normalizeTranscription(result.text);
125
+ expect(normalizedResult).toContain('ask not what your country can do for you');
126
+ }, 300000); // 5 minute timeout
127
+
128
+ it('should return true for isWhisperConfigured', () => {
129
+ expect(isWhisperConfigured()).toBe(true);
130
+ });
131
+
132
+ it('should throw error for non-existent audio file', async () => {
133
+ await expect(transcribe('/non/existent/audio.wav')).rejects.toThrow('Audio file not found');
134
+ });
135
+ });
@@ -0,0 +1,14 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "module": "commonjs",
5
+ "moduleResolution": "node",
6
+ "outDir": "./dist/cjs",
7
+ "declaration": false,
8
+ "declarationMap": false,
9
+ "verbatimModuleSyntax": false,
10
+ "types": ["node"]
11
+ },
12
+ "include": ["src/**/*.ts"],
13
+ "exclude": ["**/*.test.ts", "vitest.config.ts"]
14
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "module": "nodenext",
5
+ "moduleResolution": "nodenext",
6
+ "outDir": "./dist/esm",
7
+ "declaration": false,
8
+ "declarationMap": false,
9
+ "verbatimModuleSyntax": false,
10
+ "types": ["node"]
11
+ },
12
+ "include": ["src/**/*.ts"],
13
+ "exclude": ["**/*.test.ts", "vitest.config.ts"]
14
+ }
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "compilerOptions": {
3
+ "rootDir": "./src",
4
+ "outDir": "./dist",
5
+ "module": "nodenext",
6
+ "moduleResolution": "nodenext",
7
+ "target": "esnext",
8
+ "sourceMap": true,
9
+ "declaration": true,
10
+ "declarationMap": true,
11
+ "strict": true,
12
+ "noUncheckedIndexedAccess": true,
13
+ "exactOptionalPropertyTypes": true,
14
+ "verbatimModuleSyntax": true,
15
+ "isolatedModules": true,
16
+ "noUncheckedSideEffectImports": true,
17
+ "moduleDetection": "force",
18
+ "skipLibCheck": true
19
+ }
20
+ }
@@ -0,0 +1,15 @@
1
+ {
2
+ "extends": "./tsconfig.json",
3
+ "compilerOptions": {
4
+ "module": "nodenext",
5
+ "moduleResolution": "nodenext",
6
+ "outDir": "./dist/types",
7
+ "declaration": true,
8
+ "declarationMap": true,
9
+ "emitDeclarationOnly": true,
10
+ "verbatimModuleSyntax": false,
11
+ "types": ["node"]
12
+ },
13
+ "include": ["src/**/*.ts"],
14
+ "exclude": ["**/*.test.ts", "vitest.config.ts"]
15
+ }
@@ -0,0 +1,13 @@
1
+ import { defineConfig } from 'vitest/config';
2
+
3
+ export default defineConfig({
4
+ test: {
5
+ environment: 'node',
6
+ include: ['test/**/*.test.ts'],
7
+ coverage: {
8
+ provider: 'v8',
9
+ reporter: ['text', 'html'],
10
+ include: ['src/**/*.ts'],
11
+ },
12
+ },
13
+ });