@charivo/stt-provider-openai 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Zeikar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,297 @@
1
+ # @charivo/stt-provider-openai
2
+
3
+ OpenAI Whisper STT (Speech-to-Text) provider for Charivo framework (server-side).
4
+
5
+ ## ⚠️ Important Security Note
6
+
7
+ This is a **server-side provider** that directly calls OpenAI Whisper API and should **ONLY** be used in Node.js/server environments. Using this in client-side code will expose your API key.
8
+
9
+ For client-side usage, use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) instead.
10
+
11
+ ## Architecture
12
+
13
+ ```
14
+ Node.js Server → OpenAISTTProvider → OpenAI Whisper API
15
+ ```
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pnpm add @charivo/stt-provider-openai @charivo/core openai
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Server-side Only
26
+
27
+ ```typescript
28
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
29
+
30
+ const provider = createOpenAISTTProvider({
31
+ apiKey: process.env.OPENAI_API_KEY!, // Server environment variable
32
+ defaultModel: "whisper-1",
33
+ defaultLanguage: "en"
34
+ });
35
+
36
+ // Transcribe audio data
37
+ const transcription = await provider.transcribe(audioBlob);
38
+
39
+ // With custom options
40
+ const transcription2 = await provider.transcribe(audioBlob, {
41
+ language: "es" // Spanish
42
+ });
43
+ ```
44
+
45
+ ### API Endpoint Usage
46
+
47
+ ```typescript
48
+ // Express.js example
49
+ import express from 'express';
50
+ import multer from 'multer';
51
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
52
+
53
+ const app = express();
54
+ const upload = multer({ storage: multer.memoryStorage() });
55
+ const provider = createOpenAISTTProvider({
56
+ apiKey: process.env.OPENAI_API_KEY!
57
+ });
58
+
59
+ app.post('/api/stt', upload.single('audio'), async (req, res) => {
60
+ try {
61
+ if (!req.file) {
62
+ return res.status(400).json({ error: 'No audio file provided' });
63
+ }
64
+
65
+ const audioBlob = new Blob([req.file.buffer], {
66
+ type: req.file.mimetype
67
+ });
68
+
69
+ const transcription = await provider.transcribe(audioBlob, {
70
+ language: req.body.language
71
+ });
72
+
73
+ res.json({ transcription });
74
+ } catch (error) {
75
+ res.status(500).json({ error: 'Transcription failed' });
76
+ }
77
+ });
78
+ ```
79
+
80
+ ### Next.js API Route Example
81
+
82
+ ```typescript
83
+ // app/api/stt/route.ts
84
+ import { NextRequest, NextResponse } from "next/server";
85
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
86
+
87
+ const provider = createOpenAISTTProvider({
88
+ apiKey: process.env.OPENAI_API_KEY!
89
+ });
90
+
91
+ export async function POST(request: NextRequest) {
92
+ try {
93
+ const formData = await request.formData();
94
+ const audioFile = formData.get('audio') as File;
95
+ const language = formData.get('language') as string | undefined;
96
+
97
+ if (!audioFile) {
98
+ return NextResponse.json(
99
+ { error: "No audio file provided" },
100
+ { status: 400 }
101
+ );
102
+ }
103
+
104
+ // Convert File to Blob
105
+ const audioBlob = new Blob([await audioFile.arrayBuffer()], {
106
+ type: audioFile.type
107
+ });
108
+
109
+ const transcription = await provider.transcribe(audioBlob, {
110
+ language
111
+ });
112
+
113
+ return NextResponse.json({ transcription });
114
+ } catch (error) {
115
+ console.error("STT error:", error);
116
+ return NextResponse.json(
117
+ { error: "Failed to transcribe audio" },
118
+ { status: 500 }
119
+ );
120
+ }
121
+ }
122
+ ```
123
+
124
+ ## API Reference
125
+
126
+ ### Configuration Options
127
+
128
+ ```typescript
129
+ interface OpenAISTTConfig {
130
+ /** OpenAI API key (required) */
131
+ apiKey: string;
132
+ /** Default OpenAI Whisper model (default: "whisper-1") */
133
+ defaultModel?: "whisper-1";
134
+ /** Default language for transcription (e.g., "en", "es", "fr") */
135
+ defaultLanguage?: string;
136
+ /** Allow browser usage (dangerous - exposes API key) */
137
+ dangerouslyAllowBrowser?: boolean;
138
+ }
139
+ ```
140
+
141
+ ### Available Models
142
+
143
+ - `whisper-1` - OpenAI's Whisper model for speech recognition
144
+
145
+ ### Supported Languages
146
+
147
+ Whisper supports 99+ languages including:
148
+ - English (`en`)
149
+ - Spanish (`es`)
150
+ - French (`fr`)
151
+ - German (`de`)
152
+ - Chinese (`zh`)
153
+ - Japanese (`ja`)
154
+ - Korean (`ko`)
155
+ - And many more...
156
+
157
+ For best results, specify the language if known. If not specified, Whisper will auto-detect.
158
+
159
+ ### Methods
160
+
161
+ #### `transcribe(audio, options?): Promise<string>`
162
+ Transcribe audio data to text.
163
+
164
+ ```typescript
165
+ // With Blob
166
+ const transcription = await provider.transcribe(audioBlob);
167
+
168
+ // With ArrayBuffer
169
+ const transcription = await provider.transcribe(audioBuffer);
170
+
171
+ // With language option
172
+ const transcription = await provider.transcribe(audioBlob, {
173
+ language: "es"
174
+ });
175
+ ```
176
+
177
+ **Parameters:**
178
+ - `audio: Blob | ArrayBuffer` - Audio data to transcribe
179
+ - `options?: STTOptions` - Optional transcription options
180
+ - `language?: string` - Language code (e.g., "en", "es")
181
+
182
+ **Returns:** `Promise<string>` - Transcribed text
183
+
184
+ ## Browser Usage (Not Recommended)
185
+
186
+ ⚠️ **Security Warning**: This provider should NOT be used in browser as it exposes your API key to users.
187
+
188
+ **Better alternative**: Use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) for client-side usage.
189
+
190
+ ## Environment Variables
191
+
192
+ ```bash
193
+ OPENAI_API_KEY=your_openai_api_key_here
194
+ ```
195
+
196
+ ## Error Handling
197
+
198
+ ```typescript
199
+ try {
200
+ const transcription = await provider.transcribe(audioBlob);
201
+ } catch (error) {
202
+ console.error("Transcription failed:", error);
203
+ // Handle OpenAI API errors:
204
+ // - Invalid audio format
205
+ // - API key issues
206
+ // - Rate limiting
207
+ // - Network errors
208
+ }
209
+ ```
210
+
211
+ ## Use Cases
212
+
213
+ - **API Endpoints**: Provide STT service via your server
214
+ - **Secure Transcription**: Keep API keys on server, expose via HTTP endpoint
215
+ - **Language Support**: Leverage Whisper's multilingual capabilities
216
+ - **Rate Limiting**: Control STT usage per user
217
+ - **Cost Monitoring**: Track STT API usage and costs
218
+
219
+ ## Complete Example
220
+
221
+ ### Server (Next.js API Route)
222
+
223
+ ```typescript
224
+ // app/api/stt/route.ts
225
+ import { NextRequest, NextResponse } from "next/server";
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
226
+
227
+ const provider = createOpenAISTTProvider({
228
+ apiKey: process.env.OPENAI_API_KEY!,
229
+ defaultLanguage: "en"
230
+ });
231
+
232
+ export async function POST(request: NextRequest) {
233
+ const formData = await request.formData();
234
+ const audioFile = formData.get('audio') as File;
235
+ const language = formData.get('language') as string | undefined;
236
+
237
+ const audioBlob = new Blob([await audioFile.arrayBuffer()], { type: audioFile.type });
238
+ const transcription = await provider.transcribe(audioBlob, { language });
239
+
240
+ return NextResponse.json({ transcription });
241
+ }
242
+ ```
243
+
244
+ ### Client (uses Remote Transcriber)
245
+
246
+ ```typescript
247
+ import { createRemoteSTTTranscriber } from "@charivo/stt-transcriber-remote";
248
+ import { createSTTManager } from "@charivo/stt-core";
249
+
250
+ const transcriber = createRemoteSTTTranscriber({
251
+ apiEndpoint: "/api/stt"
252
+ });
253
+ const sttManager = createSTTManager(transcriber);
254
+
255
+ // Start recording
256
+ await sttManager.start();
257
+
258
+ // Stop and get transcription
259
+ const text = await sttManager.stop();
260
+ console.log("User said:", text);
261
+ ```
262
+
263
+ ## Pricing (OpenAI Whisper)
264
+
265
+ - **whisper-1**: $0.006 per minute (rounded to the nearest second)
266
+
267
+ Example: 30 seconds of audio = $0.003
268
+
269
+ ## Audio Format Support
270
+
271
+ Whisper supports various audio formats:
272
+ - MP3
273
+ - MP4
274
+ - MPEG
275
+ - MPGA
276
+ - M4A
277
+ - WAV
278
+ - WEBM
279
+
280
+ Maximum file size: 25 MB
281
+
282
+ ## Performance Tips
283
+
284
+ 1. **Use appropriate audio quality**: Higher quality doesn't always mean better transcription
285
+ 2. **Specify language**: Improves accuracy and speed
286
+ 3. **Reduce background noise**: Pre-process audio for better results
287
+ 4. **Chunk long audio**: Split audio files > 10 minutes for faster processing
288
+
289
+ ## Related Packages
290
+
291
+ - [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) - Client-side HTTP STT transcriber (recommended)
292
+ - [`@charivo/stt-transcriber-openai`](../stt-transcriber-openai) - Client-side OpenAI transcriber (testing only)
293
+ - [`@charivo/stt-core`](../stt-core) - STT core functionality
294
+
295
+ ## License
296
+
297
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ import { STTProvider, STTOptions } from '@charivo/core';
2
+
3
+ /**
+ * Configuration for the OpenAI Whisper STT provider.
+ */
+ interface OpenAISTTConfig {
4
+ /** OpenAI API key (required). Keep this on the server. */
+ apiKey: string;
5
+ /** Whisper model id (default: "whisper-1"). */
+ defaultModel?: "whisper-1";
6
+ /** Default ISO-639-1 language code (e.g. "en"); Whisper auto-detects when omitted. */
+ defaultLanguage?: string;
7
+ /**
8
+ * Allow usage in browser (dangerous - exposes API key)
9
+ * Only use for testing/development
10
+ */
11
+ dangerouslyAllowBrowser?: boolean;
12
+ }
13
+ /**
+ * Server-side STT provider backed by the OpenAI audio transcription API.
+ * The constructor throws when run in a browser unless
+ * `dangerouslyAllowBrowser` is explicitly set (see dist/index.js).
+ */
+ declare class OpenAISTTProvider implements STTProvider {
14
+ private openai;
15
+ private defaultModel;
16
+ private defaultLanguage?;
17
+ constructor(config: OpenAISTTConfig);
18
+ /** Transcribe audio to text; resolves with the transcribed string. */
+ transcribe(audio: Blob | ArrayBuffer, options?: STTOptions): Promise<string>;
19
+ }
20
+ /** Factory returning a configured OpenAISTTProvider. */
+ declare function createOpenAISTTProvider(config: OpenAISTTConfig): OpenAISTTProvider;
21
+
22
+ export { type OpenAISTTConfig, OpenAISTTProvider, createOpenAISTTProvider };
package/dist/index.d.mts ADDED
@@ -0,0 +1,22 @@
1
+ import { STTProvider, STTOptions } from '@charivo/core';
2
+
3
+ /**
+ * Configuration for the OpenAI Whisper STT provider.
+ */
+ interface OpenAISTTConfig {
4
+ /** OpenAI API key (required). Keep this on the server. */
+ apiKey: string;
5
+ /** Whisper model id (default: "whisper-1"). */
+ defaultModel?: "whisper-1";
6
+ /** Default ISO-639-1 language code (e.g. "en"); Whisper auto-detects when omitted. */
+ defaultLanguage?: string;
7
+ /**
8
+ * Allow usage in browser (dangerous - exposes API key)
9
+ * Only use for testing/development
10
+ */
11
+ dangerouslyAllowBrowser?: boolean;
12
+ }
13
+ /**
+ * Server-side STT provider backed by the OpenAI audio transcription API.
+ * The constructor throws when run in a browser unless
+ * `dangerouslyAllowBrowser` is explicitly set (see dist/index.mjs).
+ */
+ declare class OpenAISTTProvider implements STTProvider {
14
+ private openai;
15
+ private defaultModel;
16
+ private defaultLanguage?;
17
+ constructor(config: OpenAISTTConfig);
18
+ /** Transcribe audio to text; resolves with the transcribed string. */
+ transcribe(audio: Blob | ArrayBuffer, options?: STTOptions): Promise<string>;
19
+ }
20
+ /** Factory returning a configured OpenAISTTProvider. */
+ declare function createOpenAISTTProvider(config: OpenAISTTConfig): OpenAISTTProvider;
21
+
22
+ export { type OpenAISTTConfig, OpenAISTTProvider, createOpenAISTTProvider };
package/dist/index.js ADDED
@@ -0,0 +1,75 @@
1
+ "use strict";
2
+ // ---- esbuild/tsup CommonJS interop helpers (generated; do not hand-edit) ----
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ // Defines a lazy, enumerable getter on `target` for every entry in `all`
+ // (live bindings for the module's exports).
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ // Copies enumerable own properties of `from` onto `to` as getters,
+ // skipping `except` and anything `to` already defines.
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ // Wraps a require()'d CommonJS module so it can be consumed like an ES module.
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ // Marks the exports object as an ES module and copies the exports onto it.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ OpenAISTTProvider: () => OpenAISTTProvider,
34
+ createOpenAISTTProvider: () => createOpenAISTTProvider
35
+ });
36
+ module.exports = __toCommonJS(index_exports);
37
+ // CommonJS require of the OpenAI SDK, adapted to an ESM-style default import.
+ var import_openai = __toESM(require("openai"));
38
var OpenAISTTProvider = class {
  openai;
  defaultModel;
  defaultLanguage;
  /**
   * @param {object} config
   * @param {string} config.apiKey OpenAI API key (server-side secret).
   * @param {string} [config.defaultModel] Whisper model id (default "whisper-1").
   * @param {string} [config.defaultLanguage] Default ISO-639-1 language code.
   * @param {boolean} [config.dangerouslyAllowBrowser] Opt in to browser use (exposes the key).
   * @throws {Error} When constructed in a browser without the explicit opt-in.
   */
  constructor(config) {
    // Refuse to run in a browser unless explicitly opted in: constructing
    // this class client-side would ship the API key to every visitor.
    if (typeof window !== "undefined" && !config.dangerouslyAllowBrowser) {
      throw new Error(
        "OpenAI provider is for server-side use only. Set dangerouslyAllowBrowser: true for testing"
      );
    }
    this.openai = new import_openai.default({
      apiKey: config.apiKey,
      dangerouslyAllowBrowser: config.dangerouslyAllowBrowser
    });
    this.defaultModel = config.defaultModel || "whisper-1";
    this.defaultLanguage = config.defaultLanguage;
  }
  /**
   * Transcribe audio to text via the OpenAI transcription endpoint.
   * @param {Blob|ArrayBuffer} audio Audio payload; raw ArrayBuffers are assumed WAV.
   * @param {{language?: string}} [options] Per-call override of the default language.
   * @returns {Promise<string>} The transcribed text.
   */
  async transcribe(audio, options) {
    const audioBlob = audio instanceof Blob ? audio : new Blob([audio], { type: "audio/wav" });
    // Fix: the upload's filename extension must match the actual payload.
    // OpenAI infers the container format from the extension, so labeling a
    // webm/mp3 blob "audio.wav" (previous behavior) mislabels non-WAV audio.
    const mimeType = (audioBlob.type || "audio/wav").split(";")[0];
    const extension = {
      "audio/mpeg": "mp3",
      "audio/mp3": "mp3",
      "audio/mp4": "mp4",
      "audio/m4a": "m4a",
      "audio/x-m4a": "m4a",
      "audio/wav": "wav",
      "audio/x-wav": "wav",
      "audio/webm": "webm",
      "audio/ogg": "ogg",
      "audio/flac": "flac"
    }[mimeType] || "wav";
    const audioFile = new File([audioBlob], `audio.${extension}`, { type: mimeType });
    const response = await this.openai.audio.transcriptions.create({
      file: audioFile,
      model: this.defaultModel,
      language: options?.language || this.defaultLanguage
    });
    return response.text;
  }
};
/**
 * Factory mirroring the package's named export.
 * @param {object} config See OpenAISTTProvider constructor.
 * @returns {OpenAISTTProvider}
 */
function createOpenAISTTProvider(config) {
  return new OpenAISTTProvider(config);
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  OpenAISTTProvider,
  createOpenAISTTProvider
});
package/dist/index.mjs ADDED
@@ -0,0 +1,39 @@
1
+ // src/index.ts
2
+ import OpenAI from "openai";
3
var OpenAISTTProvider = class {
  openai;
  defaultModel;
  defaultLanguage;
  /**
   * @param {object} config
   * @param {string} config.apiKey OpenAI API key (server-side secret).
   * @param {string} [config.defaultModel] Whisper model id (default "whisper-1").
   * @param {string} [config.defaultLanguage] Default ISO-639-1 language code.
   * @param {boolean} [config.dangerouslyAllowBrowser] Opt in to browser use (exposes the key).
   * @throws {Error} When constructed in a browser without the explicit opt-in.
   */
  constructor(config) {
    // Refuse to run in a browser unless explicitly opted in: constructing
    // this class client-side would ship the API key to every visitor.
    if (typeof window !== "undefined" && !config.dangerouslyAllowBrowser) {
      throw new Error(
        "OpenAI provider is for server-side use only. Set dangerouslyAllowBrowser: true for testing"
      );
    }
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      dangerouslyAllowBrowser: config.dangerouslyAllowBrowser
    });
    this.defaultModel = config.defaultModel || "whisper-1";
    this.defaultLanguage = config.defaultLanguage;
  }
  /**
   * Transcribe audio to text via the OpenAI transcription endpoint.
   * @param {Blob|ArrayBuffer} audio Audio payload; raw ArrayBuffers are assumed WAV.
   * @param {{language?: string}} [options] Per-call override of the default language.
   * @returns {Promise<string>} The transcribed text.
   */
  async transcribe(audio, options) {
    const audioBlob = audio instanceof Blob ? audio : new Blob([audio], { type: "audio/wav" });
    // Fix: the upload's filename extension must match the actual payload.
    // OpenAI infers the container format from the extension, so labeling a
    // webm/mp3 blob "audio.wav" (previous behavior) mislabels non-WAV audio.
    const mimeType = (audioBlob.type || "audio/wav").split(";")[0];
    const extension = {
      "audio/mpeg": "mp3",
      "audio/mp3": "mp3",
      "audio/mp4": "mp4",
      "audio/m4a": "m4a",
      "audio/x-m4a": "m4a",
      "audio/wav": "wav",
      "audio/x-wav": "wav",
      "audio/webm": "webm",
      "audio/ogg": "ogg",
      "audio/flac": "flac"
    }[mimeType] || "wav";
    const audioFile = new File([audioBlob], `audio.${extension}`, { type: mimeType });
    const response = await this.openai.audio.transcriptions.create({
      file: audioFile,
      model: this.defaultModel,
      language: options?.language || this.defaultLanguage
    });
    return response.text;
  }
};
/**
 * Factory mirroring the package's named export.
 * @param {object} config See OpenAISTTProvider constructor.
 * @returns {OpenAISTTProvider}
 */
function createOpenAISTTProvider(config) {
  return new OpenAISTTProvider(config);
}
export {
  OpenAISTTProvider,
  createOpenAISTTProvider
};
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "@charivo/stt-provider-openai",
3
+ "version": "0.0.1",
4
+ "description": "OpenAI STT provider for Charivo (server-side)",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.mjs",
+ "types": "dist/index.d.ts",
7
+ "dependencies": {
8
+ "openai": "^4.47.1",
9
+ "@charivo/core": "0.0.1"
10
+ },
11
+ "devDependencies": {
12
+ "tsup": "^8.0.0",
13
+ "typescript": "^5.0.0"
14
+ },
15
+ "files": [
16
+ "dist"
17
+ ],
18
+ "publishConfig": {
19
+ "access": "public"
20
+ },
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/zeikar/charivo.git",
25
+ "directory": "packages/stt-provider-openai"
26
+ },
27
+ "author": {
28
+ "name": "Zeikar",
29
+ "url": "https://github.com/zeikar"
30
+ },
31
+ "homepage": "https://github.com/zeikar/charivo#readme",
32
+ "bugs": {
33
+ "url": "https://github.com/zeikar/charivo/issues"
34
+ },
35
+ "scripts": {
36
+ "build": "tsup",
37
+ "dev": "tsup --watch",
38
+ "clean": "rm -rf dist"
39
+ }
40
+ }