@charivo/stt-provider-openai 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -278
- package/package.json +12 -3
package/README.md
CHANGED
|
@@ -1,297 +1,31 @@
|
|
|
1
1
|
# @charivo/stt-provider-openai
|
|
2
2
|
|
|
3
|
-
OpenAI
|
|
3
|
+
Server-side OpenAI STT provider for Charivo.
|
|
4
4
|
|
|
5
|
-
## Server-Side Only
|
|
6
|
-
|
|
7
|
-
This is a **server-side provider** that directly calls OpenAI Whisper API and should **ONLY** be used in Node.js/server environments. Using this in client-side code will expose your API key.
|
|
8
|
-
|
|
9
|
-
For client-side usage, use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) instead.
|
|
10
|
-
|
|
11
|
-
## Architecture
|
|
12
|
-
|
|
13
|
-
```
|
|
14
|
-
Node.js Server → OpenAISTTProvider → OpenAI Whisper API
|
|
15
|
-
```
|
|
16
|
-
|
|
17
|
-
## Installation
|
|
5
|
+
## Install
|
|
18
6
|
|
|
19
7
|
```bash
|
|
20
|
-
pnpm add @charivo/stt-provider-openai
|
|
8
|
+
pnpm add @charivo/stt-provider-openai
|
|
21
9
|
```
|
|
22
10
|
|
|
23
11
|
## Usage
|
|
24
12
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
```typescript
|
|
28
|
-
import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
|
|
29
|
-
|
|
30
|
-
const provider = createOpenAISTTProvider({
|
|
31
|
-
apiKey: process.env.OPENAI_API_KEY!, // Server environment variable
|
|
32
|
-
defaultModel: "whisper-1",
|
|
33
|
-
defaultLanguage: "en"
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
// Transcribe audio data
|
|
37
|
-
const transcription = await provider.transcribe(audioBlob);
|
|
38
|
-
|
|
39
|
-
// With custom options
|
|
40
|
-
const transcription2 = await provider.transcribe(audioBlob, {
|
|
41
|
-
language: "es" // Spanish
|
|
42
|
-
});
|
|
43
|
-
```
|
|
44
|
-
|
|
45
|
-
### API Endpoint Usage
|
|
46
|
-
|
|
47
|
-
```typescript
|
|
48
|
-
// Express.js example
|
|
49
|
-
import express from 'express';
|
|
50
|
-
import multer from 'multer';
|
|
51
|
-
import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
|
|
52
|
-
|
|
53
|
-
const app = express();
|
|
54
|
-
const upload = multer({ storage: multer.memoryStorage() });
|
|
55
|
-
const provider = createOpenAISTTProvider({
|
|
56
|
-
apiKey: process.env.OPENAI_API_KEY!
|
|
57
|
-
});
|
|
58
|
-
|
|
59
|
-
app.post('/api/stt', upload.single('audio'), async (req, res) => {
|
|
60
|
-
try {
|
|
61
|
-
if (!req.file) {
|
|
62
|
-
return res.status(400).json({ error: 'No audio file provided' });
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
const audioBlob = new Blob([req.file.buffer], {
|
|
66
|
-
type: req.file.mimetype
|
|
67
|
-
});
|
|
68
|
-
|
|
69
|
-
const transcription = await provider.transcribe(audioBlob, {
|
|
70
|
-
language: req.body.language
|
|
71
|
-
});
|
|
72
|
-
|
|
73
|
-
res.json({ transcription });
|
|
74
|
-
} catch (error) {
|
|
75
|
-
res.status(500).json({ error: 'Transcription failed' });
|
|
76
|
-
}
|
|
77
|
-
});
|
|
78
|
-
```
|
|
79
|
-
|
|
80
|
-
### Next.js API Route Example
|
|
81
|
-
|
|
82
|
-
```typescript
|
|
83
|
-
// app/api/stt/route.ts
|
|
84
|
-
import { NextRequest, NextResponse } from "next/server";
|
|
85
|
-
import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
|
|
86
|
-
|
|
87
|
-
const provider = createOpenAISTTProvider({
|
|
88
|
-
apiKey: process.env.OPENAI_API_KEY!
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
export async function POST(request: NextRequest) {
|
|
92
|
-
try {
|
|
93
|
-
const formData = await request.formData();
|
|
94
|
-
const audioFile = formData.get('audio') as File;
|
|
95
|
-
const language = formData.get('language') as string | undefined;
|
|
96
|
-
|
|
97
|
-
if (!audioFile) {
|
|
98
|
-
return NextResponse.json(
|
|
99
|
-
{ error: "No audio file provided" },
|
|
100
|
-
{ status: 400 }
|
|
101
|
-
);
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
// Convert File to Blob
|
|
105
|
-
const audioBlob = new Blob([await audioFile.arrayBuffer()], {
|
|
106
|
-
type: audioFile.type
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
const transcription = await provider.transcribe(audioBlob, {
|
|
110
|
-
language
|
|
111
|
-
});
|
|
112
|
-
|
|
113
|
-
return NextResponse.json({ transcription });
|
|
114
|
-
} catch (error) {
|
|
115
|
-
console.error("STT error:", error);
|
|
116
|
-
return NextResponse.json(
|
|
117
|
-
{ error: "Failed to transcribe audio" },
|
|
118
|
-
{ status: 500 }
|
|
119
|
-
);
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
```
|
|
123
|
-
|
|
124
|
-
## API Reference
|
|
125
|
-
|
|
126
|
-
### Configuration Options
|
|
127
|
-
|
|
128
|
-
```typescript
|
|
129
|
-
interface OpenAISTTConfig {
|
|
130
|
-
/** OpenAI API key (required) */
|
|
131
|
-
apiKey: string;
|
|
132
|
-
/** Default OpenAI Whisper model (default: "whisper-1") */
|
|
133
|
-
defaultModel?: "whisper-1";
|
|
134
|
-
/** Default language for transcription (e.g., "en", "es", "fr") */
|
|
135
|
-
defaultLanguage?: string;
|
|
136
|
-
/** Allow browser usage (dangerous - exposes API key) */
|
|
137
|
-
dangerouslyAllowBrowser?: boolean;
|
|
138
|
-
}
|
|
139
|
-
```
|
|
140
|
-
|
|
141
|
-
### Available Models
|
|
142
|
-
|
|
143
|
-
- `whisper-1` - OpenAI's Whisper model for speech recognition
|
|
144
|
-
|
|
145
|
-
### Supported Languages
|
|
146
|
-
|
|
147
|
-
Whisper supports 99+ languages including:
|
|
148
|
-
- English (`en`)
|
|
149
|
-
- Spanish (`es`)
|
|
150
|
-
- French (`fr`)
|
|
151
|
-
- German (`de`)
|
|
152
|
-
- Chinese (`zh`)
|
|
153
|
-
- Japanese (`ja`)
|
|
154
|
-
- Korean (`ko`)
|
|
155
|
-
- And many more...
|
|
156
|
-
|
|
157
|
-
For best results, specify the language if known. If not specified, Whisper will auto-detect.
|
|
158
|
-
|
|
159
|
-
### Methods
|
|
160
|
-
|
|
161
|
-
#### `transcribe(audio, options?): Promise<string>`
|
|
162
|
-
Transcribe audio data to text.
|
|
163
|
-
|
|
164
|
-
```typescript
|
|
165
|
-
// With Blob
|
|
166
|
-
const transcription = await provider.transcribe(audioBlob);
|
|
167
|
-
|
|
168
|
-
// With ArrayBuffer
|
|
169
|
-
const transcription = await provider.transcribe(audioBuffer);
|
|
170
|
-
|
|
171
|
-
// With language option
|
|
172
|
-
const transcription = await provider.transcribe(audioBlob, {
|
|
173
|
-
language: "es"
|
|
174
|
-
});
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
**Parameters:**
|
|
178
|
-
- `audio: Blob | ArrayBuffer` - Audio data to transcribe
|
|
179
|
-
- `options?: STTOptions` - Optional transcription options
|
|
180
|
-
- `language?: string` - Language code (e.g., "en", "es")
|
|
181
|
-
|
|
182
|
-
**Returns:** `Promise<string>` - Transcribed text
|
|
183
|
-
|
|
184
|
-
## Browser Usage (Not Recommended)
|
|
185
|
-
|
|
186
|
-
⚠️ **Security Warning**: This provider should NOT be used in browser as it exposes your API key to users.
|
|
187
|
-
|
|
188
|
-
**Better alternative**: Use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) for client-side usage.
|
|
189
|
-
|
|
190
|
-
## Environment Variables
|
|
191
|
-
|
|
192
|
-
```bash
|
|
193
|
-
OPENAI_API_KEY=your_openai_api_key_here
|
|
194
|
-
```
|
|
195
|
-
|
|
196
|
-
## Error Handling
|
|
197
|
-
|
|
198
|
-
```typescript
|
|
199
|
-
try {
|
|
200
|
-
const transcription = await provider.transcribe(audioBlob);
|
|
201
|
-
} catch (error) {
|
|
202
|
-
console.error("Transcription failed:", error);
|
|
203
|
-
// Handle OpenAI API errors:
|
|
204
|
-
// - Invalid audio format
|
|
205
|
-
// - API key issues
|
|
206
|
-
// - Rate limiting
|
|
207
|
-
// - Network errors
|
|
208
|
-
}
|
|
209
|
-
```
|
|
210
|
-
|
|
211
|
-
## Use Cases
|
|
212
|
-
|
|
213
|
-
- **API Endpoints**: Provide STT service via your server
|
|
214
|
-
- **Secure Transcription**: Keep API keys on server, expose via HTTP endpoint
|
|
215
|
-
- **Language Support**: Leverage Whisper's multilingual capabilities
|
|
216
|
-
- **Rate Limiting**: Control STT usage per user
|
|
217
|
-
- **Cost Monitoring**: Track STT API usage and costs
|
|
218
|
-
|
|
219
|
-
## Complete Example
|
|
220
|
-
|
|
221
|
-
### Server (Next.js API Route)
|
|
222
|
-
|
|
223
|
-
```typescript
|
|
224
|
-
// app/api/stt/route.ts
|
|
13
|
+
```ts
|
|
225
14
|
import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
|
|
226
15
|
|
|
227
16
|
const provider = createOpenAISTTProvider({
|
|
228
17
|
apiKey: process.env.OPENAI_API_KEY!,
|
|
229
|
-
|
|
18
|
+
defaultModel: "whisper-1",
|
|
230
19
|
});
|
|
231
20
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
const audioFile = formData.get('audio') as File;
|
|
235
|
-
const language = formData.get('language') as string | undefined;
|
|
236
|
-
|
|
237
|
-
const audioBlob = new Blob([await audioFile.arrayBuffer()]);
|
|
238
|
-
const transcription = await provider.transcribe(audioBlob, { language });
|
|
239
|
-
|
|
240
|
-
return NextResponse.json({ transcription });
|
|
241
|
-
}
|
|
242
|
-
```
|
|
243
|
-
|
|
244
|
-
### Client (uses Remote Transcriber)
|
|
245
|
-
|
|
246
|
-
```typescript
|
|
247
|
-
import { createRemoteSTTTranscriber } from "@charivo/stt-transcriber-remote";
|
|
248
|
-
import { createSTTManager } from "@charivo/stt-core";
|
|
249
|
-
|
|
250
|
-
const transcriber = createRemoteSTTTranscriber({
|
|
251
|
-
apiEndpoint: "/api/stt"
|
|
21
|
+
const text = await provider.transcribe(audioBlob, {
|
|
22
|
+
language: "ko",
|
|
252
23
|
});
|
|
253
|
-
const sttManager = createSTTManager(transcriber);
|
|
254
|
-
|
|
255
|
-
// Start recording
|
|
256
|
-
await sttManager.start();
|
|
257
|
-
|
|
258
|
-
// Stop and get transcription
|
|
259
|
-
const text = await sttManager.stop();
|
|
260
|
-
console.log("User said:", text);
|
|
261
24
|
```
|
|
262
25
|
|
|
263
|
-
## Pricing
|
|
264
|
-
|
|
265
|
-
- **whisper-1**: $0.006 per minute (rounded to the nearest second)
|
|
266
|
-
|
|
267
|
-
Example: 30 seconds of audio = $0.003
|
|
268
|
-
|
|
269
|
-
## Audio Format Support
|
|
270
|
-
|
|
271
|
-
Whisper supports various audio formats:
|
|
272
|
-
- MP3
|
|
273
|
-
- MP4
|
|
274
|
-
- MPEG
|
|
275
|
-
- MPGA
|
|
276
|
-
- M4A
|
|
277
|
-
- WAV
|
|
278
|
-
- WEBM
|
|
279
|
-
|
|
280
|
-
Maximum file size: 25 MB
|
|
281
|
-
|
|
282
|
-
## Performance Tips
|
|
283
|
-
|
|
284
|
-
1. **Use appropriate audio quality**: Higher quality doesn't always mean better transcription
|
|
285
|
-
2. **Specify language**: Improves accuracy and speed
|
|
286
|
-
3. **Reduce background noise**: Pre-process audio for better results
|
|
287
|
-
4. **Chunk long audio**: Split audio files > 10 minutes for faster processing
|
|
288
|
-
|
|
289
|
-
## Related Packages
|
|
290
|
-
|
|
291
|
-
- [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) - Client-side HTTP STT transcriber (recommended)
|
|
292
|
-
- [`@charivo/stt-transcriber-openai`](../stt-transcriber-openai) - Client-side OpenAI transcriber (testing only)
|
|
293
|
-
- [`@charivo/stt-core`](../stt-core) - STT core functionality
|
|
294
|
-
|
|
295
|
-
## License
|
|
26
|
+
## Config
|
|
296
27
|
|
|
297
|
-
|
|
28
|
+
- `apiKey`
|
|
29
|
+
- `defaultModel?` default: `whisper-1`
|
|
30
|
+
- `defaultLanguage?`
|
|
31
|
+
- `dangerouslyAllowBrowser?` testing only
|
package/package.json
CHANGED
|
@@ -1,12 +1,20 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@charivo/stt-provider-openai",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.2",
|
|
4
4
|
"description": "OpenAI STT provider for Charivo (server-side)",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
|
+
"module": "dist/index.mjs",
|
|
6
7
|
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.mjs",
|
|
12
|
+
"require": "./dist/index.js"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
7
15
|
"dependencies": {
|
|
8
16
|
"openai": "^4.47.1",
|
|
9
|
-
"@charivo/core": "0.0
|
|
17
|
+
"@charivo/core": "0.1.0"
|
|
10
18
|
},
|
|
11
19
|
"devDependencies": {
|
|
12
20
|
"tsup": "^8.0.0",
|
|
@@ -35,6 +43,7 @@
|
|
|
35
43
|
"scripts": {
|
|
36
44
|
"build": "tsup",
|
|
37
45
|
"dev": "tsup --watch",
|
|
38
|
-
"clean": "rm -rf dist"
|
|
46
|
+
"clean": "rm -rf dist",
|
|
47
|
+
"typecheck": "tsc --noEmit"
|
|
39
48
|
}
|
|
40
49
|
}
|