@charivo/stt-provider-openai 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Zeikar
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,297 @@
1
+ # @charivo/stt-provider-openai
2
+
3
+ OpenAI Whisper STT (Speech-to-Text) provider for Charivo framework (server-side).
4
+
5
+ ## ⚠️ Important Security Note
6
+
7
+ This is a **server-side provider** that directly calls OpenAI Whisper API and should **ONLY** be used in Node.js/server environments. Using this in client-side code will expose your API key.
8
+
9
+ For client-side usage, use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) instead.
10
+
11
+ ## Architecture
12
+
13
+ ```
14
+ Node.js Server → OpenAISTTProvider → OpenAI Whisper API
15
+ ```
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pnpm add @charivo/stt-provider-openai @charivo/core openai
21
+ ```
22
+
23
+ ## Usage
24
+
25
+ ### Server-side Only
26
+
27
+ ```typescript
28
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
29
+
30
+ const provider = createOpenAISTTProvider({
31
+ apiKey: process.env.OPENAI_API_KEY!, // Server environment variable
32
+ defaultModel: "whisper-1",
33
+ defaultLanguage: "en"
34
+ });
35
+
36
+ // Transcribe audio data
37
+ const transcription = await provider.transcribe(audioBlob);
38
+
39
+ // With custom options
40
+ const transcription2 = await provider.transcribe(audioBlob, {
41
+ language: "es" // Spanish
42
+ });
43
+ ```
44
+
45
+ ### API Endpoint Usage
46
+
47
+ ```typescript
48
+ // Express.js example
49
+ import express from 'express';
50
+ import multer from 'multer';
51
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
52
+
53
+ const app = express();
54
+ const upload = multer({ storage: multer.memoryStorage() });
55
+ const provider = createOpenAISTTProvider({
56
+ apiKey: process.env.OPENAI_API_KEY!
57
+ });
58
+
59
+ app.post('/api/stt', upload.single('audio'), async (req, res) => {
60
+ try {
61
+ if (!req.file) {
62
+ return res.status(400).json({ error: 'No audio file provided' });
63
+ }
64
+
65
+ const audioBlob = new Blob([req.file.buffer], {
66
+ type: req.file.mimetype
67
+ });
68
+
69
+ const transcription = await provider.transcribe(audioBlob, {
70
+ language: req.body.language
71
+ });
72
+
73
+ res.json({ transcription });
74
+ } catch (error) {
75
+ res.status(500).json({ error: 'Transcription failed' });
76
+ }
77
+ });
78
+ ```
79
+
80
+ ### Next.js API Route Example
81
+
82
+ ```typescript
83
+ // app/api/stt/route.ts
84
+ import { NextRequest, NextResponse } from "next/server";
85
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
86
+
87
+ const provider = createOpenAISTTProvider({
88
+ apiKey: process.env.OPENAI_API_KEY!
89
+ });
90
+
91
+ export async function POST(request: NextRequest) {
92
+ try {
93
+ const formData = await request.formData();
94
+ const audioFile = formData.get('audio') as File;
95
+ const language = formData.get('language') as string | undefined;
96
+
97
+ if (!audioFile) {
98
+ return NextResponse.json(
99
+ { error: "No audio file provided" },
100
+ { status: 400 }
101
+ );
102
+ }
103
+
104
+ // Convert File to Blob
105
+ const audioBlob = new Blob([await audioFile.arrayBuffer()], {
106
+ type: audioFile.type
107
+ });
108
+
109
+ const transcription = await provider.transcribe(audioBlob, {
110
+ language
111
+ });
112
+
113
+ return NextResponse.json({ transcription });
114
+ } catch (error) {
115
+ console.error("STT error:", error);
116
+ return NextResponse.json(
117
+ { error: "Failed to transcribe audio" },
118
+ { status: 500 }
119
+ );
120
+ }
121
+ }
122
+ ```
123
+
124
+ ## API Reference
125
+
126
+ ### Configuration Options
127
+
128
+ ```typescript
129
+ interface OpenAISTTConfig {
130
+ /** OpenAI API key (required) */
131
+ apiKey: string;
132
+ /** Default OpenAI Whisper model (default: "whisper-1") */
133
+ defaultModel?: "whisper-1";
134
+ /** Default language for transcription (e.g., "en", "es", "fr") */
135
+ defaultLanguage?: string;
136
+ /** Allow browser usage (dangerous - exposes API key) */
137
+ dangerouslyAllowBrowser?: boolean;
138
+ }
139
+ ```
140
+
141
+ ### Available Models
142
+
143
+ - `whisper-1` - OpenAI's Whisper model for speech recognition
144
+
145
+ ### Supported Languages
146
+
147
+ Whisper supports 99+ languages including:
148
+ - English (`en`)
149
+ - Spanish (`es`)
150
+ - French (`fr`)
151
+ - German (`de`)
152
+ - Chinese (`zh`)
153
+ - Japanese (`ja`)
154
+ - Korean (`ko`)
155
+ - And many more...
156
+
157
+ For best results, specify the language if known. If not specified, Whisper will auto-detect.
158
+
159
+ ### Methods
160
+
161
+ #### `transcribe(audio, options?): Promise<string>`
162
+ Transcribe audio data to text.
163
+
164
+ ```typescript
165
+ // With Blob
166
+ const transcription = await provider.transcribe(audioBlob);
167
+
168
+ // With ArrayBuffer
169
+ const transcription = await provider.transcribe(audioBuffer);
170
+
171
+ // With language option
172
+ const transcription = await provider.transcribe(audioBlob, {
173
+ language: "es"
174
+ });
175
+ ```
176
+
177
+ **Parameters:**
178
+ - `audio: Blob | ArrayBuffer` - Audio data to transcribe
179
+ - `options?: STTOptions` - Optional transcription options
180
+ - `language?: string` - Language code (e.g., "en", "es")
181
+
182
+ **Returns:** `Promise<string>` - Transcribed text
183
+
184
+ ## Browser Usage (Not Recommended)
185
+
186
+ ⚠️ **Security Warning**: This provider should NOT be used in browser as it exposes your API key to users.
187
+
188
+ **Better alternative**: Use [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) for client-side usage.
189
+
190
+ ## Environment Variables
191
+
192
+ ```bash
193
+ OPENAI_API_KEY=your_openai_api_key_here
194
+ ```
195
+
196
+ ## Error Handling
197
+
198
+ ```typescript
199
+ try {
200
+ const transcription = await provider.transcribe(audioBlob);
201
+ } catch (error) {
202
+ console.error("Transcription failed:", error);
203
+ // Handle OpenAI API errors:
204
+ // - Invalid audio format
205
+ // - API key issues
206
+ // - Rate limiting
207
+ // - Network errors
208
+ }
209
+ ```
210
+
211
+ ## Use Cases
212
+
213
+ - **API Endpoints**: Provide STT service via your server
214
+ - **Secure Transcription**: Keep API keys on server, expose via HTTP endpoint
215
+ - **Language Support**: Leverage Whisper's multilingual capabilities
216
+ - **Rate Limiting**: Control STT usage per user
217
+ - **Cost Monitoring**: Track STT API usage and costs
218
+
219
+ ## Complete Example
220
+
221
+ ### Server (Next.js API Route)
222
+
223
+ ```typescript
224
+ // app/api/stt/route.ts
225
+ import { NextRequest, NextResponse } from "next/server";
+ import { createOpenAISTTProvider } from "@charivo/stt-provider-openai";
226
+
227
+ const provider = createOpenAISTTProvider({
228
+ apiKey: process.env.OPENAI_API_KEY!,
229
+ defaultLanguage: "en"
230
+ });
231
+
232
+ export async function POST(request: NextRequest) {
233
+ const formData = await request.formData();
234
+ const audioFile = formData.get('audio') as File;
235
+ const language = formData.get('language') as string | undefined;
236
+
237
+ const audioBlob = new Blob([await audioFile.arrayBuffer()], { type: audioFile.type });
238
+ const transcription = await provider.transcribe(audioBlob, { language });
239
+
240
+ return NextResponse.json({ transcription });
241
+ }
242
+ ```
243
+
244
+ ### Client (uses Remote Transcriber)
245
+
246
+ ```typescript
247
+ import { createRemoteSTTTranscriber } from "@charivo/stt-transcriber-remote";
248
+ import { createSTTManager } from "@charivo/stt-core";
249
+
250
+ const transcriber = createRemoteSTTTranscriber({
251
+ apiEndpoint: "/api/stt"
252
+ });
253
+ const sttManager = createSTTManager(transcriber);
254
+
255
+ // Start recording
256
+ await sttManager.start();
257
+
258
+ // Stop and get transcription
259
+ const text = await sttManager.stop();
260
+ console.log("User said:", text);
261
+ ```
262
+
263
+ ## Pricing (OpenAI Whisper)
264
+
265
+ - **whisper-1**: $0.006 per minute (rounded to the nearest second)
266
+
267
+ Example: 30 seconds of audio = $0.003
268
+
269
+ ## Audio Format Support
270
+
271
+ Whisper supports various audio formats:
272
+ - MP3
273
+ - MP4
274
+ - MPEG
275
+ - MPGA
276
+ - M4A
277
+ - WAV
278
+ - WEBM
279
+
280
+ Maximum file size: 25 MB
281
+
282
+ ## Performance Tips
283
+
284
+ 1. **Use appropriate audio quality**: Higher quality doesn't always mean better transcription
285
+ 2. **Specify language**: Improves accuracy and speed
286
+ 3. **Reduce background noise**: Pre-process audio for better results
287
+ 4. **Chunk long audio**: Split audio files > 10 minutes for faster processing
288
+
289
+ ## Related Packages
290
+
291
+ - [`@charivo/stt-transcriber-remote`](../stt-transcriber-remote) - Client-side HTTP STT transcriber (recommended)
292
+ - [`@charivo/stt-transcriber-openai`](../stt-transcriber-openai) - Client-side OpenAI transcriber (testing only)
293
+ - [`@charivo/stt-core`](../stt-core) - STT core functionality
294
+
295
+ ## License
296
+
297
+ MIT
package/dist/index.d.ts ADDED
@@ -0,0 +1,22 @@
1
+ import { STTProvider, STTOptions } from '@charivo/core';
2
+
3
+ /**
+ * Configuration for the OpenAI Whisper STT provider.
+ */
+ interface OpenAISTTConfig {
4
+ /** OpenAI API key (required). Keep this on the server. */
+ apiKey: string;
5
+ /** Whisper model id (default: "whisper-1"). */
+ defaultModel?: "whisper-1";
6
+ /** Default ISO-639-1 language code (e.g. "en"); Whisper auto-detects when omitted. */
+ defaultLanguage?: string;
7
+ /**
8
+ * Allow usage in browser (dangerous - exposes API key)
9
+ * Only use for testing/development
10
+ */
11
+ dangerouslyAllowBrowser?: boolean;
12
+ }
13
+ /**
+ * Server-side STT provider backed by the OpenAI audio transcription API.
+ * The constructor throws when run in a browser unless
+ * `dangerouslyAllowBrowser` is explicitly set (see dist/index.js).
+ */
+ declare class OpenAISTTProvider implements STTProvider {
14
+ private openai;
15
+ private defaultModel;
16
+ private defaultLanguage?;
17
+ constructor(config: OpenAISTTConfig);
18
+ /** Transcribe audio to text; resolves with the transcribed string. */
+ transcribe(audio: Blob | ArrayBuffer, options?: STTOptions): Promise<string>;
19
+ }
20
+ /** Factory returning a configured OpenAISTTProvider. */
+ declare function createOpenAISTTProvider(config: OpenAISTTConfig): OpenAISTTProvider;
21
+
22
+ export { type OpenAISTTConfig, OpenAISTTProvider, createOpenAISTTProvider };
package/dist/index.d.mts ADDED
@@ -0,0 +1,22 @@
1
+ import { STTProvider, STTOptions } from '@charivo/core';
2
+
3
+ /**
+ * Configuration for the OpenAI Whisper STT provider.
+ */
+ interface OpenAISTTConfig {
4
+ /** OpenAI API key (required). Keep this on the server. */
+ apiKey: string;
5
+ /** Whisper model id (default: "whisper-1"). */
+ defaultModel?: "whisper-1";
6
+ /** Default ISO-639-1 language code (e.g. "en"); Whisper auto-detects when omitted. */
+ defaultLanguage?: string;
7
+ /**
8
+ * Allow usage in browser (dangerous - exposes API key)
9
+ * Only use for testing/development
10
+ */
11
+ dangerouslyAllowBrowser?: boolean;
12
+ }
13
+ /**
+ * Server-side STT provider backed by the OpenAI audio transcription API.
+ * The constructor throws when run in a browser unless
+ * `dangerouslyAllowBrowser` is explicitly set (see dist/index.mjs).
+ */
+ declare class OpenAISTTProvider implements STTProvider {
14
+ private openai;
15
+ private defaultModel;
16
+ private defaultLanguage?;
17
+ constructor(config: OpenAISTTConfig);
18
+ /** Transcribe audio to text; resolves with the transcribed string. */
+ transcribe(audio: Blob | ArrayBuffer, options?: STTOptions): Promise<string>;
19
+ }
20
+ /** Factory returning a configured OpenAISTTProvider. */
+ declare function createOpenAISTTProvider(config: OpenAISTTConfig): OpenAISTTProvider;
21
+
22
+ export { type OpenAISTTConfig, OpenAISTTProvider, createOpenAISTTProvider };
package/dist/index.js ADDED
@@ -0,0 +1,75 @@
1
+ "use strict";
2
+ // ---- esbuild/tsup CommonJS interop helpers (generated; do not hand-edit) ----
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ // Defines a lazy, enumerable getter on `target` for every entry in `all`
+ // (live bindings for the module's exports).
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ // Copies enumerable own properties of `from` onto `to` as getters,
+ // skipping `except` and anything `to` already defines.
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ // Wraps a require()'d CommonJS module so it can be consumed like an ES module.
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ // Marks the exports object as an ES module and copies the exports onto it.
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ OpenAISTTProvider: () => OpenAISTTProvider,
34
+ createOpenAISTTProvider: () => createOpenAISTTProvider
35
+ });
36
+ module.exports = __toCommonJS(index_exports);
37
+ // CommonJS require of the OpenAI SDK, adapted to an ESM-style default import.
+ var import_openai = __toESM(require("openai"));
38
var OpenAISTTProvider = class {
  openai;
  defaultModel;
  defaultLanguage;
  /**
   * @param {object} config
   * @param {string} config.apiKey OpenAI API key (server-side secret).
   * @param {string} [config.defaultModel] Whisper model id (default "whisper-1").
   * @param {string} [config.defaultLanguage] Default ISO-639-1 language code.
   * @param {boolean} [config.dangerouslyAllowBrowser] Opt in to browser use (exposes the key).
   * @throws {Error} When constructed in a browser without the explicit opt-in.
   */
  constructor(config) {
    // Refuse to run in a browser unless explicitly opted in: constructing
    // this class client-side would ship the API key to every visitor.
    if (typeof window !== "undefined" && !config.dangerouslyAllowBrowser) {
      throw new Error(
        "OpenAI provider is for server-side use only. Set dangerouslyAllowBrowser: true for testing"
      );
    }
    this.openai = new import_openai.default({
      apiKey: config.apiKey,
      dangerouslyAllowBrowser: config.dangerouslyAllowBrowser
    });
    this.defaultModel = config.defaultModel || "whisper-1";
    this.defaultLanguage = config.defaultLanguage;
  }
  /**
   * Transcribe audio to text via the OpenAI transcription endpoint.
   * @param {Blob|ArrayBuffer} audio Audio payload; raw ArrayBuffers are assumed WAV.
   * @param {{language?: string}} [options] Per-call override of the default language.
   * @returns {Promise<string>} The transcribed text.
   */
  async transcribe(audio, options) {
    const audioBlob = audio instanceof Blob ? audio : new Blob([audio], { type: "audio/wav" });
    // Fix: the upload's filename extension must match the actual payload.
    // OpenAI infers the container format from the extension, so labeling a
    // webm/mp3 blob "audio.wav" (previous behavior) mislabels non-WAV audio.
    const mimeType = (audioBlob.type || "audio/wav").split(";")[0];
    const extension = {
      "audio/mpeg": "mp3",
      "audio/mp3": "mp3",
      "audio/mp4": "mp4",
      "audio/m4a": "m4a",
      "audio/x-m4a": "m4a",
      "audio/wav": "wav",
      "audio/x-wav": "wav",
      "audio/webm": "webm",
      "audio/ogg": "ogg",
      "audio/flac": "flac"
    }[mimeType] || "wav";
    const audioFile = new File([audioBlob], `audio.${extension}`, { type: mimeType });
    const response = await this.openai.audio.transcriptions.create({
      file: audioFile,
      model: this.defaultModel,
      language: options?.language || this.defaultLanguage
    });
    return response.text;
  }
};
/**
 * Factory mirroring the package's named export.
 * @param {object} config See OpenAISTTProvider constructor.
 * @returns {OpenAISTTProvider}
 */
function createOpenAISTTProvider(config) {
  return new OpenAISTTProvider(config);
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  OpenAISTTProvider,
  createOpenAISTTProvider
});
package/dist/index.mjs ADDED
@@ -0,0 +1,39 @@
1
+ // src/index.ts
2
+ import OpenAI from "openai";
3
var OpenAISTTProvider = class {
  openai;
  defaultModel;
  defaultLanguage;
  /**
   * @param {object} config
   * @param {string} config.apiKey OpenAI API key (server-side secret).
   * @param {string} [config.defaultModel] Whisper model id (default "whisper-1").
   * @param {string} [config.defaultLanguage] Default ISO-639-1 language code.
   * @param {boolean} [config.dangerouslyAllowBrowser] Opt in to browser use (exposes the key).
   * @throws {Error} When constructed in a browser without the explicit opt-in.
   */
  constructor(config) {
    // Refuse to run in a browser unless explicitly opted in: constructing
    // this class client-side would ship the API key to every visitor.
    if (typeof window !== "undefined" && !config.dangerouslyAllowBrowser) {
      throw new Error(
        "OpenAI provider is for server-side use only. Set dangerouslyAllowBrowser: true for testing"
      );
    }
    this.openai = new OpenAI({
      apiKey: config.apiKey,
      dangerouslyAllowBrowser: config.dangerouslyAllowBrowser
    });
    this.defaultModel = config.defaultModel || "whisper-1";
    this.defaultLanguage = config.defaultLanguage;
  }
  /**
   * Transcribe audio to text via the OpenAI transcription endpoint.
   * @param {Blob|ArrayBuffer} audio Audio payload; raw ArrayBuffers are assumed WAV.
   * @param {{language?: string}} [options] Per-call override of the default language.
   * @returns {Promise<string>} The transcribed text.
   */
  async transcribe(audio, options) {
    const audioBlob = audio instanceof Blob ? audio : new Blob([audio], { type: "audio/wav" });
    // Fix: the upload's filename extension must match the actual payload.
    // OpenAI infers the container format from the extension, so labeling a
    // webm/mp3 blob "audio.wav" (previous behavior) mislabels non-WAV audio.
    const mimeType = (audioBlob.type || "audio/wav").split(";")[0];
    const extension = {
      "audio/mpeg": "mp3",
      "audio/mp3": "mp3",
      "audio/mp4": "mp4",
      "audio/m4a": "m4a",
      "audio/x-m4a": "m4a",
      "audio/wav": "wav",
      "audio/x-wav": "wav",
      "audio/webm": "webm",
      "audio/ogg": "ogg",
      "audio/flac": "flac"
    }[mimeType] || "wav";
    const audioFile = new File([audioBlob], `audio.${extension}`, { type: mimeType });
    const response = await this.openai.audio.transcriptions.create({
      file: audioFile,
      model: this.defaultModel,
      language: options?.language || this.defaultLanguage
    });
    return response.text;
  }
};
/**
 * Factory mirroring the package's named export.
 * @param {object} config See OpenAISTTProvider constructor.
 * @returns {OpenAISTTProvider}
 */
function createOpenAISTTProvider(config) {
  return new OpenAISTTProvider(config);
}
export {
  OpenAISTTProvider,
  createOpenAISTTProvider
};
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "@charivo/stt-provider-openai",
3
+ "version": "0.0.1",
4
+ "description": "OpenAI STT provider for Charivo (server-side)",
5
+ "main": "dist/index.js",
6
+ "module": "dist/index.mjs",
+ "types": "dist/index.d.ts",
7
+ "dependencies": {
8
+ "openai": "^4.47.1",
9
+ "@charivo/core": "0.0.1"
10
+ },
11
+ "devDependencies": {
12
+ "tsup": "^8.0.0",
13
+ "typescript": "^5.0.0"
14
+ },
15
+ "files": [
16
+ "dist"
17
+ ],
18
+ "publishConfig": {
19
+ "access": "public"
20
+ },
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/zeikar/charivo.git",
25
+ "directory": "packages/stt-provider-openai"
26
+ },
27
+ "author": {
28
+ "name": "Zeikar",
29
+ "url": "https://github.com/zeikar"
30
+ },
31
+ "homepage": "https://github.com/zeikar/charivo#readme",
32
+ "bugs": {
33
+ "url": "https://github.com/zeikar/charivo/issues"
34
+ },
35
+ "scripts": {
36
+ "build": "tsup",
37
+ "dev": "tsup --watch",
38
+ "clean": "rm -rf dist"
39
+ }
40
+ }