geminisst 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core.d.ts +0 -5
- package/dist/core.js +12 -58
- package/dist/index.js +6 -2
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
@@ -1,10 +1,5 @@
 import { SSTOptions, TranscriptionResult } from './types.js';
 /**
  * Processes audio using the Gemini API.
- * @param audioData - Base64 encoded audio string
- * @param mimeType - MIME type of the audio
- * @param apiKey - Google Gemini API Key
- * @param options - Configuration options
- * @returns Promise resolving to the transcription result
  */
 export declare function processAudioWithGemini(audioData: string, mimeType: string, apiKey: string, options: SSTOptions): Promise<TranscriptionResult>;
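The declaration above is unchanged apart from the trimmed JSDoc, so the low-level signature remains audioData (base64 string), mimeType, apiKey, and an SSTOptions object. A minimal usage sketch in TypeScript, assuming the package entry re-exports processAudioWithGemini and that SSTOptions accepts the model and verbose fields seen in core.js (both assumptions inferred from this diff, not from package documentation):

import { readFile } from 'node:fs/promises';
// Assumed import path; the function may only be reachable internally via audioToText.
import { processAudioWithGemini } from 'geminisst';

async function transcribeWav(filePath: string): Promise<string> {
  // Callers of the low-level API supply base64 audio plus its MIME type themselves.
  const audioData = (await readFile(filePath)).toString('base64');
  const result = await processAudioWithGemini(audioData, 'audio/wav', process.env.GEMINI_API_KEY ?? '', {
    model: 'gemini-2.5-flash-lite', // default model name seen in core.js
    verbose: true,                  // logs "[geminisst] Model: ..." as of 1.0.2
  });
  return result.text;
}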
package/dist/core.js
CHANGED
@@ -5,40 +5,30 @@ import { GoogleGenAI } from '@google/genai';
 import { DEFAULT_SYSTEM_INSTRUCTION } from './constants.js';
 /**
  * Processes audio using the Gemini API.
- * @param audioData - Base64 encoded audio string
- * @param mimeType - MIME type of the audio
- * @param apiKey - Google Gemini API Key
- * @param options - Configuration options
- * @returns Promise resolving to the transcription result
  */
 export async function processAudioWithGemini(audioData, mimeType, apiKey, options) {
     if (!apiKey) {
-        throw new Error("API Key is required");
+        throw new Error("[geminisst] API Key is required");
     }
-    // Initialize the AI client
+    // Initialize the AI client
     const ai = new GoogleGenAI({ apiKey: apiKey });
     const modelName = options.model || "gemini-2.5-flash-lite";
-
+    const startTime = Date.now();
+    // Configure as per latest Gemini 2.5 specifications
     const config = {
         thinkingConfig: {
-            includeThoughts: true,
-            thinkingBudget: -1
+            includeThoughts: true,
+            thinkingBudget: -1
         },
-        // Fixed System Instruction: Users cannot override this as it is the core STT logic.
         systemInstruction: DEFAULT_SYSTEM_INSTRUCTION
     };
     if (options.verbose) {
-        console.log(`[
-        console.log(`[
-        console.log(`[SSTLibrary] System Instruction: Locked (Core)`);
+        console.log(`[geminisst] Model: ${modelName}`);
+        console.log(`[geminisst] Thinking: Enabled (Dynamic)`);
     }
     const promptText = options.prompt || "Transcribe this audio.";
-    const startTime = Date.now();
     try {
-        /**
-         * Using the syntax from the provided documentation:
-         * ai.models.generateContent({ model, contents, config })
-         */
+        // Standard call using the models.generateContent API
         const response = await ai.models.generateContent({
             model: modelName,
             contents: [
@@ -59,10 +49,9 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
         });
         const endTime = Date.now();
         const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
-        // Handle the response according to the documentation structure
         const candidate = response.candidates?.[0];
         const textParts = candidate?.content?.parts || [];
-        //
+        // Separate transcript and thoughts
         const transcriptText = textParts
             .filter((p) => !p.thought)
             .map((p) => p.text)
@@ -71,7 +60,6 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
             .filter((p) => p.thought)
             .map((p) => p.text)
             .join('') || "";
-        // Extract usage details
         const usage = response.usageMetadata ? {
             inputTokens: response.usageMetadata.promptTokenCount || 0,
             outputTokens: response.usageMetadata.candidatesTokenCount || 0,
@@ -86,41 +74,7 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
         };
     }
     catch (error) {
-
-
-        if (options.verbose)
-            console.warn("[SSTLibrary] Newer syntax failed, trying fallback...");
-        try {
-            const model = ai.getGenerativeModel({ model: modelName }, config);
-            const result = await model.generateContent({
-                contents: [{
-                        role: 'user',
-                        parts: [
-                            { text: promptText },
-                            { inlineData: { mimeType, data: audioData } }
-                        ]
-                    }]
-            });
-            const endTime = Date.now();
-            const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
-            const resp = result.response;
-            const candidate = resp.candidates?.[0];
-            const parts = candidate?.content?.parts || [];
-            return {
-                text: parts.filter((p) => !p.thought).map((p) => p.text).join(''),
-                thoughts: parts.filter((p) => p.thought).map((p) => p.text).join(''),
-                model: modelName,
-                usage: resp.usageMetadata ? {
-                    inputTokens: resp.usageMetadata.promptTokenCount,
-                    outputTokens: resp.usageMetadata.candidatesTokenCount,
-                    totalTokens: resp.usageMetadata.totalTokenCount,
-                    processingTimeSec: processingTimeSec
-                } : undefined
-            };
-        }
-        catch (fallbackError) {
-            console.error("[SSTLibrary] Transcription failed:", fallbackError);
-            throw fallbackError;
-        }
+        console.error("[geminisst] Error calling Gemini API:", error.message);
+        throw error;
     }
 }
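The 1.0.2 change in core.js drops the fallback path that called ai.getGenerativeModel, a method that appears to belong to the older @google/generative-ai SDK rather than to the GoogleGenAI client imported here, and instead logs the failure with a [geminisst] prefix and rethrows the original error. A hedged TypeScript sketch of how a caller might handle that behaviour (the import path and the TranscriptionResult re-export are assumptions based on this diff):

import { processAudioWithGemini } from 'geminisst';   // assumed entry re-export
import type { TranscriptionResult } from 'geminisst'; // assumed type re-export

// Hypothetical wrapper: 1.0.2 logs "[geminisst] Error calling Gemini API: <message>"
// and rethrows the original @google/genai error instead of retrying with a fallback SDK call.
async function tryTranscribe(audioBase64: string, apiKey: string): Promise<TranscriptionResult | null> {
  try {
    return await processAudioWithGemini(audioBase64, 'audio/mpeg', apiKey, { verbose: true });
  } catch (err) {
    // No silent fallback any more; the caller decides whether to retry or surface the failure.
    console.error('Transcription failed:', err);
    return null;
  }
}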
package/dist/index.js
CHANGED
@@ -10,9 +10,13 @@ import { bufferToBase64 } from './utils.js';
  * @returns The transcription result object containing text and thoughts
  */
 export async function audioToText(audioFile, apiKey, options = {}) {
-    // 1.
+    // 1. Validate Audio File Path
     if (!fs.existsSync(audioFile)) {
-        throw new Error(`Audio file not found: ${audioFile}`);
+        throw new Error(`[geminisst] Audio file not found at path: ${audioFile}`);
+    }
+    const stats = fs.statSync(audioFile);
+    if (stats.isDirectory()) {
+        throw new Error(`[geminisst] Expected a file path but found a directory: ${audioFile}`);
     }
     // Simple mime type detection based on extension
     const ext = path.extname(audioFile).toLowerCase().replace('.', '');