npm - geminisst - Versions diffs - 1.0.1 → 1.0.2 - Mend

geminisst 1.0.1 → 1.0.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of geminisst might be problematic. Click here for more details.

Files changed (3)

package/dist/core.d.ts CHANGED Viewed

@@ -1,10 +1,5 @@
 import { SSTOptions, TranscriptionResult } from './types.js';
 /**
  * Processes audio using the Gemini API.
- * @param audioData - Base64 encoded audio string
- * @param mimeType - MIME type of the audio
- * @param apiKey - Google Gemini API Key
- * @param options - Configuration options
- * @returns Promise resolving to the transcription result
  */
 export declare function processAudioWithGemini(audioData: string, mimeType: string, apiKey: string, options: SSTOptions): Promise<TranscriptionResult>;

package/dist/core.js CHANGED Viewed

@@ -5,40 +5,30 @@ import { GoogleGenAI } from '@google/genai';
 import { DEFAULT_SYSTEM_INSTRUCTION } from './constants.js';
 /**
  * Processes audio using the Gemini API.
- * @param audioData - Base64 encoded audio string
- * @param mimeType - MIME type of the audio
- * @param apiKey - Google Gemini API Key
- * @param options - Configuration options
- * @returns Promise resolving to the transcription result
  */
 export async function processAudioWithGemini(audioData, mimeType, apiKey, options) {
     if (!apiKey) {
-        throw new Error("API Key is required");
+        throw new Error("[geminisst] API Key is required");
     }
-    // Initialize the AI client according to documentation: new GoogleGenAI({ apiKey })
+    // Initialize the AI client
     const ai = new GoogleGenAI({ apiKey: apiKey });
     const modelName = options.model || "gemini-2.5-flash-lite";
-    // Configure thinking mode as per Gemini 2.5 specifications in documentation
+    const startTime = Date.now();
+    // Configure as per latest Gemini 2.5 specifications
     const config = {
         thinkingConfig: {
-            includeThoughts: true, // Enabled to allow monitoring thoughts if needed
-            thinkingBudget: -1 // Dynamic thinking enabled (-1)
+            includeThoughts: true,
+            thinkingBudget: -1
         },
-        // Fixed System Instruction: Users cannot override this as it is the core STT logic.
         systemInstruction: DEFAULT_SYSTEM_INSTRUCTION
     };
     if (options.verbose) {
-        console.log(`[SSTLibrary] Model: ${modelName}`);
-        console.log(`[SSTLibrary] Thinking: Dynamic (-1)`);
-        console.log(`[SSTLibrary] System Instruction: Locked (Core)`);
+        console.log(`[geminisst] Model: ${modelName}`);
+        console.log(`[geminisst] Thinking: Enabled (Dynamic)`);
     }
     const promptText = options.prompt || "Transcribe this audio.";
-    const startTime = Date.now();
     try {
-        /**
-         * Using the syntax from the provided documentation:
-         * ai.models.generateContent({ model, contents, config })
-         */
+        // Standard call using the models.generateContent API
         const response = await ai.models.generateContent({
             model: modelName,
             contents: [
@@ -59,10 +49,9 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
         });
         const endTime = Date.now();
         const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
-        // Handle the response according to the documentation structure
         const candidate = response.candidates?.[0];
         const textParts = candidate?.content?.parts || [];
-        // Combine text parts and thought parts separately
+        // Separate transcript and thoughts
         const transcriptText = textParts
             .filter((p) => !p.thought)
             .map((p) => p.text)
@@ -71,7 +60,6 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
             .filter((p) => p.thought)
             .map((p) => p.text)
             .join('') || "";
-        // Extract usage details
         const usage = response.usageMetadata ? {
             inputTokens: response.usageMetadata.promptTokenCount || 0,
             outputTokens: response.usageMetadata.candidatesTokenCount || 0,
@@ -86,41 +74,7 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
         };
     }
     catch (error) {
-        // If the newer ai.models.generateContent syntax is not available in the installed SDK version,
-        // fallback to the widely supported getGenerativeModel method while keeping logic consistent.
-        if (options.verbose)
-            console.warn("[SSTLibrary] Newer syntax failed, trying fallback...");
-        try {
-            const model = ai.getGenerativeModel({ model: modelName }, config);
-            const result = await model.generateContent({
-                contents: [{
-                        role: 'user',
-                        parts: [
-                            { text: promptText },
-                            { inlineData: { mimeType, data: audioData } }
-                        ]
-                    }]
-            });
-            const endTime = Date.now();
-            const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
-            const resp = result.response;
-            const candidate = resp.candidates?.[0];
-            const parts = candidate?.content?.parts || [];
-            return {
-                text: parts.filter((p) => !p.thought).map((p) => p.text).join(''),
-                thoughts: parts.filter((p) => p.thought).map((p) => p.text).join(''),
-                model: modelName,
-                usage: resp.usageMetadata ? {
-                    inputTokens: resp.usageMetadata.promptTokenCount,
-                    outputTokens: resp.usageMetadata.candidatesTokenCount,
-                    totalTokens: resp.usageMetadata.totalTokenCount,
-                    processingTimeSec: processingTimeSec
-                } : undefined
-            };
-        }
-        catch (fallbackError) {
-            console.error("[SSTLibrary] Transcription failed:", fallbackError);
-            throw fallbackError;
-        }
+        console.error("[geminisst] Error calling Gemini API:", error.message);
+        throw error;
     }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "geminisst",
-  "version": "1.0.1",
+  "version": "1.0.2",
   "description": "Revolutionary high-accuracy Audio-to-Text library powered by Gemini 2.5 Flash Lite with 1M+ context window.",
   "keywords": [
     "sst",