geminisst 1.0.0 → 1.0.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/core.d.ts CHANGED
@@ -1,10 +1,5 @@
1
1
  import { SSTOptions, TranscriptionResult } from './types.js';
2
2
  /**
3
3
  * Processes audio using the Gemini API.
4
- * @param audioData - Base64 encoded audio string
5
- * @param mimeType - MIME type of the audio
6
- * @param apiKey - Google Gemini API Key
7
- * @param options - Configuration options
8
- * @returns Promise resolving to the transcription result
9
4
  */
10
5
  export declare function processAudioWithGemini(audioData: string, mimeType: string, apiKey: string, options: SSTOptions): Promise<TranscriptionResult>;
package/dist/core.js CHANGED
@@ -5,40 +5,30 @@ import { GoogleGenAI } from '@google/genai';
5
5
  import { DEFAULT_SYSTEM_INSTRUCTION } from './constants.js';
6
6
  /**
7
7
  * Processes audio using the Gemini API.
8
- * @param audioData - Base64 encoded audio string
9
- * @param mimeType - MIME type of the audio
10
- * @param apiKey - Google Gemini API Key
11
- * @param options - Configuration options
12
- * @returns Promise resolving to the transcription result
13
8
  */
14
9
  export async function processAudioWithGemini(audioData, mimeType, apiKey, options) {
15
10
  if (!apiKey) {
16
- throw new Error("API Key is required");
11
+ throw new Error("[geminisst] API Key is required");
17
12
  }
18
- // Initialize the AI client according to documentation: new GoogleGenAI({ apiKey })
13
+ // Initialize the AI client
19
14
  const ai = new GoogleGenAI({ apiKey: apiKey });
20
15
  const modelName = options.model || "gemini-2.5-flash-lite";
21
- // Configure thinking mode as per Gemini 2.5 specifications in documentation
16
+ const startTime = Date.now();
17
+ // Configure as per latest Gemini 2.5 specifications
22
18
  const config = {
23
19
  thinkingConfig: {
24
- includeThoughts: true, // Enabled to allow monitoring thoughts if needed
25
- thinkingBudget: -1 // Dynamic thinking enabled (-1)
20
+ includeThoughts: true,
21
+ thinkingBudget: -1
26
22
  },
27
- // Fixed System Instruction: Users cannot override this as it is the core STT logic.
28
23
  systemInstruction: DEFAULT_SYSTEM_INSTRUCTION
29
24
  };
30
25
  if (options.verbose) {
31
- console.log(`[SSTLibrary] Model: ${modelName}`);
32
- console.log(`[SSTLibrary] Thinking: Dynamic (-1)`);
33
- console.log(`[SSTLibrary] System Instruction: Locked (Core)`);
26
+ console.log(`[geminisst] Model: ${modelName}`);
27
+ console.log(`[geminisst] Thinking: Enabled (Dynamic)`);
34
28
  }
35
29
  const promptText = options.prompt || "Transcribe this audio.";
36
- const startTime = Date.now();
37
30
  try {
38
- /**
39
- * Using the syntax from the provided documentation:
40
- * ai.models.generateContent({ model, contents, config })
41
- */
31
+ // Standard call using the models.generateContent API
42
32
  const response = await ai.models.generateContent({
43
33
  model: modelName,
44
34
  contents: [
@@ -59,10 +49,9 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
59
49
  });
60
50
  const endTime = Date.now();
61
51
  const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
62
- // Handle the response according to the documentation structure
63
52
  const candidate = response.candidates?.[0];
64
53
  const textParts = candidate?.content?.parts || [];
65
- // Combine text parts and thought parts separately
54
+ // Separate transcript and thoughts
66
55
  const transcriptText = textParts
67
56
  .filter((p) => !p.thought)
68
57
  .map((p) => p.text)
@@ -71,7 +60,6 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
71
60
  .filter((p) => p.thought)
72
61
  .map((p) => p.text)
73
62
  .join('') || "";
74
- // Extract usage details
75
63
  const usage = response.usageMetadata ? {
76
64
  inputTokens: response.usageMetadata.promptTokenCount || 0,
77
65
  outputTokens: response.usageMetadata.candidatesTokenCount || 0,
@@ -86,41 +74,7 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
86
74
  };
87
75
  }
88
76
  catch (error) {
89
- // If the newer ai.models.generateContent syntax is not available in the installed SDK version,
90
- // fallback to the widely supported getGenerativeModel method while keeping logic consistent.
91
- if (options.verbose)
92
- console.warn("[SSTLibrary] Newer syntax failed, trying fallback...");
93
- try {
94
- const model = ai.getGenerativeModel({ model: modelName }, config);
95
- const result = await model.generateContent({
96
- contents: [{
97
- role: 'user',
98
- parts: [
99
- { text: promptText },
100
- { inlineData: { mimeType, data: audioData } }
101
- ]
102
- }]
103
- });
104
- const endTime = Date.now();
105
- const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
106
- const resp = result.response;
107
- const candidate = resp.candidates?.[0];
108
- const parts = candidate?.content?.parts || [];
109
- return {
110
- text: parts.filter((p) => !p.thought).map((p) => p.text).join(''),
111
- thoughts: parts.filter((p) => p.thought).map((p) => p.text).join(''),
112
- model: modelName,
113
- usage: resp.usageMetadata ? {
114
- inputTokens: resp.usageMetadata.promptTokenCount,
115
- outputTokens: resp.usageMetadata.candidatesTokenCount,
116
- totalTokens: resp.usageMetadata.totalTokenCount,
117
- processingTimeSec: processingTimeSec
118
- } : undefined
119
- };
120
- }
121
- catch (fallbackError) {
122
- console.error("[SSTLibrary] Transcription failed:", fallbackError);
123
- throw fallbackError;
124
- }
77
+ console.error("[geminisst] Error calling Gemini API:", error.message);
78
+ throw error;
125
79
  }
126
80
  }
package/dist/index.js CHANGED
@@ -10,9 +10,13 @@ import { bufferToBase64 } from './utils.js';
10
10
  * @returns The transcription result object containing text and thoughts
11
11
  */
12
12
  export async function audioToText(audioFile, apiKey, options = {}) {
13
- // 1. Read Audio File
13
+ // 1. Validate Audio File Path
14
14
  if (!fs.existsSync(audioFile)) {
15
- throw new Error(`Audio file not found: ${audioFile}`);
15
+ throw new Error(`[geminisst] Audio file not found at path: ${audioFile}`);
16
+ }
17
+ const stats = fs.statSync(audioFile);
18
+ if (stats.isDirectory()) {
19
+ throw new Error(`[geminisst] Expected a file path but found a directory: ${audioFile}`);
16
20
  }
17
21
  // Simple mime type detection based on extension
18
22
  const ext = path.extname(audioFile).toLowerCase().replace('.', '');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "geminisst",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "Revolutionary high-accuracy Audio-to-Text library powered by Gemini 2.5 Flash Lite with 1M+ context window.",
5
5
  "keywords": [
6
6
  "sst",