geminisst 1.0.1 → 1.0.2
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of geminisst might be problematic. Click here for more details.
- package/dist/core.d.ts +0 -5
- package/dist/core.js +12 -58
- package/package.json +1 -1
package/dist/core.d.ts
CHANGED
|
@@ -1,10 +1,5 @@
|
|
|
1
1
|
import { SSTOptions, TranscriptionResult } from './types.js';
|
|
2
2
|
/**
|
|
3
3
|
* Processes audio using the Gemini API.
|
|
4
|
-
* @param audioData - Base64 encoded audio string
|
|
5
|
-
* @param mimeType - MIME type of the audio
|
|
6
|
-
* @param apiKey - Google Gemini API Key
|
|
7
|
-
* @param options - Configuration options
|
|
8
|
-
* @returns Promise resolving to the transcription result
|
|
9
4
|
*/
|
|
10
5
|
export declare function processAudioWithGemini(audioData: string, mimeType: string, apiKey: string, options: SSTOptions): Promise<TranscriptionResult>;
|
package/dist/core.js
CHANGED
|
@@ -5,40 +5,30 @@ import { GoogleGenAI } from '@google/genai';
|
|
|
5
5
|
import { DEFAULT_SYSTEM_INSTRUCTION } from './constants.js';
|
|
6
6
|
/**
|
|
7
7
|
* Processes audio using the Gemini API.
|
|
8
|
-
* @param audioData - Base64 encoded audio string
|
|
9
|
-
* @param mimeType - MIME type of the audio
|
|
10
|
-
* @param apiKey - Google Gemini API Key
|
|
11
|
-
* @param options - Configuration options
|
|
12
|
-
* @returns Promise resolving to the transcription result
|
|
13
8
|
*/
|
|
14
9
|
export async function processAudioWithGemini(audioData, mimeType, apiKey, options) {
|
|
15
10
|
if (!apiKey) {
|
|
16
|
-
throw new Error("API Key is required");
|
|
11
|
+
throw new Error("[geminisst] API Key is required");
|
|
17
12
|
}
|
|
18
|
-
// Initialize the AI client
|
|
13
|
+
// Initialize the AI client
|
|
19
14
|
const ai = new GoogleGenAI({ apiKey: apiKey });
|
|
20
15
|
const modelName = options.model || "gemini-2.5-flash-lite";
|
|
21
|
-
|
|
16
|
+
const startTime = Date.now();
|
|
17
|
+
// Configure as per latest Gemini 2.5 specifications
|
|
22
18
|
const config = {
|
|
23
19
|
thinkingConfig: {
|
|
24
|
-
includeThoughts: true,
|
|
25
|
-
thinkingBudget: -1
|
|
20
|
+
includeThoughts: true,
|
|
21
|
+
thinkingBudget: -1
|
|
26
22
|
},
|
|
27
|
-
// Fixed System Instruction: Users cannot override this as it is the core STT logic.
|
|
28
23
|
systemInstruction: DEFAULT_SYSTEM_INSTRUCTION
|
|
29
24
|
};
|
|
30
25
|
if (options.verbose) {
|
|
31
|
-
console.log(`[
|
|
32
|
-
console.log(`[
|
|
33
|
-
console.log(`[SSTLibrary] System Instruction: Locked (Core)`);
|
|
26
|
+
console.log(`[geminisst] Model: ${modelName}`);
|
|
27
|
+
console.log(`[geminisst] Thinking: Enabled (Dynamic)`);
|
|
34
28
|
}
|
|
35
29
|
const promptText = options.prompt || "Transcribe this audio.";
|
|
36
|
-
const startTime = Date.now();
|
|
37
30
|
try {
|
|
38
|
-
|
|
39
|
-
* Using the syntax from the provided documentation:
|
|
40
|
-
* ai.models.generateContent({ model, contents, config })
|
|
41
|
-
*/
|
|
31
|
+
// Standard call using the models.generateContent API
|
|
42
32
|
const response = await ai.models.generateContent({
|
|
43
33
|
model: modelName,
|
|
44
34
|
contents: [
|
|
@@ -59,10 +49,9 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
|
|
|
59
49
|
});
|
|
60
50
|
const endTime = Date.now();
|
|
61
51
|
const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
|
|
62
|
-
// Handle the response according to the documentation structure
|
|
63
52
|
const candidate = response.candidates?.[0];
|
|
64
53
|
const textParts = candidate?.content?.parts || [];
|
|
65
|
-
//
|
|
54
|
+
// Separate transcript and thoughts
|
|
66
55
|
const transcriptText = textParts
|
|
67
56
|
.filter((p) => !p.thought)
|
|
68
57
|
.map((p) => p.text)
|
|
@@ -71,7 +60,6 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
|
|
|
71
60
|
.filter((p) => p.thought)
|
|
72
61
|
.map((p) => p.text)
|
|
73
62
|
.join('') || "";
|
|
74
|
-
// Extract usage details
|
|
75
63
|
const usage = response.usageMetadata ? {
|
|
76
64
|
inputTokens: response.usageMetadata.promptTokenCount || 0,
|
|
77
65
|
outputTokens: response.usageMetadata.candidatesTokenCount || 0,
|
|
@@ -86,41 +74,7 @@ export async function processAudioWithGemini(audioData, mimeType, apiKey, option
|
|
|
86
74
|
};
|
|
87
75
|
}
|
|
88
76
|
catch (error) {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
if (options.verbose)
|
|
92
|
-
console.warn("[SSTLibrary] Newer syntax failed, trying fallback...");
|
|
93
|
-
try {
|
|
94
|
-
const model = ai.getGenerativeModel({ model: modelName }, config);
|
|
95
|
-
const result = await model.generateContent({
|
|
96
|
-
contents: [{
|
|
97
|
-
role: 'user',
|
|
98
|
-
parts: [
|
|
99
|
-
{ text: promptText },
|
|
100
|
-
{ inlineData: { mimeType, data: audioData } }
|
|
101
|
-
]
|
|
102
|
-
}]
|
|
103
|
-
});
|
|
104
|
-
const endTime = Date.now();
|
|
105
|
-
const processingTimeSec = parseFloat(((endTime - startTime) / 1000).toFixed(2));
|
|
106
|
-
const resp = result.response;
|
|
107
|
-
const candidate = resp.candidates?.[0];
|
|
108
|
-
const parts = candidate?.content?.parts || [];
|
|
109
|
-
return {
|
|
110
|
-
text: parts.filter((p) => !p.thought).map((p) => p.text).join(''),
|
|
111
|
-
thoughts: parts.filter((p) => p.thought).map((p) => p.text).join(''),
|
|
112
|
-
model: modelName,
|
|
113
|
-
usage: resp.usageMetadata ? {
|
|
114
|
-
inputTokens: resp.usageMetadata.promptTokenCount,
|
|
115
|
-
outputTokens: resp.usageMetadata.candidatesTokenCount,
|
|
116
|
-
totalTokens: resp.usageMetadata.totalTokenCount,
|
|
117
|
-
processingTimeSec: processingTimeSec
|
|
118
|
-
} : undefined
|
|
119
|
-
};
|
|
120
|
-
}
|
|
121
|
-
catch (fallbackError) {
|
|
122
|
-
console.error("[SSTLibrary] Transcription failed:", fallbackError);
|
|
123
|
-
throw fallbackError;
|
|
124
|
-
}
|
|
77
|
+
console.error("[geminisst] Error calling Gemini API:", error.message);
|
|
78
|
+
throw error;
|
|
125
79
|
}
|
|
126
80
|
}
|