utilitas 2000.3.25 → 2000.3.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -12
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/index.mjs +2 -3
- package/lib/alan.mjs +180 -32
- package/lib/manifest.mjs +1 -1
- package/lib/speech.mjs +9 -37
- package/package.json +1 -1
- package/lib/gen.mjs +0 -209
package/index.mjs
CHANGED

@@ -15,7 +15,6 @@ import * as email from './lib/email.mjs';
 import * as embedding from './lib/embedding.mjs';
 import * as encryption from './lib/encryption.mjs';
 import * as event from './lib/event.mjs';
-import * as gen from './lib/gen.mjs';
 import * as media from './lib/media.mjs';
 import * as memory from './lib/memory.mjs';
 import * as network from './lib/network.mjs';
@@ -40,8 +39,8 @@ export {
     fileType, math, uuid,
     // features
     alan, bee, bot, boxes, cache, callosum, color, dbio, email, embedding,
-    encryption, event,
-
+    encryption, event, manifest, media, memory, network, sentinel, shell, sms,
+    speech, ssl, storage, tape, uoid, utilitas, vision, web
 };

 if (utilitas.inBrowser() && !globalThis.utilitas) {
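
The gen module is dropped from both the import block and the export list here, matching the outright deletion of package/lib/gen.mjs (-209 lines) in the summary above; its image/video generation duties appear to move into the new generation models in lib/alan.mjs below. A minimal sketch of the effect on callers (hypothetical usage):

    // 2000.3.25 exposed a dedicated generation module:
    // import { gen } from 'utilitas';
    // 2000.3.27 removes it; generation is wired into alan instead:
    import { alan } from 'utilitas';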
package/lib/alan.mjs
CHANGED

@@ -5,17 +5,18 @@ import { packPcmToWav } from './media.mjs';
 import { v4 as uuidv4 } from 'uuid';

 import {
-    BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
+    FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
     MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
     MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
     MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
-    MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
+    MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, getTempPath,
+    decodeBase64DataURL,
 } from './storage.mjs';

 import {
     log as _log, renderText as _renderText, base64Encode, ensureArray,
     ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
-    parseJson, throwError
+    parseJson, throwError, tryUntil, timeout,
 } from './utilitas.mjs';

 const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -42,10 +43,10 @@ You may be provided with some tools(functions) to help you gather information an
 - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
 - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;

-const _NEED = ['js-tiktoken', 'OpenAI'];
+const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];

 const [
-    OPENAI,
+    OPENAI, GOOGLE, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
     WAV, ATTACHMENTS, OPENAI_VOICE, GPT_REASONING_EFFORT, THINK, THINK_STR,
     THINK_END, TOOLS_STR, TOOLS_END, TOOLS, TEXT, OK, FUNC, GPT_51,
     GPT_51_CODEX, GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, v8k, ais,
@@ -54,9 +55,10 @@ const [
     hour, gb, trimTailing, trimBeginning, GEMINI_30_PRO_IMAGE, IMAGE, JINA,
     JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
     OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
-    GEMINI_30_PRO, GEMINI_25_FLASH,
+    GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
+    ERROR_GENERATING,
 ] = [
-    'OpenAI', '
+    'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
     'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
     'medium', 'think', '<think>', '</think>', '<tools>', '</tools>',
     'tools', 'text', 'OK', 'function', 'gpt-5.1', 'gpt-5.1-codex',
@@ -70,7 +72,9 @@ const [
     'deepseek-ai/DeepSeek-V3.2-exp', 768 * 768,
     'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
     'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
-    'gemini-2.5-flash-preview-09-2025',
+    'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
+    'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
+    'Error generating content.',
 ];

 const [tool, messages, text]
@@ -143,23 +147,27 @@ const MODELS = {
         ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
         reasoning: true, tools: true,
     },
-    // models with
+    // models with generation capabilities
     [GEMINI_30_PRO_IMAGE]: {
         ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
         contextWindow: k(64), maxOutputTokens: k(32),
         fast: true, image: true,
     },
-    [
-
-
-        icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
-        maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
-        supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
-        reasoning: true, json: true, vision: true,
-        deepsearch: true, defaultProvider: JINA,
+    [IMAGEN_4_ULTRA]: {
+        source: S_GOOGLE, icon: '🎨', maxInputTokens: 480,
+        image: true, defaultProvider: GOOGLE,
     },
-    [
-
+    [VEO_31]: {
+        source: S_GOOGLE, icon: '🎥', maxInputTokens: 1024,
+        imageCostTokens: 0, maxImagePerPrompt: 1,
+        maxImageSize: Infinity, supportedMimeTypes: [MIME_PNG, MIME_JPEG],
+        vision: true, image: true, defaultProvider: GOOGLE,
+    },
+    [GPT_5_IMAGE]: {
+        ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
+    },
+    // models with code capabilities
+    [GPT_51_CODEX]: { ...OPENAI_RULES },
     [CLOUD_OPUS_45]: {
         source: S_ANTHROPIC, icon: '✳️',
         contextWindow: kT(200), maxOutputTokens: kT(64),
@@ -170,6 +178,17 @@ const MODELS = {
         json: true, reasoning: true, tools: true, vision: true,
         defaultProvider: OPENROUTER,
     },
+    // models with deepsearch capabilities
+    [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
+        icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
+        maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
+        supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
+        reasoning: true, json: true, vision: true,
+        deepsearch: true, defaultProvider: JINA,
+    },
+    // best Chinese models
+    [DEEPSEEK_32]: DEEPSEEK_32_RULES,
+    [SF_DEEPSEEK_32]: { ...DEEPSEEK_32_RULES, defaultProvider: SILICONFLOW },
     // best local model
     [GEMMA_3_27B]: {
         icon: '❇️', contextWindow: kT(128), maxOutputTokens: k(8),
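
The new entries follow the file's existing MODELS convention: each key maps a model name to capability flags (image, vision, reasoning, deepsearch) and token budgets that callers can inspect before prompting. A minimal sketch of reading the new flags (usage assumed; all three names are exported by this file):

    import { MODELS, IMAGEN_4_ULTRA, VEO_31 } from './lib/alan.mjs';
    // Imagen 4 Ultra only generates images and caps prompts at 480 tokens:
    console.log(MODELS[IMAGEN_4_ULTRA].image);          // true
    console.log(MODELS[IMAGEN_4_ULTRA].maxInputTokens); // 480
    // Veo 3.1 additionally accepts one PNG/JPEG reference image per prompt:
    console.log(MODELS[VEO_31].maxImagePerPrompt);      // 1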
@@ -249,7 +268,7 @@ const DEFAULT_MODELS = {
 };

 const PROVIDER_ICONS = {
-    [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [
+    [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GOOGLE]: '♊️',
     [OLLAMA]: '🦙', [ANTHROPIC]: '✳️', [SILICONFLOW]: '🧬',
 };

@@ -273,7 +292,7 @@ let tokeniser, _tools;

 const unifyProvider = provider => {
     assert(provider = (provider || '').trim(), 'AI provider is required.');
-    for (let type of [OPENROUTER, JINA, OLLAMA, SILICONFLOW]) {
+    for (let type of [OPENROUTER, GOOGLE, JINA, OLLAMA, SILICONFLOW]) {
         if (insensitiveCompare(provider, type)) { return type; }
     }
     throwError(`Invalid AI provider: ${provider}.`);
@@ -406,6 +425,16 @@ const init = async (options = {}) => {
         `Model name or description is required for provider: ${provider}.`);
     _tools || (_tools = await packTools());
     switch (provider) {
+        case GOOGLE:
+            assertApiKey(provider, options);
+            const { GoogleGenAI } = await need('@google/genai');
+            var client = new GoogleGenAI({ vertexai: false, ...options });
+            for (let model of models) {
+                setupAi({
+                    provider, model, client, prompt: promptGoogle, priority,
+                });
+            }
+            break;
         case JINA:
             assertApiKey(provider, options);
             var client = await OpenAI({
@@ -588,7 +617,9 @@ const listOpenAIModels = async (aiId, options) => {
 };

 const streamResp = async (resp, options) => {
-    const msg =
+    const msg = options?.noPack ? resp : await packResp(
+        resp, { ...options, processing: true }
+    );
     return options?.stream
         && (msg?.text || msg?.audio?.length || msg?.images?.length)
         && await ignoreErrFunc(async () => await options.stream(msg), LOG);
@@ -606,13 +637,13 @@ const packResp = async (resp, options) => {
     if (options?.raw) { return resp; }
     let [
         txt, audio, images, annotations, simpleText, annotationsMarkdown, end,
-        json, audioMimeType,
+        json, audioMimeType,
     ] = [
         resp.text || '', // ChatGPT / Claude / Gemini / Ollama
         resp?.audio?.data, // ChatGPT audio mode
         resp?.images || [], // Gemini images via Openrouter
         resp?.references, // Gemini references
-        '', '', '', null, MIME_PCM16,
+        '', '', '', null, MIME_PCM16,
     ];
     simpleText = txt;
     while ((end = getInfoEnd(simpleText))) {
@@ -698,18 +729,23 @@ const packResp = async (resp, options) => {
         ...annotationsMarkdown ? { annotationsMarkdown } : {},
         ...audio ? { audio } : {}, ...images?.length ? { images } : {},
         processing: !!options?.processing,
-        model: [
+        model: packModelLabel([
             options.provider, options?.router?.provider,
             options?.router?.model || options?.model,
-        ]
-            const key = ensureString(x, { case: 'UP' });
-            if (catched.has(key)) { return null; }
-            catched.add(key);
-            return x;
-        }).filter(x => x).join('/'),
+        ]),
     };
 };

+const packModelLabel = (model_reference) => {
+    const catched = new Set();
+    return model_reference.join('/').split('/').map(x => {
+        const key = ensureString(x, { case: 'UP' });
+        if (catched.has(key)) { return null; }
+        catched.add(key);
+        return x;
+    }).filter(x => x).join('/');
+};
+
 const buildPrompts = async (model, input, options = {}) => {
     assert(!(
         options.jsonMode && !model?.json
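
The model label that packResp previously built inline is now the reusable packModelLabel helper: it joins the provider/model segments into a path, then drops case-insensitive duplicate segments. For example (inputs assumed):

    packModelLabel(['OpenRouter', 'openrouter/auto']);
    // → 'OpenRouter/auto' ('openrouter' deduplicates against 'OpenRouter')
    packModelLabel(['Google', 'google', 'veo-3.1-generate-preview']);
    // → 'Google/veo-3.1-generate-preview'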
@@ -847,6 +883,18 @@ const promptOpenAI = async (aiId, content, options = {}) => {
         x => x.function.name === 'searchWeb'
     ) && !options.jsonMode ? ONLINE : '';
     const targetModel = `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}${ext}`;
+    if (provider === OPENAI) {
+        // need more debug, currently openrouter is priority
+        packedTools.push(...[
+            // https://platform.openai.com/docs/guides/tools?tool-type=web-search
+            { type: 'web_search', },
+            // https://platform.openai.com/docs/guides/tools-image-generation?lang=javascript
+            // https://platform.openai.com/docs/api-reference/responses/create#responses-create-tools
+            { type: 'image_generation', input_fidelity: 'high', partial_images: 3, quality: 'high', size: '1536x1024' },
+            // https://platform.openai.com/docs/guides/tools-code-interpreter
+            { type: 'code_interpreter', container: { type: 'auto', memory_limit: '8g' } },
+        ]);
+    }
     if (source === S_GOOGLE) {
         packedTools.push(...[
             { googleSearch: {} }, { codeExecution: {} }, { urlContext: {} },
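
These entries use the OpenAI Responses API built-in tool schema (web search, image generation, code interpreter) rather than utilitas function tools; the "need more debug" comment marks the branch as experimental while OpenRouter remains the preferred route. A standalone sketch of sending the same kind of tool list through the SDK directly (model name assumed):

    import OpenAI from 'openai';
    const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment
    const resp = await openai.responses.create({
        model: 'gpt-5.1',
        tools: [{ type: 'web_search' }],
        input: 'Summarize the latest utilitas release.',
    });
    console.log(resp.output_text);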
@@ -966,6 +1014,103 @@ const promptOpenAI = async (aiId, content, options = {}) => {
     return await packResp(event, options);
 };

+const promptGoogle = async (aiId, prompt, options = {}) => {
+    let { provider, client, model } = await getAi(aiId);
+    const M = MODELS[model.name];
+    prompt = ensureString(prompt, { trim: true });
+    assert(await countTokens(prompt, { fast: true })
+        <= M.maxInputTokens,
+        `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
+    );
+    switch (model?.name) {
+        case IMAGEN_4_ULTRA:
+            var resp = await client.models.generateImages({
+                model: model.name, prompt, config: {
+                    numberOfImages: options?.n || 4, sampleImageSize: '2K',
+                    includeRaiReason: true,
+                    // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
+                    aspectRatio: '16:9', personGeneration: 'allow_adult',
+                    ...options?.config || {},
+                },
+            });
+            var generated = resp?.generatedImages;
+            assert(!resp?.error && generated?.filter(
+                x => !x.raiFilteredReason
+            ).length, resp?.error?.message || generated?.find(
+                x => x.raiFilteredReason
+            )?.raiFilteredReason || ERROR_GENERATING);
+            if (!options?.raw) {
+                resp = {
+                    text: '', images: await Promise.all((
+                        resp?.generatedImages || []
+                    ).map(async x => ({
+                        data: await convert(x.image.imageBytes, {
+                            input: BASE64, suffix: 'png', ...options || {}
+                        }), mimeType: x.image.mimeType,
+                    }))), model: packModelLabel([
+                        provider, M.source, model.name,
+                    ]),
+                }
+            }
+            break;
+        case VEO_31:
+            var resp = await client.models.generateVideos({
+                model: model.name, prompt, config: {
+                    aspectRatio: '16:9', numberOfVideos: 1,
+                    // personGeneration: 'allow_adult',
+                    enablePromptRewriting: true, addWatermark: false,
+                    includeRaiReason: true, ...options?.config || {},
+                },
+            });
+            assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
+            if (options?.generateRaw) { return resp; }
+            await tryUntil(async () => {
+                resp = await client.operations.getVideosOperation({
+                    operation: resp,
+                });
+                assert(
+                    resp?.done,
+                    `Waiting for Google video generation: ${resp.name}`,
+                );
+            }, { maxTry: 60 * 10, log });
+            assert(!resp?.error && resp?.response?.generatedVideos?.filter(
+                x => !x.raiFilteredReason
+            ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
+                x => x.raiFilteredReason
+            )?.raiFilteredReason || ERROR_GENERATING);
+            if (options?.videoRaw) {
+                resp = resp?.response?.generatedVideos;
+            } else if (!options?.videoRaw) {
+                resp = {
+                    text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
+                        x => x?.video?.uri
+                    ).map(async x => {
+                        const downloadPath = `${getTempPath({
+                            seed: x?.video?.uri
+                        })}.mp4`;
+                        // @todo: fix this
+                        // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
+                        await client.files.download({ file: x, downloadPath });
+                        await timeout(1000 * 10); // hack to wait for file to be downloaded
+                        return {
+                            data: await convert(downloadPath, {
+                                input: FILE, suffix: 'mp4', ...options || {}
+                            }), mimeType: MIME_MP4, jobId: resp.name,
+                        };
+                    })), model: packModelLabel([
+                        provider, M.source, model.name,
+                    ]),
+                };
+            }
+            break;
+        default:
+            throw new Error('Unsupported model.');
+    }
+    await streamResp(
+        { ...resp, processing: true }, { ...options, noPack: true }
+    );
+    return { ...resp, processing: false };
+};

 const initChat = async (options = {}) => {
     if (options.sessions) {
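
promptGoogle is the native prompt path for the new Google models: Imagen requests return base64 image bytes synchronously, while Veo requests return a long-running operation that is polled through tryUntil (up to 600 attempts) and downloaded to a temp file, with a ten-second timeout() acknowledged in the comments as a hack pending the linked upstream fix. A minimal sketch of the underlying Imagen call it wraps (API key and prompt assumed):

    import { GoogleGenAI } from '@google/genai';
    const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
    const resp = await client.models.generateImages({
        model: 'imagen-4.0-ultra-generate-001',
        prompt: 'A watercolor lighthouse at dawn',
        config: { numberOfImages: 1, aspectRatio: '16:9' },
    });
    // each generated image carries base64 bytes plus a MIME type
    console.log(resp.generatedImages?.[0]?.image?.mimeType);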
@@ -1063,7 +1208,7 @@ const distillFile = async (attachments, o) => {
         '- You will receive various multimedia files, including images, audio, and videos.',
         '- Please analyze these documents, extract the information, and organize it into an easy-to-read format.',
         '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. Use markdown to format table and other rich text where possible. Use LaTeX for all formulas, subscripts, representations of formulas, and special symbols in mathematics and chemistry, enclosed by "$" symbols. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images. Ensure the returned document is clean, well-organized, and highly readable.',
-        '- For audio files, please
+        '- For audio files, please transcribe the spoken voices into clean text. If there are background sounds, attempt to briefly describe the environmental sounds and music sections. Only care about the main speech content, meaningful music and environment sounds. Do not be disturbed by useless background noise.',
         '- For images or video files that are not primarily text-based, describe the tragic scene you observe, highlight key details, convey the emotional tone of the setting, and share your impressions.',
         '- For video files, please describe the content, including the theme, subjects, characters, scenes, objects, storyline, and emotional tone.',
         '- Please RETURN ONLY your analysis results without including your thought process or other unrelated information.',
@@ -1161,11 +1306,14 @@ export {
     FUNCTION,
     GEMINI_25_FLASH,
     GEMINI_30_PRO_IMAGE,
+    GPT_5_IMAGE,
     GPT_51,
+    IMAGEN_4_ULTRA,
     INSTRUCTIONS,
     MODELS,
     OPENAI_VOICE,
     RETRIEVAL,
+    VEO_31,
     analyzeSessions,
     countTokens,
     distillFile,
package/lib/manifest.mjs
CHANGED
package/lib/speech.mjs
CHANGED

@@ -1,7 +1,7 @@
 import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
 import { getFfmpeg, packPcmToWav } from './media.mjs';
 import { get } from './web.mjs';
-import { convert, getTempPath
+import { convert, getTempPath } from './storage.mjs';
 import { ensureString, mergeAtoB } from './utilitas.mjs';

 import {
@@ -18,20 +18,19 @@ const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];

 const [
     BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
-    GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS,
+    GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS,
     OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
 ] = [
     'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
     'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
-
+    4096, 'base', 'Invalid audio data.',
 ];

 const [
     defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
-
-] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
+] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS];

-const
+const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";

 const WHISPER_MODELS = [
     // npx whisper-node download tiny.en
@@ -111,9 +110,6 @@ const init = async (options) => {
             if (options?.tts) {
                 clients.tts = client.models.generateContent;
             }
-            if (options?.stt) {
-                clients.stt = client.models.generateContent;
-            }
             break;
         case '':
             clients._provider = 'LOCAL';
@@ -166,13 +162,14 @@ const ttsGoogle = async (contents, options) => {
     assert(contents, 'Text is required.', 400);
     assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
     const resp = await clients.tts({
-        model: options?.model || defaultGeminiTtsModel,
+        model: options?.model || defaultGeminiTtsModel,
+        contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
         config: mergeAtoB(options?.config, {
             responseModalities: ['AUDIO'],
             speechConfig: {
                 voiceConfig: {
                     prebuiltVoiceConfig: {
-                        voiceName: options?.voice || '
+                        voiceName: options?.voice || 'Zephyr',
                     },
                 },
             },
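
ttsGoogle now prepends a steerable TTS_PROMPT (overridable via options.prompt) to the text it synthesizes and defaults the prebuilt voice to 'Zephyr'. A minimal sketch of the Gemini TTS request this wraps (API key assumed):

    import { GoogleGenAI } from '@google/genai';
    const client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_KEY });
    const resp = await client.models.generateContent({
        model: 'gemini-2.5-flash-preview-tts',
        contents: 'Hello from utilitas!',
        config: {
            responseModalities: ['AUDIO'],
            speechConfig: {
                voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Zephyr' } },
            },
        },
    });
    // the synthesized audio arrives base64-encoded in the first candidate part
    const audioB64 = resp.candidates[0].content.parts[0].inlineData.data;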
@@ -227,29 +224,6 @@ const sttOpenAI = async (audio, options) => {
     return result;
 };

-const sttGoogle = async (audio, options) => {
-    assert(clients.stt, 'Google STT API has not been initialized.', 500);
-    const data = await convert(audio, {
-        input: options?.input, expected: BASE64, errorMessage,
-    });
-    const resp = await clients.stt({
-        model: options?.model || defaultGeminiSttModel, contents: {
-            parts: [{
-                inlineData: {
-                    mimeType: options?.mimeType || MIME_WAV, data,
-                },
-            }, { text: STT_PROMPT }],
-        },
-        config: { ...options?.config || {} },
-    });
-    assert(
-        resp?.candidates?.[0]?.content?.parts?.[0],
-        'Failed to transcribe audio.', 500
-    );
-    return options?.raw ? resp.candidates
-        : (resp.candidates[0].content.parts[0].text?.trim?.() || '');
-};
-
 // This function is not working properly, a pull request is filed:
 // https://github.com/ariym/whisper-node/pull/58
 const sttWhisper = async (audio, options) => {
@@ -288,8 +262,7 @@ const tts = async (text, options) => {

 const stt = async (audio, options) => {
     let engine;
-    if (clients?.stt && clients._provider === '
-    else if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
+    if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
     else if (await checkWhisper()) { engine = sttWhisper; }
     else { throwError('Speech-to-Text engine has not been initialized.', 500); }
     return await engine(audio, options);
@@ -303,7 +276,6 @@ export {
     checkWhisper,
     init,
     stt,
-    sttGoogle,
     sttOpenAI,
     sttWhisper,
     tts,