utilitas 2000.3.27 → 2000.3.29

This diff shows the changes between two package versions as published to a supported public registry. It is provided for informational purposes only and reflects the content of each version as it appears in that registry.
package/lib/alan.mjs CHANGED
@@ -5,18 +5,18 @@ import { packPcmToWav } from './media.mjs';
5
5
  import { v4 as uuidv4 } from 'uuid';
6
6
 
7
7
  import {
8
- FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
9
- MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
10
- MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
11
- MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
12
- MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, getTempPath,
13
- decodeBase64DataURL,
8
+ STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
9
+ MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
10
+ MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
11
+ MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
12
+ MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
13
+ getTempPath, decodeBase64DataURL,
14
14
  } from './storage.mjs';
15
15
 
16
16
  import {
17
- log as _log, renderText as _renderText, base64Encode, ensureArray,
18
- ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
19
- parseJson, throwError, tryUntil, timeout,
17
+ log as _log, renderText as _renderText, ensureArray, ensureString, extract,
18
+ ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
19
+ tryUntil, timeout, mergeAtoB,
20
20
  } from './utilitas.mjs';
21
21
 
22
22
  const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -43,6 +43,8 @@ You may be provided with some tools(functions) to help you gather information an
43
43
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
44
44
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;
45
45
 
46
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
47
+
46
48
  const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];
47
49
 
48
50
  const [
@@ -56,7 +58,8 @@ const [
56
58
  JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
57
59
  OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
58
60
  GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
59
- ERROR_GENERATING,
61
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
62
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
60
63
  ] = [
61
64
  'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
62
65
  'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
@@ -74,7 +77,9 @@ const [
74
77
  'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
75
78
  'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
76
79
  'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
77
- 'Error generating content.',
80
+ 'Error generating content.', 'gemini-2.5-flash-preview-tts',
81
+ 'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
82
+ 'Invalid audio data.', 'ogg',
78
83
  ];
79
84
 
80
85
  const [tool, messages, text]
@@ -93,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
93
98
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
94
99
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
95
100
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
96
- const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
97
- const OPENAI_RULES = {
98
- source: S_OPENAI, icon: '⚛️',
99
- contextWindow: kT(400), maxOutputTokens: k(128),
100
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
101
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
102
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
103
- supportedDocTypes: [MIME_PDF],
104
- supportedAudioTypes: [MIME_WAV],
105
- // audio: 'gpt-4o-audio-preview',
106
- json: true, tools: true, vision: true,
107
- reasoning: true, defaultProvider: OPENROUTER,
108
- };
109
101
 
110
102
  const GEMINI_RULES = {
111
103
  source: S_GOOGLE, icon: '♊️',
@@ -113,15 +105,24 @@ const GEMINI_RULES = {
113
105
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
114
106
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
115
107
  maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
116
- maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
108
+ maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
109
+ reasoning: true, supportedMimeTypes: [
117
110
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
118
111
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
119
112
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
120
- MIME_WAV, MIME_WEBM, MIME_TGPP,
121
- ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
122
- // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
123
- // gemini-2.5-flash-preview-native-audio-dialog
124
- defaultProvider: OPENROUTER,
113
+ MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
114
+ ], defaultProvider: OPENROUTER,
115
+ };
116
+
117
+ const OPENAI_RULES = {
118
+ source: S_OPENAI, icon: '⚛️',
119
+ contextWindow: kT(400), maxOutputTokens: k(128),
120
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
121
+ maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
122
+ json: true, tools: true, vision: true, hearing: true, reasoning: true,
123
+ supportedMimeTypes: [
124
+ MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
125
+ ], defaultProvider: OPENROUTER,
125
126
  };
126
127
 
127
128
  const DEEPSEEK_32_RULES = {
@@ -136,8 +137,7 @@ const MODELS = {
136
137
  // fast and balanced models
137
138
  [GEMINI_25_FLASH]: {
138
139
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
139
- fast: true, reasoning: true, tools: true,
140
- json: false, // issue with json output via OpenRouter
140
+ fast: true, json: false, // issue with json output via OpenRouter
141
141
  // https://gemini.google.com/app/c680748b3307790b
142
142
  },
143
143
  // strong and fast
@@ -145,23 +145,21 @@ const MODELS = {
145
145
  // stronger but slow
146
146
  [GEMINI_30_PRO]: {
147
147
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
148
- reasoning: true, tools: true,
149
148
  },
150
149
  // models with generation capabilities
151
150
  [GEMINI_30_PRO_IMAGE]: {
152
151
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
153
- contextWindow: k(64), maxOutputTokens: k(32),
154
- fast: true, image: true,
152
+ contextWindow: k(64), maxOutputTokens: k(32), image: true,
155
153
  },
156
154
  [IMAGEN_4_ULTRA]: {
157
- source: S_GOOGLE, icon: '🎨', maxInputTokens: 480,
155
+ source: S_GOOGLE, maxInputTokens: 480,
158
156
  image: true, defaultProvider: GOOGLE,
159
157
  },
160
158
  [VEO_31]: {
161
- source: S_GOOGLE, icon: '🎥', maxInputTokens: 1024,
159
+ source: S_GOOGLE, maxInputTokens: 1024,
162
160
  imageCostTokens: 0, maxImagePerPrompt: 1,
163
- maxImageSize: Infinity, supportedMimeTypes: [MIME_PNG, MIME_JPEG],
164
- vision: true, image: true, defaultProvider: GOOGLE,
161
+ maxImageSize: Infinity, vision: true, video: true,
162
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
165
163
  },
166
164
  [GPT_5_IMAGE]: {
167
165
  ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
@@ -174,10 +172,29 @@ const MODELS = {
174
172
  documentCostTokens: 3000 * 10, maxDocumentFile: m(32),
175
173
  maxDocumentPages: 100, imageCostTokens: ~~(v8k / 750),
176
174
  maxImagePerPrompt: 100, maxFileSize: m(5), maxImageSize: 2000 * 2000,
177
- supportedMimeTypes: [MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF],
178
175
  json: true, reasoning: true, tools: true, vision: true,
176
+ supportedMimeTypes: [
177
+ MIME_TEXT, MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF,
178
+ ],
179
179
  defaultProvider: OPENROUTER,
180
180
  },
181
+ // tts/stt models
182
+ [GEMINI_25_FLASH_TTS]: {
183
+ source: S_GOOGLE, maxInputTokens: kT(32), audio: true, fast: true,
184
+ hidden: true, defaultProvider: GOOGLE,
185
+ },
186
+ [GEMINI_25_PRO_TTS]: {
187
+ source: S_GOOGLE, maxInputTokens: kT(32), audio: true,
188
+ hidden: true, defaultProvider: GOOGLE,
189
+ },
190
+ [GPT_4O_MIMI_TTS]: {
191
+ source: S_OPENAI, maxInputTokens: kT(2), audio: true, fast: true,
192
+ hidden: true, defaultProvider: OPENAI,
193
+ },
194
+ [GPT_4O_TRANSCRIBE]: {
195
+ source: S_OPENAI, maxInputTokens: 0, hearing: true, fast: true,
196
+ hidden: true, defaultProvider: OPENAI,
197
+ },
181
198
  // models with deepsearch capabilities
182
199
  [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
183
200
  icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
@@ -273,8 +290,9 @@ const PROVIDER_ICONS = {
273
290
  };
274
291
 
275
292
  const FEATURE_ICONS = {
276
- audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', image: '🎨',
277
- json: '📊', reasoning: '🧠', tools: '🧰', vision: '👁️',
293
+ audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', hearing: '👂',
294
+ hidden: '🙈', image: '🎨', json: '📊', reasoning: '🧠', tools: '🧰',
295
+ video: '🎬', vision: '👁️',
278
296
  };
279
297
 
280
298
  const tokenRatioByWords = Math.min(
@@ -292,7 +310,7 @@ let tokeniser, _tools;
292
310
 
293
311
  const unifyProvider = provider => {
294
312
  assert(provider = (provider || '').trim(), 'AI provider is required.');
295
- for (let type of [OPENROUTER, GOOGLE, JINA, OLLAMA, SILICONFLOW]) {
313
+ for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
296
314
  if (insensitiveCompare(provider, type)) { return type; }
297
315
  }
298
316
  throwError(`Invalid AI provider: ${provider}.`);
@@ -399,6 +417,11 @@ const setupAi = ai => {
399
417
  });
400
418
  };
401
419
 
420
+ const OpenAI = async opts => {
421
+ const lib = await libOpenAi(opts);
422
+ return { toFile: lib.toFile, client: new (lib).OpenAI(opts) };
423
+ };
424
+
402
425
  const init = async (options = {}) => {
403
426
  if (options?.debug) {
404
427
  (await need('node:util')).inspect.defaultOptions.depth = null;
@@ -435,14 +458,24 @@ const init = async (options = {}) => {
435
458
  });
436
459
  }
437
460
  break;
461
+ case OPENAI:
462
+ assertApiKey(provider, options);
463
+ var { client, toFile } = await OpenAI({ ...options });
464
+ for (let model of models) {
465
+ setupAi({
466
+ provider, model, client, toFile,
467
+ prompt: promptOpenAI, priority,
468
+ });
469
+ }
470
+ break;
438
471
  case JINA:
439
472
  assertApiKey(provider, options);
440
- var client = await OpenAI({
473
+ var { client } = await OpenAI({
441
474
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
442
475
  });
443
476
  for (let model of models) {
444
477
  setupAi({
445
- provider, model, client, prompt: promptOpenAI, priority,
478
+ provider, model, client, prompt: promptOpenRouter, priority,
446
479
  });
447
480
  }
448
481
  break;
@@ -455,7 +488,7 @@ const init = async (options = {}) => {
455
488
  });
456
489
  for (let model of models) {
457
490
  setupAi({
458
- provider, model, client, prompt: promptOpenAI, priority,
491
+ provider, model, client, prompt: promptOpenRouter, priority,
459
492
  });
460
493
  ignoreErrFunc(async () => {
461
494
  phLog(await (await fetch(`${baseURL}completions`, {
@@ -473,17 +506,19 @@ const init = async (options = {}) => {
473
506
  });
474
507
  for (let model of models) {
475
508
  setupAi({
476
- provider, model, client, prompt: promptOpenAI, priority,
509
+ provider, model, client, prompt: promptOpenRouter, priority,
477
510
  });
478
511
  }
479
512
  break;
480
513
  default:
481
514
  assertApiKey(provider, options);
482
- var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
515
+ var { client } = await OpenAI({
516
+ baseURL: OPENROUTER_API, ...options || {},
517
+ });
483
518
  for (let model of models) {
484
519
  setupAi({
485
520
  provider: OPENROUTER || provider, model, client,
486
- prompt: promptOpenAI, priority,
521
+ prompt: promptOpenRouter, priority,
487
522
  });
488
523
  }
489
524
  }
@@ -492,12 +527,16 @@ const init = async (options = {}) => {
492
527
  };
493
528
 
494
529
  const packAi = (ais, options = {}) => {
495
- const res = options.basic ? ais.map(x => ({
530
+ let res = options.basic ? ais.map(x => ({
496
531
  id: x.id, name: x.name, features: x.features,
497
532
  initOrder: x.initOrder, priority: x.priority,
498
533
  provider: x.provider, model: x.model,
499
534
  })) : ais;
500
- return options.all ? res : res[0];
535
+ if (options.all && !Object.keys(options.select).length && !options.withHidden) {
536
+ res = res.filter(x => !x.model.hidden);
537
+ } else if (options.withHidden) { } else { res = res[0]; }
538
+ assert(res?.length || res?.id, 'AI not found.');
539
+ return res;
501
540
  };
502
541
 
503
542
  const getAi = async (id, options = {}) => {
@@ -507,26 +546,22 @@ const getAi = async (id, options = {}) => {
507
546
  const ai = ais.find(x => x.id === id);
508
547
  assert(ai, `AI not found: ${id}.`);
509
548
  return options?.client ? ai?.client : ai;
510
- } else if (options?.select) {
511
- const res = [];
512
- for (let x of ais) {
513
- let select = true;
514
- for (let i in options.select) {
515
- if (options.select[i] && i !== 'fast' && !x.model[i]) {
516
- select = false; break;
517
- }
549
+ }
550
+ const res = [];
551
+ for (let x of ais) {
552
+ let select = true;
553
+ for (let i in options.select) {
554
+ if (options.select[i] && i !== 'fast' && !x.model[i]) {
555
+ select = false; break;
518
556
  }
519
- select && (res.push(x));
520
557
  }
521
- const best = options.select?.fast ? res.filter(x => x.model.fast) : res;
522
- if (best.length) { return packAi(best, options); }
523
- assert(res.length, 'AI not found.');
524
- log(`Best match AI not found, fallbacked: ${JSON.stringify(options.select)}.`);
525
- return packAi(res, options);
558
+ select && (res.push(x));
526
559
  }
527
- const result = packAi(ais, options);
528
- assert(result?.length || result?.id, 'AI not found.');
529
- return result;
560
+ const best = options.select?.fast ? res.filter(x => x.model.fast) : res;
561
+ if (best.length) { return packAi(best, options); }
562
+ assert(res.length, 'AI not found.');
563
+ log(`Best match AI not found, fallbacked: ${JSON.stringify(options.select)}.`);
564
+ return packAi(res, options);
530
565
  };
531
566
 
532
567
  const countTokens = async (input, options) => {
@@ -756,9 +791,23 @@ const buildPrompts = async (model, input, options = {}) => {
756
791
  let [history, content, prompt, _model, _assistant, _history]
757
792
  = [null, input, null, { role: MODEL }, { role: assistant }, null];
758
793
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
759
- options.attachments = (
794
+ options.attachments = (await Promise.all((
760
795
  options.attachments?.length ? options.attachments : []
761
- ).filter(x => [
796
+ ).map(async x => {
797
+ if (String.isString(x)) {
798
+ var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
799
+ return {
800
+ url: convResp.content,
801
+ mime_type: convResp.mime,
802
+ }
803
+ } else if (Buffer.isBuffer(x)) {
804
+ var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
805
+ return {
806
+ url: convResp.content,
807
+ mime_type: convResp.mime,
808
+ }
809
+ } else if (Object.isObject(x)) { return x; } else { return null; }
810
+ }))).filter(x => x && [
762
811
  ...model?.supportedMimeTypes,
763
812
  ...model?.supportedDocTypes,
764
813
  ...model?.supportedAudioTypes,
@@ -855,7 +904,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
855
904
  `⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
856
905
  ] : []].map(x => x.trim()).join('\n\n');
857
906
 
858
- const promptOpenAI = async (aiId, content, options = {}) => {
907
+ const promptOpenRouter = async (aiId, content, options = {}) => {
859
908
  let { provider, client, model } = await getAi(aiId);
860
909
  let [
861
910
  result, resultAudio, resultImages, resultReasoning, event, resultTools,
@@ -1006,7 +1055,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1006
1055
  = await handleToolsCall(event, { ...options, result });
1007
1056
  if (toolsResult.length
1008
1057
  && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
1009
- return promptOpenAI(aiId, content, {
1058
+ return promptOpenRouter(aiId, content, {
1010
1059
  ...options, toolsResult, result: toolsResponse,
1011
1060
  });
1012
1061
  }
@@ -1016,99 +1065,177 @@ const promptOpenAI = async (aiId, content, options = {}) => {
1016
1065
 
1017
1066
  const promptGoogle = async (aiId, prompt, options = {}) => {
1018
1067
  let { provider, client, model } = await getAi(aiId);
1019
- const M = MODELS[model.name];
1068
+ const target_model = options?.model || model.name;
1069
+ const M = MODELS[target_model];
1020
1070
  prompt = ensureString(prompt, { trim: true });
1071
+ assert(prompt.length, 'Prompt is required.');
1072
+ M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
1021
1073
  assert(await countTokens(prompt, { fast: true })
1022
1074
  <= M.maxInputTokens,
1023
1075
  `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1024
1076
  );
1025
- switch (model?.name) {
1026
- case IMAGEN_4_ULTRA:
1027
- var resp = await client.models.generateImages({
1028
- model: model.name, prompt, config: {
1029
- numberOfImages: options?.n || 4, sampleImageSize: '2K',
1030
- includeRaiReason: true,
1031
- // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1032
- aspectRatio: '16:9', personGeneration: 'allow_adult',
1033
- ...options?.config || {},
1034
- },
1035
- });
1036
- var generated = resp?.generatedImages;
1037
- assert(!resp?.error && generated?.filter(
1038
- x => !x.raiFilteredReason
1039
- ).length, resp?.error?.message || generated?.find(
1040
- x => x.raiFilteredReason
1041
- )?.raiFilteredReason || ERROR_GENERATING);
1042
- if (!options?.raw) {
1043
- resp = {
1044
- text: '', images: await Promise.all((
1045
- resp?.generatedImages || []
1046
- ).map(async x => ({
1047
- data: await convert(x.image.imageBytes, {
1048
- input: BASE64, suffix: 'png', ...options || {}
1049
- }), mimeType: x.image.mimeType,
1050
- }))), model: packModelLabel([
1051
- provider, M.source, model.name,
1052
- ]),
1053
- }
1077
+ if (M?.image) {
1078
+ var resp = await client.models.generateImages({
1079
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1080
+ numberOfImages: options?.n || 4, sampleImageSize: '2K',
1081
+ includeRaiReason: true,
1082
+ // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
1083
+ aspectRatio: '16:9', personGeneration: 'allow_adult',
1084
+ }),
1085
+ });
1086
+ var generated = resp?.generatedImages;
1087
+ assert(!resp?.error && generated?.filter(
1088
+ x => !x.raiFilteredReason
1089
+ ).length, resp?.error?.message || generated?.find(
1090
+ x => x.raiFilteredReason
1091
+ )?.raiFilteredReason || ERROR_GENERATING);
1092
+ if (!options?.raw) {
1093
+ resp = {
1094
+ text: '', images: await Promise.all((
1095
+ resp?.generatedImages || []
1096
+ ).map(async x => ({
1097
+ data: await convert(x.image.imageBytes, {
1098
+ input: BASE64, suffix: 'png', ...options || {}
1099
+ }), mimeType: x.image.mimeType,
1100
+ }))), model: packModelLabel([provider, M.source, M.name]),
1054
1101
  }
1055
- break;
1056
- case VEO_31:
1057
- var resp = await client.models.generateVideos({
1058
- model: model.name, prompt, config: {
1059
- aspectRatio: '16:9', numberOfVideos: 1,
1060
- // personGeneration: 'allow_adult',
1061
- enablePromptRewriting: true, addWatermark: false,
1062
- includeRaiReason: true, ...options?.config || {},
1063
- },
1102
+ }
1103
+ } else if (M?.video) {
1104
+ var resp = await client.models.generateVideos({
1105
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
1106
+ aspectRatio: '16:9', numberOfVideos: 1,
1107
+ // personGeneration: 'allow_adult',
1108
+ enablePromptRewriting: true, addWatermark: false,
1109
+ includeRaiReason: true,
1110
+ }),
1111
+ });
1112
+ assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1113
+ if (options?.generateRaw) { return resp; }
1114
+ await tryUntil(async () => {
1115
+ resp = await client.operations.getVideosOperation({
1116
+ operation: resp,
1064
1117
  });
1065
- assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
1066
- if (options?.generateRaw) { return resp; }
1067
- await tryUntil(async () => {
1068
- resp = await client.operations.getVideosOperation({
1069
- operation: resp,
1070
- });
1071
- assert(
1072
- resp?.done,
1073
- `Waiting for Google video generation: ${resp.name}`,
1074
- );
1075
- }, { maxTry: 60 * 10, log });
1076
- assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1077
- x => !x.raiFilteredReason
1078
- ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1079
- x => x.raiFilteredReason
1080
- )?.raiFilteredReason || ERROR_GENERATING);
1081
- if (options?.videoRaw) {
1082
- resp = resp?.response?.generatedVideos;
1083
- } else if (!options?.videoRaw) {
1084
- resp = {
1085
- text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1086
- x => x?.video?.uri
1087
- ).map(async x => {
1088
- const downloadPath = `${getTempPath({
1089
- seed: x?.video?.uri
1090
- })}.mp4`;
1091
- // @todo: fix this
1092
- // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1093
- await client.files.download({ file: x, downloadPath });
1094
- await timeout(1000 * 10); // hack to wait for file to be downloaded
1095
- return {
1096
- data: await convert(downloadPath, {
1097
- input: FILE, suffix: 'mp4', ...options || {}
1098
- }), mimeType: MIME_MP4, jobId: resp.name,
1099
- };
1100
- })), model: packModelLabel([
1101
- provider, M.source, model.name,
1102
- ]),
1103
- };
1104
- }
1105
- break;
1106
- default:
1107
- throw new Error('Unsupported model.');
1118
+ assert(
1119
+ resp?.done,
1120
+ `Waiting for Google video generation: ${resp.name}`,
1121
+ );
1122
+ }, { maxTry: 60 * 10, log });
1123
+ assert(!resp?.error && resp?.response?.generatedVideos?.filter(
1124
+ x => !x.raiFilteredReason
1125
+ ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
1126
+ x => x.raiFilteredReason
1127
+ )?.raiFilteredReason || ERROR_GENERATING);
1128
+ if (options?.videoRaw) {
1129
+ resp = resp?.response?.generatedVideos;
1130
+ } else if (!options?.videoRaw) {
1131
+ resp = {
1132
+ text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
1133
+ x => x?.video?.uri
1134
+ ).map(async x => {
1135
+ const downloadPath = `${getTempPath({
1136
+ seed: x?.video?.uri
1137
+ })}.mp4`;
1138
+ // @todo: fix this
1139
+ // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
1140
+ await client.files.download({ file: x, downloadPath });
1141
+ await timeout(1000 * 10); // hack to wait for file to be downloaded
1142
+ return {
1143
+ data: await convert(downloadPath, {
1144
+ input: FILE, suffix: 'mp4', ...options || {}
1145
+ }), mimeType: MIME_MP4, jobId: resp.name,
1146
+ };
1147
+ })), model: packModelLabel([provider, M.source, M.name]),
1148
+ };
1149
+ }
1150
+ } else if (M?.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
1151
+ var resp = await client.models.generateContent({
1152
+ model: M.name, contents: prompt,
1153
+ config: mergeAtoB(options?.config, {
1154
+ responseModalities: ['AUDIO'],
1155
+ speechConfig: {
1156
+ voiceConfig: {
1157
+ prebuiltVoiceConfig: {
1158
+ voiceName: options?.voice || 'Zephyr',
1159
+ },
1160
+ },
1161
+ },
1162
+ }),
1163
+ });
1164
+ const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
1165
+ assert(rawAudio, ERROR_GENERATING, 500);
1166
+ if (!options?.raw) {
1167
+ resp = {
1168
+ text: '', audio: {
1169
+ data: await packPcmToWav(rawAudio?.data, {
1170
+ input: BASE64, suffix: wav, ...options || {},
1171
+ }), mimeType: MIME_WAV,
1172
+ }, model: packModelLabel([provider, M.source, M.name]),
1173
+ };
1174
+ }
1175
+ } else {
1176
+ throwError('Unsupported model.');
1108
1177
  }
1109
- await streamResp(
1110
- { ...resp, processing: true }, { ...options, noPack: true }
1111
- );
1178
+ // await streamResp(
1179
+ // { ...resp, processing: true }, { ...options, noPack: true }
1180
+ // );
1181
+ return { ...resp, processing: false };
1182
+ };
1183
+
1184
+ const promptOpenAI = async (aiId, prompt, options = {}) => {
1185
+ let { provider, client, toFile, model } = await getAi(aiId);
1186
+ const target_model = options?.model || model.name;
1187
+ const M = MODELS[target_model];
1188
+ prompt = ensureString(prompt, { trim: true });
1189
+ if (M?.audio) {
1190
+ assert(prompt.length, 'Prompt is required.');
1191
+ const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
1192
+ assert(await countTokens(
1193
+ JSON.stringify([ins_prompt, prompt]), { fast: true }
1194
+ ) <= M.maxInputTokens,
1195
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
1196
+ );
1197
+ // https://platform.openai.com/docs/api-reference/audio/createSpeech
1198
+ var resp = await client.audio.speech.create({
1199
+ model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
1200
+ instructions: ins_prompt, response_format: 'opus',
1201
+ input: prompt, ...options?.params || {},
1202
+ });
1203
+ if (!options?.raw) {
1204
+ resp = {
1205
+ text: '', audio: {
1206
+ data: await convert(Buffer.from(
1207
+ await resp.arrayBuffer()
1208
+ ), { suffix: OGG_EXT, ...options || {} }),
1209
+ mimeType: MIME_OGG,
1210
+ }, model: packModelLabel([provider, M.source, M.name]),
1211
+ };
1212
+ }
1213
+ } else if (M?.hearing) {
1214
+ const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
1215
+ assert(audio, 'Audio attachment is required.');
1216
+ const input = ensureString(options?.input, { case: 'UP' });
1217
+ const { content, cleanup } = await convert(audio, {
1218
+ input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
1219
+ suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
1220
+ withCleanupFunc: true,
1221
+ });
1222
+ var resp = await client.audio.transcriptions.create({
1223
+ file: await toFile(content), model: M.name,
1224
+ response_format: 'text', ...options?.params || {},
1225
+ });
1226
+ await cleanup();
1227
+ if (!options?.raw) {
1228
+ resp = {
1229
+ text: resp.trim(),
1230
+ model: packModelLabel([provider, M.source, M.name]),
1231
+ };
1232
+ }
1233
+ } else {
1234
+ throwError('Unsupported model.');
1235
+ }
1236
+ // await streamResp(
1237
+ // { ...resp, processing: true }, { ...options, noPack: true }
1238
+ // );
1112
1239
  return { ...resp, processing: false };
1113
1240
  };
1114
1241
 
@@ -1223,7 +1350,6 @@ const distillFile = async (attachments, o) => {
1223
1350
  const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
1224
1351
  return {
1225
1352
  url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
1226
- data: base64Encode(buf, true),
1227
1353
  mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
1228
1354
  };
1229
1355
  })();
@@ -1304,7 +1430,9 @@ export {
1304
1430
  DEFAULT_MODELS,
1305
1431
  FEATURE_ICONS,
1306
1432
  FUNCTION,
1433
+ GEMINI_25_FLASH_TTS,
1307
1434
  GEMINI_25_FLASH,
1435
+ GEMINI_25_PRO_TTS,
1308
1436
  GEMINI_30_PRO_IMAGE,
1309
1437
  GPT_5_IMAGE,
1310
1438
  GPT_51,
@@ -1326,7 +1454,7 @@ export {
1326
1454
  k,
1327
1455
  listOpenAIModels,
1328
1456
  prompt,
1329
- promptOpenAI,
1457
+ promptOpenRouter,
1330
1458
  resetSession,
1331
1459
  talk,
1332
1460
  trimPrompt,