utilitas 2000.3.26 → 2000.3.28

This diff compares publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
package/index.mjs CHANGED
@@ -15,7 +15,6 @@ import * as email from './lib/email.mjs';
  import * as embedding from './lib/embedding.mjs';
  import * as encryption from './lib/encryption.mjs';
  import * as event from './lib/event.mjs';
- import * as gen from './lib/gen.mjs';
  import * as media from './lib/media.mjs';
  import * as memory from './lib/memory.mjs';
  import * as network from './lib/network.mjs';
@@ -40,8 +39,8 @@ export {
  fileType, math, uuid,
  // features
  alan, bee, bot, boxes, cache, callosum, color, dbio, email, embedding,
- encryption, event, gen, manifest, media, memory, network, sentinel, shell,
- sms, speech, ssl, storage, tape, uoid, utilitas, vision, web
+ encryption, event, manifest, media, memory, network, sentinel, shell, sms,
+ speech, ssl, storage, tape, uoid, utilitas, vision, web
  };

  if (utilitas.inBrowser() && !globalThis.utilitas) {
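This release drops the `gen` module from both the root imports and the export list above, so the `gen` named export disappears from the package surface. A minimal consumer-side sketch of the break (the error text is standard Node.js ESM wording, shown for illustration):

    // utilitas 2000.3.26 and earlier:
    import { gen } from 'utilitas'; // worked
    // utilitas 2000.3.28:
    // SyntaxError: The requested module 'utilitas' does not provide an
    // export named 'gen'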
package/lib/alan.mjs CHANGED
@@ -5,17 +5,18 @@ import { packPcmToWav } from './media.mjs';
  import { v4 as uuidv4 } from 'uuid';

  import {
- BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG, MIME_JPEG,
- MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV, MIME_MPEGPS,
- MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC, MIME_MP3,
- MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV, MIME_WEBM,
- MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL, decodeBase64DataURL,
+ STREAM, FILE, BASE64, BUFFER, DATAURL, MIME_BINARY, MIME_TEXT, MIME_PNG,
+ MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI, MIME_WMV,
+ MIME_MPEGPS, MIME_FLV, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_AAC, MIME_FLAC,
+ MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM, MIME_WAV,
+ MIME_WEBM, MIME_TGPP, MIME_PCM16, MIME_OGG, convert, formatDataURL,
+ getTempPath, decodeBase64DataURL,
  } from './storage.mjs';

  import {
- log as _log, renderText as _renderText, base64Encode, ensureArray,
- ensureString, extract, ignoreErrFunc, insensitiveCompare, isSet, need,
- parseJson, throwError
+ log as _log, renderText as _renderText, ensureArray, ensureString, extract,
+ ignoreErrFunc, insensitiveCompare, isSet, need, parseJson, throwError,
+ tryUntil, timeout, mergeAtoB,
  } from './utilitas.mjs';

  const INSTRUCTIONS = `You are a highly capable, thoughtful, and precise assistant named HAL9000. Your goal is to deeply understand the user's intent, ask clarifying questions when needed, think step-by-step through complex problems, provide clear and accurate answers, and proactively anticipate helpful follow-up information. Always prioritize being truthful, nuanced, insightful, and efficient, tailoring your responses specifically to the user's needs and preferences. Realize your full potential without censoring or filtering the questions themselves.
@@ -42,10 +43,12 @@ You may be provided with some tools(functions) to help you gather information an
  - When the tools are not suitable and you have to answer questions based on your understanding, please do not mention any tool-related information in your response.
  - Unless otherwise specified to require the original result, in most cases, you may reorganize the information obtained after using the tool to solve the problem as needed.`;

- const _NEED = ['js-tiktoken', 'OpenAI'];
+ const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
+
+ const _NEED = ['js-tiktoken', 'OpenAI', '@google/genai'];

  const [
- OPENAI, GEMINI, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
+ OPENAI, GOOGLE, OLLAMA, NOVA, DEEPSEEK_32, MD_CODE, CLOUD_OPUS_45, AUDIO,
  WAV, ATTACHMENTS, OPENAI_VOICE, GPT_REASONING_EFFORT, THINK, THINK_STR,
  THINK_END, TOOLS_STR, TOOLS_END, TOOLS, TEXT, OK, FUNC, GPT_51,
  GPT_51_CODEX, GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, v8k, ais,
@@ -54,9 +57,11 @@ const [
  hour, gb, trimTailing, trimBeginning, GEMINI_30_PRO_IMAGE, IMAGE, JINA,
  JINA_DEEPSEARCH, SILICONFLOW, SF_DEEPSEEK_32, MAX_TIRE, OPENROUTER_API,
  OPENROUTER, AUTO, TOOL, S_OPENAI, S_GOOGLE, S_ANTHROPIC, ONLINE,
- GEMINI_30_PRO, GEMINI_25_FLASH,
+ GEMINI_30_PRO, GEMINI_25_FLASH, IMAGEN_4_ULTRA, VEO_31, IMAGEN_4_UPSCALE,
+ ERROR_GENERATING, GEMINI_25_FLASH_TTS, GEMINI_25_PRO_TTS, wav,
+ GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, INVALID_AUDIO, OGG_EXT,
  ] = [
- 'OpenAI', 'Gemini', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
+ 'OpenAI', 'Google', 'Ollama', 'nova', 'deepseek-3.2-speciale', '```',
  'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
  'medium', 'think', '<think>', '</think>', '<tools>', '</tools>',
  'tools', 'text', 'OK', 'function', 'gpt-5.1', 'gpt-5.1-codex',
@@ -70,7 +75,11 @@ const [
  'deepseek-ai/DeepSeek-V3.2-exp', 768 * 768,
  'https://openrouter.ai/api/v1', 'OpenRouter', 'openrouter/auto', 'tool',
  'openai', 'google', 'anthropic', ':online', 'gemini-3-pro-preview',
- 'gemini-2.5-flash-preview-09-2025',
+ 'gemini-2.5-flash-preview-09-2025', 'imagen-4.0-ultra-generate-001',
+ 'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
+ 'Error generating content.', 'gemini-2.5-flash-preview-tts',
+ 'gemini-2.5-pro-tts', 'wav', 'gpt-4o-mini-tts', 'gpt-4o-transcribe',
+ 'Invalid audio data.', 'ogg',
  ];

  const [tool, messages, text]
@@ -89,19 +98,6 @@ const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;
  const assertApiKey = (p, o) => assert(o?.apiKey, `${p} api key is required.`);
  const getProviderIcon = provider => PROVIDER_ICONS[provider] || '🔮';
  const libOpenAi = async opts => await need('openai', { ...opts, raw: true });
- const OpenAI = async opts => new (await libOpenAi(opts)).OpenAI(opts);
- const OPENAI_RULES = {
- source: S_OPENAI, icon: '⚛️',
- contextWindow: kT(400), maxOutputTokens: k(128),
- imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
- maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP],
- supportedDocTypes: [MIME_PDF],
- supportedAudioTypes: [MIME_WAV],
- // audio: 'gpt-4o-audio-preview',
- json: true, tools: true, vision: true,
- reasoning: true, defaultProvider: OPENROUTER,
- };

  const GEMINI_RULES = {
  source: S_GOOGLE, icon: '♊️',
@@ -109,15 +105,24 @@ const GEMINI_RULES = {
  imageCostTokens: ~~(v8k / MAX_TIRE * 258), maxAudioLength: hour(8.4),
  maxAudioPerPrompt: 1, maxFileSize: m(20), maxImagePerPrompt: 3000,
  maxImageSize: Infinity, maxUrlSize: gb(2), maxVideoLength: minute(45),
- maxVideoPerPrompt: 10, vision: true, supportedMimeTypes: [
+ maxVideoPerPrompt: 10, vision: true, hearing: true, tools: true,
+ reasoning: true, supportedMimeTypes: [
  MIME_PNG, MIME_JPEG, MIME_MOV, MIME_MPEG, MIME_MP4, MIME_MPG, MIME_AVI,
  MIME_WMV, MIME_MPEGPS, MIME_FLV, MIME_PDF, MIME_AAC, MIME_FLAC,
  MIME_MP3, MIME_MPEGA, MIME_M4A, MIME_MPGA, MIME_OPUS, MIME_PCM,
- MIME_WAV, MIME_WEBM, MIME_TGPP,
- ], supportedAudioTypes: [MIME_WAV, MIME_OGG, MIME_OPUS],
- // audio: 'gemini-2.5-flash-exp-native-audio-thinking-dialog',
- // gemini-2.5-flash-preview-native-audio-dialog
- defaultProvider: OPENROUTER,
+ MIME_WAV, MIME_WEBM, MIME_TGPP, MIME_OGG,
+ ], defaultProvider: OPENROUTER,
+ };
+
+ const OPENAI_RULES = {
+ source: S_OPENAI, icon: '⚛️',
+ contextWindow: kT(400), maxOutputTokens: k(128),
+ imageCostTokens: ~~(OPENAI_HI_RES_SIZE / MAX_TIRE * 140 + 70),
+ maxFileSize: m(50), maxImageSize: OPENAI_HI_RES_SIZE,
+ json: true, tools: true, vision: true, hearing: true, reasoning: true,
+ supportedMimeTypes: [
+ MIME_PNG, MIME_JPEG, MIME_GIF, MIME_WEBP, MIME_PDF, MIME_WAV
+ ], defaultProvider: OPENROUTER,
  };

  const DEEPSEEK_32_RULES = {
@@ -132,8 +137,7 @@ const MODELS = {
  // fast and balanced models
  [GEMINI_25_FLASH]: {
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- fast: true, reasoning: true, tools: true,
- json: false, // issue with json output via OpenRouter
+ fast: true, json: false, // issue with json output via OpenRouter
  // https://gemini.google.com/app/c680748b3307790b
  },
  // strong and fast
@@ -141,25 +145,27 @@
  // stronger but slow
  [GEMINI_30_PRO]: {
  ...GEMINI_RULES, contextWindow: m(1), maxOutputTokens: k(64),
- reasoning: true, tools: true,
  },
- // models with unique capabilities
+ // models with generation capabilities
  [GEMINI_30_PRO_IMAGE]: {
  ...GEMINI_RULES, icon: '🍌', label: 'Nano Banana Pro',
- contextWindow: k(64), maxOutputTokens: k(32),
- fast: true, image: true,
+ contextWindow: k(64), maxOutputTokens: k(32), image: true,
  },
- [GPT_51_CODEX]: { ...OPENAI_RULES },
- [GPT_5_IMAGE]: { ...OPENAI_RULES, image: true },
- [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
- icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
- maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
- supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
- reasoning: true, json: true, vision: true,
- deepsearch: true, defaultProvider: JINA,
+ [IMAGEN_4_ULTRA]: {
+ source: S_GOOGLE, maxInputTokens: 480,
+ image: true, defaultProvider: GOOGLE,
  },
- [DEEPSEEK_32]: DEEPSEEK_32_RULES,
- [SF_DEEPSEEK_32]: { ...DEEPSEEK_32_RULES, defaultProvider: SILICONFLOW },
+ [VEO_31]: {
+ source: S_GOOGLE, maxInputTokens: 1024,
+ imageCostTokens: 0, maxImagePerPrompt: 1,
+ maxImageSize: Infinity, vision: true, video: true,
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG], defaultProvider: GOOGLE,
+ },
+ [GPT_5_IMAGE]: {
+ ...OPENAI_RULES, icon: '🎨', label: 'gpt-image-1', image: true,
+ },
+ // models with code capabilities
+ [GPT_51_CODEX]: { ...OPENAI_RULES },
  [CLOUD_OPUS_45]: {
  source: S_ANTHROPIC, icon: '✳️',
  contextWindow: kT(200), maxOutputTokens: kT(64),
@@ -170,6 +176,35 @@ const MODELS = {
  json: true, reasoning: true, tools: true, vision: true,
  defaultProvider: OPENROUTER,
  },
+ // tts/stt models
+ [GEMINI_25_FLASH_TTS]: {
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
+ audio: true, fast: true, defaultProvider: GOOGLE,
+ },
+ [GEMINI_25_PRO_TTS]: {
+ source: S_GOOGLE, maxInputTokens: kT(32), func: 'generateAudio',
+ audio: true, defaultProvider: GOOGLE,
+ },
+ [GPT_4O_MIMI_TTS]: {
+ source: S_OPENAI, maxInputTokens: kT(2), func: 'generateAudio',
+ audio: true, fast: true, defaultProvider: OPENAI,
+ },
+ [GPT_4O_TRANSCRIBE]: {
+ source: S_OPENAI, maxInputTokens: 0,
+ func: 'transcribeAudio', hearing: true, fast: true,
+ defaultProvider: OPENAI,
+ },
+ // models with deepsearch capabilities
+ [JINA_DEEPSEARCH]: { // @todo: parse more details from results, eg: "reed urls".
+ icon: '✴️', contextWindow: Infinity, maxInputTokens: Infinity,
+ maxOutputTokens: Infinity, imageCostTokens: 0, maxImageSize: Infinity,
+ supportedMimeTypes: [MIME_PNG, MIME_JPEG, MIME_TEXT, MIME_WEBP, MIME_PDF],
+ reasoning: true, json: true, vision: true,
+ deepsearch: true, defaultProvider: JINA,
+ },
+ // best Chinese models
+ [DEEPSEEK_32]: DEEPSEEK_32_RULES,
+ [SF_DEEPSEEK_32]: { ...DEEPSEEK_32_RULES, defaultProvider: SILICONFLOW },
  // best local model
  [GEMMA_3_27B]: {
  icon: '❇️', contextWindow: kT(128), maxOutputTokens: k(8),
@@ -249,13 +284,14 @@ const DEFAULT_MODELS = {
  };

  const PROVIDER_ICONS = {
- [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GEMINI]: '♊️',
+ [OPENROUTER]: '🔀', [OPENAI]: '⚛️', [JINA]: '✴️', [GOOGLE]: '♊️',
  [OLLAMA]: '🦙', [ANTHROPIC]: '✳️', [SILICONFLOW]: '🧬',
  };

  const FEATURE_ICONS = {
- audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', image: '🎨',
- json: '📊', reasoning: '🧠', tools: '🧰', vision: '👁️',
+ audio: '📣', deepsearch: '🔍', fast: '⚡️', finetune: '🔧', hearing: '👂',
+ image: '🎨', json: '📊', reasoning: '🧠', tools: '🧰', video: '🎬',
+ vision: '👁️',
  };

  const tokenRatioByWords = Math.min(
@@ -273,7 +309,7 @@ let tokeniser, _tools;

  const unifyProvider = provider => {
  assert(provider = (provider || '').trim(), 'AI provider is required.');
- for (let type of [OPENROUTER, JINA, OLLAMA, SILICONFLOW]) {
+ for (let type of [OPENROUTER, GOOGLE, OPENAI, JINA, OLLAMA, SILICONFLOW]) {
  if (insensitiveCompare(provider, type)) { return type; }
  }
  throwError(`Invalid AI provider: ${provider}.`);
@@ -380,6 +416,11 @@ const setupAi = ai => {
  });
  };

+ const OpenAI = async opts => {
+ const lib = await libOpenAi(opts);
+ return { toFile: lib.toFile, client: new (lib).OpenAI(opts) };
+ };
+
  const init = async (options = {}) => {
  if (options?.debug) {
  (await need('node:util')).inspect.defaultOptions.depth = null;
@@ -406,14 +447,34 @@ const init = async (options = {}) => {
  `Model name or description is required for provider: ${provider}.`);
  _tools || (_tools = await packTools());
  switch (provider) {
+ case GOOGLE:
+ assertApiKey(provider, options);
+ const { GoogleGenAI } = await need('@google/genai');
+ var client = new GoogleGenAI({ vertexai: false, ...options });
+ for (let model of models) {
+ setupAi({
+ provider, model, client, prompt: promptGoogle, priority,
+ });
+ }
+ break;
+ case OPENAI:
+ assertApiKey(provider, options);
+ var { client, toFile } = await OpenAI({ ...options });
+ for (let model of models) {
+ setupAi({
+ provider, model, client, toFile,
+ prompt: promptOpenAI, priority,
+ });
+ }
+ break;
  case JINA:
  assertApiKey(provider, options);
- var client = await OpenAI({
+ var { client } = await OpenAI({
  baseURL: 'https://deepsearch.jina.ai/v1/', ...options,
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  }
  break;
@@ -426,7 +487,7 @@ const init = async (options = {}) => {
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  ignoreErrFunc(async () => {
  phLog(await (await fetch(`${baseURL}completions`, {
@@ -444,17 +505,19 @@
  });
  for (let model of models) {
  setupAi({
- provider, model, client, prompt: promptOpenAI, priority,
+ provider, model, client, prompt: promptOpenRouter, priority,
  });
  }
  break;
  default:
  assertApiKey(provider, options);
- var client = await OpenAI({ baseURL: OPENROUTER_API, ...options || {} });
+ var { client } = await OpenAI({
+ baseURL: OPENROUTER_API, ...options || {},
+ });
  for (let model of models) {
  setupAi({
  provider: OPENROUTER || provider, model, client,
- prompt: promptOpenAI, priority,
+ prompt: promptOpenRouter, priority,
  });
  }
  }
@@ -588,7 +651,9 @@ const listOpenAIModels = async (aiId, options) => {
  };

  const streamResp = async (resp, options) => {
- const msg = await packResp(resp, { ...options, processing: true });
+ const msg = options?.noPack ? resp : await packResp(
+ resp, { ...options, processing: true }
+ );
  return options?.stream
  && (msg?.text || msg?.audio?.length || msg?.images?.length)
  && await ignoreErrFunc(async () => await options.stream(msg), LOG);
@@ -606,13 +671,13 @@ const packResp = async (resp, options) => {
  if (options?.raw) { return resp; }
  let [
  txt, audio, images, annotations, simpleText, annotationsMarkdown, end,
- json, audioMimeType, catched,
+ json, audioMimeType,
  ] = [
  resp.text || '', // ChatGPT / Claude / Gemini / Ollama
  resp?.audio?.data, // ChatGPT audio mode
  resp?.images || [], // Gemini images via Openrouter
  resp?.references, // Gemini references
- '', '', '', null, MIME_PCM16, new Set(),
+ '', '', '', null, MIME_PCM16,
  ];
  simpleText = txt;
  while ((end = getInfoEnd(simpleText))) {
@@ -698,18 +763,23 @@ const packResp = async (resp, options) => {
  ...annotationsMarkdown ? { annotationsMarkdown } : {},
  ...audio ? { audio } : {}, ...images?.length ? { images } : {},
  processing: !!options?.processing,
- model: [
+ model: packModelLabel([
  options.provider, options?.router?.provider,
  options?.router?.model || options?.model,
- ].join('/').split('/').map(x => {
- const key = ensureString(x, { case: 'UP' });
- if (catched.has(key)) { return null; }
- catched.add(key);
- return x;
- }).filter(x => x).join('/'),
+ ]),
  };
  };

+ const packModelLabel = (model_reference) => {
+ const catched = new Set();
+ return model_reference.join('/').split('/').map(x => {
+ const key = ensureString(x, { case: 'UP' });
+ if (catched.has(key)) { return null; }
+ catched.add(key);
+ return x;
+ }).filter(x => x).join('/');
+ };
+
  const buildPrompts = async (model, input, options = {}) => {
  assert(!(
  options.jsonMode && !model?.json
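The dedup logic deleted from `packResp` above now lives in the new `packModelLabel` helper, which joins the provider/model segments with '/', re-splits them, and drops case-insensitive duplicate segments while keeping the first spelling. A small sketch of the expected behavior (the input segments are illustrative, not taken from this diff):

    // e.g. provider 'OpenRouter', router provider 'google',
    // routed model 'google/gemini-3-pro-preview':
    packModelLabel(['OpenRouter', 'google', 'google/gemini-3-pro-preview']);
    // → 'OpenRouter/google/gemini-3-pro-preview' (duplicate 'google' dropped)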
@@ -720,9 +790,23 @@ const buildPrompts = async (model, input, options = {}) => {
  let [history, content, prompt, _model, _assistant, _history]
  = [null, input, null, { role: MODEL }, { role: assistant }, null];
  options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
- options.attachments = (
+ options.attachments = (await Promise.all((
  options.attachments?.length ? options.attachments : []
- ).filter(x => [
+ ).map(async x => {
+ if (String.isString(x)) {
+ var convResp = await convert(x, { input: FILE, expected: DATAURL, meta: true });
+ return {
+ url: convResp.content,
+ mime_type: convResp.mime,
+ }
+ } else if (Buffer.isBuffer(x)) {
+ var convResp = await convert(x, { input: BUFFER, expected: DATAURL, meta: true });
+ return {
+ url: convResp.content,
+ mime_type: convResp.mime,
+ }
+ } else if (Object.isObject(x)) { return x; } else { return null; }
+ }))).filter(x => x && [
  ...model?.supportedMimeTypes,
  ...model?.supportedDocTypes,
  ...model?.supportedAudioTypes,
@@ -819,7 +903,7 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
  `⚠️ Tools recursion limit reached: ${MAX_TOOL_RECURSION}`
  ] : []].map(x => x.trim()).join('\n\n');

- const promptOpenAI = async (aiId, content, options = {}) => {
+ const promptOpenRouter = async (aiId, content, options = {}) => {
  let { provider, client, model } = await getAi(aiId);
  let [
  result, resultAudio, resultImages, resultReasoning, event, resultTools,
@@ -847,6 +931,18 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  x => x.function.name === 'searchWeb'
  ) && !options.jsonMode ? ONLINE : '';
  const targetModel = `${isOpenrouter(provider, model) ? `${source}/` : ''}${options.model}${ext}`;
+ if (provider === OPENAI) {
+ // need more debug, currently openrouter is priority
+ packedTools.push(...[
+ // https://platform.openai.com/docs/guides/tools?tool-type=web-search
+ { type: 'web_search', },
+ // https://platform.openai.com/docs/guides/tools-image-generation?lang=javascript
+ // https://platform.openai.com/docs/api-reference/responses/create#responses-create-tools
+ { type: 'image_generation', input_fidelity: 'high', partial_images: 3, quality: 'high', size: '1536x1024' },
+ // https://platform.openai.com/docs/guides/tools-code-interpreter
+ { type: 'code_interpreter', container: { type: 'auto', memory_limit: '8g' } },
+ ]);
+ }
  if (source === S_GOOGLE) {
  packedTools.push(...[
  { googleSearch: {} }, { codeExecution: {} }, { urlContext: {} },
@@ -958,7 +1054,7 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  = await handleToolsCall(event, { ...options, result });
  if (toolsResult.length
  && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
- return promptOpenAI(aiId, content, {
+ return promptOpenRouter(aiId, content, {
  ...options, toolsResult, result: toolsResponse,
  });
  }
@@ -966,6 +1062,181 @@ const promptOpenAI = async (aiId, content, options = {}) => {
  return await packResp(event, options);
  };

+ const promptGoogle = async (aiId, prompt, options = {}) => {
+ let { provider, client, model } = await getAi(aiId);
+ const target_model = options?.model || model.name;
+ const M = MODELS[target_model];
+ prompt = ensureString(prompt, { trim: true });
+ assert(prompt.length, 'Prompt is required.');
+ M.tts && (prompt = `${options?.prompt || TTS_PROMPT}: ${prompt}`);
+ assert(await countTokens(prompt, { fast: true })
+ <= M.maxInputTokens,
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
+ );
+ if (M?.image) {
+ var resp = await client.models.generateImages({
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
+ numberOfImages: options?.n || 4, sampleImageSize: '2K',
+ includeRaiReason: true,
+ // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
+ aspectRatio: '16:9', personGeneration: 'allow_adult',
+ }),
+ });
+ var generated = resp?.generatedImages;
+ assert(!resp?.error && generated?.filter(
+ x => !x.raiFilteredReason
+ ).length, resp?.error?.message || generated?.find(
+ x => x.raiFilteredReason
+ )?.raiFilteredReason || ERROR_GENERATING);
+ if (!options?.raw) {
+ resp = {
+ text: '', images: await Promise.all((
+ resp?.generatedImages || []
+ ).map(async x => ({
+ data: await convert(x.image.imageBytes, {
+ input: BASE64, suffix: 'png', ...options || {}
+ }), mimeType: x.image.mimeType,
+ }))), model: packModelLabel([provider, M.source, M.name]),
+ }
+ }
+ } else if (M?.video) {
+ var resp = await client.models.generateVideos({
+ model: M.name, prompt, config: mergeAtoB(options?.config, {
+ aspectRatio: '16:9', numberOfVideos: 1,
+ // personGeneration: 'allow_adult',
+ enablePromptRewriting: true, addWatermark: false,
+ includeRaiReason: true,
+ }),
+ });
+ assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
+ if (options?.generateRaw) { return resp; }
+ await tryUntil(async () => {
+ resp = await client.operations.getVideosOperation({
+ operation: resp,
+ });
+ assert(
+ resp?.done,
+ `Waiting for Google video generation: ${resp.name}`,
+ );
+ }, { maxTry: 60 * 10, log });
+ assert(!resp?.error && resp?.response?.generatedVideos?.filter(
+ x => !x.raiFilteredReason
+ ).length, resp?.error?.message || resp?.response?.generatedVideos?.find(
+ x => x.raiFilteredReason
+ )?.raiFilteredReason || ERROR_GENERATING);
+ if (options?.videoRaw) {
+ resp = resp?.response?.generatedVideos;
+ } else if (!options?.videoRaw) {
+ resp = {
+ text: '', videos: await Promise.all(resp?.response?.generatedVideos?.filter(
+ x => x?.video?.uri
+ ).map(async x => {
+ const downloadPath = `${getTempPath({
+ seed: x?.video?.uri
+ })}.mp4`;
+ // @todo: fix this
+ // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
+ await client.files.download({ file: x, downloadPath });
+ await timeout(1000 * 10); // hack to wait for file to be downloaded
+ return {
+ data: await convert(downloadPath, {
+ input: FILE, suffix: 'mp4', ...options || {}
+ }), mimeType: MIME_MP4, jobId: resp.name,
+ };
+ })), model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else if (M?.audio) { // https://ai.google.dev/gemini-api/docs/speech-generation#voices
+ var resp = await client.models.generateContent({
+ model: M.name, contents: prompt,
+ config: mergeAtoB(options?.config, {
+ responseModalities: ['AUDIO'],
+ speechConfig: {
+ voiceConfig: {
+ prebuiltVoiceConfig: {
+ voiceName: options?.voice || 'Zephyr',
+ },
+ },
+ },
+ }),
+ });
+ const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
+ assert(rawAudio, ERROR_GENERATING, 500);
+ if (!options?.raw) {
+ resp = {
+ text: '', audio: {
+ data: await packPcmToWav(rawAudio?.data, {
+ input: BASE64, suffix: wav, ...options || {},
+ }), mimeType: MIME_WAV,
+ }, model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else {
+ throwError('Unsupported model.');
+ }
+ // await streamResp(
+ // { ...resp, processing: true }, { ...options, noPack: true }
+ // );
+ return { ...resp, processing: false };
+ };
+
+ const promptOpenAI = async (aiId, prompt, options = {}) => {
+ let { provider, client, toFile, model } = await getAi(aiId);
+ const target_model = options?.model || model.name;
+ const M = MODELS[target_model];
+ prompt = ensureString(prompt, { trim: true });
+ if (M?.audio) {
+ assert(prompt.length, 'Prompt is required.');
+ const ins_prompt = options?.prompt || `${TTS_PROMPT}.`;
+ assert(await countTokens(
+ JSON.stringify([ins_prompt, prompt]), { fast: true }
+ ) <= M.maxInputTokens,
+ `Prompt must be less than ${M.maxInputTokens} tokens.`, 400
+ );
+ // https://platform.openai.com/docs/api-reference/audio/createSpeech
+ var resp = await client.audio.speech.create({
+ model: M.name, voice: DEFAULT_MODELS[OPENAI_VOICE],
+ instructions: ins_prompt, response_format: 'opus',
+ input: prompt, ...options?.params || {},
+ });
+ if (!options?.raw) {
+ resp = {
+ text: '', audio: {
+ data: await convert(Buffer.from(
+ await resp.arrayBuffer()
+ ), { suffix: OGG_EXT, ...options || {} }),
+ mimeType: MIME_OGG,
+ }, model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else if (M?.hearing) {
+ const audio = options?.attachments?.[0]?.data || options?.attachments?.[0];
+ assert(audio, 'Audio attachment is required.');
+ const input = ensureString(options?.input, { case: 'UP' });
+ const { content, cleanup } = await convert(audio, {
+ input: options?.input, ...options || {}, expected: STREAM, INVALID_AUDIO,
+ suffix: ['', BUFFER].includes(input) ? OGG_EXT : null,
+ withCleanupFunc: true,
+ });
+ var resp = await client.audio.transcriptions.create({
+ file: await toFile(content), model: M.name,
+ response_format: 'text', ...options?.params || {},
+ });
+ await cleanup();
+ if (!options?.raw) {
+ resp = {
+ text: resp.trim(),
+ model: packModelLabel([provider, M.source, M.name]),
+ };
+ }
+ } else {
+ throwError('Unsupported model.');
+ }
+ // await streamResp(
+ // { ...resp, processing: true }, { ...options, noPack: true }
+ // );
+ return { ...resp, processing: false };
+ };

  const initChat = async (options = {}) => {
  if (options.sessions) {
@@ -1078,7 +1349,6 @@ const distillFile = async (attachments, o) => {
  const buf = await convert(attachments[i], { expected: BUFFER, ...o || {} });
  return {
  url: await convert(buf, { input: BUFFER, expected: DATAURL, ...o || {} }),
- data: base64Encode(buf, true),
  mime_type: extract(await fileTypeFromBuffer(buf), 'mime') || MIME_BINARY,
  };
  })();
@@ -1159,13 +1429,18 @@ export {
  DEFAULT_MODELS,
  FEATURE_ICONS,
  FUNCTION,
+ GEMINI_25_FLASH_TTS,
  GEMINI_25_FLASH,
+ GEMINI_25_PRO_TTS,
  GEMINI_30_PRO_IMAGE,
+ GPT_5_IMAGE,
  GPT_51,
+ IMAGEN_4_ULTRA,
  INSTRUCTIONS,
  MODELS,
  OPENAI_VOICE,
  RETRIEVAL,
+ VEO_31,
  analyzeSessions,
  countTokens,
  distillFile,
@@ -1178,7 +1453,7 @@ export {
  k,
  listOpenAIModels,
  prompt,
- promptOpenAI,
+ promptOpenRouter,
  resetSession,
  talk,
  trimPrompt,
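Net effect of the alan.mjs changes: the old `promptOpenAI` chat path is renamed `promptOpenRouter`, and two direct-provider paths are added: `promptGoogle` (Imagen image generation, Veo video, Gemini TTS via `@google/genai`) and a new `promptOpenAI` (gpt-4o-mini-tts speech, gpt-4o-transcribe transcription via the official OpenAI client). A minimal consumer sketch of reaching the new TTS path, assuming `init` and `prompt` accept the option names shown (this diff does not show their full signatures):

    import { alan } from 'utilitas';

    // Hypothetical wiring based on the GOOGLE case added to `init` above;
    // the `provider`/`apiKey`/`model` option names are assumptions.
    await alan.init({ provider: 'Google', apiKey: process.env.GOOGLE_API_KEY });
    const resp = await alan.prompt('Hello from utilitas.', {
        model: 'gemini-2.5-flash-preview-tts', // GEMINI_25_FLASH_TTS
    });
    // resp.audio.data would be the WAV payload packed by packPcmToWav
    // inside promptGoogle.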
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
  const manifest = {
  "name": "utilitas",
  "description": "Just another common utility for JavaScript.",
- "version": "2000.3.26",
+ "version": "2000.3.28",
  "private": false,
  "homepage": "https://github.com/Leask/utilitas",
  "main": "index.mjs",