npm - utilitas - Versions diffs - 1995.2.49 → 1995.2.50 - Mend

utilitas 1995.2.49 → 1995.2.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md +2 -2
package/dist/utilitas.lite.mjs +1 -1
package/dist/utilitas.lite.mjs.map +1 -1
package/lib/alan.mjs +51 -12
package/lib/bot.mjs +31 -18
package/lib/manifest.mjs +1 -1
package/package.json +1 -1

package/lib/alan.mjs CHANGED

@@ -18,7 +18,7 @@ const [
     EMBEDDING_GECKO_001, EMBEDDING_GECKO_002, EMBEDDING_GECKO_ML001, MISTRAL,
 ] = [
         'gpt-3.5-turbo', 'gpt-3.5-turbo-1106', 'gpt-4', 'gpt-4-1106',
-        'gpt-4-1106-preview', 'gemini-pro', 'gemini-pro-vision',
+        'gpt-4-vision-preview', 'gemini-pro', 'gemini-pro-vision',
         'text-embedding-ada-002', 'embedding-001', 'textembedding-gecko@001',
         'textembedding-gecko@002', 'textembedding-gecko-multilingual@001',
         'mistral',
@@ -51,6 +51,7 @@ const trimTailing = text => text.replace(/[\.\s]*$/, '');
 const newSessionId = () => createUoid({ type: sessionType });
 const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
 const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
+const buildGeminiParts = (text, atcmt) => [{ text }, ...atcmt ? [atcmt] : []];
 const [png, jpeg, mov, mpeg, mp4, mpg, avi, wmv, mpegps, flv, gif, webp] = [
     'image/png', 'image/jpeg', 'video/mov', 'video/mpeg', 'video/mp4',
     'video/mpg', 'video/avi', 'video/wmv', 'video/mpegps', 'video/flv',
@@ -160,6 +161,7 @@ const MODELS = {
 for (const n in MODELS) {
     MODELS[n]['name'] = n;
     if ([TEXT_EMBEDDING_ADA_002].includes(n)) { continue; }
+    MODELS[n].supportedMimeTypes = MODELS[n].supportedMimeTypes || [];
     MODELS[n].maxOutputTokens = MODELS[n].maxOutputTokens
         || Math.ceil(MODELS[n].contextWindow * 0.4);
     MODELS[n].maxInputTokens = MODELS[n].maxInputTokens
@@ -265,19 +267,35 @@ const countTokens = input => tokenSafe((
 const buildGptMessage = (content, options) => {
     assert(content, 'Content is required.');
+    const attachments = (options?.attachments || []).map(x => ({
+        type: 'image_url', image_url: x.image_url
+    }));
     return String.isString(content) ? {
-        role: options?.role || user, content
+        role: options?.role || user,
+        content: [{ type: 'text', text: content }, ...attachments],
     } : content;
 };
 const buildVertexMessage = (text, options) => {
     assert(text, 'Text is required.');
+    // only 1 attachment is allowed while using inline_data:
+    const attachment = (options?.attachments || []).map(x => ({
+        inline_data: { mime_type: x.mime_type, data: x.data }
+    }))?.[0];
     return String.isString(text) ? {
-        role: options?.role || user, parts: [{ text }]
+        role: options?.role || user, parts: buildGeminiParts(text, attachment),
     } : text;
 };
-const buildGeminiMessage = text => String.isString(text) ? [{ text }] : text;
+const buildGeminiMessage = (text, options) => {
+    assert(text, 'Text is required.');
+    // @todo: check this issue similar to Vertex AI:
+    // only 1 attachment is allowed while using inline_data?
+    const attachment = (options?.attachments || []).map(x => ({
+        inlineData: { mimeType: x.mime_type, data: x.data }
+    }))?.[0];
+    return String.isString(text) ? buildGeminiParts(text, attachment) : text;
+};
 const [getOpenAIClient, getVertexClient, getGeminiClient, getOllamaClient]
     = [OPENAI, VERTEX, GEMINI, OLLAMA].map(
@@ -293,7 +311,7 @@ const listOpenAIModels = async (options) => {
 const packGptResp = (resp, options) => {
     if (options?.raw) { return resp; }
     else if (options?.simple) { return resp.choices[0].message.content; }
-    return packResp(resp.choices[0].message.content);
+    return packResp(resp?.choices?.[0]?.message?.content || '');
 };
 const promptChatGPT = async (content, options) => {
@@ -301,7 +319,9 @@ const promptChatGPT = async (content, options) => {
     // https://github.com/openai/openai-node?tab=readme-ov-file#streaming-responses
     // https://github.com/openai/openai-node?tab=readme-ov-file#streaming-responses-1
     let [resp, result, chunk] = [await chatGptClient.chat.completions.create({
-        ...messages([...options?.messages || [], buildGptMessage(content)]),
+        ...messages([
+            ...options?.messages || [], buildGptMessage(content, options)
+        ]),
         model: options?.model || DEFAULT_MODELS[CHATGPT],
         stream: !!options?.stream,
     }), '', null];
@@ -555,20 +575,29 @@ const handleGeminiResponse = async (resp, options) => {
 const promptVertex = async (content, options) => {
     const { generative } = await getVertexClient(options);
+    // https://cloud.google.com/vertex-ai/docs/generative-ai/model-reference/gemini?hl=en&_ga=2.265647247.-1650899985.1695692196
+    // Google's bug: history is not allowed while using inline_data.
     return await handleGeminiResponse(generative.generateContentStream({
-        contents: [...options?.messages || [], buildVertexMessage(content)],
+        contents: [
+            ...options?.messages && !options?.attachments?.length
+                ? options.messages : [],
+            buildVertexMessage(content, options),
+        ],
     }), options);
 };
 const promptGemini = async (content, options) => {
     const { generative } = await getGeminiClient(options);
     // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
+    // @todo: check this issue similar to Vertex AI:
+    // Google's bug: history is not allowed while using inline_data?
     const chat = generative.startChat({
-        history: options?.messages || [],
+        history: options?.messages && !options?.attachments?.length
+            ? options.messages : [],
         generationConfig: { ...options?.generationConfig || {} },
     });
     return handleGeminiResponse(chat.sendMessageStream(
-        buildGeminiMessage(content)
+        buildGeminiMessage(content, options),
     ), options);
 };
@@ -813,10 +842,16 @@ const talk = async (input, options) => {
         }
     }
     const chat = { request: input };
+    const attachments = [];
+    (options?.attachments || []).filter(
+        x => _MODEL.supportedMimeTypes[x.mime_type]
+    ).map(attachments.push);
     log(`Prompt: ${JSON.stringify(input)}`);
     switch (engine) {
         case CHATGPT:
-            resp = await promptChatGPT(input, { messages, model, ...options });
+            resp = await promptChatGPT(input, {
+                messages, attachments, model, ...options,
+            });
             break;
         case ASSISTANT:
             resp = await promptAssistant(input, {
@@ -826,10 +861,14 @@ const talk = async (input, options) => {
             session.threadId = resp.thread.id;
             break;
         case GEMINI:
-            resp = await promptGemini(input, { messages, ...options });
+            resp = await promptGemini(input, {
+                messages, attachments, ...options,
+            });
             break;
         case VERTEX:
-            resp = await promptVertex(input, { messages, ...options });
+            resp = await promptVertex(input, {
+                messages, attachments, ...options,
+            });
             break;
         case OLLAMA:
             resp = await promptOllama(input, { messages, model, ...options });

package/lib/bot.mjs CHANGED

@@ -6,6 +6,7 @@ import {
     prettyJson, splitArgs, timeout, trim, which,
 } from './utilitas.mjs';
+import { base64Encode } from './utilitas.mjs';
 import { distill } from './web.mjs';
 import { fakeUuid } from './uoid.mjs';
 import { get } from './shot.mjs';
@@ -29,6 +30,7 @@ const oList = arr => lines(arr.map((v, k) => `${k + 1}. ${v}`));
 const map = obj => uList(Object.entries(obj).map(([k, v]) => `${k}: ${v}`));
 const isMarkdownError = e => e?.description?.includes?.("can't parse entities");
 const sendMd = (cId, cnt, opt) => send(cId, cnt, { parse_mode, ...opt || {} });
+const getFile = async (id, op) => (await get(await getFileUrl(id), op)).content;
 const [ // https://limits.tginfo.me/en
     BOT_SEND, provider, HELLO, GROUP, PRIVATE, CHANNEL, MENTION, CALLBACK_LIMIT,
@@ -88,13 +90,11 @@ const getExtra = (ctx, options) => {
     return resp;
 };
-const getFile = async (file_id, options) => {
+const getFileUrl = async (file_id) => {
     assert(file_id, 'File ID is required.', 400);
     const file = await (await init()).telegram.getFile(file_id);
     assert(file.file_path, 'Error getting file info.', 500);
-    return (await get(
-        `${API_ROOT}file/bot${bot.token}/${file.file_path}`, options
-    )).content;
+    return `${API_ROOT}file/bot${bot.token}/${file.file_path}`;
 };
 const officeParser = async file => await ignoreErrFunc(
@@ -431,7 +431,7 @@ const subconscious = [{
     },
 }, {
     run: true, priority: -8870, name: 'vision', func: async (ctx, next) => {
-        let fileId, type, file_name, mime_type, ocrFunc;
+        let fileId, type, file_name, mime_type, ocrFunc, asPrompt = false;
         if ('application/pdf' === ctx.msg.document?.mime_type) {
             ocrFunc = ctx._.vision?.read;
             fileId = ctx.msg.document.file_id;
@@ -439,6 +439,7 @@ const subconscious = [{
             mime_type = ctx.msg.document.mime_type;
             type = 'DOCUMENT';
         } else if (/^image\/.*$/ig.test(ctx.msg.document?.mime_type)) {
+            asPrompt = bot._.supportedMimeTypes.has(ctx.msg.document.mime_type);
             ocrFunc = ctx._.vision?.see;
             fileId = ctx.msg.document.file_id;
             file_name = ctx.msg.document.file_name;
@@ -457,27 +458,38 @@ const subconscious = [{
             mime_type = ctx.msg.document.mime_type;
             type = 'FILE';
         } else if (ctx.msg.photo) {
+            asPrompt = true;
             ocrFunc = ctx._.vision?.see;
             fileId = ctx.msg.photo[ctx.msg.photo.length - 1]?.file_id;
             mime_type = 'image';
             type = 'PHOTO';
         }
-        if (fileId && ocrFunc) {
+        if (fileId && (asPrompt || ocrFunc)) {
             await ctx.ok(EMOJI_LOOK);
             try {
-                const file = await getFile(fileId, BUFFER_ENCODE);
-                const content = trim(ensureArray(
-                    await ignoreErrFunc(async () => await ocrFunc(
-                        file, BUFFER_ENCODE
-                    ), logOptions)
-                ).filter(x => x).join('\n'));
-                if (content) {
+                const image_url = await getFileUrl(fileId);
+                const file = (await get(image_url, BUFFER_ENCODE)).content;
+                if (asPrompt) {
                     ctx.collect(ctx.msg.caption || '');
-                    content && ctx.collect(lines([
-                        '---', ...file_name ? [`file_name: ${file_name}`] : [],
-                        `mime_type: ${mime_type}`, `type: ${type}`, '---',
-                        content
-                    ]), 'VISION');
+                    ctx.collect({
+                        mime_type: mime_type === 'image' ? 'image/jpeg' : mime_type,
+                        image_url, data: base64Encode(file, true),
+                    }, 'PROMPT');
+                }
+                if (ocrFunc) {
+                    const content = trim(ensureArray(
+                        await ignoreErrFunc(async () => await ocrFunc(
+                            file, BUFFER_ENCODE
+                        ), logOptions)
+                    ).filter(x => x).join('\n'));
+                    if (content) {
+                        ctx.collect(ctx.msg.caption || '');
+                        ctx.collect(lines([
+                            '---', ...file_name ? [`file_name: ${file_name}`] : [],
+                            `mime_type: ${mime_type}`, `type: ${type}`, '---',
+                            content
+                        ]), 'VISION');
+                    }
                 }
             } catch (err) { return await ctx.er(err); }
         }
@@ -705,6 +717,7 @@ const init = async (options) => {
                 skills: { ...options?.skills || {} },
                 speech: options?.speech,
                 vision: options?.vision,
+                supportedMimeTypes: options?.supportedMimeTypes || [],
             };
             (!options?.session?.get || !options?.session?.set)
                 && log(`WARNING: Sessions persistence is not enabled.`);

package/lib/manifest.mjs CHANGED

@@ -1,7 +1,7 @@
 const manifest = {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "1995.2.49",
+    "version": "1995.2.50",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",

package/package.json CHANGED

@@ -1,7 +1,7 @@
 {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "1995.2.49",
+    "version": "1995.2.50",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",