utilitas 1998.2.38 → 1998.2.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -101,7 +101,6 @@ const clients = {};
  const size8k = 7680 * 4320;
  const MAX_TOOL_RECURSION = 10;
  const LOG = { log: true };
- const OPENAI_BASE_URL = 'https://api.openai.com/v1';
  const sessionType = `${name.toUpperCase()}-SESSION`;
  const unifyProvider = options => unifyType(options?.provider, 'AI provider');
  const unifyEngine = options => unifyType(options?.engine, 'AI engine');
@@ -111,7 +110,6 @@ const renderText = (t, o) => _renderText(t, { extraCodeBlock: 0, ...o || {} });
  const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
  const CONTENT_IS_REQUIRED = 'Content is required.';
  const assertContent = content => assert(content.length, CONTENT_IS_REQUIRED);
- const packThink = thk => thk ? [`${THINK_STR}\n${thk}\n${THINK_END}`] : [];
  const countToolCalls = r => r?.split('\n').filter(x => x === TOOLS_STR).length;

  const DEFAULT_MODELS = {
@@ -498,26 +496,7 @@ const init = async (options) => {
  if (options?.apiKey) {
      const { GoogleGenerativeAI } = await need('@google/generative-ai');
      const genAi = new GoogleGenerativeAI(options.apiKey);
-     const genModel = options?.model || DEFAULT_MODELS[GEMINI];
-     clients[provider] = {
-         generative: genAi.getGenerativeModel({
-             model: genModel,
-             systemInstruction: { role: system, parts: [{ text: INSTRUCTIONS }] },
-             ...MODELS[genModel]?.tools ? (options?.tools ?? {
-                 tools: [
-                     // @todo: Gemini will failed when using these tools together.
-                     // https://ai.google.dev/gemini-api/docs/function-calling
-                     // { codeExecution: {} },
-                     // { googleSearch: {} },
-                     { functionDeclarations: toolsGemini.map(x => x.def) },
-                 ],
-                 toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
-             }) : {},
-         }),
-         embedding: genAi.getGenerativeModel({
-             model: DEFAULT_MODELS[GEMINI_EMEDDING],
-         }), genModel,
-     };
+     clients[provider] = { client: genAi };
  }
  break;
  case CLAUDE:
@@ -623,7 +602,7 @@ const buildGeminiParts = (text, attachments) => {
  
  const buildGeminiMessage = (content, options) => {
      content = content || '';
-     const attachments = (options?.attachments || []).map(x => ({
+     const attachments = (options?.attachments?.length ? options.attachments : []).map(x => ({
          inlineData: { mimeType: x.mime_type, data: x.data }
      }));
      return String.isString(content) ? (options?.history ? {
@@ -634,7 +613,7 @@ const buildGeminiMessage = (content, options) => {
  
  const buildClaudeMessage = (text, options) => {
      assert(text, 'Text is required.');
-     const attachments = (options?.attachments || []).map(x => {
+     const attachments = (options?.attachments?.length ? options?.attachments : []).map(x => {
          let type = '';
          if ([pdf].includes(x.mime_type)) {
              type = 'document';
@@ -669,9 +648,38 @@ const listOpenAIModels = async (options) => {
      return options?.raw ? resp : resp.data;
  };

+ const streamResp = async (resp, options) => {
+     const msg = await packResp(resp, { ...options, processing: true });
+     return options?.stream && (msg?.text || msg?.audio?.length)
+         && await ignoreErrFunc(async () => await options.stream(msg), LOG);
+ };
+
+ const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
+     const keyEnd = text.indexOf(`${x}\n`);
+     return keyEnd >= 0 ? (keyEnd + x.length) : 0;
+ }));
+
  const packResp = async (resp, options) => {
-     let { text: txt, audio, references }
-         = String.isString(resp) ? { text: resp } : resp;
+     if (options?.raw) { return resp; }
+     let [
+         txt, audio, references, markdown, simpleText, referencesMarkdown, end,
+         json
+     ] = [
+         resp.text // ChatGPT / Claude / Gemini
+             || resp?.message?.content || '', // Ollama @todo: Need to be updated
+         resp?.audio?.data, // ChatGPT audio mode
+         resp?.references, // Gemini references
+         '', '', '', null,
+     ];
+     markdown = simpleText = txt;
+     while ((end = getInfoEnd(simpleText))) {
+         simpleText = simpleText.slice(end).trim();
+         end = getInfoEnd(simpleText);
+     }
+     [THINK_STR, TOOLS_STR].map(x => {
+         const str = simpleText.indexOf(x);
+         str >= 0 && (simpleText = simpleText.slice(0, str).trim());
+     });
      audio && (audio = Buffer.isBuffer(audio) ? audio : await convert(audio, {
          input: BASE64, expected: BUFFER,
      })) && audio.length && (audio = Buffer.concat([
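The new `streamResp` wraps `packResp` with `processing: true` for interim stream callbacks, while `getInfoEnd` locates the end of a leading think or tools block; together with the `while` loop above it peels those blocks off the front of a response so simple mode can return bare `simpleText`. A minimal sketch of that scan, using placeholder marker strings (the real `THINK_STR`/`THINK_END`/`TOOLS_STR`/`TOOLS_END` constants are defined elsewhere in `alan.mjs` and may differ):

```js
// Placeholder markers for illustration only; alan.mjs defines its own.
const THINK_END = '</think>';
const TOOLS_END = '</tools>';

const getInfoEnd = text => Math.max(...[THINK_END, TOOLS_END].map(x => {
    const keyEnd = text.indexOf(`${x}\n`); // marker must close a line
    return keyEnd >= 0 ? (keyEnd + x.length) : 0;
}));

let simpleText = '<think>\nplanning...\n</think>\nFinal answer.';
let end;
while ((end = getInfoEnd(simpleText))) { // 0 means no more leading blocks
    simpleText = simpleText.slice(end).trim();
}
console.log(simpleText); // → 'Final answer.'
```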
@@ -679,6 +687,14 @@ const packResp = async (resp, options) => {
      ])) && (audio = await convert(audio, {
          input: BUFFER, expected: BUFFER, ...options || {},
      }));
+     options?.jsonMode && !options?.delta && !options?.processing
+         && (json = parseJson(simpleText));
+     if (options?.simple && options?.audioMode) { return audio; }
+     else if (options?.simple && options?.jsonMode) { return json; }
+     else if (options?.simple) { return simpleText; }
+     else if (options?.jsonMode) {
+         markdown = `\`\`\`json\n${simpleText}\n\`\`\``;
+     }
      // references debug codes:
      // references = {
      //     "segments": [
@@ -701,77 +717,127 @@ const packResp = async (resp, options) => {
      //     },
      //     ]
      // };
-     let [richText, referencesMarkdown] = [null, null];
-     if (!options?.jsonMode) {
-         if (!options?.processing
-             && references?.segments?.length && references?.links?.length) {
-             richText = txt;
-             for (let i = references.segments.length - 1; i >= 0; i--) {
-                 let idx = richText.indexOf(references.segments[i].text);
-                 if (idx < 0) { continue; }
-                 idx += references.segments[i].text.length;
-                 richText = richText.slice(0, idx)
-                     + references.segments[i].indices.map(y => ` (${y + 1})`).join('')
-                     + richText.slice(idx);
-             }
-             referencesMarkdown = 'References:\n\n' + references.links.map((x, i) => {
-                 return `${i + 1}. [${x.title}](${x.uri})`;
-             }).join('\n');
+     if (references?.segments?.length && references?.links?.length) {
+         for (let i = references.segments.length - 1; i >= 0; i--) {
+             let idx = markdown.indexOf(references.segments[i].text);
+             if (idx < 0) { continue; }
+             idx += references.segments[i].text.length;
+             markdown = markdown.slice(0, idx)
+                 + references.segments[i].indices.map(y => ` (${y + 1})`).join('')
+                 + markdown.slice(idx);
          }
-         let lines = (richText || txt).split('\n');
-         for (let i in lines) {
-             switch (lines[i]) {
-                 case THINK_STR:
-                     lines[i] = MD_CODE + THINK;
-                     break;
-                 case TOOLS_STR:
-                     lines[i] = MD_CODE + TOOLS;
-                     break;
-                 case THINK_END:
-                 case TOOLS_END:
-                     lines[i] = MD_CODE;
-             }
+         referencesMarkdown = 'References:\n\n' + references.links.map(
+             (x, i) => `${i + 1}. [${x.title}](${x.uri})`
+         ).join('\n');
+     }
+     markdown = markdown.split('\n');
+     for (let i in markdown) {
+         switch (markdown[i]) {
+             case THINK_STR: markdown[i] = MD_CODE + THINK; break;
+             case TOOLS_STR: markdown[i] = MD_CODE + TOOLS; break;
+             case THINK_END: case TOOLS_END: markdown[i] = MD_CODE;
          }
-         richText = lines.join('\n').trim();
+     }
+     markdown = markdown.join('\n');
+     if (!options?.delta && !options?.processing) {
+         txt = txt.trim();
+         markdown = markdown.trim();
      }
      return {
-         ...text(txt), ...options?.jsonMode && !(
-             options?.delta && options?.processing
-         ) ? { json: parseJson(txt) } : {},
-         ...richText ? { richText } : {},
-         ...references ? { references } : {},
+         ...text(txt), ...options?.jsonMode ? { json } : {},
+         markdown, ...references ? { references } : {},
          ...referencesMarkdown ? { referencesMarkdown } : {},
          ...audio ? { audio, audioMimeType: options?.audioMimeType } : {},
+         processing: options?.processing,
          model: options?.model,
      };
  };

- const streamResp = async (resp, options) => {
-     const msg = await packGptResp(resp, { ...options, processing: true });
-     return options?.stream && (msg?.text || msg?.audio?.length)
-         && await ignoreErrFunc(async () => await options.stream(msg), LOG);
- };
-
- const packGptResp = async (resp, options) => {
-     // simple mode is not recommended for streaming responses
-     let text = resp.text // ChatGPT / Claude / Gemini
-         || resp?.message?.content || ''; // Ollama
-     const audio = resp?.message?.audio?.data; // ChatGPT audio mode
-     if (options?.raw) { return resp; }
-     else if (options?.simple && options?.jsonMode) { return parseJson(text); }
-     else if (options?.simple && options?.audioMode) { return audio; }
-     else if (options?.simple) {
-         for (const key of [[THINK_STR, THINK_END], [TOOLS_STR, TOOLS_END]]) {
-             const [findStr, findEnd] = key.map(x => text.indexOf(x));
-             if (findStr >= 0 && findEnd >= 0 && findStr < findEnd) {
-                 text = text.split('')
-                 text.splice(findStr, findEnd + THINK_END.length)
-                 text = text.join('').trim();
+ const buildPrompts = async (model, input, options = {}) => {
+     assert(!(
+         options.jsonMode && !model?.json
+     ), `This model does not support JSON output: ${options.model}`);
+     assert(!(
+         options.reasoning && !model?.reasoning
+     ), `This model does not support reasoning: ${options.model}`);
+     let [systemPrompt, history, content, prompt, _system, _user, _assistant] = [
+         null, null, input || ATTACHMENTS, null, [], null, // length hack: ATTACHMENTS
+         { role: system }, { role: user }, { role: assistant }
+     ];
+     options.systemPrompt = options.systemPrompt || INSTRUCTIONS;
+     options.attachments = (
+         options.attachments?.length ? options.attachments : []
+     ).filter(x => [
+         ...model?.supportedMimeTypes || [], ...model.supportedAudioTypes || []
+     ].includes(x.mime_type));
+     switch (options.flavor) {
+         case CHATGPT:
+             systemPrompt = buildGptMessage(options.systemPrompt, _system);
+             prompt = buildGptMessage(content, options);
+             break;
+         case CLAUDE:
+             systemPrompt = buildClaudeMessage(options.systemPrompt, _system);
+             prompt = buildClaudeMessage(content, options)
+             break;
+         case OLLAMA:
+             systemPrompt = buildOllamaMessage(options.systemPrompt, _system);
+             prompt = buildOllamaMessage(content, options);
+             break;
+         case GEMINI:
+             systemPrompt = buildGeminiHistory(options.systemPrompt, _system);
+             prompt = options.toolsResult?.[options.toolsResult?.length - 1]?.parts
+                 || buildGeminiMessage(content, options)
+             break;
+     }
+     const msgBuilder = () => {
+         history = [];
+         (options.messages?.length ? options.messages : []).map(x => {
+             switch (options.flavor) {
+                 case CHATGPT:
+                     history.push(buildGptMessage(x.request, _user));
+                     history.push(buildGptMessage(x.response, _assistant));
+                     break;
+                 case CLAUDE:
+                     history.push(buildClaudeMessage(x.request, _user));
+                     history.push(buildClaudeMessage(x.response, _assistant));
+                     break;
+                 case OLLAMA:
+                     history.push(buildClaudeMessage(x.request, _user));
+                     history.push(buildClaudeMessage(x.response, _assistant));
+                     break;
+                 case GEMINI:
+                     if (options.attachments?.length) { return; }
+                     history.push(buildGeminiHistory(x.request, _user));
+                     history.push(buildGeminiHistory(x.response, { role: MODEL }));
+                     break;
              }
+         });
+         switch (options.flavor) {
+             case CHATGPT: case CLAUDE: case OLLAMA:
+                 history.push(prompt, ...options.toolsResult?.length
+                     ? options.toolsResult : []);
+                 history = messages(history);
+                 break;
+             case GEMINI:
+                 history.push(
+                     ...options.toolsResult?.length ? [
+                         buildGeminiHistory(content, { ...options, role: user }),
+                         ...options.toolsResult.slice(0, options.toolsResult.length - 1)
+                     ] : []
+                 );
+                 break;
          }
-         return text;
-     }
-     return await packResp({ text, audio, references: resp?.references }, options);
+     };
+     msgBuilder();
+     await trimPrompt(() => [systemPrompt, history, prompt], () => {
+         if (options.messages.length) {
+             options.messages.shift();
+             msgBuilder();
+         } else {
+             content = trimTailing(trimTailing(content).slice(0, -1)) + '...';
+         }
+     }, model.maxInputTokens - options.attachments?.length * ATTACHMENT_TOKEN_COST);
+     return { systemPrompt, history, prompt };
  };

  const handleToolsCall = async (msg, options) => {
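`buildPrompts` now centralizes what each `prompt*` function previously did for itself: capability assertions, attachment filtering against the model's supported MIME types, per-flavor history construction, and trimming to `maxInputTokens`. (Note the OLLAMA history branch reuses `buildClaudeMessage`, presumably because both APIs accept plain `{ role, content }` messages.) A rough sketch of how the callers below drive it; the model entry is hypothetical and the flavor constant is a stand-in for the module's own:

```js
// Hypothetical model entry; real ones live in the MODELS table of alan.mjs.
const _MODEL = {
    json: true, reasoning: false, tools: true, maxInputTokens: 128000,
    supportedMimeTypes: ['image/png', 'application/pdf'],
    supportedAudioTypes: [],
};
const CHATGPT = 'CHATGPT'; // stand-in; alan.mjs defines its own constant
// buildPrompts is module-internal; each prompt* caller passes its flavor.
const { systemPrompt, history, prompt } = await buildPrompts(_MODEL, 'Hi!', {
    flavor: CHATGPT,  // or CLAUDE / OLLAMA / GEMINI
    messages: [],     // prior { request, response } session turns
    attachments: [],  // filtered against the supported MIME types above
});
```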
@@ -867,43 +933,32 @@ const mergeMsgs = (resp, calls) => [resp, ...calls.length ? [
  ] : []].map(x => x.trim()).join('\n\n');

  const promptChatGPT = async (content, options = {}) => {
-     const { client } = await getOpenAIClient(options);
-     // https://github.com/openai/openai-node?tab=readme-ov-file#streaming-responses
-     // custom api endpoint not supported vision apis @todo by @Leask
-     // Structured Outputs: https://openai.com/index/introducing-structured-outputs-in-the-api/
-     client.baseURL !== OPENAI_BASE_URL
-         && options?.attachments?.length && (options.attachments = []);
-     if (options?.model) { } else if (options?.provider === AZURE) {
+     if (options.model) { } else if (options.provider === AZURE) {
          options.model = DEFAULT_MODELS[AZURE];
-     } else if (options?.reasoning) {
+     } else if (options.reasoning) {
          options.model = DEFAULT_MODELS[CHATGPT_REASONING];
      } else {
          options.model = DEFAULT_MODELS[CHATGPT];
      }
-     options?.reasoning && !options?.reasoning_effort
+     let [_MODEL, result, resultAudio, event, resultTools, responded] = [
+         MODELS[options.model], options?.result ?? '', Buffer.alloc(0), null, [],
+         false
+     ];
+     options.reasoning && !options.reasoning_effort
          && (options.reasoning_effort = GPT_REASONING_EFFORT);
-     const message = buildGptMessage(content, options);
-     const modalities = options?.modalities || (
-         options?.audioMode ? [TEXT, AUDIO] : undefined
-     );
-     assert(!(
-         options?.jsonMode && !MODELS[options.model]?.json
-     ), `This model does not support JSON output: ${options.model}`);
-     assert(!(
-         options?.reasoning && !MODELS[options.model]?.reasoning
-     ), `This model does not support reasoning: ${options.model}`);
+     const { client } = await getOpenAIClient(options);
+     const { history }
+         = await buildPrompts(_MODEL, content, { ...options, flavor: CHATGPT });
+     const modalities = options.modalities
+         || (options.audioMode ? [TEXT, AUDIO] : undefined);
      [options.audioMimeType, options.suffix] = [pcm16, 'pcm.wav'];
-     let [result, resultAudio, event, resultTools, responded]
-         = [options?.result ?? '', Buffer.alloc(0), null, [], false];
      const resp = await client.chat.completions.create({
-         modalities, audio: options?.audio || (
+         modalities, audio: options.audio || (
              modalities?.find?.(x => x === AUDIO)
              && { voice: DEFAULT_MODELS[OPENAI_VOICE], format: 'pcm16' }
-         ), ...messages([
-             ...options?.messages || [], message, ...options?.toolsResult || [],
-         ]), ...MODELS[options.model]?.tools ? {
-             tools: options?.tools ?? tools.map(x => x.def),
-         } : {}, ...options?.jsonMode ? {
+         ), ...history, ..._MODEL?.tools ? {
+             tools: options.tools ?? tools.map(x => x.def),
+         } : {}, ...options.jsonMode ? {
              response_format: { type: JSON_OBJECT }
          } : {}, model: options.model, stream: true,
          store: true, tool_choice: 'auto',
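With history prebuilt, `promptChatGPT` spreads `...history` straight into `client.chat.completions.create`. Judging from the old inline `...messages([...])` call it replaces, the `messages()` helper appears to wrap a list as `{ messages: [...] }`, so the request keeps the same shape as before:

```js
// Assumed shape of the messages() helper, inferred from the replaced call.
const messages = list => ({ messages: list });
const history = messages([
    { role: 'system', content: 'INSTRUCTIONS...' },
    { role: 'user', content: 'Hello!' },
]);
const request = { model: 'gpt-4o', stream: true, ...history };
// → { model: 'gpt-4o', stream: true, messages: [ /* two entries */ ] }
```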
@@ -925,12 +980,13 @@ const promptChatGPT = async (content, options = {}) => {
          x?.function?.name && (curFunc.function.name += x.function.name);
          x?.function?.arguments && (curFunc.function.arguments += x.function.arguments);
      }
-     deltaText && (responded = responded || (deltaText = `\n\n${deltaText}`));
+     options.result && deltaText
+         && (responded = responded || (deltaText = `\n\n${deltaText}`));
      result += deltaText;
      resultAudio = Buffer.concat([resultAudio, deltaAudio]);
-     const respAudio = options?.delta ? deltaAudio : resultAudio;
-     await streamResp({
-         text: options?.delta ? deltaText : result,
+     const respAudio = options.delta ? deltaAudio : resultAudio;
+     (deltaText || deltaAudio?.length) && await streamResp({
+         text: options.delta ? deltaText : result,
          ...respAudio.length ? { audio: { data: respAudio } } : {},
      }, options);
  }
@@ -944,7 +1000,7 @@ const promptChatGPT = async (content, options = {}) => {
          return promptChatGPT(content, { ...options, toolsResult, result: toolsResponse });
      }
      event.text = mergeMsgs(toolsResponse, toolsResult);
-     return await packGptResp(event, options);
+     return await packResp(event, options);
  };

  const promptAzure = async (content, options = {}) =>
@@ -955,61 +1011,56 @@ const promptOllama = async (content, options = {}) => {
      // https://github.com/ollama/ollama-js
      // https://github.com/jmorganca/ollama/blob/main/examples/typescript-simplechat/client.ts
      options.model = options?.model || model;
-     const resp = await client.chat({
-         model: options.model, stream: true,
-         ...messages([...options?.messages || [], buildOllamaMessage(content)]),
-     })
-     let [chunk, result] = [null, ''];
+     let [_MODEL, chunk, result] = [MODELS[options.model], null, ''];
+     const { history: h }
+         = await buildPrompts(_MODEL, content, { ...options, flavor: OLLAMA });
+     const resp = await client.chat({ model: options.model, stream: true, ...h });
      for await (chunk of resp) {
          const delta = chunk.message.content || '';
-         if (delta === '') { continue; }
          result += delta;
-         chunk.message.content = options?.delta ? delta : result;
-         await ignoreErrFunc(async () => await options?.stream?.(
-             await packGptResp(chunk, { ...options || {}, processing: true })
-         ), LOG);
+         delta && await streamResp({
+             text: options.delta ? delta : result,
+         }, options);
      }
-     chunk.message.content = result;
-     return await packGptResp(chunk, options);
+     return await packResp({ text: result }, options);
  };

  const promptClaude = async (content, options = {}) => {
+     options.model = options.model || DEFAULT_MODELS[CLAUDE];
+     let [_MODEL, event, text, thinking, signature, result, thinkEnd, tool_use]
+         = [MODELS[options.model], null, '', '', '', options.result ?? '', '', []];
      const { client } = await getClaudeClient(options);
-     options.model = options?.model || DEFAULT_MODELS[CLAUDE];
+     const { history }
+         = await buildPrompts(_MODEL, content, { ...options, flavor: CLAUDE });
      const resp = await client.messages.create({
-         model: options.model, max_tokens: MODELS[options.model].maxOutputTokens,
-         messages: [
-             ...options?.messages || [], buildClaudeMessage(content, options),
-             ...options?.toolsResult || [],
-         ], stream: true,
-         ...options?.reasoning ?? MODELS[options.model]?.reasoning ? {
-             thinking: options?.thinking || { type: 'enabled', budget_tokens: 1024 },
-         } : {}, // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
-         ...MODELS[options.model]?.tools ? {
-             tools: options?.tools ?? toolsClaude.map(x => x.def),
+         model: options.model, max_tokens: _MODEL.maxOutputTokens, ...history,
+         stream: true, ...options.reasoning ?? _MODEL?.reasoning ? {
+             thinking: options.thinking || { type: 'enabled', budget_tokens: 1024 },
+         } : {}, ..._MODEL?.tools ? { // https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+             tools: options.tools ?? toolsClaude.map(x => x.def),
              tool_choice: { type: 'auto' },
          } : {},
      });
-     let [event, text, thinking, signature, result, thinkEnd, tool_use]
-         = [null, '', '', '', options?.result ?? '', '', []];
      for await (const chunk of resp) {
          event = chunk?.content_block || chunk?.delta || {};
-         let [thkDelta, txtDelta] = [event.thinking || '', event.text || ''];
-         text += txtDelta;
-         thinking += thkDelta;
+         let [deltaThink, deltaText] = [event.thinking || '', event.text || ''];
+         text += deltaText;
+         thinking += deltaThink;
          signature = signature || event?.signature || '';
-         thkDelta && thkDelta === thinking
-             && (thkDelta = `${THINK_STR}\n${thkDelta}`);
-         thinking && txtDelta && !thinkEnd
-             && (thinkEnd = thkDelta = `${thkDelta}\n${THINK_END}\n\n`);
+         deltaThink && deltaThink === thinking
+             && (deltaThink = `${THINK_STR}\n${deltaThink}`);
+         thinking && deltaText && !thinkEnd
+             && (thinkEnd = deltaThink = `${deltaThink}\n${THINK_END}\n\n`);
          if (event?.type === 'tool_use') {
              tool_use.push({ ...event, input: '' });
          } else if (event.partial_json) {
              tool_use[tool_use.length - 1].input += event.partial_json;
          }
-         txtDelta = thkDelta + txtDelta;
-         result += txtDelta;
-         await streamResp({ text: options?.delta ? txtDelta : result }, options);
+         deltaText = deltaThink + deltaText;
+         result += deltaText;
+         deltaText && await streamResp({
+             text: options.delta ? deltaText : result,
+         }, options);
      }
      event = {
          role: assistant, content: [
@@ -1022,11 +1073,11 @@ const promptClaude = async (content, options = {}) => {
      );
      if (tool_use.length && countToolCalls(toolsResponse) < MAX_TOOL_RECURSION) {
          return await promptClaude(content, {
-             ...options, toolsResult: [...options?.toolsResult || [],
+             ...options, toolsResult: [...options.toolsResult || [],
              ...toolsResult], result: toolsResponse,
          });
      }
-     return packGptResp({ text: mergeMsgs(toolsResponse, tool_use) }, options);
+     return packResp({ text: mergeMsgs(toolsResponse, tool_use) }, options);
  };

  const uploadFile = async (input, options) => {
@@ -1079,29 +1130,32 @@ const packGeminiReferences = (chunks, supports) => {
  };

  const promptGemini = async (content, options = {}) => {
-     const { generative, genModel } = await getGeminiClient(options);
+     options.model || (options.model = DEFAULT_MODELS[GEMINI]);
+     let [result, references, functionCalls, responded, _MODEL]
+         = [options.result ?? '', null, null, false, MODELS[options.model]];
+     const { client: _client } = await getGeminiClient(options);
+     const { systemPrompt: systemInstruction, history, prompt }
+         = await buildPrompts(_MODEL, content, { ...options, flavor: GEMINI });
+     const client = _client.getGenerativeModel({
+         model: options.model, systemInstruction,
+         ...MODELS[options.model]?.tools && !options.jsonMode ? (
+             options.tools ?? {
+                 tools: [
+                     // @todo: Gemini will fail when using these tools together.
+                     // https://ai.google.dev/gemini-api/docs/function-calling
+                     // { codeExecution: {} },
+                     // { googleSearch: {} },
+                     { functionDeclarations: toolsGemini.map(x => x.def) },
+                 ],
+                 toolConfig: { functionCallingConfig: { mode: 'AUTO' } },
+             }
+         ) : {},
+     });
      // https://github.com/google/generative-ai-js/blob/main/samples/node/advanced-chat.js
      // @todo: check this issue similar to Vertex AI:
      // Google's bug: history is not allowed while using inline_data?
-     assert(!(
-         options?.jsonMode && MODELS[genModel]?.json == false
-     ), `This model does not support JSON output: ${genModel} `);
-     options.model = genModel;
-     const chat = generative.startChat({
-         history: [
-             ...options?.messages && !options?.attachments?.length ? options.messages : [],
-             ...options?.toolsResult ? [
-                 buildGeminiMessage(content, { ...options, history: true }),
-                 ...options.toolsResult.slice(0, options.toolsResult.length - 1)
-             ] : []
-         ], ...generationConfig(options),
-     });
-     const resp = await chat.sendMessageStream(
-         options?.toolsResult?.[options?.toolsResult?.length]?.parts
-         || buildGeminiMessage(content, options)
-     );
-     let [result, references, functionCalls, responded]
-         = [options?.result ?? '', null, null];
+     const chat = client.startChat({ history, ...generationConfig(options) });
+     const resp = await chat.sendMessageStream(prompt);
      for await (const chunk of resp.stream) {
          functionCalls || (functionCalls = chunk.functionCalls);
          const rfc = packGeminiReferences(
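Because `init` now stores only `{ client: genAi }`, the generative model is instantiated per request, letting `options.model`, `systemInstruction`, and tool wiring vary per call instead of being frozen at init time. A condensed sketch of the pattern against the `@google/generative-ai` SDK (model name and prompt are examples):

```js
import { GoogleGenerativeAI } from '@google/generative-ai';

const genAi = new GoogleGenerativeAI(process.env.GEMINI_API_KEY); // cached in clients
// Per request, as promptGemini now does:
const client = genAi.getGenerativeModel({
    model: 'gemini-1.5-pro',                     // options.model
    systemInstruction: 'You are a helpful bot.', // from buildPrompts
});
const chat = client.startChat({ history: [] });
const resp = await chat.sendMessageStream('Hello!');
for await (const chunk of resp.stream) { process.stdout.write(chunk.text()); }
```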
@@ -1110,12 +1164,17 @@ const promptGemini = async (content, options = {}) => {
          );
          rfc && (references = rfc);
          let delta = chunk?.text?.() || '';
-         delta && (responded = responded || (delta = `\n\n${delta}`));
+         options.result && delta
+             && (responded = responded || (delta = `\n\n${delta}`));
          result += delta;
-         await streamResp({ text: options?.delta ? delta : result }, options);
+         delta && await streamResp({
+             text: options.delta ? delta : result,
+         }, options);
      }
      const _resp = await resp.response;
-     functionCalls = (functionCalls() || _resp.functionCalls() || []).map(x => ({ functionCall: x }));
+     functionCalls = (
+         functionCalls() || _resp.functionCalls() || []
+     ).map(x => ({ functionCall: x }));
      const { toolsResult, toolsResponse } = await handleToolsCall(
          { role: MODEL, parts: functionCalls },
          { ...options, result, flavor: GEMINI }
@@ -1126,7 +1185,7 @@ const promptGemini = async (content, options = {}) => {
              ...toolsResult], result: toolsResponse,
          });
      }
-     return await packGptResp({
+     return await packResp({
          text: mergeMsgs(toolsResponse, toolsResult), references,
      }, options);
  };
@@ -1159,9 +1218,9 @@ const createOpenAIEmbedding = async (input, options) => {
  };

  const createGeminiEmbedding = async (input, options) => {
-     const { embedding } = await getGeminiClient(options);
+     const { client } = await getGeminiClient(options);
      const model = options?.model || DEFAULT_MODELS[GEMINI_EMEDDING];
-     const resp = await embedding.embedContent(
+     const resp = await client.getGenerativeModel({ model }).embedContent(
          await checkEmbeddingInput(input, model)
      );
      return options?.raw ? resp : resp?.embedding.values;
@@ -1295,116 +1354,40 @@ const resetSession = async (sessionId, options) => {
  
  const packResult = resp => {
      const result = {
-         ...resp, richText: resp.richText || resp.text, spoken: renderText(
-             resp.text, { noCode: true, noLink: true }
+         ...resp, spoken: renderText(
+             resp.markdown, { noCode: true, noLink: true }
          ).replace(/\[\^\d\^\]/ig, ''),
      };
-     log(`Response (${result.model}): ${JSON.stringify(result.text)}`);
+     log(`Response (${result.model}): ${JSON.stringify(result.markdown)}`);
      // log(result);
      return result;
  };

  const talk = async (input, options) => {
-     const engine = unifyEngine({
-         engine: Object.keys(chatConfig.engines)?.[0] || DEFAULT_MODELS[CHAT],
-         ...options,
-     });
+     let [engine, chat, resp, sessionId] = [
+         unifyEngine({
+             engine: Object.keys(chatConfig.engines)?.[0] || DEFAULT_MODELS[CHAT],
+             ...options,
+         }), { request: input || ATTACHMENTS }, null,
+         options?.sessionId || newSessionId(),
+     ];
      assert(chatConfig.engines[engine], NOT_INIT);
-     const model = options?.model || chatConfig.engines[engine].model;
-     const _MODEL = MODELS[model];
-     const sessionId = options?.sessionId || newSessionId();
      const session = await getSession(sessionId, { engine, ...options });
-     let [resp, sys, messages, msgBuilder] = [null, [], [], null];
-     switch (engine) {
-         case CHATGPT: case AZURE:
-             sys.push(buildGptMessage(session.systemPrompt, { role: system }));
-             msgBuilder = () => {
-                 messages = [];
-                 session.messages.map(x => {
-                     messages.push(buildGptMessage(x.request, { role: user }));
-                     messages.push(buildGptMessage(x.response, { role: assistant }));
-                 });
-             };
-             msgBuilder()
-             break;
-         case GEMINI:
-             // already set in the while client initialization:
-             // sys.push(buildGeminiHistory(session.systemPrompt, { role: user }));
-             msgBuilder = () => {
-                 messages = [];
-                 session.messages.map(x => {
-                     messages.push(buildGeminiHistory(x.request, { role: user }));
-                     messages.push(buildGeminiHistory(x.response, { role: MODEL }));
-                 });
-             };
-             msgBuilder()
-             break;
-         case CLAUDE:
-             sys.push(buildClaudeMessage(session.systemPrompt, { role: system }));
-             msgBuilder = () => {
-                 messages = [];
-                 session.messages.map(x => {
-                     messages.push(buildClaudeMessage(x.request, { role: user }));
-                     messages.push(buildClaudeMessage(x.response, { role: assistant }));
-                 });
-             };
-             msgBuilder()
-             break;
-         case OLLAMA:
-             sys.push(buildOllamaMessage(session.systemPrompt, { role: system }));
-             msgBuilder = () => {
-                 messages = [];
-                 session.messages.map(x => {
-                     messages.push(buildOllamaMessage(x.request, { role: user }));
-                     messages.push(buildOllamaMessage(x.response, { role: assistant }));
-                 });
-             };
-             msgBuilder()
-             break;
-         default:
-             throwError(`Invalid AI engine: '${engine}'.`);
-     }
-     await trimPrompt(() => [...sys, ...messages, buildGeminiHistory(
-         input || ATTACHMENTS, { role: user } // length hack: ATTACHMENTS
-     )], () => {
-         if (messages.length) {
-             session.messages.shift();
-             msgBuilder && msgBuilder();
-         } else {
-             input = trimTailing(trimTailing(input).slice(0, -1)) + '...';
-         }
-     }, _MODEL.maxInputTokens - options?.attachments?.length * ATTACHMENT_TOKEN_COST);
-     const chat = { request: input || ATTACHMENTS };
-     const attachments = [];
-     (options?.attachments || []).filter(x => [
-         ..._MODEL?.supportedMimeTypes || [], ..._MODEL.supportedAudioTypes || []
-     ].includes(x.mime_type)).map(x => attachments.push(x));
      log(`Prompt (${engine}): ${JSON.stringify(input)}`);
+     const pmtOptions = {
+         messages: session.messages, model: chatConfig.engines[engine].model,
+         ...options,
+     };
      switch (engine) {
-         case CHATGPT:
-             resp = await promptChatGPT(input, {
-                 messages, model, ...options, attachments,
-             });
-             break;
-         case GEMINI:
-             resp = await promptGemini(input, {
-                 messages, ...options, attachments,
-             });
-             break;
-         case CLAUDE:
-             resp = await promptClaude(input, {
-                 messages, model, ...options, attachments,
-             });
-             break;
-         case OLLAMA:
-             resp = await promptOllama(input, { messages, model, ...options });
-             break;
-         case AZURE:
-             resp = await promptAzure(input, { messages, model, ...options });
-             break;
+         case CHATGPT: resp = await promptChatGPT(input, pmtOptions); break;
+         case GEMINI: resp = await promptGemini(input, pmtOptions); break;
+         case CLAUDE: resp = await promptClaude(input, pmtOptions); break;
+         case OLLAMA: resp = await promptOllama(input, pmtOptions); break;
+         case AZURE: resp = await promptAzure(input, pmtOptions); break;
+         default: throwError(`Invalid AI engine: '${engine}'.`);
      }
      chat.response = resp.text;
-     chat?.request && chat?.response && session.messages.push(chat);
+     chat.request && chat.response && session.messages.push(chat);
      await setSession(sessionId, session, options);
      return { sessionId, ...packResult(resp) };
  };
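After this rewrite, `talk` only resolves the engine, loads the session, and forwards the raw `{ request, response }` turns via `pmtOptions`; all per-engine message shaping now happens inside the `prompt*` functions through `buildPrompts`. A hedged usage sketch, assuming `talk` remains exported as before (the import path and option shapes here are illustrative):

```js
import { talk } from 'utilitas'; // illustrative; adjust to the real export path

const first = await talk('What resolution is 8K?', {
    stream: m => console.log(m.processing, m.text), // packResp snapshots
});
console.log(first.markdown, first.spoken); // markdown replaces the old richText
// A follow-up call with the returned sessionId reuses the stored history:
await talk('And how many pixels is that?', { sessionId: first.sessionId });
```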