npm - utilitas - Versions diffs - 2000.3.26 → 2000.3.28 - Mend

utilitas 2000.3.26 → 2000.3.28

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +11 -23
package/dist/utilitas.lite.mjs +1 -1
package/dist/utilitas.lite.mjs.map +1 -1
package/index.mjs +2 -3
package/lib/alan.mjs +349 -74
package/lib/manifest.mjs +1 -1
package/lib/speech.mjs +15 -170
package/lib/storage.mjs +6 -4
package/package.json +1 -1
package/lib/gen.mjs +0 -209

package/lib/speech.mjs CHANGED Viewed

@@ -1,39 +1,20 @@
-import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
-import { getFfmpeg, packPcmToWav } from './media.mjs';
 import { get } from './web.mjs';
-import { convert, getTempPath, MIME_WAV } from './storage.mjs';
-import { ensureString, mergeAtoB } from './utilitas.mjs';
+import { getFfmpeg } from './media.mjs';
+import { getTempPath } from './storage.mjs';
+import { hash } from './encryption.mjs';
 import {
-    call, countKeys, ignoreErrFunc, inBrowser,
-    need, throwError
+    call, ignoreErrFunc, inBrowser, need, throwError,
 } from './utilitas.mjs';
 import {
-    convertAudioTo16kNanoOpusOgg,
-    convertAudioTo16kNanoPcmWave,
+    convertAudioTo16kNanoOpusOgg, convertAudioTo16kNanoPcmWave,
 } from './media.mjs';
-const _NEED = ['@google/genai', 'OpenAI', 'whisper-node'];
+const _NEED = ['whisper-node'];
-const [
-    BUFFER, STREAM, BASE64, FILE, clients, suffix, SPEAKER, cleanup, wav,
-    GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH,
-    OPENAI_TTS_MAX_LENGTH, WHISPER_DEFAULT_MODEL, errorMessage
-] = [
-        'BUFFER', 'STREAM', 'BASE64', 'FILE', {}, 'ogg', 'SPEAKER', true, 'wav',
-        'gpt-4o-mini-tts', 'gpt-4o-transcribe', 'gemini-2.5-flash-preview-tts',
-        'gemini-flash-latest', 4096, 'base', 'Invalid audio data.',
-    ];
-const [
-    defaultOpenAITtsModel, defaultOpenAISttModel, defaultGeminiTtsModel,
-    defaultGeminiSttModel,
-] = [GPT_4O_MIMI_TTS, GPT_4O_TRANSCRIBE, GEMINI_25_FLASH_TTS, GEMINI_FLASH];
-const TTS_PROMPT = "As an AI voice assistant, please say the following content in a warm, friendly and professional tone, if the language is English, use an American accent, if it's Traditional Chinese, use Hong Kong Cantonese, if it's Simplified Chinese, use standard Mandarin, for other languages, please speak with a standard, clear accent";
-const STT_PROMPT = 'Please transcribe the audio into clean text. Return only the text content, DO NOT include any additional information or metadata. You may encounter input that contains different languages. Please do your best to transcribe text from all possible languages. Please distinguish between background noise and the main speech content. Do not be disturbed by background noise. Only return the main speech content.';
+const [FILE, suffix, SPEAKER, cleanup, WHISPER_DEFAULT_MODEL, errorMessage]
+    = ['FILE', 'ogg', 'SPEAKER', true, 'base', 'Invalid audio data.'];
 const WHISPER_MODELS = [
     // npx whisper-node download tiny.en
@@ -86,108 +67,22 @@ const getWhisperModelReady = async (model, options) => {
     return (await get(getWhisperModelUrl(model), { fuzzy: true }))?.cache?.content;
 };
-const init = async (options) => {
-    if (options) {
-        assert(
-            options?.tts || options?.stt,
-            'At least one of TTS or STT is selected.', 500
-        );
-        const provider = ensureString(options?.provider, { case: 'UP' });
-        switch (provider) {
-            case 'OPENAI':
-                clients._provider = provider;
-                const OpenAI = await need('openai');
-                const openai = new OpenAI(options);
-                if (options?.tts) {
-                    clients.tts = openai.audio.speech;
-                }
-                if (options?.stt) {
-                    clients.stt = openai.audio.transcriptions;
-                    clients.toFile = OpenAI.toFile;
-                }
-                break;
-            case 'GOOGLE':
-                clients._provider = provider;
-                const { GoogleGenAI } = await need('@google/genai');
-                const client = new GoogleGenAI(options);
-                if (options?.tts) {
-                    clients.tts = client.models.generateContent;
-                }
-                if (options?.stt) {
-                    clients.stt = client.models.generateContent;
-                }
-                break;
-            case '':
-                clients._provider = 'LOCAL';
-                options?.tts && await checkSay({ assert: true });
-                options?.stt && await checkWhisper({ assert: true });
-                break;
-            default:
-                throwError('Invalid speech provider.', 500);
-        }
-    }
-    assert(
-        countKeys(clients), 'Speech API client has not been initialized.', 501
-    );
-    return clients;
-};
-const checkSay = async (options) => {
+const checkSay = async () => {
     const result = !!(await ignoreErrFunc(async () => (
         await Promise.all([need('node:os'), need('say'), getFfmpeg()])
     )[0].platform() === 'darwin'));
-    options?.assert && assert(result, 'Say API is not available.', 500);
+    assert(result, 'Say API is not available.', 500);
     return result;
 };
-const checkWhisper = async (options) => {
+const checkWhisper = async () => {
     const result = !!(await ignoreErrFunc(() => Promise.all([
         need('whisper-node'), getFfmpeg()
     ])));
-    options?.assert && assert(result, 'Whisper API is not available.', 500);
+    assert(result, 'Whisper API is not available.', 500);
     return result;
 };
-const ttsOpenAI = async (input, options) => {
-    assert(clients.tts, 'OpenAI TTS API has not been initialized.', 500);
-    assert(input, 'Text is required.', 400);
-    assert(input.length <= OPENAI_TTS_MAX_LENGTH, 'Text is too long.', 400);
-    // https://platform.openai.com/docs/api-reference/audio/createSpeech
-    const content = await clients.tts.create({
-        model: defaultOpenAITtsModel, voice: DEFAULT_MODELS[OPENAI_VOICE],
-        instructions: 'Speak in a friendly and sweet tone.',
-        response_format: 'opus', input, ...options?.params || {},
-    });
-    const buffer = Buffer.from(await content.arrayBuffer());
-    return await convert(buffer, { suffix, ...options || {} });
-};
-// https://ai.google.dev/gemini-api/docs/speech-generation#voices
-const ttsGoogle = async (contents, options) => {
-    assert(clients.tts, 'Google TTS API has not been initialized.', 500);
-    assert(contents, 'Text is required.', 400);
-    assert(await countTokens(contents) <= k(32), 'Text is too long.', 400);
-    const resp = await clients.tts({
-        model: options?.model || defaultGeminiTtsModel,
-        contents: `${options?.prompt || TTS_PROMPT}: ${contents}`,
-        config: mergeAtoB(options?.config, {
-            responseModalities: ['AUDIO'],
-            speechConfig: {
-                voiceConfig: {
-                    prebuiltVoiceConfig: {
-                        voiceName: options?.voice || 'Zephyr',
-                    },
-                },
-            },
-        }),
-    });
-    const rawAudio = resp?.candidates?.[0]?.content?.parts?.[0]?.inlineData;
-    assert(rawAudio, 'Failed to generate audio.', 500);
-    return options?.raw ? rawAudio : await packPcmToWav(rawAudio?.data, {
-        input: BASE64, expected: 'FILE', suffix: wav, ...options || {},
-    });
-};
 const ttsSay = async (text, options) => {
     const say = await need('say');
     assert(text, 'Text is required.', 400);
@@ -214,45 +109,6 @@ const ttsBrowser = async (text) => {
     return speechSynthesis.speak(new SpeechSynthesisUtterance(text));
 };
-const sttOpenAI = async (audio, options) => {
-    assert(clients.stt, 'OpenAI STT API has not been initialized.', 500);
-    const input = ensureString(options?.input, { case: 'UP' });
-    const { content, cleanup } = await convert(audio, {
-        input: options?.input, ...options || {}, expected: STREAM, errorMessage,
-        suffix: ['', BUFFER].includes(input) ? suffix : null,
-        withCleanupFunc: true,
-    });
-    const result = await clients.stt.create({
-        file: await clients.toFile(content), model: defaultOpenAISttModel,
-        response_format: 'text', ...options?.params || {},
-    });
-    await cleanup();
-    return result;
-};
-const sttGoogle = async (audio, options) => {
-    assert(clients.stt, 'Google STT API has not been initialized.', 500);
-    const data = await convert(audio, {
-        input: options?.input, expected: BASE64, errorMessage,
-    });
-    const resp = await clients.stt({
-        model: options?.model || defaultGeminiSttModel, contents: {
-            parts: [{
-                inlineData: {
-                    mimeType: options?.mimeType || MIME_WAV, data,
-                },
-            }, { text: STT_PROMPT }],
-        },
-        config: { ...options?.config || {} },
-    });
-    assert(
-        resp?.candidates?.[0]?.content?.parts?.[0],
-        'Failed to transcribe audio.', 500
-    );
-    return options?.raw ? resp.candidates
-        : (resp.candidates[0].content.parts[0].text?.trim?.() || '');
-};
 // This function is not working properly, a pull request is filed:
 // https://github.com/ariym/whisper-node/pull/58
 const sttWhisper = async (audio, options) => {
@@ -282,35 +138,24 @@ const sttWhisper = async (audio, options) => {
 const tts = async (text, options) => {
     let engine;
     if (inBrowser()) { engine = ttsBrowser }
-    else if (clients?.tts && clients._provider === 'GOOGLE') { engine = ttsGoogle; }
-    else if (clients?.tts && clients._provider === 'OPENAI') { engine = ttsOpenAI; }
     else if (await checkSay()) { engine = ttsSay; }
-    else { throwError('Text-to-Speech engine has not been initialized.', 500); }
+    else { throwError('Text-to-Speech engine is not available.', 500); }
     return await engine(text, options);
 };
 const stt = async (audio, options) => {
     let engine;
-    if (clients?.stt && clients._provider === 'GOOGLE') { engine = sttGoogle; }
-    else if (clients?.stt && clients._provider === 'OPENAI') { engine = sttOpenAI; }
-    else if (await checkWhisper()) { engine = sttWhisper; }
-    else { throwError('Speech-to-Text engine has not been initialized.', 500); }
+    if (await checkWhisper()) { engine = sttWhisper; }
+    else { throwError('Speech-to-Text engine is not available.', 500); }
     return await engine(audio, options);
 };
-export default init;
 export {
     _NEED,
-    OPENAI_TTS_MAX_LENGTH,
     checkSay,
     checkWhisper,
-    init,
     stt,
-    sttGoogle,
-    sttOpenAI,
     sttWhisper,
     tts,
-    ttsGoogle,
-    ttsOpenAI,
     ttsSay,
 };

package/lib/storage.mjs CHANGED Viewed

@@ -240,7 +240,7 @@ const blobToBuffer = async blob => {
 const convert = async (any, options) => {
     assert(any, options?.errorMessage || 'Invalid input.', 400);
-    const result = {}
+    let result = {};
     let [input, expected] = [(
         Buffer.isBuffer(any)
         || ArrayBuffer.isArrayBuffer(any)
@@ -248,7 +248,7 @@ const convert = async (any, options) => {
     ) ? BUFFER : options?.input, options?.expected || BUFFER].map(
         x => ensureString(x, { case: 'UP' })
     );
-    let [oriFile, meta, mime, subExp] = [null, null, MIME_BINARY, expected];
+    let [oriFile, meta, mime, subExp] = [null, null, null, expected];
     switch (input) {
         case FILE:
             oriFile = any;
@@ -269,6 +269,7 @@ const convert = async (any, options) => {
             input = BUFFER;
             break;
     }
+    mime || (mime = (await getMime(any, any))?.mime || MIME_BINARY);
     switch (expected) {
         case STREAM: subExp = FILE; break;
         case DATAURL: subExp = BUFFER; break;
@@ -313,8 +314,9 @@ const convert = async (any, options) => {
 const getMime = async (buf, filename) => {
     const mimeType = await ignoreErrFunc(() => need('mime-types'));
-    const mime = extract(await fileTypeFromBuffer(buf), 'mime')
-        || (filename && mimeType?.lookup?.(filename)) || MIME_BINARY;
+    const mime = (buf && Buffer.isBuffer(buf) && extract(await fileTypeFromBuffer(buf), 'mime'))
+        || (filename && String.isString(filename) && mimeType?.lookup?.(filename))
+        || MIME_BINARY;
     return { mime, extension: mimeType?.extension?.(mime) || 'bin' };
 };

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "2000.3.26",
+    "version": "2000.3.28",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",

package/lib/gen.mjs DELETED Viewed

@@ -1,209 +0,0 @@
-import {
-    ensureArray, ensureString, log as _log, need, throwError,
-    tryUntil, timeout,
-} from './utilitas.mjs';
-import { convert, MIME_PNG, MIME_MP4, getTempPath } from './storage.mjs';
-import { createReadStream } from 'fs';
-const _NEED = ['OpenAI', '@google/genai'];
-const log = (cnt, opt) => _log(cnt, import.meta.url, { time: 1, ...opt || {} });
-const [
-    clients, OPENAI, GOOGLE, BASE64, FILE, BUFFER, ERROR_GENERATING,
-    IMAGEN_MODEL, OPENAI_MODEL, VEO_MODEL, IMAGEN_UPSCALE_MODEL,
-] = [
-        {}, 'OPENAI', 'GOOGLE', 'BASE64', 'FILE', 'BUFFER',
-        'Error generating media.', 'imagen-4.0-ultra-generate-001',
-        'gpt-image-1', 'veo-3.1-generate-preview', 'imagen-4.0-upscale-preview',
-    ];
-const init = async (options) => {
-    assert(options?.apiKey, 'API key is required.');
-    const provider = ensureString(options?.provider, { case: 'UP' });
-    switch (provider) {
-        case OPENAI:
-            const OpenAI = await need('openai');
-            var client = new OpenAI(options);
-            clients[provider] = {
-                image: client.images,
-                toFile: OpenAI.toFile,
-            };
-            break;
-        case GOOGLE:
-            const { GoogleGenAI } = await need('@google/genai');
-            var client = new GoogleGenAI({ vertexai: false, ...options });
-            clients[provider] = {
-                gen: client,
-            };
-            break;
-        default:
-            throw new Error('Invalid provider.');
-    }
-    return clients;
-};
-const extractImage = async (data, options) => await convert(
-    data, { input: BASE64, suffix: 'png', ...options || {} }
-);
-const extractVideo = async (data, options) => await convert(
-    data, { input: FILE, suffix: 'mp4', ...options || {} }
-);
-const prepareImage = async (files, repack, options) => {
-    if (!files) { return }
-    const multiple = Array.isArray(files);
-    files = ensureArray(files);
-    const resp = await Promise.all(files.map(async x => await repack(
-        createReadStream(await convert(
-            x, { expected: 'FILE', ...options || {} }
-        )), null, { type: MIME_PNG } // don't need to be right MIME type
-    )));
-    return multiple ? resp : resp[0];
-};
-const image = async (prompt, options) => {
-    let provider = ensureString(options?.provider, { case: 'UP' });
-    if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
-    else if (!provider && clients?.[OPENAI]) { provider = OPENAI; }
-    const client = clients?.[provider];
-    const n = options?.n || 4;
-    assert(client, 'No available image generation provider.');
-    prompt = ensureString(prompt);
-    assert(prompt.length <= 4000,
-        'Prompt must be less than 4000 characters.', 400);
-    options = {
-        ...options || {},
-        expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
-    };
-    switch (provider) {
-        case OPENAI:
-            let [func, extraOptions] = ['generate', {}];
-            if (options?.reference || options?.mask) {
-                func = 'edit';
-                extraOptions = {
-                    image: await prepareImage(options?.reference, client.toFile, options),
-                    mask: await prepareImage(options?.mask, client.toFile, options),
-                };
-            }
-            try { // https://platform.openai.com/docs/guides/image-generation?image-generation-model=gpt-image-1
-                var resp = await client.image[func]({
-                    prompt, model: OPENAI_MODEL, n, quality: 'high',
-                    size: '1536x1024', moderation: 'low',
-                    // 1024x1024 (square), 1536x1024 (landscape), 1024x1536 (portrait), auto (default)
-                    // background: 'transparent',
-                    ...extraOptions, ...options?.params || {},
-                });
-            } catch (err) { throwError(err?.message || ERROR_GENERATING); }
-            if (!options?.raw) {
-                resp.data = await Promise.all(resp.data.map(async x => ({
-                    caption: `🎨 by ${OPENAI_MODEL}`,
-                    data: await extractImage(x.b64_json, {
-                        ...options || {}, input: BASE64,
-                    }),
-                    mimeType: MIME_PNG,
-                })));
-            }
-            return resp?.data;
-        case GOOGLE:
-            var resp = await client.gen.models.generateImages({
-                model: IMAGEN_MODEL, prompt, config: {
-                    numberOfImages: n, sampleImageSize: '2K',
-                    includeRaiReason: true,
-                    // "1:1" (default), "3:4", "4:3", "9:16", and "16:9"
-                    aspectRatio: '16:9', personGeneration: 'allow_adult',
-                    ...options?.config || {},
-                },
-            });
-            const generated = resp?.generatedImages;
-            assert(!resp?.error && generated?.filter(
-                x => !x.raiFilteredReason
-            ).length, resp?.error?.message || generated?.find(
-                x => x.raiFilteredReason
-            )?.raiFilteredReason || ERROR_GENERATING);
-            if (!options?.raw) {
-                resp = await Promise.all((resp?.generatedImages || []).map(
-                    async x => ({
-                        caption: `🎨 by ${IMAGEN_MODEL}`,
-                        data: await extractImage(x.image.imageBytes, options),
-                        mimeType: x.mimeType,
-                    })
-                ));
-            }
-            return resp;
-        default:
-            throw new Error('Invalid provider.');
-    }
-};
-const video = async (prompt, options) => {
-    let provider = ensureString(options?.provider, { case: 'UP' });
-    if (!provider && clients?.[GOOGLE]) { provider = GOOGLE; }
-    const client = clients?.[provider];
-    assert(client, 'No available video generation provider.');
-    prompt = ensureString(prompt);
-    assert(prompt.length <= 4000,
-        'Prompt must be less than 4000 characters.', 400);
-    options = {
-        ...options || {},
-        expected: ensureString(options?.expected || BUFFER, { case: 'LOW' }),
-    };
-    switch (provider) {
-        case GOOGLE:
-            var resp = await client.gen.models.generateVideos({
-                model: VEO_MODEL, prompt, config: {
-                    aspectRatio: '16:9', numberOfVideos: 1,
-                    // personGeneration: 'allow_adult',
-                    enablePromptRewriting: true, addWatermark: false,
-                    includeRaiReason: true, ...options?.config || {},
-                },
-            });
-            assert(!resp?.error, resp?.error?.message || ERROR_GENERATING);
-            if (options?.generateRaw) { return resp; }
-            await tryUntil(async () => {
-                resp = await client.gen.operations.getVideosOperation({
-                    operation: resp,
-                });
-                assert(
-                    resp?.done,
-                    `Waiting for Google video generation: ${resp.name}`,
-                );
-            }, { maxTry: 60 * 10, log });
-            let generated = resp?.response?.generatedVideos;
-            assert(!resp?.error && generated?.filter(
-                x => !x.raiFilteredReason
-            ).length, resp?.error?.message || generated?.find(
-                x => x.raiFilteredReason
-            )?.raiFilteredReason || ERROR_GENERATING);
-            if (!options?.videoRaw) {
-                generated = await Promise.all(generated?.filter(
-                    x => x?.video?.uri
-                ).map(async (x, i) => {
-                    const downloadPath = `${getTempPath({
-                        seed: x?.video?.uri
-                    })}.mp4`;
-                    // @todo: fix this
-                    // https://github.com/googleapis/js-genai/compare/main...Leask:js-genai:main
-                    await client.gen.files.download({ file: x, downloadPath });
-                    await timeout(1000 * 10); // hack to wait for file to be downloaded
-                    return {
-                        caption: `🎥 by ${VEO_MODEL}`,
-                        data: await extractVideo(downloadPath, options),
-                        mimeType: MIME_MP4, jobId: resp.name,
-                    };
-                }));
-            }
-            return generated;
-        default:
-            throw new Error('Invalid provider.');
-    }
-};
-export default init;
-export {
-    _NEED,
-    image,
-    init,
-    video,
-};