npm - utilitas - Versions diffs - 2000.3.15 → 2000.3.17 - Mend

utilitas 2000.3.15 → 2000.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +9 -11
package/dist/utilitas.lite.mjs +1 -1
package/dist/utilitas.lite.mjs.map +1 -1
package/lib/alan.mjs +1 -1
package/lib/encryption.mjs +28 -15
package/lib/manifest.mjs +2 -2
package/lib/speech.mjs +2 -2
package/lib/storage.mjs +1 -1
package/lib/vision.mjs +153 -180
package/package.json +2 -2

package/lib/alan.mjs CHANGED Viewed

@@ -1053,7 +1053,7 @@ const distillFile = async (attachments, o) => {
         'You are an intelligent document analyzer.',
         '- You will receive various multimedia files, including images, audio, and videos.',
         '- Please analyze these documents, extract the information, and organize it into an easy-to-read format.',
-        '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. You can use markdown table formatting to present table data. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images.',
+        '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. Use markdown to format table and other rich text where possible. Use LaTeX for all formulas, subscripts, representations of formulas, and special symbols in mathematics and chemistry, enclosed by "$" symbols. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images. Ensure the returned document is clean, well-organized, and highly readable.',
         '- For audio files, please provide a transcript of the spoken voices. If there are background noises or music, attempt to briefly describe the environmental sounds and music sections.',
         '- For images or video files that are not primarily text-based, describe the tragic scene you observe, highlight key details, convey the emotional tone of the setting, and share your impressions.',
         '- For video files, please describe the content, including the theme, subjects, characters, scenes, objects, storyline, and emotional tone.',

package/lib/encryption.mjs CHANGED Viewed

@@ -10,10 +10,7 @@ import { base64Decode, base64Encode, ensureString, hexEncode, need } from './uti
 import { networkInterfaces } from 'os';
 const _NEED = [
-    '@google-cloud/speech',
-    '@google-cloud/text-to-speech',
-    '@google-cloud/vision',
-    'google-gax',
+    '@google-cloud/speech', '@google-cloud/text-to-speech', 'google-gax',
 ];
 const defaultAlgorithm = 'sha256';
@@ -58,16 +55,6 @@ const hexToBigInt = (hex) => {
     return BigInt(hex, 16).toString(10);
 };
-const getApiKeyCredentials = async (options) => {
-    // Included in @google-cloud/vision, @google-cloud/speech and @google-cloud/text-to-speech
-    const { GoogleAuth, grpc } = await need('google-gax');
-    const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
-    return grpc.credentials.combineChannelCredentials(
-        grpc.credentials.createSsl(),
-        grpc.credentials.createFromGoogleCredential(authClient)
-    );
-};
 // Default 256-bit key: (256 / 8 = 32) bytes * 8 bits/byte = 256 bits
 const aesCreateKey = (options) => {
     const key = _upkKey(options?.key) || random((options?.length || 256) / 8);
@@ -124,6 +111,30 @@ const aesDecrypt = (any, options) => {
     return decrypted;
 };
+const getGoogleApiKeyCredentials = async (options) => {
+    // Included in @google-cloud/speech and @google-cloud/text-to-speech
+    const { GoogleAuth, grpc } = await need('google-gax');
+    const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
+    return grpc.credentials.combineChannelCredentials(
+        grpc.credentials.createSsl(),
+        grpc.credentials.createFromGoogleCredential(authClient)
+    );
+};
+const getGoogleAuthByCredentials = async (keyFilename) => {
+    const { GoogleAuth } = await need('google-gax');
+    return (new GoogleAuth({
+        keyFilename, scopes: ['https://www.googleapis.com/auth/cloud-platform'],
+    })).getClient();
+};
+const getGoogleAuthTokenByAuth = async (auth) => {
+    const resp = await auth.getAccessToken();
+    const token = resp?.token || null;
+    assert(token, 'Failed to get Google API token.');
+    return token;
+}
 export {
     _NEED,
     aesCreateIv,
@@ -133,7 +144,9 @@ export {
     defaultAlgorithm,
     defaultEncryption,
     digestObject,
-    getApiKeyCredentials,
+    getGoogleApiKeyCredentials,
+    getGoogleAuthByCredentials,
+    getGoogleAuthTokenByAuth,
     getSortedQueryString,
     hash as sha256,
     hash,

package/lib/manifest.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 const manifest = {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "2000.3.15",
+    "version": "2000.3.17",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -28,7 +28,6 @@ const manifest = {
         "@ffprobe-installer/ffprobe": "^2.1.2",
         "@google-cloud/speech": "^7.2.1",
         "@google-cloud/storage": "^7.17.3",
-        "@google-cloud/vision": "^5.3.4",
         "@google/genai": "^1.30.0",
         "@mozilla/readability": "github:mozilla/readability",
         "@sentry/node": "^10.26.0",
@@ -52,6 +51,7 @@ const manifest = {
         "office-text-extractor": "^3.0.3",
         "openai": "^6.9.1",
         "pdfjs-dist": "^5.4.394",
+        "pdf-lib": "^1.17.1",
         "pg": "^8.16.3",
         "pgvector": "^0.2.1",
         "ping": "^1.0.0",

package/lib/speech.mjs CHANGED Viewed

@@ -1,5 +1,5 @@
 import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
-import { getApiKeyCredentials, hash } from './encryption.mjs';
+import { getGoogleApiKeyCredentials, hash } from './encryption.mjs';
 import { getFfmpeg, packPcmToWav } from './media.mjs';
 import { get } from './web.mjs';
 import { convert, getTempPath } from './storage.mjs';
@@ -124,7 +124,7 @@ const init = async (options) => {
                 }
                 if (options?.stt) {
                     const stt = (await need('@google-cloud/speech')).default;
-                    const sslCreds = await getApiKeyCredentials(options);
+                    const sslCreds = await getGoogleApiKeyCredentials(options);
                     clients.stt = new stt.SpeechClient({ sslCreds });
                 }
                 break;

package/lib/storage.mjs CHANGED Viewed

@@ -1,7 +1,7 @@
 import {
     log as _log,
     base64Decode, base64Encode, ensureString, extract, ignoreErrFunc,
-    mergeAtoB, need, throwError, trim, voidFunc, which,
+    mergeAtoB, need, throwError, trim, which,
 } from './utilitas.mjs';
 import { fileTypeFromBuffer } from 'file-type';

package/lib/vision.mjs CHANGED Viewed

@@ -1,50 +1,40 @@
 import {
-    convert, deleteOnCloud, downloadFromCloud, getIdByGs, uploadToCloud,
-} from './storage.mjs';
+    log as _log, ensureArray, ensureString, need, throwError,
+} from './utilitas.mjs';
 import {
-    log as _log,
-    ensureArray, ensureString, ignoreErrFunc,
-    need, throwError,
-    trim,
-} from './utilitas.mjs';
+    getGoogleAuthByCredentials, getGoogleAuthTokenByAuth,
+} from './encryption.mjs';
+import { convert, DATAURL, BUFFER, FILE } from './storage.mjs';
 import fs from 'node:fs';
-import path from 'node:path';
-import { v4 as uuidv4 } from 'uuid';
-import { getApiKeyCredentials } from './encryption.mjs';
-const _NEED = [
-    '@google-cloud/vision', 'office-text-extractor', 'pdfjs-dist',
-    'tesseract.js',
-];
-const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng'];
-const ceil = num => num.toFixed(4);
+const _NEED = ['office-text-extractor', 'pdfjs-dist', 'pdf-lib', 'tesseract.js'];
+const clients = {};
 const errorMessage = 'Invalid input data.';
-const getTextFromBatch = b => b.responses.map(p => p?.fullTextAnnotation?.text || '');
-const DOCUMENT_TEXT_DETECTION = 'DOCUMENT_TEXT_DETECTION';
-const features = [{ type: DOCUMENT_TEXT_DETECTION }];
-const mimeType = 'application/pdf';
-const pages = [1, 2, 3, 4, 5]; // max 5 pages limit for batchAnnotateFiles API
 const log = content => _log(content, import.meta.url);
-let client;
+const [DEFAULT_LANG, GOOGLE_MISTRAL, MISTRAL_OCR_MODEL]
+    = ['eng', 'GOOGLE_MISTRAL', 'mistral-ocr-2505'];
 const init = async (options) => {
-    if (options) {
-        if (options?.credentials || options?.apiKey) {
-            const vision = (await need('@google-cloud/vision')).default;
-            client = new vision.ImageAnnotatorClient(options?.apiKey ? {
-                sslCreds: await getApiKeyCredentials(options)
-            } : options);
-        } else { await checkTesseract({ assert: true }); }
+    const provider = ensureString(options?.provider || GOOGLE_MISTRAL, { case: 'UP' });
+    switch (provider) {
+        case GOOGLE_MISTRAL:
+            assert(
+                options.credentials && options.project,
+                'Google credentials and project must be set.'
+            );
+            clients[provider] = {
+                auth: await getGoogleAuthByCredentials(options.credentials),
+                project: options?.project,
+                region: options?.region || 'us-central1',
+                model: options?.model || MISTRAL_OCR_MODEL,
+            };
+            break;
+        default:
+            throw new Error('Invalid provider.');
     }
-    assert(
-        client || await checkTesseract(),
-        'Vision API client has not been initialized.', 501
-    );
-    return client;
+    return clients;
 };
 const parseOfficeFile = async (source, options) => {
@@ -90,34 +80,9 @@ const parseOfficeFile = async (source, options) => {
     }
 };
-const checkTesseract = async (options) => {
-    const result = !!(await ignoreErrFunc(() => need('tesseract.js')));
-    options?.assert && assert(result, 'Tesseract API is not available.', 500);
-    return result;
-};
-const ocrImageGoogle = async (image, options) => {
-    assert(client, 'Vision API has not been initialized.', 500);
-    const { content, cleanup } = await convert(image, {
-        input: options?.input, expected: FILE, errorMessage,
-        withCleanupFunc: true,
-    });
-    const [response] = await client.textDetection(content);
-    await cleanup();
-    let detections = response.textAnnotations;
-    if (!options?.raw && detections[0]) {
-        detections = {
-            description: detections[0].description,
-            score: detections[0].score,
-            vertices: detections[0].boundingPoly.vertices,
-        };
-    }
-    return detections;
-};
 // https://github.com/naptha/tesseract.js#tesseractjs
 // https://github.com/naptha/tesseract.js/blob/master/docs/image-format.md
-const ocrImageTesseract = async (image, options) => {
+const ocrImage = async (image, options) => {
     const [content, lang, { createWorker }] = [
         await convert(image, { input: options?.input, expected: BUFFER, errorMessage }),
         ensureArray(options?.lang || DEFAULT_LANG).join('+'),
@@ -132,115 +97,28 @@ const ocrImageTesseract = async (image, options) => {
     return options?.raw ? resp : resp.data.text;
 };
-const ocrImage = async (image, options) => {
-    let engine;
-    if (client) { engine = ocrImageGoogle; }
-    else if (await checkTesseract()) { engine = ocrImageTesseract; }
-    else { throwError('Vision engine has not been initialized.', 500); }
-    return await engine(image, options);
-};
-const annotateImage = async (image, options) => {
-    assert(client, 'Vision API has not been initialized.', 500);
-    const content = await convert(image, {
-        input: options?.input, expected: BASE64, errorMessage,
-    });
-    const [response] = await client.objectLocalization({ image: { content } });
-    let objects = response.localizedObjectAnnotations;
-    if (!options?.raw) {
-        objects = objects.map(x => ({
-            description: x.name,
-            score: x.score,
-            vertices: x.boundingPoly.normalizedVertices,
-        }));
-    }
-    return objects;
-};
-const see = async (image, options) => {
-    const [text, objects] = await Promise.all([
-        ocrImage(image, options), annotateImage(image, options),
-    ]);
-    let result = { text, objects };
-    if (!options?.raw) {
-        result = [];
-        if (text?.description) {
-            result.push('text:', text.description);
-        }
-        if (objects.length) {
-            result.push('', 'objects:', ...objects.map(x => [
-                `- ${x.description}`, `score: ${ceil(x.score)}`,
-                `vertices: ${x.vertices.map(
-                    l => `(${ceil(l.x)}, ${ceil(l.y)})`
-                ).join(' ')}`,
-            ].join('\n')));
-        }
-        result = trim(result.join('\n'));
+const getPdfPage = async (doc, pages) => {
+    let [min, max, multiple] = [1, doc.numPages, Array.isArray(pages)];
+    if (!pages) {
+        pages = [];
+        for (let i = min; i <= max; i++) { pages.push(i); }
+        multiple = true;
     }
-    return result;
-};
-const read = async (image, options) => {
-    assert(client, 'Vision API has not been initialized.', 500);
-    if (options?.allPages) {
-        assert(options?.input === FILE, 'Only file input is supported.', 400);
-        if ((await getPdfInfo(image)).numPages > pages.length) {
-            return await readAll(image, options);
+    pages = ensureArray(pages).map(
+        x => x >= min && x <= max ? ~~x : null
+    ).filter(x => x);
+    assert(pages.length, 'Invalid page numbers.');
+    const result = await Promise.all(pages.map(p => (async p => {
+        const page = await doc.getPage(p);
+        const viewport = page.getViewport({ scale: 1.0 });
+        const res = {
+            pageNum: p, width: viewport.width, height: viewport.height,
+            content: (await page.getTextContent()).items.map(x => x.str).join(' '),
         }
-    }
-    const content = await convert(image, {
-        input: options?.input, expected: BASE64, errorMessage,
-    });
-    const result = await client.batchAnnotateFiles({
-        requests: [{ inputConfig: { mimeType, content }, features, pages }],
-    });
-    return options?.raw ? result : getTextFromBatch(result[0].responses[0]);
-};
-const readAll = async (image, options) => {
-    assert(client, 'Vision API has not been initialized.', 500);
-    const result = {};
-    result.upload = await uploadToCloud(image, {
-        destination: path.join(options?.prefix || '_vision', `${uuidv4()}.pdf`),
-        ...options || {},
-    });
-    const uri = result.upload?.gs;
-    const destination = `${uri}_result/`;
-    const resultId = getIdByGs(destination);
-    result.clear = await deleteOnCloud(resultId);
-    result.submit = await client.asyncBatchAnnotateFiles({
-        requests: [{
-            inputConfig: { mimeType, gcsSource: { uri } },
-            outputConfig: { gcsDestination: { uri: destination } }, features,
-        }],
-    });
-    result.response = await result.submit[0].promise();
-    result.result = await downloadFromCloud(resultId, { expected: 'JSON' });
-    options?.keep || (result.cleanup = await Promise.all(
-        [getIdByGs(uri), resultId].map(deleteOnCloud)
-    ));
-    return options?.raw ? result : Object.keys(result.result).map(
-        f => getTextFromBatch(result.result[f])
-    ).flat();
-};
-const getPdfPage = async (doc, pageNum) => {
-    const page = await doc.getPage(pageNum);
-    const viewport = page.getViewport({ scale: 1.0 });
-    const result = {
-        pageNum: pageNum,
-        width: viewport.width,
-        height: viewport.height,
-        content: (await page.getTextContent()).items.map(x => x.str).join(' '),
-    };
-    page.cleanup();
-    return result
-};
-const getPdfPages = async (doc) => {
-    const result = [];
-    for (let i = 1; i <= doc.numPages; i++) { result.push(getPdfPage(doc, i)); }
-    return await Promise.all(result);
+        page.cleanup();
+        return res;
+    })(p)));
+    return multiple ? result : result[0];
 };
 // https://github.com/mozilla/pdf.js/blob/master/examples/node/getinfo.mjs
@@ -249,26 +127,121 @@ const getPdfInfo = async (file, options) => {
     const doc = await getDocument(file).promise;
     const data = await doc.getMetadata();
     const result = {
-        numPages: doc.numPages,
-        info: data.info,
-        metadata: { ...data.metadata?.getAll() },
-        pages: options?.withPages ? await getPdfPages(doc) : null,
+        info: data.info, metadata: { ...data.metadata?.getAll() },
+        numPages: doc.numPages, ...options.withDoc ? { doc } : {},
+        pages: options?.withPages ? await getPdfPage(doc) : null,
     };
     return result;
 };
+const ocr = async (file, options = {}) => {
+    let provider = ensureString(options?.provider, { case: 'UP' });
+    if (!provider && clients?.[GOOGLE_MISTRAL]) {
+        provider = GOOGLE_MISTRAL;
+    } else if (!provider && Object.keys(clients).length) {
+        provider = Object.keys(clients)[0];
+    }
+    const client = clients?.[provider];
+    assert(client, 'No available OCR provider.');
+    const model = options?.model || client.model;
+    switch (provider) {
+        case GOOGLE_MISTRAL:
+            const key = await getGoogleAuthTokenByAuth(client.auth);
+            const inputPdfs = await splitPdf(file, {
+                ...options, expected: DATAURL, size: 2,
+            });
+            const resps = (await Promise.all(inputPdfs.map(
+                async document_url => await (await fetch(
+                    `https://${client.region}-aiplatform.googleapis.com/v1/`
+                    + `projects/${client.project}/locations/${client.region}/`
+                    + `publishers/mistralai/models/${model}:rawPredict`, {
+                    method: 'POST', headers: {
+                        'Content-Type': 'application/json',
+                        'Authorization': `Bearer ${key}`
+                    }, body: JSON.stringify({
+                        model, include_image_base64: true,
+                        document: { type: 'document_url', document_url },
+                    })
+                })).json()
+            ))).filter(x => x?.pages?.length);
+            const resp = {
+                pages: [], usage_info: { pages_processed: 0, doc_size_bytes: 0 }
+            };
+            resps.map(x => {
+                x.pages.map(p => {
+                    p.index = resp.pages.length;
+                    resp.pages.push(p);
+                    p.images.map(i => {
+                        const oId = i.id;
+                        i.id = `page-${p.index}-${oId}`;
+                        p.markdown = p.markdown.replaceAll(
+                            `![${oId}](${oId})`, `![${i.id}](${i.id})`
+                        );
+                    });
+                });
+                resp.model = x.model;
+                resp.usage_info.pages_processed += x.usage_info.pages_processed;
+                resp.usage_info.doc_size_bytes += x.usage_info.doc_size_bytes;
+            });
+            if (options?.raw) { return resp; }
+            else if (options?.paging) { return resp.pages; }
+            const markdown = [];
+            resp.images = {};
+            for (const p of resp.pages) {
+                markdown.push(p.markdown);
+                await Promise.all(p.images.map(async i => {
+                    const id = i.id;
+                    i.width = i.bottom_right_x - i.top_left_x;
+                    i.height = i.bottom_right_y - i.top_left_y;
+                    i.annotation = i.image_annotation;
+                    i.data = await convert(i.image_base64, {
+                        ...options, input: 'DATAURL',
+                    });
+                    [
+                        'id', 'image_annotation', 'image_base64', 'top_left_x',
+                        'top_left_y', 'bottom_right_x', 'bottom_right_y',
+                    ].map(k => delete i[k]);
+                    resp.images[id] = i;
+                }));
+            }
+            resp.text = markdown.join('\n\n');
+            delete resp.pages;
+            return resp;
+        default:
+            throw new Error('Invalid provider.');
+    }
+};
+const splitPdf = async (file, options) => {
+    const [content, { PDFDocument }] = await Promise.all([
+        convert(file, { ...options, expected: BUFFER }), need('pdf-lib')
+    ]);
+    const [doc, result] = [await PDFDocument.load(content), []];
+    const count = doc.getPageCount();
+    const size = ~~options?.size || Infinity;
+    for (let i = 0; i < count; i += size) {
+        result.push((async () => {
+            const sub = await PDFDocument.create();
+            const copied = await sub.copyPages(doc, Array.from(
+                { length: Math.min(size, count - i) }, (_, j) => i + j
+            ));
+            copied.forEach(page => sub.addPage(page));
+            return await convert(Buffer.from(await sub.save()), {
+                ...options, input: 'BUFFER',
+            });
+        })());
+    }
+    return await Promise.all(result);
+};
+export default init;
 export {
     _NEED,
-    annotateImage,
     getPdfInfo,
     getPdfPage,
-    getPdfPages,
     init,
+    ocr,
     ocrImage,
-    ocrImageGoogle,
-    ocrImageTesseract,
     parseOfficeFile,
-    read,
-    readAll,
-    see
+    splitPdf,
 };

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "2000.3.15",
+    "version": "2000.3.17",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -39,7 +39,6 @@
         "@ffprobe-installer/ffprobe": "^2.1.2",
         "@google-cloud/speech": "^7.2.1",
         "@google-cloud/storage": "^7.17.3",
-        "@google-cloud/vision": "^5.3.4",
         "@google/genai": "^1.30.0",
         "@mozilla/readability": "github:mozilla/readability",
         "@sentry/node": "^10.26.0",
@@ -63,6 +62,7 @@
         "office-text-extractor": "^3.0.3",
         "openai": "^6.9.1",
         "pdfjs-dist": "^5.4.394",
+        "pdf-lib": "^1.17.1",
         "pg": "^8.16.3",
         "pgvector": "^0.2.1",
         "ping": "^1.0.0",