utilitas 2000.3.15 → 2000.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -11
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/alan.mjs +1 -1
- package/lib/encryption.mjs +28 -15
- package/lib/manifest.mjs +2 -2
- package/lib/speech.mjs +2 -2
- package/lib/storage.mjs +1 -1
- package/lib/vision.mjs +153 -180
- package/package.json +2 -2
package/lib/alan.mjs
CHANGED
|
@@ -1053,7 +1053,7 @@ const distillFile = async (attachments, o) => {
|
|
|
1053
1053
|
'You are an intelligent document analyzer.',
|
|
1054
1054
|
'- You will receive various multimedia files, including images, audio, and videos.',
|
|
1055
1055
|
'- Please analyze these documents, extract the information, and organize it into an easy-to-read format.',
|
|
1056
|
-
'- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present.',
|
|
1056
|
+
'- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. Use markdown to format table and other rich text where possible. Use LaTeX for all formulas, subscripts, representations of formulas, and special symbols in mathematics and chemistry, enclosed by "$" symbols. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images. Ensure the returned document is clean, well-organized, and highly readable.',
|
|
1057
1057
|
'- For audio files, please provide a transcript of the spoken voices. If there are background noises or music, attempt to briefly describe the environmental sounds and music sections.',
|
|
1058
1058
|
'- For images or video files that are not primarily text-based, describe the tragic scene you observe, highlight key details, convey the emotional tone of the setting, and share your impressions.',
|
|
1059
1059
|
'- For video files, please describe the content, including the theme, subjects, characters, scenes, objects, storyline, and emotional tone.',
|
package/lib/encryption.mjs
CHANGED
|
@@ -10,10 +10,7 @@ import { base64Decode, base64Encode, ensureString, hexEncode, need } from './uti
|
|
|
10
10
|
import { networkInterfaces } from 'os';
|
|
11
11
|
|
|
12
12
|
const _NEED = [
|
|
13
|
-
'@google-cloud/speech',
|
|
14
|
-
'@google-cloud/text-to-speech',
|
|
15
|
-
'@google-cloud/vision',
|
|
16
|
-
'google-gax',
|
|
13
|
+
'@google-cloud/speech', '@google-cloud/text-to-speech', 'google-gax',
|
|
17
14
|
];
|
|
18
15
|
|
|
19
16
|
const defaultAlgorithm = 'sha256';
|
|
@@ -58,16 +55,6 @@ const hexToBigInt = (hex) => {
|
|
|
58
55
|
return BigInt(hex, 16).toString(10);
|
|
59
56
|
};
|
|
60
57
|
|
|
61
|
-
const getApiKeyCredentials = async (options) => {
|
|
62
|
-
// Included in @google-cloud/vision, @google-cloud/speech and @google-cloud/text-to-speech
|
|
63
|
-
const { GoogleAuth, grpc } = await need('google-gax');
|
|
64
|
-
const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
|
|
65
|
-
return grpc.credentials.combineChannelCredentials(
|
|
66
|
-
grpc.credentials.createSsl(),
|
|
67
|
-
grpc.credentials.createFromGoogleCredential(authClient)
|
|
68
|
-
);
|
|
69
|
-
};
|
|
70
|
-
|
|
71
58
|
// Default 256-bit key: (256 / 8 = 32) bytes * 8 bits/byte = 256 bits
|
|
72
59
|
const aesCreateKey = (options) => {
|
|
73
60
|
const key = _upkKey(options?.key) || random((options?.length || 256) / 8);
|
|
@@ -124,6 +111,30 @@ const aesDecrypt = (any, options) => {
|
|
|
124
111
|
return decrypted;
|
|
125
112
|
};
|
|
126
113
|
|
|
114
|
+
const getGoogleApiKeyCredentials = async (options) => {
|
|
115
|
+
// Included in @google-cloud/speech and @google-cloud/text-to-speech
|
|
116
|
+
const { GoogleAuth, grpc } = await need('google-gax');
|
|
117
|
+
const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
|
|
118
|
+
return grpc.credentials.combineChannelCredentials(
|
|
119
|
+
grpc.credentials.createSsl(),
|
|
120
|
+
grpc.credentials.createFromGoogleCredential(authClient)
|
|
121
|
+
);
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
const getGoogleAuthByCredentials = async (keyFilename) => {
|
|
125
|
+
const { GoogleAuth } = await need('google-gax');
|
|
126
|
+
return (new GoogleAuth({
|
|
127
|
+
keyFilename, scopes: ['https://www.googleapis.com/auth/cloud-platform'],
|
|
128
|
+
})).getClient();
|
|
129
|
+
};
|
|
130
|
+
|
|
131
|
+
const getGoogleAuthTokenByAuth = async (auth) => {
|
|
132
|
+
const resp = await auth.getAccessToken();
|
|
133
|
+
const token = resp?.token || null;
|
|
134
|
+
assert(token, 'Failed to get Google API token.');
|
|
135
|
+
return token;
|
|
136
|
+
}
|
|
137
|
+
|
|
127
138
|
export {
|
|
128
139
|
_NEED,
|
|
129
140
|
aesCreateIv,
|
|
@@ -133,7 +144,9 @@ export {
|
|
|
133
144
|
defaultAlgorithm,
|
|
134
145
|
defaultEncryption,
|
|
135
146
|
digestObject,
|
|
136
|
-
|
|
147
|
+
getGoogleApiKeyCredentials,
|
|
148
|
+
getGoogleAuthByCredentials,
|
|
149
|
+
getGoogleAuthTokenByAuth,
|
|
137
150
|
getSortedQueryString,
|
|
138
151
|
hash as sha256,
|
|
139
152
|
hash,
|
package/lib/manifest.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
const manifest = {
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "2000.3.15",
|
|
4
|
+
"version": "2000.3.17",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -28,7 +28,6 @@ const manifest = {
|
|
|
28
28
|
"@ffprobe-installer/ffprobe": "^2.1.2",
|
|
29
29
|
"@google-cloud/speech": "^7.2.1",
|
|
30
30
|
"@google-cloud/storage": "^7.17.3",
|
|
31
|
-
"@google-cloud/vision": "^5.3.4",
|
|
32
31
|
"@google/genai": "^1.30.0",
|
|
33
32
|
"@mozilla/readability": "github:mozilla/readability",
|
|
34
33
|
"@sentry/node": "^10.26.0",
|
|
@@ -52,6 +51,7 @@ const manifest = {
|
|
|
52
51
|
"office-text-extractor": "^3.0.3",
|
|
53
52
|
"openai": "^6.9.1",
|
|
54
53
|
"pdfjs-dist": "^5.4.394",
|
|
54
|
+
"pdf-lib": "^1.17.1",
|
|
55
55
|
"pg": "^8.16.3",
|
|
56
56
|
"pgvector": "^0.2.1",
|
|
57
57
|
"ping": "^1.0.0",
|
package/lib/speech.mjs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
|
|
2
|
-
import { getApiKeyCredentials, hash } from './encryption.mjs';
|
|
2
|
+
import { getGoogleApiKeyCredentials, hash } from './encryption.mjs';
|
|
3
3
|
import { getFfmpeg, packPcmToWav } from './media.mjs';
|
|
4
4
|
import { get } from './web.mjs';
|
|
5
5
|
import { convert, getTempPath } from './storage.mjs';
|
|
@@ -124,7 +124,7 @@ const init = async (options) => {
|
|
|
124
124
|
}
|
|
125
125
|
if (options?.stt) {
|
|
126
126
|
const stt = (await need('@google-cloud/speech')).default;
|
|
127
|
-
const sslCreds = await getApiKeyCredentials(options);
|
|
127
|
+
const sslCreds = await getGoogleApiKeyCredentials(options);
|
|
128
128
|
clients.stt = new stt.SpeechClient({ sslCreds });
|
|
129
129
|
}
|
|
130
130
|
break;
|
package/lib/storage.mjs
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
2
|
log as _log,
|
|
3
3
|
base64Decode, base64Encode, ensureString, extract, ignoreErrFunc,
|
|
4
|
-
mergeAtoB, need, throwError, trim,
|
|
4
|
+
mergeAtoB, need, throwError, trim, which,
|
|
5
5
|
} from './utilitas.mjs';
|
|
6
6
|
|
|
7
7
|
import { fileTypeFromBuffer } from 'file-type';
|
package/lib/vision.mjs
CHANGED
|
@@ -1,50 +1,40 @@
|
|
|
1
1
|
import {
|
|
2
|
-
|
|
3
|
-
} from './storage.mjs';
|
|
2
|
+
log as _log, ensureArray, ensureString, need, throwError,
|
|
3
|
+
} from './utilitas.mjs';
|
|
4
4
|
|
|
5
5
|
import {
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
need, throwError,
|
|
9
|
-
trim,
|
|
10
|
-
} from './utilitas.mjs';
|
|
6
|
+
getGoogleAuthByCredentials, getGoogleAuthTokenByAuth,
|
|
7
|
+
} from './encryption.mjs';
|
|
11
8
|
|
|
9
|
+
import { convert, DATAURL, BUFFER, FILE } from './storage.mjs';
|
|
12
10
|
import fs from 'node:fs';
|
|
13
|
-
import path from 'node:path';
|
|
14
|
-
import { v4 as uuidv4 } from 'uuid';
|
|
15
|
-
import { getApiKeyCredentials } from './encryption.mjs';
|
|
16
11
|
|
|
17
|
-
const _NEED = [
|
|
18
|
-
|
|
19
|
-
'tesseract.js',
|
|
20
|
-
];
|
|
21
|
-
|
|
22
|
-
const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng'];
|
|
23
|
-
const ceil = num => num.toFixed(4);
|
|
12
|
+
const _NEED = ['office-text-extractor', 'pdfjs-dist', 'pdf-lib', 'tesseract.js'];
|
|
13
|
+
const clients = {};
|
|
24
14
|
const errorMessage = 'Invalid input data.';
|
|
25
|
-
const getTextFromBatch = b => b.responses.map(p => p?.fullTextAnnotation?.text || '');
|
|
26
|
-
const DOCUMENT_TEXT_DETECTION = 'DOCUMENT_TEXT_DETECTION';
|
|
27
|
-
const features = [{ type: DOCUMENT_TEXT_DETECTION }];
|
|
28
|
-
const mimeType = 'application/pdf';
|
|
29
|
-
const pages = [1, 2, 3, 4, 5]; // max 5 pages limit for batchAnnotateFiles API
|
|
30
15
|
const log = content => _log(content, import.meta.url);
|
|
31
|
-
|
|
32
|
-
|
|
16
|
+
const [DEFAULT_LANG, GOOGLE_MISTRAL, MISTRAL_OCR_MODEL]
|
|
17
|
+
= ['eng', 'GOOGLE_MISTRAL', 'mistral-ocr-2505'];
|
|
33
18
|
|
|
34
19
|
const init = async (options) => {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
20
|
+
const provider = ensureString(options?.provider || GOOGLE_MISTRAL, { case: 'UP' });
|
|
21
|
+
switch (provider) {
|
|
22
|
+
case GOOGLE_MISTRAL:
|
|
23
|
+
assert(
|
|
24
|
+
options.credentials && options.project,
|
|
25
|
+
'Google credentials and project must be set.'
|
|
26
|
+
);
|
|
27
|
+
clients[provider] = {
|
|
28
|
+
auth: await getGoogleAuthByCredentials(options.credentials),
|
|
29
|
+
project: options?.project,
|
|
30
|
+
region: options?.region || 'us-central1',
|
|
31
|
+
model: options?.model || MISTRAL_OCR_MODEL,
|
|
32
|
+
};
|
|
33
|
+
break;
|
|
34
|
+
default:
|
|
35
|
+
throw new Error('Invalid provider.');
|
|
42
36
|
}
|
|
43
|
-
|
|
44
|
-
client || await checkTesseract(),
|
|
45
|
-
'Vision API client has not been initialized.', 501
|
|
46
|
-
);
|
|
47
|
-
return client;
|
|
37
|
+
return clients;
|
|
48
38
|
};
|
|
49
39
|
|
|
50
40
|
const parseOfficeFile = async (source, options) => {
|
|
@@ -90,34 +80,9 @@ const parseOfficeFile = async (source, options) => {
|
|
|
90
80
|
}
|
|
91
81
|
};
|
|
92
82
|
|
|
93
|
-
const checkTesseract = async (options) => {
|
|
94
|
-
const result = !!(await ignoreErrFunc(() => need('tesseract.js')));
|
|
95
|
-
options?.assert && assert(result, 'Tesseract API is not available.', 500);
|
|
96
|
-
return result;
|
|
97
|
-
};
|
|
98
|
-
|
|
99
|
-
const ocrImageGoogle = async (image, options) => {
|
|
100
|
-
assert(client, 'Vision API has not been initialized.', 500);
|
|
101
|
-
const { content, cleanup } = await convert(image, {
|
|
102
|
-
input: options?.input, expected: FILE, errorMessage,
|
|
103
|
-
withCleanupFunc: true,
|
|
104
|
-
});
|
|
105
|
-
const [response] = await client.textDetection(content);
|
|
106
|
-
await cleanup();
|
|
107
|
-
let detections = response.textAnnotations;
|
|
108
|
-
if (!options?.raw && detections[0]) {
|
|
109
|
-
detections = {
|
|
110
|
-
description: detections[0].description,
|
|
111
|
-
score: detections[0].score,
|
|
112
|
-
vertices: detections[0].boundingPoly.vertices,
|
|
113
|
-
};
|
|
114
|
-
}
|
|
115
|
-
return detections;
|
|
116
|
-
};
|
|
117
|
-
|
|
118
83
|
// https://github.com/naptha/tesseract.js#tesseractjs
|
|
119
84
|
// https://github.com/naptha/tesseract.js/blob/master/docs/image-format.md
|
|
120
|
-
const ocrImageTesseract = async (image, options) => {
|
|
85
|
+
const ocrImage = async (image, options) => {
|
|
121
86
|
const [content, lang, { createWorker }] = [
|
|
122
87
|
await convert(image, { input: options?.input, expected: BUFFER, errorMessage }),
|
|
123
88
|
ensureArray(options?.lang || DEFAULT_LANG).join('+'),
|
|
@@ -132,115 +97,28 @@ const ocrImageTesseract = async (image, options) => {
|
|
|
132
97
|
return options?.raw ? resp : resp.data.text;
|
|
133
98
|
};
|
|
134
99
|
|
|
135
|
-
const
|
|
136
|
-
let
|
|
137
|
-
if (
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
const annotateImage = async (image, options) => {
|
|
144
|
-
assert(client, 'Vision API has not been initialized.', 500);
|
|
145
|
-
const content = await convert(image, {
|
|
146
|
-
input: options?.input, expected: BASE64, errorMessage,
|
|
147
|
-
});
|
|
148
|
-
const [response] = await client.objectLocalization({ image: { content } });
|
|
149
|
-
let objects = response.localizedObjectAnnotations;
|
|
150
|
-
if (!options?.raw) {
|
|
151
|
-
objects = objects.map(x => ({
|
|
152
|
-
description: x.name,
|
|
153
|
-
score: x.score,
|
|
154
|
-
vertices: x.boundingPoly.normalizedVertices,
|
|
155
|
-
}));
|
|
156
|
-
}
|
|
157
|
-
return objects;
|
|
158
|
-
};
|
|
159
|
-
|
|
160
|
-
const see = async (image, options) => {
|
|
161
|
-
const [text, objects] = await Promise.all([
|
|
162
|
-
ocrImage(image, options), annotateImage(image, options),
|
|
163
|
-
]);
|
|
164
|
-
let result = { text, objects };
|
|
165
|
-
if (!options?.raw) {
|
|
166
|
-
result = [];
|
|
167
|
-
if (text?.description) {
|
|
168
|
-
result.push('text:', text.description);
|
|
169
|
-
}
|
|
170
|
-
if (objects.length) {
|
|
171
|
-
result.push('', 'objects:', ...objects.map(x => [
|
|
172
|
-
`- ${x.description}`, `score: ${ceil(x.score)}`,
|
|
173
|
-
`vertices: ${x.vertices.map(
|
|
174
|
-
l => `(${ceil(l.x)}, ${ceil(l.y)})`
|
|
175
|
-
).join(' ')}`,
|
|
176
|
-
].join('\n')));
|
|
177
|
-
}
|
|
178
|
-
result = trim(result.join('\n'));
|
|
100
|
+
const getPdfPage = async (doc, pages) => {
|
|
101
|
+
let [min, max, multiple] = [1, doc.numPages, Array.isArray(pages)];
|
|
102
|
+
if (!pages) {
|
|
103
|
+
pages = [];
|
|
104
|
+
for (let i = min; i <= max; i++) { pages.push(i); }
|
|
105
|
+
multiple = true;
|
|
179
106
|
}
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
107
|
+
pages = ensureArray(pages).map(
|
|
108
|
+
x => x >= min && x <= max ? ~~x : null
|
|
109
|
+
).filter(x => x);
|
|
110
|
+
assert(pages.length, 'Invalid page numbers.');
|
|
111
|
+
const result = await Promise.all(pages.map(p => (async p => {
|
|
112
|
+
const page = await doc.getPage(p);
|
|
113
|
+
const viewport = page.getViewport({ scale: 1.0 });
|
|
114
|
+
const res = {
|
|
115
|
+
pageNum: p, width: viewport.width, height: viewport.height,
|
|
116
|
+
content: (await page.getTextContent()).items.map(x => x.str).join(' '),
|
|
189
117
|
}
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
const result = await client.batchAnnotateFiles({
|
|
195
|
-
requests: [{ inputConfig: { mimeType, content }, features, pages }],
|
|
196
|
-
});
|
|
197
|
-
return options?.raw ? result : getTextFromBatch(result[0].responses[0]);
|
|
198
|
-
};
|
|
199
|
-
|
|
200
|
-
const readAll = async (image, options) => {
|
|
201
|
-
assert(client, 'Vision API has not been initialized.', 500);
|
|
202
|
-
const result = {};
|
|
203
|
-
result.upload = await uploadToCloud(image, {
|
|
204
|
-
destination: path.join(options?.prefix || '_vision', `${uuidv4()}.pdf`),
|
|
205
|
-
...options || {},
|
|
206
|
-
});
|
|
207
|
-
const uri = result.upload?.gs;
|
|
208
|
-
const destination = `${uri}_result/`;
|
|
209
|
-
const resultId = getIdByGs(destination);
|
|
210
|
-
result.clear = await deleteOnCloud(resultId);
|
|
211
|
-
result.submit = await client.asyncBatchAnnotateFiles({
|
|
212
|
-
requests: [{
|
|
213
|
-
inputConfig: { mimeType, gcsSource: { uri } },
|
|
214
|
-
outputConfig: { gcsDestination: { uri: destination } }, features,
|
|
215
|
-
}],
|
|
216
|
-
});
|
|
217
|
-
result.response = await result.submit[0].promise();
|
|
218
|
-
result.result = await downloadFromCloud(resultId, { expected: 'JSON' });
|
|
219
|
-
options?.keep || (result.cleanup = await Promise.all(
|
|
220
|
-
[getIdByGs(uri), resultId].map(deleteOnCloud)
|
|
221
|
-
));
|
|
222
|
-
return options?.raw ? result : Object.keys(result.result).map(
|
|
223
|
-
f => getTextFromBatch(result.result[f])
|
|
224
|
-
).flat();
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
const getPdfPage = async (doc, pageNum) => {
|
|
228
|
-
const page = await doc.getPage(pageNum);
|
|
229
|
-
const viewport = page.getViewport({ scale: 1.0 });
|
|
230
|
-
const result = {
|
|
231
|
-
pageNum: pageNum,
|
|
232
|
-
width: viewport.width,
|
|
233
|
-
height: viewport.height,
|
|
234
|
-
content: (await page.getTextContent()).items.map(x => x.str).join(' '),
|
|
235
|
-
};
|
|
236
|
-
page.cleanup();
|
|
237
|
-
return result
|
|
238
|
-
};
|
|
239
|
-
|
|
240
|
-
const getPdfPages = async (doc) => {
|
|
241
|
-
const result = [];
|
|
242
|
-
for (let i = 1; i <= doc.numPages; i++) { result.push(getPdfPage(doc, i)); }
|
|
243
|
-
return await Promise.all(result);
|
|
118
|
+
page.cleanup();
|
|
119
|
+
return res;
|
|
120
|
+
})(p)));
|
|
121
|
+
return multiple ? result : result[0];
|
|
244
122
|
};
|
|
245
123
|
|
|
246
124
|
// https://github.com/mozilla/pdf.js/blob/master/examples/node/getinfo.mjs
|
|
@@ -249,26 +127,121 @@ const getPdfInfo = async (file, options) => {
|
|
|
249
127
|
const doc = await getDocument(file).promise;
|
|
250
128
|
const data = await doc.getMetadata();
|
|
251
129
|
const result = {
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
pages: options?.withPages ? await getPdfPages(doc) : null,
|
|
130
|
+
info: data.info, metadata: { ...data.metadata?.getAll() },
|
|
131
|
+
numPages: doc.numPages, ...options.withDoc ? { doc } : {},
|
|
132
|
+
pages: options?.withPages ? await getPdfPage(doc) : null,
|
|
256
133
|
};
|
|
257
134
|
return result;
|
|
258
135
|
};
|
|
259
136
|
|
|
137
|
+
const ocr = async (file, options = {}) => {
|
|
138
|
+
let provider = ensureString(options?.provider, { case: 'UP' });
|
|
139
|
+
if (!provider && clients?.[GOOGLE_MISTRAL]) {
|
|
140
|
+
provider = GOOGLE_MISTRAL;
|
|
141
|
+
} else if (!provider && Object.keys(clients).length) {
|
|
142
|
+
provider = Object.keys(clients)[0];
|
|
143
|
+
}
|
|
144
|
+
const client = clients?.[provider];
|
|
145
|
+
assert(client, 'No available OCR provider.');
|
|
146
|
+
const model = options?.model || client.model;
|
|
147
|
+
switch (provider) {
|
|
148
|
+
case GOOGLE_MISTRAL:
|
|
149
|
+
const key = await getGoogleAuthTokenByAuth(client.auth);
|
|
150
|
+
const inputPdfs = await splitPdf(file, {
|
|
151
|
+
...options, expected: DATAURL, size: 2,
|
|
152
|
+
});
|
|
153
|
+
const resps = (await Promise.all(inputPdfs.map(
|
|
154
|
+
async document_url => await (await fetch(
|
|
155
|
+
`https://${client.region}-aiplatform.googleapis.com/v1/`
|
|
156
|
+
+ `projects/${client.project}/locations/${client.region}/`
|
|
157
|
+
+ `publishers/mistralai/models/${model}:rawPredict`, {
|
|
158
|
+
method: 'POST', headers: {
|
|
159
|
+
'Content-Type': 'application/json',
|
|
160
|
+
'Authorization': `Bearer ${key}`
|
|
161
|
+
}, body: JSON.stringify({
|
|
162
|
+
model, include_image_base64: true,
|
|
163
|
+
document: { type: 'document_url', document_url },
|
|
164
|
+
})
|
|
165
|
+
})).json()
|
|
166
|
+
))).filter(x => x?.pages?.length);
|
|
167
|
+
const resp = {
|
|
168
|
+
pages: [], usage_info: { pages_processed: 0, doc_size_bytes: 0 }
|
|
169
|
+
};
|
|
170
|
+
resps.map(x => {
|
|
171
|
+
x.pages.map(p => {
|
|
172
|
+
p.index = resp.pages.length;
|
|
173
|
+
resp.pages.push(p);
|
|
174
|
+
p.images.map(i => {
|
|
175
|
+
const oId = i.id;
|
|
176
|
+
i.id = `page-${p.index}-${oId}`;
|
|
177
|
+
p.markdown = p.markdown.replaceAll(
|
|
178
|
+
`![${oId}](${oId})`, `![${i.id}](${i.id})`
|
|
179
|
+
);
|
|
180
|
+
});
|
|
181
|
+
});
|
|
182
|
+
resp.model = x.model;
|
|
183
|
+
resp.usage_info.pages_processed += x.usage_info.pages_processed;
|
|
184
|
+
resp.usage_info.doc_size_bytes += x.usage_info.doc_size_bytes;
|
|
185
|
+
});
|
|
186
|
+
if (options?.raw) { return resp; }
|
|
187
|
+
else if (options?.paging) { return resp.pages; }
|
|
188
|
+
const markdown = [];
|
|
189
|
+
resp.images = {};
|
|
190
|
+
for (const p of resp.pages) {
|
|
191
|
+
markdown.push(p.markdown);
|
|
192
|
+
await Promise.all(p.images.map(async i => {
|
|
193
|
+
const id = i.id;
|
|
194
|
+
i.width = i.bottom_right_x - i.top_left_x;
|
|
195
|
+
i.height = i.bottom_right_y - i.top_left_y;
|
|
196
|
+
i.annotation = i.image_annotation;
|
|
197
|
+
i.data = await convert(i.image_base64, {
|
|
198
|
+
...options, input: 'DATAURL',
|
|
199
|
+
});
|
|
200
|
+
[
|
|
201
|
+
'id', 'image_annotation', 'image_base64', 'top_left_x',
|
|
202
|
+
'top_left_y', 'bottom_right_x', 'bottom_right_y',
|
|
203
|
+
].map(k => delete i[k]);
|
|
204
|
+
resp.images[id] = i;
|
|
205
|
+
}));
|
|
206
|
+
}
|
|
207
|
+
resp.text = markdown.join('\n\n');
|
|
208
|
+
delete resp.pages;
|
|
209
|
+
return resp;
|
|
210
|
+
default:
|
|
211
|
+
throw new Error('Invalid provider.');
|
|
212
|
+
}
|
|
213
|
+
};
|
|
214
|
+
|
|
215
|
+
const splitPdf = async (file, options) => {
|
|
216
|
+
const [content, { PDFDocument }] = await Promise.all([
|
|
217
|
+
convert(file, { ...options, expected: BUFFER }), need('pdf-lib')
|
|
218
|
+
]);
|
|
219
|
+
const [doc, result] = [await PDFDocument.load(content), []];
|
|
220
|
+
const count = doc.getPageCount();
|
|
221
|
+
const size = ~~options?.size || Infinity;
|
|
222
|
+
for (let i = 0; i < count; i += size) {
|
|
223
|
+
result.push((async () => {
|
|
224
|
+
const sub = await PDFDocument.create();
|
|
225
|
+
const copied = await sub.copyPages(doc, Array.from(
|
|
226
|
+
{ length: Math.min(size, count - i) }, (_, j) => i + j
|
|
227
|
+
));
|
|
228
|
+
copied.forEach(page => sub.addPage(page));
|
|
229
|
+
return await convert(Buffer.from(await sub.save()), {
|
|
230
|
+
...options, input: 'BUFFER',
|
|
231
|
+
});
|
|
232
|
+
})());
|
|
233
|
+
}
|
|
234
|
+
return await Promise.all(result);
|
|
235
|
+
};
|
|
236
|
+
|
|
237
|
+
export default init;
|
|
260
238
|
export {
|
|
261
239
|
_NEED,
|
|
262
|
-
annotateImage,
|
|
263
240
|
getPdfInfo,
|
|
264
241
|
getPdfPage,
|
|
265
|
-
getPdfPages,
|
|
266
242
|
init,
|
|
243
|
+
ocr,
|
|
267
244
|
ocrImage,
|
|
268
|
-
ocrImageGoogle,
|
|
269
|
-
ocrImageTesseract,
|
|
270
245
|
parseOfficeFile,
|
|
271
|
-
|
|
272
|
-
readAll,
|
|
273
|
-
see
|
|
246
|
+
splitPdf,
|
|
274
247
|
};
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "utilitas",
|
|
3
3
|
"description": "Just another common utility for JavaScript.",
|
|
4
|
-
"version": "2000.3.15",
|
|
4
|
+
"version": "2000.3.17",
|
|
5
5
|
"private": false,
|
|
6
6
|
"homepage": "https://github.com/Leask/utilitas",
|
|
7
7
|
"main": "index.mjs",
|
|
@@ -39,7 +39,6 @@
|
|
|
39
39
|
"@ffprobe-installer/ffprobe": "^2.1.2",
|
|
40
40
|
"@google-cloud/speech": "^7.2.1",
|
|
41
41
|
"@google-cloud/storage": "^7.17.3",
|
|
42
|
-
"@google-cloud/vision": "^5.3.4",
|
|
43
42
|
"@google/genai": "^1.30.0",
|
|
44
43
|
"@mozilla/readability": "github:mozilla/readability",
|
|
45
44
|
"@sentry/node": "^10.26.0",
|
|
@@ -63,6 +62,7 @@
|
|
|
63
62
|
"office-text-extractor": "^3.0.3",
|
|
64
63
|
"openai": "^6.9.1",
|
|
65
64
|
"pdfjs-dist": "^5.4.394",
|
|
65
|
+
"pdf-lib": "^1.17.1",
|
|
66
66
|
"pg": "^8.16.3",
|
|
67
67
|
"pgvector": "^0.2.1",
|
|
68
68
|
"ping": "^1.0.0",
|