utilitas 2000.3.14 → 2000.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/alan.mjs CHANGED
@@ -45,7 +45,7 @@ You may be provided with some tools(functions) to help you gather information an
  const _NEED = ['js-tiktoken', 'OpenAI'];

  const [
- OPENAI, GEMINI, OLLAMA, NOVA, DEEPSEEK_R1, MD_CODE, CLOUD_SONNET_45, AUDIO,
+ OPENAI, GEMINI, OLLAMA, NOVA, DEEPSEEK_R1, MD_CODE, CLOUD_OPUS_45, AUDIO,
  WAV, ATTACHMENTS, OPENAI_VOICE, GPT_REASONING_EFFORT, THINK, THINK_STR,
  THINK_END, TOOLS_STR, TOOLS_END, TOOLS, TEXT, OK, FUNC, GPT_51,
  GPT_51_CODEX, GPT_5_IMAGE, GEMMA_3_27B, ANTHROPIC, v8k, ais,
@@ -57,7 +57,7 @@ const [
  GEMINI_30_PRO, GEMINI_25_FLASH,
  ] = [
  'OpenAI', 'Gemini', 'Ollama', 'nova', 'deepseek-r1', '```',
- 'claude-sonnet-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
+ 'claude-opus-4.5', 'audio', 'wav', '[ATTACHMENTS]', 'OPENAI_VOICE',
  'medium', 'think', '<think>', '</think>', '<tools>', '</tools>',
  'tools', 'text', 'OK', 'function', 'gpt-5.1', 'gpt-5.1-codex',
  'gpt-5-image', 'gemma3:27b', 'Anthropic', 7680 * 4320, [], 30,
@@ -160,7 +160,7 @@ const MODELS = {
  },
  [DEEPSEEK_R1]: DEEPSEEK_R1_RULES,
  [SF_DEEPSEEK_R1]: { ...DEEPSEEK_R1_RULES, defaultProvider: SILICONFLOW },
- [CLOUD_SONNET_45]: {
+ [CLOUD_OPUS_45]: {
  source: S_ANTHROPIC, icon: '✳️',
  contextWindow: kT(200), maxOutputTokens: kT(64),
  documentCostTokens: 3000 * 10, maxDocumentFile: m(32),
@@ -1053,7 +1053,7 @@ const distillFile = async (attachments, o) => {
  'You are an intelligent document analyzer.',
  '- You will receive various multimedia files, including images, audio, and videos.',
  '- Please analyze these documents, extract the information, and organize it into an easy-to-read format.',
- '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. You can use markdown table formatting to present table data. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images.',
+ '- For document-type files or image files primarily containing text information, act as a document scanner, return the text content, and describe any important images and tables present. Use markdown to format table and other rich text where possible. Use LaTeX for all formulas, subscripts, representations of formulas, and special symbols in mathematics and chemistry, enclosed by "$" symbols. Please mark the description of images in the same position as the original text without creating separate paragraphs for descriptions. Be sure ONLY describe important images and graphs, and ignore backgrounds and decorative small images. Ensure the returned document is clean, well-organized, and highly readable.',
  '- For audio files, please provide a transcript of the spoken voices. If there are background noises or music, attempt to briefly describe the environmental sounds and music sections.',
  '- For images or video files that are not primarily text-based, describe the scene you observe, highlight key details, convey the emotional tone of the setting, and share your impressions.',
  '- For video files, please describe the content, including the theme, subjects, characters, scenes, objects, storyline, and emotional tone.',
@@ -1144,7 +1144,7 @@ export {
  _NEED,
  _NO_RENDER,
  ATTACHMENTS,
- CLOUD_SONNET_45,
+ CLOUD_OPUS_45,
  CODE_INTERPRETER,
  DEEPSEEK_R1,
  DEFAULT_MODELS,
package/lib/encryption.mjs CHANGED
@@ -10,10 +10,7 @@ import { base64Decode, base64Encode, ensureString, hexEncode, need } from './uti
  import { networkInterfaces } from 'os';

  const _NEED = [
- '@google-cloud/speech',
- '@google-cloud/text-to-speech',
- '@google-cloud/vision',
- 'google-gax',
+ '@google-cloud/speech', '@google-cloud/text-to-speech', 'google-gax',
  ];

  const defaultAlgorithm = 'sha256';
@@ -58,16 +55,6 @@ const hexToBigInt = (hex) => {
  return BigInt(hex, 16).toString(10);
  };

- const getApiKeyCredentials = async (options) => {
- // Included in @google-cloud/vision, @google-cloud/speech and @google-cloud/text-to-speech
- const { GoogleAuth, grpc } = await need('google-gax');
- const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
- return grpc.credentials.combineChannelCredentials(
- grpc.credentials.createSsl(),
- grpc.credentials.createFromGoogleCredential(authClient)
- );
- };
-
  // Default 256-bit key: (256 / 8 = 32) bytes * 8 bits/byte = 256 bits
  const aesCreateKey = (options) => {
  const key = _upkKey(options?.key) || random((options?.length || 256) / 8);
@@ -124,6 +111,30 @@ const aesDecrypt = (any, options) => {
  return decrypted;
  };

+ const getGoogleApiKeyCredentials = async (options) => {
+ // Included in @google-cloud/speech and @google-cloud/text-to-speech
+ const { GoogleAuth, grpc } = await need('google-gax');
+ const authClient = new GoogleAuth().fromAPIKey(options?.apiKey);
+ return grpc.credentials.combineChannelCredentials(
+ grpc.credentials.createSsl(),
+ grpc.credentials.createFromGoogleCredential(authClient)
+ );
+ };
+
+ const getGoogleAuthByCredentials = async (keyFilename) => {
+ const { GoogleAuth } = await need('google-gax');
+ return (new GoogleAuth({
+ keyFilename, scopes: ['https://www.googleapis.com/auth/cloud-platform'],
+ })).getClient();
+ };
+
+ const getGoogleAuthTokenByAuth = async (auth) => {
+ const resp = await auth.getAccessToken();
+ const token = resp?.token || null;
+ assert(token, 'Failed to get Google API token.');
+ return token;
+ }
+
  export {
  _NEED,
  aesCreateIv,
@@ -133,7 +144,9 @@ export {
  defaultAlgorithm,
  defaultEncryption,
  digestObject,
- getApiKeyCredentials,
+ getGoogleApiKeyCredentials,
+ getGoogleAuthByCredentials,
+ getGoogleAuthTokenByAuth,
  getSortedQueryString,
  hash as sha256,
  hash,
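
Taken together, the renamed getGoogleApiKeyCredentials and the two new helpers replace the removed Vision-specific credential path: getGoogleAuthByCredentials wraps a service-account key file in a google-gax GoogleAuth client, and getGoogleAuthTokenByAuth exchanges that client for a bearer token usable against Google Cloud REST endpoints (which is how the rewritten vision.mjs calls Vertex AI). A minimal sketch of that flow; the import path and the ./sa-key.json key file are assumptions, not part of this diff:

```js
import {
    getGoogleAuthByCredentials, getGoogleAuthTokenByAuth,
} from './lib/encryption.mjs'; // assumed path when importing from the package source tree

// Build an authenticated google-gax client from a service-account key file
// (hypothetical filename; the key needs the cloud-platform scope).
const auth = await getGoogleAuthByCredentials('./sa-key.json');

// Exchange the client for a short-lived bearer token for Google Cloud REST calls.
const token = await getGoogleAuthTokenByAuth(auth);
console.log('Got bearer token of length', token.length);
```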
package/lib/manifest.mjs CHANGED
@@ -1,7 +1,7 @@
  const manifest = {
  "name": "utilitas",
  "description": "Just another common utility for JavaScript.",
- "version": "2000.3.14",
+ "version": "2000.3.16",
  "private": false,
  "homepage": "https://github.com/Leask/utilitas",
  "main": "index.mjs",
@@ -28,7 +28,6 @@ const manifest = {
  "@ffprobe-installer/ffprobe": "^2.1.2",
  "@google-cloud/speech": "^7.2.1",
  "@google-cloud/storage": "^7.17.3",
- "@google-cloud/vision": "^5.3.4",
  "@google/genai": "^1.30.0",
  "@mozilla/readability": "github:mozilla/readability",
  "@sentry/node": "^10.26.0",
package/lib/speech.mjs CHANGED
@@ -1,5 +1,5 @@
  import { DEFAULT_MODELS, OPENAI_VOICE, countTokens, k } from './alan.mjs';
- import { getApiKeyCredentials, hash } from './encryption.mjs';
+ import { getGoogleApiKeyCredentials, hash } from './encryption.mjs';
  import { getFfmpeg, packPcmToWav } from './media.mjs';
  import { get } from './web.mjs';
  import { convert, getTempPath } from './storage.mjs';
@@ -124,7 +124,7 @@ const init = async (options) => {
  }
  if (options?.stt) {
  const stt = (await need('@google-cloud/speech')).default;
- const sslCreds = await getApiKeyCredentials(options);
+ const sslCreds = await getGoogleApiKeyCredentials(options);
  clients.stt = new stt.SpeechClient({ sslCreds });
  }
  break;
package/lib/vision.mjs CHANGED
@@ -1,50 +1,40 @@
  import {
- convert, deleteOnCloud, downloadFromCloud, getIdByGs, uploadToCloud,
- } from './storage.mjs';
+ log as _log, ensureArray, ensureString, need, throwError,
+ } from './utilitas.mjs';

  import {
- log as _log,
- ensureArray, ensureString, ignoreErrFunc,
- need, throwError,
- trim,
- } from './utilitas.mjs';
+ getGoogleAuthByCredentials, getGoogleAuthTokenByAuth,
+ } from './encryption.mjs';

+ import { convert, DATAURL, BUFFER, FILE } from './storage.mjs';
  import fs from 'node:fs';
- import path from 'node:path';
- import { v4 as uuidv4 } from 'uuid';
- import { getApiKeyCredentials } from './encryption.mjs';
-
- const _NEED = [
- '@google-cloud/vision', 'office-text-extractor', 'pdfjs-dist',
- 'tesseract.js',
- ];

- const [BASE64, BUFFER, FILE, DEFAULT_LANG] = ['BASE64', 'BUFFER', 'FILE', 'eng'];
- const ceil = num => num.toFixed(4);
+ const _NEED = ['office-text-extractor', 'pdfjs-dist', 'tesseract.js'];
+ const clients = {};
  const errorMessage = 'Invalid input data.';
- const getTextFromBatch = b => b.responses.map(p => p?.fullTextAnnotation?.text || '');
- const DOCUMENT_TEXT_DETECTION = 'DOCUMENT_TEXT_DETECTION';
- const features = [{ type: DOCUMENT_TEXT_DETECTION }];
- const mimeType = 'application/pdf';
- const pages = [1, 2, 3, 4, 5]; // max 5 pages limit for batchAnnotateFiles API
  const log = content => _log(content, import.meta.url);
-
- let client;
+ const [DEFAULT_LANG, GOOGLE_MISTRAL, MISTRAL_OCR_MODEL]
+ = ['eng', 'GOOGLE_MISTRAL', 'mistral-ocr-2505'];

  const init = async (options) => {
- if (options) {
- if (options?.credentials || options?.apiKey) {
- const vision = (await need('@google-cloud/vision')).default;
- client = new vision.ImageAnnotatorClient(options?.apiKey ? {
- sslCreds: await getApiKeyCredentials(options)
- } : options);
- } else { await checkTesseract({ assert: true }); }
+ const provider = ensureString(options?.provider || GOOGLE_MISTRAL, { case: 'UP' });
+ switch (provider) {
+ case GOOGLE_MISTRAL:
+ assert(
+ options.credentials && options.project,
+ 'Google credentials and project must be set.'
+ );
+ clients[provider] = {
+ auth: await getGoogleAuthByCredentials(options.credentials),
+ project: options?.project,
+ region: options?.region || 'us-central1',
+ model: options?.model || MISTRAL_OCR_MODEL,
+ };
+ break;
+ default:
+ throw new Error('Invalid provider.');
  }
- assert(
- client || await checkTesseract(),
- 'Vision API client has not been initialized.', 501
- );
- return client;
+ return clients;
  };

  const parseOfficeFile = async (source, options) => {
@@ -90,34 +80,9 @@ const parseOfficeFile = async (source, options) => {
  }
  };

- const checkTesseract = async (options) => {
- const result = !!(await ignoreErrFunc(() => need('tesseract.js')));
- options?.assert && assert(result, 'Tesseract API is not available.', 500);
- return result;
- };
-
- const ocrImageGoogle = async (image, options) => {
- assert(client, 'Vision API has not been initialized.', 500);
- const { content, cleanup } = await convert(image, {
- input: options?.input, expected: FILE, errorMessage,
- withCleanupFunc: true,
- });
- const [response] = await client.textDetection(content);
- await cleanup();
- let detections = response.textAnnotations;
- if (!options?.raw && detections[0]) {
- detections = {
- description: detections[0].description,
- score: detections[0].score,
- vertices: detections[0].boundingPoly.vertices,
- };
- }
- return detections;
- };
-
  // https://github.com/naptha/tesseract.js#tesseractjs
  // https://github.com/naptha/tesseract.js/blob/master/docs/image-format.md
- const ocrImageTesseract = async (image, options) => {
+ const ocrImage = async (image, options) => {
  const [content, lang, { createWorker }] = [
  await convert(image, { input: options?.input, expected: BUFFER, errorMessage }),
  ensureArray(options?.lang || DEFAULT_LANG).join('+'),
@@ -132,115 +97,28 @@ const ocrImageTesseract = async (image, options) => {
  return options?.raw ? resp : resp.data.text;
  };

- const ocrImage = async (image, options) => {
- let engine;
- if (client) { engine = ocrImageGoogle; }
- else if (await checkTesseract()) { engine = ocrImageTesseract; }
- else { throwError('Vision engine has not been initialized.', 500); }
- return await engine(image, options);
- };
-
- const annotateImage = async (image, options) => {
- assert(client, 'Vision API has not been initialized.', 500);
- const content = await convert(image, {
- input: options?.input, expected: BASE64, errorMessage,
- });
- const [response] = await client.objectLocalization({ image: { content } });
- let objects = response.localizedObjectAnnotations;
- if (!options?.raw) {
- objects = objects.map(x => ({
- description: x.name,
- score: x.score,
- vertices: x.boundingPoly.normalizedVertices,
- }));
+ const getPdfPage = async (doc, pages) => {
+ let [min, max, multiple] = [1, doc.numPages, Array.isArray(pages)];
+ if (!pages) {
+ pages = [];
+ for (let i = min; i <= max; i++) { pages.push(i); }
+ multiple = true;
  }
- return objects;
- };
-
- const see = async (image, options) => {
- const [text, objects] = await Promise.all([
- ocrImage(image, options), annotateImage(image, options),
- ]);
- let result = { text, objects };
- if (!options?.raw) {
- result = [];
- if (text?.description) {
- result.push('text:', text.description);
+ pages = ensureArray(pages).map(
+ x => x >= min && x <= max ? ~~x : null
+ ).filter(x => x);
+ assert(pages.length, 'Invalid page numbers.');
+ const result = await Promise.all(pages.map(p => (async p => {
+ const page = await doc.getPage(p);
+ const viewport = page.getViewport({ scale: 1.0 });
+ const res = {
+ pageNum: p, width: viewport.width, height: viewport.height,
+ content: (await page.getTextContent()).items.map(x => x.str).join(' '),
  }
- if (objects.length) {
- result.push('', 'objects:', ...objects.map(x => [
- `- ${x.description}`, `score: ${ceil(x.score)}`,
- `vertices: ${x.vertices.map(
- l => `(${ceil(l.x)}, ${ceil(l.y)})`
- ).join(' ')}`,
- ].join('\n')));
- }
- result = trim(result.join('\n'));
- }
- return result;
- };
-
- const read = async (image, options) => {
- assert(client, 'Vision API has not been initialized.', 500);
- if (options?.allPages) {
- assert(options?.input === FILE, 'Only file input is supported.', 400);
- if ((await getPdfInfo(image)).numPages > pages.length) {
- return await readAll(image, options);
- }
- }
- const content = await convert(image, {
- input: options?.input, expected: BASE64, errorMessage,
- });
- const result = await client.batchAnnotateFiles({
- requests: [{ inputConfig: { mimeType, content }, features, pages }],
- });
- return options?.raw ? result : getTextFromBatch(result[0].responses[0]);
- };
-
- const readAll = async (image, options) => {
- assert(client, 'Vision API has not been initialized.', 500);
- const result = {};
- result.upload = await uploadToCloud(image, {
- destination: path.join(options?.prefix || '_vision', `${uuidv4()}.pdf`),
- ...options || {},
- });
- const uri = result.upload?.gs;
- const destination = `${uri}_result/`;
- const resultId = getIdByGs(destination);
- result.clear = await deleteOnCloud(resultId);
- result.submit = await client.asyncBatchAnnotateFiles({
- requests: [{
- inputConfig: { mimeType, gcsSource: { uri } },
- outputConfig: { gcsDestination: { uri: destination } }, features,
- }],
- });
- result.response = await result.submit[0].promise();
- result.result = await downloadFromCloud(resultId, { expected: 'JSON' });
- options?.keep || (result.cleanup = await Promise.all(
- [getIdByGs(uri), resultId].map(deleteOnCloud)
- ));
- return options?.raw ? result : Object.keys(result.result).map(
- f => getTextFromBatch(result.result[f])
- ).flat();
- };
-
- const getPdfPage = async (doc, pageNum) => {
- const page = await doc.getPage(pageNum);
- const viewport = page.getViewport({ scale: 1.0 });
- const result = {
- pageNum: pageNum,
- width: viewport.width,
- height: viewport.height,
- content: (await page.getTextContent()).items.map(x => x.str).join(' '),
- };
- page.cleanup();
- return result
- };
-
- const getPdfPages = async (doc) => {
- const result = [];
- for (let i = 1; i <= doc.numPages; i++) { result.push(getPdfPage(doc, i)); }
- return await Promise.all(result);
+ page.cleanup();
+ return res;
+ })(p)));
+ return multiple ? result : result[0];
  };

  // https://github.com/mozilla/pdf.js/blob/master/examples/node/getinfo.mjs
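
getPdfPage now takes a single page number, an array of page numbers, or nothing at all (all pages), absorbing the old getPdfPage/getPdfPages pair; combined with the withDoc option added to getPdfInfo in the next hunk, per-page text extraction might look like this sketch (the import path and report.pdf file are hypothetical):

```js
import { getPdfInfo, getPdfPage } from './lib/vision.mjs'; // assumed path from the package source tree

// withDoc returns the pdfjs-dist document handle alongside the metadata.
const { doc, numPages } = await getPdfInfo('./report.pdf', { withDoc: true });

// A single page number returns one page object; an array returns an array.
const first = await getPdfPage(doc, 1);
const firstTwo = await getPdfPage(doc, [1, 2]);
console.log(numPages, first.content.slice(0, 80), firstTwo.length);
```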
@@ -249,26 +127,51 @@ const getPdfInfo = async (file, options) => {
  const doc = await getDocument(file).promise;
  const data = await doc.getMetadata();
  const result = {
- numPages: doc.numPages,
- info: data.info,
- metadata: { ...data.metadata?.getAll() },
- pages: options?.withPages ? await getPdfPages(doc) : null,
+ info: data.info, metadata: { ...data.metadata?.getAll() },
+ numPages: doc.numPages, ...options.withDoc ? { doc } : {},
+ pages: options?.withPages ? await getPdfPage(doc) : null,
  };
  return result;
  };

+ const ocr = async (file, options = {}) => {
+ let provider = ensureString(options?.provider, { case: 'UP' });
+ if (!provider && clients?.[GOOGLE_MISTRAL]) {
+ provider = GOOGLE_MISTRAL;
+ } else if (!provider && Object.keys(clients).length) {
+ provider = Object.keys(clients)[0];
+ }
+ const client = clients?.[provider];
+ assert(client, 'No available OCR provider.');
+ const model = options?.model || client.model;
+ const document_url = await convert(file, { ...options, expected: DATAURL });
+ switch (provider) {
+ case GOOGLE_MISTRAL:
+ const key = await getGoogleAuthTokenByAuth(client.auth);
+ return await (await fetch(
+ `https://${client.region}-aiplatform.googleapis.com/v1/`
+ + `projects/${client.project}/locations/${client.region}/`
+ + `publishers/mistralai/models/${model}:rawPredict`, {
+ method: 'POST', headers: {
+ 'Content-Type': 'application/json',
+ 'Authorization': `Bearer ${key}`
+ }, body: JSON.stringify({
+ model, include_image_base64: true,
+ document: { type: 'document_url', document_url },
+ })
+ })).json();
+ default:
+ throw new Error('Invalid provider.');
+ }
+ };
+
+ export default init;
  export {
  _NEED,
- annotateImage,
  getPdfInfo,
  getPdfPage,
- getPdfPages,
  init,
+ ocr,
  ocrImage,
- ocrImageGoogle,
- ocrImageTesseract,
  parseOfficeFile,
- read,
- readAll,
- see
  };
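
With @google-cloud/vision gone, init() now registers a GOOGLE_MISTRAL provider (Mistral OCR served through Google Vertex AI) using the new auth helpers, ocr() posts the document as a data URL to the rawPredict endpoint, and ocrImage() keeps the local tesseract.js path. A minimal usage sketch; the import path, credential file, project ID, and input files are assumptions, and tesseract.js must be installed for ocrImage:

```js
import init, { ocr, ocrImage } from './lib/vision.mjs'; // assumed path from the package source tree

// Register the default GOOGLE_MISTRAL provider; credentials and project are required.
await init({
    credentials: './sa-key.json', // hypothetical service-account key file
    project: 'my-gcp-project',    // hypothetical Google Cloud project ID
    // region defaults to 'us-central1', model to 'mistral-ocr-2505'
});

// Returns the raw JSON response from the Vertex AI rawPredict call.
const result = await ocr('./contract.pdf');
console.log(JSON.stringify(result, null, 2));

// Local OCR via tesseract.js is still available for plain images.
const text = await ocrImage('./receipt.png', { lang: 'eng' });
console.log(text);
```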
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
  "name": "utilitas",
  "description": "Just another common utility for JavaScript.",
- "version": "2000.3.14",
+ "version": "2000.3.16",
  "private": false,
  "homepage": "https://github.com/Leask/utilitas",
  "main": "index.mjs",
@@ -39,7 +39,6 @@
  "@ffprobe-installer/ffprobe": "^2.1.2",
  "@google-cloud/speech": "^7.2.1",
  "@google-cloud/storage": "^7.17.3",
- "@google-cloud/vision": "^5.3.4",
  "@google/genai": "^1.30.0",
  "@mozilla/readability": "github:mozilla/readability",
  "@sentry/node": "^10.26.0",