utilitas 1995.2.2 → 1995.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -2
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/manifest.mjs +1 -1
- package/lib/storage.mjs +92 -8
- package/lib/vision.mjs +41 -22
- package/package.json +1 -1
package/lib/manifest.mjs
CHANGED
package/lib/storage.mjs
CHANGED
|
@@ -16,12 +16,13 @@ const _NEED = ['mime-types', '@google-cloud/storage'];
|
|
|
16
16
|
const errorMessage = 'Invalid file.';
|
|
17
17
|
const defaultMetadata = { cacheControl: 'public, max-age=31536000' };
|
|
18
18
|
/**
 * Build the storage.cloud.google.com URL for a bucket.
 * @param {string} bucket - Bucket name appended after the host.
 * @returns {string} `https://storage.cloud.google.com/<bucket>`.
 */
const getGcUrlByBucket = bucket => {
    const host = 'https://storage.cloud.google.com';
    return `${host}/${bucket}`;
};
|
|
19
|
+
/**
 * Build a `gs://` URI from an object id, dropping the last slash-delimited
 * segment of the id.
 * NOTE(review): presumably the dropped segment is the object generation that
 * the storage backend appends to ids — confirm against the upload metadata.
 * @param {string} id - Object id, e.g. `bucket/path/file/<suffix>`.
 * @returns {string} The `gs://` URI without the trailing segment.
 */
const getGs = id => {
    const uri = `gs://${id}`;
    // The scheme itself contains slashes, so lastIndexOf never returns -1.
    return uri.slice(0, uri.lastIndexOf('/'));
};
|
|
19
20
|
/**
 * Derive a two-level directory path from a file name: the first two
 * characters form the outer folder, the next two the inner folder.
 * @param {string} name - File name to shard.
 * @returns {string} The two segments joined as a path.
 */
const mapFilename = name => {
    const [outer, inner] = [name.slice(0, 2), name.slice(2, 4)];
    return join(outer, inner);
};
|
|
20
21
|
const [_zip, _unzip] = [__zip, __unzip].map(promisify);
|
|
21
22
|
const log = content => _log(content, import.meta.url);
|
|
22
23
|
|
|
23
|
-
const [NULL, BASE64, BUFFER, FILE, STREAM, encoding, BINARY, mode, dirMode]
|
|
24
|
-
= ['NULL', 'BASE64', 'BUFFER', 'FILE', 'STREAM', 'utf8', 'binary', '0644', '0755'];
|
|
24
|
+
const [NULL, BASE64, BUFFER, FILE, STREAM, TEXT, _JSON, encoding, BINARY, mode, dirMode]
|
|
25
|
+
= ['NULL', 'BASE64', 'BUFFER', 'FILE', 'STREAM', 'TEXT', 'JSON', 'utf8', 'binary', '0644', '0755'];
|
|
25
26
|
|
|
26
27
|
const [encodeBase64, encodeBinary, encodeNull]
|
|
27
28
|
= [{ encoding: BASE64 }, { encoding: BINARY }, { encoding: NULL }];
|
|
@@ -176,6 +177,7 @@ const outputFile = async (buffer, options) => {
|
|
|
176
177
|
return options?.file;
|
|
177
178
|
}
|
|
178
179
|
return await writeTempFile(buffer, {
|
|
180
|
+
filename: options?.filename,
|
|
179
181
|
encoding: _encoding, suffix: options?.suffix,
|
|
180
182
|
});
|
|
181
183
|
};
|
|
@@ -200,8 +202,16 @@ const convert = async (any, options) => {
|
|
|
200
202
|
(Buffer.isBuffer(any) || ArrayBuffer.isArrayBuffer(any)) ? BUFFER : options?.input,
|
|
201
203
|
options?.expected || BUFFER
|
|
202
204
|
].map(x => ensureString(x, { case: 'UP' }));
|
|
203
|
-
|
|
204
|
-
|
|
205
|
+
let [oriFile, meta, subExp] =
|
|
206
|
+
[input === FILE ? any : null, null, expected];
|
|
207
|
+
switch (expected) {
|
|
208
|
+
case STREAM:
|
|
209
|
+
subExp = FILE;
|
|
210
|
+
break;
|
|
211
|
+
case TEXT:
|
|
212
|
+
case _JSON:
|
|
213
|
+
subExp = BUFFER;
|
|
214
|
+
}
|
|
205
215
|
oriFile && (meta = await assertPath(any, 'F', 'R'));
|
|
206
216
|
if (input !== subExp) {
|
|
207
217
|
switch (`${input}-${subExp}`) {
|
|
@@ -214,7 +224,18 @@ const convert = async (any, options) => {
|
|
|
214
224
|
default: throwError('Invalid input or expected format.', 400);
|
|
215
225
|
}
|
|
216
226
|
}
|
|
217
|
-
|
|
227
|
+
switch (expected) {
|
|
228
|
+
case STREAM:
|
|
229
|
+
any = createReadStream(any);
|
|
230
|
+
break;
|
|
231
|
+
case TEXT:
|
|
232
|
+
case _JSON:
|
|
233
|
+
any = any.toString();
|
|
234
|
+
}
|
|
235
|
+
switch (expected) {
|
|
236
|
+
case _JSON:
|
|
237
|
+
any = JSON.parse(any);
|
|
238
|
+
}
|
|
218
239
|
oriFile && subExp !== FILE && options?.cleanup && await tryRm(ori, options);
|
|
219
240
|
return options?.meta ? { meta, content: any } : any;
|
|
220
241
|
};
|
|
@@ -322,16 +343,74 @@ const uploadToCloud = async (data, options) => {
|
|
|
322
343
|
suffix: options?.suffix, ...options || {},
|
|
323
344
|
});
|
|
324
345
|
const raw = await client.upload(file, {
|
|
325
|
-
gzip: true, destination: options?.destination
|
|
346
|
+
gzip: true, destination: options?.destination || join(
|
|
347
|
+
...options?.prefix ? [options.prefix] : [], basename(file)
|
|
348
|
+
), metadata: defaultMetadata, ...options || {},
|
|
326
349
|
});
|
|
327
350
|
const result = options?.raw ? raw : raw[0].metadata;
|
|
328
351
|
input !== FILE && await tryRm(file.path);
|
|
352
|
+
!options?.raw && result && (result.gs = getGs(result?.id));
|
|
353
|
+
return result;
|
|
354
|
+
};
|
|
355
|
+
|
|
356
|
+
/**
 * Download a single object from the initialized cloud storage client.
 * @param {string} path - Object path handed to `client.file()`.
 * @param {object} [options] - Forwarded to `download()` and `convert()`.
 *   `raw` returns the untouched download response; `text` stringifies the
 *   converted result.
 * @returns {Promise<*>} Raw response, converted content, or its string form.
 */
const downloadFileFromCloud = async (path, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const downloaded = await client.file(path).download(options);
    if (options?.raw) { return downloaded; }
    // The first element of the response is treated as a buffer for convert().
    const converted = await convert(
        downloaded[0], { ...options, input: BUFFER }
    );
    return options?.text ? converted.toString() : converted;
};
|
|
368
|
+
|
|
369
|
+
/**
 * Download one object, or every object under a folder, from cloud storage.
 * A path ending with `/` is treated as a folder: its objects are listed via
 * `lsOnCloud` and downloaded in parallel.
 * @param {string} path - Object path, or folder prefix ending with `/`.
 * @param {object} [options] - Forwarded to `downloadFileFromCloud`.
 * @returns {Promise<*>} The single download result, or for folders an object
 *   mapping each object name to its download result.
 */
const downloadFromCloud = async (path, options) => {
    assert(path, 'Path is required.', 400);
    const isFolder = path.endsWith('/');
    const paths = isFolder ? await lsOnCloud(path, { name: true }) : [path];
    const resp = await Promise.all(paths.map(
        x => downloadFileFromCloud(x, options)
    ));
    if (!isFolder) { return resp[0]; }
    // Pair names with results by index; avoids `for...in` on an array, which
    // would also visit inherited enumerable properties.
    return Object.fromEntries(paths.map((name, i) => [name, resp[i]]));
};
|
|
331
381
|
|
|
332
|
-
const existsOnCloud = async (destination) => {
|
|
382
|
+
/**
 * Check whether an object exists in the initialized cloud storage client.
 * @param {string} destination - Object path handed to `client.file()`.
 * @param {object} [options] - `raw` returns the untouched `exists()` response.
 * @returns {Promise<*>} Raw response, or `{}` when the first response element
 *   is truthy and `null` otherwise.
 */
const existsOnCloud = async (destination, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.file(destination).exists();
    if (options?.raw) { return response; }
    return response[0] ? {} : null;
};
|
|
387
|
+
|
|
388
|
+
/**
 * List objects in cloud storage whose names start with a prefix.
 * @param {string} prefix - Name prefix passed to `client.getFiles()`.
 * @param {object} [options] - Merged into the `getFiles()` query. `raw`
 *   returns the untouched response; `name` reduces each entry to its name.
 * @returns {Promise<*>} Raw response, an array of metadata objects, or an
 *   array of object names.
 */
const lsOnCloud = async (prefix, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.getFiles({ prefix, ...options });
    if (options?.raw) { return response; }
    const metadata = (response[0] || []).map(file => file.metadata);
    return options?.name ? metadata.map(item => item.name) : metadata;
};
|
|
396
|
+
|
|
397
|
+
/**
 * Delete a single object from the initialized cloud storage client.
 * @param {string} path - Object path handed to `client.file()`.
 * @param {object} [options] - Forwarded to `delete()`. `raw` returns the
 *   untouched response.
 * @returns {Promise<*>} Raw response, or the `toJSON()` form of its first
 *   element.
 */
const deleteFileOnCloud = async (path, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.file(path).delete(options);
    if (options?.raw) { return response; }
    return response[0].toJSON();
};
|
|
402
|
+
|
|
403
|
+
/**
 * Delete one object, or every object under a folder, from cloud storage.
 * A path ending with `/` is treated as a folder: its objects are listed via
 * `lsOnCloud` and deleted in parallel.
 * @param {string} path - Object path, or folder prefix ending with `/`.
 * @param {object} [options] - Forwarded to `deleteFileOnCloud`.
 * @returns {Promise<*>} The single delete result, or for folders an object
 *   mapping each object name to its delete result.
 */
const deleteOnCloud = async (path, options) => {
    assert(path, 'Path is required.', 400);
    const isFolder = path.endsWith('/');
    const paths = isFolder ? await lsOnCloud(path, { name: true }) : [path];
    const resp = await Promise.all(paths.map(
        x => deleteFileOnCloud(x, options)
    ));
    if (!isFolder) { return resp[0]; }
    // Pair names with results by index; avoids `for...in` on an array, which
    // would also visit inherited enumerable properties.
    return Object.fromEntries(paths.map((name, i) => [name, resp[i]]));
};
|
|
336
415
|
|
|
337
416
|
export {
|
|
@@ -339,6 +418,10 @@ export {
|
|
|
339
418
|
analyzeFile,
|
|
340
419
|
assertPath,
|
|
341
420
|
convert,
|
|
421
|
+
deleteFileOnCloud,
|
|
422
|
+
deleteOnCloud,
|
|
423
|
+
downloadFileFromCloud,
|
|
424
|
+
downloadFromCloud,
|
|
342
425
|
encodeBase64DataURL,
|
|
343
426
|
exists,
|
|
344
427
|
existsOnCloud,
|
|
@@ -350,6 +433,7 @@ export {
|
|
|
350
433
|
init,
|
|
351
434
|
isTextFile,
|
|
352
435
|
legalFilename,
|
|
436
|
+
lsOnCloud,
|
|
353
437
|
mapFilename,
|
|
354
438
|
mergeFile,
|
|
355
439
|
readFile,
|
package/lib/vision.mjs
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
import { convert } from './storage.mjs';
|
|
2
1
|
import { getApiKeyCredentials } from './encryption.mjs';
|
|
3
2
|
|
|
4
3
|
import {
|
|
5
|
-
|
|
4
|
+
convert, deleteOnCloud, downloadFromCloud, uploadToCloud,
|
|
5
|
+
} from './storage.mjs';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
ensureArray, ignoreErrFunc, log as _log, need, throwError, trim, timeout,
|
|
6
9
|
} from './utilitas.mjs';
|
|
7
10
|
|
|
8
11
|
const _NEED = ['@google-cloud/vision', 'tesseract.js'];
|
|
@@ -15,10 +18,11 @@ let client;
|
|
|
15
18
|
|
|
16
19
|
const init = async (options) => {
|
|
17
20
|
if (options) {
|
|
18
|
-
if (options?.apiKey) {
|
|
19
|
-
const sslCreds = await getApiKeyCredentials(options);
|
|
21
|
+
if (options?.credentials || options?.apiKey) {
|
|
20
22
|
const vision = (await need('@google-cloud/vision')).default;
|
|
21
|
-
client = new vision.ImageAnnotatorClient(
|
|
23
|
+
client = new vision.ImageAnnotatorClient(options?.apiKey ? {
|
|
24
|
+
sslCreds: await getApiKeyCredentials(options)
|
|
25
|
+
} : options);
|
|
22
26
|
} else { await checkTesseract({ assert: true }); }
|
|
23
27
|
}
|
|
24
28
|
assert(
|
|
@@ -133,22 +137,37 @@ const read = async (image, options) => {
|
|
|
133
137
|
return pages;
|
|
134
138
|
};
|
|
135
139
|
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
140
|
+
/**
 * OCR a whole document through the Vision batch file annotation API.
 *
 * The file is uploaded to cloud storage, any previous output under
 * `<uploaded uri>_result/` is removed, an async annotation job is submitted
 * and awaited, and the JSON output files are downloaded and parsed.
 *
 * @param {*} image - Document passed to `uploadToCloud` (annotated as PDF).
 * @param {object} [options] - Forwarded to `uploadToCloud`. `prefix`
 *   defaults to `_vision`; `raw` returns every intermediate result.
 * @returns {Promise<object|string[]>} With `raw`, an object holding `upload`,
 *   `clear`, `submit`, `response` and `result`; otherwise the text of each
 *   page, with an empty string for pages that have no text annotation.
 */
const readAll = async (image, options) => {
    assert(client, 'Vision API has not been initialized.', 500);
    // Storage helpers address objects by name, not by `gs://bucket/...` URI.
    const toObjectPath = uri => uri.replace(/^gs:\/\/[^/]*\/(.*)$/, '$1');
    const result = {};
    result.upload = await uploadToCloud(image, {
        gzip: false, ...options || {},
        // Applied after the spread so an explicit `prefix: undefined` cannot
        // erase the default, and so the caller's `raw` flag (which only
        // shapes this function's return value) does not strip the `gs`
        // field from the upload metadata.
        prefix: options?.prefix || '_vision', raw: false,
    });
    const uri = result.upload?.gs;
    assert(uri, 'Failed to resolve the uploaded file URI.', 500);
    const destination = `${uri}_result/`;
    // Clear stale output; the folder is addressed by object path so the
    // listing prefix can match stored object names.
    result.clear = await deleteOnCloud(toObjectPath(destination));
    result.submit = await client.asyncBatchAnnotateFiles({
        requests: [{
            inputConfig: { mimeType: 'application/pdf', gcsSource: { uri } },
            outputConfig: { gcsDestination: { uri: destination } },
            features: [{ type: 'DOCUMENT_TEXT_DETECTION' }],
        }],
    });
    result.response = await result.submit[0].promise();
    result.result = await downloadFromCloud(
        toObjectPath(
            result.response[0].responses[0].outputConfig.gcsDestination.uri
        ),
        { expected: 'JSON' }
    );
    if (options?.raw) { return result; }
    // Pages without recognized text carry no fullTextAnnotation; keep an
    // empty entry so array positions still line up with page order.
    return Object.values(result.result).flatMap(
        file => file.responses.map(page => page.fullTextAnnotation?.text ?? '')
    );
};
|
|
153
172
|
|
|
154
173
|
export {
|
|
@@ -159,6 +178,6 @@ export {
|
|
|
159
178
|
ocrImageGoogle,
|
|
160
179
|
ocrImageTesseract,
|
|
161
180
|
read,
|
|
162
|
-
|
|
181
|
+
readAll,
|
|
163
182
|
see,
|
|
164
183
|
};
|