utilitas 1995.2.2 → 1995.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/lib/manifest.mjs +1 -1
- package/lib/storage.mjs +94 -8
- package/lib/vision.mjs +40 -22
- package/package.json +1 -1
package/lib/manifest.mjs
CHANGED
package/lib/storage.mjs
CHANGED
|
@@ -16,12 +16,14 @@ const _NEED = ['mime-types', '@google-cloud/storage'];
|
|
|
16
16
|
const errorMessage = 'Invalid file.';
|
|
17
17
|
const defaultMetadata = { cacheControl: 'public, max-age=31536000' };
|
|
18
18
|
const getGcUrlByBucket = bucket => `https://storage.cloud.google.com/${bucket}`;
|
|
19
|
+
/**
 * Builds a `gs://` URI from an object id by prefixing the scheme and dropping
 * everything from the last `/` onward (i.e. the final path segment).
 * NOTE(review): presumably the dropped segment is the trailing generation
 * number of a Cloud Storage object id — confirm against the upload metadata.
 */
const getGsById = (id) => {
    const uri = `gs://${id}`;
    // The scheme itself contains '/', so lastIndexOf never returns -1 here.
    return uri.slice(0, uri.lastIndexOf('/'));
};
|
|
20
|
+
/**
 * Extracts the object path from a `gs://<bucket>/<path>` URI.
 * Strings that do not match that shape are returned unchanged.
 */
const getIdByGs = (gs) => {
    const gsUriPattern = /^gs:\/\/[^/]*\/(.*)$/;
    return gs.replace(gsUriPattern, (_whole, objectPath) => objectPath);
};
|
|
19
21
|
/**
 * Maps a name to a two-level relative directory path built from its first
 * two characters and its next two characters (e.g. 'abcdef' -> 'ab/cd').
 * Uses `slice` rather than the deprecated `substr`; results are identical.
 */
const mapFilename = name => join(name.slice(0, 2), name.slice(2, 4));
|
|
20
22
|
const [_zip, _unzip] = [__zip, __unzip].map(promisify);
|
|
21
23
|
const log = content => _log(content, import.meta.url);
|
|
22
24
|
|
|
23
|
-
const [NULL, BASE64, BUFFER, FILE, STREAM, encoding, BINARY, mode, dirMode]
|
|
24
|
-
= ['NULL', 'BASE64', 'BUFFER', 'FILE', 'STREAM', 'utf8', 'binary', '0644', '0755'];
|
|
25
|
+
const [NULL, BASE64, BUFFER, FILE, STREAM, TEXT, _JSON, encoding, BINARY, mode, dirMode]
|
|
26
|
+
= ['NULL', 'BASE64', 'BUFFER', 'FILE', 'STREAM', 'TEXT', 'JSON', 'utf8', 'binary', '0644', '0755'];
|
|
25
27
|
|
|
26
28
|
const [encodeBase64, encodeBinary, encodeNull]
|
|
27
29
|
= [{ encoding: BASE64 }, { encoding: BINARY }, { encoding: NULL }];
|
|
@@ -176,6 +178,7 @@ const outputFile = async (buffer, options) => {
|
|
|
176
178
|
return options?.file;
|
|
177
179
|
}
|
|
178
180
|
return await writeTempFile(buffer, {
|
|
181
|
+
filename: options?.filename,
|
|
179
182
|
encoding: _encoding, suffix: options?.suffix,
|
|
180
183
|
});
|
|
181
184
|
};
|
|
@@ -200,8 +203,16 @@ const convert = async (any, options) => {
|
|
|
200
203
|
(Buffer.isBuffer(any) || ArrayBuffer.isArrayBuffer(any)) ? BUFFER : options?.input,
|
|
201
204
|
options?.expected || BUFFER
|
|
202
205
|
].map(x => ensureString(x, { case: 'UP' }));
|
|
203
|
-
|
|
204
|
-
|
|
206
|
+
let [oriFile, meta, subExp] =
|
|
207
|
+
[input === FILE ? any : null, null, expected];
|
|
208
|
+
switch (expected) {
|
|
209
|
+
case STREAM:
|
|
210
|
+
subExp = FILE;
|
|
211
|
+
break;
|
|
212
|
+
case TEXT:
|
|
213
|
+
case _JSON:
|
|
214
|
+
subExp = BUFFER;
|
|
215
|
+
}
|
|
205
216
|
oriFile && (meta = await assertPath(any, 'F', 'R'));
|
|
206
217
|
if (input !== subExp) {
|
|
207
218
|
switch (`${input}-${subExp}`) {
|
|
@@ -214,7 +225,18 @@ const convert = async (any, options) => {
|
|
|
214
225
|
default: throwError('Invalid input or expected format.', 400);
|
|
215
226
|
}
|
|
216
227
|
}
|
|
217
|
-
|
|
228
|
+
switch (expected) {
|
|
229
|
+
case STREAM:
|
|
230
|
+
any = createReadStream(any);
|
|
231
|
+
break;
|
|
232
|
+
case TEXT:
|
|
233
|
+
case _JSON:
|
|
234
|
+
any = any.toString();
|
|
235
|
+
}
|
|
236
|
+
switch (expected) {
|
|
237
|
+
case _JSON:
|
|
238
|
+
any = JSON.parse(any);
|
|
239
|
+
}
|
|
218
240
|
oriFile && subExp !== FILE && options?.cleanup && await tryRm(ori, options);
|
|
219
241
|
return options?.meta ? { meta, content: any } : any;
|
|
220
242
|
};
|
|
@@ -322,16 +344,74 @@ const uploadToCloud = async (data, options) => {
|
|
|
322
344
|
suffix: options?.suffix, ...options || {},
|
|
323
345
|
});
|
|
324
346
|
const raw = await client.upload(file, {
|
|
325
|
-
gzip: true, destination: options?.destination
|
|
347
|
+
gzip: true, destination: options?.destination || join(
|
|
348
|
+
...options?.prefix ? [options.prefix] : [], basename(file)
|
|
349
|
+
), metadata: defaultMetadata, ...options || {},
|
|
326
350
|
});
|
|
327
351
|
const result = options?.raw ? raw : raw[0].metadata;
|
|
328
352
|
input !== FILE && await tryRm(file.path);
|
|
353
|
+
!options?.raw && result && (result.gs = getGsById(result?.id));
|
|
354
|
+
return result;
|
|
355
|
+
};
|
|
356
|
+
|
|
357
|
+
/**
 * Downloads a single object from the initialized cloud storage client.
 *
 * @param {string} path - Object path handed to `client.file()`.
 * @param {object} [options] - Forwarded to `download()` and to `convert()`.
 *   `raw`: return the untouched `download()` response.
 *   `text`: call `toString()` on the converted result before returning it.
 * @returns {Promise<*>} The raw response, or the first response element after
 *   `convert()` was applied with `input` forced to BUFFER.
 * @throws When the storage client has not been initialized.
 */
const downloadFileFromCloud = async (path, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.file(path).download(options);
    if (options?.raw) { return response; }
    // `input` is placed last so a caller-supplied `input` cannot override it.
    const converted = await convert(
        response[0], { ...(options || {}), input: BUFFER }
    );
    return options?.text ? converted.toString() : converted;
};
|
|
369
|
+
|
|
370
|
+
/**
 * Downloads one object, or every object under a folder prefix.
 *
 * A `path` ending in '/' is treated as a folder: object names are listed with
 * `lsOnCloud(path, { name: true })` and downloaded concurrently.
 *
 * @param {string} path - Object path, or a folder prefix ending in '/'.
 * @param {object} [options] - Forwarded to `downloadFileFromCloud()` for
 *   every object.
 * @returns {Promise<*>} The single download result, or for a folder an object
 *   mapping each listed name to its download result.
 * @throws When `path` is falsy.
 */
const downloadFromCloud = async (path, options) => {
    assert(path, 'Path is required.', 400);
    const isFolder = path.endsWith('/');
    const paths = isFolder ? await lsOnCloud(path, { name: true }) : [path];
    const resp = await Promise.all(paths.map(
        x => downloadFileFromCloud(x, options)
    ));
    if (!isFolder) { return resp[0]; }
    // Pair names with results by index; unlike `for...in` on the array this
    // never picks up inherited enumerable keys.
    return Object.fromEntries(paths.map((name, i) => [name, resp[i]]));
};
|
|
331
382
|
|
|
332
|
-
const existsOnCloud = async (destination) => {
|
|
383
|
+
/**
 * Checks whether an object exists in the initialized cloud storage client.
 *
 * @param {string} destination - Object path handed to `client.file()`.
 * @param {object} [options] - `raw`: return the untouched `exists()` response.
 * @returns {Promise<*>} The raw response, or `{}` when the first response
 *   element is truthy, otherwise `null`.
 * @throws When the storage client has not been initialized.
 */
const existsOnCloud = async (destination, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.file(destination).exists();
    if (options?.raw) { return response; }
    if (response[0]) { return {}; }
    return null;
};
|
|
388
|
+
|
|
389
|
+
/**
 * Lists objects under a prefix using the initialized cloud storage client.
 *
 * @param {string} prefix - Prefix passed to `client.getFiles()`.
 * @param {object} [options] - Merged into the `getFiles()` query.
 *   `raw`: return the untouched `getFiles()` response.
 *   `name`: return only each entry's `metadata.name`.
 * @returns {Promise<*>} The raw response, an array of `metadata` objects, or
 *   an array of names.
 * @throws When the storage client has not been initialized.
 */
const lsOnCloud = async (prefix, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.getFiles({ prefix, ...(options || {}) });
    if (options?.raw) { return response; }
    const metadataList = (response[0] || []).map(file => file.metadata);
    return options?.name ? metadataList.map(item => item.name) : metadataList;
};
|
|
397
|
+
|
|
398
|
+
/**
 * Deletes a single object through the initialized cloud storage client.
 *
 * @param {string} path - Object path handed to `client.file()`.
 * @param {object} [options] - Forwarded to `delete()`.
 *   `raw`: return the untouched `delete()` response.
 * @returns {Promise<*>} The raw response, or `toJSON()` of its first element.
 * @throws When the storage client has not been initialized.
 */
const deleteFileOnCloud = async (path, options) => {
    assert(client, 'Cloud storage has not been initialized.', 500);
    const response = await client.file(path).delete(options);
    if (options?.raw) { return response; }
    return response[0].toJSON();
};
|
|
403
|
+
|
|
404
|
+
/**
 * Deletes one object, or every object under a folder prefix.
 *
 * A `path` ending in '/' is treated as a folder: object names are listed with
 * `lsOnCloud(path, { name: true })` and deleted concurrently.
 *
 * @param {string} path - Object path, or a folder prefix ending in '/'.
 * @param {object} [options] - Forwarded to `deleteFileOnCloud()` for every
 *   object.
 * @returns {Promise<*>} The single delete result, or for a folder an object
 *   mapping each listed name to its delete result.
 * @throws When `path` is falsy.
 */
const deleteOnCloud = async (path, options) => {
    assert(path, 'Path is required.', 400);
    const isFolder = path.endsWith('/');
    const paths = isFolder ? await lsOnCloud(path, { name: true }) : [path];
    const resp = await Promise.all(paths.map(
        x => deleteFileOnCloud(x, options)
    ));
    if (!isFolder) { return resp[0]; }
    // Pair names with results by index; unlike `for...in` on the array this
    // never picks up inherited enumerable keys.
    return Object.fromEntries(paths.map((name, i) => [name, resp[i]]));
};
|
|
336
416
|
|
|
337
417
|
export {
|
|
@@ -339,17 +419,23 @@ export {
|
|
|
339
419
|
analyzeFile,
|
|
340
420
|
assertPath,
|
|
341
421
|
convert,
|
|
422
|
+
deleteFileOnCloud,
|
|
423
|
+
deleteOnCloud,
|
|
424
|
+
downloadFileFromCloud,
|
|
425
|
+
downloadFromCloud,
|
|
342
426
|
encodeBase64DataURL,
|
|
343
427
|
exists,
|
|
344
428
|
existsOnCloud,
|
|
345
429
|
getConfig,
|
|
346
430
|
getConfigFilename,
|
|
347
431
|
getGcUrlByBucket,
|
|
432
|
+
getIdByGs,
|
|
348
433
|
getTempPath,
|
|
349
434
|
handleError,
|
|
350
435
|
init,
|
|
351
436
|
isTextFile,
|
|
352
437
|
legalFilename,
|
|
438
|
+
lsOnCloud,
|
|
353
439
|
mapFilename,
|
|
354
440
|
mergeFile,
|
|
355
441
|
readFile,
|
package/lib/vision.mjs
CHANGED
|
@@ -1,8 +1,11 @@
|
|
|
1
|
-
import { convert } from './storage.mjs';
|
|
2
1
|
import { getApiKeyCredentials } from './encryption.mjs';
|
|
3
2
|
|
|
4
3
|
import {
|
|
5
|
-
|
|
4
|
+
convert, deleteOnCloud, downloadFromCloud, getIdByGs, uploadToCloud,
|
|
5
|
+
} from './storage.mjs';
|
|
6
|
+
|
|
7
|
+
import {
|
|
8
|
+
ensureArray, ignoreErrFunc, log as _log, need, throwError, trim, timeout,
|
|
6
9
|
} from './utilitas.mjs';
|
|
7
10
|
|
|
8
11
|
const _NEED = ['@google-cloud/vision', 'tesseract.js'];
|
|
@@ -15,10 +18,11 @@ let client;
|
|
|
15
18
|
|
|
16
19
|
const init = async (options) => {
|
|
17
20
|
if (options) {
|
|
18
|
-
if (options?.apiKey) {
|
|
19
|
-
const sslCreds = await getApiKeyCredentials(options);
|
|
21
|
+
if (options?.credentials || options?.apiKey) {
|
|
20
22
|
const vision = (await need('@google-cloud/vision')).default;
|
|
21
|
-
client = new vision.ImageAnnotatorClient(
|
|
23
|
+
client = new vision.ImageAnnotatorClient(options?.apiKey ? {
|
|
24
|
+
sslCreds: await getApiKeyCredentials(options)
|
|
25
|
+
} : options);
|
|
22
26
|
} else { await checkTesseract({ assert: true }); }
|
|
23
27
|
}
|
|
24
28
|
assert(
|
|
@@ -133,22 +137,36 @@ const read = async (image, options) => {
|
|
|
133
137
|
return pages;
|
|
134
138
|
};
|
|
135
139
|
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
140
|
+
/**
 * Runs document text detection over a whole file via the Vision client's
 * asynchronous batch API, staging input and output in cloud storage.
 *
 * Steps: upload the file with `uploadToCloud()`, clear any previous output
 * under `<uploaded gs uri>_result/`, submit `asyncBatchAnnotateFiles`
 * (mimeType 'application/pdf', DOCUMENT_TEXT_DETECTION), wait for the
 * operation, download the JSON output, then delete the staged objects unless
 * `options.keep` is set.
 *
 * @param {*} image - Input handed to `uploadToCloud()`.
 * @param {object} [options] - Also merged into the upload options.
 *   `prefix`: upload prefix, defaults to '_vision'.
 *   `keep`: skip deleting the uploaded file and the result folder.
 *   `raw`: return the collected intermediate results instead of page texts.
 * @returns {Promise<string[]|object>} One text entry per response (an empty
 *   string when a response has no `fullTextAnnotation`), or the intermediate
 *   results object when `options.raw` is set.
 * @throws When the Vision client is not initialized or the upload result
 *   carries no `gs` URI.
 */
const readAll = async (image, options) => {
    assert(client, 'Vision API has not been initialized.', 500);
    const result = {};
    result.upload = await uploadToCloud(image, {
        gzip: false, ...options || {},
        // Placed after the spread so an empty/undefined `options.prefix`
        // cannot override the default.
        prefix: options?.prefix || '_vision',
        // `options.raw` describes readAll's own return shape; the upload must
        // stay non-raw because its `gs` field is required below.
        raw: false,
    });
    const uri = result.upload?.gs;
    // Without a URI the cleanup below would target the literal prefix
    // 'undefined_result/'.
    assert(uri, 'Failed to upload file to cloud storage.', 500);
    const destination = `${uri}_result/`;
    const resultId = getIdByGs(destination);
    result.clear = await deleteOnCloud(resultId);
    result.submit = await client.asyncBatchAnnotateFiles({
        requests: [{
            inputConfig: { mimeType: 'application/pdf', gcsSource: { uri } },
            outputConfig: { gcsDestination: { uri: destination } },
            features: [{ type: 'DOCUMENT_TEXT_DETECTION' }],
        }],
    });
    result.response = await result.submit[0].promise();
    result.result = await downloadFromCloud(resultId, { expected: 'JSON' });
    if (!options?.keep) {
        // Wrap the call: passing `deleteOnCloud` directly to `map` would hand
        // it the array index as its `options` argument.
        result.cleanup = await Promise.all(
            [getIdByGs(uri), resultId].map(x => deleteOnCloud(x))
        );
    }
    if (options?.raw) { return result; }
    const pages = [];
    for (const file of Object.values(result.result)) {
        for (const response of file.responses || []) {
            // Responses without detected text keep their slot as ''.
            pages.push(response.fullTextAnnotation?.text ?? '');
        }
    }
    return pages;
};
|
|
153
171
|
|
|
154
172
|
export {
|
|
@@ -159,6 +177,6 @@ export {
|
|
|
159
177
|
ocrImageGoogle,
|
|
160
178
|
ocrImageTesseract,
|
|
161
179
|
read,
|
|
162
|
-
|
|
180
|
+
readAll,
|
|
163
181
|
see,
|
|
164
182
|
};
|