@engine9-io/input-tools 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,741 @@
/* eslint-disable no-await-in-loop */
const fs = require('node:fs');

const fsp = fs.promises;
const path = require('node:path');
const zlib = require('node:zlib');
const {
  Readable, Transform, PassThrough,
} = require('node:stream');
const { pipeline } = require('node:stream/promises');
const { stringify } = require('csv');

const debug = require('debug')('FileWorker');
// const through2 = require('through2');
const csv = require('csv');
const JSON5 = require('json5'); // Useful for parsing extended JSON
const languageEncoding = require('detect-file-encoding-and-language');
const S3Worker = require('./S3');
const ParquetWorker = require('./Parquet');
const { streamPacket, bool, getTempDir } = require('./tools');

function Worker({ accountId }) { this.accountId = accountId; }

class LineReaderTransform extends Transform {
  constructor(options = {}) {
    super({ ...options, readableObjectMode: true });
    this.buffer = '';
  }

  // eslint-disable-next-line no-underscore-dangle
  _transform(chunk, encoding, callback) {
    this.buffer += chunk.toString();
    const lines = this.buffer.split(/\r?\n/);
    this.buffer = lines.pop();
    lines.forEach((line) => this.push(line));
    callback();
  }

  // eslint-disable-next-line no-underscore-dangle
  _flush(callback) {
    if (this.buffer) {
      this.push(this.buffer);
    }
    callback();
  }
}
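
/*
  Usage sketch (illustrative, not part of the package): LineReaderTransform
  buffers incoming text chunks and emits one complete line per object-mode push,
  regardless of how the chunks are split. The input strings below are made up.

    const { Readable } = require('node:stream');
    const lines = new LineReaderTransform();
    Readable.from(['first\nsecond\nthi', 'rd\n']).pipe(lines);
    lines.on('data', (line) => console.log(line)); // 'first', 'second', 'third'
*/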

Worker.prototype.csvToObjectTransforms = function (options) {
  const transforms = [];
  const delimiter = options.delimiter || ',';

  const headerMapping = options.headerMapping || function (d) { return d; };
  let lastLine = null;
  let head = null;

  const skipLinesWithError = bool(options.skip_lines_with_error, false);
  const parserOptions = {
    relax: true,
    skip_empty_lines: true,
    delimiter,
    max_limit_on_data_read: 10000000,
    skip_lines_with_error: skipLinesWithError,
  };
  if (options.skip) parserOptions.from_line = options.skip;
  if (options.relax_column_count) parserOptions.relax_column_count = true;
  if (options.quote_escape) {
    parserOptions.escape = options.quote_escape;
  }

  debug('Parser options=', parserOptions);
  const parser = csv.parse(parserOptions);
  parser.on('error', (error) => {
    debug('fileToObjectStream: Error parsing csv file');
    debug(lastLine);
    throw new Error(error);
  });

  const blankAndHeaderCheck = new Transform({
    objectMode: true,
    transform(row, enc, cb) {
      // Blank rows
      if (row.length === 0) return cb();
      if (row.length === 1 && !row[0]) return cb();

      if (!head) {
        head = row.map(headerMapping);
        return cb();
      }

      const o = {};
      head.forEach((_h, i) => {
        const h = _h.trim();
        if (h) {
          o[h] = row[i];
        }
      });

      lastLine = row.join(delimiter);
      return cb(null, o);
    },
  });

  transforms.push(parser);
  transforms.push(blankAndHeaderCheck);

  return { transforms };
};
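
/*
  Usage sketch (illustrative, not part of the package): the returned transforms
  are meant to be piped, in order, onto a raw CSV text stream, yielding one object
  per row keyed by the header line -- this mirrors how fileToObjectStream uses them.
  The accountId and CSV content below are made up.

    const { Readable } = require('node:stream');
    const worker = new Worker({ accountId: 'demo' });
    const { transforms } = worker.csvToObjectTransforms({});
    let stream = Readable.from(['id,name\n1,Alice\n2,Bob\n']);
    transforms.forEach((t) => { stream = stream.pipe(t); });
    stream.on('data', (row) => console.log(row)); // { id: '1', name: 'Alice' }, ...
*/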

Worker.prototype.detectEncoding = async function (options) {
  if (options.encoding_override) return { encoding: options.encoding_override };
  // Limit to only the top N bytes -- for performance.
  // Be wary, though, as gzip files may require a certain minimum number of bytes to decompress
  const bytes = 64 * 1024;
  const buff = Buffer.alloc(bytes);
  const fd = await fsp.open(options.filename);
  await fd.read(buff, 0, bytes);
  let finalBuff = buff;
  if (options.filename.slice(-3) === '.gz') {
    // Handle the case where the sampled buffer is a truncated gzip stream
    // rather than a complete file
    finalBuff = await new Promise((resolve, reject) => {
      const bufferBuilder = [];
      const decompressStream = zlib.createGunzip()
        .on('data', (chunk) => {
          bufferBuilder.push(chunk);
        }).on('close', () => {
          resolve(Buffer.concat(bufferBuilder));
        }).on('error', (err) => {
          // errno -5 (unexpected end of file) is expected here, since only a partial buffer is decompressed
          if (err.errno !== -5) {
            reject(err);
          }
        });
      decompressStream.write(buff);
      decompressStream.end();
    });
  }

  return languageEncoding(finalBuff);
};

Worker.prototype.detectEncoding.metadata = {
  options: {
    filename: { required: true },
  },
};
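
/*
  Usage sketch (illustrative, not part of the package): detectEncoding samples the
  first 64KB of a file (gunzipping it first for .gz files) and hands the bytes to
  detect-file-encoding-and-language. The filename below is hypothetical.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const { encoding } = await worker.detectEncoding({ filename: './data/example.csv' });
      console.log(encoding); // e.g. 'UTF-8'
    })();
*/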

/*
  Internal method to transform a file into a stream of objects.
*/
Worker.prototype.fileToObjectStream = async function (options) {
  const { filename, columns, limit: limitOption } = options;

  // handle stream item
  if (options.stream) {
    if (Array.isArray(options.stream)) {
      return { stream: Readable.from(options.stream) };
    }
    // probably already a stream
    if (typeof options.stream === 'object') return { stream: options.stream };
    throw new Error(`Invalid stream type:${typeof options.stream}`);
  }
  let limit;
  if (limitOption) limit = parseInt(limitOption, 10);
  if (!filename) throw new Error('fileToObjectStream: filename is required');
  let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
  if (postfix === 'zip') {
    debug('Invalid filename:', { filename });
    throw new Error('Cowardly refusing to turn a .zip file into an object stream, turn into a csv first');
  }

  const streamInfo = await this.stream({
    filename,
    columns,
    limit,
  });
  const { encoding } = streamInfo;
  let { stream } = streamInfo;
  if (!stream) throw new Error(`No stream found in fileToObjectStream from filename ${filename}`);
  if (encoding === 'object') {
    // already an object
    return { stream };
  }

  let count = 0;

  debug(`Reading file ${filename} with encoding:`, encoding);

  const head = null;
  let transforms = [];

  if (postfix === 'gz') {
    const gunzip = zlib.createGunzip();
    transforms.push(gunzip);
    gunzip.setEncoding(encoding);
    // encoding = null;// Default encoding
    postfix = filename.toLowerCase().split('.');
    postfix = postfix[postfix.length - 2];
    debug(`Using gunzip parser because postfix is .gz, encoding=${encoding}`);
  } else {
    stream.setEncoding(encoding);
  }

  if (postfix === 'csv') {
    const csvTransforms = this.csvToObjectTransforms({ ...options });
    transforms = transforms.concat(csvTransforms.transforms);
  } else if (postfix === 'txt') {
    const csvTransforms = this.csvToObjectTransforms({ ...options, delimiter: '\t' });
    transforms = transforms.concat(csvTransforms.transforms);
  } else if (postfix === 'jsonl') {
    /* JSON Lines: one JSON value per line. The first line may optionally be an
       array of field names, with the values in JSON arrays thereafter.
    */
    let headers = null;

    const lineReader = new LineReaderTransform();

    const jsonlTransform = new Transform({
      objectMode: true,
      transform(d, enc, cb) {
        if (!d) return cb();
        let obj;
        try {
          obj = JSON5.parse(d);
        } catch (e) {
          debug('Invalid line:');
          debug(d);
          throw e;
        }
        /* JSONL could potentially start with an array of names,
           in which case we need to map the subsequent values
        */
        if (headers === null) {
          if (Array.isArray(obj)) {
            headers = obj;
            return cb();
          }
          headers = false;
        }
        if (headers) {
          const mapped = {};
          headers.forEach((name, i) => { mapped[name] = obj[i]; });
          this.push(mapped);
        } else {
          this.push(obj);
        }
        return cb();
      },
    });

    transforms.push(lineReader);
    transforms.push(jsonlTransform);
  } else {
    throw new Error(`Unsupported file type: ${postfix}`);
  }
  const countAndDebug = new Transform({
    objectMode: true,
    transform(d, enc, cb) {
      if (count === 0) { debug('Sample object from file:', d); }
      count += 1;
      if ((count < 5000 && count % 1000 === 0) || (count % 50000 === 0)) {
        debug(`fileToObjectStream transformed ${count} lines`);
      }
      this.push(d);
      cb();
    },
    flush(cb) {
      // If there are no records at all, push a placeholder record and report 0 records
      debug(`Completed reading file, records=${count}`);
      if (count === 0) {
        const o = { _is_placeholder: true };

        if (head) head.forEach((c) => { o[c] = null; });
        this.push(o);
      }
      cb();
    },
  });

  transforms.push(countAndDebug);
  transforms.forEach((t) => {
    stream = stream.pipe(t);
  });

  return { stream };
};
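
/*
  Usage sketch (illustrative, not part of the package): fileToObjectStream is the
  internal building block behind sample/head/count. The resulting object-mode stream
  can be consumed with for await. The filename below is hypothetical.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const { stream } = await worker.fileToObjectStream({ filename: './data/example.csv' });
      for await (const row of stream) {
        console.log(row); // one object per CSV row, keyed by the header
      }
    })();
*/
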
Worker.prototype.getFileWriterStream = async function (options = {}) {
  const accountId = options.accountId || this.accountId;
  if (!accountId) throw new Error('getFileWriterStream has no accountId');
  const targetFormat = options.targetFormat || 'csv';
  const tempDir = await getTempDir({ accountId });
  let { fileExtendedType } = options;
  if (fileExtendedType) fileExtendedType += '.';
  else fileExtendedType = '';
  // This could change, but dates are easier to read in a filename than UUIDs,
  // so use a unique-ish timestamp-based name
  const uniqueNumberedDate = `${new Date().toISOString().replace(/[^0-9]*/g, '')}.${Math.floor(Math.random() * 1000)}`;
  let filename = `${tempDir}${path.sep}${uniqueNumberedDate}.${fileExtendedType}${targetFormat}`;
  if (bool(options.gzip, false)) filename += '.gz';
  const stream = fs.createWriteStream(filename);
  debug('FileWriterStream writing to file ', filename);

  return { filename, stream };
};

Worker.prototype.getOutputStreams = async function (options) {
  const { filename, stream: fileWriterStream } = await this.getFileWriterStream(options);

  let { transform } = options;
  if (typeof options.transform === 'function') {
    if (options.transform.length === 3) {
      transform = new Transform({
        objectMode: true,
        async transform(item, encoding, cb) {
          options.transform(item, encoding, cb);
        },
      });
    } else {
      transform = new Transform({
        objectMode: true,
        async transform(item, encoding, cb) {
          cb(null, options.transform(item));
        },
      });
    }
  } else if (options.transform) {
    transform = options.transform;
  }
  const { flatten } = options;
  let flattenTransform = null;

  if (bool(flatten, false)) {
    flattenTransform = new Transform({
      objectMode: true,
      async transform(item, enc, cb) {
        // first item establishes the keys to use
        let o = {};
        Object.keys(item).forEach((k) => {
          let v = item[k];
          if (!o[k]) {
            if (typeof v === 'object') {
              while (Array.isArray(v)) [v] = v; // get first array item
              o = { ...o, ...v };
            } else {
              o[k] = v;
            }
          }
        });
        cb(null, o);
      },
    });
  }

  const stats = {
    records: 0,
  };
  let stringifier;
  if (options.targetFormat === 'jsonl') {
    stringifier = new Transform({
      objectMode: true,
      transform(d, encoding, cb) {
        cb(false, `${JSON.stringify(d)}\n`);
      },
    });
  } else {
    stringifier = stringify({ header: true });
  }
  let gzip = new PassThrough();
  if (options.gzip) {
    gzip = zlib.createGzip();
  }
  const streams = [
    transform,
    flattenTransform,
    new Transform({
      objectMode: true,
      transform(d, enc, cb) {
        stats.records += 1;
        cb(null, d);
      },
    }),
    stringifier,
    gzip,
    fileWriterStream,
  ].filter(Boolean);
  return { filename, streams, stats };
};
Worker.prototype.objectStreamToFile = async function (options) {
  const { filename, streams, stats } = await this.getOutputStreams(options);
  const { stream: inStream } = options;
  streams.unshift(inStream);
  await pipeline(
    streams,
  );
  return { filename, records: stats.records };
};
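
/*
  Usage sketch (illustrative, not part of the package): objectStreamToFile pipes an
  object-mode stream through the configured transforms and stringifier into a
  generated temp file. The accountId and rows below are made up.

    const { Readable } = require('node:stream');
    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const rows = [{ id: 1, name: 'Alice' }, { id: 2, name: 'Bob' }];
      const { filename, records } = await worker.objectStreamToFile({
        stream: Readable.from(rows),
        targetFormat: 'csv', // default stringifier writes a header row
      });
      console.log(filename, records); // path to a temp .csv file, 2
    })();
*/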

Worker.prototype.transform = async function (options) {
  const worker = this;

  const filename = worker.getFilename(options);

  debug(`Transforming ${filename}`);

  options.filename = filename;
  let { stream } = await worker.fileToObjectStream(options);
  if (typeof stream.pipe !== 'function') {
    debug(stream);
    throw new Error('No pipe in stream');
  }

  let t = options.transform;

  // No longer need this
  delete options.transform;
  if (!t) {
    t = function (d, enc, cb) {
      d.is_test_transform = true;
      cb(null, d);
    };
  }

  if (!Array.isArray(t)) t = [t];
  Object.keys(t).forEach((key) => {
    let f = t[key];
    if (typeof f === 'function') {
      f = new Transform({
        objectMode: true,
        transform: f,
      });
    }

    stream = stream.pipe(f);
  });

  const { targetFormat } = options;

  if (!targetFormat && (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')) {
    options.targetFormat = 'csv';
  }

  return worker.objectStreamToFile({ ...options, stream });
};

Worker.prototype.transform.metadata = {
  options: {
    sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
    encoding: { description: 'Manual override of source file encoding' },
    names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
    values: { description: "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)" },
    targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
    targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
    targetRowDelimiter: { description: 'Row delimiter (default \n)' },
    targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' },
  },
};
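
/*
  Usage sketch (illustrative, not part of the package): a minimal transform run.
  Note that this method calls worker.getFilename(options), which is not defined in
  this file and is assumed to be provided elsewhere (e.g. by a shared worker base);
  the stand-in below and the filename are hypothetical.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      worker.getFilename = (o) => o.filename; // stand-in for the real helper
      const out = await worker.transform({
        filename: './data/example.csv',
        transform(d, enc, cb) { d.loaded_at = new Date().toISOString(); cb(null, d); },
      });
      console.log(out); // { filename: <temp .csv path>, records: <row count> }
    })();
*/
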
Worker.prototype.testTransform = async function (options) {
  return this.transform({
    ...options,
    transform(d, enc, cb) { d.transform_time = new Date(); cb(null, d); },
  });
};
Worker.prototype.testTransform.metadata = {
  options: {
    filename: true,
  },
};

/* Get a stream from an actual stream, or an array, or a file */
Worker.prototype.stream = async function (options) {
  const {
    stream: inputStream, packet, type, columns, limit,
    filename: filenameOpt,
  } = options;
  let filename = filenameOpt;

  if (inputStream) {
    if (Array.isArray(inputStream)) {
      return { stream: Readable.from(inputStream) };
    }
    // probably already a stream
    if (typeof inputStream === 'object') return { stream: inputStream, encoding: 'object' };
    throw new Error(`Invalid stream type:${typeof inputStream}`);
  } else if (filename) {
    if (filename.startsWith('engine9-accounts/')) {
      filename = `${process.env.ENGINE9_ACCOUNT_DIR}/${filename.slice('engine9-accounts/'.length)}`;
      debug(`Prepending file with ${process.env.ENGINE9_ACCOUNT_DIR}, filename=${filename}`);
    } else {
      debug(`Not prepending filename:${filename}`);
    }
    let encoding; let stream;
    if (filename.slice(-8) === '.parquet') {
      const pq = new ParquetWorker(this);
      stream = (await pq.stream({ filename, columns, limit })).stream;
      encoding = 'object';
    } else if (filename.indexOf('s3://') === 0) {
      const s3Worker = new S3Worker(this);
      stream = (await s3Worker.stream({ filename, columns, limit })).stream;
      encoding = 'UTF-8';
    } else {
      // Check that the file exists and fail fast if not --
      // otherwise the stream lingers as an open handle
      try {
        await fsp.stat(filename);
      } catch (e) {
        debug(`Error reading file ${filename}, current directory: ${process.cwd()},__dirname:${__dirname}`);
        throw e;
      }
      stream = fs.createReadStream(filename);
      encoding = (await this.detectEncoding({ filename })).encoding;
    }
    return { stream, encoding };
  } else if (packet) {
    let { stream: packetStream } = await streamPacket({ packet, type, limit });
    const { transforms } = this.csvToObjectTransforms({});
    transforms.forEach((t) => {
      packetStream = packetStream.pipe(t);
    });
    return { stream: packetStream };
  } else {
    throw new Error('stream must be passed a stream, filename, or packet');
  }
};
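
/*
  Usage sketch (illustrative, not part of the package): stream() accepts an
  in-memory array, an existing stream, a filename (local, s3://, or .parquet), or a
  packet handled by streamPacket. The array and filename below are made up.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const fromArray = await worker.stream({ stream: [{ id: 1 }, { id: 2 }] });
      const fromFile = await worker.stream({ filename: './data/example.csv' });
      console.log(fromArray.stream.readableObjectMode, fromFile.encoding); // true, e.g. 'UTF-8'
    })();
*/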

Worker.prototype.sample = async function (opts) {
  opts.limit = opts.limit || 10;
  const { stream } = await this.fileToObjectStream(opts);
  return stream.toArray();
};
Worker.prototype.sample.metadata = {
  options: {
    filename: {},
  },
};

Worker.prototype.write = async function (opts) {
  const { filename, content } = opts;
  if (filename.indexOf('s3://') === 0) {
    const s3Worker = new S3Worker(this);
    const parts = filename.split('/');
    const directory = parts.slice(0, -1).join('/');
    const file = parts.slice(-1)[0];
    // debug(JSON.stringify({ parts, directory, file }));
    await s3Worker.write({
      directory,
      file,
      content,
    });
  } else {
    await fsp.writeFile(filename, content);
  }
  return { success: true, filename };
};
Worker.prototype.write.metadata = {
  options: {
    filename: { description: 'Location to write content to, can be local or s3://' },
    content: {},
  },
};
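
/*
  Usage sketch (illustrative, not part of the package): write() saves string content
  either to a local path or, for s3:// filenames, through S3Worker. The paths and
  bucket below are made up.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      await worker.write({ filename: '/tmp/example.txt', content: 'hello' });
      await worker.write({ filename: 's3://example-bucket/some/path/example.txt', content: 'hello' });
    })();
*/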

async function streamToString(stream) {
  // Collect the stream's chunks and return them as a single utf-8 string
  const chunks = [];

  // eslint-disable-next-line no-restricted-syntax
  for await (const chunk of stream) {
    chunks.push(Buffer.from(chunk));
  }

  return Buffer.concat(chunks).toString('utf-8');
}
/*
  Retrieves a stream and parses it as (extended) JSON
*/
Worker.prototype.json = async function (opts) {
  const { stream } = await this.stream(opts);
  const str = await streamToString(stream);
  try {
    return JSON5.parse(str);
  } catch (e) {
    debug(e);
    throw new Error(`Unparseable JSON received: ${opts.filename || '(native stream)'}`);
  }
};
Worker.prototype.json.metadata = {
  options: {
    filename: { description: 'Get a javascript object from a file' },
  },
};
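
/*
  Usage sketch (illustrative, not part of the package): because json() parses with
  JSON5, files may contain comments and trailing commas. The filename below is
  hypothetical.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const config = await worker.json({ filename: './config.json5' });
      console.log(config);
    })();
*/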

Worker.prototype.list = async function ({ directory }) {
  if (!directory) throw new Error('directory is required');
  if (directory.indexOf('s3://') === 0) {
    const s3Worker = new S3Worker(this);
    return s3Worker.list({ directory });
  }
  const a = await fsp.readdir(directory, { withFileTypes: true });
  return a.map((f) => ({
    name: f.name,
    type: f.isDirectory() ? 'directory' : 'file',
  }));
};
Worker.prototype.list.metadata = {
  options: {
    directory: { required: true },
  },
};

Worker.prototype.listAll = async function ({ directory }) {
  if (!directory) throw new Error('directory is required');
  if (directory.indexOf('s3://') === 0) {
    const s3Worker = new S3Worker(this);
    return s3Worker.listAll({ directory });
  }
  const a = await fsp.readdir(directory, { recursive: true });

  return a.map((f) => `${directory}/${f}`);
};
Worker.prototype.listAll.metadata = {
  options: {
    directory: { required: true },
  },
};
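
/*
  Usage sketch (illustrative, not part of the package): list() returns immediate
  entries with their type, while listAll() recurses and returns full paths. The
  directory below is made up.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const entries = await worker.list({ directory: '/tmp/example' });
      console.log(entries); // [{ name: 'a.csv', type: 'file' }, ...]
      const paths = await worker.listAll({ directory: '/tmp/example' });
      console.log(paths); // ['/tmp/example/a.csv', '/tmp/example/sub/b.csv', ...]
    })();
*/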

Worker.prototype.empty = async function ({ directory }) {
  if (!directory) throw new Error('directory is required');
  if (directory.indexOf('s3://') === 0) {
    // currently not emptying S3 this way -- dangerous
    throw new Error('Cannot empty an s3:// directory');
  }
  const removed = [];
  // eslint-disable-next-line no-restricted-syntax
  for (const file of await fsp.readdir(directory)) {
    removed.push(file);
    await fsp.unlink(path.join(directory, file));
  }
  return { directory, removed };
};
Worker.prototype.empty.metadata = {
  options: {
    directory: { required: true },
  },
};

Worker.prototype.move = async function ({ filename, target }) {
  if (!target) throw new Error('target is required');
  if (target.indexOf('s3://') === 0) {
    const s3Worker = new S3Worker(this);
    const parts = target.split('/');
    return s3Worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
  }
  await fsp.mkdir(path.dirname(target), { recursive: true });
  await fsp.rename(filename, target);
  return { filename: target };
};
Worker.prototype.move.metadata = {
  options: {
    filename: {},
    target: {},
  },
};

Worker.prototype.stat = async function ({ filename }) {
  if (!filename) throw new Error('filename is required');
  if (filename.indexOf('s3://') === 0) {
    const s3Worker = new S3Worker(this);
    return s3Worker.stat({ filename });
  }
  const {
    ctime,
    birthtime,
    size,
  } = await fsp.stat(filename);
  const modifiedAt = new Date(ctime);
  let createdAt = birthtime;
  if (createdAt === 0 || !createdAt) createdAt = ctime;
  createdAt = new Date(createdAt);
  return {
    createdAt,
    modifiedAt,
    size,
  };
};
Worker.prototype.stat.metadata = {
  options: {
    filename: {},
  },
};

Worker.prototype.head = async function (options) {
  const { stream } = await this.fileToObjectStream(options);
  const chunks = [];

  const limit = options.limit || 3;
  let counter = 0;
  // eslint-disable-next-line no-restricted-syntax
  for await (const chunk of stream) {
    chunks.push(chunk);
    counter += 1;
    if (counter >= limit) break;
  }

  return chunks;
};

Worker.prototype.head.metadata = {
  options: {
    filename: { required: true },
  },
};

Worker.prototype.count = async function (options) {
  const { stream } = await this.fileToObjectStream(options);
  const sample = [];

  const limit = options.limit || 5;
  let records = 0;
  // eslint-disable-next-line no-restricted-syntax
  for await (const chunk of stream) {
    records += 1;
    if (records < limit) {
      sample.push(chunk);
    }
  }

  return { sample, records };
};

Worker.prototype.count.metadata = {
  options: {
    filename: { required: true },
  },
};
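
/*
  Usage sketch (illustrative, not part of the package): head() returns the first few
  parsed rows, while count() reads the whole file and returns the total plus a small
  sample. The filename below is hypothetical.

    (async () => {
      const worker = new Worker({ accountId: 'demo' });
      const firstRows = await worker.head({ filename: './data/example.csv', limit: 3 });
      const { records, sample } = await worker.count({ filename: './data/example.csv' });
      console.log(firstRows.length, records, sample.length); // 3, total rows, up to 4
    })();
*/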

module.exports = Worker;