@engine9-io/input-tools 1.7.9 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/file/FileUtilities.js +198 -135
  2. package/package.json +2 -1
package/file/FileUtilities.js CHANGED
@@ -1,17 +1,15 @@
- /* eslint-disable no-await-in-loop */
  const fs = require('node:fs');
 
  const fsp = fs.promises;
  const path = require('node:path');
  const zlib = require('node:zlib');
- const {
-   Readable, Transform, PassThrough, Writable,
- } = require('node:stream');
+ const { Readable, Transform, PassThrough, Writable } = require('node:stream');
  const { pipeline } = require('node:stream/promises');
  const { stringify } = require('csv');
 
  const debug = require('debug')('FileWorker');
 
+ const { getXlsxStream } = require('xlstream');
  const csv = require('csv');
  const JSON5 = require('json5');
  const languageEncoding = require('detect-file-encoding-and-language');
@@ -20,10 +18,18 @@ const S3Worker = require('./S3');
  const ParquetWorker = require('./Parquet');
 
  const {
-   bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
+   bool,
+   getTempFilename,
+   getStringArray,
+   getTempDir,
+   makeStrings,
+   streamPacket,
+   relativeDate
  } = require('./tools');
 
- function Worker({ accountId }) { this.accountId = accountId; }
+ function Worker({ accountId }) {
+   this.accountId = accountId;
+ }
 
  class LineReaderTransform extends Transform {
    constructor(options = {}) {
@@ -31,7 +37,6 @@ class LineReaderTransform extends Transform {
      this.buffer = '';
    }
 
-   // eslint-disable-next-line no-underscore-dangle
    _transform(chunk, encoding, callback) {
      this.buffer += chunk.toString();
      const lines = this.buffer.split(/\r?\n/);
@@ -40,7 +45,6 @@
      callback();
    }
 
-   // eslint-disable-next-line no-underscore-dangle
    _flush(callback) {
      if (this.buffer) {
        this.push(this.buffer);
@@ -53,7 +57,11 @@ Worker.prototype.csvToObjectTransforms = function (options) {
    const transforms = [];
    const delimiter = options.delimiter || ',';
 
-   const headerMapping = options.headerMapping || function (d) { return d; };
+   const headerMapping =
+     options.headerMapping ||
+     function (d) {
+       return d;
+     };
    let lastLine = null;
    let head = null;
 
@@ -63,7 +71,7 @@ Worker.prototype.csvToObjectTransforms = function (options) {
      skip_empty_lines: true,
      delimiter,
      max_limit_on_data_read: 10000000,
-     skip_lines_with_error: skipLinesWithError,
+     skip_lines_with_error: skipLinesWithError
    };
    if (options.skip) parserOptions.from_line = options.skip;
    if (options.relax_column_count) parserOptions.relax_column_count = true;
@@ -101,7 +109,7 @@ Worker.prototype.csvToObjectTransforms = function (options) {
 
        lastLine = row.join(delimiter);
        return cb(null, o);
-     },
+     }
    });
 
    transforms.push(parser);
@@ -124,12 +132,15 @@ Worker.prototype.detectEncoding = async function (options) {
      // needed chunk size.
      finalBuff = await new Promise((resolve, reject) => {
        const bufferBuilder = [];
-       const decompressStream = zlib.createGunzip()
+       const decompressStream = zlib
+         .createGunzip()
          .on('data', (chunk) => {
            bufferBuilder.push(chunk);
-         }).on('close', () => {
+         })
+         .on('close', () => {
            resolve(Buffer.concat(bufferBuilder));
-         }).on('error', (err) => {
+         })
+         .on('error', (err) => {
            if (err.errno !== -5) {
              // EOF: expected
              reject(err);
@@ -145,15 +156,57 @@ Worker.prototype.detectEncoding = async function (options) {
 
  Worker.prototype.detectEncoding.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
+ };
+
+ Worker.prototype.xlsxToObjectStream = async function (options) {
+   let { filename } = options;
+
+   if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+     // We need to copy and delete
+     let worker = null;
+     if (filename.startsWith('r2://')) {
+       worker = new R2Worker(this);
+     } else {
+       worker = new S3Worker(this);
+     }
+     const target = getTempFilename({ targetFilename: filename.split('/').pop() });
+
+     await worker.copy({ filename, target });
+     filename = target;
+   }
+   let stream = await getXlsxStream({
+     filePath: filename,
+     sheet: 0
+   });
+   let keys = null;
+   stream = stream.pipe(
+     new Transform({
+       objectMode: true,
+       transform(d, enc, cb) {
+         if (!keys) {
+           keys = d?.raw.arr;
+           cb();
+         } else {
+           let o = {};
+           keys.forEach((k, i) => {
+             o[k] = d?.raw?.arr?.[i];
+           });
+           cb(null, o);
+         }
+       }
+     })
+   );
+
+   return { stream };
  };
 
  /*
- Internal method to transform a file into a stream of objects.
+ Commonly used method to transform a file into a stream of objects.
  */
  Worker.prototype.fileToObjectStream = async function (options) {
-   const { filename, columns, limit: limitOption,format:formatOverride } = options;
+   const { filename, columns, limit: limitOption, format: formatOverride } = options;
 
    // handle stream item
    if (options.stream) {
@@ -167,6 +220,9 @@ Worker.prototype.fileToObjectStream = async function (options) {
    let limit;
    if (limitOption) limit = parseInt(limitOption, 10);
    if (!filename) throw new Error('fileToObjectStream: filename is required');
+   if (filename.split('.').pop().toLowerCase() === 'xlsx') {
+     return this.xlsxToObjectStream(options);
+   }
    let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
    if (postfix === 'zip') {
      debug('Invalid filename:', { filename });
@@ -176,7 +232,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const streamInfo = await this.stream({
      filename,
      columns,
-     limit,
+     limit
    });
    const { encoding } = streamInfo;
    let { stream } = streamInfo;
@@ -203,7 +259,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    } else {
      stream.setEncoding(encoding);
    }
-   let format=formatOverride || postfix;
+   let format = formatOverride || postfix;
 
    if (format === 'csv') {
      const csvTransforms = this.csvToObjectTransforms({ ...options });
@@ -243,13 +299,15 @@ Worker.prototype.fileToObjectStream = async function (options) {
        }
        if (headers) {
          const mapped = {};
-         headers.forEach((name, i) => { mapped[name] = obj[i]; });
+         headers.forEach((name, i) => {
+           mapped[name] = obj[i];
+         });
          this.push(mapped);
        } else {
          this.push(obj);
        }
        return cb();
-     },
+     }
    });
 
    transforms.push(lineReader);
@@ -260,9 +318,11 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const countAndDebug = new Transform({
      objectMode: true,
      transform(d, enc, cb) {
-       if (count === 0) { debug('Sample object from file:', d); }
+       if (count === 0) {
+         debug('Sample object from file:', d);
+       }
        count += 1;
-       if ((count < 5000 && count % 1000 === 0) || (count % 50000 === 0)) {
+       if ((count < 5000 && count % 1000 === 0) || count % 50000 === 0) {
          debug(`fileToObjectStream transformed ${count} lines`);
        }
        this.push(d);
@@ -279,7 +339,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
        this.push(o);
      } */
        cb();
-     },
+     }
    });
 
    transforms.push(countAndDebug);
@@ -319,14 +379,14 @@ Worker.prototype.getOutputStreams = async function (options) {
        objectMode: true,
        async transform(item, encoding, cb) {
          options.transform(item, encoding, cb);
-       },
+       }
      });
    } else {
      transform = new Transform({
        objectMode: true,
        async transform(item, encoding, cb) {
          cb(null, options.transform(item));
-       },
+       }
      });
    }
  } else if (options.transform) {
@@ -345,7 +405,7 @@ Worker.prototype.getOutputStreams = async function (options) {
        let v = item[k];
        if (!o[k]) {
          if (typeof v === 'object') {
-           while (Array.isArray(v)) [v] = v;// get first array item
+           while (Array.isArray(v)) [v] = v; // get first array item
            o = { ...o, ...v };
          } else {
            o[k] = v;
@@ -353,12 +413,12 @@ Worker.prototype.getOutputStreams = async function (options) {
        }
      });
      cb(null, o);
-   },
+   }
  });
  }
 
  const stats = {
-   records: 0,
+   records: 0
  };
  let stringifier;
  if (options.targetFormat === 'jsonl') {
@@ -366,7 +426,7 @@ Worker.prototype.getOutputStreams = async function (options) {
      objectMode: true,
      transform(d, encoding, cb) {
        cb(false, `${JSON.stringify(d)}\n`);
-     },
+     }
    });
  } else {
    stringifier = stringify({ header: true });
@@ -383,11 +443,11 @@ Worker.prototype.getOutputStreams = async function (options) {
      transform(d, enc, cb) {
        stats.records += 1;
        cb(null, d);
-     },
+     }
    }),
    stringifier,
    gzip,
-   fileWriterStream,
+   fileWriterStream
  ].filter(Boolean);
  return { filename, streams, stats };
  };
@@ -395,9 +455,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
    const { filename, streams, stats } = await this.getOutputStreams(options);
    const { stream: inStream } = options;
    streams.unshift(inStream);
-   await pipeline(
-     streams,
-   );
+   await pipeline(streams);
    return { filename, records: stats.records };
  };
 
@@ -432,7 +490,7 @@ Worker.prototype.transform = async function (options) {
    if (typeof f === 'function') {
      f = new Transform({
        objectMode: true,
-       transform: f,
+       transform: f
      });
    }
 
@@ -441,7 +499,10 @@ Worker.prototype.transform = async function (options) {
 
    const { targetFormat } = options;
 
-   if (!targetFormat && (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')) {
+   if (
+     !targetFormat &&
+     (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')
+   ) {
      options.targetFormat = 'csv';
    }
 
@@ -453,33 +514,34 @@ Worker.prototype.transform.metadata = {
    sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
    encoding: { description: 'Manual override of source file encoding' },
    names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
-   values: { description: "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)" },
+   values: {
+     description:
+       "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)"
+   },
    targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
    targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
    targetRowDelimiter: { description: 'Row delimiter (default \n)' },
-   targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' },
- },
+   targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' }
+ }
  };
  Worker.prototype.testTransform = async function (options) {
    return this.transform({
      ...options,
-     transform(d, enc, cb) { d.transform_time = new Date(); cb(null, d); },
+     transform(d, enc, cb) {
+       d.transform_time = new Date();
+       cb(null, d);
+     }
    });
  };
  Worker.prototype.testTransform.metadata = {
    options: {
-     filename: true,
-   },
+     filename: true
+   }
  };
 
  /* Get a stream from an actual stream, or an array, or a file */
- Worker.prototype.stream = async function (
-   options,
- ) {
-   const {
-     stream: inputStream, packet, type, columns, limit,
-     filename: filenameOpt,
-   } = options;
+ Worker.prototype.stream = async function (options) {
+   const { stream: inputStream, packet, type, columns, limit, filename: filenameOpt } = options;
    let filename = filenameOpt;
 
    if (inputStream) {
@@ -496,7 +558,8 @@ Worker.prototype.stream = async function (
    } else {
      // debug(`Not prepending filename:${filename}`);
    }
-   let encoding; let stream;
+   let encoding;
+   let stream;
    if (filename.slice(-8) === '.parquet') {
      const pq = new ParquetWorker(this);
      stream = (await pq.stream({ filename, columns, limit })).stream;
@@ -541,9 +604,8 @@ Worker.prototype.sample = async function (opts) {
  };
  Worker.prototype.sample.metadata = {
    options: {
-     filename: {},
-
-   },
+     filename: {}
+   }
  };
  Worker.prototype.toArray = async function (opts) {
    const { stream } = await this.fileToObjectStream(opts);
@@ -551,8 +613,8 @@
  };
  Worker.prototype.toArray.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };
 
  Worker.prototype.write = async function (opts) {
@@ -566,7 +628,7 @@ Worker.prototype.write = async function (opts) {
    await worker.write({
      directory,
      file,
-     content,
+     content
    });
  } else {
    await fsp.writeFile(filename, content);
@@ -576,15 +638,14 @@ Worker.prototype.write = async function (opts) {
  Worker.prototype.write.metadata = {
    options: {
      filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
-     content: {},
-   },
+     content: {}
+   }
  };
 
  async function streamToString(stream) {
    // lets have a ReadableStream as a stream variable
    const chunks = [];
 
-   // eslint-disable-next-line no-restricted-syntax
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
@@ -606,47 +667,46 @@ Worker.prototype.json = async function (opts) {
  };
  Worker.prototype.json.metadata = {
    options: {
-     filename: { description: 'Get a javascript object from a file' },
-   },
+     filename: { description: 'Get a javascript object from a file' }
+   }
  };
 
- Worker.prototype.list = async function ({ directory, start:s, end:e }) {
+ Worker.prototype.list = async function ({ directory, start: s, end: e }) {
    if (!directory) throw new Error('directory is required');
-   let start=null;
-   let end=null;
-   if (s) start=relativeDate(s);
-   if (e) end=relativeDate(e);
-
+   let start = null;
+   let end = null;
+   if (s) start = relativeDate(s);
+   if (e) end = relativeDate(e);
+
    if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
      const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.list({ directory, start, end });
    }
    const a = await fsp.readdir(directory, { withFileTypes: true });
 
-   const withModified=[];
+   const withModified = [];
    for (const file of a) {
-     const fullPath = path.join(directory, file.name);
-     const stats = await fsp.stat(fullPath);
-     if (start && stats.mtime<start.getTime()){
-       //do not include
-     }else if (end && stats.mtime>end.getTime()){
-       //do nothing
-     }else{
-       withModified.push({
-         name:file.name,
-         type: file.isDirectory() ? 'directory' : 'file',
-         modifiedAt:new Date(stats.mtime).toISOString(),
-       });
-     }
+     const fullPath = path.join(directory, file.name);
+     const stats = await fsp.stat(fullPath);
+     if (start && stats.mtime < start.getTime()) {
+       //do not include
+     } else if (end && stats.mtime > end.getTime()) {
+       //do nothing
+     } else {
+       withModified.push({
+         name: file.name,
+         type: file.isDirectory() ? 'directory' : 'file',
+         modifiedAt: new Date(stats.mtime).toISOString()
+       });
+     }
    }
-
+
    return withModified;
-
  };
  Worker.prototype.list.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };
 
  Worker.prototype.listAll = async function ({ directory }) {
@@ -661,8 +721,8 @@ Worker.prototype.listAll = async function ({ directory }) {
  };
  Worker.prototype.listAll.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };
 
  Worker.prototype.empty = async function ({ directory }) {
@@ -672,7 +732,7 @@ Worker.prototype.empty = async function ({ directory }) {
      throw new Error('Cannot empty an s3:// or r2:// directory');
    }
    const removed = [];
-   // eslint-disable-next-line no-restricted-syntax
+
    for (const file of await fsp.readdir(directory)) {
      removed.push(file);
      await fsp.unlink(path.join(directory, file));
@@ -681,8 +741,8 @@ Worker.prototype.empty = async function ({ directory }) {
  };
  Worker.prototype.empty.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };
 
  Worker.prototype.remove = async function ({ filename }) {
@@ -705,16 +765,18 @@ Worker.prototype.remove = async function ({ filename }) {
  };
  Worker.prototype.remove.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };
 
  Worker.prototype.move = async function ({ filename, target }) {
    if (!target) throw new Error('target is required');
    if (typeof target !== 'string') throw new Error(`target isn't a string:${JSON.stringify(target)}`);
    if (target.startsWith('s3://') || target.startsWith('r2://')) {
-     if ((target.startsWith('s3://') && filename.startsWith('r2://'))
-     || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+     if (
+       (target.startsWith('s3://') && filename.startsWith('r2://')) ||
+       (target.startsWith('r2://') && filename.startsWith('s3://'))
+     ) {
        throw new Error('Cowardly not copying between services');
      }
 
@@ -741,8 +803,8 @@ Worker.prototype.move = async function ({ filename, target }) {
  Worker.prototype.move.metadata = {
    options: {
      filename: {},
-     target: {},
-   },
+     target: {}
+   }
  };
 
  Worker.prototype.stat = async function ({ filename }) {
@@ -751,11 +813,7 @@ Worker.prototype.stat = async function ({ filename }) {
      const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.stat({ filename });
    }
-   const {
-     ctime,
-     birthtime,
-     size,
-   } = await fsp.stat(filename);
+   const { ctime, birthtime, size } = await fsp.stat(filename);
    const modifiedAt = new Date(ctime);
    let createdAt = birthtime;
    if (createdAt === 0 || !createdAt) createdAt = ctime;
@@ -763,13 +821,13 @@ Worker.prototype.stat = async function ({ filename }) {
    return {
      createdAt,
      modifiedAt,
-     size,
+     size
    };
  };
  Worker.prototype.stat.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };
 
  Worker.prototype.download = async function ({ filename }) {
@@ -782,8 +840,8 @@ Worker.prototype.download = async function ({ filename }) {
  };
  Worker.prototype.download.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };
 
  Worker.prototype.head = async function (options) {
@@ -792,7 +850,7 @@ Worker.prototype.head = async function (options) {
    const chunks = [];
 
    let counter = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      chunks.push(chunk);
      counter += 1;
@@ -804,8 +862,8 @@ Worker.prototype.head = async function (options) {
 
  Worker.prototype.head.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };
 
  Worker.prototype.count = async function (options) {
@@ -814,7 +872,7 @@ Worker.prototype.count = async function (options) {
 
    const limit = options.limit || 5;
    let records = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      records += 1;
      if (records < limit) {
@@ -827,8 +885,8 @@ Worker.prototype.count = async function (options) {
 
  Worker.prototype.count.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };
 
  // Get a set of unique entries from a uniqueFunction
@@ -839,10 +897,10 @@ Worker.prototype.getUniqueSet = async function (options) {
 
    let { uniqueFunction } = options;
    if (!uniqueFunction) {
-     uniqueFunction = ((o) => JSON.stringify(o));
+     uniqueFunction = (o) => JSON.stringify(o);
    }
    const uniqueSet = new Set();
-   // eslint-disable-next-line no-restricted-syntax, guard-for-in
+
    for (const filename of existingFiles) {
      const { stream: existsStream } = await this.fileToObjectStream({ filename });
      await pipeline(
@@ -856,14 +914,14 @@
          }
          uniqueSet.add(v);
          cb(null, d);
-       },
+       }
      }),
      new Writable({
        objectMode: true,
        write(d, enc, cb) {
          cb();
-       },
-     }),
+       }
+     })
    );
    debug(`Finished loading ${filename}`);
  }
@@ -875,7 +933,7 @@ Worker.prototype.getUniqueStream = async function (options) {
 
    const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
      filenames: options.existingFiles,
-     uniqueFunction: options.uniqueFunction,
+     uniqueFunction: options.uniqueFunction
    });
 
    const { stream: inStream } = await this.fileToObjectStream(options);
@@ -899,8 +957,8 @@ Worker.prototype.getUniqueStream = async function (options) {
        }
        cb(null, d);
      }
-   },
- }),
+   }
+ })
  );
  return { stream: uniqueStream, sample };
  };
@@ -912,9 +970,9 @@ Worker.prototype.getUniqueStream.metadata = {
    filename: { description: 'Specify a source filename or a stream' },
    stream: { description: 'Specify a source filename or a stream' },
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };
  Worker.prototype.getUniqueFile = async function (options) {
    const { stream, sample } = await this.getUniqueStream(options);
@@ -929,9 +987,9 @@ Worker.prototype.getUniqueFile.metadata = {
    filename: { description: 'Specify a source filename or a stream' },
    stream: { description: 'Specify a source filename or a stream' },
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };
 
  /*
@@ -940,7 +998,11 @@ Requires 2 passes of the files,
  but that's a better tradeoff than trying to store huge files in memory
  */
  Worker.prototype.diff = async function ({
-   fileA, fileB, uniqueFunction: ufOpt, fields, includeDuplicateSourceRecords,
+   fileA,
+   fileB,
+   uniqueFunction: ufOpt,
+   fields,
+   includeDuplicateSourceRecords
  }) {
    if (ufOpt && fields) throw new Error('fields and uniqueFunction cannot both be specified');
    let uniqueFunction = ufOpt;
@@ -953,17 +1015,18 @@ Worker.prototype.diff = async function ({
      existingFiles: [fileB],
      filename: fileA,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });
    const right = await this.getUniqueFile({
      existingFiles: [fileA],
      filename: fileB,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });
 
    return {
-     left, right,
+     left,
+     right
    };
  };
  Worker.prototype.diff.metadata = {
@@ -973,9 +1036,9 @@ Worker.prototype.diff.metadata = {
    fields: { description: 'Fields to use for uniqueness -- aka primary key. Defaults to JSON of line' },
    uniqueFunction: {},
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };
 
  module.exports = Worker;
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@engine9-io/input-tools",
-   "version": "1.7.9",
+   "version": "1.8.0",
    "description": "Tools for dealing with Engine9 inputs",
    "main": "index.js",
    "scripts": {
@@ -30,6 +30,7 @@
      "throttle-debounce": "^5.0.2",
      "unzipper": "^0.12.1",
      "uuid": "^11.1.0",
+     "xlstream": "^2.5.5",
      "yargs": "^17.7.2"
    },
    "directories": {