@engine9-io/input-tools 1.7.9 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/file/FileUtilities.js +201 -135
  2. package/package.json +2 -1
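The headline change in 1.8.x is first-pass `.xlsx` support: `fileToObjectStream` now detects an `xlsx` extension and routes it through a new `xlsxToObjectStream` method backed by the newly added `xlstream` dependency (see the hunks below). A minimal usage sketch follows; the require path is an assumption, since the package's `index.js` entry point is not part of this diff:

```js
// Sketch only: assumes file/FileUtilities.js is reachable at this path
// (the package main is index.js, which this diff does not show).
const FileWorker = require('@engine9-io/input-tools/file/FileUtilities');

async function main() {
  const worker = new FileWorker({ accountId: 'example' });
  // As of 1.8.x, an .xlsx extension is detected and routed to xlsxToObjectStream,
  // which treats the first spreadsheet row as headers and emits one object per row.
  const { stream } = await worker.fileToObjectStream({ filename: './people.xlsx' });
  for await (const row of stream) {
    console.log(row);
  }
}

main().catch(console.error);
```

Note that for `s3://` and `r2://` paths, `xlsxToObjectStream` first copies the file to a temp location via `getTempFilename` before parsing.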
package/file/FileUtilities.js CHANGED
@@ -1,17 +1,15 @@
- /* eslint-disable no-await-in-loop */
  const fs = require('node:fs');

  const fsp = fs.promises;
  const path = require('node:path');
  const zlib = require('node:zlib');
- const {
-   Readable, Transform, PassThrough, Writable,
- } = require('node:stream');
+ const { Readable, Transform, PassThrough, Writable } = require('node:stream');
  const { pipeline } = require('node:stream/promises');
  const { stringify } = require('csv');

  const debug = require('debug')('FileWorker');

+ const { getXlsxStream } = require('xlstream');
  const csv = require('csv');
  const JSON5 = require('json5');
  const languageEncoding = require('detect-file-encoding-and-language');
@@ -20,10 +18,18 @@ const S3Worker = require('./S3');
  const ParquetWorker = require('./Parquet');

  const {
-   bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
+   bool,
+   getTempFilename,
+   getStringArray,
+   getTempDir,
+   makeStrings,
+   streamPacket,
+   relativeDate
  } = require('./tools');

- function Worker({ accountId }) { this.accountId = accountId; }
+ function Worker({ accountId }) {
+   this.accountId = accountId;
+ }

  class LineReaderTransform extends Transform {
    constructor(options = {}) {
@@ -31,7 +37,6 @@ class LineReaderTransform extends Transform {
      this.buffer = '';
    }

-   // eslint-disable-next-line no-underscore-dangle
    _transform(chunk, encoding, callback) {
      this.buffer += chunk.toString();
      const lines = this.buffer.split(/\r?\n/);
@@ -40,7 +45,6 @@ class LineReaderTransform extends Transform {
      callback();
    }

-   // eslint-disable-next-line no-underscore-dangle
    _flush(callback) {
      if (this.buffer) {
        this.push(this.buffer);
@@ -53,7 +57,11 @@ Worker.prototype.csvToObjectTransforms = function (options) {
    const transforms = [];
    const delimiter = options.delimiter || ',';

-   const headerMapping = options.headerMapping || function (d) { return d; };
+   const headerMapping =
+     options.headerMapping ||
+     function (d) {
+       return d;
+     };
    let lastLine = null;
    let head = null;

@@ -63,13 +71,16 @@ Worker.prototype.csvToObjectTransforms = function (options) {
      skip_empty_lines: true,
      delimiter,
      max_limit_on_data_read: 10000000,
-     skip_lines_with_error: skipLinesWithError,
+     skip_lines_with_error: skipLinesWithError
    };
    if (options.skip) parserOptions.from_line = options.skip;
    if (options.relax_column_count) parserOptions.relax_column_count = true;
    if (options.quote_escape) {
      parserOptions.escape = options.quote_escape;
    }
+   if (options.limit) {
+     parserOptions.to = options.limit;
+   }

    debug('Parser options=', parserOptions);
    const parser = csv.parse(parserOptions);
@@ -101,7 +112,7 @@ Worker.prototype.csvToObjectTransforms = function (options) {

        lastLine = row.join(delimiter);
        return cb(null, o);
-     },
+     }
    });

    transforms.push(parser);
@@ -124,12 +135,15 @@ Worker.prototype.detectEncoding = async function (options) {
      // needed chunk size.
      finalBuff = await new Promise((resolve, reject) => {
        const bufferBuilder = [];
-       const decompressStream = zlib.createGunzip()
+       const decompressStream = zlib
+         .createGunzip()
          .on('data', (chunk) => {
            bufferBuilder.push(chunk);
-         }).on('close', () => {
+         })
+         .on('close', () => {
            resolve(Buffer.concat(bufferBuilder));
-         }).on('error', (err) => {
+         })
+         .on('error', (err) => {
            if (err.errno !== -5) {
              // EOF: expected
              reject(err);
@@ -145,15 +159,57 @@ Worker.prototype.detectEncoding = async function (options) {

  Worker.prototype.detectEncoding.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
+ };
+
+ Worker.prototype.xlsxToObjectStream = async function (options) {
+   let { filename } = options;
+
+   if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+     // We need to copy and delete
+     let worker = null;
+     if (filename.startsWith('r2://')) {
+       worker = new R2Worker(this);
+     } else {
+       worker = new S3Worker(this);
+     }
+     const target = getTempFilename({ targetFilename: filename.split('/').pop() });
+
+     await worker.copy({ filename, target });
+     filename = target;
+   }
+   let stream = await getXlsxStream({
+     filePath: filename,
+     sheet: 0
+   });
+   let keys = null;
+   stream = stream.pipe(
+     new Transform({
+       objectMode: true,
+       transform(d, enc, cb) {
+         if (!keys) {
+           keys = d?.raw.arr;
+           cb();
+         } else {
+           let o = {};
+           keys.forEach((k, i) => {
+             o[k] = d?.raw?.arr?.[i];
+           });
+           cb(null, o);
+         }
+       }
+     })
+   );
+
+   return { stream };
  };

  /*
- Internal method to transform a file into a stream of objects.
+ Commonly used method to transform a file into a stream of objects.
  */
  Worker.prototype.fileToObjectStream = async function (options) {
-   const { filename, columns, limit: limitOption,format:formatOverride } = options;
+   const { filename, columns, limit: limitOption, format: formatOverride } = options;

    // handle stream item
    if (options.stream) {
@@ -167,6 +223,9 @@ Worker.prototype.fileToObjectStream = async function (options) {
    let limit;
    if (limitOption) limit = parseInt(limitOption, 10);
    if (!filename) throw new Error('fileToObjectStream: filename is required');
+   if (filename.split('.').pop().toLowerCase() === 'xlsx') {
+     return this.xlsxToObjectStream(options);
+   }
    let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
    if (postfix === 'zip') {
      debug('Invalid filename:', { filename });
@@ -176,7 +235,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const streamInfo = await this.stream({
      filename,
      columns,
-     limit,
+     limit
    });
    const { encoding } = streamInfo;
    let { stream } = streamInfo;
@@ -203,7 +262,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    } else {
      stream.setEncoding(encoding);
    }
-   let format=formatOverride || postfix;
+   let format = formatOverride || postfix;

    if (format === 'csv') {
      const csvTransforms = this.csvToObjectTransforms({ ...options });
@@ -243,13 +302,15 @@ Worker.prototype.fileToObjectStream = async function (options) {
        }
        if (headers) {
          const mapped = {};
-         headers.forEach((name, i) => { mapped[name] = obj[i]; });
+         headers.forEach((name, i) => {
+           mapped[name] = obj[i];
+         });
          this.push(mapped);
        } else {
          this.push(obj);
        }
        return cb();
-     },
+     }
    });

    transforms.push(lineReader);
@@ -260,9 +321,11 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const countAndDebug = new Transform({
      objectMode: true,
      transform(d, enc, cb) {
-       if (count === 0) { debug('Sample object from file:', d); }
+       if (count === 0) {
+         debug('Sample object from file:', d);
+       }
        count += 1;
-       if ((count < 5000 && count % 1000 === 0) || (count % 50000 === 0)) {
+       if ((count < 5000 && count % 1000 === 0) || count % 50000 === 0) {
          debug(`fileToObjectStream transformed ${count} lines`);
        }
        this.push(d);
@@ -279,7 +342,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
        this.push(o);
      } */
        cb();
-     },
+     }
    });

    transforms.push(countAndDebug);
@@ -319,14 +382,14 @@ Worker.prototype.getOutputStreams = async function (options) {
          objectMode: true,
          async transform(item, encoding, cb) {
            options.transform(item, encoding, cb);
-         },
+         }
        });
      } else {
        transform = new Transform({
          objectMode: true,
          async transform(item, encoding, cb) {
            cb(null, options.transform(item));
-         },
+         }
        });
      }
    } else if (options.transform) {
@@ -345,7 +408,7 @@ Worker.prototype.getOutputStreams = async function (options) {
            let v = item[k];
            if (!o[k]) {
              if (typeof v === 'object') {
-               while (Array.isArray(v)) [v] = v;// get first array item
+               while (Array.isArray(v)) [v] = v; // get first array item
                o = { ...o, ...v };
              } else {
                o[k] = v;
@@ -353,12 +416,12 @@ Worker.prototype.getOutputStreams = async function (options) {
            }
          });
          cb(null, o);
-       },
+       }
      });
    }

    const stats = {
-     records: 0,
+     records: 0
    };
    let stringifier;
    if (options.targetFormat === 'jsonl') {
@@ -366,7 +429,7 @@ Worker.prototype.getOutputStreams = async function (options) {
        objectMode: true,
        transform(d, encoding, cb) {
          cb(false, `${JSON.stringify(d)}\n`);
-       },
+       }
      });
    } else {
      stringifier = stringify({ header: true });
@@ -383,11 +446,11 @@ Worker.prototype.getOutputStreams = async function (options) {
        transform(d, enc, cb) {
          stats.records += 1;
          cb(null, d);
-       },
+       }
      }),
      stringifier,
      gzip,
-     fileWriterStream,
+     fileWriterStream
    ].filter(Boolean);
    return { filename, streams, stats };
  };
@@ -395,9 +458,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
    const { filename, streams, stats } = await this.getOutputStreams(options);
    const { stream: inStream } = options;
    streams.unshift(inStream);
-   await pipeline(
-     streams,
-   );
+   await pipeline(streams);
    return { filename, records: stats.records };
  };

@@ -432,7 +493,7 @@ Worker.prototype.transform = async function (options) {
    if (typeof f === 'function') {
      f = new Transform({
        objectMode: true,
-       transform: f,
+       transform: f
      });
    }

@@ -441,7 +502,10 @@ Worker.prototype.transform = async function (options) {

    const { targetFormat } = options;

-   if (!targetFormat && (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')) {
+   if (
+     !targetFormat &&
+     (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')
+   ) {
      options.targetFormat = 'csv';
    }

@@ -453,33 +517,34 @@ Worker.prototype.transform.metadata = {
      sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
      encoding: { description: 'Manual override of source file encoding' },
      names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
-     values: { description: "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)" },
+     values: {
+       description:
+         "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)"
+     },
      targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
      targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
      targetRowDelimiter: { description: 'Row delimiter (default \n)' },
-     targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' },
-   },
+     targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' }
+   }
  };
  Worker.prototype.testTransform = async function (options) {
    return this.transform({
      ...options,
-     transform(d, enc, cb) { d.transform_time = new Date(); cb(null, d); },
+     transform(d, enc, cb) {
+       d.transform_time = new Date();
+       cb(null, d);
+     }
    });
  };
  Worker.prototype.testTransform.metadata = {
    options: {
-     filename: true,
-   },
+     filename: true
+   }
  };

  /* Get a stream from an actual stream, or an array, or a file */
- Worker.prototype.stream = async function (
-   options,
- ) {
-   const {
-     stream: inputStream, packet, type, columns, limit,
-     filename: filenameOpt,
-   } = options;
+ Worker.prototype.stream = async function (options) {
+   const { stream: inputStream, packet, type, columns, limit, filename: filenameOpt } = options;
    let filename = filenameOpt;

    if (inputStream) {
@@ -496,7 +561,8 @@ Worker.prototype.stream = async function (
    } else {
      // debug(`Not prepending filename:${filename}`);
    }
-   let encoding; let stream;
+   let encoding;
+   let stream;
    if (filename.slice(-8) === '.parquet') {
      const pq = new ParquetWorker(this);
      stream = (await pq.stream({ filename, columns, limit })).stream;
@@ -541,9 +607,8 @@ Worker.prototype.sample = async function (opts) {
  };
  Worker.prototype.sample.metadata = {
    options: {
-     filename: {},
-
-   },
+     filename: {}
+   }
  };
  Worker.prototype.toArray = async function (opts) {
    const { stream } = await this.fileToObjectStream(opts);
@@ -551,8 +616,8 @@ Worker.prototype.toArray = async function (opts) {
  };
  Worker.prototype.toArray.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.write = async function (opts) {
@@ -566,7 +631,7 @@ Worker.prototype.write = async function (opts) {
      await worker.write({
        directory,
        file,
-       content,
+       content
      });
    } else {
      await fsp.writeFile(filename, content);
@@ -576,15 +641,14 @@ Worker.prototype.write = async function (opts) {
  Worker.prototype.write.metadata = {
    options: {
      filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
-     content: {},
-   },
+     content: {}
+   }
  };

  async function streamToString(stream) {
    // lets have a ReadableStream as a stream variable
    const chunks = [];

-   // eslint-disable-next-line no-restricted-syntax
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
@@ -606,47 +670,46 @@ Worker.prototype.json = async function (opts) {
  };
  Worker.prototype.json.metadata = {
    options: {
-     filename: { description: 'Get a javascript object from a file' },
-   },
+     filename: { description: 'Get a javascript object from a file' }
+   }
  };

- Worker.prototype.list = async function ({ directory, start:s, end:e }) {
+ Worker.prototype.list = async function ({ directory, start: s, end: e }) {
    if (!directory) throw new Error('directory is required');
-   let start=null;
-   let end=null;
-   if (s) start=relativeDate(s);
-   if (e) end=relativeDate(e);
-
+   let start = null;
+   let end = null;
+   if (s) start = relativeDate(s);
+   if (e) end = relativeDate(e);
+
    if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
      const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.list({ directory, start, end });
    }
    const a = await fsp.readdir(directory, { withFileTypes: true });

-   const withModified=[];
+   const withModified = [];
    for (const file of a) {
-     const fullPath = path.join(directory, file.name);
-     const stats = await fsp.stat(fullPath);
-     if (start && stats.mtime<start.getTime()){
-       //do not include
-     }else if (end && stats.mtime>end.getTime()){
-       //do nothing
-     }else{
-       withModified.push({
-         name:file.name,
-         type: file.isDirectory() ? 'directory' : 'file',
-         modifiedAt:new Date(stats.mtime).toISOString(),
-       });
-     }
+     const fullPath = path.join(directory, file.name);
+     const stats = await fsp.stat(fullPath);
+     if (start && stats.mtime < start.getTime()) {
+       //do not include
+     } else if (end && stats.mtime > end.getTime()) {
+       //do nothing
+     } else {
+       withModified.push({
+         name: file.name,
+         type: file.isDirectory() ? 'directory' : 'file',
+         modifiedAt: new Date(stats.mtime).toISOString()
+       });
+     }
    }
-
+
    return withModified;
-
  };
  Worker.prototype.list.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.listAll = async function ({ directory }) {
@@ -661,8 +724,8 @@ Worker.prototype.listAll = async function ({ directory }) {
  };
  Worker.prototype.listAll.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.empty = async function ({ directory }) {
@@ -672,7 +735,7 @@ Worker.prototype.empty = async function ({ directory }) {
      throw new Error('Cannot empty an s3:// or r2:// directory');
    }
    const removed = [];
-   // eslint-disable-next-line no-restricted-syntax
+
    for (const file of await fsp.readdir(directory)) {
      removed.push(file);
      await fsp.unlink(path.join(directory, file));
@@ -681,8 +744,8 @@ Worker.prototype.empty = async function ({ directory }) {
  };
  Worker.prototype.empty.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.remove = async function ({ filename }) {
@@ -705,16 +768,18 @@ Worker.prototype.remove = async function ({ filename }) {
  };
  Worker.prototype.remove.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.move = async function ({ filename, target }) {
    if (!target) throw new Error('target is required');
    if (typeof target !== 'string') throw new Error(`target isn't a string:${JSON.stringify(target)}`);
    if (target.startsWith('s3://') || target.startsWith('r2://')) {
-     if ((target.startsWith('s3://') && filename.startsWith('r2://'))
-       || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+     if (
+       (target.startsWith('s3://') && filename.startsWith('r2://')) ||
+       (target.startsWith('r2://') && filename.startsWith('s3://'))
+     ) {
        throw new Error('Cowardly not copying between services');
      }

@@ -741,8 +806,8 @@ Worker.prototype.move = async function ({ filename, target }) {
  Worker.prototype.move.metadata = {
    options: {
      filename: {},
-     target: {},
-   },
+     target: {}
+   }
  };

  Worker.prototype.stat = async function ({ filename }) {
@@ -751,11 +816,7 @@ Worker.prototype.stat = async function ({ filename }) {
      const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.stat({ filename });
    }
-   const {
-     ctime,
-     birthtime,
-     size,
-   } = await fsp.stat(filename);
+   const { ctime, birthtime, size } = await fsp.stat(filename);
    const modifiedAt = new Date(ctime);
    let createdAt = birthtime;
    if (createdAt === 0 || !createdAt) createdAt = ctime;
@@ -763,13 +824,13 @@ Worker.prototype.stat = async function ({ filename }) {
    return {
      createdAt,
      modifiedAt,
-     size,
+     size
    };
  };
  Worker.prototype.stat.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.download = async function ({ filename }) {
@@ -782,8 +843,8 @@ Worker.prototype.download = async function ({ filename }) {
  };
  Worker.prototype.download.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.head = async function (options) {
@@ -792,7 +853,7 @@ Worker.prototype.head = async function (options) {
    const chunks = [];

    let counter = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      chunks.push(chunk);
      counter += 1;
@@ -804,8 +865,8 @@ Worker.prototype.head = async function (options) {

  Worker.prototype.head.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };

  Worker.prototype.count = async function (options) {
@@ -814,7 +875,7 @@ Worker.prototype.count = async function (options) {

    const limit = options.limit || 5;
    let records = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      records += 1;
      if (records < limit) {
@@ -827,8 +888,8 @@ Worker.prototype.count = async function (options) {

  Worker.prototype.count.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };

  // Get a set of unique entries from a uniqueFunction
@@ -839,10 +900,10 @@ Worker.prototype.getUniqueSet = async function (options) {

    let { uniqueFunction } = options;
    if (!uniqueFunction) {
-     uniqueFunction = ((o) => JSON.stringify(o));
+     uniqueFunction = (o) => JSON.stringify(o);
    }
    const uniqueSet = new Set();
-   // eslint-disable-next-line no-restricted-syntax, guard-for-in
+
    for (const filename of existingFiles) {
      const { stream: existsStream } = await this.fileToObjectStream({ filename });
      await pipeline(
@@ -856,14 +917,14 @@
            }
            uniqueSet.add(v);
            cb(null, d);
-         },
+         }
        }),
        new Writable({
          objectMode: true,
          write(d, enc, cb) {
            cb();
-         },
-       }),
+         }
+       })
      );
      debug(`Finished loading ${filename}`);
    }
@@ -875,7 +936,7 @@ Worker.prototype.getUniqueStream = async function (options) {

    const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
      filenames: options.existingFiles,
-     uniqueFunction: options.uniqueFunction,
+     uniqueFunction: options.uniqueFunction
    });

    const { stream: inStream } = await this.fileToObjectStream(options);
@@ -899,8 +960,8 @@ Worker.prototype.getUniqueStream = async function (options) {
          }
          cb(null, d);
        }
-       },
-     }),
+       }
+     })
    );
    return { stream: uniqueStream, sample };
  };
@@ -912,9 +973,9 @@ Worker.prototype.getUniqueStream.metadata = {
      filename: { description: 'Specify a source filename or a stream' },
      stream: { description: 'Specify a source filename or a stream' },
      includeDuplicateSourceRecords: {
-       description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-     },
-   },
+       description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+     }
+   }
  };
  Worker.prototype.getUniqueFile = async function (options) {
    const { stream, sample } = await this.getUniqueStream(options);
@@ -929,9 +990,9 @@ Worker.prototype.getUniqueFile.metadata = {
      filename: { description: 'Specify a source filename or a stream' },
      stream: { description: 'Specify a source filename or a stream' },
      includeDuplicateSourceRecords: {
-       description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-     },
-   },
+       description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+     }
+   }
  };

  /*
@@ -940,7 +1001,11 @@ Requires 2 passes of the files,
  but that's a better tradeoff than trying to store huge files in memory
  */
  Worker.prototype.diff = async function ({
-   fileA, fileB, uniqueFunction: ufOpt, fields, includeDuplicateSourceRecords,
+   fileA,
+   fileB,
+   uniqueFunction: ufOpt,
+   fields,
+   includeDuplicateSourceRecords
  }) {
    if (ufOpt && fields) throw new Error('fields and uniqueFunction cannot both be specified');
    let uniqueFunction = ufOpt;
@@ -953,17 +1018,18 @@ Worker.prototype.diff = async function ({
      existingFiles: [fileB],
      filename: fileA,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });
    const right = await this.getUniqueFile({
      existingFiles: [fileA],
      filename: fileB,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });

    return {
-     left, right,
+     left,
+     right
    };
  };
  Worker.prototype.diff.metadata = {
@@ -973,9 +1039,9 @@ Worker.prototype.diff.metadata = {
      fields: { description: 'Fields to use for uniqueness -- aka primary key. Defaults to JSON of line' },
      uniqueFunction: {},
      includeDuplicateSourceRecords: {
-       description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-     },
-   },
+       description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+     }
+   }
  };

  module.exports = Worker;
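Besides the xlsx path, the other behavioral change above is that `csvToObjectTransforms` now honors `options.limit` by mapping it to the csv parser's `to` option, so row caps are enforced at parse time rather than by a downstream transform. A sketch, reusing the hypothetical require path from the earlier example:

```js
const FileWorker = require('@engine9-io/input-tools/file/FileUtilities');

// With 1.8.x, limit flows through fileToObjectStream into the csv parser,
// which stops after that many records instead of reading the whole file.
async function firstHundredRows(filename) {
  const worker = new FileWorker({ accountId: 'example' });
  const { stream } = await worker.fileToObjectStream({ filename, limit: 100 });
  const rows = [];
  for await (const row of stream) rows.push(row);
  return rows;
}
```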
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@engine9-io/input-tools",
-   "version": "1.7.9",
+   "version": "1.8.1",
    "description": "Tools for dealing with Engine9 inputs",
    "main": "index.js",
    "scripts": {
@@ -30,6 +30,7 @@
      "throttle-debounce": "^5.0.2",
      "unzipper": "^0.12.1",
      "uuid": "^11.1.0",
+     "xlstream": "^2.5.5",
      "yargs": "^17.7.2"
    },
    "directories": {