@engine9-io/input-tools 1.7.8 → 1.8.0

This diff compares the publicly available contents of two package versions as published to their registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
package/file/FileUtilities.js CHANGED
@@ -1,17 +1,15 @@
- /* eslint-disable no-await-in-loop */
  const fs = require('node:fs');

  const fsp = fs.promises;
  const path = require('node:path');
  const zlib = require('node:zlib');
- const {
-   Readable, Transform, PassThrough, Writable,
- } = require('node:stream');
+ const { Readable, Transform, PassThrough, Writable } = require('node:stream');
  const { pipeline } = require('node:stream/promises');
  const { stringify } = require('csv');

  const debug = require('debug')('FileWorker');

+ const { getXlsxStream } = require('xlstream');
  const csv = require('csv');
  const JSON5 = require('json5');
  const languageEncoding = require('detect-file-encoding-and-language');
@@ -20,10 +18,18 @@ const S3Worker = require('./S3');
  const ParquetWorker = require('./Parquet');

  const {
-   bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
+   bool,
+   getTempFilename,
+   getStringArray,
+   getTempDir,
+   makeStrings,
+   streamPacket,
+   relativeDate
  } = require('./tools');

- function Worker({ accountId }) { this.accountId = accountId; }
+ function Worker({ accountId }) {
+   this.accountId = accountId;
+ }

  class LineReaderTransform extends Transform {
    constructor(options = {}) {
@@ -31,7 +37,6 @@ class LineReaderTransform extends Transform {
      this.buffer = '';
    }

-   // eslint-disable-next-line no-underscore-dangle
    _transform(chunk, encoding, callback) {
      this.buffer += chunk.toString();
      const lines = this.buffer.split(/\r?\n/);
@@ -40,7 +45,6 @@ class LineReaderTransform extends Transform {
      callback();
    }

-   // eslint-disable-next-line no-underscore-dangle
    _flush(callback) {
      if (this.buffer) {
        this.push(this.buffer);
@@ -53,7 +57,11 @@ Worker.prototype.csvToObjectTransforms = function (options) {
    const transforms = [];
    const delimiter = options.delimiter || ',';

-   const headerMapping = options.headerMapping || function (d) { return d; };
+   const headerMapping =
+     options.headerMapping ||
+     function (d) {
+       return d;
+     };
    let lastLine = null;
    let head = null;

@@ -63,7 +71,7 @@ Worker.prototype.csvToObjectTransforms = function (options) {
      skip_empty_lines: true,
      delimiter,
      max_limit_on_data_read: 10000000,
-     skip_lines_with_error: skipLinesWithError,
+     skip_lines_with_error: skipLinesWithError
    };
    if (options.skip) parserOptions.from_line = options.skip;
    if (options.relax_column_count) parserOptions.relax_column_count = true;
@@ -101,7 +109,7 @@ Worker.prototype.csvToObjectTransforms = function (options) {

        lastLine = row.join(delimiter);
        return cb(null, o);
-     },
+     }
    });

    transforms.push(parser);
@@ -124,12 +132,15 @@ Worker.prototype.detectEncoding = async function (options) {
      // needed chunk size.
      finalBuff = await new Promise((resolve, reject) => {
        const bufferBuilder = [];
-       const decompressStream = zlib.createGunzip()
+       const decompressStream = zlib
+         .createGunzip()
          .on('data', (chunk) => {
            bufferBuilder.push(chunk);
-         }).on('close', () => {
+         })
+         .on('close', () => {
            resolve(Buffer.concat(bufferBuilder));
-         }).on('error', (err) => {
+         })
+         .on('error', (err) => {
            if (err.errno !== -5) {
              // EOF: expected
              reject(err);
@@ -145,15 +156,57 @@ Worker.prototype.detectEncoding = async function (options) {

  Worker.prototype.detectEncoding.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
+ };
+
+ Worker.prototype.xlsxToObjectStream = async function (options) {
+   let { filename } = options;
+
+   if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+     // We need to copy and delete
+     let worker = null;
+     if (filename.startsWith('r2://')) {
+       worker = new R2Worker(this);
+     } else {
+       worker = new S3Worker(this);
+     }
+     const target = getTempFilename({ targetFilename: filename.split('/').pop() });
+
+     await worker.copy({ filename, target });
+     filename = target;
+   }
+   let stream = await getXlsxStream({
+     filePath: filename,
+     sheet: 0
+   });
+   let keys = null;
+   stream = stream.pipe(
+     new Transform({
+       objectMode: true,
+       transform(d, enc, cb) {
+         if (!keys) {
+           keys = d?.raw.arr;
+           cb();
+         } else {
+           let o = {};
+           keys.forEach((k, i) => {
+             o[k] = d?.raw?.arr?.[i];
+           });
+           cb(null, o);
+         }
+       }
+     })
+   );
+
+   return { stream };
  };

  /*
- Internal method to transform a file into a stream of objects.
+ Commonly used method to transform a file into a stream of objects.
  */
  Worker.prototype.fileToObjectStream = async function (options) {
-   const { filename, columns, limit: limitOption,format:formatOverride } = options;
+   const { filename, columns, limit: limitOption, format: formatOverride } = options;

    // handle stream item
    if (options.stream) {
@@ -167,6 +220,9 @@ Worker.prototype.fileToObjectStream = async function (options) {
    let limit;
    if (limitOption) limit = parseInt(limitOption, 10);
    if (!filename) throw new Error('fileToObjectStream: filename is required');
+   if (filename.split('.').pop().toLowerCase() === 'xlsx') {
+     return this.xlsxToObjectStream(options);
+   }
    let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
    if (postfix === 'zip') {
      debug('Invalid filename:', { filename });
@@ -176,7 +232,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const streamInfo = await this.stream({
      filename,
      columns,
-     limit,
+     limit
    });
    const { encoding } = streamInfo;
    let { stream } = streamInfo;
@@ -203,7 +259,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
    } else {
      stream.setEncoding(encoding);
    }
-   let format=formatOverride || postfix;
+   let format = formatOverride || postfix;

    if (format === 'csv') {
      const csvTransforms = this.csvToObjectTransforms({ ...options });
@@ -243,13 +299,15 @@ Worker.prototype.fileToObjectStream = async function (options) {
        }
        if (headers) {
          const mapped = {};
-         headers.forEach((name, i) => { mapped[name] = obj[i]; });
+         headers.forEach((name, i) => {
+           mapped[name] = obj[i];
+         });
          this.push(mapped);
        } else {
          this.push(obj);
        }
        return cb();
-     },
+     }
    });

    transforms.push(lineReader);
@@ -260,9 +318,11 @@ Worker.prototype.fileToObjectStream = async function (options) {
    const countAndDebug = new Transform({
      objectMode: true,
      transform(d, enc, cb) {
-       if (count === 0) { debug('Sample object from file:', d); }
+       if (count === 0) {
+         debug('Sample object from file:', d);
+       }
        count += 1;
-       if ((count < 5000 && count % 1000 === 0) || (count % 50000 === 0)) {
+       if ((count < 5000 && count % 1000 === 0) || count % 50000 === 0) {
          debug(`fileToObjectStream transformed ${count} lines`);
        }
        this.push(d);
@@ -279,7 +339,7 @@ Worker.prototype.fileToObjectStream = async function (options) {
          this.push(o);
        } */
        cb();
-     },
+     }
    });

    transforms.push(countAndDebug);
@@ -319,14 +379,14 @@ Worker.prototype.getOutputStreams = async function (options) {
        objectMode: true,
        async transform(item, encoding, cb) {
          options.transform(item, encoding, cb);
-       },
+       }
      });
    } else {
      transform = new Transform({
        objectMode: true,
        async transform(item, encoding, cb) {
          cb(null, options.transform(item));
-       },
+       }
      });
    }
  } else if (options.transform) {
@@ -345,7 +405,7 @@ Worker.prototype.getOutputStreams = async function (options) {
        let v = item[k];
        if (!o[k]) {
          if (typeof v === 'object') {
-           while (Array.isArray(v)) [v] = v;// get first array item
+           while (Array.isArray(v)) [v] = v; // get first array item
            o = { ...o, ...v };
          } else {
            o[k] = v;
@@ -353,12 +413,12 @@ Worker.prototype.getOutputStreams = async function (options) {
        }
      });
      cb(null, o);
-   },
+   }
  });
  }

  const stats = {
-   records: 0,
+   records: 0
  };
  let stringifier;
  if (options.targetFormat === 'jsonl') {
@@ -366,7 +426,7 @@ Worker.prototype.getOutputStreams = async function (options) {
      objectMode: true,
      transform(d, encoding, cb) {
        cb(false, `${JSON.stringify(d)}\n`);
-     },
+     }
    });
  } else {
    stringifier = stringify({ header: true });
@@ -383,11 +443,11 @@ Worker.prototype.getOutputStreams = async function (options) {
      transform(d, enc, cb) {
        stats.records += 1;
        cb(null, d);
-     },
+     }
    }),
    stringifier,
    gzip,
-   fileWriterStream,
+   fileWriterStream
  ].filter(Boolean);
  return { filename, streams, stats };
  };
@@ -395,9 +455,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
    const { filename, streams, stats } = await this.getOutputStreams(options);
    const { stream: inStream } = options;
    streams.unshift(inStream);
-   await pipeline(
-     streams,
-   );
+   await pipeline(streams);
    return { filename, records: stats.records };
  };

@@ -432,7 +490,7 @@ Worker.prototype.transform = async function (options) {
    if (typeof f === 'function') {
      f = new Transform({
        objectMode: true,
-       transform: f,
+       transform: f
      });
    }

@@ -441,7 +499,10 @@ Worker.prototype.transform = async function (options) {

    const { targetFormat } = options;

-   if (!targetFormat && (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')) {
+   if (
+     !targetFormat &&
+     (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')
+   ) {
      options.targetFormat = 'csv';
    }

@@ -453,33 +514,34 @@ Worker.prototype.transform.metadata = {
    sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
    encoding: { description: 'Manual override of source file encoding' },
    names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
-   values: { description: "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)" },
+   values: {
+     description:
+       "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)"
+   },
    targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
    targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
    targetRowDelimiter: { description: 'Row delimiter (default \n)' },
-   targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' },
- },
+   targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' }
+ }
  };
  Worker.prototype.testTransform = async function (options) {
    return this.transform({
      ...options,
-     transform(d, enc, cb) { d.transform_time = new Date(); cb(null, d); },
+     transform(d, enc, cb) {
+       d.transform_time = new Date();
+       cb(null, d);
+     }
    });
  };
  Worker.prototype.testTransform.metadata = {
    options: {
-     filename: true,
-   },
+     filename: true
+   }
  };

  /* Get a stream from an actual stream, or an array, or a file */
- Worker.prototype.stream = async function (
-   options,
- ) {
-   const {
-     stream: inputStream, packet, type, columns, limit,
-     filename: filenameOpt,
-   } = options;
+ Worker.prototype.stream = async function (options) {
+   const { stream: inputStream, packet, type, columns, limit, filename: filenameOpt } = options;
    let filename = filenameOpt;

    if (inputStream) {
@@ -496,7 +558,8 @@ Worker.prototype.stream = async function (
    } else {
      // debug(`Not prepending filename:${filename}`);
    }
-   let encoding; let stream;
+   let encoding;
+   let stream;
    if (filename.slice(-8) === '.parquet') {
      const pq = new ParquetWorker(this);
      stream = (await pq.stream({ filename, columns, limit })).stream;
@@ -541,9 +604,8 @@ Worker.prototype.sample = async function (opts) {
  };
  Worker.prototype.sample.metadata = {
    options: {
-     filename: {},
-
-   },
+     filename: {}
+   }
  };
  Worker.prototype.toArray = async function (opts) {
    const { stream } = await this.fileToObjectStream(opts);
@@ -551,8 +613,8 @@ Worker.prototype.toArray = async function (opts) {
  };
  Worker.prototype.toArray.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.write = async function (opts) {
@@ -566,7 +628,7 @@ Worker.prototype.write = async function (opts) {
      await worker.write({
        directory,
        file,
-       content,
+       content
      });
    } else {
      await fsp.writeFile(filename, content);
@@ -576,15 +638,14 @@ Worker.prototype.write = async function (opts) {
  Worker.prototype.write.metadata = {
    options: {
      filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
-     content: {},
-   },
+     content: {}
+   }
  };

  async function streamToString(stream) {
    // lets have a ReadableStream as a stream variable
    const chunks = [];

-   // eslint-disable-next-line no-restricted-syntax
    for await (const chunk of stream) {
      chunks.push(Buffer.from(chunk));
    }
@@ -606,47 +667,46 @@ Worker.prototype.json = async function (opts) {
  };
  Worker.prototype.json.metadata = {
    options: {
-     filename: { description: 'Get a javascript object from a file' },
-   },
+     filename: { description: 'Get a javascript object from a file' }
+   }
  };

- Worker.prototype.list = async function ({ directory, start:s, end:e }) {
+ Worker.prototype.list = async function ({ directory, start: s, end: e }) {
    if (!directory) throw new Error('directory is required');
-   let start=null;
-   let end=null;
-   if (s) start=relativeDate(s);
-   if (e) end=relativeDate(e);
-
+   let start = null;
+   let end = null;
+   if (s) start = relativeDate(s);
+   if (e) end = relativeDate(e);
+
    if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
      const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.list({ directory, start, end });
    }
    const a = await fsp.readdir(directory, { withFileTypes: true });

-   const withModified=[];
+   const withModified = [];
    for (const file of a) {
-     const fullPath = path.join(directory, file.name);
-     const stats = await fsp.stat(fullPath);
-     if (start && stats.mtime<start.getTime()){
-       //do not include
-     }else if (end && stats.mtime>end.getTime()){
-       //do nothing
-     }else{
-       withModified.push({
-         name:file.name,
-         type: file.isDirectory() ? 'directory' : 'file',
-         modifiedAt:new Date(stats.mtime).toISOString(),
-       });
-     }
+     const fullPath = path.join(directory, file.name);
+     const stats = await fsp.stat(fullPath);
+     if (start && stats.mtime < start.getTime()) {
+       //do not include
+     } else if (end && stats.mtime > end.getTime()) {
+       //do nothing
+     } else {
+       withModified.push({
+         name: file.name,
+         type: file.isDirectory() ? 'directory' : 'file',
+         modifiedAt: new Date(stats.mtime).toISOString()
+       });
+     }
    }
-
+
    return withModified;
-
  };
  Worker.prototype.list.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.listAll = async function ({ directory }) {
@@ -661,8 +721,8 @@ Worker.prototype.listAll = async function ({ directory }) {
  };
  Worker.prototype.listAll.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.empty = async function ({ directory }) {
@@ -672,7 +732,7 @@ Worker.prototype.empty = async function ({ directory }) {
      throw new Error('Cannot empty an s3:// or r2:// directory');
    }
    const removed = [];
-   // eslint-disable-next-line no-restricted-syntax
+
    for (const file of await fsp.readdir(directory)) {
      removed.push(file);
      await fsp.unlink(path.join(directory, file));
@@ -681,8 +741,8 @@ Worker.prototype.empty = async function ({ directory }) {
  };
  Worker.prototype.empty.metadata = {
    options: {
-     directory: { required: true },
-   },
+     directory: { required: true }
+   }
  };

  Worker.prototype.remove = async function ({ filename }) {
@@ -705,16 +765,18 @@ Worker.prototype.remove = async function ({ filename }) {
  };
  Worker.prototype.remove.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.move = async function ({ filename, target }) {
    if (!target) throw new Error('target is required');
    if (typeof target !== 'string') throw new Error(`target isn't a string:${JSON.stringify(target)}`);
    if (target.startsWith('s3://') || target.startsWith('r2://')) {
-     if ((target.startsWith('s3://') && filename.startsWith('r2://'))
-       || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+     if (
+       (target.startsWith('s3://') && filename.startsWith('r2://')) ||
+       (target.startsWith('r2://') && filename.startsWith('s3://'))
+     ) {
        throw new Error('Cowardly not copying between services');
      }

@@ -741,8 +803,8 @@ Worker.prototype.move = async function ({ filename, target }) {
  Worker.prototype.move.metadata = {
    options: {
      filename: {},
-     target: {},
-   },
+     target: {}
+   }
  };

  Worker.prototype.stat = async function ({ filename }) {
@@ -751,11 +813,7 @@ Worker.prototype.stat = async function ({ filename }) {
      const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
      return worker.stat({ filename });
    }
-   const {
-     ctime,
-     birthtime,
-     size,
-   } = await fsp.stat(filename);
+   const { ctime, birthtime, size } = await fsp.stat(filename);
    const modifiedAt = new Date(ctime);
    let createdAt = birthtime;
    if (createdAt === 0 || !createdAt) createdAt = ctime;
@@ -763,13 +821,13 @@ Worker.prototype.stat = async function ({ filename }) {
    return {
      createdAt,
      modifiedAt,
-     size,
+     size
    };
  };
  Worker.prototype.stat.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.download = async function ({ filename }) {
@@ -782,8 +840,8 @@ Worker.prototype.download = async function ({ filename }) {
  };
  Worker.prototype.download.metadata = {
    options: {
-     filename: {},
-   },
+     filename: {}
+   }
  };

  Worker.prototype.head = async function (options) {
@@ -792,7 +850,7 @@ Worker.prototype.head = async function (options) {
    const chunks = [];

    let counter = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      chunks.push(chunk);
      counter += 1;
@@ -804,8 +862,8 @@ Worker.prototype.head = async function (options) {

  Worker.prototype.head.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };

  Worker.prototype.count = async function (options) {
@@ -814,7 +872,7 @@ Worker.prototype.count = async function (options) {

    const limit = options.limit || 5;
    let records = 0;
-   // eslint-disable-next-line no-restricted-syntax
+
    for await (const chunk of stream) {
      records += 1;
      if (records < limit) {
@@ -827,8 +885,8 @@ Worker.prototype.count = async function (options) {

  Worker.prototype.count.metadata = {
    options: {
-     filename: { required: true },
-   },
+     filename: { required: true }
+   }
  };

  // Get a set of unique entries from a uniqueFunction
@@ -839,10 +897,10 @@ Worker.prototype.getUniqueSet = async function (options) {

    let { uniqueFunction } = options;
    if (!uniqueFunction) {
-     uniqueFunction = ((o) => JSON.stringify(o));
+     uniqueFunction = (o) => JSON.stringify(o);
    }
    const uniqueSet = new Set();
-   // eslint-disable-next-line no-restricted-syntax, guard-for-in
+
    for (const filename of existingFiles) {
      const { stream: existsStream } = await this.fileToObjectStream({ filename });
      await pipeline(
@@ -856,14 +914,14 @@ Worker.prototype.getUniqueSet = async function (options) {
          }
          uniqueSet.add(v);
          cb(null, d);
-       },
+       }
      }),
      new Writable({
        objectMode: true,
        write(d, enc, cb) {
          cb();
-       },
-     }),
+       }
+     })
    );
    debug(`Finished loading ${filename}`);
  }
@@ -875,7 +933,7 @@ Worker.prototype.getUniqueStream = async function (options) {

    const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
      filenames: options.existingFiles,
-     uniqueFunction: options.uniqueFunction,
+     uniqueFunction: options.uniqueFunction
    });

    const { stream: inStream } = await this.fileToObjectStream(options);
@@ -899,8 +957,8 @@ Worker.prototype.getUniqueStream = async function (options) {
          }
          cb(null, d);
        }
-     },
-   }),
+     }
+   })
    );
    return { stream: uniqueStream, sample };
  };
@@ -912,9 +970,9 @@ Worker.prototype.getUniqueStream.metadata = {
    filename: { description: 'Specify a source filename or a stream' },
    stream: { description: 'Specify a source filename or a stream' },
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };
  Worker.prototype.getUniqueFile = async function (options) {
    const { stream, sample } = await this.getUniqueStream(options);
@@ -929,9 +987,9 @@ Worker.prototype.getUniqueFile.metadata = {
    filename: { description: 'Specify a source filename or a stream' },
    stream: { description: 'Specify a source filename or a stream' },
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };

  /*
@@ -940,7 +998,11 @@ Requires 2 passes of the files,
  but that's a better tradeoff than trying to store huge files in memory
  */
  Worker.prototype.diff = async function ({
-   fileA, fileB, uniqueFunction: ufOpt, fields, includeDuplicateSourceRecords,
+   fileA,
+   fileB,
+   uniqueFunction: ufOpt,
+   fields,
+   includeDuplicateSourceRecords
  }) {
    if (ufOpt && fields) throw new Error('fields and uniqueFunction cannot both be specified');
    let uniqueFunction = ufOpt;
@@ -953,17 +1015,18 @@ Worker.prototype.diff = async function ({
      existingFiles: [fileB],
      filename: fileA,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });
    const right = await this.getUniqueFile({
      existingFiles: [fileA],
      filename: fileB,
      uniqueFunction,
-     includeDuplicateSourceRecords,
+     includeDuplicateSourceRecords
    });

    return {
-     left, right,
+     left,
+     right
    };
  };
  Worker.prototype.diff.metadata = {
@@ -973,9 +1036,9 @@ Worker.prototype.diff.metadata = {
    fields: { description: 'Fields to use for uniqueness -- aka primary key. Defaults to JSON of line' },
    uniqueFunction: {},
    includeDuplicateSourceRecords: {
-     description: 'Sometimes you want the output to include source dupes, sometimes not, default false',
-   },
- },
+     description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
+   }
+ }
  };

  module.exports = Worker;
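
Note on the headline 1.8.0 feature above: fileToObjectStream now routes any .xlsx file through the new xlsxToObjectStream, which reads sheet 0 via xlstream, treats the first row as headers, and emits one object per data row. A minimal usage sketch, assuming the Worker above is reachable at the package's file/FileUtilities path (the require path and the filename are illustrative assumptions, not documented behavior):

    // Hypothetical usage of the new xlsx support in 1.8.0
    const FileWorker = require('@engine9-io/input-tools/file/FileUtilities');

    async function main() {
      const worker = new FileWorker({ accountId: 'engine9' });
      // the .xlsx extension triggers the new xlsxToObjectStream() branch
      const { stream } = await worker.fileToObjectStream({ filename: 'people.xlsx' });
      for await (const row of stream) {
        console.log(row); // one object per row, keyed by the header row of sheet 0
      }
    }

    main();
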
package/file/tools.js CHANGED
@@ -14,16 +14,9 @@ const unzipper = require('unzipper');

  const dayjs = require('dayjs');

- const {
-   S3Client,
-   HeadObjectCommand,
-   GetObjectCommand,
- } = require('@aws-sdk/client-s3');
+ const { S3Client, HeadObjectCommand, GetObjectCommand } = require('@aws-sdk/client-s3');

-
- const {
-   v7: uuidv7,
- } = require('uuid');
+ const { v7: uuidv7 } = require('uuid');

  async function getTempDir({ accountId = 'engine9' }) {
    const dir = [os.tmpdir(), accountId, new Date().toISOString().substring(0, 10)].join(path.sep);
@@ -52,7 +45,10 @@ async function getTempFilename(options) {
    }

    // make a distinct directory, so we don't overwrite the file
-   dir = `${dir}/${new Date().toISOString().slice(0, -6).replace(/[^0-9]/g, '_')}`;
+   dir = `${dir}/${new Date()
+     .toISOString()
+     .slice(0, -6)
+     .replace(/[^0-9]/g, '_')}`;

    const newDir = await mkdirp(dir);

@@ -97,8 +93,8 @@ async function getPacketFiles({ packet }) {
    const info = await s3Client.send(
      new HeadObjectCommand({
        Bucket,
-       Key,
-     }),
+       Key
+     })
    );
    size = info.ContentLength;
    progress(`Retrieving file of size ${size / (1024 * 1024)} MB`);
@@ -107,13 +103,14 @@ async function getPacketFiles({ packet }) {

    stream(offset, length) {
      const ptStream = new PassThrough();
-     s3Client.send(
-       new GetObjectCommand({
-         Bucket,
-         Key,
-         Range: `bytes=${offset}-${length ?? ''}`,
-       }),
-     )
+     s3Client
+       .send(
+         new GetObjectCommand({
+           Bucket,
+           Key,
+           Range: `bytes=${offset}-${length ?? ''}`
+         })
+       )
        .then((response) => {
          response.Body.pipe(ptStream);
        })
@@ -122,7 +119,7 @@ async function getPacketFiles({ packet }) {
      });

      return ptStream;
-   },
+   }
  });

  return directory;
@@ -131,7 +128,6 @@ async function getPacketFiles({ packet }) {
    return directory;
  }

-
  async function getManifest({ packet }) {
    if (!packet) throw new Error('no packet option specififed');
    const { files } = await getPacketFiles({ packet });
@@ -156,8 +152,8 @@ function getBatchTransform({ batchSize = 100 }) {
      flush(cb) {
        if (this.buffer?.length > 0) this.push(this.buffer);
        cb();
-     },
-   }),
+     }
+   })
  };
  }
  function getDebatchTransform() {
@@ -167,8 +163,8 @@ function getDebatchTransform() {
      transform(chunk, encoding, cb) {
        chunk.forEach((c) => this.push(c));
        cb();
-     },
-   }),
+     }
+   })
  };
  }

@@ -218,7 +214,8 @@ async function downloadFile({ packet, type = 'person' }) {
    const filename = await getTempFilename({ targetFilename: filePath.split('/').pop() });

    return new Promise((resolve, reject) => {
-     fileStream.pipe(fs.createWriteStream(filename))
+     fileStream
+       .pipe(fs.createWriteStream(filename))
        .on('error', reject)
        .on('finish', () => {
          resolve({ filename });
@@ -228,12 +225,12 @@ async function downloadFile({ packet, type = 'person' }) {

  function isValidDate(d) {
    // we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
-   // eslint-disable-next-line no-restricted-globals
+
    return d instanceof Date && !isNaN(d);
  }

  function bool(x, _defaultVal) {
-   const defaultVal = (_defaultVal === undefined) ? false : _defaultVal;
+   const defaultVal = _defaultVal === undefined ? false : _defaultVal;
    if (x === undefined || x === null || x === '') return defaultVal;
    if (typeof x !== 'string') return !!x;
    if (x === '1') return true; // 0 will return false, but '1' is true
@@ -255,7 +252,7 @@ function relativeDate(s, _initialDate) {
    if (!s || s === 'none') return null;
    if (typeof s.getMonth === 'function') return s;
    // We actually want a double equals here to test strings as well
-   // eslint-disable-next-line eqeqeq
+
    if (parseInt(s, 10) == s) {
      const r = new Date(parseInt(s, 10));
      if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
@@ -274,15 +271,31 @@ function relativeDate(s, _initialDate) {
    let period = null;
    switch (r[3]) {
      case 'Y':
-     case 'y': period = 'years'; break;
-
-     case 'M': period = 'months'; break;
-     case 'w': period = 'weeks'; break;
-     case 'd': period = 'days'; break;
-     case 'h': period = 'hours'; break;
-     case 'm': period = 'minutes'; break;
-     case 's': period = 'seconds'; break;
-     default: period = 'minutes'; break;
+     case 'y':
+       period = 'years';
+       break;
+
+     case 'M':
+       period = 'months';
+       break;
+     case 'w':
+       period = 'weeks';
+       break;
+     case 'd':
+       period = 'days';
+       break;
+     case 'h':
+       period = 'hours';
+       break;
+     case 'm':
+       period = 'minutes';
+       break;
+     case 's':
+       period = 'seconds';
+       break;
+     default:
+       period = 'minutes';
+       break;
    }

    let d = dayjs(initialDate);
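
For context, the switch above maps the unit character of relativeDate's offset syntax to a dayjs period. A hedged sketch of the observable behavior, based only on the code visible in these hunks (the '-3d' form assumes the offset regex from the full source, which this diff does not show):

    relativeDate('none');           // -> null (explicit early return above)
    relativeDate(new Date());       // -> returned as-is (the getMonth check)
    relativeDate('1735689600000');  // -> new Date(1735689600000), an integer epoch in milliseconds
    relativeDate('-3d');            // -> (assumed) a Date three days before now, via the 'd' -> 'days' case
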
@@ -317,12 +330,29 @@ function relativeDate(s, _initialDate) {
  */
  function makeStrings(o) {
    return Object.entries(o).reduce((a, [k, v]) => {
-     a[k] = (typeof v === 'object') ? JSON.stringify(v) : String(v);
+     a[k] = typeof v === 'object' ? JSON.stringify(v) : String(v);
      return a;
    }, {});
  }
+ function appendPostfix(filename, postfix) {
+   const filenameParts = filename.split('/');
+   const fileParts = filenameParts
+     .slice(-1)[0]
+     .split('.')
+     .filter(Boolean)
+     .filter((d) => d !== postfix);
+
+   let targetFile = null;
+   if (fileParts.slice(-1)[0] === 'gz') {
+     targetFile = fileParts.slice(0, -2).concat(postfix).concat(fileParts.slice(-2)).join('.');
+   } else {
+     targetFile = fileParts.slice(0, -1).concat(postfix).concat(fileParts.slice(-1)).join('.');
+   }
+   return filenameParts.slice(0, -1).concat(targetFile).join('/');
+ }

  module.exports = {
+   appendPostfix,
    bool,
    downloadFile,
    getTempFilename,
@@ -336,5 +366,5 @@ module.exports = {
    makeStrings,
    relativeDate,
    streamPacket,
-   writeTempFile,
+   writeTempFile
  };
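
The newly exported appendPostfix helper splices a postfix in front of the file extension, keeping a trailing .gz outermost and filtering out any existing copy of the postfix first. A quick sketch of the behavior implied by the implementation above (outputs traced by hand from the code, not taken from package docs):

    const { appendPostfix } = require('@engine9-io/input-tools');

    appendPostfix('/tmp/people.csv', 'unique');        // -> '/tmp/people.unique.csv'
    appendPostfix('/tmp/people.csv.gz', 'unique');     // -> '/tmp/people.unique.csv.gz' (.gz stays outermost)
    appendPostfix('/tmp/people.unique.csv', 'unique'); // -> '/tmp/people.unique.csv' (idempotent)
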
package/index.js CHANGED
@@ -6,15 +6,14 @@ const dayjs = require('dayjs');
  const debug = require('debug')('@engine9/input-tools');

  const unzipper = require('unzipper');
- const {
-   v4: uuidv4, v5: uuidv5, v7: uuidv7, validate: uuidIsValid,
- } = require('uuid');
+ const { v4: uuidv4, v5: uuidv5, v7: uuidv7, validate: uuidIsValid } = require('uuid');
  const archiver = require('archiver');
  const handlebars = require('handlebars');

  const FileUtilities = require('./file/FileUtilities');

  const {
+   appendPostfix,
    bool,
    getManifest,
    getFile,
@@ -29,7 +28,7 @@ const {
    getDebatchTransform,
    getStringArray,
    makeStrings,
-   writeTempFile,
+   writeTempFile
  } = require('./file/tools');

  const ForEachEntry = require('./ForEachEntry');
@@ -45,7 +44,7 @@ function getFormattedDate(dateObject, format = 'MMM DD,YYYY') {

  handlebars.registerHelper('date', (d, f) => {
    let format;
-   if (typeof f === 'string')format = f;
+   if (typeof f === 'string') format = f;
    return getFormattedDate(d, format);
  });
  handlebars.registerHelper('json', (d) => JSON.stringify(d));
@@ -60,11 +59,7 @@ async function list(_path) {
    const directory = await unzipper.Open.file(_path);

    return new Promise((resolve, reject) => {
-     directory.files[0]
-       .stream()
-       .pipe(fs.createWriteStream('firstFile'))
-       .on('error', reject)
-       .on('finish', resolve);
+     directory.files[0].stream().pipe(fs.createWriteStream('firstFile')).on('error', reject).on('finish', resolve);
    });
  }

@@ -74,11 +69,7 @@ async function extract(_path, _file) {
    const file = directory.files.find((d) => d.path === _file);
    const tempFilename = await getTempFilename({ source: _file });
    return new Promise((resolve, reject) => {
-     file
-       .stream()
-       .pipe(fs.createWriteStream(tempFilename))
-       .on('error', reject)
-       .on('finish', resolve);
+     file.stream().pipe(fs.createWriteStream(tempFilename)).on('error', reject).on('finish', resolve);
    });
  }

@@ -87,7 +78,7 @@ function appendFiles(existingFiles, _newFiles, options) {
    if (newFiles.length === 0) return;
    let { type, dateCreated } = options || {};
    if (!type) type = 'unknown';
-   if (!dateCreated)dateCreated = new Date().toISOString();
+   if (!dateCreated) dateCreated = new Date().toISOString();
    let arr = newFiles;
    if (!Array.isArray(newFiles)) arr = [arr];

@@ -96,7 +87,7 @@ function appendFiles(existingFiles, _newFiles, options) {
      type,
      originalFilename: '',
      isNew: true,
-     dateCreated,
+     dateCreated
    };

    if (typeof p === 'string') {
@@ -121,7 +112,7 @@ async function create(options) {
    messageFiles = [], // file with contents of message, used for delivery
    personFiles = [], // files with data on people
    timelineFiles = [], // activity entry
-   statisticsFiles = [], // files with aggregate statistics
+   statisticsFiles = [] // files with aggregate statistics
  } = options;
  if (options.peopleFiles) throw new Error('Unknown option: peopleFiles, did you mean personFiles?');

@@ -132,21 +123,21 @@ async function create(options) {
  appendFiles(files, timelineFiles, { type: 'timeline', dateCreated });
  appendFiles(files, statisticsFiles, { type: 'statistics', dateCreated });

- const zipFilename = target || await getTempFilename({ postfix: '.packet.zip' });
+ const zipFilename = target || (await getTempFilename({ postfix: '.packet.zip' }));

  const manifest = {
    accountId,
    source: {
-     pluginId,
+     pluginId
    },
    dateCreated,
-   files,
+   files
  };

  // create a file to stream archive data to.
  const output = fs.createWriteStream(zipFilename);
  const archive = archiver('zip', {
-   zlib: { level: 9 }, // Sets the compression level.
+   zlib: { level: 9 } // Sets the compression level.
  });
  return new Promise((resolve, reject) => {
    debug(`Setting up write stream to ${zipFilename}`);
@@ -157,7 +148,7 @@ async function create(options) {
      debug(zipFilename);
      return resolve({
        filename: zipFilename,
-       bytes: archive.pointer(),
+       bytes: archive.pointer()
      });
    });

@@ -196,7 +187,6 @@ function intToByteArray(_v) {
    const byteArray = [0, 0, 0, 0, 0, 0, 0, 0];
    let v = _v;
    for (let index = 0; index < byteArray.length; index += 1) {
-     // eslint-disable-next-line no-bitwise
      const byte = v & 0xff;
      byteArray[index] = byte;
      v = (v - byte) / 256;
@@ -226,23 +216,26 @@ function getInputUUID(a, b) {
    return uuidv5(`${pluginId}:${rid}`, '3d0e5d99-6ba9-4fab-9bb2-c32304d3df8e');
  }

- function getUUIDv7(date, inputUuid) { /* optional date and input UUID */
+ function getUUIDv7(date, inputUuid) {
+   /* optional date and input UUID */
    const uuid = inputUuid || uuidv7();
    const bytes = Buffer.from(uuid.replace(/-/g, ''), 'hex');
    if (date !== undefined) {
      const d = new Date(date);
      // isNaN behaves differently than Number.isNaN -- we're actually going for the
      // attempted conversion here
-     // eslint-disable-next-line no-restricted-globals
+
      if (isNaN(d)) throw new Error(`getUUIDv7 got an invalid date:${date || '<blank>'}`);
      const dateBytes = intToByteArray(d.getTime()).reverse();
-     dateBytes.slice(2, 8).forEach((b, i) => { bytes[i] = b; });
+     dateBytes.slice(2, 8).forEach((b, i) => {
+       bytes[i] = b;
+     });
    }
    return uuidv4({ random: bytes });
  }
  /* Returns a date from a given uuid (assumed to be a v7, otherwise the results are ... weird */
  function getUUIDTimestamp(uuid) {
-   const ts = parseInt((`${uuid}`).replace(/-/g, '').slice(0, 12), 16);
+   const ts = parseInt(`${uuid}`.replace(/-/g, '').slice(0, 12), 16);
    return new Date(ts);
  }

@@ -266,7 +259,8 @@ function getTimelineEntryUUID(inputObject, { defaults = {} } = {}) {

  if (o.remote_entry_id) {
    // get a temp ID
-   if (!o.input_id) throw new Error('Error generating timeline entry uuid -- remote_entry_id specified, but no input_id');
+   if (!o.input_id)
+     throw new Error('Error generating timeline entry uuid -- remote_entry_id specified, but no input_id');
    const uuid = uuidv5(o.remote_entry_id, o.input_id);
    // Change out the ts to match the v7 sorting.
    // But because outside specified remote_entry_uuid
@@ -274,14 +268,13 @@ function getTimelineEntryUUID(inputObject, { defaults = {} } = {}) {
    return getUUIDv7(o.ts, uuid);
  }

- const missing = requiredTimelineEntryFields
-   .filter((d) => o[d] === undefined);// 0 could be an entry type value
+ const missing = requiredTimelineEntryFields.filter((d) => o[d] === undefined); // 0 could be an entry type value

  if (missing.length > 0) throw new Error(`Missing required fields to append an entry_id:${missing.join(',')}`);
  const ts = new Date(o.ts);
  // isNaN behaves differently than Number.isNaN -- we're actually going for the
  // attempted conversion here
- // eslint-disable-next-line no-restricted-globals
+
  if (isNaN(ts)) throw new Error(`getTimelineEntryUUID got an invalid date:${o.ts || '<blank>'}`);
  const idString = `${ts.toISOString()}-${o.person_id}-${o.entry_type_id}-${o.source_code_id || 0}`;

@@ -308,6 +301,7 @@ function getEntryTypeId(o, { defaults = {} } = {}) {
  }

  module.exports = {
+   appendPostfix,
    bool,
    create,
    list,
@@ -339,5 +333,5 @@ module.exports = {
    uuidIsValid,
    uuidv4,
    uuidv5,
-   uuidv7,
+   uuidv7
  };
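
The UUID helpers above are only reformatted in this release, but the round trip they implement is worth noting: getUUIDv7 overwrites the first six bytes of a v7-style uuid with a big-endian millisecond timestamp, and getUUIDTimestamp parses those twelve hex characters back into a Date. A sketch, assuming both helpers are among the package's exports (the export hunk shown here only confirms uuidv4/uuidv5/uuidv7):

    const { getUUIDv7, getUUIDTimestamp } = require('@engine9-io/input-tools');

    const ts = new Date('2024-06-01T00:00:00.000Z');
    const id = getUUIDv7(ts);            // uuid whose leading 48 bits encode ts
    getUUIDTimestamp(id).toISOString();  // -> '2024-06-01T00:00:00.000Z'
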
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@engine9-io/input-tools",
-   "version": "1.7.8",
+   "version": "1.8.0",
    "description": "Tools for dealing with Engine9 inputs",
    "main": "index.js",
    "scripts": {
@@ -30,6 +30,7 @@
    "throttle-debounce": "^5.0.2",
    "unzipper": "^0.12.1",
    "uuid": "^11.1.0",
+   "xlstream": "^2.5.5",
    "yargs": "^17.7.2"
  },
  "directories": {
@@ -0,0 +1,55 @@
+ const { describe, it } = require('node:test');
+ const assert = require('node:assert');
+ const debug = require('debug')('test/forEach');
+
+ const { ForEachEntry } = require('../../index');
+
+ describe('Test Person File For Each', async () => {
+   it('forEachPerson Should loop through 1000 sample people', async () => {
+     let counter = 0;
+     const forEach = new ForEachEntry();
+     const result = await forEach.process({
+       packet: 'test/sample/1000_message.packet.zip',
+       batchSize: 50,
+       bindings: {
+         timelineOutputFileStream: {
+           path: 'output.timeline',
+           options: {
+             entry_type: 'ENTRY_OPTION'
+           }
+         },
+         sampleOutputFileStream: {
+           path: 'output.stream'
+         }
+       },
+       async transform(props) {
+         const { batch, timelineOutputFileStream, sampleOutputFileStream } = props;
+
+         batch.forEach((p) => {
+           if (Math.random() > 0.9) {
+             sampleOutputFileStream.push({
+               // for testing we don't need real person_ids
+               person_id: p.person_id || Math.floor(Math.random() * 1000000),
+               email: p.email,
+               entry_type: 'SAMPLE_OUTPUT'
+             });
+           }
+           timelineOutputFileStream.push({
+             // for testing we don't need real person_ids
+             person_id: p.person_id || Math.floor(Math.random() * 1000000),
+             email: p.email,
+             entry_type: 'EMAIL_DELIVERED'
+           });
+         });
+
+         batch.forEach(() => {
+           counter += 1;
+         });
+       }
+     });
+     assert(result.outputFiles?.timelineOutputFileStream?.[0]?.records);
+     assert(result.outputFiles?.sampleOutputFileStream?.[0]?.records);
+     assert.equal(counter, 1000, `Expected to loop through 1000 people, actual:${counter}`);
+   });
+   debug('Completed tests');
+ });
@@ -1,63 +0,0 @@
- const {
-   describe, it,
- } = require('node:test');
- const assert = require('node:assert');
- const debug = require('debug')('test/forEach');
-
- const { ForEachEntry } = require('../../index');
-
- describe('Test Person Packet For Each', async () => {
-   it('forEachPerson Should loop through 1000 sample people', async () => {
-     let counter = 0;
-     const forEach = new ForEachEntry();
-     const result = await forEach.process(
-       {
-         packet: 'test/sample/1000_message.packet.zip',
-         batchSize: 50,
-         bindings: {
-           timelineOutputFileStream: {
-             path: 'output.timeline',
-             options: {
-               entry_type: 'ENTRY_OPTION',
-             },
-           },
-           sampleOutputFileStream: {
-             path: 'output.stream',
-           },
-         },
-         async transform(props) {
-           const {
-             batch,
-             timelineOutputFileStream,
-             sampleOutputFileStream,
-           } = props;
-
-           batch.forEach((p) => {
-             if (Math.random() > 0.9) {
-               sampleOutputFileStream.push({
-                 // for testing we don't need real person_ids
-                 person_id: p.person_id || Math.floor(Math.random() * 1000000),
-                 email: p.email,
-                 entry_type: 'SAMPLE_OUTPUT',
-               });
-             }
-             timelineOutputFileStream.push(
-               {
-                 // for testing we don't need real person_ids
-                 person_id: p.person_id || Math.floor(Math.random() * 1000000),
-                 email: p.email,
-                 entry_type: 'EMAIL_DELIVERED',
-               },
-             );
-           });
-
-           batch.forEach(() => { counter += 1; });
-         },
-       },
-     );
-     assert(result.outputFiles?.timelineOutputFileStream?.[0]?.records);
-     assert(result.outputFiles?.sampleOutputFileStream?.[0]?.records);
-     assert.equal(counter, 1000, `Expected to loop through 1000 people, actual:${counter}`);
-   });
-   debug('Completed tests');
- });