@engine9-io/input-tools 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
@@ -3,7 +3,7 @@ import path from 'node:path';
  import zlib from 'node:zlib';
  import nodestream from 'node:stream';
  import promises from 'node:stream/promises';
- import csv$0 from 'csv';
+ import { parse, stringify } from 'csv';
  import debug$0 from 'debug';
  import xlstream from 'xlstream';
  import JSON5 from 'json5';
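The only substantive change in this hunk is the switch from the csv package's default export to its named exports; a minimal sketch of the equivalent call sites (illustrative only, not code from the package):

import { parse, stringify } from 'csv';

// 2.0.0 reached these through the default export (csv$0.parse / csv$0.stringify);
// 2.0.2 imports them directly. The options shown are ones this file actually uses.
const parser = parse({ delimiter: ',', relax_column_count: true });
const toCsv = stringify({ header: true });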
@@ -15,1067 +15,1021 @@ import { bool, getTempFilename, getStringArray, getTempDir, makeStrings, streamP
  const fsp = fs.promises;
  const { Readable, Transform, PassThrough, Writable } = nodestream;
  const { pipeline } = promises;
- const { stringify } = csv$0;
+
  const debug = debug$0('@engine9-io/file');
  const { getXlsxStream } = xlstream;
- const csv = csv$0;
+
  function Worker({ accountId }) {
- this.accountId = accountId;
+ this.accountId = accountId;
  }
  class LineReaderTransform extends Transform {
- constructor(options = {}) {
- super({ ...options, readableObjectMode: true });
- this.buffer = '';
- }
- _transform(chunk, encoding, callback) {
- this.buffer += chunk.toString();
- const lines = this.buffer.split(/\r?\n/);
- this.buffer = lines.pop();
- lines.forEach((line) => this.push(line));
- callback();
- }
- _flush(callback) {
- if (this.buffer) {
- this.push(this.buffer);
- }
- callback();
- }
+ constructor(options = {}) {
+ super({ ...options, readableObjectMode: true });
+ this.buffer = '';
+ }
+ _transform(chunk, encoding, callback) {
+ this.buffer += chunk.toString();
+ const lines = this.buffer.split(/\r?\n/);
+ this.buffer = lines.pop();
+ lines.forEach((line) => this.push(line));
+ callback();
+ }
+ _flush(callback) {
+ if (this.buffer) {
+ this.push(this.buffer);
+ }
+ callback();
+ }
  }
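LineReaderTransform (re-indented above, behavior unchanged) buffers incoming text and pushes one line per object-mode chunk. A minimal wiring sketch, assuming the class is in scope (it is internal to this file and not exported by name), with a hypothetical input path:

import fs from 'node:fs';
import { Writable } from 'node:stream';
import { pipeline } from 'node:stream/promises';

let lines = 0;
await pipeline(
  fs.createReadStream('./input.txt'),   // hypothetical file
  new LineReaderTransform(),            // splits on \r?\n, flushes the trailing partial line
  new Writable({
    objectMode: true,
    write(line, enc, cb) { lines += 1; cb(); }
  })
);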
  Worker.prototype.csvToObjectTransforms = function (options) {
- const transforms = [];
- const delimiter = options.delimiter || ',';
- const headerMapping = options.headerMapping ||
- function (d) {
- return d;
- };
- let lastLine = null;
- let head = null;
- const skipLinesWithError = bool(options.skip_lines_with_error, false);
- const parserOptions = {
- relax: true,
- skip_empty_lines: true,
- delimiter,
- max_limit_on_data_read: 10000000,
- skip_lines_with_error: skipLinesWithError
+ const transforms = [];
+ const delimiter = options.delimiter || ',';
+ const headerMapping =
+ options.headerMapping ||
+ function (d) {
+ return d;
  };
- if (options.skip)
- parserOptions.from_line = options.skip;
- if (options.relax_column_count)
- parserOptions.relax_column_count = true;
- if (options.quote_escape) {
- parserOptions.escape = options.quote_escape;
- }
- if (options.limit) {
- parserOptions.to = options.limit;
- }
- debug('Parser options=', parserOptions);
- const parser = csv.parse(parserOptions);
- parser.on('error', (error) => {
- debug('fileToObjectStream: Error parsing csv file');
- debug(lastLine);
- throw new Error(error);
- });
- const blankAndHeaderCheck = new Transform({
- objectMode: true,
- transform(row, enc, cb) {
- // Blank rows
- if (row.length === 0)
- return cb();
- if (row.length === 1 && !row[0])
- return cb();
- if (!head) {
- head = row.map(headerMapping);
- return cb();
- }
- const o = {};
- head.forEach((_h, i) => {
- const h = _h.trim();
- if (h) {
- o[h] = row[i];
- }
- });
- lastLine = row.join(delimiter);
- return cb(null, o);
+ let lastLine = null;
+ let head = null;
+ const skipLinesWithError = bool(options.skip_lines_with_error, false);
+ const parserOptions = {
+ relax: true,
+ skip_empty_lines: true,
+ delimiter,
+ max_limit_on_data_read: 10000000,
+ skip_lines_with_error: skipLinesWithError
+ };
+ if (options.skip) parserOptions.from_line = options.skip;
+ if (options.relax_column_count) parserOptions.relax_column_count = true;
+ if (options.quote_escape) {
+ parserOptions.escape = options.quote_escape;
+ }
+ if (options.limit) {
+ parserOptions.to = options.limit;
+ }
+ debug('Parser options=', parserOptions);
+ const parser = parse(parserOptions);
+ parser.on('error', (error) => {
+ debug('fileToObjectStream: Error parsing csv file');
+ debug(lastLine);
+ throw new Error(error);
+ });
+ const blankAndHeaderCheck = new Transform({
+ objectMode: true,
+ transform(row, enc, cb) {
+ // Blank rows
+ if (row.length === 0) return cb();
+ if (row.length === 1 && !row[0]) return cb();
+ if (!head) {
+ head = row.map(headerMapping);
+ return cb();
+ }
+ const o = {};
+ head.forEach((_h, i) => {
+ const h = _h.trim();
+ if (h) {
+ o[h] = row[i];
  }
- });
- transforms.push(parser);
- transforms.push(blankAndHeaderCheck);
- return { transforms };
+ });
+ lastLine = row.join(delimiter);
+ return cb(null, o);
+ }
+ });
+ transforms.push(parser);
+ transforms.push(blankAndHeaderCheck);
+ return { transforms };
  };
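csvToObjectTransforms returns { transforms }: the csv parser followed by a transform that treats the first row as the header and drops blank rows. A usage sketch; the worker construction, the fs import, and the filename are assumptions for illustration:

import fs from 'node:fs';

const worker = new Worker({ accountId: 'acct1' });   // Worker as defined at the top of this file
const { transforms } = worker.csvToObjectTransforms({ delimiter: ',', limit: 100 });
let stream = fs.createReadStream('./people.csv');    // hypothetical file
for (const t of transforms) stream = stream.pipe(t);
for await (const row of stream) {
  // row is an object keyed by the trimmed header columns
}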
105
102
  Worker.prototype.detectEncoding = async function (options) {
106
- if (options.encoding_override)
107
- return { encoding: options.encoding_override };
108
- // Limit to only the top N bytes -- for perfomance
109
- // Be wary, though, as gzip files may require a certain minimum number of bytes to decompress
110
- const bytes = 64 * 1024;
111
- const buff = Buffer.alloc(bytes);
112
- const fd = await fsp.open(options.filename);
113
- await fd.read(buff, 0, bytes);
114
- let finalBuff = buff;
115
- if (options.filename.slice(-3) === '.gz') {
116
- // This code deals with scenarios where the buffer coming in may not be exactly the gzip
117
- // needed chunk size.
118
- finalBuff = await new Promise((resolve, reject) => {
119
- const bufferBuilder = [];
120
- const decompressStream = zlib
121
- .createGunzip()
122
- .on('data', (chunk) => {
123
- bufferBuilder.push(chunk);
124
- })
125
- .on('close', () => {
126
- resolve(Buffer.concat(bufferBuilder));
127
- })
128
- .on('error', (err) => {
129
- if (err.errno !== -5) {
130
- // EOF: expected
131
- reject(err);
132
- }
133
- });
134
- decompressStream.write(buff);
135
- decompressStream.end();
103
+ if (options.encoding_override) return { encoding: options.encoding_override };
104
+ // Limit to only the top N bytes -- for perfomance
105
+ // Be wary, though, as gzip files may require a certain minimum number of bytes to decompress
106
+ const bytes = 64 * 1024;
107
+ const buff = Buffer.alloc(bytes);
108
+ const fd = await fsp.open(options.filename);
109
+ await fd.read(buff, 0, bytes);
110
+ let finalBuff = buff;
111
+ if (options.filename.slice(-3) === '.gz') {
112
+ // This code deals with scenarios where the buffer coming in may not be exactly the gzip
113
+ // needed chunk size.
114
+ finalBuff = await new Promise((resolve, reject) => {
115
+ const bufferBuilder = [];
116
+ const decompressStream = zlib
117
+ .createGunzip()
118
+ .on('data', (chunk) => {
119
+ bufferBuilder.push(chunk);
120
+ })
121
+ .on('close', () => {
122
+ resolve(Buffer.concat(bufferBuilder));
123
+ })
124
+ .on('error', (err) => {
125
+ if (err.errno !== -5) {
126
+ // EOF: expected
127
+ reject(err);
128
+ }
136
129
  });
137
- }
138
- return languageEncoding(finalBuff);
130
+ decompressStream.write(buff);
131
+ decompressStream.end();
132
+ });
133
+ }
134
+ return languageEncoding(finalBuff);
139
135
  };
140
136
  Worker.prototype.detectEncoding.metadata = {
141
- options: {
142
- filename: { required: true }
143
- }
137
+ options: {
138
+ filename: { required: true }
139
+ }
144
140
  };
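detectEncoding reads at most the first 64 KB of the file (gunzipping it first when the name ends in .gz) and hands the buffer to languageEncoding; encoding_override skips detection. A sketch with a hypothetical filename, reusing the worker from the earlier sketch:

// worker: a Worker instance, as in the csvToObjectTransforms sketch above
const { encoding } = await worker.detectEncoding({ filename: './export.csv.gz' });
// or bypass detection entirely:
// const forced = await worker.detectEncoding({ filename: './export.csv', encoding_override: 'UTF-8' });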
145
141
  Worker.prototype.xlsxToObjectStream = async function (options) {
146
- let { filename } = options;
147
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
148
- // We need to copy and delete
149
- let worker = null;
150
- if (filename.startsWith('r2://')) {
151
- worker = new R2Worker(this);
152
- }
153
- else {
154
- worker = new S3Worker(this);
155
- }
156
- const target = getTempFilename({ targetFilename: filename.split('/').pop() });
157
- await worker.copy({ filename, target });
158
- filename = target;
159
- }
160
- let stream = await getXlsxStream({
161
- filePath: filename,
162
- sheet: 0
163
- });
164
- let keys = null;
165
- stream = stream.pipe(new Transform({
166
- objectMode: true,
167
- transform(d, enc, cb) {
168
- if (!keys) {
169
- keys = d?.raw.arr;
170
- cb();
171
- }
172
- else {
173
- let o = {};
174
- keys.forEach((k, i) => {
175
- o[k] = d?.raw?.arr?.[i];
176
- });
177
- cb(null, o);
178
- }
142
+ let { filename } = options;
143
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
144
+ // We need to copy and delete
145
+ let worker = null;
146
+ if (filename.startsWith('r2://')) {
147
+ worker = new R2Worker(this);
148
+ } else {
149
+ worker = new S3Worker(this);
150
+ }
151
+ const target = getTempFilename({ targetFilename: filename.split('/').pop() });
152
+ await worker.copy({ filename, target });
153
+ filename = target;
154
+ }
155
+ let stream = await getXlsxStream({
156
+ filePath: filename,
157
+ sheet: 0
158
+ });
159
+ let keys = null;
160
+ stream = stream.pipe(
161
+ new Transform({
162
+ objectMode: true,
163
+ transform(d, enc, cb) {
164
+ if (!keys) {
165
+ keys = d?.raw.arr;
166
+ cb();
167
+ } else {
168
+ let o = {};
169
+ keys.forEach((k, i) => {
170
+ o[k] = d?.raw?.arr?.[i];
171
+ });
172
+ cb(null, o);
179
173
  }
180
- }));
181
- return { stream };
174
+ }
175
+ })
176
+ );
177
+ return { stream };
182
178
  };
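xlsxToObjectStream copies s3:// or r2:// sources to a temp file, streams sheet 0 via getXlsxStream, and uses the first row as the keys for every following row. Sketch (filename hypothetical):

// worker: a Worker instance, as in the earlier sketches
const { stream } = await worker.xlsxToObjectStream({ filename: 'r2://bucket/report.xlsx' });
for await (const row of stream) {
  // one object per spreadsheet row, keyed by the header row
}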
183
179
  Worker.prototype.getFormat = async function (options) {
184
- const { sourcePostfix, filename, format: formatOverride } = options;
185
- let postfix = sourcePostfix || filename.toLowerCase().split('.').pop();
186
- if (postfix === 'gz') {
187
- postfix = filename.toLowerCase().split('.');
188
- postfix = postfix[postfix.length - 2];
189
- }
190
- return formatOverride || postfix;
180
+ const { sourcePostfix, filename, format: formatOverride } = options;
181
+ let postfix = sourcePostfix || filename.toLowerCase().split('.').pop();
182
+ if (postfix === 'gz') {
183
+ postfix = filename.toLowerCase().split('.');
184
+ postfix = postfix[postfix.length - 2];
185
+ }
186
+ return formatOverride || postfix;
191
187
  };
192
188
  /*
193
189
  Commonly used method to transform a file into a stream of objects.
194
190
  */
195
191
  Worker.prototype.fileToObjectStream = async function (options) {
196
- const { filename, columns, limit: limitOption, format: formatOverride } = options;
197
- // handle stream item
198
- if (options.stream) {
199
- if (Array.isArray(options.stream)) {
200
- return { stream: Readable.from(options.stream) };
201
- }
202
- // probably already a stream
203
- if (typeof options.stream === 'object')
204
- return { stream: options.stream };
205
- throw new Error(`Invalid stream type:${typeof options.stream}`);
206
- }
207
- let limit;
208
- if (limitOption)
209
- limit = parseInt(limitOption, 10);
210
- if (!filename)
211
- throw new Error('fileToObjectStream: filename is required');
212
- if (filename.split('.').pop().toLowerCase() === 'xlsx') {
213
- return this.xlsxToObjectStream(options);
214
- }
215
- let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
216
- if (postfix === 'zip') {
217
- debug('Invalid filename:', { filename });
218
- throw new Error('Cowardly refusing to turn a .zip file into an object stream, turn into a csv first');
219
- }
220
- const streamInfo = await this.stream({
221
- filename,
222
- columns,
223
- limit
224
- });
225
- const { encoding } = streamInfo;
226
- let { stream } = streamInfo;
227
- if (!stream)
228
- throw new Error(`No stream found in fileToObjectStream from filename ${filename}`);
229
- if (encoding === 'object') {
230
- // already an object
231
- return { stream };
232
- }
233
- let count = 0;
234
- let transforms = [];
235
- if (postfix === 'gz') {
236
- const gunzip = zlib.createGunzip();
237
- transforms.push(gunzip);
238
- gunzip.setEncoding(encoding);
239
- // encoding = null;// Default encoding
240
- postfix = filename.toLowerCase().split('.');
241
- postfix = postfix[postfix.length - 2];
242
- debug(`Using gunzip parser because postfix is .gz, encoding=${encoding}`);
243
- }
244
- else {
245
- stream.setEncoding(encoding);
246
- }
247
- let format = formatOverride || postfix;
248
- debug(`Reading file ${filename} with encoding: ${encoding} and format ${format}`);
249
- if (format === 'csv') {
250
- const csvTransforms = this.csvToObjectTransforms({ ...options });
251
- transforms = transforms.concat(csvTransforms.transforms);
252
- }
253
- else if (format === 'txt') {
254
- const csvTransforms = this.csvToObjectTransforms({ ...options, delimiter: '\t' });
255
- transforms = transforms.concat(csvTransforms.transforms);
256
- }
257
- else if (format === 'jsonl') {
258
- /* Type of JSON that has the names in an array in the first record,
192
+ const { filename, columns, limit: limitOption, format: formatOverride } = options;
193
+ // handle stream item
194
+ if (options.stream) {
195
+ if (Array.isArray(options.stream)) {
196
+ return { stream: Readable.from(options.stream) };
197
+ }
198
+ // probably already a stream
199
+ if (typeof options.stream === 'object') return { stream: options.stream };
200
+ throw new Error(`Invalid stream type:${typeof options.stream}`);
201
+ }
202
+ let limit;
203
+ if (limitOption) limit = parseInt(limitOption, 10);
204
+ if (!filename) throw new Error('fileToObjectStream: filename is required');
205
+ if (filename.split('.').pop().toLowerCase() === 'xlsx') {
206
+ return this.xlsxToObjectStream(options);
207
+ }
208
+ let postfix = options.sourcePostfix || filename.toLowerCase().split('.').pop();
209
+ if (postfix === 'zip') {
210
+ debug('Invalid filename:', { filename });
211
+ throw new Error('Cowardly refusing to turn a .zip file into an object stream, turn into a csv first');
212
+ }
213
+ const streamInfo = await this.stream({
214
+ filename,
215
+ columns,
216
+ limit
217
+ });
218
+ const { encoding } = streamInfo;
219
+ let { stream } = streamInfo;
220
+ if (!stream) throw new Error(`No stream found in fileToObjectStream from filename ${filename}`);
221
+ if (encoding === 'object') {
222
+ // already an object
223
+ return { stream };
224
+ }
225
+ let count = 0;
226
+ let transforms = [];
227
+ if (postfix === 'gz') {
228
+ const gunzip = zlib.createGunzip();
229
+ transforms.push(gunzip);
230
+ gunzip.setEncoding(encoding);
231
+ // encoding = null;// Default encoding
232
+ postfix = filename.toLowerCase().split('.');
233
+ postfix = postfix[postfix.length - 2];
234
+ debug(`Using gunzip parser because postfix is .gz, encoding=${encoding}`);
235
+ } else {
236
+ stream.setEncoding(encoding);
237
+ }
238
+ let format = formatOverride || postfix;
239
+ debug(`Reading file ${filename} with encoding: ${encoding} and format ${format}`);
240
+ if (format === 'csv') {
241
+ const csvTransforms = this.csvToObjectTransforms({ ...options });
242
+ transforms = transforms.concat(csvTransforms.transforms);
243
+ } else if (format === 'txt') {
244
+ const csvTransforms = this.csvToObjectTransforms({ ...options, delimiter: '\t' });
245
+ transforms = transforms.concat(csvTransforms.transforms);
246
+ } else if (format === 'jsonl') {
247
+ /* Type of JSON that has the names in an array in the first record,
259
248
  and the values in JSON arrays thereafter
260
249
  */
261
- let headers = null;
262
- const lineReader = new LineReaderTransform();
263
- const jsonlTransform = new Transform({
264
- objectMode: true,
265
- transform(d, enc, cb) {
266
- if (!d)
267
- return cb();
268
- let obj;
269
- try {
270
- obj = JSON5.parse(d);
271
- }
272
- catch (e) {
273
- debug('Invalid line:');
274
- debug(d);
275
- throw e;
276
- }
277
- /* JSONL could potentially start with an array of names,
250
+ let headers = null;
251
+ const lineReader = new LineReaderTransform();
252
+ const jsonlTransform = new Transform({
253
+ objectMode: true,
254
+ transform(d, enc, cb) {
255
+ if (!d) return cb();
256
+ let obj;
257
+ try {
258
+ obj = JSON5.parse(d);
259
+ } catch (e) {
260
+ debug('Invalid line:');
261
+ debug(d);
262
+ throw e;
263
+ }
264
+ /* JSONL could potentially start with an array of names,
278
265
  in which case we need to map the subsequent values
279
266
  */
280
- if (headers === null) {
281
- if (Array.isArray(obj)) {
282
- headers = obj;
283
- return cb();
284
- }
285
- headers = false;
286
- }
287
- if (headers) {
288
- const mapped = {};
289
- headers.forEach((name, i) => {
290
- mapped[name] = obj[i];
291
- });
292
- this.push(mapped);
293
- }
294
- else {
295
- this.push(obj);
296
- }
297
- return cb();
298
- }
299
- });
300
- transforms.push(lineReader);
301
- transforms.push(jsonlTransform);
302
- }
303
- else {
304
- throw new Error(`Unsupported file type: ${postfix}`);
305
- }
306
- const countAndDebug = new Transform({
307
- objectMode: true,
308
- transform(d, enc, cb) {
309
- if (count === 0) {
310
- debug('Sample object from file:', d);
311
- }
312
- count += 1;
313
- if ((count < 5000 && count % 1000 === 0) || count % 50000 === 0) {
314
- debug(`fileToObjectStream transformed ${count} lines`);
315
- }
316
- this.push(d);
317
- cb();
318
- },
319
- flush(cb) {
320
- // If there's no records at all, push a dummy record, and specify 0 records
321
- // Don't push dummy records anymore -- legacy cruft
322
- debug(`Completed reading file, records=${count}`);
323
- /* if (count === 0) {
267
+ if (headers === null) {
268
+ if (Array.isArray(obj)) {
269
+ headers = obj;
270
+ return cb();
271
+ }
272
+ headers = false;
273
+ }
274
+ if (headers) {
275
+ const mapped = {};
276
+ headers.forEach((name, i) => {
277
+ mapped[name] = obj[i];
278
+ });
279
+ this.push(mapped);
280
+ } else {
281
+ this.push(obj);
282
+ }
283
+ return cb();
284
+ }
285
+ });
286
+ transforms.push(lineReader);
287
+ transforms.push(jsonlTransform);
288
+ } else {
289
+ throw new Error(`Unsupported file type: ${postfix}`);
290
+ }
291
+ const countAndDebug = new Transform({
292
+ objectMode: true,
293
+ transform(d, enc, cb) {
294
+ if (count === 0) {
295
+ debug('Sample object from file:', d);
296
+ }
297
+ count += 1;
298
+ if ((count < 5000 && count % 1000 === 0) || count % 50000 === 0) {
299
+ debug(`fileToObjectStream transformed ${count} lines`);
300
+ }
301
+ this.push(d);
302
+ cb();
303
+ },
304
+ flush(cb) {
305
+ // If there's no records at all, push a dummy record, and specify 0 records
306
+ // Don't push dummy records anymore -- legacy cruft
307
+ debug(`Completed reading file, records=${count}`);
308
+ /* if (count === 0) {
324
309
  const o = { _is_placeholder: true };
325
310
 
326
311
  if (head) head.forEach((c) => { o[c] = null; });
327
312
  this.push(o);
328
313
  } */
329
- cb();
330
- }
331
- });
332
- transforms.push(countAndDebug);
333
- transforms.forEach((t) => {
334
- stream = stream.pipe(t);
335
- });
336
- return { stream };
314
+ cb();
315
+ }
316
+ });
317
+ transforms.push(countAndDebug);
318
+ transforms.forEach((t) => {
319
+ stream = stream.pipe(t);
320
+ });
321
+ return { stream };
337
322
  };
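fileToObjectStream is the main entry point in this hunk: it passes arrays and existing streams straight through, refuses .zip, and otherwise routes the file through the xlsx, csv, txt, or jsonl branch based on the (possibly gzipped) extension. A sketch of the two common call shapes (paths hypothetical):

// worker: a Worker instance, as in the earlier sketches
const { stream } = await worker.fileToObjectStream({ filename: './contacts.csv.gz', limit: 1000 });
for await (const o of stream) {
  // parsed row objects, at most 1000 because limit maps to the parser's `to` option
}
const { stream: wrapped } = await worker.fileToObjectStream({ stream: [{ id: 1 }, { id: 2 }] }); // arrays are wrapped with Readable.from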
338
323
  Worker.prototype.getFileWriterStream = async function (options = {}) {
339
- const accountId = options.accountId || this.accountId;
340
- if (!accountId)
341
- throw new Error('getFileWriterStream has no accountId');
342
- const targetFormat = options.targetFormat || 'csv';
343
- const tempDir = await getTempDir({ accountId });
344
- let { fileExtendedType } = options;
345
- if (fileExtendedType)
346
- fileExtendedType += '.';
347
- else
348
- fileExtendedType = '';
349
- // So, this could change, but it's easier to read
350
- // dates in a filename than UUIDs, so this is
351
- // a unique-ish filename generator
352
- const uniqueNumberedDate = `${new Date().toISOString().replace(/[^0-9]*/g, '')}.${Math.floor(Math.random() * 1000)}`;
353
- let filename = `${tempDir}${path.sep}${uniqueNumberedDate}.${fileExtendedType}${targetFormat}`;
354
- if (bool(options.gzip, false))
355
- filename += '.gz';
356
- const stream = fs.createWriteStream(filename);
357
- debug('FileWriterStream writing to file ', filename);
358
- return { filename, stream };
324
+ const accountId = options.accountId || this.accountId;
325
+ if (!accountId) throw new Error('getFileWriterStream has no accountId');
326
+ const targetFormat = options.targetFormat || 'csv';
327
+ const tempDir = await getTempDir({ accountId });
328
+ let { fileExtendedType } = options;
329
+ if (fileExtendedType) fileExtendedType += '.';
330
+ else fileExtendedType = '';
331
+ // So, this could change, but it's easier to read
332
+ // dates in a filename than UUIDs, so this is
333
+ // a unique-ish filename generator
334
+ const uniqueNumberedDate = `${new Date().toISOString().replace(/[^0-9]*/g, '')}.${Math.floor(Math.random() * 1000)}`;
335
+ let filename = `${tempDir}${path.sep}${uniqueNumberedDate}.${fileExtendedType}${targetFormat}`;
336
+ if (bool(options.gzip, false)) filename += '.gz';
337
+ const stream = fs.createWriteStream(filename);
338
+ debug('FileWriterStream writing to file ', filename);
339
+ return { filename, stream };
359
340
  };
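getFileWriterStream only allocates a uniquely named file in the account's temp directory and opens a plain write stream to it; gzip: true just appends .gz to the name, and the actual compression is added later in getOutputStreams. Sketch:

// worker: a Worker instance, as in the earlier sketches
const { filename, stream } = await worker.getFileWriterStream({ targetFormat: 'csv', gzip: true });
// filename looks like <tempDir>/<timestamp>.<random>.csv.gz; stream is an fs write stream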
360
341
  Worker.prototype.getOutputStreams = async function (options) {
361
- const { filename, stream: fileWriterStream } = await this.getFileWriterStream(options);
362
- let { transform } = options;
363
- if (typeof options.transform === 'function') {
364
- if (options.transform.length === 3) {
365
- transform = new Transform({
366
- objectMode: true,
367
- async transform(item, encoding, cb) {
368
- options.transform(item, encoding, cb);
369
- }
370
- });
342
+ const { filename, stream: fileWriterStream } = await this.getFileWriterStream(options);
343
+ let { transform } = options;
344
+ if (typeof options.transform === 'function') {
345
+ if (options.transform.length === 3) {
346
+ transform = new Transform({
347
+ objectMode: true,
348
+ async transform(item, encoding, cb) {
349
+ options.transform(item, encoding, cb);
371
350
  }
372
- else {
373
- transform = new Transform({
374
- objectMode: true,
375
- async transform(item, encoding, cb) {
376
- cb(null, options.transform(item));
377
- }
378
- });
351
+ });
352
+ } else {
353
+ transform = new Transform({
354
+ objectMode: true,
355
+ async transform(item, encoding, cb) {
356
+ cb(null, options.transform(item));
379
357
  }
380
- }
381
- else if (options.transform) {
382
- transform = options.transform;
383
- }
384
- const { flatten } = options;
385
- let flattenTransform = null;
386
- if (bool(flatten, false)) {
387
- flattenTransform = new Transform({
388
- objectMode: true,
389
- async transform(item, enc, cb) {
390
- // first item establishes the keys to use
391
- let o = {};
392
- Object.keys(item).forEach((k) => {
393
- let v = item[k];
394
- if (!o[k]) {
395
- if (typeof v === 'object') {
396
- while (Array.isArray(v))
397
- [v] = v; // get first array item
398
- o = { ...o, ...v };
399
- }
400
- else {
401
- o[k] = v;
402
- }
403
- }
404
- });
405
- cb(null, o);
406
- }
407
- });
408
- }
409
- const stats = {
410
- records: 0
411
- };
412
- let stringifier;
413
- if (options.targetFormat === 'jsonl') {
414
- stringifier = new Transform({
415
- objectMode: true,
416
- transform(d, encoding, cb) {
417
- cb(false, `${JSON.stringify(d)}\n`);
358
+ });
359
+ }
360
+ } else if (options.transform) {
361
+ transform = options.transform;
362
+ }
363
+ const { flatten } = options;
364
+ let flattenTransform = null;
365
+ if (bool(flatten, false)) {
366
+ flattenTransform = new Transform({
367
+ objectMode: true,
368
+ async transform(item, enc, cb) {
369
+ // first item establishes the keys to use
370
+ let o = {};
371
+ Object.keys(item).forEach((k) => {
372
+ let v = item[k];
373
+ if (!o[k]) {
374
+ if (typeof v === 'object') {
375
+ while (Array.isArray(v)) [v] = v; // get first array item
376
+ o = { ...o, ...v };
377
+ } else {
378
+ o[k] = v;
418
379
  }
380
+ }
419
381
  });
420
- }
421
- else {
422
- stringifier = stringify({ header: true });
423
- }
424
- let gzip = new PassThrough();
425
- if (options.gzip) {
426
- gzip = zlib.createGzip();
427
- }
428
- const streams = [
429
- transform,
430
- flattenTransform,
431
- new Transform({
432
- objectMode: true,
433
- transform(d, enc, cb) {
434
- stats.records += 1;
435
- cb(null, d);
436
- }
437
- }),
438
- stringifier,
439
- gzip,
440
- fileWriterStream
441
- ].filter(Boolean);
442
- return { filename, streams, stats };
382
+ cb(null, o);
383
+ }
384
+ });
385
+ }
386
+ const stats = {
387
+ records: 0
388
+ };
389
+ let stringifier;
390
+ if (options.targetFormat === 'jsonl') {
391
+ stringifier = new Transform({
392
+ objectMode: true,
393
+ transform(d, encoding, cb) {
394
+ cb(false, `${JSON.stringify(d)}\n`);
395
+ }
396
+ });
397
+ } else {
398
+ stringifier = stringify({ header: true });
399
+ }
400
+ let gzip = new PassThrough();
401
+ if (options.gzip) {
402
+ gzip = zlib.createGzip();
403
+ }
404
+ const streams = [
405
+ transform,
406
+ flattenTransform,
407
+ new Transform({
408
+ objectMode: true,
409
+ transform(d, enc, cb) {
410
+ stats.records += 1;
411
+ cb(null, d);
412
+ }
413
+ }),
414
+ stringifier,
415
+ gzip,
416
+ fileWriterStream
417
+ ].filter(Boolean);
418
+ return { filename, streams, stats };
443
419
  };
444
420
  Worker.prototype.objectStreamToFile = async function (options) {
445
- const { filename, streams, stats } = await this.getOutputStreams(options);
446
- const { stream: inStream } = options;
447
- streams.unshift(inStream);
448
- await pipeline(streams);
449
- return { filename, records: stats.records };
421
+ const { filename, streams, stats } = await this.getOutputStreams(options);
422
+ const { stream: inStream } = options;
423
+ streams.unshift(inStream);
424
+ await pipeline(streams);
425
+ return { filename, records: stats.records };
450
426
  };
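objectStreamToFile prepends the input stream to the pipeline built by getOutputStreams (optional transform, optional flatten, record counter, csv or jsonl stringifier, optional gzip, file writer). A sketch writing two objects to a gzipped csv; the data is made up:

import { Readable } from 'node:stream';

// worker: a Worker instance, as in the earlier sketches
const { filename, records } = await worker.objectStreamToFile({
  stream: Readable.from([{ id: 1, name: 'a' }, { id: 2, name: 'b' }]),
  targetFormat: 'csv',
  gzip: true
});
// records === 2; filename points at the generated .csv.gz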
451
427
  Worker.prototype.transform = async function (options) {
452
- const worker = this;
453
- const { filename } = options;
454
- debug(`Transforming ${filename}`);
455
- options.filename = filename;
456
- let { stream } = await worker.fileToObjectStream(options);
457
- if (typeof stream.pipe !== 'function') {
458
- debug(stream);
459
- throw new Error('No pipe in stream');
460
- }
461
- let t = options.transform;
462
- // No longer need this
463
- delete options.transform;
464
- if (!t) {
465
- t = function (d, enc, cb) {
466
- d.is_test_transform = true;
467
- cb(null, d);
468
- };
469
- }
470
- if (!Array.isArray(t))
471
- t = [t];
472
- Object.keys(t).forEach((key) => {
473
- let f = t[key];
474
- if (typeof f === 'function') {
475
- f = new Transform({
476
- objectMode: true,
477
- transform: f
478
- });
479
- }
480
- stream = stream.pipe(f);
481
- });
482
- const { targetFormat } = options;
483
- if (!targetFormat &&
484
- (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')) {
485
- options.targetFormat = 'csv';
486
- }
487
- return worker.objectStreamToFile({ ...options, stream });
428
+ const worker = this;
429
+ const { filename } = options;
430
+ debug(`Transforming ${filename}`);
431
+ options.filename = filename;
432
+ let { stream } = await worker.fileToObjectStream(options);
433
+ if (typeof stream.pipe !== 'function') {
434
+ debug(stream);
435
+ throw new Error('No pipe in stream');
436
+ }
437
+ let t = options.transform;
438
+ // No longer need this
439
+ delete options.transform;
440
+ if (!t) {
441
+ t = function (d, enc, cb) {
442
+ d.is_test_transform = true;
443
+ cb(null, d);
444
+ };
445
+ }
446
+ if (!Array.isArray(t)) t = [t];
447
+ Object.keys(t).forEach((key) => {
448
+ let f = t[key];
449
+ if (typeof f === 'function') {
450
+ f = new Transform({
451
+ objectMode: true,
452
+ transform: f
453
+ });
454
+ }
455
+ stream = stream.pipe(f);
456
+ });
457
+ const { targetFormat } = options;
458
+ if (
459
+ !targetFormat &&
460
+ (filename.toLowerCase().slice(-4) === '.csv' || filename.toLowerCase().slice(-7) === '.csv.gz')
461
+ ) {
462
+ options.targetFormat = 'csv';
463
+ }
464
+ return worker.objectStreamToFile({ ...options, stream });
488
465
  };
489
466
  Worker.prototype.transform.metadata = {
490
- options: {
491
- sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
492
- encoding: { description: 'Manual override of source file encoding' },
493
- names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
494
- values: {
495
- description: "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)"
496
- },
497
- targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
498
- targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
499
- targetRowDelimiter: { description: 'Row delimiter (default \n)' },
500
- targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' }
501
- }
467
+ options: {
468
+ sourcePostfix: { description: "Override the source postfix, if for example it's a csv" },
469
+ encoding: { description: 'Manual override of source file encoding' },
470
+ names: { description: 'Target field names (e.g. my_new_field,x,y,z)' },
471
+ values: {
472
+ description:
473
+ "Comma delimited source field name, or Handlebars [[ ]] merge fields (e.g. 'my_field,x,y,z', '[[field1]]-[[field2]]', etc)"
474
+ },
475
+ targetFilename: { description: 'Custom name of the output file (default auto-generated)' },
476
+ targetFormat: { description: 'Output format -- csv supported, or none for txt (default)' },
477
+ targetRowDelimiter: { description: 'Row delimiter (default \n)' },
478
+ targetFieldDelimiter: { description: 'Field delimiter (default \t or ,)' }
479
+ }
502
480
  };
503
481
  Worker.prototype.testTransform = async function (options) {
504
- return this.transform({
505
- ...options,
506
- transform(d, enc, cb) {
507
- d.transform_time = new Date();
508
- cb(null, d);
509
- }
510
- });
482
+ return this.transform({
483
+ ...options,
484
+ transform(d, enc, cb) {
485
+ d.transform_time = new Date();
486
+ cb(null, d);
487
+ }
488
+ });
511
489
  };
512
490
  Worker.prototype.testTransform.metadata = {
513
- options: {
514
- filename: true
515
- }
491
+ options: {
492
+ filename: true
493
+ }
516
494
  };
517
495
  /* Get a stream from an actual stream, or an array, or a file */
518
496
  Worker.prototype.stream = async function (options) {
519
- const { stream: inputStream, packet, type, columns, limit, filename: filenameOpt } = options;
520
- let filename = filenameOpt;
521
- if (inputStream) {
522
- if (Array.isArray(inputStream)) {
523
- return { stream: Readable.from(inputStream) };
524
- }
525
- // probably already a stream
526
- if (typeof inputStream === 'object')
527
- return { stream: inputStream, encoding: 'object' };
528
- throw new Error(`Invalid stream type:${typeof inputStream}`);
529
- }
530
- else if (filename) {
531
- if (filename.startsWith('engine9-accounts/')) {
532
- filename = `${process.env.ENGINE9_ACCOUNT_DIR}/${filename.slice('engine9-accounts/'.length)}`;
533
- // debug(`Prepending file with ${process.env.ENGINE9_ACCOUNT_DIR}, filename=${filename}`);
534
- }
535
- else {
536
- // debug(`Not prepending filename:${filename}`);
537
- }
538
- let encoding;
539
- let stream;
540
- if (filename.slice(-8) === '.parquet') {
541
- const pq = new ParquetWorker(this);
542
- stream = (await pq.stream({ filename, columns, limit })).stream;
543
- encoding = 'object';
544
- }
545
- else if (filename.startsWith('s3://')) {
546
- const s3Worker = new S3Worker(this);
547
- stream = (await s3Worker.stream({ filename, columns, limit })).stream;
548
- encoding = 'UTF-8';
549
- }
550
- else if (filename.startsWith('r2://')) {
551
- const r2Worker = new R2Worker(this);
552
- stream = (await r2Worker.stream({ filename, columns, limit })).stream;
553
- encoding = 'UTF-8';
554
- }
555
- else {
556
- // Check if the file exists, and fast fail if not
557
- // Otherwise the stream hangs out as a handle
558
- try {
559
- await fsp.stat(filename);
560
- }
561
- catch (e) {
562
- debug(`Error reading file ${filename}, current directory: ${process.cwd()},__dirname:${__dirname}`);
563
- throw e;
564
- }
565
- stream = fs.createReadStream(filename);
566
- encoding = (await this.detectEncoding({ filename })).encoding;
567
- }
568
- return { stream, encoding };
569
- }
570
- else if (packet) {
571
- let { stream: packetStream } = await streamPacket({ packet, type, limit });
572
- const { transforms } = this.csvToObjectTransforms({});
573
- transforms.forEach((t) => {
574
- packetStream = packetStream.pipe(t);
575
- });
576
- return { stream: packetStream };
577
- }
578
- else {
579
- throw new Error('stream must be passed a stream, filename, or packet');
580
- }
497
+ const { stream: inputStream, packet, type, columns, limit, filename: filenameOpt } = options;
498
+ let filename = filenameOpt;
499
+ if (inputStream) {
500
+ if (Array.isArray(inputStream)) {
501
+ return { stream: Readable.from(inputStream) };
502
+ }
503
+ // probably already a stream
504
+ if (typeof inputStream === 'object') return { stream: inputStream, encoding: 'object' };
505
+ throw new Error(`Invalid stream type:${typeof inputStream}`);
506
+ } else if (filename) {
507
+ if (filename.startsWith('engine9-accounts/')) {
508
+ filename = `${process.env.ENGINE9_ACCOUNT_DIR}/${filename.slice('engine9-accounts/'.length)}`;
509
+ // debug(`Prepending file with ${process.env.ENGINE9_ACCOUNT_DIR}, filename=${filename}`);
510
+ } else {
511
+ // debug(`Not prepending filename:${filename}`);
512
+ }
513
+ let encoding;
514
+ let stream;
515
+ if (filename.slice(-8) === '.parquet') {
516
+ const pq = new ParquetWorker(this);
517
+ stream = (await pq.stream({ filename, columns, limit })).stream;
518
+ encoding = 'object';
519
+ } else if (filename.startsWith('s3://')) {
520
+ const s3Worker = new S3Worker(this);
521
+ stream = (await s3Worker.stream({ filename, columns, limit })).stream;
522
+ encoding = 'UTF-8';
523
+ } else if (filename.startsWith('r2://')) {
524
+ const r2Worker = new R2Worker(this);
525
+ stream = (await r2Worker.stream({ filename, columns, limit })).stream;
526
+ encoding = 'UTF-8';
527
+ } else {
528
+ // Check if the file exists, and fast fail if not
529
+ // Otherwise the stream hangs out as a handle
530
+ try {
531
+ await fsp.stat(filename);
532
+ } catch (e) {
533
+ debug(
534
+ `Error reading file ${filename}, current directory: ${process.cwd()},import.meta.dirname:${
535
+ import.meta.dirname
536
+ }`
537
+ );
538
+ throw e;
539
+ }
540
+ stream = fs.createReadStream(filename);
541
+ encoding = (await this.detectEncoding({ filename })).encoding;
542
+ }
543
+ return { stream, encoding };
544
+ } else if (packet) {
545
+ let { stream: packetStream } = await streamPacket({ packet, type, limit });
546
+ const { transforms } = this.csvToObjectTransforms({});
547
+ transforms.forEach((t) => {
548
+ packetStream = packetStream.pipe(t);
549
+ });
550
+ return { stream: packetStream };
551
+ } else {
552
+ throw new Error('stream must be passed a stream, filename, or packet');
553
+ }
581
554
  };
582
555
  Worker.prototype.sample = async function (opts) {
583
- opts.limit = opts.limit || 10;
584
- const { stream } = await this.fileToObjectStream(opts);
585
- return stream.toArray();
556
+ opts.limit = opts.limit || 10;
557
+ const { stream } = await this.fileToObjectStream(opts);
558
+ return stream.toArray();
586
559
  };
587
560
  Worker.prototype.sample.metadata = {
588
- options: {
589
- filename: {}
590
- }
561
+ options: {
562
+ filename: {}
563
+ }
591
564
  };
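sample defaults limit to 10 and collects the object stream with stream.toArray(); toArray just below does the same without forcing a limit, so it loads the whole file into memory. Sketch (filename hypothetical):

// worker: a Worker instance, as in the earlier sketches
const firstTen = await worker.sample({ filename: './contacts.csv' });
const allRows = await worker.toArray({ filename: './contacts.csv' });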
592
565
  Worker.prototype.toArray = async function (opts) {
593
- const { stream } = await this.fileToObjectStream(opts);
594
- return stream.toArray();
566
+ const { stream } = await this.fileToObjectStream(opts);
567
+ return stream.toArray();
595
568
  };
596
569
  Worker.prototype.toArray.metadata = {
597
- options: {
598
- filename: {}
599
- }
570
+ options: {
571
+ filename: {}
572
+ }
600
573
  };
601
574
  Worker.prototype.write = async function (opts) {
602
- const { filename, content } = opts;
603
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
604
- const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
605
- const parts = filename.split('/');
606
- const directory = parts.slice(0, -1).join('/');
607
- const file = parts.slice(-1)[0];
608
- // debug(JSON.stringify({ parts, directory, file }));
609
- await worker.write({
610
- directory,
611
- file,
612
- content
613
- });
614
- }
615
- else {
616
- const directory = path.dirname(filename);
617
- await fsp.mkdir(directory, { recursive: true });
618
- await fsp.writeFile(filename, content);
619
- }
620
- return { success: true, filename };
575
+ const { filename, content } = opts;
576
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
577
+ const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
578
+ const parts = filename.split('/');
579
+ const directory = parts.slice(0, -1).join('/');
580
+ const file = parts.slice(-1)[0];
581
+ // debug(JSON.stringify({ parts, directory, file }));
582
+ await worker.write({
583
+ directory,
584
+ file,
585
+ content
586
+ });
587
+ } else {
588
+ const directory = path.dirname(filename);
589
+ await fsp.mkdir(directory, { recursive: true });
590
+ await fsp.writeFile(filename, content);
591
+ }
592
+ return { success: true, filename };
621
593
  };
622
594
  Worker.prototype.write.metadata = {
623
- options: {
624
- filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
625
- content: {}
626
- }
595
+ options: {
596
+ filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
597
+ content: {}
598
+ }
627
599
  };
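write sends s3:// and r2:// filenames to the matching object-store worker and otherwise creates the directory and writes the content locally. Sketch (paths hypothetical):

// worker: a Worker instance, as in the earlier sketches
await worker.write({ filename: './out/report.json', content: JSON.stringify({ ok: true }) });
await worker.write({ filename: 'r2://bucket/reports/report.json', content: '{"ok":true}' });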
628
600
  async function streamToString(stream) {
629
- // lets have a ReadableStream as a stream variable
630
- const chunks = [];
631
- for await (const chunk of stream) {
632
- chunks.push(Buffer.from(chunk));
633
- }
634
- return Buffer.concat(chunks).toString('utf-8');
601
+ // lets have a ReadableStream as a stream variable
602
+ const chunks = [];
603
+ for await (const chunk of stream) {
604
+ chunks.push(Buffer.from(chunk));
605
+ }
606
+ return Buffer.concat(chunks).toString('utf-8');
635
607
  }
636
608
  /*
637
609
  Retrieves and parsed
638
610
  */
639
611
  Worker.prototype.json = async function (opts) {
640
- const { stream } = await this.stream(opts);
641
- const str = await streamToString(stream);
642
- try {
643
- return JSON5.parse(str);
644
- }
645
- catch (e) {
646
- debug(e);
647
- throw new Error(`Unparseable JSON received: ${opts.filename || '(native stream)'}`);
648
- }
612
+ const { stream } = await this.stream(opts);
613
+ const str = await streamToString(stream);
614
+ try {
615
+ return JSON5.parse(str);
616
+ } catch (e) {
617
+ debug(e);
618
+ throw new Error(`Unparseable JSON received: ${opts.filename || '(native stream)'}`);
619
+ }
649
620
  };
650
621
  Worker.prototype.json.metadata = {
651
- options: {
652
- filename: { description: 'Get a javascript object from a file' }
653
- }
622
+ options: {
623
+ filename: { description: 'Get a javascript object from a file' }
624
+ }
654
625
  };
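json buffers the whole stream into a string and parses it with JSON5, so comments and trailing commas in the file are tolerated. Sketch (filename hypothetical):

// worker: a Worker instance, as in the earlier sketches
const settings = await worker.json({ filename: './settings.json5' });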
655
626
  Worker.prototype.list = async function ({ directory, start: s, end: e }) {
656
- if (!directory)
657
- throw new Error('directory is required');
658
- let start = null;
659
- let end = null;
660
- if (s)
661
- start = relativeDate(s);
662
- if (e)
663
- end = relativeDate(e);
664
- if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
665
- const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
666
- return worker.list({ directory, start, end });
667
- }
668
- const a = await fsp.readdir(directory, { withFileTypes: true });
669
- const withModified = [];
670
- for (const file of a) {
671
- const fullPath = path.join(directory, file.name);
672
- const stats = await fsp.stat(fullPath);
673
- if (start && stats.mtime < start.getTime()) {
674
- //do not include
675
- }
676
- else if (end && stats.mtime > end.getTime()) {
677
- //do nothing
678
- }
679
- else {
680
- withModified.push({
681
- name: file.name,
682
- type: file.isDirectory() ? 'directory' : 'file',
683
- modifiedAt: new Date(stats.mtime).toISOString()
684
- });
685
- }
686
- }
687
- return withModified;
627
+ if (!directory) throw new Error('directory is required');
628
+ let start = null;
629
+ let end = null;
630
+ if (s) start = relativeDate(s);
631
+ if (e) end = relativeDate(e);
632
+ if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
633
+ const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
634
+ return worker.list({ directory, start, end });
635
+ }
636
+ const a = await fsp.readdir(directory, { withFileTypes: true });
637
+ const withModified = [];
638
+ for (const file of a) {
639
+ const fullPath = path.join(directory, file.name);
640
+ const stats = await fsp.stat(fullPath);
641
+ if (start && stats.mtime < start.getTime()) {
642
+ //do not include
643
+ } else if (end && stats.mtime > end.getTime()) {
644
+ //do nothing
645
+ } else {
646
+ withModified.push({
647
+ name: file.name,
648
+ type: file.isDirectory() ? 'directory' : 'file',
649
+ modifiedAt: new Date(stats.mtime).toISOString()
650
+ });
651
+ }
652
+ }
653
+ return withModified;
688
654
  };
689
655
  Worker.prototype.list.metadata = {
690
- options: {
691
- directory: { required: true }
692
- }
656
+ options: {
657
+ directory: { required: true }
658
+ }
693
659
  };
694
660
  Worker.prototype.listAll = async function ({ directory, start: s, end: e }) {
695
- if (!directory)
696
- throw new Error('directory is required');
697
- let start = null;
698
- let end = null;
699
- if (s)
700
- start = relativeDate(s).getTime();
701
- if (e)
702
- end = relativeDate(e).getTime();
703
- if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
704
- const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
705
- return worker.listAll({ directory, start, end });
706
- }
707
- const a = await fsp.readdir(directory, { recursive: true });
708
- let files = a.map((f) => `${directory}/${f}`);
709
- if (!start && !end) {
710
- return files;
711
- }
712
- const pLimit = await import('p-limit');
713
- const limitedMethod = pLimit.default(10);
714
- const filesWithinLimit = [];
715
- await Promise.all(files.map((filename) => limitedMethod(async () => {
661
+ if (!directory) throw new Error('directory is required');
662
+ let start = null;
663
+ let end = null;
664
+ if (s) start = relativeDate(s).getTime();
665
+ if (e) end = relativeDate(e).getTime();
666
+ if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
667
+ const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
668
+ return worker.listAll({ directory, start, end });
669
+ }
670
+ const a = await fsp.readdir(directory, { recursive: true });
671
+ let files = a.map((f) => `${directory}/${f}`);
672
+ if (!start && !end) {
673
+ return files;
674
+ }
675
+ const pLimit = await import('p-limit');
676
+ const limitedMethod = pLimit.default(10);
677
+ const filesWithinLimit = [];
678
+ await Promise.all(
679
+ files.map((filename) =>
680
+ limitedMethod(async () => {
716
681
  const stats = await fsp.stat(filename);
717
682
  if (start && stats.mtime < start) {
718
- //do not include
719
- }
720
- else if (end && stats.mtime > end) {
721
- //do nothing
683
+ //do not include
684
+ } else if (end && stats.mtime > end) {
685
+ //do nothing
686
+ } else {
687
+ filesWithinLimit.push({
688
+ name: filename,
689
+ type: stats.isDirectory() ? 'directory' : 'file',
690
+ modifiedAt: new Date(stats.mtime).toISOString()
691
+ });
722
692
  }
723
- else {
724
- filesWithinLimit.push({
725
- name: filename,
726
- type: stats.isDirectory() ? 'directory' : 'file',
727
- modifiedAt: new Date(stats.mtime).toISOString()
728
- });
729
- }
730
- })));
731
- return filesWithinLimit;
693
+ })
694
+ )
695
+ );
696
+ return filesWithinLimit;
732
697
  };
733
698
  Worker.prototype.listAll.metadata = {
734
- options: {
735
- directory: { required: true },
736
- start: {},
737
- end: {}
738
- }
699
+ options: {
700
+ directory: { required: true },
701
+ start: {},
702
+ end: {}
703
+ }
739
704
  };
740
705
  Worker.prototype.moveAll = async function (options) {
741
- const { directory, targetDirectory } = options;
742
- if (!directory)
743
- throw new Error('directory is required');
744
- if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
745
- const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
746
- return worker.moveAll(options);
747
- }
748
- const a = await this.listAll(options);
749
- let configs = a.map((f) => {
750
- let filename = typeof f === 'string' ? f : f.filename;
751
- return {
752
- filename,
753
- target: filename.replace(directory, targetDirectory)
754
- };
755
- });
756
- const pLimit = await import('p-limit');
757
- const limitedMethod = pLimit.default(10);
758
- return Promise.all(configs.map(({ filename, target }) => limitedMethod(async () => this.move({ filename, target }))));
706
+ const { directory, targetDirectory } = options;
707
+ if (!directory) throw new Error('directory is required');
708
+ if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
709
+ const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
710
+ return worker.moveAll(options);
711
+ }
712
+ const a = await this.listAll(options);
713
+ let configs = a.map((f) => {
714
+ let filename = typeof f === 'string' ? f : f.filename;
715
+ return {
716
+ filename,
717
+ target: filename.replace(directory, targetDirectory)
718
+ };
719
+ });
720
+ const pLimit = await import('p-limit');
721
+ const limitedMethod = pLimit.default(10);
722
+ return Promise.all(configs.map(({ filename, target }) => limitedMethod(async () => this.move({ filename, target }))));
759
723
  };
760
724
  Worker.prototype.moveAll.metadata = {
761
- options: {
762
- directory: { required: true },
763
- targetDirectory: { required: true }
764
- }
725
+ options: {
726
+ directory: { required: true },
727
+ targetDirectory: { required: true }
728
+ }
765
729
  };
766
730
  Worker.prototype.empty = async function ({ directory }) {
767
- if (!directory)
768
- throw new Error('directory is required');
769
- if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
770
- // currently not emptying S3 this way -- dangerous
771
- throw new Error('Cannot empty an s3:// or r2:// directory');
772
- }
773
- const removed = [];
774
- for (const file of await fsp.readdir(directory)) {
775
- removed.push(file);
776
- await fsp.unlink(path.join(directory, file));
777
- }
778
- return { directory, removed };
731
+ if (!directory) throw new Error('directory is required');
732
+ if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
733
+ // currently not emptying S3 this way -- dangerous
734
+ throw new Error('Cannot empty an s3:// or r2:// directory');
735
+ }
736
+ const removed = [];
737
+ for (const file of await fsp.readdir(directory)) {
738
+ removed.push(file);
739
+ await fsp.unlink(path.join(directory, file));
740
+ }
741
+ return { directory, removed };
779
742
  };
780
743
  Worker.prototype.empty.metadata = {
781
- options: {
782
- directory: { required: true }
783
- }
744
+ options: {
745
+ directory: { required: true }
746
+ }
784
747
  };
785
748
  Worker.prototype.removeAll = async function (options) {
786
- const filenames = await this.listAll(options);
787
- const pLimit = await import('p-limit');
788
- const limitedMethod = pLimit.default(10);
789
- return Promise.all(filenames.map((filename) => limitedMethod(async () => this.remove({ filename }))));
749
+ const filenames = await this.listAll(options);
750
+ const pLimit = await import('p-limit');
751
+ const limitedMethod = pLimit.default(10);
752
+ return Promise.all(filenames.map((filename) => limitedMethod(async () => this.remove({ filename }))));
790
753
  };
791
754
  Worker.prototype.removeAll.metadata = {
792
- options: {
793
- directory: { required: true },
794
- start: {},
795
- end: {}
796
- }
755
+ options: {
756
+ directory: { required: true },
757
+ start: {},
758
+ end: {}
759
+ }
797
760
  };
798
761
  Worker.prototype.remove = async function ({ filename }) {
799
- if (!filename)
800
- throw new Error('filename is required');
801
- if (typeof filename !== 'string')
802
- throw new Error(`filename isn't a string:${JSON.stringify(filename)}`);
803
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
804
- let worker = null;
805
- if (filename.startsWith('r2://')) {
806
- worker = new R2Worker(this);
807
- }
808
- else {
809
- worker = new S3Worker(this);
810
- }
811
- await worker.remove({ filename });
812
- }
813
- else {
814
- fsp.unlink(filename);
815
- }
816
- return { removed: filename };
762
+ if (!filename) throw new Error('filename is required');
763
+ if (typeof filename !== 'string') throw new Error(`filename isn't a string:${JSON.stringify(filename)}`);
764
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
765
+ let worker = null;
766
+ if (filename.startsWith('r2://')) {
767
+ worker = new R2Worker(this);
768
+ } else {
769
+ worker = new S3Worker(this);
770
+ }
771
+ await worker.remove({ filename });
772
+ } else {
773
+ fsp.unlink(filename);
774
+ }
775
+ return { removed: filename };
817
776
  };
818
777
  Worker.prototype.remove.metadata = {
819
- options: {
820
- filename: {}
821
- }
778
+ options: {
779
+ filename: {}
780
+ }
822
781
  };
823
782
  Worker.prototype.move = async function ({ filename, target, remove = true }) {
824
- if (!target)
825
- throw new Error('target is required');
826
- if (typeof target !== 'string')
827
- throw new Error(`target isn't a string:${JSON.stringify(target)}`);
828
- if (target.startsWith('s3://') || target.startsWith('r2://')) {
829
- if ((target.startsWith('s3://') && filename.startsWith('r2://')) ||
830
- (target.startsWith('r2://') && filename.startsWith('s3://'))) {
831
- throw new Error('Cowardly not copying between services');
832
- }
833
- let worker = null;
834
- if (target.startsWith('r2://')) {
835
- worker = new R2Worker(this);
836
- }
837
- else {
838
- worker = new S3Worker(this);
839
- }
840
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
841
- // We need to copy and delete
842
- const output = await worker.copy({ filename, target });
843
- if (remove)
844
- await worker.remove({ filename });
845
- return output;
846
- }
847
- const parts = target.split('/');
848
- return worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
783
+ if (!target) throw new Error('target is required');
784
+ if (typeof target !== 'string') throw new Error(`target isn't a string:${JSON.stringify(target)}`);
785
+ if (target.startsWith('s3://') || target.startsWith('r2://')) {
786
+ if (
787
+ (target.startsWith('s3://') && filename.startsWith('r2://')) ||
788
+ (target.startsWith('r2://') && filename.startsWith('s3://'))
789
+ ) {
790
+ throw new Error('Cowardly not copying between services');
791
+ }
792
+ let worker = null;
793
+ if (target.startsWith('r2://')) {
794
+ worker = new R2Worker(this);
795
+ } else {
796
+ worker = new S3Worker(this);
849
797
  }
850
- await fsp.mkdir(path.dirname(target), { recursive: true });
851
- if (remove) {
852
- try {
853
- await fsp.rename(filename, target);
854
- }
855
- catch (e) {
856
- //it may be a filesystem issue moving between items
857
- debug('Assuming this is a filesystem crosslink error, ignoring ', e.getMessage());
858
- await fsp.copyFile(filename, target);
859
- await fsp.unlink(filename);
860
- }
861
- }
862
- else {
863
- await fsp.copyFile(filename, target);
864
- }
865
- return { filename: target };
798
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
799
+ // We need to copy and delete
800
+ const output = await worker.copy({ filename, target });
801
+ if (remove) await worker.remove({ filename });
802
+ return output;
803
+ }
804
+ const parts = target.split('/');
805
+ return worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
806
+ }
807
+ await fsp.mkdir(path.dirname(target), { recursive: true });
808
+ if (remove) {
809
+ try {
810
+ await fsp.rename(filename, target);
811
+ } catch (e) {
812
+ //it may be a filesystem issue moving between items
813
+ debug('Assuming this is a filesystem crosslink error, ignoring ', e.getMessage());
814
+ await fsp.copyFile(filename, target);
815
+ await fsp.unlink(filename);
816
+ }
817
+ } else {
818
+ await fsp.copyFile(filename, target);
819
+ }
820
+ return { filename: target };
866
821
  };
867
822
  Worker.prototype.move.metadata = {
868
- options: {
869
- filename: {},
870
- target: {}
871
- }
823
+ options: {
824
+ filename: {},
825
+ target: {}
826
+ }
872
827
  };
873
828
  Worker.prototype.copy = async function (opts) {
874
- return this.move({ ...opts, remove: false });
829
+ return this.move({ ...opts, remove: false });
875
830
  };
876
831
  Worker.prototype.copy.metadata = {
877
- options: {
878
- filename: {},
879
- target: {}
880
- }
832
+ options: {
833
+ filename: {},
834
+ target: {}
835
+ }
881
836
  };
882
837
  Worker.prototype.stat = async function ({ filename }) {
883
- if (!filename)
884
- throw new Error('filename is required');
885
- const output = {};
886
- if (filename.slice(-8) === '.parquet') {
887
- const pq = new ParquetWorker(this);
888
- output.schema = (await pq.schema({ filename }))?.schema;
889
- output.records = (await pq.meta({ filename }))?.records;
890
- }
891
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
892
- const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
893
- Object.assign(output, await worker.stat({ filename }));
894
- }
895
- else {
896
- const { ctime, birthtime, size } = await fsp.stat(filename);
897
- const modifiedAt = new Date(ctime);
898
- let createdAt = birthtime;
899
- if (createdAt === 0 || !createdAt)
900
- createdAt = ctime;
901
- createdAt = new Date(createdAt);
902
- Object.assign(output, {
903
- createdAt,
904
- modifiedAt,
905
- size
906
- });
907
- }
908
- return output;
838
+ if (!filename) throw new Error('filename is required');
839
+ const output = {};
840
+ if (filename.slice(-8) === '.parquet') {
841
+ const pq = new ParquetWorker(this);
842
+ output.schema = (await pq.schema({ filename }))?.schema;
843
+ output.records = (await pq.meta({ filename }))?.records;
844
+ }
845
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
846
+ const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
847
+ Object.assign(output, await worker.stat({ filename }));
848
+ } else {
849
+ const { ctime, birthtime, size } = await fsp.stat(filename);
850
+ const modifiedAt = new Date(ctime);
851
+ let createdAt = birthtime;
852
+ if (createdAt === 0 || !createdAt) createdAt = ctime;
853
+ createdAt = new Date(createdAt);
854
+ Object.assign(output, {
855
+ createdAt,
856
+ modifiedAt,
857
+ size
858
+ });
859
+ }
860
+ return output;
909
861
  };
910
862
  Worker.prototype.stat.metadata = {
911
- options: {
912
- filename: {}
913
- }
863
+ options: {
864
+ filename: {}
865
+ }
914
866
  };
915
867
  Worker.prototype.download = async function ({ filename }) {
916
- if (!filename)
917
- throw new Error('filename is required');
918
- if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
919
- const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
920
- return worker.download({ filename });
921
- }
922
- throw new Error('Cannot download a local file');
868
+ if (!filename) throw new Error('filename is required');
869
+ if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
870
+ const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
871
+ return worker.download({ filename });
872
+ }
873
+ throw new Error('Cannot download a local file');
923
874
  };
924
875
  Worker.prototype.download.metadata = {
925
- options: {
926
- filename: {}
927
- }
876
+ options: {
877
+ filename: {}
878
+ }
928
879
  };
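download() only accepts remote paths; a plain local filename throws 'Cannot download a local file'. A sketch, where the s3:// key is a placeholder and the return shape is whatever S3Worker/R2Worker.download produces (not visible in this hunk):

// Sketch only -- the object key is a placeholder.
const downloaded = await worker.download({ filename: 's3://example-bucket/export.csv' });
// A local path would throw: await worker.download({ filename: '/tmp/export.csv' })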
929
880
  Worker.prototype.head = async function (options) {
930
- const limit = options.limit || 3;
931
- const { stream } = await this.fileToObjectStream({ ...options, limit });
932
- const chunks = [];
933
- let counter = 0;
934
- for await (const chunk of stream) {
935
- chunks.push(chunk);
936
- counter += 1;
937
- if (counter >= limit)
938
- break;
939
- }
940
- return chunks;
881
+ const limit = options.limit || 3;
882
+ const { stream } = await this.fileToObjectStream({ ...options, limit });
883
+ const chunks = [];
884
+ let counter = 0;
885
+ for await (const chunk of stream) {
886
+ chunks.push(chunk);
887
+ counter += 1;
888
+ if (counter >= limit) break;
889
+ }
890
+ return chunks;
941
891
  };
942
892
  Worker.prototype.head.metadata = {
943
- options: {
944
- filename: { required: true }
945
- }
893
+ options: {
894
+ filename: { required: true }
895
+ }
946
896
  };
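head() parses the file through fileToObjectStream and materializes the first few records (default limit 3). A sketch with a placeholder path:

// Returns an array of at most `limit` parsed row objects.
const firstRows = await worker.head({ filename: '/tmp/data.csv', limit: 5 });
console.log(firstRows.length); // <= 5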
947
897
  Worker.prototype.columns = async function (options) {
948
- const head = await this.head(options);
949
- if (head.length == 0) {
950
- return {
951
- records: 0,
952
- likelyHeaderLines: 0,
953
- columns: []
954
- };
955
- }
956
- let likelyHeaderLines = 1;
957
- const columns = Object.keys(head[0]);
958
- let s = columns.join(',');
959
- if (s.match(/[()@#%!]/)) {
960
- likelyHeaderLines = 0;
961
- }
898
+ const head = await this.head(options);
899
+ if (head.length == 0) {
962
900
  return {
963
- likelyHeaderLines,
964
- columns
901
+ records: 0,
902
+ likelyHeaderLines: 0,
903
+ columns: []
965
904
  };
905
+ }
906
+ let likelyHeaderLines = 1;
907
+ const columns = Object.keys(head[0]);
908
+ let s = columns.join(',');
909
+ if (s.match(/[()@#%!]/)) {
910
+ likelyHeaderLines = 0;
911
+ }
912
+ return {
913
+ likelyHeaderLines,
914
+ columns
915
+ };
966
916
  };
967
917
  Worker.prototype.columns.metadata = {
968
- options: {
969
- filename: { required: true }
970
- }
918
+ options: {
919
+ filename: { required: true }
920
+ }
971
921
  };
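columns() reuses head() and applies a small heuristic: if the joined column names contain characters such as ( ) @ # % !, the first row was probably data rather than a header, so likelyHeaderLines is reported as 0. A sketch:

// Sketch only -- path is a placeholder.
const { columns, likelyHeaderLines } = await worker.columns({ filename: '/tmp/data.csv' });
// columns: the keys of the first parsed record
// likelyHeaderLines: 1 normally, 0 when the names look like data values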
972
922
  Worker.prototype.count = async function (options) {
973
- const { stream } = await this.fileToObjectStream(options);
974
- const sample = [];
975
- const limit = options.limit || 5;
976
- let records = 0;
977
- for await (const chunk of stream) {
978
- records += 1;
979
- if (records < limit) {
980
- sample.push(chunk);
981
- }
982
- }
983
- return { sample, records };
923
+ const { stream } = await this.fileToObjectStream(options);
924
+ const sample = [];
925
+ const limit = options.limit || 5;
926
+ let records = 0;
927
+ for await (const chunk of stream) {
928
+ records += 1;
929
+ if (records < limit) {
930
+ sample.push(chunk);
931
+ }
932
+ }
933
+ return { sample, records };
984
934
  };
985
935
  Worker.prototype.count.metadata = {
986
- options: {
987
- filename: { required: true }
988
- }
936
+ options: {
937
+ filename: { required: true }
938
+ }
989
939
  };
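count() streams the whole file, tallying records and keeping the first few rows (those below the limit, default 5) as a sample. A sketch:

// Sketch only -- path is a placeholder.
const { records, sample } = await worker.count({ filename: '/tmp/data.csv' });
// records: total parsed rows; sample: the first rows kept for inspection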
990
940
  // Get a set of unique entries from a uniqueFunction
991
941
  // This could be large
992
942
  Worker.prototype.getUniqueSet = async function (options) {
993
- const existingFiles = getStringArray(options.filenames);
994
- const sample = {};
995
- let { uniqueFunction } = options;
996
- if (!uniqueFunction) {
997
- uniqueFunction = (o) => JSON.stringify(o);
998
- }
999
- const uniqueSet = new Set();
1000
- for (const filename of existingFiles) {
1001
- const { stream: existsStream } = await this.fileToObjectStream({ filename });
1002
- await pipeline(existsStream, new Transform({
1003
- objectMode: true,
1004
- transform(d, enc, cb) {
1005
- const v = uniqueFunction(makeStrings(d)) || '';
1006
- if (uniqueSet.size < 3) {
1007
- sample[v] = d;
1008
- }
1009
- uniqueSet.add(v);
1010
- cb(null, d);
1011
- }
1012
- }), new Writable({
1013
- objectMode: true,
1014
- write(d, enc, cb) {
1015
- cb();
1016
- }
1017
- }));
1018
- debug(`Finished loading ${filename}`);
1019
- }
1020
- return { uniqueFunction, uniqueSet, sample };
1021
- };
1022
- Worker.prototype.getUniqueStream = async function (options) {
1023
- const includeDuplicateSourceRecords = bool(options.includeDuplicateSourceRecords, false);
1024
- const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
1025
- filenames: options.existingFiles,
1026
- uniqueFunction: options.uniqueFunction
1027
- });
1028
- const { stream: inStream } = await this.fileToObjectStream(options);
1029
- const uniqueStream = inStream.pipe(new Transform({
943
+ const existingFiles = getStringArray(options.filenames);
944
+ const sample = {};
945
+ let { uniqueFunction } = options;
946
+ if (!uniqueFunction) {
947
+ uniqueFunction = (o) => JSON.stringify(o);
948
+ }
949
+ const uniqueSet = new Set();
950
+ for (const filename of existingFiles) {
951
+ const { stream: existsStream } = await this.fileToObjectStream({ filename });
952
+ await pipeline(
953
+ existsStream,
954
+ new Transform({
1030
955
  objectMode: true,
1031
956
  transform(d, enc, cb) {
1032
- const v = uniqueFunction(makeStrings(d)) || '';
1033
- if (!v) {
1034
- // falsey unique function includes
1035
- // by default
1036
- cb(null, d);
1037
- }
1038
- else if (uniqueSet.has(v)) {
1039
- // do nothing
1040
- cb();
1041
- }
1042
- else {
1043
- if (!includeDuplicateSourceRecords) {
1044
- // add it to the set for the next time
1045
- uniqueSet.add(v);
1046
- }
1047
- cb(null, d);
1048
- }
957
+ const v = uniqueFunction(makeStrings(d)) || '';
958
+ if (uniqueSet.size < 3) {
959
+ sample[v] = d;
960
+ }
961
+ uniqueSet.add(v);
962
+ cb(null, d);
963
+ }
964
+ }),
965
+ new Writable({
966
+ objectMode: true,
967
+ write(d, enc, cb) {
968
+ cb();
1049
969
  }
1050
- }));
1051
- return { stream: uniqueStream, sample };
970
+ })
971
+ );
972
+ debug(`Finished loading ${filename}`);
973
+ }
974
+ return { uniqueFunction, uniqueSet, sample };
1052
975
  };
1053
- Worker.prototype.getUniqueStream.metadata = {
1054
- options: {
1055
- existingFiles: {},
1056
- uniqueFunction: {},
1057
- filename: { description: 'Specify a source filename or a stream' },
1058
- stream: { description: 'Specify a source filename or a stream' },
1059
- includeDuplicateSourceRecords: {
1060
- description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
976
+ Worker.prototype.getUniqueStream = async function (options) {
977
+ const includeDuplicateSourceRecords = bool(options.includeDuplicateSourceRecords, false);
978
+ const { uniqueSet, uniqueFunction, sample } = await this.getUniqueSet({
979
+ filenames: options.existingFiles,
980
+ uniqueFunction: options.uniqueFunction
981
+ });
982
+ const { stream: inStream } = await this.fileToObjectStream(options);
983
+ const uniqueStream = inStream.pipe(
984
+ new Transform({
985
+ objectMode: true,
986
+ transform(d, enc, cb) {
987
+ const v = uniqueFunction(makeStrings(d)) || '';
988
+ if (!v) {
989
+ // a falsey unique value is included
990
+ // by default (the record passes through)
991
+ cb(null, d);
992
+ } else if (uniqueSet.has(v)) {
993
+ // do nothing
994
+ cb();
995
+ } else {
996
+ if (!includeDuplicateSourceRecords) {
997
+ // add it to the set for the next time
998
+ uniqueSet.add(v);
999
+ }
1000
+ cb(null, d);
1061
1001
  }
1062
- }
1002
+ }
1003
+ })
1004
+ );
1005
+ return { stream: uniqueStream, sample };
1006
+ };
1007
+ Worker.prototype.getUniqueStream.metadata = {
1008
+ options: {
1009
+ existingFiles: {},
1010
+ uniqueFunction: {},
1011
+ filename: { description: 'Specify a source filename or a stream' },
1012
+ stream: { description: 'Specify a source filename or a stream' },
1013
+ includeDuplicateSourceRecords: {
1014
+ description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
1015
+ }
1016
+ }
1063
1017
  };
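Taken together, getUniqueSet() builds a Set of uniqueFunction values from the existing files, and getUniqueStream() then streams the source file and drops any record whose key is already in that Set (records with a falsey key always pass; includeDuplicateSourceRecords controls whether repeats within the source itself are also dropped). A dedup sketch, where the 'email' column, import path, and file paths are illustrative assumptions:

// Sketch only -- column name and paths are placeholders.
const { stream, sample } = await worker.getUniqueStream({
  existingFiles: ['/tmp/existing.csv'],
  filename: '/tmp/new.csv',
  uniqueFunction: (row) => (row.email || '').toLowerCase(),
  includeDuplicateSourceRecords: false // also suppress repeats within new.csv
});
for await (const row of stream) {
  // rows whose key did not appear in existing.csv
}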
1064
1018
  Worker.prototype.getUniqueFile = async function (options) {
1065
- const { stream, sample } = await this.getUniqueStream(options);
1066
- const { filename, records } = await this.objectStreamToFile({ stream });
1067
- return { filename, records, sample };
1019
+ const { stream, sample } = await this.getUniqueStream(options);
1020
+ const { filename, records } = await this.objectStreamToFile({ stream });
1021
+ return { filename, records, sample };
1068
1022
  };
1069
1023
  Worker.prototype.getUniqueFile.metadata = {
1070
- options: {
1071
- existingFiles: {},
1072
- uniqueFunction: {},
1073
- filename: { description: 'Specify a source filename or a stream' },
1074
- stream: { description: 'Specify a source filename or a stream' },
1075
- includeDuplicateSourceRecords: {
1076
- description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
1077
- }
1078
- }
1024
+ options: {
1025
+ existingFiles: {},
1026
+ uniqueFunction: {},
1027
+ filename: { description: 'Specify a source filename or a stream' },
1028
+ stream: { description: 'Specify a source filename or a stream' },
1029
+ includeDuplicateSourceRecords: {
1030
+ description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
1031
+ }
1032
+ }
1079
1033
  };
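getUniqueFile() is the same filter, but the surviving records are written back out through objectStreamToFile. A sketch under the same assumptions as above (hypothetical 'email' column, placeholder paths):

// Sketch only -- returns the path of the newly written file.
const { filename, records, sample } = await worker.getUniqueFile({
  existingFiles: ['/tmp/existing.csv'],
  filename: '/tmp/new.csv',
  uniqueFunction: (row) => (row.email || '').toLowerCase()
});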
1080
1034
  /*
1081
1035
  diff that allows for unordered files, and doesn't store full objects in memory.
@@ -1083,42 +1037,40 @@ Requires 2 passes of the files,
1083
1037
  but that's a better tradeoff than trying to store huge files in memory
1084
1038
  */
1085
1039
  Worker.prototype.diff = async function (options) {
1086
- const { fileA, fileB, uniqueFunction: ufOpt, columns, includeDuplicateSourceRecords } = options;
1087
- if (options.fields)
1088
- throw new Error('fields is deprecated, use columns');
1089
- if (ufOpt && columns)
1090
- throw new Error('fields and uniqueFunction cannot both be specified');
1091
- let uniqueFunction = ufOpt;
1092
- if (!uniqueFunction && columns) {
1093
- const farr = getStringArray(columns);
1094
- uniqueFunction = (o) => farr.map((f) => o[f] || '').join('.');
1095
- }
1096
- const left = await this.getUniqueFile({
1097
- existingFiles: [fileB],
1098
- filename: fileA,
1099
- uniqueFunction,
1100
- includeDuplicateSourceRecords
1101
- });
1102
- const right = await this.getUniqueFile({
1103
- existingFiles: [fileA],
1104
- filename: fileB,
1105
- uniqueFunction,
1106
- includeDuplicateSourceRecords
1107
- });
1108
- return {
1109
- left,
1110
- right
1111
- };
1040
+ const { fileA, fileB, uniqueFunction: ufOpt, columns, includeDuplicateSourceRecords } = options;
1041
+ if (options.fields) throw new Error('fields is deprecated, use columns');
1042
+ if (ufOpt && columns) throw new Error('fields and uniqueFunction cannot both be specified');
1043
+ let uniqueFunction = ufOpt;
1044
+ if (!uniqueFunction && columns) {
1045
+ const farr = getStringArray(columns);
1046
+ uniqueFunction = (o) => farr.map((f) => o[f] || '').join('.');
1047
+ }
1048
+ const left = await this.getUniqueFile({
1049
+ existingFiles: [fileB],
1050
+ filename: fileA,
1051
+ uniqueFunction,
1052
+ includeDuplicateSourceRecords
1053
+ });
1054
+ const right = await this.getUniqueFile({
1055
+ existingFiles: [fileA],
1056
+ filename: fileB,
1057
+ uniqueFunction,
1058
+ includeDuplicateSourceRecords
1059
+ });
1060
+ return {
1061
+ left,
1062
+ right
1063
+ };
1112
1064
  };
1113
1065
  Worker.prototype.diff.metadata = {
1114
- options: {
1115
- fileA: {},
1116
- fileB: {},
1117
- columns: { description: 'Columns to use for uniqueness -- aka primary key. Defaults to JSON of line' },
1118
- uniqueFunction: {},
1119
- includeDuplicateSourceRecords: {
1120
- description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
1121
- }
1122
- }
1066
+ options: {
1067
+ fileA: {},
1068
+ fileB: {},
1069
+ columns: { description: 'Columns to use for uniqueness -- aka primary key. Defaults to JSON of line' },
1070
+ uniqueFunction: {},
1071
+ includeDuplicateSourceRecords: {
1072
+ description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
1073
+ }
1074
+ }
1123
1075
  };
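diff() runs getUniqueFile() in both directions, so left holds records whose key appears only in fileA and right holds records whose key appears only in fileB; the key comes from columns (joined with '.') or a custom uniqueFunction. A sketch keyed on a hypothetical 'id' column, with placeholder paths:

// Sketch only -- column name and paths are placeholders.
const { left, right } = await worker.diff({
  fileA: '/tmp/before.csv',
  fileB: '/tmp/after.csv',
  columns: ['id']
});
// left and right are getUniqueFile results: { filename, records, sample }
// left.filename  -> rows present in fileA but not fileB
// right.filename -> rows present in fileB but not fileA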
1124
1076
  export default Worker;