@engine9-io/input-tools 1.9.4 → 1.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ForEachEntry.js +5 -3
- package/file/FileUtilities.js +57 -12
- package/package.json +1 -1
- package/test/file.js +16 -10
- package/test/processing/forEachResume.js +56 -0
- package/test/sample/fileWithHead.csv +3 -0
- package/test/sample/fileWithoutHead.csv +2 -0
- package/timelineTypes.js +10 -10
package/ForEachEntry.js
CHANGED
|
@@ -24,7 +24,7 @@ class ForEachEntry {
|
|
|
24
24
|
this.fileUtilities = new FileUtilities({ accountId });
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
-
getOutputStream({ name, postfix = '.timeline.csv', validatorFunction = () => true }) {
|
|
27
|
+
getOutputStream({ name, filename, postfix = '.timeline.csv', validatorFunction = () => true }) {
|
|
28
28
|
this.outputStreams = this.outputStreams || {};
|
|
29
29
|
if (this.outputStreams[name]?.items) return this.outputStreams[name].items;
|
|
30
30
|
|
|
@@ -33,12 +33,14 @@ class ForEachEntry {
|
|
|
33
33
|
};
|
|
34
34
|
|
|
35
35
|
return this.outputStreams[name].mutex.runExclusive(async () => {
|
|
36
|
+
let f = filename || (await getTempFilename({ postfix }));
|
|
37
|
+
|
|
36
38
|
const fileInfo = {
|
|
37
|
-
filename:
|
|
39
|
+
filename: f,
|
|
38
40
|
records: 0
|
|
39
41
|
};
|
|
40
42
|
|
|
41
|
-
debug(`Output file requested, writing output to to: ${fileInfo.filename}`);
|
|
43
|
+
debug(`Output file requested ${name}, writing output to to: ${fileInfo.filename}`);
|
|
42
44
|
const outputStream = new ValidatingReadable(
|
|
43
45
|
{
|
|
44
46
|
objectMode: true
|
package/file/FileUtilities.js
CHANGED
|
@@ -206,6 +206,17 @@ Worker.prototype.xlsxToObjectStream = async function (options) {
|
|
|
206
206
|
return { stream };
|
|
207
207
|
};
|
|
208
208
|
|
|
209
|
+
/**
 * Work out the data format of a file, normally from its extension.
 *
 * Precedence:
 *   1. `format`        - explicit override, returned untouched when truthy
 *   2. `sourcePostfix` - used in place of the filename's extension
 *   3. the lower-cased text after the last `.` in `filename`
 *
 * When the resulting postfix is `gz`, the segment before it in `filename`
 * is returned instead, so `people.csv.gz` resolves to `csv`.
 *
 * @param {object} options
 * @param {string} [options.format] explicit format override
 * @param {string} [options.sourcePostfix] postfix to use instead of the filename's extension
 * @param {string} [options.filename] file name or path to inspect
 * @returns {Promise<string|undefined>} the resolved format, or undefined when
 *   nothing could be derived from the options
 */
Worker.prototype.getFormat = async function (options) {
  const { sourcePostfix, filename, format: formatOverride } = options;

  // The override always wins, so return before touching `filename`; callers
  // that pass only `format` must not fail on a missing filename.
  if (formatOverride) return formatOverride;

  const parts = typeof filename === 'string' ? filename.toLowerCase().split('.') : [];
  const postfix = sourcePostfix || parts[parts.length - 1];

  // Compressed file: report the format of the wrapped file, i.e. the segment
  // just before the trailing `gz`. Without a filename there is nothing to
  // look behind, so the postfix itself is returned.
  if (postfix === 'gz' && parts.length > 0) {
    return parts[parts.length - 2];
  }
  return postfix;
};
|
|
219
|
+
|
|
209
220
|
/*
|
|
210
221
|
Commonly used method to transform a file into a stream of objects.
|
|
211
222
|
*/
|
|
@@ -635,6 +646,8 @@ Worker.prototype.write = async function (opts) {
|
|
|
635
646
|
content
|
|
636
647
|
});
|
|
637
648
|
} else {
|
|
649
|
+
const directory = path.dirname(filename);
|
|
650
|
+
await fsp.mkdir(directory, { recursive: true });
|
|
638
651
|
await fsp.writeFile(filename, content);
|
|
639
652
|
}
|
|
640
653
|
return { success: true, filename };
|
|
@@ -861,7 +874,14 @@ Worker.prototype.move = async function ({ filename, target, remove = true }) {
|
|
|
861
874
|
}
|
|
862
875
|
await fsp.mkdir(path.dirname(target), { recursive: true });
|
|
863
876
|
if (remove) {
|
|
864
|
-
|
|
877
|
+
try {
|
|
878
|
+
await fsp.rename(filename, target);
|
|
879
|
+
} catch (e) {
|
|
880
|
+
//it may be a filesystem issue moving between items
|
|
881
|
+
debug(e);
|
|
882
|
+
await fsp.copyFile(filename, target);
|
|
883
|
+
await fsp.unlink(filename);
|
|
884
|
+
}
|
|
865
885
|
} else {
|
|
866
886
|
await fsp.copyFile(filename, target);
|
|
867
887
|
}
|
|
@@ -953,6 +973,34 @@ Worker.prototype.head.metadata = {
|
|
|
953
973
|
}
|
|
954
974
|
};
|
|
955
975
|
|
|
976
|
+
// Characters that make a column name look like a data value rather than a
// header label (e.g. an email address or a formatted phone number).
const NON_HEADER_CHARACTERS = /[()@#%!]/;

/**
 * List the column names of a file and guess whether its first line is a header.
 *
 * The column names are the keys of the first entry returned by
 * `this.head(options)`. If any of those names contains one of `( ) @ # % !`
 * the first line is assumed to be data and `likelyHeaderLines` is 0;
 * otherwise it is 1.
 *
 * @param {object} options passed through unchanged to `this.head`
 * @returns {Promise<{records?: number, likelyHeaderLines: number, columns: string[]}>}
 *   `records: 0` is only included when `head` returned no entries.
 */
Worker.prototype.columns = async function (options) {
  const head = await this.head(options);
  if (head.length === 0) {
    return {
      records: 0,
      likelyHeaderLines: 0,
      columns: []
    };
  }

  const columns = Object.keys(head[0]);
  const looksLikeData = columns.some((name) => NON_HEADER_CHARACTERS.test(name));

  return {
    likelyHeaderLines: looksLikeData ? 0 : 1,
    columns
  };
};

// Option descriptor for `columns`: only `filename` is declared, and it is required.
Worker.prototype.columns.metadata = {
  options: {
    filename: { required: true }
  }
};
|
|
1003
|
+
|
|
956
1004
|
Worker.prototype.count = async function (options) {
|
|
957
1005
|
const { stream } = await this.fileToObjectStream(options);
|
|
958
1006
|
const sample = [];
|
|
@@ -1084,17 +1132,14 @@ diff that allows for unordered files, and doesn't store full objects in memory.
|
|
|
1084
1132
|
Requires 2 passes of the files,
|
|
1085
1133
|
but that's a better tradeoff than trying to store huge files in memory
|
|
1086
1134
|
*/
|
|
1087
|
-
Worker.prototype.diff = async function ({
|
|
1088
|
-
fileA,
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
fields
|
|
1092
|
-
includeDuplicateSourceRecords
|
|
1093
|
-
}) {
|
|
1094
|
-
if (ufOpt && fields) throw new Error('fields and uniqueFunction cannot both be specified');
|
|
1135
|
+
Worker.prototype.diff = async function (options) {
|
|
1136
|
+
const { fileA, fileB, uniqueFunction: ufOpt, columns, includeDuplicateSourceRecords } = options;
|
|
1137
|
+
if (options.fields) throw new Error('fields is deprecated, use columns');
|
|
1138
|
+
|
|
1139
|
+
if (ufOpt && columns) throw new Error('fields and uniqueFunction cannot both be specified');
|
|
1095
1140
|
let uniqueFunction = ufOpt;
|
|
1096
|
-
if (!uniqueFunction &&
|
|
1097
|
-
const farr = getStringArray(
|
|
1141
|
+
if (!uniqueFunction && columns) {
|
|
1142
|
+
const farr = getStringArray(columns);
|
|
1098
1143
|
uniqueFunction = (o) => farr.map((f) => o[f] || '').join('.');
|
|
1099
1144
|
}
|
|
1100
1145
|
|
|
@@ -1120,7 +1165,7 @@ Worker.prototype.diff.metadata = {
|
|
|
1120
1165
|
options: {
|
|
1121
1166
|
fileA: {},
|
|
1122
1167
|
fileB: {},
|
|
1123
|
-
|
|
1168
|
+
columns: { description: 'Columns to use for uniqueness -- aka primary key. Defaults to JSON of line' },
|
|
1124
1169
|
uniqueFunction: {},
|
|
1125
1170
|
includeDuplicateSourceRecords: {
|
|
1126
1171
|
description: 'Sometimes you want the output to include source dupes, sometimes not, default false'
|
package/package.json
CHANGED
package/test/file.js
CHANGED
|
@@ -1,18 +1,24 @@
|
|
|
1
|
-
const {
|
|
2
|
-
it,
|
|
3
|
-
} = require('node:test');
|
|
1
|
+
const { it } = require('node:test');
|
|
4
2
|
const assert = require('node:assert');
|
|
5
3
|
const debug = require('debug')('files');
|
|
6
4
|
|
|
7
5
|
const { FileUtilities } = require('../index');
|
|
8
6
|
|
|
9
7
|
it('Should list a directory', async () => {
|
|
10
|
-
const futil=new FileUtilities({accountId:'test'});
|
|
11
|
-
let files=await futil.list({directory:'.'});
|
|
12
|
-
assert(files.length,
|
|
8
|
+
const futil = new FileUtilities({ accountId: 'test' });
|
|
9
|
+
let files = await futil.list({ directory: '.' });
|
|
10
|
+
assert(files.length, 'Should have some files');
|
|
13
11
|
debug(files);
|
|
14
|
-
let startTest=await futil.list({directory:'.',start:'2040-01-01'});
|
|
15
|
-
assert(startTest.length===0,
|
|
16
|
-
let endTest=await futil.list({directory:'.',end:'1900-01-01'});
|
|
17
|
-
assert(endTest.length===0,
|
|
12
|
+
let startTest = await futil.list({ directory: '.', start: '2040-01-01' });
|
|
13
|
+
assert(startTest.length === 0, 'Should NOT have any files before future start date');
|
|
14
|
+
let endTest = await futil.list({ directory: '.', end: '1900-01-01' });
|
|
15
|
+
assert(endTest.length === 0, 'Should NOT have any files before past end date');
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
// Exercises the header guess of FileUtilities#columns on two sample files.
// Going by their names, fileWithHead.csv starts with a header row and
// fileWithoutHead.csv starts directly with data (contents not shown here).
it('Should be able to analyze CSV files with and without header lines', async () => {
  const futil = new FileUtilities({ accountId: 'test' });

  const f1 = await futil.columns({ filename: `${__dirname}/sample/fileWithHead.csv` });
  assert.equal(f1.likelyHeaderLines, 1, 'Number of header lines should be 1');

  const f2 = await futil.columns({ filename: `${__dirname}/sample/fileWithoutHead.csv` });
  // The failure message now matches the expected value of 0.
  assert.equal(f2.likelyHeaderLines, 0, 'Number of header lines should be 0');
});
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
const { describe, it } = require('node:test');
|
|
2
|
+
const assert = require('node:assert');
|
|
3
|
+
const debug = require('debug')('test/forEach');
|
|
4
|
+
|
|
5
|
+
const { ForEachEntry } = require('../../index');
|
|
6
|
+
|
|
7
|
+
// Runs ForEachEntry.process over the bundled sample packet with two bound
// output streams, then checks that both streams recorded output and that
// every entry was handed to `transform`.
describe('Test Person File For Each', () => {
  it('forEachPerson Should loop through 1000 sample people', async () => {
    let counter = 0;
    const forEach = new ForEachEntry();
    const result = await forEach.process({
      packet: 'test/sample/1000_message.packet.zip',
      batchSize: 50,
      bindings: {
        timelineOutputFileStream: {
          path: 'output.timeline',
          options: {
            entry_type: 'ENTRY_OPTION'
          }
        },
        sampleOutputFileStream: {
          path: 'output.stream'
        }
      },
      async transform({ batch, timelineOutputFileStream, sampleOutputFileStream }) {
        batch.forEach((p) => {
          // Roughly one entry in ten also goes to the sample stream. The
          // selection is random, so the sample assertion below relies on at
          // least one entry being picked.
          if (Math.random() > 0.9) {
            sampleOutputFileStream.push({
              // for testing we don't need real person_ids
              person_id: p.person_id || Math.floor(Math.random() * 1000000),
              email: p.email,
              entry_type: 'SAMPLE_OUTPUT'
            });
          }
          timelineOutputFileStream.push({
            // for testing we don't need real person_ids
            person_id: p.person_id || Math.floor(Math.random() * 1000000),
            email: p.email,
            entry_type: 'EMAIL_DELIVERED'
          });
        });

        counter += batch.length;
      }
    });

    assert(result.outputFiles?.timelineOutputFileStream?.[0]?.records);
    assert(result.outputFiles?.sampleOutputFileStream?.[0]?.records);
    debug(result);
    assert.equal(counter, 1000, `Expected to loop through 1000 people, actual:${counter}`);
    // Logged here rather than in the suite body, which runs before the test.
    debug('Completed tests');
  });
});
|
package/timelineTypes.js
CHANGED
|
@@ -26,6 +26,16 @@ const TRANSACTION_REFUND = 15;
|
|
|
26
26
|
const SEGMENT_PERSON_ADD = 16;
|
|
27
27
|
const SEGMENT_PERSON_REMOVE = 17;
|
|
28
28
|
|
|
29
|
+
// unknown generic conversion on a message
|
|
30
|
+
const MESSAGE_CONVERSION = 20;
|
|
31
|
+
// advocacy conversion on a message
|
|
32
|
+
const MESSAGE_CONVERSION_ADVOCACY = 21;
|
|
33
|
+
// unknown transaction conversion on a message
|
|
34
|
+
const MESSAGE_CONVERSION_TRANSACTION = 22;
|
|
35
|
+
|
|
36
|
+
const MESSAGE_DELIVERY_FAILURE_SHOULD_RETRY = 25;
|
|
37
|
+
const MESSAGE_DELIVERY_FAILURE_SHOULD_NOT_RETRY = 26;
|
|
38
|
+
|
|
29
39
|
const SMS_SEND = 30;
|
|
30
40
|
const SMS_DELIVERED = 31;
|
|
31
41
|
const SMS_CLICK = 33;
|
|
@@ -54,16 +64,6 @@ const FORM_SUBMIT = 60;
|
|
|
54
64
|
const FORM_PETITION = 61;
|
|
55
65
|
const FORM_PETITION_CONTACT_TARGET = 62;
|
|
56
66
|
|
|
57
|
-
// unknown generic conversion on a message
|
|
58
|
-
const MESSAGE_CONVERSION = 63;
|
|
59
|
-
// advocacy conversion on a message
|
|
60
|
-
const MESSAGE_CONVERSION_ADVOCACY = 64;
|
|
61
|
-
// unknown transaction conversion on a message
|
|
62
|
-
const MESSAGE_CONVERSION_TRANSACTION = 65;
|
|
63
|
-
|
|
64
|
-
const MESSAGE_DELIVERY_FAILURE_SHOULD_RETRY = 66;
|
|
65
|
-
const MESSAGE_DELIVERY_FAILURE_SHOULD_NOT_RETRY = 66;
|
|
66
|
-
|
|
67
67
|
const FORM_ADVOCACY = 66;
|
|
68
68
|
const FORM_SURVEY = 67;
|
|
69
69
|
|