@engine9-io/input-tools 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintignore +5 -0
- package/.eslintrc.js +36 -0
- package/ForEachEntry.js +158 -0
- package/LICENSE +674 -0
- package/README.md +6 -0
- package/ValidatingReadable.js +25 -0
- package/buildSamplePackets.js +18 -0
- package/file/FileUtilities.js +741 -0
- package/file/GoogleDrive.js +45 -0
- package/file/Parquet.js +138 -0
- package/file/S3.js +246 -0
- package/file/tools.js +237 -0
- package/index.js +327 -0
- package/package.json +53 -0
- package/test/cli.js +10 -0
- package/test/packet/bigDataMessage.js +62 -0
- package/test/packet/forEach.js +51 -0
- package/test/packet/message.js +48 -0
- package/test/packet/timelineStream.js +26 -0
- package/test/packet/zip.js +22 -0
- package/test/parallelStream.js +48 -0
- package/test/sample/1000_message.packet.zip +0 -0
- package/test/sample/5_message.packet.zip +0 -0
- package/test/sample/message/1000_fake_people.csv +1001 -0
- package/test/sample/message/5_fake_people.csv +6 -0
- package/test/sample/message/message.json5 +41 -0
- package/test/uuid.js +25 -0
- package/timelineTypes.js +142 -0
package/index.js
ADDED
@@ -0,0 +1,327 @@
const fs = require('node:fs');

const path = require('node:path');

const debug = require('debug')('@engine9/input-tools');

const unzipper = require('unzipper');
const {
  v4: uuidv4, v5: uuidv5, v7: uuidv7, validate: uuidIsValid,
} = require('uuid');
const archiver = require('archiver');
const FileUtilities = require('./file/FileUtilities');

const {
  bool,
  getManifest,
  getFile,
  downloadFile,
  getTempFilename,
  streamPacket,
  getPacketFiles,
  getBatchTransform,
  getDebatchTransform,
} = require('./file/tools');

const ForEachEntry = require('./ForEachEntry');

const { TIMELINE_ENTRY_TYPES } = require('./timelineTypes');

function getStringArray(s, nonZeroLength) {
  let a = s || [];
  if (typeof a === 'number') a = String(a);
  if (typeof a === 'string') a = [a];

  if (typeof s === 'string') a = s.split(',');
  a = a.map((x) => x.toString().trim()).filter(Boolean);
  if (nonZeroLength && a.length === 0) a = [0];
  return a;
}

/*
  When comparing two objects, some may come from a file (thus strings), and some from
  a database or elsewhere (not strings), so for deduping make sure to make them all strings
*/
function makeStrings(o) {
  return Object.entries(o).reduce((a, [k, v]) => {
    a[k] = (typeof v === 'object') ? JSON.stringify(v) : String(v);
    return a;
  }, {});
}

async function list(_path) {
  const directory = await unzipper.Open.file(_path);

  // Note: this streams the first entry of the zip to a local file named 'firstFile'
  return new Promise((resolve, reject) => {
    directory.files[0]
      .stream()
      .pipe(fs.createWriteStream('firstFile'))
      .on('error', reject)
      .on('finish', resolve);
  });
}

async function extract(_path, _file) {
  const directory = await unzipper.Open.file(_path);
  // return directory.files.map((f) => f.path);
  const file = directory.files.find((d) => d.path === _file);
  const tempFilename = await getTempFilename({ source: _file });
  return new Promise((resolve, reject) => {
    file
      .stream()
      .pipe(fs.createWriteStream(tempFilename))
      .on('error', reject)
      .on('finish', () => resolve(tempFilename));
  });
}

function appendFiles(existingFiles, _newFiles, options) {
  const newFiles = getStringArray(_newFiles);
  if (newFiles.length === 0) return;
  let { type, dateCreated } = options || {};
  if (!type) type = 'unknown';
  if (!dateCreated) dateCreated = new Date().toISOString();
  let arr = newFiles;
  if (!Array.isArray(newFiles)) arr = [arr];

  arr.forEach((p) => {
    const item = {
      type,
      originalFilename: '',
      isNew: true,
      dateCreated,
    };

    if (typeof p === 'string') {
      item.originalFilename = path.resolve(process.cwd(), p);
    } else {
      item.originalFilename = path.resolve(process.cwd(), p.originalFilename || '');
    }

    const file = item.originalFilename.split(path.sep).pop();
    item.path = `${type}/${file}`;
    const existingFile = existingFiles.find((f) => f.path === item.path);
    if (existingFile) throw new Error(`Error adding files, duplicate path found for path: ${item.path}`);
    existingFiles.push(item);
  });
}

async function create(options) {
  const {
    accountId = 'engine9',
    pluginId = '',
    target = '', // target filename, creates one if not specified
    messageFiles = [], // file with contents of message, used for delivery
    personFiles = [], // files with data on people
    timelineFiles = [], // activity entries
    statisticsFiles = [], // files with aggregate statistics
  } = options;
  if (options.peopleFiles) throw new Error('Unknown option: peopleFiles, did you mean personFiles?');

  const files = [];
  const dateCreated = new Date().toISOString();
  appendFiles(files, messageFiles, { type: 'message', dateCreated });
  appendFiles(files, personFiles, { type: 'person', dateCreated });
  appendFiles(files, timelineFiles, { type: 'timeline', dateCreated });
  appendFiles(files, statisticsFiles, { type: 'statistics', dateCreated });

  const zipFilename = target || await getTempFilename({ postfix: '.packet.zip' });

  const manifest = {
    accountId,
    source: {
      pluginId,
    },
    dateCreated,
    files,
  };

  // create a file to stream archive data to
  const output = fs.createWriteStream(zipFilename);
  const archive = archiver('zip', {
    zlib: { level: 9 }, // sets the compression level
  });
  return new Promise((resolve, reject) => {
    debug(`Setting up write stream to ${zipFilename}`);
    // listen for all archive data to be written;
    // the 'close' event is fired only when a file descriptor is involved
    output.on('close', () => {
      debug('archiver has been finalized and the output file descriptor has closed, calling success');
      debug(zipFilename);
      return resolve({
        filename: zipFilename,
        bytes: archive.pointer(),
      });
    });

    // This event is fired when the data source is drained, no matter what the data source was.
    // It is not part of this library but rather from the Node.js Stream API.
    // @see: https://nodejs.org/api/stream.html#stream_event_end
    output.on('end', () => {
      // debug('end event -- data has been drained');
    });

    // warnings could be file-not-founds, etc., but we error even on those
    archive.on('warning', (err) => {
      reject(err);
    });

    // good practice to catch this error explicitly
    archive.on('error', (err) => {
      reject(err);
    });

    archive.pipe(output);

    files.forEach(({ path: name, originalFilename }) => archive.file(originalFilename, { name }));
    files.forEach((f) => {
      delete f.originalFilename;
      delete f.isNew;
    });

    archive.append(Buffer.from(JSON.stringify(manifest, null, 4), 'utf8'), { name: 'manifest.json' });
    archive.finalize();
  });
}

function intToByteArray(_v) {
  // we want to represent the input as an 8-byte array
  const byteArray = [0, 0, 0, 0, 0, 0, 0, 0];
  let v = _v;
  for (let index = 0; index < byteArray.length; index += 1) {
    // eslint-disable-next-line no-bitwise
    const byte = v & 0xff;
    byteArray[index] = byte;
    v = (v - byte) / 256;
  }

  return byteArray;
}
function getPluginUUID(uniqueNamespaceLikeDomainName, valueWithinNamespace) {
  // Random custom namespace for plugins -- not secure, just a namespace:
  return uuidv5(`${uniqueNamespaceLikeDomainName}::${valueWithinNamespace}`, 'f9e1024d-21ac-473c-bac6-64796dd771dd');
}

function getInputUUID(a, b) {
  let pluginId = a;
  let remoteInputId = b;
  if (typeof a === 'object') {
    pluginId = a.pluginId;
    remoteInputId = a.remoteInputId;
  }

  if (!pluginId) throw new Error('getInputUUID: Cowardly rejecting a blank plugin_id');
  if (!uuidIsValid(pluginId)) throw new Error(`Invalid pluginId:${pluginId}, should be a UUID`);
  const rid = (remoteInputId || '').trim();
  if (!rid) throw new Error('getInputUUID: Cowardly rejecting a blank remote_input_id, set a default');
  // Random custom namespace for inputs -- not secure, just a namespace:
  // 3d0e5d99-6ba9-4fab-9bb2-c32304d3df8e
  return uuidv5(`${pluginId}:${rid}`, '3d0e5d99-6ba9-4fab-9bb2-c32304d3df8e');
}

function getUUIDv7(date, inputUuid) { /* optional date and input UUID */
  const uuid = inputUuid || uuidv7();
  const bytes = Buffer.from(uuid.replace(/-/g, ''), 'hex');
  if (date !== undefined) {
    const d = new Date(date);
    // isNaN behaves differently than Number.isNaN -- we're actually going for the
    // attempted conversion here
    // eslint-disable-next-line no-restricted-globals
    if (isNaN(d)) throw new Error(`getUUIDv7 got an invalid date:${date || '<blank>'}`);
    const dateBytes = intToByteArray(d.getTime()).reverse();
    dateBytes.slice(2, 8).forEach((b, i) => { bytes[i] = b; });
  }
  return uuidv4({ random: bytes });
}
/* Returns a date from a given uuid (assumed to be a v7; otherwise the results are ... weird) */
function getUUIDTimestamp(uuid) {
  const ts = parseInt((`${uuid}`).replace(/-/g, '').slice(0, 12), 16);
  return new Date(ts);
}
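
// Example: new Date('2020-01-01T00:00:00Z').getTime() is 1577836800000, which is
// 0x016F5E66E800 in hex. intToByteArray() returns those 8 bytes little-endian;
// reversed and sliced to positions 2-7 they become the big-endian 6 bytes
// 01 6f 5e 66 e8 00, which overwrite the first 6 bytes of the UUID. Since
// getUUIDTimestamp() parses the first 12 hex characters back into milliseconds,
// getUUIDTimestamp(getUUIDv7('2020-01-01T00:00:00Z')) returns that same date.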

const requiredTimelineEntryFields = ['ts', 'entry_type_id', 'input_id', 'person_id'];

function getTimelineEntryUUID(inputObject, { defaults = {} } = {}) {
  const o = { ...defaults, ...inputObject };
  /*
    Outside systems CAN specify a unique UUID as remote_entry_uuid,
    which will be used for updates, etc.
    If not, it will be generated using whatever info we have
  */
  if (o.remote_entry_uuid) {
    if (!uuidIsValid(o.remote_entry_uuid)) throw new Error('Invalid remote_entry_uuid, it must be a UUID');
    return o.remote_entry_uuid;
  }
  /*
    Outside systems CAN specify a unique remote_entry_id.
    If not, it will be generated using whatever info we have
  */

  if (o.remote_entry_id) {
    // get a temp ID
    if (!o.input_id) throw new Error('Error generating timeline entry uuid -- remote_entry_id specified, but no input_id');
    const uuid = uuidv5(o.remote_entry_id, o.input_id);
    // Change out the ts to match the v7 sorting.
    // But because an outside-specified remote_entry_uuid
    // may not match this standard, uuid sorting isn't guaranteed
    return getUUIDv7(o.ts, uuid);
  }

  const missing = requiredTimelineEntryFields
    .filter((d) => o[d] === undefined); // 0 could be an entry type value

  if (missing.length > 0) throw new Error(`Missing required fields to append an entry_id:${missing.join(',')}`);
  const ts = new Date(o.ts);
  // isNaN behaves differently than Number.isNaN -- we're actually going for the
  // attempted conversion here
  // eslint-disable-next-line no-restricted-globals
  if (isNaN(ts)) throw new Error(`getTimelineEntryUUID got an invalid date:${o.ts || '<blank>'}`);
  const idString = `${ts.toISOString()}-${o.person_id}-${o.entry_type_id}-${o.source_code_id || 0}`;
  // get a temp ID
  const uuid = uuidv5(idString, o.input_id);
  // Change out the ts to match the v7 sorting.
  // But because an outside-specified remote_entry_uuid
  // may not match this standard, uuid sorting isn't guaranteed
  return getUUIDv7(ts, uuid);
}
function getEntryTypeId(o, { defaults = {} } = {}) {
  let id = o.entry_type_id || defaults.entry_type_id;
  if (id) return id;
  const etype = o.entry_type || defaults.entry_type;
  if (!etype) {
    throw new Error('No entry_type, nor entry_type_id specified, specify a defaultEntryType');
  }
  id = TIMELINE_ENTRY_TYPES[etype];
  if (id === undefined) throw new Error(`Invalid entry_type: ${etype}`);
  return id;
}

module.exports = {
  bool,
  create,
  list,
  extract,
  streamPacket,
  getBatchTransform,
  getDebatchTransform,
  getManifest,
  getFile,
  downloadFile,
  getStringArray,
  getTempFilename,
  getTimelineEntryUUID,
  getPacketFiles,
  getPluginUUID,
  getInputUUID,
  getUUIDv7,
  getUUIDTimestamp,
  uuidIsValid,
  uuidv4,
  uuidv5,
  uuidv7,
  makeStrings,
  ForEachEntry,
  FileUtilities,
  TIMELINE_ENTRY_TYPES,
  getEntryTypeId,
};
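
Taken together, index.js covers packet creation plus deterministic, time-sortable IDs. A minimal usage sketch (the file paths, the 'example.org' namespace, and the 'list-42' remote input id below are illustrative, not part of the package):

const {
  create, getManifest, getPluginUUID, getInputUUID, getUUIDv7, getUUIDTimestamp,
} = require('@engine9-io/input-tools');

(async () => {
  // Build a .packet.zip from a person CSV; a temp filename is generated
  // because no target is specified.
  const { filename, bytes } = await create({
    personFiles: ['test/sample/message/5_fake_people.csv'],
  });
  console.log(`Wrote ${bytes} bytes to ${filename}`);

  // The manifest written into the zip can be read back out.
  const manifest = await getManifest({ packet: filename });
  console.log(manifest.accountId); // 'engine9' by default

  // Deterministic IDs: the same namespace/value pair always yields the same UUID.
  const pluginId = getPluginUUID('example.org', 'my-plugin');
  const inputId = getInputUUID(pluginId, 'list-42');
  console.log(inputId);

  // Time-sortable IDs: the first 6 bytes encode the timestamp, so it can be recovered.
  const uuid = getUUIDv7('2020-01-01T00:00:00Z');
  console.log(getUUIDTimestamp(uuid).toISOString()); // 2020-01-01T00:00:00.000Z
})();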
package/package.json
ADDED
@@ -0,0 +1,53 @@
{
  "name": "@engine9-io/input-tools",
  "version": "1.3.6",
  "description": "Tools for dealing with Engine9 inputs",
  "main": "index.js",
  "scripts": {
    "test-big-data": "export DEBUG=*;node --test --test-name-pattern=big-data",
    "test": "export DEBUG=*;node --test --test-skip-pattern=big-data"
  },
  "author": "Engine9",
  "license": "GPL-3.0-or-later",
  "devDependencies": {
    "eslint": "^8.57.0",
    "eslint-config-airbnb-base": "^15.0.0",
    "eslint-plugin-import": "^2.29.0",
    "eslint-plugin-jsonc": "^2.15.1"
  },
  "dependencies": {
    "@aws-sdk/client-s3": "^3.723.0",
    "@dsnp/parquetjs": "^1.8.6",
    "archiver": "^7.0.1",
    "async-mutex": "^0.5.0",
    "csv": "^6.3.11",
    "debug": "^4.3.4",
    "detect-file-encoding-and-language": "^2.4.0",
    "googleapis": "^148.0.0",
    "handlebars": "^4.7.8",
    "json5": "^2.2.3",
    "mime-type": "^5.0.3",
    "mkdirp": "^3.0.1",
    "parallel-transform": "^1.2.0",
    "throttle-debounce": "^5.0.2",
    "unzipper": "^0.12.1",
    "uuid": "^11.1.0",
    "yargs": "^17.7.2"
  },
  "directories": {
    "test": "test"
  },
  "repository": {
    "type": "git",
    "url": "git+https://github.com/engine9-io/input-tools.git"
  },
  "keywords": [
    "Engine9",
    "CRM",
    "CDP"
  ],
  "bugs": {
    "url": "https://github.com/engine9-io/input-tools/issues"
  },
  "homepage": "https://github.com/engine9-io/input-tools#readme"
}
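
Both test scripts use Node's built-in test runner and turn on all debug logging via DEBUG=*: npm test skips suites whose names match big-data, while npm run test-big-data selects only those. A sketch of how a test opts into the big-data split (assumes nothing beyond node:test; the suite name is what --test-name-pattern=big-data matches):

const { describe, it } = require('node:test');
const assert = require('node:assert');

// Included by `npm run test-big-data` and skipped by `npm test`,
// purely because the suite name contains "big-data".
describe('big-data example', () => {
  it('runs only under the big-data name pattern', () => {
    assert.ok(true);
  });
});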
package/test/cli.js
ADDED
@@ -0,0 +1,10 @@
const argv = require('yargs/yargs')(process.argv.slice(2)).parse();
const methods = require('../index');

async function run() {
  if (typeof methods[argv._[0]] !== 'function') throw new Error(`${argv._[0]} is not a function`);
  const output = await methods[argv._[0]](argv);
  // eslint-disable-next-line no-console
  console.log(output);
}
run();
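
cli.js dispatches its first positional argument to the matching export of index.js and passes the parsed yargs flags through as the options object. A hedged invocation sketch (the flag names simply mirror create's options; this is not a documented CLI):

// Shell (hypothetical):
//   node test/cli.js create --personFiles=test/sample/message/5_fake_people.csv
// is roughly equivalent to the programmatic call:
const { create } = require('@engine9-io/input-tools');

create({ personFiles: 'test/sample/message/5_fake_people.csv' })
  .then((out) => console.log(out)) // { filename, bytes }
  .catch((err) => { console.error(err); process.exit(1); });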
package/test/packet/bigDataMessage.js
ADDED
@@ -0,0 +1,62 @@
const {
  describe, it,
} = require('node:test');
const assert = require('node:assert');
const debug = require('debug')('test:big-data');
const { setTimeout } = require('node:timers/promises');
const { v7: uuidv7 } = require('uuid');

const { ForEachEntry } = require('../../index');

describe('big-data message: forEachPerson', async () => {
  it('message: forEachPerson should loop through 1000000 sample people', async () => {
    const messageContent = [];
    let counter = 0;
    const forEach = new ForEachEntry();

    const output = await forEach.process(
      {
        // packet: '../1000000_person_message.packet.zip',
        filename: '../1000000_fake_people.csv',
        batchSize: 10000,
        concurrency: 1000,
        bindings: {
          timelineOutputStream: { path: 'output.timeline' },
          message: { path: 'file', filename: '../1000000_person_message.packet/message/message.json5' },
          handlebars: { path: 'handlebars' },
        },
        async transform({
          batch,
          message,
          handlebars,
          timelineOutputStream,
        }) {
          const id = uuidv7();
          debug(`Processing batch of ${batch.length} - ${id}`);
          if (!message?.content?.text) throw new Error(`Sample message has no content.text:${JSON.stringify(message)}`);
          const template = handlebars.compile(message.content.text);
          batch.forEach((person) => {
            messageContent.push(template(person));
          });
          batch.forEach((p) => {
            const o = {
              person_id: p.person_id,
              email: p.email,
              entry_type: 'EMAIL_DELIVERED',
            };
            counter += 1;
            if (counter % 10000 === 0) debug(`*** Processed ${counter} items, last person_id=${p.person_id}`, o);
            timelineOutputStream.push(o);
          });
          // debug(`Processed batch of size ${batch.length}`);
          await setTimeout(Math.random() * 3000);
          debug(`Completed processing ${id}`);
        },
      },
    );
    debug(output);

    assert.equal(counter, 1000000, `Expected to loop through 1000000 people, actual:${counter}`);
  });
  debug('Completed all tests');
});
package/test/packet/forEach.js
ADDED
@@ -0,0 +1,51 @@
const {
  describe, it,
} = require('node:test');
const assert = require('node:assert');
const debug = require('debug')('test/forEach');

const { ForEachEntry } = require('../../index');

describe('Test Person Packet For Each', async () => {
  it('forEachPerson should loop through 1000 sample people', async () => {
    let counter = 0;
    const forEach = new ForEachEntry();
    await forEach.process(
      {
        packet: 'test/sample/1000_message.packet.zip',
        batchSize: 50,
        bindings: {
          timelineOutputStream: {
            path: 'output.timeline',
            options: {
              entry_type: 'SAMPLE',
            },
          },
        },
        async transform(props) {
          const {
            batch,
            timelineOutputStream,
          } = props;
          if (timelineOutputStream) {
            batch.forEach((p) => {
              timelineOutputStream.push(
                {
                  // for testing we don't need real person_ids
                  person_id: p.person_id || Math.floor(Math.random() * 1000000),
                  email: p.email,
                  entry_type: 'EMAIL_DELIVERED',
                },
              );
            });
          } else {
            throw new Error(`output.timeline did not put a timelineOutputStream into the bindings:${Object.keys(props)}`);
          }
          batch.forEach(() => { counter += 1; });
        },
      },
    );
    assert.equal(counter, 1000, `Expected to loop through 1000 people, actual:${counter}`);
  });
  debug('Completed tests');
});
package/test/packet/message.js
ADDED
@@ -0,0 +1,48 @@
const {
  describe, it,
} = require('node:test');
const assert = require('node:assert');
const debug = require('debug')('message');

const { forEachPerson } = require('../../index');

describe('Test Person Packet Message For Each', async () => {
  it('message: forEachPerson should loop through 1000 sample people', async () => {
    const messageContent = [];
    let counter = 0;
    const results = await forEachPerson(
      {
        packet: 'test/sample/1000_message.packet.zip',
        batchSize: 50,
        bindings: {
          timelineOutputStream: { type: 'packet.output.timeline' },
          message: { type: 'packet.message' },
          handlebars: { type: 'handlebars' },
        },
        async transform({
          batch,
          message,
          handlebars,
          timelineOutputStream,
        }) {
          const template = handlebars.compile(message.content.text);
          batch.forEach((person) => {
            messageContent.push(template(person));
          });
          batch.forEach(() => { counter += 1; });
          batch.forEach((p) => {
            timelineOutputStream.push(
              {
                person_id: p.person_id,
                email: p.email,
                entry_type: 'EMAIL_DELIVERED',
              },
            );
          });
        },
      },
    );
    debug(results);
    assert.equal(counter, 1000, `Expected to loop through 1000 people, actual:${counter}`);
  });
});
package/test/packet/timelineStream.js
ADDED
@@ -0,0 +1,26 @@
const {
  describe, it,
} = require('node:test');
const fs = require('node:fs');
const assert = require('node:assert');

const { getTimelineOutputStream } = require('../../index');

describe('TimelineOutputStream', async () => {
  it('timeline: it should save items to a csv file', async () => {
    const {
      stream: timelineStream, promises,
      files,
    } = await getTimelineOutputStream({});
    timelineStream.push({ foo: 'bar' });
    // finish the input stream
    timelineStream.push(null);
    await promises[0];
    const content = fs.readFileSync(files[0]).toString().split('\n').map((d) => d.trim())
      .filter(Boolean);

    const s = 'uuid,entry_type,person_id,reference_id';
    assert.equal(content[0].slice(0, s.length), s, "Beginning of first line doesn't match expected timeline csv header");
    assert.equal(content.length, 2, `There are ${content.length}, not 2 lines in the CSV file`);
  });
});
package/test/packet/zip.js
ADDED
@@ -0,0 +1,22 @@
const { describe, it } = require('node:test');
const assert = require('node:assert');
const debug = require('debug')('test/zip');

const { create, getManifest } = require('../../index');

describe('Test Person Packet Creator', async () => {
  const pfile = './test/sample/message/5_fake_people.csv';
  it(`should create a zip file from directory ${process.cwd()} with path ${pfile}`, async () => {
    const out = await create({
      personFiles: [pfile],
      messageFiles: 'test/sample/message/message.json5',
    });
    debug('Successfully created:', out);
    return out;
  });
  it('should retrieve a manifest', async () => {
    const manifest = await getManifest({ packet: './test/sample/5_message.packet.zip' });
    assert.equal(typeof manifest, 'object', 'Manifest is not an object');
    assert.equal(manifest.accountId, 'engine9', 'Manifest does not have an accountId=engine9');
  });
});
package/test/parallelStream.js
ADDED
@@ -0,0 +1,48 @@
/* eslint-disable no-console */
const {
  setTimeout,
} = require('node:timers/promises');

const { describe } = require('node:test');
// const assert = require('node:assert');
const { Readable } = require('node:stream');
const { createWriteStream } = require('node:fs');
const { pipeline } = require('node:stream/promises');
const ParallelStream = require('../ParallelStream');
const { getTempFilename } = require('../index');

describe('Should process items in parallel:', async () => {
  const outputFile = await getTempFilename({});
  const writeStream = createWriteStream(outputFile);

  const CONCURRENCY = 500;
  await pipeline(
    Readable.from(
      [...Array(1000)].map((v, i) => ({ id: i })),
    ),

    new ParallelStream(
      CONCURRENCY,
      async (obj, enc, push, done) => {
        let res;

        try {
          await setTimeout(Math.random() * 1000);
          if (Math.random() > 0.7) throw new Error('Random error');

          res = `${obj.id} is complete\n`;
        } catch (err) {
          await setTimeout(Math.random() * 2000); // longer timeouts for errors
          res = `${obj.id} is error, ${err.name}\n`;
        }

        done(null, obj.id); // _onComplete actually

        return res;
      },
    ),
    writeStream,
  );

  console.log('Wrote responses to ', outputFile);
});
package/test/sample/1000_message.packet.zip
Binary file
package/test/sample/5_message.packet.zip
Binary file