@engine9-io/input-tools 1.9.2 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ForEachEntry.js CHANGED
@@ -17,9 +17,7 @@ const handlebars = require('handlebars');
17
17
  const ValidatingReadable = require('./ValidatingReadable');
18
18
  const FileUtilities = require('./file/FileUtilities');
19
19
 
20
- const {
21
- getTempFilename, getBatchTransform, getFile, streamPacket,
22
- } = require('./file/tools');
20
+ const { getTempFilename, getBatchTransform, getFile, streamPacket } = require('./file/tools');
23
21
 
24
22
  class ForEachEntry {
25
23
  constructor({ accountId } = {}) {
@@ -31,45 +29,52 @@ class ForEachEntry {
31
29
  if (this.outputStreams[name]?.items) return this.outputStreams[name].items;
32
30
 
33
31
  this.outputStreams[name] = this.outputStreams[name] || {
34
- mutex: new Mutex(),
32
+ mutex: new Mutex()
35
33
  };
36
34
 
37
35
  return this.outputStreams[name].mutex.runExclusive(async () => {
38
36
  const fileInfo = {
39
37
  filename: await getTempFilename({ postfix }),
40
- records: 0,
38
+ records: 0
41
39
  };
42
40
 
43
41
  debug(`Output file requested, writing output to to: ${fileInfo.filename}`);
44
- const outputStream = new ValidatingReadable({
45
- objectMode: true,
46
- }, validatorFunction);
47
- // eslint-disable-next-line no-underscore-dangle
42
+ const outputStream = new ValidatingReadable(
43
+ {
44
+ objectMode: true
45
+ },
46
+ validatorFunction
47
+ );
48
+
48
49
  outputStream._read = () => {};
49
50
 
50
51
  const writeStream = fs.createWriteStream(fileInfo.filename);
51
52
  const finishWritingOutputPromise = new Promise((resolve, reject) => {
52
- writeStream.on('finish', () => {
53
- resolve();
54
- }).on('error', (err) => {
55
- reject(err);
56
- });
53
+ writeStream
54
+ .on('finish', () => {
55
+ resolve();
56
+ })
57
+ .on('error', (err) => {
58
+ reject(err);
59
+ });
57
60
  });
58
61
 
59
62
  this.outputStreams[name].items = {
60
63
  stream: outputStream,
61
64
  promises: [finishWritingOutputPromise],
62
- files: [fileInfo],
65
+ files: [fileInfo]
63
66
  };
64
67
 
65
68
  outputStream
66
- .pipe(new Transform({
67
- objectMode: true,
68
- transform(o, enc, cb) {
69
- fileInfo.records += 1;
70
- cb(null, o);
71
- },
72
- }))
69
+ .pipe(
70
+ new Transform({
71
+ objectMode: true,
72
+ transform(o, enc, cb) {
73
+ fileInfo.records += 1;
74
+ cb(null, o);
75
+ }
76
+ })
77
+ )
73
78
  .pipe(csv.stringify({ header: true }))
74
79
  .pipe(writeStream);
75
80
 
@@ -80,10 +85,11 @@ class ForEachEntry {
80
85
  async process({
81
86
  packet,
82
87
  filename,
88
+ progress,
83
89
  transform: userTransform,
84
90
  batchSize = 500,
85
91
  concurrency = 10,
86
- bindings = {},
92
+ bindings = {}
87
93
  }) {
88
94
  let inStream = null;
89
95
 
@@ -97,6 +103,22 @@ class ForEachEntry {
97
103
  if (typeof userTransform !== 'function') throw new Error('async transform function is required');
98
104
  if (userTransform.length > 1) throw new Error('transform should be an async function that accepts one argument');
99
105
 
106
+ let progressThrottle = () => {};
107
+ if (typeof progress === 'function') {
108
+ const startTime = new Date().getTime();
109
+ progressThrottle = throttle(
110
+ 2000,
111
+ function ({ records, batches }) {
112
+ let message = `Processed ${records} across ${batches} batches,${(
113
+ (records * 60 * 1000) /
114
+ (new Date().getTime() - startTime)
115
+ ).toFixed(1)} records/minute`;
116
+ progress({ records, message });
117
+ },
118
+ { noLeading: false, noTrailing: false }
119
+ );
120
+ }
121
+
100
122
  let records = 0;
101
123
  let batches = 0;
102
124
 
@@ -110,72 +132,80 @@ class ForEachEntry {
110
132
  const newStreams = [];
111
133
 
112
134
  const bindingNames = Object.keys(bindings);
113
- // eslint-disable-next-line no-await-in-loop
114
- await Promise.all(bindingNames.map(async (bindingName) => {
115
- const binding = bindings[bindingName];
116
- if (!binding.path) throw new Error(`Invalid binding: path is required for binding ${bindingName}`);
117
- if (binding.path === 'output.timeline') {
118
- const { stream: streamImpl, promises, files } = await this.getOutputStream({
119
- name: bindingName,
120
- postfix: binding.options?.postfix || '.timeline.csv',
121
- validatorFunction: (data) => {
122
- if (!data) return true;
123
- if (typeof data !== 'object') throw new Error('Invalid timeline data push, must be an object');
124
- // Is this necessary?
125
- if (!data.person_id) throw new Error('Invalid timeline data push, must have a person_id, even if 0');
126
- if (!data.ts) data.ts = new Date().toISOString();
127
- return true;
128
- },
129
- });
130
- newStreams.push(streamImpl);
131
- transformArguments[bindingName] = streamImpl;
132
- bindingPromises = bindingPromises.concat(promises || []);
133
- outputFiles[bindingName] = files;
134
- } else if (binding.path === 'output.stream') {
135
- const { stream: streamImpl, promises, files } = await this.getOutputStream({
136
- name: bindingName,
137
- postfix: binding.options?.postfix || '.timeline.csv',
138
- });
139
- newStreams.push(streamImpl);
140
- transformArguments[bindingName] = streamImpl;
141
- bindingPromises = bindingPromises.concat(promises || []);
142
- outputFiles[bindingName] = files;
143
- } else if (binding.path === 'file') {
144
- transformArguments[bindingName] = await getFile(binding);
145
- } else if (binding.path === 'handlebars') {
146
- transformArguments[bindingName] = handlebars;
147
- } else {
148
- throw new Error(`Unsupported binding path for binding ${bindingName}: ${binding.path}`);
149
- }
150
- }));
135
+
136
+ await Promise.all(
137
+ bindingNames.map(async (bindingName) => {
138
+ const binding = bindings[bindingName];
139
+ if (!binding.path) throw new Error(`Invalid binding: path is required for binding ${bindingName}`);
140
+ if (binding.path === 'output.timeline') {
141
+ const {
142
+ stream: streamImpl,
143
+ promises,
144
+ files
145
+ } = await this.getOutputStream({
146
+ name: bindingName,
147
+ postfix: binding.options?.postfix || '.timeline.csv',
148
+ validatorFunction: (data) => {
149
+ if (!data) return true;
150
+ if (typeof data !== 'object') throw new Error('Invalid timeline data push, must be an object');
151
+ // Is this necessary?
152
+ if (!data.person_id) throw new Error('Invalid timeline data push, must have a person_id, even if 0');
153
+ if (!data.ts) data.ts = new Date().toISOString();
154
+ return true;
155
+ }
156
+ });
157
+ newStreams.push(streamImpl);
158
+ transformArguments[bindingName] = streamImpl;
159
+ bindingPromises = bindingPromises.concat(promises || []);
160
+ outputFiles[bindingName] = files;
161
+ } else if (binding.path === 'output.stream') {
162
+ const {
163
+ stream: streamImpl,
164
+ promises,
165
+ files
166
+ } = await this.getOutputStream({
167
+ name: bindingName,
168
+ postfix: binding.options?.postfix || '.timeline.csv'
169
+ });
170
+ newStreams.push(streamImpl);
171
+ transformArguments[bindingName] = streamImpl;
172
+ bindingPromises = bindingPromises.concat(promises || []);
173
+ outputFiles[bindingName] = files;
174
+ } else if (binding.path === 'file') {
175
+ transformArguments[bindingName] = await getFile(binding);
176
+ } else if (binding.path === 'handlebars') {
177
+ transformArguments[bindingName] = handlebars;
178
+ } else {
179
+ throw new Error(`Unsupported binding path for binding ${bindingName}: ${binding.path}`);
180
+ }
181
+ })
182
+ );
151
183
  await pipeline(
152
184
  inStream,
153
185
  csv.parse({
154
186
  relax: true,
155
187
  skip_empty_lines: true,
156
188
  max_limit_on_data_read: 10000000,
157
- columns: true,
189
+ columns: true
158
190
  }),
159
191
  getBatchTransform({ batchSize }).transform,
160
- parallelTransform(
161
- concurrency,
162
- (batch, cb) => {
163
- userTransform({ ...transformArguments, batch })
164
- .then((d) => cb(null, d))
165
- .catch(cb);
166
- },
167
-
168
- ),
192
+ parallelTransform(concurrency, (batch, cb) => {
193
+ userTransform({ ...transformArguments, batch })
194
+ .then((d) => {
195
+ batches += 1;
196
+ records += batch?.length || 0;
197
+ progressThrottle({ records, batches });
198
+ debugThrottle(`Processed ${batches} batches for a total of ${records} outbound records`);
199
+ cb(null, d);
200
+ })
201
+ .catch(cb);
202
+ }),
169
203
  new Writable({
170
204
  objectMode: true,
171
205
  write(batch, enc, cb) {
172
- batches += 1;
173
- records += batch?.length || 0;
174
-
175
- debugThrottle(`Processed ${batches} batches for a total of ${records} outbound records`);
176
206
  cb();
177
- },
178
- }),
207
+ }
208
+ })
179
209
  );
180
210
  debug('Completed all batches');
181
211
 
@@ -890,7 +890,7 @@ Worker.prototype.stat = async function ({ filename }) {
890
890
 
891
891
  if (filename.slice(-8) === '.parquet') {
892
892
  const pq = new ParquetWorker(this);
893
- output.schema = await pq.schema({ filename });
893
+ output.schema = (await pq.schema({ filename }))?.schema;
894
894
  output.records = (await pq.meta({ filename }))?.records;
895
895
  }
896
896
 
package/file/tools.js CHANGED
@@ -351,6 +351,15 @@ function appendPostfix(filename, postfix) {
351
351
  return filenameParts.slice(0, -1).concat(targetFile).join('/');
352
352
  }
353
353
 
354
/**
 * Normalize a value that may be a JSON5 string or an already-parsed object.
 *
 * @param {*} o - Value to normalize. Truthy objects are returned untouched;
 *   truthy strings are parsed with JSON5.parse (which throws on invalid
 *   input); any other truthy value is rejected.
 * @param {*} [defaultVal] - Returned when `o` is falsy. Only `null` and
 *   `undefined` count as "no default", so explicit falsy defaults such as
 *   `0`, `false` or `''` are honored.
 * @returns {*} The object, the parsed string, the default, or the original
 *   falsy `o` when no default was supplied.
 * @throws {Error} When `o` is truthy but neither an object nor a string.
 */
function parseJSON5(o, defaultVal) {
  if (o) {
    if (typeof o === 'object') return o;
    if (typeof o === 'string') return JSON5.parse(o);
    throw new Error(`Could not parse object:${o}`);
  }
  // `??` rather than `||`: a caller passing 0/false/'' as the default
  // should get that value back, not the falsy input.
  return defaultVal ?? o;
}
362
+
354
363
  module.exports = {
355
364
  appendPostfix,
356
365
  bool,
@@ -364,6 +373,7 @@ module.exports = {
364
373
  getPacketFiles,
365
374
  getStringArray,
366
375
  makeStrings,
376
+ parseJSON5,
367
377
  relativeDate,
368
378
  streamPacket,
369
379
  writeTempFile
package/index.js CHANGED
@@ -15,18 +15,19 @@ const FileUtilities = require('./file/FileUtilities');
15
15
  const {
16
16
  appendPostfix,
17
17
  bool,
18
- getManifest,
18
+ getBatchTransform,
19
+ getDebatchTransform,
19
20
  getFile,
21
+ getManifest,
22
+ getPacketFiles,
23
+ getStringArray,
20
24
  downloadFile,
21
25
  getTempFilename,
22
26
  getTempDir,
23
27
  isValidDate,
28
+ parseJSON5,
24
29
  relativeDate,
25
30
  streamPacket,
26
- getPacketFiles,
27
- getBatchTransform,
28
- getDebatchTransform,
29
- getStringArray,
30
31
  makeStrings,
31
32
  writeTempFile
32
33
  } = require('./file/tools');
@@ -319,6 +320,79 @@ function getEntryType(o, defaults = {}) {
319
320
  return etype;
320
321
  }
321
322
 
323
/**
 * Build an ascending list of dates from `startDate` to `endDate`, choosing a
 * step size from the length of the range:
 *
 *   - under 10 days          -> every day
 *   - under 32 days          -> every 3 days
 *   - under 4 calendar months -> every 7 days
 *   - under 2 calendar years  -> every month
 *   - under 4 calendar years  -> every 3 months
 *   - otherwise               -> every year
 *
 * The first element is the start date and the last element is always exactly
 * the end date (it is appended if the stepping does not land on it).
 * All arithmetic uses the local time zone.
 *
 * If `startDate` is after `endDate`, or either input cannot be parsed, no
 * stepping happens and the result contains only `new Date(endDate)` (which is
 * an Invalid Date for unparseable input).
 *
 * @param {Date|string|number} startDate - Anything accepted by `new Date()`.
 * @param {Date|string|number} endDate - Anything accepted by `new Date()`.
 * @returns {Date[]} Fresh Date instances; the inputs are never mutated.
 */
function getDateRangeArray(startDate, endDate) {
  const start = new Date(startDate);
  const end = new Date(endDate);
  const msInDay = 24 * 60 * 60 * 1000;

  function addDays(date, days) {
    const d = new Date(date);
    d.setDate(d.getDate() + days);
    return d;
  }

  // Month arithmetic that clamps to the last day of the target month.
  // A plain setMonth() overflows instead (Jan 31 + 1 month -> Mar 3), which
  // would skip February entirely and shift every later entry.
  function addMonths(date, months) {
    const d = new Date(date);
    const dayOfMonth = d.getDate();
    d.setDate(1); // park on the 1st so changing the month cannot overflow
    d.setMonth(d.getMonth() + months);
    const monthEnd = new Date(d);
    monthEnd.setMonth(monthEnd.getMonth() + 1, 0); // day 0 => last day of d's month
    d.setDate(Math.min(dayOfMonth, monthEnd.getDate()));
    return d;
  }

  // Same clamping for years, so Feb 29 + 1 year -> Feb 28 rather than Mar 1.
  function addYears(date, years) {
    return addMonths(date, years * 12);
  }

  const diffDays = Math.floor((end - start) / msInDay);
  const diffMonths = (end.getFullYear() - start.getFullYear()) * 12 + (end.getMonth() - start.getMonth());
  const diffYears = end.getFullYear() - start.getFullYear();

  // Each step is computed from `start` (not from the previous entry) so that
  // a clamped month-end does not permanently drift to the shorter day number.
  let stepFromStart;
  if (diffDays < 10) {
    stepFromStart = (i) => addDays(start, i);
  } else if (diffDays < 32) {
    stepFromStart = (i) => addDays(start, i * 3);
  } else if (diffMonths < 4) {
    stepFromStart = (i) => addDays(start, i * 7);
  } else if (diffYears < 2) {
    stepFromStart = (i) => addMonths(start, i);
  } else if (diffYears < 4) {
    stepFromStart = (i) => addMonths(start, i * 3);
  } else {
    stepFromStart = (i) => addYears(start, i);
  }

  const result = [];
  for (let i = 0; ; i += 1) {
    const current = stepFromStart(i);
    // Written as a negated <= so that Invalid Date (NaN) also terminates.
    if (!(current <= end)) break;
    result.push(current);
  }

  // Ensure the last date is exactly the end date
  if (result.length === 0 || result[result.length - 1].getTime() !== end.getTime()) {
    result.push(new Date(end));
  }
  return result;
}
378
+
379
/**
 * An Error that can be constructed from either a plain message string or an
 * object carrying a `message` plus arbitrary extra fields (e.g. `status`).
 *
 * - string: behaves exactly like `new Error(string)`.
 * - non-null object: `data.message` becomes the error message, every own
 *   enumerable key of `data` is copied onto the error, and `status` is
 *   copied explicitly so it is picked up even when it is not an own
 *   enumerable property.
 * - anything else (including `null` and `undefined`): the message is
 *   '(No error message)'.
 */
class ObjectError extends Error {
  /**
   * @param {string|object} [data] - Message string or an object of error fields.
   */
  constructor(data) {
    if (typeof data === 'string') {
      // normal behavior
      super(data);
    } else if (typeof data === 'object' && data !== null) {
      // `typeof null` is 'object', so null must be excluded before reading data.message
      super(data.message);
      Object.keys(data).forEach((k) => {
        this[k] = data[k];
      });
      this.status = data.status;
    } else {
      super('(No error message)');
    }
  }
}
395
+
322
396
  module.exports = {
323
397
  appendPostfix,
324
398
  bool,
@@ -329,6 +403,7 @@ module.exports = {
329
403
  ForEachEntry,
330
404
  FileUtilities,
331
405
  getBatchTransform,
406
+ getDateRangeArray,
332
407
  getDebatchTransform,
333
408
  getEntryType,
334
409
  getEntryTypeId,
@@ -346,6 +421,8 @@ module.exports = {
346
421
  handlebars,
347
422
  isValidDate,
348
423
  makeStrings,
424
+ ObjectError,
425
+ parseJSON5,
349
426
  relativeDate,
350
427
  streamPacket,
351
428
  TIMELINE_ENTRY_TYPES,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@engine9-io/input-tools",
3
- "version": "1.9.2",
3
+ "version": "1.9.4",
4
4
  "description": "Tools for dealing with Engine9 inputs",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -1,10 +1,8 @@
1
- const {
2
- describe, it,
3
- } = require('node:test');
1
+ const { describe, it } = require('node:test');
4
2
  const assert = require('node:assert');
5
3
  const debug = require('debug')('test:big-data');
6
4
  const { setTimeout } = require('node:timers/promises');
7
- const { v7: uuidv7 } = require('uuid');
5
+ //const { v7: uuidv7 } = require('uuid');
8
6
 
9
7
  const { ForEachEntry } = require('../../index');
10
8
 
@@ -14,46 +12,40 @@ describe('big-data message: forEachPerson', async () => {
14
12
  let counter = 0;
15
13
  const forEach = new ForEachEntry();
16
14
 
17
- const output = await forEach.process(
18
- {
19
- // packet: '../1000000_person_message.packet.zip',
20
- filename: '../1000000_fake_people.csv',
21
- batchSize: 10000,
22
- concurrency: 1000,
23
- bindings: {
24
- timelineOutputStream: { path: 'output.timeline' },
25
- message: { path: 'file', filename: '../1000000_person_message.packet/message/message.json5' },
26
- handlebars: { path: 'handlebars' },
27
- },
28
- async transform({
29
- batch,
30
- message,
31
- handlebars,
32
- timelineOutputStream,
33
- }) {
34
- const id = uuidv7();
35
- debug(`Processing batch of ${batch.length} - ${id}`);
36
- if (!message?.content?.text) throw new Error(`Sample message has no content.text:${JSON.stringify(message)}`);
37
- const template = handlebars.compile(message.content.text);
38
- batch.forEach((person) => {
39
- messageContent.push(template(person));
40
- });
41
- batch.forEach((p) => {
42
- const o = {
43
- person_id: p.person_id,
44
- email: p.email,
45
- entry_type: 'EMAIL_DELIVERED',
46
- };
47
- counter += 1;
48
- if (counter % 10000 === 0) debug(`*** Processed ${counter} items, last person_id=${p.person_id}`, o);
49
- timelineOutputStream.push(o);
50
- });
51
- // debug(`Processed batch of size ${batch.length}`);
52
- await setTimeout(Math.random() * 3000);
53
- debug(`Completed processing ${id}`);
54
- },
15
+ const output = await forEach.process({
16
+ // packet: '../1000000_person_message.packet.zip',
17
+ filename: '../1000000_person_message.packet/person/1000000_fake_people.csv',
18
+ batchSize: 10000,
19
+ concurrency: 1000,
20
+ progress: debug,
21
+ bindings: {
22
+ timelineOutputStream: { path: 'output.timeline' },
23
+ message: { path: 'file', filename: '../1000000_person_message.packet/message/message.json5' },
24
+ handlebars: { path: 'handlebars' }
55
25
  },
56
- );
26
+ async transform({ batch, message, handlebars, timelineOutputStream }) {
27
+ //const id = uuidv7();
28
+ //debug(`Processing batch of ${batch.length} - ${id}`);
29
+ if (!message?.content?.text) throw new Error(`Sample message has no content.text:${JSON.stringify(message)}`);
30
+ const template = handlebars.compile(message.content.text);
31
+ batch.forEach((person) => {
32
+ messageContent.push(template(person));
33
+ });
34
+ batch.forEach((p) => {
35
+ const o = {
36
+ person_id: p.person_id,
37
+ email: p.email,
38
+ entry_type: 'EMAIL_DELIVERED'
39
+ };
40
+ counter += 1;
41
+ //if (counter % 10000 === 0) debug(`*** Processed ${counter} items, last person_id=${p.person_id}`, o);
42
+ timelineOutputStream.push(o);
43
+ });
44
+ // debug(`Processed batch of size ${batch.length}`);
45
+ await setTimeout(Math.random() * 3000);
46
+ //debug(`Completed processing ${id}`);
47
+ }
48
+ });
57
49
  debug(output);
58
50
 
59
51
  assert.equal(counter, 1000000, `Expected to loop through 1000000 people, actual:${counter}`);
package/timelineTypes.js CHANGED
@@ -31,6 +31,7 @@ const SMS_DELIVERED = 31;
31
31
  const SMS_CLICK = 33;
32
32
  const SMS_UNSUBSCRIBE = 34;
33
33
  const SMS_BOUNCE = 37;
34
+ const SMS_SPAM = 38;
34
35
  const SMS_REPLY = 39;
35
36
 
36
37
  const EMAIL_SEND = 40;
@@ -112,6 +113,7 @@ const TIMELINE_ENTRY_TYPES = {
112
113
  SMS_CLICK,
113
114
  SMS_UNSUBSCRIBE,
114
115
  SMS_BOUNCE,
116
+ SMS_SPAM,
115
117
  SMS_REPLY,
116
118
 
117
119
  // email interactions