@engine9-io/input-tools 1.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.eslintignore ADDED
@@ -0,0 +1,5 @@
1
+ node_modules
2
+ **/node_modules
3
+ ./node_modules/*
4
+ ./node_modules/**
5
+ *.schema
package/.eslintrc.js ADDED
@@ -0,0 +1,36 @@
1
+ module.exports = {
2
+ env: {
3
+ browser: true,
4
+ commonjs: true,
5
+ es2021: true,
6
+ },
7
+ extends: [
8
+ 'airbnb-base',
9
+ 'plugin:jsonc/base',
10
+ 'plugin:jsonc/recommended-with-json5'
11
+ ],
12
+ overrides: [
13
+ {
14
+ env: {
15
+ node: true,
16
+ },
17
+ files: [
18
+ '.eslintrc.{js,cjs}',
19
+ ],
20
+ parserOptions: {
21
+ sourceType: 'script',
22
+ },
23
+ plugins: [
24
+ 'json5',
25
+ ],
26
+ },
27
+ ],
28
+ parserOptions: {
29
+ ecmaVersion: 'latest',
30
+ },
31
+
32
+ rules: {
33
+ 'func-names': 'off', // Anonymous functions have their useful cases
34
+ 'no-param-reassign': [2, { props: false }], // We often assign props of an object in a function, and that's generally safe.
35
+ },
36
+ };
@@ -0,0 +1,158 @@
1
+ const fs = require('node:fs');
2
+
3
+ const { Writable } = require('node:stream');
4
+ const { pipeline } = require('node:stream/promises');
5
+ const { throttle } = require('throttle-debounce');
6
+ const parallelTransform = require('parallel-transform');
7
+
8
+ const debug = require('debug')('packet-tools');
9
+
10
+ const debugThrottle = throttle(1000, debug, { noLeading: false, noTrailing: false });
11
+
12
+ const { Mutex } = require('async-mutex');
13
+
14
+ const csv = require('csv');
15
+
16
+ const handlebars = require('handlebars');
17
+ const ValidatingReadable = require('./ValidatingReadable');
18
+ const FileUtilities = require('./file/FileUtilities');
19
+
20
+ const {
21
+ getTempFilename, getBatchTransform, getFile, streamPacket,
22
+ } = require('./file/tools');
23
+
24
/**
 * Validator for entries pushed onto the timeline output stream.
 *
 * Falsy values are accepted as-is (process() pushes `null` to end the stream).
 * Anything else must be an object carrying a `person_id`; the number 0 is an
 * explicitly allowed id. A missing `ts` is filled in with the current time as
 * an ISO-8601 string, mutating the entry in place.
 *
 * @param {*} data - The value being pushed.
 * @returns {boolean} Always `true` when the value is acceptable.
 * @throws {Error} If `data` is not an object or has no usable `person_id`.
 */
function validateTimelineEntry(data) {
  if (!data) return true;
  if (typeof data !== 'object') throw new Error('Invalid timeline data push, must be an object');
  const { person_id: personId } = data;
  // A plain truthiness test would reject 0, which the error message promises to accept.
  if (!personId && personId !== 0) {
    throw new Error('Invalid timeline data push, must have a person_id, even if 0');
  }
  if (!data.ts) data.ts = new Date().toISOString();
  return true;
}

/**
 * Runs a user-supplied async transform over batches of records parsed from a
 * CSV input (a file, or the person file of a packet), optionally exposing
 * extra "bindings" (timeline output stream, file, handlebars) to the transform.
 */
class ForEachEntry {
  /**
   * @param {object} [options]
   * @param {*} [options.accountId] - Passed through to FileUtilities.
   */
  constructor({ accountId } = {}) {
    // Serializes creation of the timeline output so only one is ever built per instance.
    this.timelineOutputMutex = new Mutex();
    this.fileUtilities = new FileUtilities({ accountId });
  }

  /**
   * Lazily creates (once per instance) an object-mode stream whose entries are
   * CSV-stringified, with a header row, into a temp file.
   *
   * @returns {Promise<{stream: object, promises: Promise[], files: string[]}>}
   *   `stream` is the stream to push entries to, `promises` settle once the
   *   file has been written (or a stage of the write chain failed), and
   *   `files` holds the temp file name.
   */
  getTimelineOutputStream() {
    return this.timelineOutputMutex.runExclusive(async () => {
      if (this.outputStream) return this.outputStream;

      const timelineFile = await getTempFilename({ postfix: '.csv' });
      debug(`Timeline output requested, writing timeline file to: ${timelineFile}`);

      const timelineOutputStream = new ValidatingReadable(
        { objectMode: true },
        validateTimelineEntry,
      );
      // Data arrives via push() from the transform, so there is nothing to do on read.
      // eslint-disable-next-line no-underscore-dangle
      timelineOutputStream._read = () => {};

      const stringifier = csv.stringify({ header: true });
      const writeStream = fs.createWriteStream(timelineFile);

      // pipe() does not forward errors, so listen on every stage: otherwise a
      // failure upstream of the file would leave this promise pending forever.
      const finishWritingTimelinePromise = new Promise((resolve, reject) => {
        writeStream.on('finish', resolve);
        [timelineOutputStream, stringifier, writeStream].forEach((stage) => {
          stage.on('error', reject);
        });
      });

      timelineOutputStream.pipe(stringifier).pipe(writeStream);

      this.outputStream = {
        stream: timelineOutputStream,
        promises: [finishWritingTimelinePromise],
        files: [timelineFile],
      };
      return this.outputStream;
    });
  }

  /**
   * Streams the input as CSV, groups rows into batches, and calls `transform`
   * on each batch with bounded concurrency.
   *
   * @param {object} options
   * @param {string} [options.packet] - Packet whose person file is processed
   *   (used when `filename` is not given).
   * @param {string} [options.filename] - File to process; takes precedence over `packet`.
   * @param {Function} options.transform - Called as `transform({ ...bindings, batch })`;
   *   must declare at most one parameter.
   * @param {number} [options.batchSize=500] - Passed to getBatchTransform.
   * @param {number} [options.concurrency=10] - Maximum transforms in flight.
   * @param {object} [options.bindings={}] - Map of argument name to `{ path, ... }`
   *   where `path` is one of `output.timeline`, `file`, or `handlebars`.
   * @returns {Promise<{timelineFiles: string[]}>} Timeline files produced, fully written.
   * @throws {Error} On an invalid transform, missing input, invalid binding,
   *   or any failure while streaming or writing the timeline.
   */
  async process({
    packet,
    filename,
    transform: userTransform,
    batchSize = 500,
    concurrency = 10,
    bindings = {},
  }) {
    // Validate arguments before any stream or file is opened so a bad call leaks nothing.
    if (typeof userTransform !== 'function') throw new Error('async transform function is required');
    if (userTransform.length > 1) throw new Error('transform should be an async function that accepts one argument');
    if (!filename && !packet) throw new Error('Either a filename or a packet is required');

    let records = 0;
    let batches = 0;

    const timelineFiles = [];
    const transformArguments = {};
    // Promises that must settle before the run is complete, such as writing to disk
    const bindingPromises = [];
    // Streams created for this run, which have to be ended once the input is exhausted
    const newStreams = [];

    await Promise.all(Object.keys(bindings).map(async (bindingName) => {
      const binding = bindings[bindingName];
      if (!binding.path) throw new Error(`Invalid binding: path is required for binding ${bindingName}`);
      if (binding.path === 'output.timeline') {
        const { stream: streamImpl, promises, files } = await this.getTimelineOutputStream();
        newStreams.push(streamImpl);
        transformArguments[bindingName] = streamImpl;
        bindingPromises.push(...(promises || []));
        timelineFiles.push(...files);
      } else if (binding.path === 'file') {
        transformArguments[bindingName] = await getFile(binding);
      } else if (binding.path === 'handlebars') {
        transformArguments[bindingName] = handlebars;
      } else {
        throw new Error(`Unsupported binding path for binding ${bindingName}: ${binding.path}`);
      }
    }));

    // Open the input last so it is handed straight to pipeline(), which owns
    // its error handling and cleanup from then on.
    let inStream;
    if (filename) {
      debug(`Processing file ${filename}`);
      inStream = (await this.fileUtilities.stream({ filename })).stream;
    } else {
      debug(`Processing person file from packet ${packet}`);
      inStream = (await streamPacket({ packet, type: 'person' })).stream;
    }

    await pipeline(
      inStream,
      csv.parse({
        relax: true,
        skip_empty_lines: true,
        max_limit_on_data_read: 10000000,
        columns: true,
      }),
      getBatchTransform({ batchSize }).transform,
      parallelTransform(
        concurrency,
        (batch, cb) => {
          // The async wrapper turns a synchronous throw or a non-promise
          // return value into a promise, so both always reach cb.
          (async () => userTransform({ ...transformArguments, batch }))()
            .then((d) => cb(null, d))
            .catch(cb);
        },
      ),
      new Writable({
        objectMode: true,
        write(batch, enc, cb) {
          batches += 1;
          records += batch?.length || 0;

          debugThrottle(`Processed ${batches} batches for a total of ${records} records`);
          cb();
        },
      }),
    );
    debug('Completed all batches');

    // End the output streams, then wait for their files to be flushed so the
    // returned file names refer to complete files.
    newStreams.forEach((s) => s.push(null));
    await Promise.all(bindingPromises);

    return { timelineFiles };
  }
}
157
+
158
+ module.exports = ForEachEntry;