@engine9-io/input-tools 1.9.11 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ForEachEntry.js +18 -45
- package/ValidatingReadable.js +3 -6
- package/buildSamplePackets.js +11 -16
- package/eslint.config.mjs +15 -11
- package/file/FileUtilities.js +29 -153
- package/file/GoogleDrive.js +32 -38
- package/file/Parquet.js +112 -124
- package/file/R2.js +27 -32
- package/file/S3.js +259 -293
- package/file/tools.js +33 -54
- package/index.js +59 -74
- package/package.json +2 -1
- package/test/cli.js +3 -4
- package/test/file.js +6 -7
- package/test/processing/bigDataMessage.js +8 -10
- package/test/processing/forEach.js +6 -8
- package/test/processing/forEachResume.js +6 -8
- package/test/processing/message.js +31 -39
- package/test/processing/zip.js +6 -7
- package/test/uuid.js +6 -11
- package/timelineTypes.js +2 -24
package/ForEachEntry.js
CHANGED
|
@@ -1,45 +1,35 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import nodestream from 'node:stream';
|
|
3
|
+
import promises from 'node:stream/promises';
|
|
4
|
+
import { throttle } from 'throttle-debounce';
|
|
5
|
+
import parallelTransform from 'parallel-transform';
|
|
6
|
+
import debug$0 from 'debug';
|
|
7
|
+
import { Mutex } from 'async-mutex';
|
|
8
|
+
import { stringify, parse } from 'csv';
|
|
9
|
+
import handlebars from 'handlebars';
|
|
10
|
+
import ValidatingReadable from './ValidatingReadable.js';
|
|
11
|
+
import FileUtilities from './file/FileUtilities.js';
|
|
12
|
+
import { getTempFilename, getBatchTransform, getFile, streamPacket } from './file/tools.js';
|
|
13
|
+
const { Transform, Writable } = nodestream;
|
|
14
|
+
const { pipeline } = promises;
|
|
15
|
+
const debug = debug$0('@engine9-io/input-tools');
|
|
10
16
|
const debugThrottle = throttle(1000, debug, { noLeading: false, noTrailing: false });
|
|
11
|
-
|
|
12
|
-
const { Mutex } = require('async-mutex');
|
|
13
|
-
|
|
14
|
-
const csv = require('csv');
|
|
15
|
-
|
|
16
|
-
const handlebars = require('handlebars');
|
|
17
|
-
const ValidatingReadable = require('./ValidatingReadable');
|
|
18
|
-
const FileUtilities = require('./file/FileUtilities');
|
|
19
|
-
|
|
20
|
-
const { getTempFilename, getBatchTransform, getFile, streamPacket } = require('./file/tools');
|
|
21
|
-
|
|
22
17
|
class ForEachEntry {
|
|
23
18
|
constructor({ accountId } = {}) {
|
|
24
19
|
this.fileUtilities = new FileUtilities({ accountId });
|
|
25
20
|
}
|
|
26
|
-
|
|
27
21
|
getOutputStream({ name, filename, postfix = '.timeline.csv', validatorFunction = () => true }) {
|
|
28
22
|
this.outputStreams = this.outputStreams || {};
|
|
29
23
|
if (this.outputStreams[name]?.items) return this.outputStreams[name].items;
|
|
30
|
-
|
|
31
24
|
this.outputStreams[name] = this.outputStreams[name] || {
|
|
32
25
|
mutex: new Mutex()
|
|
33
26
|
};
|
|
34
|
-
|
|
35
27
|
return this.outputStreams[name].mutex.runExclusive(async () => {
|
|
36
28
|
let f = filename || (await getTempFilename({ postfix }));
|
|
37
|
-
|
|
38
29
|
const fileInfo = {
|
|
39
30
|
filename: f,
|
|
40
31
|
records: 0
|
|
41
32
|
};
|
|
42
|
-
|
|
43
33
|
debug(`Output file requested ${name}, writing output to to: ${fileInfo.filename}`);
|
|
44
34
|
const outputStream = new ValidatingReadable(
|
|
45
35
|
{
|
|
@@ -47,9 +37,7 @@ class ForEachEntry {
|
|
|
47
37
|
},
|
|
48
38
|
validatorFunction
|
|
49
39
|
);
|
|
50
|
-
|
|
51
40
|
outputStream._read = () => {};
|
|
52
|
-
|
|
53
41
|
const writeStream = fs.createWriteStream(fileInfo.filename);
|
|
54
42
|
const finishWritingOutputPromise = new Promise((resolve, reject) => {
|
|
55
43
|
writeStream
|
|
@@ -60,13 +48,11 @@ class ForEachEntry {
|
|
|
60
48
|
reject(err);
|
|
61
49
|
});
|
|
62
50
|
});
|
|
63
|
-
|
|
64
51
|
this.outputStreams[name].items = {
|
|
65
52
|
stream: outputStream,
|
|
66
53
|
promises: [finishWritingOutputPromise],
|
|
67
54
|
files: [fileInfo]
|
|
68
55
|
};
|
|
69
|
-
|
|
70
56
|
outputStream
|
|
71
57
|
.pipe(
|
|
72
58
|
new Transform({
|
|
@@ -77,13 +63,11 @@ class ForEachEntry {
|
|
|
77
63
|
}
|
|
78
64
|
})
|
|
79
65
|
)
|
|
80
|
-
.pipe(
|
|
66
|
+
.pipe(stringify({ header: true }))
|
|
81
67
|
.pipe(writeStream);
|
|
82
|
-
|
|
83
68
|
return this.outputStreams[name].items;
|
|
84
69
|
});
|
|
85
70
|
}
|
|
86
|
-
|
|
87
71
|
async process({
|
|
88
72
|
packet,
|
|
89
73
|
filename,
|
|
@@ -94,7 +78,6 @@ class ForEachEntry {
|
|
|
94
78
|
bindings = {}
|
|
95
79
|
}) {
|
|
96
80
|
let inStream = null;
|
|
97
|
-
|
|
98
81
|
if (filename) {
|
|
99
82
|
debug(`Processing file ${filename}`);
|
|
100
83
|
inStream = (await this.fileUtilities.stream({ filename })).stream;
|
|
@@ -104,7 +87,6 @@ class ForEachEntry {
|
|
|
104
87
|
}
|
|
105
88
|
if (typeof userTransform !== 'function') throw new Error('async transform function is required');
|
|
106
89
|
if (userTransform.length > 1) throw new Error('transform should be an async function that accepts one argument');
|
|
107
|
-
|
|
108
90
|
let progressThrottle = () => {};
|
|
109
91
|
if (typeof progress === 'function') {
|
|
110
92
|
const startTime = new Date().getTime();
|
|
@@ -120,21 +102,15 @@ class ForEachEntry {
|
|
|
120
102
|
{ noLeading: false, noTrailing: false }
|
|
121
103
|
);
|
|
122
104
|
}
|
|
123
|
-
|
|
124
105
|
let records = 0;
|
|
125
106
|
let batches = 0;
|
|
126
|
-
|
|
127
107
|
const outputFiles = {};
|
|
128
|
-
|
|
129
108
|
const transformArguments = {};
|
|
130
109
|
// An array of promises that must be completed, such as writing to disk
|
|
131
110
|
let bindingPromises = [];
|
|
132
|
-
|
|
133
111
|
// new Streams may be created, and they have to be completed when the file is completed
|
|
134
112
|
const newStreams = [];
|
|
135
|
-
|
|
136
113
|
const bindingNames = Object.keys(bindings);
|
|
137
|
-
|
|
138
114
|
await Promise.all(
|
|
139
115
|
bindingNames.map(async (bindingName) => {
|
|
140
116
|
const binding = bindings[bindingName];
|
|
@@ -184,7 +160,7 @@ class ForEachEntry {
|
|
|
184
160
|
);
|
|
185
161
|
await pipeline(
|
|
186
162
|
inStream,
|
|
187
|
-
|
|
163
|
+
parse({
|
|
188
164
|
relax: true,
|
|
189
165
|
skip_empty_lines: true,
|
|
190
166
|
max_limit_on_data_read: 10000000,
|
|
@@ -210,12 +186,9 @@ class ForEachEntry {
|
|
|
210
186
|
})
|
|
211
187
|
);
|
|
212
188
|
debug('Completed all batches');
|
|
213
|
-
|
|
214
189
|
newStreams.forEach((s) => s.push(null));
|
|
215
190
|
await Promise.all(bindingPromises);
|
|
216
|
-
|
|
217
191
|
return { outputFiles };
|
|
218
192
|
}
|
|
219
193
|
}
|
|
220
|
-
|
|
221
|
-
module.exports = ForEachEntry;
|
|
194
|
+
export default ForEachEntry;
|
package/ValidatingReadable.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import nodestream from 'node:stream';
|
|
2
|
+
const { Readable } = nodestream;
|
|
3
3
|
/*
|
|
4
4
|
A readable that will check data prior to it going into the stream
|
|
5
5
|
*/
|
|
@@ -8,9 +8,7 @@ class ValidatingReadable extends Readable {
|
|
|
8
8
|
super(options);
|
|
9
9
|
this.validator = validator || (() => true);
|
|
10
10
|
}
|
|
11
|
-
|
|
12
11
|
// _read() {super._read(size)}
|
|
13
|
-
|
|
14
12
|
push(chunk) {
|
|
15
13
|
try {
|
|
16
14
|
this.validator(chunk);
|
|
@@ -20,5 +18,4 @@ class ValidatingReadable extends Readable {
|
|
|
20
18
|
}
|
|
21
19
|
}
|
|
22
20
|
}
|
|
23
|
-
|
|
24
|
-
module.exports = ValidatingReadable;
|
|
21
|
+
export default ValidatingReadable;
|
package/buildSamplePackets.js
CHANGED
|
@@ -1,18 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import InputTools from './index.js';
|
|
3
2
|
(async () => {
|
|
4
|
-
await InputTools.create(
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
personFiles: ['./test/sample/message/1000_fake_people.csv'],
|
|
15
|
-
messageFiles: ['./test/sample/message/message.json5'],
|
|
16
|
-
},
|
|
17
|
-
);
|
|
3
|
+
await InputTools.create({
|
|
4
|
+
target: './test/sample/5_message.packet.zip',
|
|
5
|
+
personFiles: ['./test/sample/message/5_fake_people.csv'],
|
|
6
|
+
messageFiles: ['./test/sample/message/message.json5']
|
|
7
|
+
});
|
|
8
|
+
await InputTools.create({
|
|
9
|
+
target: './test/sample/1000_message.packet.zip',
|
|
10
|
+
personFiles: ['./test/sample/message/1000_fake_people.csv'],
|
|
11
|
+
messageFiles: ['./test/sample/message/message.json5']
|
|
12
|
+
});
|
|
18
13
|
})();
|
package/eslint.config.mjs
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
import js from
|
|
2
|
-
import globals from
|
|
3
|
-
import { defineConfig } from
|
|
4
|
-
|
|
1
|
+
import js from '@eslint/js';
|
|
2
|
+
import globals from 'globals';
|
|
3
|
+
import { defineConfig } from 'eslint/config';
|
|
5
4
|
export default defineConfig([
|
|
6
|
-
{
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
5
|
+
{
|
|
6
|
+
files: ['**/*.{js,mjs,cjs}'],
|
|
7
|
+
plugins: { js },
|
|
8
|
+
extends: ['js/recommended'],
|
|
9
|
+
languageOptions: {
|
|
10
|
+
globals: {
|
|
11
|
+
...globals.node // This includes 'process' and other Node.js globals
|
|
12
|
+
// globals.browser
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
{ files: ['**/*.js'], languageOptions: { sourceType: 'module' } }
|
|
13
17
|
]);
|