@engine9-io/input-tools 1.9.11 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ForEachEntry.js +18 -43
- package/ValidatingReadable.js +3 -6
- package/buildSamplePackets.js +11 -16
- package/eslint.config.mjs +15 -11
- package/file/FileUtilities.js +976 -1048
- package/file/GoogleDrive.js +32 -38
- package/file/Parquet.js +112 -124
- package/file/R2.js +27 -32
- package/file/S3.js +259 -293
- package/file/tools.js +334 -326
- package/index.js +60 -75
- package/package.json +2 -1
- package/test/cli.js +3 -4
- package/test/file.js +6 -7
- package/test/processing/bigDataMessage.js +8 -10
- package/test/processing/forEach.js +6 -8
- package/test/processing/forEachResume.js +6 -8
- package/test/processing/message.js +31 -39
- package/test/processing/zip.js +6 -7
- package/test/uuid.js +6 -11
- package/timelineTypes.js +2 -24
package/ForEachEntry.js
CHANGED
|
@@ -1,45 +1,37 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import nodestream from 'node:stream';
|
|
3
|
+
import promises from 'node:stream/promises';
|
|
4
|
+
import throttleDebounce from 'throttle-debounce';
|
|
5
|
+
import parallelTransform from 'parallel-transform';
|
|
6
|
+
import debug$0 from 'debug';
|
|
7
|
+
import asyncMutex from 'async-mutex';
|
|
8
|
+
import csv from 'csv';
|
|
9
|
+
import handlebars from 'handlebars';
|
|
10
|
+
import ValidatingReadable from './ValidatingReadable.js';
|
|
11
|
+
import FileUtilities from './file/FileUtilities.js';
|
|
12
|
+
import { getTempFilename, getBatchTransform, getFile, streamPacket } from './file/tools.js';
|
|
13
|
+
const { Transform, Writable } = nodestream;
|
|
14
|
+
const { pipeline } = promises;
|
|
15
|
+
const { throttle } = throttleDebounce;
|
|
16
|
+
const debug = debug$0('@engine9-io/input-tools');
|
|
10
17
|
const debugThrottle = throttle(1000, debug, { noLeading: false, noTrailing: false });
|
|
11
|
-
|
|
12
|
-
const { Mutex } = require('async-mutex');
|
|
13
|
-
|
|
14
|
-
const csv = require('csv');
|
|
15
|
-
|
|
16
|
-
const handlebars = require('handlebars');
|
|
17
|
-
const ValidatingReadable = require('./ValidatingReadable');
|
|
18
|
-
const FileUtilities = require('./file/FileUtilities');
|
|
19
|
-
|
|
20
|
-
const { getTempFilename, getBatchTransform, getFile, streamPacket } = require('./file/tools');
|
|
21
|
-
|
|
18
|
+
const { Mutex } = asyncMutex;
|
|
22
19
|
class ForEachEntry {
|
|
23
20
|
constructor({ accountId } = {}) {
|
|
24
21
|
this.fileUtilities = new FileUtilities({ accountId });
|
|
25
22
|
}
|
|
26
|
-
|
|
27
23
|
getOutputStream({ name, filename, postfix = '.timeline.csv', validatorFunction = () => true }) {
|
|
28
24
|
this.outputStreams = this.outputStreams || {};
|
|
29
25
|
if (this.outputStreams[name]?.items) return this.outputStreams[name].items;
|
|
30
|
-
|
|
31
26
|
this.outputStreams[name] = this.outputStreams[name] || {
|
|
32
27
|
mutex: new Mutex()
|
|
33
28
|
};
|
|
34
|
-
|
|
35
29
|
return this.outputStreams[name].mutex.runExclusive(async () => {
|
|
36
30
|
let f = filename || (await getTempFilename({ postfix }));
|
|
37
|
-
|
|
38
31
|
const fileInfo = {
|
|
39
32
|
filename: f,
|
|
40
33
|
records: 0
|
|
41
34
|
};
|
|
42
|
-
|
|
43
35
|
debug(`Output file requested ${name}, writing output to to: ${fileInfo.filename}`);
|
|
44
36
|
const outputStream = new ValidatingReadable(
|
|
45
37
|
{
|
|
@@ -47,9 +39,7 @@ class ForEachEntry {
|
|
|
47
39
|
},
|
|
48
40
|
validatorFunction
|
|
49
41
|
);
|
|
50
|
-
|
|
51
42
|
outputStream._read = () => {};
|
|
52
|
-
|
|
53
43
|
const writeStream = fs.createWriteStream(fileInfo.filename);
|
|
54
44
|
const finishWritingOutputPromise = new Promise((resolve, reject) => {
|
|
55
45
|
writeStream
|
|
@@ -60,13 +50,11 @@ class ForEachEntry {
|
|
|
60
50
|
reject(err);
|
|
61
51
|
});
|
|
62
52
|
});
|
|
63
|
-
|
|
64
53
|
this.outputStreams[name].items = {
|
|
65
54
|
stream: outputStream,
|
|
66
55
|
promises: [finishWritingOutputPromise],
|
|
67
56
|
files: [fileInfo]
|
|
68
57
|
};
|
|
69
|
-
|
|
70
58
|
outputStream
|
|
71
59
|
.pipe(
|
|
72
60
|
new Transform({
|
|
@@ -79,11 +67,9 @@ class ForEachEntry {
|
|
|
79
67
|
)
|
|
80
68
|
.pipe(csv.stringify({ header: true }))
|
|
81
69
|
.pipe(writeStream);
|
|
82
|
-
|
|
83
70
|
return this.outputStreams[name].items;
|
|
84
71
|
});
|
|
85
72
|
}
|
|
86
|
-
|
|
87
73
|
async process({
|
|
88
74
|
packet,
|
|
89
75
|
filename,
|
|
@@ -94,7 +80,6 @@ class ForEachEntry {
|
|
|
94
80
|
bindings = {}
|
|
95
81
|
}) {
|
|
96
82
|
let inStream = null;
|
|
97
|
-
|
|
98
83
|
if (filename) {
|
|
99
84
|
debug(`Processing file ${filename}`);
|
|
100
85
|
inStream = (await this.fileUtilities.stream({ filename })).stream;
|
|
@@ -104,7 +89,6 @@ class ForEachEntry {
|
|
|
104
89
|
}
|
|
105
90
|
if (typeof userTransform !== 'function') throw new Error('async transform function is required');
|
|
106
91
|
if (userTransform.length > 1) throw new Error('transform should be an async function that accepts one argument');
|
|
107
|
-
|
|
108
92
|
let progressThrottle = () => {};
|
|
109
93
|
if (typeof progress === 'function') {
|
|
110
94
|
const startTime = new Date().getTime();
|
|
@@ -120,21 +104,15 @@ class ForEachEntry {
|
|
|
120
104
|
{ noLeading: false, noTrailing: false }
|
|
121
105
|
);
|
|
122
106
|
}
|
|
123
|
-
|
|
124
107
|
let records = 0;
|
|
125
108
|
let batches = 0;
|
|
126
|
-
|
|
127
109
|
const outputFiles = {};
|
|
128
|
-
|
|
129
110
|
const transformArguments = {};
|
|
130
111
|
// An array of promises that must be completed, such as writing to disk
|
|
131
112
|
let bindingPromises = [];
|
|
132
|
-
|
|
133
113
|
// new Streams may be created, and they have to be completed when the file is completed
|
|
134
114
|
const newStreams = [];
|
|
135
|
-
|
|
136
115
|
const bindingNames = Object.keys(bindings);
|
|
137
|
-
|
|
138
116
|
await Promise.all(
|
|
139
117
|
bindingNames.map(async (bindingName) => {
|
|
140
118
|
const binding = bindings[bindingName];
|
|
@@ -210,12 +188,9 @@ class ForEachEntry {
|
|
|
210
188
|
})
|
|
211
189
|
);
|
|
212
190
|
debug('Completed all batches');
|
|
213
|
-
|
|
214
191
|
newStreams.forEach((s) => s.push(null));
|
|
215
192
|
await Promise.all(bindingPromises);
|
|
216
|
-
|
|
217
193
|
return { outputFiles };
|
|
218
194
|
}
|
|
219
195
|
}
|
|
220
|
-
|
|
221
|
-
module.exports = ForEachEntry;
|
|
196
|
+
export default ForEachEntry;
|
package/ValidatingReadable.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import nodestream from 'node:stream';
|
|
2
|
+
const { Readable } = nodestream;
|
|
3
3
|
/*
|
|
4
4
|
A readable that will check data prior to it going into the stream
|
|
5
5
|
*/
|
|
@@ -8,9 +8,7 @@ class ValidatingReadable extends Readable {
|
|
|
8
8
|
super(options);
|
|
9
9
|
this.validator = validator || (() => true);
|
|
10
10
|
}
|
|
11
|
-
|
|
12
11
|
// _read() {super._read(size)}
|
|
13
|
-
|
|
14
12
|
push(chunk) {
|
|
15
13
|
try {
|
|
16
14
|
this.validator(chunk);
|
|
@@ -20,5 +18,4 @@ class ValidatingReadable extends Readable {
|
|
|
20
18
|
}
|
|
21
19
|
}
|
|
22
20
|
}
|
|
23
|
-
|
|
24
|
-
module.exports = ValidatingReadable;
|
|
21
|
+
export default ValidatingReadable;
|
package/buildSamplePackets.js
CHANGED
|
@@ -1,18 +1,13 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
1
|
+
import InputTools from './index.js';
|
|
3
2
|
(async () => {
|
|
4
|
-
await InputTools.create(
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
personFiles: ['./test/sample/message/1000_fake_people.csv'],
|
|
15
|
-
messageFiles: ['./test/sample/message/message.json5'],
|
|
16
|
-
},
|
|
17
|
-
);
|
|
3
|
+
await InputTools.create({
|
|
4
|
+
target: './test/sample/5_message.packet.zip',
|
|
5
|
+
personFiles: ['./test/sample/message/5_fake_people.csv'],
|
|
6
|
+
messageFiles: ['./test/sample/message/message.json5']
|
|
7
|
+
});
|
|
8
|
+
await InputTools.create({
|
|
9
|
+
target: './test/sample/1000_message.packet.zip',
|
|
10
|
+
personFiles: ['./test/sample/message/1000_fake_people.csv'],
|
|
11
|
+
messageFiles: ['./test/sample/message/message.json5']
|
|
12
|
+
});
|
|
18
13
|
})();
|
package/eslint.config.mjs
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
|
-
import js from
|
|
2
|
-
import globals from
|
|
3
|
-
import { defineConfig } from
|
|
4
|
-
|
|
1
|
+
import js from '@eslint/js';
|
|
2
|
+
import globals from 'globals';
|
|
3
|
+
import { defineConfig } from 'eslint/config';
|
|
5
4
|
export default defineConfig([
|
|
6
|
-
{
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
5
|
+
{
|
|
6
|
+
files: ['**/*.{js,mjs,cjs}'],
|
|
7
|
+
plugins: { js },
|
|
8
|
+
extends: ['js/recommended'],
|
|
9
|
+
languageOptions: {
|
|
10
|
+
globals: {
|
|
11
|
+
...globals.node // This includes 'process' and other Node.js globals
|
|
12
|
+
// globals.browser
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
{ files: ['**/*.js'], languageOptions: { sourceType: 'module' } }
|
|
13
17
|
]);
|