@engine9-io/input-tools 1.7.5 → 1.7.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ForEachEntry.js +1 -1
- package/eslint.config.mjs +13 -0
- package/file/FileUtilities.js +33 -11
- package/file/S3.js +11 -3
- package/file/tools.js +78 -6
- package/index.js +2 -67
- package/package.json +2 -5
- package/test/file.js +18 -0
- package/.eslintignore +0 -5
- package/.eslintrc.js +0 -36
package/ForEachEntry.js
CHANGED
|
@@ -172,7 +172,7 @@ class ForEachEntry {
|
|
|
172
172
|
batches += 1;
|
|
173
173
|
records += batch?.length || 0;
|
|
174
174
|
|
|
175
|
-
debugThrottle(`Processed ${batches} batches for a total of ${records} records`);
|
|
175
|
+
debugThrottle(`Processed ${batches} batches for a total of ${records} outbound records`);
|
|
176
176
|
cb();
|
|
177
177
|
},
|
|
178
178
|
}),
|
package/eslint.config.mjs
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import js from "@eslint/js";
|
|
2
|
+
import globals from "globals";
|
|
3
|
+
import { defineConfig } from "eslint/config";
|
|
4
|
+
|
|
5
|
+
export default defineConfig([
|
|
6
|
+
{ files: ["**/*.{js,mjs,cjs}"], plugins: { js }, extends: ["js/recommended"], languageOptions: {
|
|
7
|
+
globals: {
|
|
8
|
+
...globals.node, // This includes 'process' and other Node.js globals
|
|
9
|
+
// globals.browser
|
|
10
|
+
}
|
|
11
|
+
} },
|
|
12
|
+
{ files: ["**/*.js"], languageOptions: { sourceType: "commonjs" } },
|
|
13
|
+
]);
|
package/file/FileUtilities.js
CHANGED
|
@@ -20,7 +20,7 @@ const S3Worker = require('./S3');
|
|
|
20
20
|
const ParquetWorker = require('./Parquet');
|
|
21
21
|
|
|
22
22
|
const {
|
|
23
|
-
bool, getStringArray, getTempDir, makeStrings, streamPacket,
|
|
23
|
+
bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
|
|
24
24
|
} = require('./tools');
|
|
25
25
|
|
|
26
26
|
function Worker({ accountId }) { this.accountId = accountId; }
|
|
@@ -153,7 +153,7 @@ Worker.prototype.detectEncoding.metadata = {
|
|
|
153
153
|
Internal method to transform a file into a stream of objects.
|
|
154
154
|
*/
|
|
155
155
|
Worker.prototype.fileToObjectStream = async function (options) {
|
|
156
|
-
const { filename, columns, limit: limitOption } = options;
|
|
156
|
+
const { filename, columns, limit: limitOption,format:formatOverride } = options;
|
|
157
157
|
|
|
158
158
|
// handle stream item
|
|
159
159
|
if (options.stream) {
|
|
@@ -203,14 +203,15 @@ Worker.prototype.fileToObjectStream = async function (options) {
|
|
|
203
203
|
} else {
|
|
204
204
|
stream.setEncoding(encoding);
|
|
205
205
|
}
|
|
206
|
+
let format=formatOverride || postfix;
|
|
206
207
|
|
|
207
|
-
if (
|
|
208
|
+
if (format === 'csv') {
|
|
208
209
|
const csvTransforms = this.csvToObjectTransforms({ ...options });
|
|
209
210
|
transforms = transforms.concat(csvTransforms.transforms);
|
|
210
|
-
} else if (
|
|
211
|
+
} else if (format === 'txt') {
|
|
211
212
|
const csvTransforms = this.csvToObjectTransforms({ ...options, delimiter: '\t' });
|
|
212
213
|
transforms = transforms.concat(csvTransforms.transforms);
|
|
213
|
-
} else if (
|
|
214
|
+
} else if (format === 'jsonl') {
|
|
214
215
|
/* Type of JSON that has the names in an array in the first record,
|
|
215
216
|
and the values in JSON arrays thereafter
|
|
216
217
|
*/
|
|
@@ -609,17 +610,38 @@ Worker.prototype.json.metadata = {
|
|
|
609
610
|
},
|
|
610
611
|
};
|
|
611
612
|
|
|
612
|
-
Worker.prototype.list = async function ({ directory }) {
|
|
613
|
+
Worker.prototype.list = async function ({ directory, start:s, end:e }) {
|
|
613
614
|
if (!directory) throw new Error('directory is required');
|
|
615
|
+
let start=null;
|
|
616
|
+
let end=null;
|
|
617
|
+
if (s) start=relativeDate(s);
|
|
618
|
+
if (e) end=relativeDate(e);
|
|
619
|
+
|
|
614
620
|
if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
|
|
615
621
|
const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
|
|
616
|
-
return worker.list({ directory });
|
|
622
|
+
return worker.list({ directory, start, end });
|
|
617
623
|
}
|
|
618
624
|
const a = await fsp.readdir(directory, { withFileTypes: true });
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
625
|
+
|
|
626
|
+
const withModified=[];
|
|
627
|
+
for (const file of a) {
|
|
628
|
+
const fullPath = path.join(directory, file.name);
|
|
629
|
+
const stats = await fsp.stat(fullPath);
|
|
630
|
+
if (start && stats.mtime<start.getTime()){
|
|
631
|
+
//do not include
|
|
632
|
+
}else if (end && stats.mtime>end.getTime()){
|
|
633
|
+
//do nothing
|
|
634
|
+
}else{
|
|
635
|
+
withModified.push({
|
|
636
|
+
name:file.name,
|
|
637
|
+
type: file.isDirectory() ? 'directory' : 'file',
|
|
638
|
+
modifiedAt:new Date(stats.mtime).toISOString(),
|
|
639
|
+
});
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
return withModified;
|
|
644
|
+
|
|
623
645
|
};
|
|
624
646
|
Worker.prototype.list.metadata = {
|
|
625
647
|
options: {
|
package/file/S3.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
const debug = require('debug')('@engine9-io/input/S3');
|
|
2
2
|
const fs = require('node:fs');
|
|
3
|
-
// eslint-disable-next-line import/no-unresolved
|
|
4
3
|
const { mimeType: mime } = require('mime-type/with-db');
|
|
5
4
|
const {
|
|
6
5
|
S3Client,
|
|
@@ -187,7 +186,7 @@ Worker.prototype.write.metadata = {
|
|
|
187
186
|
},
|
|
188
187
|
};
|
|
189
188
|
|
|
190
|
-
Worker.prototype.list = async function ({ directory, raw }) {
|
|
189
|
+
Worker.prototype.list = async function ({ directory, start,end,raw }) {
|
|
191
190
|
if (!directory) throw new Error('directory is required');
|
|
192
191
|
let dir = directory;
|
|
193
192
|
while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
|
|
@@ -206,7 +205,16 @@ Worker.prototype.list = async function ({ directory, raw }) {
|
|
|
206
205
|
name: f.Prefix.slice(Prefix.length + 1, -1),
|
|
207
206
|
type: 'directory',
|
|
208
207
|
})))
|
|
209
|
-
.concat((files || []).map(({ Key, Size, LastModified }) => ({
|
|
208
|
+
.concat((files || [])
|
|
209
|
+
.filter(({LastModified})=>{
|
|
210
|
+
if (start && new Date(LastModified)<start){
|
|
211
|
+
return false;
|
|
212
|
+
}else if (end && new Date(LastModified)>end){
|
|
213
|
+
return false;
|
|
214
|
+
}else{
|
|
215
|
+
return true;
|
|
216
|
+
}
|
|
217
|
+
}).map(({ Key, Size, LastModified }) => ({
|
|
210
218
|
name: Key.slice(Prefix.length + 1),
|
|
211
219
|
type: 'file',
|
|
212
220
|
size: Size,
|
package/file/tools.js
CHANGED
|
@@ -12,6 +12,15 @@ const { PassThrough } = require('node:stream');
|
|
|
12
12
|
const progress = require('debug')('info:@engine9/input-tools');
|
|
13
13
|
const unzipper = require('unzipper');
|
|
14
14
|
|
|
15
|
+
const dayjs = require('dayjs');
|
|
16
|
+
|
|
17
|
+
const {
|
|
18
|
+
S3Client,
|
|
19
|
+
HeadObjectCommand,
|
|
20
|
+
GetObjectCommand,
|
|
21
|
+
} = require('@aws-sdk/client-s3');
|
|
22
|
+
|
|
23
|
+
|
|
15
24
|
const {
|
|
16
25
|
v7: uuidv7,
|
|
17
26
|
} = require('uuid');
|
|
@@ -72,12 +81,6 @@ async function writeTempFile(options) {
|
|
|
72
81
|
return { filename };
|
|
73
82
|
}
|
|
74
83
|
|
|
75
|
-
const {
|
|
76
|
-
S3Client,
|
|
77
|
-
HeadObjectCommand,
|
|
78
|
-
GetObjectCommand,
|
|
79
|
-
} = require('@aws-sdk/client-s3');
|
|
80
|
-
|
|
81
84
|
async function getPacketFiles({ packet }) {
|
|
82
85
|
if (packet.indexOf('s3://') === 0) {
|
|
83
86
|
const parts = packet.split('/');
|
|
@@ -128,6 +131,7 @@ async function getPacketFiles({ packet }) {
|
|
|
128
131
|
return directory;
|
|
129
132
|
}
|
|
130
133
|
|
|
134
|
+
|
|
131
135
|
async function getManifest({ packet }) {
|
|
132
136
|
if (!packet) throw new Error('no packet option specififed');
|
|
133
137
|
const { files } = await getPacketFiles({ packet });
|
|
@@ -222,6 +226,12 @@ async function downloadFile({ packet, type = 'person' }) {
|
|
|
222
226
|
});
|
|
223
227
|
}
|
|
224
228
|
|
|
229
|
+
function isValidDate(d) {
|
|
230
|
+
// we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
|
|
231
|
+
// eslint-disable-next-line no-restricted-globals
|
|
232
|
+
return d instanceof Date && !isNaN(d);
|
|
233
|
+
}
|
|
234
|
+
|
|
225
235
|
function bool(x, _defaultVal) {
|
|
226
236
|
const defaultVal = (_defaultVal === undefined) ? false : _defaultVal;
|
|
227
237
|
if (x === undefined || x === null || x === '') return defaultVal;
|
|
@@ -240,6 +250,67 @@ function getStringArray(s, nonZeroLength) {
|
|
|
240
250
|
if (nonZeroLength && a.length === 0) a = [0];
|
|
241
251
|
return a;
|
|
242
252
|
}
|
|
253
|
+
function relativeDate(s, _initialDate) {
|
|
254
|
+
let initialDate = _initialDate;
|
|
255
|
+
if (!s || s === 'none') return null;
|
|
256
|
+
if (typeof s.getMonth === 'function') return s;
|
|
257
|
+
// We actually want a double equals here to test strings as well
|
|
258
|
+
// eslint-disable-next-line eqeqeq
|
|
259
|
+
if (parseInt(s, 10) == s) {
|
|
260
|
+
const r = new Date(parseInt(s, 10));
|
|
261
|
+
if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
|
|
262
|
+
return r;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if (initialDate) {
|
|
266
|
+
initialDate = new Date(initialDate);
|
|
267
|
+
} else {
|
|
268
|
+
initialDate = new Date();
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
|
|
272
|
+
|
|
273
|
+
if (r) {
|
|
274
|
+
let period = null;
|
|
275
|
+
switch (r[3]) {
|
|
276
|
+
case 'Y':
|
|
277
|
+
case 'y': period = 'years'; break;
|
|
278
|
+
|
|
279
|
+
case 'M': period = 'months'; break;
|
|
280
|
+
case 'w': period = 'weeks'; break;
|
|
281
|
+
case 'd': period = 'days'; break;
|
|
282
|
+
case 'h': period = 'hours'; break;
|
|
283
|
+
case 'm': period = 'minutes'; break;
|
|
284
|
+
case 's': period = 'seconds'; break;
|
|
285
|
+
default: period = 'minutes'; break;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
let d = dayjs(initialDate);
|
|
289
|
+
|
|
290
|
+
if (r[1] === '+') {
|
|
291
|
+
d = d.add(parseInt(r[2], 10), period);
|
|
292
|
+
} else {
|
|
293
|
+
d = d.subtract(parseInt(r[2], 10), period);
|
|
294
|
+
}
|
|
295
|
+
if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
|
|
296
|
+
if (r[4]) {
|
|
297
|
+
const opts = r[4].split('.').filter(Boolean);
|
|
298
|
+
if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
|
|
299
|
+
else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
|
|
300
|
+
else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return d.toDate();
|
|
304
|
+
}
|
|
305
|
+
if (s === 'now') {
|
|
306
|
+
r = dayjs(new Date()).toDate();
|
|
307
|
+
return r;
|
|
308
|
+
}
|
|
309
|
+
r = dayjs(new Date(s)).toDate();
|
|
310
|
+
if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
|
|
311
|
+
return r;
|
|
312
|
+
}
|
|
313
|
+
|
|
243
314
|
/*
|
|
244
315
|
When comparing two objects, some may come from a file (thus strings), and some from
|
|
245
316
|
a database or elsewhere (not strings), so for deduping make sure to make them all strings
|
|
@@ -263,6 +334,7 @@ module.exports = {
|
|
|
263
334
|
getPacketFiles,
|
|
264
335
|
getStringArray,
|
|
265
336
|
makeStrings,
|
|
337
|
+
relativeDate,
|
|
266
338
|
streamPacket,
|
|
267
339
|
writeTempFile,
|
|
268
340
|
};
|
package/index.js
CHANGED
|
@@ -21,6 +21,8 @@ const {
|
|
|
21
21
|
downloadFile,
|
|
22
22
|
getTempFilename,
|
|
23
23
|
getTempDir,
|
|
24
|
+
isValidDate,
|
|
25
|
+
relativeDate,
|
|
24
26
|
streamPacket,
|
|
25
27
|
getPacketFiles,
|
|
26
28
|
getBatchTransform,
|
|
@@ -54,73 +56,6 @@ handlebars.registerHelper('percent', (a, b) => `${((100 * a) / b).toFixed(2)}%`)
|
|
|
54
56
|
|
|
55
57
|
handlebars.registerHelper('or', (a, b, c) => a || b || c);
|
|
56
58
|
|
|
57
|
-
function isValidDate(d) {
|
|
58
|
-
// we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
|
|
59
|
-
// eslint-disable-next-line no-restricted-globals
|
|
60
|
-
return d instanceof Date && !isNaN(d);
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function relativeDate(s, _initialDate) {
|
|
64
|
-
let initialDate = _initialDate;
|
|
65
|
-
if (!s || s === 'none') return null;
|
|
66
|
-
if (typeof s.getMonth === 'function') return s;
|
|
67
|
-
// We actually want a double equals here to test strings as well
|
|
68
|
-
// eslint-disable-next-line eqeqeq
|
|
69
|
-
if (parseInt(s, 10) == s) {
|
|
70
|
-
const r = new Date(parseInt(s, 10));
|
|
71
|
-
if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
|
|
72
|
-
return r;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
if (initialDate) {
|
|
76
|
-
initialDate = new Date(initialDate);
|
|
77
|
-
} else {
|
|
78
|
-
initialDate = new Date();
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
|
|
82
|
-
|
|
83
|
-
if (r) {
|
|
84
|
-
let period = null;
|
|
85
|
-
switch (r[3]) {
|
|
86
|
-
case 'Y':
|
|
87
|
-
case 'y': period = 'years'; break;
|
|
88
|
-
|
|
89
|
-
case 'M': period = 'months'; break;
|
|
90
|
-
case 'w': period = 'weeks'; break;
|
|
91
|
-
case 'd': period = 'days'; break;
|
|
92
|
-
case 'h': period = 'hours'; break;
|
|
93
|
-
case 'm': period = 'minutes'; break;
|
|
94
|
-
case 's': period = 'seconds'; break;
|
|
95
|
-
default: period = 'minutes'; break;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
let d = dayjs(initialDate);
|
|
99
|
-
|
|
100
|
-
if (r[1] === '+') {
|
|
101
|
-
d = d.add(parseInt(r[2], 10), period);
|
|
102
|
-
} else {
|
|
103
|
-
d = d.subtract(parseInt(r[2], 10), period);
|
|
104
|
-
}
|
|
105
|
-
if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
|
|
106
|
-
if (r[4]) {
|
|
107
|
-
const opts = r[4].split('.').filter(Boolean);
|
|
108
|
-
if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
|
|
109
|
-
else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
|
|
110
|
-
else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
return d.toDate();
|
|
114
|
-
}
|
|
115
|
-
if (s === 'now') {
|
|
116
|
-
r = dayjs(new Date()).toDate();
|
|
117
|
-
return r;
|
|
118
|
-
}
|
|
119
|
-
r = dayjs(new Date(s)).toDate();
|
|
120
|
-
if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
|
|
121
|
-
return r;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
59
|
async function list(_path) {
|
|
125
60
|
const directory = await unzipper.Open.file(_path);
|
|
126
61
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@engine9-io/input-tools",
|
|
3
|
-
"version": "1.7.5",
|
|
3
|
+
"version": "1.7.7",
|
|
4
4
|
"description": "Tools for dealing with Engine9 inputs",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -10,10 +10,7 @@
|
|
|
10
10
|
"author": "Engine9",
|
|
11
11
|
"license": "GPL-3.0-or-later",
|
|
12
12
|
"devDependencies": {
|
|
13
|
-
"eslint": "^
|
|
14
|
-
"eslint-config-airbnb-base": "^15.0.0",
|
|
15
|
-
"eslint-plugin-import": "^2.29.0",
|
|
16
|
-
"eslint-plugin-jsonc": "^2.15.1"
|
|
13
|
+
"eslint": "^9.33.0"
|
|
17
14
|
},
|
|
18
15
|
"dependencies": {
|
|
19
16
|
"@aws-sdk/client-s3": "^3.723.0",
|
package/test/file.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
const {
|
|
2
|
+
it,
|
|
3
|
+
} = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const debug = require('debug')('files');
|
|
6
|
+
|
|
7
|
+
const { FileUtilities } = require('../index');
|
|
8
|
+
|
|
9
|
+
it('Should list a directory', async () => {
|
|
10
|
+
const futil=new FileUtilities({accountId:'test'});
|
|
11
|
+
let files=await futil.list({directory:'.'});
|
|
12
|
+
assert(files.length,"Should have some files");
|
|
13
|
+
debug(files);
|
|
14
|
+
let startTest=await futil.list({directory:'.',start:'2040-01-01'});
|
|
15
|
+
assert(startTest.length===0,"Should NOT have any files before future start date");
|
|
16
|
+
let endTest=await futil.list({directory:'.',end:'1900-01-01'});
|
|
17
|
+
assert(endTest.length===0,"Should NOT have any files before past end date");
|
|
18
|
+
});
|
package/.eslintignore
DELETED
package/.eslintrc.js
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
module.exports = {
|
|
2
|
-
env: {
|
|
3
|
-
browser: true,
|
|
4
|
-
commonjs: true,
|
|
5
|
-
es2021: true,
|
|
6
|
-
},
|
|
7
|
-
extends: [
|
|
8
|
-
'airbnb-base',
|
|
9
|
-
'plugin:jsonc/base',
|
|
10
|
-
'plugin:jsonc/recommended-with-json5'
|
|
11
|
-
],
|
|
12
|
-
overrides: [
|
|
13
|
-
{
|
|
14
|
-
env: {
|
|
15
|
-
node: true,
|
|
16
|
-
},
|
|
17
|
-
files: [
|
|
18
|
-
'.eslintrc.{js,cjs}',
|
|
19
|
-
],
|
|
20
|
-
parserOptions: {
|
|
21
|
-
sourceType: 'script',
|
|
22
|
-
},
|
|
23
|
-
plugins: [
|
|
24
|
-
'json5',
|
|
25
|
-
],
|
|
26
|
-
},
|
|
27
|
-
],
|
|
28
|
-
parserOptions: {
|
|
29
|
-
ecmaVersion: 'latest',
|
|
30
|
-
},
|
|
31
|
-
|
|
32
|
-
rules: {
|
|
33
|
-
'func-names': 'off', // Anonymous functions have their useful cases
|
|
34
|
-
'no-param-reassign': [2, { props: false }], // We often assign props of an object in a function, and that's generally safe.
|
|
35
|
-
},
|
|
36
|
-
};
|