@engine9-io/input-tools 1.7.5 → 1.7.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ForEachEntry.js +1 -1
- package/eslint.config.mjs +13 -0
- package/file/FileUtilities.js +28 -7
- package/file/S3.js +11 -3
- package/file/tools.js +78 -6
- package/index.js +2 -67
- package/package.json +2 -5
- package/test/file.js +18 -0
- package/.eslintignore +0 -5
- package/.eslintrc.js +0 -36
package/ForEachEntry.js
CHANGED
|
@@ -172,7 +172,7 @@ class ForEachEntry {
|
|
|
172
172
|
batches += 1;
|
|
173
173
|
records += batch?.length || 0;
|
|
174
174
|
|
|
175
|
-
debugThrottle(`Processed ${batches} batches for a total of ${records} records`);
|
|
175
|
+
debugThrottle(`Processed ${batches} batches for a total of ${records} outbound records`);
|
|
176
176
|
cb();
|
|
177
177
|
},
|
|
178
178
|
}),
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import js from "@eslint/js";
|
|
2
|
+
import globals from "globals";
|
|
3
|
+
import { defineConfig } from "eslint/config";
|
|
4
|
+
|
|
5
|
+
export default defineConfig([
|
|
6
|
+
{ files: ["**/*.{js,mjs,cjs}"], plugins: { js }, extends: ["js/recommended"], languageOptions: {
|
|
7
|
+
globals: {
|
|
8
|
+
...globals.node, // This includes 'process' and other Node.js globals
|
|
9
|
+
// globals.browser
|
|
10
|
+
}
|
|
11
|
+
} },
|
|
12
|
+
{ files: ["**/*.js"], languageOptions: { sourceType: "commonjs" } },
|
|
13
|
+
]);
|
package/file/FileUtilities.js
CHANGED
|
@@ -20,7 +20,7 @@ const S3Worker = require('./S3');
|
|
|
20
20
|
const ParquetWorker = require('./Parquet');
|
|
21
21
|
|
|
22
22
|
const {
|
|
23
|
-
bool, getStringArray, getTempDir, makeStrings, streamPacket,
|
|
23
|
+
bool, getStringArray, getTempDir, makeStrings, streamPacket,relativeDate
|
|
24
24
|
} = require('./tools');
|
|
25
25
|
|
|
26
26
|
function Worker({ accountId }) { this.accountId = accountId; }
|
|
@@ -609,17 +609,38 @@ Worker.prototype.json.metadata = {
|
|
|
609
609
|
},
|
|
610
610
|
};
|
|
611
611
|
|
|
612
|
-
Worker.prototype.list = async function ({ directory }) {
|
|
612
|
+
Worker.prototype.list = async function ({ directory, start:s, end:e }) {
|
|
613
613
|
if (!directory) throw new Error('directory is required');
|
|
614
|
+
let start=null;
|
|
615
|
+
let end=null;
|
|
616
|
+
if (s) start=relativeDate(s);
|
|
617
|
+
if (e) end=relativeDate(e);
|
|
618
|
+
|
|
614
619
|
if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
|
|
615
620
|
const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
|
|
616
|
-
return worker.list({ directory });
|
|
621
|
+
return worker.list({ directory, start, end });
|
|
617
622
|
}
|
|
618
623
|
const a = await fsp.readdir(directory, { withFileTypes: true });
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
624
|
+
|
|
625
|
+
const withModified=[];
|
|
626
|
+
for (const file of a) {
|
|
627
|
+
const fullPath = path.join(directory, file.name);
|
|
628
|
+
const stats = await fsp.stat(fullPath);
|
|
629
|
+
if (start && stats.mtime<start.getTime()){
|
|
630
|
+
//do not include
|
|
631
|
+
}else if (end && stats.mtime>end.getTime()){
|
|
632
|
+
//do nothing
|
|
633
|
+
}else{
|
|
634
|
+
withModified.push({
|
|
635
|
+
name:file.name,
|
|
636
|
+
type: file.isDirectory() ? 'directory' : 'file',
|
|
637
|
+
modifiedAt:new Date(stats.mtime).toISOString(),
|
|
638
|
+
});
|
|
639
|
+
}
|
|
640
|
+
}
|
|
641
|
+
|
|
642
|
+
return withModified;
|
|
643
|
+
|
|
623
644
|
};
|
|
624
645
|
Worker.prototype.list.metadata = {
|
|
625
646
|
options: {
|
package/file/S3.js
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
const debug = require('debug')('@engine9-io/input/S3');
|
|
2
2
|
const fs = require('node:fs');
|
|
3
|
-
// eslint-disable-next-line import/no-unresolved
|
|
4
3
|
const { mimeType: mime } = require('mime-type/with-db');
|
|
5
4
|
const {
|
|
6
5
|
S3Client,
|
|
@@ -187,7 +186,7 @@ Worker.prototype.write.metadata = {
|
|
|
187
186
|
},
|
|
188
187
|
};
|
|
189
188
|
|
|
190
|
-
Worker.prototype.list = async function ({ directory, raw }) {
|
|
189
|
+
Worker.prototype.list = async function ({ directory, start,end,raw }) {
|
|
191
190
|
if (!directory) throw new Error('directory is required');
|
|
192
191
|
let dir = directory;
|
|
193
192
|
while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
|
|
@@ -206,7 +205,16 @@ Worker.prototype.list = async function ({ directory, raw }) {
|
|
|
206
205
|
name: f.Prefix.slice(Prefix.length + 1, -1),
|
|
207
206
|
type: 'directory',
|
|
208
207
|
})))
|
|
209
|
-
.concat((files || [])
|
|
208
|
+
.concat((files || [])
|
|
209
|
+
.filter(({LastModified})=>{
|
|
210
|
+
if (start && new Date(LastModified)<start){
|
|
211
|
+
return false;
|
|
212
|
+
}else if (end && new Date(LastModified)>end){
|
|
213
|
+
return false;
|
|
214
|
+
}else{
|
|
215
|
+
return true;
|
|
216
|
+
}
|
|
217
|
+
}).map(({ Key, Size, LastModified }) => ({
|
|
210
218
|
name: Key.slice(Prefix.length + 1),
|
|
211
219
|
type: 'file',
|
|
212
220
|
size: Size,
|
package/file/tools.js
CHANGED
|
@@ -12,6 +12,15 @@ const { PassThrough } = require('node:stream');
|
|
|
12
12
|
const progress = require('debug')('info:@engine9/input-tools');
|
|
13
13
|
const unzipper = require('unzipper');
|
|
14
14
|
|
|
15
|
+
const dayjs = require('dayjs');
|
|
16
|
+
|
|
17
|
+
const {
|
|
18
|
+
S3Client,
|
|
19
|
+
HeadObjectCommand,
|
|
20
|
+
GetObjectCommand,
|
|
21
|
+
} = require('@aws-sdk/client-s3');
|
|
22
|
+
|
|
23
|
+
|
|
15
24
|
const {
|
|
16
25
|
v7: uuidv7,
|
|
17
26
|
} = require('uuid');
|
|
@@ -72,12 +81,6 @@ async function writeTempFile(options) {
|
|
|
72
81
|
return { filename };
|
|
73
82
|
}
|
|
74
83
|
|
|
75
|
-
const {
|
|
76
|
-
S3Client,
|
|
77
|
-
HeadObjectCommand,
|
|
78
|
-
GetObjectCommand,
|
|
79
|
-
} = require('@aws-sdk/client-s3');
|
|
80
|
-
|
|
81
84
|
async function getPacketFiles({ packet }) {
|
|
82
85
|
if (packet.indexOf('s3://') === 0) {
|
|
83
86
|
const parts = packet.split('/');
|
|
@@ -128,6 +131,7 @@ async function getPacketFiles({ packet }) {
|
|
|
128
131
|
return directory;
|
|
129
132
|
}
|
|
130
133
|
|
|
134
|
+
|
|
131
135
|
async function getManifest({ packet }) {
|
|
132
136
|
if (!packet) throw new Error('no packet option specififed');
|
|
133
137
|
const { files } = await getPacketFiles({ packet });
|
|
@@ -222,6 +226,12 @@ async function downloadFile({ packet, type = 'person' }) {
|
|
|
222
226
|
});
|
|
223
227
|
}
|
|
224
228
|
|
|
229
|
+
function isValidDate(d) {
|
|
230
|
+
// we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
|
|
231
|
+
// eslint-disable-next-line no-restricted-globals
|
|
232
|
+
return d instanceof Date && !isNaN(d);
|
|
233
|
+
}
|
|
234
|
+
|
|
225
235
|
function bool(x, _defaultVal) {
|
|
226
236
|
const defaultVal = (_defaultVal === undefined) ? false : _defaultVal;
|
|
227
237
|
if (x === undefined || x === null || x === '') return defaultVal;
|
|
@@ -240,6 +250,67 @@ function getStringArray(s, nonZeroLength) {
|
|
|
240
250
|
if (nonZeroLength && a.length === 0) a = [0];
|
|
241
251
|
return a;
|
|
242
252
|
}
|
|
253
|
+
function relativeDate(s, _initialDate) {
|
|
254
|
+
let initialDate = _initialDate;
|
|
255
|
+
if (!s || s === 'none') return null;
|
|
256
|
+
if (typeof s.getMonth === 'function') return s;
|
|
257
|
+
// We actually want a double equals here to test strings as well
|
|
258
|
+
// eslint-disable-next-line eqeqeq
|
|
259
|
+
if (parseInt(s, 10) == s) {
|
|
260
|
+
const r = new Date(parseInt(s, 10));
|
|
261
|
+
if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
|
|
262
|
+
return r;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
if (initialDate) {
|
|
266
|
+
initialDate = new Date(initialDate);
|
|
267
|
+
} else {
|
|
268
|
+
initialDate = new Date();
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
|
|
272
|
+
|
|
273
|
+
if (r) {
|
|
274
|
+
let period = null;
|
|
275
|
+
switch (r[3]) {
|
|
276
|
+
case 'Y':
|
|
277
|
+
case 'y': period = 'years'; break;
|
|
278
|
+
|
|
279
|
+
case 'M': period = 'months'; break;
|
|
280
|
+
case 'w': period = 'weeks'; break;
|
|
281
|
+
case 'd': period = 'days'; break;
|
|
282
|
+
case 'h': period = 'hours'; break;
|
|
283
|
+
case 'm': period = 'minutes'; break;
|
|
284
|
+
case 's': period = 'seconds'; break;
|
|
285
|
+
default: period = 'minutes'; break;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
let d = dayjs(initialDate);
|
|
289
|
+
|
|
290
|
+
if (r[1] === '+') {
|
|
291
|
+
d = d.add(parseInt(r[2], 10), period);
|
|
292
|
+
} else {
|
|
293
|
+
d = d.subtract(parseInt(r[2], 10), period);
|
|
294
|
+
}
|
|
295
|
+
if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
|
|
296
|
+
if (r[4]) {
|
|
297
|
+
const opts = r[4].split('.').filter(Boolean);
|
|
298
|
+
if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
|
|
299
|
+
else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
|
|
300
|
+
else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
return d.toDate();
|
|
304
|
+
}
|
|
305
|
+
if (s === 'now') {
|
|
306
|
+
r = dayjs(new Date()).toDate();
|
|
307
|
+
return r;
|
|
308
|
+
}
|
|
309
|
+
r = dayjs(new Date(s)).toDate();
|
|
310
|
+
if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
|
|
311
|
+
return r;
|
|
312
|
+
}
|
|
313
|
+
|
|
243
314
|
/*
|
|
244
315
|
When comparing two objects, some may come from a file (thus strings), and some from
|
|
245
316
|
a database or elsewhere (not strings), so for deduping make sure to make them all strings
|
|
@@ -263,6 +334,7 @@ module.exports = {
|
|
|
263
334
|
getPacketFiles,
|
|
264
335
|
getStringArray,
|
|
265
336
|
makeStrings,
|
|
337
|
+
relativeDate,
|
|
266
338
|
streamPacket,
|
|
267
339
|
writeTempFile,
|
|
268
340
|
};
|
package/index.js
CHANGED
|
@@ -21,6 +21,8 @@ const {
|
|
|
21
21
|
downloadFile,
|
|
22
22
|
getTempFilename,
|
|
23
23
|
getTempDir,
|
|
24
|
+
isValidDate,
|
|
25
|
+
relativeDate,
|
|
24
26
|
streamPacket,
|
|
25
27
|
getPacketFiles,
|
|
26
28
|
getBatchTransform,
|
|
@@ -54,73 +56,6 @@ handlebars.registerHelper('percent', (a, b) => `${((100 * a) / b).toFixed(2)}%`)
|
|
|
54
56
|
|
|
55
57
|
handlebars.registerHelper('or', (a, b, c) => a || b || c);
|
|
56
58
|
|
|
57
|
-
function isValidDate(d) {
|
|
58
|
-
// we WANT to use isNaN, not the Number.isNaN -- we're checking the date type
|
|
59
|
-
// eslint-disable-next-line no-restricted-globals
|
|
60
|
-
return d instanceof Date && !isNaN(d);
|
|
61
|
-
}
|
|
62
|
-
|
|
63
|
-
function relativeDate(s, _initialDate) {
|
|
64
|
-
let initialDate = _initialDate;
|
|
65
|
-
if (!s || s === 'none') return null;
|
|
66
|
-
if (typeof s.getMonth === 'function') return s;
|
|
67
|
-
// We actually want a double equals here to test strings as well
|
|
68
|
-
// eslint-disable-next-line eqeqeq
|
|
69
|
-
if (parseInt(s, 10) == s) {
|
|
70
|
-
const r = new Date(parseInt(s, 10));
|
|
71
|
-
if (!isValidDate(r)) throw new Error(`Invalid integer date:${s}`);
|
|
72
|
-
return r;
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
if (initialDate) {
|
|
76
|
-
initialDate = new Date(initialDate);
|
|
77
|
-
} else {
|
|
78
|
-
initialDate = new Date();
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
let r = s.match(/^([+-]{1})([0-9]+)([YyMwdhms]{1})([.a-z]*)$/);
|
|
82
|
-
|
|
83
|
-
if (r) {
|
|
84
|
-
let period = null;
|
|
85
|
-
switch (r[3]) {
|
|
86
|
-
case 'Y':
|
|
87
|
-
case 'y': period = 'years'; break;
|
|
88
|
-
|
|
89
|
-
case 'M': period = 'months'; break;
|
|
90
|
-
case 'w': period = 'weeks'; break;
|
|
91
|
-
case 'd': period = 'days'; break;
|
|
92
|
-
case 'h': period = 'hours'; break;
|
|
93
|
-
case 'm': period = 'minutes'; break;
|
|
94
|
-
case 's': period = 'seconds'; break;
|
|
95
|
-
default: period = 'minutes'; break;
|
|
96
|
-
}
|
|
97
|
-
|
|
98
|
-
let d = dayjs(initialDate);
|
|
99
|
-
|
|
100
|
-
if (r[1] === '+') {
|
|
101
|
-
d = d.add(parseInt(r[2], 10), period);
|
|
102
|
-
} else {
|
|
103
|
-
d = d.subtract(parseInt(r[2], 10), period);
|
|
104
|
-
}
|
|
105
|
-
if (!isValidDate(d.toDate())) throw new Error(`Invalid date configuration:${r}`);
|
|
106
|
-
if (r[4]) {
|
|
107
|
-
const opts = r[4].split('.').filter(Boolean);
|
|
108
|
-
if (opts[0] === 'start') d = d.startOf(opts[1] || 'day');
|
|
109
|
-
else if (opts[0] === 'end') d = d.endOf(opts[1] || 'day');
|
|
110
|
-
else throw new Error(`Invalid relative date,unknown options:${r[4]}`);
|
|
111
|
-
}
|
|
112
|
-
|
|
113
|
-
return d.toDate();
|
|
114
|
-
}
|
|
115
|
-
if (s === 'now') {
|
|
116
|
-
r = dayjs(new Date()).toDate();
|
|
117
|
-
return r;
|
|
118
|
-
}
|
|
119
|
-
r = dayjs(new Date(s)).toDate();
|
|
120
|
-
if (!isValidDate(r)) throw new Error(`Invalid Date: ${s}`);
|
|
121
|
-
return r;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
59
|
async function list(_path) {
|
|
125
60
|
const directory = await unzipper.Open.file(_path);
|
|
126
61
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@engine9-io/input-tools",
|
|
3
|
-
"version": "1.7.5",
|
|
3
|
+
"version": "1.7.6",
|
|
4
4
|
"description": "Tools for dealing with Engine9 inputs",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -10,10 +10,7 @@
|
|
|
10
10
|
"author": "Engine9",
|
|
11
11
|
"license": "GPL-3.0-or-later",
|
|
12
12
|
"devDependencies": {
|
|
13
|
-
"eslint": "^
|
|
14
|
-
"eslint-config-airbnb-base": "^15.0.0",
|
|
15
|
-
"eslint-plugin-import": "^2.29.0",
|
|
16
|
-
"eslint-plugin-jsonc": "^2.15.1"
|
|
13
|
+
"eslint": "^9.33.0"
|
|
17
14
|
},
|
|
18
15
|
"dependencies": {
|
|
19
16
|
"@aws-sdk/client-s3": "^3.723.0",
|
package/test/file.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
const {
|
|
2
|
+
it,
|
|
3
|
+
} = require('node:test');
|
|
4
|
+
const assert = require('node:assert');
|
|
5
|
+
const debug = require('debug')('files');
|
|
6
|
+
|
|
7
|
+
const { FileUtilities } = require('../index');
|
|
8
|
+
|
|
9
|
+
it('Should list a directory', async () => {
|
|
10
|
+
const futil=new FileUtilities({accountId:'test'});
|
|
11
|
+
let files=await futil.list({directory:'.'});
|
|
12
|
+
assert(files.length,"Should have some files");
|
|
13
|
+
debug(files);
|
|
14
|
+
let startTest=await futil.list({directory:'.',start:'2040-01-01'});
|
|
15
|
+
assert(startTest.length===0,"Should NOT have any files before future start date");
|
|
16
|
+
let endTest=await futil.list({directory:'.',end:'1900-01-01'});
|
|
17
|
+
assert(endTest.length===0,"Should NOT have any files before past end date");
|
|
18
|
+
});
|
package/.eslintignore
DELETED
package/.eslintrc.js
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
module.exports = {
|
|
2
|
-
env: {
|
|
3
|
-
browser: true,
|
|
4
|
-
commonjs: true,
|
|
5
|
-
es2021: true,
|
|
6
|
-
},
|
|
7
|
-
extends: [
|
|
8
|
-
'airbnb-base',
|
|
9
|
-
'plugin:jsonc/base',
|
|
10
|
-
'plugin:jsonc/recommended-with-json5'
|
|
11
|
-
],
|
|
12
|
-
overrides: [
|
|
13
|
-
{
|
|
14
|
-
env: {
|
|
15
|
-
node: true,
|
|
16
|
-
},
|
|
17
|
-
files: [
|
|
18
|
-
'.eslintrc.{js,cjs}',
|
|
19
|
-
],
|
|
20
|
-
parserOptions: {
|
|
21
|
-
sourceType: 'script',
|
|
22
|
-
},
|
|
23
|
-
plugins: [
|
|
24
|
-
'json5',
|
|
25
|
-
],
|
|
26
|
-
},
|
|
27
|
-
],
|
|
28
|
-
parserOptions: {
|
|
29
|
-
ecmaVersion: 'latest',
|
|
30
|
-
},
|
|
31
|
-
|
|
32
|
-
rules: {
|
|
33
|
-
'func-names': 'off', // Anonymous functions have their useful cases
|
|
34
|
-
'no-param-reassign': [2, { props: false }], // We often assign props of an object in a function, and that's generally safe.
|
|
35
|
-
},
|
|
36
|
-
};
|