@engine9/input-tools 2.0.10 → 2.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file/FileUtilities.js +48 -7
- package/file/S3.js +108 -38
- package/file/tools.js +11 -0
- package/package.json +1 -1
package/file/FileUtilities.js
CHANGED
|
@@ -11,7 +11,17 @@ import languageEncoding from 'detect-file-encoding-and-language';
|
|
|
11
11
|
import R2Worker from './R2.js';
|
|
12
12
|
import S3Worker from './S3.js';
|
|
13
13
|
import ParquetWorker from './Parquet.js';
|
|
14
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
bool,
|
|
16
|
+
getTempFilename,
|
|
17
|
+
getStringArray,
|
|
18
|
+
getTempDir,
|
|
19
|
+
getFilePostfix,
|
|
20
|
+
makeStrings,
|
|
21
|
+
normalizeListDepth,
|
|
22
|
+
streamPacket,
|
|
23
|
+
relativeDate
|
|
24
|
+
} from './tools.js';
|
|
15
25
|
const fsp = fs.promises;
|
|
16
26
|
const { Readable, Transform, PassThrough, Writable } = nodestream;
|
|
17
27
|
const { pipeline } = promises;
|
|
@@ -623,15 +633,42 @@ Worker.prototype.json.metadata = {
|
|
|
623
633
|
filename: { description: 'Get a javascript object from a file' }
|
|
624
634
|
}
|
|
625
635
|
};
|
|
626
|
-
Worker.prototype.list = async function ({ directory, start: s, end: e }) {
|
|
636
|
+
Worker.prototype.list = async function ({ directory, start: s, end: e, depth: depthOpt }) {
|
|
627
637
|
if (!directory) throw new Error('directory is required');
|
|
628
638
|
let start = null;
|
|
629
639
|
let end = null;
|
|
630
640
|
if (s) start = relativeDate(s);
|
|
631
641
|
if (e) end = relativeDate(e);
|
|
642
|
+
const maxDepth = normalizeListDepth(depthOpt);
|
|
632
643
|
if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
|
|
633
644
|
const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
|
|
634
|
-
return worker.list({ directory, start, end });
|
|
645
|
+
return worker.list({ directory, start, end, depth: maxDepth });
|
|
646
|
+
}
|
|
647
|
+
if (maxDepth) {
|
|
648
|
+
const baseDir = path.resolve(directory);
|
|
649
|
+
const withModified = [];
|
|
650
|
+
const walk = async (dir, relParts) => {
|
|
651
|
+
const entries = await fsp.readdir(dir, { withFileTypes: true });
|
|
652
|
+
for (const ent of entries) {
|
|
653
|
+
const fullPath = path.join(dir, ent.name);
|
|
654
|
+
const segCount = relParts.length + 1;
|
|
655
|
+
if (segCount > maxDepth) continue;
|
|
656
|
+
const stats = await fsp.stat(fullPath);
|
|
657
|
+
if (start && stats.mtime < start.getTime()) continue;
|
|
658
|
+
if (end && stats.mtime > end.getTime()) continue;
|
|
659
|
+
const name = relParts.length ? `${relParts.join('/')}/${ent.name}` : ent.name;
|
|
660
|
+
withModified.push({
|
|
661
|
+
name,
|
|
662
|
+
type: ent.isDirectory() ? 'directory' : 'file',
|
|
663
|
+
modifiedAt: new Date(stats.mtime).toISOString()
|
|
664
|
+
});
|
|
665
|
+
if (ent.isDirectory() && segCount < maxDepth) {
|
|
666
|
+
await walk(fullPath, [...relParts, ent.name]);
|
|
667
|
+
}
|
|
668
|
+
}
|
|
669
|
+
};
|
|
670
|
+
await walk(baseDir, []);
|
|
671
|
+
return withModified;
|
|
635
672
|
}
|
|
636
673
|
const a = await fsp.readdir(directory, { withFileTypes: true });
|
|
637
674
|
const withModified = [];
|
|
@@ -654,14 +691,18 @@ Worker.prototype.list = async function ({ directory, start: s, end: e }) {
|
|
|
654
691
|
};
|
|
655
692
|
// Option metadata for Worker.prototype.list: `directory` is mandatory, `depth` is optional.
Worker.prototype.list.metadata = {
  options: {
    directory: { required: true },
    depth: {
      description: 'If set, recursively list files and directories up to this path depth (relative to directory); omit for a single-level listing only'
    }
  }
};
|
|
660
|
-
Worker.prototype.analyze = async function ({ directory }) {
|
|
701
|
+
Worker.prototype.analyzeDirectory = async function ({ directory }) {
|
|
661
702
|
if (!directory) throw new Error('directory is required');
|
|
662
703
|
if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
|
|
663
704
|
const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
|
|
664
|
-
return worker.analyze({ directory });
|
|
705
|
+
return worker.analyzeDirectory({ directory });
|
|
665
706
|
}
|
|
666
707
|
let fileCount = 0;
|
|
667
708
|
let directoryCount = 0;
|
|
@@ -704,7 +745,7 @@ Worker.prototype.analyze = async function ({ directory }) {
|
|
|
704
745
|
lastModified: fileCount ? lastModified : null
|
|
705
746
|
};
|
|
706
747
|
};
|
|
707
|
-
Worker.prototype.analyze.metadata = {
|
|
748
|
+
Worker.prototype.analyzeDirectory.metadata = {
|
|
708
749
|
options: {
|
|
709
750
|
directory: { required: true }
|
|
710
751
|
}
|
package/file/S3.js
CHANGED
|
@@ -2,7 +2,7 @@ import debug$0 from 'debug';
|
|
|
2
2
|
import fs from 'node:fs';
|
|
3
3
|
import withDb from 'mime-type/with-db';
|
|
4
4
|
import clientS3 from '@aws-sdk/client-s3';
|
|
5
|
-
import { getTempFilename, getFilePostfix, relativeDate } from './tools.js';
|
|
5
|
+
import { getTempFilename, getFilePostfix, normalizeListDepth, relativeDate } from './tools.js';
|
|
6
6
|
const debug = debug$0('@engine9/input/S3');
|
|
7
7
|
const { mimeType: mime } = withDb;
|
|
8
8
|
const {
|
|
@@ -187,53 +187,123 @@ Worker.prototype.write.metadata = {
|
|
|
187
187
|
content: { description: 'Contents of file' }
|
|
188
188
|
}
|
|
189
189
|
};
|
|
190
|
-
Worker.prototype.list = async function ({ directory, start, end, raw }) {
|
|
190
|
+
/**
 * List the objects and "directories" (common prefixes) under an S3 location.
 *
 * Without `depth` only the level directly under `directory` is listed. With
 * `depth`, prefixes are walked recursively and every entry whose path relative
 * to `directory` has at most `depth` segments is returned. Entry names are always
 * relative to `directory`.
 *
 * @param {object} options
 * @param {string} options.directory - location to list; trailing slashes are ignored
 * @param {Date} [options.start] - skip objects last modified before this
 *   (compared against `new Date(LastModified)`; presumably a Date — confirm with callers)
 * @param {Date} [options.end] - skip objects last modified after this
 * @param {boolean} [options.raw] - return the raw `Contents` entries instead of
 *   normalized entries; only supported for a single-level listing
 * @param {number|string} [options.depth] - maximum relative depth, validated by normalizeListDepth
 * @returns {Promise<Array<object>|undefined>} directory entries `{ name, type }` followed by
 *   file entries `{ name, type, size, modifiedAt }`; with `raw`, the raw objects
 *   (undefined when the listing returned none)
 * @throws {Error} if `directory` is missing, or `raw` is combined with `depth`
 */
Worker.prototype.list = async function ({ directory, start, end, raw, depth: depthOpt }) {
  if (!directory) throw new Error('directory is required');
  let dir = directory;
  while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
  const { Bucket, Key: rootPrefix } = getParts(dir);
  const s3Client = this.getClient();
  const maxDepth = normalizeListDepth(depthOpt);

  // Path of a key or common prefix relative to the listed root, without a trailing slash.
  const relToRoot = (keyOrPrefix) => {
    const normalized = keyOrPrefix.replace(/\/$/, '');
    if (!rootPrefix) return normalized;
    if (normalized.length <= rootPrefix.length) return '';
    return normalized.slice(rootPrefix.length + 1);
  };

  // Date-window filter shared by both listing modes.
  const isInDateRange = ({ LastModified }) => {
    const modified = new Date(LastModified);
    if (start && modified < start) return false;
    if (end && modified > end) return false;
    return true;
  };

  const toFileEntry = ({ Key, Size, LastModified }) => ({
    name: relToRoot(Key),
    type: 'file',
    size: Size,
    modifiedAt: new Date(LastModified).toISOString()
  });

  // Fetch one "directory level", following continuation tokens so that levels
  // larger than a single response page are not silently truncated.
  const fetchLevel = async (prefix) => {
    const prefixes = [];
    const objects = [];
    let ContinuationToken;
    do {
      const result = await s3Client.send(
        new ListObjectsV2Command({
          Bucket,
          // An empty prefix lists from the top; appending '/' to it would match nothing useful.
          Prefix: prefix ? `${prefix}/` : '',
          Delimiter: '/',
          ContinuationToken
        })
      );
      prefixes.push(...(result.CommonPrefixes || []));
      objects.push(...(result.Contents || []));
      ContinuationToken = result.IsTruncated ? result.NextContinuationToken : undefined;
    } while (ContinuationToken);
    return { prefixes, objects };
  };

  if (!maxDepth) {
    const { prefixes, objects } = await fetchLevel(rootPrefix);
    // Preserve the historical contract: raw callers received `undefined` when nothing was listed.
    if (raw) return objects.length ? objects : undefined;
    return [
      ...prefixes.map((cp) => ({ name: relToRoot(cp.Prefix), type: 'directory' })),
      ...objects.filter(isInDateRange).map(toFileEntry)
    ];
  }

  if (raw) {
    throw new Error('list raw output is not supported together with depth');
  }

  const output = [];

  // Depth-first walk: each directory entry is emitted before the entries beneath it,
  // and the files of a level are emitted after all of that level's sub-prefixes.
  const listLevel = async (currentPrefix) => {
    const { prefixes, objects } = await fetchLevel(currentPrefix);

    for (const cp of prefixes) {
      const rel = relToRoot(cp.Prefix);
      if (!rel) continue;
      const segCount = rel.split('/').length;
      if (segCount > maxDepth) continue;
      output.push({ name: rel, type: 'directory' });
      if (segCount < maxDepth) {
        await listLevel(cp.Prefix.replace(/\/$/, ''));
      }
    }

    for (const obj of objects) {
      const rel = relToRoot(obj.Key);
      // An object whose key is the prefix itself has no relative name; skip it.
      if (!rel) continue;
      if (rel.split('/').length > maxDepth) continue;
      if (!isInDateRange(obj)) continue;
      output.push(toFileEntry(obj));
    }
  };

  await listLevel(rootPrefix);
  return output;
};
Worker.prototype.list.metadata = {
  options: {
    directory: { required: true },
    depth: {
      description:
        'If set, recursively list objects and prefixes up to this key depth (relative to directory); omit for a single-level listing only'
    }
  }
};
|
|
236
|
-
Worker.prototype.analyze = async function ({ directory }) {
|
|
306
|
+
Worker.prototype.analyzeDirectory = async function ({ directory }) {
|
|
237
307
|
if (!directory) throw new Error('directory is required');
|
|
238
308
|
let dir = directory;
|
|
239
309
|
while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
|
|
@@ -295,7 +365,7 @@ Worker.prototype.analyze = async function ({ directory }) {
|
|
|
295
365
|
lastModified: fileCount ? lastModified : null
|
|
296
366
|
};
|
|
297
367
|
};
|
|
298
|
-
Worker.prototype.analyze.metadata = {
|
|
368
|
+
Worker.prototype.analyzeDirectory.metadata = {
|
|
299
369
|
options: {
|
|
300
370
|
directory: { required: true }
|
|
301
371
|
}
|
package/file/tools.js
CHANGED
|
@@ -335,6 +335,15 @@ function parseJSON5(o, defaultVal) {
|
|
|
335
335
|
}
|
|
336
336
|
return defaultVal || o;
|
|
337
337
|
}
|
|
338
|
+
/**
 * Normalize a caller-supplied listing depth.
 *
 * @param {number|string|null|undefined} depth - requested maximum depth; strings
 *   must consist solely of decimal digits (optionally signed with `+`, surrounding
 *   whitespace allowed)
 * @returns {number|undefined} positive integer max depth, or undefined if absent
 *   (`undefined`, `null` or the empty string)
 * @throws {Error} if the value is present but is not a positive integer
 */
function normalizeListDepth(depth) {
  if (depth === undefined || depth === null || depth === '') return undefined;
  let n;
  if (typeof depth === 'number') {
    n = depth;
  } else {
    const text = String(depth).trim();
    // parseInt alone would silently truncate '2.5' to 2 and '3abc' to 3, so the
    // whole string is required to be an integer literal before it is parsed.
    n = /^\+?\d+$/.test(text) ? Number.parseInt(text, 10) : Number.NaN;
  }
  if (!Number.isInteger(n) || n < 1) {
    throw new Error('depth must be a positive integer');
  }
  return n;
}
|
|
338
347
|
export { appendPostfix };
|
|
339
348
|
export { bool };
|
|
340
349
|
export { downloadFile };
|
|
@@ -348,6 +357,7 @@ export { getManifest };
|
|
|
348
357
|
export { getPacketFiles };
|
|
349
358
|
export { getStringArray };
|
|
350
359
|
export { makeStrings };
|
|
360
|
+
export { normalizeListDepth };
|
|
351
361
|
export { parseJSON5 };
|
|
352
362
|
export { relativeDate };
|
|
353
363
|
export { streamPacket };
|
|
@@ -366,6 +376,7 @@ export default {
|
|
|
366
376
|
getPacketFiles,
|
|
367
377
|
getStringArray,
|
|
368
378
|
makeStrings,
|
|
379
|
+
normalizeListDepth,
|
|
369
380
|
parseJSON5,
|
|
370
381
|
relativeDate,
|
|
371
382
|
streamPacket,
|