@engine9-io/input-tools 1.4.2 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/file/FileUtilities.js +47 -23
- package/file/R2.js +36 -0
- package/file/S3.js +9 -6
- package/package.json +1 -1
package/file/FileUtilities.js
CHANGED
@@ -15,6 +15,7 @@ const debug = require('debug')('FileWorker');
 const csv = require('csv');
 const JSON5 = require('json5');// Useful for parsing extended JSON
 const languageEncoding = require('detect-file-encoding-and-language');
+const R2Worker = require('./R2');
 const S3Worker = require('./S3');
 const ParquetWorker = require('./Parquet');
 const { streamPacket } = require('./tools');
@@ -401,7 +402,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
 Worker.prototype.transform = async function (options) {
   const worker = this;

-  const filename = …
+  const { filename } = options;

   debug(`Transforming ${filename}`);

@@ -498,10 +499,14 @@ Worker.prototype.stream = async function (
     const pq = new ParquetWorker(this);
     stream = (await pq.stream({ filename, columns, limit })).stream;
     encoding = 'object';
-  } else if (filename.…
+  } else if (filename.startsWith('s3://')) {
     const s3Worker = new S3Worker(this);
     stream = (await s3Worker.stream({ filename, columns, limit })).stream;
     encoding = 'UTF-8';
+  } else if (filename.startsWith('r2://')) {
+    const r2Worker = new R2Worker(this);
+    stream = (await r2Worker.stream({ filename, columns, limit })).stream;
+    encoding = 'UTF-8';
   } else {
     // Check if the file exists, and fast fail if not
     // Otherwise the stream hangs out as a handle
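
In practice the new branch means a caller can pass an r2:// name anywhere an s3:// name already worked, with the same { filename, columns, limit } options. A minimal sketch, assuming stream() resolves to the { stream, encoding } pair the assignments above build up (the bucket and key are hypothetical):

// Hypothetical caller; 'worker' is a FileUtilities worker instance.
const { stream } = await worker.stream({ filename: 'r2://example-bucket/exports/data.csv' });
stream.pipe(process.stdout); // bytes, since encoding is 'UTF-8' for both r2:// and s3://
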
@@ -541,13 +546,13 @@ Worker.prototype.sample.metadata = {

 Worker.prototype.write = async function (opts) {
   const { filename, content } = opts;
-  if (filename.…
-  const …
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
     const parts = filename.split('/');
     const directory = parts.slice(0, -1).join('/');
     const file = parts.slice(-1)[0];
     // debug(JSON.stringify({ parts, directory, file }));
-    await …
+    await worker.write({
       directory,
       file,
       content,
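
The same prefix dispatch drives write, so R2 support needs no new caller-side API; only the filename scheme changes. A hedged sketch (the bucket, key, and worker instance are hypothetical):

// Sketch only: write splits the filename into directory + file and
// delegates to R2Worker.write, which R2 inherits from the S3 worker.
await worker.write({
  filename: 'r2://example-bucket/reports/summary.txt', // previously s3:// only
  content: 'report body',
});
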
@@ -559,7 +564,7 @@ Worker.prototype.write = async function (opts) {
 };
 Worker.prototype.write.metadata = {
   options: {
-    filename: { description: 'Location to write content to, can be local or s3://' },
+    filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
     content: {},
   },
 };
@@ -596,9 +601,9 @@ Worker.prototype.json.metadata = {

 Worker.prototype.list = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.…
-  const …
-  return …
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.list({ directory });
   }
   const a = await fsp.readdir(directory, { withFileTypes: true });
   return a.map((f) => ({
@@ -614,9 +619,9 @@ Worker.prototype.list.metadata = {

 Worker.prototype.listAll = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.…
-  const …
-  return …
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.listAll({ directory });
   }
   const a = await fsp.readdir(directory, { recursive: true });

@@ -630,9 +635,9 @@ Worker.prototype.listAll.metadata = {

 Worker.prototype.empty = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.…
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
     // currently not emptying S3 this way -- dangerous
-    throw new Error('Cannot empty an s3:// directory');
+    throw new Error('Cannot empty an s3:// or r2:// directory');
   }
   const removed = [];
   // eslint-disable-next-line no-restricted-syntax
@@ -650,17 +655,22 @@ Worker.prototype.empty.metadata = {

 Worker.prototype.move = async function ({ filename, target }) {
   if (!target) throw new Error('target is required');
-  if (target.…
-
+  if (target.startsWith('s3://') || target.startsWith('r2://')) {
+    if ((target.startsWith('s3://') && filename.startsWith('r2://'))
+    || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+      throw new Error('Cowardly not copying between services');
+    }
+
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);

-    if (filename.…
+    if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
       // We need to copy and delete
-      const output = await …
-      await …
+      const output = await worker.copy({ filename, target });
+      await worker.remove({ filename });
       return output;
     }
     const parts = target.split('/');
-    return …
+    return worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
   }
   await fsp.mkdir(path.dirname(target), { recursive: true });
   await fsp.rename(filename, target);
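
Note the new guard: move refuses to transfer objects between the two services, so only local-to-remote and same-service remote moves go through. A sketch (names hypothetical):

// Same service: copies the object, then removes the source.
await worker.move({
  filename: 'r2://example-bucket/tmp/a.csv',
  target: 'r2://example-bucket/archive/a.csv',
});
// Cross service: throws 'Cowardly not copying between services'.
// await worker.move({ filename: 's3://example-bucket/a.csv', target: 'r2://example-bucket/a.csv' });

One wrinkle worth flagging: the worker is selected from the source filename's prefix, so a local-to-r2:// move would pick S3Worker here; whether that path hits the right endpoint depends on code not shown in this diff.
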
@@ -675,9 +685,9 @@ Worker.prototype.move.metadata = {

 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');
-  if (filename.…
-  const …
-  return …
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.stat({ filename });
   }
   const {
     ctime,
@@ -700,6 +710,20 @@ Worker.prototype.stat.metadata = {
   },
 };

+Worker.prototype.download = async function ({ filename }) {
+  if (!filename) throw new Error('filename is required');
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.download({ filename });
+  }
+  throw new Error('Cannot download a local file');
+};
+Worker.prototype.download.metadata = {
+  options: {
+    filename: {},
+  },
+};
+
 Worker.prototype.head = async function (options) {
   const { stream } = await this.fileToObjectStream(options);
   const chunks = [];
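
The new top-level download is remote-only by design: local filenames throw rather than copying a file onto itself. Under the hood it reuses S3Worker.download (see the S3.js changes below), which streams the object into a temp file. Sketch (filename hypothetical):

const local = await worker.download({ filename: 'r2://example-bucket/exports/data.parquet' });
// 'local' is presumably the temp path S3Worker.download builds with
// getTempFilename; the diff does not show that function's return statement.
// await worker.download({ filename: './data.parquet' }); // throws: Cannot download a local file
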
package/file/R2.js
ADDED
@@ -0,0 +1,36 @@
+const util = require('node:util');
+const {
+  S3Client,
+} = require('@aws-sdk/client-s3');
+const S3 = require('./S3');
+
+function R2(worker) {
+  S3.call(this, worker);
+}
+util.inherits(R2, S3);
+
+R2.prototype.getClient = function () {
+  const missing = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY']
+    .filter((r) => !process.env[r]);
+  if (missing.length > 0) throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
+  const ACCOUNT_ID = process.env.CLOUDFLARE_R2_ACCOUNT_ID;
+  const ACCESS_KEY_ID = process.env.CLOUDFLARE_R2_ACCESS_KEY_ID;
+  const SECRET_ACCESS_KEY = process.env.CLOUDFLARE_R2_SECRET_ACCESS_KEY;
+
+  if (!this.client) {
+    this.client = new S3Client({
+      // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
+      region: 'auto',
+      endpoint: `https://${ACCOUNT_ID}.r2.cloudflarestorage.com`,
+      credentials: {
+        accessKeyId: ACCESS_KEY_ID,
+        secretAccessKey: SECRET_ACCESS_KEY,
+      },
+      forcePathStyle: true, // Important for R2 compatibility
+
+    });
+  }
+  return this.client;
+};
+
+module.exports = R2;
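
Everything except getClient is inherited from the S3 worker via util.inherits, so R2 gets list, listAll, stat, write, and download for free once the three CLOUDFLARE_R2_* variables are set; getClient also caches the S3Client on this.client, so repeated calls reuse one connection. A minimal sketch of exercising it directly (the require path is assumed from the package layout, and the parent-worker argument mirrors how FileUtilities constructs it with `this`):

// Assumed path; confirm against the published package layout.
const R2 = require('@engine9-io/input-tools/file/R2');

const r2 = new R2(parentWorker); // FileUtilities passes its own `this` here
const files = await r2.list({ directory: 'r2://example-bucket/incoming' }); // inherited from S3
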
package/file/S3.js
CHANGED
@@ -16,7 +16,10 @@ const { getTempFilename } = require('./tools');
 function Worker() {}

 function getParts(filename) {
-  if (!filename…
+  if (!filename) throw new Error(`Invalid filename: ${filename}`);
+  if (!filename.startsWith('r2://') && !filename.startsWith('s3://')) {
+    throw new Error(`Invalid filename, must start with r2:// or s3://: ${filename}`);
+  }
   const parts = filename.split('/');
   const Bucket = parts[2];
   const Key = parts.slice(3).join('/');
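
getParts now fails fast on names without a recognized scheme instead of quietly producing an undefined Bucket. For a valid name the split is unchanged; a worked example (values hypothetical):

// getParts('r2://example-bucket/path/to/file.csv') yields:
//   Bucket: 'example-bucket'     (parts[2])
//   Key:    'path/to/file.csv'   (parts.slice(3).join('/'))
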
@@ -102,7 +105,7 @@ Worker.prototype.remove.metadata = {
 Worker.prototype.download = async function ({ filename }) {
   const file = filename.split('/').pop();
   const localPath = await getTempFilename({ targetFilename: file });
-  const s3Client = …
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new GetObjectCommand({ Bucket, Key });
   debug(`Downloading ${file} to ${localPath}`);
@@ -139,7 +142,7 @@ Worker.prototype.put = async function (options) {
   const ContentType = mime.lookup(file);

   debug(`Putting ${filename} to ${JSON.stringify({ Bucket, Key, ContentType })}}`);
-  const s3Client = …
+  const s3Client = this.getClient();

   const command = new PutObjectCommand({
     Bucket, Key, Body, ContentType,
@@ -166,7 +169,7 @@ Worker.prototype.write = async function (options) {
   const Body = content;

   debug(`Writing content of length ${content.length} to ${JSON.stringify({ Bucket, Key })}}`);
-  const s3Client = …
+  const s3Client = this.getClient();
   const ContentType = mime.lookup(file);

   const command = new PutObjectCommand({
@@ -188,7 +191,7 @@ Worker.prototype.list = async function ({ directory }) {
   let dir = directory;
   while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
   const { Bucket, Key: Prefix } = getParts(dir);
-  const s3Client = …
+  const s3Client = this.getClient();
   const command = new ListObjectsV2Command({
     Bucket,
     Prefix: `${Prefix}/`,
@@ -248,7 +251,7 @@ Worker.prototype.listAll.metadata = {
 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');

-  const s3Client = …
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new HeadObjectCommand({ Bucket, Key });
   const response = await s3Client.send(command);