@engine9-io/input-tools 1.4.2 → 1.5.1

This diff shows the contents of the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
package/file/FileWorker.js CHANGED
@@ -15,6 +15,7 @@ const debug = require('debug')('FileWorker');
 const csv = require('csv');
 const JSON5 = require('json5');// Useful for parsing extended JSON
 const languageEncoding = require('detect-file-encoding-and-language');
+const R2Worker = require('./R2');
 const S3Worker = require('./S3');
 const ParquetWorker = require('./Parquet');
 const { streamPacket } = require('./tools');
@@ -401,7 +402,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
 Worker.prototype.transform = async function (options) {
   const worker = this;
 
-  const filename = worker.getFilename(options);
+  const { filename } = options;
 
   debug(`Transforming ${filename}`);
 
@@ -498,10 +499,14 @@ Worker.prototype.stream = async function (
     const pq = new ParquetWorker(this);
     stream = (await pq.stream({ filename, columns, limit })).stream;
     encoding = 'object';
-  } else if (filename.indexOf('s3://') === 0) {
+  } else if (filename.startsWith('s3://')) {
     const s3Worker = new S3Worker(this);
     stream = (await s3Worker.stream({ filename, columns, limit })).stream;
     encoding = 'UTF-8';
+  } else if (filename.startsWith('r2://')) {
+    const r2Worker = new R2Worker(this);
+    stream = (await r2Worker.stream({ filename, columns, limit })).stream;
+    encoding = 'UTF-8';
   } else {
     // Check if the file exists, and fast fail if not
     // Otherwise the stream hangs out as a handle
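
Every remote branch in this file follows the same dispatch rule: the URL scheme on the filename selects the worker class. A minimal sketch of that rule in isolation (the `getRemoteWorker` helper is hypothetical, not part of the package):

    const S3Worker = require('./S3');
    const R2Worker = require('./R2');

    // Hypothetical helper showing the scheme-based dispatch used throughout:
    // r2:// selects R2Worker, s3:// selects S3Worker, anything else is local.
    function getRemoteWorker(filename, parent) {
      if (filename.startsWith('r2://')) return new R2Worker(parent);
      if (filename.startsWith('s3://')) return new S3Worker(parent);
      return null; // caller falls back to local filesystem handling
    }
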
@@ -541,13 +546,13 @@ Worker.prototype.sample.metadata = {
 
 Worker.prototype.write = async function (opts) {
   const { filename, content } = opts;
-  if (filename.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
     const parts = filename.split('/');
     const directory = parts.slice(0, -1).join('/');
     const file = parts.slice(-1)[0];
     // debug(JSON.stringify({ parts, directory, file }));
-    await s3Worker.write({
+    await worker.write({
       directory,
       file,
       content,
@@ -559,7 +564,7 @@ Worker.prototype.write = async function (opts) {
 };
 Worker.prototype.write.metadata = {
   options: {
-    filename: { description: 'Location to write content to, can be local or s3://' },
+    filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
     content: {},
   },
 };
@@ -596,9 +601,9 @@ Worker.prototype.json.metadata = {
 
 Worker.prototype.list = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.list({ directory });
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.list({ directory });
   }
   const a = await fsp.readdir(directory, { withFileTypes: true });
   return a.map((f) => ({
@@ -614,9 +619,9 @@ Worker.prototype.list.metadata = {
 
 Worker.prototype.listAll = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.listAll({ directory });
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.listAll({ directory });
   }
   const a = await fsp.readdir(directory, { recursive: true });
 
@@ -630,9 +635,9 @@ Worker.prototype.listAll.metadata = {
 
 Worker.prototype.empty = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
     // currently not emptying S3 this way -- dangerous
-    throw new Error('Cannot empty an s3:// directory');
+    throw new Error('Cannot empty an s3:// or r2:// directory');
   }
   const removed = [];
   // eslint-disable-next-line no-restricted-syntax
@@ -650,17 +655,22 @@ Worker.prototype.empty.metadata = {
 
 Worker.prototype.move = async function ({ filename, target }) {
   if (!target) throw new Error('target is required');
-  if (target.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
+  if (target.startsWith('s3://') || target.startsWith('r2://')) {
+    if ((target.startsWith('s3://') && filename.startsWith('r2://'))
+      || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+      throw new Error('Cowardly not copying between services');
+    }
 
-    if (filename.indexOf('s3://') === 0) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+
+    if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
       // We need to copy and delete
-      const output = await s3Worker.copy({ filename, target });
-      await s3Worker.remove({ filename });
+      const output = await worker.copy({ filename, target });
+      await worker.remove({ filename });
       return output;
     }
     const parts = target.split('/');
-    return s3Worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
+    return worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
   }
   await fsp.mkdir(path.dirname(target), { recursive: true });
   await fsp.rename(filename, target);
@@ -675,9 +685,9 @@ Worker.prototype.move.metadata = {
 
 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');
-  if (filename.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.stat({ filename });
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.stat({ filename });
   }
   const {
     ctime,
@@ -700,11 +710,25 @@ Worker.prototype.stat.metadata = {
   },
 };
 
+Worker.prototype.download = async function ({ filename }) {
+  if (!filename) throw new Error('filename is required');
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.download({ filename });
+  }
+  throw new Error('Cannot download a local file');
+};
+Worker.prototype.download.metadata = {
+  options: {
+    filename: {},
+  },
+};
+
 Worker.prototype.head = async function (options) {
-  const { stream } = await this.fileToObjectStream(options);
+  const limit = options.limit || 3;
+  const { stream } = await this.fileToObjectStream({ ...options, limit });
   const chunks = [];
 
-  const limit = options.limit || 3;
   let counter = 0;
   // eslint-disable-next-line no-restricted-syntax
   for await (const chunk of stream) {
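
`head` now forwards `limit` into `fileToObjectStream`, so the underlying reader can stop after the requested number of rows rather than parsing the whole file and discarding the remainder. A hedged usage sketch (the `fileWorker` instance name is a placeholder, and the return shape is inferred from the collected `chunks` rather than shown in this hunk):

    // Collects up to options.limit rows (default 3) from the start of the file.
    const rows = await fileWorker.head({ filename: 's3://my-bucket/people.csv', limit: 5 });
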
package/file/Parquet.js CHANGED
@@ -45,7 +45,7 @@ Worker.prototype.schema.metadata = {
 };
 
 function cleanColumnName(name) {
-  name.toLowerCase().replace(/[^a-z0-9_]/g, '_');
+  return name.toLowerCase().replace(/[^a-z0-9_]/g, '_');
 }
 
 Worker.prototype.stream = async function (options) {
@@ -60,11 +60,10 @@ Worker.prototype.stream = async function (options) {
     if (typeof options.columns === 'string') requestedColumns = options.columns.split(',').map((d) => d.trim());
     else requestedColumns = options.columns.map((d) => (d.name ? d.name.trim() : d.trim()));
     requestedColumns.forEach((c) => {
-      columns = columns.concat(
-        fieldList.filter((f) => (
-          f.name === c || cleanColumnName(f.name) === cleanColumnName(c)
-        )).map((f) => f.name),
-      );
+      const matchingCols = fieldList.filter((f) => (
+        f.name === c || cleanColumnName(f.name) === cleanColumnName(c)
+      )).map((f) => f.name);
+      columns = columns.concat(matchingCols);
     });
   }
   let limit = 0;
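
The `return` added in the first hunk matters for the column matching just below it: without it, `cleanColumnName` always returned `undefined`, so the fallback comparison `cleanColumnName(f.name) === cleanColumnName(c)` evaluated to `undefined === undefined` and every field matched every requested column. With the fix, the normalization behaves as intended, for example:

    cleanColumnName('First Name');    // 'first_name'
    cleanColumnName('email-address'); // 'email_address'
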
package/file/R2.js ADDED
@@ -0,0 +1,36 @@
+const util = require('node:util');
+const {
+  S3Client,
+} = require('@aws-sdk/client-s3');
+const S3 = require('./S3');
+
+function R2(worker) {
+  S3.call(this, worker);
+}
+util.inherits(R2, S3);
+
+R2.prototype.getClient = function () {
+  const missing = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY']
+    .filter((r) => !process.env[r]);
+  if (missing.length > 0) throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
+  const ACCOUNT_ID = process.env.CLOUDFLARE_R2_ACCOUNT_ID;
+  const ACCESS_KEY_ID = process.env.CLOUDFLARE_R2_ACCESS_KEY_ID;
+  const SECRET_ACCESS_KEY = process.env.CLOUDFLARE_R2_SECRET_ACCESS_KEY;
+
+  if (!this.client) {
+    this.client = new S3Client({
+      // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
+      region: 'auto',
+      endpoint: `https://${ACCOUNT_ID}.r2.cloudflarestorage.com`,
+      credentials: {
+        accessKeyId: ACCESS_KEY_ID,
+        secretAccessKey: SECRET_ACCESS_KEY,
+      },
+      forcePathStyle: true, // Important for R2 compatibility
+
+    });
+  }
+  return this.client;
+};
+
+module.exports = R2;
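
Because Cloudflare R2 exposes an S3-compatible API, the new worker only overrides `getClient`; stream, write, list, stat, and download are all inherited from S3.js. A minimal usage sketch, assuming the three CLOUDFLARE_R2_* variables are set in the environment (`parentWorker` and the bucket name are placeholders):

    const R2 = require('./R2');

    const r2 = new R2(parentWorker); // placeholder: any existing worker instance
    // Inherited from S3.js; getParts() accepts the r2:// scheme as of this release.
    const files = await r2.list({ directory: 'r2://my-bucket/incoming' });
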
package/file/S3.js CHANGED
@@ -16,7 +16,10 @@ const { getTempFilename } = require('./tools');
 function Worker() {}
 
 function getParts(filename) {
-  if (!filename || filename.indexOf('s3://') !== 0) throw new Error(`Invalid filename for s3:${filename}`);
+  if (!filename) throw new Error(`Invalid filename: ${filename}`);
+  if (!filename.startsWith('r2://') && !filename.startsWith('s3://')) {
+    throw new Error(`Invalid filename, must start with r2:// or s3://: ${filename}`);
+  }
   const parts = filename.split('/');
   const Bucket = parts[2];
   const Key = parts.slice(3).join('/');
@@ -102,7 +105,7 @@ Worker.prototype.remove.metadata = {
 Worker.prototype.download = async function ({ filename }) {
   const file = filename.split('/').pop();
   const localPath = await getTempFilename({ targetFilename: file });
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new GetObjectCommand({ Bucket, Key });
   debug(`Downloading ${file} to ${localPath}`);
@@ -139,7 +142,7 @@ Worker.prototype.put = async function (options) {
   const ContentType = mime.lookup(file);
 
   debug(`Putting ${filename} to ${JSON.stringify({ Bucket, Key, ContentType })}}`);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
 
   const command = new PutObjectCommand({
     Bucket, Key, Body, ContentType,
@@ -166,7 +169,7 @@ Worker.prototype.write = async function (options) {
   const Body = content;
 
   debug(`Writing content of length ${content.length} to ${JSON.stringify({ Bucket, Key })}}`);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const ContentType = mime.lookup(file);
 
   const command = new PutObjectCommand({
@@ -188,7 +191,7 @@ Worker.prototype.list = async function ({ directory }) {
   let dir = directory;
   while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
   const { Bucket, Key: Prefix } = getParts(dir);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const command = new ListObjectsV2Command({
     Bucket,
     Prefix: `${Prefix}/`,
@@ -248,7 +251,7 @@ Worker.prototype.listAll.metadata = {
 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');
 
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new HeadObjectCommand({ Bucket, Key });
   const response = await s3Client.send(command);
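
Swapping each inline `new S3Client({})` for `this.getClient()` is what makes the R2 subclass work: every S3 method now asks the instance for its client, and R2's override supplies one pointed at the Cloudflare endpoint. The diff doesn't show the base implementation being added, but given the override above it presumably looks something like this (an assumption, not shown in the diff):

    // Assumed shape of the base S3 getClient: lazily build and cache one
    // default-configured client per worker instance.
    Worker.prototype.getClient = function () {
      if (!this.client) this.client = new S3Client({});
      return this.client;
    };
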
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@engine9-io/input-tools",
-  "version": "1.4.2",
+  "version": "1.5.1",
   "description": "Tools for dealing with Engine9 inputs",
   "main": "index.js",
   "scripts": {
@@ -17,7 +17,7 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.723.0",
-    "@dsnp/parquetjs": "^1.8.6",
+    "@dsnp/parquetjs": "^1.8.7",
     "archiver": "^7.0.1",
     "async-mutex": "^0.5.0",
     "csv": "^6.3.11",