@engine9-io/input-tools 1.4.1 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,6 +15,7 @@ const debug = require('debug')('FileWorker');
 const csv = require('csv');
 const JSON5 = require('json5');// Useful for parsing extended JSON
 const languageEncoding = require('detect-file-encoding-and-language');
+const R2Worker = require('./R2');
 const S3Worker = require('./S3');
 const ParquetWorker = require('./Parquet');
 const { streamPacket } = require('./tools');
@@ -401,7 +402,7 @@ Worker.prototype.objectStreamToFile = async function (options) {
 Worker.prototype.transform = async function (options) {
   const worker = this;
 
-  const filename = worker.getFilename(options);
+  const { filename } = options;
 
   debug(`Transforming ${filename}`);
 
@@ -489,19 +490,23 @@ Worker.prototype.stream = async function (
   } else if (filename) {
     if (filename.startsWith('engine9-accounts/')) {
       filename = `${process.env.ENGINE9_ACCOUNT_DIR}/${filename.slice('engine9-accounts/'.length)}`;
-      debug(`Prepending file with ${process.env.ENGINE9_ACCOUNT_DIR}, filename=${filename}`);
+      // debug(`Prepending file with ${process.env.ENGINE9_ACCOUNT_DIR}, filename=${filename}`);
     } else {
-      debug(`Not prepending filename:${filename}`);
+      // debug(`Not prepending filename:${filename}`);
     }
     let encoding; let stream;
     if (filename.slice(-8) === '.parquet') {
       const pq = new ParquetWorker(this);
       stream = (await pq.stream({ filename, columns, limit })).stream;
       encoding = 'object';
-    } else if (filename.indexOf('s3://') === 0) {
+    } else if (filename.startsWith('s3://')) {
       const s3Worker = new S3Worker(this);
       stream = (await s3Worker.stream({ filename, columns, limit })).stream;
       encoding = 'UTF-8';
+    } else if (filename.startsWith('r2://')) {
+      const r2Worker = new R2Worker(this);
+      stream = (await r2Worker.stream({ filename, columns, limit })).stream;
+      encoding = 'UTF-8';
     } else {
       // Check if the file exists, and fast fail if not
       // Otherwise the stream hangs out as a handle
@@ -541,13 +546,13 @@ Worker.prototype.sample.metadata = {
 
 Worker.prototype.write = async function (opts) {
   const { filename, content } = opts;
-  if (filename.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
     const parts = filename.split('/');
     const directory = parts.slice(0, -1).join('/');
     const file = parts.slice(-1)[0];
     // debug(JSON.stringify({ parts, directory, file }));
-    await s3Worker.write({
+    await worker.write({
       directory,
       file,
       content,
@@ -559,7 +564,7 @@ Worker.prototype.write = async function (opts) {
 };
 Worker.prototype.write.metadata = {
   options: {
-    filename: { description: 'Location to write content to, can be local or s3://' },
+    filename: { description: 'Location to write content to, can be local or s3:// or r2://' },
     content: {},
   },
 };
@@ -596,9 +601,9 @@ Worker.prototype.json.metadata = {
 
 Worker.prototype.list = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.list({ directory });
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.list({ directory });
   }
   const a = await fsp.readdir(directory, { withFileTypes: true });
   return a.map((f) => ({
@@ -614,9 +619,9 @@ Worker.prototype.list.metadata = {
 
 Worker.prototype.listAll = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.listAll({ directory });
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
+    const worker = new (directory.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.listAll({ directory });
   }
   const a = await fsp.readdir(directory, { recursive: true });
 
@@ -630,9 +635,9 @@ Worker.prototype.listAll.metadata = {
 
 Worker.prototype.empty = async function ({ directory }) {
   if (!directory) throw new Error('directory is required');
-  if (directory.indexOf('s3://') === 0) {
+  if (directory.startsWith('s3://') || directory.startsWith('r2://')) {
     // currently not emptying S3 this way -- dangerous
-    throw new Error('Cannot empty an s3:// directory');
+    throw new Error('Cannot empty an s3:// or r2:// directory');
   }
   const removed = [];
   // eslint-disable-next-line no-restricted-syntax
@@ -649,11 +654,23 @@ Worker.prototype.empty.metadata = {
 };
 
 Worker.prototype.move = async function ({ filename, target }) {
-  if (!target) throw new Error('directory is required');
-  if (target.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
+  if (!target) throw new Error('target is required');
+  if (target.startsWith('s3://') || target.startsWith('r2://')) {
+    if ((target.startsWith('s3://') && filename.startsWith('r2://'))
+    || (target.startsWith('r2://') && filename.startsWith('s3://'))) {
+      throw new Error('Cowardly not copying between services');
+    }
+
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+
+    if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+      // We need to copy and delete
+      const output = await worker.copy({ filename, target });
+      await worker.remove({ filename });
+      return output;
+    }
     const parts = target.split('/');
-    return s3Worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
+    return worker.put({ filename, directory: parts.slice(0, -1).join('/'), file: parts.slice(-1)[0] });
   }
   await fsp.mkdir(path.dirname(target), { recursive: true });
   await fsp.rename(filename, target);
@@ -668,9 +685,9 @@ Worker.prototype.move.metadata = {
 
 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');
-  if (filename.indexOf('s3://') === 0) {
-    const s3Worker = new S3Worker(this);
-    return s3Worker.stat({ filename });
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.stat({ filename });
   }
   const {
     ctime,
@@ -693,6 +710,20 @@ Worker.prototype.stat.metadata = {
   },
 };
 
+Worker.prototype.download = async function ({ filename }) {
+  if (!filename) throw new Error('filename is required');
+  if (filename.startsWith('s3://') || filename.startsWith('r2://')) {
+    const worker = new (filename.startsWith('r2://') ? R2Worker : S3Worker)(this);
+    return worker.download({ filename });
+  }
+  throw new Error('Cannot download a local file');
+};
+Worker.prototype.download.metadata = {
+  options: {
+    filename: {},
+  },
+};
+
 Worker.prototype.head = async function (options) {
   const { stream } = await this.fileToObjectStream(options);
   const chunks = [];
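
Taken together, the FileWorker changes route any filename or directory starting with r2:// to the new R2 worker, mirroring the existing s3:// handling in stream, write, list, listAll, stat, move, and the new download. A minimal usage sketch follows; the require path, constructor options, and bucket/key names are assumptions, not taken from the diff:

// Sketch only: require path and constructor options are assumptions.
const FileWorker = require('@engine9-io/input-tools/file/FileWorker');

(async () => {
  const fileWorker = new FileWorker({ accountId: 'engine9' }); // hypothetical options
  // Routing is by prefix: .parquet, s3://, r2://, or a local path.
  const { stream } = await fileWorker.stream({ filename: 'r2://my-bucket/exports/people.csv' });
  stream.pipe(process.stdout);

  // list/stat/write/move accept the same r2:// prefix as of 1.5.0.
  const files = await fileWorker.list({ directory: 'r2://my-bucket/exports' });
  console.log(files);
})();
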
package/file/R2.js ADDED
@@ -0,0 +1,36 @@
+const util = require('node:util');
+const {
+  S3Client,
+} = require('@aws-sdk/client-s3');
+const S3 = require('./S3');
+
+function R2(worker) {
+  S3.call(this, worker);
+}
+util.inherits(R2, S3);
+
+R2.prototype.getClient = function () {
+  const missing = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY']
+    .filter((r) => !process.env[r]);
+  if (missing.length > 0) throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
+  const ACCOUNT_ID = process.env.CLOUDFLARE_R2_ACCOUNT_ID;
+  const ACCESS_KEY_ID = process.env.CLOUDFLARE_R2_ACCESS_KEY_ID;
+  const SECRET_ACCESS_KEY = process.env.CLOUDFLARE_R2_SECRET_ACCESS_KEY;
+
+  if (!this.client) {
+    this.client = new S3Client({
+      // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
+      region: 'auto',
+      endpoint: `https://${ACCOUNT_ID}.r2.cloudflarestorage.com`,
+      credentials: {
+        accessKeyId: ACCESS_KEY_ID,
+        secretAccessKey: SECRET_ACCESS_KEY,
+      },
+      forcePathStyle: true, // Important for R2 compatibility
+
+    });
+  }
+  return this.client;
+};
+
+module.exports = R2;
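
A sketch of driving the new R2 worker directly, assuming it can be required standalone and that the constructor takes the parent worker the way FileWorker passes `this` above; the require path, bucket, and key are placeholders:

// Sketch only: require path, constructor argument, and bucket/key values are assumptions.
// getClient() requires CLOUDFLARE_R2_ACCOUNT_ID, CLOUDFLARE_R2_ACCESS_KEY_ID, and
// CLOUDFLARE_R2_SECRET_ACCESS_KEY in the environment and throws if any are missing.
const R2Worker = require('@engine9-io/input-tools/file/R2');

(async () => {
  const r2 = new R2Worker({}); // FileWorker passes itself here
  // All S3 worker methods are inherited; only the client construction differs
  // (Cloudflare endpoint, region 'auto', path-style URLs).
  const { stream } = await r2.stream({ filename: 'r2://my-bucket/path/file.csv' });
  stream.pipe(process.stdout);
})();
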
package/file/S3.js CHANGED
@@ -1,9 +1,11 @@
-const debug = require('debug')('S3Worker');
+const debug = require('debug')('@engine9-io/input/S3');
 const fs = require('node:fs');
 // eslint-disable-next-line import/no-unresolved
 const { mimeType: mime } = require('mime-type/with-db');
 const {
   S3Client,
+  CopyObjectCommand,
+  DeleteObjectCommand,
   GetObjectCommand,
   HeadObjectCommand,
   GetObjectAttributesCommand, PutObjectCommand,
@@ -14,7 +16,10 @@ const { getTempFilename } = require('./tools');
 function Worker() {}
 
 function getParts(filename) {
-  if (!filename || filename.indexOf('s3://') !== 0) throw new Error(`Invalid filename for s3:${filename}`);
+  if (!filename) throw new Error(`Invalid filename: ${filename}`);
+  if (!filename.startsWith('r2://') && !filename.startsWith('s3://')) {
+    throw new Error(`Invalid filename, must start with r2:// or s3://: ${filename}`);
+  }
   const parts = filename.split('/');
   const Bucket = parts[2];
   const Key = parts.slice(3).join('/');
@@ -44,7 +49,7 @@ Worker.prototype.getMetadata.metadata = {
 };
 
 Worker.prototype.stream = async function ({ filename }) {
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new GetObjectCommand({ Bucket, Key });
   try {
@@ -62,10 +67,45 @@ Worker.prototype.stream.metadata = {
   },
 };
 
+Worker.prototype.copy = async function ({ filename, target }) {
+  if (!filename.startsWith('s3://')) throw new Error('Cowardly not copying a file not from s3 -- use put instead');
+  const s3Client = this.getClient();
+  const { Bucket, Key } = getParts(target);
+
+  debug(`Copying ${filename} to ${JSON.stringify({ Bucket, Key })}}`);
+
+  const command = new CopyObjectCommand({
+    CopySource: filename.slice(4), // remove the s3:/
+    Bucket,
+    Key,
+  });
+
+  return s3Client.send(command);
+};
+
+Worker.prototype.copy.metadata = {
+  options: {
+    filename: {},
+    target: {},
+  },
+};
+
+Worker.prototype.remove = async function ({ filename }) {
+  const s3Client = this.getClient();
+  const { Bucket, Key } = getParts(filename);
+  const command = new DeleteObjectCommand({ Bucket, Key });
+  return s3Client.send(command);
+};
+Worker.prototype.remove.metadata = {
+  options: {
+    filename: {},
+  },
+};
+
 Worker.prototype.download = async function ({ filename }) {
   const file = filename.split('/').pop();
   const localPath = await getTempFilename({ targetFilename: file });
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new GetObjectCommand({ Bucket, Key });
   debug(`Downloading ${file} to ${localPath}`);
@@ -102,7 +142,7 @@ Worker.prototype.put = async function (options) {
   const ContentType = mime.lookup(file);
 
   debug(`Putting ${filename} to ${JSON.stringify({ Bucket, Key, ContentType })}}`);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
 
   const command = new PutObjectCommand({
     Bucket, Key, Body, ContentType,
@@ -129,7 +169,7 @@ Worker.prototype.write = async function (options) {
   const Body = content;
 
   debug(`Writing content of length ${content.length} to ${JSON.stringify({ Bucket, Key })}}`);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const ContentType = mime.lookup(file);
 
   const command = new PutObjectCommand({
@@ -151,7 +191,7 @@ Worker.prototype.list = async function ({ directory }) {
   let dir = directory;
   while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
   const { Bucket, Key: Prefix } = getParts(dir);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const command = new ListObjectsV2Command({
     Bucket,
     Prefix: `${Prefix}/`,
@@ -182,7 +222,7 @@ Worker.prototype.listAll = async function ({ directory }) {
   let dir = directory;
   while (dir.slice(-1) === '/') dir = dir.slice(0, -1);
   const { Bucket, Key: Prefix } = getParts(dir);
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const files = [];
   let ContinuationToken = null;
   do {
@@ -211,7 +251,7 @@ Worker.prototype.listAll.metadata = {
 Worker.prototype.stat = async function ({ filename }) {
   if (!filename) throw new Error('filename is required');
 
-  const s3Client = new S3Client({});
+  const s3Client = this.getClient();
   const { Bucket, Key } = getParts(filename);
   const command = new HeadObjectCommand({ Bucket, Key });
   const response = await s3Client.send(command);
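
The new copy and remove methods back the copy-then-delete path that FileWorker's move now takes for object-store sources. Roughly equivalent direct calls are sketched below; the standalone require path and the bucket/key names are placeholders:

// Sketch only: this mirrors what FileWorker.move now does for an s3:// -> s3:// move.
const S3Worker = require('@engine9-io/input-tools/file/S3');

(async () => {
  const s3 = new S3Worker({});
  const filename = 's3://my-bucket/old/report.csv';
  const target = 's3://my-bucket/archive/report.csv';
  // copy() issues a CopyObjectCommand against the target bucket/key...
  const output = await s3.copy({ filename, target });
  // ...then remove() deletes the original with a DeleteObjectCommand.
  await s3.remove({ filename });
  console.log(output);
})();
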
package/index.js CHANGED
@@ -364,6 +364,10 @@ function getTimelineEntryUUID(inputObject, { defaults = {} } = {}) {
   // eslint-disable-next-line no-restricted-globals
   if (isNaN(ts)) throw new Error(`getTimelineEntryUUID got an invalid date:${o.ts || '<blank>'}`);
   const idString = `${ts.toISOString()}-${o.person_id}-${o.entry_type_id}-${o.source_code_id || 0}`;
+
+  if (!uuidIsValid(o.input_id)) {
+    throw new Error(`Invalid input_id:'${o.input_id}', type ${typeof o.input_id} -- should be a uuid`);
+  }
   // get a temp ID
   const uuid = uuidv5(idString, o.input_id);
   // Change out the ts to match the v7 sorting.
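
The added guard makes getTimelineEntryUUID fail fast when input_id is not a valid UUID, since uuidv5 needs a UUID namespace to derive the entry id. An illustrative call, assuming the function is exported from the package index (an assumption; the diff only shows its body):

// Illustrative only: the export of getTimelineEntryUUID is an assumption.
const { getTimelineEntryUUID } = require('@engine9-io/input-tools');

try {
  getTimelineEntryUUID({
    ts: '2024-01-01T00:00:00Z',
    person_id: 123,
    entry_type_id: 4,
    input_id: 'not-a-uuid', // previously this surfaced as an error inside uuidv5
  });
} catch (e) {
  // "Invalid input_id:'not-a-uuid', type string -- should be a uuid"
  console.error(e.message);
}
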
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@engine9-io/input-tools",
-  "version": "1.4.1",
+  "version": "1.5.0",
   "description": "Tools for dealing with Engine9 inputs",
   "main": "index.js",
   "scripts": {