@engine9/input-tools 2.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.prettierrc +7 -0
- package/ForEachEntry.js +194 -0
- package/LICENSE +674 -0
- package/README.md +6 -0
- package/ValidatingReadable.js +21 -0
- package/buildSamplePackets.js +13 -0
- package/eslint.config.mjs +17 -0
- package/file/FileUtilities.js +1076 -0
- package/file/GoogleDrive.js +39 -0
- package/file/Parquet.js +137 -0
- package/file/R2.js +32 -0
- package/file/S3.js +329 -0
- package/file/tools.js +359 -0
- package/index.js +426 -0
- package/package.json +54 -0
- package/skills/transaction-mapping/SKILL.md +105 -0
- package/skills/transaction-mapping/reference.md +72 -0
- package/test/cli.js +9 -0
- package/test/file.js +23 -0
- package/test/processing/bigDataMessage.js +52 -0
- package/test/processing/forEach.js +53 -0
- package/test/processing/forEachResume.js +54 -0
- package/test/processing/message.js +40 -0
- package/test/processing/zip.js +21 -0
- package/test/sample/1000_message.packet.zip +0 -0
- package/test/sample/5_message.packet.zip +0 -0
- package/test/sample/fileWithHead.csv +3 -0
- package/test/sample/fileWithoutHead.csv +2 -0
- package/test/sample/message/1000_fake_people.csv +1001 -0
- package/test/sample/message/5_fake_people.csv +6 -0
- package/test/sample/message/message.json5 +41 -0
- package/test/uuid.js +20 -0
- package/timelineTypes.js +139 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { google } from "googleapis";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
const fsp = fs.promises;
|
|
4
|
+
// Google Drive worker constructor; it sets no instance state. Methods are attached to Worker.prototype.
function Worker() { }
|
|
5
|
+
/**
 * Install a GoogleAuth instance as the global googleapis auth option.
 *
 * Reads the JSON key file named by the GOOGLE_APPLICATION_CREDENTIALS
 * environment variable and uses its `subject_to_impersonate` value as the
 * `subject` client option, with the Drive scope.
 *
 * @returns {Promise<void>}
 * @throws {Error} when the environment variable is unset, the key file cannot
 *   be read/parsed, or it lacks `subject_to_impersonate`.
 */
Worker.prototype.setAuth = async function () {
  const keyFile = process.env.GOOGLE_APPLICATION_CREDENTIALS;
  if (!keyFile) {
    // Fail with an actionable message instead of the TypeError readFile(undefined) raises.
    throw new Error('GOOGLE_APPLICATION_CREDENTIALS must be set to the path of a key file');
  }
  const settings = JSON.parse(await fsp.readFile(keyFile, 'utf8'));
  if (!settings.subject_to_impersonate) {
    throw new Error(`You should include subject_to_impersonate in file ${keyFile}`);
  }
  const auth = new google.auth.GoogleAuth({
    clientOptions: {
      subject: settings.subject_to_impersonate,
    },
    keyFile,
    scopes: ['https://www.googleapis.com/auth/drive'],
  });
  // google.options applies to every client created from this googleapis import afterwards.
  google.options({
    auth,
  });
};
|
|
21
|
+
/**
 * List the non-trashed children of a Drive folder, including items that live
 * on shared drives.
 *
 * @param {object} options
 * @param {string} options.path - id of the folder whose children are listed.
 * @returns {Promise<object[]>} file resources from every result page.
 * @throws {Error} when `path` is missing or authentication/listing fails.
 */
Worker.prototype.list = async function ({ path }) {
  if (!path) {
    throw new Error('path (the Drive folder id) is required');
  }
  await this.setAuth();
  const drive = google.drive({ version: 'v3' });
  // Backslashes and single quotes must be escaped inside a quoted query value,
  // otherwise an id containing them breaks (or rewrites) the query.
  const folderId = String(path).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  const q = `'${folderId}' in parents and trashed=false`;
  const files = [];
  let pageToken;
  do {
    const raw = await drive.files.list({
      pageSize: 150,
      q,
      pageToken,
      supportsAllDrives: true, // include share drives as well
      includeItemsFromAllDrives: true,
    });
    files.push(...(raw.data?.files || []));
    // Keep following nextPageToken so folders with more than one page are fully listed.
    pageToken = raw.data?.nextPageToken;
  } while (pageToken);
  return files;
};
Worker.prototype.list.metadata = {
  options: {
    path: {},
  },
};
|
|
39
|
+
export default Worker;
|
package/file/Parquet.js
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import parquet from "@dsnp/parquetjs";
|
|
2
|
+
import nodestream from "node:stream";
|
|
3
|
+
import debug$0 from "debug";
|
|
4
|
+
import clientS3 from "@aws-sdk/client-s3";
|
|
5
|
+
import FileWorker from "./FileUtilities.js";
|
|
6
|
+
const { Readable } = nodestream;
|
|
7
|
+
const debug = debug$0('Parquet');
|
|
8
|
+
const { S3Client } = clientS3;
|
|
9
|
+
// Parquet worker constructor; it sets no instance state. Methods are attached to Worker.prototype.
function Worker() { }
|
|
10
|
+
/**
 * Open a ParquetReader for a local path or an s3:// URL.
 *
 * @param {object} options
 * @param {string} options.filename - local file path, or `s3://bucket/key`.
 * @returns {Promise<object>} the opened reader.
 * @throws {Error} when `filename` is missing.
 */
async function getReader(options) {
  const { filename } = options;
  if (!filename) {
    throw new Error('filename is required');
  }
  if (!filename.startsWith('s3://')) {
    return parquet.ParquetReader.openFile(filename);
  }
  // "s3://bucket/a/b" splits into ['s3:', '', 'bucket', 'a', 'b'].
  const [, , bucket, ...keySegments] = filename.split('/');
  const s3 = new S3Client({});
  return parquet.ParquetReader.openS3(s3, {
    Bucket: bucket,
    Key: keySegments.join('/')
  });
}
|
|
24
|
+
/**
 * Return the schema and row count of a parquet file.
 *
 * @param {object} options - passed to getReader; `options.filename` is required.
 * @returns {Promise<{schema: object, records: number}>} `records` is NaN when
 *   the reader exposes no `metadata.num_rows`.
 */
Worker.prototype.meta = async function (options) {
  const reader = await getReader(options);
  try {
    const schema = reader.getSchema();
    return {
      schema,
      // num_rows is stringified before parsing because it is not read as a plain number here.
      records: parseInt(reader.metadata?.num_rows?.toString(), 10)
    };
  } finally {
    // Always release the underlying file/S3 handle, even if reading metadata throws.
    await reader.close();
  }
};
// NOTE(review): the function reads options.filename, but the declared option is `path` — confirm which is intended.
Worker.prototype.meta.metadata = {
  options: {
    path: {}
  }
};
|
|
39
|
+
/**
 * Return the schema object of a parquet file.
 *
 * @param {object} options - passed to getReader; `options.filename` is required.
 * @returns {Promise<object>} the value of `reader.getSchema()`.
 */
Worker.prototype.schema = async function (options) {
  const reader = await getReader(options);
  try {
    return reader.getSchema();
  } finally {
    // The reader was previously left open; close it once the schema has been read.
    await reader.close();
  }
};
// NOTE(review): the function reads options.filename, but the declared option is `path` — confirm which is intended.
Worker.prototype.schema.metadata = {
  options: {
    path: {}
  }
};
|
|
48
|
+
/**
 * Normalise a column name for loose comparison: lower-case it, then replace
 * every character outside [a-z0-9_] with an underscore.
 *
 * @param {string} name
 * @returns {string}
 */
function cleanColumnName(name) {
  const lowered = name.toLowerCase();
  const disallowed = /[^a-z0-9_]/g;
  return lowered.replace(disallowed, '_');
}
|
|
51
|
+
/**
 * Stream the records of a parquet file as an object-mode Readable.
 *
 * @param {object} options
 * @param {string} options.filename - local path or s3:// URL (see getReader).
 * @param {string|Array<string|{name: string}>} [options.columns] - columns to
 *   read; a comma separated string or an array. Names match schema fields
 *   either exactly or after cleanColumnName normalisation.
 * @param {number} [options.limit] - maximum number of records; only honoured
 *   when it is an integer number.
 * @returns {Promise<{stream: import('node:stream').Readable}>}
 */
Worker.prototype.stream = async function (options) {
  const reader = await getReader(options);

  // Several paths (end of data, limit reached, stream destroyed, setup failure)
  // need to close the reader; make sure close() runs at most once.
  let closing = null;
  const closeReader = () => {
    if (!closing) closing = reader.close();
    return closing;
  };

  let columns;
  let limit = 0;
  let cursor;
  try {
    if (options.columns) {
      // Reuse the reader that is already open rather than opening a second one
      // through this.schema().
      const { fieldList } = reader.getSchema();
      const requestedColumns = typeof options.columns === 'string'
        ? options.columns.split(',').map((d) => d.trim())
        : options.columns.map((d) => (d.name ? d.name.trim() : d.trim()));
      columns = requestedColumns.flatMap((c) => fieldList
        .filter((f) => f.name === c || cleanColumnName(f.name) === cleanColumnName(c))
        .map((f) => f.name));
    }
    if (parseInt(options.limit, 10) === options.limit) {
      limit = parseInt(options.limit, 10);
    }
    debug(`Reading parquet file ${options.filename} with columns ${columns?.join(',')} and limit ${limit}`);
    // create a new cursor
    cursor = reader.getCursor(columns);
  } catch (e) {
    // The setup error is the useful one; a failure while closing must not mask it.
    await closeReader().catch(() => {});
    throw e;
  }

  let counter = 0;
  const start = Date.now();
  const stream = new Readable({
    objectMode: true,
    async read() {
      try {
        const token = await cursor.next();
        if (!token) {
          await closeReader();
          this.push(null);
          return;
        }
        counter += 1;
        if (limit && counter > limit) {
          debug(`Reached limit of ${limit}, stopping`);
          this.push(null);
          await closeReader();
          return;
        }
        if (counter % 10000 === 0) {
          const m = process.memoryUsage().heapTotal;
          const end = Date.now();
          debug(`Read ${counter} ${(counter * 1000) / (end - start)}/sec, Node reported memory usage: ${m / 1024 / 1024} MBs`);
        }
        this.push(token);
      } catch (e) {
        // read() is async, so a rejection would otherwise be unhandled; surface
        // it to consumers as a stream 'error'.
        this.destroy(e);
      }
    },
    destroy(err, callback) {
      // Also runs when a consumer abandons the stream early, so the reader is not leaked.
      closeReader().then(() => callback(err), (closeErr) => callback(err || closeErr));
    }
  });
  return { stream };
};
// NOTE(review): the function reads options.filename/columns/limit, but the declared option is `path` — confirm.
Worker.prototype.stream.metadata = {
  options: {
    path: {}
  }
};
|
|
109
|
+
/**
 * Stream a parquet file (see `stream`) and hand the resulting object stream to
 * FileWorker#objectStreamToFile together with the caller's options.
 *
 * @param {object} options - forwarded to `this.stream` and to objectStreamToFile.
 * @returns {Promise<*>} whatever objectStreamToFile returns.
 */
Worker.prototype.toFile = async function (options) {
  const source = await this.stream(options);
  const fileWorker = new FileWorker(this);
  const request = { ...options, stream: source.stream };
  return fileWorker.objectStreamToFile(request);
};
Worker.prototype.toFile.metadata = {
  options: {
    path: {}
  }
};
|
|
119
|
+
/**
 * Return the schema, file metadata and row groups reported by the reader.
 *
 * @param {object} options - passed to getReader; `options.filename` is required.
 * @returns {Promise<{schema: object, fileMetadata: *, rowGroups: *}>}
 */
Worker.prototype.stats = async function (options) {
  const reader = await getReader(options);
  try {
    const schema = reader.getSchema();
    // NOTE(review): confirm getFileMetaData()/getRowGroups() exist on the installed parquet reader.
    const fileMetadata = reader.getFileMetaData();
    const rowGroups = reader.getRowGroups();
    return {
      schema,
      fileMetadata,
      rowGroups
    };
  } finally {
    // Release the file/S3 handle whether or not the calls above succeed.
    await reader.close();
  }
};
// NOTE(review): the function reads options.filename, but the declared option is `path` — confirm which is intended.
Worker.prototype.stats.metadata = {
  options: {
    path: {}
  }
};
|
|
137
|
+
export default Worker;
|
package/file/R2.js
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import util from "node:util";
|
|
2
|
+
import clientS3 from "@aws-sdk/client-s3";
|
|
3
|
+
import S3 from "./S3.js";
|
|
4
|
+
const { S3Client, } = clientS3;
|
|
5
|
+
/**
 * R2 worker: runs the S3 constructor on this instance, then sets the URL
 * prefix to 'r2'. Prototype methods are inherited from S3.
 *
 * @param {*} worker - forwarded unchanged to the S3 constructor.
 */
function R2(worker) {
  S3.call(this, worker);
  // Assigned after the S3 constructor call so this value is the one that sticks.
  this.prefix = 'r2';
}
util.inherits(R2, S3);
|
|
10
|
+
/**
 * Return the S3Client for this instance, creating and caching it on first use.
 * The three CLOUDFLARE_R2_* environment variables are checked on every call,
 * including calls that return the cached client.
 *
 * @returns {object} the cached S3Client pointed at the account's R2 endpoint.
 * @throws {Error} listing any of the required environment variables that are unset.
 */
R2.prototype.getClient = function () {
  const required = ['CLOUDFLARE_R2_ACCOUNT_ID', 'CLOUDFLARE_R2_ACCESS_KEY_ID', 'CLOUDFLARE_R2_SECRET_ACCESS_KEY'];
  const missing = required.filter((key) => !process.env[key]);
  if (missing.length > 0) {
    throw new Error(`Missing environment variables for Cloudflare access:${missing.join(',')}`);
  }
  if (this.client) {
    return this.client;
  }
  const {
    CLOUDFLARE_R2_ACCOUNT_ID: accountId,
    CLOUDFLARE_R2_ACCESS_KEY_ID: accessKeyId,
    CLOUDFLARE_R2_SECRET_ACCESS_KEY: secretAccessKey,
  } = process.env;
  this.client = new S3Client({
    // R2 does not strictly require a region, but the SDK expects one. 'auto' works fine.
    region: 'auto',
    endpoint: `https://${accountId}.r2.cloudflarestorage.com`,
    credentials: { accessKeyId, secretAccessKey },
    forcePathStyle: true, // Important for R2 compatibility
  });
  return this.client;
};
|
|
32
|
+
export default R2;
|
package/file/S3.js
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import debug$0 from "debug";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import withDb from "mime-type/with-db";
|
|
4
|
+
import clientS3 from "@aws-sdk/client-s3";
|
|
5
|
+
import { getTempFilename, relativeDate } from "./tools.js";
|
|
6
|
+
const debug = debug$0('@engine9-io/input/S3');
|
|
7
|
+
const { mimeType: mime } = withDb;
|
|
8
|
+
const { S3Client, CopyObjectCommand, DeleteObjectCommand, GetObjectCommand, HeadObjectCommand, GetObjectAttributesCommand, PutObjectCommand, ListObjectsV2Command } = clientS3;
|
|
9
|
+
/**
 * S3 worker constructor. `prefix` is the URL scheme used when this worker
 * builds object URLs (see listAll).
 */
function Worker() {
  const scheme = 's3';
  this.prefix = scheme;
}
|
|
12
|
+
/**
 * Split an `s3://bucket/key` or `r2://bucket/key` URL into its bucket and key.
 *
 * @param {string} filename - object URL; the key part may be empty.
 * @returns {{Bucket: string, Key: string}}
 * @throws {Error} when the value is empty, not a string, uses another scheme,
 *   or names no bucket.
 */
function getParts(filename) {
  if (!filename || typeof filename !== 'string') {
    throw new Error(`Invalid filename: ${filename}`);
  }
  if (!filename.startsWith('r2://') && !filename.startsWith('s3://')) {
    throw new Error(`Invalid filename, must start with r2:// or s3://: ${filename}`);
  }
  // Both accepted schemes are five characters long ("s3://", "r2://").
  const path = filename.slice(5);
  const slash = path.indexOf('/');
  const Bucket = slash === -1 ? path : path.slice(0, slash);
  const Key = slash === -1 ? '' : path.slice(slash + 1);
  if (!Bucket) {
    // e.g. "s3:///key": fail here rather than sending a request with an empty bucket.
    throw new Error(`Invalid filename, no bucket specified: ${filename}`);
  }
  return { Bucket, Key };
}
|
|
23
|
+
/**
 * Return this instance's S3Client, constructing it with default options the
 * first time it is requested and caching it on `this.client`.
 *
 * @returns {object} the cached S3Client.
 */
Worker.prototype.getClient = function () {
  if (this.client) {
    return this.client;
  }
  this.client = new S3Client({});
  return this.client;
};
|
|
28
|
+
/**
 * Fetch object attributes (ETag, Checksum, ObjectParts, StorageClass,
 * ObjectSize) for an object URL.
 *
 * @param {object} options
 * @param {string} options.filename - s3:// or r2:// object URL.
 * @returns {Promise<object>} the GetObjectAttributes response, unmodified.
 */
Worker.prototype.getMetadata = async function ({ filename }) {
  const client = this.getClient();
  const location = getParts(filename);
  const request = new GetObjectAttributesCommand({
    Bucket: location.Bucket,
    Key: location.Key,
    ObjectAttributes: ['ETag', 'Checksum', 'ObjectParts', 'StorageClass', 'ObjectSize']
  });
  return client.send(request);
};
Worker.prototype.getMetadata.metadata = {
  options: {
    filename: {}
  }
};
|
|
43
|
+
/**
 * Open an object for reading.
 *
 * @param {object} options
 * @param {string} options.filename - s3:// or r2:// object URL.
 * @returns {Promise<{stream: *}>} `stream` is the `Body` of the GetObject response.
 * @throws rethrows any error from the request after logging the filename.
 */
Worker.prototype.stream = async function ({ filename }) {
  const client = this.getClient();
  const location = getParts(filename);
  const request = new GetObjectCommand({ Bucket: location.Bucket, Key: location.Key });
  try {
    debug(`Streaming file ${location.Key}`);
    const { Body } = await client.send(request);
    return { stream: Body };
  } catch (err) {
    // Log which file failed, then let the caller deal with the original error.
    debug(`Could not stream filename:${filename}`);
    throw err;
  }
};
Worker.prototype.stream.metadata = {
  options: {
    filename: {}
  }
};
|
|
62
|
+
/**
 * Copy one remote object to another remote location.
 *
 * @param {object} options
 * @param {string} options.filename - source object URL (s3:// or r2://).
 * @param {string} options.target - destination object URL (s3:// or r2://).
 * @returns {Promise<object>} the CopyObject response.
 * @throws {Error} when the source is not a remote URL (use `put` for local
 *   files) or either URL is malformed.
 */
Worker.prototype.copy = async function ({ filename, target }) {
  const isRemote = typeof filename === 'string'
    && (filename.startsWith('s3://') || filename.startsWith('r2://'));
  if (!isRemote) {
    throw new Error('Cowardly not copying a file not from s3 -- use put instead');
  }
  const s3Client = this.getClient();
  const source = getParts(filename);
  const { Bucket, Key } = getParts(target);
  debug(`Copying ${filename} to ${JSON.stringify({ Bucket, Key })}`);
  // CopySource travels in a header and must be URL-encoded; encode each key
  // segment separately so the '/' separators are preserved.
  const encodedKey = source.Key.split('/').map(encodeURIComponent).join('/');
  const command = new CopyObjectCommand({
    CopySource: `/${source.Bucket}/${encodedKey}`,
    Bucket,
    Key
  });
  return s3Client.send(command);
};
Worker.prototype.copy.metadata = {
  options: {
    filename: {},
    target: {}
  }
};
|
|
85
|
+
/**
 * Move an object by copying it to `target` and then deleting the source.
 * The delete only runs if the copy resolved.
 *
 * @param {object} options
 * @param {string} options.filename - source object URL.
 * @param {string} options.target - destination object URL.
 * @returns {Promise<{filename: string}>} the new location.
 */
Worker.prototype.move = async function ({ filename, target }) {
  const source = { filename };
  await this.copy({ ...source, target });
  await this.remove(source);
  const moved = { filename: target };
  return moved;
};
Worker.prototype.move.metadata = {
  options: {
    filename: {},
    target: {}
  }
};
|
|
96
|
+
/**
 * Delete a single object.
 *
 * @param {object} options
 * @param {string} options.filename - s3:// or r2:// object URL.
 * @returns {Promise<object>} the DeleteObject response.
 */
Worker.prototype.remove = async function ({ filename }) {
  const client = this.getClient();
  const location = getParts(filename);
  const request = new DeleteObjectCommand({ Bucket: location.Bucket, Key: location.Key });
  return client.send(request);
};
Worker.prototype.remove.metadata = {
  options: {
    filename: {}
  }
};
|
|
107
|
+
/**
 * Download an object to a temporary local file.
 *
 * @param {object} options
 * @param {string} options.filename - s3:// or r2:// object URL.
 * @returns {Promise<{size: number, filename: string}>} size in bytes and the
 *   local path that was written.
 * @throws when the URL is malformed, the request fails, or either the
 *   download stream or the local write fails.
 */
Worker.prototype.download = async function ({ filename }) {
  // Validate the URL before creating any temp file name.
  const { Bucket, Key } = getParts(filename);
  const file = filename.split('/').pop();
  const localPath = await getTempFilename({ targetFilename: file });
  const s3Client = this.getClient();
  const command = new GetObjectCommand({ Bucket, Key });
  debug(`Downloading ${file} to ${localPath}`);
  const response = await s3Client.send(command);
  const fileStream = fs.createWriteStream(localPath);
  await new Promise((resolve, reject) => {
    // pipe() does not forward source errors to the destination, so without this
    // handler a broken download would leave the promise pending forever.
    response.Body.on('error', (e) => {
      fileStream.destroy();
      reject(e);
    });
    fileStream.on('error', reject);
    fileStream.on('finish', resolve);
    response.Body.pipe(fileStream);
  });
  // stat() now runs outside the event handler, so its failure rejects this call
  // instead of becoming an unhandled rejection.
  const { size } = await fs.promises.stat(localPath);
  return { size, filename: localPath };
};
Worker.prototype.download.metadata = {
  options: {
    filename: {}
  }
};
|
|
130
|
+
/**
 * Upload a local file into a remote directory.
 *
 * @param {object} options
 * @param {string} options.filename - local file to upload.
 * @param {string} options.directory - destination, must start with s3:// or r2://.
 * @param {string} [options.file] - object name; defaults to the last path
 *   segment of `filename`.
 * @returns {Promise<object>} the PutObject response.
 * @throws {Error} when `filename` is missing or `directory` has another scheme.
 */
Worker.prototype.put = async function (options) {
  const { filename, directory } = options;
  if (!filename) {
    throw new Error('Local filename required');
  }
  const isRemoteDirectory = ['s3://', 'r2://'].some((scheme) => directory?.indexOf(scheme) === 0);
  if (!isRemoteDirectory) {
    throw new Error(`directory path must start with s3:// or r2://, is ${directory}`);
  }
  const file = options.file || filename.split('/').pop();
  // "s3://bucket/a/b" splits into ['s3:', '', 'bucket', 'a', 'b'].
  const segments = directory.split('/');
  const Bucket = segments[2];
  // filter(Boolean) drops empty segments produced by doubled or trailing slashes.
  const Key = segments.slice(3).filter(Boolean).concat(file).join('/');
  const Body = fs.createReadStream(filename);
  const ContentType = mime.lookup(file);
  debug(`Putting ${filename} to ${JSON.stringify({ Bucket, Key, ContentType })}}`);
  const client = this.getClient();
  const upload = new PutObjectCommand({ Bucket, Key, Body, ContentType });
  return client.send(upload);
};
Worker.prototype.put.metadata = {
  options: {
    filename: {},
    directory: { description: 'Directory to put file, e.g. s3://foo-bar/dir/xyz' },
    file: { description: 'Name of file, defaults to the filename' }
  }
};
|
|
159
|
+
/**
 * Write in-memory content to `<directory>/<file>`.
 *
 * @param {object} options
 * @param {string} options.directory - destination, must start with s3:// or r2://.
 * @param {string} options.file - object name within the directory.
 * @param {string|Buffer|Uint8Array} options.content - body to store.
 * @returns {Promise<object>} the PutObject response.
 * @throws {Error} when `directory` has another scheme, or `file`/`content` is missing.
 */
Worker.prototype.write = async function (options) {
  const { directory, file, content } = options;
  // The previous check, `!directory?.indexOf('s3://') === 0`, compared a boolean
  // with 0 and so never fired. r2:// is accepted as well, matching `put`.
  if (typeof directory !== 'string'
    || (!directory.startsWith('s3://') && !directory.startsWith('r2://'))) {
    throw new Error(`directory path must start with s3:// or r2://, is ${directory}`);
  }
  if (!file) {
    throw new Error('file is required');
  }
  if (content === undefined || content === null) {
    throw new Error('content is required');
  }
  const parts = directory.split('/');
  const Bucket = parts[2];
  // filter(Boolean) drops empty segments produced by doubled or trailing slashes.
  const Key = parts.slice(3).filter(Boolean).concat(file).join('/');
  const Body = content;
  debug(`Writing content of length ${content.length} to ${JSON.stringify({ Bucket, Key })}`);
  const s3Client = this.getClient();
  const ContentType = mime.lookup(file);
  const command = new PutObjectCommand({
    Bucket,
    Key,
    Body,
    ContentType
  });
  return s3Client.send(command);
};
Worker.prototype.write.metadata = {
  options: {
    directory: { description: 'Directory to put file, e.g. s3://foo-bar/dir/xyz' },
    file: { description: 'Name of file, defaults to the filename' },
    content: { description: 'Contents of file' }
  }
};
|
|
185
|
+
/**
 * List the immediate children (sub-"directories" and files) of a remote directory.
 *
 * @param {object} options
 * @param {string} options.directory - s3:// or r2:// URL; trailing slashes are ignored.
 * @param {Date|number} [options.start] - drop files last modified before this.
 * @param {Date|number} [options.end] - drop files last modified after this.
 * @param {boolean} [options.raw] - return the unfiltered `Contents` entries instead.
 * @returns {Promise<object[]>} `{name, type: 'directory'}` entries followed by
 *   `{name, type: 'file', size, modifiedAt}` entries; with `raw`, the raw
 *   listing entries from every page.
 * @throws {Error} when `directory` is missing or malformed.
 */
Worker.prototype.list = async function ({ directory, start, end, raw }) {
  if (!directory) {
    throw new Error('directory is required');
  }
  let dir = directory;
  while (dir.slice(-1) === '/') {
    dir = dir.slice(0, -1);
  }
  const { Bucket, Key } = getParts(dir);
  // At the bucket root there is no key, so no prefix is sent; sending "/" would
  // match nothing and throw the name slicing below off by one.
  const Prefix = Key ? `${Key}/` : undefined;
  const prefixLength = Prefix ? Prefix.length : 0;
  const s3Client = this.getClient();
  const files = [];
  const commonPrefixes = [];
  let ContinuationToken;
  do {
    const page = await s3Client.send(new ListObjectsV2Command({
      Bucket,
      Prefix,
      Delimiter: '/',
      ContinuationToken
    }));
    files.push(...(page.Contents || []));
    commonPrefixes.push(...(page.CommonPrefixes || []));
    // A single response is capped, so follow the continuation token to the end.
    ContinuationToken = page.NextContinuationToken;
  } while (ContinuationToken);
  if (raw) {
    return files;
  }
  const directories = commonPrefixes.map((f) => ({
    // Strip the listing prefix and the trailing '/' that common prefixes carry.
    name: f.Prefix.slice(prefixLength, -1),
    type: 'directory'
  }));
  const entries = files
    .filter(({ LastModified }) => {
      if (start && new Date(LastModified) < start) {
        return false;
      }
      if (end && new Date(LastModified) > end) {
        return false;
      }
      return true;
    })
    .map(({ Key: objectKey, Size, LastModified }) => ({
      name: objectKey.slice(prefixLength),
      type: 'file',
      size: Size,
      modifiedAt: new Date(LastModified).toISOString()
    }));
  return [...directories, ...entries];
};
Worker.prototype.list.metadata = {
  options: {
    directory: { required: true }
  }
};
|
|
232
|
+
/**
 * List every object under a directory prefix, following continuation tokens
 * until the listing is exhausted. No delimiter is sent with the request.
 *
 * @param {object} options
 * @param {string} options.directory - s3:// or r2:// URL; trailing slashes are ignored.
 * @param {*} [options.start] - passed through relativeDate; older files are dropped.
 * @param {*} [options.end] - passed through relativeDate; newer files are dropped.
 * @returns {Promise<string[]>} object URLs of the form `<prefix>://<bucket>/<key>`.
 * @throws {Error} when `directory` is missing or malformed.
 */
Worker.prototype.listAll = async function (options) {
  const { directory } = options;
  if (!directory) {
    throw new Error('directory is required');
  }
  const start = options.start && relativeDate(options.start);
  const end = options.end && relativeDate(options.end);
  let dir = directory;
  while (dir.slice(-1) === '/') {
    dir = dir.slice(0, -1);
  }
  const { Bucket, Key } = getParts(dir);
  const s3Client = this.getClient();
  // With no key (bucket root) the prefix stays null.
  const Prefix = Key ? `${Key}/` : null;
  const isWithinWindow = ({ LastModified }) => !(start && new Date(LastModified) < start)
    && !(end && new Date(LastModified) > end);
  const toUrl = (d) => `${this.prefix}://${Bucket}/${d.Key}`;

  const files = [];
  let ContinuationToken = null;
  do {
    const command = new ListObjectsV2Command({
      Bucket,
      Prefix,
      ContinuationToken
    });
    debug(`Sending List command with prefix ${Prefix} with ContinuationToken ${ContinuationToken}`);
    const result = await s3Client.send(command);
    const newFiles = (result.Contents || []).filter(isWithinWindow).map(toUrl);
    debug(`Retrieved ${newFiles.length} new files, total ${files.length},sample ${newFiles.slice(0, 3).join(',')}`);
    files.push(...newFiles);
    ContinuationToken = result.NextContinuationToken;
  } while (ContinuationToken);
  return files;
};
Worker.prototype.listAll.metadata = {
  options: {
    directory: { required: true }
  }
};
|
|
280
|
+
/**
 * Move every object under `directory` to the same relative path under
 * `targetDirectory`, running at most 10 moves concurrently.
 *
 * @param {object} options
 * @param {string} options.directory - source s3:// or r2:// directory URL.
 * @param {string} options.targetDirectory - destination directory URL.
 * @returns {Promise<Array<{filename: string}>>} the new location of each object.
 * @throws {Error} when an argument is missing, source and target are the same
 *   location, or a listed object does not sit under the source directory.
 */
Worker.prototype.moveAll = async function ({ directory, targetDirectory }) {
  if (!directory || !targetDirectory) {
    throw new Error('directory and targetDirectory required');
  }
  const trimSlashes = (value) => {
    let trimmed = value;
    while (trimmed.slice(-1) === '/') {
      trimmed = trimmed.slice(0, -1);
    }
    return trimmed;
  };
  // "bucket/key" without the scheme, so s3:// and r2:// spellings of the same
  // location compare equal.
  const withoutScheme = (url) => url.slice(url.indexOf('://') + 3);
  const sourceDir = trimSlashes(directory);
  const targetDir = trimSlashes(targetDirectory);
  const sourcePath = withoutScheme(sourceDir);
  if (withoutScheme(targetDir) === sourcePath) {
    // A move is copy-then-delete; moving onto itself would delete the data.
    throw new Error('directory and targetDirectory must be different locations');
  }
  const files = await this.listAll({ directory });
  const configs = files.map((d) => {
    const path = withoutScheme(d);
    if (!path.startsWith(`${sourcePath}/`)) {
      throw new Error(`Listed file ${d} is not inside ${directory}`);
    }
    // Build the target from the relative path rather than String.replace, which
    // silently produced target === source whenever the text did not match.
    return {
      filename: d,
      target: `${targetDir}${path.slice(sourcePath.length)}`
    };
  });
  const pLimit = await import('p-limit');
  const limitedMethod = pLimit.default(10);
  return Promise.all(configs.map(({ filename, target }) => limitedMethod(async () => this.move({ filename, target }))));
};
Worker.prototype.moveAll.metadata = {
  options: {
    directory: { required: true },
    targetDirectory: { required: true }
  }
};
|
|
298
|
+
/**
 * Return basic facts about an object from a HeadObject request.
 *
 * @param {object} options
 * @param {string} options.filename - s3:// or r2:// object URL.
 * @returns {Promise<{createdAt: Date, modifiedAt: Date, contentType: string, size: number}>}
 *   `createdAt` is the same Date as `modifiedAt`.
 * @throws {Error} when `filename` is missing or malformed, or the request fails.
 */
Worker.prototype.stat = async function ({ filename }) {
  if (!filename) {
    throw new Error('filename is required');
  }
  const client = this.getClient();
  const location = getParts(filename);
  const head = await client.send(new HeadObjectCommand({ Bucket: location.Bucket, Key: location.Key }));
  // Only ContentLength, ContentType and LastModified are used from the response.
  const modifiedAt = new Date(head.LastModified);
  return {
    createdAt: modifiedAt, // Same for S3
    modifiedAt,
    contentType: head.ContentType,
    size: parseInt(head.ContentLength, 10)
  };
};
Worker.prototype.stat.metadata = {
  options: {
    filename: {}
  }
};
|
|
329
|
+
export default Worker;
|