@cumulus/move-granules 9.8.0 → 10.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/index.js +12278 -9266
- package/dist/lambda.zip +0 -0
- package/dist/schemas/config.json +4 -0
- package/dist/schemas/input.json +34 -7
- package/dist/schemas/input.json.template +24 -0
- package/dist/schemas/output.json +56 -17
- package/dist/schemas/output.json.template +43 -0
- package/index.js +62 -46
- package/package.json +14 -10
- package/schemas/config.json +4 -0
- package/schemas/input.json +34 -7
- package/schemas/input.json.template +24 -0
- package/schemas/output.json +56 -17
- package/schemas/output.json.template +43 -0
package/dist/lambda.zip
CHANGED
|
Binary file
|
package/dist/schemas/config.json
CHANGED
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
"description": "Specifies how duplicate filenames should be handled. `error` will throw an error that, if not caught, will fail the task/workflow execution. `version` will add a suffix to the existing filename to avoid a clash.",
|
|
72
72
|
"enum": ["replace", "version", "skip", "error"],
|
|
73
73
|
"default": "error"
|
|
74
|
+
},
|
|
75
|
+
"s3MultipartChunksizeMb": {
|
|
76
|
+
"type": ["number", "null"],
|
|
77
|
+
"description": "S3 multipart upload chunk size in MB. If none is specified, the default default_s3_multipart_chunksize_mb is used."
|
|
74
78
|
}
|
|
75
79
|
}
|
|
76
80
|
}
|
package/dist/schemas/input.json
CHANGED
|
@@ -11,7 +11,10 @@
|
|
|
11
11
|
"description": "Array of all granules",
|
|
12
12
|
"items": {
|
|
13
13
|
"type": "object",
|
|
14
|
-
"required": [
|
|
14
|
+
"required": [
|
|
15
|
+
"granuleId",
|
|
16
|
+
"files"
|
|
17
|
+
],
|
|
15
18
|
"properties": {
|
|
16
19
|
"granuleId": {
|
|
17
20
|
"type": "string"
|
|
@@ -19,19 +22,43 @@
|
|
|
19
22
|
"files": {
|
|
20
23
|
"type": "array",
|
|
21
24
|
"items": {
|
|
25
|
+
"additionalProperties": false,
|
|
22
26
|
"type": "object",
|
|
23
|
-
"required": [
|
|
27
|
+
"required": [
|
|
28
|
+
"bucket",
|
|
29
|
+
"key"
|
|
30
|
+
],
|
|
24
31
|
"properties": {
|
|
25
|
-
"
|
|
32
|
+
"bucket": {
|
|
33
|
+
"description": "Bucket where file is archived in S3",
|
|
26
34
|
"type": "string"
|
|
27
35
|
},
|
|
28
|
-
"
|
|
36
|
+
"checksum": {
|
|
37
|
+
"description": "Checksum value for file",
|
|
29
38
|
"type": "string"
|
|
30
39
|
},
|
|
31
|
-
"
|
|
40
|
+
"checksumType": {
|
|
41
|
+
"description": "Type of checksum (e.g. md5, sha256, etc)",
|
|
32
42
|
"type": "string"
|
|
33
43
|
},
|
|
34
|
-
"
|
|
44
|
+
"fileName": {
|
|
45
|
+
"description": "Name of file (e.g. file.txt)",
|
|
46
|
+
"type": "string"
|
|
47
|
+
},
|
|
48
|
+
"key": {
|
|
49
|
+
"description": "S3 Key for archived file",
|
|
50
|
+
"type": "string"
|
|
51
|
+
},
|
|
52
|
+
"size": {
|
|
53
|
+
"description": "Size of file (in bytes)",
|
|
54
|
+
"type": "number"
|
|
55
|
+
},
|
|
56
|
+
"source": {
|
|
57
|
+
"description": "Source URI of the file from origin system (e.g. S3, FTP, HTTP)",
|
|
58
|
+
"type": "string"
|
|
59
|
+
},
|
|
60
|
+
"type": {
|
|
61
|
+
"description": "Type of file (e.g. data, metadata, browse)",
|
|
35
62
|
"type": "string"
|
|
36
63
|
}
|
|
37
64
|
}
|
|
@@ -41,4 +68,4 @@
|
|
|
41
68
|
}
|
|
42
69
|
}
|
|
43
70
|
}
|
|
44
|
-
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "MoveGranulesInput",
|
|
3
|
+
"description": "Describes the input expected by the move-granules task",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"required": [
|
|
6
|
+
"granules"
|
|
7
|
+
],
|
|
8
|
+
"properties": {
|
|
9
|
+
"granules": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"description": "Array of all granules",
|
|
12
|
+
"items": {
|
|
13
|
+
"type": "object",
|
|
14
|
+
"required": ["granuleId", "files"],
|
|
15
|
+
"properties": {
|
|
16
|
+
"granuleId": {
|
|
17
|
+
"type": "string"
|
|
18
|
+
},
|
|
19
|
+
"files": "{{files}}"
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
package/dist/schemas/output.json
CHANGED
|
@@ -3,12 +3,40 @@
|
|
|
3
3
|
"description": "Describes the output produced by the move-granules task",
|
|
4
4
|
"type": "object",
|
|
5
5
|
"properties": {
|
|
6
|
+
"granuleDuplicates": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"additionalProperties": {
|
|
9
|
+
"files": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"items": {
|
|
12
|
+
"type": "object",
|
|
13
|
+
"required": [
|
|
14
|
+
"bucket",
|
|
15
|
+
"key"
|
|
16
|
+
],
|
|
17
|
+
"properties": {
|
|
18
|
+
"bucket": {
|
|
19
|
+
"description": "Bucket in-process file is being staged in in S3",
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
22
|
+
"key": {
|
|
23
|
+
"description": "S3 Key for in-process file",
|
|
24
|
+
"type": "string"
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
},
|
|
6
31
|
"granules": {
|
|
7
32
|
"type": "array",
|
|
8
33
|
"description": "Array of all granules",
|
|
9
34
|
"items": {
|
|
10
35
|
"type": "object",
|
|
11
|
-
"required": [
|
|
36
|
+
"required": [
|
|
37
|
+
"granuleId",
|
|
38
|
+
"files"
|
|
39
|
+
],
|
|
12
40
|
"properties": {
|
|
13
41
|
"granuleId": {
|
|
14
42
|
"type": "string"
|
|
@@ -16,33 +44,44 @@
|
|
|
16
44
|
"files": {
|
|
17
45
|
"type": "array",
|
|
18
46
|
"items": {
|
|
47
|
+
"additionalProperties": false,
|
|
19
48
|
"type": "object",
|
|
20
|
-
"required": [
|
|
49
|
+
"required": [
|
|
50
|
+
"bucket",
|
|
51
|
+
"key"
|
|
52
|
+
],
|
|
21
53
|
"properties": {
|
|
22
|
-
"
|
|
54
|
+
"bucket": {
|
|
55
|
+
"description": "Bucket where file is archived in S3",
|
|
23
56
|
"type": "string"
|
|
24
57
|
},
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
58
|
+
"checksum": {
|
|
59
|
+
"description": "Checksum value for file",
|
|
60
|
+
"type": "string"
|
|
28
61
|
},
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
62
|
+
"checksumType": {
|
|
63
|
+
"description": "Type of checksum (e.g. md5, sha256, etc)",
|
|
64
|
+
"type": "string"
|
|
32
65
|
},
|
|
33
|
-
"
|
|
66
|
+
"fileName": {
|
|
67
|
+
"description": "Name of file (e.g. file.txt)",
|
|
34
68
|
"type": "string"
|
|
35
69
|
},
|
|
36
|
-
"
|
|
70
|
+
"key": {
|
|
71
|
+
"description": "S3 Key for archived file",
|
|
37
72
|
"type": "string"
|
|
38
73
|
},
|
|
39
|
-
"
|
|
40
|
-
"description": "
|
|
74
|
+
"size": {
|
|
75
|
+
"description": "Size of file (in bytes)",
|
|
76
|
+
"type": "number"
|
|
77
|
+
},
|
|
78
|
+
"source": {
|
|
79
|
+
"description": "Source URI of the file from origin system (e.g. S3, FTP, HTTP)",
|
|
41
80
|
"type": "string"
|
|
42
81
|
},
|
|
43
|
-
"
|
|
44
|
-
"description": "
|
|
45
|
-
"type": "
|
|
82
|
+
"type": {
|
|
83
|
+
"description": "Type of file (e.g. data, metadata, browse)",
|
|
84
|
+
"type": "string"
|
|
46
85
|
}
|
|
47
86
|
}
|
|
48
87
|
}
|
|
@@ -51,4 +90,4 @@
|
|
|
51
90
|
}
|
|
52
91
|
}
|
|
53
92
|
}
|
|
54
|
-
}
|
|
93
|
+
}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "MoveGranulesOutput",
|
|
3
|
+
"description": "Describes the output produced by the move-granules task",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"properties": {
|
|
6
|
+
"granuleDuplicates": {
|
|
7
|
+
"type": "object",
|
|
8
|
+
"additionalProperties": {
|
|
9
|
+
"files": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"items": {
|
|
12
|
+
"type": "object",
|
|
13
|
+
"required": ["bucket", "key"],
|
|
14
|
+
"properties": {
|
|
15
|
+
"bucket": {
|
|
16
|
+
"description": "Bucket in-process file is being staged in in S3",
|
|
17
|
+
"type": "string"
|
|
18
|
+
},
|
|
19
|
+
"key": {
|
|
20
|
+
"description": "S3 Key for in-process file",
|
|
21
|
+
"type": "string"
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
},
|
|
28
|
+
"granules": {
|
|
29
|
+
"type": "array",
|
|
30
|
+
"description": "Array of all granules",
|
|
31
|
+
"items": {
|
|
32
|
+
"type": "object",
|
|
33
|
+
"required": ["granuleId", "files"],
|
|
34
|
+
"properties": {
|
|
35
|
+
"granuleId": {
|
|
36
|
+
"type": "string"
|
|
37
|
+
},
|
|
38
|
+
"files": "{{files}}"
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
package/index.js
CHANGED
|
@@ -7,13 +7,7 @@ const flatten = require('lodash/flatten');
|
|
|
7
7
|
const keyBy = require('lodash/keyBy');
|
|
8
8
|
const path = require('path');
|
|
9
9
|
|
|
10
|
-
const
|
|
11
|
-
buildS3Uri,
|
|
12
|
-
moveObject,
|
|
13
|
-
s3Join,
|
|
14
|
-
s3ObjectExists,
|
|
15
|
-
waitForObjectToExist,
|
|
16
|
-
} = require('@cumulus/aws-client/S3');
|
|
10
|
+
const S3 = require('@cumulus/aws-client/S3');
|
|
17
11
|
|
|
18
12
|
const { InvalidArgument } = require('@cumulus/errors');
|
|
19
13
|
|
|
@@ -25,6 +19,7 @@ const {
|
|
|
25
19
|
|
|
26
20
|
const {
|
|
27
21
|
isCMRFile,
|
|
22
|
+
isISOFile,
|
|
28
23
|
metadataObjectFromCMRFile,
|
|
29
24
|
granulesToCmrFileObjects,
|
|
30
25
|
} = require('@cumulus/cmrjs');
|
|
@@ -34,6 +29,25 @@ const BucketsConfig = require('@cumulus/common/BucketsConfig');
|
|
|
34
29
|
const { urlPathTemplate } = require('@cumulus/ingest/url-path-template');
|
|
35
30
|
const log = require('@cumulus/common/log');
|
|
36
31
|
|
|
32
|
+
const MB = 1024 * 1024;
|
|
33
|
+
|
|
34
|
+
function buildGranuleDuplicatesObject(movedGranulesByGranuleId) {
|
|
35
|
+
const duplicatesObject = {};
|
|
36
|
+
Object.keys(movedGranulesByGranuleId).forEach((k) => {
|
|
37
|
+
duplicatesObject[k] = {
|
|
38
|
+
files: movedGranulesByGranuleId[k].files.filter((file) => {
|
|
39
|
+
if (file.duplicate_found) {
|
|
40
|
+
// eslint-disable-next-line no-param-reassign
|
|
41
|
+
delete file.duplicate_found;
|
|
42
|
+
return true;
|
|
43
|
+
}
|
|
44
|
+
return false;
|
|
45
|
+
}),
|
|
46
|
+
};
|
|
47
|
+
});
|
|
48
|
+
return duplicatesObject;
|
|
49
|
+
}
|
|
50
|
+
|
|
37
51
|
/**
|
|
38
52
|
* Validates the file matched only one collection.file and has a valid bucket
|
|
39
53
|
* config.
|
|
@@ -73,7 +87,7 @@ function validateMatch(match, bucketsConfig, fileName, fileSpecs) {
|
|
|
73
87
|
*/
|
|
74
88
|
async function updateGranuleMetadata(granulesObject, collection, cmrFiles, bucketsConfig) {
|
|
75
89
|
const updatedGranules = {};
|
|
76
|
-
const cmrFileNames = cmrFiles.map((f) => path.basename(f.
|
|
90
|
+
const cmrFileNames = cmrFiles.map((f) => path.basename(f.key));
|
|
77
91
|
const fileSpecs = collection.files;
|
|
78
92
|
|
|
79
93
|
await Promise.all(Object.keys(granulesObject).map(async (granuleId) => {
|
|
@@ -81,35 +95,33 @@ async function updateGranuleMetadata(granulesObject, collection, cmrFiles, bucke
|
|
|
81
95
|
updatedGranules[granuleId] = { ...granulesObject[granuleId] };
|
|
82
96
|
|
|
83
97
|
const cmrFile = cmrFiles.find((f) => f.granuleId === granuleId);
|
|
84
|
-
const cmrMetadata = cmrFile ? await metadataObjectFromCMRFile(cmrFile.
|
|
98
|
+
const cmrMetadata = cmrFile ? await metadataObjectFromCMRFile(`s3://${cmrFile.bucket}/${cmrFile.key}`) : {};
|
|
85
99
|
|
|
86
100
|
granulesObject[granuleId].files.forEach((file) => {
|
|
87
101
|
const cmrFileTypeObject = {};
|
|
88
|
-
|
|
102
|
+
const fileName = path.basename(file.key);
|
|
103
|
+
if (cmrFileNames.includes(fileName) && !file.type) {
|
|
89
104
|
cmrFileTypeObject.type = 'metadata';
|
|
90
105
|
}
|
|
91
106
|
|
|
92
|
-
const match = fileSpecs.filter((cf) => unversionFilename(
|
|
93
|
-
validateMatch(match, bucketsConfig,
|
|
107
|
+
const match = fileSpecs.filter((cf) => unversionFilename(fileName).match(cf.regex));
|
|
108
|
+
validateMatch(match, bucketsConfig, fileName, fileSpecs);
|
|
94
109
|
|
|
95
|
-
const URLPathTemplate =
|
|
110
|
+
const URLPathTemplate = match[0].url_path || collection.url_path || '';
|
|
96
111
|
const urlPath = urlPathTemplate(URLPathTemplate, {
|
|
97
112
|
file,
|
|
98
113
|
granule: granulesObject[granuleId],
|
|
99
114
|
cmrMetadata,
|
|
100
115
|
});
|
|
101
116
|
const bucketName = bucketsConfig.nameByKey(match[0].bucket);
|
|
102
|
-
const
|
|
117
|
+
const updatedKey = S3.s3Join(urlPath, fileName);
|
|
103
118
|
|
|
104
119
|
updatedFiles.push({
|
|
105
|
-
...file,
|
|
120
|
+
...file,
|
|
106
121
|
...cmrFileTypeObject, // Add type if the file is a CMR file
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
filename: `s3://${s3Join(bucketName, filepath)}`,
|
|
111
|
-
url_path: URLPathTemplate,
|
|
112
|
-
},
|
|
122
|
+
bucket: bucketName,
|
|
123
|
+
sourceKey: file.key,
|
|
124
|
+
key: updatedKey,
|
|
113
125
|
});
|
|
114
126
|
});
|
|
115
127
|
updatedGranules[granuleId].files = [...updatedFiles];
|
|
@@ -124,37 +136,35 @@ async function updateGranuleMetadata(granulesObject, collection, cmrFiles, bucke
|
|
|
124
136
|
* @param {Object} file - granule file to be moved
|
|
125
137
|
* @param {string} sourceBucket - source bucket location of files
|
|
126
138
|
* @param {string} duplicateHandling - how to handle duplicate files
|
|
127
|
-
* @param {BucketsConfig} bucketsConfig - BucketsConfig instance
|
|
128
139
|
* @param {boolean} markDuplicates - Override to handle cmr metadata files that
|
|
129
140
|
* shouldn't be marked as duplicates
|
|
141
|
+
* @param {number} s3MultipartChunksizeMb - S3 multipart upload chunk size in MB
|
|
130
142
|
* @returns {Array<Object>} returns the file moved and the renamed existing duplicates if any
|
|
131
143
|
*/
|
|
132
144
|
async function moveFileRequest(
|
|
133
145
|
file,
|
|
134
146
|
sourceBucket,
|
|
135
147
|
duplicateHandling,
|
|
136
|
-
|
|
137
|
-
|
|
148
|
+
markDuplicates = true,
|
|
149
|
+
s3MultipartChunksizeMb
|
|
138
150
|
) {
|
|
139
|
-
const fileStagingDir = file.fileStagingDir || 'file-staging';
|
|
140
151
|
const source = {
|
|
141
152
|
Bucket: sourceBucket,
|
|
142
|
-
Key:
|
|
153
|
+
Key: file.sourceKey,
|
|
143
154
|
};
|
|
144
155
|
const target = {
|
|
145
156
|
Bucket: file.bucket,
|
|
146
|
-
Key: file.
|
|
157
|
+
Key: file.key,
|
|
147
158
|
};
|
|
148
159
|
|
|
149
160
|
// Due to S3's eventual consistency model, we need to make sure that the
|
|
150
161
|
// source object is available in S3.
|
|
151
|
-
await waitForObjectToExist({ bucket: source.Bucket, key: source.Key });
|
|
152
|
-
|
|
162
|
+
await S3.waitForObjectToExist({ bucket: source.Bucket, key: source.Key });
|
|
153
163
|
// the file moved to destination
|
|
154
164
|
const fileMoved = { ...file };
|
|
155
|
-
delete fileMoved.
|
|
165
|
+
delete fileMoved.sourceKey;
|
|
156
166
|
|
|
157
|
-
const s3ObjAlreadyExists = await s3ObjectExists(target);
|
|
167
|
+
const s3ObjAlreadyExists = await S3.s3ObjectExists(target);
|
|
158
168
|
log.debug(`file ${target.Key} exists in ${target.Bucket}: ${s3ObjAlreadyExists}`);
|
|
159
169
|
|
|
160
170
|
let versionedFiles = [];
|
|
@@ -167,22 +177,21 @@ async function moveFileRequest(
|
|
|
167
177
|
duplicateHandling,
|
|
168
178
|
});
|
|
169
179
|
} else {
|
|
170
|
-
|
|
180
|
+
const chunkSize = s3MultipartChunksizeMb ? Number(s3MultipartChunksizeMb) * MB : undefined;
|
|
181
|
+
await S3.moveObject({
|
|
171
182
|
sourceBucket: source.Bucket,
|
|
172
183
|
sourceKey: source.Key,
|
|
173
184
|
destinationBucket: target.Bucket,
|
|
174
185
|
destinationKey: target.Key,
|
|
175
186
|
copyTags: true,
|
|
187
|
+
chunkSize,
|
|
176
188
|
});
|
|
177
189
|
}
|
|
178
190
|
|
|
179
191
|
const renamedFiles = versionedFiles.map((f) => ({
|
|
180
192
|
bucket: f.Bucket,
|
|
181
|
-
|
|
182
|
-
filename: buildS3Uri(f.Bucket, f.Key),
|
|
183
|
-
filepath: f.Key,
|
|
193
|
+
key: f.Key,
|
|
184
194
|
size: f.size,
|
|
185
|
-
url_path: file.url_path,
|
|
186
195
|
}));
|
|
187
196
|
|
|
188
197
|
// return both file moved and renamed files
|
|
@@ -190,34 +199,32 @@ async function moveFileRequest(
|
|
|
190
199
|
}
|
|
191
200
|
|
|
192
201
|
/**
|
|
193
|
-
* Move all files in a collection of granules from staging location
|
|
202
|
+
* Move all files in a collection of granules from staging location to final location,
|
|
194
203
|
* and update granule files to include renamed files if any.
|
|
195
204
|
*
|
|
196
205
|
* @param {Object} granulesObject - an object of the granules where the key is the granuleId
|
|
197
206
|
* @param {string} sourceBucket - source bucket location of files
|
|
198
207
|
* @param {string} duplicateHandling - how to handle duplicate files
|
|
199
|
-
* @param {
|
|
208
|
+
* @param {number} s3MultipartChunksizeMb - S3 multipart upload chunk size in MB
|
|
200
209
|
* @returns {Object} the object with updated granules
|
|
201
210
|
*/
|
|
202
211
|
async function moveFilesForAllGranules(
|
|
203
212
|
granulesObject,
|
|
204
213
|
sourceBucket,
|
|
205
214
|
duplicateHandling,
|
|
206
|
-
|
|
215
|
+
s3MultipartChunksizeMb
|
|
207
216
|
) {
|
|
208
217
|
const moveFileRequests = Object.keys(granulesObject).map(async (granuleKey) => {
|
|
209
218
|
const granule = granulesObject[granuleKey];
|
|
210
219
|
const filesToMove = granule.files.filter((file) => !isCMRFile(file));
|
|
211
220
|
const cmrFiles = granule.files.filter((file) => isCMRFile(file));
|
|
212
221
|
const filesMoved = await Promise.all(
|
|
213
|
-
filesToMove.map(
|
|
214
|
-
|
|
215
|
-
)
|
|
222
|
+
filesToMove.map((file) =>
|
|
223
|
+
moveFileRequest(file, sourceBucket, duplicateHandling, true, s3MultipartChunksizeMb))
|
|
216
224
|
);
|
|
217
|
-
const markDuplicates = false;
|
|
218
225
|
const cmrFilesMoved = await Promise.all(
|
|
219
226
|
cmrFiles.map(
|
|
220
|
-
(file) => moveFileRequest(file, sourceBucket, 'replace',
|
|
227
|
+
(file) => moveFileRequest(file, sourceBucket, 'replace', false)
|
|
221
228
|
)
|
|
222
229
|
);
|
|
223
230
|
granule.files = flatten(filesMoved).concat(flatten(cmrFilesMoved));
|
|
@@ -250,11 +257,17 @@ async function moveGranules(event) {
|
|
|
250
257
|
const bucketsConfig = new BucketsConfig(config.buckets);
|
|
251
258
|
|
|
252
259
|
const moveStagedFiles = get(config, 'moveStagedFiles', true);
|
|
260
|
+
const s3MultipartChunksizeMb = config.s3MultipartChunksizeMb
|
|
261
|
+
? config.s3MultipartChunksizeMb : process.env.default_s3_multipart_chunksize_mb;
|
|
253
262
|
|
|
254
263
|
const duplicateHandling = duplicateHandlingType(event);
|
|
255
264
|
|
|
265
|
+
log.debug(`moveGranules config duplicateHandling: ${duplicateHandling}, moveStagedFiles: ${moveStagedFiles}, s3MultipartChunksizeMb: ${s3MultipartChunksizeMb}`);
|
|
266
|
+
|
|
256
267
|
const granulesInput = event.input.granules;
|
|
257
|
-
|
|
268
|
+
|
|
269
|
+
const filterFunc = (fileobject) => isCMRFile(fileobject) || isISOFile(fileobject);
|
|
270
|
+
const cmrFiles = granulesToCmrFileObjects(granulesInput, filterFunc);
|
|
258
271
|
const granulesByGranuleId = keyBy(granulesInput, 'granuleId');
|
|
259
272
|
|
|
260
273
|
let movedGranulesByGranuleId;
|
|
@@ -269,13 +282,16 @@ async function moveGranules(event) {
|
|
|
269
282
|
|
|
270
283
|
// Move files from staging location to final location
|
|
271
284
|
movedGranulesByGranuleId = await moveFilesForAllGranules(
|
|
272
|
-
granulesToMove, config.bucket, duplicateHandling,
|
|
285
|
+
granulesToMove, config.bucket, duplicateHandling, s3MultipartChunksizeMb
|
|
273
286
|
);
|
|
274
287
|
} else {
|
|
275
288
|
movedGranulesByGranuleId = granulesByGranuleId;
|
|
276
289
|
}
|
|
277
290
|
|
|
291
|
+
const granuleDuplicates = buildGranuleDuplicatesObject(movedGranulesByGranuleId);
|
|
292
|
+
|
|
278
293
|
return {
|
|
294
|
+
granuleDuplicates,
|
|
279
295
|
granules: Object.values(movedGranulesByGranuleId),
|
|
280
296
|
};
|
|
281
297
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cumulus/move-granules",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "10.0.1",
|
|
4
4
|
"description": "Move granule files from staging to final location",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"directories": {
|
|
@@ -18,7 +18,8 @@
|
|
|
18
18
|
"node": ">=12.18.0"
|
|
19
19
|
},
|
|
20
20
|
"scripts": {
|
|
21
|
-
"
|
|
21
|
+
"generate-task-schemas": "npx generate-task-schemas . files",
|
|
22
|
+
"build": "rm -rf dist && mkdir dist && npm run generate-task-schemas && cp -R schemas dist/ && ../../node_modules/.bin/webpack",
|
|
22
23
|
"package": "npm run build && (cd dist && node ../../../bin/zip.js lambda.zip index.js schemas)",
|
|
23
24
|
"test": "../../node_modules/.bin/ava",
|
|
24
25
|
"test:coverage": "../../node_modules/.bin/nyc npm test",
|
|
@@ -36,14 +37,17 @@
|
|
|
36
37
|
"author": "Cumulus Authors",
|
|
37
38
|
"license": "Apache-2.0",
|
|
38
39
|
"dependencies": {
|
|
39
|
-
"@cumulus/aws-client": "
|
|
40
|
-
"@cumulus/cmrjs": "
|
|
41
|
-
"@cumulus/common": "
|
|
42
|
-
"@cumulus/cumulus-message-adapter-js": "2.0.
|
|
43
|
-
"@cumulus/distribution-utils": "
|
|
44
|
-
"@cumulus/errors": "
|
|
45
|
-
"@cumulus/ingest": "
|
|
40
|
+
"@cumulus/aws-client": "10.0.1",
|
|
41
|
+
"@cumulus/cmrjs": "10.0.1",
|
|
42
|
+
"@cumulus/common": "10.0.1",
|
|
43
|
+
"@cumulus/cumulus-message-adapter-js": "2.0.4",
|
|
44
|
+
"@cumulus/distribution-utils": "10.0.1",
|
|
45
|
+
"@cumulus/errors": "10.0.1",
|
|
46
|
+
"@cumulus/ingest": "10.0.1",
|
|
46
47
|
"lodash": "^4.17.20"
|
|
47
48
|
},
|
|
48
|
-
"
|
|
49
|
+
"devDependencies": {
|
|
50
|
+
"@cumulus/schemas": "10.0.1"
|
|
51
|
+
},
|
|
52
|
+
"gitHead": "49c3c88336838184f22f35fbce298c71cd269138"
|
|
49
53
|
}
|
package/schemas/config.json
CHANGED
|
@@ -71,6 +71,10 @@
|
|
|
71
71
|
"description": "Specifies how duplicate filenames should be handled. `error` will throw an error that, if not caught, will fail the task/workflow execution. `version` will add a suffix to the existing filename to avoid a clash.",
|
|
72
72
|
"enum": ["replace", "version", "skip", "error"],
|
|
73
73
|
"default": "error"
|
|
74
|
+
},
|
|
75
|
+
"s3MultipartChunksizeMb": {
|
|
76
|
+
"type": ["number", "null"],
|
|
77
|
+
"description": "S3 multipart upload chunk size in MB. If none is specified, the default default_s3_multipart_chunksize_mb is used."
|
|
74
78
|
}
|
|
75
79
|
}
|
|
76
80
|
}
|
package/schemas/input.json
CHANGED
|
@@ -11,7 +11,10 @@
|
|
|
11
11
|
"description": "Array of all granules",
|
|
12
12
|
"items": {
|
|
13
13
|
"type": "object",
|
|
14
|
-
"required": [
|
|
14
|
+
"required": [
|
|
15
|
+
"granuleId",
|
|
16
|
+
"files"
|
|
17
|
+
],
|
|
15
18
|
"properties": {
|
|
16
19
|
"granuleId": {
|
|
17
20
|
"type": "string"
|
|
@@ -19,19 +22,43 @@
|
|
|
19
22
|
"files": {
|
|
20
23
|
"type": "array",
|
|
21
24
|
"items": {
|
|
25
|
+
"additionalProperties": false,
|
|
22
26
|
"type": "object",
|
|
23
|
-
"required": [
|
|
27
|
+
"required": [
|
|
28
|
+
"bucket",
|
|
29
|
+
"key"
|
|
30
|
+
],
|
|
24
31
|
"properties": {
|
|
25
|
-
"
|
|
32
|
+
"bucket": {
|
|
33
|
+
"description": "Bucket where file is archived in S3",
|
|
26
34
|
"type": "string"
|
|
27
35
|
},
|
|
28
|
-
"
|
|
36
|
+
"checksum": {
|
|
37
|
+
"description": "Checksum value for file",
|
|
29
38
|
"type": "string"
|
|
30
39
|
},
|
|
31
|
-
"
|
|
40
|
+
"checksumType": {
|
|
41
|
+
"description": "Type of checksum (e.g. md5, sha256, etc)",
|
|
32
42
|
"type": "string"
|
|
33
43
|
},
|
|
34
|
-
"
|
|
44
|
+
"fileName": {
|
|
45
|
+
"description": "Name of file (e.g. file.txt)",
|
|
46
|
+
"type": "string"
|
|
47
|
+
},
|
|
48
|
+
"key": {
|
|
49
|
+
"description": "S3 Key for archived file",
|
|
50
|
+
"type": "string"
|
|
51
|
+
},
|
|
52
|
+
"size": {
|
|
53
|
+
"description": "Size of file (in bytes)",
|
|
54
|
+
"type": "number"
|
|
55
|
+
},
|
|
56
|
+
"source": {
|
|
57
|
+
"description": "Source URI of the file from origin system (e.g. S3, FTP, HTTP)",
|
|
58
|
+
"type": "string"
|
|
59
|
+
},
|
|
60
|
+
"type": {
|
|
61
|
+
"description": "Type of file (e.g. data, metadata, browse)",
|
|
35
62
|
"type": "string"
|
|
36
63
|
}
|
|
37
64
|
}
|
|
@@ -41,4 +68,4 @@
|
|
|
41
68
|
}
|
|
42
69
|
}
|
|
43
70
|
}
|
|
44
|
-
}
|
|
71
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "MoveGranulesInput",
|
|
3
|
+
"description": "Describes the input expected by the move-granules task",
|
|
4
|
+
"type": "object",
|
|
5
|
+
"required": [
|
|
6
|
+
"granules"
|
|
7
|
+
],
|
|
8
|
+
"properties": {
|
|
9
|
+
"granules": {
|
|
10
|
+
"type": "array",
|
|
11
|
+
"description": "Array of all granules",
|
|
12
|
+
"items": {
|
|
13
|
+
"type": "object",
|
|
14
|
+
"required": ["granuleId", "files"],
|
|
15
|
+
"properties": {
|
|
16
|
+
"granuleId": {
|
|
17
|
+
"type": "string"
|
|
18
|
+
},
|
|
19
|
+
"files": "{{files}}"
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
}
|