@cumulus/move-granules 20.2.1 → 21.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/index.js +1949 -153
- package/dist/lambda.zip +0 -0
- package/dist/schemas/config.json +2 -1
- package/index.js +302 -63
- package/package.json +11 -10
- package/schemas/config.json +2 -1
- package/types.js +82 -0
package/dist/lambda.zip
CHANGED
|
Binary file
|
package/dist/schemas/config.json
CHANGED
package/index.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
//@ts-check
|
|
2
|
+
|
|
1
3
|
'use strict';
|
|
2
4
|
|
|
3
5
|
const cumulusMessageAdapter = require('@cumulus/cumulus-message-adapter-js');
|
|
@@ -9,7 +11,9 @@ const path = require('path');
|
|
|
9
11
|
|
|
10
12
|
const S3 = require('@cumulus/aws-client/S3');
|
|
11
13
|
|
|
12
|
-
const { InvalidArgument } = require('@cumulus/errors');
|
|
14
|
+
const { InvalidArgument, ValidationError } = require('@cumulus/errors');
|
|
15
|
+
|
|
16
|
+
const { getRequiredEnvVar } = require('@cumulus/common/env');
|
|
13
17
|
|
|
14
18
|
const {
|
|
15
19
|
handleDuplicateFile,
|
|
@@ -24,15 +28,50 @@ const {
|
|
|
24
28
|
granulesToCmrFileObjects,
|
|
25
29
|
} = require('@cumulus/cmrjs');
|
|
26
30
|
|
|
31
|
+
const { getFileGranuleAndCollectionByBucketAndKey } = require('@cumulus/api-client/granules');
|
|
32
|
+
|
|
27
33
|
const BucketsConfig = require('@cumulus/common/BucketsConfig');
|
|
28
34
|
|
|
29
35
|
const { urlPathTemplate } = require('@cumulus/ingest/url-path-template');
|
|
30
36
|
const { isFileExtensionMatched } = require('@cumulus/message/utils');
|
|
37
|
+
const { constructCollectionId } = require('@cumulus/message/Collections');
|
|
31
38
|
const log = require('@cumulus/common/log');
|
|
32
39
|
|
|
40
|
+
// Import type definitions
|
|
41
|
+
/**
|
|
42
|
+
* @typedef {import('./types').BucketsConfigType} BucketsConfigType
|
|
43
|
+
* @typedef {import('./types').ApiGranule} ApiGranule
|
|
44
|
+
* @typedef {import('./types').ApiCollection} ApiCollection
|
|
45
|
+
* @typedef {import('./types').DuplicateHandling} DuplicateHandling
|
|
46
|
+
* @typedef {import('./types').MoveGranulesFile} MoveGranulesFile
|
|
47
|
+
* @typedef {import('./types').MoveGranulesFileWithSourceKey} MoveGranulesFileWithSourceKey
|
|
48
|
+
* @typedef {import('./types').MoveGranulesGranule} MoveGranulesGranule
|
|
49
|
+
* @typedef {import('./types').MoveGranulesGranuleOptionalFilesFields}
|
|
50
|
+
* MoveGranulesGranuleOptionalFilesFields
|
|
51
|
+
* @typedef {import('./types').GranulesObject} GranulesObject
|
|
52
|
+
* @typedef {import('./types').GranulesOutputObject} GranulesOutputObject
|
|
53
|
+
* @typedef {import('./types').CollectionFile} CollectionFile
|
|
54
|
+
* @typedef {import('./types').Collection} Collection
|
|
55
|
+
* @typedef {import('./types').S3Object} S3Object
|
|
56
|
+
* @typedef {import('@cumulus/cmrjs/cmr-utils').CmrFile} CmrFile
|
|
57
|
+
* @typedef {import('./types').GranuleFileInfo} GranuleFileInfo
|
|
58
|
+
*/
|
|
59
|
+
|
|
33
60
|
const MB = 1024 * 1024;
|
|
34
61
|
|
|
62
|
+
/**
|
|
63
|
+
* Builds a granule duplicates object from moved granules
|
|
64
|
+
*
|
|
65
|
+
* This function identifies files that were detected as duplicates during moving
|
|
66
|
+
* and builds an object mapping granule IDs to lists of duplicate files.
|
|
67
|
+
*
|
|
68
|
+
* @param {GranulesOutputObject} movedGranulesByGranuleId - Object mapping granule IDs
|
|
69
|
+
* to granule objects
|
|
70
|
+
* @returns {Object.<string, {files: MoveGranulesFile[]}>} Object
|
|
71
|
+
* containing duplicate file information
|
|
72
|
+
*/
|
|
35
73
|
function buildGranuleDuplicatesObject(movedGranulesByGranuleId) {
|
|
74
|
+
/** @type {Object.<string, {files: MoveGranulesFile[]}>} */
|
|
36
75
|
const duplicatesObject = {};
|
|
37
76
|
Object.keys(movedGranulesByGranuleId).forEach((k) => {
|
|
38
77
|
duplicatesObject[k] = {
|
|
@@ -53,11 +92,14 @@ function buildGranuleDuplicatesObject(movedGranulesByGranuleId) {
|
|
|
53
92
|
* Validates the file matched only one collection.file and has a valid bucket
|
|
54
93
|
* config.
|
|
55
94
|
*
|
|
56
|
-
*
|
|
57
|
-
*
|
|
58
|
-
*
|
|
59
|
-
* @param {
|
|
60
|
-
* @
|
|
95
|
+
* This function checks that a file name matches exactly one collection file pattern
|
|
96
|
+
* and that the specified bucket exists in the configuration.
|
|
97
|
+
*
|
|
98
|
+
* @param {CollectionFile[]} match - list of matched collection.file
|
|
99
|
+
* @param {BucketsConfigType} bucketsConfig - instance describing stack configuration
|
|
100
|
+
* @param {string} fileName - the file name tested
|
|
101
|
+
* @param {CollectionFile[]} fileSpecs - array of collection file specifications objects
|
|
102
|
+
* @throws {InvalidArgument} - If match is invalid, throws an error
|
|
61
103
|
*/
|
|
62
104
|
function validateMatch(match, bucketsConfig, fileName, fileSpecs) {
|
|
63
105
|
const collectionRegexes = fileSpecs.map((spec) => spec.regex);
|
|
@@ -74,19 +116,19 @@ function validateMatch(match, bucketsConfig, fileName, fileSpecs) {
|
|
|
74
116
|
}
|
|
75
117
|
|
|
76
118
|
/**
|
|
77
|
-
*
|
|
78
|
-
*
|
|
79
|
-
* `collection.files.regexp`. CMR metadata files have a file type added.
|
|
119
|
+
* This function determines the final destinations for granule files and updates
|
|
120
|
+
* their metadata accordingly, applying URL templates and setting appropriate buckets.
|
|
80
121
|
*
|
|
81
|
-
* @param {
|
|
82
|
-
* @param {
|
|
83
|
-
*
|
|
84
|
-
* @param {
|
|
85
|
-
* @param {
|
|
86
|
-
* @returns {
|
|
87
|
-
* the correct target buckets/paths/and s3uri filenames
|
|
122
|
+
* @param {GranulesObject} granulesObject - an object of granules where the key is the granuleId
|
|
123
|
+
* @param {Collection} collection - configuration object defining a collection of
|
|
124
|
+
* granules and their files
|
|
125
|
+
* @param {CmrFile[]} cmrFiles - array of objects that include CMR xmls uris and granuleIds
|
|
126
|
+
* @param {BucketsConfigType} bucketsConfig - instance associated with the stack
|
|
127
|
+
* @returns {Promise<GranulesObject>} new granulesObject where each granule's files are updated with
|
|
128
|
+
* the correct target buckets/paths/and s3uri filenames
|
|
88
129
|
*/
|
|
89
130
|
async function updateGranuleMetadata(granulesObject, collection, cmrFiles, bucketsConfig) {
|
|
131
|
+
/** @type {GranulesObject} */
|
|
90
132
|
const updatedGranules = {};
|
|
91
133
|
const cmrFileNames = cmrFiles.map((f) => path.basename(f.key));
|
|
92
134
|
const fileSpecs = collection.files;
|
|
@@ -130,25 +172,92 @@ async function updateGranuleMetadata(granulesObject, collection, cmrFiles, bucke
|
|
|
130
172
|
return updatedGranules;
|
|
131
173
|
}
|
|
132
174
|
|
|
175
|
+
/**
 * Checks for cross-collection collisions for a given file.
 *
 * Looks up the granule and collection currently associated with the file
 * identified by its S3 `bucket` and `key`. If the lookup reports a collection
 * ID and that ID differs from the provided `granuleCollectionId`, an
 * `InvalidArgument` error is thrown, indicating a cross-collection collision.
 * A lookup result that carries no collection ID is not treated as a collision.
 *
 * @param {object} params - The parameters for the collision check
 * @param {string} params.bucket - The S3 bucket name where the file is located
 * @param {string} params.key - The S3 key (path) of the file
 * @param {string} params.granuleCollectionId - The ID of the collection that the granule belongs to
 * @param {Function} [params.getFileGranuleAndCollectionByBucketAndKeyMethod] - Lookup
 * function used to fetch the file's granule and collection; defaults to
 * `getFileGranuleAndCollectionByBucketAndKey` and exists so tests can inject a mock.
 * It is called with `{ bucket, key, prefix }` and must resolve to an object whose
 * `body` is a JSON string containing `granuleId` and `collectionId`.
 * @returns {Promise<void>} A Promise that resolves if no collision is detected
 * @throws {ValidationError} if `granuleCollectionId` is not provided
 * @throws {InvalidArgument} if the file is associated with a different collection
 */
async function _checkCrossCollectionCollisions({
  bucket,
  key,
  granuleCollectionId,
  getFileGranuleAndCollectionByBucketAndKeyMethod = getFileGranuleAndCollectionByBucketAndKey,
}) {
  if (!granuleCollectionId) {
    // Without the granule's own collection ID there is nothing to compare against
    throw new ValidationError(
      `File ${key} in bucket ${bucket} has an unknown collection. Cannot determine if it is a cross-collection collision.`
    );
  }

  const apiResponse = await getFileGranuleAndCollectionByBucketAndKeyMethod({
    bucket,
    key,
    prefix: getRequiredEnvVar('stackName'),
  });
  const { granuleId, collectionId } = JSON.parse(apiResponse.body);

  // granuleCollectionId is guaranteed truthy by the guard above, so only the
  // looked-up collectionId needs a presence check before comparing.
  if (collectionId && collectionId !== granuleCollectionId) {
    // The existing file belongs to another collection: always fail the move
    log.error('Cross granule collection detected');
    log.error(`File ${key} in bucket ${bucket} is associated with granuleId ${granuleId}, collection ${collectionId}`);
    throw new InvalidArgument(
      `File already exists in bucket ${bucket} with key ${key} ` +
      `for collection ${collectionId} and granuleId: ${granuleId}, ` +
      `but is being moved for collection ${granuleCollectionId}.`
    );
  }

  log.debug(`File ${key} in bucket ${bucket} is not associated with a granule in a different collection. ${JSON.stringify(apiResponse)}`);
}
|
|
226
|
+
|
|
133
227
|
/**
|
|
134
228
|
* Move file from source bucket to target location, and return the file moved.
|
|
135
229
|
* In case of 'version' duplicateHandling, also return the renamed files.
|
|
136
230
|
*
|
|
137
|
-
*
|
|
138
|
-
*
|
|
139
|
-
*
|
|
140
|
-
* @param {
|
|
141
|
-
*
|
|
142
|
-
* @param {
|
|
143
|
-
* @
|
|
231
|
+
* This function moves a single granule file from its source location to its target location,
|
|
232
|
+
* handling duplicate files according to the specified duplicate handling strategy.
|
|
233
|
+
*
|
|
234
|
+
* @param {object} params - Move file parameters
|
|
235
|
+
* @param {MoveGranulesFileWithSourceKey} params.file - granule file to be moved
|
|
236
|
+
* @param {string} params.sourceBucket - source bucket location of files
|
|
237
|
+
* @param {DuplicateHandling} params.duplicateHandling - how to handle duplicate files
|
|
238
|
+
* @param {string} params.granuleCollectionId - Collection ID of the granule
|
|
239
|
+
* @param {boolean} [params.markDuplicates=true] - Override to handle cmr
|
|
240
|
+
* metadata files that shouldn't be marked as duplicates
|
|
241
|
+
* @param {number} [params.s3MultipartChunksizeMb] - S3 multipart upload chunk
|
|
242
|
+
* size in MB
|
|
243
|
+
* @param {boolean} [params.checkCrossCollectionCollisions=true] - Whether to
|
|
244
|
+
* check for cross-collection collisions
|
|
245
|
+
* @param {object} [params.testOverrides={}] - Test overrides
|
|
246
|
+
* @param {function} [params.testOverrides.getFileGranuleAndCollectionByBucketAndKeyMethod] -
|
|
247
|
+
* Method to get file details
|
|
248
|
+
* @returns {Promise<MoveGranulesFile[]>} returns the file moved and the renamed
|
|
249
|
+
* existing duplicates if any
|
|
144
250
|
*/
|
|
145
|
-
async function moveFileRequest(
|
|
251
|
+
async function moveFileRequest({
|
|
146
252
|
file,
|
|
147
253
|
sourceBucket,
|
|
148
254
|
duplicateHandling,
|
|
149
255
|
markDuplicates = true,
|
|
150
|
-
s3MultipartChunksizeMb
|
|
151
|
-
|
|
256
|
+
s3MultipartChunksizeMb,
|
|
257
|
+
checkCrossCollectionCollisions = true,
|
|
258
|
+
granuleCollectionId,
|
|
259
|
+
testOverrides = {},
|
|
260
|
+
}) {
|
|
152
261
|
const source = {
|
|
153
262
|
Bucket: sourceBucket,
|
|
154
263
|
Key: file.sourceKey,
|
|
@@ -161,7 +270,10 @@ async function moveFileRequest(
|
|
|
161
270
|
// Due to S3's eventual consistency model, we need to make sure that the
|
|
162
271
|
// source object is available in S3.
|
|
163
272
|
await S3.waitForObjectToExist({ bucket: source.Bucket, key: source.Key });
|
|
273
|
+
|
|
164
274
|
// the file moved to destination
|
|
275
|
+
|
|
276
|
+
/** @type {MoveGranulesFile} */
|
|
165
277
|
const fileMoved = { ...file };
|
|
166
278
|
delete fileMoved.sourceKey;
|
|
167
279
|
|
|
@@ -170,8 +282,20 @@ async function moveFileRequest(
|
|
|
170
282
|
|
|
171
283
|
let versionedFiles = [];
|
|
172
284
|
if (s3ObjAlreadyExists) {
|
|
285
|
+
// If there is a collision, per IART-924 we need to check if it's a cross
|
|
286
|
+
// collection collision and fail in all cases if it is
|
|
287
|
+
if (checkCrossCollectionCollisions) {
|
|
288
|
+
await _checkCrossCollectionCollisions({
|
|
289
|
+
bucket: target.Bucket,
|
|
290
|
+
key: target.Key,
|
|
291
|
+
granuleCollectionId,
|
|
292
|
+
getFileGranuleAndCollectionByBucketAndKeyMethod:
|
|
293
|
+
testOverrides.getFileGranuleAndCollectionByBucketAndKeyMethod
|
|
294
|
+
|| getFileGranuleAndCollectionByBucketAndKey,
|
|
295
|
+
});
|
|
296
|
+
}
|
|
173
297
|
if (markDuplicates) fileMoved.duplicate_found = true;
|
|
174
|
-
|
|
298
|
+
|
|
175
299
|
versionedFiles = await handleDuplicateFile({
|
|
176
300
|
source,
|
|
177
301
|
target,
|
|
@@ -199,35 +323,130 @@ async function moveFileRequest(
|
|
|
199
323
|
return [fileMoved, ...renamedFiles];
|
|
200
324
|
}
|
|
201
325
|
|
|
326
|
+
/**
 * Resolves the collection ID to use for a granule.
 *
 * The granule's own `dataType`/`version` pair takes precedence; when either is
 * missing, the `name`/`version` pair of the collection configuration is used
 * instead. If neither pair is complete, no collection ID can be built.
 *
 * @param {MoveGranulesGranule} granule - The granule object
 * @param {Collection} configCollection - The collection configuration
 * @returns {string|undefined} The constructed collection ID, or `undefined`
 * when neither the granule nor the configuration provides both parts
 */
function determineGranuleCollectionId(granule, configCollection) {
  const { dataType, version: granuleVersion } = granule;
  if (dataType && granuleVersion) {
    return constructCollectionId(dataType, granuleVersion);
  }

  const { name, version: configVersion } = configCollection;
  return name && configVersion
    ? constructCollectionId(name, configVersion)
    : undefined;
}
|
|
347
|
+
|
|
348
|
+
/**
 * Process and move a list of files with given parameters
 *
 * Issues one `moveFileRequest` per file, all started together, and resolves
 * with their results in the same order as `files`. CMR files are always moved
 * with 'replace' duplicate handling and are not marked as duplicates; all
 * other files use the caller-supplied `duplicateHandling` and are marked.
 *
 * Note that the missing-`duplicateHandling` error is thrown synchronously,
 * before any move is started, rather than as a rejected promise.
 *
 * @param {MoveGranulesFileWithSourceKey[]} files - List of files to move
 * @param {object} moveParams - Common parameters for moving files
 * @param {string} moveParams.sourceBucket - Source bucket location
 * @param {DuplicateHandling} [moveParams.duplicateHandling] - How to handle duplicates
 * @param {number} [moveParams.s3MultipartChunksizeMb] - Chunk size for multipart uploads
 * @param {boolean} [moveParams.checkCrossCollectionCollisions] - Whether to check
 * cross-collection collisions
 * @param {string} moveParams.granuleCollectionId - Collection ID
 * @param {object} [moveParams.testOverrides] - Test overrides
 * @param {boolean} [isCmrFile=false] - Whether these are CMR files
 * @returns {Promise<MoveGranulesFile[][]>} Moved files results
 * @throws {Error} If duplicateHandling is not provided for non-CMR files
 */
function processAndMoveFiles(files, moveParams, isCmrFile = false) {
  /** @type {DuplicateHandling} */
  let duplicateHandling;
  if (isCmrFile) {
    // CMR metadata files always overwrite whatever is at the target location
    duplicateHandling = 'replace';
  } else {
    if (!moveParams.duplicateHandling) {
      throw new Error('duplicateHandling is required when processing non-CMR files');
    }
    duplicateHandling = moveParams.duplicateHandling;
  }

  return Promise.all(
    files.map((file) =>
      moveFileRequest({
        ...moveParams,
        file,
        // Listed after the spread so the resolved value overrides moveParams
        duplicateHandling,
        markDuplicates: !isCmrFile,
      }))
  );
}
|
|
393
|
+
|
|
202
394
|
/**
|
|
203
395
|
* Move all files in a collection of granules from staging location to final location,
|
|
204
396
|
* and update granule files to include renamed files if any.
|
|
205
397
|
*
|
|
206
|
-
*
|
|
207
|
-
*
|
|
208
|
-
*
|
|
209
|
-
* @param {
|
|
210
|
-
* @
|
|
398
|
+
* This function processes all the granules and moves their files to the target locations,
|
|
399
|
+
* handling CMR files and regular files appropriately and updating granule metadata.
|
|
400
|
+
*
|
|
401
|
+
* @param {object} params - Move parameters
|
|
402
|
+
* @param {Collection} params.configCollection - Collection configuration
|
|
403
|
+
* @param {GranulesObject} params.granulesObject - an object of granules where key is granuleId
|
|
404
|
+
* @param {string} params.sourceBucket - source bucket location of files
|
|
405
|
+
* @param {DuplicateHandling} params.duplicateHandling - how to handle duplicate files
|
|
406
|
+
* @param {number} [params.s3MultipartChunksizeMb] - S3 multipart upload chunk size in MB
|
|
407
|
+
* @param {boolean} [params.checkCrossCollectionCollisions=true] - Whether to check
|
|
408
|
+
* for cross-collection collisions
|
|
409
|
+
* @param {object} [params.testOverrides={}] - Test overrides
|
|
410
|
+
* @returns {Promise<GranulesObject>} the object with updated granules
|
|
211
411
|
*/
|
|
212
|
-
async function moveFilesForAllGranules(
|
|
412
|
+
async function moveFilesForAllGranules({
|
|
413
|
+
configCollection,
|
|
213
414
|
granulesObject,
|
|
214
415
|
sourceBucket,
|
|
215
416
|
duplicateHandling,
|
|
216
|
-
s3MultipartChunksizeMb
|
|
217
|
-
|
|
417
|
+
s3MultipartChunksizeMb,
|
|
418
|
+
checkCrossCollectionCollisions = true,
|
|
419
|
+
testOverrides = {},
|
|
420
|
+
}) {
|
|
218
421
|
const moveFileRequests = Object.keys(granulesObject).map(async (granuleKey) => {
|
|
219
|
-
const
|
|
220
|
-
const
|
|
221
|
-
|
|
222
|
-
const
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
);
|
|
226
|
-
const cmrFilesMoved = await Promise.all(
|
|
227
|
-
cmrFiles.map(
|
|
228
|
-
(file) => moveFileRequest(file, sourceBucket, 'replace', false)
|
|
229
|
-
)
|
|
422
|
+
const filesToMove = granulesObject[granuleKey].files.filter((file) => !isCMRFile(file));
|
|
423
|
+
const cmrFiles = granulesObject[granuleKey].files.filter((file) => isCMRFile(file));
|
|
424
|
+
|
|
425
|
+
const granuleCollectionId = determineGranuleCollectionId(
|
|
426
|
+
granulesObject[granuleKey],
|
|
427
|
+
configCollection
|
|
230
428
|
);
|
|
429
|
+
if (!granuleCollectionId) {
|
|
430
|
+
throw new ValidationError(`Unable to determine collection ID for granule ${granuleKey}`);
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
/** @type {MoveGranulesGranuleOptionalFilesFields} */
|
|
434
|
+
const granule = granulesObject[granuleKey];
|
|
435
|
+
const commonMoveParams = {
|
|
436
|
+
sourceBucket,
|
|
437
|
+
checkCrossCollectionCollisions,
|
|
438
|
+
granuleCollectionId,
|
|
439
|
+
testOverrides,
|
|
440
|
+
};
|
|
441
|
+
|
|
442
|
+
const filesMoved = await processAndMoveFiles(filesToMove, {
|
|
443
|
+
...commonMoveParams,
|
|
444
|
+
duplicateHandling,
|
|
445
|
+
s3MultipartChunksizeMb,
|
|
446
|
+
});
|
|
447
|
+
|
|
448
|
+
const cmrFilesMoved = await processAndMoveFiles(cmrFiles, commonMoveParams, true);
|
|
449
|
+
|
|
231
450
|
granule.files = flatten(filesMoved).concat(flatten(cmrFilesMoved));
|
|
232
451
|
});
|
|
233
452
|
|
|
@@ -237,20 +456,27 @@ async function moveFilesForAllGranules(
|
|
|
237
456
|
|
|
238
457
|
/**
|
|
239
458
|
* Move Granule files to final location.
|
|
240
|
-
* See the schemas directory for detailed input and output schemas.
|
|
241
459
|
*
|
|
242
|
-
*
|
|
243
|
-
*
|
|
460
|
+
* This function is the main entry point for the moveGranules task. It takes granules
|
|
461
|
+
* from the input, updates their metadata based on collection configuration, and
|
|
462
|
+
* moves their files to the target locations.
|
|
463
|
+
*
|
|
464
|
+
* @param {object} event - Lambda function payload
|
|
465
|
+
* @param {object} event.config - the config object
|
|
244
466
|
* @param {string} event.config.bucket - AWS S3 bucket that contains the granule files
|
|
245
|
-
* @param {
|
|
467
|
+
* @param {object} event.config.buckets - Buckets config
|
|
246
468
|
* @param {string} event.config.distribution_endpoint - distribution endpoint for the api
|
|
247
|
-
* @param {
|
|
248
|
-
* https://nasa.github.io/cumulus/docs/data-cookbooks/setup#collections
|
|
469
|
+
* @param {Collection} event.config.collection - collection configuration
|
|
249
470
|
* @param {boolean} [event.config.moveStagedFiles=true] - set to false to skip moving files
|
|
250
|
-
*
|
|
251
|
-
* @param {
|
|
252
|
-
*
|
|
253
|
-
* @
|
|
471
|
+
* @param {number} [event.config.s3MultipartChunksizeMb] - S3 multipart upload chunk size in MB
|
|
472
|
+
* @param {boolean} [event.config.checkCrossCollectionCollisions=true] - Whether to check for
|
|
473
|
+
* cross-collection collisions
|
|
474
|
+
* @param {object} event.input - a granules object containing an array of granules
|
|
475
|
+
* @param {MoveGranulesGranule[]} event.input.granules - Array of granule objects
|
|
476
|
+
* @param {object} [event.testOverrides] - Test overrides
|
|
477
|
+
* @returns {Promise<{granuleDuplicates: Record<string, {files: MoveGranulesFile[]}>,
|
|
478
|
+
* granules: MoveGranulesGranuleOptionalFilesFields[]}>}
|
|
479
|
+
* Returns updated event object with moved granules and duplicate information
|
|
254
480
|
*/
|
|
255
481
|
async function moveGranules(event) {
|
|
256
482
|
// We have to post the meta-xml file of all output granules
|
|
@@ -258,6 +484,8 @@ async function moveGranules(event) {
|
|
|
258
484
|
const bucketsConfig = new BucketsConfig(config.buckets);
|
|
259
485
|
|
|
260
486
|
const moveStagedFiles = get(config, 'moveStagedFiles', true);
|
|
487
|
+
const checkCrossCollectionCollisions = get(config, 'checkCrossCollectionCollisions', true);
|
|
488
|
+
|
|
261
489
|
const s3MultipartChunksizeMb = config.s3MultipartChunksizeMb
|
|
262
490
|
? config.s3MultipartChunksizeMb : process.env.default_s3_multipart_chunksize_mb;
|
|
263
491
|
|
|
@@ -280,6 +508,7 @@ async function moveGranules(event) {
|
|
|
280
508
|
const cmrFiles = granulesToCmrFileObjects(granulesInput, filterFunc);
|
|
281
509
|
const granulesByGranuleId = keyBy(granulesInput, 'granuleId');
|
|
282
510
|
|
|
511
|
+
/** @type {GranulesOutputObject} */
|
|
283
512
|
let movedGranulesByGranuleId;
|
|
284
513
|
|
|
285
514
|
// allows us to disable moving the files
|
|
@@ -291,9 +520,15 @@ async function moveGranules(event) {
|
|
|
291
520
|
);
|
|
292
521
|
|
|
293
522
|
// Move files from staging location to final location
|
|
294
|
-
movedGranulesByGranuleId = await moveFilesForAllGranules(
|
|
295
|
-
|
|
296
|
-
|
|
523
|
+
movedGranulesByGranuleId = await moveFilesForAllGranules({
|
|
524
|
+
configCollection: config.collection,
|
|
525
|
+
granulesObject: granulesToMove,
|
|
526
|
+
sourceBucket: config.bucket,
|
|
527
|
+
duplicateHandling,
|
|
528
|
+
s3MultipartChunksizeMb: Number(s3MultipartChunksizeMb),
|
|
529
|
+
checkCrossCollectionCollisions,
|
|
530
|
+
testOverrides: get(event, 'testOverrides', {}),
|
|
531
|
+
});
|
|
297
532
|
} else {
|
|
298
533
|
movedGranulesByGranuleId = granulesByGranuleId;
|
|
299
534
|
}
|
|
@@ -309,10 +544,12 @@ async function moveGranules(event) {
|
|
|
309
544
|
/**
|
|
310
545
|
* Lambda handler
|
|
311
546
|
*
|
|
312
|
-
*
|
|
313
|
-
*
|
|
314
|
-
*
|
|
315
|
-
*
|
|
547
|
+
* This is the Lambda handler function that uses the Cumulus Message Adapter
|
|
548
|
+
* to run the moveGranules task.
|
|
549
|
+
*
|
|
550
|
+
* @param {object} event - a Cumulus Message
|
|
551
|
+
* @param {object} context - an AWS Lambda context
|
|
552
|
+
* @returns {Promise<object>} - Returns output from task.
|
|
316
553
|
*/
|
|
317
554
|
async function handler(event, context) {
|
|
318
555
|
return await cumulusMessageAdapter.runCumulusTask(moveGranules, event, context);
|
|
@@ -320,3 +557,5 @@ async function handler(event, context) {
|
|
|
320
557
|
|
|
321
558
|
exports.handler = handler;
|
|
322
559
|
exports.moveGranules = moveGranules;
|
|
560
|
+
exports._checkCrossCollectionCollisions = _checkCrossCollectionCollisions;
|
|
561
|
+
exports.determineGranuleCollectionId = determineGranuleCollectionId;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cumulus/move-granules",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "21.0.0",
|
|
4
4
|
"description": "Move granule files from staging to final location",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"directories": {
|
|
@@ -41,18 +41,19 @@
|
|
|
41
41
|
"author": "Cumulus Authors",
|
|
42
42
|
"license": "Apache-2.0",
|
|
43
43
|
"dependencies": {
|
|
44
|
-
"@cumulus/
|
|
45
|
-
"@cumulus/
|
|
46
|
-
"@cumulus/
|
|
44
|
+
"@cumulus/api-client": "21.0.0",
|
|
45
|
+
"@cumulus/aws-client": "21.0.0",
|
|
46
|
+
"@cumulus/cmrjs": "21.0.0",
|
|
47
|
+
"@cumulus/common": "21.0.0",
|
|
47
48
|
"@cumulus/cumulus-message-adapter-js": "2.3.0",
|
|
48
|
-
"@cumulus/distribution-utils": "
|
|
49
|
-
"@cumulus/errors": "
|
|
50
|
-
"@cumulus/ingest": "
|
|
51
|
-
"@cumulus/message": "
|
|
49
|
+
"@cumulus/distribution-utils": "21.0.0",
|
|
50
|
+
"@cumulus/errors": "21.0.0",
|
|
51
|
+
"@cumulus/ingest": "21.0.0",
|
|
52
|
+
"@cumulus/message": "21.0.0",
|
|
52
53
|
"lodash": "^4.17.21"
|
|
53
54
|
},
|
|
54
55
|
"devDependencies": {
|
|
55
|
-
"@cumulus/schemas": "
|
|
56
|
+
"@cumulus/schemas": "21.0.0"
|
|
56
57
|
},
|
|
57
|
-
"gitHead": "
|
|
58
|
+
"gitHead": "19bb3477969662a9e0b300f10f6df23b6c0654db"
|
|
58
59
|
}
|
package/schemas/config.json
CHANGED
package/types.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
//@ts-check
|
|
2
|
+
|
|
3
|
+
// Imported for JSDoc typedef
|
|
4
|
+
// eslint-disable-next-line no-unused-vars
|
|
5
|
+
const BucketsConfig = require('@cumulus/common/BucketsConfig');
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* @typedef {InstanceType<typeof BucketsConfig>} BucketsConfigType
|
|
9
|
+
* @typedef {import('@cumulus/types/api/granules').ApiGranule} ApiGranule
|
|
10
|
+
* @typedef {import('@cumulus/types/api/collections').PartialCollectionRecord} ApiCollection
|
|
11
|
+
* @typedef {import('@cumulus/types').DuplicateHandling} DuplicateHandling
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/**
|
|
15
|
+
* @typedef {object} MoveGranulesFile
|
|
16
|
+
* @property {string} bucket - S3 bucket name
|
|
17
|
+
* @property {string} key - S3 key
|
|
18
|
+
* @property {string} [sourceKey] - Original source key before move
|
|
19
|
+
* @property {string} [fileName] - File name
|
|
20
|
+
* @property {number} [size] - File size
|
|
21
|
+
* @property {string} [type] - File type
|
|
22
|
+
* @property {boolean} [duplicate_found] - Whether a duplicate was found
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @typedef {MoveGranulesFile & {sourceKey: string}} MoveGranulesFileWithSourceKey
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* @typedef {object} MoveGranulesGranule
|
|
31
|
+
* @property {string} granuleId - Granule ID
|
|
32
|
+
* @property {string} [producerGranuleId] - Producer granule ID
|
|
33
|
+
* @property {string} [dataType] - Data type
|
|
34
|
+
* @property {string} [version] - Version
|
|
35
|
+
* @property {Array<MoveGranulesFileWithSourceKey>} files - Granule files
|
|
36
|
+
*/
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* @typedef {object} MoveGranulesGranuleOptionalFilesFields
|
|
40
|
+
* @property {string} granuleId - Granule ID
|
|
41
|
+
* @property {string} [producerGranuleId] - Producer granule ID
|
|
42
|
+
* @property {string} [dataType] - Data type
|
|
43
|
+
* @property {string} [version] - Version
|
|
44
|
+
* @property {Array<MoveGranulesFile>} files - Granule files
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* @typedef {Object.<string, MoveGranulesGranule>} GranulesObject
|
|
49
|
+
* @typedef {Object.<string, MoveGranulesGranuleOptionalFilesFields>} GranulesOutputObject
|
|
50
|
+
*/
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* @typedef {object} CollectionFile
|
|
54
|
+
* @property {string} regex - Regular expression to match file
|
|
55
|
+
* @property {string} bucket - Bucket to store file
|
|
56
|
+
* @property {string} [url_path] - URL path template
|
|
57
|
+
*/
|
|
58
|
+
|
|
59
|
+
/**
|
|
60
|
+
* @typedef {object} Collection
|
|
61
|
+
* @property {string} [name] - Collection name
|
|
62
|
+
* @property {string} [version] - Collection version
|
|
63
|
+
* @property {string} [url_path] - Default URL path template
|
|
64
|
+
* @property {DuplicateHandling} [duplicateHandling] - Duplicate handling option
|
|
65
|
+
* @property {Array<CollectionFile>} files - File specifications
|
|
66
|
+
*/
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* @typedef {object} S3Object
|
|
70
|
+
* @property {string} Bucket - S3 bucket name
|
|
71
|
+
* @property {string} Key - S3 object key
|
|
72
|
+
* @property {number} [size] - object size
|
|
73
|
+
*/
|
|
74
|
+
|
|
75
|
+
/**
|
|
76
|
+
* @typedef {object} GranuleFileInfo
|
|
77
|
+
* @property {string} granuleId - The ID of the granule found for the file
|
|
78
|
+
* @property {string | null | undefined} [collectionId] - The ID of the
|
|
79
|
+
* collection associated with the file
|
|
80
|
+
*/
|
|
81
|
+
|
|
82
|
+
module.exports = {};
|