@cumulus/files-to-granules 20.3.0 → 21.0.0-echo10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +46 -13
- package/dist/lambda.zip +0 -0
- package/dist/schemas/config.json +12 -0
- package/index.js +44 -14
- package/package.json +6 -5
- package/schemas/config.json +12 -0
package/dist/lambda.zip
CHANGED
|
Binary file
|
package/dist/schemas/config.json
CHANGED
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
"granuleId": {
|
|
17
17
|
"type": "string"
|
|
18
18
|
},
|
|
19
|
+
"producerGranuleId": {
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
19
22
|
"files": {
|
|
20
23
|
"type": "array",
|
|
21
24
|
"items": {
|
|
@@ -39,6 +42,15 @@
|
|
|
39
42
|
"granuleIdExtraction": {
|
|
40
43
|
"type": "string",
|
|
41
44
|
"description": "The regex needed for extracting granuleId from filenames"
|
|
45
|
+
},
|
|
46
|
+
"matchFilesWithProducerGranuleId": {
|
|
47
|
+
"description": "When set to true/'true', use 'producerGranuleId' instead of the default 'granuleId' when mapping files to granules. Defaults to false.",
|
|
48
|
+
"anyOf": [
|
|
49
|
+
{ "type": "null"},
|
|
50
|
+
{ "type": "boolean" },
|
|
51
|
+
{ "type": "string", "enum": ["true", "false"] }
|
|
52
|
+
],
|
|
53
|
+
"default": false
|
|
42
54
|
}
|
|
43
55
|
}
|
|
44
56
|
}
|
package/index.js
CHANGED
|
@@ -8,6 +8,7 @@ const path = require('path');
|
|
|
8
8
|
const { getObjectSize, parseS3Uri } = require('@cumulus/aws-client/S3');
|
|
9
9
|
const { s3 } = require('@cumulus/aws-client/services');
|
|
10
10
|
const cumulusMessageAdapter = require('@cumulus/cumulus-message-adapter-js');
|
|
11
|
+
const { UnmetRequirementsError } = require('@cumulus/errors');
|
|
11
12
|
|
|
12
13
|
const { getGranuleId } = require('./utils');
|
|
13
14
|
|
|
@@ -36,15 +37,28 @@ async function fileObjectFromS3URI(s3URI) {
|
|
|
36
37
|
* Takes the files from input and granules and merges them into an object where
|
|
37
38
|
* each file is associated with its granuleId.
|
|
38
39
|
*
|
|
39
|
-
* @param {
|
|
40
|
-
* @param {Array<
|
|
41
|
-
* @param {
|
|
40
|
+
* @param {Object} params - params object
|
|
41
|
+
* @param {Array<string>} params.inputFiles - list of s3 files to add to the input granules
|
|
42
|
+
* @param {Array<Object>} params.inputGranules - an array of the granules
|
|
43
|
+
* @param {string} params.regex - regex needed to extract granuleId from filenames
|
|
44
|
+
* @param {boolean} params.matchFilesWithProducerGranuleId -
|
|
45
|
+
* If true, match files to granules using producerGranuleId. Else, granuleId.
|
|
46
|
+
* @param {Object} params.testMocks - Mocks used for testing.
|
|
42
47
|
* @returns {Object} inputGranules with updated file lists
|
|
43
48
|
*/
|
|
44
|
-
async function mergeInputFilesWithInputGranules(
|
|
49
|
+
async function mergeInputFilesWithInputGranules({
|
|
50
|
+
inputFiles,
|
|
51
|
+
inputGranules,
|
|
52
|
+
regex,
|
|
53
|
+
matchFilesWithProducerGranuleId,
|
|
54
|
+
testMocks,
|
|
55
|
+
}) {
|
|
45
56
|
// create hash list of the granules
|
|
46
57
|
// and a list of files
|
|
47
|
-
const granulesHash =
|
|
58
|
+
const granulesHash = matchFilesWithProducerGranuleId ?
|
|
59
|
+
keyBy(inputGranules, 'producerGranuleId') :
|
|
60
|
+
keyBy(inputGranules, 'granuleId');
|
|
61
|
+
|
|
48
62
|
const filesFromInputGranules = flatten(inputGranules.map((g) => g.files.map((f) => `s3://${f.bucket}/${f.key}`)));
|
|
49
63
|
|
|
50
64
|
// add input files to corresponding granules
|
|
@@ -56,11 +70,20 @@ async function mergeInputFilesWithInputGranules(inputFiles, inputGranules, regex
|
|
|
56
70
|
/* eslint-disable no-await-in-loop */
|
|
57
71
|
for (let i = 0; i < filesToAdd.length; i += 1) {
|
|
58
72
|
const f = filesToAdd[i];
|
|
59
|
-
const
|
|
73
|
+
const fileId = getGranuleId(f, regex);
|
|
60
74
|
try {
|
|
61
|
-
granulesHash[
|
|
75
|
+
granulesHash[fileId].files.push(
|
|
76
|
+
testMocks?.fileObjectFromS3URI ?
|
|
77
|
+
await testMocks.fileObjectFromS3URI(f) :
|
|
78
|
+
await fileObjectFromS3URI(f)
|
|
79
|
+
);
|
|
62
80
|
} catch (error) {
|
|
63
|
-
|
|
81
|
+
if (!granulesHash[fileId]) {
|
|
82
|
+
throw new UnmetRequirementsError(
|
|
83
|
+
`fileId ${fileId} does not match an input granule. Check that 'matchFilesWithProducerGranuleId' is configured as expected.`
|
|
84
|
+
);
|
|
85
|
+
}
|
|
86
|
+
throw new Error(`Failed adding ${f} to ${fileId}'s files: ${error.name} ${error.message}`);
|
|
64
87
|
}
|
|
65
88
|
}
|
|
66
89
|
/* eslint-enable no-await-in-loop */
|
|
@@ -79,17 +102,24 @@ async function mergeInputFilesWithInputGranules(inputFiles, inputGranules, regex
|
|
|
79
102
|
* from filenames
|
|
80
103
|
* @param {Array<Object>} event.config.inputGranules - an array of granules
|
|
81
104
|
* @param {Array<string>} event.input - an array of s3 uris
|
|
105
|
+
* @param {Object} testMocks - Mocks used for testing.
|
|
82
106
|
*
|
|
83
107
|
* @returns {Object} Granules object
|
|
84
108
|
*/
|
|
85
|
-
function filesToGranules(event) {
|
|
86
|
-
const
|
|
109
|
+
function filesToGranules(event, testMocks) {
|
|
110
|
+
const regex = get(event.config, 'granuleIdExtraction', '(.*)');
|
|
111
|
+
const matchFilesWithProducerGranuleIdConfigValue = get(event.config, 'matchFilesWithProducerGranuleId');
|
|
112
|
+
const matchFilesWithProducerGranuleId = [true, 'true'].includes(matchFilesWithProducerGranuleIdConfigValue);
|
|
87
113
|
const inputGranules = event.config.inputGranules;
|
|
88
|
-
const
|
|
114
|
+
const inputFiles = event.input;
|
|
89
115
|
|
|
90
|
-
return mergeInputFilesWithInputGranules(
|
|
91
|
-
|
|
92
|
-
|
|
116
|
+
return mergeInputFilesWithInputGranules({
|
|
117
|
+
inputFiles,
|
|
118
|
+
inputGranules,
|
|
119
|
+
regex,
|
|
120
|
+
matchFilesWithProducerGranuleId,
|
|
121
|
+
testMocks,
|
|
122
|
+
});
|
|
93
123
|
}
|
|
94
124
|
exports.filesToGranules = filesToGranules;
|
|
95
125
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cumulus/files-to-granules",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "21.0.0-echo10",
|
|
4
4
|
"description": "Converts array-of-files input into a granules object by extracting granuleId from filename",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"directories": {
|
|
@@ -36,13 +36,14 @@
|
|
|
36
36
|
"author": "Cumulus Authors",
|
|
37
37
|
"license": "Apache-2.0",
|
|
38
38
|
"dependencies": {
|
|
39
|
-
"@cumulus/aws-client": "
|
|
39
|
+
"@cumulus/aws-client": "21.0.0-echo10",
|
|
40
40
|
"@cumulus/cumulus-message-adapter-js": "2.3.0",
|
|
41
|
+
"@cumulus/errors": "21.0.0-echo10",
|
|
41
42
|
"lodash": "^4.17.21"
|
|
42
43
|
},
|
|
43
44
|
"devDependencies": {
|
|
44
|
-
"@cumulus/common": "
|
|
45
|
-
"@cumulus/schemas": "
|
|
45
|
+
"@cumulus/common": "21.0.0-echo10",
|
|
46
|
+
"@cumulus/schemas": "21.0.0-echo10"
|
|
46
47
|
},
|
|
47
|
-
"gitHead": "
|
|
48
|
+
"gitHead": "b0560aa0ef1d3b7401eedc4a4ba90f57f16bcda3"
|
|
48
49
|
}
|
package/schemas/config.json
CHANGED
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
"granuleId": {
|
|
17
17
|
"type": "string"
|
|
18
18
|
},
|
|
19
|
+
"producerGranuleId": {
|
|
20
|
+
"type": "string"
|
|
21
|
+
},
|
|
19
22
|
"files": {
|
|
20
23
|
"type": "array",
|
|
21
24
|
"items": {
|
|
@@ -39,6 +42,15 @@
|
|
|
39
42
|
"granuleIdExtraction": {
|
|
40
43
|
"type": "string",
|
|
41
44
|
"description": "The regex needed for extracting granuleId from filenames"
|
|
45
|
+
},
|
|
46
|
+
"matchFilesWithProducerGranuleId": {
|
|
47
|
+
"description": "When set to true/'true', use 'producerGranuleId' instead of the default 'granuleId' when mapping files to granules. Defaults to false.",
|
|
48
|
+
"anyOf": [
|
|
49
|
+
{ "type": "null"},
|
|
50
|
+
{ "type": "boolean" },
|
|
51
|
+
{ "type": "string", "enum": ["true", "false"] }
|
|
52
|
+
],
|
|
53
|
+
"default": false
|
|
42
54
|
}
|
|
43
55
|
}
|
|
44
56
|
}
|