@cumulus/ingest 18.2.2 → 18.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HttpProviderClient.d.ts +2 -1
- package/HttpProviderClient.d.ts.map +1 -1
- package/HttpProviderClient.js +25 -22
- package/HttpProviderClient.js.map +1 -1
- package/package.json +17 -17
- package/src/HttpProviderClient.js +29 -24
- package/tsconfig.json +1 -1
- package/tsconfig.tsbuildinfo +1 -1
package/HttpProviderClient.d.ts
CHANGED
|
@@ -21,9 +21,10 @@ declare class HttpProviderClient {
|
|
|
21
21
|
* List all PDR files from a given endpoint
|
|
22
22
|
*
|
|
23
23
|
* @param {string} path - the remote path to list
|
|
24
|
+
* @param {Object} [testMocks={}] - Mocks for testing (testMocks.crawler, when set, is used instead of a new Crawler)
|
|
24
25
|
* @returns {Promise<Array>} a list of files
|
|
25
26
|
*/
|
|
26
|
-
list(path: string): Promise<any[]>;
|
|
27
|
+
list(path: string, testMocks?: {}): Promise<any[]>;
|
|
27
28
|
/**
|
|
28
29
|
* Download a remote file to disk
|
|
29
30
|
*
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HttpProviderClient.d.ts","sourceRoot":"","sources":["src/HttpProviderClient.js"],"names":[],"mappings":";
|
|
1
|
+
{"version":3,"file":"HttpProviderClient.d.ts","sourceRoot":"","sources":["src/HttpProviderClient.js"],"names":[],"mappings":";AAiCA;IACE,iCAsBC;IArBC,oBAAoC;IACpC,cAAuC;IACvC,UAA+B;IAC/B,UAA+B;IAC/B,qBAAqD;IACrD,eAAoB;IACpB,oBAAmD;IAInD,eAAyC;IAEzC,qBAA0E;IAC1E,sBAAkE;IAGlE,iBAIE;IAGJ,iCA2CC;IAzCG,cAA2D;IAC3D,cAA2D;IA0C/D,wCAUC;IANG,gCAAqE;IAQzE;;;;;;OAMG;IACH,WAJW,MAAM,mBAEJ,cAAc,CAoF1B;IAED;;;;;;;OAOG;IACH;QAJ0B,UAAU,EAAzB,MAAM;QACS,SAAS,EAAxB,MAAM;QACJ,QAAS,MAAM,CAAC,CAgC5B;IAED;;;;;;;;;OASG;IACH;QAN0B,cAAc,EAA7B,MAAM;QACS,iBAAiB,EAAhC,MAAM;QACS,cAAc,EAA7B,MAAM;;eACc,MAAM;cAAQ,MAAM;OA2ClD;IAED;;;;;;;OAOG;IACH;QAJ0B,SAAS,EAAxB,MAAM;QACS,UAAU,EAAzB,MAAM;QACJ,QAAQ,MAAM,CAAC,CA0B3B;IAGD,yBAAkB;IAElB,qBAAc;CAEf"}
|
package/HttpProviderClient.js
CHANGED
|
@@ -3,12 +3,11 @@ const fs = require('fs');
|
|
|
3
3
|
const https = require('https');
|
|
4
4
|
const isIp = require('is-ip');
|
|
5
5
|
const { basename } = require('path');
|
|
6
|
-
const { pipeline } = require('stream');
|
|
6
|
+
const { pipeline } = require('stream/promises');
|
|
7
|
+
const { PassThrough } = require('stream');
|
|
7
8
|
const Crawler = require('simplecrawler');
|
|
8
|
-
const got = require('got');
|
|
9
9
|
const { CookieJar } = require('tough-cookie');
|
|
10
|
-
const {
|
|
11
|
-
const stream = require('node:stream');
|
|
10
|
+
const { importGot } = require('@cumulus/common/importEsm');
|
|
12
11
|
const { buildS3Uri, getTextObject, parseS3Uri, streamS3Upload, } = require('@cumulus/aws-client/S3');
|
|
13
12
|
const log = require('@cumulus/common/log');
|
|
14
13
|
const isValidHostname = require('is-valid-hostname');
|
|
@@ -103,35 +102,36 @@ class HttpProviderClient {
|
|
|
103
102
|
* List all PDR files from a given endpoint
|
|
104
103
|
*
|
|
105
104
|
* @param {string} path - the remote path to list
|
|
105
|
+
* @param {Object} [testMocks={}] - Mocks for testing (testMocks.crawler, when set, is used instead of a new Crawler)
|
|
106
106
|
* @returns {Promise<Array>} a list of files
|
|
107
107
|
*/
|
|
108
|
-
async list(path) {
|
|
108
|
+
async list(path, testMocks = {}) {
|
|
109
109
|
validateHost(this.host);
|
|
110
110
|
await this.downloadTLSCertificate();
|
|
111
111
|
// Make pattern case-insensitive and return all matches
|
|
112
112
|
// instead of just first one
|
|
113
113
|
const matchLinksPattern = /<a href="([^>]*)">[^<]+<\/a>/gi;
|
|
114
114
|
const matchLeadingSlashesPattern = /^\/+/;
|
|
115
|
-
const
|
|
115
|
+
const listCrawler = testMocks.crawler ? testMocks.crawler : new Crawler(buildURL({
|
|
116
116
|
protocol: this.protocol,
|
|
117
117
|
host: this.host,
|
|
118
118
|
port: this.port,
|
|
119
119
|
path,
|
|
120
120
|
}));
|
|
121
121
|
if (this.protocol === 'https' && this.certificate !== undefined) {
|
|
122
|
-
|
|
122
|
+
listCrawler.httpsAgent = new https.Agent({ ca: this.certificate });
|
|
123
123
|
}
|
|
124
124
|
if (this.httpListTimeout) {
|
|
125
|
-
|
|
125
|
+
listCrawler.timeout = this.httpListTimeout * 1000;
|
|
126
126
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
127
|
+
listCrawler.interval = 0;
|
|
128
|
+
listCrawler.maxConcurrency = 10;
|
|
129
|
+
listCrawler.respectRobotsTxt = false;
|
|
130
|
+
listCrawler.userAgent = 'Cumulus';
|
|
131
|
+
listCrawler.maxDepth = 1;
|
|
132
132
|
const files = [];
|
|
133
133
|
return new Promise((resolve, reject) => {
|
|
134
|
-
|
|
134
|
+
listCrawler.on('fetchcomplete', (_, responseBuffer) => {
|
|
135
135
|
const lines = responseBuffer.toString().trim().split('\n');
|
|
136
136
|
lines.forEach((line) => {
|
|
137
137
|
const trimmedLine = line.trim();
|
|
@@ -149,8 +149,8 @@ class HttpProviderClient {
|
|
|
149
149
|
});
|
|
150
150
|
return resolve(files);
|
|
151
151
|
});
|
|
152
|
-
|
|
153
|
-
|
|
152
|
+
listCrawler.on('fetchtimeout', () => reject(new errors.RemoteResourceError('Connection timed out')));
|
|
153
|
+
listCrawler.on('fetcherror', (queueItem, response) => {
|
|
154
154
|
let responseBody = '';
|
|
155
155
|
response.on('data', (chunk) => {
|
|
156
156
|
responseBody += chunk;
|
|
@@ -161,13 +161,13 @@ class HttpProviderClient {
|
|
|
161
161
|
return reject(err);
|
|
162
162
|
});
|
|
163
163
|
});
|
|
164
|
-
|
|
165
|
-
|
|
164
|
+
listCrawler.on('fetchclienterror', (_, errorData) => reject(new errors.RemoteResourceError(`Connection Error: ${JSON.stringify(errorData)}`)));
|
|
165
|
+
listCrawler.on('fetch404', (queueItem, _) => {
|
|
166
166
|
const errorToThrow = new Error(`Received a 404 error from ${this.endpoint}. Check your endpoint!`);
|
|
167
167
|
errorToThrow.details = queueItem;
|
|
168
168
|
return reject(errorToThrow);
|
|
169
169
|
});
|
|
170
|
-
|
|
170
|
+
listCrawler.start();
|
|
171
171
|
});
|
|
172
172
|
}
|
|
173
173
|
/**
|
|
@@ -179,6 +179,7 @@ class HttpProviderClient {
|
|
|
179
179
|
* @returns {Promise.<string>} - the path that the file was saved to
|
|
180
180
|
*/
|
|
181
181
|
async download(params) {
|
|
182
|
+
const got = await importGot();
|
|
182
183
|
const { remotePath, localPath } = params;
|
|
183
184
|
validateHost(this.host);
|
|
184
185
|
await this.setUpGotOptions();
|
|
@@ -191,7 +192,7 @@ class HttpProviderClient {
|
|
|
191
192
|
});
|
|
192
193
|
log.info(`Downloading ${remoteUrl} to ${localPath}`);
|
|
193
194
|
try {
|
|
194
|
-
await
|
|
195
|
+
await pipeline(got.stream(remoteUrl, this.gotOptions), fs.createWriteStream(localPath));
|
|
195
196
|
}
|
|
196
197
|
catch (error) {
|
|
197
198
|
if (error.message && error.message.includes('Unexpected HTTP status code: 403')) {
|
|
@@ -215,6 +216,7 @@ class HttpProviderClient {
|
|
|
215
216
|
* the S3 URI and ETag of the destination file
|
|
216
217
|
*/
|
|
217
218
|
async sync(params) {
|
|
219
|
+
const got = await importGot();
|
|
218
220
|
const { destinationBucket, destinationKey, fileRemotePath } = params;
|
|
219
221
|
validateHost(this.host);
|
|
220
222
|
await this.setUpGotOptions();
|
|
@@ -256,8 +258,9 @@ class HttpProviderClient {
|
|
|
256
258
|
* @returns {Promise<string>} the uri of the uploaded file
|
|
257
259
|
*/
|
|
258
260
|
async upload(params) {
|
|
261
|
+
const got = await importGot();
|
|
259
262
|
const { localPath, uploadPath } = params;
|
|
260
|
-
log.info(
|
|
263
|
+
log.info({ localPath, uploadPath });
|
|
261
264
|
await this.setUpGotOptions();
|
|
262
265
|
await this.downloadTLSCertificate();
|
|
263
266
|
const options = {
|
|
@@ -269,7 +272,7 @@ class HttpProviderClient {
|
|
|
269
272
|
};
|
|
270
273
|
const remoteUrl = buildURL(options);
|
|
271
274
|
got.stream.options = options;
|
|
272
|
-
await
|
|
275
|
+
await pipeline(fs.createReadStream(localPath), got.stream.post(remoteUrl), new PassThrough());
|
|
273
276
|
log.info(`Finishing uploading ${localPath} to ${remoteUrl}`);
|
|
274
277
|
return remoteUrl;
|
|
275
278
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HttpProviderClient.js","sourceRoot":"","sources":["src/HttpProviderClient.js"],"names":[],"mappings":"AAAA,YAAY,CAAC;AAEb,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AACzB,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;AAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;AAC9B,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AACrC,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,
|
|
1
|
+
{"version":3,"file":"HttpProviderClient.js","sourceRoot":"","sources":["src/HttpProviderClient.js"],"names":[],"mappings":"AAAA,YAAY,CAAC;AAEb,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;AACzB,MAAM,KAAK,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;AAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC;AAC9B,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;AACrC,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;AAChD,MAAM,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;AAC1C,MAAM,OAAO,GAAG,OAAO,CAAC,eAAe,CAAC,CAAC;AACzC,MAAM,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC,cAAc,CAAC,CAAC;AAC9C,MAAM,EAAE,SAAS,EAAE,GAAG,OAAO,CAAC,2BAA2B,CAAC,CAAC;AAE3D,MAAM,EACJ,UAAU,EACV,aAAa,EACb,UAAU,EACV,cAAc,GACf,GAAG,OAAO,CAAC,wBAAwB,CAAC,CAAC;AACtC,MAAM,GAAG,GAAG,OAAO,CAAC,qBAAqB,CAAC,CAAC;AAC3C,MAAM,eAAe,GAAG,OAAO,CAAC,mBAAmB,CAAC,CAAC;AACrD,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC,0BAA0B,CAAC,CAAC;AACzD,MAAM,MAAM,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAC;AAE1C,MAAM,EAAE,cAAc,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;AAEtD,MAAM,YAAY,GAAG,CAAC,IAAI,EAAE,EAAE;IAC5B,IAAI,eAAe,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO;IAEhD,MAAM,IAAI,SAAS,CAAC,gDAAgD,IAAI,EAAE,CAAC,CAAC;AAC9E,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AAEnE,MAAM,kBAAkB;IACtB,YAAY,cAAc;QACxB,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC;QACrC,IAAI,CAAC,QAAQ,GAAG,cAAc,CAAC,QAAQ,CAAC;QACxC,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC;QAChC,IAAI,CAAC,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC;QAChC,IAAI,CAAC,eAAe,GAAG,cAAc,CAAC,eAAe,CAAC;QACtD,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;QACrB,IAAI,CAAC,cAAc,GAAG,cAAc,CAAC,cAAc,CAAC;QACpD,IAAI,cAAc,CAAC,QAAQ,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE;YACvD,MAAM,IAAI,cAAc,CAAC,2EAA2E,CAAC,CAAC;SACvG;QACD,IAAI,CAAC,SAAS,GAAG,cAAc,CAAC,SAAS,CAAC;QAE1C,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QAC3E,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC,cAAc,CAAC,gBAAgB,IAAI,EAAE,CAAC;QACnE,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,eAA
e,CAAC,CAAC;QAEjD,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,eAAe;QACnB,IAAI,IAAI,CAAC,SAAS,KAAK,IAAI,EAAE;YAC3B,IAAI,CAAC,QAAQ,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;YAC5D,IAAI,CAAC,QAAQ,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;SAC7D;aAAM;YACL,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC;YAC7C,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC;SAC9C;QAED,IAAI,CAAC,UAAU,CAAC,SAAS,GAAG,IAAI,SAAS,EAAE,CAAC;QAE5C,IAAI,IAAI,CAAC,QAAQ;YAAE,IAAI,CAAC,UAAU,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC5D,IAAI,IAAI,CAAC,QAAQ;YAAE,IAAI,CAAC,UAAU,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;QAE5D,MAAM,eAAe,GAAG;YACtB,+CAA+C;YAC/C,gDAAgD;YAChD,YAAY;YACZ,oBAAoB,CAAC,OAAO,EAAE,QAAQ;gBACpC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE;oBACrD,GAAG,CAAC,KAAK,CAAC;cACN,OAAO,CAAC,GAAG,CAAC,IAAI;iBACb,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,gBAAgB,CAAC;;;WAG3C,CAAC,CAAC;oBACH,OAAO;iBACR;gBAED,IAAI,aAAa,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,EAAE;oBAC1C,sCAAsC;oBACtC,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;oBACrC,OAAO,CAAC,GAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC;oBACrC,qCAAqC;iBACtC;YACH,CAAC;SACF,CAAC;QACF,MAAM,yBAAyB,GAAG,eAAe,CAAC,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAClF,IAAI,CAAC,UAAU,CAAC,KAAK,GAAG;YACtB,cAAc,EAAE;gBACd,yBAAyB;aAC1B;SACF,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,sBAAsB;QAC1B,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS;YAAE,OAAO;QACnE,IAAI;YACF,MAAM,QAAQ,GAAG,UAAU,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YACjD,IAAI,CAAC,WAAW,GAAG,MAAM,aAAa,CAAC,QAAQ,CAAC,MAAM,EAAE,QAAQ,CAAC,GAAG,CAAC,CAAC;YACtE,IAAI,CAAC,UAAU,CAAC,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,oBAAoB,GAAG,IAAI,CAAC,WAAW,CAAC;SAC/D;QAAC,OAAO,KAAK,EAAE;YACd,MAAM,IAAI,MAAM,CAAC,mBAAmB,CAAC,mCAAmC,KAAK,EAAE,CAAC,CAAC;SAClF;IACH,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,GAAG,EAAE;QAC7B,YAAY,CAAC,I
AAI,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAEpC,uDAAuD;QACvD,4BAA4B;QAC5B,MAAM,iBAAiB,GAAG,gCAAgC,CAAC;QAC3D,MAAM,0BAA0B,GAAG,MAAM,CAAC;QAE1C,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,OAAO,CACrE,QAAQ,CAAC;YACP,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI;SACL,CAAC,CACH,CAAC;QAEF,IAAI,IAAI,CAAC,QAAQ,KAAK,OAAO,IAAI,IAAI,CAAC,WAAW,KAAK,SAAS,EAAE;YAC/D,WAAW,CAAC,UAAU,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,EAAE,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC;SACpE;QACD,IAAI,IAAI,CAAC,eAAe,EAAE;YACxB,WAAW,CAAC,OAAO,GAAG,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;SACnD;QACD,WAAW,CAAC,QAAQ,GAAG,CAAC,CAAC;QACzB,WAAW,CAAC,cAAc,GAAG,EAAE,CAAC;QAChC,WAAW,CAAC,gBAAgB,GAAG,KAAK,CAAC;QACrC,WAAW,CAAC,SAAS,GAAG,SAAS,CAAC;QAClC,WAAW,CAAC,QAAQ,GAAG,CAAC,CAAC;QACzB,MAAM,KAAK,GAAG,EAAE,CAAC;QAEjB,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YACrC,WAAW,CAAC,EAAE,CAAC,eAAe,EAAE,CAAC,CAAC,EAAE,cAAc,EAAE,EAAE;gBACpD,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAC3D,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE;oBACrB,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;oBAChC,IAAI,KAAK,GAAG,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;oBAEhD,OAAO,KAAK,KAAK,IAAI,EAAE;wBACrB,MAAM,UAAU,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;wBAC5B,yDAAyD;wBACzD,MAAM,IAAI,GAAG,UAAU;6BACpB,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;6BACjB,OAAO,CAAC,0BAA0B,EAAE,EAAE,CAAC;6BACvC,SAAS,EAAE,CAAC;wBACf,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;wBAC3B,KAAK,GAAG,iBAAiB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;qBAC7C;gBACH,CAAC,CAAC,CAAC;gBAEH,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC;YACxB,CAAC,CAAC,CAAC;YAEH,WAAW,CAAC,EAAE,CAAC,cAAc,EAAE,GAAG,EAAE,CAClC,MAAM,CAAC,IAAI,MAAM,CAAC,mBAAmB,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC;YAElE,WAAW,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,SAAS,EAAE,QAAQ,EAAE,EAAE;gBACnD,IAAI,YAAY,GAAG,EAAE,CAAC;gBACtB,QAAQ,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE;oBAC5B,YAAY,IAAI,KAAK,CAAC;gBACxB,CAAC,CAAC,CAAC;gBAEH,QAAQ,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,
EAAE;oBACtB,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,mBAAmB,CACxC,IAAI,QAAQ,CAAC,GAAG,CAAC,MAAM,IAAI,SAAS,CAAC,GAAG,6BAA6B,QAAQ,CAAC,UAAU,EAAE,CAC3F,CAAC;oBACF,GAAG,CAAC,OAAO,GAAG,YAAY,CAAC;oBAC3B,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC;gBACrB,CAAC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,WAAW,CAAC,EAAE,CAAC,kBAAkB,EAAE,CAAC,CAAC,EAAE,SAAS,EAAE,EAAE,CAClD,MAAM,CAAC,IAAI,MAAM,CAAC,mBAAmB,CAAC,qBAAqB,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YAE5F,WAAW,CAAC,EAAE,CAAC,UAAU,EAAE,CAAC,SAAS,EAAE,CAAC,EAAE,EAAE;gBAC1C,MAAM,YAAY,GAAG,IAAI,KAAK,CAAC,6BAA6B,IAAI,CAAC,QAAQ,wBAAwB,CAAC,CAAC;gBACnG,YAAY,CAAC,OAAO,GAAG,SAAS,CAAC;gBACjC,OAAO,MAAM,CAAC,YAAY,CAAC,CAAC;YAC9B,CAAC,CAAC,CAAC;YAEH,WAAW,CAAC,KAAK,EAAE,CAAC;QACtB,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,QAAQ,CAAC,MAAM;QACnB,MAAM,GAAG,GAAG,MAAM,SAAS,EAAE,CAAC;QAE9B,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,GAAG,MAAM,CAAC;QACzC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAC7B,MAAM,IAAI,CAAC,sBAAsB,EAAE,CAAC;QAEpC,MAAM,SAAS,GAAG,QAAQ,CAAC;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,UAAU;SACjB,CAAC,CAAC;QAEH,GAAG,CAAC,IAAI,CAAC,eAAe,SAAS,OAAO,SAAS,EAAE,CAAC,CAAC;QACrD,IAAI;YACF,MAAM,QAAQ,CACZ,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,EACtC,EAAE,CAAC,iBAAiB,CAAC,SAAS,CAAC,CAChC,CAAC;SACH;QAAC,OAAO,KAAK,EAAE;YACd,IAAI,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,QAAQ,CAAC,kCAAkC,CAAC,EAAE;gBAC/E,MAAM,OAAO,GAAG,GAAG,QAAQ,CAAC,UAAU,CAAC,8CAA8C,CAAC;gBACtF,MAAM,IAAI,MAAM,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;aACxC;;gBAAM,MAAM,KAAK,CAAC;SACpB;QACD,GAAG,CAAC,IAAI,CAAC,yBAAyB,SAAS,EAAE,CAAC,CAAC;QAE/C,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,IAAI,CAAC,MAAM;QACf,MAAM,GAAG,GAAG,MAAM,SAAS,EAAE,CAAC;QAE9B,MAAM,EAAE,iBAAiB,EAAE,cAAc,EAAE,cAAc,EAAE,GAAG,MAAM,CAAC;QACrE,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACxB,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAC7B,MAAM,IAAI,CAAC,sBAAsB,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,QAAQ,CAAC;YACzB,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,
EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,cAAc;SACrB,CAAC,CAAC;QAEH,MAAM,KAAK,GAAG,UAAU,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QAC5D,GAAG,CAAC,IAAI,CAAC,QAAQ,SAAS,OAAO,KAAK,EAAE,CAAC,CAAC;QAE1C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI;YACF,MAAM,YAAY,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;YAChE,OAAO,GAAG,YAAY,CAAC,OAAO,CAAC;SAChC;QAAC,OAAO,KAAK,EAAE;YACd,GAAG,CAAC,IAAI,CAAC,mBAAmB,SAAS,gBAAgB,KAAK,GAAG,CAAC,CAAC;SAChE;QACD,MAAM,WAAW,GAAG,OAAO,CAAC,cAAc,CAAC,IAAI,cAAc,CAAC,cAAc,CAAC,CAAC;QAE9E,MAAM,YAAY,GAAG,GAAG,CAAC,MAAM,CAAC,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAC5D,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,MAAM,cAAc,CACzC,YAAY,EACZ;YACE,MAAM,EAAE;gBACN,MAAM,EAAE,iBAAiB;gBACzB,GAAG,EAAE,cAAc;gBACnB,WAAW,EAAE,WAAW;aACzB;SACF,CACF,CAAC;QAEF,GAAG,CAAC,IAAI,CAAC,oCAAoC,EAAE,KAAK,CAAC,CAAC;QACtD,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;IACzB,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,MAAM,CAAC,MAAM;QACjB,MAAM,GAAG,GAAG,MAAM,SAAS,EAAE,CAAC;QAC9B,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,GAAG,MAAM,CAAC;QACzC,GAAG,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,UAAU,EAAE,CAAC,CAAC;QACpC,MAAM,IAAI,CAAC,eAAe,EAAE,CAAC;QAC7B,MAAM,IAAI,CAAC,sBAAsB,EAAE,CAAC;QACpC,MAAM,OAAO,GAAG;YACd,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,UAAU;YAChB,MAAM,EAAE,MAAM;SACf,CAAC;QAEF,MAAM,SAAS,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;QACpC,GAAG,CAAC,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAC7B,MAAM,QAAQ,CACZ,EAAE,CAAC,gBAAgB,CAAC,SAAS,CAAC,EAC9B,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,EAC1B,IAAI,WAAW,EAAE,CAClB,CAAC;QAEF,GAAG,CAAC,IAAI,CAAC,uBAAuB,SAAS,OAAO,SAAS,EAAE,CAAC,CAAC;QAC7D,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,sCAAsC;IACtC,KAAK,CAAC,OAAO,KAAI,CAAC;IAElB,KAAK,CAAC,GAAG,KAAI,CAAC;CAEf;AAED,MAAM,CAAC,OAAO,GAAG,kBAAkB,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cumulus/ingest",
|
|
3
|
-
"version": "18.
|
|
3
|
+
"version": "18.3.1",
|
|
4
4
|
"description": "Ingest utilities",
|
|
5
5
|
"engines": {
|
|
6
|
-
"node": ">=
|
|
6
|
+
"node": ">=20.12.2"
|
|
7
7
|
},
|
|
8
8
|
"scripts": {
|
|
9
9
|
"build": "rm -rf dist && mkdir dist && npm run prepare",
|
|
10
10
|
"clean": "git clean -d -x -e node_modules -f",
|
|
11
11
|
"test": "../../node_modules/.bin/ava",
|
|
12
|
+
"test:ci": "../../scripts/run_package_ci_unit.sh",
|
|
12
13
|
"test:coverage": "../../node_modules/.bin/nyc npm test",
|
|
13
14
|
"prepare": "npm run tsc",
|
|
14
15
|
"tsc": "../../node_modules/.bin/tsc",
|
|
@@ -23,7 +24,8 @@
|
|
|
23
24
|
"files": [
|
|
24
25
|
"!test/fixtures/**/*"
|
|
25
26
|
],
|
|
26
|
-
"timeout": "15m"
|
|
27
|
+
"timeout": "15m",
|
|
28
|
+
"failFast": true
|
|
27
29
|
},
|
|
28
30
|
"keywords": [
|
|
29
31
|
"GIBS",
|
|
@@ -39,18 +41,16 @@
|
|
|
39
41
|
"author": "Cumulus Authors",
|
|
40
42
|
"license": "Apache-2.0",
|
|
41
43
|
"dependencies": {
|
|
42
|
-
"@cumulus/aws-client": "18.
|
|
43
|
-
"@cumulus/common": "18.
|
|
44
|
-
"@cumulus/db": "18.
|
|
45
|
-
"@cumulus/errors": "18.
|
|
46
|
-
"@cumulus/logger": "18.
|
|
47
|
-
"@cumulus/message": "18.
|
|
48
|
-
"@cumulus/sftp-client": "18.
|
|
49
|
-
"aws-sdk": "^2.1492.0",
|
|
44
|
+
"@cumulus/aws-client": "18.3.1",
|
|
45
|
+
"@cumulus/common": "18.3.1",
|
|
46
|
+
"@cumulus/db": "18.3.1",
|
|
47
|
+
"@cumulus/errors": "18.3.1",
|
|
48
|
+
"@cumulus/logger": "18.3.1",
|
|
49
|
+
"@cumulus/message": "18.3.1",
|
|
50
|
+
"@cumulus/sftp-client": "18.3.1",
|
|
50
51
|
"cksum": "^1.3.0",
|
|
51
52
|
"encodeurl": "^1.0.2",
|
|
52
53
|
"fs-extra": "^5.0.0",
|
|
53
|
-
"got": "^11.8.5",
|
|
54
54
|
"is-ip": "^2.0.0",
|
|
55
55
|
"is-valid-hostname": "^0.1.1",
|
|
56
56
|
"jsftp": "https://github.com/jkovarik/jsftp.git#add_288",
|
|
@@ -61,10 +61,10 @@
|
|
|
61
61
|
"tough-cookie": "~4.0.0"
|
|
62
62
|
},
|
|
63
63
|
"devDependencies": {
|
|
64
|
-
"@cumulus/checksum": "18.
|
|
65
|
-
"@cumulus/cmrjs": "18.
|
|
66
|
-
"@cumulus/test-data": "18.
|
|
67
|
-
"@cumulus/types": "18.
|
|
64
|
+
"@cumulus/checksum": "18.3.1",
|
|
65
|
+
"@cumulus/cmrjs": "18.3.1",
|
|
66
|
+
"@cumulus/test-data": "18.3.1",
|
|
67
|
+
"@cumulus/types": "18.3.1"
|
|
68
68
|
},
|
|
69
|
-
"gitHead": "
|
|
69
|
+
"gitHead": "0393f90c6401ef0c524068e4636c1dcc389020b8"
|
|
70
70
|
}
|
|
@@ -4,12 +4,11 @@ const fs = require('fs');
|
|
|
4
4
|
const https = require('https');
|
|
5
5
|
const isIp = require('is-ip');
|
|
6
6
|
const { basename } = require('path');
|
|
7
|
-
const { pipeline } = require('stream');
|
|
7
|
+
const { pipeline } = require('stream/promises');
|
|
8
|
+
const { PassThrough } = require('stream');
|
|
8
9
|
const Crawler = require('simplecrawler');
|
|
9
|
-
const got = require('got');
|
|
10
10
|
const { CookieJar } = require('tough-cookie');
|
|
11
|
-
const {
|
|
12
|
-
const stream = require('node:stream');
|
|
11
|
+
const { importGot } = require('@cumulus/common/importEsm');
|
|
13
12
|
|
|
14
13
|
const {
|
|
15
14
|
buildS3Uri,
|
|
@@ -118,9 +117,10 @@ class HttpProviderClient {
|
|
|
118
117
|
* List all PDR files from a given endpoint
|
|
119
118
|
*
|
|
120
119
|
* @param {string} path - the remote path to list
|
|
120
|
+
* @param {Object} [testMocks={}] - Mocks for testing (testMocks.crawler, when set, is used instead of a new Crawler)
|
|
121
121
|
* @returns {Promise<Array>} a list of files
|
|
122
122
|
*/
|
|
123
|
-
async list(path) {
|
|
123
|
+
async list(path, testMocks = {}) {
|
|
124
124
|
validateHost(this.host);
|
|
125
125
|
await this.downloadTLSCertificate();
|
|
126
126
|
|
|
@@ -129,7 +129,7 @@ class HttpProviderClient {
|
|
|
129
129
|
const matchLinksPattern = /<a href="([^>]*)">[^<]+<\/a>/gi;
|
|
130
130
|
const matchLeadingSlashesPattern = /^\/+/;
|
|
131
131
|
|
|
132
|
-
const
|
|
132
|
+
const listCrawler = testMocks.crawler ? testMocks.crawler : new Crawler(
|
|
133
133
|
buildURL({
|
|
134
134
|
protocol: this.protocol,
|
|
135
135
|
host: this.host,
|
|
@@ -139,20 +139,20 @@ class HttpProviderClient {
|
|
|
139
139
|
);
|
|
140
140
|
|
|
141
141
|
if (this.protocol === 'https' && this.certificate !== undefined) {
|
|
142
|
-
|
|
142
|
+
listCrawler.httpsAgent = new https.Agent({ ca: this.certificate });
|
|
143
143
|
}
|
|
144
144
|
if (this.httpListTimeout) {
|
|
145
|
-
|
|
145
|
+
listCrawler.timeout = this.httpListTimeout * 1000;
|
|
146
146
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
147
|
+
listCrawler.interval = 0;
|
|
148
|
+
listCrawler.maxConcurrency = 10;
|
|
149
|
+
listCrawler.respectRobotsTxt = false;
|
|
150
|
+
listCrawler.userAgent = 'Cumulus';
|
|
151
|
+
listCrawler.maxDepth = 1;
|
|
152
152
|
const files = [];
|
|
153
153
|
|
|
154
154
|
return new Promise((resolve, reject) => {
|
|
155
|
-
|
|
155
|
+
listCrawler.on('fetchcomplete', (_, responseBuffer) => {
|
|
156
156
|
const lines = responseBuffer.toString().trim().split('\n');
|
|
157
157
|
lines.forEach((line) => {
|
|
158
158
|
const trimmedLine = line.trim();
|
|
@@ -173,10 +173,10 @@ class HttpProviderClient {
|
|
|
173
173
|
return resolve(files);
|
|
174
174
|
});
|
|
175
175
|
|
|
176
|
-
|
|
176
|
+
listCrawler.on('fetchtimeout', () =>
|
|
177
177
|
reject(new errors.RemoteResourceError('Connection timed out')));
|
|
178
178
|
|
|
179
|
-
|
|
179
|
+
listCrawler.on('fetcherror', (queueItem, response) => {
|
|
180
180
|
let responseBody = '';
|
|
181
181
|
response.on('data', (chunk) => {
|
|
182
182
|
responseBody += chunk;
|
|
@@ -191,16 +191,16 @@ class HttpProviderClient {
|
|
|
191
191
|
});
|
|
192
192
|
});
|
|
193
193
|
|
|
194
|
-
|
|
194
|
+
listCrawler.on('fetchclienterror', (_, errorData) =>
|
|
195
195
|
reject(new errors.RemoteResourceError(`Connection Error: ${JSON.stringify(errorData)}`)));
|
|
196
196
|
|
|
197
|
-
|
|
197
|
+
listCrawler.on('fetch404', (queueItem, _) => {
|
|
198
198
|
const errorToThrow = new Error(`Received a 404 error from ${this.endpoint}. Check your endpoint!`);
|
|
199
199
|
errorToThrow.details = queueItem;
|
|
200
200
|
return reject(errorToThrow);
|
|
201
201
|
});
|
|
202
202
|
|
|
203
|
-
|
|
203
|
+
listCrawler.start();
|
|
204
204
|
});
|
|
205
205
|
}
|
|
206
206
|
|
|
@@ -213,6 +213,8 @@ class HttpProviderClient {
|
|
|
213
213
|
* @returns {Promise.<string>} - the path that the file was saved to
|
|
214
214
|
*/
|
|
215
215
|
async download(params) {
|
|
216
|
+
const got = await importGot();
|
|
217
|
+
|
|
216
218
|
const { remotePath, localPath } = params;
|
|
217
219
|
validateHost(this.host);
|
|
218
220
|
await this.setUpGotOptions();
|
|
@@ -227,7 +229,7 @@ class HttpProviderClient {
|
|
|
227
229
|
|
|
228
230
|
log.info(`Downloading ${remoteUrl} to ${localPath}`);
|
|
229
231
|
try {
|
|
230
|
-
await
|
|
232
|
+
await pipeline(
|
|
231
233
|
got.stream(remoteUrl, this.gotOptions),
|
|
232
234
|
fs.createWriteStream(localPath)
|
|
233
235
|
);
|
|
@@ -253,6 +255,8 @@ class HttpProviderClient {
|
|
|
253
255
|
* the S3 URI and ETag of the destination file
|
|
254
256
|
*/
|
|
255
257
|
async sync(params) {
|
|
258
|
+
const got = await importGot();
|
|
259
|
+
|
|
256
260
|
const { destinationBucket, destinationKey, fileRemotePath } = params;
|
|
257
261
|
validateHost(this.host);
|
|
258
262
|
await this.setUpGotOptions();
|
|
@@ -301,8 +305,9 @@ class HttpProviderClient {
|
|
|
301
305
|
* @returns {Promise<string>} the uri of the uploaded file
|
|
302
306
|
*/
|
|
303
307
|
async upload(params) {
|
|
308
|
+
const got = await importGot();
|
|
304
309
|
const { localPath, uploadPath } = params;
|
|
305
|
-
log.info(
|
|
310
|
+
log.info({ localPath, uploadPath });
|
|
306
311
|
await this.setUpGotOptions();
|
|
307
312
|
await this.downloadTLSCertificate();
|
|
308
313
|
const options = {
|
|
@@ -315,10 +320,10 @@ class HttpProviderClient {
|
|
|
315
320
|
|
|
316
321
|
const remoteUrl = buildURL(options);
|
|
317
322
|
got.stream.options = options;
|
|
318
|
-
await
|
|
323
|
+
await pipeline(
|
|
319
324
|
fs.createReadStream(localPath),
|
|
320
|
-
|
|
321
|
-
new
|
|
325
|
+
got.stream.post(remoteUrl),
|
|
326
|
+
new PassThrough()
|
|
322
327
|
);
|
|
323
328
|
|
|
324
329
|
log.info(`Finishing uploading ${localPath} to ${remoteUrl}`);
|
package/tsconfig.json
CHANGED