@adobe/spacecat-shared-scrape-client 2.3.7 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-scrape-client-v2.5.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.4.0...@adobe/spacecat-shared-scrape-client-v2.5.0) (2026-02-08)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* Bot press hold detection addition + add abort info to scrapeJob schema ([#1308](https://github.com/adobe/spacecat-shared/issues/1308)) ([4f19f91](https://github.com/adobe/spacecat-shared/commit/4f19f9143435aa283d8b9e57c17dd79873168177))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-scrape-client-v2.4.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.3.7...@adobe/spacecat-shared-scrape-client-v2.4.0) (2026-02-04)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* scrape client slack message ([#1314](https://github.com/adobe/spacecat-shared/issues/1314)) ([bcf2e83](https://github.com/adobe/spacecat-shared/commit/bcf2e8369743c97a3cd1f4d512a69f0f5abedd6b))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-scrape-client-v2.3.7](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.3.6...@adobe/spacecat-shared-scrape-client-v2.3.7) (2026-01-29)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-scrape-client",
|
|
3
|
-
"version": "2.3.7",
|
|
3
|
+
"version": "2.5.0",
|
|
4
4
|
"description": "Shared modules of the Spacecat Services - Scrape Client",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@adobe/helix-universal": "5.4.0",
|
|
39
|
-
"@adobe/spacecat-shared-data-access": "2.
|
|
39
|
+
"@adobe/spacecat-shared-data-access": "2.101.0",
|
|
40
40
|
"@adobe/spacecat-shared-utils": "1.81.1"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
@@ -187,7 +187,7 @@ export default class ScrapeClient {
|
|
|
187
187
|
customHeaders,
|
|
188
188
|
processingType = ScrapeJobModel.ScrapeProcessingType.DEFAULT,
|
|
189
189
|
maxScrapeAge = 24,
|
|
190
|
-
|
|
190
|
+
metaData = {},
|
|
191
191
|
} = data;
|
|
192
192
|
|
|
193
193
|
this.config.log.debug(`Creating a new scrape job with ${urls.length} URLs.`);
|
|
@@ -205,7 +205,7 @@ export default class ScrapeClient {
|
|
|
205
205
|
mergedOptions,
|
|
206
206
|
customHeaders,
|
|
207
207
|
maxScrapeAge,
|
|
208
|
-
|
|
208
|
+
metaData,
|
|
209
209
|
);
|
|
210
210
|
return ScrapeJobDto.toJSON(job);
|
|
211
211
|
} catch (error) {
|
|
@@ -119,7 +119,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
119
119
|
* @param {object} auditData - Step-Audit specific data
|
|
120
120
|
*/
|
|
121
121
|
// eslint-disable-next-line max-len
|
|
122
|
-
async function queueUrlsForScrapeWorker(urls, scrapeJob, customHeaders, maxScrapeAge,
|
|
122
|
+
async function queueUrlsForScrapeWorker(urls, scrapeJob, customHeaders, maxScrapeAge, metaData) {
|
|
123
123
|
log.info(`Starting a new scrape job of baseUrl: ${scrapeJob.getBaseURL()} with ${urls.length}`
|
|
124
124
|
+ ' URLs.'
|
|
125
125
|
+ `(jobId: ${scrapeJob.getId()})`);
|
|
@@ -151,7 +151,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
151
151
|
customHeaders,
|
|
152
152
|
options,
|
|
153
153
|
maxScrapeAge,
|
|
154
|
-
|
|
154
|
+
metaData,
|
|
155
155
|
};
|
|
156
156
|
|
|
157
157
|
// eslint-disable-next-line no-await-in-loop
|
|
@@ -175,7 +175,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
175
175
|
options,
|
|
176
176
|
customHeaders,
|
|
177
177
|
maxScrapeAge,
|
|
178
|
-
|
|
178
|
+
metaData,
|
|
179
179
|
) {
|
|
180
180
|
const newScrapeJob = await createNewScrapeJob(
|
|
181
181
|
urls,
|
|
@@ -195,7 +195,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
195
195
|
|
|
196
196
|
// Queue all URLs for scrape as a single message. This enables the controller to respond with
|
|
197
197
|
// a job ID ASAP, while the individual URLs are queued up asynchronously by another function.
|
|
198
|
-
await queueUrlsForScrapeWorker(urls, newScrapeJob, customHeaders, maxScrapeAge,
|
|
198
|
+
await queueUrlsForScrapeWorker(urls, newScrapeJob, customHeaders, maxScrapeAge, metaData);
|
|
199
199
|
|
|
200
200
|
return newScrapeJob;
|
|
201
201
|
}
|