@adobe/spacecat-shared-scrape-client 2.1.4 → 2.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-scrape-client-v2.1.6](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.1.5...@adobe/spacecat-shared-scrape-client-v2.1.6) (2025-10-28)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Bug Fixes
|
|
5
|
+
|
|
6
|
+
* **deps:** update external fixes ([#1046](https://github.com/adobe/spacecat-shared/issues/1046)) ([bb6e118](https://github.com/adobe/spacecat-shared/commit/bb6e11886b323f73624fcb9e3c2b14d318aa00c9))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-scrape-client-v2.1.5](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.1.4...@adobe/spacecat-shared-scrape-client-v2.1.5) (2025-09-25)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* remove unnecessary logs to reduce Coralogix usage ([#947](https://github.com/adobe/spacecat-shared/issues/947)) ([c93fa4f](https://github.com/adobe/spacecat-shared/commit/c93fa4f69238106caa0f8150df029e4535c99e39))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-scrape-client-v2.1.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.1.3...@adobe/spacecat-shared-scrape-client-v2.1.4) (2025-09-16)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-scrape-client",
|
|
3
|
-
"version": "2.1.4",
|
|
3
|
+
"version": "2.1.6",
|
|
4
4
|
"description": "Shared modules of the Spacecat Services - Scrape Client",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -40,11 +40,11 @@
|
|
|
40
40
|
"@adobe/spacecat-shared-utils": "1.31.0"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
|
-
"chai": "6.0
|
|
43
|
+
"chai": "6.2.0",
|
|
44
44
|
"chai-as-promised": "8.0.2",
|
|
45
45
|
"nock": "14.0.10",
|
|
46
46
|
"sinon": "21.0.0",
|
|
47
47
|
"sinon-chai": "4.0.1",
|
|
48
|
-
"typescript": "5.9.
|
|
48
|
+
"typescript": "5.9.3"
|
|
49
49
|
}
|
|
50
50
|
}
|
|
@@ -189,7 +189,7 @@ export default class ScrapeClient {
|
|
|
189
189
|
auditData = {},
|
|
190
190
|
} = data;
|
|
191
191
|
|
|
192
|
-
this.config.log.info(`Creating a new scrape job with ${urls.length} URLs.`);
|
|
192
|
+
this.config.log.debug(`Creating a new scrape job with ${urls.length} URLs.`);
|
|
193
193
|
|
|
194
194
|
// Merge the scrape configuration options with the request options allowing the user options
|
|
195
195
|
// to override the defaults
|
|
@@ -66,7 +66,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
66
66
|
status: ScrapeJobModel.ScrapeJobStatus.RUNNING,
|
|
67
67
|
customHeaders,
|
|
68
68
|
};
|
|
69
|
-
log.info(`Creating a new scrape job. Job data: ${JSON.stringify(jobData)}`);
|
|
69
|
+
log.debug(`Creating a new scrape job. Job data: ${JSON.stringify(jobData)}`);
|
|
70
70
|
return ScrapeJob.create(jobData);
|
|
71
71
|
}
|
|
72
72
|
|
|
@@ -110,7 +110,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
110
110
|
for (let i = 0; i < urls.length; i += batchSize) {
|
|
111
111
|
batches.push(urls.slice(i, i + batchSize));
|
|
112
112
|
}
|
|
113
|
-
log.info(`Split ${urls.length} URLs into ${batches.length} batches of size ${batchSize}.`);
|
|
113
|
+
log.debug(`Split ${urls.length} URLs into ${batches.length} batches of size ${batchSize}.`);
|
|
114
114
|
return batches;
|
|
115
115
|
}
|
|
116
116
|
|
|
@@ -139,10 +139,10 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
139
139
|
// If there are more than 1000 URLs, split them into multiple messages
|
|
140
140
|
if (totalUrlCount > maxUrlsPerMessage) {
|
|
141
141
|
urlBatches = splitUrlsIntoBatches(urls, maxUrlsPerMessage);
|
|
142
|
-
log.info(`Queuing ${totalUrlCount} URLs for scrape in ${urlBatches.length} messages.`);
|
|
142
|
+
log.debug(`Queuing ${totalUrlCount} URLs for scrape in ${urlBatches.length} messages.`);
|
|
143
143
|
} else {
|
|
144
144
|
// If there are 1000 or fewer URLs, we can send them all in a single message
|
|
145
|
-
log.info(`Queuing ${totalUrlCount} URLs for scrape in a single message.`);
|
|
145
|
+
log.debug(`Queuing ${totalUrlCount} URLs for scrape in a single message.`);
|
|
146
146
|
urlBatches = [urls]; // Wrap in an array to maintain consistent structure
|
|
147
147
|
}
|
|
148
148
|
|
|
@@ -190,7 +190,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
190
190
|
customHeaders,
|
|
191
191
|
);
|
|
192
192
|
|
|
193
|
-
log.info(
|
|
193
|
+
log.info( // debug?
|
|
194
194
|
'New scrape job created:\n'
|
|
195
195
|
+ `- baseUrl: ${newScrapeJob.getBaseURL()}\n`
|
|
196
196
|
+ `- urlCount: ${urls.length}\n`
|