@adobe/spacecat-shared-scrape-client 2.5.0 → 2.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
## [@adobe/spacecat-shared-scrape-client-v2.5.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.5.0...@adobe/spacecat-shared-scrape-client-v2.5.1) (2026-02-17)
|
|
2
|
+
|
|
3
|
+
### Bug Fixes
|
|
4
|
+
|
|
5
|
+
* **data-access:** decouple shared packages for v2/v3 alias wrapper rollout ([#1355](https://github.com/adobe/spacecat-shared/issues/1355)) ([ba48df7](https://github.com/adobe/spacecat-shared/commit/ba48df710e0030c1cb3ef4f90661cff1b548d42f))
|
|
6
|
+
|
|
1
7
|
# [@adobe/spacecat-shared-scrape-client-v2.5.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.4.0...@adobe/spacecat-shared-scrape-client-v2.5.0) (2026-02-08)
|
|
2
8
|
|
|
3
9
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-scrape-client",
|
|
3
|
-
"version": "2.5.0",
|
|
3
|
+
"version": "2.5.1",
|
|
4
4
|
"description": "Shared modules of the Spacecat Services - Scrape Client",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -36,8 +36,8 @@
|
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@adobe/helix-universal": "5.4.0",
|
|
39
|
-
"@adobe/spacecat-shared-
|
|
40
|
-
"@
|
|
39
|
+
"@adobe/spacecat-shared-utils": "1.81.1",
|
|
40
|
+
"@mysticat/data-service-types": "git+https://github.com/adobe/mysticat-data-service.git#types-ts-v1.11.1"
|
|
41
41
|
},
|
|
42
42
|
"devDependencies": {
|
|
43
43
|
"chai": "6.2.1",
|
|
@@ -13,11 +13,15 @@
|
|
|
13
13
|
import {
|
|
14
14
|
hasText, isIsoDate, isNonEmptyArray, isObject, isValidUrl, isValidUUID,
|
|
15
15
|
} from '@adobe/spacecat-shared-utils';
|
|
16
|
-
import {
|
|
16
|
+
import { MYSTICAT_ENUMS_BY_TYPE } from '@mysticat/data-service-types';
|
|
17
17
|
import { ScrapeJobDto } from './scrapeJobDto.js';
|
|
18
18
|
import ScrapeJobSupervisor from './scrape-job-supervisor.js';
|
|
19
19
|
import { ScrapeUrlDto } from './scrapeUrlDto.js';
|
|
20
20
|
|
|
21
|
+
// Not a DB enum in mysticat-data-service; keep local canonical default here.
|
|
22
|
+
const SCRAPE_PROCESSING_TYPE_DEFAULT = 'default';
|
|
23
|
+
const SCRAPE_URL_STATUS_COMPLETE = MYSTICAT_ENUMS_BY_TYPE.SCRAPE_URL_STATUS.COMPLETE;
|
|
24
|
+
|
|
21
25
|
export default class ScrapeClient {
|
|
22
26
|
config = null;
|
|
23
27
|
|
|
@@ -185,7 +189,7 @@ export default class ScrapeClient {
|
|
|
185
189
|
urls,
|
|
186
190
|
options,
|
|
187
191
|
customHeaders,
|
|
188
|
-
processingType =
|
|
192
|
+
processingType = SCRAPE_PROCESSING_TYPE_DEFAULT,
|
|
189
193
|
maxScrapeAge = 24,
|
|
190
194
|
metaData = {},
|
|
191
195
|
} = data;
|
|
@@ -299,7 +303,9 @@ export default class ScrapeClient {
|
|
|
299
303
|
const { ScrapeUrl } = this.config.dataAccess;
|
|
300
304
|
const scrapeUrls = await ScrapeUrl.allByScrapeJobId(job.getId());
|
|
301
305
|
return scrapeUrls
|
|
302
|
-
.filter((url) =>
|
|
306
|
+
.filter((url) => (
|
|
307
|
+
url.getStatus() === SCRAPE_URL_STATUS_COMPLETE
|
|
308
|
+
))
|
|
303
309
|
.reduce((map, url) => map.set(url.getUrl(), url.getPath()), new Map());
|
|
304
310
|
} catch (error) {
|
|
305
311
|
const msgError = `Failed to fetch the scrape job result: ${error.message}`;
|
|
@@ -10,8 +10,10 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
-
import { ScrapeJob as ScrapeJobModel } from '@adobe/spacecat-shared-data-access';
|
|
14
13
|
import { isValidUrl, isValidUUID, composeBaseURL } from '@adobe/spacecat-shared-utils';
|
|
14
|
+
import { MYSTICAT_ENUMS_BY_TYPE } from '@mysticat/data-service-types';
|
|
15
|
+
|
|
16
|
+
const SCRAPE_JOB_STATUS_RUNNING = MYSTICAT_ENUMS_BY_TYPE.SCRAPE_JOB_STATUS.RUNNING;
|
|
15
17
|
|
|
16
18
|
/**
|
|
17
19
|
* Scrape Supervisor provides functionality to start and manage scrape jobs.
|
|
@@ -57,7 +59,7 @@ function ScrapeJobSupervisor(services, config) {
|
|
|
57
59
|
processingType,
|
|
58
60
|
options,
|
|
59
61
|
urlCount: urls.length,
|
|
60
|
-
status:
|
|
62
|
+
status: SCRAPE_JOB_STATUS_RUNNING,
|
|
61
63
|
customHeaders,
|
|
62
64
|
};
|
|
63
65
|
log.debug(`Creating a new scrape job. Job data: ${JSON.stringify(jobData)}`);
|