npm - @adobe/spacecat-shared-scrape-client - Versions diffs - 2.0.0 → 2.1.0 - Mend

@adobe/spacecat-shared-scrape-client 2.0.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/CHANGELOG.md +7 -0
package/package.json +1 -1
package/src/clients/scrape-client.js +21 -2
package/src/clients/scrape-job-supervisor.js +9 -4

package/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,10 @@
+# [@adobe/spacecat-shared-scrape-client-v2.1.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v2.0.0...@adobe/spacecat-shared-scrape-client-v2.1.0) (2025-08-20)
+### Features
+* add scrape-client destination  ([#913](https://github.com/adobe/spacecat-shared/issues/913)) ([e208a87](https://github.com/adobe/spacecat-shared/commit/e208a87214874a2708ac2d7614fcfd4c0770fe17))
 # [@adobe/spacecat-shared-scrape-client-v2.0.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-scrape-client-v1.0.7...@adobe/spacecat-shared-scrape-client-v2.0.0) (2025-08-13)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@adobe/spacecat-shared-scrape-client",
-  "version": "2.0.0",
+  "version": "2.1.0",
   "description": "Shared modules of the Spacecat Services - Scrape Client",
   "type": "module",
   "engines": {

package/src/clients/scrape-client.js CHANGED Viewed

@@ -11,8 +11,7 @@
  */
 import {
-  isIsoDate, isObject, isValidUrl, isNonEmptyArray, hasText,
-  isValidUUID,
+  hasText, isIsoDate, isNonEmptyArray, isObject, isValidUrl, isValidUUID,
 } from '@adobe/spacecat-shared-utils';
 import { ScrapeJob as ScrapeJobModel } from '@adobe/spacecat-shared-data-access';
 import { ScrapeJobDto } from './scrapeJobDto.js';
@@ -132,6 +131,7 @@ export default class ScrapeClient {
         customHeaders,
         processingType = ScrapeJobModel.ScrapeProcessingType.DEFAULT,
         maxScrapeAge = 24,
+        auditData = {},
       } = data;
       this.config.log.info(`Creating a new scrape job with ${urls.length} URLs.`);
@@ -149,6 +149,7 @@ export default class ScrapeClient {
         mergedOptions,
         customHeaders,
         maxScrapeAge,
+        auditData,
       );
       return ScrapeJobDto.toJSON(job);
     } catch (error) {
@@ -228,6 +229,24 @@ export default class ScrapeClient {
     }
   }
+  async getScrapeResultPaths(jobId) {
+    try {
+      const job = await this.scrapeSupervisor.getScrapeJob(jobId);
+      if (!job) {
+        return null;
+      }
+      const { ScrapeUrl } = this.config.dataAccess;
+      const scrapeUrls = await ScrapeUrl.allByScrapeJobId(job.getId());
+      return scrapeUrls
+        .filter((url) => url.getStatus() === ScrapeJobModel.ScrapeUrlStatus.COMPLETE)
+        .reduce((map, url) => map.set(url.getUrl(), url.getPath()), new Map());
+    } catch (error) {
+      const msgError = `Failed to fetch the scrape job result: ${error.message}`;
+      this.config.log.error(msgError);
+      throw new Error(msgError);
+    }
+  }
   /**
    * Get all scrape jobs by baseURL and processing type
    * @param {string} baseURL - The baseURL of the jobs to fetch.

package/src/clients/scrape-job-supervisor.js CHANGED Viewed

@@ -122,10 +122,12 @@ function ScrapeJobSupervisor(services, config) {
    * @param {object} scrapeJob - The scrape job record.
    * @param {object} customHeaders - Optional custom headers to be sent with each request.
    * @param {string} maxScrapeAge - The maximum age of the scrape job
+   * @param auditContext
    */
-  async function queueUrlsForScrapeWorker(urls, scrapeJob, customHeaders, maxScrapeAge) {
+  // eslint-disable-next-line max-len
+  async function queueUrlsForScrapeWorker(urls, scrapeJob, customHeaders, maxScrapeAge, auditData) {
     log.info(`Starting a new scrape job of baseUrl: ${scrapeJob.getBaseURL()} with ${urls.length}`
-      + ` URLs. This new job has claimed: ${scrapeJob.getScrapeQueueId()} `
+      + ' URLs.'
       + `(jobId: ${scrapeJob.getId()})`);
     const options = scrapeJob.getOptions();
@@ -155,6 +157,7 @@ function ScrapeJobSupervisor(services, config) {
         customHeaders,
         options,
         maxScrapeAge,
+        auditData,
       };
       // eslint-disable-next-line no-await-in-loop
@@ -168,7 +171,8 @@ function ScrapeJobSupervisor(services, config) {
    * @param {string} processingType - The type of processing to perform.
    * @param {object} options - Optional configuration params for the scrape job.
    * @param {object} customHeaders - Optional custom headers to be sent with each request.
-   * @param {string} maxScrapeAge - The maximum age of the scrape job
+   * @param {number} maxScrapeAge - The maximum age of the scrape job
+   * @param auditContext
    * @returns {Promise<ScrapeJob>} newly created job object
    */
   async function startNewJob(
@@ -177,6 +181,7 @@ function ScrapeJobSupervisor(services, config) {
     options,
     customHeaders,
     maxScrapeAge,
+    auditContext,
   ) {
     const newScrapeJob = await createNewScrapeJob(
       urls,
@@ -196,7 +201,7 @@ function ScrapeJobSupervisor(services, config) {
     // Queue all URLs for scrape as a single message. This enables the controller to respond with
     // a job ID ASAP, while the individual URLs are queued up asynchronously by another function.
-    await queueUrlsForScrapeWorker(urls, newScrapeJob, customHeaders, maxScrapeAge);
+    await queueUrlsForScrapeWorker(urls, newScrapeJob, customHeaders, maxScrapeAge, auditContext);
     return newScrapeJob;
   }