@adobe/spacecat-shared-data-access 2.24.1 → 2.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-data-access-v2.26.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.25.0...@adobe/spacecat-shared-data-access-v2.26.0) (2025-06-19)
2
+
3
+
4
+ ### Features
5
+
6
+ * added scrape client ([#814](https://github.com/adobe/spacecat-shared/issues/814)) ([fad6614](https://github.com/adobe/spacecat-shared/commit/fad6614672a046da5319e493cc7c26bfdc3993d2))
7
+
8
+ # [@adobe/spacecat-shared-data-access-v2.25.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.24.1...@adobe/spacecat-shared-data-access-v2.25.0) (2025-06-18)
9
+
10
+
11
+ ### Features
12
+
13
+ * introduce ScrapeJob and ScrapeUrl entities ([#803](https://github.com/adobe/spacecat-shared/issues/803)) ([d295f65](https://github.com/adobe/spacecat-shared/commit/d295f65a89a986f08d5d3b28fe60b45d4c65ee36))
14
+
1
15
  # [@adobe/spacecat-shared-data-access-v2.24.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.24.0...@adobe/spacecat-shared-data-access-v2.24.1) (2025-06-14)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-data-access",
3
- "version": "2.24.1",
3
+ "version": "2.26.0",
4
4
  "description": "Shared modules of the Spacecat Services - Data Access",
5
5
  "type": "module",
6
6
  "engines": {
@@ -25,6 +25,8 @@ import KeyEventCollection from '../key-event/key-event.collection.js';
25
25
  import LatestAuditCollection from '../latest-audit/latest-audit.collection.js';
26
26
  import OpportunityCollection from '../opportunity/opportunity.collection.js';
27
27
  import OrganizationCollection from '../organization/organization.collection.js';
28
+ import ScrapeJobCollection from '../scrape-job/scrape-job.collection.js';
29
+ import ScrapeUrlCollection from '../scrape-url/scrape-url.collection.js';
28
30
  import SiteCandidateCollection from '../site-candidate/site-candidate.collection.js';
29
31
  import SiteCollection from '../site/site.collection.js';
30
32
  import SiteTopPageCollection from '../site-top-page/site-top-page.collection.js';
@@ -42,6 +44,8 @@ import KeyEventSchema from '../key-event/key-event.schema.js';
42
44
  import LatestAuditSchema from '../latest-audit/latest-audit.schema.js';
43
45
  import OpportunitySchema from '../opportunity/opportunity.schema.js';
44
46
  import OrganizationSchema from '../organization/organization.schema.js';
47
+ import ScrapeJobSchema from '../scrape-job/scrape-job.schema.js';
48
+ import ScrapeUrlSchema from '../scrape-url/scrape-url.schema.js';
45
49
  import SiteSchema from '../site/site.schema.js';
46
50
  import SiteCandidateSchema from '../site-candidate/site-candidate.schema.js';
47
51
  import SiteTopPageSchema from '../site-top-page/site-top-page.schema.js';
@@ -140,6 +144,8 @@ EntityRegistry.registerEntity(KeyEventSchema, KeyEventCollection);
140
144
  EntityRegistry.registerEntity(LatestAuditSchema, LatestAuditCollection);
141
145
  EntityRegistry.registerEntity(OpportunitySchema, OpportunityCollection);
142
146
  EntityRegistry.registerEntity(OrganizationSchema, OrganizationCollection);
147
+ EntityRegistry.registerEntity(ScrapeJobSchema, ScrapeJobCollection);
148
+ EntityRegistry.registerEntity(ScrapeUrlSchema, ScrapeUrlCollection);
143
149
  EntityRegistry.registerEntity(SiteSchema, SiteCollection);
144
150
  EntityRegistry.registerEntity(SiteCandidateSchema, SiteCandidateCollection);
145
151
  EntityRegistry.registerEntity(SiteTopPageSchema, SiteTopPageCollection);
@@ -22,6 +22,8 @@ export type * from './key-event';
22
22
  export type * from './latest-audit';
23
23
  export type * from './opportunity';
24
24
  export type * from './organization';
25
+ export type * from './scrape-job';
26
+ export type * from './scrape-url';
25
27
  export type * from './site';
26
28
  export type * from './site-candidate';
27
29
  export type * from './site-top-page';
@@ -23,6 +23,8 @@ export * from './key-event/index.js';
23
23
  export * from './latest-audit/index.js';
24
24
  export * from './opportunity/index.js';
25
25
  export * from './organization/index.js';
26
+ export * from './scrape-job/index.js';
27
+ export * from './scrape-url/index.js';
26
28
  export * from './site-candidate/index.js';
27
29
  export * from './site-top-page/index.js';
28
30
  export * from './site/index.js';
@@ -0,0 +1,70 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import type { BaseCollection, BaseModel } from '../base';
14
+ import type { ScrapeUrl } from '../scrape-url';
15
+
16
+ export interface ScrapeJob extends BaseModel {
17
+ getBaseURL(): string,
18
+ getCustomHeaders(): IOptions,
19
+ getDuration(): number,
20
+ getEndedAt(): string,
21
+ getFailedCount(): number,
22
+ getOptions(): string,
23
+ getProcessingType(): string,
24
+ getRedirectCount(): number,
25
+ getResults(): string,
26
+ getScrapeQueueId(): string,
27
+ getScrapeUrls(): Promise<ScrapeUrl[]>,
28
+ getScrapeUrlsByStatus(status: string): Promise<ScrapeUrl[]>,
29
+ getStartedAt(): string,
30
+ getStatus(): string,
31
+ getSuccessCount(): number,
32
+ getUrlCount(): number,
33
+ setBaseURL(baseURL: string): void,
34
+ setCustomHeaders(customHeaders: IOptions): void,
35
+ setDuration(duration: number): void,
36
+ setEndedAt(endTime: string): void,
37
+ setFailedCount(failedCount: number): void,
38
+ setOptions(options: string): void,
39
+ setProcessingType(processingType: string): void,
40
+ setRedirectCount(redirectCount: number): void,
41
+ setResults(results: string): void,
42
+ setScrapeQueueId(ScrapeQueueId: string): void,
43
+ setStatus(status: string): void,
44
+ setSuccessCount(successCount: number): void,
45
+ setUrlCount(urlCount: number): void,
46
+ }
47
+
48
+ /**
+ * Typed accessors of the ScrapeJob collection.
+ * `allBy*` accessors resolve to a list of jobs; `findBy*` accessors resolve to a single
+ * job, or `null` when nothing matches.
+ */
+ export interface ScrapeJobCollection extends BaseCollection<ScrapeJob> {
49
+ allByBaseURL(baseURL: string): Promise<ScrapeJob[]>;
50
+ allByBaseURLAndProcessingType(baseURL: string, processingType: string): Promise<ScrapeJob[]>;
51
+ allByBaseURLAndProcessingTypeAndOptEnableJavascriptAndOptHideConsentBanner(
52
+ baseURL: string,
53
+ processingType: string,
54
+ optEnableJavascript: string,
55
+ optHideConsentBanner: string): Promise<ScrapeJob[]>;
56
+ allByDateRange(startDate: string, endDate: string): Promise<ScrapeJob[]>;
57
+ allByStartedAt(startDate: string): Promise<ScrapeJob[]>;
58
+ allByStatus(status: string): Promise<ScrapeJob[]>;
59
+ allByStatusAndUpdatedAt(status: string, updatedAt: string): Promise<ScrapeJob[]>;
60
+ findByBaseURL(baseURL: string): Promise<ScrapeJob | null>;
61
+ findByBaseURLAndProcessingType(baseURL: string, processingType: string): Promise<ScrapeJob | null>;
62
+ findByBaseURLAndProcessingTypeAndOptEnableJavascriptAndOptHideConsentBanner(
63
+ baseURL: string,
64
+ processingType: string,
65
+ optEnableJavascript: string,
66
+ optHideConsentBanner: string): Promise<ScrapeJob | null>;
67
+ findByStartedAt(startDate: string): Promise<ScrapeJob | null>;
68
+ findByStatus(status: string): Promise<ScrapeJob | null>;
69
+ findByStatusAndUpdatedAt(status: string, updatedAt: string): Promise<ScrapeJob | null>;
70
+ }
@@ -0,0 +1,19 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import ScrapeJob from './scrape-job.model.js';
14
+ import ScrapeJobCollection from './scrape-job.collection.js';
15
+
16
+ export {
17
+ ScrapeJob,
18
+ ScrapeJobCollection,
19
+ };
@@ -0,0 +1,45 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { isIsoDate } from '@adobe/spacecat-shared-utils';
14
+
15
+ import { ValidationError } from '../../errors/index.js';
16
+ import BaseCollection from '../base/base.collection.js';
17
+
18
+ /**
19
+ * ScrapeJobCollection - A collection class responsible for managing ScrapeJob entities.
20
+ * Extends the BaseCollection to provide specific methods for interacting with ScrapeJob records.
21
+ *
22
+ * @class ScrapeJobCollection
23
+ * @extends BaseCollection
24
+ */
25
+ class ScrapeJobCollection extends BaseCollection {
+ /**
+ * Retrieves ScrapeJob records for a date range.
+ * Both arguments are checked with `isIsoDate` before querying; the query itself is
+ * delegated to `this.all` with a `between` option on the `startedAt` attribute.
+ *
+ * @param {string} startDate - Start of the range, as an ISO date string.
+ * @param {string} endDate - End of the range, as an ISO date string.
+ * @returns {Promise<*>} Whatever `this.all` resolves to for the given range.
+ * @throws {ValidationError} If either date fails the `isIsoDate` check.
+ */
26
+ async allByDateRange(startDate, endDate) {
27
+ if (!isIsoDate(startDate)) {
28
+ throw new ValidationError(`Invalid start date: ${startDate}`);
29
+ }
30
+
31
+ if (!isIsoDate(endDate)) {
32
+ throw new ValidationError(`Invalid end date: ${endDate}`);
33
+ }
34
+
+ // No key filter ({}); the range restriction is expressed via the `between` option.
35
+ return this.all({}, {
36
+ between: {
37
+ attribute: 'startedAt',
38
+ start: startDate,
39
+ end: endDate,
40
+ },
41
+ });
42
+ }
43
+ }
44
+
45
+ export default ScrapeJobCollection;
@@ -0,0 +1,77 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import BaseModel from '../base/base.model.js';
14
+
15
+ /**
16
+ * ScrapeJob - A class representing a ScrapeJob entity.
17
+ * Provides methods to access and manipulate ScrapeJob-specific data.
18
+ *
19
+ * @class ScrapeJob
20
+ * @extends BaseModel
21
+ */
22
+ class ScrapeJob extends BaseModel {
23
+ static SCRAPE_JOB_EXPIRES_IN_DAYS = 14;
24
+
25
+ /**
26
+ * Scrape Job Status types.
27
+ * Any changes to this object need to be reflected in the index.d.ts file as well.
28
+ */
29
+ static ScrapeJobStatus = {
30
+ RUNNING: 'RUNNING',
31
+ COMPLETE: 'COMPLETE',
32
+ FAILED: 'FAILED',
33
+ STOPPED: 'STOPPED',
34
+ };
35
+
36
+ /**
37
+ * ScrapeURL Status types.
38
+ * Any changes to this object need to be reflected in the index.d.ts file as well.
39
+ */
40
+ static ScrapeUrlStatus = {
41
+ PENDING: 'PENDING',
42
+ REDIRECT: 'REDIRECT',
43
+ ...ScrapeJob.ScrapeJobStatus,
44
+ };
45
+
46
+ /**
47
+ * Supported Scrape Options.
48
+ */
49
+ static ScrapeOptions = {
50
+ ENABLE_JAVASCRIPT: 'enableJavascript',
51
+ HIDE_CONSENT_BANNER: 'hideConsentBanners',
52
+ PAGE_LOAD_TIMEOUT: 'pageLoadTimeout',
53
+ WAIT_FOR_SELECTOR: 'waitForSelector',
54
+ SECTION_LOAD_WAIT_TIME: 'sectionLoadWaitTime',
55
+ SCREENSHOT_TYPES: 'screenshotTypes',
56
+ };
57
+
58
+ static ScrapeProcessingType = {
59
+ DEFAULT: 'default',
60
+ ACCESSIBILITY: 'accessibility',
61
+ FORM_ACCESSIBILITY: 'form-accessibility',
62
+ FORM: 'form',
63
+ TEXT_CONTENT: 'text-content',
64
+ };
65
+
66
+ static ScrapeScreenshotType = {
67
+ FULL_PAGE: 'fullPage',
68
+ THUMBNAIL: 'thumbnail',
69
+ SECTION: 'section',
70
+ BLOCK: 'block',
71
+ SCROLL: 'scroll',
72
+ };
73
+
74
+ // add your custom methods or overrides here
75
+ }
76
+
77
+ export default ScrapeJob;
@@ -0,0 +1,133 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ /* c8 ignore start */
14
+
15
+ import {
16
+ isInteger,
17
+ isIsoDate,
18
+ isNumber,
19
+ isObject,
20
+ isValidUrl,
21
+ isString,
22
+ } from '@adobe/spacecat-shared-utils';
23
+
24
+ import SchemaBuilder from '../base/schema.builder.js';
25
+ import ScrapeJob from './scrape-job.model.js';
26
+ import ScrapeJobCollection from './scrape-job.collection.js';
27
+
28
+ /*
29
+ Schema Doc: https://electrodb.dev/en/modeling/schema/
30
+ Attribute Doc: https://electrodb.dev/en/modeling/attributes/
31
+ Indexes Doc: https://electrodb.dev/en/modeling/indexes/
32
+ */
33
+
34
+ const schema = new SchemaBuilder(ScrapeJob, ScrapeJobCollection)
35
+ .withRecordExpiry(ScrapeJob.SCRAPE_JOB_EXPIRES_IN_DAYS)
36
+ .addReference('has_many', 'ScrapeUrls')
37
+ .addAttribute('baseURL', {
38
+ type: 'string',
39
+ required: true,
40
+ validate: (value) => isValidUrl(value),
41
+ })
42
+ .addAttribute('processingType', {
43
+ type: 'string',
44
+ required: true,
45
+ validate: (value) => isString(value),
46
+ })
47
+ .addAttribute('duration', {
48
+ type: 'number',
49
+ default: 0,
50
+ validate: (value) => !value || isNumber(value),
51
+ })
52
+ .addAttribute('endedAt', {
53
+ type: 'string',
54
+ validate: (value) => !value || isIsoDate(value),
55
+ })
56
+ .addAttribute('failedCount', {
57
+ type: 'number',
58
+ default: 0,
59
+ validate: (value) => !value || isInteger(value),
60
+ })
61
+ .addAttribute('scrapeQueueId', {
62
+ type: 'string',
63
+ })
64
+ .addAttribute('options', {
65
+ type: 'any',
66
+ validate: (value) => !value || isObject(value),
67
+ })
68
+ .addAttribute('customHeaders', {
69
+ type: 'any',
70
+ })
71
+ .addAttribute('redirectCount', {
72
+ type: 'number',
73
+ default: 0,
74
+ validate: (value) => !value || isInteger(value),
75
+ })
76
+ .addAttribute('status', {
77
+ type: Object.values(ScrapeJob.ScrapeJobStatus),
78
+ required: true,
79
+ })
80
+ .addAttribute('startedAt', {
81
+ type: 'string',
82
+ required: true,
83
+ readOnly: true,
84
+ default: () => new Date().toISOString(),
85
+ validate: (value) => isIsoDate(value),
86
+ })
87
+ .addAttribute('successCount', {
88
+ type: 'number',
89
+ default: 0,
90
+ validate: (value) => !value || isInteger(value),
91
+ })
92
+ .addAttribute('urlCount', {
93
+ type: 'number',
94
+ default: 0,
95
+ validate: (value) => !value || isInteger(value),
96
+ })
97
+ .addAttribute('results', {
98
+ type: 'any',
99
+ })
100
+ .addAttribute('optEnableJavascript', {
101
+ type: 'string',
102
+ hidden: true,
103
+ readOnly: true,
104
+ watch: ['options'],
105
+ set: (_, { options }) => (options[ScrapeJob.ScrapeOptions.ENABLE_JAVASCRIPT] ? 'T' : 'F'),
106
+ })
107
+ .addAttribute('optHideConsentBanner', {
108
+ type: 'string',
109
+ hidden: true,
110
+ readOnly: true,
111
+ watch: ['options'],
112
+ set: (_, { options }) => (options[ScrapeJob.ScrapeOptions.HIDE_CONSENT_BANNER] ? 'T' : 'F'),
113
+ })
114
+ // access pattern: get all jobs sorted by startedAt
115
+ .addAllIndex(['startedAt'])
116
+ .addIndex(
117
+ { composite: ['baseURL'] },
118
+ { composite: ['processingType', 'startedAt'] },
119
+ )
120
+ // access pattern: get all jobs for a given baseURL and processingType,
121
+ // can be filtered by optEnableJavascript and optHideConsentBanner
122
+ // and are sorted by startedAt
123
+ .addIndex(
124
+ { composite: ['baseURL', 'processingType'] },
125
+ { composite: ['optEnableJavascript', 'optHideConsentBanner', 'startedAt'] },
126
+ )
127
+ // access pattern: get all jobs for a given status, sorted by updatedAt
128
+ .addIndex(
129
+ { composite: ['status'] },
130
+ { composite: ['updatedAt'] },
131
+ );
132
+
133
+ export default schema.build();
@@ -0,0 +1,36 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import type { BaseCollection, BaseModel, ScrapeJob } from '../index';
14
+
15
+ export interface ScrapeUrl extends BaseModel {
16
+ getFile(): string,
17
+ getScrapeJob(): Promise<ScrapeJob>,
18
+ getScrapeJobId(): string,
19
+ getPath(): string,
20
+ getReason(): string,
21
+ getStatus(): string,
22
+ getUrl(): string,
23
+ setFile(file: string): void,
24
+ setScrapeJobId(ScrapeJobId: string): void,
25
+ setPath(path: string): void,
26
+ setReason(reason: string): void,
27
+ setStatus(status: string): void,
28
+ setUrl(url: string): void,
29
+ }
30
+
31
+ export interface ScrapeUrlCollection extends BaseCollection<ScrapeUrl> {
32
+ allByScrapeJobId(ScrapeJobId: string): Promise<ScrapeUrl[]>;
33
+ allByScrapeUrlsByJobIdAndStatus(ScrapeJobId: string, status: string): Promise<ScrapeUrl[]>;
34
+ findByScrapeJobId(ScrapeJobId: string): Promise<ScrapeUrl | null>;
35
+ findByScrapeJobIdAndUrl(ScrapeJobId: string, url: string): Promise<ScrapeUrl | null>;
36
+ }
@@ -0,0 +1,19 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import ScrapeUrl from './scrape-url.model.js';
14
+ import ScrapeUrlCollection from './scrape-url.collection.js';
15
+
16
+ export {
17
+ ScrapeUrl,
18
+ ScrapeUrlCollection,
19
+ };
@@ -0,0 +1,26 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import BaseCollection from '../base/base.collection.js';
14
+
15
+ /**
16
+ * ScrapeUrlCollection - A collection class responsible for managing ScrapeUrl entities.
17
+ * Extends the BaseCollection to provide specific methods for interacting with ScrapeUrl records.
18
+ *
19
+ * @class ScrapeUrlCollection
20
+ * @extends BaseCollection
21
+ */
22
+ class ScrapeUrlCollection extends BaseCollection {
23
+ // add custom methods here
24
+ }
25
+
26
+ export default ScrapeUrlCollection;
@@ -0,0 +1,28 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import BaseModel from '../base/base.model.js';
14
+
15
+ /**
16
+ * ScrapeUrl - A class representing a ScrapeUrl entity.
17
+ * Provides methods to access and manipulate ScrapeUrl-specific data.
18
+ *
19
+ * @class ScrapeUrl
20
+ * @extends BaseModel
21
+ */
22
+ class ScrapeUrl extends BaseModel {
23
+ static SCRAPE_URL_EXPIRES_IN_DAYS = 14;
24
+
25
+ // add your custom methods or overrides here
26
+ }
27
+
28
+ export default ScrapeUrl;
@@ -0,0 +1,50 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ /* c8 ignore start */
14
+
15
+ import { isValidUrl } from '@adobe/spacecat-shared-utils';
16
+
17
+ import SchemaBuilder from '../base/schema.builder.js';
18
+ import ScrapeUrl from './scrape-url.model.js';
19
+ import ScrapeUrlCollection from './scrape-url.collection.js';
20
+ import { ScrapeJob } from '../scrape-job/index.js';
21
+
22
+ /*
23
+ Schema Doc: https://electrodb.dev/en/modeling/schema/
24
+ Attribute Doc: https://electrodb.dev/en/modeling/attributes/
25
+ Indexes Doc: https://electrodb.dev/en/modeling/indexes/
26
+ */
27
+
28
+ const schema = new SchemaBuilder(ScrapeUrl, ScrapeUrlCollection)
29
+ .withRecordExpiry(ScrapeUrl.SCRAPE_URL_EXPIRES_IN_DAYS)
30
+ .addReference('belongs_to', 'ScrapeJob', ['status'])
31
+ .addAttribute('file', {
32
+ type: 'string',
33
+ })
34
+ .addAttribute('path', {
35
+ type: 'string',
36
+ })
37
+ .addAttribute('reason', {
38
+ type: 'string',
39
+ })
40
+ .addAttribute('status', {
41
+ type: Object.values(ScrapeJob.ScrapeUrlStatus),
42
+ required: true,
43
+ })
44
+ .addAttribute('url', {
45
+ type: 'string',
46
+ required: true,
47
+ validate: (value) => isValidUrl(value),
48
+ });
49
+
50
+ export default schema.build();