@adobe/spacecat-shared-data-access 2.35.0 → 2.36.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # [@adobe/spacecat-shared-data-access-v2.36.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.35.0...@adobe/spacecat-shared-data-access-v2.36.0) (2025-07-17)
2
+
3
+
4
+ ### Features
5
+
6
+ * page intent entity ([#856](https://github.com/adobe/spacecat-shared/issues/856)) ([9e6f709](https://github.com/adobe/spacecat-shared/commit/9e6f70925dc8b9467081e2a1a1b23968a2089638))
7
+
1
8
  # [@adobe/spacecat-shared-data-access-v2.35.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.34.1...@adobe/spacecat-shared-data-access-v2.35.0) (2025-07-17)
2
9
 
3
10
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-data-access",
3
- "version": "2.35.0",
3
+ "version": "2.36.0",
4
4
  "description": "Shared modules of the Spacecat Services - Data Access",
5
5
  "type": "module",
6
6
  "engines": {
@@ -31,6 +31,7 @@ import SiteCandidateCollection from '../site-candidate/site-candidate.collection
31
31
  import SiteCollection from '../site/site.collection.js';
32
32
  import SiteTopPageCollection from '../site-top-page/site-top-page.collection.js';
33
33
  import SuggestionCollection from '../suggestion/suggestion.collection.js';
34
+ import PageIntentCollection from '../page-intent/page-intent.collection.js';
34
35
 
35
36
  import ApiKeySchema from '../api-key/api-key.schema.js';
36
37
  import AsyncJobSchema from '../async-job/async-job.schema.js';
@@ -50,6 +51,7 @@ import SiteSchema from '../site/site.schema.js';
50
51
  import SiteCandidateSchema from '../site-candidate/site-candidate.schema.js';
51
52
  import SiteTopPageSchema from '../site-top-page/site-top-page.schema.js';
52
53
  import SuggestionSchema from '../suggestion/suggestion.schema.js';
54
+ import PageIntentSchema from '../page-intent/page-intent.schema.js';
53
55
 
54
56
  /**
55
57
  * EntityRegistry - A registry class responsible for managing entities, their schema and collection.
@@ -150,5 +152,6 @@ EntityRegistry.registerEntity(SiteSchema, SiteCollection);
150
152
  EntityRegistry.registerEntity(SiteCandidateSchema, SiteCandidateCollection);
151
153
  EntityRegistry.registerEntity(SiteTopPageSchema, SiteTopPageCollection);
152
154
  EntityRegistry.registerEntity(SuggestionSchema, SuggestionCollection);
155
+ EntityRegistry.registerEntity(PageIntentSchema, PageIntentCollection);
153
156
 
154
157
  export default EntityRegistry;
@@ -29,3 +29,4 @@ export * from './site-candidate/index.js';
29
29
  export * from './site-top-page/index.js';
30
30
  export * from './site/index.js';
31
31
  export * from './suggestion/index.js';
32
+ export * from './page-intent/index.js';
@@ -0,0 +1,65 @@
1
+ # PageIntent Entity
2
+
3
+ ## Use Case
4
+
5
+ The `PageIntent` entity captures the intent and topical classification of individual pages within a site.
6
+
7
+ - **Page intent** (`INFORMATIONAL`, `NAVIGATIONAL`, `TRANSACTIONAL`, `COMMERCIAL`) helps determine how users interact with each page.
8
+ - **Topic** (an arbitrary string that varies per site, e.g. `firefly`, `photoshop`, `express`) groups pages into thematic buckets.
9
+
10
+ You can:
11
+ 1. **Record page metadata** as pages are discovered or crawled.
12
+ 2. **Query all pages** for a given site (`siteId`) to analyze overall content strategy.
13
+ 3. **Fetch a single page** by its unique URL to inspect or update its intent/topic.
14
+
15
+ ## PageIntent Schema Overview
16
+
17
+ The `PageIntent` entity persists each page’s metadata. Key attributes include:
18
+
19
+ - **`pageIntentId`** (UUID v4) – primary key for the record.
20
+ - **`siteId`** (UUID v4) – foreign key to the Site entity.
21
+ - **`url`** (string) – unique full URL of the page.
22
+ - **`pageIntent`** (enum) – one of:
23
+ - `INFORMATIONAL`
24
+ - `NAVIGATIONAL`
25
+ - `TRANSACTIONAL`
26
+ - `COMMERCIAL`
27
+ - **`topic`** (string) – arbitrary topic label for the page.
28
+ - **`createdAt`, `updatedAt`** (ISO timestamp) – automatically maintained by ElectroDB.
29
+
30
+ ## Best Practices
31
+
32
+ - **Uniqueness**: enforce URL uniqueness to avoid duplicate page records.
33
+ - **Indexing**:
34
+ - Use the `siteId` index to fetch all pages in a site quickly.
35
+ - Use the unique `url` index to locate or upsert a specific page.
36
+ - **Defaults & Validation**:
37
+ - Validate `url` format with a URL validator.
38
+ - Validate `siteId` as UUID v4.
39
+ - Default `updatedBy` to your automation user (e.g. `spacecat`).
40
+
41
+ ## Usage Example
42
+
43
+ ```js
44
+ const { PageIntent } = dataAccess;
45
+
46
+ // 1. Create a new page intent record
47
+ const pi = await PageIntent.create({
48
+ siteId: 'b1ec63c4-87de-4500-bbc9-276039e4bc10',
49
+ url: 'https://www.adobe.com/firefly/overview.html',
50
+ pageIntent: 'INFORMATIONAL',
51
+ topic: 'firefly',
52
+ });
53
+
54
+ // 2. Query all pages for a site
55
+ const all = await PageIntent.allBySiteId(pi.getSiteId());
56
+ console.log(`Found ${all.length} pages for this site`);
57
+
58
+ // 3. Fetch a single page by URL
59
+ const single = await PageIntent.findByUrl(pi.getUrl());
60
+ console.log(`Intent: ${single.getPageIntent()}, Topic: ${single.getTopic()}`);
61
+
62
+ // 4. Update a page’s intent/topic
63
+ single.setPageIntent('NAVIGATIONAL');
64
+ single.setTopic('firefly-navigation');
65
+ await single.save();
@@ -0,0 +1,34 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import type { BaseCollection, BaseModel, Site } from '../index.js';
14
+
15
+ export interface PageIntent extends BaseModel {
16
+ getSiteId(): string;
17
+ getSite(): Promise<Site>;
18
+ getUrl(): string;
19
+ getPageIntent(): string;
20
+ getTopic(): string;
21
+
22
+ setSiteId(siteId: string): PageIntent;
23
+ setUrl(url: string): PageIntent;
24
+ setPageIntent(pageIntent: string): PageIntent;
25
+ setTopic(topic: string): PageIntent;
26
+ }
27
+
28
+ export interface PageIntentCollection extends BaseCollection<PageIntent> {
29
+ allBySiteId(siteId: string): Promise<PageIntent[]>;
30
+ findBySiteId(siteId: string): Promise<PageIntent | null>;
31
+
32
+ allByUrl(url: string): Promise<PageIntent[]>;
33
+ findByUrl(url: string): Promise<PageIntent | null>;
34
+ }
@@ -0,0 +1,19 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import PageIntent from './page-intent.model.js';
14
+ import PageIntentCollection from './page-intent.collection.js';
15
+
16
+ export {
17
+ PageIntent,
18
+ PageIntentCollection,
19
+ };
@@ -0,0 +1,25 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import BaseCollection from '../base/base.collection.js';
14
+
15
+ /**
16
+ * PageIntentCollection - Manages PageIntent entities.
17
+ *
18
+ * @class PageIntentCollection
19
+ * @extends BaseCollection
20
+ */
21
+ class PageIntentCollection extends BaseCollection {
22
+ // add custom collection-level methods here, if needed
23
+ }
24
+
25
+ export default PageIntentCollection;
@@ -0,0 +1,34 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import BaseModel from '../base/base.model.js';
14
+
15
+ /**
16
+ * PageIntent - Represents a page’s intent & topic within a site.
17
+ *
18
+ * @class PageIntent
19
+ * @extends BaseModel
20
+ */
21
+ class PageIntent extends BaseModel {
22
+ static DEFAULT_UPDATED_BY = 'spacecat';
23
+
24
+ static PAGE_INTENTS = {
25
+ INFORMATIONAL: 'INFORMATIONAL',
26
+ NAVIGATIONAL: 'NAVIGATIONAL',
27
+ TRANSACTIONAL: 'TRANSACTIONAL',
28
+ COMMERCIAL: 'COMMERCIAL',
29
+ };
30
+
31
+ // add any custom methods or overrides here
32
+ }
33
+
34
+ export default PageIntent;
@@ -0,0 +1,60 @@
1
+ /*
2
+ * Copyright 2025 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { isValidUrl } from '@adobe/spacecat-shared-utils';
14
+
15
+ import SchemaBuilder from '../base/schema.builder.js';
16
+ import PageIntent from './page-intent.model.js';
17
+ import PageIntentCollection from './page-intent.collection.js';
18
+
19
+ /*
20
+ Schema: https://electrodb.dev/en/modeling/schema/
21
+ Attributes: https://electrodb.dev/en/modeling/attributes/
22
+ Indexes: https://electrodb.dev/en/modeling/indexes/
23
+ */
24
+
25
+ const schema = new SchemaBuilder(PageIntent, PageIntentCollection)
26
+ // link back to Site entity
27
+ .addReference('belongs_to', 'Site')
28
+
29
+ // page’s full URL (must be unique)
30
+ .addAttribute('url', {
31
+ type: 'string',
32
+ required: true,
33
+ validate: (value) => isValidUrl(value),
34
+ })
35
+
36
+ // one of INFORMATIONAL, NAVIGATIONAL, TRANSACTIONAL, COMMERCIAL
37
+ .addAttribute('pageIntent', {
38
+ type: Object.values(PageIntent.PAGE_INTENTS),
39
+ required: true,
40
+ })
41
+
42
+ // arbitrary topic string like “firefly” or “photoshop”
43
+ .addAttribute('topic', {
44
+ type: 'string',
45
+ required: true,
46
+ })
47
+
48
+ // optionally track who last updated
49
+ .addAttribute('updatedBy', {
50
+ type: 'string',
51
+ default: PageIntent.DEFAULT_UPDATED_BY,
52
+ })
53
+
54
+ // allow fetching the single record by its URL
55
+ .addIndex(
56
+ { composite: ['url'] },
57
+ { composite: ['updatedAt'] },
58
+ );
59
+
60
+ export default schema.build();
@@ -43,6 +43,7 @@ const schema = new SchemaBuilder(Site, SiteCollection)
43
43
  .addReference('has_many', 'Opportunities')
44
44
  .addReference('has_many', 'SiteCandidates')
45
45
  .addReference('has_many', 'SiteTopPages')
46
+ .addReference('has_many', 'PageIntents')
46
47
  .addAttribute('baseURL', {
47
48
  type: 'string',
48
49
  required: true,