@adobe/spacecat-shared-data-access 2.35.0 → 2.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/package.json +1 -1
- package/src/models/base/entity.registry.js +3 -0
- package/src/models/index.js +1 -0
- package/src/models/page-intent/README.md +65 -0
- package/src/models/page-intent/index.d.ts +34 -0
- package/src/models/page-intent/index.js +19 -0
- package/src/models/page-intent/page-intent.collection.js +25 -0
- package/src/models/page-intent/page-intent.model.js +34 -0
- package/src/models/page-intent/page-intent.schema.js +60 -0
- package/src/models/site/site.schema.js +1 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-data-access-v2.36.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.35.0...@adobe/spacecat-shared-data-access-v2.36.0) (2025-07-17)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* page intent entity ([#856](https://github.com/adobe/spacecat-shared/issues/856)) ([9e6f709](https://github.com/adobe/spacecat-shared/commit/9e6f70925dc8b9467081e2a1a1b23968a2089638))
|
|
7
|
+
|
|
1
8
|
# [@adobe/spacecat-shared-data-access-v2.35.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v2.34.1...@adobe/spacecat-shared-data-access-v2.35.0) (2025-07-17)
|
|
2
9
|
|
|
3
10
|
|
package/package.json
CHANGED
|
package/src/models/base/entity.registry.js
CHANGED
|
@@ -31,6 +31,7 @@ import SiteCandidateCollection from '../site-candidate/site-candidate.collection
|
|
|
31
31
|
import SiteCollection from '../site/site.collection.js';
|
|
32
32
|
import SiteTopPageCollection from '../site-top-page/site-top-page.collection.js';
|
|
33
33
|
import SuggestionCollection from '../suggestion/suggestion.collection.js';
|
|
34
|
+
import PageIntentCollection from '../page-intent/page-intent.collection.js';
|
|
34
35
|
|
|
35
36
|
import ApiKeySchema from '../api-key/api-key.schema.js';
|
|
36
37
|
import AsyncJobSchema from '../async-job/async-job.schema.js';
|
|
@@ -50,6 +51,7 @@ import SiteSchema from '../site/site.schema.js';
|
|
|
50
51
|
import SiteCandidateSchema from '../site-candidate/site-candidate.schema.js';
|
|
51
52
|
import SiteTopPageSchema from '../site-top-page/site-top-page.schema.js';
|
|
52
53
|
import SuggestionSchema from '../suggestion/suggestion.schema.js';
|
|
54
|
+
import PageIntentSchema from '../page-intent/page-intent.schema.js';
|
|
53
55
|
|
|
54
56
|
/**
|
|
55
57
|
* EntityRegistry - A registry class responsible for managing entities, their schema and collection.
|
|
@@ -150,5 +152,6 @@ EntityRegistry.registerEntity(SiteSchema, SiteCollection);
|
|
|
150
152
|
EntityRegistry.registerEntity(SiteCandidateSchema, SiteCandidateCollection);
|
|
151
153
|
EntityRegistry.registerEntity(SiteTopPageSchema, SiteTopPageCollection);
|
|
152
154
|
EntityRegistry.registerEntity(SuggestionSchema, SuggestionCollection);
|
|
155
|
+
EntityRegistry.registerEntity(PageIntentSchema, PageIntentCollection);
|
|
153
156
|
|
|
154
157
|
export default EntityRegistry;
|
package/src/models/index.js
CHANGED
|
package/src/models/page-intent/README.md
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# PageIntent Entity
|
|
2
|
+
|
|
3
|
+
## Use Case
|
|
4
|
+
|
|
5
|
+
The `PageIntent` entity captures the intent and topical classification of individual pages within a site.
|
|
6
|
+
|
|
7
|
+
- **Page intent** (`INFORMATIONAL`, `NAVIGATIONAL`, `TRANSACTIONAL`, `COMMERCIAL`) helps determine how users interact with each page.
|
|
8
|
+
- **Topic** (an arbitrary, site-specific string such as `firefly`, `photoshop`, or `express`) groups pages into thematic buckets.
|
|
9
|
+
|
|
10
|
+
You can:
|
|
11
|
+
1. **Record page metadata** as pages are discovered or crawled.
|
|
12
|
+
2. **Query all pages** for a given site (`siteId`) to analyze overall content strategy.
|
|
13
|
+
3. **Fetch a single page** by its unique URL to inspect or update its intent/topic.
|
|
14
|
+
|
|
15
|
+
## PageIntent Schema Overview
|
|
16
|
+
|
|
17
|
+
The `PageIntent` entity persists each page’s metadata. Key attributes include:
|
|
18
|
+
|
|
19
|
+
- **`pageIntentId`** (UUID v4) – primary key for the record.
|
|
20
|
+
- **`siteId`** (UUID v4) – foreign key to the Site entity.
|
|
21
|
+
- **`url`** (string) – unique full URL of the page.
|
|
22
|
+
- **`pageIntent`** (enum) – one of:
|
|
23
|
+
- `INFORMATIONAL`
|
|
24
|
+
- `NAVIGATIONAL`
|
|
25
|
+
- `TRANSACTIONAL`
|
|
26
|
+
- `COMMERCIAL`
|
|
27
|
+
- **`topic`** (string) – arbitrary topic label for the page.
|
|
28
|
+
- **`createdAt`, `updatedAt`** (ISO timestamp) – automatically maintained by ElectroDB.
|
|
29
|
+
|
|
30
|
+
## Best Practices
|
|
31
|
+
|
|
32
|
+
- **Uniqueness**: enforce URL uniqueness to avoid duplicate page records.
|
|
33
|
+
- **Indexing**:
|
|
34
|
+
- Use `siteId` index to fetch all pages in a site quickly.
|
|
35
|
+
- Use unique `url` index to locate or upsert a specific page.
|
|
36
|
+
- **Defaults & Validation**:
|
|
37
|
+
- Validate the `url` format with a URL validator.
|
|
38
|
+
- Validate `siteId` as UUID v4.
|
|
39
|
+
- Default `updatedBy` to your automation user (e.g. `spacecat`).
|
|
40
|
+
|
|
41
|
+
## Usage Example
|
|
42
|
+
|
|
43
|
+
```js
|
|
44
|
+
const { PageIntent } = dataAccess;
|
|
45
|
+
|
|
46
|
+
// 1. Create a new page intent record
|
|
47
|
+
const pi = await PageIntent.create({
|
|
48
|
+
siteId: 'b1ec63c4-87de-4500-bbc9-276039e4bc10',
|
|
49
|
+
url: 'https://www.adobe.com/firefly/overview.html',
|
|
50
|
+
pageIntent: 'INFORMATIONAL',
|
|
51
|
+
topic: 'firefly',
|
|
52
|
+
});
|
|
53
|
+
|
|
54
|
+
// 2. Query all pages for a site
|
|
55
|
+
const all = await PageIntent.allBySiteId(pi.getSiteId());
|
|
56
|
+
console.log(`Found ${all.length} pages for this site`);
|
|
57
|
+
|
|
58
|
+
// 3. Fetch a single page by URL
|
|
59
|
+
const single = await PageIntent.findByUrl(pi.getUrl());
|
|
60
|
+
console.log(`Intent: ${single.getPageIntent()}, Topic: ${single.getTopic()}`);
|
|
61
|
+
|
|
62
|
+
// 4. Update a page’s intent/topic
|
|
63
|
+
single.setPageIntent('NAVIGATIONAL');
|
|
64
|
+
single.setTopic('firefly-navigation');
|
|
65
|
+
await single.save();
|
|
package/src/models/page-intent/index.d.ts
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { BaseCollection, BaseModel, Site } from '../index.js';
|
|
14
|
+
|
|
15
|
+
export interface PageIntent extends BaseModel {
|
|
16
|
+
getSiteId(): string;
|
|
17
|
+
getSite(): Promise<Site>;
|
|
18
|
+
getUrl(): string;
|
|
19
|
+
getPageIntent(): string;
|
|
20
|
+
getTopic(): string;
|
|
21
|
+
|
|
22
|
+
setSiteId(siteId: string): PageIntent;
|
|
23
|
+
setUrl(url: string): PageIntent;
|
|
24
|
+
setPageIntent(pageIntent: string): PageIntent;
|
|
25
|
+
setTopic(topic: string): PageIntent;
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
export interface PageIntentCollection extends BaseCollection<PageIntent> {
|
|
29
|
+
allBySiteId(siteId: string): Promise<PageIntent[]>;
|
|
30
|
+
findBySiteId(siteId: string): Promise<PageIntent | null>;
|
|
31
|
+
|
|
32
|
+
allByUrl(url: string): Promise<PageIntent[]>;
|
|
33
|
+
findByUrl(url: string): Promise<PageIntent | null>;
|
|
34
|
+
}
|
|
package/src/models/page-intent/index.js
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import PageIntent from './page-intent.model.js';
|
|
14
|
+
import PageIntentCollection from './page-intent.collection.js';
|
|
15
|
+
|
|
16
|
+
export {
|
|
17
|
+
PageIntent,
|
|
18
|
+
PageIntentCollection,
|
|
19
|
+
};
|
|
package/src/models/page-intent/page-intent.collection.js
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import BaseCollection from '../base/base.collection.js';
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* PageIntentCollection - Manages PageIntent entities.
|
|
17
|
+
*
|
|
18
|
+
* @class PageIntentCollection
|
|
19
|
+
* @extends BaseCollection
|
|
20
|
+
*/
|
|
21
|
+
class PageIntentCollection extends BaseCollection {
|
|
22
|
+
// add custom collection-level methods here, if needed
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
export default PageIntentCollection;
|
|
package/src/models/page-intent/page-intent.model.js
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import BaseModel from '../base/base.model.js';
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* PageIntent - Represents a page’s intent & topic within a site.
|
|
17
|
+
*
|
|
18
|
+
* @class PageIntent
|
|
19
|
+
* @extends BaseModel
|
|
20
|
+
*/
|
|
21
|
+
class PageIntent extends BaseModel {
|
|
22
|
+
static DEFAULT_UPDATED_BY = 'spacecat';
|
|
23
|
+
|
|
24
|
+
static PAGE_INTENTS = {
|
|
25
|
+
INFORMATIONAL: 'INFORMATIONAL',
|
|
26
|
+
NAVIGATIONAL: 'NAVIGATIONAL',
|
|
27
|
+
TRANSACTIONAL: 'TRANSACTIONAL',
|
|
28
|
+
COMMERCIAL: 'COMMERCIAL',
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
// add any custom methods or overrides here
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export default PageIntent;
|
|
package/src/models/page-intent/page-intent.schema.js
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { isValidUrl } from '@adobe/spacecat-shared-utils';
|
|
14
|
+
|
|
15
|
+
import SchemaBuilder from '../base/schema.builder.js';
|
|
16
|
+
import PageIntent from './page-intent.model.js';
|
|
17
|
+
import PageIntentCollection from './page-intent.collection.js';
|
|
18
|
+
|
|
19
|
+
/*
|
|
20
|
+
Schema: https://electrodb.dev/en/modeling/schema/
|
|
21
|
+
Attributes: https://electrodb.dev/en/modeling/attributes/
|
|
22
|
+
Indexes: https://electrodb.dev/en/modeling/indexes/
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
const schema = new SchemaBuilder(PageIntent, PageIntentCollection)
|
|
26
|
+
// link back to Site entity
|
|
27
|
+
.addReference('belongs_to', 'Site')
|
|
28
|
+
|
|
29
|
+
// page’s full URL (must be unique)
|
|
30
|
+
.addAttribute('url', {
|
|
31
|
+
type: 'string',
|
|
32
|
+
required: true,
|
|
33
|
+
validate: (value) => isValidUrl(value),
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
// one of INFORMATIONAL, NAVIGATIONAL, TRANSACTIONAL, COMMERCIAL
|
|
37
|
+
.addAttribute('pageIntent', {
|
|
38
|
+
type: Object.values(PageIntent.PAGE_INTENTS),
|
|
39
|
+
required: true,
|
|
40
|
+
})
|
|
41
|
+
|
|
42
|
+
// arbitrary topic string like “firefly” or “photoshop”
|
|
43
|
+
.addAttribute('topic', {
|
|
44
|
+
type: 'string',
|
|
45
|
+
required: true,
|
|
46
|
+
})
|
|
47
|
+
|
|
48
|
+
// optionally track who last updated
|
|
49
|
+
.addAttribute('updatedBy', {
|
|
50
|
+
type: 'string',
|
|
51
|
+
default: PageIntent.DEFAULT_UPDATED_BY,
|
|
52
|
+
})
|
|
53
|
+
|
|
54
|
+
// allow fetching the single record by its URL
|
|
55
|
+
.addIndex(
|
|
56
|
+
{ composite: ['url'] },
|
|
57
|
+
{ composite: ['updatedAt'] },
|
|
58
|
+
);
|
|
59
|
+
|
|
60
|
+
export default schema.build();
|
|
package/src/models/site/site.schema.js
CHANGED
|
@@ -43,6 +43,7 @@ const schema = new SchemaBuilder(Site, SiteCollection)
|
|
|
43
43
|
.addReference('has_many', 'Opportunities')
|
|
44
44
|
.addReference('has_many', 'SiteCandidates')
|
|
45
45
|
.addReference('has_many', 'SiteTopPages')
|
|
46
|
+
.addReference('has_many', 'PageIntents')
|
|
46
47
|
.addAttribute('baseURL', {
|
|
47
48
|
type: 'string',
|
|
48
49
|
required: true,
|