@adobe/spacecat-shared-data-access 1.21.2 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-data-access-v1.22.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v1.21.3...@adobe/spacecat-shared-data-access-v1.22.0) (2024-04-29)
2
+
3
+
4
+ ### Features
5
+
6
+ * add site top pages data access (SITES-21274) ([#213](https://github.com/adobe/spacecat-shared/issues/213)) ([b892e7c](https://github.com/adobe/spacecat-shared/commit/b892e7cb056824a4f5917fa2cf8a89c7dd077f7f))
7
+
8
+ # [@adobe/spacecat-shared-data-access-v1.21.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v1.21.2...@adobe/spacecat-shared-data-access-v1.21.3) (2024-04-25)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * **deps:** update external fixes ([#215](https://github.com/adobe/spacecat-shared/issues/215)) ([4227263](https://github.com/adobe/spacecat-shared/commit/4227263b6bc917982d361cd621c7001c4ee1fa56))
14
+
1
15
  # [@adobe/spacecat-shared-data-access-v1.21.2](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-data-access-v1.21.1...@adobe/spacecat-shared-data-access-v1.21.2) (2024-04-16)
2
16
 
3
17
 
package/README.md CHANGED
@@ -34,6 +34,14 @@ npm install @adobe/spacecat-shared-data-access
34
34
  - **expiresAt** (Number): Expiry timestamp of the audit.
35
35
  - **fullAuditRef** (String): Reference to the full audit details.
36
36
 
37
+ ### SiteTopPages
38
+ - **siteId** (String): Identifier of the site.
39
+ - **url** (String): URL of the top page.
40
+ - **traffic** (Number): Traffic of the top page; must be an integer.
41
+ - **source** (String): Source of the data.
42
+ - **geo** (String): Geographic region the traffic data applies to; defaults to `global` when not provided.
43
+ - **importedAt** (String): Timestamp of the import, as an ISO date string.
44
+
37
45
  ## DynamoDB Data Model
38
46
 
39
47
  The module is designed to work with the following DynamoDB tables:
@@ -41,6 +49,8 @@ The module is designed to work with the following DynamoDB tables:
41
49
  1. **Sites Table**: Manages site records.
42
50
  2. **Audits Table**: Stores audit information for each site.
43
51
  3. **Latest Audits Table**: Holds only the latest audit for each site for quick access.
52
+ 4. **Site Candidates Table**: Manages site candidates.
53
+ 5. **Site Top Pages Table**: Stores top pages for each site.
44
54
 
45
55
  Each table is designed with scalability and efficient querying in mind, utilizing both key and non-key attributes effectively.
46
56
 
@@ -58,7 +68,7 @@ These tests create the schema, generate sample data, and test the data access pa
58
68
 
59
69
  ## Data Access API
60
70
 
61
- The module provides two main DAOs:
71
+ The module provides the following DAOs:
62
72
 
63
73
  ### Site Functions
64
74
  - `getSites`
@@ -85,10 +95,14 @@ The module provides two main DAOs:
85
95
  - `getLatestAuditForSite`
86
96
  - `addAudit`
87
97
 
98
+ ### Site Top Pages Functions
99
+ - `getTopPagesForSite`
100
+ - `addSiteTopPage`
+ - `removeSiteTopPages`
88
101
 
89
102
  ## Integrating Data Access in AWS Lambda Functions
90
103
 
91
- Our `spacecat-shared-data-access` module includes a wrapper that can be easily integrated into AWS Lambda functions using `@adobe/helix-shared-wrap`. This integration allows your Lambda functions to access and manipulate data seamlessly.
104
+ Our `spacecat-shared-data-access` module includes a wrapper that can be easily integrated into AWS Lambda functions using `@adobe/helix-shared-wrap`.
105
+ This integration allows your Lambda functions to access and manipulate data seamlessly.
92
106
 
93
107
  ### Steps for Integration
94
108
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-data-access",
3
- "version": "1.21.2",
3
+ "version": "1.22.0",
4
4
  "description": "Shared modules of the Spacecat Services - Data Access",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -31,10 +31,10 @@
31
31
  "dependencies": {
32
32
  "@adobe/spacecat-shared-dynamo": "1.2.5",
33
33
  "@adobe/spacecat-shared-utils": "1.2.0",
34
- "@aws-sdk/client-dynamodb": "3.554.0",
35
- "@aws-sdk/lib-dynamodb": "3.554.0",
34
+ "@aws-sdk/client-dynamodb": "3.556.0",
35
+ "@aws-sdk/lib-dynamodb": "3.556.0",
36
36
  "@types/joi": "17.2.3",
37
- "joi": "17.12.3",
37
+ "joi": "17.13.0",
38
38
  "uuid": "9.0.1"
39
39
  },
40
40
  "devDependencies": {
@@ -0,0 +1,48 @@
1
+ /*
2
+ * Copyright 2024 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { createSiteTopPage } from '../models/site-top-page.js';
14
+
15
/**
 * Left-pads the decimal string form of a number with zeros.
 *
 * Fixed-width, zero-padded strings compare lexicographically in the same order
 * as the numbers they represent, which is why this is used to build sort keys.
 * Intended for non-negative integers; other values are stringified as-is
 * before padding.
 *
 * @param {number} number - The value to pad.
 * @param {number} [width=12] - Minimum length of the result. Values whose string
 * form is already at least this long are returned unpadded.
 * @returns {string} The zero-padded string.
 */
export function padWithZeros(number, width = 12) {
  return String(number).padStart(width, '0');
}
18
+
19
export const SiteTopPageDto = {
  /**
   * Serializes a SiteTopPage into the item shape written to DynamoDB.
   * Besides the plain attributes, the item carries a sort key (SK) composed of
   * the source, the geo and the zero-padded traffic, joined by '#'.
   * @param {Readonly<SiteTopPage>} siteTopPage - The page to serialize.
   * @returns {{siteId, url, traffic, source, geo, importedAt, SK: string}} The DynamoDB item.
   */
  toDynamoItem(siteTopPage) {
    const attributes = {
      siteId: siteTopPage.getSiteId(),
      url: siteTopPage.getURL(),
      traffic: siteTopPage.getTraffic(),
      source: siteTopPage.getSource(),
      geo: siteTopPage.getGeo(),
      importedAt: siteTopPage.getImportedAt(),
    };
    const sortKey = `${siteTopPage.getSource()}#${siteTopPage.getGeo()}#${padWithZeros(siteTopPage.getTraffic())}`;

    return { ...attributes, SK: sortKey };
  },

  /**
   * Rebuilds a SiteTopPage from a DynamoDB item. The item's SK attribute is not
   * passed on; only the plain attributes are handed to createSiteTopPage.
   * @param {{siteId, url, traffic, source, geo, importedAt, SK: string}} item - The DynamoDB item.
   * @returns {SiteTopPage} The reconstructed page.
   */
  fromDynamoItem(item) {
    const {
      siteId, url, traffic, source, geo, importedAt,
    } = item;

    return createSiteTopPage({
      siteId, url, traffic, source, geo, importedAt,
    });
  },
};
package/src/index.d.ts CHANGED
@@ -281,6 +281,44 @@ export interface SiteCandidate {
281
281
  getUpdatedBy: () => string;
282
282
  }
283
283
 
284
+ export interface SiteTopPage {
285
+ /**
286
+ * Retrieves the ID of the site this top page belongs to.
287
+ * @returns {string} The site ID.
288
+ */
289
+ getSiteId: () => string;
290
+
291
+ /**
292
+ * Retrieves the URL of the top page.
293
+ * @returns {string} The URL.
294
+ */
295
+ getURL: () => string;
296
+
297
+ /**
298
+ * Retrieves the traffic recorded for the top page.
299
+ * @returns {number} The traffic, as an integer.
300
+ */
301
+ getTraffic: () => number;
302
+
303
+ /**
304
+ * Retrieves the source the traffic data was obtained from.
305
+ * @returns {string} The source, in lower case.
306
+ */
307
+ getSource: () => string;
308
+
309
+ /**
310
+ * Retrieves the geo of the top page.
311
+ * @returns {string} The geo; 'global' when none was provided on creation.
312
+ */
313
+ getGeo: () => string;
314
+
315
+ /**
316
+ * Retrieves the timestamp at which the top page was imported.
317
+ * @returns {string} The import timestamp, as an ISO date string.
318
+ */
319
+ getImportedAt: () => string;
320
+ }
321
+
284
322
  export interface Organization {
285
323
  /**
286
324
  * Retrieves the ID of the site.
@@ -442,6 +480,12 @@ export interface DataAccess {
442
480
  siteCandidateExists: (baseURL: string) => Promise<boolean>;
443
481
  updateSiteCandidate: (siteCandidate: SiteCandidate) => Promise<SiteCandidate>;
444
482
 
483
+ // site top pages functions
484
+ getTopPagesForSite: (siteId: string, source: string, geo: string)
485
+ => Promise<Readonly<SiteTopPage>[]>;
486
+ addSiteTopPage: (siteTopPageData: object) => Promise<SiteTopPage>;
487
+ removeSiteTopPages: (siteId: string, source: string, geo: string) => Promise<void>;
488
+
445
489
  // configuration functions
446
490
  getConfiguration: () => Promise<Readonly<Configuration>>
447
491
  getConfigurations: () => Promise<Readonly<Configuration>[]>
@@ -456,6 +500,7 @@ interface DataAccessConfig {
456
500
  tableNameSites: string;
457
501
  tableNameSiteCandidates: string;
458
502
  tableNameConfigurations: string;
503
+ tableNameSiteTopPages: string;
459
504
  indexNameAllSites: string;
460
505
  indexNameAllSitesOrganizations: string,
461
506
  indexNameAllSitesByDeliveryType: string;
package/src/index.js CHANGED
@@ -18,6 +18,7 @@ const TABLE_NAME_SITES = 'spacecat-services-sites-dev';
18
18
  const TABLE_NAME_SITE_CANDIDATES = 'spacecat-services-site-candidates-dev';
19
19
  const TABLE_NAME_ORGANIZATIONS = 'spacecat-services-organizations-dev';
20
20
  const TABLE_NAME_CONFIGURATIONS = 'spacecat-services-configurations-dev';
21
+ const TABLE_NAME_SITE_TOP_PAGES = 'spacecat-services-site-top-pages-dev';
21
22
 
22
23
  const INDEX_NAME_ALL_SITES = 'spacecat-services-all-sites-dev';
23
24
  const INDEX_NAME_ALL_ORGANIZATIONS = 'spacecat-services-all-organizations-dev';
@@ -43,6 +44,7 @@ export default function dataAccessWrapper(fn) {
43
44
  DYNAMO_TABLE_NAME_SITE_CANDIDATES = TABLE_NAME_SITE_CANDIDATES,
44
45
  DYNAMO_TABLE_NAME_ORGANIZATIONS = TABLE_NAME_ORGANIZATIONS,
45
46
  DYNAMO_TABLE_NAME_CONFIGURATIONS = TABLE_NAME_CONFIGURATIONS,
47
+ DYNAMO_TABLE_NAME_SITE_TOP_PAGES = TABLE_NAME_SITE_TOP_PAGES,
46
48
  DYNAMO_INDEX_NAME_ALL_SITES = INDEX_NAME_ALL_SITES,
47
49
  DYNAMO_INDEX_NAME_ALL_SITES_BY_DELIVERY_TYPE = INDEX_NAME_ALL_SITES_BY_DELIVERY_TYPE,
48
50
  DYNAMO_INDEX_NAME_ALL_LATEST_AUDIT_SCORES = INDEX_NAME_ALL_LATEST_AUDIT_SCORES,
@@ -59,6 +61,7 @@ export default function dataAccessWrapper(fn) {
59
61
  tableNameSites: DYNAMO_TABLE_NAME_SITES,
60
62
  tableNameSiteCandidates: DYNAMO_TABLE_NAME_SITE_CANDIDATES,
61
63
  tableNameConfigurations: DYNAMO_TABLE_NAME_CONFIGURATIONS,
64
+ tableNameSiteTopPages: DYNAMO_TABLE_NAME_SITE_TOP_PAGES,
62
65
  indexNameAllSites: DYNAMO_INDEX_NAME_ALL_SITES,
63
66
  indexNameAllOrganizations: DYNAMO_INDEX_NAME_ALL_ORGANIZATIONS,
64
67
  indexNameAllOrganizationsByImsOrgId: DYNAMO_INDEX_NAME_ALL_ORGANIZATIONS_BY_IMS_ORG_ID,
@@ -0,0 +1,61 @@
1
+ /*
2
+ * Copyright 2024 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import {
14
+ hasText, isInteger, isIsoDate, isValidUrl,
15
+ } from '@adobe/spacecat-shared-utils';
16
+ import { Base } from './base.js';
17
+
18
/** Geo assigned to a top page when the caller does not provide one. */
export const DEFAULT_GEO = 'global';

/**
 * Builds a frozen SiteTopPage entity on top of the Base model.
 * The accessors read from the entity's `state`; the source is returned
 * lower-cased on every read.
 * @param {object} [data={}] - Raw attributes handed to Base.
 * @returns {Readonly<SiteTopPage>} The frozen entity.
 */
const SiteTopPage = (data = {}) => {
  const entity = Base(data);

  const accessors = {
    getSiteId: () => entity.state.siteId,
    getURL: () => entity.state.url,
    getTraffic: () => entity.state.traffic,
    getSource: () => entity.state.source.toLowerCase(),
    getGeo: () => entity.state.geo,
    getImportedAt: () => entity.state.importedAt,
  };

  return Object.freeze(Object.assign(entity, accessors));
};
32
+
33
/**
 * Validates the given attributes and creates a SiteTopPage from a copy of them.
 * A missing or empty geo falls back to DEFAULT_GEO; every other attribute is
 * required and checked in the order siteId, url, traffic, source, importedAt.
 * @param {object} data - Attributes of the top page.
 * @returns {Readonly<SiteTopPage>} The new SiteTopPage.
 * @throws {Error} When siteId or source has no text, url is not a valid URL,
 * traffic is not an integer, or importedAt is not a valid ISO date.
 */
export const createSiteTopPage = (data) => {
  // Throws with the given message as soon as a validation result is falsy.
  const ensure = (isValid, message) => {
    if (!isValid) {
      throw new Error(message);
    }
  };

  // Work on a shallow copy so the caller's object is never modified.
  const state = { ...data };

  ensure(hasText(state.siteId), 'Site ID must be provided');
  ensure(isValidUrl(state.url), 'Valid Url must be provided');
  ensure(isInteger(state.traffic), 'Traffic must be provided');
  ensure(hasText(state.source), 'Source must be provided');

  if (!hasText(state.geo)) {
    state.geo = DEFAULT_GEO;
  }

  ensure(isIsoDate(state.importedAt), 'Imported at must be a valid ISO date');

  return SiteTopPage(state);
};
@@ -16,6 +16,7 @@ import { siteFunctions } from './sites/index.js';
16
16
  import { siteCandidateFunctions } from './site-candidates/index.js';
17
17
  import { organizationFunctions } from './organizations/index.js';
18
18
  import { configurationFunctions } from './configurations/index.js';
19
+ import { siteTopPagesFunctions } from './site-top-pages/index.js';
19
20
 
20
21
  /**
21
22
  * Creates a data access object.
@@ -23,8 +24,8 @@ import { configurationFunctions } from './configurations/index.js';
23
24
  * @param {{pkAllSites: string, pkAllLatestAudits: string, indexNameAllLatestAuditScores: string,
24
25
  * tableNameAudits: string,tableNameLatestAudits: string, indexNameAllSitesOrganizations: string,
25
26
  * tableNameSites: string, tableNameOrganizations: string, indexNameAllSites: string,
26
- * indexNameAllOrganizations: string, indexNameAllOrganizationsByImsOrgId: string,
27
- * pkAllOrganizations: string}} config configuration
27
+ * tableNameSiteTopPages: string, indexNameAllOrganizations: string,
28
+ * indexNameAllOrganizationsByImsOrgId: string, pkAllOrganizations: string}} config configuration
28
29
  * @param {Logger} log logger
29
30
  * @returns {object} data access object
30
31
  */
@@ -36,6 +37,7 @@ export const createDataAccess = (config, log = console) => {
36
37
  const siteCandidateFuncs = siteCandidateFunctions(dynamoClient, config, log);
37
38
  const organizationFuncs = organizationFunctions(dynamoClient, config, log);
38
39
  const configurationFuncs = configurationFunctions(dynamoClient, config);
40
+ const siteTopPagesFuncs = siteTopPagesFunctions(dynamoClient, config);
39
41
 
40
42
  return {
41
43
  ...auditFuncs,
@@ -43,5 +45,6 @@ export const createDataAccess = (config, log = console) => {
43
45
  ...siteCandidateFuncs,
44
46
  ...organizationFuncs,
45
47
  ...configurationFuncs,
48
+ ...siteTopPagesFuncs,
46
49
  };
47
50
  };
@@ -0,0 +1,83 @@
1
+ /*
2
+ * Copyright 2024 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { hasText } from '@adobe/spacecat-shared-utils';
14
+
15
+ import { createSiteTopPage } from '../../models/site-top-page.js';
16
+ import { padWithZeros, SiteTopPageDto } from '../../dto/site-top-page.js';
17
+
18
/**
 * Retrieves the top pages stored for a site, optionally narrowed down by
 * source, or by source and geo.
 *
 * The query runs with ScanIndexForward set to false, so DynamoDB returns the
 * items in descending sort-key order. The geo filter is only applied together
 * with a source; a geo passed without a source is ignored.
 *
 * @param {object} dynamoClient - Client exposing `query(params)`.
 * @param {object} config - Data access configuration; `tableNameSiteTopPages` is read.
 * @param {object} log - Logger (currently unused).
 * @param {string} siteId - ID of the site whose top pages are requested.
 * @param {string} [source] - Source to filter by; matched case-insensitively.
 * @param {string} [geo] - Geo to filter by; only used when source is given.
 * @returns {Promise<Readonly<SiteTopPage>[]>} The matching top pages.
 */
export const getTopPagesForSite = async (
  dynamoClient,
  config,
  log,
  siteId,
  source,
  geo,
) => {
  const queryParams = {
    TableName: config.tableNameSiteTopPages,
    KeyConditionExpression: 'siteId = :siteId',
    ExpressionAttributeValues: { ':siteId': siteId },
    ScanIndexForward: false,
  };

  if (hasText(source)) {
    // The stored sort key is built from the lower-cased source, and begins_with
    // compares case-sensitively, so the prefix must be lower-cased as well.
    const normalizedSource = source.toLowerCase();

    if (hasText(geo)) {
      queryParams.KeyConditionExpression += ' AND begins_with(SK, :sourceGeo)';
      queryParams.ExpressionAttributeValues[':sourceGeo'] = `${normalizedSource}#${geo}#`;
    } else {
      queryParams.KeyConditionExpression += ' AND begins_with(SK, :source)';
      queryParams.ExpressionAttributeValues[':source'] = `${normalizedSource}#`;
    }
  }

  const dynamoItems = await dynamoClient.query(queryParams);

  return dynamoItems.map((item) => SiteTopPageDto.fromDynamoItem(item));
};
47
+
48
/**
 * Validates the given data, stores the resulting top page in the site top
 * pages table and returns it.
 * @param {object} dynamoClient - Client exposing `putItem(tableName, item)`.
 * @param {object} config - Data access configuration; `tableNameSiteTopPages` is read.
 * @param {object} log - Logger (currently unused).
 * @param {object} siteTopPageData - Attributes of the top page to add.
 * @returns {Promise<Readonly<SiteTopPage>>} The stored top page.
 */
export const addSiteTopPage = async (dynamoClient, config, log, siteTopPageData) => {
  // Validation happens here; invalid data throws before anything is written.
  const created = createSiteTopPage(siteTopPageData);
  const { tableNameSiteTopPages: tableName } = config;

  await dynamoClient.putItem(tableName, SiteTopPageDto.toDynamoItem(created));

  return created;
};
63
+
64
/**
 * Removes the top pages of a site that match the given source and geo.
 * The matching pages are looked up first via getTopPagesForSite, then each one
 * is deleted by its key; all deletions are started together and awaited as a group.
 * @param {object} dynamoClient - Client exposing `query` and `removeItem(tableName, key)`.
 * @param {object} config - Data access configuration; `tableNameSiteTopPages` is read.
 * @param {object} log - Logger, passed through to getTopPagesForSite.
 * @param {string} siteId - ID of the site whose top pages are removed.
 * @param {string} [source] - Source to filter by.
 * @param {string} [geo] - Geo to filter by.
 * @returns {Promise<void>} Resolves once every deletion has completed.
 */
export const removeSiteTopPages = async (dynamoClient, config, log, siteId, source, geo) => {
  const pages = await getTopPagesForSite(dynamoClient, config, log, siteId, source, geo);

  // Rebuilds the key (siteId plus sort key) under which a page was stored.
  const keyOf = (page) => ({
    siteId: page.getSiteId(),
    SK: `${page.getSource()}#${page.getGeo()}#${padWithZeros(page.getTraffic())}`,
  });

  const deletions = pages.map(
    (page) => dynamoClient.removeItem(config.tableNameSiteTopPages, keyOf(page)),
  );

  await Promise.all(deletions);
};
@@ -0,0 +1,42 @@
1
+ /*
2
+ * Copyright 2024 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import {
14
+ addSiteTopPage,
15
+ getTopPagesForSite,
16
+ removeSiteTopPages,
17
+ } from './accessPatterns.js';
18
+
19
/**
 * Binds the site top pages access patterns to a DynamoDB client, a
 * configuration and a logger, so callers only pass the domain arguments.
 * @param {object} dynamoClient - DynamoDB client handed to every access pattern.
 * @param {object} config - Data access configuration.
 * @param {object} log - Logger handed to every access pattern.
 * @returns {{getTopPagesForSite: Function, addSiteTopPage: Function,
 * removeSiteTopPages: Function}} The bound functions.
 */
export const siteTopPagesFunctions = (dynamoClient, config, log) => {
  // Leading arguments shared by all access patterns.
  const context = [dynamoClient, config, log];

  return {
    getTopPagesForSite: (siteId, source, geo) => (
      getTopPagesForSite(...context, siteId, source, geo)
    ),
    addSiteTopPage: (siteTopPageData) => (
      addSiteTopPage(...context, siteTopPageData)
    ),
    removeSiteTopPages: (siteId, source, geo) => (
      removeSiteTopPages(...context, siteId, source, geo)
    ),
  };
};