@adobe/spacecat-shared-rum-api-client 2.9.1 → 2.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-rum-api-client-v2.9.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.9.2...@adobe/spacecat-shared-rum-api-client-v2.9.3) (2024-09-21)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * **deps:** update adobe fixes ([#374](https://github.com/adobe/spacecat-shared/issues/374)) ([426e61b](https://github.com/adobe/spacecat-shared/commit/426e61b2e77a955a33651245344724881b0f4f55))
7
+
8
+ # [@adobe/spacecat-shared-rum-api-client-v2.9.2](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.9.1...@adobe/spacecat-shared-rum-api-client-v2.9.2) (2024-09-20)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * Improve granularity of high organic traffic audit ([#372](https://github.com/adobe/spacecat-shared/issues/372)) ([a2d3770](https://github.com/adobe/spacecat-shared/commit/a2d37707d94134d7998ba17fa4505c4fc25b46c3))
14
+
1
15
  # [@adobe/spacecat-shared-rum-api-client-v2.9.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.9.0...@adobe/spacecat-shared-rum-api-client-v2.9.1) (2024-09-14)
2
16
 
3
17
 
package/README.md CHANGED
@@ -203,6 +203,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id
203
203
  "screenshot": "",
204
204
  "trackedPageKPIName": "Bounce Rate",
205
205
  "trackedPageKPIValue": 0.6507592190889371,
206
+ "trackedKPISiteAverage": "",
206
207
  "pageViews": 46100,
207
208
  "samples": 46100,
208
209
  "metrics": [
@@ -223,6 +224,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id
223
224
  "screenshot": "",
224
225
  "trackedPageKPIName": "Bounce Rate",
225
226
  "trackedPageKPIValue": 0.8723897911832946,
227
+ "trackedKPISiteAverage": "",
226
228
  "pageViews": 43100,
227
229
  "samples": 43100,
228
230
  "metrics": [
@@ -242,7 +244,7 @@ Calculates the amount of inorganic traffic and the bounce rate for each page. Id
242
244
 
243
245
  ### high-organic-low-ctr (Experimentation Opportunity)
244
246
 
245
- Calculates the amount of non-inorganic (earned and owned) traffic and the click-through rate for each page. Identifies pages with high non-inorganic traffic and low click-through rates, which can be targeted for future experimentation opportunities. An example payload is provided below:
247
+ Calculates the amount of non-inorganic (earned and owned) traffic and the click-through rate for each page and vendor. Identifies pages with high non-inorganic traffic and low click-through rates, which can be targeted for future experimentation opportunities. An example payload is provided below:
246
248
 
247
249
  ```json
248
250
  [
@@ -251,24 +253,43 @@ Calculates the amount of non-inorganic (earned and owned) traffic and the click-
251
253
  "page": "https://www.spacecat.com/about-us",
252
254
  "screenshot": "",
253
255
  "trackedPageKPIName": "Click Through Rate",
254
- "trackedPageKPIValue": 0.14099783080260303,
256
+ "trackedPageKPIValue": 0.14316702819956617,
257
+ "trackedKPISiteAverage": 0.40828402366863903,
255
258
  "pageViews": 46100,
256
259
  "samples": 46100,
257
260
  "metrics": [
258
261
  {
259
262
  "type": "traffic",
263
+ "vendor": "*",
260
264
  "value": {
261
265
  "total": 46100,
262
- "paid": 0,
263
- "owned": 46100,
266
+ "paid": 300,
267
+ "owned": 45800,
264
268
  "earned": 0
265
269
  }
266
270
  },
267
271
  {
268
272
  "type": "ctr",
273
+ "vendor": "*",
269
274
  "value": {
270
- "page": 0.14099783080260303,
271
- "siteAverage": 0.4077909270216962
275
+ "page": 0.14316702819956617
276
+ }
277
+ },
278
+ {
279
+ "type": "traffic",
280
+ "vendor": "tiktok",
281
+ "value": {
282
+ "total": 300,
283
+ "owned": 0,
284
+ "earned": 0,
285
+ "paid": 300
286
+ }
287
+ },
288
+ {
289
+ "type": "ctr",
290
+ "vendor": "tiktok",
291
+ "value": {
292
+ "page": 0.3333333333333333
272
293
  }
273
294
  }
274
295
  ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-rum-api-client",
3
- "version": "2.9.1",
3
+ "version": "2.9.3",
4
4
  "description": "Shared modules of the Spacecat Services - Rum API client",
5
5
  "type": "module",
6
6
  "engines": {
@@ -35,7 +35,7 @@
35
35
  "access": "public"
36
36
  },
37
37
  "dependencies": {
38
- "@adobe/fetch": "4.1.8",
38
+ "@adobe/fetch": "4.1.9",
39
39
  "@adobe/helix-shared-wrap": "2.0.2",
40
40
  "@adobe/helix-universal": "5.0.5",
41
41
  "@adobe/spacecat-shared-utils": "1.4.0",
@@ -10,6 +10,8 @@
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
12
 
13
+ import { extractTrafficHints, classifyVendor, getSecondLevelDomain } from './traffic.js';
14
+
13
15
  /**
14
16
  * Calculates the total page views by URL from an array of bundles.
15
17
  * @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
@@ -52,6 +54,59 @@ function getCTRByUrl(bundles) {
52
54
  }, {});
53
55
  }
54
56
 
57
+ /**
58
+ * Calculates the Click-Through Rate (CTR) by URL and by vendor.
59
+ * CTR is defined as the total number of sessions with at least one click event,
60
+ * divided by the total number of pageviews, computed for each URL and for each vendor of that URL.
61
+ *
62
+ * @param {Array<Object>} bundles - An array of RUM bundles (NOT Flat bundles).
63
+ * @returns {Object} - An object where the key is the URL and the value is an object
64
+ * with the overall CTR (`value`) and the CTR by vendor (`vendors`).
65
+ */
66
+ function getCTRByUrlAndVendor(bundles) {
67
+ const aggregated = bundles.reduce((acc, bundle) => {
68
+ const { url } = bundle;
69
+ const trafficHints = extractTrafficHints(bundle);
70
+ const referrerDomain = getSecondLevelDomain(trafficHints.referrer);
71
+ const vendor = classifyVendor(referrerDomain, trafficHints.utmSource, trafficHints.utmMedium);
72
+ if (!acc[url]) {
73
+ acc[url] = { sessionsWithClick: 0, totalPageviews: 0, vendors: {} };
74
+ }
75
+ const hasClick = bundle.events.some((event) => event.checkpoint === 'click');
76
+
77
+ acc[url].totalPageviews += bundle.weight;
78
+ if (hasClick) {
79
+ acc[url].sessionsWithClick += bundle.weight;
80
+ }
81
+ if (vendor) {
82
+ if (!acc[url].vendors[vendor]) {
83
+ acc[url].vendors[vendor] = { sessionsWithClick: 0, totalPageviews: 0 };
84
+ }
85
+ acc[url].vendors[vendor].totalPageviews += bundle.weight;
86
+ if (hasClick) {
87
+ acc[url].vendors[vendor].sessionsWithClick += bundle.weight;
88
+ }
89
+ }
90
+ return acc;
91
+ }, {});
92
+ return Object.entries(aggregated)
93
+ .reduce((acc, [url, { sessionsWithClick, totalPageviews, vendors }]) => {
94
+ if (!acc[url]) {
95
+ acc[url] = { value: 0, vendors: {} };
96
+ }
97
+ acc[url].value = (sessionsWithClick / totalPageviews);
98
+ acc[url].vendors = Object.entries(vendors)
99
+ .reduce((_acc, [source, {
100
+ sessionsWithClick: _sessionsWithClick, totalPageviews: _totalPageviews,
101
+ }]) => {
102
+ // eslint-disable-next-line no-param-reassign
103
+ _acc[source] = (_sessionsWithClick / _totalPageviews);
104
+ return _acc;
105
+ }, {});
106
+ return acc;
107
+ }, {});
108
+ }
109
+
55
110
  /**
56
111
  * Calculates the Click-Through Rate (CTR) average for the entire site.
57
112
  * CTR is defined as the total number of sessions with at least one click event
@@ -78,5 +133,6 @@ function getSiteAvgCTR(bundles) {
78
133
  export {
79
134
  getSiteAvgCTR,
80
135
  getCTRByUrl,
136
+ getCTRByUrlAndVendor,
81
137
  pageviewsByUrl,
82
138
  };
@@ -24,7 +24,7 @@ import URI from 'urijs';
24
24
  * @returns {string} The second-level domain of the given URL, or the original
25
25
  * URL if it does not contain any text.
26
26
  */
27
- function getSecondLevelDomain(url) {
27
+ export function getSecondLevelDomain(url) {
28
28
  if (!hasText(url)) return url;
29
29
  const uri = new URI(url);
30
30
  const domain = uri.domain();
@@ -39,7 +39,7 @@ function getSecondLevelDomain(url) {
39
39
  // Referrer related
40
40
  const referrers = {
41
41
  search: /google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask/,
42
- social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|x|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
42
+ social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
43
43
  ad: /googlesyndication|2mdn|doubleclick|syndicatedsearch/,
44
44
  video: /youtube|vimeo|twitch|dailymotion|wistia/,
45
45
  };
@@ -63,11 +63,48 @@ const sources = {
63
63
  social: /^\b(ig|fb|x|soc)\b|(.*(meta|tiktok|facebook|snapchat|twitter|igshopping|instagram|linkedin|reddit).*)$/,
64
64
  search: /^\b(goo)\b|(.*(sea|google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask).*)$/,
65
65
  video: /youtube|vimeo|twitch|dailymotion|wistia/,
66
- display: /optumib2b|jun|googleads|dv36|dv360|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
66
+ display: /optumib2b|jun|googleads|dv360|dv36|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
67
67
  affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
68
68
  email: /sfmc|email/,
69
69
  };
70
70
 
71
+ /**
72
+ * Vendor classification rules from https://github.com/adobe/helix-website/blob/main/tools/oversight/acquisition.js#L12
73
+ * Added dailymotion, twitch to the list
74
+ * Using full-word match for short social media abbreviations like ig, fb, x
75
+ */
76
+ const vendorClassifications = [
77
+ { regex: /google|googleads|google-ads|google_search|google_deman|adwords|dv360|gdn|doubleclick|dbm|gmb/i, result: 'google' },
78
+ { regex: /instagram|\b(ig)\b/i, result: 'instagram' },
79
+ { regex: /facebook|\b(fb)\b|meta/i, result: 'facebook' },
80
+ { regex: /bing/i, result: 'bing' },
81
+ { regex: /tiktok/i, result: 'tiktok' },
82
+ { regex: /youtube|yt/i, result: 'youtube' },
83
+ { regex: /linkedin/i, result: 'linkedin' },
84
+ { regex: /twitter|^\b(x)\b/i, result: 'x' },
85
+ { regex: /snapchat/i, result: 'snapchat' },
86
+ { regex: /microsoft/i, result: 'microsoft' },
87
+ { regex: /pinterest/i, result: 'pinterest' },
88
+ { regex: /reddit/i, result: 'reddit' },
89
+ { regex: /spotify/i, result: 'spotify' },
90
+ { regex: /criteo/i, result: 'criteo' },
91
+ { regex: /taboola/i, result: 'taboola' },
92
+ { regex: /outbrain/i, result: 'outbrain' },
93
+ { regex: /yahoo/i, result: 'yahoo' },
94
+ { regex: /marketo/i, result: 'marketo' },
95
+ { regex: /eloqua/i, result: 'eloqua' },
96
+ { regex: /substack/i, result: 'substack' },
97
+ { regex: /line/i, result: 'line' },
98
+ { regex: /yext/i, result: 'yext' },
99
+ { regex: /teads/i, result: 'teads' },
100
+ { regex: /yandex/i, result: 'yandex' },
101
+ { regex: /baidu/i, result: 'baidu' },
102
+ { regex: /amazon|ctv/i, result: 'amazon' },
103
+ { regex: /dailymotion/i, result: 'dailymotion' },
104
+ { regex: /twitch/i, result: 'twitch' },
105
+ { regex: /direct/i, result: 'direct' },
106
+ ];
107
+
71
108
  // Tracking params - based on the checkpoints we have in rum-enhancer now
72
109
  // const organicTrackingParams = ['srsltid']; WE DO NOT HAVE THIS AS OF NOW
73
110
  const paidTrackingParams = ['paid'];
@@ -160,6 +197,39 @@ const RULES = (domain) => ([
160
197
  { type: 'owned', category: 'uncategorized', referrer: any, utmSource: any, utmMedium: any, tracking: any },
161
198
  ]);
162
199
 
200
+ export function extractTrafficHints(bundle) {
201
+ const findEvent = (checkpoint, source = '') => bundle.events.find((e) => e.checkpoint === checkpoint && (!source || e.source === source)) || {};
202
+
203
+ const referrer = findEvent('enter').source || '';
204
+ const utmSource = findEvent('utm', 'utm_source').target || '';
205
+ const utmMedium = findEvent('utm', 'utm_medium').target || '';
206
+ const tracking = findEvent('paid').checkpoint || findEvent('email').checkpoint || '';
207
+
208
+ return {
209
+ url: bundle.url,
210
+ weight: bundle.weight,
211
+ referrer,
212
+ utmSource,
213
+ utmMedium,
214
+ tracking,
215
+ };
216
+ }
217
+
218
+ /**
219
+ * Returns the first vendor (in vendorClassifications order) whose pattern matches the referrer, utmSource or utmMedium, or '' if none matches.
220
+ * For example: facebook instead of www.facebook.com
221
+ * @param {*} referrer
222
+ */
223
+ export function classifyVendor(referrer, utmSource, utmMedium) {
224
+ const result = vendorClassifications.find(({ regex }) => {
225
+ if (regex.test(referrer)) return true;
226
+ if (regex.test(utmSource)) return true;
227
+ if (regex.test(utmMedium)) return true;
228
+ return false;
229
+ });
230
+ return result ? result.result : '';
231
+ }
232
+
163
233
  export function classifyTrafficSource(url, referrer, utmSource, utmMedium, trackingParams) {
164
234
  const secondLevelDomain = getSecondLevelDomain(url);
165
235
  const rules = RULES(secondLevelDomain);
@@ -174,9 +244,11 @@ export function classifyTrafficSource(url, referrer, utmSource, utmMedium, track
174
244
  && rule.utmMedium(sanitize(utmMedium))
175
245
  && rule.tracking(trackingParams)
176
246
  ));
247
+ const vendor = classifyVendor(referrerDomain, utmSource, utmMedium);
177
248
 
178
249
  return {
179
250
  type,
180
251
  category,
252
+ vendor,
181
253
  };
182
254
  }
@@ -29,6 +29,7 @@ function convertToOpportunity(traffic) {
29
29
  screenshot: '',
30
30
  trackedPageKPIName: 'Bounce Rate',
31
31
  trackedPageKPIValue: bounceRate,
32
+ trackedKPISiteAverage: '',
32
33
  pageViews: total,
33
34
  samples: total, // todo: get the actual number of samples
34
35
  metrics: [{
@@ -11,27 +11,49 @@
11
11
  */
12
12
 
13
13
  import trafficAcquisition from '../traffic-acquisition.js';
14
- import { getCTRByUrl, getSiteAvgCTR } from '../../common/aggregateFns.js';
14
+ import { getCTRByUrlAndVendor, getSiteAvgCTR } from '../../common/aggregateFns.js';
15
15
 
16
16
  const DAILY_EARNED_THRESHOLD = 5000;
17
17
  const CTR_THRESHOLD_RATIO = 0.95;
18
18
  const DAILY_PAGEVIEW_THRESHOLD = 1000;
19
+ const VENDORS_TO_CONSIDER = 5;
20
+
21
+ const MAIN_TYPES = ['paid', 'earned', 'owned'];
19
22
 
20
23
  function convertToOpportunity(traffic) {
21
24
  const {
22
- url, total, ctr, paid, owned, earned, siteAvgCTR,
25
+ url, total, ctr, paid, owned, earned, sources, siteAvgCTR, ctrByUrlAndVendor,
23
26
  } = traffic;
24
27
 
25
- return {
28
+ const vendors = sources.reduce((acc, { type, views }) => {
29
+ const [trafficType, , vendor] = type.split(':');
30
+ if (!vendor) {
31
+ return acc;
32
+ }
33
+ if (MAIN_TYPES.includes(trafficType)) {
34
+ acc[vendor] = acc[vendor] || {
35
+ total: 0, owned: 0, earned: 0, paid: 0,
36
+ };
37
+ acc[vendor].total += views;
38
+ acc[vendor][trafficType] += views;
39
+ }
40
+ return acc;
41
+ }, {});
42
+
43
+ const topVendors = Object.entries(vendors)
44
+ .sort((a, b) => b[1].total - a[1].total).slice(0, VENDORS_TO_CONSIDER);
45
+ const opportunity = {
26
46
  type: 'high-organic-low-ctr',
27
47
  page: url,
28
48
  screenshot: '',
29
49
  trackedPageKPIName: 'Click Through Rate',
30
50
  trackedPageKPIValue: ctr,
51
+ trackedKPISiteAverage: siteAvgCTR,
31
52
  pageViews: total,
32
53
  samples: total, // todo: get the actual number of samples
33
54
  metrics: [{
34
55
  type: 'traffic',
56
+ vendor: '*',
35
57
  value: {
36
58
  total,
37
59
  paid,
@@ -40,12 +62,35 @@ function convertToOpportunity(traffic) {
40
62
  },
41
63
  }, {
42
64
  type: 'ctr',
65
+ vendor: '*',
43
66
  value: {
44
67
  page: ctr,
45
- siteAverage: siteAvgCTR,
46
68
  },
47
69
  }],
48
70
  };
71
+ opportunity.metrics.push(...topVendors.flatMap(([vendor, {
72
+ total: _total, owned: _owned, earned: _earned, paid: _paid,
73
+ }]) => {
74
+ const trafficMetrics = {
75
+ type: 'traffic',
76
+ vendor,
77
+ value: {
78
+ total: _total,
79
+ owned: _owned,
80
+ earned: _earned,
81
+ paid: _paid,
82
+ },
83
+ };
84
+ const ctrMetrics = {
85
+ type: 'ctr',
86
+ vendor,
87
+ value: {
88
+ page: ctrByUrlAndVendor[vendor],
89
+ },
90
+ };
91
+ return [trafficMetrics, ctrMetrics];
92
+ }));
93
+ return opportunity;
49
94
  }
50
95
 
51
96
  function hasHighOrganicTraffic(interval, traffic) {
@@ -61,13 +106,18 @@ function handler(bundles, opts = {}) {
61
106
  const { interval = 7 } = opts;
62
107
 
63
108
  const trafficByUrl = trafficAcquisition.handler(bundles);
64
- const ctrByUrl = getCTRByUrl(bundles);
109
+ const ctrByUrlAndVendor = getCTRByUrlAndVendor(bundles);
65
110
  const siteAvgCTR = getSiteAvgCTR(bundles);
66
111
 
67
112
  return trafficByUrl.filter((traffic) => traffic.total > interval * DAILY_PAGEVIEW_THRESHOLD)
68
113
  .filter(hasHighOrganicTraffic.bind(null, interval))
69
- .filter((traffic) => hasLowerCTR(ctrByUrl[traffic.url], siteAvgCTR))
70
- .map((traffic) => ({ ...traffic, ctr: ctrByUrl[traffic.url], siteAvgCTR }))
114
+ .filter((traffic) => hasLowerCTR(ctrByUrlAndVendor[traffic.url].value, siteAvgCTR))
115
+ .map((traffic) => ({
116
+ ...traffic,
117
+ ctr: ctrByUrlAndVendor[traffic.url].value,
118
+ siteAvgCTR,
119
+ ctrByUrlAndVendor: ctrByUrlAndVendor[traffic.url].vendors,
120
+ }))
71
121
  .map(convertToOpportunity);
72
122
  }
73
123
 
@@ -118,6 +118,7 @@ function getRageClickOpportunities(rageClickInstances) {
118
118
  screenshot: '',
119
119
  trackedPageKPIName: OPPORTUNITY_DESCRIPTION,
120
120
  trackedPageKPIValue: '',
121
+ trackedKPISiteAverage: '',
121
122
  pageViews: rageClickInstances[url].pageViews,
122
123
  samples: rageClickInstances[url].samples,
123
124
  metrics: [],
@@ -10,35 +10,20 @@
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
12
 
13
- import { classifyTrafficSource } from '../common/traffic.js';
13
+ import { classifyTrafficSource, extractTrafficHints } from '../common/traffic.js';
14
14
 
15
15
  const MAIN_TYPES = ['total', 'paid', 'earned', 'owned'];
16
16
 
17
- function extractHints(bundle) {
18
- const findEvent = (checkpoint, source = '') => bundle.events.find((e) => e.checkpoint === checkpoint && (!source || e.source === source)) || {};
19
-
20
- const referrer = findEvent('enter').source || '';
21
- const utmSource = findEvent('utm', 'utm_source').target || '';
22
- const utmMedium = findEvent('utm', 'utm_medium').target || '';
23
- const tracking = findEvent('paid').checkpoint || findEvent('email').checkpoint || '';
24
-
25
- return {
26
- url: bundle.url,
27
- weight: bundle.weight,
28
- referrer,
29
- utmSource,
30
- utmMedium,
31
- tracking,
32
- };
33
- }
34
-
35
- function collectByUrlAndTrafficSource(acc, { url, weight, trafficSource }) {
17
+ function collectByUrlAndTrafficSource(acc, {
18
+ url, weight, trafficSource,
19
+ }) {
36
20
  acc[url] = acc[url] || {
37
21
  total: 0, owned: 0, earned: 0, paid: 0,
38
22
  };
39
23
  acc[url][trafficSource] = (acc[url][trafficSource] || 0) + weight;
40
24
  acc[url].total += weight;
41
- acc[url][trafficSource.split(':')[0]] += weight;
25
+ const trafficType = trafficSource.split(':')[0];
26
+ acc[url][trafficType] += weight;
42
27
  return acc;
43
28
  }
44
29
 
@@ -57,16 +42,17 @@ function transformFormat(trafficSources) {
57
42
 
58
43
  function handler(bundles) {
59
44
  const trafficSources = bundles
60
- .map(extractHints)
45
+ .map(extractTrafficHints)
61
46
  .map((row) => {
62
47
  const {
63
48
  type,
64
49
  category,
50
+ vendor,
65
51
  } = classifyTrafficSource(row.url, row.referrer, row.utmSource, row.utmMedium, row.tracking);
66
52
  return {
67
53
  url: row.url,
68
54
  weight: row.weight,
69
- trafficSource: `${type}:${category}`,
55
+ trafficSource: vendor ? `${type}:${category}:${vendor}` : `${type}:${category}`,
70
56
  };
71
57
  })
72
58
  .reduce(collectByUrlAndTrafficSource, {});