@adobe/spacecat-shared-rum-api-client 2.5.3 → 2.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # [@adobe/spacecat-shared-rum-api-client-v2.5.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.5.3...@adobe/spacecat-shared-rum-api-client-v2.5.4) (2024-08-01)
2
+
3
+
4
+ ### Bug Fixes
5
+
6
+ * improve traffic acquisition detection ([#310](https://github.com/adobe/spacecat-shared/issues/310)) ([25d46ff](https://github.com/adobe/spacecat-shared/commit/25d46ffed66643e0581f23067b0f6922beff8e0e))
7
+
1
8
  # [@adobe/spacecat-shared-rum-api-client-v2.5.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.5.2...@adobe/spacecat-shared-rum-api-client-v2.5.3) (2024-07-31)
2
9
 
3
10
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-rum-api-client",
3
- "version": "2.5.3",
3
+ "version": "2.5.4",
4
4
  "description": "Shared modules of the Spacecat Services - Rum API client",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -36,7 +36,8 @@
36
36
  "@adobe/helix-universal": "5.0.5",
37
37
  "@adobe/spacecat-shared-utils": "1.4.0",
38
38
  "aws4": "1.13.0",
39
- "d3-array": "3.2.4"
39
+ "d3-array": "3.2.4",
40
+ "urijs": "^1.19.11"
40
41
  },
41
42
  "devDependencies": {
42
43
  "chai": "4.5.0",
@@ -12,33 +12,61 @@
12
12
  /* eslint-disable object-curly-newline */
13
13
 
14
14
  import { hasText } from '@adobe/spacecat-shared-utils';
15
+ import URI from 'urijs';
16
+
17
+ /**
18
+ * Extracts the second-level domain (SLD) from a given URL.
19
+ *
20
+ * For example, for the URL `https://subdomain.example.co.uk/path`, this
21
+ * function will return `example` (excluding the TLD `co.uk`).
22
+ *
23
+ * @param {string} url - The URL from which to extract the second-level domain.
24
+ * @returns {string} The second-level domain of the given URL, or the original
25
+ * URL if it does not contain any text.
26
+ */
27
+ function getSecondLevelDomain(url) {
28
+ if (!hasText(url)) return url;
29
+ const uri = new URI(url);
30
+ const domain = uri.domain();
31
+ const tld = uri.tld();
32
+ return domain.split(`.${tld}`)[0];
33
+ }
15
34
 
16
35
  /*
17
36
  * --------- DEFINITIONS ----------------
18
37
  */
19
38
 
20
39
  // Referrer related
21
- // matches second level domains 1:1 ignoring subdomains and top-level domains
22
- // for example: https://l.instagram.com matches, whereas https://wwww.linstagram.com does not
23
- const searchEngines = /^(https?:\/\/)?(.*\.)?(google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask)\.(.*)(\/|$)/;
24
- const socialMedias = /^(https?:\/\/)?(.*\.)?(facebook|tiktok|snapchat|x|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram)\.(.*)(\/|$)/;
25
- const adNetworks = /googlesyndication|2mdn/;
26
- const videoPlatforms = /^(https?:\/\/)?(.*\.)?(youtube|vimeo|twitch|dailymotion|wistia)\.(.*)(\/|$)/;
27
-
28
- // UTM Source related
29
- const paidDisplaySources = ['gdn'];
30
-
31
- // UTM Medium related
32
- // matches 'pp', *cp[acmuv]*, *ppc*, *paid*
33
- const paidUTMMediums = /^\bpp\b|(.*(cp[acmuv]|ppc|paid|display|banner|poster|placement).*)$/;
34
- const searchEngineUTMMediums = ['google', 'paidsearch', 'paidsearchnb', 'sea', 'sem'];
35
- const socialMediaUTMMediums = ['facebook', 'gnews', 'instagramfeed', 'instagramreels', 'instagramstories', 'line', 'linkedin', 'metasearch', 'organicsocialown', 'paidsocial', 'social', 'sociallinkedin', 'socialpaid'];
36
- const affiliateUTMMediums = ['aff', 'affiliate', 'affiliatemarketing'];
37
- const organicUTMMediums = ['organicsocial'];
38
- const emailUTMMediums = ['em', 'email', 'mail', 'newsletter'];
39
- const smsUTMMediums = ['sms', 'mms'];
40
- const qrUTMMediums = ['qr', 'qrcode'];
41
- const pushUTMMediums = ['push', 'pushnotification'];
40
+ const referrers = {
41
+ search: /google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask/,
42
+ social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|x|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
43
+ ad: /googlesyndication|2mdn|doubleclick|syndicatedsearch/,
44
+ video: /youtube|vimeo|twitch|dailymotion|wistia/,
45
+ };
46
+
47
+ const mediums = {
48
+ paidall: /^\bpp\b|(.*(cp[acmuv]|ppc|paid).*)$/, // matches 'pp', *cp[acmuv]*, *ppc*, *paid*
49
+ paidsearch: /google|paidsearch|sea|sem|maps/,
50
+ paidsocial: /paidsocial|socialpaid|fbig|facebook|gnews|instagram|line|linkedin|metasearch/,
51
+ organic: /organic/,
52
+ socialall: /^\b(soc)\b|(.*(social).*)$/,
53
+ display: /display|banner|poster|placement|image|dcm|businesslistings/,
54
+ video: /video/,
55
+ affiliate: /^aff|(.*(patrocinados|referral)).*$/,
56
+ email: ['em', 'email', 'mail', 'newsletter'],
57
+ sms: ['sms', 'mms'],
58
+ qr: ['qr', 'qrcode'],
59
+ push: ['push', 'pushnotification'],
60
+ };
61
+
62
+ const sources = {
63
+ social: /^\b(ig|fb|x|soc)\b|(.*(meta|tiktok|facebook|snapchat|twitter|igshopping|instagram|linkedin|reddit).*)$/,
64
+ search: /^\b(goo)\b|(.*(sea|google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask).*)$/,
65
+ video: /youtube|vimeo|twitch|dailymotion|wistia/,
66
+ display: /optumib2b|jun|googleads|dv36|dv360|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
67
+ affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
68
+ email: /sfmc|email/,
69
+ };
42
70
 
43
71
  // Tracking params - based on the checkpoints we have in rum-enhancer now
44
72
  // const organicTrackingParams = ['srsltid']; WE DO NOT HAVE THIS AS OF NOW
@@ -73,51 +101,77 @@ const notEmpty = (text) => hasText(text);
73
101
  */
74
102
 
75
103
  // ORDER IS IMPORTANT
76
- const RULES = (origin) => ([
104
+ const RULES = (domain) => ([
77
105
  // PAID
78
- { type: 'paid', category: 'search', referrer: anyOf(searchEngines), utmSource: any, utmMedium: anyOf(searchEngineUTMMediums), tracking: none },
79
- { type: 'paid', category: 'search', referrer: anyOf(searchEngines), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
80
- { type: 'paid', category: 'social', referrer: anyOf(socialMedias), utmSource: any, utmMedium: anyOf(socialMediaUTMMediums), tracking: none },
81
- { type: 'paid', category: 'social', referrer: anyOf(socialMedias), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
82
- { type: 'paid', category: 'video', referrer: anyOf(videoPlatforms), utmSource: any, utmMedium: anyOf(paidUTMMediums), tracking: any },
83
- { type: 'paid', category: 'video', referrer: anyOf(videoPlatforms), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
84
- { type: 'paid', category: 'display', referrer: notEmpty, utmSource: any, utmMedium: anyOf(paidUTMMediums), tracking: any },
85
- { type: 'paid', category: 'display', referrer: anyOf(adNetworks), utmSource: any, utmMedium: any, tracking: any },
86
- { type: 'paid', category: 'display', referrer: notEmpty, utmSource: anyOf(paidDisplaySources), utmMedium: any, tracking: any },
87
- { type: 'paid', category: 'affiliate', referrer: notEmpty, utmSource: any, utmMedium: anyOf(affiliateUTMMediums), tracking: any },
88
- { type: 'paid', category: 'uncategorized', referrer: not(origin), utmSource: any, utmMedium: anyOf(paidUTMMediums), tracking: any },
89
- { type: 'paid', category: 'uncategorized', referrer: not(origin), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
106
+ { type: 'paid', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: anyOf(mediums.paidsearch), tracking: none },
107
+ { type: 'paid', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
108
+ { type: 'paid', category: 'search', referrer: anyOf(referrers.ad), utmSource: any, utmMedium: anyOf(mediums.paidsearch), tracking: any },
109
+ { type: 'paid', category: 'search', referrer: none, utmSource: anyOf(sources.search), utmMedium: anyOf(mediums.paidsearch), tracking: any },
110
+
111
+ { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: any, utmMedium: anyOf(mediums.paidsocial), tracking: none },
112
+ { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
113
+ { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: notEmpty, utmMedium: anyOf(mediums.socialall), tracking: any },
114
+ { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.paidsocial), tracking: any },
115
+ { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.paidall), tracking: any },
116
+ { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: notEmpty, utmMedium: notEmpty, tracking: any },
117
+ { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.socialall), tracking: any },
118
+
119
+ { type: 'paid', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
120
+ { type: 'paid', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
121
+ { type: 'paid', category: 'video', referrer: none, utmSource: anyOf(sources.video), utmMedium: anyOf(mediums.video), tracking: any },
122
+
123
+ { type: 'paid', category: 'display', referrer: notEmpty, utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
124
+ { type: 'paid', category: 'display', referrer: notEmpty, utmSource: any, utmMedium: anyOf(mediums.display), tracking: any },
125
+ { type: 'paid', category: 'display', referrer: anyOf(referrers.ad), utmSource: any, utmMedium: any, tracking: any },
126
+ { type: 'paid', category: 'display', referrer: notEmpty, utmSource: anyOf(sources.display), utmMedium: any, tracking: any },
127
+ { type: 'paid', category: 'display', referrer: none, utmSource: notEmpty, utmMedium: anyOf(mediums.display), tracking: any },
128
+ { type: 'paid', category: 'display', referrer: none, utmSource: notEmpty, utmMedium: anyOf(mediums.paidall), tracking: any },
129
+ { type: 'paid', category: 'display', referrer: none, utmSource: anyOf(sources.display), utmMedium: notEmpty, tracking: any },
130
+ { type: 'paid', category: 'display', referrer: any, utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
131
+ { type: 'paid', category: 'display', referrer: anyOf(referrers.ad), utmSource: any, utmMedium: any, tracking: any },
132
+
133
+ { type: 'paid', category: 'affiliate', referrer: any, utmSource: any, utmMedium: anyOf(mediums.affiliate), tracking: any },
134
+
135
+ // low prio PAIDs
136
+ { type: 'paid', category: 'search', referrer: none, utmSource: anyOf(sources.search), utmMedium: any, tracking: any },
137
+ { type: 'paid', category: 'uncategorized', referrer: not(domain), utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
138
+ { type: 'paid', category: 'uncategorized', referrer: not(domain), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
90
139
 
91
140
  // EARNED
92
- { type: 'earned', category: 'search', referrer: anyOf(searchEngines), utmSource: none, utmMedium: none, tracking: none },
93
- { type: 'earned', category: 'search', referrer: anyOf(searchEngines), utmSource: any, utmMedium: not(paidUTMMediums), tracking: not(paidTrackingParams) },
94
- { type: 'earned', category: 'social', referrer: anyOf(socialMedias), utmSource: none, utmMedium: none, tracking: none },
95
- { type: 'earned', category: 'social', referrer: not(origin), utmSource: any, utmMedium: anyOf(organicUTMMediums), tracking: none },
96
- { type: 'earned', category: 'video', referrer: anyOf(videoPlatforms), utmSource: none, utmMedium: none, tracking: none },
97
- { type: 'earned', category: 'video', referrer: anyOf(videoPlatforms), utmSource: any, utmMedium: not(paidUTMMediums), tracking: none },
98
- { type: 'earned', category: 'referral', referrer: not(origin), utmSource: none, utmMedium: none, tracking: none },
141
+ { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: none, utmMedium: none, tracking: none },
142
+ { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: not(mediums.paidall), tracking: not(paidTrackingParams) },
143
+ { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: anyOf(mediums.organic), tracking: none },
144
+ { type: 'earned', category: 'social', referrer: anyOf(referrers.social), utmSource: none, utmMedium: none, tracking: none },
145
+ { type: 'earned', category: 'social', referrer: not(domain), utmSource: any, utmMedium: anyOf(mediums.organic), tracking: none },
146
+ { type: 'earned', category: 'video', referrer: anyOf(referrers.video), utmSource: none, utmMedium: none, tracking: none },
147
+ { type: 'earned', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: not(mediums.paidall), tracking: none },
148
+ { type: 'earned', category: 'referral', referrer: not(domain), utmSource: none, utmMedium: none, tracking: none },
99
149
 
100
150
  // OWNED
101
151
  { type: 'owned', category: 'direct', referrer: none, utmSource: none, utmMedium: none, tracking: none },
102
- { type: 'owned', category: 'internal', referrer: anyOf(origin), utmSource: none, utmMedium: none, tracking: none },
152
+ { type: 'owned', category: 'internal', referrer: anyOf(domain), utmSource: none, utmMedium: none, tracking: none },
103
153
  { type: 'owned', category: 'email', referrer: any, utmSource: any, utmMedium: any, tracking: anyOf(emailTrackingParams) },
104
- { type: 'owned', category: 'email', referrer: any, utmSource: any, utmMedium: anyOf(emailUTMMediums), tracking: any },
105
- { type: 'owned', category: 'sms', referrer: none, utmSource: any, utmMedium: anyOf(smsUTMMediums), tracking: none },
106
- { type: 'owned', category: 'qr', referrer: none, utmSource: any, utmMedium: anyOf(qrUTMMediums), tracking: none },
107
- { type: 'owned', category: 'push', referrer: none, utmSource: any, utmMedium: anyOf(pushUTMMediums), tracking: none },
154
+ { type: 'owned', category: 'email', referrer: any, utmSource: any, utmMedium: anyOf(mediums.email), tracking: any },
155
+ { type: 'owned', category: 'sms', referrer: none, utmSource: any, utmMedium: anyOf(mediums.sms), tracking: none },
156
+ { type: 'owned', category: 'qr', referrer: none, utmSource: any, utmMedium: anyOf(mediums.qr), tracking: none },
157
+ { type: 'owned', category: 'push', referrer: none, utmSource: any, utmMedium: anyOf(mediums.push), tracking: none },
108
158
 
109
159
  // FALLBACK
110
160
  { type: 'owned', category: 'uncategorized', referrer: any, utmSource: any, utmMedium: any, tracking: any },
111
161
  ]);
112
162
 
113
163
  export function classifyTrafficSource(url, referrer, utmSource, utmMedium, trackingParams) {
114
- const { origin } = new URL(url);
115
- const rules = RULES(origin);
164
+ const secondLevelDomain = getSecondLevelDomain(url);
165
+ const rules = RULES(secondLevelDomain);
166
+
167
+ const referrerDomain = getSecondLevelDomain(referrer);
168
+
169
+ const sanitize = (str) => (str || '').toLowerCase().replace(/[^a-zA-Z0-9]/, '');
116
170
 
117
171
  const { type, category } = rules.find((rule) => (
118
- rule.referrer(referrer)
119
- && rule.utmSource(utmSource)
120
- && rule.utmMedium(utmMedium)
172
+ rule.referrer(referrerDomain)
173
+ && rule.utmSource(sanitize(utmSource))
174
+ && rule.utmMedium(sanitize(utmMedium))
121
175
  && rule.tracking(trackingParams)
122
176
  ));
123
177