@adobe/spacecat-shared-rum-api-client 2.5.3 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-rum-api-client-v2.6.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.5.4...@adobe/spacecat-shared-rum-api-client-v2.6.0) (2024-08-02)
2
+
3
+
4
+ ### Features
5
+
6
+ * multi-query support for rum-client ([#311](https://github.com/adobe/spacecat-shared/issues/311)) ([c3ac6a2](https://github.com/adobe/spacecat-shared/commit/c3ac6a20396874d0abffdcdcd50406e9718a426b))
7
+
8
+ # [@adobe/spacecat-shared-rum-api-client-v2.5.4](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.5.3...@adobe/spacecat-shared-rum-api-client-v2.5.4) (2024-08-01)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * improve traffic acquisition detection ([#310](https://github.com/adobe/spacecat-shared/issues/310)) ([25d46ff](https://github.com/adobe/spacecat-shared/commit/25d46ffed66643e0581f23067b0f6922beff8e0e))
14
+
1
15
  # [@adobe/spacecat-shared-rum-api-client-v2.5.3](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.5.2...@adobe/spacecat-shared-rum-api-client-v2.5.3) (2024-07-31)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-rum-api-client",
3
- "version": "2.5.3",
3
+ "version": "2.6.0",
4
4
  "description": "Shared modules of the Spacecat Services - Rum API client",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -36,7 +36,8 @@
36
36
  "@adobe/helix-universal": "5.0.5",
37
37
  "@adobe/spacecat-shared-utils": "1.4.0",
38
38
  "aws4": "1.13.0",
39
- "d3-array": "3.2.4"
39
+ "d3-array": "3.2.4",
40
+ "urijs": "^1.19.11"
40
41
  },
41
42
  "devDependencies": {
42
43
  "chai": "4.5.0",
@@ -12,33 +12,61 @@
12
12
  /* eslint-disable object-curly-newline */
13
13
 
14
14
  import { hasText } from '@adobe/spacecat-shared-utils';
15
+ import URI from 'urijs';
16
+
17
/**
 * Returns the second-level domain (SLD) of a URL, i.e. the registrable
 * domain with its top-level domain stripped off.
 *
 * Example: `https://subdomain.example.co.uk/path` yields `example`
 * (the TLD `co.uk` and the subdomain are both dropped).
 *
 * Blank input is not parsed; it is handed back to the caller unchanged.
 *
 * @param {string} url - The URL to inspect.
 * @returns {string} The second-level domain, or `url` itself when it has no text.
 */
function getSecondLevelDomain(url) {
  if (!hasText(url)) {
    return url;
  }

  const parsed = new URI(url);
  // URI.js reports the TLD without its leading dot; re-attach the dot so only
  // the ".tld" suffix is cut away from the registrable domain.
  const tldSuffix = `.${parsed.tld()}`;
  const [secondLevel] = parsed.domain().split(tldSuffix);
  return secondLevel;
}
15
34
 
16
35
  /*
17
36
  * --------- DEFINITIONS ----------------
18
37
  */
19
38
 
20
39
  // Referrer related
21
- // matches second level domains 1:1 ignoring subdomains and top-level domains
22
- // for example: https://l.instagram.com matches, whereas https://wwww.linstagram.com does not
23
- const searchEngines = /^(https?:\/\/)?(.*\.)?(google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask)\.(.*)(\/|$)/;
24
- const socialMedias = /^(https?:\/\/)?(.*\.)?(facebook|tiktok|snapchat|x|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram)\.(.*)(\/|$)/;
25
- const adNetworks = /googlesyndication|2mdn/;
26
- const videoPlatforms = /^(https?:\/\/)?(.*\.)?(youtube|vimeo|twitch|dailymotion|wistia)\.(.*)(\/|$)/;
27
-
28
- // UTM Source related
29
- const paidDisplaySources = ['gdn'];
30
-
31
- // UTM Medium related
32
- // matches 'pp', *cp[acmuv]*, *ppc*, *paid*
33
- const paidUTMMediums = /^\bpp\b|(.*(cp[acmuv]|ppc|paid|display|banner|poster|placement).*)$/;
34
- const searchEngineUTMMediums = ['google', 'paidsearch', 'paidsearchnb', 'sea', 'sem'];
35
- const socialMediaUTMMediums = ['facebook', 'gnews', 'instagramfeed', 'instagramreels', 'instagramstories', 'line', 'linkedin', 'metasearch', 'organicsocialown', 'paidsocial', 'social', 'sociallinkedin', 'socialpaid'];
36
- const affiliateUTMMediums = ['aff', 'affiliate', 'affiliatemarketing'];
37
- const organicUTMMediums = ['organicsocial'];
38
- const emailUTMMediums = ['em', 'email', 'mail', 'newsletter'];
39
- const smsUTMMediums = ['sms', 'mms'];
40
- const qrUTMMediums = ['qr', 'qrcode'];
41
- const pushUTMMediums = ['push', 'pushnotification'];
40
// Patterns applied to the referrer's second-level domain (e.g. `google`, `x`).
const referrers = {
  search: /google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask/,
  // `x` (x.com) is only accepted as a whole word via the first alternative. It must NOT be
  // listed in the "contains" alternative, otherwise every domain with the letter "x" in it
  // (example, netflix, xerox, ...) would be classified as social media.
  social: /^\b(x)\b|(.*(facebook|tiktok|snapchat|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
  ad: /googlesyndication|2mdn|doubleclick|syndicatedsearch/,
  video: /youtube|vimeo|twitch|dailymotion|wistia/,
};

// Patterns / exact-value lists applied to the sanitized `utm_medium` value.
const mediums = {
  paidall: /^\bpp\b|(.*(cp[acmuv]|ppc|paid).*)$/, // matches 'pp', *cp[acmuv]*, *ppc*, *paid*
  paidsearch: /google|paidsearch|sea|sem|maps/,
  paidsocial: /paidsocial|socialpaid|fbig|facebook|gnews|instagram|line|linkedin|metasearch/,
  organic: /organic/,
  socialall: /^\b(soc)\b|(.*(social).*)$/,
  display: /display|banner|poster|placement|image|dcm|businesslistings/,
  video: /video/,
  affiliate: /^aff|(.*(patrocinados|referral)).*$/,
  email: ['em', 'email', 'mail', 'newsletter'],
  sms: ['sms', 'mms'],
  qr: ['qr', 'qrcode'],
  push: ['push', 'pushnotification'],
};

// Patterns applied to the sanitized `utm_source` value.
const sources = {
  social: /^\b(ig|fb|x|soc)\b|(.*(meta|tiktok|facebook|snapchat|twitter|igshopping|instagram|linkedin|reddit).*)$/,
  search: /^\b(goo)\b|(.*(sea|google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask).*)$/,
  video: /youtube|vimeo|twitch|dailymotion|wistia/,
  display: /optumib2b|jun|googleads|dv36|dv360|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
  affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
  email: /sfmc|email/,
};
42
70
 
43
71
  // Tracking params - based on the checkpoints we have in rum-enhancer now
44
72
  // const organicTrackingParams = ['srsltid']; WE DO NOT HAVE THIS AS OF NOW
@@ -73,51 +101,77 @@ const notEmpty = (text) => hasText(text);
73
101
  */
74
102
 
75
103
// ORDER IS IMPORTANT
/**
 * Builds the ordered list of traffic classification rules for a page.
 *
 * Each rule carries one predicate per signal (`referrer`, `utmSource`, `utmMedium`,
 * `tracking`). The classifier picks the FIRST rule whose four predicates all accept the
 * incoming values, so more specific rules must stay above more generic ones.
 *
 * @param {string} domain - Second-level domain of the page being classified; used by the
 * `not(domain)` / `anyOf(domain)` predicates to tell external from internal referrers.
 * @returns {Array<object>} The rules, in evaluation order.
 */
const RULES = (domain) => ([
  // PAID
  { type: 'paid', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: anyOf(mediums.paidsearch), tracking: none },
  { type: 'paid', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
  { type: 'paid', category: 'search', referrer: anyOf(referrers.ad), utmSource: any, utmMedium: anyOf(mediums.paidsearch), tracking: any },
  { type: 'paid', category: 'search', referrer: none, utmSource: anyOf(sources.search), utmMedium: anyOf(mediums.paidsearch), tracking: any },

  { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: any, utmMedium: anyOf(mediums.paidsocial), tracking: none },
  { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
  { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: notEmpty, utmMedium: anyOf(mediums.socialall), tracking: any },
  { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.paidsocial), tracking: any },
  { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.paidall), tracking: any },
  { type: 'paid', category: 'social', referrer: anyOf(referrers.social), utmSource: notEmpty, utmMedium: notEmpty, tracking: any },
  { type: 'paid', category: 'social', referrer: none, utmSource: anyOf(sources.social), utmMedium: anyOf(mediums.socialall), tracking: any },

  { type: 'paid', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
  { type: 'paid', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
  { type: 'paid', category: 'video', referrer: none, utmSource: anyOf(sources.video), utmMedium: anyOf(mediums.video), tracking: any },

  { type: 'paid', category: 'display', referrer: notEmpty, utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
  { type: 'paid', category: 'display', referrer: notEmpty, utmSource: any, utmMedium: anyOf(mediums.display), tracking: any },
  { type: 'paid', category: 'display', referrer: anyOf(referrers.ad), utmSource: any, utmMedium: any, tracking: any },
  { type: 'paid', category: 'display', referrer: notEmpty, utmSource: anyOf(sources.display), utmMedium: any, tracking: any },
  { type: 'paid', category: 'display', referrer: none, utmSource: notEmpty, utmMedium: anyOf(mediums.display), tracking: any },
  { type: 'paid', category: 'display', referrer: none, utmSource: notEmpty, utmMedium: anyOf(mediums.paidall), tracking: any },
  { type: 'paid', category: 'display', referrer: none, utmSource: anyOf(sources.display), utmMedium: notEmpty, tracking: any },
  { type: 'paid', category: 'display', referrer: any, utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
  // (a second, identical `referrer: anyOf(referrers.ad)` display rule used to follow here;
  // it could never be the first match because the same rule already appears above)

  { type: 'paid', category: 'affiliate', referrer: any, utmSource: any, utmMedium: anyOf(mediums.affiliate), tracking: any },

  // low prio PAIDs
  { type: 'paid', category: 'search', referrer: none, utmSource: anyOf(sources.search), utmMedium: any, tracking: any },
  { type: 'paid', category: 'uncategorized', referrer: not(domain), utmSource: any, utmMedium: anyOf(mediums.paidall), tracking: any },
  // NOTE(review): the display rule above that accepts any referrer with a paid tracking param
  // appears to shadow this rule - confirm that the `any` predicate always matches.
  { type: 'paid', category: 'uncategorized', referrer: not(domain), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },

  // EARNED
  { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: none, utmMedium: none, tracking: none },
  { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: not(mediums.paidall), tracking: not(paidTrackingParams) },
  { type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: anyOf(mediums.organic), tracking: none },
  { type: 'earned', category: 'social', referrer: anyOf(referrers.social), utmSource: none, utmMedium: none, tracking: none },
  { type: 'earned', category: 'social', referrer: not(domain), utmSource: any, utmMedium: anyOf(mediums.organic), tracking: none },
  { type: 'earned', category: 'video', referrer: anyOf(referrers.video), utmSource: none, utmMedium: none, tracking: none },
  { type: 'earned', category: 'video', referrer: anyOf(referrers.video), utmSource: any, utmMedium: not(mediums.paidall), tracking: none },
  { type: 'earned', category: 'referral', referrer: not(domain), utmSource: none, utmMedium: none, tracking: none },

  // OWNED
  { type: 'owned', category: 'direct', referrer: none, utmSource: none, utmMedium: none, tracking: none },
  { type: 'owned', category: 'internal', referrer: anyOf(domain), utmSource: none, utmMedium: none, tracking: none },
  { type: 'owned', category: 'email', referrer: any, utmSource: any, utmMedium: any, tracking: anyOf(emailTrackingParams) },
  { type: 'owned', category: 'email', referrer: any, utmSource: any, utmMedium: anyOf(mediums.email), tracking: any },
  { type: 'owned', category: 'sms', referrer: none, utmSource: any, utmMedium: anyOf(mediums.sms), tracking: none },
  { type: 'owned', category: 'qr', referrer: none, utmSource: any, utmMedium: anyOf(mediums.qr), tracking: none },
  { type: 'owned', category: 'push', referrer: none, utmSource: any, utmMedium: anyOf(mediums.push), tracking: none },

  // FALLBACK
  { type: 'owned', category: 'uncategorized', referrer: any, utmSource: any, utmMedium: any, tracking: any },
]);
112
162
 
113
163
  export function classifyTrafficSource(url, referrer, utmSource, utmMedium, trackingParams) {
114
- const { origin } = new URL(url);
115
- const rules = RULES(origin);
164
+ const secondLevelDomain = getSecondLevelDomain(url);
165
+ const rules = RULES(secondLevelDomain);
166
+
167
+ const referrerDomain = getSecondLevelDomain(referrer);
168
+
169
// Normalizes a UTM value for rule matching: missing values become '', the text is
// lower-cased and EVERY non-alphanumeric character is removed (the `g` flag is required;
// without it only the first separator would be stripped, e.g. 'paid_social-ads').
const sanitize = (str) => (str || '').toLowerCase().replace(/[^a-zA-Z0-9]/g, '');
116
170
 
117
171
  const { type, category } = rules.find((rule) => (
118
- rule.referrer(referrer)
119
- && rule.utmSource(utmSource)
120
- && rule.utmMedium(utmMedium)
172
+ rule.referrer(referrerDomain)
173
+ && rule.utmSource(sanitize(utmSource))
174
+ && rule.utmMedium(sanitize(utmMedium))
121
175
  && rule.tracking(trackingParams)
122
176
  ));
123
177
 
package/src/index.d.ts CHANGED
@@ -21,26 +21,40 @@ export interface RUMAPIOptions {
21
21
 
22
22
export default class RUMAPIClient {
  /**
   * Static factory method to create an instance of RUMAPIClient.
   * @param {UniversalContext} context - An object containing the AWS Lambda context information
   * @returns An instance of RUMAPIClient.
   * @remarks This method is designed to create a new instance from an AWS Lambda context.
   *   The created instance is stored in the Lambda context, and subsequent calls to
   *   this method will return the singleton instance if previously created.
   */
  static createFrom(context: UniversalContext): RUMAPIClient;

  /**
   * Constructor for creating an instance of RUMAPIClient.
   */
  constructor();

  /**
   * Asynchronous method to run queries against RUM Bundler API.
   * @param {string} query - Name of the query to run.
   * @param {RUMAPIOptions} opts - An object containing options for query to run.
   * @returns A Promise resolving to an object with the query results.
   * @remarks See the README.md for the available queries.
   */
  query(query: string, opts?: RUMAPIOptions): Promise<object>;

  /**
   * Asynchronous method to run multiple queries against the data fetched from RUM Bundler API.
   *
   * This method makes a single call to the RUM Bundler API to fetch the raw data, then applies
   * all the requested queries to this raw data. The results are returned in an object where each
   * key corresponds to a query name and each value contains the result of that query.
   *
   * @param {string[]} queries - An array of query names to execute.
   * @param {RUMAPIOptions} [opts] - Optional object containing options for the queries.
   * @returns {Promise<Record<string, object>>} A Promise that resolves to an object (not an
   *   array) where each key is the name of a query, and each value is the result of that query.
   */
  queryMulti(queries: string[], opts?: RUMAPIOptions): Promise<Record<string, object>>;
}
package/src/index.js CHANGED
@@ -49,4 +49,41 @@ export default class RUMAPIClient {
49
49
  throw new Error(`Query '${query}' failed. Opts: ${JSON.stringify(opts)}. Reason: ${e.message}`);
50
50
  }
51
51
  }
52
+
53
+ // eslint-disable-next-line class-methods-use-this
54
+ async queryMulti(queries, opts) {
55
+ const queryHandlers = [];
56
+ const allCheckpoints = new Set();
57
+
58
+ for (const query of queries) {
59
+ const { handler, checkpoints = [] } = HANDLERS[query] || {};
60
+
61
+ if (!handler) {
62
+ throw new Error(`Unknown query: ${query}`);
63
+ }
64
+
65
+ queryHandlers.push({ query, handler });
66
+ checkpoints.forEach((checkpoint) => allCheckpoints.add(checkpoint));
67
+ }
68
+
69
+ try {
70
+ // Fetch bundles with deduplicated checkpoints
71
+ const bundles = await fetchBundles({
72
+ ...opts,
73
+ checkpoints: [...allCheckpoints],
74
+ });
75
+
76
+ const results = {};
77
+
78
+ // Execute each query handler sequentially
79
+ for (const { query, handler } of queryHandlers) {
80
+ // eslint-disable-next-line no-await-in-loop
81
+ results[query] = await handler(bundles);
82
+ }
83
+
84
+ return results;
85
+ } catch (e) {
86
+ throw new Error(`Multi query failed. Queries: ${JSON.stringify(queries)}, Opts: ${JSON.stringify(opts)}. Reason: ${e.message}`);
87
+ }
88
+ }
52
89
  }