@adobe/spacecat-shared-rum-api-client 2.34.1 → 2.36.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/common/rum-bundler-client.js +120 -32
- package/src/common/traffic.js +11 -5
- package/src/index.js +18 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-rum-api-client-v2.36.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.35.0...@adobe/spacecat-shared-rum-api-client-v2.36.0) (2025-07-25)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* classify organic llm referral traffic ([#877](https://github.com/adobe/spacecat-shared/issues/877)) ([63e029e](https://github.com/adobe/spacecat-shared/commit/63e029ee3db343d487ba1cbe2a006b11dce683bf))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-rum-api-client-v2.35.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.34.1...@adobe/spacecat-shared-rum-api-client-v2.35.0) (2025-07-24)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* **rum-api-client:** add query stream ([#872](https://github.com/adobe/spacecat-shared/issues/872)) ([7702138](https://github.com/adobe/spacecat-shared/commit/77021388a298843f3ec2571c9a6f148184fa2814))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-rum-api-client-v2.34.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.34.0...@adobe/spacecat-shared-rum-api-client-v2.34.1) (2025-07-24)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
package/src/common/rum-bundler-client.js
CHANGED
|
@@ -207,22 +207,7 @@ async function mergeBundlesWithSameId(bundles) {
|
|
|
207
207
|
}
|
|
208
208
|
/* c8 ignore end */
|
|
209
209
|
|
|
210
|
-
|
|
211
|
-
const {
|
|
212
|
-
domain,
|
|
213
|
-
domainkey,
|
|
214
|
-
interval = 7,
|
|
215
|
-
granularity = GRANULARITY.DAILY,
|
|
216
|
-
checkpoints = [],
|
|
217
|
-
filterBotTraffic = true,
|
|
218
|
-
startTime,
|
|
219
|
-
endTime,
|
|
220
|
-
} = opts;
|
|
221
|
-
|
|
222
|
-
if (!hasText(domain) || !hasText(domainkey)) {
|
|
223
|
-
throw new Error('Missing required parameters');
|
|
224
|
-
}
|
|
225
|
-
|
|
210
|
+
function validateDateRange(startTime, endTime) {
|
|
226
211
|
// Validate startTime and endTime if provided
|
|
227
212
|
if (startTime && endTime) {
|
|
228
213
|
const start = parseDate(startTime);
|
|
@@ -236,27 +221,48 @@ async function fetchBundles(opts, log) {
|
|
|
236
221
|
throw new Error('startTime must be before endTime');
|
|
237
222
|
}
|
|
238
223
|
}
|
|
224
|
+
}
|
|
239
225
|
|
|
240
|
-
|
|
241
|
-
|
|
226
|
+
/**
 * Builds the list of bundle URLs to fetch.
 *
 * When both `startTime` and `endTime` are given, the explicit window is
 * validated and handed to `generateUrlsForDateRange`; `interval` is ignored.
 * Otherwise one URL is produced per step, walking backwards from the current
 * time: `interval * HOURS_IN_DAY` steps of `ONE_HOUR` for hourly granularity,
 * `interval + 1` steps of `ONE_DAY` for anything else.
 *
 * @param {string} domain - Domain passed through to the URL builders.
 * @param {string} granularity - Compared case-insensitively to GRANULARITY.HOURLY.
 * @param {string} domainkey - Domain key passed through to the URL builders.
 * @param {*} [startTime] - Start of a custom window (both bounds required).
 * @param {*} [endTime] - End of a custom window (both bounds required).
 * @param {number} interval - Number of days to look back when no window is given.
 * @returns {Array} URLs as returned by the URL builders.
 */
function generateURLs(domain, granularity, domainkey, startTime, endTime, interval) {
  const hasCustomRange = Boolean(startTime && endTime);
  if (hasCustomRange) {
    // Validate before building anything so an invalid window fails early.
    validateDateRange(startTime, endTime);
    return generateUrlsForDateRange(startTime, endTime, domain, granularity, domainkey);
  }

  // Interval-based lookback from "now".
  const isHourly = granularity.toUpperCase() === GRANULARITY.HOURLY;
  let stepMs;
  let steps;
  if (isHourly) {
    stepMs = ONE_HOUR;
    steps = interval * HOURS_IN_DAY;
  } else {
    stepMs = ONE_DAY;
    steps = interval + 1;
  }

  const nowMs = new Date().getTime();
  const collected = [];
  let offset = 0;
  while (offset < steps) {
    const pointInTime = new Date(nowMs - offset * stepMs);
    collected.push(constructUrl(domain, pointInTime, granularity, domainkey));
    offset += 1;
  }
  return collected;
}
|
|
248
|
+
|
|
249
|
+
async function fetchBundles(opts, log) {
|
|
250
|
+
const {
|
|
251
|
+
domain,
|
|
252
|
+
domainkey,
|
|
253
|
+
interval = 7,
|
|
254
|
+
granularity = GRANULARITY.DAILY,
|
|
255
|
+
checkpoints = [],
|
|
256
|
+
filterBotTraffic = true,
|
|
257
|
+
startTime,
|
|
258
|
+
endTime,
|
|
259
|
+
} = opts;
|
|
260
|
+
|
|
261
|
+
if (!hasText(domain) || !hasText(domainkey)) {
|
|
262
|
+
throw new Error('Missing required parameters');
|
|
258
263
|
}
|
|
259
264
|
|
|
265
|
+
const urls = generateURLs(domain, granularity, domainkey, startTime, endTime, interval);
|
|
260
266
|
const chunks = getUrlChunks(urls, CHUNK_SIZE);
|
|
261
267
|
|
|
262
268
|
let totalTransferSize = 0;
|
|
@@ -311,6 +317,88 @@ async function fetchBundles(opts, log) {
|
|
|
311
317
|
return mergeBundlesWithSameId(result);
|
|
312
318
|
}
|
|
313
319
|
|
|
320
|
+
/**
 * Creates a ReadableStream that fetches RUM bundle URLs and enqueues, for each
 * successfully fetched URL, whatever `opts.handler` returns for that URL's
 * filtered bundles.
 *
 * Fetching is driven entirely from the stream's `start()` callback: it begins
 * when the stream is constructed and does not wait for the consumer to read.
 * `CHUNK_SIZE` workers drain a shared URL queue, so at most `CHUNK_SIZE`
 * requests are in flight and chunk order is not guaranteed.
 *
 * Responses with a non-OK status are logged, recorded, and skipped. Once all
 * URLs are processed, any failed URLs are written to `opts.failedUrls` on the
 * object passed in (callers must keep a reference to that same object).
 *
 * @param {object} opts
 * @param {string} opts.domain - Required.
 * @param {string} opts.domainkey - Required.
 * @param {number} [opts.interval=7]
 * @param {string} [opts.granularity=GRANULARITY.DAILY]
 * @param {Array} [opts.checkpoints=[]] - Passed to `filterEvents`.
 * @param {boolean} [opts.filterBotTraffic=true] - Drop bundles flagged by `isBotTraffic`.
 * @param {*} [opts.startTime]
 * @param {*} [opts.endTime]
 * @param {Function} opts.handler - Called with the filtered bundle array of one URL.
 * @param {object} log - Logger exposing `info` and `warn`.
 * @returns {ReadableStream} Stream of handler results.
 * @throws {Error} 'Missing required parameters' when domain or domainkey is blank.
 */
function createBundleStream(opts, log) {
  const {
    domain,
    domainkey,
    interval = 7,
    granularity = GRANULARITY.DAILY,
    checkpoints = [],
    filterBotTraffic = true,
    startTime,
    endTime,
    handler,
  } = opts;

  if (!hasText(domain) || !hasText(domainkey)) {
    throw new Error('Missing required parameters');
  }

  const urls = generateURLs(domain, granularity, domainkey, startTime, endTime, interval);

  return new ReadableStream({
    async start(controller) {
      const failedUrls = [];
      let totalTransferSize = 0;
      let bundlesCount = 0;
      let lastCheckpoint = 0;

      async function streamBundle(url) {
        const response = await fetch(url);

        // content-length may be absent (e.g. chunked responses); parseInt then
        // yields NaN, which would turn the running total into NaN for good.
        const contentLength = Number.parseInt(response.headers.get('content-length'), 10);
        if (Number.isFinite(contentLength)) {
          totalTransferSize += contentLength;
        }

        if (!response.ok) {
          log.warn(`Failed to fetch URL: ${sanitizeURL(url)} - status: ${response.status}`);
          failedUrls.push(url);
          return;
        }

        const bundles = await response.json();

        // A payload without rumBundles is treated as "no bundles" instead of
        // crashing on `.length` of undefined.
        const filtered = (bundles?.rumBundles ?? [])
          .filter((bundle) => !filterBotTraffic || !isBotTraffic(bundle))
          .map(filterEvents(checkpoints));

        bundlesCount += filtered.length;
        const currentCheckpoint = Math.floor(bundlesCount / 50000);

        // Log progress each time another 50k bundles have been processed.
        if (currentCheckpoint > lastCheckpoint) {
          log.info(`Checkpoint: Fetched ${bundlesCount} bundles; resuming...`);
          lastCheckpoint = currentCheckpoint;
        }

        controller.enqueue(handler(filtered));
      }

      // Each worker pulls the next URL off the shared queue until it is empty.
      async function worker() {
        while (urls.length > 0) {
          const url = urls.shift();
          // eslint-disable-next-line no-await-in-loop
          await streamBundle(url);
        }
      }

      const workers = Array.from({ length: CHUNK_SIZE }, () => worker());

      await Promise.all(workers);

      log.info(`Retrieved all RUM bundles. Total transfer size (in KB): ${(totalTransferSize / 1024).toFixed(2)}`);

      // Add failedUrls to opts object for access by callers
      if (failedUrls.length > 0) {
        // eslint-disable-next-line no-param-reassign
        opts.failedUrls = failedUrls;
      }

      controller.close();
    },
  });
}
|
|
400
|
+
|
|
314
401
|
export {
|
|
315
402
|
fetchBundles,
|
|
403
|
+
createBundleStream,
|
|
316
404
|
};
|
package/src/common/traffic.js
CHANGED
|
@@ -27,9 +27,8 @@ import URI from 'urijs';
|
|
|
27
27
|
/**
 * Returns the hostname of `url` with its top-level domain removed
 * (e.g. `https://www.example.com/x` -> `www.example`).
 *
 * The TLD is stripped only where it terminates the hostname. Splitting on
 * `.${tld}` would cut at the first occurrence anywhere in the name, so a host
 * such as `www.company.com` would collapse to `www`.
 *
 * @param {string} url - URL to inspect; blank or non-text values are returned as-is.
 * @returns {string} Hostname without the trailing TLD, or the input when it has no text.
 */
export function getSecondLevelDomain(url) {
  if (!hasText(url)) return url;
  const uri = new URI(url);
  const tld = uri.tld();
  const hostname = uri.hostname();
  const suffix = `.${tld}`;
  if (suffix.length > 1 && hostname.endsWith(suffix)) {
    return hostname.slice(0, -suffix.length);
  }
  // No usable TLD suffix: keep the previous behaviour unchanged.
  return hostname.split(suffix)[0];
}
|
|
34
33
|
|
|
35
34
|
/*
|
|
@@ -39,9 +38,10 @@ export function getSecondLevelDomain(url) {
|
|
|
39
38
|
// Referrer related
|
|
40
39
|
// Regular expressions that bucket a referrer into a category. RULES consumes
// them through anyOf(...) on its `referrer` field (e.g. anyOf(referrers.llm)).
// None of the patterns carries the `i` flag, so matching is case-sensitive.
const referrers = {
  search: /google|yahoo|bing|yandex|baidu|duckduckgo|brave|ecosia|aol|startpage|ask/,
  // `x` (optionally prefixed with `www.`) must sit at the start of the string and
  // be word-bounded; every other network name may appear anywhere in the string.
  social: /^\b((www\.)?x)\b|(.*(facebook|tiktok|snapchat|twitter|pinterest|reddit|linkedin|threads|quora|discord|tumblr|mastodon|bluesky|instagram).*)$/,
  ad: /googlesyndication|2mdn|doubleclick|syndicatedsearch/,
  video: /youtube|vimeo|twitch|dailymotion|wistia/,
  // LLM assistants. `gemini` and `copilot` only match when followed by
  // `.google` / `.microsoft` respectively.
  llm: /chatgpt|openai|perplexity|claude|gemini\.google|copilot\.microsoft/,
};
|
|
46
46
|
|
|
47
47
|
const mediums = {
|
|
@@ -66,6 +66,7 @@ const sources = {
|
|
|
66
66
|
display: /optumib2b|jun|googleads|dv360|dv36|microsoft|flipboard|programmatic|yext|gdn|banner|newsshowcase/,
|
|
67
67
|
affiliate: /brandreward|yieldkit|fashionistatop|partner|linkbux|stylesblog|linkinbio|affiliate/,
|
|
68
68
|
email: /sfmc|email/,
|
|
69
|
+
llm: /chatgpt/,
|
|
69
70
|
};
|
|
70
71
|
|
|
71
72
|
/**
|
|
@@ -74,7 +75,7 @@ const sources = {
|
|
|
74
75
|
* Using full word match for social media shorts like ig, fb, x
|
|
75
76
|
*/
|
|
76
77
|
const vendorClassifications = [
|
|
77
|
-
{ regex: /google|googleads|google-ads|google_search|google_deman|adwords|dv360|gdn|doubleclick|dbm|gmb/i, result: 'google' },
|
|
78
|
+
{ regex: /google|googleads|google-ads|google_search|google_deman|adwords|dv360|gdn|doubleclick|dbm|gmb|gemini/i, result: 'google' },
|
|
78
79
|
{ regex: /instagram|\b(ig)\b/i, result: 'instagram' },
|
|
79
80
|
{ regex: /facebook|\b(fb)\b|meta/i, result: 'facebook' },
|
|
80
81
|
{ regex: /bing/i, result: 'bing' },
|
|
@@ -83,7 +84,7 @@ const vendorClassifications = [
|
|
|
83
84
|
{ regex: /linkedin/i, result: 'linkedin' },
|
|
84
85
|
{ regex: /twitter|^\b(x)\b/i, result: 'x' },
|
|
85
86
|
{ regex: /snapchat/i, result: 'snapchat' },
|
|
86
|
-
{ regex: /microsoft/i, result: 'microsoft' },
|
|
87
|
+
{ regex: /microsoft|copilot/i, result: 'microsoft' },
|
|
87
88
|
{ regex: /pinterest/i, result: 'pinterest' },
|
|
88
89
|
{ regex: /reddit/i, result: 'reddit' },
|
|
89
90
|
{ regex: /spotify/i, result: 'spotify' },
|
|
@@ -102,6 +103,9 @@ const vendorClassifications = [
|
|
|
102
103
|
{ regex: /amazon|ctv/i, result: 'amazon' },
|
|
103
104
|
{ regex: /dailymotion/i, result: 'dailymotion' },
|
|
104
105
|
{ regex: /twitch/i, result: 'twitch' },
|
|
106
|
+
{ regex: /chatgpt|openai/i, result: 'openai' },
|
|
107
|
+
{ regex: /perplexity/i, result: 'perplexity' },
|
|
108
|
+
{ regex: /claude/i, result: 'claude' },
|
|
105
109
|
{ regex: /direct/i, result: 'direct' },
|
|
106
110
|
];
|
|
107
111
|
|
|
@@ -176,6 +180,8 @@ const RULES = (domain) => ([
|
|
|
176
180
|
{ type: 'paid', category: 'uncategorized', referrer: not(domain), utmSource: any, utmMedium: any, tracking: anyOf(paidTrackingParams) },
|
|
177
181
|
|
|
178
182
|
// EARNED
|
|
183
|
+
{ type: 'earned', category: 'llm', referrer: anyOf(referrers.llm), utmSource: any, utmMedium: any, tracking: none },
|
|
184
|
+
{ type: 'earned', category: 'llm', referrer: any, utmSource: anyOf(sources.llm), utmMedium: any, tracking: none },
|
|
179
185
|
{ type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: none, utmMedium: none, tracking: none },
|
|
180
186
|
{ type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: not(mediums.paidall), tracking: not(paidTrackingParams) },
|
|
181
187
|
{ type: 'earned', category: 'search', referrer: anyOf(referrers.search), utmSource: any, utmMedium: anyOf(mediums.organic), tracking: none },
|
package/src/index.js
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
* governing permissions and limitations under the License.
|
|
11
11
|
*/
|
|
12
12
|
import { hasText, fetch } from '@adobe/spacecat-shared-utils';
|
|
13
|
-
import { fetchBundles } from './common/rum-bundler-client.js';
|
|
13
|
+
import { fetchBundles, createBundleStream } from './common/rum-bundler-client.js';
|
|
14
14
|
import notfound from './functions/404.js';
|
|
15
15
|
import notfoundInternalLinks from './functions/404-internal-links.js';
|
|
16
16
|
import cwv from './functions/cwv.js';
|
|
@@ -177,4 +177,21 @@ export default class RUMAPIClient {
|
|
|
177
177
|
throw new Error(`Multi query failed. Queries: ${JSON.stringify(queries)}, Opts: ${JSON.stringify(sanitize(opts))}. Reason: ${e.message}`);
|
|
178
178
|
}
|
|
179
179
|
}
|
|
180
|
+
|
|
181
|
+
async queryStream(query, opts) {
|
|
182
|
+
const { handler, checkpoints } = HANDLERS[query] || {};
|
|
183
|
+
if (!handler) throw new Error(`Unknown query ${query}`);
|
|
184
|
+
|
|
185
|
+
try {
|
|
186
|
+
const domainkey = await this._getDomainkey(opts);
|
|
187
|
+
return createBundleStream({
|
|
188
|
+
...opts,
|
|
189
|
+
domainkey,
|
|
190
|
+
checkpoints,
|
|
191
|
+
handler,
|
|
192
|
+
}, this.log);
|
|
193
|
+
} catch (e) {
|
|
194
|
+
throw new Error(`Query stream '${query}' failed. Opts: ${JSON.stringify(sanitize(opts))}. Reason: ${e.message}`);
|
|
195
|
+
}
|
|
196
|
+
}
|
|
180
197
|
}
|