@adobe/spacecat-shared-rum-api-client 2.34.1 → 2.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # [@adobe/spacecat-shared-rum-api-client-v2.35.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.34.1...@adobe/spacecat-shared-rum-api-client-v2.35.0) (2025-07-24)
2
+
3
+
4
+ ### Features
5
+
6
+ * **rum-api-client:** add query stream ([#872](https://github.com/adobe/spacecat-shared/issues/872)) ([7702138](https://github.com/adobe/spacecat-shared/commit/77021388a298843f3ec2571c9a6f148184fa2814))
7
+
1
8
  # [@adobe/spacecat-shared-rum-api-client-v2.34.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-rum-api-client-v2.34.0...@adobe/spacecat-shared-rum-api-client-v2.34.1) (2025-07-24)
2
9
 
3
10
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-rum-api-client",
3
- "version": "2.34.1",
3
+ "version": "2.35.0",
4
4
  "description": "Shared modules of the Spacecat Services - Rum API client",
5
5
  "type": "module",
6
6
  "engines": {
@@ -207,22 +207,7 @@ async function mergeBundlesWithSameId(bundles) {
207
207
  }
208
208
  /* c8 ignore end */
209
209
 
210
- async function fetchBundles(opts, log) {
211
- const {
212
- domain,
213
- domainkey,
214
- interval = 7,
215
- granularity = GRANULARITY.DAILY,
216
- checkpoints = [],
217
- filterBotTraffic = true,
218
- startTime,
219
- endTime,
220
- } = opts;
221
-
222
- if (!hasText(domain) || !hasText(domainkey)) {
223
- throw new Error('Missing required parameters');
224
- }
225
-
210
+ function validateDateRange(startTime, endTime) {
226
211
  // Validate startTime and endTime if provided
227
212
  if (startTime && endTime) {
228
213
  const start = parseDate(startTime);
@@ -236,27 +221,48 @@ async function fetchBundles(opts, log) {
236
221
  throw new Error('startTime must be before endTime');
237
222
  }
238
223
  }
224
+ }
239
225
 
240
- let urls = [];
241
-
226
+ function generateURLs(domain, granularity, domainkey, startTime, endTime, interval) {
242
227
  if (startTime && endTime) {
228
+ validateDateRange(startTime, endTime);
243
229
  // Use custom date range
244
- urls = generateUrlsForDateRange(startTime, endTime, domain, granularity, domainkey);
245
- } else {
246
- // Use existing interval-based logic
247
- const multiplier = granularity.toUpperCase() === GRANULARITY.HOURLY ? ONE_HOUR : ONE_DAY;
248
- const range = granularity.toUpperCase() === GRANULARITY.HOURLY
249
- ? interval * HOURS_IN_DAY
250
- : interval + 1;
251
-
252
- const currentDate = new Date();
253
-
254
- for (let i = 0; i < range; i += 1) {
255
- const date = new Date(currentDate.getTime() - i * multiplier);
256
- urls.push(constructUrl(domain, date, granularity, domainkey));
257
- }
230
+ return generateUrlsForDateRange(startTime, endTime, domain, granularity, domainkey);
231
+ }
232
+
233
+ // Use existing interval-based logic
234
+ const multiplier = granularity.toUpperCase() === GRANULARITY.HOURLY ? ONE_HOUR : ONE_DAY;
235
+ const range = granularity.toUpperCase() === GRANULARITY.HOURLY
236
+ ? interval * HOURS_IN_DAY
237
+ : interval + 1;
238
+
239
+ const currentDate = new Date();
240
+ const urls = [];
241
+
242
+ for (let i = 0; i < range; i += 1) {
243
+ const date = new Date(currentDate.getTime() - i * multiplier);
244
+ urls.push(constructUrl(domain, date, granularity, domainkey));
245
+ }
246
+ return urls;
247
+ }
248
+
249
+ async function fetchBundles(opts, log) {
250
+ const {
251
+ domain,
252
+ domainkey,
253
+ interval = 7,
254
+ granularity = GRANULARITY.DAILY,
255
+ checkpoints = [],
256
+ filterBotTraffic = true,
257
+ startTime,
258
+ endTime,
259
+ } = opts;
260
+
261
+ if (!hasText(domain) || !hasText(domainkey)) {
262
+ throw new Error('Missing required parameters');
258
263
  }
259
264
 
265
+ const urls = generateURLs(domain, granularity, domainkey, startTime, endTime, interval);
260
266
  const chunks = getUrlChunks(urls, CHUNK_SIZE);
261
267
 
262
268
  let totalTransferSize = 0;
@@ -311,6 +317,88 @@ async function fetchBundles(opts, log) {
311
317
  return mergeBundlesWithSameId(result);
312
318
  }
313
319
 
320
/**
 * Creates a ReadableStream that fetches RUM bundle URLs with a pool of
 * concurrent workers and enqueues one `handler` result per successfully
 * fetched URL.
 *
 * Chunks are enqueued in completion order, which is not necessarily URL order.
 * URLs answered with a non-OK status are logged and collected; once all URLs
 * are processed they are written to `opts.failedUrls` (only when non-empty).
 * Any error thrown while fetching, parsing, or inside `handler` rejects
 * `start()`, which puts the stream into an errored state.
 *
 * @param {object} opts - Stream options.
 * @param {string} opts.domain - Domain to fetch bundles for (required).
 * @param {string} opts.domainkey - Domain key used to build the URLs (required).
 * @param {number} [opts.interval=7] - Interval passed to generateURLs.
 * @param {string} [opts.granularity=GRANULARITY.DAILY] - Bundle granularity.
 * @param {Array} [opts.checkpoints=[]] - Checkpoints passed to filterEvents.
 * @param {boolean} [opts.filterBotTraffic=true] - Drop bundles flagged by isBotTraffic.
 * @param {*} [opts.startTime] - Start of a custom date range.
 * @param {*} [opts.endTime] - End of a custom date range.
 * @param {Function} opts.handler - Called with the filtered bundles of one URL;
 *   its return value is enqueued on the stream.
 * @param {object} log - Logger exposing `info` and `warn`.
 * @returns {ReadableStream} Stream of handler results.
 * @throws {Error} If `domain` or `domainkey` is missing.
 */
function createBundleStream(opts, log) {
  const {
    domain,
    domainkey,
    interval = 7,
    granularity = GRANULARITY.DAILY,
    checkpoints = [],
    filterBotTraffic = true,
    startTime,
    endTime,
    handler,
  } = opts;

  if (!hasText(domain) || !hasText(domainkey)) {
    throw new Error('Missing required parameters');
  }

  const urls = generateURLs(domain, granularity, domainkey, startTime, endTime, interval);

  return new ReadableStream({
    async start(controller) {
      const failedUrls = [];
      let totalTransferSize = 0;
      let bundlesCount = 0;
      let lastCheckpoint = 0;
      // Shared cursor into `urls`; each worker claims the next unprocessed URL.
      let nextUrlIndex = 0;

      async function streamBundle(url) {
        const response = await fetch(url);

        // content-length may be absent (headers.get returns null); skip it
        // instead of letting NaN poison the running total.
        const contentLength = Number.parseInt(response.headers.get('content-length'), 10);
        if (Number.isFinite(contentLength)) {
          totalTransferSize += contentLength;
        }

        if (!response.ok) {
          log.warn(`Failed to fetch URL: ${sanitizeURL(url)} - status: ${response.status}`);
          failedUrls.push(url);
          return;
        }

        const bundles = await response.json();

        // A payload without `rumBundles` is treated as an empty bundle list.
        const filtered = (bundles?.rumBundles ?? [])
          .filter((bundle) => !filterBotTraffic || !isBotTraffic(bundle))
          .map(filterEvents(checkpoints));

        bundlesCount += filtered.length;
        const currentCheckpoint = Math.floor(bundlesCount / 50000);

        // Log progress each time another 50000 bundles have been processed.
        if (currentCheckpoint > lastCheckpoint) {
          log.info(`Checkpoint: Fetched ${bundlesCount} bundles; resuming...`);
          lastCheckpoint = currentCheckpoint;
        }

        controller.enqueue(handler(filtered));
      }

      async function worker() {
        while (nextUrlIndex < urls.length) {
          const url = urls[nextUrlIndex];
          nextUrlIndex += 1;
          // eslint-disable-next-line no-await-in-loop
          await streamBundle(url);
        }
      }

      // CHUNK_SIZE workers drain the URL list concurrently.
      await Promise.all(Array.from({ length: CHUNK_SIZE }, () => worker()));

      log.info(`Retrieved all RUM bundles. Total transfer size (in KB): ${(totalTransferSize / 1024).toFixed(2)}`);

      // Add failedUrls to opts object for access by callers
      if (failedUrls.length > 0) {
        // eslint-disable-next-line no-param-reassign
        opts.failedUrls = failedUrls;
      }

      controller.close();
    },
  });
}
400
+
314
401
  export {
315
402
  fetchBundles,
403
+ createBundleStream,
316
404
  };
package/src/index.js CHANGED
@@ -10,7 +10,7 @@
10
10
  * governing permissions and limitations under the License.
11
11
  */
12
12
  import { hasText, fetch } from '@adobe/spacecat-shared-utils';
13
- import { fetchBundles } from './common/rum-bundler-client.js';
13
+ import { fetchBundles, createBundleStream } from './common/rum-bundler-client.js';
14
14
  import notfound from './functions/404.js';
15
15
  import notfoundInternalLinks from './functions/404-internal-links.js';
16
16
  import cwv from './functions/cwv.js';
@@ -177,4 +177,21 @@ export default class RUMAPIClient {
177
177
  throw new Error(`Multi query failed. Queries: ${JSON.stringify(queries)}, Opts: ${JSON.stringify(sanitize(opts))}. Reason: ${e.message}`);
178
178
  }
179
179
  }
180
+
181
+ async queryStream(query, opts) {
182
+ const { handler, checkpoints } = HANDLERS[query] || {};
183
+ if (!handler) throw new Error(`Unknown query ${query}`);
184
+
185
+ try {
186
+ const domainkey = await this._getDomainkey(opts);
187
+ return createBundleStream({
188
+ ...opts,
189
+ domainkey,
190
+ checkpoints,
191
+ handler,
192
+ }, this.log);
193
+ } catch (e) {
194
+ throw new Error(`Query stream '${query}' failed. Opts: ${JSON.stringify(sanitize(opts))}. Reason: ${e.message}`);
195
+ }
196
+ }
180
197
  }