@adobe/spacecat-shared-utils 1.81.0 → 1.82.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-utils-v1.82.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.81.1...@adobe/spacecat-shared-utils-v1.82.0) (2025-12-03)
2
+
3
+
4
+ ### Features
5
+
6
+ * move www url resolver in audit worker to shared utils ([#1207](https://github.com/adobe/spacecat-shared/issues/1207)) ([40dbf3a](https://github.com/adobe/spacecat-shared/commit/40dbf3a18681cd304c168602d74b51c0baf8ead6))
7
+
8
+ # [@adobe/spacecat-shared-utils-v1.81.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.81.0...@adobe/spacecat-shared-utils-v1.81.1) (2025-11-28)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * update to node 24 ([#1179](https://github.com/adobe/spacecat-shared/issues/1179)) ([0e60c0a](https://github.com/adobe/spacecat-shared/commit/0e60c0ab791b47662d07822f7c93009a8f7048fd))
14
+
1
15
  # [@adobe/spacecat-shared-utils-v1.81.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.80.0...@adobe/spacecat-shared-utils-v1.81.0) (2025-11-26)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-utils",
3
- "version": "1.81.0",
3
+ "version": "1.82.0",
4
4
  "description": "Shared modules of the Spacecat Services - utils",
5
5
  "type": "module",
6
6
  "exports": {
@@ -10,7 +10,7 @@
10
10
  }
11
11
  },
12
12
  "engines": {
13
- "node": ">=22.0.0 <23.0.0",
13
+ "node": ">=22.0.0 <25.0.0",
14
14
  "npm": ">=10.9.0 <12.0.0"
15
15
  },
16
16
  "main": "src/index.js",
@@ -53,14 +53,15 @@
53
53
  },
54
54
  "dependencies": {
55
55
  "@adobe/fetch": "4.2.3",
56
- "@aws-sdk/client-s3": "3.937.0",
57
- "@aws-sdk/client-sqs": "3.936.0",
56
+ "@aws-sdk/client-s3": "3.940.0",
57
+ "@aws-sdk/client-sqs": "3.940.0",
58
58
  "@json2csv/plainjs": "7.0.6",
59
59
  "aws-xray-sdk": "3.12.0",
60
60
  "cheerio": "1.1.2",
61
61
  "date-fns": "4.1.0",
62
62
  "franc-min": "6.2.0",
63
63
  "iso-639-3": "3.0.1",
64
+ "urijs": "1.19.11",
64
65
  "validator": "^13.15.15",
65
66
  "world-countries": "5.1.0",
66
67
  "zod": "^4.1.11"
package/src/index.js CHANGED
@@ -66,6 +66,9 @@ export {
66
66
  getSpacecatRequestHeaders,
67
67
  ensureHttps,
68
68
  urlMatchesFilter,
69
+ hasNonWWWSubdomain,
70
+ toggleWWWHostname,
71
+ wwwUrlResolver,
69
72
  } from './url-helpers.js';
70
73
 
71
74
  export {
@@ -11,6 +11,8 @@
11
11
  */
12
12
 
13
13
  import { context as h2, h1 } from '@adobe/fetch';
14
+ import URI from 'urijs';
15
+ import { hasText, isValidUrl } from './functions.js';
14
16
  import { SPACECAT_USER_AGENT } from './tracing-fetch.js';
15
17
 
16
18
  /* c8 ignore next 3 */
@@ -240,6 +242,77 @@ function urlMatchesFilter(url, filterUrls) {
240
242
  }
241
243
  }
242
244
 
/**
 * Checks if a URL has a subdomain other than 'www'.
 *
 * The URL is parsed with URI.js; the result is true only when the parsed URI
 * reports a non-empty domain AND a non-empty subdomain that is not exactly 'www'.
 *
 * @param {string} baseUrl - The URL to check.
 * @returns {boolean} - True if the URL has a non-www subdomain, false otherwise.
 * @throws {Error} - If the baseURL cannot be parsed. The underlying error is
 *   attached as `cause` so the original failure reason is not lost.
 */
function hasNonWWWSubdomain(baseUrl) {
  try {
    const uri = new URI(baseUrl);
    return hasText(uri.domain()) && hasText(uri.subdomain()) && uri.subdomain() !== 'www';
  } catch (e) {
    // Keep the message callers may match on, but preserve the root cause for debugging.
    throw new Error(`Cannot parse baseURL: ${baseUrl}`, { cause: e });
  }
}
259
+
/**
 * Adds or removes the leading 'www.' label of a hostname.
 *
 * Hostnames that already carry a non-www subdomain (as reported by
 * hasNonWWWSubdomain for `https://<hostname>`) are returned untouched.
 *
 * @param {string} hostname - The hostname to toggle the www subdomain in.
 * @returns {string} - The hostname with the www subdomain toggled.
 */
function toggleWWWHostname(hostname) {
  const wwwPrefix = 'www.';

  // Hostnames such as blog.example.com have no www variant to switch to.
  if (hasNonWWWSubdomain(`https://${hostname}`)) {
    return hostname;
  }

  if (hostname.startsWith(wwwPrefix)) {
    // String#replace with a string pattern only removes the first (leading) match.
    return hostname.replace(wwwPrefix, '');
  }

  return `${wwwPrefix}${hostname}`;
}
269
+
/**
 * Resolves the correct URL for a site by checking RUM data availability.
 *
 * Resolution order:
 *  1. A valid `overrideBaseURL` from the site's fetch config wins; only the
 *     leading `http://` / `https://` is stripped, the rest is returned verbatim.
 *  2. A base URL with a non-www subdomain resolves to its own hostname.
 *  3. The www-toggled hostname, if `retrieveDomainkey` succeeds for it.
 *  4. The original hostname, if `retrieveDomainkey` succeeds for it.
 *  5. Otherwise the www-prefixed hostname as a last-resort fallback.
 *
 * @param {object} site - The site object with getBaseURL() and getConfig() methods.
 * @param {object} rumApiClient - The RUM API client instance with retrieveDomainkey method.
 * @param {object} log - Logger instance with debug() and error() methods.
 * @returns {Promise<string>} - The resolved hostname without protocol.
 */
async function wwwUrlResolver(site, rumApiClient, log) {
  const overrideBaseURL = site.getConfig()?.getFetchConfig()?.overrideBaseURL;
  if (isValidUrl(overrideBaseURL)) {
    return overrideBaseURL.replace(/^https?:\/\//, '');
  }

  const baseURL = site.getBaseURL();
  const uri = new URI(baseURL);
  const hostname = uri.hostname();
  const subdomain = uri.subdomain();

  if (hasText(subdomain) && subdomain !== 'www') {
    log.debug(`Resolved URL ${hostname} since ${baseURL} contains subdomain`);
    return hostname;
  }

  // Track which hostname the first lookup was for, so a failure is logged against
  // the hostname that was actually attempted rather than always the original one.
  let attemptedHostname = hostname;
  try {
    // Toggling stays inside the try so a failure here still falls through to the
    // original-hostname lookup below instead of rejecting the whole resolution.
    attemptedHostname = toggleWWWHostname(hostname);
    await rumApiClient.retrieveDomainkey(attemptedHostname);
    log.debug(`Resolved URL ${attemptedHostname} for ${baseURL} using RUM API Client`);
    return attemptedHostname;
  } catch (e) {
    log.error(`Could not retrieved RUM domainkey for ${attemptedHostname}: ${e.message}`);
  }

  try {
    await rumApiClient.retrieveDomainkey(hostname);
    log.debug(`Resolved URL ${hostname} for ${baseURL} using RUM API Client`);
    return hostname;
  } catch (e) {
    log.error(`Could not retrieved RUM domainkey for ${hostname}: ${e.message}`);
  }

  // Neither variant has a domain key: default to the www form.
  const fallback = hostname.startsWith('www.') ? hostname : `www.${hostname}`;
  log.debug(`Fallback to ${fallback} for URL resolution for ${baseURL}`);
  return fallback;
}
315
+
243
316
  export {
244
317
  ensureHttps,
245
318
  getSpacecatRequestHeaders,
@@ -252,4 +325,7 @@ export {
252
325
  stripTrailingSlash,
253
326
  stripWWW,
254
327
  urlMatchesFilter,
328
+ hasNonWWWSubdomain,
329
+ toggleWWWHostname,
330
+ wwwUrlResolver,
255
331
  };