@adobe/spacecat-shared-utils 1.81.1 → 1.82.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/package.json +2 -1
- package/src/index.js +3 -0
- package/src/url-helpers.js +76 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-utils-v1.82.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.81.1...@adobe/spacecat-shared-utils-v1.82.0) (2025-12-03)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* move www url resolver in audit worker to shared utils ([#1207](https://github.com/adobe/spacecat-shared/issues/1207)) ([40dbf3a](https://github.com/adobe/spacecat-shared/commit/40dbf3a18681cd304c168602d74b51c0baf8ead6))
|
|
7
|
+
|
|
1
8
|
# [@adobe/spacecat-shared-utils-v1.81.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.81.0...@adobe/spacecat-shared-utils-v1.81.1) (2025-11-28)
|
|
2
9
|
|
|
3
10
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@adobe/spacecat-shared-utils",
|
|
3
|
-
"version": "1.81.1",
|
|
3
|
+
"version": "1.82.0",
|
|
4
4
|
"description": "Shared modules of the Spacecat Services - utils",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": {
|
|
@@ -61,6 +61,7 @@
|
|
|
61
61
|
"date-fns": "4.1.0",
|
|
62
62
|
"franc-min": "6.2.0",
|
|
63
63
|
"iso-639-3": "3.0.1",
|
|
64
|
+
"urijs": "1.19.11",
|
|
64
65
|
"validator": "^13.15.15",
|
|
65
66
|
"world-countries": "5.1.0",
|
|
66
67
|
"zod": "^4.1.11"
|
package/src/index.js
CHANGED
package/src/url-helpers.js
CHANGED
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import { context as h2, h1 } from '@adobe/fetch';
|
|
14
|
+
import URI from 'urijs';
|
|
15
|
+
import { hasText, isValidUrl } from './functions.js';
|
|
14
16
|
import { SPACECAT_USER_AGENT } from './tracing-fetch.js';
|
|
15
17
|
|
|
16
18
|
/* c8 ignore next 3 */
|
|
@@ -240,6 +242,77 @@ function urlMatchesFilter(url, filterUrls) {
|
|
|
240
242
|
}
|
|
241
243
|
}
|
|
242
244
|
|
|
245
|
+
/**
 * Tells whether the given URL carries a subdomain that is something other than 'www'.
 * @param {string} baseUrl - The URL to inspect.
 * @returns {boolean} - True when a registrable domain is present and the subdomain
 * is non-empty and not exactly 'www'; false otherwise.
 * @throws {Error} - If anything fails while parsing or inspecting the baseURL.
 */
function hasNonWWWSubdomain(baseUrl) {
  try {
    const parsed = new URI(baseUrl);

    // Evaluated lazily so the subdomain is only inspected when a domain exists.
    const hasSubdomainOtherThanWWW = () => {
      const sub = parsed.subdomain();
      return hasText(sub) && sub !== 'www';
    };

    return hasText(parsed.domain()) && hasSubdomainOtherThanWWW();
  } catch {
    throw new Error(`Cannot parse baseURL: ${baseUrl}`);
  }
}
|
|
259
|
+
|
|
260
|
+
/**
 * Flips the 'www.' prefix of a hostname: removes it when present, adds it when absent.
 * Hostnames that already have a subdomain other than 'www' are returned unchanged.
 * @param {string} hostname - The hostname (no protocol) whose www prefix is toggled.
 * @returns {string} - The hostname with the www prefix toggled, or the input as-is.
 */
function toggleWWWHostname(hostname) {
  const wwwPrefix = 'www.';

  // A non-www subdomain (e.g. blog.example.com) must not be touched.
  if (hasNonWWWSubdomain(`https://${hostname}`)) {
    return hostname;
  }

  if (hostname.startsWith(wwwPrefix)) {
    return hostname.slice(wwwPrefix.length);
  }

  return `${wwwPrefix}${hostname}`;
}
|
|
269
|
+
|
|
270
|
+
/**
 * Resolves the hostname to use for a site by checking RUM data availability.
 *
 * Resolution order:
 * 1. A valid `overrideBaseURL` from the site's fetch config, with a leading
 *    `http://` or `https://` removed.
 * 2. The base URL's hostname unchanged, when it has a subdomain other than 'www'.
 * 3. The www-toggled hostname, when a RUM domainkey can be retrieved for it.
 * 4. The original hostname, when a RUM domainkey can be retrieved for it.
 * 5. The 'www.'-prefixed hostname as the last-resort fallback.
 *
 * Failed domainkey lookups are logged via `log.error` and never rethrown.
 *
 * @param {object} site - The site object with getBaseURL() and getConfig() methods.
 * @param {object} rumApiClient - The RUM API client instance with retrieveDomainkey method.
 * @param {object} log - Logger instance with debug() and error() methods.
 * @returns {Promise<string>} - The resolved hostname without protocol.
 */
async function wwwUrlResolver(site, rumApiClient, log) {
  const overrideBaseURL = site.getConfig()?.getFetchConfig()?.overrideBaseURL;
  if (isValidUrl(overrideBaseURL)) {
    return overrideBaseURL.replace(/^https?:\/\//, '');
  }

  const baseURL = site.getBaseURL();
  const uri = new URI(baseURL);
  const hostname = uri.hostname();
  const subdomain = uri.subdomain();

  if (hasText(subdomain) && subdomain !== 'www') {
    log.debug(`Resolved URL ${hostname} since ${baseURL} contains subdomain`);
    return hostname;
  }

  // Declared outside the try so the catch can report the hostname that was
  // actually attempted; stays undefined only if toggling itself throws.
  let wwwToggledHostname;
  try {
    wwwToggledHostname = toggleWWWHostname(hostname);
    await rumApiClient.retrieveDomainkey(wwwToggledHostname);
    log.debug(`Resolved URL ${wwwToggledHostname} for ${baseURL} using RUM API Client`);
    return wwwToggledHostname;
  } catch (e) {
    log.error(`Could not retrieved RUM domainkey for ${wwwToggledHostname ?? hostname}: ${e.message}`);
  }

  try {
    await rumApiClient.retrieveDomainkey(hostname);
    log.debug(`Resolved URL ${hostname} for ${baseURL} using RUM API Client`);
    return hostname;
  } catch (e) {
    log.error(`Could not retrieved RUM domainkey for ${hostname}: ${e.message}`);
  }

  // Neither variant has RUM data: default to the www-prefixed hostname.
  const fallback = hostname.startsWith('www.') ? hostname : `www.${hostname}`;
  log.debug(`Fallback to ${fallback} for URL resolution for ${baseURL}`);
  return fallback;
}
|
|
315
|
+
|
|
243
316
|
export {
|
|
244
317
|
ensureHttps,
|
|
245
318
|
getSpacecatRequestHeaders,
|
|
@@ -252,4 +325,7 @@ export {
|
|
|
252
325
|
stripTrailingSlash,
|
|
253
326
|
stripWWW,
|
|
254
327
|
urlMatchesFilter,
|
|
328
|
+
hasNonWWWSubdomain,
|
|
329
|
+
toggleWWWHostname,
|
|
330
|
+
wwwUrlResolver,
|
|
255
331
|
};
|