@adobe/spacecat-shared-utils 1.94.0 → 1.95.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,10 @@
1
+ # [@adobe/spacecat-shared-utils-v1.95.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.94.0...@adobe/spacecat-shared-utils-v1.95.0) (2026-02-12)
2
+
3
+
4
+ ### Features
5
+
6
+ * add canonicalizeUrl utility for consistent URL comparison ([#1334](https://github.com/adobe/spacecat-shared/issues/1334)) ([eb15132](https://github.com/adobe/spacecat-shared/commit/eb1513228e5d2e1701211b6aaf36108bd1642d68))
7
+
1
8
  # [@adobe/spacecat-shared-utils-v1.94.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.93.0...@adobe/spacecat-shared-utils-v1.94.0) (2026-02-11)
2
9
 
3
10
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-utils",
3
- "version": "1.94.0",
3
+ "version": "1.95.0",
4
4
  "description": "Shared modules of the Spacecat Services - utils",
5
5
  "type": "module",
6
6
  "exports": {
package/src/index.d.ts CHANGED
@@ -133,6 +133,20 @@ export declare function stripTrailingSlash(url: string): string;
133
133
  */
134
134
  export declare function stripWWW(url: string): string;
135
135
 
136
+ /**
137
+ * Canonicalizes a URL for comparison and matching purposes: lowercases and trims it,
138
+ * then removes the http(s) protocol, a leading www prefix, and a trailing slash.
139
+ * Optionally strips query parameters and fragments.
140
+ * @param url - URL to canonicalize; an empty or non-string value yields an empty string
141
+ * @param options - Canonicalization options (may be omitted)
142
+ * @param options.stripQuery - Whether to strip query parameters and fragments (defaults to false)
143
+ * @returns Canonicalized URL, or an empty string when no usable URL was given
144
+ */
145
+ export declare function canonicalizeUrl(
146
+ url: string,
147
+ options?: { stripQuery?: boolean }
148
+ ): string;
149
+
136
150
  /**
137
151
  * Composes a base URL by applying a series of transformations to the given domain.
138
152
  * @param domain - The domain to compose the base URL from.
package/src/index.js CHANGED
@@ -55,6 +55,7 @@ export { logWrapper } from './log-wrapper.js';
55
55
  export { instrumentAWSClient, getTraceId, addTraceIdHeader } from './xray.js';
56
56
 
57
57
  export {
58
+ canonicalizeUrl,
58
59
  composeBaseURL,
59
60
  composeAuditURL,
60
61
  prependSchema,
@@ -318,6 +318,40 @@ async function wwwUrlResolver(site, rumApiClient, log) {
318
318
  return fallback;
319
319
  }
320
320
 
321
+ /**
322
+ * Canonicalizes a URL by lowercasing it and removing protocol, www prefix, and trailing slash
323
+ * for comparison and matching purposes. The result is a comparison key, not a fetchable URL.
324
+ * Optionally strips query parameters and fragments.
325
+ * @param {string} url - URL to canonicalize; falsy or non-string input returns ''
326
+ * @param {object} [options] - Canonicalization options
327
+ * @param {boolean} [options.stripQuery=false] - Whether to strip query parameters and fragments
328
+ * @returns {string} Canonicalized URL, or '' when the input is empty or not a string
329
+ */
330
+ export function canonicalizeUrl(url, { stripQuery = false } = {}) {
331
+ if (!url || typeof url !== 'string') {
332
+ return '';
333
+ }
334
+
335
+ let canonicalized = url
336
+ .toLowerCase() // Lowercase the whole string (host, path, and query) for case-insensitive matching
337
+ .trim()
338
+ .replace(/^https?:\/\//, '') // Remove a leading http:// or https:// protocol
339
+ .replace(/^www\d*\./, '') // Remove www, www2, www3, etc.
340
+ .replace(/\/$/, ''); // Remove a trailing slash at the very end of the string only
341
+
342
+ // Optionally cut everything from the first '?' or '#' onward
343
+ if (stripQuery) {
344
+ const queryIndex = canonicalized.search(/[?#]/);
345
+ if (queryIndex !== -1) {
346
+ canonicalized = canonicalized.substring(0, queryIndex);
347
+ }
348
+ // Remove any trailing slash that may have been revealed by cutting the query/fragment
349
+ canonicalized = canonicalized.replace(/\/$/, '');
350
+ }
351
+
352
+ return canonicalized;
353
+ }
354
+
321
355
  export {
322
356
  ensureHttps,
323
357
  getSpacecatRequestHeaders,