@adobe/spacecat-shared-utils 1.47.0 → 1.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/aem.js +222 -0
- package/src/index.js +3 -0
- package/src/url-helpers.js +70 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-utils-v1.49.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.48.0...@adobe/spacecat-shared-utils-v1.49.0) (2025-08-26)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* AEM utils ([#927](https://github.com/adobe/spacecat-shared/issues/927)) ([57fc3df](https://github.com/adobe/spacecat-shared/commit/57fc3df80cbacfdc66443090e073367856ec2f7c))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-utils-v1.48.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.47.0...@adobe/spacecat-shared-utils-v1.48.0) (2025-08-18)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Features
|
|
12
|
+
|
|
13
|
+
* added `urlMatchesFilter` in `spacecat-shared-utils` ([#921](https://github.com/adobe/spacecat-shared/issues/921)) ([74e11e4](https://github.com/adobe/spacecat-shared/commit/74e11e4124137b13942b0c58ead620905c438538))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-utils-v1.47.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.46.0...@adobe/spacecat-shared-utils-v1.47.0) (2025-08-15)
|
|
2
16
|
|
|
3
17
|
|
package/package.json
CHANGED
package/src/aem.js
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Copyright 2025 Adobe. All rights reserved.
|
|
3
|
+
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
* you may not use this file except in compliance with the License. You may obtain a copy
|
|
5
|
+
* of the License at http://www.apache.org/licenses/LICENSE-2.0
|
|
6
|
+
*
|
|
7
|
+
* Unless required by applicable law or agreed to in writing, software distributed under
|
|
8
|
+
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
|
|
9
|
+
* OF ANY KIND, either express or implied. See the License for the specific language
|
|
10
|
+
* governing permissions and limitations under the License.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { fetch } from './adobe-fetch.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Delivery types for AEM deployment
 */
export const DELIVERY_TYPES = {
  AEM_CS: 'aem_cs',
  AEM_EDGE: 'aem_edge',
  AEM_AMS: 'aem_ams',
  AEM_HEADLESS: 'aem_headless',
  OTHER: 'other',
};

// Name of the header whose value is checked against the AMS header patterns.
const DISPATCHER_HEADER = 'x-dispatcher';

// Minimum score a delivery type needs before it is reported instead of 'other'.
const MIN_MATCH_THRESHOLD = 2;

// Patterns that are both counted as regular indicators and rewarded with extra weight.
const EDS_ROUTING_PATTERN = /data-routing="[^"]*eds=([^,"]*)/i;
const CS_ROUTING_PATTERN = /data-routing="[^"]*cs=([^,"]*)/i;
const AMS_ROUTING_PATTERN = /data-routing="[^"]*ams=([^,"]*)/i;
const CS_CLIENTLIB_PATTERN = /\/etc\.clientlibs\/[^"']+\.lc-[a-f0-9]+-lc\.min\.(js|css)/i;
const AMS_CLIENTLIB_PATTERN = /\/etc\.clientlibs\/[^"']+\.min\.[a-f0-9]{32}\.(js|css)/i;
const CORE_COMPONENT_CLASS_PATTERN = /class="[^"]*cmp-[^"]*"/;

// EDS Indicators
const EDS_PATTERNS = [
  // Core library references
  /lib-franklin\.js/i,
  /aem\.js/i,
  // Block structure
  /data-block-status/i,
  // Franklin-specific markup patterns
  /scripts\.js/i,
  // Block HTML patterns
  /<div class="[^"]*block[^"]*"[^>]*>/i,
  // RUM data-routing for EDS
  EDS_ROUTING_PATTERN,
];

// CS Indicators (Cloud Service)
const CS_PATTERNS = [
  // Core Components patterns
  /<div class="[^"]*cmp-[^"]*"[^>]*>/i,
  // CS-specific clientlib pattern with lc- prefix/suffix
  // (more specific than general etc.clientlibs)
  CS_CLIENTLIB_PATTERN,
  // Modern libs clientlib paths
  /\/libs\.clientlibs\//i,
  // Core components comments or data attributes
  /data-cmp-/i,
  /data-sly-/i,
  // Cloud Manager references
  /content\/experience-fragments\//i,
  // SPA editor references
  /data-cq-/i,
  // RUM data-routing for CS
  CS_ROUTING_PATTERN,
];

// AMS Indicators (Managed Services) - typically older AEM patterns
const AMS_PATTERNS = [
  // Legacy clientlib paths
  /\/etc\/clientlibs\//i,
  /\/etc\/designs\//i,
  // AMS-specific clientlib pattern with fingerprinted hashes (both JS and CSS)
  AMS_CLIENTLIB_PATTERN,
  // Classic UI patterns
  /foundation-/i,
  /cq:template/i,
  /cq-commons/i,
  // Legacy component patterns
  /parsys/i,
  // Legacy CQ references
  /\/CQ\//i,
  /\/apps\//i,
  // RUM data-routing for AMS
  AMS_ROUTING_PATTERN,
];

// Patterns tested against the dispatcher header value (case-sensitive, as before).
const AMS_HEADER_PATTERNS = [
  /^dispatcher[0-9].*$/,
];

const AEM_HEADLESS_PATTERNS = [
  /aem-headless/i,
  /\/content\/dam\//i,
];

/**
 * Counts how many of the given patterns match the text.
 * None of the patterns carries the `g`/`y` flag, so `test` is stateless here.
 * @param {RegExp[]} patterns - Patterns to test
 * @param {string} text - Text to test them against
 * @return {number} Number of matching patterns
 */
function countMatches(patterns, text) {
  return patterns.reduce((total, pattern) => total + (pattern.test(text) ? 1 : 0), 0);
}

/**
 * Reads the dispatcher header from whatever header container the caller passed.
 * @param {object|null|undefined} headers - Plain object or an object exposing `get(name)`
 * @return {*} The header value, or undefined when it is absent or `headers` is unusable
 */
function readDispatcherHeader(headers) {
  if (headers === null || typeof headers !== 'object') {
    return undefined;
  }
  // Fetch `Headers` (and `Map`) do not expose entries through bracket access
  if (typeof headers.get === 'function') {
    const viaGetter = headers.get(DISPATCHER_HEADER);
    return viaGetter === null ? undefined : viaGetter;
  }
  const exact = headers[DISPATCHER_HEADER];
  if (exact !== undefined && exact !== null) {
    return exact;
  }
  // HTTP header names are case-insensitive, so accept e.g. 'X-Dispatcher' as well
  const key = Object.keys(headers).find((name) => name.toLowerCase() === DISPATCHER_HEADER);
  return key === undefined ? undefined : headers[key];
}

/**
 * Detects the AEM delivery type from HTML source code (and, optionally, response headers).
 *
 * Every delivery type collects a score: one point per matching indicator pattern plus
 * extra weight for a few distinctive indicators. The type with the highest score wins;
 * on a tie the order edge, cs, ams, headless decides. Scores below 2 yield 'other'.
 *
 * @param {string} htmlSource - The HTML source code of the page
 * @param {object|null} [headers={}] - Response headers; only `x-dispatcher` is read. May be a
 * plain object (header name matched case-insensitively) or an object with a `get(name)`
 * method such as a Fetch `Headers` instance. `null` is treated like "no headers".
 * @return {string|null} - 'aem_edge', 'aem_cs', 'aem_ams', 'aem_headless'
 * or 'other' if undetermined, null if no HTML source is provided
 */
export function detectAEMVersion(htmlSource, headers = {}) {
  if (!htmlSource || typeof htmlSource !== 'string') {
    return null;
  }

  // Create a normalized version of the HTML for simpler pattern matching
  const normalizedHtml = htmlSource.toLowerCase();

  // Base score: one point per matching indicator
  let edsMatches = countMatches(EDS_PATTERNS, normalizedHtml);
  let csMatches = countMatches(CS_PATTERNS, normalizedHtml);
  let amsMatches = countMatches(AMS_PATTERNS, normalizedHtml);
  const aemHeadlessMatches = countMatches(AEM_HEADLESS_PATTERNS, normalizedHtml);

  // A missing header is skipped instead of being matched as the string "undefined"
  const dispatcherHeader = readDispatcherHeader(headers);
  if (dispatcherHeader !== undefined && dispatcherHeader !== null) {
    amsMatches += countMatches(AMS_HEADER_PATTERNS, String(dispatcherHeader));
  }

  // Decisive EDS indicators get a higher weight
  if (normalizedHtml.includes('lib-franklin.js') || normalizedHtml.includes('aem.js')) {
    edsMatches += 3;
  }

  // Reduced weight since core components can exist in both AMS and CS
  if (CORE_COMPONENT_CLASS_PATTERN.test(normalizedHtml)) {
    csMatches += 1;
  }

  // Decisive AMS indicators get a higher weight
  if (normalizedHtml.includes('/etc/designs/') || normalizedHtml.includes('foundation-')) {
    amsMatches += 2;
  }

  // The AMS clientlib format is very distinctive, hence the large weight
  if (AMS_CLIENTLIB_PATTERN.test(normalizedHtml)) {
    amsMatches += 5;
  }

  // The CS clientlib format is very distinctive as well
  if (CS_CLIENTLIB_PATTERN.test(normalizedHtml)) {
    csMatches += 3;
  }

  // Explicit RUM data-routing indicators carry significant weight
  if (AMS_ROUTING_PATTERN.test(normalizedHtml)) {
    amsMatches += 5;
  }
  if (EDS_ROUTING_PATTERN.test(normalizedHtml)) {
    edsMatches += 5;
  }
  if (CS_ROUTING_PATTERN.test(normalizedHtml)) {
    csMatches += 5;
  }

  // Order matters: on a tie the first entry with the top score is returned
  const scores = [
    [DELIVERY_TYPES.AEM_EDGE, edsMatches],
    [DELIVERY_TYPES.AEM_CS, csMatches],
    [DELIVERY_TYPES.AEM_AMS, amsMatches],
    [DELIVERY_TYPES.AEM_HEADLESS, aemHeadlessMatches],
  ];
  const maxMatches = Math.max(...scores.map(([, count]) => count));

  // Require a minimum threshold of matches to make a determination
  if (maxMatches < MIN_MATCH_THRESHOLD) {
    return DELIVERY_TYPES.OTHER;
  }

  return scores.find(([, count]) => count === maxMatches)[0];
}
|
|
202
|
+
|
|
203
|
+
// Matches every <meta ...> tag; used with matchAll, which works on a copy of the regex
const META_TAG_PATTERN = /<meta\b[^>]*>/gi;
// Quoted `name` / `content` attributes; group 2 holds the value between matching quotes
const META_NAME_ATTR_PATTERN = /\sname\s*=\s*(["'])(.*?)\1/i;
const META_CONTENT_ATTR_PATTERN = /\scontent\s*=\s*(["'])(.*?)\1/i;
const PAGE_ID_META_NAME = 'content-page-id';

/**
 * Extracts the value of the `content-page-id` meta tag from an HTML document.
 * Attribute order and additional attributes on the tag do not matter.
 * @param {string} html - The HTML source to scan
 * @return {string|null} The trimmed page ID, or null when the tag is absent or its value is blank
 */
function extractContentPageId(html) {
  for (const [tag] of html.matchAll(META_TAG_PATTERN)) {
    const nameAttr = tag.match(META_NAME_ATTR_PATTERN);
    if (nameAttr && nameAttr[2].toLowerCase() === PAGE_ID_META_NAME) {
      const contentAttr = tag.match(META_CONTENT_ATTR_PATTERN);
      if (contentAttr) {
        const pageId = contentAttr[2].trim();
        return pageId === '' ? null : pageId;
      }
      // A matching tag without a content attribute is ignored; keep scanning
    }
  }
  return null;
}

/**
 * Determines the AEM CS page ID for Content API, from the page URL.
 * Fetches the page and reads its `<meta name="content-page-id" content="...">` tag.
 * Errors raised by `fetch` itself are not caught here and reject the returned promise.
 * @param {string} pageURL - The URL of the page
 * @return {Promise<string|null>} - The AEM CS page ID, or null when the response is not ok
 * or the page carries no non-blank content-page-id meta tag
 */
export async function determineAEMCSPageId(pageURL) {
  const htmlResponse = await fetch(pageURL);
  if (!htmlResponse.ok) {
    return null;
  }
  const html = await htmlResponse.text();
  return extractContentPageId(html);
}
|
package/src/index.js
CHANGED
|
@@ -64,6 +64,7 @@ export {
|
|
|
64
64
|
resolveCanonicalUrl,
|
|
65
65
|
getSpacecatRequestHeaders,
|
|
66
66
|
ensureHttps,
|
|
67
|
+
urlMatchesFilter,
|
|
67
68
|
} from './url-helpers.js';
|
|
68
69
|
|
|
69
70
|
export { getStoredMetrics, storeMetrics } from './metrics-store.js';
|
|
@@ -88,3 +89,5 @@ export {
|
|
|
88
89
|
getMonthInfo,
|
|
89
90
|
getTemporalCondition,
|
|
90
91
|
} from './calendar-week-helper.js';
|
|
92
|
+
|
|
93
|
+
export { detectAEMVersion, determineAEMCSPageId, DELIVERY_TYPES } from './aem.js';
|
package/src/url-helpers.js
CHANGED
|
@@ -171,6 +171,75 @@ async function resolveCanonicalUrl(urlString, method = 'HEAD') {
|
|
|
171
171
|
}
|
|
172
172
|
}
|
|
173
173
|
|
|
174
|
+
/**
 * Normalize a URL by trimming whitespace and handling trailing slashes.
 *
 * Trailing slashes are collapsed: a URL that consists of a host only (with or without a
 * leading `scheme://`) keeps exactly one trailing slash, any other URL loses them all.
 * URLs that do not end with a slash are only trimmed.
 * @param {string} url - The URL to normalize
 * @returns {string} The normalized URL; falsy or non-string input is returned unchanged
 */
function normalizeUrl(url) {
  if (!url || typeof url !== 'string') return url;

  const trimmed = url.trim();
  if (!trimmed.endsWith('/')) {
    return trimmed;
  }

  // Remove all trailing slashes
  const withoutTrailingSlashes = trimmed.replace(/\/+$/, '');

  // The slashes of "scheme://" must not count as path separators, otherwise
  // "https://example.com/" would be treated differently from "example.com/"
  const withoutScheme = withoutTrailingSlashes.replace(/^[a-z][a-z\d+.-]*:\/\//i, '');
  const isRootPath = !withoutScheme.includes('/');

  // Add back a single slash if it's a root path (domain only)
  return isRootPath ? `${withoutTrailingSlashes}/` : withoutTrailingSlashes;
}
|
|
196
|
+
|
|
197
|
+
/**
 * Normalize a pathname by removing trailing slashes.
 * A pathname made up of slashes only (`/`, `//`, ...) normalizes to the root path `/`
 * rather than to an empty string.
 * @param {string} pathname - The pathname to normalize
 * @returns {string} The normalized pathname; falsy or non-string input is returned unchanged
 */
function normalizePathname(pathname) {
  if (!pathname || typeof pathname !== 'string') return pathname;
  const stripped = pathname.replace(/\/+$/, '');
  // Nothing left means the pathname was all slashes, i.e. the root
  return stripped === '' ? '/' : stripped;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Tells whether the pathname of a URL equals the pathname of at least one filter URL.
 * Both sides go through normalizeUrl, prependSchema and normalizePathname before the
 * comparison, and only the resulting pathnames are compared.
 * @param {string} url - URL to check (format: https://domain.com/path)
 * @param {string[]} filterUrls - Array of filter URLs (format: domain.com/path)
 * @returns {boolean} True when no filters are given or when any filter pathname equals the
 * URL pathname. A filter URL that fails to parse is logged and skipped; if the URL itself
 * fails to parse, a warning is logged and false is returned.
 */
function urlMatchesFilter(url, filterUrls) {
  const noFilters = !filterUrls || filterUrls.length === 0;
  if (noFilters) {
    return true;
  }

  // Shared pipeline: normalize, make sure a schema is present, parse, normalize the pathname
  const toComparablePath = (candidate) => {
    const cleaned = normalizeUrl(candidate);
    const withSchema = prependSchema(cleaned);
    return normalizePathname(new URL(withSchema).pathname);
  };

  try {
    const targetPath = toComparablePath(url);

    const matchesTarget = (candidate) => {
      try {
        return toComparablePath(candidate) === targetPath;
      } catch (failure) {
        // An unusable filter entry must not abort the search; report it and move on
        /* eslint-disable-next-line no-console */
        console.warn(`Invalid filter URL: ${candidate}`, failure.message);
        return false;
      }
    };

    return filterUrls.some(matchesTarget);
  } catch (failure) {
    // Reached when the URL under test cannot be processed: nothing can match
    /* eslint-disable-next-line no-console */
    console.warn(`Invalid URL: ${url}`, failure.message);
    return false;
  }
}
|
|
242
|
+
|
|
174
243
|
export {
|
|
175
244
|
ensureHttps,
|
|
176
245
|
getSpacecatRequestHeaders,
|
|
@@ -182,4 +251,5 @@ export {
|
|
|
182
251
|
stripTrailingDot,
|
|
183
252
|
stripTrailingSlash,
|
|
184
253
|
stripWWW,
|
|
254
|
+
urlMatchesFilter,
|
|
185
255
|
};
|