@adobe/spacecat-shared-utils 1.89.0 → 1.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/bot-blocker-detect/bot-blocker-detect.js +154 -2
- package/src/cdn-helpers.js +43 -11
- package/src/index.js +7 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
# [@adobe/spacecat-shared-utils-v1.90.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.89.1...@adobe/spacecat-shared-utils-v1.90.0) (2026-01-29)
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
### Features
|
|
5
|
+
|
|
6
|
+
* LLMO-1534 Pretty format old and new log forwarding credentials ([#1290](https://github.com/adobe/spacecat-shared/issues/1290)) ([df84629](https://github.com/adobe/spacecat-shared/commit/df84629c1a49e81b43e352eb4b8b542b9eb3085e))
|
|
7
|
+
|
|
8
|
+
# [@adobe/spacecat-shared-utils-v1.89.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.89.0...@adobe/spacecat-shared-utils-v1.89.1) (2026-01-26)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* Additional checks and methods on bot protection ([#1250](https://github.com/adobe/spacecat-shared/issues/1250)) ([0c34a8d](https://github.com/adobe/spacecat-shared/commit/0c34a8d850abef6e2a024132bc1c61d10865c1a0))
|
|
14
|
+
|
|
1
15
|
# [@adobe/spacecat-shared-utils-v1.89.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.88.0...@adobe/spacecat-shared-utils-v1.89.0) (2026-01-22)
|
|
2
16
|
|
|
3
17
|
|
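package/package.json
CHANGED
[package.json hunk (+1 -1) not captured in this extract]
package/src/bot-blocker-detect/bot-blocker-detect.js
CHANGED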
|
@@ -30,7 +30,85 @@ const CONFIDENCE_MEDIUM = 0.95;
|
|
|
30
30
|
const CONFIDENCE_ABSOLUTE = 1.0;
|
|
31
31
|
const DEFAULT_TIMEOUT = 5000;
|
|
32
32
|
|
|
33
|
-
|
|
33
|
+
/**
|
|
34
|
+
* SpaceCat bot identification constants
|
|
35
|
+
*/
|
|
36
|
+
export const SPACECAT_BOT_USER_AGENT = 'Spacecat/1.0';
|
|
37
|
+
|
|
38
|
+
/**
|
|
39
|
+
* Gets SpaceCat bot IPs from environment variable
|
|
40
|
+
* @param {string} ipsString - Comma-separated IPs (from env/secrets) - REQUIRED
|
|
41
|
+
* @returns {Array<string>} Array of IP addresses
|
|
42
|
+
* @throws {Error} If ipsString is not provided
|
|
43
|
+
*/
|
|
44
|
+
export function getSpacecatBotIps(ipsString) {
|
|
45
|
+
if (!ipsString) {
|
|
46
|
+
throw new Error('SPACECAT_BOT_IPS environment variable is required but not set');
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
return ipsString.split(',').map((ip) => ip.trim()).filter((ip) => ip);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/**
|
|
53
|
+
* Formats allowlist message with current bot IPs
|
|
54
|
+
* @param {string} botIps - Comma-separated IPs from secrets - REQUIRED
|
|
55
|
+
* @returns {object} Formatted message with IPs and user-agent
|
|
56
|
+
* @throws {Error} If botIps is not provided
|
|
57
|
+
*/
|
|
58
|
+
export function formatAllowlistMessage(botIps) {
|
|
59
|
+
const ips = getSpacecatBotIps(botIps);
|
|
60
|
+
|
|
61
|
+
return {
|
|
62
|
+
title: 'To allowlist SpaceCat bot:',
|
|
63
|
+
ips,
|
|
64
|
+
userAgent: SPACECAT_BOT_USER_AGENT,
|
|
65
|
+
};
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
/**
|
|
69
|
+
* HTML patterns for detecting challenge pages
|
|
70
|
+
*/
|
|
71
|
+
const CHALLENGE_PATTERNS = {
|
|
72
|
+
cloudflare: [
|
|
73
|
+
/Checking your browser/i,
|
|
74
|
+
/Just a moment\.\.\./i,
|
|
75
|
+
/Verifying you are human/i,
|
|
76
|
+
/Please wait.*CloudFlare/i,
|
|
77
|
+
/cf-turnstile/i,
|
|
78
|
+
/challenge-platform/i,
|
|
79
|
+
/cf-chl-widget/i, // Cloudflare challenge widget
|
|
80
|
+
/ray\s*id.*cloudflare/i, // Cloudflare Ray ID in error pages
|
|
81
|
+
/__cf_chl_tk/i, // Cloudflare challenge token
|
|
82
|
+
/cloudflare.*security/i,
|
|
83
|
+
/attention required.*cloudflare/i,
|
|
84
|
+
],
|
|
85
|
+
imperva: [
|
|
86
|
+
/_Incapsula_Resource/i,
|
|
87
|
+
/Incapsula incident ID/i,
|
|
88
|
+
/incap_ses/i, // Imperva session cookie
|
|
89
|
+
/visid_incap/i, // Imperva visitor ID
|
|
90
|
+
],
|
|
91
|
+
akamai: [
|
|
92
|
+
/Access Denied.*Akamai/i,
|
|
93
|
+
/Reference.*Akamai/i,
|
|
94
|
+
],
|
|
95
|
+
general: [
|
|
96
|
+
/captcha/i,
|
|
97
|
+
/human verification/i,
|
|
98
|
+
/recaptcha/i,
|
|
99
|
+
/hcaptcha/i,
|
|
100
|
+
/datadome/i,
|
|
101
|
+
/dd-request-id/i,
|
|
102
|
+
],
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Analyzes response for bot protection indicators
|
|
107
|
+
* @param {Object} response - Response object with status and headers
|
|
108
|
+
* @param {string} [html] - Optional HTML content for deeper analysis
|
|
109
|
+
* @returns {Object} Detection result
|
|
110
|
+
*/
|
|
111
|
+
function analyzeResponse(response, html = null) {
|
|
34
112
|
const { status, headers } = response;
|
|
35
113
|
|
|
36
114
|
// Check for CDN/blocker infrastructure presence (lazy evaluation for performance)
|
|
@@ -45,6 +123,12 @@ function analyzeResponse(response) {
|
|
|
45
123
|
|| headers.get('x-amz-cf-pop')
|
|
46
124
|
|| headers.get('via')?.includes('CloudFront');
|
|
47
125
|
|
|
126
|
+
// Check HTML content for challenge page patterns (if HTML provided)
|
|
127
|
+
const htmlHasChallenge = (patterns) => {
|
|
128
|
+
if (!html) return false;
|
|
129
|
+
return patterns.some((pattern) => pattern.test(html));
|
|
130
|
+
};
|
|
131
|
+
|
|
48
132
|
// Active blocking (403 status with known blocker)
|
|
49
133
|
if (status === 403 && hasCloudflare()) {
|
|
50
134
|
return {
|
|
@@ -88,6 +172,16 @@ function analyzeResponse(response) {
|
|
|
88
172
|
|
|
89
173
|
// Success with known infrastructure present (infrastructure detected but allowing requests)
|
|
90
174
|
if (status === 200 && hasCloudflare()) {
|
|
175
|
+
// Check if HTML contains challenge page (even though status is 200)
|
|
176
|
+
if (htmlHasChallenge(CHALLENGE_PATTERNS.cloudflare)) {
|
|
177
|
+
return {
|
|
178
|
+
crawlable: false,
|
|
179
|
+
type: 'cloudflare',
|
|
180
|
+
confidence: CONFIDENCE_HIGH,
|
|
181
|
+
reason: 'Challenge page detected despite 200 status',
|
|
182
|
+
};
|
|
183
|
+
}
|
|
184
|
+
|
|
91
185
|
return {
|
|
92
186
|
crawlable: true,
|
|
93
187
|
type: 'cloudflare-allowed',
|
|
@@ -96,6 +190,14 @@ function analyzeResponse(response) {
|
|
|
96
190
|
}
|
|
97
191
|
|
|
98
192
|
if (status === 200 && hasImperva()) {
|
|
193
|
+
if (htmlHasChallenge(CHALLENGE_PATTERNS.imperva)) {
|
|
194
|
+
return {
|
|
195
|
+
crawlable: false,
|
|
196
|
+
type: 'imperva',
|
|
197
|
+
confidence: CONFIDENCE_HIGH,
|
|
198
|
+
reason: 'Challenge page detected despite 200 status',
|
|
199
|
+
};
|
|
200
|
+
}
|
|
99
201
|
return {
|
|
100
202
|
crawlable: true,
|
|
101
203
|
type: 'imperva-allowed',
|
|
@@ -104,6 +206,14 @@ function analyzeResponse(response) {
|
|
|
104
206
|
}
|
|
105
207
|
|
|
106
208
|
if (status === 200 && hasAkamai()) {
|
|
209
|
+
if (htmlHasChallenge(CHALLENGE_PATTERNS.akamai)) {
|
|
210
|
+
return {
|
|
211
|
+
crawlable: false,
|
|
212
|
+
type: 'akamai',
|
|
213
|
+
confidence: CONFIDENCE_HIGH,
|
|
214
|
+
reason: 'Challenge page detected despite 200 status',
|
|
215
|
+
};
|
|
216
|
+
}
|
|
107
217
|
return {
|
|
108
218
|
crawlable: true,
|
|
109
219
|
type: 'akamai-allowed',
|
|
@@ -129,6 +239,15 @@ function analyzeResponse(response) {
|
|
|
129
239
|
|
|
130
240
|
// Success with no known infrastructure
|
|
131
241
|
if (status === 200) {
|
|
242
|
+
// Still check for generic challenge patterns
|
|
243
|
+
if (htmlHasChallenge(CHALLENGE_PATTERNS.general)) {
|
|
244
|
+
return {
|
|
245
|
+
crawlable: false,
|
|
246
|
+
type: 'unknown',
|
|
247
|
+
confidence: 0.7,
|
|
248
|
+
reason: 'Generic challenge patterns detected',
|
|
249
|
+
};
|
|
250
|
+
}
|
|
132
251
|
return {
|
|
133
252
|
crawlable: true,
|
|
134
253
|
type: 'none',
|
|
@@ -136,7 +255,16 @@ function analyzeResponse(response) {
|
|
|
136
255
|
};
|
|
137
256
|
}
|
|
138
257
|
|
|
139
|
-
//
|
|
258
|
+
// Potential CDN/protection blocked the request
|
|
259
|
+
if (status === 403) {
|
|
260
|
+
return {
|
|
261
|
+
crawlable: false,
|
|
262
|
+
type: 'unknown',
|
|
263
|
+
confidence: 0.7,
|
|
264
|
+
reason: 'HTTP 403 Forbidden - access denied',
|
|
265
|
+
};
|
|
266
|
+
}
|
|
267
|
+
|
|
140
268
|
return {
|
|
141
269
|
crawlable: true,
|
|
142
270
|
type: 'unknown',
|
|
@@ -207,3 +335,27 @@ export async function detectBotBlocker({ baseUrl, timeout = DEFAULT_TIMEOUT }) {
|
|
|
207
335
|
return analyzeError(error);
|
|
208
336
|
}
|
|
209
337
|
}
|
|
338
|
+
|
|
339
|
+
/**
|
|
340
|
+
* Analyzes already-fetched response data for bot protection.
|
|
341
|
+
* Used by content scraper to analyze Puppeteer results without making another request.
|
|
342
|
+
*
|
|
343
|
+
* @param {Object} data - Response data to analyze
|
|
344
|
+
* @param {number} data.status - HTTP status code
|
|
345
|
+
* @param {Object} data.headers - Response headers (plain object or Headers object)
|
|
346
|
+
* @param {string} [data.html] - Optional HTML content for challenge page detection
|
|
347
|
+
* @returns {Object} Detection result (same format as detectBotBlocker)
|
|
348
|
+
*/
|
|
349
|
+
export function analyzeBotProtection({ status, headers, html }) {
|
|
350
|
+
// Convert headers to Headers object if plain object
|
|
351
|
+
const headersObj = headers instanceof Headers
|
|
352
|
+
? headers
|
|
353
|
+
: new Headers(Object.entries(headers || {}));
|
|
354
|
+
|
|
355
|
+
const response = {
|
|
356
|
+
status,
|
|
357
|
+
headers: headersObj,
|
|
358
|
+
};
|
|
359
|
+
|
|
360
|
+
return analyzeResponse(response, html);
|
|
361
|
+
}
|
package/src/cdn-helpers.js
CHANGED
|
@@ -11,9 +11,44 @@
|
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
/**
|
|
14
|
-
*
|
|
14
|
+
* Transforms credential fields object with backwards compatibility
|
|
15
|
+
* @param {Object} payload - The payload containing credential information
|
|
16
|
+
* @returns {Object} - Object with credential fields
|
|
15
17
|
*/
|
|
18
|
+
const transformCredentialFields = (payload) => {
|
|
19
|
+
const fields = {};
|
|
20
|
+
|
|
21
|
+
if (payload.currentAccessKey) {
|
|
22
|
+
fields['Access Key (current)'] = payload.currentAccessKey;
|
|
23
|
+
} else if (payload.accessKey) {
|
|
24
|
+
fields['Access Key'] = payload.accessKey;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
if (payload.currentSecretKey) {
|
|
28
|
+
fields['Secret Key (current)'] = payload.currentSecretKey;
|
|
29
|
+
} else if (payload.secretKey) {
|
|
30
|
+
fields['Secret Key'] = payload.secretKey;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
if (payload.oldAccessKey) {
|
|
34
|
+
fields['Access Key (to be retired)'] = payload.oldAccessKey;
|
|
35
|
+
}
|
|
16
36
|
|
|
37
|
+
if (payload.oldSecretKey) {
|
|
38
|
+
fields['Secret Key (to be retired)'] = payload.oldSecretKey;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
fields.currentCredentialsCreatedAt = payload.currentCredentialsCreatedAt;
|
|
42
|
+
fields.currentCredentialsLastUsed = payload.currentCredentialsLastUsed;
|
|
43
|
+
fields.oldCredentialsCreatedAt = payload.oldCredentialsCreatedAt;
|
|
44
|
+
fields.oldCredentialsLastUsed = payload.oldCredentialsLastUsed;
|
|
45
|
+
|
|
46
|
+
return fields;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* CDN-specific transformations for log forwarding configuration preparation
|
|
51
|
+
*/
|
|
17
52
|
const FASTLY_LOG_FORMAT = `{
|
|
18
53
|
"timestamp": "%{strftime(\\{"%Y-%m-%dT%H:%M:%S%z"\\}, time.start)}V",
|
|
19
54
|
"host": "%{if(req.http.Fastly-Orig-Host, req.http.Fastly-Orig-Host, req.http.Host)}V",
|
|
@@ -35,8 +70,7 @@ const CDN_TRANSFORMATIONS = {
|
|
|
35
70
|
Placement: 'Format Version Default',
|
|
36
71
|
'Log format': FASTLY_LOG_FORMAT,
|
|
37
72
|
'Access method': 'User credentials',
|
|
38
|
-
|
|
39
|
-
'Secret key': payload.secretKey,
|
|
73
|
+
...transformCredentialFields(payload),
|
|
40
74
|
Period: 300,
|
|
41
75
|
'Log line format': 'Blank',
|
|
42
76
|
Compression: 'Gzip',
|
|
@@ -67,8 +101,7 @@ const CDN_TRANSFORMATIONS = {
|
|
|
67
101
|
'Log file prefix': '{%Y}-{%m}-{%d}T{%H}:{%M}:{%S}.000',
|
|
68
102
|
'Log file suffix': '.log',
|
|
69
103
|
'Log interval': '60 seconds',
|
|
70
|
-
|
|
71
|
-
'Secret key': payload.secretKey,
|
|
104
|
+
...transformCredentialFields(payload),
|
|
72
105
|
HelpUrl: 'https://techdocs.akamai.com/datastream2/docs/stream-amazon-s3',
|
|
73
106
|
}),
|
|
74
107
|
'byocdn-cloudflare': (payload) => ({
|
|
@@ -156,8 +189,7 @@ const CDN_TRANSFORMATIONS = {
|
|
|
156
189
|
'Bucket name': payload.bucketName,
|
|
157
190
|
Region: payload.region,
|
|
158
191
|
Path: `${payload.allowedPaths?.[0] || ''}<year>/<month>/<day>`,
|
|
159
|
-
|
|
160
|
-
'Secret Key': payload.secretKey,
|
|
192
|
+
...transformCredentialFields(payload),
|
|
161
193
|
'Timestamp format': 'RFC3339',
|
|
162
194
|
'Log format': 'JSON lines (one log per line)',
|
|
163
195
|
Compression: 'Optional, but prefered. Please use Gzip compression if you decide to compress the log files.',
|
|
@@ -206,11 +238,11 @@ const prettifyLogForwardingConfig = (payload) => {
|
|
|
206
238
|
}
|
|
207
239
|
|
|
208
240
|
if (payload.logSource === 'byocdn-fastly' || payload.logSource === 'byocdn-akamai' || payload.logSource === 'byocdn-other') {
|
|
209
|
-
if (!payload.accessKey) {
|
|
210
|
-
throw new Error('accessKey is required in payload');
|
|
241
|
+
if (!payload.accessKey && !payload.currentAccessKey) {
|
|
242
|
+
throw new Error('accessKey or currentAccessKey is required in payload');
|
|
211
243
|
}
|
|
212
|
-
if (!payload.secretKey) {
|
|
213
|
-
throw new Error('secretKey is required in payload');
|
|
244
|
+
if (!payload.secretKey && !payload.currentSecretKey) {
|
|
245
|
+
throw new Error('secretKey or currentSecretKey is required in payload');
|
|
214
246
|
}
|
|
215
247
|
}
|
|
216
248
|
|
package/src/index.js
CHANGED
|
@@ -111,7 +111,13 @@ export * as llmoStrategy from './llmo-strategy.js';
|
|
|
111
111
|
export * as schemas from './schemas.js';
|
|
112
112
|
|
|
113
113
|
export { detectLocale } from './locale-detect/locale-detect.js';
|
|
114
|
-
export {
|
|
114
|
+
export {
|
|
115
|
+
detectBotBlocker,
|
|
116
|
+
analyzeBotProtection,
|
|
117
|
+
SPACECAT_BOT_USER_AGENT,
|
|
118
|
+
getSpacecatBotIps,
|
|
119
|
+
formatAllowlistMessage,
|
|
120
|
+
} from './bot-blocker-detect/bot-blocker-detect.js';
|
|
115
121
|
export { prettifyLogForwardingConfig } from './cdn-helpers.js';
|
|
116
122
|
|
|
117
123
|
export {
|