@adobe/spacecat-shared-utils 1.85.1 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,17 @@
1
+ # [@adobe/spacecat-shared-utils-v1.86.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.85.2...@adobe/spacecat-shared-utils-v1.86.0) (2025-12-12)
2
+
3
+
4
+ ### Features
5
+
6
+ * add detection for Akamai, Fastly, and CloudFront ([#1238](https://github.com/adobe/spacecat-shared/issues/1238)) ([3f7aad9](https://github.com/adobe/spacecat-shared/commit/3f7aad96fbc823b2e9d59541a71ba3b4e6d315e8))
7
+
8
+ # [@adobe/spacecat-shared-utils-v1.85.2](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.85.1...@adobe/spacecat-shared-utils-v1.85.2) (2025-12-11)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * Implement Structured (JSON) Logging for Spacecat Audits - rollback ([#1239](https://github.com/adobe/spacecat-shared/issues/1239)) ([1f174d7](https://github.com/adobe/spacecat-shared/commit/1f174d7dd188dbdc610b75bf58644992925755b1))
14
+
1
15
  # [@adobe/spacecat-shared-utils-v1.85.1](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-utils-v1.85.0...@adobe/spacecat-shared-utils-v1.85.1) (2025-12-11)
2
16
 
3
17
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-utils",
3
- "version": "1.85.1",
3
+ "version": "1.86.0",
4
4
  "description": "Shared modules of the Spacecat Services - utils",
5
5
  "type": "module",
6
6
  "exports": {
@@ -16,7 +16,8 @@ import { isValidUrl } from '../functions.js';
16
16
  /**
17
17
  * Confidence levels used in bot blocker detection:
18
18
  * - 1.0 (ABSOLUTE): Site responds successfully with 200 OK - definitively crawlable
19
- * - 0.99 (HIGH): Known bot blocker signature detected (Cloudflare cf-ray, Imperva x-iinfo/x-cdn)
19
+ * - 0.99 (HIGH): Known bot blocker signature detected
20
+ * (Cloudflare, Imperva, Akamai, Fastly, CloudFront)
20
21
  * - 0.95 (MEDIUM): HTTP/2 protocol errors indicating potential blocking
21
22
  * - 0.5: Unknown status code without known blocker signature (e.g., 403 without headers)
22
23
  * - 0.3: Unknown error occurred during request
@@ -32,7 +33,20 @@ const DEFAULT_TIMEOUT = 5000;
32
33
  function analyzeResponse(response) {
33
34
  const { status, headers } = response;
34
35
 
35
- if (status === 403 && headers.get('cf-ray')) {
36
+ // Check for CDN/blocker infrastructure presence (lazy evaluation for performance)
37
+ const hasCloudflare = () => headers.get('cf-ray') || headers.get('server') === 'cloudflare';
38
+ const hasImperva = () => headers.get('x-iinfo') || headers.get('x-cdn') === 'Incapsula';
39
+ const hasAkamai = () => headers.get('x-akamai-request-id')
40
+ || headers.get('x-akamai-session-id')
41
+ || headers.get('server')?.includes('AkamaiGHost');
42
+ const hasFastly = () => headers.get('x-served-by')?.startsWith('cache-')
43
+ || headers.get('fastly-io-info');
44
+ const hasCloudFront = () => headers.get('x-amz-cf-id')
45
+ || headers.get('x-amz-cf-pop')
46
+ || headers.get('via')?.includes('CloudFront');
47
+
48
+ // Active blocking (403 status with known blocker)
49
+ if (status === 403 && hasCloudflare()) {
36
50
  return {
37
51
  crawlable: false,
38
52
  type: 'cloudflare',
@@ -40,7 +54,7 @@ function analyzeResponse(response) {
40
54
  };
41
55
  }
42
56
 
43
- if (status === 403 && (headers.get('x-iinfo') || headers.get('x-cdn') === 'Incapsula')) {
57
+ if (status === 403 && hasImperva()) {
44
58
  return {
45
59
  crawlable: false,
46
60
  type: 'imperva',
@@ -48,6 +62,72 @@ function analyzeResponse(response) {
48
62
  };
49
63
  }
50
64
 
65
+ if (status === 403 && hasAkamai()) {
66
+ return {
67
+ crawlable: false,
68
+ type: 'akamai',
69
+ confidence: CONFIDENCE_HIGH,
70
+ };
71
+ }
72
+
73
+ if (status === 403 && hasFastly()) {
74
+ return {
75
+ crawlable: false,
76
+ type: 'fastly',
77
+ confidence: CONFIDENCE_HIGH,
78
+ };
79
+ }
80
+
81
+ if (status === 403 && hasCloudFront()) {
82
+ return {
83
+ crawlable: false,
84
+ type: 'cloudfront',
85
+ confidence: CONFIDENCE_HIGH,
86
+ };
87
+ }
88
+
89
+ // Success with known infrastructure present (infrastructure detected but allowing requests)
90
+ if (status === 200 && hasCloudflare()) {
91
+ return {
92
+ crawlable: true,
93
+ type: 'cloudflare-allowed',
94
+ confidence: CONFIDENCE_ABSOLUTE,
95
+ };
96
+ }
97
+
98
+ if (status === 200 && hasImperva()) {
99
+ return {
100
+ crawlable: true,
101
+ type: 'imperva-allowed',
102
+ confidence: CONFIDENCE_ABSOLUTE,
103
+ };
104
+ }
105
+
106
+ if (status === 200 && hasAkamai()) {
107
+ return {
108
+ crawlable: true,
109
+ type: 'akamai-allowed',
110
+ confidence: CONFIDENCE_ABSOLUTE,
111
+ };
112
+ }
113
+
114
+ if (status === 200 && hasFastly()) {
115
+ return {
116
+ crawlable: true,
117
+ type: 'fastly-allowed',
118
+ confidence: CONFIDENCE_ABSOLUTE,
119
+ };
120
+ }
121
+
122
+ if (status === 200 && hasCloudFront()) {
123
+ return {
124
+ crawlable: true,
125
+ type: 'cloudfront-allowed',
126
+ confidence: CONFIDENCE_ABSOLUTE,
127
+ };
128
+ }
129
+
130
+ // Success with no known infrastructure
51
131
  if (status === 200) {
52
132
  return {
53
133
  crawlable: true,
@@ -56,6 +136,7 @@ function analyzeResponse(response) {
56
136
  };
57
137
  }
58
138
 
139
+ // Unknown status without known blocker signature
59
140
  return {
60
141
  crawlable: true,
61
142
  type: 'unknown',
@@ -86,14 +167,24 @@ function analyzeError(error) {
86
167
  * Currently detects:
87
168
  * - Cloudflare bot blocking (403 + cf-ray or server: cloudflare header)
88
169
  * - Imperva/Incapsula (403 + x-iinfo or x-cdn: Incapsula header)
170
+ * - Akamai (403 + x-akamai-request-id, x-akamai-session-id, or server: AkamaiGHost header)
171
+ * - Fastly (403 + x-served-by starting with cache-, or fastly-io-info header)
172
+ * - AWS CloudFront (403 + x-amz-cf-id, x-amz-cf-pop, or via: CloudFront header)
89
173
  * - HTTP/2 stream errors (NGHTTP2_INTERNAL_ERROR, ERR_HTTP2_STREAM_ERROR)
90
174
  *
175
+ * Also detects infrastructure presence on successful requests (200 OK):
176
+ * - Returns 'cloudflare-allowed', 'imperva-allowed', 'akamai-allowed',
177
+ * 'fastly-allowed', or 'cloudfront-allowed' when infrastructure is present
178
+ * but allowing the request through
179
+ *
91
180
  * @param {Object} config - Configuration object
92
181
  * @param {string} config.baseUrl - The base URL to check
93
182
  * @param {number} [config.timeout=5000] - Request timeout in milliseconds
94
183
  * @returns {Promise<Object>} Detection result with:
95
184
  * - crawlable {boolean}: Whether the site can be crawled by bots
96
- * - type {string}: Blocker type ('cloudflare', 'imperva', 'http2-block', 'none', 'unknown')
185
+ * - type {string}: Blocker type ('cloudflare', 'imperva', 'akamai', 'fastly',
186
+ * 'cloudfront', 'http2-block', 'cloudflare-allowed', 'imperva-allowed',
187
+ * 'akamai-allowed', 'fastly-allowed', 'cloudfront-allowed', 'none', 'unknown')
97
188
  * - confidence {number}: Confidence level (0.0-1.0, see confidence level constants)
98
189
  * @throws {Error} If baseUrl is invalid
99
190
  */
@@ -17,7 +17,10 @@ export interface BotBlockerConfig {
17
17
 
18
18
  export interface BotBlockerResult {
19
19
  crawlable: boolean;
20
- type: 'cloudflare' | 'imperva' | 'http2-block' | 'none' | 'unknown';
20
+ type: 'cloudflare' | 'imperva' | 'akamai' | 'fastly' | 'cloudfront'
21
+ | 'cloudflare-allowed' | 'imperva-allowed' | 'akamai-allowed'
22
+ | 'fastly-allowed' | 'cloudfront-allowed'
23
+ | 'http2-block' | 'none' | 'unknown';
21
24
  confidence: number;
22
25
  }
23
26
 
@@ -13,92 +13,64 @@
13
13
  import { getTraceId } from './xray.js';
14
14
 
15
15
  /**
16
- * Check if a value is a plain object (not Array, not Error, not null, not other special objects)
17
- * @param {*} value - The value to check
18
- * @returns {boolean} - True if the value is a plain object
19
- */
20
- function isPlainObject(value) {
21
- return typeof value === 'object'
22
- && value !== null
23
- && !Array.isArray(value)
24
- && !(value instanceof Error)
25
- && value.constructor === Object;
26
- }
27
-
28
- /**
29
- * A higher-order function that wraps a given function and enhances logging by converting
30
- * all logs to JSON format and appending `severity`, `jobId` and `traceId`
31
- * to log messages when available.
16
+ * A higher-order function that wraps a given function and enhances logging by appending
17
+ * a `jobId` and `traceId` to log messages when available. This improves traceability of logs
18
+ * associated with specific jobs or processes.
19
+ *
20
+ * The wrapper checks if a `log` object exists in the `context` and whether the `message`
21
+ * contains a `jobId`. It also extracts the AWS X-Ray trace ID if available. If found, log
22
+ * methods (e.g., `info`, `error`, etc.) will prepend the `jobId` and/or `traceId` to all log
23
+ * statements. All existing code using `context.log` will automatically include these markers.
32
24
  *
33
- * All log messages are automatically converted to structured JSON format:
34
- * - String messages become: { severity: "info", message: "...", jobId: "...", traceId: "..." }
35
- * - Object messages are merged with:
36
- * { severity: "info", ...yourObject, jobId: "...", traceId: "..." }
25
+ * @param {function} fn - The original function to be wrapped, called with the provided
26
+ * message and context after logging enhancement.
27
+ * @returns {function(object, object): Promise<Response>} - A wrapped function that enhances
28
+ * logging and returns the result of the original function.
37
29
  *
38
- * @param {function} fn - The original function to be wrapped
39
- * @returns {function(object, object): Promise<Response>} - A wrapped function with JSON logging
30
+ * `context.log` will be enhanced in place to include `jobId` and/or `traceId` prefixed to all
31
+ * log messages. No code changes needed - existing `context.log` calls work automatically.
40
32
  */
41
33
  export function logWrapper(fn) {
42
34
  return async (message, context) => {
43
35
  const { log } = context;
44
36
 
45
37
  if (log && !context.contextualLog) {
46
- const markers = {};
38
+ const markers = [];
47
39
 
48
40
  // Extract jobId from message if available
49
41
  if (typeof message === 'object' && message !== null && 'jobId' in message) {
50
- markers.jobId = message.jobId;
42
+ const { jobId } = message;
43
+ markers.push(`[jobId=${jobId}]`);
51
44
  }
52
45
 
53
46
  // Extract traceId from AWS X-Ray
54
47
  const traceId = getTraceId();
55
48
  if (traceId) {
56
- markers.traceId = traceId;
49
+ markers.push(`[traceId=${traceId}]`);
57
50
  }
58
51
 
59
- // Define log levels
60
- const logLevels = ['info', 'error', 'debug', 'warn', 'trace', 'verbose', 'silly', 'fatal'];
52
+ // If we have markers, enhance the log object directly
53
+ if (markers.length > 0) {
54
+ const markerString = markers.join(' ');
61
55
 
62
- // Wrap all log methods to output structured JSON
63
- context.log = logLevels.reduce((accumulator, level) => {
64
- if (typeof log[level] === 'function') {
65
- accumulator[level] = (...args) => {
66
- // If first argument is a plain object, merge with markers
67
- if (args.length > 0 && isPlainObject(args[0])) {
68
- return log[level](JSON.stringify({ severity: level, ...markers, ...args[0] }));
69
- }
56
+ // Define log levels
57
+ const logLevels = ['info', 'error', 'debug', 'warn', 'trace', 'verbose', 'silly', 'fatal'];
70
58
 
71
- // If first argument is a string, convert to structured format
72
- if (args.length > 0 && typeof args[0] === 'string') {
73
- const logObject = {
74
- severity: level,
75
- ...markers,
76
- message: args[0],
77
- };
78
-
79
- // If second argument is a plain object, merge it into the log object
80
- if (args.length > 1 && isPlainObject(args[1])) {
81
- Object.assign(logObject, args[1]);
82
-
83
- // If there are more arguments after the object, add them as 'data'
84
- if (args.length > 2) {
85
- logObject.data = args.slice(2);
86
- }
87
- } else if (args.length > 1) {
88
- // If there are additional arguments but second is not a plain object,
89
- // add all additional args as 'data'
90
- logObject.data = args.slice(1);
59
+ // Enhance context.log directly to include markers in all log statements
60
+ context.log = logLevels.reduce((accumulator, level) => {
61
+ if (typeof log[level] === 'function') {
62
+ accumulator[level] = (...args) => {
63
+ // If first argument is a string (format string), prepend the marker to it
64
+ if (args.length > 0 && typeof args[0] === 'string') {
65
+ const enhancedArgs = [`${markerString} ${args[0]}`, ...args.slice(1)];
66
+ return log[level](...enhancedArgs);
91
67
  }
92
-
93
- return log[level](JSON.stringify(logObject));
94
- }
95
-
96
- // For other types (arrays, primitives, Error objects), wrap in object
97
- return log[level](JSON.stringify({ severity: level, ...markers, data: args }));
98
- };
99
- }
100
- return accumulator;
101
- }, {});
68
+ return log[level](...args);
69
+ };
70
+ }
71
+ return accumulator;
72
+ }, {});
73
+ }
102
74
 
103
75
  // Mark that we've processed this context
104
76
  context.contextualLog = context.log;