@adobe/spacecat-shared-tokowaka-client 1.13.5 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
1
+ ## [@adobe/spacecat-shared-tokowaka-client-v1.14.0](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-tokowaka-client-v1.13.5...@adobe/spacecat-shared-tokowaka-client-v1.14.0) (2026-05-04)
2
+
3
+ ### Features
4
+
5
+ * **tokowaka-client:** add checkWafConnectivity method ([#1552](https://github.com/adobe/spacecat-shared/issues/1552)) ([46f72e3](https://github.com/adobe/spacecat-shared/commit/46f72e39964d2e6906746e2d72f843a7d4a08428))
6
+
1
7
  ## [@adobe/spacecat-shared-tokowaka-client-v1.13.5](https://github.com/adobe/spacecat-shared/compare/@adobe/spacecat-shared-tokowaka-client-v1.13.4...@adobe/spacecat-shared-tokowaka-client-v1.13.5) (2026-05-04)
2
8
 
3
9
  ### Bug Fixes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adobe/spacecat-shared-tokowaka-client",
3
- "version": "1.13.5",
3
+ "version": "1.14.0",
4
4
  "description": "Tokowaka Client for SpaceCat - Edge optimization config management",
5
5
  "type": "module",
6
6
  "engines": {
package/src/index.js CHANGED
@@ -12,7 +12,9 @@
12
12
 
13
13
  import crypto from 'crypto';
14
14
  import { GetObjectCommand, PutObjectCommand } from '@aws-sdk/client-s3';
15
- import { hasText, isNonEmptyObject, tracingFetch } from '@adobe/spacecat-shared-utils';
15
+ import {
16
+ hasText, isNonEmptyObject, prependSchema, tracingFetch,
17
+ } from '@adobe/spacecat-shared-utils';
16
18
  import { v4 as uuidv4 } from 'uuid';
17
19
  import MapperRegistry from './mappers/mapper-registry.js';
18
20
  import CdnClientRegistry from './cdn/cdn-client-registry.js';
@@ -26,6 +28,12 @@ import {
26
28
  import { groupSuggestionsByUrlPath, filterEligibleSuggestions } from './utils/suggestion-utils.js';
27
29
  import { getEffectiveBaseURL } from './utils/site-utils.js';
28
30
  import { fetchHtmlWithWarmup, calculateForwardedHost } from './utils/custom-html-utils.js';
31
+ import {
32
+ EDGE_OPTIMIZE_PROXY_BASE_URL_DEFAULT,
33
+ PRIVATE_HOST_RE,
34
+ WAF_PROBE_TIMEOUT_MS,
35
+ classifyProbeResponse,
36
+ } from './utils/waf-probe-utils.js';
29
37
 
30
38
  export { FastlyKVClient } from './fastly-kv-client.js';
31
39
  export { calculateForwardedHost } from './utils/custom-html-utils.js';
@@ -1209,6 +1217,55 @@ class TokowakaClient {
1209
1217
  /* c8 ignore stop */
1210
1218
  }
1211
1219
 
1220
+ /**
1221
+ * Probes whether a WAF or Bot Manager is blocking AdobeEdgeOptimize/1.0 traffic
1222
+ * for the site.
1223
+ *
1224
+ * Probe outcomes:
1225
+ * - Hard block: HTTP 401/403/406/429/503 → `{ reachable: false, blocked: true }`
1226
+ * - CF challenge: cf-mitigated: challenge header → `{ reachable: false, blocked: true }`
1227
+ * - Soft block: 2xx with bot-challenge HTML → `{ reachable: false, blocked: true }`
1228
+ * - Pass: 2xx with real content → `{ reachable: true, blocked: false }`
1229
+ * - Network/timeout error → `{ reachable: false, blocked: null }`
1230
+ *
1231
+ * This method never throws — all errors are captured into the return value.
1232
+ * Use the separate edge-optimize status API to determine if edge optimize is active.
1233
+ *
1234
+ * @param {Object} site - Site entity with a `getBaseURL()` method.
1235
+ * @returns {Promise<Object>} WAF probe result.
1236
+ */
1237
+ async checkWafConnectivity(site) {
1238
+ const siteBaseUrl = site.getBaseURL();
1239
+ let probeResult = { probedUrl: String(siteBaseUrl) };
1240
+
1241
+ try {
1242
+ const normalizedUrl = prependSchema(siteBaseUrl);
1243
+ const { host: targetHost, hostname, href: probedUrl } = new URL(normalizedUrl);
1244
+ probeResult = { probedUrl };
1245
+
1246
+ if (PRIVATE_HOST_RE.test(hostname)) {
1247
+ this.log.warn(`[edge-optimize-probe] Refusing to probe private/loopback host: ${hostname}`);
1248
+ return { ...probeResult, reachable: false, blocked: null };
1249
+ }
1250
+
1251
+ this.log.info(`[edge-optimize-probe] Probing ${targetHost} via edge optimize proxy`);
1252
+
1253
+ const response = await tracingFetch(EDGE_OPTIMIZE_PROXY_BASE_URL_DEFAULT, {
1254
+ method: 'GET',
1255
+ headers: { 'x-forwarded-host': targetHost },
1256
+ signal: AbortSignal.timeout(WAF_PROBE_TIMEOUT_MS),
1257
+ });
1258
+
1259
+ const classification = await classifyProbeResponse(response, targetHost, this.log);
1260
+ probeResult = { ...probeResult, ...classification };
1261
+ } catch (error) {
1262
+ this.log.warn(`[edge-optimize-probe] Probe failed for ${siteBaseUrl}: ${error.message}`);
1263
+ probeResult = { ...probeResult, reachable: false, blocked: null };
1264
+ }
1265
+
1266
+ return probeResult;
1267
+ }
1268
+
1212
1269
  /**
1213
1270
  * Deploys suggestions to edge, handling both regular and domain-wide suggestions.
1214
1271
  *
@@ -0,0 +1,82 @@
1
+ /*
2
+ * Copyright 2026 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
export const EDGE_OPTIMIZE_PROXY_BASE_URL_DEFAULT = 'https://live.edgeoptimize.net';

// Hostname shapes that must never be forwarded as probe targets. Every entry is matched
// from the start of the hostname. IPv6 literals are written with their brackets because
// that is how `URL#hostname` serializes them.
const PRIVATE_HOST_PATTERNS = [
  /localhost\.?$/, // loopback name, with or without the trailing root dot
  /0\.0\.0\.0$/, // unspecified IPv4 address
  /127\./, // IPv4 loopback
  /10\./, // RFC1918 10.0.0.0/8
  /192\.168\./, // RFC1918 192.168.0.0/16
  /172\.(?:1[6-9]|2\d|3[01])\./, // RFC1918 172.16.0.0/12
  /169\.254\./, // IPv4 link-local
  /\[::1?\]$/, // IPv6 unspecified (::) and loopback (::1)
  /\[::ffff:/, // IPv4-mapped IPv6
  /\[f[cd][0-9a-f]{2}:/, // IPv6 unique-local fc00::/7
  /\[fe[89ab][0-9a-f]:/, // IPv6 link-local fe80::/10
];

// Blocks loopback, link-local, and private (RFC1918 / IPv6 unique-local) ranges — never
// forward these as probe targets. No `g` flag, so `.test()` is stateless.
export const PRIVATE_HOST_RE = new RegExp(
  `^(?:${PRIVATE_HOST_PATTERNS.map((re) => re.source).join('|')})`,
  'i',
);

export const WAF_PROBE_TIMEOUT_MS = 15000;

// Soft-block detection: vendor-specific technical identifiers that only appear in
// WAF-generated challenge pages, never in real page content. Broad natural-language
// terms ('challenge', 'captcha', 'access denied') are intentionally excluded — they
// match legitimate marketing copy and reCAPTCHA script tags, producing false positives
// at any body scan depth.
// Entries must be lowercase: the response body is lowercased before matching.
export const BOT_CHALLENGE_KEYWORDS = [
  'cf-chl-widget', // Cloudflare challenge widget CSS class
  'completing the challenge', // Cloudflare-specific challenge phrase
  '_incapsula_resource', // Imperva/Incapsula JS artifact — only in WAF-generated pages
  'errors.edgesuite.net', // Akamai error page domain
  'errors.edgekey.net', // Akamai edge key domain
];

// 403 and 429 are universal WAF block signals; 406 is Fastly Next-Gen WAF (Signal Sciences)
// and some Imperva configurations. 401 covers WAF-gated auth challenges. 503 is used by
// Akamai and others as a block response in certain configurations.
export const HARD_BLOCK_STATUS_CODES = new Set([401, 403, 406, 429, 503]);

/**
 * Classifies an already-fetched Tokowaka-proxied response into one of five probe outcomes:
 * - Hard block  : HTTP status in HARD_BLOCK_STATUS_CODES → { reachable: false, blocked: true }
 * - CF challenge: cf-mitigated: challenge header        → { reachable: false, blocked: true }
 * - Soft block  : 2xx HTML body with vendor keywords    → { reachable: false, blocked: true }
 * - Clean pass  : 2xx with real content                 → { reachable: true, blocked: false }
 * - Other       : unexpected status (e.g. redirect)     → { reachable: false, blocked: false }
 *
 * Every result also carries `statusCode`. The response body is read only for 2xx
 * responses whose content type contains `text/html`.
 *
 * @param {Response} response - Fetch response from the Tokowaka proxy.
 * @param {string} targetHost - Customer hostname, used only for log messages.
 * @param {Object} log - Logger with an `info` method.
 * @returns {Promise<Object>} Classification result.
 */
export async function classifyProbeResponse(response, targetHost, log) {
  const { status, headers } = response;

  if (HARD_BLOCK_STATUS_CODES.has(status)) {
    log.info(`[edge-optimize-probe] Hard block for ${targetHost}: HTTP ${status}`);
    return { reachable: false, blocked: true, statusCode: status };
  }

  // Cloudflare active challenge: present on any response where CF is serving a managed
  // challenge — definitive block signal regardless of HTTP status code. The value is
  // normalized so casing or stray whitespace cannot hide the signal.
  const cfMitigated = headers.get('cf-mitigated')?.trim().toLowerCase();
  if (cfMitigated === 'challenge') {
    log.info(`[edge-optimize-probe] Cloudflare challenge for ${targetHost} (cf-mitigated: challenge)`);
    return { reachable: false, blocked: true, statusCode: status };
  }

  if (status >= 200 && status < 300) {
    // Media types are case-insensitive, so compare on the lowercased header value.
    const contentType = (headers.get('content-type') || '').toLowerCase();
    if (contentType.includes('text/html')) {
      // Lowercase the body once instead of once per keyword.
      const html = (await response.text()).toLowerCase();
      const isSoftBlock = BOT_CHALLENGE_KEYWORDS.some((keyword) => html.includes(keyword));
      if (isSoftBlock) {
        log.info(`[edge-optimize-probe] Soft block (challenge page) for ${targetHost}: HTTP ${status}`);
        return { reachable: false, blocked: true, statusCode: status };
      }
    }
    log.info(`[edge-optimize-probe] Clean pass for ${targetHost}: HTTP ${status}`);
    return { reachable: true, blocked: false, statusCode: status };
  }

  log.info(`[edge-optimize-probe] Unexpected status for ${targetHost}: HTTP ${status}`);
  return { reachable: false, blocked: false, statusCode: status };
}
@@ -4338,6 +4338,194 @@ describe('TokowakaClient', () => {
4338
4338
  });
4339
4339
  });
4340
4340
 
4341
// Exercises TokowakaClient#checkWafConnectivity with `tracingFetch` replaced by a stub, so
// no network traffic is generated. The real waf-probe-utils module is used for classification.
describe('checkWafConnectivity', () => {
  let tracingFetchStub;
  let esmockClient;
  let mockSiteWaf;

  beforeEach(async () => {
    tracingFetchStub = sinon.stub();

    // Re-import the client with spacecat-shared-utils mocked so the probe request is captured.
    const MockedTokowakaClient = await esmock('../src/index.js', {
      '@adobe/spacecat-shared-utils': {
        hasText: (val) => typeof val === 'string' && val.trim().length > 0,
        isNonEmptyObject: (val) => val !== null && typeof val === 'object' && Object.keys(val).length > 0,
        prependSchema: (url) => (url.startsWith('http') ? url : `https://${url}`),
        tracingFetch: tracingFetchStub,
      },
    });

    esmockClient = new MockedTokowakaClient(
      {
        bucketName: 'test-bucket',
        previewBucketName: 'test-preview-bucket',
        s3Client: { send: sinon.stub().resolves() },
        env: {
          TOKOWAKA_CDN_PROVIDER: 'cloudfront',
          TOKOWAKA_CDN_CONFIG: JSON.stringify({ cloudfront: { distributionId: 'E123456', region: 'us-east-1' } }),
        },
      },
      log,
    );

    mockSiteWaf = {
      getId: () => 'waf-site-id',
      getBaseURL: () => 'https://example.com',
    };
  });

  const makeHeaders = (plain = {}) => new Headers(plain);

  describe('Probe request', () => {
    it('sends a GET to the edge optimize proxy with the site host forwarded', async () => {
      tracingFetchStub.resolves({ status: 200, headers: makeHeaders() });
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);

      expect(tracingFetchStub.calledOnce).to.equal(true);
      const [url, options] = tracingFetchStub.firstCall.args;
      expect(url).to.equal('https://live.edgeoptimize.net');
      expect(options.method).to.equal('GET');
      expect(options.headers).to.deep.equal({ 'x-forwarded-host': 'example.com' });
      expect(result.probedUrl).to.equal('https://example.com/');
    });
  });

  describe('Hard block — status codes', () => {
    [401, 403, 406, 429, 503].forEach((status) => {
      it(`returns blocked:true for HTTP ${status}`, async () => {
        tracingFetchStub.resolves({ status, headers: makeHeaders() });
        const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
        expect(result.blocked).to.equal(true);
        expect(result.reachable).to.equal(false);
        expect(result.statusCode).to.equal(status);
      });
    });
  });

  describe('Cloudflare header detection', () => {
    it('returns blocked:true when cf-mitigated: challenge header is present', async () => {
      tracingFetchStub.resolves({
        status: 200,
        headers: makeHeaders({ 'cf-mitigated': 'challenge' }),
        text: sinon.stub().resolves('<html>Just a moment</html>'),
      });
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(true);
      expect(result.reachable).to.equal(false);
      expect(result.statusCode).to.equal(200);
    });

    it('returns blocked:false when cf-mitigated header is absent (Cloudflare passing)', async () => {
      tracingFetchStub.resolves({
        status: 200,
        headers: makeHeaders({ 'cf-ray': 'abc123-LHR' }),
        text: sinon.stub().resolves('<html><body>Welcome</body></html>'),
      });
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(true);
    });
  });

  describe('Soft block — vendor-specific keyword detection', () => {
    const makeSoftBlockResponse = (bodyKeyword) => ({
      status: 200,
      headers: makeHeaders({ 'content-type': 'text/html' }),
      text: sinon.stub().resolves(`<html><body>${bodyKeyword}</body></html>`),
    });

    it('detects Cloudflare challenge via cf-chl-widget', async () => {
      tracingFetchStub.resolves(makeSoftBlockResponse('<div class="cf-chl-widget"></div>'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(true);
    });

    it('detects Imperva challenge via _Incapsula_Resource', async () => {
      // Use the mixed-case spelling so the case-insensitive match is actually exercised.
      tracingFetchStub.resolves(makeSoftBlockResponse('window._Incapsula_Resource={}'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(true);
    });

    it('detects Akamai error page via errors.edgesuite.net', async () => {
      tracingFetchStub.resolves(makeSoftBlockResponse('<a href="https://errors.edgesuite.net/abc">Reference</a>'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(true);
    });

    it('detects Akamai error page via errors.edgekey.net', async () => {
      tracingFetchStub.resolves(makeSoftBlockResponse('<a href="https://errors.edgekey.net/abc">Reference</a>'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(true);
    });
  });

  describe('False positive prevention — broad natural-language terms no longer trigger block', () => {
    const makeNormalPage = (text) => ({
      status: 200,
      headers: makeHeaders({ 'content-type': 'text/html' }),
      text: sinon.stub().resolves(`<html><body>${text}</body></html>`),
    });

    it('does not flag page containing the word "challenge" in marketing copy', async () => {
      tracingFetchStub.resolves(makeNormalPage('Take the 30-day challenge today!'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(true);
    });

    it('does not flag page containing "captcha" in reCAPTCHA script tag', async () => {
      tracingFetchStub.resolves(makeNormalPage(
        '<script src="https://www.google.com/recaptcha/api.js"></script>',
      ));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(true);
    });

    it('does not flag page containing "access denied" in help text', async () => {
      tracingFetchStub.resolves(makeNormalPage(
        '<p>If access is denied, contact your administrator.</p>',
      ));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(true);
    });

    it('does not flag 200 JSON response', async () => {
      tracingFetchStub.resolves({
        status: 200,
        headers: makeHeaders({ 'content-type': 'application/json' }),
      });
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(true);
    });
  });

  describe('Network errors', () => {
    it('returns blocked:null on TimeoutError (probe timeout)', async () => {
      // AbortSignal.timeout() aborts with an error named TimeoutError, not AbortError.
      const err = new Error('The operation was aborted due to timeout');
      err.name = 'TimeoutError';
      tracingFetchStub.rejects(err);
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(null);
      expect(result.reachable).to.equal(false);
    });

    it('returns blocked:null on network failure', async () => {
      tracingFetchStub.rejects(new Error('ECONNREFUSED'));
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(null);
      expect(result.reachable).to.equal(false);
    });
  });

  describe('Unexpected status (e.g. redirect)', () => {
    it('returns blocked:false for a 301 redirect response', async () => {
      tracingFetchStub.resolves({ status: 301, headers: makeHeaders() });
      const result = await esmockClient.checkWafConnectivity(mockSiteWaf);
      expect(result.blocked).to.equal(false);
      expect(result.reachable).to.equal(false);
      expect(result.statusCode).to.equal(301);
    });
  });

  describe('Private host rejection', () => {
    it('returns blocked:null without probing for private IP host', async () => {
      const privateSite = { getId: () => 'p1', getBaseURL: () => 'http://192.168.1.1' };
      const result = await esmockClient.checkWafConnectivity(privateSite);
      expect(result.blocked).to.equal(null);
      expect(result.reachable).to.equal(false);
      expect(tracingFetchStub).to.not.have.been.called;
    });
  });
});
4528
+
4341
4529
  describe('deployToEdge', () => {
4342
4530
  let deploySuggestionsStub;
4343
4531
  let fetchMetaconfigStub;
@@ -0,0 +1,197 @@
1
+ /*
2
+ * Copyright 2026 Adobe. All rights reserved.
3
+ * This file is licensed to you under the Apache License, Version 2.0 (the "License");
4
+ * you may not use this file except in compliance with the License. You may obtain a copy
5
+ * of the License at http://www.apache.org/licenses/LICENSE-2.0
6
+ *
7
+ * Unless required by applicable law or agreed to in writing, software distributed under
8
+ * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
9
+ * OF ANY KIND, either express or implied. See the License for the specific language
10
+ * governing permissions and limitations under the License.
11
+ */
12
+
13
+ import { expect } from 'chai';
14
+ import sinon from 'sinon';
15
+ import {
16
+ classifyProbeResponse,
17
+ BOT_CHALLENGE_KEYWORDS,
18
+ HARD_BLOCK_STATUS_CODES,
19
+ PRIVATE_HOST_RE,
20
+ WAF_PROBE_TIMEOUT_MS,
21
+ EDGE_OPTIMIZE_PROXY_BASE_URL_DEFAULT,
22
+ } from '../../src/utils/waf-probe-utils.js';
23
+
24
/**
 * Builds a minimal stand-in for a fetch `Response`, exposing only the members the
 * classifier reads: `status`, `headers.get()` and `text()`.
 *
 * Header names are matched case-insensitively regardless of how the fixture spells
 * them, and lookups never fall through to `Object.prototype`.
 *
 * @param {number} status - HTTP status code to report.
 * @param {Object} [headers={}] - Header name → value pairs.
 * @param {string} [body=''] - Body returned by `text()`.
 * @returns {Object} Response-like test double.
 */
function makeResponse(status, headers = {}, body = '') {
  const normalized = new Map(
    Object.entries(headers).map(([name, value]) => [name.toLowerCase(), value]),
  );
  return {
    status,
    headers: { get: (name) => normalized.get(name.toLowerCase()) ?? null },
    text: async () => body,
  };
}
31
+
32
// Unit tests for the WAF probe helpers: exported constants and classifyProbeResponse.
describe('waf-probe-utils', () => {
  let log;

  beforeEach(() => {
    log = { info: sinon.stub() };
  });

  // ── Exported constants ──────────────────────────────────────────────────────

  describe('constants', () => {
    it('exports the correct proxy base URL', () => {
      expect(EDGE_OPTIMIZE_PROXY_BASE_URL_DEFAULT).to.equal('https://live.edgeoptimize.net');
    });

    it('exports the correct timeout', () => {
      expect(WAF_PROBE_TIMEOUT_MS).to.equal(15000);
    });

    it('HARD_BLOCK_STATUS_CODES covers expected codes', () => {
      const hardCodes = [401, 403, 406, 429, 503];
      expect(hardCodes.every((code) => HARD_BLOCK_STATUS_CODES.has(code))).to.be.true;
      expect(HARD_BLOCK_STATUS_CODES.has(200)).to.be.false;
    });

    it('PRIVATE_HOST_RE blocks loopback, link-local, and RFC1918 ranges', () => {
      ['localhost', '127.0.0.1', '10.0.0.1', '192.168.1.1', '172.16.0.1', '169.254.1.1'].forEach(
        (host) => expect(PRIVATE_HOST_RE.test(host), host).to.be.true,
      );
      expect(PRIVATE_HOST_RE.test('example.com')).to.be.false;
    });

    it('BOT_CHALLENGE_KEYWORDS are all vendor-specific identifiers', () => {
      expect(BOT_CHALLENGE_KEYWORDS).to.be.an('array').with.length.above(0);
      // Broad natural-language terms must not appear — they cause false positives
      ['challenge', 'captcha', 'access denied'].forEach(
        (broad) => expect(BOT_CHALLENGE_KEYWORDS).to.not.include(broad),
      );
    });
  });

  // ── classifyProbeResponse ───────────────────────────────────────────────────

  describe('classifyProbeResponse', () => {
    describe('hard block status codes', () => {
      [401, 403, 406, 429, 503].forEach((code) => {
        it(`classifies HTTP ${code} as blocked`, async () => {
          const result = await classifyProbeResponse(makeResponse(code), 'example.com', log);
          expect(result).to.deep.equal({ reachable: false, blocked: true, statusCode: code });
        });
      });
    });

    describe('Cloudflare active challenge', () => {
      it('classifies cf-mitigated: challenge header as blocked on 200', async () => {
        const response = makeResponse(200, { 'cf-mitigated': 'challenge' });
        const result = await classifyProbeResponse(response, 'example.com', log);
        expect(result).to.deep.equal({ reachable: false, blocked: true, statusCode: 200 });
      });

      it('classifies cf-mitigated: challenge header as blocked on non-block status', async () => {
        const response = makeResponse(202, { 'cf-mitigated': 'challenge' });
        const result = await classifyProbeResponse(response, 'example.com', log);
        expect(result).to.deep.equal({ reachable: false, blocked: true, statusCode: 202 });
      });
    });

    describe('soft block — vendor keyword detection', () => {
      [
        ['Cloudflare widget class', 'cf-chl-widget', '<div class="cf-chl-widget"></div>'],
        ['Cloudflare challenge phrase', 'completing the challenge', 'Please completing the challenge to continue'],
        ['Imperva artifact', '_incapsula_resource', 'window._incapsula_resource={}'],
        ['Akamai edgesuite domain', 'errors.edgesuite.net', 'See errors.edgesuite.net for details'],
        ['Akamai edgekey domain', 'errors.edgekey.net', 'See errors.edgekey.net for details'],
      ].forEach(([label, keyword, body]) => {
        it(`detects soft block: ${label}`, async () => {
          // Guard the fixture itself: the keyword must still be configured and present
          // in the body, otherwise the case would pass or fail for the wrong reason.
          expect(BOT_CHALLENGE_KEYWORDS).to.include(keyword);
          expect(body.toLowerCase()).to.include(keyword);

          const response = makeResponse(200, { 'content-type': 'text/html' }, body);
          const result = await classifyProbeResponse(response, 'example.com', log);
          expect(result).to.deep.equal({ reachable: false, blocked: true, statusCode: 200 });
        });
      });

      it('is case-insensitive for keyword matching', async () => {
        const response = makeResponse(200, { 'content-type': 'text/html' }, 'CF-CHL-WIDGET visible');
        const result = await classifyProbeResponse(response, 'example.com', log);
        expect(result).to.deep.equal({ reachable: false, blocked: true, statusCode: 200 });
      });
    });

    describe('false positive prevention — real content must not trigger soft block', () => {
      // Every row is served as text/html, so the body scan runs for each of them.
      [
        ['reCAPTCHA script tag', '<script src="recaptcha/api.js">'],
        ['legitimate captcha link text', 'Complete the CAPTCHA to prove you are human'],
        ['marketing copy with challenge', 'This challenge will test your creativity'],
        ['access denied in content', '<p>Access denied to premium content without subscription</p>'],
        ['JSON-looking body served as HTML', '{"status":"ok"}'],
        ['plain text body served as HTML', 'Hello world'],
      ].forEach(([label, body]) => {
        it(`passes clean: ${label}`, async () => {
          const response = makeResponse(200, { 'content-type': 'text/html' }, body);
          const result = await classifyProbeResponse(response, 'example.com', log);
          expect(result).to.deep.equal({ reachable: true, blocked: false, statusCode: 200 });
        });
      });

      it('skips body scan for non-HTML content types', async () => {
        const response = makeResponse(200, { 'content-type': 'application/json' }, 'cf-chl-widget');
        const result = await classifyProbeResponse(response, 'example.com', log);
        expect(result).to.deep.equal({ reachable: true, blocked: false, statusCode: 200 });
      });

      it('skips body scan when content-type header is absent', async () => {
        const response = makeResponse(200, {}, 'cf-chl-widget');
        const result = await classifyProbeResponse(response, 'example.com', log);
        expect(result).to.deep.equal({ reachable: true, blocked: false, statusCode: 200 });
      });
    });

    describe('clean pass', () => {
      [200, 201, 204].forEach((code) => {
        it(`classifies HTTP ${code} clean HTML as reachable`, async () => {
          const response = makeResponse(code, { 'content-type': 'text/html' }, '<h1>Welcome</h1>');
          const result = await classifyProbeResponse(response, 'example.com', log);
          expect(result).to.deep.equal({ reachable: true, blocked: false, statusCode: code });
        });
      });
    });

    describe('unexpected / redirect status codes', () => {
      [301, 302, 307].forEach((code) => {
        it(`classifies HTTP ${code} as not reachable, not blocked`, async () => {
          const result = await classifyProbeResponse(makeResponse(code), 'example.com', log);
          expect(result).to.deep.equal({ reachable: false, blocked: false, statusCode: code });
        });
      });
    });

    describe('logging', () => {
      it('logs hard block with status code', async () => {
        await classifyProbeResponse(makeResponse(403), 'example.com', log);
        expect(log.info.calledWithMatch('[edge-optimize-probe] Hard block for example.com: HTTP 403')).to.be.true;
      });

      it('logs Cloudflare challenge with header info', async () => {
        await classifyProbeResponse(makeResponse(200, { 'cf-mitigated': 'challenge' }), 'example.com', log);
        expect(log.info.calledWithMatch('cf-mitigated: challenge')).to.be.true;
      });

      it('logs soft block with status code', async () => {
        const response = makeResponse(200, { 'content-type': 'text/html' }, 'cf-chl-widget');
        await classifyProbeResponse(response, 'example.com', log);
        expect(log.info.calledWithMatch('[edge-optimize-probe] Soft block')).to.be.true;
      });

      it('logs clean pass with status code', async () => {
        const response = makeResponse(200, { 'content-type': 'text/html' }, '<h1>OK</h1>');
        await classifyProbeResponse(response, 'example.com', log);
        expect(log.info.calledWithMatch('[edge-optimize-probe] Clean pass for example.com: HTTP 200')).to.be.true;
      });

      it('logs unexpected status', async () => {
        await classifyProbeResponse(makeResponse(302), 'example.com', log);
        expect(log.info.calledWithMatch('[edge-optimize-probe] Unexpected status for example.com: HTTP 302')).to.be.true;
      });
    });
  });
});