is-antibot 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -1
  2. package/src/index.js +195 -15
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "is-antibot",
3
3
  "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
4
4
  "homepage": "https://github.com/microlinkhq/is-antibot",
5
- "version": "1.1.0",
5
+ "version": "1.3.0",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
8
8
  },
@@ -55,6 +55,7 @@
55
55
  "waf"
56
56
  ],
57
57
  "dependencies": {
58
+ "cookie-es": "~3.0.1",
58
59
  "debug-logfmt": "~1.4.7"
59
60
  },
60
61
  "devDependencies": {
package/src/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  'use strict'
2
2
 
3
+ const { splitSetCookieString } = require('cookie-es')
3
4
  const debug = require('debug-logfmt')('is-antibot')
4
5
 
5
6
  const getHeader = (headers, name) =>
@@ -22,6 +23,11 @@ const createResult = (detected, provider) => {
22
23
  return { detected, provider }
23
24
  }
24
25
 
26
+ const testSetCookie = (headers, pattern) => {
27
+ const cookiesString = getHeader(headers, 'set-cookie')
28
+ return splitSetCookieString(cookiesString).some(c => c.startsWith(pattern))
29
+ }
30
+
25
31
  module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
26
32
  // CloudFlare: Check for cf-mitigated header with 'challenge' value
27
33
  // Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
@@ -29,6 +35,11 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
29
35
  return createResult(true, 'cloudflare')
30
36
  }
31
37
 
38
+ // Cloudflare: cf_clearance cookie indicates Cloudflare challenge flow
39
+ if (testSetCookie(headers, 'cf_clearance=')) {
40
+ return createResult(true, 'cloudflare')
41
+ }
42
+
32
43
  // Vercel: Check for x-vercel-mitigated header with 'challenge' value
33
44
  // Solver reference: https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
34
45
  if (getHeader(headers, 'x-vercel-mitigated') === 'challenge') {
@@ -50,17 +61,30 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
50
61
  return createResult(true, 'akamai')
51
62
  }
52
63
 
64
+ // Akamai: _abck bot manager tracking cookie
65
+ if (testSetCookie(headers, '_abck=')) {
66
+ return createResult(true, 'akamai')
67
+ }
68
+
69
+ // Akamai: Bot Manager API namespace (bmak) in body
70
+ if (body && testPattern(body, 'bmak.')) {
71
+ return createResult(true, 'akamai')
72
+ }
73
+
53
74
  // DataDome: Check for x-dd-b header with values '1' (soft challenge) or '2' (hard challenge/CAPTCHA)
54
75
  // Official docs: https://docs.datadome.co/reference/validate-request
55
- // 1: Soft challenge / JS redirect / interstitial
56
- // 2: Hard challenge / HTML redirect / CAPTCHA
57
76
  if (['1', '2'].includes(getHeader(headers, 'x-dd-b'))) {
58
77
  return createResult(true, 'datadome')
59
78
  }
60
79
 
61
- // DataDome: Check for x-datadome header presence
80
+ // DataDome: Check for x-datadome or x-datadome-cid header presence
62
81
  // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/datadome.json
63
- if (getHeader(headers, 'x-datadome')) {
82
+ if (getHeader(headers, 'x-datadome') || getHeader(headers, 'x-datadome-cid')) {
83
+ return createResult(true, 'datadome')
84
+ }
85
+
86
+ // DataDome: datadome tracking cookie
87
+ if (testSetCookie(headers, 'datadome=')) {
64
88
  return createResult(true, 'datadome')
65
89
  }
66
90
 
@@ -70,9 +94,19 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
70
94
  return createResult(true, 'perimeterx')
71
95
  }
72
96
 
73
- // PerimeterX: Check for window._pxAppId in body (JavaScript initialization)
97
+ // PerimeterX: Check for window._pxAppId, pxInit, or _pxAction in body
74
98
  // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/perimeterx.json#L130-L137
75
- if (body && testPattern(body, 'window._pxAppId')) {
99
+ if (
100
+ body &&
101
+ (testPattern(body, 'window._pxAppId') ||
102
+ testPattern(body, 'pxInit') ||
103
+ testPattern(body, '_pxAction'))
104
+ ) {
105
+ return createResult(true, 'perimeterx')
106
+ }
107
+
108
+ // PerimeterX: _px3 or _pxhd cookies
109
+ if (testSetCookie(headers, '_px3=') || testSetCookie(headers, '_pxhd=')) {
76
110
  return createResult(true, 'perimeterx')
77
111
  }
78
112
 
@@ -128,12 +162,95 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
128
162
  return createResult(true, 'imperva')
129
163
  }
130
164
 
131
- // reCAPTCHA: Check for recaptcha/api or google.com/recaptcha in URL
165
+ // Imperva/Incapsula: incap_ses_, visid_incap_, or reese84 cookies
166
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/incapsula.json
167
+ if (
168
+ testSetCookie(headers, 'incap_ses_') ||
169
+ testSetCookie(headers, 'visid_incap_') ||
170
+ testSetCookie(headers, 'reese84=')
171
+ ) {
172
+ return createResult(true, 'imperva')
173
+ }
174
+
175
+ // Reblaze: rbzid or rbzsessionid cookies
176
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/reblaze.json
177
+ if (
178
+ testSetCookie(headers, 'rbzid=') ||
179
+ testSetCookie(headers, 'rbzsessionid=')
180
+ ) {
181
+ return createResult(true, 'reblaze')
182
+ }
183
+
184
+ // Reblaze: Check for 'reblaze' text in response body
185
+ if (body && testPattern(body, 'reblaze')) {
186
+ return createResult(true, 'reblaze')
187
+ }
188
+
189
+ // Cheq: Check for CheqSdk or cheqzone.com in body
190
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/cheq.json
191
+ if (
192
+ body &&
193
+ (testPattern(body, 'CheqSdk') || testPattern(body, 'cheqzone.com'))
194
+ ) {
195
+ return createResult(true, 'cheq')
196
+ }
197
+
198
+ // Cheq: Check for cheqzone.com or cheq.ai in URL
199
+ if (
200
+ url &&
201
+ (testPattern(url, 'cheqzone\\.com', true) ||
202
+ testPattern(url, 'cheq\\.ai', true))
203
+ ) {
204
+ return createResult(true, 'cheq')
205
+ }
206
+
207
+ // Sucuri: Check for 'sucuri' text in response body
208
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/sucuri.json
209
+ if (body && testPattern(body, 'sucuri')) {
210
+ return createResult(true, 'sucuri')
211
+ }
212
+
213
+ // ThreatMetrix: Check for 'ThreatMetrix' in body
214
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/threatmetrix.json
215
+ if (body && testPattern(body, 'ThreatMetrix')) {
216
+ return createResult(true, 'threatmetrix')
217
+ }
218
+
219
+ // ThreatMetrix: Check for fp/check.js fingerprint endpoint in URL
220
+ if (url && testPattern(url, 'fp/check.js')) {
221
+ return createResult(true, 'threatmetrix')
222
+ }
223
+
224
+ // Meetrics: Check for 'meetrics' text in response body
225
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/meetrics.json
226
+ if (body && testPattern(body, 'meetrics')) {
227
+ return createResult(true, 'meetrics')
228
+ }
229
+
230
+ // Meetrics: Check for meetrics.com in URL
231
+ if (url && testPattern(url, 'meetrics\\.com', true)) {
232
+ return createResult(true, 'meetrics')
233
+ }
234
+
235
+ // Ocule: Check for ocule.co.uk in body
236
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/ocule.json
237
+ if (body && testPattern(body, 'ocule.co.uk')) {
238
+ return createResult(true, 'ocule')
239
+ }
240
+
241
+ // Ocule: Check for ocule.co.uk in URL
242
+ if (url && testPattern(url, 'ocule\\.co\\.uk', true)) {
243
+ return createResult(true, 'ocule')
244
+ }
245
+
246
+ // reCAPTCHA: Check for recaptcha/api, google.com/recaptcha, gstatic.com/recaptcha, or recaptcha.net in URL
132
247
  // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L13-L48
133
248
  if (
134
249
  url &&
135
250
  (testPattern(url, 'recaptcha/api') ||
136
- testPattern(url, 'google\\.com/recaptcha', true))
251
+ testPattern(url, 'google\\.com/recaptcha', true) ||
252
+ testPattern(url, 'gstatic.com/recaptcha') ||
253
+ testPattern(url, 'recaptcha.net'))
137
254
  ) {
138
255
  return createResult(true, 'recaptcha')
139
256
  }
@@ -206,7 +323,6 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
206
323
  }
207
324
 
208
325
  // Cloudflare Turnstile: Check for challenges.cloudflare.com/turnstile in URL
209
- // Turnstile is Cloudflare's CAPTCHA alternative with privacy focus
210
326
  if (
211
327
  url &&
212
328
  testPattern(url, 'challenges\\.cloudflare\\.com/turnstile', true)
@@ -214,18 +330,77 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
214
330
  return createResult(true, 'cloudflare-turnstile')
215
331
  }
216
332
 
217
- // Cloudflare Turnstile: Check for cf-turnstile class in body (primary indicator)
333
+ // Cloudflare Turnstile: Check for cf-turnstile class in body
218
334
  if (body && testPattern(body, 'cf-turnstile')) {
219
335
  return createResult(true, 'cloudflare-turnstile')
220
336
  }
221
337
 
222
- // Cloudflare Turnstile: Check for turnstile text in body (secondary indicator)
338
+ // Cloudflare Turnstile: Check for turnstile text in body
223
339
  if (body && testPattern(body, 'turnstile')) {
224
340
  return createResult(true, 'cloudflare-turnstile')
225
341
  }
226
342
 
343
+ // Friendly Captcha: Check for friendlycaptcha.com in URL
344
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/friendlycaptcha.json
345
+ if (url && testPattern(url, 'friendlycaptcha\\.com', true)) {
346
+ return createResult(true, 'friendly-captcha')
347
+ }
348
+
349
+ // Friendly Captcha: Check for frc-captcha container or friendlyChallenge object in body
350
+ if (
351
+ body &&
352
+ (testPattern(body, 'frc-captcha') ||
353
+ testPattern(body, 'friendlyChallenge'))
354
+ ) {
355
+ return createResult(true, 'friendly-captcha')
356
+ }
357
+
358
+ // Captcha.eu: Check for captcha.eu in URL
359
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/captchaeu.json
360
+ if (url && testPattern(url, 'captcha\\.eu', true)) {
361
+ return createResult(true, 'captcha-eu')
362
+ }
363
+
364
+ // Captcha.eu: Check for CaptchaEU or captchaeu in body
365
+ if (
366
+ body &&
367
+ (testPattern(body, 'CaptchaEU') || testPattern(body, 'captchaeu'))
368
+ ) {
369
+ return createResult(true, 'captcha-eu')
370
+ }
371
+
372
+ // QCloud Captcha (Tencent): Check for turing.captcha.qcloud.com in URL
373
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/qcloud.json
374
+ if (url && testPattern(url, 'turing\\.captcha\\.qcloud\\.com', true)) {
375
+ return createResult(true, 'qcloud-captcha')
376
+ }
377
+
378
+ // QCloud Captcha: Check for TencentCaptcha or turing.captcha in body
379
+ if (
380
+ body &&
381
+ (testPattern(body, 'TencentCaptcha') ||
382
+ testPattern(body, 'turing.captcha'))
383
+ ) {
384
+ return createResult(true, 'qcloud-captcha')
385
+ }
386
+
387
+ // AliExpress CAPTCHA: Check for punish?x5secdata in URL
388
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/aliexpress.json
389
+ if (url && testPattern(url, 'punish\\?x5secdata', true)) {
390
+ return createResult(true, 'aliexpress-captcha')
391
+ }
392
+
393
+ // AliExpress CAPTCHA: Check for x5secdata in body
394
+ if (body && testPattern(body, 'x5secdata')) {
395
+ return createResult(true, 'aliexpress-captcha')
396
+ }
397
+
398
+ // LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
399
+ if (testSetCookie(headers, 'trkCode=bf')) {
400
+ return createResult(true, 'linkedin')
401
+ }
402
+
227
403
  // AWS WAF: Check for x-amzn-waf-action or x-amzn-requestid headers
228
- // These headers are set by AWS WAF when bot control rules are triggered
229
404
  // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json
230
405
  if (
231
406
  getHeader(headers, 'x-amzn-waf-action') ||
@@ -234,9 +409,13 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
234
409
  return createResult(true, 'aws-waf')
235
410
  }
236
411
 
237
- // AWS WAF: Check for aws-waf text in body (challenge page indicator)
238
- // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json#L47-L73
239
- if (body && testPattern(body, 'aws-waf')) {
412
+ // AWS WAF: Check for aws-waf or awswaf text in body
413
+ if (body && (testPattern(body, 'aws-waf') || testPattern(body, 'awswaf'))) {
414
+ return createResult(true, 'aws-waf')
415
+ }
416
+
417
+ // AWS WAF: aws-waf-token cookie
418
+ if (testSetCookie(headers, 'aws-waf-token=')) {
240
419
  return createResult(true, 'aws-waf')
241
420
  }
242
421
 
@@ -245,3 +424,4 @@ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
245
424
 
246
425
  module.exports.debug = debug
247
426
  module.exports.testPattern = testPattern
427
+ module.exports.testSetCookie = testSetCookie