is-antibot 0.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +39 -0
  2. package/package.json +18 -2
  3. package/src/index.js +222 -17
package/README.md CHANGED
@@ -9,6 +9,28 @@
9
9
 
10
10
  > Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, and more.
11
11
 
12
+ ## Supported Providers
13
+
14
+ ### Anti-Bot Systems
15
+
16
+ - **CloudFlare** - Bot management and challenge pages
17
+ - **Vercel** - Attack mode protection
18
+ - **Akamai** - Bot Manager and Web Application Protector
19
+ - **DataDome** - Bot protection with CAPTCHA challenges
20
+ - **PerimeterX** - Behavioral bot detection
21
+ - **Shape Security** - Enterprise bot management
22
+ - **Kasada** - Advanced bot mitigation
23
+ - **Imperva/Incapsula** - Web application firewall
24
+ - **AWS WAF** - Amazon Web Services Web Application Firewall
25
+
26
+ ### CAPTCHA Providers
27
+
28
+ - **reCAPTCHA** - Google's CAPTCHA service (v2 and v3)
29
+ - **hCaptcha** - Privacy-focused CAPTCHA alternative
30
+ - **FunCaptcha** - Arkose Labs interactive challenges
31
+ - **GeeTest** - AI-powered CAPTCHA
32
+ - **Cloudflare Turnstile** - Privacy-preserving CAPTCHA alternative
33
+
12
34
  ## Why
13
35
 
14
36
  Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
@@ -40,6 +62,23 @@ if (detected) {
40
62
 
41
63
  The library expects a [Fetch Response](https://developer.mozilla.org/en-US/docs/Web/API/Response) object, a [Node.js Response](https://nodejs.org/api/http.html#class-httpincomingmessage) object, or an object representing HTTP response headers as input.
42
64
 
65
+ You can also pass optional `body` and `url` parameters for enhanced detection:
66
+
67
+ ```js
68
+ const result = isAntibot({
69
+ headers: response.headers,
70
+ body: await response.text(),
71
+ url: response.url
72
+ })
73
+ ```
74
+
75
+ ### Response
76
+
77
+ The library returns an object with the following properties:
78
+
79
+ - `detected` (boolean): Whether an antibot challenge was detected
80
+ - `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha')
81
+
43
82
  ## License
44
83
 
45
84
  **is-antibot** © [microlink.io](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/is-antibot/blob/master/LICENSE.md) License.<br>
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "is-antibot",
3
- "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, and more.",
3
+ "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
4
4
  "homepage": "https://github.com/microlinkhq/is-antibot",
5
- "version": "0.0.5",
5
+ "version": "1.1.0",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
8
8
  },
@@ -15,6 +15,10 @@
15
15
  {
16
16
  "name": "Kiko Beats",
17
17
  "email": "josefrancisco.verdu@gmail.com"
18
+ },
19
+ {
20
+ "name": "Copilot",
21
+ "email": "198982749+Copilot@users.noreply.github.com"
18
22
  }
19
23
  ],
20
24
  "repository": {
@@ -27,14 +31,26 @@
27
31
  "keywords": [
28
32
  "akamai",
29
33
  "antibot",
34
+ "arkose",
35
+ "aws-waf",
30
36
  "bot",
31
37
  "captcha",
32
38
  "challenge",
33
39
  "cloudflare",
34
40
  "datadome",
35
41
  "detection",
42
+ "funcaptcha",
43
+ "geetest",
44
+ "hcaptcha",
45
+ "imperva",
46
+ "incapsula",
47
+ "kasada",
48
+ "perimeterx",
49
+ "recaptcha",
36
50
  "scraper",
37
51
  "scraping",
52
+ "shapesecurity",
53
+ "turnstile",
38
54
  "vercel",
39
55
  "waf"
40
56
  ],
package/src/index.js CHANGED
@@ -5,38 +5,243 @@ const debug = require('debug-logfmt')('is-antibot')
5
5
  const getHeader = (headers, name) =>
6
6
  typeof headers.get === 'function' ? headers.get(name) : headers[name]
7
7
 
8
- module.exports = ({ headers = {} } = {}) => {
9
- let detected = false
10
- let provider = null
8
+ const testPattern = (value, pattern, isRegex = false) => {
9
+ if (!value) return false
10
+ if (isRegex) {
11
+ try {
12
+ return new RegExp(pattern, 'i').test(value)
13
+ } catch {
14
+ return false
15
+ }
16
+ }
17
+ return value.toLowerCase().includes(pattern.toLowerCase())
18
+ }
19
+
20
+ const createResult = (detected, provider) => {
21
+ debug({ detected, provider })
22
+ return { detected, provider }
23
+ }
11
24
 
12
- // https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
25
+ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
26
+ // CloudFlare: Check for cf-mitigated header with 'challenge' value
27
+ // Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
13
28
  if (getHeader(headers, 'cf-mitigated') === 'challenge') {
14
- detected = true
15
- provider = 'cloudflare'
29
+ return createResult(true, 'cloudflare')
16
30
  }
17
31
 
18
- // https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
32
+ // Vercel: Check for x-vercel-mitigated header with 'challenge' value
33
+ // Solver reference: https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
19
34
  if (getHeader(headers, 'x-vercel-mitigated') === 'challenge') {
20
- detected = true
21
- provider = 'vercel'
35
+ return createResult(true, 'vercel')
22
36
  }
23
37
 
24
- // https://techdocs.akamai.com/property-mgr/docs/return-cache-status
38
+ // Akamai: Check for akamai-cache-status header starting with 'Error'
39
+ // Official docs: https://techdocs.akamai.com/property-mgr/docs/return-cache-status
25
40
  if (getHeader(headers, 'akamai-cache-status')?.startsWith('Error')) {
26
- detected = true
27
- provider = 'akamai'
41
+ return createResult(true, 'akamai')
42
+ }
43
+
44
+ // Akamai: Check for additional identifying headers (akamai-grn, x-akamai-session-info)
45
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/akamai.json
46
+ if (
47
+ getHeader(headers, 'akamai-grn') ||
48
+ getHeader(headers, 'x-akamai-session-info')
49
+ ) {
50
+ return createResult(true, 'akamai')
28
51
  }
29
52
 
30
- // https://docs.datadome.co/reference/validate-request
53
+ // DataDome: Check for x-dd-b header with values '1' (soft challenge) or '2' (hard challenge/CAPTCHA)
54
+ // Official docs: https://docs.datadome.co/reference/validate-request
31
55
  // 1: Soft challenge / JS redirect / interstitial
32
56
  // 2: Hard challenge / HTML redirect / CAPTCHA
33
57
  if (['1', '2'].includes(getHeader(headers, 'x-dd-b'))) {
34
- detected = true
35
- provider = 'datadome'
58
+ return createResult(true, 'datadome')
36
59
  }
37
60
 
38
- debug({ detected, provider })
39
- return { detected, provider }
61
+ // DataDome: Check for x-datadome header presence
62
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/datadome.json
63
+ if (getHeader(headers, 'x-datadome')) {
64
+ return createResult(true, 'datadome')
65
+ }
66
+
67
+ // PerimeterX: Check for X-PX-Authorization header (primary indicator)
68
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/perimeterx.json#L71-L84
69
+ if (getHeader(headers, 'x-px-authorization')) {
70
+ return createResult(true, 'perimeterx')
71
+ }
72
+
73
+ // PerimeterX: Check for window._pxAppId in body (JavaScript initialization)
74
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/perimeterx.json#L130-L137
75
+ if (body && testPattern(body, 'window._pxAppId')) {
76
+ return createResult(true, 'perimeterx')
77
+ }
78
+
79
+ // Shape Security: Check for dynamic header patterns x-[8chars]-[abcdfz]
80
+ // These headers use 8 random characters followed by suffixes like -a, -b, -c, -d, -f, or -z
81
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/shapesecurity.json#L30-L113
82
+ const headerNames = Object.keys(headers)
83
+ for (const name of headerNames) {
84
+ if (/^x-[a-z0-9]{8}-[abcdfz]$/i.test(name)) {
85
+ return createResult(true, 'shapesecurity')
86
+ }
87
+ }
88
+
89
+ // Shape Security: Check for 'shapesecurity' text in response body
90
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/shapesecurity.json#L136-L142
91
+ if (body && testPattern(body, 'shapesecurity')) {
92
+ return createResult(true, 'shapesecurity')
93
+ }
94
+
95
+ // Kasada: Check for x-kasada or x-kasada-challenge headers
96
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/kasada.json#L57-L85
97
+ if (
98
+ getHeader(headers, 'x-kasada') ||
99
+ getHeader(headers, 'x-kasada-challenge')
100
+ ) {
101
+ return createResult(true, 'kasada')
102
+ }
103
+
104
+ // Kasada: Check for __kasada global object or kasada.js script in body
105
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/kasada.json#L117-L144
106
+ if (
107
+ body &&
108
+ (testPattern(body, '__kasada') || testPattern(body, 'kasada.js'))
109
+ ) {
110
+ return createResult(true, 'kasada')
111
+ }
112
+
113
+ // Imperva/Incapsula: Check for x-cdn header with 'Incapsula' value or x-iinfo header
114
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/incapsula.json#L86-L109
115
+ if (
116
+ getHeader(headers, 'x-cdn') === 'Incapsula' ||
117
+ getHeader(headers, 'x-iinfo')
118
+ ) {
119
+ return createResult(true, 'imperva')
120
+ }
121
+
122
+ // Imperva/Incapsula: Check for 'incapsula' or 'imperva' text in response body
123
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/incapsula.json#L111-L124
124
+ if (
125
+ body &&
126
+ (testPattern(body, 'incapsula') || testPattern(body, 'imperva'))
127
+ ) {
128
+ return createResult(true, 'imperva')
129
+ }
130
+
131
+ // reCAPTCHA: Check for recaptcha/api or google.com/recaptcha in URL
132
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L13-L48
133
+ if (
134
+ url &&
135
+ (testPattern(url, 'recaptcha/api') ||
136
+ testPattern(url, 'google\\.com/recaptcha', true))
137
+ ) {
138
+ return createResult(true, 'recaptcha')
139
+ }
140
+
141
+ // reCAPTCHA: Check for grecaptcha global object in body (primary JavaScript indicator)
142
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L51-L58
143
+ if (body && testPattern(body, 'grecaptcha')) {
144
+ return createResult(true, 'recaptcha')
145
+ }
146
+
147
+ // reCAPTCHA: Check for g-recaptcha container class in body
148
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L66-L73
149
+ if (body && testPattern(body, 'g-recaptcha')) {
150
+ return createResult(true, 'recaptcha')
151
+ }
152
+
153
+ // hCaptcha: Check for hcaptcha.com domain in URL
154
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L13-L22
155
+ if (url && testPattern(url, 'hcaptcha\\.com', true)) {
156
+ return createResult(true, 'hcaptcha')
157
+ }
158
+
159
+ // hCaptcha: Check for hcaptcha object in body
160
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L42-L50
161
+ if (body && testPattern(body, 'hcaptcha')) {
162
+ return createResult(true, 'hcaptcha')
163
+ }
164
+
165
+ // hCaptcha: Check for h-captcha container class in body
166
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L51-L58
167
+ if (body && testPattern(body, 'h-captcha')) {
168
+ return createResult(true, 'hcaptcha')
169
+ }
170
+
171
+ // FunCaptcha (Arkose Labs): Check for arkoselabs.com or funcaptcha in URL
172
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/funcaptcha.json#L13-L40
173
+ if (
174
+ url &&
175
+ (testPattern(url, 'arkoselabs\\.com', true) ||
176
+ testPattern(url, 'funcaptcha'))
177
+ ) {
178
+ return createResult(true, 'funcaptcha')
179
+ }
180
+
181
+ // FunCaptcha (Arkose Labs): Check for funcaptcha or arkose text in body
182
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/funcaptcha.json#L42-L55
183
+ if (
184
+ body &&
185
+ (testPattern(body, 'funcaptcha') || testPattern(body, 'arkose'))
186
+ ) {
187
+ return createResult(true, 'funcaptcha')
188
+ }
189
+
190
+ // GeeTest: Check for geetest.com domain in URL
191
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L13-L43
192
+ if (url && testPattern(url, 'geetest\\.com', true)) {
193
+ return createResult(true, 'geetest')
194
+ }
195
+
196
+ // GeeTest: Check for geetest object or text in body
197
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L45-L52
198
+ if (body && testPattern(body, 'geetest')) {
199
+ return createResult(true, 'geetest')
200
+ }
201
+
202
+ // GeeTest: Check for gt.js script in body
203
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L53-L60
204
+ if (body && testPattern(body, 'gt.js')) {
205
+ return createResult(true, 'geetest')
206
+ }
207
+
208
+ // Cloudflare Turnstile: Check for challenges.cloudflare.com/turnstile in URL
209
+ // Turnstile is Cloudflare's CAPTCHA alternative with privacy focus
210
+ if (
211
+ url &&
212
+ testPattern(url, 'challenges\\.cloudflare\\.com/turnstile', true)
213
+ ) {
214
+ return createResult(true, 'cloudflare-turnstile')
215
+ }
216
+
217
+ // Cloudflare Turnstile: Check for cf-turnstile class in body (primary indicator)
218
+ if (body && testPattern(body, 'cf-turnstile')) {
219
+ return createResult(true, 'cloudflare-turnstile')
220
+ }
221
+
222
+ // Cloudflare Turnstile: Check for turnstile text in body (secondary indicator)
223
+ if (body && testPattern(body, 'turnstile')) {
224
+ return createResult(true, 'cloudflare-turnstile')
225
+ }
226
+
227
+ // AWS WAF: Check for x-amzn-waf-action or x-amzn-requestid headers
228
+ // These headers are set by AWS WAF when bot control rules are triggered
229
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json
230
+ if (
231
+ getHeader(headers, 'x-amzn-waf-action') ||
232
+ getHeader(headers, 'x-amzn-requestid')
233
+ ) {
234
+ return createResult(true, 'aws-waf')
235
+ }
236
+
237
+ // AWS WAF: Check for aws-waf text in body (challenge page indicator)
238
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json#L47-L73
239
+ if (body && testPattern(body, 'aws-waf')) {
240
+ return createResult(true, 'aws-waf')
241
+ }
242
+
243
+ return createResult(false, null)
40
244
  }
41
245
 
42
246
  module.exports.debug = debug
247
+ module.exports.testPattern = testPattern