is-antibot 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE.md CHANGED
File without changes
package/README.md CHANGED
@@ -9,6 +9,28 @@
9
9
 
10
10
  > Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, and more.
11
11
 
12
+ ## Supported Providers
13
+
14
+ ### Anti-Bot Systems
15
+
16
+ - **CloudFlare** - Bot management and challenge pages
17
+ - **Vercel** - Attack mode protection
18
+ - **Akamai** - Bot Manager and Web Application Protector
19
+ - **DataDome** - Bot protection with CAPTCHA challenges
20
+ - **PerimeterX** - Behavioral bot detection
21
+ - **Shape Security** - Enterprise bot management
22
+ - **Kasada** - Advanced bot mitigation
23
+ - **Imperva/Incapsula** - Web application firewall
24
+ - **AWS WAF** - Amazon Web Services Web Application Firewall
25
+
26
+ ### CAPTCHA Providers
27
+
28
+ - **reCAPTCHA** - Google's CAPTCHA service (v2 and v3)
29
+ - **hCaptcha** - Privacy-focused CAPTCHA alternative
30
+ - **FunCaptcha** - Arkose Labs interactive challenges
31
+ - **GeeTest** - AI-powered CAPTCHA
32
+ - **Cloudflare Turnstile** - Privacy-preserving CAPTCHA alternative
33
+
12
34
  ## Why
13
35
 
14
36
  Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
@@ -40,6 +62,23 @@ if (detected) {
40
62
 
41
63
  The library expects a [Fetch Response](https://developer.mozilla.org/en-US/docs/Web/API/Response) object, a [Node.js Response](https://nodejs.org/api/http.html#class-httpincomingmessage) object, or an object representing HTTP response headers as input.
42
64
 
65
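+ You can also pass optional `body` and `url` parameters for enhanced detection. `body` must be the response text as a string (for example `await response.text()`), not the response's body stream: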
66
+
67
+ ```js
68
+ const result = isAntibot({
69
+ headers: response.headers,
70
+ body: await response.text(),
71
+ url: response.url
72
+ })
73
+ ```
74
+
75
+ ### Response
76
+
77
+ The library returns an object with the following properties:
78
+
79
+ - `detected` (boolean): Whether an antibot challenge was detected
80
+ - `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha'), or `null` when no challenge is detected
81
+
43
82
  ## License
44
83
 
45
84
  **is-antibot** © [microlink.io](https://microlink.io), released under the [MIT](https://github.com/microlinkhq/is-antibot/blob/master/LICENSE.md) License.<br>
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "is-antibot",
3
- "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, and more.",
3
+ "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
4
4
  "homepage": "https://github.com/microlinkhq/is-antibot",
5
- "version": "1.0.0",
5
+ "version": "1.2.0",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
8
8
  },
@@ -15,6 +15,10 @@
15
15
  {
16
16
  "name": "Kiko Beats",
17
17
  "email": "josefrancisco.verdu@gmail.com"
18
+ },
19
+ {
20
+ "name": "Copilot",
21
+ "email": "198982749+Copilot@users.noreply.github.com"
18
22
  }
19
23
  ],
20
24
  "repository": {
@@ -27,18 +31,31 @@
27
31
  "keywords": [
28
32
  "akamai",
29
33
  "antibot",
34
+ "arkose",
35
+ "aws-waf",
30
36
  "bot",
31
37
  "captcha",
32
38
  "challenge",
33
39
  "cloudflare",
34
40
  "datadome",
35
41
  "detection",
42
+ "funcaptcha",
43
+ "geetest",
44
+ "hcaptcha",
45
+ "imperva",
46
+ "incapsula",
47
+ "kasada",
48
+ "perimeterx",
49
+ "recaptcha",
36
50
  "scraper",
37
51
  "scraping",
52
+ "shapesecurity",
53
+ "turnstile",
38
54
  "vercel",
39
55
  "waf"
40
56
  ],
41
57
  "dependencies": {
58
+ "cookie-es": "~3.0.1",
42
59
  "debug-logfmt": "~1.4.7"
43
60
  },
44
61
  "devDependencies": {
@@ -63,6 +80,22 @@
63
80
  "files": [
64
81
  "src"
65
82
  ],
83
+ "scripts": {
84
+ "clean": "rm -rf node_modules",
85
+ "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
86
+ "coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
87
+ "lint": "standard",
88
+ "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
89
+ "pretest": "npm run lint",
90
+ "release": "pnpm run release:version && pnpm run release:changelog && pnpm run release:commit && pnpm run release:tag",
91
+ "release:changelog": "conventional-changelog -p conventionalcommits -i CHANGELOG.md -s",
92
+ "release:commit": "git add package.json CHANGELOG.md && git commit -m \"chore(release): $(node -p \"require('./package.json').version\")\"",
93
+ "release:github": "github-generate-release",
94
+ "release:tag": "git tag -a v$(node -p \"require('./package.json').version\") -m \"v$(node -p \"require('./package.json').version\")\"",
95
+ "release:tags": "git push origin HEAD:master --follow-tags",
96
+ "release:version": "standard-version --skip.changelog --skip.commit --skip.tag",
97
+ "test": "c8 ava"
98
+ },
66
99
  "license": "MIT",
67
100
  "ava": {
68
101
  "files": [
@@ -92,21 +125,5 @@
92
125
  "simple-git-hooks": {
93
126
  "commit-msg": "npx commitlint --edit",
94
127
  "pre-commit": "npx nano-staged"
95
- },
96
- "scripts": {
97
- "clean": "rm -rf node_modules",
98
- "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
99
- "coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
100
- "lint": "standard",
101
- "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
102
- "pretest": "npm run lint",
103
- "release": "pnpm run release:version && pnpm run release:changelog && pnpm run release:commit && pnpm run release:tag",
104
- "release:changelog": "conventional-changelog -p conventionalcommits -i CHANGELOG.md -s",
105
- "release:commit": "git add package.json CHANGELOG.md && git commit -m \"chore(release): $(node -p \"require('./package.json').version\")\"",
106
- "release:github": "github-generate-release",
107
- "release:tag": "git tag -a v$(node -p \"require('./package.json').version\") -m \"v$(node -p \"require('./package.json').version\")\"",
108
- "release:tags": "git push origin HEAD:master --follow-tags",
109
- "release:version": "standard-version --skip.changelog --skip.commit --skip.tag",
110
- "test": "c8 ava"
111
128
  }
112
- }
129
+ }
package/src/index.js CHANGED
@@ -1,42 +1,259 @@
1
1
  'use strict'
2
2
 
3
+ const { splitSetCookieString } = require('cookie-es')
3
4
  const debug = require('debug-logfmt')('is-antibot')
4
5
 
5
6
  const getHeader = (headers, name) =>
6
7
  typeof headers.get === 'function' ? headers.get(name) : headers[name]
7
8
 
8
- module.exports = ({ headers = {} } = {}) => {
9
- let detected = false
10
- let provider = null
9
+ const testPattern = (value, pattern, isRegex = false) => {
10
+ if (!value) return false
11
+ if (isRegex) {
12
+ try {
13
+ return new RegExp(pattern, 'i').test(value)
14
+ } catch {
15
+ return false
16
+ }
17
+ }
18
+ return value.toLowerCase().includes(pattern.toLowerCase())
19
+ }
20
+
21
+ const createResult = (detected, provider) => {
22
+ debug({ detected, provider })
23
+ return { detected, provider }
24
+ }
25
+
26
+ const testSetCookie = (headers, pattern) => {
27
+ const cookiesString = getHeader(headers, 'set-cookie')
28
+ return splitSetCookieString(cookiesString).some(c => c.startsWith(pattern))
29
+ }
11
30
 
12
- // https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
31
+ module.exports = ({ headers = {}, body = '', url = '' } = {}) => {
32
+ // CloudFlare: Check for cf-mitigated header with 'challenge' value
33
+ // Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
13
34
  if (getHeader(headers, 'cf-mitigated') === 'challenge') {
14
- detected = true
15
- provider = 'cloudflare'
35
+ return createResult(true, 'cloudflare')
16
36
  }
17
37
 
18
- // https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
38
+ // Vercel: Check for x-vercel-mitigated header with 'challenge' value
39
+ // Solver reference: https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
19
40
  if (getHeader(headers, 'x-vercel-mitigated') === 'challenge') {
20
- detected = true
21
- provider = 'vercel'
41
+ return createResult(true, 'vercel')
22
42
  }
23
43
 
24
- // https://techdocs.akamai.com/property-mgr/docs/return-cache-status
44
+ // Akamai: Check for akamai-cache-status header starting with 'Error'
45
+ // Official docs: https://techdocs.akamai.com/property-mgr/docs/return-cache-status
25
46
  if (getHeader(headers, 'akamai-cache-status')?.startsWith('Error')) {
26
- detected = true
27
- provider = 'akamai'
47
+ return createResult(true, 'akamai')
28
48
  }
29
49
 
30
- // https://docs.datadome.co/reference/validate-request
50
+ // Akamai: Check for additional identifying headers (akamai-grn, x-akamai-session-info)
51
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/akamai.json
52
+ if (
53
+ getHeader(headers, 'akamai-grn') ||
54
+ getHeader(headers, 'x-akamai-session-info')
55
+ ) {
56
+ return createResult(true, 'akamai')
57
+ }
58
+
59
+ // DataDome: Check for x-dd-b header with values '1' (soft challenge) or '2' (hard challenge/CAPTCHA)
60
+ // Official docs: https://docs.datadome.co/reference/validate-request
31
61
  // 1: Soft challenge / JS redirect / interstitial
32
62
  // 2: Hard challenge / HTML redirect / CAPTCHA
33
63
  if (['1', '2'].includes(getHeader(headers, 'x-dd-b'))) {
34
- detected = true
35
- provider = 'datadome'
64
+ return createResult(true, 'datadome')
36
65
  }
37
66
 
38
- debug({ detected, provider })
39
- return { detected, provider }
67
+ // DataDome: Check for x-datadome header presence
68
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/datadome.json
69
+ if (getHeader(headers, 'x-datadome')) {
70
+ return createResult(true, 'datadome')
71
+ }
72
+
73
+ // PerimeterX: Check for X-PX-Authorization header (primary indicator)
74
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/perimeterx.json#L71-L84
75
+ if (getHeader(headers, 'x-px-authorization')) {
76
+ return createResult(true, 'perimeterx')
77
+ }
78
+
79
+ // PerimeterX: Check for window._pxAppId in body (JavaScript initialization)
80
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/perimeterx.json#L130-L137
81
+ if (body && testPattern(body, 'window._pxAppId')) {
82
+ return createResult(true, 'perimeterx')
83
+ }
84
+
85
+ // Shape Security: Check for dynamic header patterns x-[8chars]-[abcdfz]
86
+ // These headers use 8 random characters followed by suffixes like -a, -b, -c, -d, -f, or -z
87
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/shapesecurity.json#L30-L113
88
+ const headerNames = Object.keys(headers)
89
+ for (const name of headerNames) {
90
+ if (/^x-[a-z0-9]{8}-[abcdfz]$/i.test(name)) {
91
+ return createResult(true, 'shapesecurity')
92
+ }
93
+ }
94
+
95
+ // Shape Security: Check for 'shapesecurity' text in response body
96
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/shapesecurity.json#L136-L142
97
+ if (body && testPattern(body, 'shapesecurity')) {
98
+ return createResult(true, 'shapesecurity')
99
+ }
100
+
101
+ // Kasada: Check for x-kasada or x-kasada-challenge headers
102
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/kasada.json#L57-L85
103
+ if (
104
+ getHeader(headers, 'x-kasada') ||
105
+ getHeader(headers, 'x-kasada-challenge')
106
+ ) {
107
+ return createResult(true, 'kasada')
108
+ }
109
+
110
+ // Kasada: Check for __kasada global object or kasada.js script in body
111
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/kasada.json#L117-L144
112
+ if (
113
+ body &&
114
+ (testPattern(body, '__kasada') || testPattern(body, 'kasada.js'))
115
+ ) {
116
+ return createResult(true, 'kasada')
117
+ }
118
+
119
+ // Imperva/Incapsula: Check for x-cdn header with 'Incapsula' value or x-iinfo header
120
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/incapsula.json#L86-L109
121
+ if (
122
+ getHeader(headers, 'x-cdn') === 'Incapsula' ||
123
+ getHeader(headers, 'x-iinfo')
124
+ ) {
125
+ return createResult(true, 'imperva')
126
+ }
127
+
128
+ // Imperva/Incapsula: Check for 'incapsula' or 'imperva' text in response body
129
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/incapsula.json#L111-L124
130
+ if (
131
+ body &&
132
+ (testPattern(body, 'incapsula') || testPattern(body, 'imperva'))
133
+ ) {
134
+ return createResult(true, 'imperva')
135
+ }
136
+
137
+ // reCAPTCHA: Check for recaptcha/api or google.com/recaptcha in URL
138
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L13-L48
139
+ if (
140
+ url &&
141
+ (testPattern(url, 'recaptcha/api') ||
142
+ testPattern(url, 'google\\.com/recaptcha', true))
143
+ ) {
144
+ return createResult(true, 'recaptcha')
145
+ }
146
+
147
+ // reCAPTCHA: Check for grecaptcha global object in body (primary JavaScript indicator)
148
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L51-L58
149
+ if (body && testPattern(body, 'grecaptcha')) {
150
+ return createResult(true, 'recaptcha')
151
+ }
152
+
153
+ // reCAPTCHA: Check for g-recaptcha container class in body
154
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/recaptcha.json#L66-L73
155
+ if (body && testPattern(body, 'g-recaptcha')) {
156
+ return createResult(true, 'recaptcha')
157
+ }
158
+
159
+ // hCaptcha: Check for hcaptcha.com domain in URL
160
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L13-L22
161
+ if (url && testPattern(url, 'hcaptcha\\.com', true)) {
162
+ return createResult(true, 'hcaptcha')
163
+ }
164
+
165
+ // hCaptcha: Check for hcaptcha object in body
166
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L42-L50
167
+ if (body && testPattern(body, 'hcaptcha')) {
168
+ return createResult(true, 'hcaptcha')
169
+ }
170
+
171
+ // hCaptcha: Check for h-captcha container class in body
172
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/hcaptcha.json#L51-L58
173
+ if (body && testPattern(body, 'h-captcha')) {
174
+ return createResult(true, 'hcaptcha')
175
+ }
176
+
177
+ // FunCaptcha (Arkose Labs): Check for arkoselabs.com or funcaptcha in URL
178
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/funcaptcha.json#L13-L40
179
+ if (
180
+ url &&
181
+ (testPattern(url, 'arkoselabs\\.com', true) ||
182
+ testPattern(url, 'funcaptcha'))
183
+ ) {
184
+ return createResult(true, 'funcaptcha')
185
+ }
186
+
187
+ // FunCaptcha (Arkose Labs): Check for funcaptcha or arkose text in body
188
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/funcaptcha.json#L42-L55
189
+ if (
190
+ body &&
191
+ (testPattern(body, 'funcaptcha') || testPattern(body, 'arkose'))
192
+ ) {
193
+ return createResult(true, 'funcaptcha')
194
+ }
195
+
196
+ // GeeTest: Check for geetest.com domain in URL
197
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L13-L43
198
+ if (url && testPattern(url, 'geetest\\.com', true)) {
199
+ return createResult(true, 'geetest')
200
+ }
201
+
202
+ // GeeTest: Check for geetest object or text in body
203
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L45-L52
204
+ if (body && testPattern(body, 'geetest')) {
205
+ return createResult(true, 'geetest')
206
+ }
207
+
208
+ // GeeTest: Check for gt.js script in body
209
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/geetest.json#L53-L60
210
+ if (body && testPattern(body, 'gt.js')) {
211
+ return createResult(true, 'geetest')
212
+ }
213
+
214
+ // Cloudflare Turnstile: Check for challenges.cloudflare.com/turnstile in URL
215
+ // Turnstile is Cloudflare's CAPTCHA alternative with privacy focus
216
+ if (
217
+ url &&
218
+ testPattern(url, 'challenges\\.cloudflare\\.com/turnstile', true)
219
+ ) {
220
+ return createResult(true, 'cloudflare-turnstile')
221
+ }
222
+
223
+ // Cloudflare Turnstile: Check for cf-turnstile class in body (primary indicator)
224
+ if (body && testPattern(body, 'cf-turnstile')) {
225
+ return createResult(true, 'cloudflare-turnstile')
226
+ }
227
+
228
+ // Cloudflare Turnstile: Check for turnstile text in body (secondary indicator)
229
+ if (body && testPattern(body, 'turnstile')) {
230
+ return createResult(true, 'cloudflare-turnstile')
231
+ }
232
+
233
+ // LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
234
+ if (testSetCookie(headers, 'trkCode=bf')) {
235
+ return createResult(true, 'linkedin')
236
+ }
237
+
238
+ // AWS WAF: Check for x-amzn-waf-action or x-amzn-requestid headers
239
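+ // x-amzn-waf-action is set by AWS WAF on challenge/CAPTCHA responses; x-amzn-requestid is a generic AWS request ID header that other AWS services also return on ordinary responses, so on its own it is a weak signal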
240
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json
241
+ if (
242
+ getHeader(headers, 'x-amzn-waf-action') ||
243
+ getHeader(headers, 'x-amzn-requestid')
244
+ ) {
245
+ return createResult(true, 'aws-waf')
246
+ }
247
+
248
+ // AWS WAF: Check for aws-waf text in body (challenge page indicator)
249
+ // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/aws-waf.json#L47-L73
250
+ if (body && testPattern(body, 'aws-waf')) {
251
+ return createResult(true, 'aws-waf')
252
+ }
253
+
254
+ return createResult(false, null)
40
255
  }
41
256
 
42
257
  module.exports.debug = debug
258
+ module.exports.testPattern = testPattern
259
+ module.exports.testSetCookie = testSetCookie