is-antibot 1.3.6 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/package.json +1 -1
- package/src/index.js +152 -124
package/README.md
CHANGED
|
@@ -67,14 +67,14 @@ const isAntibot = require('is-antibot')
|
|
|
67
67
|
const response = await fetch('https://www.linkedin.com/in/kikobeats/')
|
|
68
68
|
const html = await response.text()
|
|
69
69
|
|
|
70
|
-
const { detected, provider } = isAntibot({
|
|
70
|
+
const { detected, provider, detection } = isAntibot({
|
|
71
71
|
headers: response.headers,
|
|
72
72
|
html,
|
|
73
73
|
url: response.url
|
|
74
74
|
})
|
|
75
75
|
|
|
76
76
|
if (detected) {
|
|
77
|
-
console.log(`Antibot detected: ${provider}`)
|
|
77
|
+
console.log(`Antibot detected: ${provider} via ${detection}`)
|
|
78
78
|
}
|
|
79
79
|
```
|
|
80
80
|
|
|
@@ -84,10 +84,10 @@ It also works with [got](https://github.com/sindresorhus/got) or any library whe
|
|
|
84
84
|
const response = await got('https://www.linkedin.com/in/kikobeats/')
|
|
85
85
|
.catch(error => error.response)
|
|
86
86
|
|
|
87
|
-
const { detected, provider } = isAntibot(response)
|
|
87
|
+
const { detected, provider, detection } = isAntibot(response)
|
|
88
88
|
|
|
89
89
|
if (detected) {
|
|
90
|
-
console.log(`Antibot detected: ${provider}`)
|
|
90
|
+
console.log(`Antibot detected: ${provider} via ${detection}`)
|
|
91
91
|
}
|
|
92
92
|
```
|
|
93
93
|
|
|
@@ -95,6 +95,7 @@ The library returns an object with the following properties:
|
|
|
95
95
|
|
|
96
96
|
- `detected` (boolean): Whether an antibot challenge was detected
|
|
97
97
|
- `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha')
|
|
98
|
+
- `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, or `'url'`
|
|
98
99
|
|
|
99
100
|
## License
|
|
100
101
|
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "is-antibot",
|
|
3
3
|
"description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
|
|
4
4
|
"homepage": "https://github.com/microlinkhq/is-antibot",
|
|
5
|
-
"version": "1.3.6",
|
|
5
|
+
"version": "1.4.0",
|
|
6
6
|
"exports": {
|
|
7
7
|
".": "./src/index.js"
|
|
8
8
|
},
|
package/src/index.js
CHANGED
|
@@ -3,6 +3,13 @@
|
|
|
3
3
|
const { splitSetCookieString } = require('cookie-es')
|
|
4
4
|
const debug = require('debug-logfmt')('is-antibot')
|
|
5
5
|
|
|
6
|
+
const DETECTION = {
|
|
7
|
+
HEADERS: 'headers',
|
|
8
|
+
COOKIES: 'cookies',
|
|
9
|
+
HTML: 'html',
|
|
10
|
+
URL: 'url'
|
|
11
|
+
}
|
|
12
|
+
|
|
6
13
|
const createGetHeader = headers =>
|
|
7
14
|
typeof headers.get === 'function'
|
|
8
15
|
? name => headers.get(name)
|
|
@@ -12,6 +19,14 @@ const createTestPattern = value => {
|
|
|
12
19
|
if (!value) return () => false
|
|
13
20
|
const lowerValue = value.toLowerCase()
|
|
14
21
|
return (pattern, isRegex = false) => {
|
|
22
|
+
if (pattern instanceof RegExp) {
|
|
23
|
+
try {
|
|
24
|
+
return pattern.test(value)
|
|
25
|
+
} catch {
|
|
26
|
+
return false
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
|
|
15
30
|
if (isRegex) {
|
|
16
31
|
try {
|
|
17
32
|
return new RegExp(pattern, 'i').test(value)
|
|
@@ -23,9 +38,9 @@ const createTestPattern = value => {
|
|
|
23
38
|
}
|
|
24
39
|
}
|
|
25
40
|
|
|
26
|
-
const createResult = (detected, provider) => {
|
|
27
|
-
debug({ detected, provider })
|
|
28
|
-
return { detected, provider }
|
|
41
|
+
const createResult = (detected, provider, detection = null) => {
|
|
42
|
+
debug({ detected, provider, detection })
|
|
43
|
+
return { detected, provider, detection }
|
|
29
44
|
}
|
|
30
45
|
|
|
31
46
|
const createHasCookie = headers => {
|
|
@@ -36,351 +51,364 @@ const createHasCookie = headers => {
|
|
|
36
51
|
)
|
|
37
52
|
}
|
|
38
53
|
|
|
54
|
+
const getHeaderNames = headers =>
|
|
55
|
+
typeof headers.keys === 'function'
|
|
56
|
+
? Array.from(headers.keys())
|
|
57
|
+
: Object.keys(headers)
|
|
58
|
+
|
|
39
59
|
const detect = ({ headers = {}, html = '', url = '' } = {}) => {
|
|
40
60
|
const getHeader = createGetHeader(headers)
|
|
41
61
|
const hasCookie = createHasCookie(headers)
|
|
42
62
|
const htmlHas = createTestPattern(html)
|
|
43
63
|
const urlHas = createTestPattern(url)
|
|
44
64
|
|
|
65
|
+
const hasAnyHeader = headerNames =>
|
|
66
|
+
headerNames.some(headerName => getHeader(headerName))
|
|
67
|
+
|
|
68
|
+
const hasAnyCookie = cookieNames =>
|
|
69
|
+
cookieNames.some(cookieName => hasCookie(cookieName))
|
|
70
|
+
|
|
71
|
+
const hasAnyHtml = patterns => patterns.some(pattern => htmlHas(pattern))
|
|
72
|
+
|
|
73
|
+
const hasAnyUrl = patterns => patterns.some(pattern => urlHas(pattern))
|
|
74
|
+
|
|
75
|
+
const byHeaders = provider => createResult(true, provider, DETECTION.HEADERS)
|
|
76
|
+
|
|
77
|
+
const byCookies = provider => createResult(true, provider, DETECTION.COOKIES)
|
|
78
|
+
|
|
79
|
+
const byHtml = provider => createResult(true, provider, DETECTION.HTML)
|
|
80
|
+
|
|
81
|
+
const byUrl = provider => createResult(true, provider, DETECTION.URL)
|
|
82
|
+
|
|
45
83
|
// CloudFlare: Check for cf-mitigated header with 'challenge' value
|
|
46
84
|
// Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
|
|
47
85
|
if (getHeader('cf-mitigated') === 'challenge') {
|
|
48
|
-
return
|
|
86
|
+
return byHeaders('cloudflare')
|
|
49
87
|
}
|
|
50
88
|
|
|
51
89
|
// Cloudflare: cf_clearance cookie indicates Cloudflare challenge flow
|
|
52
|
-
if (
|
|
53
|
-
return
|
|
90
|
+
if (hasAnyCookie(['cf_clearance='])) {
|
|
91
|
+
return byCookies('cloudflare')
|
|
54
92
|
}
|
|
55
93
|
|
|
56
94
|
// Vercel: Check for x-vercel-mitigated header with 'challenge' value
|
|
57
95
|
// Solver reference: https://github.com/glizzykingdreko/Vercel-Attack-Mode-Solver
|
|
58
96
|
if (getHeader('x-vercel-mitigated') === 'challenge') {
|
|
59
|
-
return
|
|
97
|
+
return byHeaders('vercel')
|
|
60
98
|
}
|
|
61
99
|
|
|
62
100
|
// Akamai: Check for akamai-cache-status header starting with 'Error'
|
|
63
101
|
// Official docs: https://techdocs.akamai.com/property-mgr/docs/return-cache-status
|
|
64
102
|
if (getHeader('akamai-cache-status')?.startsWith('Error')) {
|
|
65
|
-
return
|
|
103
|
+
return byHeaders('akamai')
|
|
66
104
|
}
|
|
67
105
|
|
|
68
106
|
// Akamai: Check for additional identifying headers (akamai-grn, x-akamai-session-info)
|
|
69
107
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-akamai.json
|
|
70
|
-
if (
|
|
71
|
-
return
|
|
108
|
+
if (hasAnyHeader(['akamai-grn', 'x-akamai-session-info'])) {
|
|
109
|
+
return byHeaders('akamai')
|
|
72
110
|
}
|
|
73
111
|
|
|
74
112
|
// Akamai: _abck bot manager tracking cookie
|
|
75
|
-
if (
|
|
76
|
-
return
|
|
113
|
+
if (hasAnyCookie(['_abck='])) {
|
|
114
|
+
return byCookies('akamai')
|
|
77
115
|
}
|
|
78
116
|
|
|
79
117
|
// Akamai: Bot Manager API namespace (bmak) in html
|
|
80
|
-
if (
|
|
81
|
-
return
|
|
118
|
+
if (hasAnyHtml(['bmak.'])) {
|
|
119
|
+
return byHtml('akamai')
|
|
82
120
|
}
|
|
83
121
|
|
|
84
122
|
// DataDome: Check for x-dd-b header with values '1' (soft challenge) or '2' (hard challenge/CAPTCHA)
|
|
85
123
|
// Official docs: https://docs.datadome.co/reference/validate-request
|
|
86
124
|
if (['1', '2'].includes(getHeader('x-dd-b'))) {
|
|
87
|
-
return
|
|
125
|
+
return byHeaders('datadome')
|
|
88
126
|
}
|
|
89
127
|
|
|
90
128
|
// DataDome: Check for x-datadome or x-datadome-cid header presence
|
|
91
129
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-datadome.json
|
|
92
|
-
if (
|
|
93
|
-
return
|
|
130
|
+
if (hasAnyHeader(['x-datadome', 'x-datadome-cid'])) {
|
|
131
|
+
return byHeaders('datadome')
|
|
94
132
|
}
|
|
95
133
|
|
|
96
134
|
// DataDome: datadome tracking cookie
|
|
97
|
-
if (
|
|
98
|
-
return
|
|
135
|
+
if (hasAnyCookie(['datadome='])) {
|
|
136
|
+
return byCookies('datadome')
|
|
99
137
|
}
|
|
100
138
|
|
|
101
139
|
// PerimeterX: Check for X-PX-Authorization header (primary indicator)
|
|
102
140
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-perimeterx.json
|
|
103
141
|
if (getHeader('x-px-authorization')) {
|
|
104
|
-
return
|
|
142
|
+
return byHeaders('perimeterx')
|
|
105
143
|
}
|
|
106
144
|
|
|
107
145
|
// PerimeterX: Check for window._pxAppId, pxInit, or _pxAction in html
|
|
108
146
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-perimeterx.json
|
|
109
|
-
if (
|
|
110
|
-
return
|
|
147
|
+
if (hasAnyHtml(['window._pxAppId', 'pxInit', '_pxAction'])) {
|
|
148
|
+
return byHtml('perimeterx')
|
|
111
149
|
}
|
|
112
150
|
|
|
113
151
|
// PerimeterX: _px3 or _pxhd cookies
|
|
114
|
-
if (
|
|
115
|
-
return
|
|
152
|
+
if (hasAnyCookie(['_px3=', '_pxhd='])) {
|
|
153
|
+
return byCookies('perimeterx')
|
|
116
154
|
}
|
|
117
155
|
|
|
118
156
|
// Shape Security: Check for dynamic header patterns x-[8chars]-[abcdfz]
|
|
119
157
|
// These headers use 8 random characters followed by suffixes like -a, -b, -c, -d, -f, or -z
|
|
120
158
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-shapesecurity.json
|
|
121
|
-
const headerNames =
|
|
159
|
+
const headerNames = getHeaderNames(headers)
|
|
122
160
|
for (const name of headerNames) {
|
|
123
161
|
if (/^x-[a-z0-9]{8}-[abcdfz]$/i.test(name)) {
|
|
124
|
-
return
|
|
162
|
+
return byHeaders('shapesecurity')
|
|
125
163
|
}
|
|
126
164
|
}
|
|
127
165
|
|
|
128
166
|
// Shape Security: Check for 'shapesecurity' text in response html
|
|
129
167
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-shapesecurity.json
|
|
130
|
-
if (
|
|
131
|
-
return
|
|
168
|
+
if (hasAnyHtml(['shapesecurity'])) {
|
|
169
|
+
return byHtml('shapesecurity')
|
|
132
170
|
}
|
|
133
171
|
|
|
134
172
|
// Kasada: Check for x-kasada or x-kasada-challenge headers
|
|
135
173
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-kasada.json
|
|
136
|
-
if (
|
|
137
|
-
return
|
|
174
|
+
if (hasAnyHeader(['x-kasada', 'x-kasada-challenge'])) {
|
|
175
|
+
return byHeaders('kasada')
|
|
138
176
|
}
|
|
139
177
|
|
|
140
178
|
// Kasada: Check for __kasada global object or kasada.js script in html
|
|
141
179
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-kasada.json
|
|
142
|
-
if (
|
|
143
|
-
return
|
|
180
|
+
if (hasAnyHtml(['__kasada', 'kasada.js'])) {
|
|
181
|
+
return byHtml('kasada')
|
|
144
182
|
}
|
|
145
183
|
|
|
146
184
|
// Imperva/Incapsula: Check for x-cdn header with 'Incapsula' value or x-iinfo header
|
|
147
185
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-incapsula.json
|
|
148
|
-
if (getHeader('x-cdn') === 'Incapsula' ||
|
|
149
|
-
return
|
|
186
|
+
if (getHeader('x-cdn') === 'Incapsula' || hasAnyHeader(['x-iinfo'])) {
|
|
187
|
+
return byHeaders('imperva')
|
|
150
188
|
}
|
|
151
189
|
|
|
152
190
|
// Imperva/Incapsula: Check for 'incapsula' or 'imperva' text in response html
|
|
153
191
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-incapsula.json
|
|
154
|
-
if (
|
|
155
|
-
return
|
|
192
|
+
if (hasAnyHtml(['incapsula', 'imperva'])) {
|
|
193
|
+
return byHtml('imperva')
|
|
156
194
|
}
|
|
157
195
|
|
|
158
196
|
// Imperva/Incapsula: incap_ses_, visid_incap_, or reese84 cookies
|
|
159
197
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-incapsula.json
|
|
160
|
-
if (
|
|
161
|
-
|
|
162
|
-
hasCookie('visid_incap_') ||
|
|
163
|
-
hasCookie('reese84=')
|
|
164
|
-
) {
|
|
165
|
-
return createResult(true, 'imperva')
|
|
198
|
+
if (hasAnyCookie(['incap_ses_', 'visid_incap_', 'reese84='])) {
|
|
199
|
+
return byCookies('imperva')
|
|
166
200
|
}
|
|
167
201
|
|
|
168
202
|
// Reblaze: rbzid or rbzsessionid cookies
|
|
169
203
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-reblaze.json
|
|
170
|
-
if (
|
|
171
|
-
return
|
|
204
|
+
if (hasAnyCookie(['rbzid=', 'rbzsessionid='])) {
|
|
205
|
+
return byCookies('reblaze')
|
|
172
206
|
}
|
|
173
207
|
|
|
174
208
|
// Reblaze: Check for 'reblaze' text in response html
|
|
175
|
-
if (
|
|
176
|
-
return
|
|
209
|
+
if (hasAnyHtml(['reblaze'])) {
|
|
210
|
+
return byHtml('reblaze')
|
|
177
211
|
}
|
|
178
212
|
|
|
179
213
|
// Cheq: Check for CheqSdk or cheqzone.com in html
|
|
180
214
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-cheq.json
|
|
181
|
-
if (
|
|
182
|
-
return
|
|
215
|
+
if (hasAnyHtml(['CheqSdk', 'cheqzone.com'])) {
|
|
216
|
+
return byHtml('cheq')
|
|
183
217
|
}
|
|
184
218
|
|
|
185
219
|
// Cheq: Check for cheqzone.com or cheq.ai in URL
|
|
186
|
-
if (
|
|
187
|
-
return
|
|
220
|
+
if (hasAnyUrl([/cheqzone\.com/i, /cheq\.ai/i])) {
|
|
221
|
+
return byUrl('cheq')
|
|
188
222
|
}
|
|
189
223
|
|
|
190
224
|
// Sucuri: Check for 'sucuri' text in response html
|
|
191
225
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-sucuri.json
|
|
192
|
-
if (
|
|
193
|
-
return
|
|
226
|
+
if (hasAnyHtml(['sucuri'])) {
|
|
227
|
+
return byHtml('sucuri')
|
|
194
228
|
}
|
|
195
229
|
|
|
196
230
|
// ThreatMetrix: Check for 'ThreatMetrix' in html
|
|
197
231
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-threatmetrix.json
|
|
198
|
-
if (
|
|
199
|
-
return
|
|
232
|
+
if (hasAnyHtml(['ThreatMetrix'])) {
|
|
233
|
+
return byHtml('threatmetrix')
|
|
200
234
|
}
|
|
201
235
|
|
|
202
236
|
// ThreatMetrix: Check for fp/check.js fingerprint endpoint in URL
|
|
203
|
-
if (
|
|
204
|
-
return
|
|
237
|
+
if (hasAnyUrl(['fp/check.js'])) {
|
|
238
|
+
return byUrl('threatmetrix')
|
|
205
239
|
}
|
|
206
240
|
|
|
207
241
|
// Meetrics: Check for 'meetrics' text in response html
|
|
208
242
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-meetrics.json
|
|
209
|
-
if (
|
|
210
|
-
return
|
|
243
|
+
if (hasAnyHtml(['meetrics'])) {
|
|
244
|
+
return byHtml('meetrics')
|
|
211
245
|
}
|
|
212
246
|
|
|
213
247
|
// Meetrics: Check for meetrics.com in URL
|
|
214
|
-
if (
|
|
215
|
-
return
|
|
248
|
+
if (hasAnyUrl([/meetrics\.com/i])) {
|
|
249
|
+
return byUrl('meetrics')
|
|
216
250
|
}
|
|
217
251
|
|
|
218
252
|
// Ocule: Check for ocule.co.uk in html
|
|
219
253
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-ocule.json
|
|
220
|
-
if (
|
|
221
|
-
return
|
|
254
|
+
if (hasAnyHtml(['ocule.co.uk'])) {
|
|
255
|
+
return byHtml('ocule')
|
|
222
256
|
}
|
|
223
257
|
|
|
224
258
|
// Ocule: Check for ocule.co.uk in URL
|
|
225
|
-
if (
|
|
226
|
-
return
|
|
259
|
+
if (hasAnyUrl([/ocule\.co\.uk/i])) {
|
|
260
|
+
return byUrl('ocule')
|
|
227
261
|
}
|
|
228
262
|
|
|
229
263
|
// reCAPTCHA: Check for recaptcha/api, google.com/recaptcha, gstatic.com/recaptcha, or recaptcha.net in URL
|
|
230
264
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-recaptcha.json
|
|
231
265
|
if (
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
urlHas('gstatic.com/recaptcha') ||
|
|
235
|
-
urlHas('recaptcha.net')
|
|
266
|
+
hasAnyUrl(['recaptcha/api', 'gstatic.com/recaptcha', 'recaptcha.net']) ||
|
|
267
|
+
hasAnyUrl([/google\.com\/recaptcha/i])
|
|
236
268
|
) {
|
|
237
|
-
return
|
|
269
|
+
return byUrl('recaptcha')
|
|
238
270
|
}
|
|
239
271
|
|
|
240
272
|
// reCAPTCHA: Check for grecaptcha API usage in html (JavaScript indicator)
|
|
241
273
|
// Note: plain "grecaptcha" is too broad (e.g. ".grecaptcha-badge" CSS appears on normal YouTube pages)
|
|
242
274
|
if (
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
htmlHas('\\b__grecaptcha_cfg\\b', true)
|
|
275
|
+
hasAnyHtml([
|
|
276
|
+
/\b(?:window\.)?grecaptcha\s*\.(?:execute|render|ready|getResponse|enterprise)\b/i,
|
|
277
|
+
/\b(?:window\.)?grecaptcha\s*\(/i,
|
|
278
|
+
/\b__grecaptcha_cfg\b/i
|
|
279
|
+
])
|
|
249
280
|
) {
|
|
250
|
-
return
|
|
281
|
+
return byHtml('recaptcha')
|
|
251
282
|
}
|
|
252
283
|
|
|
253
284
|
// reCAPTCHA: Check for g-recaptcha container class in html
|
|
254
285
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-recaptcha.json
|
|
255
|
-
if (
|
|
256
|
-
return
|
|
286
|
+
if (hasAnyHtml(['g-recaptcha'])) {
|
|
287
|
+
return byHtml('recaptcha')
|
|
257
288
|
}
|
|
258
289
|
|
|
259
290
|
// hCaptcha: Check for hcaptcha.com domain in URL
|
|
260
291
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-hcaptcha.json
|
|
261
|
-
if (
|
|
262
|
-
return
|
|
292
|
+
if (hasAnyUrl([/hcaptcha\.com/i])) {
|
|
293
|
+
return byUrl('hcaptcha')
|
|
263
294
|
}
|
|
264
295
|
|
|
265
296
|
// hCaptcha: Check for hcaptcha.com API domain or h-captcha container class in html
|
|
266
297
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-hcaptcha.json
|
|
267
298
|
// Note: bare 'hcaptcha' matches too broadly (could appear in articles discussing hCaptcha)
|
|
268
|
-
if (
|
|
269
|
-
return
|
|
299
|
+
if (hasAnyHtml(['hcaptcha.com', 'h-captcha'])) {
|
|
300
|
+
return byHtml('hcaptcha')
|
|
270
301
|
}
|
|
271
302
|
|
|
272
303
|
// FunCaptcha (Arkose Labs): Check for arkoselabs.com or funcaptcha in URL
|
|
273
304
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-funcaptcha.json
|
|
274
|
-
if (
|
|
275
|
-
return
|
|
305
|
+
if (hasAnyUrl([/arkoselabs\.com/i]) || hasAnyUrl(['funcaptcha'])) {
|
|
306
|
+
return byUrl('funcaptcha')
|
|
276
307
|
}
|
|
277
308
|
|
|
278
309
|
// FunCaptcha (Arkose Labs): Check for arkoselabs.com API domain or funcaptcha in html
|
|
279
310
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-funcaptcha.json
|
|
280
311
|
// Note: bare 'arkose' matches too broadly (e.g. Facebook bundles Arkose SDK for login without blocking content)
|
|
281
|
-
if (
|
|
282
|
-
return
|
|
312
|
+
if (hasAnyHtml(['arkoselabs.com', 'funcaptcha'])) {
|
|
313
|
+
return byHtml('funcaptcha')
|
|
283
314
|
}
|
|
284
315
|
|
|
285
316
|
// GeeTest: Check for geetest.com domain in URL
|
|
286
317
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-geetest.json
|
|
287
|
-
if (
|
|
288
|
-
return
|
|
318
|
+
if (hasAnyUrl([/geetest\.com/i])) {
|
|
319
|
+
return byUrl('geetest')
|
|
289
320
|
}
|
|
290
321
|
|
|
291
322
|
// GeeTest: Check for geetest object or text in html
|
|
292
323
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-geetest.json
|
|
293
324
|
// Note: bare 'gt.js' removed (too generic, any script named gt.js would match)
|
|
294
|
-
if (
|
|
295
|
-
return
|
|
325
|
+
if (hasAnyHtml(['geetest'])) {
|
|
326
|
+
return byHtml('geetest')
|
|
296
327
|
}
|
|
297
328
|
|
|
298
329
|
// Cloudflare Turnstile: Check for challenges.cloudflare.com/turnstile in URL
|
|
299
|
-
if (
|
|
300
|
-
return
|
|
330
|
+
if (hasAnyUrl([/challenges\.cloudflare\.com\/turnstile/i])) {
|
|
331
|
+
return byUrl('cloudflare-turnstile')
|
|
301
332
|
}
|
|
302
333
|
|
|
303
334
|
// Cloudflare Turnstile: Check for cf-turnstile class or turnstile API script in html
|
|
304
335
|
// Note: bare 'turnstile' matches too broadly (common English word)
|
|
305
|
-
if (
|
|
306
|
-
|
|
307
|
-
htmlHas('challenges.cloudflare.com/turnstile')
|
|
308
|
-
) {
|
|
309
|
-
return createResult(true, 'cloudflare-turnstile')
|
|
336
|
+
if (hasAnyHtml(['cf-turnstile', 'challenges.cloudflare.com/turnstile'])) {
|
|
337
|
+
return byHtml('cloudflare-turnstile')
|
|
310
338
|
}
|
|
311
339
|
|
|
312
340
|
// Friendly Captcha: Check for friendlycaptcha.com in URL
|
|
313
341
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-friendlycaptcha.json
|
|
314
|
-
if (
|
|
315
|
-
return
|
|
342
|
+
if (hasAnyUrl([/friendlycaptcha\.com/i])) {
|
|
343
|
+
return byUrl('friendly-captcha')
|
|
316
344
|
}
|
|
317
345
|
|
|
318
346
|
// Friendly Captcha: Check for frc-captcha container or friendlyChallenge object in html
|
|
319
|
-
if (
|
|
320
|
-
return
|
|
347
|
+
if (hasAnyHtml(['frc-captcha', 'friendlyChallenge'])) {
|
|
348
|
+
return byHtml('friendly-captcha')
|
|
321
349
|
}
|
|
322
350
|
|
|
323
351
|
// Captcha.eu: Check for captcha.eu in URL
|
|
324
352
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-captchaeu.json
|
|
325
|
-
if (
|
|
326
|
-
return
|
|
353
|
+
if (hasAnyUrl([/captcha\.eu/i])) {
|
|
354
|
+
return byUrl('captcha-eu')
|
|
327
355
|
}
|
|
328
356
|
|
|
329
357
|
// Captcha.eu: Check for CaptchaEU or captchaeu in html
|
|
330
|
-
if (
|
|
331
|
-
return
|
|
358
|
+
if (hasAnyHtml(['CaptchaEU', 'captchaeu'])) {
|
|
359
|
+
return byHtml('captcha-eu')
|
|
332
360
|
}
|
|
333
361
|
|
|
334
362
|
// QCloud Captcha (Tencent): Check for turing.captcha.qcloud.com in URL
|
|
335
363
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-qcloud.json
|
|
336
|
-
if (
|
|
337
|
-
return
|
|
364
|
+
if (hasAnyUrl([/turing\.captcha\.qcloud\.com/i])) {
|
|
365
|
+
return byUrl('qcloud-captcha')
|
|
338
366
|
}
|
|
339
367
|
|
|
340
368
|
// QCloud Captcha: Check for TencentCaptcha or turing.captcha in html
|
|
341
|
-
if (
|
|
342
|
-
return
|
|
369
|
+
if (hasAnyHtml(['TencentCaptcha', 'turing.captcha'])) {
|
|
370
|
+
return byHtml('qcloud-captcha')
|
|
343
371
|
}
|
|
344
372
|
|
|
345
373
|
// AliExpress CAPTCHA: Check for punish?x5secdata in URL
|
|
346
374
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/captcha/detect-aliexpress.json
|
|
347
|
-
if (
|
|
348
|
-
return
|
|
375
|
+
if (hasAnyUrl([/punish\?x5secdata/i])) {
|
|
376
|
+
return byUrl('aliexpress-captcha')
|
|
349
377
|
}
|
|
350
378
|
|
|
351
379
|
// AliExpress CAPTCHA: Check for x5secdata in html
|
|
352
|
-
if (
|
|
353
|
-
return
|
|
380
|
+
if (hasAnyHtml(['x5secdata'])) {
|
|
381
|
+
return byHtml('aliexpress-captcha')
|
|
354
382
|
}
|
|
355
383
|
|
|
356
384
|
// LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
|
|
357
|
-
if (
|
|
358
|
-
return
|
|
385
|
+
if (hasAnyCookie(['trkCode=bf'])) {
|
|
386
|
+
return byCookies('linkedin')
|
|
359
387
|
}
|
|
360
388
|
|
|
361
389
|
// YouTube: empty title pattern indicates a degraded response requiring BotGuard JS attestation
|
|
362
390
|
// Normal pages have `<title>Video Title - YouTube</title>`, bots get `<title> - YouTube</title>`
|
|
363
|
-
if (
|
|
364
|
-
return
|
|
391
|
+
if (hasAnyHtml([/<title>\s*-\s*YouTube<\/title>/i])) {
|
|
392
|
+
return byHtml('youtube')
|
|
365
393
|
}
|
|
366
394
|
|
|
367
395
|
// AWS WAF: Check for x-amzn-waf-action or x-amzn-requestid headers
|
|
368
396
|
// Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-aws-waf.json
|
|
369
|
-
if (
|
|
370
|
-
return
|
|
397
|
+
if (hasAnyHeader(['x-amzn-waf-action', 'x-amzn-requestid'])) {
|
|
398
|
+
return byHeaders('aws-waf')
|
|
371
399
|
}
|
|
372
400
|
|
|
373
401
|
// AWS WAF: Check for aws-waf or awswaf text in html
|
|
374
|
-
if (
|
|
375
|
-
return
|
|
402
|
+
if (hasAnyHtml(['aws-waf', 'awswaf'])) {
|
|
403
|
+
return byHtml('aws-waf')
|
|
376
404
|
}
|
|
377
405
|
|
|
378
406
|
// AWS WAF: aws-waf-token cookie
|
|
379
|
-
if (
|
|
380
|
-
return
|
|
407
|
+
if (hasAnyCookie(['aws-waf-token='])) {
|
|
408
|
+
return byCookies('aws-waf')
|
|
381
409
|
}
|
|
382
410
|
|
|
383
|
-
return createResult(false, null)
|
|
411
|
+
return createResult(false, null, null)
|
|
384
412
|
}
|
|
385
413
|
|
|
386
414
|
const isAntibot = (input = {}) => {
|