is-antibot 1.4.1 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -3
- package/package.json +2 -1
- package/src/index.js +26 -7
package/README.md
CHANGED
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
- **Ocule** - Bot detection with advanced obfuscation
|
|
31
31
|
- **YouTube** - BotGuard attestation and abuse detection
|
|
32
32
|
- **LinkedIn** - Bot filter protection
|
|
33
|
+
- **Reddit** - Network security challenge-page detection
|
|
33
34
|
|
|
34
35
|
### CAPTCHA Providers
|
|
35
36
|
|
|
@@ -45,7 +46,7 @@
|
|
|
45
46
|
|
|
46
47
|
## Why
|
|
47
48
|
|
|
48
|
-
Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
|
|
49
|
+
Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Reddit, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
|
|
49
50
|
|
|
50
51
|
When you try to fetch the HTML of these sites without the right tools, you often hit a 403 Forbidden, 429 Too Many Requests, or a "Please prove you're human" challenge, leaving you with a response that contains no useful data.
|
|
51
52
|
|
|
@@ -59,7 +60,7 @@ $ npm install is-antibot --save
|
|
|
59
60
|
|
|
60
61
|
## Usage
|
|
61
62
|
|
|
62
|
-
Just pass `headers`, `html`, and `url` from any HTTP response:
|
|
63
|
+
Just pass `headers`, `html`, `url`, and `statusCode` from any HTTP response:
|
|
63
64
|
|
|
64
65
|
```js
|
|
65
66
|
const isAntibot = require('is-antibot')
|
|
@@ -69,6 +70,7 @@ const html = await response.text()
|
|
|
69
70
|
|
|
70
71
|
const { detected, provider, detection } = isAntibot({
|
|
71
72
|
headers: response.headers,
|
|
73
|
+
statusCode: response.status,
|
|
72
74
|
html,
|
|
73
75
|
url: response.url
|
|
74
76
|
})
|
|
@@ -95,7 +97,7 @@ The library returns an object with the following properties:
|
|
|
95
97
|
|
|
96
98
|
- `detected` (boolean): Whether an antibot challenge was detected
|
|
97
99
|
- `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha')
|
|
98
|
-
- `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, or `'url'`
|
|
100
|
+
- `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, `'url'`, or `'statusCode'`
|
|
99
101
|
|
|
100
102
|
## License
|
|
101
103
|
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "is-antibot",
|
|
3
3
|
"description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
|
|
4
4
|
"homepage": "https://github.com/microlinkhq/is-antibot",
|
|
5
|
-
"version": "1.4.1",
|
|
5
|
+
"version": "1.6.0",
|
|
6
6
|
"exports": {
|
|
7
7
|
".": "./src/index.js"
|
|
8
8
|
},
|
|
@@ -55,6 +55,7 @@
|
|
|
55
55
|
"waf"
|
|
56
56
|
],
|
|
57
57
|
"dependencies": {
|
|
58
|
+
"@metascraper/helpers": "~5.50.0",
|
|
58
59
|
"cookie-es": "~3.1.1",
|
|
59
60
|
"debug-logfmt": "~1.4.7"
|
|
60
61
|
},
|
package/src/index.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
3
|
const { splitSetCookieString } = require('cookie-es')
|
|
4
|
+
const { parseUrl } = require('@metascraper/helpers')
|
|
4
5
|
const debug = require('debug-logfmt')('is-antibot')
|
|
5
6
|
|
|
6
7
|
const DETECTION = {
|
|
7
8
|
HEADERS: 'headers',
|
|
8
9
|
COOKIES: 'cookies',
|
|
9
10
|
HTML: 'html',
|
|
10
|
-
URL: 'url'
|
|
11
|
+
URL: 'url',
|
|
12
|
+
STATUS_CODE: 'statusCode'
|
|
11
13
|
}
|
|
12
14
|
|
|
13
15
|
const createGetHeader = headers =>
|
|
@@ -56,7 +58,7 @@ const getHeaderNames = headers =>
|
|
|
56
58
|
? Array.from(headers.keys())
|
|
57
59
|
: Object.keys(headers)
|
|
58
60
|
|
|
59
|
-
const detect = ({ headers = {}, html = '', url = '' } = {}) => {
|
|
61
|
+
const detect = ({ headers = {}, html = '', url = '', statusCode } = {}) => {
|
|
60
62
|
const getHeader = createGetHeader(headers)
|
|
61
63
|
const hasCookie = createHasCookie(headers)
|
|
62
64
|
const htmlHas = createTestPattern(html)
|
|
@@ -80,6 +82,9 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
|
|
|
80
82
|
|
|
81
83
|
const byUrl = provider => createResult(true, provider, DETECTION.URL)
|
|
82
84
|
|
|
85
|
+
const byStatusCode = provider =>
|
|
86
|
+
createResult(true, provider, DETECTION.STATUS_CODE)
|
|
87
|
+
|
|
83
88
|
// CloudFlare: Check for cf-mitigated header with 'challenge' value
|
|
84
89
|
// Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
|
|
85
90
|
if (getHeader('cf-mitigated') === 'challenge') {
|
|
@@ -387,9 +392,18 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
|
|
|
387
392
|
return byHtml('aliexpress-captcha')
|
|
388
393
|
}
|
|
389
394
|
|
|
390
|
-
//
|
|
391
|
-
|
|
392
|
-
|
|
395
|
+
// Reddit: blocked requests are served as HTML challenge pages.
|
|
396
|
+
// Strongest signal is the blocked-page copy in HTML.
|
|
397
|
+
if (
|
|
398
|
+
parseUrl(url).domain === 'reddit.com' &&
|
|
399
|
+
hasAnyHtml([/blocked by network security\./i])
|
|
400
|
+
) {
|
|
401
|
+
return byHtml('reddit')
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
// LinkedIn: status 999 is LinkedIn's dedicated bot-detection response
|
|
405
|
+
if (parseUrl(url).domain === 'linkedin.com' && statusCode === 999) {
|
|
406
|
+
return byStatusCode('linkedin')
|
|
393
407
|
}
|
|
394
408
|
|
|
395
409
|
// YouTube: empty title pattern indicates a degraded response requiring BotGuard JS attestation
|
|
@@ -418,8 +432,13 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
|
|
|
418
432
|
}
|
|
419
433
|
|
|
420
434
|
const isAntibot = (input = {}) => {
|
|
421
|
-
const { headers, html, body, url } = input
|
|
422
|
-
return detect({
|
|
435
|
+
const { headers, html, body, url, statusCode, status } = input
|
|
436
|
+
return detect({
|
|
437
|
+
headers,
|
|
438
|
+
html: html || body,
|
|
439
|
+
url,
|
|
440
|
+
statusCode: statusCode ?? status
|
|
441
|
+
})
|
|
423
442
|
}
|
|
424
443
|
|
|
425
444
|
module.exports = isAntibot
|