is-antibot 1.4.1 → 1.6.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,7 @@
30
30
  - **Ocule** - Bot detection with advanced obfuscation
31
31
  - **YouTube** - BotGuard attestation and abuse detection
32
32
  - **LinkedIn** - Bot filter protection
33
+ - **Reddit** - Network security challenge-page detection
33
34
 
34
35
  ### CAPTCHA Providers
35
36
 
@@ -45,7 +46,7 @@
45
46
 
46
47
  ## Why
47
48
 
48
- Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
49
+ Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Reddit, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
49
50
 
50
51
  When you try to fetch the HTML of these sites without the right tools, you often hit a 403 Forbidden, 429 Too Many Requests, or a "Please prove you're human" challenge, leaving you with a response that contains no useful data.
51
52
 
@@ -59,7 +60,7 @@ $ npm install is-antibot --save
59
60
 
60
61
  ## Usage
61
62
 
62
- Just pass `headers`, `html`, and `url` from any HTTP response:
63
+ Just pass `headers`, `html`, `url`, and `statusCode` from any HTTP response:
63
64
 
64
65
  ```js
65
66
  const isAntibot = require('is-antibot')
@@ -69,6 +70,7 @@ const html = await response.text()
69
70
 
70
71
  const { detected, provider, detection } = isAntibot({
71
72
  headers: response.headers,
73
+ statusCode: response.status,
72
74
  html,
73
75
  url: response.url
74
76
  })
@@ -95,7 +97,7 @@ The library returns an object with the following properties:
95
97
 
96
98
  - `detected` (boolean): Whether an antibot challenge was detected
97
99
  - `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha')
98
- - `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, or `'url'`
100
+ - `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, `'url'`, or `'statusCode'`
99
101
 
100
102
  ## License
101
103
 
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "is-antibot",
3
3
  "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
4
4
  "homepage": "https://github.com/microlinkhq/is-antibot",
5
- "version": "1.4.1",
5
+ "version": "1.6.0",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
8
8
  },
@@ -55,6 +55,7 @@
55
55
  "waf"
56
56
  ],
57
57
  "dependencies": {
58
+ "@metascraper/helpers": "~5.50.0",
58
59
  "cookie-es": "~3.1.1",
59
60
  "debug-logfmt": "~1.4.7"
60
61
  },
package/src/index.js CHANGED
@@ -1,13 +1,15 @@
1
1
  'use strict'
2
2
 
3
3
  const { splitSetCookieString } = require('cookie-es')
4
+ const { parseUrl } = require('@metascraper/helpers')
4
5
  const debug = require('debug-logfmt')('is-antibot')
5
6
 
6
7
  const DETECTION = {
7
8
  HEADERS: 'headers',
8
9
  COOKIES: 'cookies',
9
10
  HTML: 'html',
10
- URL: 'url'
11
+ URL: 'url',
12
+ STATUS_CODE: 'statusCode'
11
13
  }
12
14
 
13
15
  const createGetHeader = headers =>
@@ -56,7 +58,7 @@ const getHeaderNames = headers =>
56
58
  ? Array.from(headers.keys())
57
59
  : Object.keys(headers)
58
60
 
59
- const detect = ({ headers = {}, html = '', url = '' } = {}) => {
61
+ const detect = ({ headers = {}, html = '', url = '', statusCode } = {}) => {
60
62
  const getHeader = createGetHeader(headers)
61
63
  const hasCookie = createHasCookie(headers)
62
64
  const htmlHas = createTestPattern(html)
@@ -80,6 +82,9 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
80
82
 
81
83
  const byUrl = provider => createResult(true, provider, DETECTION.URL)
82
84
 
85
+ const byStatusCode = provider =>
86
+ createResult(true, provider, DETECTION.STATUS_CODE)
87
+
83
88
  // CloudFlare: Check for cf-mitigated header with 'challenge' value
84
89
  // Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
85
90
  if (getHeader('cf-mitigated') === 'challenge') {
@@ -387,9 +392,18 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
387
392
  return byHtml('aliexpress-captcha')
388
393
  }
389
394
 
390
- // LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
391
- if (hasAnyCookie(['trkCode=bf'])) {
392
- return byCookies('linkedin')
395
+ // Reddit: blocked requests are served as HTML challenge pages.
396
+ // Strongest signal is the blocked-page copy in HTML.
397
+ if (
398
+ parseUrl(url).domain === 'reddit.com' &&
399
+ hasAnyHtml([/blocked by network security\./i])
400
+ ) {
401
+ return byHtml('reddit')
402
+ }
403
+
404
+ // LinkedIn: status 999 is LinkedIn's dedicated bot-detection response
405
+ if (parseUrl(url).domain === 'linkedin.com' && statusCode === 999) {
406
+ return byStatusCode('linkedin')
393
407
  }
394
408
 
395
409
  // YouTube: empty title pattern indicates a degraded response requiring BotGuard JS attestation
@@ -418,8 +432,13 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
418
432
  }
419
433
 
420
434
  const isAntibot = (input = {}) => {
421
- const { headers, html, body, url } = input
422
- return detect({ headers, html: html || body, url })
435
+ const { headers, html, body, url, statusCode, status } = input
436
+ return detect({
437
+ headers,
438
+ html: html || body,
439
+ url,
440
+ statusCode: statusCode ?? status
441
+ })
423
442
  }
424
443
 
425
444
  module.exports = isAntibot