npm - is-antibot - Versions diffs - 1.4.1 → 1.6.0 - Mend

is-antibot 1.4.1 → 1.6.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md CHANGED Viewed

@@ -30,6 +30,7 @@
 - **Ocule** - Bot detection with advanced obfuscation
 - **YouTube** - BotGuard attestation and abuse detection
 - **LinkedIn** - Bot filter protection
+- **Reddit** - Network security challenge-page detection
 ### CAPTCHA Providers
@@ -45,7 +46,7 @@
 ## Why
-Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
+Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Reddit, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
 When you try to fetch the HTML of these sites without the right tools, you often hit a 403 Forbidden, 429 Too Many Requests, or a "Please prove you're human" challenge, leaving you with a response that contains no useful data.
@@ -59,7 +60,7 @@ $ npm install is-antibot --save
 ## Usage
-Just pass `headers`, `html`, and `url` from any HTTP response:
+Just pass `headers`, `html`, `url`, and `statusCode` from any HTTP response:
 ```js
 const isAntibot = require('is-antibot')
@@ -69,6 +70,7 @@ const html = await response.text()
 const { detected, provider, detection } = isAntibot({
   headers: response.headers,
+  statusCode: response.status,
   html,
   url: response.url
 })
@@ -95,7 +97,7 @@ The library returns an object with the following properties:
 - `detected` (boolean): Whether an antibot challenge was detected
 - `provider` (string|null): The name of the detected provider (e.g., 'cloudflare', 'recaptcha')
-- `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, or `'url'`
+- `detection` (string|null): Where the signal came from: `'headers'`, `'cookies'`, `'html'`, `'url'`, or `'statusCode'`
 ## License

package/package.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "name": "is-antibot",
   "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
   "homepage": "https://github.com/microlinkhq/is-antibot",
-  "version": "1.4.1",
+  "version": "1.6.0",
   "exports": {
     ".": "./src/index.js"
   },
@@ -55,6 +55,7 @@
     "waf"
   ],
   "dependencies": {
+    "@metascraper/helpers": "~5.50.0",
     "cookie-es": "~3.1.1",
     "debug-logfmt": "~1.4.7"
   },

package/src/index.js CHANGED Viewed

@@ -1,13 +1,15 @@
 'use strict'
 const { splitSetCookieString } = require('cookie-es')
+const { parseUrl } = require('@metascraper/helpers')
 const debug = require('debug-logfmt')('is-antibot')
 const DETECTION = {
   HEADERS: 'headers',
   COOKIES: 'cookies',
   HTML: 'html',
-  URL: 'url'
+  URL: 'url',
+  STATUS_CODE: 'statusCode'
 }
 const createGetHeader = headers =>
@@ -56,7 +58,7 @@ const getHeaderNames = headers =>
     ? Array.from(headers.keys())
     : Object.keys(headers)
-const detect = ({ headers = {}, html = '', url = '' } = {}) => {
+const detect = ({ headers = {}, html = '', url = '', statusCode } = {}) => {
   const getHeader = createGetHeader(headers)
   const hasCookie = createHasCookie(headers)
   const htmlHas = createTestPattern(html)
@@ -80,6 +82,9 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
   const byUrl = provider => createResult(true, provider, DETECTION.URL)
+  const byStatusCode = provider =>
+    createResult(true, provider, DETECTION.STATUS_CODE)
   // CloudFlare: Check for cf-mitigated header with 'challenge' value
   // Official docs: https://developers.cloudflare.com/cloudflare-challenges/challenge-types/challenge-pages/detect-response/
   if (getHeader('cf-mitigated') === 'challenge') {
@@ -387,9 +392,18 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
     return byHtml('aliexpress-captcha')
   }
-  // LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
-  if (hasAnyCookie(['trkCode=bf'])) {
-    return byCookies('linkedin')
+  // Reddit: blocked requests are served as HTML challenge pages.
+  // Strongest signal is the blocked-page copy in HTML.
+  if (
+    parseUrl(url).domain === 'reddit.com' &&
+    hasAnyHtml([/blocked by network security\./i])
+  ) {
+    return byHtml('reddit')
+  }
+  // LinkedIn: status 999 is LinkedIn's dedicated bot-detection response
+  if (parseUrl(url).domain === 'linkedin.com' && statusCode === 999) {
+    return byStatusCode('linkedin')
   }
   // YouTube: empty title pattern indicates a degraded response requiring BotGuard JS attestation
@@ -418,8 +432,13 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
 }
 const isAntibot = (input = {}) => {
-  const { headers, html, body, url } = input
-  return detect({ headers, html: html || body, url })
+  const { headers, html, body, url, statusCode, status } = input
+  return detect({
+    headers,
+    html: html || body,
+    url,
+    statusCode: statusCode ?? status
+  })
 }
 module.exports = isAntibot