is-antibot 1.4.0 → 1.5.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/LICENSE.md CHANGED
File without changes
package/README.md CHANGED
@@ -30,6 +30,7 @@
30
30
  - **Ocule** - Bot detection with advanced obfuscation
31
31
  - **YouTube** - BotGuard attestation and abuse detection
32
32
  - **LinkedIn** - Bot filter protection
33
+ - **Reddit** - Network security challenge-page detection
33
34
 
34
35
  ### CAPTCHA Providers
35
36
 
@@ -45,7 +46,7 @@
45
46
 
46
47
  ## Why
47
48
 
48
- Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
49
+ Websites receiving massive quantities of traffic throughout the day, like LinkedIn, Reddit, Instagram, or YouTube, have sophisticated antibot systems to prevent automated access.
49
50
 
50
51
  When you try to fetch the HTML of these sites without the right tools, you often hit a 403 Forbidden, 429 Too Many Requests, or a "Please prove you're human" challenge, leaving you with a response that contains no useful data.
51
52
 
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "is-antibot",
3
3
  "description": "Identify if a response is an antibot challenge from CloudFlare, Akamai, DataDome, Vercel, PerimeterX, Shape Security, and more, including CAPTCHA providers like reCAPTCHA and hCaptcha.",
4
4
  "homepage": "https://github.com/microlinkhq/is-antibot",
5
- "version": "1.4.0",
5
+ "version": "1.5.0",
6
6
  "exports": {
7
7
  ".": "./src/index.js"
8
8
  },
@@ -55,6 +55,7 @@
55
55
  "waf"
56
56
  ],
57
57
  "dependencies": {
58
+ "@metascraper/helpers": "~5.50.0",
58
59
  "cookie-es": "~3.1.1",
59
60
  "debug-logfmt": "~1.4.7"
60
61
  },
@@ -81,22 +82,6 @@
81
82
  "files": [
82
83
  "src"
83
84
  ],
84
- "scripts": {
85
- "clean": "rm -rf node_modules",
86
- "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
87
- "coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
88
- "lint": "standard",
89
- "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
90
- "pretest": "npm run lint",
91
- "release": "pnpm run release:version && pnpm run release:changelog && pnpm run release:commit && pnpm run release:tag",
92
- "release:changelog": "conventional-changelog -p conventionalcommits -i CHANGELOG.md -s",
93
- "release:commit": "git add package.json CHANGELOG.md && git commit -m \"chore(release): $(node -p \"require('./package.json').version\")\"",
94
- "release:github": "github-generate-release",
95
- "release:tag": "git tag -a v$(node -p \"require('./package.json').version\") -m \"v$(node -p \"require('./package.json').version\")\"",
96
- "release:tags": "git push origin HEAD:master --follow-tags",
97
- "release:version": "standard-version --skip.changelog --skip.commit --skip.tag",
98
- "test": "c8 ava"
99
- },
100
85
  "license": "MIT",
101
86
  "ava": {
102
87
  "files": [
@@ -126,5 +111,21 @@
126
111
  "simple-git-hooks": {
127
112
  "commit-msg": "npx commitlint --edit",
128
113
  "pre-commit": "npx nano-staged"
114
+ },
115
+ "scripts": {
116
+ "clean": "rm -rf node_modules",
117
+ "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
118
+ "coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
119
+ "lint": "standard",
120
+ "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
121
+ "pretest": "npm run lint",
122
+ "release": "pnpm run release:version && pnpm run release:changelog && pnpm run release:commit && pnpm run release:tag",
123
+ "release:changelog": "conventional-changelog -p conventionalcommits -i CHANGELOG.md -s",
124
+ "release:commit": "git add package.json CHANGELOG.md && git commit -m \"chore(release): $(node -p \"require('./package.json').version\")\"",
125
+ "release:github": "github-generate-release",
126
+ "release:tag": "git tag -a v$(node -p \"require('./package.json').version\") -m \"v$(node -p \"require('./package.json').version\")\"",
127
+ "release:tags": "git push origin HEAD:master --follow-tags",
128
+ "release:version": "standard-version --skip.changelog --skip.commit --skip.tag",
129
+ "test": "c8 ava"
129
130
  }
130
- }
131
+ }
package/src/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  'use strict'
2
2
 
3
3
  const { splitSetCookieString } = require('cookie-es')
4
+ const { parseUrl } = require('@metascraper/helpers')
4
5
  const debug = require('debug-logfmt')('is-antibot')
5
6
 
6
7
  const DETECTION = {
@@ -125,9 +126,15 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
125
126
  return byHeaders('datadome')
126
127
  }
127
128
 
128
- // DataDome: Check for x-datadome or x-datadome-cid header presence
129
- // Reference: https://github.com/scrapfly/Antibot-Detector/blob/main/detectors/antibot/detect-datadome.json
130
- if (hasAnyHeader(['x-datadome', 'x-datadome-cid'])) {
129
+ // DataDome: x-datadome header presence.
130
+ // Note: `x-datadome: protected` can appear on successful responses.
131
+ const xDatadome = getHeader('x-datadome')
132
+ if (xDatadome && String(xDatadome).toLowerCase() !== 'protected') {
133
+ return byHeaders('datadome')
134
+ }
135
+
136
+ // DataDome: x-datadome-cid header presence
137
+ if (hasAnyHeader(['x-datadome-cid'])) {
131
138
  return byHeaders('datadome')
132
139
  }
133
140
 
@@ -381,6 +388,15 @@ const detect = ({ headers = {}, html = '', url = '' } = {}) => {
381
388
  return byHtml('aliexpress-captcha')
382
389
  }
383
390
 
391
+ // Reddit: blocked requests are served as HTML challenge pages.
392
+ // Strongest signal is the blocked-page copy in HTML.
393
+ if (
394
+ parseUrl(url).domain === 'reddit.com' &&
395
+ hasAnyHtml([/blocked by network security\./i])
396
+ ) {
397
+ return byHtml('reddit')
398
+ }
399
+
384
400
  // LinkedIn: trkCode=bf cookie ("bot filter") is set when LinkedIn blocks a request
385
401
  if (hasAnyCookie(['trkCode=bf'])) {
386
402
  return byCookies('linkedin')