cloudflare-bot-directory 0.0.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +52 -25
- package/package.json +33 -10
- package/src/index.d.ts +56 -0
- package/.editorconfig +0 -19
- package/.gitattributes +0 -1
- package/.github/dependabot.yml +0 -11
- package/.github/workflows/cron.yml +0 -32
- package/.github/workflows/main.yml +0 -68
- package/.github/workflows/pull_request.yml +0 -36
- package/scripts/fetch-bots.mjs +0 -29
package/README.md
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#
|
|
1
|
+
# CloudFlare Bot Directory
|
|
2
2
|
|
|
3
3
|
<p align="center">
|
|
4
4
|
<br>
|
|
@@ -6,43 +6,70 @@
|
|
|
6
6
|
<br>
|
|
7
7
|
</p>
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+

|
|
10
|
+
[](https://www.npmjs.org/package/cloudflare-bot-directory)
|
|
11
|
+
|
|
12
|
+
> A comprehensive list of **500+ verified bots** and web crawlers from [CloudFlare Radar](https://radar.cloudflare.com/traffic/verified-bots), available as a JSON dataset for bot detection, user agent analysis, and web scraping identification.
|
|
13
|
+
|
|
14
|
+
## Why
|
|
15
|
+
|
|
16
|
+
Identifying legitimate bots from malicious scrapers is essential for web security and analytics. This package provides the official CloudFlare Radar verified bots directory, helping you:
|
|
17
|
+
|
|
18
|
+
- **Detect verified bots** – Identify legitimate crawlers like Googlebot, Bingbot, and more
|
|
19
|
+
- **Filter analytics** – Exclude known bots from your traffic reports
|
|
20
|
+
- **Allow-list crawlers** – Permit verified bots while blocking suspicious traffic
|
|
21
|
+
- **User agent lookup** – Match user agent strings against known bot patterns
|
|
22
|
+
|
|
23
|
+
## Data Structure
|
|
24
|
+
|
|
25
|
+
Each bot entry includes:
|
|
26
|
+
|
|
27
|
+
| Field | Description |
|
|
28
|
+
| ------------------- | ---------------------------------------------- |
|
|
29
|
+
| `slug` | URL-friendly unique identifier |
|
|
30
|
+
| `name` | Human-readable bot name |
|
|
31
|
+
| `kind` | Bot kind classification |
|
|
32
|
+
| `operator` | Company or organization operating the bot |
|
|
33
|
+
| `operatorUrl` | URL to operator's documentation |
|
|
34
|
+
| `category` | Bot category (Search Engine, Monitoring, etc.) |
|
|
35
|
+
| `description` | What the bot does |
|
|
36
|
+
| `followsRobotsTxt` | Whether the bot respects robots.txt |
|
|
37
|
+
| `userAgentPatterns` | User agent pattern(s) for matching |
|
|
38
|
+
| `userAgents` | Known user agent string(s) |
|
|
10
39
|
|
|
11
40
|
## Install
|
|
12
41
|
|
|
13
42
|
```bash
|
|
14
|
-
|
|
43
|
+
npm install cloudflare-bot-directory
|
|
15
44
|
```
|
|
16
45
|
|
|
17
46
|
## Usage
|
|
18
47
|
|
|
19
48
|
```js
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
console.log(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
//
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
//
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
//
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
// 'addsearchbot',
|
|
36
|
-
// 'addthis',
|
|
37
|
-
// 'adidxbot',
|
|
38
|
-
// 'adstxtcrawler',
|
|
39
|
-
// ... 502 more items
|
|
40
|
-
// ]
|
|
49
|
+
const bots = require('cloudflare-bot-directory')
|
|
50
|
+
|
|
51
|
+
// Get all bot slugs
|
|
52
|
+
console.log(bots.map(bot => bot.slug))
|
|
53
|
+
// ['2checkout', '360monitoring', 'googlebot', 'bingbot', ...]
|
|
54
|
+
|
|
55
|
+
// Find a specific bot
|
|
56
|
+
const googlebot = bots.find(bot => bot.slug === 'googlebot')
|
|
57
|
+
|
|
58
|
+
// Filter by category
|
|
59
|
+
const searchBots = bots.filter(bot => bot.category === 'Search Engine Crawler')
|
|
60
|
+
|
|
61
|
+
// Check if a user agent is a known bot
|
|
62
|
+
const isKnownBot = (userAgent) =>
|
|
63
|
+
bots.some(bot => userAgent.includes(bot.name))
|
|
41
64
|
```
|
|
42
65
|
|
|
66
|
+
## Related
|
|
67
|
+
|
|
68
|
+
- [top-user-agents](https://github.com/microlinkhq/top-user-agents) – An always up-to-date list of the top 100 HTTP user-agents most used over the Internet.
|
|
69
|
+
|
|
43
70
|
## License
|
|
44
71
|
|
|
45
72
|
**cloudflare-bot-directory** © [Kiko Beats](https://kikobeats.com), released under the [MIT](https://github.com/kikobeats/cloudflare-bot-directory/blob/master/LICENSE.md) License.<br>
|
|
46
73
|
Authored and maintained by [Kiko Beats](https://kikobeats.com) with help from [contributors](https://github.com/kikobeats/cloudflare-bot-directory/contributors).
|
|
47
74
|
|
|
48
|
-
> [kikobeats.com](https://kikobeats.com) · GitHub [Kiko Beats](https://github.com/kikobeats) ·
|
|
75
|
+
> [kikobeats.com](https://kikobeats.com) · GitHub [Kiko Beats](https://github.com/kikobeats) · X [@kikobeats](https://x.com/kikobeats)
|
package/package.json
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cloudflare-bot-directory",
|
|
3
|
-
"description": "
|
|
3
|
+
"description": "CloudFlare Radar verified bots directory - 500+ web crawlers, search engine bots, and user agents as JSON for bot detection and traffic filtering",
|
|
4
4
|
"homepage": "https://github.com/kikobeats/cloudflare-bot-directory",
|
|
5
|
-
"version": "
|
|
5
|
+
"version": "1.0.0",
|
|
6
|
+
"types": "./src/index.d.ts",
|
|
7
|
+
"main": "./src/index.js",
|
|
6
8
|
"exports": {
|
|
7
9
|
".": "./src/index.js"
|
|
8
10
|
},
|
|
@@ -11,6 +13,7 @@
|
|
|
11
13
|
"name": "Kiko Beats",
|
|
12
14
|
"url": "https://kikobeats.com"
|
|
13
15
|
},
|
|
16
|
+
"contributors": [],
|
|
14
17
|
"repository": {
|
|
15
18
|
"type": "git",
|
|
16
19
|
"url": "git+https://github.com/kikobeats/cloudflare-bot-directory.git"
|
|
@@ -19,10 +22,29 @@
|
|
|
19
22
|
"url": "https://github.com/kikobeats/cloudflare-bot-directory/issues"
|
|
20
23
|
},
|
|
21
24
|
"keywords": [
|
|
25
|
+
"bingbot",
|
|
26
|
+
"bot-detection",
|
|
27
|
+
"bot-filtering",
|
|
28
|
+
"bot-list",
|
|
22
29
|
"bots",
|
|
23
30
|
"cloudflare",
|
|
31
|
+
"cloudflare-radar",
|
|
32
|
+
"crawler",
|
|
33
|
+
"crawlers",
|
|
24
34
|
"directory",
|
|
25
|
-
"
|
|
35
|
+
"googlebot",
|
|
36
|
+
"radar",
|
|
37
|
+
"scraper",
|
|
38
|
+
"scraper-detection",
|
|
39
|
+
"search-engine",
|
|
40
|
+
"security",
|
|
41
|
+
"spam-detection",
|
|
42
|
+
"traffic-filtering",
|
|
43
|
+
"user-agent",
|
|
44
|
+
"user-agents",
|
|
45
|
+
"verified-bots",
|
|
46
|
+
"web-crawler",
|
|
47
|
+
"web-security"
|
|
26
48
|
],
|
|
27
49
|
"devDependencies": {
|
|
28
50
|
"@commitlint/cli": "latest",
|
|
@@ -36,6 +58,7 @@
|
|
|
36
58
|
"git-authors-cli": "latest",
|
|
37
59
|
"github-generate-release": "latest",
|
|
38
60
|
"nano-staged": "latest",
|
|
61
|
+
"p-map": "latest",
|
|
39
62
|
"simple-git-hooks": "latest",
|
|
40
63
|
"standard": "latest",
|
|
41
64
|
"standard-version": "latest"
|
|
@@ -43,6 +66,9 @@
|
|
|
43
66
|
"engines": {
|
|
44
67
|
"node": ">= 20"
|
|
45
68
|
},
|
|
69
|
+
"files": [
|
|
70
|
+
"src"
|
|
71
|
+
],
|
|
46
72
|
"license": "MIT",
|
|
47
73
|
"commitlint": {
|
|
48
74
|
"extends": [
|
|
@@ -67,9 +93,6 @@
|
|
|
67
93
|
"commit-msg": "npx commitlint --edit",
|
|
68
94
|
"pre-commit": "npx nano-staged"
|
|
69
95
|
},
|
|
70
|
-
"dependencies": {
|
|
71
|
-
"p-map": "~7.0.4"
|
|
72
|
-
},
|
|
73
96
|
"scripts": {
|
|
74
97
|
"clean": "rm -rf node_modules",
|
|
75
98
|
"contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
|
|
@@ -78,12 +101,12 @@
|
|
|
78
101
|
"postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
|
|
79
102
|
"pretest": "npm run lint",
|
|
80
103
|
"release": "pnpm run release:version && pnpm run release:changelog && pnpm run release:commit && pnpm run release:tag",
|
|
81
|
-
"release:github": "github-generate-release",
|
|
82
|
-
"release:tags": "git push origin HEAD:master --follow-tags",
|
|
83
|
-
"test": "c8 ava",
|
|
84
104
|
"release:changelog": "conventional-changelog -p conventionalcommits -i CHANGELOG.md -s",
|
|
85
105
|
"release:commit": "git add package.json CHANGELOG.md && git commit -m \"chore(release): $(node -p \"require('./package.json').version\")\"",
|
|
106
|
+
"release:github": "github-generate-release",
|
|
86
107
|
"release:tag": "git tag -a v$(node -p \"require('./package.json').version\") -m \"v$(node -p \"require('./package.json').version\")\"",
|
|
87
|
-
"release:
|
|
108
|
+
"release:tags": "git push origin HEAD:master --follow-tags",
|
|
109
|
+
"release:version": "standard-version --skip.changelog --skip.commit --skip.tag",
|
|
110
|
+
"test": "c8 ava"
|
|
88
111
|
}
|
|
89
112
|
}
|
package/src/index.d.ts
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Bot category types from CloudFlare Radar
|
|
3
|
+
*/
|
|
4
|
+
export type BotCategory =
|
|
5
|
+
| 'ACCESSIBILITY'
|
|
6
|
+
| 'ADVERTISING'
|
|
7
|
+
| 'AI_CRAWLER'
|
|
8
|
+
| 'ARCHIVING'
|
|
9
|
+
| 'FEED_FETCHER'
|
|
10
|
+
| 'MONITORING_AND_ANALYTICS'
|
|
11
|
+
| 'ONLINE_PAYMENTS'
|
|
12
|
+
| 'OTHER'
|
|
13
|
+
| 'PAGE_PREVIEW'
|
|
14
|
+
| 'SEARCH_ENGINE_CRAWLER'
|
|
15
|
+
| 'SECURITY'
|
|
16
|
+
| 'SEO'
|
|
17
|
+
| 'SOCIAL_MEDIA_MARKETING'
|
|
18
|
+
| 'WEBHOOKS'
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Bot kind types
|
|
22
|
+
*/
|
|
23
|
+
export type BotKind = 'BOT'
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* A verified bot entry from CloudFlare Radar Bot Directory
|
|
27
|
+
*/
|
|
28
|
+
export interface Bot {
|
|
29
|
+
/** URL-friendly unique identifier */
|
|
30
|
+
slug: string
|
|
31
|
+
/** Human-readable bot name */
|
|
32
|
+
name: string
|
|
33
|
+
/** Bot kind classification */
|
|
34
|
+
kind: BotKind
|
|
35
|
+
/** Company or organization operating the bot */
|
|
36
|
+
operator: string
|
|
37
|
+
/** URL to operator's documentation or website */
|
|
38
|
+
operatorUrl: string
|
|
39
|
+
/** Bot category classification */
|
|
40
|
+
category: BotCategory
|
|
41
|
+
/** Description of what the bot does */
|
|
42
|
+
description: string
|
|
43
|
+
/** Whether the bot respects robots.txt */
|
|
44
|
+
followsRobotsTxt: boolean
|
|
45
|
+
/** User agent pattern(s) for matching */
|
|
46
|
+
userAgentPatterns: string[]
|
|
47
|
+
/** Known user agent string(s) */
|
|
48
|
+
userAgents: string[]
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Array of all verified bots from CloudFlare Radar Bot Directory
|
|
53
|
+
*/
|
|
54
|
+
declare const bots: Bot[]
|
|
55
|
+
|
|
56
|
+
export default bots
|
package/.editorconfig
DELETED
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
# https://editorconfig.org
|
|
2
|
-
|
|
3
|
-
root = true
|
|
4
|
-
|
|
5
|
-
[*]
|
|
6
|
-
indent_style = space
|
|
7
|
-
indent_size = 2
|
|
8
|
-
end_of_line = lf
|
|
9
|
-
charset = utf-8
|
|
10
|
-
trim_trailing_whitespace = true
|
|
11
|
-
insert_final_newline = true
|
|
12
|
-
max_line_length = 80
|
|
13
|
-
indent_brace_style = 1TBS
|
|
14
|
-
spaces_around_operators = true
|
|
15
|
-
quote_type = auto
|
|
16
|
-
|
|
17
|
-
[package.json]
|
|
18
|
-
indent_style = space
|
|
19
|
-
indent_size = 2
|
package/.gitattributes
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
* text=auto
|
package/.github/dependabot.yml
DELETED
|
(11 deleted lines not shown)
|
package/.github/workflows/cron.yml
DELETED
|
@@ -1,32 +0,0 @@
|
|
|
1
|
-
name: cron
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
workflow_dispatch:
|
|
5
|
-
schedule:
|
|
6
|
-
# Cron job every Monday at 00:00
|
|
7
|
-
# https://crontab.guru/every-monday
|
|
8
|
-
- cron: '0 0 * * MON'
|
|
9
|
-
|
|
10
|
-
jobs:
|
|
11
|
-
update:
|
|
12
|
-
runs-on: ubuntu-latest
|
|
13
|
-
steps:
|
|
14
|
-
- name: Checkout
|
|
15
|
-
uses: actions/checkout@v4
|
|
16
|
-
with:
|
|
17
|
-
token: ${{ secrets.GH_TOKEN }}
|
|
18
|
-
- name: Setup Node.js
|
|
19
|
-
uses: actions/setup-node@v4
|
|
20
|
-
with:
|
|
21
|
-
node-version: lts/*
|
|
22
|
-
- name: Update
|
|
23
|
-
env:
|
|
24
|
-
CLOUDFLARE_EMAIL: ${{ secrets.CLOUDFLARE_EMAIL }}
|
|
25
|
-
CLOUDFLARE_API_KEY: ${{ secrets.CLOUDFLARE_API_KEY }}
|
|
26
|
-
run: |
|
|
27
|
-
git config --global user.email ${{ secrets.GIT_EMAIL }}
|
|
28
|
-
git config --global user.name ${{ secrets.GIT_USERNAME }}
|
|
29
|
-
node scripts/fetch-bots.mjs
|
|
30
|
-
git add src/index.json
|
|
31
|
-
git diff-index --quiet HEAD || git commit -m 'build(update): bots' --no-verify
|
|
32
|
-
git push origin ${{ github.head_ref }} || true
|
|
package/.github/workflows/main.yml
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
name: main
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches:
|
|
6
|
-
- master
|
|
7
|
-
|
|
8
|
-
jobs:
|
|
9
|
-
contributors:
|
|
10
|
-
if: "${{ github.event.head_commit.message != 'build: contributors' }}"
|
|
11
|
-
runs-on: ubuntu-latest
|
|
12
|
-
steps:
|
|
13
|
-
- name: Checkout
|
|
14
|
-
uses: actions/checkout@v6
|
|
15
|
-
with:
|
|
16
|
-
fetch-depth: 0
|
|
17
|
-
token: ${{ secrets.GITHUB_TOKEN }}
|
|
18
|
-
- name: Setup Node.js
|
|
19
|
-
uses: actions/setup-node@v6
|
|
20
|
-
with:
|
|
21
|
-
node-version: lts/*
|
|
22
|
-
- name: Contributors
|
|
23
|
-
run: |
|
|
24
|
-
git config --global user.email ${{ secrets.GIT_EMAIL }}
|
|
25
|
-
git config --global user.name ${{ secrets.GIT_USERNAME }}
|
|
26
|
-
npm run contributors
|
|
27
|
-
- name: Push changes
|
|
28
|
-
run: |
|
|
29
|
-
git push origin ${{ github.head_ref }}
|
|
30
|
-
|
|
31
|
-
release:
|
|
32
|
-
if: |
|
|
33
|
-
!startsWith(github.event.head_commit.message, 'chore(release):') &&
|
|
34
|
-
!startsWith(github.event.head_commit.message, 'docs:') &&
|
|
35
|
-
!startsWith(github.event.head_commit.message, 'ci:')
|
|
36
|
-
needs: [contributors]
|
|
37
|
-
runs-on: ubuntu-latest
|
|
38
|
-
steps:
|
|
39
|
-
- name: Checkout
|
|
40
|
-
uses: actions/checkout@v6
|
|
41
|
-
with:
|
|
42
|
-
token: ${{ secrets.GITHUB_TOKEN }}
|
|
43
|
-
- name: Setup Node.js
|
|
44
|
-
uses: actions/setup-node@v6
|
|
45
|
-
with:
|
|
46
|
-
node-version: lts/*
|
|
47
|
-
- name: Setup PNPM
|
|
48
|
-
uses: pnpm/action-setup@v4
|
|
49
|
-
with:
|
|
50
|
-
version: latest
|
|
51
|
-
run_install: true
|
|
52
|
-
- name: Test
|
|
53
|
-
run: pnpm test
|
|
54
|
-
- name: Report
|
|
55
|
-
run: npx c8 report --reporter=text-lcov > coverage/lcov.info
|
|
56
|
-
- name: Coverage
|
|
57
|
-
uses: coverallsapp/github-action@main
|
|
58
|
-
with:
|
|
59
|
-
github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
60
|
-
- name: Release
|
|
61
|
-
env:
|
|
62
|
-
GH_TOKEN: ${{ secrets.GH_TOKEN }}
|
|
63
|
-
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
|
|
64
|
-
run: |
|
|
65
|
-
git config --global user.email ${{ secrets.GIT_EMAIL }}
|
|
66
|
-
git config --global user.name ${{ secrets.GIT_USERNAME }}
|
|
67
|
-
git pull origin master
|
|
68
|
-
pnpm run release
|
|
package/.github/workflows/pull_request.yml
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
name: pull_request
|
|
2
|
-
|
|
3
|
-
on:
|
|
4
|
-
push:
|
|
5
|
-
branches:
|
|
6
|
-
- master
|
|
7
|
-
pull_request:
|
|
8
|
-
branches:
|
|
9
|
-
- master
|
|
10
|
-
|
|
11
|
-
jobs:
|
|
12
|
-
test:
|
|
13
|
-
if: github.ref != 'refs/heads/master'
|
|
14
|
-
runs-on: ubuntu-latest
|
|
15
|
-
steps:
|
|
16
|
-
- name: Checkout
|
|
17
|
-
uses: actions/checkout@v6
|
|
18
|
-
with:
|
|
19
|
-
token: ${{ secrets.GITHUB_TOKEN }}
|
|
20
|
-
- name: Setup Node.js
|
|
21
|
-
uses: actions/setup-node@v6
|
|
22
|
-
with:
|
|
23
|
-
node-version: lts/*
|
|
24
|
-
- name: Setup PNPM
|
|
25
|
-
uses: pnpm/action-setup@v4
|
|
26
|
-
with:
|
|
27
|
-
version: latest
|
|
28
|
-
run_install: true
|
|
29
|
-
- name: Test
|
|
30
|
-
run: pnpm test
|
|
31
|
-
- name: Report
|
|
32
|
-
run: npx c8 report --reporter=text-lcov > coverage/lcov.info
|
|
33
|
-
- name: Coverage
|
|
34
|
-
uses: coverallsapp/github-action@main
|
|
35
|
-
with:
|
|
36
|
-
github-token: ${{ secrets.GITHUB_TOKEN }}
|
package/scripts/fetch-bots.mjs
DELETED
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import { writeFile } from 'node:fs/promises'
|
|
2
|
-
import pMap from 'p-map'
|
|
3
|
-
|
|
4
|
-
const cloudflare = path =>
|
|
5
|
-
fetch(`https://api.cloudflare.com/client/v4/radar/${path}`, {
|
|
6
|
-
headers: {
|
|
7
|
-
'x-auth-email': process.env.CLOUDFLARE_EMAIL,
|
|
8
|
-
'x-auth-key': process.env.CLOUDFLARE_API_KEY
|
|
9
|
-
}
|
|
10
|
-
}).then(res => res.json())
|
|
11
|
-
|
|
12
|
-
const allBots = (await cloudflare('/bots?limit=999')).result.bots
|
|
13
|
-
|
|
14
|
-
let completed = 0
|
|
15
|
-
|
|
16
|
-
const percentage = completed =>
|
|
17
|
-
`${Math.round((completed / allBots.length) * 100)}%`
|
|
18
|
-
|
|
19
|
-
const bots = await pMap(
|
|
20
|
-
allBots,
|
|
21
|
-
async bot => {
|
|
22
|
-
const details = await cloudflare(`/bots/${bot.slug}`)
|
|
23
|
-
process.stdout.write(`…${percentage(++completed)}`)
|
|
24
|
-
return details.result.bot
|
|
25
|
-
},
|
|
26
|
-
{ concurrency: 2 }
|
|
27
|
-
)
|
|
28
|
-
|
|
29
|
-
await writeFile('src/index.json', JSON.stringify(bots, null, 2))
|