bot_verification 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +256 -0
- data/LICENSE +21 -0
- data/README.md +355 -0
- data/Rakefile +8 -0
- data/bot_verification.gemspec +37 -0
- data/lib/bot_verification/bot_patterns.rb +121 -0
- data/lib/bot_verification/configuration.rb +139 -0
- data/lib/bot_verification/controller_concern.rb +150 -0
- data/lib/bot_verification/ip_range_fetcher.rb +155 -0
- data/lib/bot_verification/ip_range_model.rb +132 -0
- data/lib/bot_verification/railtie.rb +22 -0
- data/lib/bot_verification/refresh_job.rb +36 -0
- data/lib/bot_verification/service.rb +232 -0
- data/lib/bot_verification/version.rb +5 -0
- data/lib/bot_verification.rb +74 -0
- data/lib/generators/bot_verification/install_generator.rb +92 -0
- data/lib/generators/bot_verification/templates/initializer.rb.erb +58 -0
- data/lib/generators/bot_verification/templates/migration.rb.erb +18 -0
- data/lib/generators/bot_verification/templates/model.rb.erb +13 -0
- data/lib/generators/bot_verification/templates/refresh_job.rb.erb +21 -0
- data/lib/tasks/bot_verification.rake +95 -0
- metadata +127 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b20a18dec9de1772a0c48135b190d1018b823a418ae5d1e114210dfe53fc6e5b
|
|
4
|
+
data.tar.gz: 4178233855e6c63a40b4a3bca25215b000888068805855f85a20d159681f7032
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 5540546cf2c83a714426c1fce497d7e76f753bfc6640d6604311afacea228dba21689beb1422bbf15d900f943fc79a72375b5d2ae0c161b149744ebd4a5b629d
|
|
7
|
+
data.tar.gz: 3089f729478e5d40ffafa31582670f6d83e34bcd11df3d1ed969d06cb8a83f87208078019cb622a5d86c849a817fe354cac2acc3984a60d1070a0c1f084eb122
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-01-19
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- Initial release
|
|
12
|
+
- IP range verification for search engine bots (Google, Bing)
|
|
13
|
+
- IP range verification for AI bots (OpenAI GPTBot, ChatGPT-User, OAI-SearchBot, PerplexityBot, Amazonbot)
|
|
14
|
+
- Reverse DNS verification fallback for search engines (Google, Bing, Apple, Yandex, Baidu)
|
|
15
|
+
- User agent pattern detection for search, AI, and social bots
|
|
16
|
+
- Rails controller concern with session-based caching
|
|
17
|
+
- Configurable caching (Rails cache and session)
|
|
18
|
+
- Background job for IP range refresh
|
|
19
|
+
- Rake tasks for management and debugging
|
|
20
|
+
- Rails generator for installation
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
bot_verification (0.1.0)
|
|
5
|
+
rails (>= 7.0)
|
|
6
|
+
resolv (>= 0.2)
|
|
7
|
+
|
|
8
|
+
GEM
|
|
9
|
+
remote: https://rubygems.org/
|
|
10
|
+
specs:
|
|
11
|
+
action_text-trix (2.1.16)
|
|
12
|
+
railties
|
|
13
|
+
actioncable (8.1.2)
|
|
14
|
+
actionpack (= 8.1.2)
|
|
15
|
+
activesupport (= 8.1.2)
|
|
16
|
+
nio4r (~> 2.0)
|
|
17
|
+
websocket-driver (>= 0.6.1)
|
|
18
|
+
zeitwerk (~> 2.6)
|
|
19
|
+
actionmailbox (8.1.2)
|
|
20
|
+
actionpack (= 8.1.2)
|
|
21
|
+
activejob (= 8.1.2)
|
|
22
|
+
activerecord (= 8.1.2)
|
|
23
|
+
activestorage (= 8.1.2)
|
|
24
|
+
activesupport (= 8.1.2)
|
|
25
|
+
mail (>= 2.8.0)
|
|
26
|
+
actionmailer (8.1.2)
|
|
27
|
+
actionpack (= 8.1.2)
|
|
28
|
+
actionview (= 8.1.2)
|
|
29
|
+
activejob (= 8.1.2)
|
|
30
|
+
activesupport (= 8.1.2)
|
|
31
|
+
mail (>= 2.8.0)
|
|
32
|
+
rails-dom-testing (~> 2.2)
|
|
33
|
+
actionpack (8.1.2)
|
|
34
|
+
actionview (= 8.1.2)
|
|
35
|
+
activesupport (= 8.1.2)
|
|
36
|
+
nokogiri (>= 1.8.5)
|
|
37
|
+
rack (>= 2.2.4)
|
|
38
|
+
rack-session (>= 1.0.1)
|
|
39
|
+
rack-test (>= 0.6.3)
|
|
40
|
+
rails-dom-testing (~> 2.2)
|
|
41
|
+
rails-html-sanitizer (~> 1.6)
|
|
42
|
+
useragent (~> 0.16)
|
|
43
|
+
actiontext (8.1.2)
|
|
44
|
+
action_text-trix (~> 2.1.15)
|
|
45
|
+
actionpack (= 8.1.2)
|
|
46
|
+
activerecord (= 8.1.2)
|
|
47
|
+
activestorage (= 8.1.2)
|
|
48
|
+
activesupport (= 8.1.2)
|
|
49
|
+
globalid (>= 0.6.0)
|
|
50
|
+
nokogiri (>= 1.8.5)
|
|
51
|
+
actionview (8.1.2)
|
|
52
|
+
activesupport (= 8.1.2)
|
|
53
|
+
builder (~> 3.1)
|
|
54
|
+
erubi (~> 1.11)
|
|
55
|
+
rails-dom-testing (~> 2.2)
|
|
56
|
+
rails-html-sanitizer (~> 1.6)
|
|
57
|
+
activejob (8.1.2)
|
|
58
|
+
activesupport (= 8.1.2)
|
|
59
|
+
globalid (>= 0.3.6)
|
|
60
|
+
activemodel (8.1.2)
|
|
61
|
+
activesupport (= 8.1.2)
|
|
62
|
+
activerecord (8.1.2)
|
|
63
|
+
activemodel (= 8.1.2)
|
|
64
|
+
activesupport (= 8.1.2)
|
|
65
|
+
timeout (>= 0.4.0)
|
|
66
|
+
activestorage (8.1.2)
|
|
67
|
+
actionpack (= 8.1.2)
|
|
68
|
+
activejob (= 8.1.2)
|
|
69
|
+
activerecord (= 8.1.2)
|
|
70
|
+
activesupport (= 8.1.2)
|
|
71
|
+
marcel (~> 1.0)
|
|
72
|
+
activesupport (8.1.2)
|
|
73
|
+
base64
|
|
74
|
+
bigdecimal
|
|
75
|
+
concurrent-ruby (~> 1.0, >= 1.3.1)
|
|
76
|
+
connection_pool (>= 2.2.5)
|
|
77
|
+
drb
|
|
78
|
+
i18n (>= 1.6, < 2)
|
|
79
|
+
json
|
|
80
|
+
logger (>= 1.4.2)
|
|
81
|
+
minitest (>= 5.1)
|
|
82
|
+
securerandom (>= 0.3)
|
|
83
|
+
tzinfo (~> 2.0, >= 2.0.5)
|
|
84
|
+
uri (>= 0.13.1)
|
|
85
|
+
base64 (0.3.0)
|
|
86
|
+
bigdecimal (4.0.1)
|
|
87
|
+
builder (3.3.0)
|
|
88
|
+
concurrent-ruby (1.3.6)
|
|
89
|
+
connection_pool (3.0.2)
|
|
90
|
+
crass (1.0.6)
|
|
91
|
+
date (3.5.1)
|
|
92
|
+
diff-lcs (1.6.2)
|
|
93
|
+
drb (2.2.3)
|
|
94
|
+
erb (6.0.1)
|
|
95
|
+
erubi (1.13.1)
|
|
96
|
+
globalid (1.3.0)
|
|
97
|
+
activesupport (>= 6.1)
|
|
98
|
+
i18n (1.14.8)
|
|
99
|
+
concurrent-ruby (~> 1.0)
|
|
100
|
+
io-console (0.8.2)
|
|
101
|
+
irb (1.16.0)
|
|
102
|
+
pp (>= 0.6.0)
|
|
103
|
+
rdoc (>= 4.0.0)
|
|
104
|
+
reline (>= 0.4.2)
|
|
105
|
+
json (2.18.0)
|
|
106
|
+
logger (1.7.0)
|
|
107
|
+
loofah (2.25.0)
|
|
108
|
+
crass (~> 1.0.2)
|
|
109
|
+
nokogiri (>= 1.12.0)
|
|
110
|
+
mail (2.9.0)
|
|
111
|
+
logger
|
|
112
|
+
mini_mime (>= 0.1.1)
|
|
113
|
+
net-imap
|
|
114
|
+
net-pop
|
|
115
|
+
net-smtp
|
|
116
|
+
marcel (1.1.0)
|
|
117
|
+
mini_mime (1.1.5)
|
|
118
|
+
minitest (6.0.1)
|
|
119
|
+
prism (~> 1.5)
|
|
120
|
+
net-imap (0.6.2)
|
|
121
|
+
date
|
|
122
|
+
net-protocol
|
|
123
|
+
net-pop (0.1.2)
|
|
124
|
+
net-protocol
|
|
125
|
+
net-protocol (0.2.2)
|
|
126
|
+
timeout
|
|
127
|
+
net-smtp (0.5.1)
|
|
128
|
+
net-protocol
|
|
129
|
+
nio4r (2.7.5)
|
|
130
|
+
nokogiri (1.19.0-aarch64-linux-gnu)
|
|
131
|
+
racc (~> 1.4)
|
|
132
|
+
nokogiri (1.19.0-aarch64-linux-musl)
|
|
133
|
+
racc (~> 1.4)
|
|
134
|
+
nokogiri (1.19.0-arm-linux-gnu)
|
|
135
|
+
racc (~> 1.4)
|
|
136
|
+
nokogiri (1.19.0-arm-linux-musl)
|
|
137
|
+
racc (~> 1.4)
|
|
138
|
+
nokogiri (1.19.0-arm64-darwin)
|
|
139
|
+
racc (~> 1.4)
|
|
140
|
+
nokogiri (1.19.0-x86_64-darwin)
|
|
141
|
+
racc (~> 1.4)
|
|
142
|
+
nokogiri (1.19.0-x86_64-linux-gnu)
|
|
143
|
+
racc (~> 1.4)
|
|
144
|
+
nokogiri (1.19.0-x86_64-linux-musl)
|
|
145
|
+
racc (~> 1.4)
|
|
146
|
+
pp (0.6.3)
|
|
147
|
+
prettyprint
|
|
148
|
+
prettyprint (0.2.0)
|
|
149
|
+
prism (1.8.0)
|
|
150
|
+
psych (5.3.1)
|
|
151
|
+
date
|
|
152
|
+
stringio
|
|
153
|
+
racc (1.8.1)
|
|
154
|
+
rack (3.2.4)
|
|
155
|
+
rack-session (2.1.1)
|
|
156
|
+
base64 (>= 0.1.0)
|
|
157
|
+
rack (>= 3.0.0)
|
|
158
|
+
rack-test (2.2.0)
|
|
159
|
+
rack (>= 1.3)
|
|
160
|
+
rackup (2.3.1)
|
|
161
|
+
rack (>= 3)
|
|
162
|
+
rails (8.1.2)
|
|
163
|
+
actioncable (= 8.1.2)
|
|
164
|
+
actionmailbox (= 8.1.2)
|
|
165
|
+
actionmailer (= 8.1.2)
|
|
166
|
+
actionpack (= 8.1.2)
|
|
167
|
+
actiontext (= 8.1.2)
|
|
168
|
+
actionview (= 8.1.2)
|
|
169
|
+
activejob (= 8.1.2)
|
|
170
|
+
activemodel (= 8.1.2)
|
|
171
|
+
activerecord (= 8.1.2)
|
|
172
|
+
activestorage (= 8.1.2)
|
|
173
|
+
activesupport (= 8.1.2)
|
|
174
|
+
bundler (>= 1.15.0)
|
|
175
|
+
railties (= 8.1.2)
|
|
176
|
+
rails-dom-testing (2.3.0)
|
|
177
|
+
activesupport (>= 5.0.0)
|
|
178
|
+
minitest
|
|
179
|
+
nokogiri (>= 1.6)
|
|
180
|
+
rails-html-sanitizer (1.6.2)
|
|
181
|
+
loofah (~> 2.21)
|
|
182
|
+
nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0)
|
|
183
|
+
railties (8.1.2)
|
|
184
|
+
actionpack (= 8.1.2)
|
|
185
|
+
activesupport (= 8.1.2)
|
|
186
|
+
irb (~> 1.13)
|
|
187
|
+
rackup (>= 1.0.0)
|
|
188
|
+
rake (>= 12.2)
|
|
189
|
+
thor (~> 1.0, >= 1.2.2)
|
|
190
|
+
tsort (>= 0.2)
|
|
191
|
+
zeitwerk (~> 2.6)
|
|
192
|
+
rake (13.3.1)
|
|
193
|
+
rdoc (7.1.0)
|
|
194
|
+
erb
|
|
195
|
+
psych (>= 4.0.0)
|
|
196
|
+
tsort
|
|
197
|
+
reline (0.6.3)
|
|
198
|
+
io-console (~> 0.5)
|
|
199
|
+
resolv (0.7.0)
|
|
200
|
+
rspec (3.13.2)
|
|
201
|
+
rspec-core (~> 3.13.0)
|
|
202
|
+
rspec-expectations (~> 3.13.0)
|
|
203
|
+
rspec-mocks (~> 3.13.0)
|
|
204
|
+
rspec-core (3.13.6)
|
|
205
|
+
rspec-support (~> 3.13.0)
|
|
206
|
+
rspec-expectations (3.13.5)
|
|
207
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
208
|
+
rspec-support (~> 3.13.0)
|
|
209
|
+
rspec-mocks (3.13.7)
|
|
210
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
211
|
+
rspec-support (~> 3.13.0)
|
|
212
|
+
rspec-support (3.13.6)
|
|
213
|
+
securerandom (0.4.1)
|
|
214
|
+
sqlite3 (2.9.0-aarch64-linux-gnu)
|
|
215
|
+
sqlite3 (2.9.0-aarch64-linux-musl)
|
|
216
|
+
sqlite3 (2.9.0-arm-linux-gnu)
|
|
217
|
+
sqlite3 (2.9.0-arm-linux-musl)
|
|
218
|
+
sqlite3 (2.9.0-arm64-darwin)
|
|
219
|
+
sqlite3 (2.9.0-x86_64-darwin)
|
|
220
|
+
sqlite3 (2.9.0-x86_64-linux-gnu)
|
|
221
|
+
sqlite3 (2.9.0-x86_64-linux-musl)
|
|
222
|
+
stringio (3.2.0)
|
|
223
|
+
thor (1.5.0)
|
|
224
|
+
timeout (0.6.0)
|
|
225
|
+
tsort (0.2.0)
|
|
226
|
+
tzinfo (2.0.6)
|
|
227
|
+
concurrent-ruby (~> 1.0)
|
|
228
|
+
uri (1.1.1)
|
|
229
|
+
useragent (0.16.11)
|
|
230
|
+
websocket-driver (0.8.0)
|
|
231
|
+
base64
|
|
232
|
+
websocket-extensions (>= 0.1.0)
|
|
233
|
+
websocket-extensions (0.1.5)
|
|
234
|
+
zeitwerk (2.7.4)
|
|
235
|
+
|
|
236
|
+
PLATFORMS
|
|
237
|
+
aarch64-linux
|
|
238
|
+
aarch64-linux-gnu
|
|
239
|
+
aarch64-linux-musl
|
|
240
|
+
arm-linux
|
|
241
|
+
arm-linux-gnu
|
|
242
|
+
arm-linux-musl
|
|
243
|
+
arm64-darwin
|
|
244
|
+
x86_64-darwin
|
|
245
|
+
x86_64-linux
|
|
246
|
+
x86_64-linux-gnu
|
|
247
|
+
x86_64-linux-musl
|
|
248
|
+
|
|
249
|
+
DEPENDENCIES
|
|
250
|
+
bot_verification!
|
|
251
|
+
rake (~> 13.0)
|
|
252
|
+
rspec (~> 3.12)
|
|
253
|
+
sqlite3 (>= 2.1)
|
|
254
|
+
|
|
255
|
+
BUNDLED WITH
|
|
256
|
+
2.5.23
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Web Ventures Ltd - www.webven.nz
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
# BotVerification
|
|
2
|
+
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/bot_verification)
|
|
4
|
+
[CI](https://github.com/webventures/bot_verification/actions/workflows/ci.yml)
|
|
5
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
A Rails gem for verifying that requests claiming to be from search engine bots (Google, Bing, etc.) and AI bots (GPTBot, PerplexityBot) are actually from those services.
|
|
8
|
+
|
|
9
|
+
## Why?
|
|
10
|
+
|
|
11
|
+
User agents can be easily spoofed. This gem verifies bot requests using:
|
|
12
|
+
|
|
13
|
+
1. **IP Range Matching** (fast) - Checks against known IP ranges from official sources
|
|
14
|
+
2. **Reverse DNS Verification** (authoritative) - Used as a fallback: a reverse DNS lookup is performed when the IP range check does not match
|
|
15
|
+
|
|
16
|
+
## Supported Bots
|
|
17
|
+
|
|
18
|
+
### Search Engines (IP range + DNS verification; Apple, Yandex, and Baidu are DNS-only)
|
|
19
|
+
- Google (Googlebot, Google-Extended, etc.)
|
|
20
|
+
- Bing (Bingbot, BingPreview)
|
|
21
|
+
- Apple (Applebot)
|
|
22
|
+
- Yandex (YandexBot)
|
|
23
|
+
- Baidu (Baiduspider)
|
|
24
|
+
|
|
25
|
+
### AI Bots (IP verification only)
|
|
26
|
+
- OpenAI (GPTBot, ChatGPT-User, OAI-SearchBot)
|
|
27
|
+
- Perplexity (PerplexityBot)
|
|
28
|
+
- Amazon (Amazonbot)
|
|
29
|
+
|
|
30
|
+
### Social Bots (user agent only - no verification available)
|
|
31
|
+
- Facebook, Twitter, LinkedIn, Slack, Discord, Telegram
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
Add to your Gemfile:
|
|
36
|
+
|
|
37
|
+
```ruby
|
|
38
|
+
gem "bot_verification"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Run the installer:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
bundle install
|
|
45
|
+
rails generate bot_verification:install
|
|
46
|
+
rails db:migrate
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Fetch initial IP ranges:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
rails bot_verification:refresh
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
## Usage
|
|
56
|
+
|
|
57
|
+
### In Controllers
|
|
58
|
+
|
|
59
|
+
```ruby
|
|
60
|
+
class MyController < ApplicationController
|
|
61
|
+
include BotVerification::ControllerConcern
|
|
62
|
+
|
|
63
|
+
def show
|
|
64
|
+
if verified_good_bot?
|
|
65
|
+
# Request is from a verified search engine bot
|
|
66
|
+
# Serve full content
|
|
67
|
+
elsif verified_good_bot?(mode: :search_and_ai)
|
|
68
|
+
# Request is from a verified AI bot (GPTBot, etc.)
|
|
69
|
+
else
|
|
70
|
+
# Regular user or unverified bot
|
|
71
|
+
# Apply rate limiting, require auth, etc.
|
|
72
|
+
end
|
|
73
|
+
end
|
|
74
|
+
end
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Verification Modes
|
|
78
|
+
|
|
79
|
+
```ruby
|
|
80
|
+
# Only verified search engine bots (default, most secure)
|
|
81
|
+
verified_good_bot?(mode: :search_engines)
|
|
82
|
+
|
|
83
|
+
# Search engines + verified AI bots
|
|
84
|
+
verified_good_bot?(mode: :search_and_ai)
|
|
85
|
+
|
|
86
|
+
# All known bot patterns (trusts user agent, least secure)
|
|
87
|
+
verified_good_bot?(mode: :all_known)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Check Specifically for AI Bots
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
if verified_ai_bot?
|
|
94
|
+
# Verified AI bot (GPTBot, PerplexityBot, etc.)
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Include unverifiable AI bots like ClaudeBot
|
|
98
|
+
if verified_ai_bot?(strict: false)
|
|
99
|
+
# Any recognized AI bot
|
|
100
|
+
end
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Direct Service Usage
|
|
104
|
+
|
|
105
|
+
```ruby
|
|
106
|
+
# Verify a request
|
|
107
|
+
BotVerification.verify(ip, user_agent)
|
|
108
|
+
BotVerification.verify(ip, user_agent, mode: :search_and_ai)
|
|
109
|
+
|
|
110
|
+
# Check specific IP
|
|
111
|
+
BotVerification.verify_ip("66.249.66.1", :google)
|
|
112
|
+
|
|
113
|
+
# Detect bot type from user agent
|
|
114
|
+
BotVerification.detect_bot("Googlebot/2.1") # => :google
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Performance
|
|
118
|
+
|
|
119
|
+
Verification uses a tiered approach, fastest first:
|
|
120
|
+
|
|
121
|
+
| Tier | Method | Latency | When Used |
|
|
122
|
+
|------|--------|---------|-----------|
|
|
123
|
+
| 1 | Session cache | ~1ms | Same session, same IP+UA |
|
|
124
|
+
| 2 | Rails cache | ~1ms | Previously verified IP |
|
|
125
|
+
| 3 | IP range check | ~5-10ms | First verification for IP |
|
|
126
|
+
| 4 | Reverse DNS | 100-2000ms | IP range miss, and only when the user agent matches a known bot pattern |
|
|
127
|
+
|
|
128
|
+
**Important:** DNS lookups only occur when:
|
|
129
|
+
- User agent matches a known bot pattern, AND
|
|
130
|
+
- IP range check fails
|
|
131
|
+
|
|
132
|
+
Regular users never trigger DNS lookups.
|
|
133
|
+
|
|
134
|
+
## Configuration
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
# config/initializers/bot_verification.rb
|
|
138
|
+
|
|
139
|
+
BotVerification.configure do |config|
|
|
140
|
+
# Table name for storing bot IP ranges
|
|
141
|
+
config.table_name = "bot_ip_ranges"
|
|
142
|
+
|
|
143
|
+
# Skip DNS verification entirely (only use IP range matching)
|
|
144
|
+
# Set to true if DNS lookups are unacceptable for your project.
|
|
145
|
+
# Note: Apple, Yandex, Baidu don't publish IP ranges, so they won't
|
|
146
|
+
# be verifiable when DNS is skipped.
|
|
147
|
+
config.skip_dns_verification = false
|
|
148
|
+
|
|
149
|
+
# Timeout for each DNS lookup (seconds)
|
|
150
|
+
# Only applies when skip_dns_verification is false
|
|
151
|
+
config.dns_timeout = 1.0
|
|
152
|
+
|
|
153
|
+
# Total timeout for all DNS operations (seconds)
|
|
154
|
+
# Only applies when skip_dns_verification is false
|
|
155
|
+
config.dns_total_timeout = 2.0
|
|
156
|
+
|
|
157
|
+
# How long to cache verification results
|
|
158
|
+
config.cache_ttl = 24.hours
|
|
159
|
+
|
|
160
|
+
# How long to cache in session
|
|
161
|
+
config.session_cache_ttl = 1.hour
|
|
162
|
+
|
|
163
|
+
# Custom model class (optional)
|
|
164
|
+
# config.ip_range_model_name = "MyBotIpRange"
|
|
165
|
+
|
|
166
|
+
# Error callback - integrate with error tracking (Airbrake, Sentry, etc.)
|
|
167
|
+
config.on_error = ->(error, context) {
|
|
168
|
+
Airbrake.notify(error, context)
|
|
169
|
+
# or: Sentry.capture_exception(error, extra: context)
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
# Refresh complete callback - for monitoring/notifications
|
|
173
|
+
config.on_refresh_complete = ->(results) {
|
|
174
|
+
failures = results.select { |_, r| !r[:success] }
|
|
175
|
+
if failures.any?
|
|
176
|
+
Rails.logger.warn("BotVerification refresh failures: #{failures.keys}")
|
|
177
|
+
end
|
|
178
|
+
}
|
|
179
|
+
end
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## Error Handling
|
|
183
|
+
|
|
184
|
+
The gem reports errors through multiple channels:
|
|
185
|
+
|
|
186
|
+
### Logger
|
|
187
|
+
|
|
188
|
+
All operations log to `config.logger` (defaults to `Rails.logger`):
|
|
189
|
+
- `INFO` - Successful operations
|
|
190
|
+
- `WARN` - Non-critical issues (e.g., no ranges fetched)
|
|
191
|
+
- `ERROR` - Failures (HTTP errors, parse errors)
|
|
192
|
+
|
|
193
|
+
### Error Callback
|
|
194
|
+
|
|
195
|
+
For integration with error tracking services:
|
|
196
|
+
|
|
197
|
+
```ruby
|
|
198
|
+
config.on_error = ->(error, context) {
|
|
199
|
+
# error: The exception object
|
|
200
|
+
# context: Hash with :bot_type, :source, :url
|
|
201
|
+
Airbrake.notify(error, context)
|
|
202
|
+
}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
### Refresh Results
|
|
206
|
+
|
|
207
|
+
`refresh_ip_ranges!` returns a hash with success/failure for each bot type:
|
|
208
|
+
|
|
209
|
+
```ruby
|
|
210
|
+
results = BotVerification.refresh_ip_ranges!
|
|
211
|
+
# => {
|
|
212
|
+
# google: { success: true, count: 142 },
|
|
213
|
+
# bing: { success: true, count: 8 },
|
|
214
|
+
# openai_gptbot: { success: false, error: "HTTP 503: Service Unavailable" }
|
|
215
|
+
# }
|
|
216
|
+
|
|
217
|
+
# Check for failures
|
|
218
|
+
failures = results.select { |_, r| !r[:success] }
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
## Refreshing IP Ranges
|
|
222
|
+
|
|
223
|
+
IP ranges should be refreshed daily. Choose the method that fits your deployment:
|
|
224
|
+
|
|
225
|
+
### Rake Task (Heroku Scheduler, cron, etc.)
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Refresh all bot types
|
|
229
|
+
rails bot_verification:refresh
|
|
230
|
+
|
|
231
|
+
# Refresh specific bot type
|
|
232
|
+
rails bot_verification:refresh_bot[google]
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
**Heroku Scheduler:** Add `rake bot_verification:refresh` as a daily job.
|
|
236
|
+
|
|
237
|
+
### Background Job (Sidekiq, etc.)
|
|
238
|
+
|
|
239
|
+
The gem includes `BotVerification::RefreshJob` for background processing:
|
|
240
|
+
|
|
241
|
+
```ruby
|
|
242
|
+
# Enqueue to run now
|
|
243
|
+
BotVerification::RefreshJob.perform_later
|
|
244
|
+
|
|
245
|
+
# Refresh specific bot type
|
|
246
|
+
BotVerification::RefreshJob.perform_later("google")
|
|
247
|
+
|
|
248
|
+
# With Sidekiq-Cron (config/sidekiq.yml)
|
|
249
|
+
:schedule:
|
|
250
|
+
refresh_bot_ips:
|
|
251
|
+
cron: '0 4 * * *' # 4am daily
|
|
252
|
+
class: BotVerification::RefreshJob
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### Custom Job
|
|
256
|
+
|
|
257
|
+
Subclass for custom queue, error handling, or notifications:
|
|
258
|
+
|
|
259
|
+
```ruby
|
|
260
|
+
# app/jobs/refresh_bot_ip_ranges_job.rb
|
|
261
|
+
class RefreshBotIpRangesJob < BotVerification::RefreshJob
|
|
262
|
+
queue_as :low
|
|
263
|
+
|
|
264
|
+
def perform(bot_type = nil)
|
|
265
|
+
super
|
|
266
|
+
rescue => e
|
|
267
|
+
Airbrake.notify(e)
|
|
268
|
+
raise
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
### Direct Call
|
|
274
|
+
|
|
275
|
+
```ruby
|
|
276
|
+
# In a script or console
|
|
277
|
+
BotVerification.refresh_ip_ranges!
|
|
278
|
+
BotVerification.refresh_ip_ranges!(:google)
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Cron (via whenever gem)
|
|
282
|
+
|
|
283
|
+
```ruby
|
|
284
|
+
# config/schedule.rb
|
|
285
|
+
every 1.day, at: '4:00 am' do
|
|
286
|
+
rake "bot_verification:refresh"
|
|
287
|
+
end
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Rake Tasks
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
# Refresh all IP ranges
|
|
294
|
+
rails bot_verification:refresh
|
|
295
|
+
|
|
296
|
+
# Refresh specific bot type
|
|
297
|
+
rails bot_verification:refresh_bot[google]
|
|
298
|
+
|
|
299
|
+
# Show statistics
|
|
300
|
+
rails bot_verification:stats
|
|
301
|
+
|
|
302
|
+
# Clear caches
|
|
303
|
+
rails bot_verification:clear_cache
|
|
304
|
+
|
|
305
|
+
# Verify an IP
|
|
306
|
+
rails bot_verification:verify_ip[66.249.66.1,google]
|
|
307
|
+
|
|
308
|
+
# Check if table exists
|
|
309
|
+
rails bot_verification:check_table
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
## Using Your Own Model
|
|
313
|
+
|
|
314
|
+
If you want more control, you can use your own model:
|
|
315
|
+
|
|
316
|
+
```ruby
|
|
317
|
+
# app/models/my_bot_ip_range.rb
|
|
318
|
+
class MyBotIpRange < ApplicationRecord
|
|
319
|
+
include BotVerification::IpRangeModel
|
|
320
|
+
|
|
321
|
+
self.table_name = "my_bot_ip_ranges"
|
|
322
|
+
|
|
323
|
+
# Add custom methods...
|
|
324
|
+
end
|
|
325
|
+
|
|
326
|
+
# config/initializers/bot_verification.rb
|
|
327
|
+
BotVerification.configure do |config|
|
|
328
|
+
config.ip_range_model_name = "MyBotIpRange"
|
|
329
|
+
end
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
## References
|
|
333
|
+
|
|
334
|
+
- [Google Bot Verification](https://developers.google.com/search/docs/crawling-indexing/verifying-googlebot)
|
|
335
|
+
- [Bing Bot Verification](https://www.bing.com/webmasters/help/how-to-verify-bingbot-3905dc26)
|
|
336
|
+
- [OpenAI Bot Documentation](https://platform.openai.com/docs/bots/)
|
|
337
|
+
- [Perplexity Crawlers](https://docs.perplexity.ai/guides/bots)
|
|
338
|
+
|
|
339
|
+
## Changelog
|
|
340
|
+
|
|
341
|
+
For a detailed list of changes for each version of this project, please see the [CHANGELOG](CHANGELOG.md).
|
|
342
|
+
|
|
343
|
+
## Development
|
|
344
|
+
|
|
345
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
346
|
+
|
|
347
|
+
To install this gem onto your local machine, run `bundle exec rake install`.
|
|
348
|
+
|
|
349
|
+
## Contributing
|
|
350
|
+
|
|
351
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/webventures/bot_verification.
|
|
352
|
+
|
|
353
|
+
## License
|
|
354
|
+
|
|
355
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE).
|