bot_verification 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +20 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +256 -0
- data/LICENSE +21 -0
- data/README.md +355 -0
- data/Rakefile +8 -0
- data/bot_verification.gemspec +37 -0
- data/lib/bot_verification/bot_patterns.rb +121 -0
- data/lib/bot_verification/configuration.rb +139 -0
- data/lib/bot_verification/controller_concern.rb +150 -0
- data/lib/bot_verification/ip_range_fetcher.rb +155 -0
- data/lib/bot_verification/ip_range_model.rb +132 -0
- data/lib/bot_verification/railtie.rb +22 -0
- data/lib/bot_verification/refresh_job.rb +36 -0
- data/lib/bot_verification/service.rb +232 -0
- data/lib/bot_verification/version.rb +5 -0
- data/lib/bot_verification.rb +74 -0
- data/lib/generators/bot_verification/install_generator.rb +92 -0
- data/lib/generators/bot_verification/templates/initializer.rb.erb +58 -0
- data/lib/generators/bot_verification/templates/migration.rb.erb +18 -0
- data/lib/generators/bot_verification/templates/model.rb.erb +13 -0
- data/lib/generators/bot_verification/templates/refresh_job.rb.erb +21 -0
- data/lib/tasks/bot_verification.rake +95 -0
- metadata +127 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Job to refresh bot IP ranges from official sources
|
|
4
|
+
# Should be scheduled to run daily via cron/Sidekiq-Cron
|
|
5
|
+
#
|
|
6
|
+
# @example Run manually
|
|
7
|
+
# RefreshBotIpRangesJob.perform_now
|
|
8
|
+
#
|
|
9
|
+
# @example Run for specific bot type
|
|
10
|
+
# RefreshBotIpRangesJob.perform_now("google")
|
|
11
|
+
#
|
|
12
|
+
# @example Schedule with Sidekiq-Cron (in sidekiq.yml or initializer)
|
|
13
|
+
# RefreshBotIpRangesJob.perform_later
|
|
14
|
+
#
|
|
15
|
+
class RefreshBotIpRangesJob < ApplicationJob
  # Enqueued on the :low queue.
  queue_as :low

  # Delegates to BotVerification.refresh_ip_ranges!, forwarding the optional
  # bot type unchanged (nil when the job is called without an argument).
  #
  # @param bot_type [Object, nil] bot type handed straight to the refresher
  def perform(bot_type = nil) = BotVerification.refresh_ip_ranges!(bot_type)
end
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
namespace :bot_verification do
|
|
4
|
+
desc "Refresh all bot IP ranges from official sources"
task refresh: :environment do
  puts "Refreshing all bot IP ranges..."

  # Print one summary line per bot type: the imported range count when the
  # entry reports success, otherwise the error it carries.
  BotVerification.refresh_ip_ranges!.each do |bot_type, result|
    outcome = result[:success] ? "#{result[:count]} ranges imported" : "FAILED - #{result[:error]}"
    puts "  #{bot_type}: #{outcome}"
  end

  puts "Done!"
end
|
|
19
|
+
|
|
20
|
+
desc "Refresh IP ranges for a specific bot type (e.g., google, bing, openai_gptbot)"
# Refreshes a single bot type and prints the outcome.
#
# Usage: rake bot_verification:refresh_bot[google]
#
# Exits with status 1 when the bot type argument is missing or when the
# refresh returns no entry for the requested bot type (previously the latter
# crashed with NoMethodError on nil).
task :refresh_bot, [ :bot_type ] => :environment do |_t, args|
  bot_type = args[:bot_type]
  unless bot_type
    puts "Usage: rake bot_verification:refresh_bot[google]"
    exit 1
  end

  puts "Refreshing IP ranges for #{bot_type}..."
  results = BotVerification.refresh_ip_ranges!(bot_type)

  # Symbol key first, as before. NOTE(review): the key type returned by
  # refresh_ip_ranges! is not visible here, so a string key is accepted too.
  result = results[bot_type.to_sym] || results[bot_type]
  if result.nil?
    # Typically an unknown/misspelled bot type: report it instead of crashing.
    puts "  FAILED - no result returned for bot type '#{bot_type}'"
    exit 1
  end

  if result[:success]
    puts "  #{result[:count]} ranges imported"
  else
    puts "  FAILED - #{result[:error]}"
  end

  puts "Done!"
end
|
|
40
|
+
|
|
41
|
+
desc "Show statistics about stored bot IP ranges"
# Prints one row per bot type (range count and last update time) followed by
# a grand total of all ranges.
task stats: :environment do
  puts "Bot IP Range Statistics:"
  puts "-" * 50

  stats = BotVerification.ip_range_model.stats

  stats.each do |bot_type, data|
    last_updated = data[:last_updated]&.strftime("%Y-%m-%d %H:%M") || "Never"
    # to_s: Symbol has no #ljust, so symbol-keyed stats would otherwise raise.
    puts "  #{bot_type.to_s.ljust(20)} #{data[:count].to_s.rjust(5)} ranges (Last: #{last_updated})"
  end

  # to_i: a missing count contributes 0 instead of breaking the sum.
  total = stats.values.sum { |entry| entry[:count].to_i }
  puts "-" * 50
  puts "  #{'Total'.ljust(20)} #{total.to_s.rjust(5)} ranges"
end
|
|
57
|
+
|
|
58
|
+
desc "Clear all bot IP range caches"
task clear_cache: :environment do
  puts "Clearing all bot IP range caches..."

  # The configured IP range model owns the caches; ask it to drop them all.
  range_model = BotVerification.ip_range_model
  range_model.clear_all_caches

  puts "Done!"
end
|
|
64
|
+
|
|
65
|
+
desc "Verify if an IP belongs to a known bot (e.g., rake bot_verification:verify_ip[66.249.66.1,google])"
task :verify_ip, [ :ip, :bot_type ] => :environment do |_t, args|
  ip = args[:ip]
  bot_type = args[:bot_type]

  # Both arguments are mandatory; show usage and fail when either is missing.
  if !ip || !bot_type
    puts "Usage: rake bot_verification:verify_ip[66.249.66.1,google]"
    exit 1
  end

  puts "Verifying IP #{ip} for bot type #{bot_type}..."

  # Step 1: membership in the stored IP ranges (bot type passed as given).
  # NOTE(review): this call receives a String while verify_ip below receives
  # a Symbol — confirm both APIs accept the form they are given.
  puts "  IP in known ranges: #{BotVerification.ip_range_model.ip_belongs_to_bot?(ip, bot_type)}"

  # Step 2: full verification through the top-level API.
  puts "  Full verification: #{BotVerification.verify_ip(ip, bot_type.to_sym)}"
end
|
|
85
|
+
|
|
86
|
+
desc "Check if table exists"
task check_table: :environment do
  # Happy path first: report the table and stop.
  if BotVerification.table_exists?
    puts "Table '#{BotVerification.configuration.table_name}' exists."
    next
  end

  # Table missing: say so and point at the install generator.
  puts "Table '#{BotVerification.configuration.table_name}' does NOT exist."
  puts "Run: rails generate bot_verification:install"
end
|
|
95
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: bot_verification
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Web Ventures Ltd
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-01-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rails
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - ">="
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '7.0'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - ">="
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '7.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: resolv
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - ">="
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '0.2'
|
|
34
|
+
type: :runtime
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - ">="
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '0.2'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rspec
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '3.12'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '3.12'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: sqlite3
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - ">="
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '2.1'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - ">="
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '2.1'
|
|
69
|
+
description: A Rails engine for verifying that requests claiming to be from search
|
|
70
|
+
engine bots (Google, Bing, etc.) and AI bots (GPTBot, PerplexityBot) are actually
|
|
71
|
+
from those services, using IP range matching and reverse DNS verification.
|
|
72
|
+
email:
|
|
73
|
+
- gems@dev.webven.nz
|
|
74
|
+
executables: []
|
|
75
|
+
extensions: []
|
|
76
|
+
extra_rdoc_files: []
|
|
77
|
+
files:
|
|
78
|
+
- CHANGELOG.md
|
|
79
|
+
- Gemfile
|
|
80
|
+
- Gemfile.lock
|
|
81
|
+
- LICENSE
|
|
82
|
+
- README.md
|
|
83
|
+
- Rakefile
|
|
84
|
+
- bot_verification.gemspec
|
|
85
|
+
- lib/bot_verification.rb
|
|
86
|
+
- lib/bot_verification/bot_patterns.rb
|
|
87
|
+
- lib/bot_verification/configuration.rb
|
|
88
|
+
- lib/bot_verification/controller_concern.rb
|
|
89
|
+
- lib/bot_verification/ip_range_fetcher.rb
|
|
90
|
+
- lib/bot_verification/ip_range_model.rb
|
|
91
|
+
- lib/bot_verification/railtie.rb
|
|
92
|
+
- lib/bot_verification/refresh_job.rb
|
|
93
|
+
- lib/bot_verification/service.rb
|
|
94
|
+
- lib/bot_verification/version.rb
|
|
95
|
+
- lib/generators/bot_verification/install_generator.rb
|
|
96
|
+
- lib/generators/bot_verification/templates/initializer.rb.erb
|
|
97
|
+
- lib/generators/bot_verification/templates/migration.rb.erb
|
|
98
|
+
- lib/generators/bot_verification/templates/model.rb.erb
|
|
99
|
+
- lib/generators/bot_verification/templates/refresh_job.rb.erb
|
|
100
|
+
- lib/tasks/bot_verification.rake
|
|
101
|
+
homepage: https://github.com/webventures/bot_verification
|
|
102
|
+
licenses:
|
|
103
|
+
- MIT
|
|
104
|
+
metadata:
|
|
105
|
+
homepage_uri: https://github.com/webventures/bot_verification
|
|
106
|
+
source_code_uri: https://github.com/webventures/bot_verification
|
|
107
|
+
changelog_uri: https://github.com/webventures/bot_verification/blob/main/CHANGELOG.md
|
|
108
|
+
post_install_message:
|
|
109
|
+
rdoc_options: []
|
|
110
|
+
require_paths:
|
|
111
|
+
- lib
|
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - ">="
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: 3.2.0
|
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
|
+
requirements:
|
|
119
|
+
- - ">="
|
|
120
|
+
- !ruby/object:Gem::Version
|
|
121
|
+
version: '0'
|
|
122
|
+
requirements: []
|
|
123
|
+
rubygems_version: 3.5.23
|
|
124
|
+
signing_key:
|
|
125
|
+
specification_version: 4
|
|
126
|
+
summary: Verify legitimate search engine and AI bots by IP
|
|
127
|
+
test_files: []
|