legitbot 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/Gemfile +2 -0
- data/Rakefile +5 -3
- data/legitbot.gemspec +4 -3
- data/lib/legitbot/ahrefs.rb +13 -8
- data/lib/legitbot/apple.rb +11 -11
- data/lib/legitbot/baidu.rb +5 -7
- data/lib/legitbot/bing.rb +5 -7
- data/lib/legitbot/botmatch.rb +17 -44
- data/lib/legitbot/config/resolver.rb +18 -0
- data/lib/legitbot/duckduckgo.rb +15 -7
- data/lib/legitbot/facebook.rb +8 -34
- data/lib/legitbot/google.rb +5 -8
- data/lib/legitbot/legitbot.rb +14 -9
- data/lib/legitbot/pinterest.rb +5 -8
- data/lib/legitbot/validators/domains.rb +71 -0
- data/lib/legitbot/validators/ip_ranges.rb +81 -0
- data/lib/legitbot/version.rb +3 -1
- data/lib/legitbot/yandex.rb +28 -12
- data/lib/legitbot.rb +2 -0
- data/test/ahrefs_test.rb +16 -8
- data/test/apple_as_google_test.rb +9 -4
- data/test/apple_test.rb +11 -4
- data/test/botmatch_test.rb +4 -22
- data/test/facebook_test.rb +24 -9
- data/test/google_test.rb +24 -14
- data/test/legitbot/validators/domains_test.rb +58 -0
- data/test/legitbot/validators/ip_ranges_test.rb +113 -0
- data/test/legitbot_test.rb +8 -4
- data/test/pinterest_test.rb +26 -14
- metadata +30 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfc1b3322ff4f85957dabf6790d535f27f99feed47bdb4ff1bba65f6242d31a2
|
4
|
+
data.tar.gz: 32e842cc3d297b3afda0ef9b265121a11c363857a726819f886b441e6c53a53c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3654c256da13b37045425457a96ac9a8b41c5ae5c0cce49b7898170e2d23a66a5cb7e612503dc1d88dc2e1240dcb07c9ccc5d5aa8439f280144abe969dc0ae7b
|
7
|
+
data.tar.gz: 554e120d1001a71f455aedcd4d30397b22130279a6f99f8e022d40ade50427590dd52e982820399852ec52fbb2c96b7ea1461f82d8e88cb89acc053097136b32
|
data/.rubocop.yml
ADDED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'bundler'
|
3
5
|
require 'bump/tasks'
|
4
|
-
require
|
6
|
+
require 'rake/testtask'
|
5
7
|
Bundler::GemHelper.install_tasks
|
6
8
|
|
7
9
|
Bump.tag_by_default = true
|
8
10
|
|
9
11
|
Rake::TestTask.new do |t|
|
10
|
-
t.libs <<
|
11
|
-
t.test_files = FileList['test
|
12
|
+
t.libs << 'test'
|
13
|
+
t.test_files = FileList['test/**/*_test.rb']
|
12
14
|
t.warning = true
|
13
15
|
t.verbose = true
|
14
16
|
end
|
data/legitbot.gemspec
CHANGED
@@ -17,9 +17,10 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.required_ruby_version = '>= 2.3.0'
|
18
18
|
spec.add_dependency "irrc", ">= 0.2.1"
|
19
19
|
spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
|
20
|
-
spec.add_development_dependency "bump"
|
21
|
-
spec.add_development_dependency "rake"
|
22
|
-
spec.add_development_dependency "
|
20
|
+
spec.add_development_dependency "bump", '>= 0.8.0'
|
21
|
+
spec.add_development_dependency "rake", '>= 12.3.0'
|
22
|
+
spec.add_development_dependency "rubocop", '>= 0.74.0'
|
23
|
+
spec.add_development_dependency "minitest", '>= 5.1.0'
|
23
24
|
|
24
25
|
spec.files = `git ls-files`.split($/)
|
25
26
|
spec.rdoc_options = ["--charset=UTF-8"]
|
data/lib/legitbot/ahrefs.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://ahrefs.com/robot
|
3
5
|
class Ahrefs < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
ip_ranges %w[
|
7
|
+
54.36.148.0/24
|
8
|
+
54.36.149.0/24
|
9
|
+
54.36.150.0/24
|
10
|
+
195.154.122.0/24
|
11
|
+
195.154.123.0/24
|
12
|
+
195.154.126.0/24
|
13
|
+
195.154.127.0/24
|
14
|
+
]
|
10
15
|
end
|
11
16
|
|
12
|
-
rule Legitbot::Ahrefs, %w
|
17
|
+
rule Legitbot::Ahrefs, %w[AhrefsBot]
|
13
18
|
end
|
data/lib/legitbot/apple.rb
CHANGED
@@ -1,20 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'ipaddr'
|
2
4
|
|
3
|
-
module Legitbot
|
5
|
+
module Legitbot # :nodoc:
|
4
6
|
# https://support.apple.com/en-us/HT204683
|
5
|
-
|
6
7
|
class Apple < BotMatch
|
7
|
-
|
8
|
-
|
9
|
-
def valid?
|
10
|
-
ip = IPAddr.new @ip
|
11
|
-
Range.include? ip
|
12
|
-
end
|
8
|
+
ip_ranges '17.0.0.0/8'
|
13
9
|
end
|
14
10
|
|
15
|
-
|
11
|
+
# https://support.apple.com/en-us/HT204683
|
12
|
+
# rubocop:disable Naming/ClassAndModuleCamelCase
|
13
|
+
class Apple_as_Google < BotMatch
|
14
|
+
ip_ranges '17.0.0.0/8'
|
16
15
|
end
|
16
|
+
# rubocop:enable Naming/ClassAndModuleCamelCase
|
17
17
|
|
18
|
-
rule Legitbot::Apple, %w
|
19
|
-
rule Legitbot::Apple_as_Google, %w
|
18
|
+
rule Legitbot::Apple, %w[Applebot]
|
19
|
+
rule Legitbot::Apple_as_Google, %w[Googlebot]
|
20
20
|
end
|
data/lib/legitbot/baidu.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# http://help.baidu.com/question?prod_en=master&class=498&id=1000973
|
3
5
|
class Baidu < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
def valid?
|
7
|
-
subdomain_of?(*Baidu::ValidDomains)
|
8
|
-
end
|
6
|
+
domains 'baidu.com.', 'baidu.jp.', reverse: false
|
9
7
|
end
|
10
8
|
|
11
|
-
rule Legitbot::Baidu, %w
|
9
|
+
rule Legitbot::Baidu, %w[Baiduspider]
|
12
10
|
end
|
data/lib/legitbot/bing.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
|
3
5
|
class Bing < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
def valid?
|
7
|
-
subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
|
8
|
-
end
|
6
|
+
domains 'search.msn.com.'
|
9
7
|
end
|
10
8
|
|
11
|
-
rule Legitbot::Bing, %w
|
9
|
+
rule Legitbot::Bing, %w[Bingbot bingbot]
|
12
10
|
end
|
data/lib/legitbot/botmatch.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'config/resolver'
|
4
|
+
require_relative 'validators/domains'
|
5
|
+
require_relative 'validators/ip_ranges'
|
3
6
|
|
4
7
|
module Legitbot
|
5
8
|
##
|
@@ -7,61 +10,31 @@ module Legitbot
|
|
7
10
|
# +valid?+, +fake?+ and +detected_as+
|
8
11
|
#
|
9
12
|
class BotMatch
|
10
|
-
|
11
|
-
|
12
|
-
@ip = ip
|
13
|
-
end
|
14
|
-
|
15
|
-
##
|
16
|
-
# Returns a Resolv::DNS::Name instance with
|
17
|
-
# the reverse name
|
18
|
-
def reverse_domain
|
19
|
-
@reverse_domain ||= @dns.getname(@ip)
|
20
|
-
rescue Resolv::ResolvError
|
21
|
-
@reverse_domain ||= nil
|
22
|
-
end
|
23
|
-
|
24
|
-
##
|
25
|
-
# Returns a String with the reverse name
|
26
|
-
def reverse_name
|
27
|
-
reverse_domain&.to_s
|
28
|
-
end
|
29
|
-
|
30
|
-
##
|
31
|
-
# Returns a String with IP created from the reverse name
|
32
|
-
def reversed_ip
|
33
|
-
return nil if reverse_name.nil?
|
13
|
+
include Legitbot::Validators::IpRanges
|
14
|
+
include Legitbot::Validators::Domains
|
34
15
|
|
35
|
-
|
36
|
-
@
|
37
|
-
end
|
38
|
-
|
39
|
-
def reverse_resolves?
|
40
|
-
@ip == reversed_ip
|
41
|
-
end
|
42
|
-
|
43
|
-
def subdomain_of?(*domains)
|
44
|
-
return false if reverse_name.nil?
|
45
|
-
|
46
|
-
domains.any? { |d|
|
47
|
-
reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
|
48
|
-
}
|
16
|
+
def initialize(ip)
|
17
|
+
@ip = ip
|
49
18
|
end
|
50
19
|
|
51
20
|
def detected_as
|
52
21
|
self.class.name.split('::').last.downcase.to_sym
|
53
22
|
end
|
54
23
|
|
24
|
+
def valid?
|
25
|
+
valid_ip? && valid_domain?
|
26
|
+
end
|
27
|
+
|
55
28
|
def fake?
|
56
29
|
!valid?
|
57
30
|
end
|
58
31
|
|
59
|
-
def self.valid?(ip
|
60
|
-
|
32
|
+
def self.valid?(ip)
|
33
|
+
new(ip).valid?
|
61
34
|
end
|
62
35
|
|
63
|
-
def self.fake?(ip
|
64
|
-
|
36
|
+
def self.fake?(ip)
|
37
|
+
new(ip).fake?
|
65
38
|
end
|
66
39
|
end
|
67
40
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'resolv'
|
4
|
+
|
5
|
+
module Legitbot
|
6
|
+
module Config
|
7
|
+
module Resolver # :nodoc:
|
8
|
+
# Sets the options used to build this matcher's DNS resolver.
#
# +options+ is stored as-is and later handed to Resolv::DNS.new by
# +resolver+; passing +nil+ (the default) clears the per-matcher
# options. Returns +options+.
def resolver_config(options = nil)
  # Forget the memoized resolver: it was built from the previous options
  # and would otherwise keep being reused, silently ignoring the new ones.
  @resolver = nil
  @resolver_config = options
end
|
11
|
+
|
12
|
+
def resolver
|
13
|
+
@resolver_config ||= Legitbot.resolver_config
|
14
|
+
@resolver ||= Resolv::DNS.new @resolver_config
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -1,12 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://duckduckgo.com/duckduckbot
|
3
5
|
class DuckDuckGo < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
ip_ranges %w[
|
7
|
+
50.16.241.113
|
8
|
+
50.16.241.114
|
9
|
+
50.16.241.117
|
10
|
+
50.16.247.234
|
11
|
+
52.204.97.54
|
12
|
+
52.5.190.19
|
13
|
+
54.197.234.188
|
14
|
+
54.208.100.253
|
15
|
+
23.21.227.69
|
16
|
+
]
|
9
17
|
end
|
10
18
|
|
11
|
-
rule Legitbot::DuckDuckGo, %w
|
19
|
+
rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
|
12
20
|
end
|
data/lib/legitbot/facebook.rb
CHANGED
@@ -1,48 +1,22 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'irrc'
|
3
|
-
require 'interval_tree'
|
4
4
|
|
5
|
-
module Legitbot
|
5
|
+
module Legitbot # :nodoc:
|
6
6
|
# https://developers.facebook.com/docs/sharing/webmasters/crawler
|
7
|
-
|
8
7
|
class Facebook < BotMatch
|
9
8
|
AS = 'AS32934'
|
10
9
|
|
11
|
-
|
12
|
-
ip = IPAddr.new(@ip)
|
13
|
-
Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
|
14
|
-
end
|
15
|
-
|
16
|
-
@mutex = Mutex.new
|
17
|
-
|
18
|
-
def self.valid_ips
|
19
|
-
@mutex.synchronize { @ips ||= load_ips }
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.reload!
|
23
|
-
@mutex.synchronize { @ips = load_ips }
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.load_ips
|
27
|
-
whois.map do |(family, records)|
|
28
|
-
ranges = records.map do |cidr|
|
29
|
-
range = IPAddr.new(cidr).to_range
|
30
|
-
(range.begin.to_i..range.end.to_i)
|
31
|
-
end
|
32
|
-
[family, IntervalTree::Tree.new(ranges)]
|
33
|
-
end.to_h
|
34
|
-
end
|
35
|
-
|
36
|
-
def self.whois
|
10
|
+
ip_ranges do
|
37
11
|
client = Irrc::Client.new
|
38
12
|
client.query :radb, AS
|
39
13
|
results = client.perform
|
40
14
|
|
41
|
-
%i
|
42
|
-
|
43
|
-
end.
|
15
|
+
%i[ipv4 ipv6].map do |family|
|
16
|
+
results[AS][family][AS]
|
17
|
+
end.flatten
|
44
18
|
end
|
45
19
|
end
|
46
20
|
|
47
|
-
rule Legitbot::Facebook, %w
|
21
|
+
rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
|
48
22
|
end
|
data/lib/legitbot/google.rb
CHANGED
@@ -1,14 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://support.google.com/webmasters/answer/1061943
|
3
5
|
# https://support.google.com/webmasters/answer/80553
|
4
|
-
|
5
6
|
class Google < BotMatch
|
6
|
-
|
7
|
-
|
8
|
-
def valid?
|
9
|
-
subdomain_of?(*Google::ValidDomains) && reverse_resolves?
|
10
|
-
end
|
7
|
+
domains 'google.com.', 'googlebot.com.'
|
11
8
|
end
|
12
9
|
|
13
|
-
rule Legitbot::Google, %w
|
10
|
+
rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
|
14
11
|
end
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -1,6 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Bot lookup based on user agent
|
1
5
|
module Legitbot
|
2
6
|
@rules = []
|
3
7
|
|
8
|
+
class << self
|
9
|
+
attr_accessor :resolver_config
|
10
|
+
end
|
11
|
+
|
4
12
|
##
|
5
13
|
# Lookup a bot based on its signature from +User-Agent+ header.
|
6
14
|
#
|
@@ -10,15 +18,12 @@ module Legitbot
|
|
10
18
|
# otherwise.
|
11
19
|
# :yields: a found bot
|
12
20
|
#
|
13
|
-
def self.bot(
|
14
|
-
bots =
|
15
|
-
|
16
|
-
|
17
|
-
}.map { |rule|
|
18
|
-
rule[:class].new(ip, resolver_config)
|
19
|
-
}
|
21
|
+
def self.bot(user_agent, ip)
|
22
|
+
bots = @rules
|
23
|
+
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
24
|
+
.map { |rule| rule[:class].new(ip) }
|
20
25
|
|
21
|
-
selected = bots.select
|
26
|
+
selected = bots.select(&:valid?).first if bots.size > 1
|
22
27
|
selected = bots.last if selected.nil?
|
23
28
|
|
24
29
|
if selected && block_given?
|
@@ -29,6 +34,6 @@ module Legitbot
|
|
29
34
|
end
|
30
35
|
|
31
36
|
def self.rule(clazz, fragments)
|
32
|
-
@rules << {:
|
37
|
+
@rules << { class: clazz, fragments: fragments }
|
33
38
|
end
|
34
39
|
end
|
data/lib/legitbot/pinterest.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
|
-
# https://help.pinterest.com/en/articles/about-pinterest-crawler-0
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://help.pinterest.com/en/articles/about-pinterest-crawler-0
|
4
5
|
class Pinterest < BotMatch
|
5
|
-
|
6
|
-
|
7
|
-
def valid?
|
8
|
-
subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
|
9
|
-
end
|
6
|
+
domains 'pinterest.com.'
|
10
7
|
end
|
11
8
|
|
12
|
-
rule Legitbot::Pinterest, %w
|
9
|
+
rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
|
13
10
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'resolv'
|
4
|
+
require 'ipaddr'
|
5
|
+
|
6
|
+
module Legitbot
|
7
|
+
module Validators
|
8
|
+
#
|
9
|
+
# In a bot matcher:
|
10
|
+
# `domains 'search.msn.com', ...`
|
11
|
+
# `domains 'googlebot.com', reverse: false`
|
12
|
+
#
|
13
|
+
# `reverse` is true by default.
|
14
|
+
module Domains
|
15
|
+
class << self
|
16
|
+
def included(base)
|
17
|
+
base.extend ClassMethods
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def valid_domain?
|
22
|
+
self.class.valid_domain?(@ip)
|
23
|
+
end
|
24
|
+
|
25
|
+
module ClassMethods # :nodoc:
|
26
|
+
include Legitbot::Config::Resolver
|
27
|
+
|
28
|
+
def domains(*list, reverse: true)
|
29
|
+
@valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
|
30
|
+
@validate_reverse_record = reverse
|
31
|
+
end
|
32
|
+
|
33
|
+
def check_domains?
|
34
|
+
instance_variable_defined?(:@valid_domains)
|
35
|
+
end
|
36
|
+
|
37
|
+
def valid_domain?(ip)
|
38
|
+
return true unless check_domains?
|
39
|
+
return true if @valid_domains.empty?
|
40
|
+
|
41
|
+
domains = reverse_domains(ip)
|
42
|
+
return false if domains.empty?
|
43
|
+
|
44
|
+
record = find_subdomain_record(domains)
|
45
|
+
return false unless record
|
46
|
+
return true unless @validate_reverse_record
|
47
|
+
|
48
|
+
ip == reverse_ip(record)
|
49
|
+
end
|
50
|
+
|
51
|
+
# Looks up the reverse (PTR) names registered for +ip+.
#
# Returns whatever the resolver's +getnames+ yields for +ip+. When the
# lookup raises Resolv::ResolvError an empty Array is returned instead of
# +nil+, so callers can call +empty?+ on the result without a nil check.
def reverse_domains(ip)
  resolver.getnames(ip)
rescue Resolv::ResolvError
  []
end
|
56
|
+
|
57
|
+
def find_subdomain_record(domains)
|
58
|
+
domains.find do |d|
|
59
|
+
@valid_domains.any? { |vd| d.subdomain_of?(vd) }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
# Resolves +record+ (a reverse-lookup name) forward to an address.
#
# Returns the address from the resolver's +getaddress+ as a String, or
# +nil+ when +record+ is +nil+ or the lookup raises Resolv::ResolvError.
# Returning +nil+ here makes the caller's +ip == reverse_ip(record)+
# comparison fail cleanly instead of propagating the error.
def reverse_ip(record)
  return nil if record.nil?

  resolver.getaddress(record.to_s).to_s
rescue Resolv::ResolvError
  nil
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ipaddr'
|
4
|
+
require 'interval_tree'
|
5
|
+
|
6
|
+
module Legitbot
|
7
|
+
module Validators
|
8
|
+
#
|
9
|
+
# In a bot matcher:
|
10
|
+
# `ip_ranges ip, range, ip, ...`
|
11
|
+
# `ip_ranges do [ip, range, ...]; end`
|
12
|
+
module IpRanges
|
13
|
+
class << self
|
14
|
+
def included(base)
|
15
|
+
base.extend ClassMethods
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def valid_ip?
|
20
|
+
self.class.valid_ip?(@ip)
|
21
|
+
end
|
22
|
+
|
23
|
+
module ClassMethods # :nodoc:
|
24
|
+
FAMILIES = %i[ipv4 ipv6].freeze
|
25
|
+
EMPTY_GENERATOR = proc { [] }
|
26
|
+
|
27
|
+
# Declares the IP addresses / CIDR ranges accepted by this matcher.
#
# Either pass the list directly (+ip_ranges '17.0.0.0/8', ...+, nested
# arrays are flattened) or pass a block that returns the list; the block
# is stored as the loader and called by +load_ips+.
def ip_ranges(*ips, &block)
  static_ips = ips.flatten
  @ip_ranges = partition_ips(static_ips) unless static_ips.empty?

  # Without a block the loader must reproduce the static list. Falling back
  # to the empty generator would make +reload_ips+ replace the declared
  # ranges with an empty set, after which +valid_ip?+ accepts every address.
  fallback = static_ips.empty? ? EMPTY_GENERATOR : proc { static_ips }
  @ip_ranges_loader = block || fallback
  @ip_loader_mutex = Mutex.new
end
|
32
|
+
|
33
|
+
def check_ranges?
|
34
|
+
instance_variable_defined?(:@ip_ranges_loader)
|
35
|
+
end
|
36
|
+
|
37
|
+
# Tells whether +ip+ falls inside one of the declared ranges.
#
# Returns +true+ when the matcher declares no ranges at all (nothing to
# check) or when the loaded range set is empty; otherwise +true+ only if
# the tree for the address family reports at least one matching range.
def valid_ip?(ip)
  return true unless check_ranges?

  known = valid_ips
  return true if known.empty?

  obj = IPAddr.new(ip)
  ranges = known[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
  # NOTE(review): the interval tree's +search+ appears to return nil rather
  # than an empty list when the tree holds no ranges (e.g. only IPv4 ranges
  # declared, IPv6 address checked) - confirm against the gem. Treat nil as
  # "no match" instead of raising NoMethodError.
  !(ranges.nil? || ranges.empty?)
end
|
45
|
+
|
46
|
+
def valid_ips
|
47
|
+
@ip_loader_mutex.synchronize do
|
48
|
+
@ip_ranges ||= load_ips
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def reload_ips
|
53
|
+
@ip_loader_mutex.synchronize do
|
54
|
+
@ip_ranges = load_ips
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def load_ips
|
59
|
+
partition_ips(@ip_ranges_loader.call)
|
60
|
+
end
|
61
|
+
|
62
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
63
|
+
def partition_ips(ips)
|
64
|
+
return [] if ips.empty?
|
65
|
+
|
66
|
+
ips
|
67
|
+
.map { |cidr| IPAddr.new(cidr) }
|
68
|
+
.partition(&:ipv4?)
|
69
|
+
.each_with_index
|
70
|
+
.map do |list, index|
|
71
|
+
ranges = list.map(&:to_range).map do |r|
|
72
|
+
(r.begin.to_i..r.end.to_i)
|
73
|
+
end
|
74
|
+
[FAMILIES[index], IntervalTree::Tree.new(ranges)]
|
75
|
+
end.to_h
|
76
|
+
end
|
77
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot/yandex.rb
CHANGED
@@ -1,17 +1,33 @@
|
|
1
|
-
|
2
|
-
# https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
|
4
5
|
class Yandex < BotMatch
|
5
|
-
|
6
|
-
|
7
|
-
def valid?
|
8
|
-
subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
|
9
|
-
end
|
6
|
+
domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
|
10
7
|
end
|
11
8
|
|
12
|
-
rule Legitbot::Yandex, %w
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
9
|
+
rule Legitbot::Yandex, %w[
|
10
|
+
YandexBot
|
11
|
+
YandexAccessibilityBot
|
12
|
+
YandexMobileBot
|
13
|
+
YandexDirectDyn
|
14
|
+
YandexScreenshotBot
|
15
|
+
YandexImages
|
16
|
+
YandexVideo
|
17
|
+
YandexVideoParser
|
18
|
+
YandexMedia
|
19
|
+
YandexBlogs
|
20
|
+
YandexFavicons
|
21
|
+
YandexWebmaster
|
22
|
+
YandexPagechecker
|
23
|
+
YandexImageResizer
|
24
|
+
YaDirectFetcher
|
25
|
+
YandexCalendar
|
26
|
+
YandexSitelinks
|
27
|
+
YandexMetrika
|
28
|
+
YandexNews
|
29
|
+
YandexVertis
|
30
|
+
YandexSearchShop
|
31
|
+
YandexVerticals
|
32
|
+
]
|
17
33
|
end
|