legitbot 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/Gemfile +2 -0
- data/Rakefile +5 -3
- data/legitbot.gemspec +4 -3
- data/lib/legitbot/ahrefs.rb +13 -8
- data/lib/legitbot/apple.rb +11 -11
- data/lib/legitbot/baidu.rb +5 -7
- data/lib/legitbot/bing.rb +5 -7
- data/lib/legitbot/botmatch.rb +17 -44
- data/lib/legitbot/config/resolver.rb +18 -0
- data/lib/legitbot/duckduckgo.rb +15 -7
- data/lib/legitbot/facebook.rb +8 -34
- data/lib/legitbot/google.rb +5 -8
- data/lib/legitbot/legitbot.rb +14 -9
- data/lib/legitbot/pinterest.rb +5 -8
- data/lib/legitbot/validators/domains.rb +71 -0
- data/lib/legitbot/validators/ip_ranges.rb +81 -0
- data/lib/legitbot/version.rb +3 -1
- data/lib/legitbot/yandex.rb +28 -12
- data/lib/legitbot.rb +2 -0
- data/test/ahrefs_test.rb +16 -8
- data/test/apple_as_google_test.rb +9 -4
- data/test/apple_test.rb +11 -4
- data/test/botmatch_test.rb +4 -22
- data/test/facebook_test.rb +24 -9
- data/test/google_test.rb +24 -14
- data/test/legitbot/validators/domains_test.rb +58 -0
- data/test/legitbot/validators/ip_ranges_test.rb +113 -0
- data/test/legitbot_test.rb +8 -4
- data/test/pinterest_test.rb +26 -14
- metadata +30 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dfc1b3322ff4f85957dabf6790d535f27f99feed47bdb4ff1bba65f6242d31a2
|
4
|
+
data.tar.gz: 32e842cc3d297b3afda0ef9b265121a11c363857a726819f886b441e6c53a53c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3654c256da13b37045425457a96ac9a8b41c5ae5c0cce49b7898170e2d23a66a5cb7e612503dc1d88dc2e1240dcb07c9ccc5d5aa8439f280144abe969dc0ae7b
|
7
|
+
data.tar.gz: 554e120d1001a71f455aedcd4d30397b22130279a6f99f8e022d40ade50427590dd52e982820399852ec52fbb2c96b7ea1461f82d8e88cb89acc053097136b32
|
data/.rubocop.yml
ADDED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'bundler'
|
3
5
|
require 'bump/tasks'
|
4
|
-
require
|
6
|
+
require 'rake/testtask'
|
5
7
|
Bundler::GemHelper.install_tasks
|
6
8
|
|
7
9
|
Bump.tag_by_default = true
|
8
10
|
|
9
11
|
Rake::TestTask.new do |t|
|
10
|
-
t.libs <<
|
11
|
-
t.test_files = FileList['test
|
12
|
+
t.libs << 'test'
|
13
|
+
t.test_files = FileList['test/**/*_test.rb']
|
12
14
|
t.warning = true
|
13
15
|
t.verbose = true
|
14
16
|
end
|
data/legitbot.gemspec
CHANGED
@@ -17,9 +17,10 @@ Gem::Specification.new do |spec|
|
|
17
17
|
spec.required_ruby_version = '>= 2.3.0'
|
18
18
|
spec.add_dependency "irrc", ">= 0.2.1"
|
19
19
|
spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
|
20
|
-
spec.add_development_dependency "bump"
|
21
|
-
spec.add_development_dependency "rake"
|
22
|
-
spec.add_development_dependency "
|
20
|
+
spec.add_development_dependency "bump", '>= 0.8.0'
|
21
|
+
spec.add_development_dependency "rake", '>= 12.3.0'
|
22
|
+
spec.add_development_dependency "rubocop", '>= 0.74.0'
|
23
|
+
spec.add_development_dependency "minitest", '>= 5.1.0'
|
23
24
|
|
24
25
|
spec.files = `git ls-files`.split($/)
|
25
26
|
spec.rdoc_options = ["--charset=UTF-8"]
|
data/lib/legitbot/ahrefs.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://ahrefs.com/robot
|
3
5
|
class Ahrefs < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
ip_ranges %w[
|
7
|
+
54.36.148.0/24
|
8
|
+
54.36.149.0/24
|
9
|
+
54.36.150.0/24
|
10
|
+
195.154.122.0/24
|
11
|
+
195.154.123.0/24
|
12
|
+
195.154.126.0/24
|
13
|
+
195.154.127.0/24
|
14
|
+
]
|
10
15
|
end
|
11
16
|
|
12
|
-
rule Legitbot::Ahrefs, %w
|
17
|
+
rule Legitbot::Ahrefs, %w[AhrefsBot]
|
13
18
|
end
|
data/lib/legitbot/apple.rb
CHANGED
@@ -1,20 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'ipaddr'
|
2
4
|
|
3
|
-
module Legitbot
|
5
|
+
module Legitbot # :nodoc:
|
4
6
|
# https://support.apple.com/en-us/HT204683
|
5
|
-
|
6
7
|
class Apple < BotMatch
|
7
|
-
|
8
|
-
|
9
|
-
def valid?
|
10
|
-
ip = IPAddr.new @ip
|
11
|
-
Range.include? ip
|
12
|
-
end
|
8
|
+
ip_ranges '17.0.0.0/8'
|
13
9
|
end
|
14
10
|
|
15
|
-
|
11
|
+
# https://support.apple.com/en-us/HT204683
|
12
|
+
# rubocop:disable Naming/ClassAndModuleCamelCase
|
13
|
+
class Apple_as_Google < BotMatch
|
14
|
+
ip_ranges '17.0.0.0/8'
|
16
15
|
end
|
16
|
+
# rubocop:enable Naming/ClassAndModuleCamelCase
|
17
17
|
|
18
|
-
rule Legitbot::Apple, %w
|
19
|
-
rule Legitbot::Apple_as_Google, %w
|
18
|
+
rule Legitbot::Apple, %w[Applebot]
|
19
|
+
rule Legitbot::Apple_as_Google, %w[Googlebot]
|
20
20
|
end
|
data/lib/legitbot/baidu.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# http://help.baidu.com/question?prod_en=master&class=498&id=1000973
|
3
5
|
class Baidu < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
def valid?
|
7
|
-
subdomain_of?(*Baidu::ValidDomains)
|
8
|
-
end
|
6
|
+
domains 'baidu.com.', 'baidu.jp.', reverse: false
|
9
7
|
end
|
10
8
|
|
11
|
-
rule Legitbot::Baidu, %w
|
9
|
+
rule Legitbot::Baidu, %w[Baiduspider]
|
12
10
|
end
|
data/lib/legitbot/bing.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
|
3
5
|
class Bing < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
def valid?
|
7
|
-
subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
|
8
|
-
end
|
6
|
+
domains 'search.msn.com.'
|
9
7
|
end
|
10
8
|
|
11
|
-
rule Legitbot::Bing, %w
|
9
|
+
rule Legitbot::Bing, %w[Bingbot bingbot]
|
12
10
|
end
|
data/lib/legitbot/botmatch.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'config/resolver'
|
4
|
+
require_relative 'validators/domains'
|
5
|
+
require_relative 'validators/ip_ranges'
|
3
6
|
|
4
7
|
module Legitbot
|
5
8
|
##
|
@@ -7,61 +10,31 @@ module Legitbot
|
|
7
10
|
# +valid?+, +fake?+ and +detected_as+
|
8
11
|
#
|
9
12
|
class BotMatch
|
10
|
-
|
11
|
-
|
12
|
-
@ip = ip
|
13
|
-
end
|
14
|
-
|
15
|
-
##
|
16
|
-
# Returns a Resolv::DNS::Name instance with
|
17
|
-
# the reverse name
|
18
|
-
def reverse_domain
|
19
|
-
@reverse_domain ||= @dns.getname(@ip)
|
20
|
-
rescue Resolv::ResolvError
|
21
|
-
@reverse_domain ||= nil
|
22
|
-
end
|
23
|
-
|
24
|
-
##
|
25
|
-
# Returns a String with the reverse name
|
26
|
-
def reverse_name
|
27
|
-
reverse_domain&.to_s
|
28
|
-
end
|
29
|
-
|
30
|
-
##
|
31
|
-
# Returns a String with IP created from the reverse name
|
32
|
-
def reversed_ip
|
33
|
-
return nil if reverse_name.nil?
|
13
|
+
include Legitbot::Validators::IpRanges
|
14
|
+
include Legitbot::Validators::Domains
|
34
15
|
|
35
|
-
|
36
|
-
@
|
37
|
-
end
|
38
|
-
|
39
|
-
def reverse_resolves?
|
40
|
-
@ip == reversed_ip
|
41
|
-
end
|
42
|
-
|
43
|
-
def subdomain_of?(*domains)
|
44
|
-
return false if reverse_name.nil?
|
45
|
-
|
46
|
-
domains.any? { |d|
|
47
|
-
reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
|
48
|
-
}
|
16
|
+
def initialize(ip)
|
17
|
+
@ip = ip
|
49
18
|
end
|
50
19
|
|
51
20
|
def detected_as
|
52
21
|
self.class.name.split('::').last.downcase.to_sym
|
53
22
|
end
|
54
23
|
|
24
|
+
def valid?
|
25
|
+
valid_ip? && valid_domain?
|
26
|
+
end
|
27
|
+
|
55
28
|
def fake?
|
56
29
|
!valid?
|
57
30
|
end
|
58
31
|
|
59
|
-
def self.valid?(ip
|
60
|
-
|
32
|
+
def self.valid?(ip)
|
33
|
+
new(ip).valid?
|
61
34
|
end
|
62
35
|
|
63
|
-
def self.fake?(ip
|
64
|
-
|
36
|
+
def self.fake?(ip)
|
37
|
+
new(ip).fake?
|
65
38
|
end
|
66
39
|
end
|
67
40
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'resolv'
|
4
|
+
|
5
|
+
module Legitbot
|
6
|
+
module Config
|
7
|
+
module Resolver # :nodoc:
|
8
|
+
def resolver_config(options = nil)
|
9
|
+
@resolver_config = options
|
10
|
+
end
|
11
|
+
|
12
|
+
def resolver
|
13
|
+
@resolver_config ||= Legitbot.resolver_config
|
14
|
+
@resolver ||= Resolv::DNS.new @resolver_config
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/legitbot/duckduckgo.rb
CHANGED
@@ -1,12 +1,20 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://duckduckgo.com/duckduckbot
|
3
5
|
class DuckDuckGo < BotMatch
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
ip_ranges %w[
|
7
|
+
50.16.241.113
|
8
|
+
50.16.241.114
|
9
|
+
50.16.241.117
|
10
|
+
50.16.247.234
|
11
|
+
52.204.97.54
|
12
|
+
52.5.190.19
|
13
|
+
54.197.234.188
|
14
|
+
54.208.100.253
|
15
|
+
23.21.227.69
|
16
|
+
]
|
9
17
|
end
|
10
18
|
|
11
|
-
rule Legitbot::DuckDuckGo, %w
|
19
|
+
rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
|
12
20
|
end
|
data/lib/legitbot/facebook.rb
CHANGED
@@ -1,48 +1,22 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'irrc'
|
3
|
-
require 'interval_tree'
|
4
4
|
|
5
|
-
module Legitbot
|
5
|
+
module Legitbot # :nodoc:
|
6
6
|
# https://developers.facebook.com/docs/sharing/webmasters/crawler
|
7
|
-
|
8
7
|
class Facebook < BotMatch
|
9
8
|
AS = 'AS32934'
|
10
9
|
|
11
|
-
|
12
|
-
ip = IPAddr.new(@ip)
|
13
|
-
Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
|
14
|
-
end
|
15
|
-
|
16
|
-
@mutex = Mutex.new
|
17
|
-
|
18
|
-
def self.valid_ips
|
19
|
-
@mutex.synchronize { @ips ||= load_ips }
|
20
|
-
end
|
21
|
-
|
22
|
-
def self.reload!
|
23
|
-
@mutex.synchronize { @ips = load_ips }
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.load_ips
|
27
|
-
whois.map do |(family, records)|
|
28
|
-
ranges = records.map do |cidr|
|
29
|
-
range = IPAddr.new(cidr).to_range
|
30
|
-
(range.begin.to_i..range.end.to_i)
|
31
|
-
end
|
32
|
-
[family, IntervalTree::Tree.new(ranges)]
|
33
|
-
end.to_h
|
34
|
-
end
|
35
|
-
|
36
|
-
def self.whois
|
10
|
+
ip_ranges do
|
37
11
|
client = Irrc::Client.new
|
38
12
|
client.query :radb, AS
|
39
13
|
results = client.perform
|
40
14
|
|
41
|
-
%i
|
42
|
-
|
43
|
-
end.
|
15
|
+
%i[ipv4 ipv6].map do |family|
|
16
|
+
results[AS][family][AS]
|
17
|
+
end.flatten
|
44
18
|
end
|
45
19
|
end
|
46
20
|
|
47
|
-
rule Legitbot::Facebook, %w
|
21
|
+
rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
|
48
22
|
end
|
data/lib/legitbot/google.rb
CHANGED
@@ -1,14 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Legitbot # :nodoc:
|
2
4
|
# https://support.google.com/webmasters/answer/1061943
|
3
5
|
# https://support.google.com/webmasters/answer/80553
|
4
|
-
|
5
6
|
class Google < BotMatch
|
6
|
-
|
7
|
-
|
8
|
-
def valid?
|
9
|
-
subdomain_of?(*Google::ValidDomains) && reverse_resolves?
|
10
|
-
end
|
7
|
+
domains 'google.com.', 'googlebot.com.'
|
11
8
|
end
|
12
9
|
|
13
|
-
rule Legitbot::Google, %w
|
10
|
+
rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
|
14
11
|
end
|
data/lib/legitbot/legitbot.rb
CHANGED
@@ -1,6 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
##
|
4
|
+
# Bot lookup based on user agent
|
1
5
|
module Legitbot
|
2
6
|
@rules = []
|
3
7
|
|
8
|
+
class << self
|
9
|
+
attr_accessor :resolver_config
|
10
|
+
end
|
11
|
+
|
4
12
|
##
|
5
13
|
# Lookup a bot based on its signature from +User-Agent+ header.
|
6
14
|
#
|
@@ -10,15 +18,12 @@ module Legitbot
|
|
10
18
|
# otherwise.
|
11
19
|
# :yields: a found bot
|
12
20
|
#
|
13
|
-
def self.bot(
|
14
|
-
bots =
|
15
|
-
|
16
|
-
|
17
|
-
}.map { |rule|
|
18
|
-
rule[:class].new(ip, resolver_config)
|
19
|
-
}
|
21
|
+
def self.bot(user_agent, ip)
|
22
|
+
bots = @rules
|
23
|
+
.select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
|
24
|
+
.map { |rule| rule[:class].new(ip) }
|
20
25
|
|
21
|
-
selected = bots.select
|
26
|
+
selected = bots.select(&:valid?).first if bots.size > 1
|
22
27
|
selected = bots.last if selected.nil?
|
23
28
|
|
24
29
|
if selected && block_given?
|
@@ -29,6 +34,6 @@ module Legitbot
|
|
29
34
|
end
|
30
35
|
|
31
36
|
def self.rule(clazz, fragments)
|
32
|
-
@rules << {:
|
37
|
+
@rules << { class: clazz, fragments: fragments }
|
33
38
|
end
|
34
39
|
end
|
data/lib/legitbot/pinterest.rb
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
|
2
|
-
# https://help.pinterest.com/en/articles/about-pinterest-crawler-0
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://help.pinterest.com/en/articles/about-pinterest-crawler-0
|
4
5
|
class Pinterest < BotMatch
|
5
|
-
|
6
|
-
|
7
|
-
def valid?
|
8
|
-
subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
|
9
|
-
end
|
6
|
+
domains 'pinterest.com.'
|
10
7
|
end
|
11
8
|
|
12
|
-
rule Legitbot::Pinterest, %w
|
9
|
+
rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
|
13
10
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'resolv'
|
4
|
+
require 'ipaddr'
|
5
|
+
|
6
|
+
module Legitbot
|
7
|
+
module Validators
|
8
|
+
#
|
9
|
+
# In a bot matcher:
|
10
|
+
# `domains 'search.msn.com', ...`
|
11
|
+
# `domains 'googlebot.com', reverse: false`
|
12
|
+
#
|
13
|
+
# `reverse` is true by default.
|
14
|
+
module Domains
|
15
|
+
class << self
|
16
|
+
def included(base)
|
17
|
+
base.extend ClassMethods
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def valid_domain?
|
22
|
+
self.class.valid_domain?(@ip)
|
23
|
+
end
|
24
|
+
|
25
|
+
module ClassMethods # :nodoc:
|
26
|
+
include Legitbot::Config::Resolver
|
27
|
+
|
28
|
+
def domains(*list, reverse: true)
|
29
|
+
@valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
|
30
|
+
@validate_reverse_record = reverse
|
31
|
+
end
|
32
|
+
|
33
|
+
def check_domains?
|
34
|
+
instance_variable_defined?(:@valid_domains)
|
35
|
+
end
|
36
|
+
|
37
|
+
def valid_domain?(ip)
|
38
|
+
return true unless check_domains?
|
39
|
+
return true if @valid_domains.empty?
|
40
|
+
|
41
|
+
domains = reverse_domains(ip)
|
42
|
+
return false if domains.empty?
|
43
|
+
|
44
|
+
record = find_subdomain_record(domains)
|
45
|
+
return false unless record
|
46
|
+
return true unless @validate_reverse_record
|
47
|
+
|
48
|
+
ip == reverse_ip(record)
|
49
|
+
end
|
50
|
+
|
51
|
+
def reverse_domains(ip)
|
52
|
+
resolver.getnames(ip)
|
53
|
+
rescue Resolv::ResolvError
|
54
|
+
nil
|
55
|
+
end
|
56
|
+
|
57
|
+
def find_subdomain_record(domains)
|
58
|
+
domains.find do |d|
|
59
|
+
@valid_domains.any? { |vd| d.subdomain_of?(vd) }
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def reverse_ip(record)
|
64
|
+
return nil if record.nil?
|
65
|
+
|
66
|
+
resolver.getaddress(record.to_s).to_s
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ipaddr'
|
4
|
+
require 'interval_tree'
|
5
|
+
|
6
|
+
module Legitbot
|
7
|
+
module Validators
|
8
|
+
#
|
9
|
+
# In a bot matcher:
|
10
|
+
# `ip_ranges ip, range, ip, ...`
|
11
|
+
# `ip_ranges do [ip, range, ...]; end`
|
12
|
+
module IpRanges
|
13
|
+
class << self
|
14
|
+
def included(base)
|
15
|
+
base.extend ClassMethods
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def valid_ip?
|
20
|
+
self.class.valid_ip?(@ip)
|
21
|
+
end
|
22
|
+
|
23
|
+
module ClassMethods # :nodoc:
|
24
|
+
FAMILIES = %i[ipv4 ipv6].freeze
|
25
|
+
EMPTY_GENERATOR = proc { [] }
|
26
|
+
|
27
|
+
def ip_ranges(*ips, &block)
|
28
|
+
@ip_ranges = partition_ips(ips.flatten) unless ips.empty?
|
29
|
+
@ip_ranges_loader = block_given? ? block : EMPTY_GENERATOR
|
30
|
+
@ip_loader_mutex = Mutex.new
|
31
|
+
end
|
32
|
+
|
33
|
+
def check_ranges?
|
34
|
+
instance_variable_defined?(:@ip_ranges_loader)
|
35
|
+
end
|
36
|
+
|
37
|
+
def valid_ip?(ip)
|
38
|
+
return true unless check_ranges?
|
39
|
+
return true if valid_ips.empty?
|
40
|
+
|
41
|
+
obj = IPAddr.new(ip)
|
42
|
+
ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
|
43
|
+
!ranges.empty?
|
44
|
+
end
|
45
|
+
|
46
|
+
def valid_ips
|
47
|
+
@ip_loader_mutex.synchronize do
|
48
|
+
@ip_ranges ||= load_ips
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def reload_ips
|
53
|
+
@ip_loader_mutex.synchronize do
|
54
|
+
@ip_ranges = load_ips
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def load_ips
|
59
|
+
partition_ips(@ip_ranges_loader.call)
|
60
|
+
end
|
61
|
+
|
62
|
+
# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
|
63
|
+
def partition_ips(ips)
|
64
|
+
return [] if ips.empty?
|
65
|
+
|
66
|
+
ips
|
67
|
+
.map { |cidr| IPAddr.new(cidr) }
|
68
|
+
.partition(&:ipv4?)
|
69
|
+
.each_with_index
|
70
|
+
.map do |list, index|
|
71
|
+
ranges = list.map(&:to_range).map do |r|
|
72
|
+
(r.begin.to_i..r.end.to_i)
|
73
|
+
end
|
74
|
+
[FAMILIES[index], IntervalTree::Tree.new(ranges)]
|
75
|
+
end.to_h
|
76
|
+
end
|
77
|
+
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
data/lib/legitbot/version.rb
CHANGED
data/lib/legitbot/yandex.rb
CHANGED
@@ -1,17 +1,33 @@
|
|
1
|
-
|
2
|
-
# https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
3
|
+
module Legitbot # :nodoc:
|
4
|
+
# https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
|
4
5
|
class Yandex < BotMatch
|
5
|
-
|
6
|
-
|
7
|
-
def valid?
|
8
|
-
subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
|
9
|
-
end
|
6
|
+
domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
|
10
7
|
end
|
11
8
|
|
12
|
-
rule Legitbot::Yandex, %w
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
9
|
+
rule Legitbot::Yandex, %w[
|
10
|
+
YandexBot
|
11
|
+
YandexAccessibilityBot
|
12
|
+
YandexMobileBot
|
13
|
+
YandexDirectDyn
|
14
|
+
YandexScreenshotBot
|
15
|
+
YandexImages
|
16
|
+
YandexVideo
|
17
|
+
YandexVideoParser
|
18
|
+
YandexMedia
|
19
|
+
YandexBlogs
|
20
|
+
YandexFavicons
|
21
|
+
YandexWebmaster
|
22
|
+
YandexPagechecker
|
23
|
+
YandexImageResizer
|
24
|
+
YaDirectFetcher
|
25
|
+
YandexCalendar
|
26
|
+
YandexSitelinks
|
27
|
+
YandexMetrika
|
28
|
+
YandexNews
|
29
|
+
YandexVertis
|
30
|
+
YandexSearchShop
|
31
|
+
YandexVerticals
|
32
|
+
]
|
17
33
|
end
|