legitbot 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d775b4e4e434615989b8ef35aca9ac6498b06742ca6e8534f9bb4d6f3d5c9b04
4
- data.tar.gz: 239069018d1b043ec99bce8496b3760fc189611f6aa83545835d5f71ce7f94c5
3
+ metadata.gz: dfc1b3322ff4f85957dabf6790d535f27f99feed47bdb4ff1bba65f6242d31a2
4
+ data.tar.gz: 32e842cc3d297b3afda0ef9b265121a11c363857a726819f886b441e6c53a53c
5
5
  SHA512:
6
- metadata.gz: '08e08f94cbb4979566982d91a525f7d03618c9c10e85613bcebfd51f8f8f8b0dab669ba9cd1b67c7a6d6a36c23d23ad88b64996bf252bd884082045ae590d38c'
7
- data.tar.gz: 97c4dd80bf62205ea4a03a0bc7570d57520611b4015f3bf9f66b450d78878a3fde1c9097e0ac79691e40d5f29e9ba676aaa3af70f71650adb61c7f4a0ee1ddf0
6
+ metadata.gz: 3654c256da13b37045425457a96ac9a8b41c5ae5c0cce49b7898170e2d23a66a5cb7e612503dc1d88dc2e1240dcb07c9ccc5d5aa8439f280144abe969dc0ae7b
7
+ data.tar.gz: 554e120d1001a71f455aedcd4d30397b22130279a6f99f8e022d40ade50427590dd52e982820399852ec52fbb2c96b7ea1461f82d8e88cb89acc053097136b32
data/.rubocop.yml ADDED
@@ -0,0 +1,8 @@
1
+ AllCops:
2
+ Include:
3
+ - '**/Gemfile'
4
+ - '**/Rakefile'
5
+ - 'lib/**/*.rb'
6
+ - 'test/**/*.rb'
7
+ Exclude:
8
+ - 'pkg/**'
data/Gemfile CHANGED
@@ -1,2 +1,4 @@
1
+ # frozen_string_literal: true
2
+
1
3
  source 'https://rubygems.org'
2
4
  gemspec
data/Rakefile CHANGED
@@ -1,14 +1,16 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'rubygems'
2
4
  require 'bundler'
3
5
  require 'bump/tasks'
4
- require "rake/testtask"
6
+ require 'rake/testtask'
5
7
  Bundler::GemHelper.install_tasks
6
8
 
7
9
  Bump.tag_by_default = true
8
10
 
9
11
  Rake::TestTask.new do |t|
10
- t.libs << "test"
11
- t.test_files = FileList['test/*_test.rb']
12
+ t.libs << 'test'
13
+ t.test_files = FileList['test/**/*_test.rb']
12
14
  t.warning = true
13
15
  t.verbose = true
14
16
  end
data/legitbot.gemspec CHANGED
@@ -17,9 +17,10 @@ Gem::Specification.new do |spec|
17
17
  spec.required_ruby_version = '>= 2.3.0'
18
18
  spec.add_dependency "irrc", ">= 0.2.1"
19
19
  spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
20
- spec.add_development_dependency "bump"
21
- spec.add_development_dependency "rake"
22
- spec.add_development_dependency "minitest"
20
+ spec.add_development_dependency "bump", '>= 0.8.0'
21
+ spec.add_development_dependency "rake", '>= 12.3.0'
22
+ spec.add_development_dependency "rubocop", '>= 0.74.0'
23
+ spec.add_development_dependency "minitest", '>= 5.1.0'
23
24
 
24
25
  spec.files = `git ls-files`.split($/)
25
26
  spec.rdoc_options = ["--charset=UTF-8"]
@@ -1,13 +1,18 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://ahrefs.com/robot
3
5
  class Ahrefs < BotMatch
4
- Ranges = %w(54.36.148.0/24 54.36.149.0/24 54.36.150.0/24 195.154.122.0/24 195.154.123.0/24 195.154.126.0/24 195.154.127.0/24)
5
-
6
- def valid?
7
- ip = IPAddr.new @ip
8
- Ranges.any? { |range| IPAddr.new(range).include? ip }
9
- end
6
+ ip_ranges %w[
7
+ 54.36.148.0/24
8
+ 54.36.149.0/24
9
+ 54.36.150.0/24
10
+ 195.154.122.0/24
11
+ 195.154.123.0/24
12
+ 195.154.126.0/24
13
+ 195.154.127.0/24
14
+ ]
10
15
  end
11
16
 
12
- rule Legitbot::Ahrefs, %w(AhrefsBot)
17
+ rule Legitbot::Ahrefs, %w[AhrefsBot]
13
18
  end
@@ -1,20 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'ipaddr'
2
4
 
3
- module Legitbot
5
+ module Legitbot # :nodoc:
4
6
  # https://support.apple.com/en-us/HT204683
5
-
6
7
  class Apple < BotMatch
7
- Range = IPAddr.new('17.0.0.0/8')
8
-
9
- def valid?
10
- ip = IPAddr.new @ip
11
- Range.include? ip
12
- end
8
+ ip_ranges '17.0.0.0/8'
13
9
  end
14
10
 
15
- class Apple_as_Google < Apple
11
+ # https://support.apple.com/en-us/HT204683
12
+ # rubocop:disable Naming/ClassAndModuleCamelCase
13
+ class Apple_as_Google < BotMatch
14
+ ip_ranges '17.0.0.0/8'
16
15
  end
16
+ # rubocop:enable Naming/ClassAndModuleCamelCase
17
17
 
18
- rule Legitbot::Apple, %w(Applebot)
19
- rule Legitbot::Apple_as_Google, %w(Googlebot)
18
+ rule Legitbot::Apple, %w[Applebot]
19
+ rule Legitbot::Apple_as_Google, %w[Googlebot]
20
20
  end
@@ -1,12 +1,10 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # http://help.baidu.com/question?prod_en=master&class=498&id=1000973
3
5
  class Baidu < BotMatch
4
- ValidDomains = ["baidu.com.", "baidu.jp."]
5
-
6
- def valid?
7
- subdomain_of?(*Baidu::ValidDomains)
8
- end
6
+ domains 'baidu.com.', 'baidu.jp.', reverse: false
9
7
  end
10
8
 
11
- rule Legitbot::Baidu, %w(Baiduspider)
9
+ rule Legitbot::Baidu, %w[Baiduspider]
12
10
  end
data/lib/legitbot/bing.rb CHANGED
@@ -1,12 +1,10 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
3
5
  class Bing < BotMatch
4
- ValidDomains = ["search.msn.com."]
5
-
6
- def valid?
7
- subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
8
- end
6
+ domains 'search.msn.com.'
9
7
  end
10
8
 
11
- rule Legitbot::Bing, %w(Bingbot bingbot)
9
+ rule Legitbot::Bing, %w[Bingbot bingbot]
12
10
  end
@@ -1,5 +1,8 @@
1
- require 'resolv'
2
- require 'ipaddr'
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'config/resolver'
4
+ require_relative 'validators/domains'
5
+ require_relative 'validators/ip_ranges'
3
6
 
4
7
  module Legitbot
5
8
  ##
@@ -7,61 +10,31 @@ module Legitbot
7
10
  # +valid?+, +fake?+ and +detected_as+
8
11
  #
9
12
  class BotMatch
10
- def initialize(ip, resolver_config = nil)
11
- @dns = Resolv::DNS.new(resolver_config)
12
- @ip = ip
13
- end
14
-
15
- ##
16
- # Returns a Resolv::DNS::Name instance with
17
- # the reverse name
18
- def reverse_domain
19
- @reverse_domain ||= @dns.getname(@ip)
20
- rescue Resolv::ResolvError
21
- @reverse_domain ||= nil
22
- end
23
-
24
- ##
25
- # Returns a String with the reverse name
26
- def reverse_name
27
- reverse_domain&.to_s
28
- end
29
-
30
- ##
31
- # Returns a String with IP created from the reverse name
32
- def reversed_ip
33
- return nil if reverse_name.nil?
13
+ include Legitbot::Validators::IpRanges
14
+ include Legitbot::Validators::Domains
34
15
 
35
- @reverse_ip ||= @dns.getaddress(reverse_name)
36
- @reverse_ip.to_s
37
- end
38
-
39
- def reverse_resolves?
40
- @ip == reversed_ip
41
- end
42
-
43
- def subdomain_of?(*domains)
44
- return false if reverse_name.nil?
45
-
46
- domains.any? { |d|
47
- reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
48
- }
16
+ def initialize(ip)
17
+ @ip = ip
49
18
  end
50
19
 
51
20
  def detected_as
52
21
  self.class.name.split('::').last.downcase.to_sym
53
22
  end
54
23
 
24
+ def valid?
25
+ valid_ip? && valid_domain?
26
+ end
27
+
55
28
  def fake?
56
29
  !valid?
57
30
  end
58
31
 
59
- def self.valid?(ip, resolver_config = nil)
60
- self.new(ip, resolver_config).valid?
32
+ def self.valid?(ip)
33
+ new(ip).valid?
61
34
  end
62
35
 
63
- def self.fake?(ip, resolver_config = nil)
64
- self.new(ip, resolver_config).fake?
36
+ def self.fake?(ip)
37
+ new(ip).fake?
65
38
  end
66
39
  end
67
40
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'resolv'
4
+
5
+ module Legitbot
6
+ module Config
7
+ module Resolver # :nodoc:
8
+ def resolver_config(options = nil)
9
+ @resolver_config = options
10
+ end
11
+
12
+ def resolver
13
+ @resolver_config ||= Legitbot.resolver_config
14
+ @resolver ||= Resolv::DNS.new @resolver_config
15
+ end
16
+ end
17
+ end
18
+ end
@@ -1,12 +1,20 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://duckduckgo.com/duckduckbot
3
5
  class DuckDuckGo < BotMatch
4
- ValidIPs = %w(50.16.241.113 50.16.241.114 50.16.241.117 50.16.247.234 52.204.97.54 52.5.190.19 54.197.234.188 54.208.100.253 23.21.227.69)
5
-
6
- def valid?
7
- DuckDuckGo::ValidIPs.include? @ip
8
- end
6
+ ip_ranges %w[
7
+ 50.16.241.113
8
+ 50.16.241.114
9
+ 50.16.241.117
10
+ 50.16.247.234
11
+ 52.204.97.54
12
+ 52.5.190.19
13
+ 54.197.234.188
14
+ 54.208.100.253
15
+ 23.21.227.69
16
+ ]
9
17
  end
10
18
 
11
- rule Legitbot::DuckDuckGo, %w(DuckDuckBot)
19
+ rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
12
20
  end
@@ -1,48 +1,22 @@
1
- require 'ipaddr'
1
+ # frozen_string_literal: true
2
+
2
3
  require 'irrc'
3
- require 'interval_tree'
4
4
 
5
- module Legitbot
5
+ module Legitbot # :nodoc:
6
6
  # https://developers.facebook.com/docs/sharing/webmasters/crawler
7
-
8
7
  class Facebook < BotMatch
9
8
  AS = 'AS32934'
10
9
 
11
- def valid?
12
- ip = IPAddr.new(@ip)
13
- Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
14
- end
15
-
16
- @mutex = Mutex.new
17
-
18
- def self.valid_ips
19
- @mutex.synchronize { @ips ||= load_ips }
20
- end
21
-
22
- def self.reload!
23
- @mutex.synchronize { @ips = load_ips }
24
- end
25
-
26
- def self.load_ips
27
- whois.map do |(family, records)|
28
- ranges = records.map do |cidr|
29
- range = IPAddr.new(cidr).to_range
30
- (range.begin.to_i..range.end.to_i)
31
- end
32
- [family, IntervalTree::Tree.new(ranges)]
33
- end.to_h
34
- end
35
-
36
- def self.whois
10
+ ip_ranges do
37
11
  client = Irrc::Client.new
38
12
  client.query :radb, AS
39
13
  results = client.perform
40
14
 
41
- %i(ipv4 ipv6).map do |family|
42
- [family, results[AS][family][AS]]
43
- end.to_h
15
+ %i[ipv4 ipv6].map do |family|
16
+ results[AS][family][AS]
17
+ end.flatten
44
18
  end
45
19
  end
46
20
 
47
- rule Legitbot::Facebook, %w(Facebot facebookexternalhit/1.1)
21
+ rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
48
22
  end
@@ -1,14 +1,11 @@
1
- module Legitbot
1
+ # frozen_string_literal: true
2
+
3
+ module Legitbot # :nodoc:
2
4
  # https://support.google.com/webmasters/answer/1061943
3
5
  # https://support.google.com/webmasters/answer/80553
4
-
5
6
  class Google < BotMatch
6
- ValidDomains = ["google.com.", "googlebot.com."]
7
-
8
- def valid?
9
- subdomain_of?(*Google::ValidDomains) && reverse_resolves?
10
- end
7
+ domains 'google.com.', 'googlebot.com.'
11
8
  end
12
9
 
13
- rule Legitbot::Google, %w(Googlebot Mediapartners-Google AdsBot-Google)
10
+ rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
14
11
  end
@@ -1,6 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Bot lookup based on user agent
1
5
  module Legitbot
2
6
  @rules = []
3
7
 
8
+ class << self
9
+ attr_accessor :resolver_config
10
+ end
11
+
4
12
  ##
5
13
  # Lookup a bot based on its signature from +User-Agent+ header.
6
14
  #
@@ -10,15 +18,12 @@ module Legitbot
10
18
  # otherwise.
11
19
  # :yields: a found bot
12
20
  #
13
- def self.bot(userAgent, ip, resolver_config = nil)
14
- bots =
15
- @rules.select { |rule|
16
- rule[:fragments].any? {|f| userAgent.index f}
17
- }.map { |rule|
18
- rule[:class].new(ip, resolver_config)
19
- }
21
+ def self.bot(user_agent, ip)
22
+ bots = @rules
23
+ .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
24
+ .map { |rule| rule[:class].new(ip) }
20
25
 
21
- selected = bots.select { |b| b.valid? }.first if bots.size > 1
26
+ selected = bots.select(&:valid?).first if bots.size > 1
22
27
  selected = bots.last if selected.nil?
23
28
 
24
29
  if selected && block_given?
@@ -29,6 +34,6 @@ module Legitbot
29
34
  end
30
35
 
31
36
  def self.rule(clazz, fragments)
32
- @rules << {:class => clazz, :fragments => fragments}
37
+ @rules << { class: clazz, fragments: fragments }
33
38
  end
34
39
  end
@@ -1,13 +1,10 @@
1
- module Legitbot
2
- # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
1
+ # frozen_string_literal: true
3
2
 
3
+ module Legitbot # :nodoc:
4
+ # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
4
5
  class Pinterest < BotMatch
5
- ValidDomains = ["pinterest.com."]
6
-
7
- def valid?
8
- subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
9
- end
6
+ domains 'pinterest.com.'
10
7
  end
11
8
 
12
- rule Legitbot::Pinterest, %w(Pinterestbot Pinterest/0.2)
9
+ rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
13
10
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'resolv'
4
+ require 'ipaddr'
5
+
6
+ module Legitbot
7
+ module Validators
8
+ #
9
+ # In a bot matcher:
10
+ # `domains 'search.msn.com', ...`
11
+ # `domains 'googlebot.com', reverse: false`
12
+ #
13
+ # `reverse` is true by default.
14
+ module Domains
15
+ class << self
16
+ def included(base)
17
+ base.extend ClassMethods
18
+ end
19
+ end
20
+
21
+ def valid_domain?
22
+ self.class.valid_domain?(@ip)
23
+ end
24
+
25
+ module ClassMethods # :nodoc:
26
+ include Legitbot::Config::Resolver
27
+
28
+ def domains(*list, reverse: true)
29
+ @valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
30
+ @validate_reverse_record = reverse
31
+ end
32
+
33
+ def check_domains?
34
+ instance_variable_defined?(:@valid_domains)
35
+ end
36
+
37
+ def valid_domain?(ip)
38
+ return true unless check_domains?
39
+ return true if @valid_domains.empty?
40
+
41
+ domains = reverse_domains(ip)
42
+ return false if domains.empty?
43
+
44
+ record = find_subdomain_record(domains)
45
+ return false unless record
46
+ return true unless @validate_reverse_record
47
+
48
+ ip == reverse_ip(record)
49
+ end
50
+
51
+ def reverse_domains(ip)
52
+ resolver.getnames(ip)
53
+ rescue Resolv::ResolvError
54
+ nil
55
+ end
56
+
57
+ def find_subdomain_record(domains)
58
+ domains.find do |d|
59
+ @valid_domains.any? { |vd| d.subdomain_of?(vd) }
60
+ end
61
+ end
62
+
63
+ def reverse_ip(record)
64
+ return nil if record.nil?
65
+
66
+ resolver.getaddress(record.to_s).to_s
67
+ end
68
+ end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,81 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ipaddr'
4
+ require 'interval_tree'
5
+
6
+ module Legitbot
7
+ module Validators
8
+ #
9
+ # In a bot matcher:
10
+ # `ip_ranges ip, range, ip, ...`
11
+ # `ip_ranges do [ip, range, ...]; end`
12
+ module IpRanges
13
+ class << self
14
+ def included(base)
15
+ base.extend ClassMethods
16
+ end
17
+ end
18
+
19
+ def valid_ip?
20
+ self.class.valid_ip?(@ip)
21
+ end
22
+
23
+ module ClassMethods # :nodoc:
24
+ FAMILIES = %i[ipv4 ipv6].freeze
25
+ EMPTY_GENERATOR = proc { [] }
26
+
27
+ def ip_ranges(*ips, &block)
28
+ @ip_ranges = partition_ips(ips.flatten) unless ips.empty?
29
+ @ip_ranges_loader = block_given? ? block : EMPTY_GENERATOR
30
+ @ip_loader_mutex = Mutex.new
31
+ end
32
+
33
+ def check_ranges?
34
+ instance_variable_defined?(:@ip_ranges_loader)
35
+ end
36
+
37
+ def valid_ip?(ip)
38
+ return true unless check_ranges?
39
+ return true if valid_ips.empty?
40
+
41
+ obj = IPAddr.new(ip)
42
+ ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
43
+ !ranges.empty?
44
+ end
45
+
46
+ def valid_ips
47
+ @ip_loader_mutex.synchronize do
48
+ @ip_ranges ||= load_ips
49
+ end
50
+ end
51
+
52
+ def reload_ips
53
+ @ip_loader_mutex.synchronize do
54
+ @ip_ranges = load_ips
55
+ end
56
+ end
57
+
58
+ def load_ips
59
+ partition_ips(@ip_ranges_loader.call)
60
+ end
61
+
62
+ # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
63
+ def partition_ips(ips)
64
+ return [] if ips.empty?
65
+
66
+ ips
67
+ .map { |cidr| IPAddr.new(cidr) }
68
+ .partition(&:ipv4?)
69
+ .each_with_index
70
+ .map do |list, index|
71
+ ranges = list.map(&:to_range).map do |r|
72
+ (r.begin.to_i..r.end.to_i)
73
+ end
74
+ [FAMILIES[index], IntervalTree::Tree.new(ranges)]
75
+ end.to_h
76
+ end
77
+ # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
78
+ end
79
+ end
80
+ end
81
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Legitbot
2
- VERSION = '0.3.2'
4
+ VERSION = '0.4.0'
3
5
  end
@@ -1,17 +1,33 @@
1
- module Legitbot
2
- # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
1
+ # frozen_string_literal: true
3
2
 
3
+ module Legitbot # :nodoc:
4
+ # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
4
5
  class Yandex < BotMatch
5
- ValidDomains = ["yandex.ru.", "yandex.net.", "yandex.com."]
6
-
7
- def valid?
8
- subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
9
- end
6
+ domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
10
7
  end
11
8
 
12
- rule Legitbot::Yandex, %w(YandexBot YandexAccessibilityBot YandexMobileBot
13
- YandexDirectDyn YandexScreenshotBot YandexImages YandexVideo YandexVideoParser
14
- YandexMedia YandexBlogs YandexFavicons YandexWebmaster YandexPagechecker
15
- YandexImageResizer YaDirectFetcher YandexCalendar YandexSitelinks YandexMetrika
16
- YandexNews YandexVertis YandexSearchShop YandexVerticals)
9
+ rule Legitbot::Yandex, %w[
10
+ YandexBot
11
+ YandexAccessibilityBot
12
+ YandexMobileBot
13
+ YandexDirectDyn
14
+ YandexScreenshotBot
15
+ YandexImages
16
+ YandexVideo
17
+ YandexVideoParser
18
+ YandexMedia
19
+ YandexBlogs
20
+ YandexFavicons
21
+ YandexWebmaster
22
+ YandexPagechecker
23
+ YandexImageResizer
24
+ YaDirectFetcher
25
+ YandexCalendar
26
+ YandexSitelinks
27
+ YandexMetrika
28
+ YandexNews
29
+ YandexVertis
30
+ YandexSearchShop
31
+ YandexVerticals
32
+ ]
17
33
  end
data/lib/legitbot.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'legitbot/legitbot'
2
4
  require_relative 'legitbot/botmatch'
3
5