legitbot 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +8 -0
- data/Gemfile +2 -0
- data/Rakefile +5 -3
- data/legitbot.gemspec +4 -3
- data/lib/legitbot/ahrefs.rb +13 -8
- data/lib/legitbot/apple.rb +11 -11
- data/lib/legitbot/baidu.rb +5 -7
- data/lib/legitbot/bing.rb +5 -7
- data/lib/legitbot/botmatch.rb +17 -44
- data/lib/legitbot/config/resolver.rb +18 -0
- data/lib/legitbot/duckduckgo.rb +15 -7
- data/lib/legitbot/facebook.rb +8 -34
- data/lib/legitbot/google.rb +5 -8
- data/lib/legitbot/legitbot.rb +14 -9
- data/lib/legitbot/pinterest.rb +5 -8
- data/lib/legitbot/validators/domains.rb +71 -0
- data/lib/legitbot/validators/ip_ranges.rb +81 -0
- data/lib/legitbot/version.rb +3 -1
- data/lib/legitbot/yandex.rb +28 -12
- data/lib/legitbot.rb +2 -0
- data/test/ahrefs_test.rb +16 -8
- data/test/apple_as_google_test.rb +9 -4
- data/test/apple_test.rb +11 -4
- data/test/botmatch_test.rb +4 -22
- data/test/facebook_test.rb +24 -9
- data/test/google_test.rb +24 -14
- data/test/legitbot/validators/domains_test.rb +58 -0
- data/test/legitbot/validators/ip_ranges_test.rb +113 -0
- data/test/legitbot_test.rb +8 -4
- data/test/pinterest_test.rb +26 -14
- metadata +30 -8
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: dfc1b3322ff4f85957dabf6790d535f27f99feed47bdb4ff1bba65f6242d31a2
         | 
| 4 | 
            +
              data.tar.gz: 32e842cc3d297b3afda0ef9b265121a11c363857a726819f886b441e6c53a53c
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 3654c256da13b37045425457a96ac9a8b41c5ae5c0cce49b7898170e2d23a66a5cb7e612503dc1d88dc2e1240dcb07c9ccc5d5aa8439f280144abe969dc0ae7b
         | 
| 7 | 
            +
              data.tar.gz: 554e120d1001a71f455aedcd4d30397b22130279a6f99f8e022d40ade50427590dd52e982820399852ec52fbb2c96b7ea1461f82d8e88cb89acc053097136b32
         | 
    
        data/.rubocop.yml
    ADDED
    
    
    
        data/Gemfile
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    | @@ -1,14 +1,16 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'rubygems'
         | 
| 2 4 | 
             
            require 'bundler'
         | 
| 3 5 | 
             
            require 'bump/tasks'
         | 
| 4 | 
            -
            require  | 
| 6 | 
            +
            require 'rake/testtask'
         | 
| 5 7 | 
             
            Bundler::GemHelper.install_tasks
         | 
| 6 8 |  | 
| 7 9 | 
             
            Bump.tag_by_default = true
         | 
| 8 10 |  | 
| 9 11 | 
             
            Rake::TestTask.new do |t|
         | 
| 10 | 
            -
              t.libs <<  | 
| 11 | 
            -
              t.test_files = FileList['test | 
| 12 | 
            +
              t.libs << 'test'
         | 
| 13 | 
            +
              t.test_files = FileList['test/**/*_test.rb']
         | 
| 12 14 | 
             
              t.warning = true
         | 
| 13 15 | 
             
              t.verbose = true
         | 
| 14 16 | 
             
            end
         | 
    
        data/legitbot.gemspec
    CHANGED
    
    | @@ -17,9 +17,10 @@ Gem::Specification.new do |spec| | |
| 17 17 | 
             
              spec.required_ruby_version = '>= 2.3.0'
         | 
| 18 18 | 
             
              spec.add_dependency "irrc", ">= 0.2.1"
         | 
| 19 19 | 
             
              spec.add_dependency "augmented_interval_tree", ">= 0.1.1"
         | 
| 20 | 
            -
              spec.add_development_dependency "bump"
         | 
| 21 | 
            -
              spec.add_development_dependency "rake"
         | 
| 22 | 
            -
              spec.add_development_dependency " | 
| 20 | 
            +
              spec.add_development_dependency "bump", '>= 0.8.0'
         | 
| 21 | 
            +
              spec.add_development_dependency "rake", '>= 12.3.0'
         | 
| 22 | 
            +
              spec.add_development_dependency "rubocop", '>= 0.74.0'
         | 
| 23 | 
            +
              spec.add_development_dependency "minitest", '>= 5.1.0'
         | 
| 23 24 |  | 
| 24 25 | 
             
              spec.files = `git ls-files`.split($/)
         | 
| 25 26 | 
             
              spec.rdoc_options = ["--charset=UTF-8"]
         | 
    
        data/lib/legitbot/ahrefs.rb
    CHANGED
    
    | @@ -1,13 +1,18 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 2 4 | 
             
              # https://ahrefs.com/robot
         | 
| 3 5 | 
             
              class Ahrefs < BotMatch
         | 
| 4 | 
            -
                 | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
                   | 
| 8 | 
            -
                   | 
| 9 | 
            -
             | 
| 6 | 
            +
                ip_ranges %w[
         | 
| 7 | 
            +
                  54.36.148.0/24
         | 
| 8 | 
            +
                  54.36.149.0/24
         | 
| 9 | 
            +
                  54.36.150.0/24
         | 
| 10 | 
            +
                  195.154.122.0/24
         | 
| 11 | 
            +
                  195.154.123.0/24
         | 
| 12 | 
            +
                  195.154.126.0/24
         | 
| 13 | 
            +
                  195.154.127.0/24
         | 
| 14 | 
            +
                ]
         | 
| 10 15 | 
             
              end
         | 
| 11 16 |  | 
| 12 | 
            -
              rule Legitbot::Ahrefs, %w | 
| 17 | 
            +
              rule Legitbot::Ahrefs, %w[AhrefsBot]
         | 
| 13 18 | 
             
            end
         | 
    
        data/lib/legitbot/apple.rb
    CHANGED
    
    | @@ -1,20 +1,20 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'ipaddr'
         | 
| 2 4 |  | 
| 3 | 
            -
            module Legitbot
         | 
| 5 | 
            +
            module Legitbot # :nodoc:
         | 
| 4 6 | 
             
              # https://support.apple.com/en-us/HT204683
         | 
| 5 | 
            -
             | 
| 6 7 | 
             
              class Apple < BotMatch
         | 
| 7 | 
            -
                 | 
| 8 | 
            -
             | 
| 9 | 
            -
                def valid?
         | 
| 10 | 
            -
                  ip = IPAddr.new @ip
         | 
| 11 | 
            -
                  Range.include? ip
         | 
| 12 | 
            -
                end
         | 
| 8 | 
            +
                ip_ranges '17.0.0.0/8'
         | 
| 13 9 | 
             
              end
         | 
| 14 10 |  | 
| 15 | 
            -
               | 
| 11 | 
            +
              # https://support.apple.com/en-us/HT204683
         | 
| 12 | 
            +
              # rubocop:disable Naming/ClassAndModuleCamelCase
         | 
| 13 | 
            +
              class Apple_as_Google < BotMatch
         | 
| 14 | 
            +
                ip_ranges '17.0.0.0/8'
         | 
| 16 15 | 
             
              end
         | 
| 16 | 
            +
              # rubocop:enable Naming/ClassAndModuleCamelCase
         | 
| 17 17 |  | 
| 18 | 
            -
              rule Legitbot::Apple, %w | 
| 19 | 
            -
              rule Legitbot::Apple_as_Google, %w | 
| 18 | 
            +
              rule Legitbot::Apple, %w[Applebot]
         | 
| 19 | 
            +
              rule Legitbot::Apple_as_Google, %w[Googlebot]
         | 
| 20 20 | 
             
            end
         | 
    
        data/lib/legitbot/baidu.rb
    CHANGED
    
    | @@ -1,12 +1,10 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 2 4 | 
             
              # http://help.baidu.com/question?prod_en=master&class=498&id=1000973
         | 
| 3 5 | 
             
              class Baidu < BotMatch
         | 
| 4 | 
            -
                 | 
| 5 | 
            -
             | 
| 6 | 
            -
                def valid?
         | 
| 7 | 
            -
                  subdomain_of?(*Baidu::ValidDomains)
         | 
| 8 | 
            -
                end
         | 
| 6 | 
            +
                domains 'baidu.com.', 'baidu.jp.', reverse: false
         | 
| 9 7 | 
             
              end
         | 
| 10 8 |  | 
| 11 | 
            -
              rule Legitbot::Baidu, %w | 
| 9 | 
            +
              rule Legitbot::Baidu, %w[Baiduspider]
         | 
| 12 10 | 
             
            end
         | 
    
        data/lib/legitbot/bing.rb
    CHANGED
    
    | @@ -1,12 +1,10 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 2 4 | 
             
              # https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
         | 
| 3 5 | 
             
              class Bing < BotMatch
         | 
| 4 | 
            -
                 | 
| 5 | 
            -
             | 
| 6 | 
            -
                def valid?
         | 
| 7 | 
            -
                  subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
         | 
| 8 | 
            -
                end
         | 
| 6 | 
            +
                domains 'search.msn.com.'
         | 
| 9 7 | 
             
              end
         | 
| 10 8 |  | 
| 11 | 
            -
              rule Legitbot::Bing, %w | 
| 9 | 
            +
              rule Legitbot::Bing, %w[Bingbot bingbot]
         | 
| 12 10 | 
             
            end
         | 
    
        data/lib/legitbot/botmatch.rb
    CHANGED
    
    | @@ -1,5 +1,8 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative 'config/resolver'
         | 
| 4 | 
            +
            require_relative 'validators/domains'
         | 
| 5 | 
            +
            require_relative 'validators/ip_ranges'
         | 
| 3 6 |  | 
| 4 7 | 
             
            module Legitbot
         | 
| 5 8 | 
             
              ##
         | 
| @@ -7,61 +10,31 @@ module Legitbot | |
| 7 10 | 
             
              # +valid?+, +fake?+ and +detected_as+
         | 
| 8 11 | 
             
              #
         | 
| 9 12 | 
             
              class BotMatch
         | 
| 10 | 
            -
                 | 
| 11 | 
            -
             | 
| 12 | 
            -
                  @ip = ip
         | 
| 13 | 
            -
                end
         | 
| 14 | 
            -
             | 
| 15 | 
            -
                ##
         | 
| 16 | 
            -
                # Returns a Resolv::DNS::Name instance with
         | 
| 17 | 
            -
                # the reverse name
         | 
| 18 | 
            -
                def reverse_domain
         | 
| 19 | 
            -
                  @reverse_domain ||= @dns.getname(@ip)
         | 
| 20 | 
            -
                rescue Resolv::ResolvError
         | 
| 21 | 
            -
                  @reverse_domain ||= nil
         | 
| 22 | 
            -
                end
         | 
| 23 | 
            -
             | 
| 24 | 
            -
                ##
         | 
| 25 | 
            -
                # Returns a String with the reverse name
         | 
| 26 | 
            -
                def reverse_name
         | 
| 27 | 
            -
                  reverse_domain&.to_s
         | 
| 28 | 
            -
                end
         | 
| 29 | 
            -
             | 
| 30 | 
            -
                ##
         | 
| 31 | 
            -
                # Returns a String with IP created from the reverse name
         | 
| 32 | 
            -
                def reversed_ip
         | 
| 33 | 
            -
                  return nil if reverse_name.nil?
         | 
| 13 | 
            +
                include Legitbot::Validators::IpRanges
         | 
| 14 | 
            +
                include Legitbot::Validators::Domains
         | 
| 34 15 |  | 
| 35 | 
            -
             | 
| 36 | 
            -
                  @ | 
| 37 | 
            -
                end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                def reverse_resolves?
         | 
| 40 | 
            -
                  @ip == reversed_ip
         | 
| 41 | 
            -
                end
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                def subdomain_of?(*domains)
         | 
| 44 | 
            -
                  return false if reverse_name.nil?
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                  domains.any? { |d|
         | 
| 47 | 
            -
                    reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
         | 
| 48 | 
            -
                  }
         | 
| 16 | 
            +
                def initialize(ip)
         | 
| 17 | 
            +
                  @ip = ip
         | 
| 49 18 | 
             
                end
         | 
| 50 19 |  | 
| 51 20 | 
             
                def detected_as
         | 
| 52 21 | 
             
                  self.class.name.split('::').last.downcase.to_sym
         | 
| 53 22 | 
             
                end
         | 
| 54 23 |  | 
| 24 | 
            +
                def valid?
         | 
| 25 | 
            +
                  valid_ip? && valid_domain?
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 55 28 | 
             
                def fake?
         | 
| 56 29 | 
             
                  !valid?
         | 
| 57 30 | 
             
                end
         | 
| 58 31 |  | 
| 59 | 
            -
                def self.valid?(ip | 
| 60 | 
            -
                   | 
| 32 | 
            +
                def self.valid?(ip)
         | 
| 33 | 
            +
                  new(ip).valid?
         | 
| 61 34 | 
             
                end
         | 
| 62 35 |  | 
| 63 | 
            -
                def self.fake?(ip | 
| 64 | 
            -
                   | 
| 36 | 
            +
                def self.fake?(ip)
         | 
| 37 | 
            +
                  new(ip).fake?
         | 
| 65 38 | 
             
                end
         | 
| 66 39 | 
             
              end
         | 
| 67 40 | 
             
            end
         | 
| @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'resolv'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            module Legitbot
         | 
| 6 | 
            +
              module Config
         | 
| 7 | 
            +
                module Resolver # :nodoc:
         | 
| 8 | 
            +
                  def resolver_config(options = nil)
         | 
| 9 | 
            +
                    @resolver_config = options
         | 
| 10 | 
            +
                  end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                  def resolver
         | 
| 13 | 
            +
                    @resolver_config ||= Legitbot.resolver_config
         | 
| 14 | 
            +
                    @resolver ||= Resolv::DNS.new @resolver_config
         | 
| 15 | 
            +
                  end
         | 
| 16 | 
            +
                end
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
            end
         | 
    
        data/lib/legitbot/duckduckgo.rb
    CHANGED
    
    | @@ -1,12 +1,20 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 2 4 | 
             
              # https://duckduckgo.com/duckduckbot
         | 
| 3 5 | 
             
              class DuckDuckGo < BotMatch
         | 
| 4 | 
            -
                 | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
                   | 
| 8 | 
            -
             | 
| 6 | 
            +
                ip_ranges %w[
         | 
| 7 | 
            +
                  50.16.241.113
         | 
| 8 | 
            +
                  50.16.241.114
         | 
| 9 | 
            +
                  50.16.241.117
         | 
| 10 | 
            +
                  50.16.247.234
         | 
| 11 | 
            +
                  52.204.97.54
         | 
| 12 | 
            +
                  52.5.190.19
         | 
| 13 | 
            +
                  54.197.234.188
         | 
| 14 | 
            +
                  54.208.100.253
         | 
| 15 | 
            +
                  23.21.227.69
         | 
| 16 | 
            +
                ]
         | 
| 9 17 | 
             
              end
         | 
| 10 18 |  | 
| 11 | 
            -
              rule Legitbot::DuckDuckGo, %w | 
| 19 | 
            +
              rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
         | 
| 12 20 | 
             
            end
         | 
    
        data/lib/legitbot/facebook.rb
    CHANGED
    
    | @@ -1,48 +1,22 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            require 'irrc'
         | 
| 3 | 
            -
            require 'interval_tree'
         | 
| 4 4 |  | 
| 5 | 
            -
            module Legitbot
         | 
| 5 | 
            +
            module Legitbot # :nodoc:
         | 
| 6 6 | 
             
              # https://developers.facebook.com/docs/sharing/webmasters/crawler
         | 
| 7 | 
            -
             | 
| 8 7 | 
             
              class Facebook < BotMatch
         | 
| 9 8 | 
             
                AS = 'AS32934'
         | 
| 10 9 |  | 
| 11 | 
            -
                 | 
| 12 | 
            -
                  ip = IPAddr.new(@ip)
         | 
| 13 | 
            -
                  Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
         | 
| 14 | 
            -
                end
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                @mutex = Mutex.new
         | 
| 17 | 
            -
             | 
| 18 | 
            -
                def self.valid_ips
         | 
| 19 | 
            -
                  @mutex.synchronize { @ips ||= load_ips }
         | 
| 20 | 
            -
                end
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                def self.reload!
         | 
| 23 | 
            -
                  @mutex.synchronize { @ips = load_ips }
         | 
| 24 | 
            -
                end
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                def self.load_ips
         | 
| 27 | 
            -
                  whois.map do |(family, records)|
         | 
| 28 | 
            -
                    ranges = records.map do |cidr|
         | 
| 29 | 
            -
                      range = IPAddr.new(cidr).to_range
         | 
| 30 | 
            -
                      (range.begin.to_i..range.end.to_i)
         | 
| 31 | 
            -
                    end
         | 
| 32 | 
            -
                    [family, IntervalTree::Tree.new(ranges)]
         | 
| 33 | 
            -
                  end.to_h
         | 
| 34 | 
            -
                end
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                def self.whois
         | 
| 10 | 
            +
                ip_ranges do
         | 
| 37 11 | 
             
                  client = Irrc::Client.new
         | 
| 38 12 | 
             
                  client.query :radb, AS
         | 
| 39 13 | 
             
                  results = client.perform
         | 
| 40 14 |  | 
| 41 | 
            -
                  %i | 
| 42 | 
            -
                     | 
| 43 | 
            -
                  end. | 
| 15 | 
            +
                  %i[ipv4 ipv6].map do |family|
         | 
| 16 | 
            +
                    results[AS][family][AS]
         | 
| 17 | 
            +
                  end.flatten
         | 
| 44 18 | 
             
                end
         | 
| 45 19 | 
             
              end
         | 
| 46 20 |  | 
| 47 | 
            -
              rule Legitbot::Facebook, %w | 
| 21 | 
            +
              rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
         | 
| 48 22 | 
             
            end
         | 
    
        data/lib/legitbot/google.rb
    CHANGED
    
    | @@ -1,14 +1,11 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 2 4 | 
             
              # https://support.google.com/webmasters/answer/1061943
         | 
| 3 5 | 
             
              # https://support.google.com/webmasters/answer/80553
         | 
| 4 | 
            -
             | 
| 5 6 | 
             
              class Google < BotMatch
         | 
| 6 | 
            -
                 | 
| 7 | 
            -
             | 
| 8 | 
            -
                def valid?
         | 
| 9 | 
            -
                  subdomain_of?(*Google::ValidDomains) && reverse_resolves?
         | 
| 10 | 
            -
                end
         | 
| 7 | 
            +
                domains 'google.com.', 'googlebot.com.'
         | 
| 11 8 | 
             
              end
         | 
| 12 9 |  | 
| 13 | 
            -
              rule Legitbot::Google, %w | 
| 10 | 
            +
              rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
         | 
| 14 11 | 
             
            end
         | 
    
        data/lib/legitbot/legitbot.rb
    CHANGED
    
    | @@ -1,6 +1,14 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            ##
         | 
| 4 | 
            +
            # Bot lookup based on user agent
         | 
| 1 5 | 
             
            module Legitbot
         | 
| 2 6 | 
             
              @rules = []
         | 
| 3 7 |  | 
| 8 | 
            +
              class << self
         | 
| 9 | 
            +
                attr_accessor :resolver_config
         | 
| 10 | 
            +
              end
         | 
| 11 | 
            +
             | 
| 4 12 | 
             
              ##
         | 
| 5 13 | 
             
              # Lookup a bot based on its signature from +User-Agent+ header.
         | 
| 6 14 | 
             
              #
         | 
| @@ -10,15 +18,12 @@ module Legitbot | |
| 10 18 | 
             
              # otherwise.
         | 
| 11 19 | 
             
              # :yields: a found bot
         | 
| 12 20 | 
             
              #
         | 
| 13 | 
            -
              def self.bot( | 
| 14 | 
            -
                bots =
         | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
                  }.map { |rule|
         | 
| 18 | 
            -
                    rule[:class].new(ip, resolver_config)
         | 
| 19 | 
            -
                  }
         | 
| 21 | 
            +
              def self.bot(user_agent, ip)
         | 
| 22 | 
            +
                bots = @rules
         | 
| 23 | 
            +
                       .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
         | 
| 24 | 
            +
                       .map { |rule| rule[:class].new(ip) }
         | 
| 20 25 |  | 
| 21 | 
            -
                selected = bots.select | 
| 26 | 
            +
                selected = bots.select(&:valid?).first if bots.size > 1
         | 
| 22 27 | 
             
                selected = bots.last if selected.nil?
         | 
| 23 28 |  | 
| 24 29 | 
             
                if selected && block_given?
         | 
| @@ -29,6 +34,6 @@ module Legitbot | |
| 29 34 | 
             
              end
         | 
| 30 35 |  | 
| 31 36 | 
             
              def self.rule(clazz, fragments)
         | 
| 32 | 
            -
                @rules << {: | 
| 37 | 
            +
                @rules << { class: clazz, fragments: fragments }
         | 
| 33 38 | 
             
              end
         | 
| 34 39 | 
             
            end
         | 
    
        data/lib/legitbot/pinterest.rb
    CHANGED
    
    | @@ -1,13 +1,10 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
              # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
         | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 3 2 |  | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 4 | 
            +
              # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
         | 
| 4 5 | 
             
              class Pinterest < BotMatch
         | 
| 5 | 
            -
                 | 
| 6 | 
            -
             | 
| 7 | 
            -
                def valid?
         | 
| 8 | 
            -
                  subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
         | 
| 9 | 
            -
                end
         | 
| 6 | 
            +
                domains 'pinterest.com.'
         | 
| 10 7 | 
             
              end
         | 
| 11 8 |  | 
| 12 | 
            -
              rule Legitbot::Pinterest, %w | 
| 9 | 
            +
              rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
         | 
| 13 10 | 
             
            end
         | 
| @@ -0,0 +1,71 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'resolv'
         | 
| 4 | 
            +
            require 'ipaddr'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module Legitbot
         | 
| 7 | 
            +
              module Validators
         | 
| 8 | 
            +
                #
         | 
| 9 | 
            +
                # In a bot matcher:
         | 
| 10 | 
            +
                # `domains 'search.msn.com', ...`
         | 
| 11 | 
            +
                # `domains 'googlebot.com', reverse: false`
         | 
| 12 | 
            +
                #
         | 
| 13 | 
            +
                # `reverse` is true by default.
         | 
| 14 | 
            +
                module Domains
         | 
| 15 | 
            +
                  class << self
         | 
| 16 | 
            +
                    def included(base)
         | 
| 17 | 
            +
                      base.extend ClassMethods
         | 
| 18 | 
            +
                    end
         | 
| 19 | 
            +
                  end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                  def valid_domain?
         | 
| 22 | 
            +
                    self.class.valid_domain?(@ip)
         | 
| 23 | 
            +
                  end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                  module ClassMethods # :nodoc:
         | 
| 26 | 
            +
                    include Legitbot::Config::Resolver
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                    def domains(*list, reverse: true)
         | 
| 29 | 
            +
                      @valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
         | 
| 30 | 
            +
                      @validate_reverse_record = reverse
         | 
| 31 | 
            +
                    end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                    def check_domains?
         | 
| 34 | 
            +
                      instance_variable_defined?(:@valid_domains)
         | 
| 35 | 
            +
                    end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                    def valid_domain?(ip)
         | 
| 38 | 
            +
                      return true unless check_domains?
         | 
| 39 | 
            +
                      return true if @valid_domains.empty?
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                      domains = reverse_domains(ip)
         | 
| 42 | 
            +
                      return false if domains.empty?
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                      record = find_subdomain_record(domains)
         | 
| 45 | 
            +
                      return false unless record
         | 
| 46 | 
            +
                      return true unless @validate_reverse_record
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                      ip == reverse_ip(record)
         | 
| 49 | 
            +
                    end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                    def reverse_domains(ip)
         | 
| 52 | 
            +
                      resolver.getnames(ip)
         | 
| 53 | 
            +
                    rescue Resolv::ResolvError
         | 
| 54 | 
            +
                      nil
         | 
| 55 | 
            +
                    end
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                    def find_subdomain_record(domains)
         | 
| 58 | 
            +
                      domains.find do |d|
         | 
| 59 | 
            +
                        @valid_domains.any? { |vd| d.subdomain_of?(vd) }
         | 
| 60 | 
            +
                      end
         | 
| 61 | 
            +
                    end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                    # Forward-resolves +record+ (converted with +to_s+) through
                    # +resolver+ and returns the address as a String.
                    #
                    # Returns nil when +record+ is nil or the lookup raises
                    # Resolv::ResolvError.
                    def reverse_ip(record)
                      return nil if record.nil?

                      resolver.getaddress(record.to_s).to_s
                    rescue Resolv::ResolvError
                      # A name without an address must fail validation, not abort it;
                      # reverse_domains reports lookup failures the same way.
                      nil
                    end
         | 
| 68 | 
            +
                  end
         | 
| 69 | 
            +
                end
         | 
| 70 | 
            +
              end
         | 
| 71 | 
            +
            end
         | 
| @@ -0,0 +1,81 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'ipaddr'
         | 
| 4 | 
            +
            require 'interval_tree'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module Legitbot
              module Validators
                #
                # Checks an address against a list of IP addresses and CIDR ranges.
                #
                # In a bot matcher:
                # `ip_ranges ip, range, ip, ...`
                # `ip_ranges do [ip, range, ...]; end`
                module IpRanges
                  class << self
                    # Adds the class-level DSL to whatever includes this module.
                    def included(base)
                      base.extend ClassMethods
                    end
                  end

                  # Checks the instance's @ip against the ranges held by its class.
                  def valid_ip?
                    self.class.valid_ip?(@ip)
                  end

                  module ClassMethods # :nodoc:
                    # Order matters: it mirrors the [ipv4, other] pair produced by
                    # `partition(&:ipv4?)` in partition_ips.
                    FAMILIES = %i[ipv4 ipv6].freeze
                    EMPTY_GENERATOR = proc { [] }

                    # Declares the allowed addresses, either as literal arguments
                    # (partitioned immediately) or as a block that is called later by
                    # load_ips. Without a block the loader yields an empty list.
                    def ip_ranges(*ips, &block)
                      @ip_ranges = partition_ips(ips.flatten) unless ips.empty?
                      @ip_ranges_loader = block || EMPTY_GENERATOR
                      @ip_loader_mutex = Mutex.new
                    end

                    # True once ip_ranges has been called on this class.
                    def check_ranges?
                      instance_variable_defined?(:@ip_ranges_loader)
                    end

                    # Returns true when no ranges are declared or the declared list is
                    # empty; otherwise true only if +ip+ falls inside a known range.
                    def valid_ip?(ip)
                      return true unless check_ranges?

                      trees = valid_ips
                      return true if trees.empty?

                      addr = IPAddr.new(ip)
                      family = addr.ipv4? ? :ipv4 : :ipv6
                      # A tree built from no ranges can answer nil instead of [], e.g.
                      # when an IPv6 address is checked against an IPv4-only list.
                      hits = trees[family].search(addr.to_i)
                      !(hits.nil? || hits.empty?)
                    end

                    # Memoized ranges; loaded on first use while holding the loader
                    # mutex.
                    def valid_ips
                      @ip_loader_mutex.synchronize do
                        @ip_ranges ||= load_ips
                      end
                    end

                    # Replaces the memoized ranges with a fresh result of the loader.
                    def reload_ips
                      @ip_loader_mutex.synchronize do
                        @ip_ranges = load_ips
                      end
                    end

                    # Calls the loader and partitions what it returns.
                    def load_ips
                      partition_ips(@ip_ranges_loader.call)
                    end

                    # Builds one interval tree per address family from +ips+.
                    #
                    # Returns [] for an empty list, otherwise a Hash with the keys
                    # :ipv4 and :ipv6, each holding an IntervalTree::Tree of integer
                    # ranges.
                    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
                    def partition_ips(ips)
                      return [] if ips.empty?

                      ips
                        .map { |cidr| IPAddr.new(cidr) }
                        .partition(&:ipv4?)
                        .each_with_index
                        .map do |list, index|
                          ranges = list.map(&:to_range).map do |r|
                            (r.begin.to_i..r.end.to_i)
                          end
                          [FAMILIES[index], IntervalTree::Tree.new(ranges)]
                        end.to_h
                    end
                    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
                  end
                end
              end
            end
         | 
    
        data/lib/legitbot/version.rb
    CHANGED
    
    
    
        data/lib/legitbot/yandex.rb
    CHANGED
    
    | @@ -1,17 +1,33 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
              # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
         | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 3 2 |  | 
| 3 | 
            +
            module Legitbot # :nodoc:
         | 
| 4 | 
            +
              # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
         | 
| 4 5 | 
             
              class Yandex < BotMatch
         | 
| 5 | 
            -
                 | 
| 6 | 
            -
             | 
| 7 | 
            -
                def valid?
         | 
| 8 | 
            -
                  subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
         | 
| 9 | 
            -
                end
         | 
| 6 | 
            +
                domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
         | 
| 10 7 | 
             
              end
         | 
| 11 8 |  | 
| 12 | 
            -
              rule Legitbot::Yandex, %w | 
| 13 | 
            -
                 | 
| 14 | 
            -
                 | 
| 15 | 
            -
                 | 
| 16 | 
            -
                 | 
| 9 | 
            +
              rule Legitbot::Yandex, %w[
         | 
| 10 | 
            +
                YandexBot
         | 
| 11 | 
            +
                YandexAccessibilityBot
         | 
| 12 | 
            +
                YandexMobileBot
         | 
| 13 | 
            +
                YandexDirectDyn
         | 
| 14 | 
            +
                YandexScreenshotBot
         | 
| 15 | 
            +
                YandexImages
         | 
| 16 | 
            +
                YandexVideo
         | 
| 17 | 
            +
                YandexVideoParser
         | 
| 18 | 
            +
                YandexMedia
         | 
| 19 | 
            +
                YandexBlogs
         | 
| 20 | 
            +
                YandexFavicons
         | 
| 21 | 
            +
                YandexWebmaster
         | 
| 22 | 
            +
                YandexPagechecker
         | 
| 23 | 
            +
                YandexImageResizer
         | 
| 24 | 
            +
                YaDirectFetcher
         | 
| 25 | 
            +
                YandexCalendar
         | 
| 26 | 
            +
                YandexSitelinks
         | 
| 27 | 
            +
                YandexMetrika
         | 
| 28 | 
            +
                YandexNews
         | 
| 29 | 
            +
                YandexVertis
         | 
| 30 | 
            +
                YandexSearchShop
         | 
| 31 | 
            +
                YandexVerticals
         | 
| 32 | 
            +
              ]
         | 
| 17 33 | 
             
            end
         |