legitbot 0.3.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.github/workflows/build.yml +60 -0
 - data/.gitignore +1 -0
 - data/.rubocop.yml +2 -0
 - data/.ruby-version +1 -0
 - data/Gemfile +2 -0
 - data/README.md +3 -1
 - data/Rakefile +5 -3
 - data/legitbot.gemspec +19 -18
 - data/lib/legitbot.rb +4 -0
 - data/lib/legitbot/ahrefs.rb +13 -8
 - data/lib/legitbot/apple.rb +11 -11
 - data/lib/legitbot/baidu.rb +5 -7
 - data/lib/legitbot/bing.rb +5 -7
 - data/lib/legitbot/botmatch.rb +17 -44
 - data/lib/legitbot/config/resolver.rb +18 -0
 - data/lib/legitbot/duckduckgo.rb +18 -7
 - data/lib/legitbot/facebook.rb +8 -34
 - data/lib/legitbot/google.rb +5 -8
 - data/lib/legitbot/legitbot.rb +14 -9
 - data/lib/legitbot/oracle.rb +10 -0
 - data/lib/legitbot/pinterest.rb +5 -8
 - data/lib/legitbot/twitter.rb +14 -0
 - data/lib/legitbot/validators/domains.rb +71 -0
 - data/lib/legitbot/validators/ip_ranges.rb +81 -0
 - data/lib/legitbot/version.rb +3 -1
 - data/lib/legitbot/yandex.rb +41 -12
 - data/test/ahrefs_test.rb +16 -8
 - data/test/apple_as_google_test.rb +9 -4
 - data/test/apple_test.rb +11 -4
 - data/test/botmatch_test.rb +4 -22
 - data/test/facebook_test.rb +25 -10
 - data/test/google_test.rb +24 -14
 - data/test/legitbot/validators/domains_test.rb +58 -0
 - data/test/legitbot/validators/ip_ranges_test.rb +113 -0
 - data/test/legitbot_test.rb +8 -4
 - data/test/oracle_test.rb +36 -0
 - data/test/pinterest_test.rb +26 -14
 - data/test/twitter_test.rb +36 -0
 - metadata +87 -23
 - data/.travis.yml +0 -12
 
    
        data/lib/legitbot/google.rb
    CHANGED
    
    | 
         @@ -1,14 +1,11 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       2 
4 
     | 
    
         
             
              # https://support.google.com/webmasters/answer/1061943
         
     | 
| 
       3 
5 
     | 
    
         
             
              # https://support.google.com/webmasters/answer/80553
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
6 
     | 
    
         
             
              class Google < BotMatch
         
     | 
| 
       6 
     | 
    
         
            -
                 
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                def valid?
         
     | 
| 
       9 
     | 
    
         
            -
                  subdomain_of?(*Google::ValidDomains) && reverse_resolves?
         
     | 
| 
       10 
     | 
    
         
            -
                end
         
     | 
| 
      
 7 
     | 
    
         
            +
                domains 'google.com.', 'googlebot.com.'
         
     | 
| 
       11 
8 
     | 
    
         
             
              end
         
     | 
| 
       12 
9 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
              rule Legitbot::Google, %w 
     | 
| 
      
 10 
     | 
    
         
            +
              rule Legitbot::Google, %w[Googlebot Mediapartners-Google AdsBot-Google]
         
     | 
| 
       14 
11 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/legitbot.rb
    CHANGED
    
    | 
         @@ -1,6 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            ##
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Bot lookup based on user agent
         
     | 
| 
       1 
5 
     | 
    
         
             
            module Legitbot
         
     | 
| 
       2 
6 
     | 
    
         
             
              @rules = []
         
     | 
| 
       3 
7 
     | 
    
         | 
| 
      
 8 
     | 
    
         
            +
              class << self
         
     | 
| 
      
 9 
     | 
    
         
            +
                attr_accessor :resolver_config
         
     | 
| 
      
 10 
     | 
    
         
            +
              end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
       4 
12 
     | 
    
         
             
              ##
         
     | 
| 
       5 
13 
     | 
    
         
             
              # Lookup a bot based on its signature from +User-Agent+ header.
         
     | 
| 
       6 
14 
     | 
    
         
             
              #
         
     | 
| 
         @@ -10,15 +18,12 @@ module Legitbot 
     | 
|
| 
       10 
18 
     | 
    
         
             
              # otherwise.
         
     | 
| 
       11 
19 
     | 
    
         
             
              # :yields: a found bot
         
     | 
| 
       12 
20 
     | 
    
         
             
              #
         
     | 
| 
       13 
     | 
    
         
            -
              def self.bot( 
     | 
| 
       14 
     | 
    
         
            -
                bots =
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
                  }.map { |rule|
         
     | 
| 
       18 
     | 
    
         
            -
                    rule[:class].new(ip, resolver_config)
         
     | 
| 
       19 
     | 
    
         
            -
                  }
         
     | 
| 
      
 21 
     | 
    
         
            +
              def self.bot(user_agent, ip)
         
     | 
| 
      
 22 
     | 
    
         
            +
                bots = @rules
         
     | 
| 
      
 23 
     | 
    
         
            +
                       .select { |rule| rule[:fragments].any? { |f| user_agent.index f } }
         
     | 
| 
      
 24 
     | 
    
         
            +
                       .map { |rule| rule[:class].new(ip) }
         
     | 
| 
       20 
25 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                selected = bots.select 
     | 
| 
      
 26 
     | 
    
         
            +
                selected = bots.select(&:valid?).first if bots.size > 1
         
     | 
| 
       22 
27 
     | 
    
         
             
                selected = bots.last if selected.nil?
         
     | 
| 
       23 
28 
     | 
    
         | 
| 
       24 
29 
     | 
    
         
             
                if selected && block_given?
         
     | 
| 
         @@ -29,6 +34,6 @@ module Legitbot 
     | 
|
| 
       29 
34 
     | 
    
         
             
              end
         
     | 
| 
       30 
35 
     | 
    
         | 
| 
       31 
36 
     | 
    
         
             
              def self.rule(clazz, fragments)
         
     | 
| 
       32 
     | 
    
         
            -
                @rules << {: 
     | 
| 
      
 37 
     | 
    
         
            +
                @rules << { class: clazz, fragments: fragments }
         
     | 
| 
       33 
38 
     | 
    
         
             
              end
         
     | 
| 
       34 
39 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/pinterest.rb
    CHANGED
    
    | 
         @@ -1,13 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
              # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
         
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
       3 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
      
 4 
     | 
    
         
            +
              # https://help.pinterest.com/en/articles/about-pinterest-crawler-0
         
     | 
| 
       4 
5 
     | 
    
         
             
              class Pinterest < BotMatch
         
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                def valid?
         
     | 
| 
       8 
     | 
    
         
            -
                  subdomain_of?(*Pinterest::ValidDomains) && reverse_resolves?
         
     | 
| 
       9 
     | 
    
         
            -
                end
         
     | 
| 
      
 6 
     | 
    
         
            +
                domains 'pinterest.com.'
         
     | 
| 
       10 
7 
     | 
    
         
             
              end
         
     | 
| 
       11 
8 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
              rule Legitbot::Pinterest, %w 
     | 
| 
      
 9 
     | 
    
         
            +
              rule Legitbot::Pinterest, %w[Pinterestbot Pinterest/0.2]
         
     | 
| 
       13 
10 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,14 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
              # Matcher for Twitter's card crawler. Requests are checked against
              # the fixed list of CIDR blocks declared below.
              #
              # Background reading:
              # https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started
              # https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards
              class Twitter < BotMatch
                ip_ranges %w[199.16.156.0/22 199.59.148.0/22]
              end

              # Any User-Agent containing "Twitterbot" is routed to the matcher above.
              rule Legitbot::Twitter, %w[Twitterbot]
            end
         
     | 
| 
         @@ -0,0 +1,71 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'resolv'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'ipaddr'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Legitbot
              module Validators
                #
                # Reverse-DNS based validation. In a bot matcher:
                # `domains 'search.msn.com', ...`
                # `domains 'googlebot.com', reverse: false`
                #
                # `reverse` is true by default.
                module Domains
                  class << self
                    # Hook run on `include`: gives the including class the
                    # class-level DSL and checks from ClassMethods.
                    def included(base)
                      base.extend ClassMethods
                    end
                  end

                  # Instance-level check; delegates to the class-level
                  # +valid_domain?+ with this instance's +@ip+.
                  def valid_domain?
                    self.class.valid_domain?(@ip)
                  end

                  module ClassMethods # :nodoc:
                    # NOTE(review): +resolver+ used below is presumably supplied by
                    # this mixin (defined outside this file) - confirm.
                    include Legitbot::Config::Resolver

                    # Declares the domains a legitimate bot's PTR record must fall
                    # under. +list+ may be strings or nested arrays of strings.
                    # With +reverse: false+ the forward-confirmation step of
                    # +valid_domain?+ is skipped.
                    def domains(*list, reverse: true)
                      @valid_domains = list.flatten.map { |d| Resolv::DNS::Name.create(d) }
                      @validate_reverse_record = reverse
                    end

                    # True once +domains+ has been called on this class.
                    def check_domains?
                      instance_variable_defined?(:@valid_domains)
                    end

                    # Validates +ip+ against the declared domains.
                    #
                    # Returns true when no domains were declared (or the list is
                    # empty); false when the address has no usable PTR names or
                    # none of them is a subdomain of a declared domain; true when
                    # a name matches and forward confirmation is disabled;
                    # otherwise whether the matched name resolves back to +ip+.
                    def valid_domain?(ip)
                      return true unless check_domains?
                      return true if @valid_domains.empty?

                      domains = reverse_domains(ip)
                      return false if domains.empty?

                      record = find_subdomain_record(domains)
                      return false unless record
                      return true unless @validate_reverse_record

                      # reverse_ip yields nil when the name does not resolve, which
                      # never equals +ip+, so the check fails closed.
                      ip == reverse_ip(record)
                    end

                    # PTR names for +ip+. A resolver error is treated as "no names":
                    # an empty array is returned so callers can always call
                    # +empty?+ on the result (previously nil, which made
                    # +valid_domain?+ raise NoMethodError).
                    def reverse_domains(ip)
                      resolver.getnames(ip)
                    rescue Resolv::ResolvError
                      []
                    end

                    # First name in +domains+ that is a subdomain of any declared
                    # domain, or nil when there is none.
                    def find_subdomain_record(domains)
                      domains.find do |d|
                        @valid_domains.any? { |vd| d.subdomain_of?(vd) }
                      end
                    end

                    # Forward-resolves +record+ and returns the address as a String.
                    # Returns nil for a nil record or when the name has no address:
                    # a PTR record pointing at a non-existent host must fail
                    # validation rather than raise into the caller.
                    def reverse_ip(record)
                      return nil if record.nil?

                      resolver.getaddress(record.to_s).to_s
                    rescue Resolv::ResolvError
                      nil
                    end
                  end
                end
              end
            end
         
     | 
| 
         @@ -0,0 +1,81 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'ipaddr'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'interval_tree'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            module Legitbot
              module Validators
                #
                # IP allow-list validation. In a bot matcher:
                # `ip_ranges ip, range, ip, ...`
                # `ip_ranges do [ip, range, ...]; end`
                module IpRanges
                  class << self
                    # Hook run on `include`: gives the including class the
                    # class-level DSL and checks from ClassMethods.
                    def included(base)
                      base.extend ClassMethods
                    end
                  end

                  # Instance-level check; delegates to the class-level
                  # +valid_ip?+ with this instance's +@ip+.
                  def valid_ip?
                    self.class.valid_ip?(@ip)
                  end

                  module ClassMethods # :nodoc:
                    FAMILIES = %i[ipv4 ipv6].freeze
                    EMPTY_GENERATOR = proc { [] }

                    # Declares the allowed addresses/CIDR blocks.
                    #
                    # +ips+ (strings or nested arrays) are indexed immediately.
                    # A block, when given, is stored as the loader used for lazy
                    # loading and by +reload_ips+. Without a block the loader
                    # returns the static list itself, so +reload_ips+ rebuilds
                    # the same ranges instead of wiping them (an empty range set
                    # makes +valid_ip?+ accept every address).
                    def ip_ranges(*ips, &block)
                      static_ips = ips.flatten.freeze
                      @ip_ranges = partition_ips(static_ips) unless ips.empty?
                      @ip_ranges_loader = block || static_loader(static_ips)
                      @ip_loader_mutex = Mutex.new
                    end

                    # True once +ip_ranges+ has been called on this class.
                    def check_ranges?
                      instance_variable_defined?(:@ip_ranges_loader)
                    end

                    # Whether +ip+ falls inside any declared range. Returns true
                    # when no ranges were declared or the loaded set is empty.
                    # IPAddr.new raises for a string it cannot parse.
                    def valid_ip?(ip)
                      return true unless check_ranges?
                      return true if valid_ips.empty?

                      obj = IPAddr.new(ip)
                      ranges = valid_ips[obj.ipv4? ? :ipv4 : :ipv6].search(obj.to_i)
                      !ranges.empty?
                    end

                    # Current range index, loaded through the loader on first use.
                    def valid_ips
                      @ip_loader_mutex.synchronize do
                        @ip_ranges ||= load_ips
                      end
                    end

                    # Rebuilds the range index from the loader unconditionally.
                    def reload_ips
                      @ip_loader_mutex.synchronize do
                        @ip_ranges = load_ips
                      end
                    end

                    # Calls the loader and indexes whatever it returns.
                    def load_ips
                      partition_ips(@ip_ranges_loader.call)
                    end

                    # Loader for the no-block form of +ip_ranges+: replays the
                    # static list, or EMPTY_GENERATOR when nothing was declared.
                    def static_loader(static_ips)
                      return EMPTY_GENERATOR if static_ips.empty?

                      proc { static_ips }
                    end

                    # Builds { ipv4: tree, ipv6: tree } from a list of address or
                    # CIDR strings; each tree holds integer ranges. Returns an
                    # empty array for an empty list.
                    # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
                    def partition_ips(ips)
                      return [] if ips.empty?

                      ips
                        .map { |cidr| IPAddr.new(cidr) }
                        .partition(&:ipv4?) # [ipv4 list, ipv6 list], same order as FAMILIES
                        .each_with_index
                        .map do |list, index|
                          ranges = list.map(&:to_range).map do |r|
                            (r.begin.to_i..r.end.to_i)
                          end
                          [FAMILIES[index], IntervalTree::Tree.new(ranges)]
                        end.to_h
                    end
                    # rubocop:enable Metrics/AbcSize, Metrics/MethodLength
                  end
                end
              end
            end
         
     | 
    
        data/lib/legitbot/version.rb
    CHANGED
    
    
    
        data/lib/legitbot/yandex.rb
    CHANGED
    
    | 
         @@ -1,17 +1,46 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
              # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
         
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
       3 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
      
 4 
     | 
    
         
            +
              # https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.html
         
     | 
| 
       4 
5 
     | 
    
         
             
              class Yandex < BotMatch
         
     | 
| 
       5 
     | 
    
         
            -
                 
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                def valid?
         
     | 
| 
       8 
     | 
    
         
            -
                  subdomain_of?(*Yandex::ValidDomains) && reverse_resolves?
         
     | 
| 
       9 
     | 
    
         
            -
                end
         
     | 
| 
      
 6 
     | 
    
         
            +
                domains 'yandex.ru.', 'yandex.net.', 'yandex.com.'
         
     | 
| 
       10 
7 
     | 
    
         
             
              end
         
     | 
| 
       11 
8 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
              rule Legitbot::Yandex, %w 
     | 
| 
       13 
     | 
    
         
            -
                 
     | 
| 
       14 
     | 
    
         
            -
                 
     | 
| 
       15 
     | 
    
         
            -
                 
     | 
| 
       16 
     | 
    
         
            -
                 
     | 
| 
      
 9 
     | 
    
         
            +
              rule Legitbot::Yandex, %w[
         
     | 
| 
      
 10 
     | 
    
         
            +
                YandexAccessibilityBot
         
     | 
| 
      
 11 
     | 
    
         
            +
                YandexAdNet
         
     | 
| 
      
 12 
     | 
    
         
            +
                YandexBlogs
         
     | 
| 
      
 13 
     | 
    
         
            +
                YandexBot/
         
     | 
| 
      
 14 
     | 
    
         
            +
                YandexCalendar
         
     | 
| 
      
 15 
     | 
    
         
            +
                YandexDirect/
         
     | 
| 
      
 16 
     | 
    
         
            +
                YandexDirectDyn
         
     | 
| 
      
 17 
     | 
    
         
            +
                YandexFavicons
         
     | 
| 
      
 18 
     | 
    
         
            +
                YaDirectFetcher
         
     | 
| 
      
 19 
     | 
    
         
            +
                YandexForDomain
         
     | 
| 
      
 20 
     | 
    
         
            +
                YandexImages
         
     | 
| 
      
 21 
     | 
    
         
            +
                YandexImageResizer
         
     | 
| 
      
 22 
     | 
    
         
            +
                YandexMobileBot
         
     | 
| 
      
 23 
     | 
    
         
            +
                YandexMarket
         
     | 
| 
      
 24 
     | 
    
         
            +
                YandexMedia
         
     | 
| 
      
 25 
     | 
    
         
            +
                YandexMetrika
         
     | 
| 
      
 26 
     | 
    
         
            +
                YandexMobileScreenShotBot
         
     | 
| 
      
 27 
     | 
    
         
            +
                YandexNews
         
     | 
| 
      
 28 
     | 
    
         
            +
                YandexOntoDB
         
     | 
| 
      
 29 
     | 
    
         
            +
                YandexOntoDBAPI
         
     | 
| 
      
 30 
     | 
    
         
            +
                YandexPagechecker
         
     | 
| 
      
 31 
     | 
    
         
            +
                YandexPartner
         
     | 
| 
      
 32 
     | 
    
         
            +
                YandexRCA
         
     | 
| 
      
 33 
     | 
    
         
            +
                YandexSearchShop
         
     | 
| 
      
 34 
     | 
    
         
            +
                YandexSitelinks
         
     | 
| 
      
 35 
     | 
    
         
            +
                YandexSpravBot
         
     | 
| 
      
 36 
     | 
    
         
            +
                YandexTracker
         
     | 
| 
      
 37 
     | 
    
         
            +
                YandexTurbo
         
     | 
| 
      
 38 
     | 
    
         
            +
                YandexVertis
         
     | 
| 
      
 39 
     | 
    
         
            +
                YandexVerticals
         
     | 
| 
      
 40 
     | 
    
         
            +
                YandexVideo
         
     | 
| 
      
 41 
     | 
    
         
            +
                YandexVideoParser
         
     | 
| 
      
 42 
     | 
    
         
            +
                YandexWebmaster
         
     | 
| 
      
 43 
     | 
    
         
            +
                YandexScreenshotBot
         
     | 
| 
      
 44 
     | 
    
         
            +
                YandexMedianaBot
         
     | 
| 
      
 45 
     | 
    
         
            +
              ]
         
     | 
| 
       17 
46 
     | 
    
         
             
            end
         
     | 
    
        data/test/ahrefs_test.rb
    CHANGED
    
    | 
         @@ -1,28 +1,36 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'minitest/autorun'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require 'legitbot'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            class AhrefsTest < Minitest::Test
         
     | 
| 
       5 
7 
     | 
    
         
             
              def test_malicious_ip
         
     | 
| 
       6 
     | 
    
         
            -
                ip =  
     | 
| 
      
 8 
     | 
    
         
            +
                ip = '149.210.164.47'
         
     | 
| 
       7 
9 
     | 
    
         
             
                match = Legitbot::Ahrefs.new ip
         
     | 
| 
       8 
10 
     | 
    
         
             
                assert !match.valid?, msg: "#{ip} is not a real Ahrefs IP"
         
     | 
| 
       9 
11 
     | 
    
         
             
              end
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
13 
     | 
    
         
             
              def test_valid_ip
         
     | 
| 
       12 
     | 
    
         
            -
                ip =  
     | 
| 
      
 14 
     | 
    
         
            +
                ip = '54.36.148.0'
         
     | 
| 
       13 
15 
     | 
    
         
             
                match = Legitbot::Ahrefs.new ip
         
     | 
| 
       14 
16 
     | 
    
         
             
                assert match.valid?, msg: "#{ip} is a valid Ahrefs IP"
         
     | 
| 
       15 
17 
     | 
    
         
             
              end
         
     | 
| 
       16 
18 
     | 
    
         | 
| 
       17 
19 
     | 
    
         
             
              def test_malicious_ua
         
     | 
| 
       18 
     | 
    
         
            -
                bot = Legitbot.bot( 
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
      
 20 
     | 
    
         
            +
                bot = Legitbot.bot(
         
     | 
| 
      
 21 
     | 
    
         
            +
                  'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
         
     | 
| 
      
 22 
     | 
    
         
            +
                  '149.210.164.47'
         
     | 
| 
      
 23 
     | 
    
         
            +
                )
         
     | 
| 
      
 24 
     | 
    
         
            +
                assert bot, msg: 'Ahrefs detected from User-Agent'
         
     | 
| 
      
 25 
     | 
    
         
            +
                assert !bot.valid?, msg: 'Not a valid Ahrefs'
         
     | 
| 
       21 
26 
     | 
    
         
             
              end
         
     | 
| 
       22 
27 
     | 
    
         | 
| 
       23 
28 
     | 
    
         
             
              def test_valid_ua
         
     | 
| 
       24 
     | 
    
         
            -
                bot = Legitbot.bot( 
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 29 
     | 
    
         
            +
                bot = Legitbot.bot(
         
     | 
| 
      
 30 
     | 
    
         
            +
                  'Mozilla/5.0 (compatible; AhrefsBot/6.1; +http://ahrefs.com/robot/)',
         
     | 
| 
      
 31 
     | 
    
         
            +
                  '54.36.148.0'
         
     | 
| 
      
 32 
     | 
    
         
            +
                )
         
     | 
| 
      
 33 
     | 
    
         
            +
                assert bot, msg: 'Ahrefs detected from User-Agent'
         
     | 
| 
      
 34 
     | 
    
         
            +
                assert bot.valid?, msg: 'Valid Ahrefs'
         
     | 
| 
       27 
35 
     | 
    
         
             
              end
         
     | 
| 
       28 
36 
     | 
    
         
             
            end
         
     | 
| 
        data/test/apple_as_google_test.rb
    CHANGED
    
    | 
         @@ -1,22 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'minitest/autorun'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require 'legitbot'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            class AppleAsGoogleTest < Minitest::Test
         
     | 
| 
       5 
7 
     | 
    
         
             
              def test_valid_ip
         
     | 
| 
       6 
     | 
    
         
            -
                ip =  
     | 
| 
      
 8 
     | 
    
         
            +
                ip = '17.58.98.60'
         
     | 
| 
       7 
9 
     | 
    
         
             
                match = Legitbot::Apple_as_Google.new(ip)
         
     | 
| 
       8 
10 
     | 
    
         
             
                assert match.valid?, msg: "#{ip} is a valid Applebot IP"
         
     | 
| 
       9 
11 
     | 
    
         
             
              end
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
13 
     | 
    
         
             
              def test_invalid_ip
         
     | 
| 
       12 
     | 
    
         
            -
                ip =  
     | 
| 
      
 14 
     | 
    
         
            +
                ip = '127.0.0.1'
         
     | 
| 
       13 
15 
     | 
    
         
             
                match = Legitbot::Apple_as_Google.new(ip)
         
     | 
| 
       14 
16 
     | 
    
         
             
                assert match.fake?, msg: "#{ip} is a fake Applebot IP"
         
     | 
| 
       15 
17 
     | 
    
         
             
              end
         
     | 
| 
       16 
18 
     | 
    
         | 
| 
       17 
19 
     | 
    
         
             
              def test_user_agent
         
     | 
| 
       18 
     | 
    
         
            -
                bot = Legitbot.bot( 
     | 
| 
      
 20 
     | 
    
         
            +
                bot = Legitbot.bot(
         
     | 
| 
      
 21 
     | 
    
         
            +
                  'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
         
     | 
| 
      
 22 
     | 
    
         
            +
                  '17.58.98.60'
         
     | 
| 
      
 23 
     | 
    
         
            +
                )
         
     | 
| 
       19 
24 
     | 
    
         
             
                assert_equal :apple_as_google, bot.detected_as
         
     | 
| 
       20 
     | 
    
         
            -
                assert bot.valid?, msg:  
     | 
| 
      
 25 
     | 
    
         
            +
                assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
         
     | 
| 
       21 
26 
     | 
    
         
             
              end
         
     | 
| 
       22 
27 
     | 
    
         
             
            end
         
     | 
    
        data/test/apple_test.rb
    CHANGED
    
    | 
         @@ -1,22 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'minitest/autorun'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require 'legitbot'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            class AppleTest < Minitest::Test
         
     | 
| 
       5 
7 
     | 
    
         
             
              def test_valid_ip
         
     | 
| 
       6 
     | 
    
         
            -
                ip =  
     | 
| 
      
 8 
     | 
    
         
            +
                ip = '17.58.98.60'
         
     | 
| 
       7 
9 
     | 
    
         
             
                match = Legitbot::Apple.new(ip)
         
     | 
| 
       8 
10 
     | 
    
         
             
                assert match.valid?, msg: "#{ip} is a valid Applebot IP"
         
     | 
| 
       9 
11 
     | 
    
         
             
              end
         
     | 
| 
       10 
12 
     | 
    
         | 
| 
       11 
13 
     | 
    
         
             
              def test_invalid_ip
         
     | 
| 
       12 
     | 
    
         
            -
                ip =  
     | 
| 
      
 14 
     | 
    
         
            +
                ip = '127.0.0.1'
         
     | 
| 
       13 
15 
     | 
    
         
             
                match = Legitbot::Apple.new(ip)
         
     | 
| 
       14 
16 
     | 
    
         
             
                assert match.fake?, msg: "#{ip} is a fake Applebot IP"
         
     | 
| 
       15 
17 
     | 
    
         
             
              end
         
     | 
| 
       16 
18 
     | 
    
         | 
| 
      
 19 
     | 
    
         
            +
              # rubocop:disable Layout/LineLength
         
     | 
| 
       17 
20 
     | 
    
         
             
              def test_user_agent
         
     | 
| 
       18 
     | 
    
         
            -
                bot = Legitbot.bot( 
     | 
| 
      
 21 
     | 
    
         
            +
                bot = Legitbot.bot(
         
     | 
| 
      
 22 
     | 
    
         
            +
                  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/600.2.5 (KHTML, like Gecko) Version/8.0.2 Safari/600.2.5 (Applebot/0.1; +http://www.apple.com/go/applebot)',
         
     | 
| 
      
 23 
     | 
    
         
            +
                  '17.58.98.60'
         
     | 
| 
      
 24 
     | 
    
         
            +
                )
         
     | 
| 
       19 
25 
     | 
    
         
             
                assert_equal :apple, bot.detected_as
         
     | 
| 
       20 
     | 
    
         
            -
                assert bot.valid?, msg:  
     | 
| 
      
 26 
     | 
    
         
            +
                assert bot.valid?, msg: 'A valid Applebot User-agent and IP'
         
     | 
| 
       21 
27 
     | 
    
         
             
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
              # rubocop:enable Layout/LineLength
         
     | 
| 
       22 
29 
     | 
    
         
             
            end
         
     | 
    
        data/test/botmatch_test.rb
    CHANGED
    
    | 
         @@ -1,29 +1,11 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'minitest/autorun'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require 'legitbot'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
       4 
6 
     | 
    
         
             
            class BotMatchTest < Minitest::Test
         
     | 
| 
       5 
     | 
    
         
            -
              def test_reverse_name
         
     | 
| 
       6 
     | 
    
         
            -
                match = Legitbot::BotMatch.new "66.249.64.141"
         
     | 
| 
       7 
     | 
    
         
            -
                assert_equal "crawl-66-249-64-141.googlebot.com", match.reverse_name
         
     | 
| 
       8 
     | 
    
         
            -
              end
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
              def test_reverse_ip
         
     | 
| 
       11 
     | 
    
         
            -
                match = Legitbot::BotMatch.new "66.249.64.141"
         
     | 
| 
       12 
     | 
    
         
            -
                assert_equal "66.249.64.141", match.reversed_ip
         
     | 
| 
       13 
     | 
    
         
            -
              end
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
              def test_reverse_resolves
         
     | 
| 
       16 
     | 
    
         
            -
                match = Legitbot::BotMatch.new "66.249.64.141"
         
     | 
| 
       17 
     | 
    
         
            -
                assert_equal true, match.reverse_resolves?
         
     | 
| 
       18 
     | 
    
         
            -
              end
         
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
       20 
     | 
    
         
            -
              def test_reverse_doesnt_resolve
         
     | 
| 
       21 
     | 
    
         
            -
                match = Legitbot::BotMatch.new "5.140.70.64"
         
     | 
| 
       22 
     | 
    
         
            -
                assert !match.reverse_resolves?
         
     | 
| 
       23 
     | 
    
         
            -
              end
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
7 
     | 
    
         
             
              def test_valid_class_syntax
         
     | 
| 
       26 
     | 
    
         
            -
                assert Legitbot::Google.valid?( 
     | 
| 
       27 
     | 
    
         
            -
                assert Legitbot::Google.fake?( 
     | 
| 
      
 8 
     | 
    
         
            +
                assert Legitbot::Google.valid?('66.249.64.141'), msg: 'Valid Googlebot'
         
     | 
| 
      
 9 
     | 
    
         
            +
                assert Legitbot::Google.fake?('149.210.164.47'), msg: 'Fake Googlebot'
         
     | 
| 
       28 
10 
     | 
    
         
             
              end
         
     | 
| 
       29 
11 
     | 
    
         
             
            end
         
     |