legitbot 0.3.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/.github/workflows/build.yml +60 -0
 - data/.gitignore +1 -0
 - data/.rubocop.yml +2 -0
 - data/.ruby-version +1 -0
 - data/Gemfile +2 -0
 - data/README.md +3 -1
 - data/Rakefile +5 -3
 - data/legitbot.gemspec +19 -18
 - data/lib/legitbot.rb +4 -0
 - data/lib/legitbot/ahrefs.rb +13 -8
 - data/lib/legitbot/apple.rb +11 -11
 - data/lib/legitbot/baidu.rb +5 -7
 - data/lib/legitbot/bing.rb +5 -7
 - data/lib/legitbot/botmatch.rb +17 -44
 - data/lib/legitbot/config/resolver.rb +18 -0
 - data/lib/legitbot/duckduckgo.rb +18 -7
 - data/lib/legitbot/facebook.rb +8 -34
 - data/lib/legitbot/google.rb +5 -8
 - data/lib/legitbot/legitbot.rb +14 -9
 - data/lib/legitbot/oracle.rb +10 -0
 - data/lib/legitbot/pinterest.rb +5 -8
 - data/lib/legitbot/twitter.rb +14 -0
 - data/lib/legitbot/validators/domains.rb +71 -0
 - data/lib/legitbot/validators/ip_ranges.rb +81 -0
 - data/lib/legitbot/version.rb +3 -1
 - data/lib/legitbot/yandex.rb +41 -12
 - data/test/ahrefs_test.rb +16 -8
 - data/test/apple_as_google_test.rb +9 -4
 - data/test/apple_test.rb +11 -4
 - data/test/botmatch_test.rb +4 -22
 - data/test/facebook_test.rb +25 -10
 - data/test/google_test.rb +24 -14
 - data/test/legitbot/validators/domains_test.rb +58 -0
 - data/test/legitbot/validators/ip_ranges_test.rb +113 -0
 - data/test/legitbot_test.rb +8 -4
 - data/test/oracle_test.rb +36 -0
 - data/test/pinterest_test.rb +26 -14
 - data/test/twitter_test.rb +36 -0
 - metadata +87 -23
 - data/.travis.yml +0 -12
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA256:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 34d1432c7c405d783b22a46851db88ccdea9f303defeccdd1cf98604bbb6ce09
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: a66b586f4b2dca67fb875ea37add6e7d89a7ce5d0705c3d1898d96ecf091036e
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: cad1db2571e939020f74e871365c4748dd78ff18eb8ad9f005ea5bf5b0707835e296afa2601fc6309c994f69b7903d21da788fc219f5d712ee75e1ae9885fb7b
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: c72af598d60c55aff35a1b5e244dcde160a67589c588d68b9482d5e5c5f0590441c92505ec94beb462ff70a730ec9aabe80877a2d9db2f72c566c3a9c0b19059
         
     | 
| 
         @@ -0,0 +1,60 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            name: build
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            on: [push]
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            jobs:
         
     | 
| 
      
 6 
     | 
    
         
            +
              test:
         
     | 
| 
      
 7 
     | 
    
         
            +
                runs-on: ubuntu-latest
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                strategy:
         
     | 
| 
      
 10 
     | 
    
         
            +
                  fail-fast: false
         
     | 
| 
      
 11 
     | 
    
         
            +
                  matrix:
         
     | 
| 
      
 12 
     | 
    
         
            +
                    ruby: [ jruby, 2.6 ]
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
                steps:
         
     | 
| 
      
 15 
     | 
    
         
            +
                - uses: actions/checkout@v2
         
     | 
| 
      
 16 
     | 
    
         
            +
                - name: Set up Ruby
         
     | 
| 
      
 17 
     | 
    
         
            +
                  uses: ruby/setup-ruby@v1
         
     | 
| 
      
 18 
     | 
    
         
            +
                  with:
         
     | 
| 
      
 19 
     | 
    
         
            +
                    ruby-version: ${{ matrix.ruby }}
         
     | 
| 
      
 20 
     | 
    
         
            +
                - name: Cache dependencies
         
     | 
| 
      
 21 
     | 
    
         
            +
                  uses: actions/cache@v1
         
     | 
| 
      
 22 
     | 
    
         
            +
                  with:
         
     | 
| 
      
 23 
     | 
    
         
            +
                    path: vendor/bundle
         
     | 
| 
      
 24 
     | 
    
         
            +
                    key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
         
     | 
| 
      
 25 
     | 
    
         
            +
                    restore-keys: |
         
     | 
| 
      
 26 
     | 
    
         
            +
                      ${{ runner.os }}-${{ matrix.ruby }}-gems-
         
     | 
| 
      
 27 
     | 
    
         
            +
                - name: Install dependencies
         
     | 
| 
      
 28 
     | 
    
         
            +
                  run: |
         
     | 
| 
      
 29 
     | 
    
         
            +
                    bundle config path vendor/bundle
         
     | 
| 
      
 30 
     | 
    
         
            +
                    bundle install --jobs 4 --retry 3
         
     | 
| 
      
 31 
     | 
    
         
            +
                - name: Run tests
         
     | 
| 
      
 32 
     | 
    
         
            +
                  run: bundle exec rake test
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
              lint:
         
     | 
| 
      
 35 
     | 
    
         
            +
                needs: test
         
     | 
| 
      
 36 
     | 
    
         
            +
                runs-on: ubuntu-latest
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                strategy:
         
     | 
| 
      
 39 
     | 
    
         
            +
                  matrix:
         
     | 
| 
      
 40 
     | 
    
         
            +
                    ruby: [ 2.6 ]
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                steps:
         
     | 
| 
      
 43 
     | 
    
         
            +
                - uses: actions/checkout@v2
         
     | 
| 
      
 44 
     | 
    
         
            +
                - name: Set up Ruby
         
     | 
| 
      
 45 
     | 
    
         
            +
                  uses: ruby/setup-ruby@v1
         
     | 
| 
      
 46 
     | 
    
         
            +
                  with:
         
     | 
| 
      
 47 
     | 
    
         
            +
                    ruby-version: ${{ matrix.ruby }}
         
     | 
| 
      
 48 
     | 
    
         
            +
                - name: Cache dependencies
         
     | 
| 
      
 49 
     | 
    
         
            +
                  uses: actions/cache@v1
         
     | 
| 
      
 50 
     | 
    
         
            +
                  with:
         
     | 
| 
      
 51 
     | 
    
         
            +
                    path: vendor/bundle
         
     | 
| 
      
 52 
     | 
    
         
            +
                    key: ${{ runner.os }}-${{ matrix.ruby }}-gems-${{ hashFiles('**/Gemfile.lock') }}
         
     | 
| 
      
 53 
     | 
    
         
            +
                    restore-keys: |
         
     | 
| 
      
 54 
     | 
    
         
            +
                      ${{ runner.os }}-${{ matrix.ruby }}-gems-
         
     | 
| 
      
 55 
     | 
    
         
            +
                - name: Install dependencies
         
     | 
| 
      
 56 
     | 
    
         
            +
                  run: |
         
     | 
| 
      
 57 
     | 
    
         
            +
                    bundle config path vendor/bundle
         
     | 
| 
      
 58 
     | 
    
         
            +
                    bundle install --jobs 4 --retry 3
         
     | 
| 
      
 59 
     | 
    
         
            +
                - name: Run linter
         
     | 
| 
      
 60 
     | 
    
         
            +
                  run: bundle exec rubocop
         
     | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/.rubocop.yml
    ADDED
    
    
    
        data/.ruby-version
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            2.4
         
     | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | 
         @@ -1,4 +1,4 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # Legitbot  
     | 
| 
      
 1 
     | 
    
         
            +
            # Legitbot  [![Gem Version](https://badge.fury.io/rb/legitbot.svg)](https://badge.fury.io/rb/legitbot)
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            Ruby gem to check that an IP belongs to a bot, typically a search
         
     | 
| 
       4 
4 
     | 
    
         
             
            engine. This can be of help in protecting a web site from fake search
         
     | 
| 
         @@ -50,7 +50,9 @@ end 
     | 
|
| 
       50 
50 
     | 
    
         
             
            * [DuckDuckGo bot](https://duckduckgo.com/duckduckbot)
         
     | 
| 
       51 
51 
     | 
    
         
             
            * [Facebook crawler](https://developers.facebook.com/docs/sharing/webmasters/crawler)
         
     | 
| 
       52 
52 
     | 
    
         
             
            * [Google crawlers](https://support.google.com/webmasters/answer/1061943)
         
     | 
| 
      
 53 
     | 
    
         
            +
            * [Oracle Data Cloud Crawler](https://www.oracle.com/corporate/acquisitions/grapeshot/crawler.html)
         
     | 
| 
       53 
54 
     | 
    
         
             
            * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
         
     | 
| 
      
 55 
     | 
    
         
            +
            * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
         
     | 
| 
       54 
56 
     | 
    
         
             
            * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
         
     | 
| 
       55 
57 
     | 
    
         | 
| 
       56 
58 
     | 
    
         
             
            ## License
         
     | 
    
        data/Rakefile
    CHANGED
    
    | 
         @@ -1,14 +1,16 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'rubygems'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require 'bundler'
         
     | 
| 
       3 
5 
     | 
    
         
             
            require 'bump/tasks'
         
     | 
| 
       4 
     | 
    
         
            -
            require  
     | 
| 
      
 6 
     | 
    
         
            +
            require 'rake/testtask'
         
     | 
| 
       5 
7 
     | 
    
         
             
            Bundler::GemHelper.install_tasks
         
     | 
| 
       6 
8 
     | 
    
         | 
| 
       7 
9 
     | 
    
         
             
            Bump.tag_by_default = true
         
     | 
| 
       8 
10 
     | 
    
         | 
| 
       9 
11 
     | 
    
         
             
            Rake::TestTask.new do |t|
         
     | 
| 
       10 
     | 
    
         
            -
              t.libs <<  
     | 
| 
       11 
     | 
    
         
            -
              t.test_files = FileList['test 
     | 
| 
      
 12 
     | 
    
         
            +
              t.libs << 'test'
         
     | 
| 
      
 13 
     | 
    
         
            +
              t.test_files = FileList['test/**/*_test.rb']
         
     | 
| 
       12 
14 
     | 
    
         
             
              t.warning = true
         
     | 
| 
       13 
15 
     | 
    
         
             
              t.verbose = true
         
     | 
| 
       14 
16 
     | 
    
         
             
            end
         
     | 
    
        data/legitbot.gemspec
    CHANGED
    
    | 
         @@ -1,27 +1,28 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #  
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.push File.expand_path('lib', __dir__)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'legitbot/version'
         
     | 
| 
       4 
5 
     | 
    
         | 
| 
       5 
6 
     | 
    
         
             
            Gem::Specification.new do |spec|
         
     | 
| 
       6 
7 
     | 
    
         
             
              spec.name = 'legitbot'
         
     | 
| 
       7 
8 
     | 
    
         
             
              spec.version = Legitbot::VERSION
         
     | 
| 
       8 
9 
     | 
    
         
             
              spec.license = 'Apache-2.0'
         
     | 
| 
       9 
10 
     | 
    
         | 
| 
       10 
     | 
    
         
            -
              spec.author =  
     | 
| 
       11 
     | 
    
         
            -
              spec.email =  
     | 
| 
       12 
     | 
    
         
            -
              spec.homepage =  
     | 
| 
       13 
     | 
    
         
            -
              spec.summary =  
     | 
| 
       14 
     | 
    
         
            -
              spec.description =  
     | 
| 
       15 
     | 
    
         
            -
                "made by a real search engine, not a malicious agent"
         
     | 
| 
      
 11 
     | 
    
         
            +
              spec.author = 'Alexander Azarov'
         
     | 
| 
      
 12 
     | 
    
         
            +
              spec.email = 'self@alaz.me'
         
     | 
| 
      
 13 
     | 
    
         
            +
              spec.homepage = 'https://github.com/alaz/legitbot'
         
     | 
| 
      
 14 
     | 
    
         
            +
              spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
         
     | 
| 
      
 15 
     | 
    
         
            +
              spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
              spec.required_ruby_version = '>= 2. 
     | 
| 
       18 
     | 
    
         
            -
              spec.add_dependency  
     | 
| 
       19 
     | 
    
         
            -
              spec.add_dependency  
     | 
| 
       20 
     | 
    
         
            -
              spec.add_development_dependency  
     | 
| 
       21 
     | 
    
         
            -
              spec.add_development_dependency  
     | 
| 
       22 
     | 
    
         
            -
              spec.add_development_dependency  
     | 
| 
      
 17 
     | 
    
         
            +
              spec.required_ruby_version = '>= 2.4.0'
         
     | 
| 
      
 18 
     | 
    
         
            +
              spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
         
     | 
| 
      
 19 
     | 
    
         
            +
              spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
         
     | 
| 
      
 20 
     | 
    
         
            +
              spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
         
     | 
| 
      
 21 
     | 
    
         
            +
              spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
         
     | 
| 
      
 22 
     | 
    
         
            +
              spec.add_development_dependency 'rake', '~> 12.3', '>= 12.3.0'
         
     | 
| 
      
 23 
     | 
    
         
            +
              spec.add_development_dependency 'rubocop', '~> 0.74', '>= 0.74.0'
         
     | 
| 
       23 
24 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
              spec.files = `git ls-files`.split( 
     | 
| 
       25 
     | 
    
         
            -
              spec.rdoc_options = [ 
     | 
| 
       26 
     | 
    
         
            -
              spec.test_files = Dir.glob( 
     | 
| 
      
 25 
     | 
    
         
            +
              spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
         
     | 
| 
      
 26 
     | 
    
         
            +
              spec.rdoc_options = ['--charset=UTF-8']
         
     | 
| 
      
 27 
     | 
    
         
            +
              spec.test_files = Dir.glob('test/**/*')
         
     | 
| 
       27 
28 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot.rb
    CHANGED
    
    | 
         @@ -1,3 +1,5 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require_relative 'legitbot/legitbot'
         
     | 
| 
       2 
4 
     | 
    
         
             
            require_relative 'legitbot/botmatch'
         
     | 
| 
       3 
5 
     | 
    
         | 
| 
         @@ -8,5 +10,7 @@ require_relative 'legitbot/bing' 
     | 
|
| 
       8 
10 
     | 
    
         
             
            require_relative 'legitbot/duckduckgo'
         
     | 
| 
       9 
11 
     | 
    
         
             
            require_relative 'legitbot/facebook'
         
     | 
| 
       10 
12 
     | 
    
         
             
            require_relative 'legitbot/google'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require_relative 'legitbot/oracle'
         
     | 
| 
       11 
14 
     | 
    
         
             
            require_relative 'legitbot/pinterest'
         
     | 
| 
      
 15 
     | 
    
         
            +
            require_relative 'legitbot/twitter'
         
     | 
| 
       12 
16 
     | 
    
         
             
            require_relative 'legitbot/yandex'
         
     | 
    
        data/lib/legitbot/ahrefs.rb
    CHANGED
    
    | 
         @@ -1,13 +1,18 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       2 
4 
     | 
    
         
             
              # https://ahrefs.com/robot
         
     | 
| 
       3 
5 
     | 
    
         
             
              class Ahrefs < BotMatch
         
     | 
| 
       4 
     | 
    
         
            -
                 
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                   
     | 
| 
       8 
     | 
    
         
            -
                   
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
                ip_ranges %w[
         
     | 
| 
      
 7 
     | 
    
         
            +
                  54.36.148.0/24
         
     | 
| 
      
 8 
     | 
    
         
            +
                  54.36.149.0/24
         
     | 
| 
      
 9 
     | 
    
         
            +
                  54.36.150.0/24
         
     | 
| 
      
 10 
     | 
    
         
            +
                  195.154.122.0/24
         
     | 
| 
      
 11 
     | 
    
         
            +
                  195.154.123.0/24
         
     | 
| 
      
 12 
     | 
    
         
            +
                  195.154.126.0/24
         
     | 
| 
      
 13 
     | 
    
         
            +
                  195.154.127.0/24
         
     | 
| 
      
 14 
     | 
    
         
            +
                ]
         
     | 
| 
       10 
15 
     | 
    
         
             
              end
         
     | 
| 
       11 
16 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
              rule Legitbot::Ahrefs, %w 
     | 
| 
      
 17 
     | 
    
         
            +
              rule Legitbot::Ahrefs, %w[AhrefsBot]
         
     | 
| 
       13 
18 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/apple.rb
    CHANGED
    
    | 
         @@ -1,20 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            require 'ipaddr'
         
     | 
| 
       2 
4 
     | 
    
         | 
| 
       3 
     | 
    
         
            -
            module Legitbot
         
     | 
| 
      
 5 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       4 
6 
     | 
    
         
             
              # https://support.apple.com/en-us/HT204683
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
7 
     | 
    
         
             
              class Apple < BotMatch
         
     | 
| 
       7 
     | 
    
         
            -
                 
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
                def valid?
         
     | 
| 
       10 
     | 
    
         
            -
                  ip = IPAddr.new @ip
         
     | 
| 
       11 
     | 
    
         
            -
                  Range.include? ip
         
     | 
| 
       12 
     | 
    
         
            -
                end
         
     | 
| 
      
 8 
     | 
    
         
            +
                ip_ranges '17.0.0.0/8'
         
     | 
| 
       13 
9 
     | 
    
         
             
              end
         
     | 
| 
       14 
10 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
               
     | 
| 
      
 11 
     | 
    
         
            +
              # https://support.apple.com/en-us/HT204683
         
     | 
| 
      
 12 
     | 
    
         
            +
              # rubocop:disable Naming/ClassAndModuleCamelCase
         
     | 
| 
      
 13 
     | 
    
         
            +
              class Apple_as_Google < BotMatch
         
     | 
| 
      
 14 
     | 
    
         
            +
                ip_ranges '17.0.0.0/8'
         
     | 
| 
       16 
15 
     | 
    
         
             
              end
         
     | 
| 
      
 16 
     | 
    
         
            +
              # rubocop:enable Naming/ClassAndModuleCamelCase
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
              rule Legitbot::Apple, %w 
     | 
| 
       19 
     | 
    
         
            -
              rule Legitbot::Apple_as_Google, %w 
     | 
| 
      
 18 
     | 
    
         
            +
              rule Legitbot::Apple, %w[Applebot]
         
     | 
| 
      
 19 
     | 
    
         
            +
              rule Legitbot::Apple_as_Google, %w[Googlebot]
         
     | 
| 
       20 
20 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/baidu.rb
    CHANGED
    
    | 
         @@ -1,12 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       2 
4 
     | 
    
         
             
              # http://help.baidu.com/question?prod_en=master&class=498&id=1000973
         
     | 
| 
       3 
5 
     | 
    
         
             
              class Baidu < BotMatch
         
     | 
| 
       4 
     | 
    
         
            -
                 
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                def valid?
         
     | 
| 
       7 
     | 
    
         
            -
                  subdomain_of?(*Baidu::ValidDomains)
         
     | 
| 
       8 
     | 
    
         
            -
                end
         
     | 
| 
      
 6 
     | 
    
         
            +
                domains 'baidu.com.', 'baidu.jp.', reverse: false
         
     | 
| 
       9 
7 
     | 
    
         
             
              end
         
     | 
| 
       10 
8 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
              rule Legitbot::Baidu, %w 
     | 
| 
      
 9 
     | 
    
         
            +
              rule Legitbot::Baidu, %w[Baiduspider]
         
     | 
| 
       12 
10 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/bing.rb
    CHANGED
    
    | 
         @@ -1,12 +1,10 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       2 
4 
     | 
    
         
             
              # https://blogs.bing.com/webmaster/2012/08/31/how-to-verify-that-bingbot-is-bingbot/
         
     | 
| 
       3 
5 
     | 
    
         
             
              class Bing < BotMatch
         
     | 
| 
       4 
     | 
    
         
            -
                 
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                def valid?
         
     | 
| 
       7 
     | 
    
         
            -
                  subdomain_of?(*Bing::ValidDomains) && reverse_resolves?
         
     | 
| 
       8 
     | 
    
         
            -
                end
         
     | 
| 
      
 6 
     | 
    
         
            +
                domains 'search.msn.com.'
         
     | 
| 
       9 
7 
     | 
    
         
             
              end
         
     | 
| 
       10 
8 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
              rule Legitbot::Bing, %w 
     | 
| 
      
 9 
     | 
    
         
            +
              rule Legitbot::Bing, %w[Bingbot bingbot]
         
     | 
| 
       12 
10 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/botmatch.rb
    CHANGED
    
    | 
         @@ -1,5 +1,8 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
       2 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require_relative 'config/resolver'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require_relative 'validators/domains'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require_relative 'validators/ip_ranges'
         
     | 
| 
       3 
6 
     | 
    
         | 
| 
       4 
7 
     | 
    
         
             
            module Legitbot
         
     | 
| 
       5 
8 
     | 
    
         
             
              ##
         
     | 
| 
         @@ -7,61 +10,31 @@ module Legitbot 
     | 
|
| 
       7 
10 
     | 
    
         
             
              # +valid?+, +fake?+ and +detected_as+
         
     | 
| 
       8 
11 
     | 
    
         
             
              #
         
     | 
| 
       9 
12 
     | 
    
         
             
              class BotMatch
         
     | 
| 
       10 
     | 
    
         
            -
                 
     | 
| 
       11 
     | 
    
         
            -
             
     | 
| 
       12 
     | 
    
         
            -
                  @ip = ip
         
     | 
| 
       13 
     | 
    
         
            -
                end
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
                ##
         
     | 
| 
       16 
     | 
    
         
            -
                # Returns a Resolv::DNS::Name instance with
         
     | 
| 
       17 
     | 
    
         
            -
                # the reverse name
         
     | 
| 
       18 
     | 
    
         
            -
                def reverse_domain
         
     | 
| 
       19 
     | 
    
         
            -
                  @reverse_domain ||= @dns.getname(@ip)
         
     | 
| 
       20 
     | 
    
         
            -
                rescue Resolv::ResolvError
         
     | 
| 
       21 
     | 
    
         
            -
                  @reverse_domain ||= nil
         
     | 
| 
       22 
     | 
    
         
            -
                end
         
     | 
| 
       23 
     | 
    
         
            -
             
     | 
| 
       24 
     | 
    
         
            -
                ##
         
     | 
| 
       25 
     | 
    
         
            -
                # Returns a String with the reverse name
         
     | 
| 
       26 
     | 
    
         
            -
                def reverse_name
         
     | 
| 
       27 
     | 
    
         
            -
                  reverse_domain&.to_s
         
     | 
| 
       28 
     | 
    
         
            -
                end
         
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
                ##
         
     | 
| 
       31 
     | 
    
         
            -
                # Returns a String with IP created from the reverse name
         
     | 
| 
       32 
     | 
    
         
            -
                def reversed_ip
         
     | 
| 
       33 
     | 
    
         
            -
                  return nil if reverse_name.nil?
         
     | 
| 
      
 13 
     | 
    
         
            +
                include Legitbot::Validators::IpRanges
         
     | 
| 
      
 14 
     | 
    
         
            +
                include Legitbot::Validators::Domains
         
     | 
| 
       34 
15 
     | 
    
         | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
                  @ 
     | 
| 
       37 
     | 
    
         
            -
                end
         
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
                def reverse_resolves?
         
     | 
| 
       40 
     | 
    
         
            -
                  @ip == reversed_ip
         
     | 
| 
       41 
     | 
    
         
            -
                end
         
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
       43 
     | 
    
         
            -
                def subdomain_of?(*domains)
         
     | 
| 
       44 
     | 
    
         
            -
                  return false if reverse_name.nil?
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
                  domains.any? { |d|
         
     | 
| 
       47 
     | 
    
         
            -
                    reverse_domain.subdomain_of? Resolv::DNS::Name.create(d)
         
     | 
| 
       48 
     | 
    
         
            -
                  }
         
     | 
| 
      
 16 
     | 
    
         
            +
                def initialize(ip)
         
     | 
| 
      
 17 
     | 
    
         
            +
                  @ip = ip
         
     | 
| 
       49 
18 
     | 
    
         
             
                end
         
     | 
| 
       50 
19 
     | 
    
         | 
| 
       51 
20 
     | 
    
         
             
                def detected_as
         
     | 
| 
       52 
21 
     | 
    
         
             
                  self.class.name.split('::').last.downcase.to_sym
         
     | 
| 
       53 
22 
     | 
    
         
             
                end
         
     | 
| 
       54 
23 
     | 
    
         | 
| 
      
 24 
     | 
    
         
            +
                def valid?
         
     | 
| 
      
 25 
     | 
    
         
            +
                  valid_ip? && valid_domain?
         
     | 
| 
      
 26 
     | 
    
         
            +
                end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
       55 
28 
     | 
    
         
             
                def fake?
         
     | 
| 
       56 
29 
     | 
    
         
             
                  !valid?
         
     | 
| 
       57 
30 
     | 
    
         
             
                end
         
     | 
| 
       58 
31 
     | 
    
         | 
| 
       59 
     | 
    
         
            -
                def self.valid?(ip 
     | 
| 
       60 
     | 
    
         
            -
                   
     | 
| 
      
 32 
     | 
    
         
            +
                def self.valid?(ip)
         
     | 
| 
      
 33 
     | 
    
         
            +
                  new(ip).valid?
         
     | 
| 
       61 
34 
     | 
    
         
             
                end
         
     | 
| 
       62 
35 
     | 
    
         | 
| 
       63 
     | 
    
         
            -
                def self.fake?(ip 
     | 
| 
       64 
     | 
    
         
            -
                   
     | 
| 
      
 36 
     | 
    
         
            +
                def self.fake?(ip)
         
     | 
| 
      
 37 
     | 
    
         
            +
                  new(ip).fake?
         
     | 
| 
       65 
38 
     | 
    
         
             
                end
         
     | 
| 
       66 
39 
     | 
    
         
             
              end
         
     | 
| 
       67 
40 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,18 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'resolv'
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            module Legitbot
         
     | 
| 
      
 6 
     | 
    
         
            +
              module Config
         
     | 
| 
      
 7 
     | 
    
         
            +
                module Resolver # :nodoc:
         
     | 
| 
      
 8 
     | 
    
         
            +
                  def resolver_config(options = nil)
         
     | 
| 
      
 9 
     | 
    
         
            +
                    @resolver_config = options
         
     | 
| 
      
 10 
     | 
    
         
            +
                  end
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
                  def resolver
         
     | 
| 
      
 13 
     | 
    
         
            +
                    @resolver_config ||= Legitbot.resolver_config
         
     | 
| 
      
 14 
     | 
    
         
            +
                    @resolver ||= Resolv::DNS.new @resolver_config
         
     | 
| 
      
 15 
     | 
    
         
            +
                  end
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/legitbot/duckduckgo.rb
    CHANGED
    
    | 
         @@ -1,12 +1,23 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       2 
4 
     | 
    
         
             
              # https://duckduckgo.com/duckduckbot
         
     | 
| 
       3 
5 
     | 
    
         
             
              class DuckDuckGo < BotMatch
         
     | 
| 
       4 
     | 
    
         
            -
                 
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
     | 
    
         
            -
                   
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
                ip_ranges %w[
         
     | 
| 
      
 7 
     | 
    
         
            +
                  23.21.227.69
         
     | 
| 
      
 8 
     | 
    
         
            +
                  40.88.21.235
         
     | 
| 
      
 9 
     | 
    
         
            +
                  50.16.241.113
         
     | 
| 
      
 10 
     | 
    
         
            +
                  50.16.241.114
         
     | 
| 
      
 11 
     | 
    
         
            +
                  50.16.241.117
         
     | 
| 
      
 12 
     | 
    
         
            +
                  50.16.247.234
         
     | 
| 
      
 13 
     | 
    
         
            +
                  52.204.97.54
         
     | 
| 
      
 14 
     | 
    
         
            +
                  52.5.190.19
         
     | 
| 
      
 15 
     | 
    
         
            +
                  54.197.234.188
         
     | 
| 
      
 16 
     | 
    
         
            +
                  54.208.100.253
         
     | 
| 
      
 17 
     | 
    
         
            +
                  54.208.102.37
         
     | 
| 
      
 18 
     | 
    
         
            +
                  107.21.1.8
         
     | 
| 
      
 19 
     | 
    
         
            +
                ]
         
     | 
| 
       9 
20 
     | 
    
         
             
              end
         
     | 
| 
       10 
21 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
              rule Legitbot::DuckDuckGo, %w 
     | 
| 
      
 22 
     | 
    
         
            +
              rule Legitbot::DuckDuckGo, %w[DuckDuckBot]
         
     | 
| 
       12 
23 
     | 
    
         
             
            end
         
     | 
    
        data/lib/legitbot/facebook.rb
    CHANGED
    
    | 
         @@ -1,48 +1,22 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            require 'irrc'
         
     | 
| 
       3 
     | 
    
         
            -
            require 'interval_tree'
         
     | 
| 
       4 
4 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
            module Legitbot
         
     | 
| 
      
 5 
     | 
    
         
            +
            module Legitbot # :nodoc:
         
     | 
| 
       6 
6 
     | 
    
         
             
              # https://developers.facebook.com/docs/sharing/webmasters/crawler
         
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
7 
     | 
    
         
             
              class Facebook < BotMatch
         
     | 
| 
       9 
8 
     | 
    
         
             
                AS = 'AS32934'
         
     | 
| 
       10 
9 
     | 
    
         | 
| 
       11 
     | 
    
         
            -
                 
     | 
| 
       12 
     | 
    
         
            -
                  ip = IPAddr.new(@ip)
         
     | 
| 
       13 
     | 
    
         
            -
                  Facebook.valid_ips[ip.ipv4? ? :ipv4 : :ipv6].search(ip.to_i).size > 0
         
     | 
| 
       14 
     | 
    
         
            -
                end
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                @mutex = Mutex.new
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
                def self.valid_ips
         
     | 
| 
       19 
     | 
    
         
            -
                  @mutex.synchronize { @ips ||= load_ips }
         
     | 
| 
       20 
     | 
    
         
            -
                end
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
                def self.reload!
         
     | 
| 
       23 
     | 
    
         
            -
                  @mutex.synchronize { @ips = load_ips }
         
     | 
| 
       24 
     | 
    
         
            -
                end
         
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
                def self.load_ips
         
     | 
| 
       27 
     | 
    
         
            -
                  whois.map do |(family, records)|
         
     | 
| 
       28 
     | 
    
         
            -
                    ranges = records.map do |cidr|
         
     | 
| 
       29 
     | 
    
         
            -
                      range = IPAddr.new(cidr).to_range
         
     | 
| 
       30 
     | 
    
         
            -
                      (range.begin.to_i..range.end.to_i)
         
     | 
| 
       31 
     | 
    
         
            -
                    end
         
     | 
| 
       32 
     | 
    
         
            -
                    [family, IntervalTree::Tree.new(ranges)]
         
     | 
| 
       33 
     | 
    
         
            -
                  end.to_h
         
     | 
| 
       34 
     | 
    
         
            -
                end
         
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
                def self.whois
         
     | 
| 
      
 10 
     | 
    
         
            +
                ip_ranges do
         
     | 
| 
       37 
11 
     | 
    
         
             
                  client = Irrc::Client.new
         
     | 
| 
       38 
12 
     | 
    
         
             
                  client.query :radb, AS
         
     | 
| 
       39 
13 
     | 
    
         
             
                  results = client.perform
         
     | 
| 
       40 
14 
     | 
    
         | 
| 
       41 
     | 
    
         
            -
                  %i 
     | 
| 
       42 
     | 
    
         
            -
                     
     | 
| 
       43 
     | 
    
         
            -
                  end. 
     | 
| 
      
 15 
     | 
    
         
            +
                  %i[ipv4 ipv6].map do |family|
         
     | 
| 
      
 16 
     | 
    
         
            +
                    results[AS][family][AS]
         
     | 
| 
      
 17 
     | 
    
         
            +
                  end.flatten
         
     | 
| 
       44 
18 
     | 
    
         
             
                end
         
     | 
| 
       45 
19 
     | 
    
         
             
              end
         
     | 
| 
       46 
20 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
              rule Legitbot::Facebook, %w 
     | 
| 
      
 21 
     | 
    
         
            +
              rule Legitbot::Facebook, %w[Facebot facebookexternalhit/1.1]
         
     | 
| 
       48 
22 
     | 
    
         
             
            end
         
     |