legitbot 1.2.0 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/build.yml +7 -3
- data/.rubocop.yml +7 -0
- data/.ruby-version +1 -1
- data/README.md +8 -9
- data/legitbot.gemspec +2 -2
- data/lib/legitbot.rb +1 -0
- data/lib/legitbot/duckduckgo.rb +2 -1
- data/lib/legitbot/petalbot.rb +11 -0
- data/lib/legitbot/version.rb +1 -1
- data/test/petalbot_test.rb +52 -0
- data/test/pinterest_test.rb +2 -2
- metadata +11 -8
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 48fa3cd6d810e32c24c3027b57879438c658b64bdf2dfae3fbbcfd5ac67fb790
         | 
| 4 | 
            +
              data.tar.gz: f8b77b3571978137339b06e633c9725ee8d1f4f6793bcdae8321a16cb9b37dfd
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: ff62eb5208ee4e565504ada36c3d94129171f2487c5cf61e3cfbc5388a189ab5e5645537d311a01376e16abd486d69c465d148d6a4ebf35f5cf5d4c51a212bd8
         | 
| 7 | 
            +
              data.tar.gz: c3a9d54b1add735629ac5caaf744c8bb75d2d9fa88fe5e793bd351e860cf00ef39cb084eb3bb048fe34b1ab9d95dec66eb08b075b9ee0bd7657e0160873f24bb
         | 
    
        data/.github/workflows/build.yml
    CHANGED
    
    | @@ -1,6 +1,10 @@ | |
| 1 1 | 
             
            name: build
         | 
| 2 2 |  | 
| 3 | 
            -
            on: | 
| 3 | 
            +
            on:
         | 
| 4 | 
            +
              pull_request:
         | 
| 5 | 
            +
              push:
         | 
| 6 | 
            +
              schedule:
         | 
| 7 | 
            +
              - cron: '29 6 * * 6'
         | 
| 4 8 |  | 
| 5 9 | 
             
            jobs:
         | 
| 6 10 | 
             
              test:
         | 
| @@ -9,7 +13,7 @@ jobs: | |
| 9 13 | 
             
                strategy:
         | 
| 10 14 | 
             
                  fail-fast: false
         | 
| 11 15 | 
             
                  matrix:
         | 
| 12 | 
            -
                    ruby: [ jruby, 2.6 ]
         | 
| 16 | 
            +
                    ruby: [ jruby, 2.5, 2.6, 2.7 ]
         | 
| 13 17 |  | 
| 14 18 | 
             
                steps:
         | 
| 15 19 | 
             
                - uses: actions/checkout@v2
         | 
| @@ -37,7 +41,7 @@ jobs: | |
| 37 41 |  | 
| 38 42 | 
             
                strategy:
         | 
| 39 43 | 
             
                  matrix:
         | 
| 40 | 
            -
                    ruby: [ 2. | 
| 44 | 
            +
                    ruby: [ 2.7 ]
         | 
| 41 45 |  | 
| 42 46 | 
             
                steps:
         | 
| 43 47 | 
             
                - uses: actions/checkout@v2
         | 
    
        data/.rubocop.yml
    CHANGED
    
    
    
        data/.ruby-version
    CHANGED
    
    | @@ -1 +1 @@ | |
| 1 | 
            -
            2. | 
| 1 | 
            +
            2.7.3
         | 
    
        data/README.md
    CHANGED
    
    | @@ -1,13 +1,11 @@ | |
| 1 | 
            -
            # Legitbot   | 
| 1 | 
            +
            # Legitbot  
         | 
| 2 2 |  | 
| 3 | 
            -
            Ruby gem to  | 
| 4 | 
            -
            engine. | 
| 5 | 
            -
            engines.
         | 
| 3 | 
            +
            Ruby gem to make sure that an IP really belongs to a bot, typically a search
         | 
| 4 | 
            +
            engine.
         | 
| 6 5 |  | 
| 7 6 | 
             
            ## Usage
         | 
| 8 7 |  | 
| 9 | 
            -
            Suppose you have a Web request and you | 
| 10 | 
            -
            search engine:
         | 
| 8 | 
            +
            Suppose you have a Web request and you would like to check it is not diguised:
         | 
| 11 9 |  | 
| 12 10 | 
             
            ```ruby
         | 
| 13 11 | 
             
            bot = Legitbot.bot(userAgent, ip)
         | 
| @@ -22,7 +20,7 @@ bot.valid? # => true | |
| 22 20 | 
             
            bot.fake? # => false
         | 
| 23 21 | 
             
            ```
         | 
| 24 22 |  | 
| 25 | 
            -
            Sometimes you already know  | 
| 23 | 
            +
            Sometimes you already know which search engine to expect. For example, you might
         | 
| 26 24 | 
             
            be using [rack-attack](https://github.com/kickstarter/rack-attack):
         | 
| 27 25 |  | 
| 28 26 | 
             
            ```ruby
         | 
| @@ -31,8 +29,8 @@ Rack::Attack.blocklist("fake Googlebot") do |req| | |
| 31 29 | 
             
            end
         | 
| 32 30 | 
             
            ```
         | 
| 33 31 |  | 
| 34 | 
            -
            Or if you do not like all  | 
| 35 | 
            -
             | 
| 32 | 
            +
            Or if you do not like all those ghoulish crawlers stealing your
         | 
| 33 | 
            +
            content, evaluating it and getting ready to invade your site with spammers,
         | 
| 36 34 | 
             
            then block them all:
         | 
| 37 35 |  | 
| 38 36 | 
             
            ```ruby
         | 
| @@ -55,6 +53,7 @@ end | |
| 55 53 | 
             
            * [Pinterest](https://help.pinterest.com/en/articles/about-pinterest-crawler-0)
         | 
| 56 54 | 
             
            * [Twitterbot](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/getting-started), the list of IPs is in the [Troubleshooting page](https://developer.twitter.com/en/docs/tweets/optimize-with-cards/guides/troubleshooting-cards)
         | 
| 57 55 | 
             
            * [Yandex robots](https://yandex.com/support/webmaster/robot-workings/check-yandex-robots.xml)
         | 
| 56 | 
            +
            * [Petal search engine](http://aspiegel.com/petalbot)
         | 
| 58 57 |  | 
| 59 58 | 
             
            ## License
         | 
| 60 59 |  | 
    
        data/legitbot.gemspec
    CHANGED
    
    | @@ -14,13 +14,13 @@ Gem::Specification.new do |spec| | |
| 14 14 | 
             
              spec.summary = 'Validate requests from Web crawlers: impersonating or not?'
         | 
| 15 15 | 
             
              spec.description = 'Does Web request come from a real search engine or from an impersonating agent?'
         | 
| 16 16 |  | 
| 17 | 
            -
              spec.required_ruby_version = '>= 2. | 
| 17 | 
            +
              spec.required_ruby_version = '>= 2.5.0'
         | 
| 18 18 | 
             
              spec.add_dependency 'augmented_interval_tree', '~> 0.1', '>= 0.1.1'
         | 
| 19 19 | 
             
              spec.add_dependency 'irrc', '~> 0.2', '>= 0.2.1'
         | 
| 20 20 | 
             
              spec.add_development_dependency 'bump', '~> 0.8', '>= 0.8.0'
         | 
| 21 21 | 
             
              spec.add_development_dependency 'minitest', '~> 5.1', '>= 5.1.0'
         | 
| 22 22 | 
             
              spec.add_development_dependency 'rake', '~> 13.0', '>= 13.0.0'
         | 
| 23 | 
            -
              spec.add_development_dependency 'rubocop', '~>  | 
| 23 | 
            +
              spec.add_development_dependency 'rubocop', '~> 1.18.0', '>= 1.18.0'
         | 
| 24 24 |  | 
| 25 25 | 
             
              spec.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
         | 
| 26 26 | 
             
              spec.rdoc_options = ['--charset=UTF-8']
         | 
    
        data/lib/legitbot.rb
    CHANGED
    
    
    
        data/lib/legitbot/duckduckgo.rb
    CHANGED
    
    | @@ -4,14 +4,15 @@ module Legitbot # :nodoc: | |
| 4 4 | 
             
              # https://duckduckgo.com/duckduckbot
         | 
| 5 5 | 
             
              class DuckDuckGo < BotMatch
         | 
| 6 6 | 
             
                ip_ranges %w[
         | 
| 7 | 
            +
                  20.191.45.212
         | 
| 7 8 | 
             
                  23.21.227.69
         | 
| 8 9 | 
             
                  40.88.21.235
         | 
| 9 10 | 
             
                  50.16.241.113
         | 
| 10 11 | 
             
                  50.16.241.114
         | 
| 11 12 | 
             
                  50.16.241.117
         | 
| 12 13 | 
             
                  50.16.247.234
         | 
| 13 | 
            -
                  52.204.97.54
         | 
| 14 14 | 
             
                  52.5.190.19
         | 
| 15 | 
            +
                  52.204.97.54
         | 
| 15 16 | 
             
                  54.197.234.188
         | 
| 16 17 | 
             
                  54.208.100.253
         | 
| 17 18 | 
             
                  54.208.102.37
         | 
    
        data/lib/legitbot/version.rb
    CHANGED
    
    
        data/test/petalbot_test.rb
    ADDED
    
    | @@ -0,0 +1,52 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'minitest/autorun'
         | 
| 4 | 
            +
            require 'legitbot'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            class PetalbotTest < Minitest::Test
         | 
| 7 | 
            +
              def test_malicious_ip
         | 
| 8 | 
            +
                ip = '149.210.164.47'
         | 
| 9 | 
            +
                match = Legitbot::Petalbot.new ip
         | 
| 10 | 
            +
                assert !match.valid?, msg: "#{ip} is not a real Petalbot IP"
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              def test_valid_ip
         | 
| 14 | 
            +
                ip = '114.119.128.10'
         | 
| 15 | 
            +
                match = Legitbot::Petalbot.new ip
         | 
| 16 | 
            +
                assert match.valid?, msg: "#{ip} is a valid Petalbot IP"
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
              def test_malicious_ua
         | 
| 20 | 
            +
                bot = Legitbot.bot(
         | 
| 21 | 
            +
                  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
         | 
| 22 | 
            +
                  '149.210.164.47'
         | 
| 23 | 
            +
                )
         | 
| 24 | 
            +
                assert bot, msg: 'Petalbot detected from User-Agent'
         | 
| 25 | 
            +
                assert !bot.valid?, msg: 'Not a valid Petalbot'
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              def test_valid_ua
         | 
| 29 | 
            +
                bot = Legitbot.bot(
         | 
| 30 | 
            +
                  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
         | 
| 31 | 
            +
                  '114.119.128.10'
         | 
| 32 | 
            +
                )
         | 
| 33 | 
            +
                assert bot, msg: 'Petalbot detected from User-Agent'
         | 
| 34 | 
            +
                assert bot.valid?, msg: 'Valid Petalbot'
         | 
| 35 | 
            +
              end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
              def test_valid_name
         | 
| 38 | 
            +
                bot = Legitbot.bot(
         | 
| 39 | 
            +
                  'Mozilla/5.0 (compatible;PetalBot; +https://aspiegel.com/petalbot)',
         | 
| 40 | 
            +
                  '66.249.64.141'
         | 
| 41 | 
            +
                )
         | 
| 42 | 
            +
                assert_equal :petalbot, bot.detected_as
         | 
| 43 | 
            +
              end
         | 
| 44 | 
            +
             | 
| 45 | 
            +
              def test_fake_name
         | 
| 46 | 
            +
                bot = Legitbot.bot(
         | 
| 47 | 
            +
                  'Mozilla/5.0 (compatible; PetalBot/2.1; +http://www.google.com/bot.html)',
         | 
| 48 | 
            +
                  '81.1.172.108'
         | 
| 49 | 
            +
                )
         | 
| 50 | 
            +
                assert_equal :petalbot, bot.detected_as
         | 
| 51 | 
            +
              end
         | 
| 52 | 
            +
            end
         | 
    
        data/test/pinterest_test.rb
    CHANGED
    
    | @@ -34,7 +34,7 @@ class PinterestTest < Minitest::Test | |
| 34 34 | 
             
                assert bot.valid?, msg: 'Valid Pinterest'
         | 
| 35 35 | 
             
              end
         | 
| 36 36 |  | 
| 37 | 
            -
              # rubocop:disable  | 
| 37 | 
            +
              # rubocop:disable Layout/LineLength
         | 
| 38 38 | 
             
              def test_android_not_bot
         | 
| 39 39 | 
             
                bot = Legitbot.bot(
         | 
| 40 40 | 
             
                  'Mozilla/5.0 (Linux; Android 8.0.0; SM-G965F Build/R16NW; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/70.0.3538.64 Mobile Safari/537.36 [Pinterest/Android]',
         | 
| @@ -42,7 +42,7 @@ class PinterestTest < Minitest::Test | |
| 42 42 | 
             
                )
         | 
| 43 43 | 
             
                assert_nil bot
         | 
| 44 44 | 
             
              end
         | 
| 45 | 
            -
              # rubocop:enable  | 
| 45 | 
            +
              # rubocop:enable Layout/LineLength
         | 
| 46 46 |  | 
| 47 47 | 
             
              def test_engine_name
         | 
| 48 48 | 
             
                bot = Legitbot.bot(
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: legitbot
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.2 | 
| 4 | 
            +
              version: 1.4.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Alexander Azarov
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2021-08-10 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: augmented_interval_tree
         | 
| @@ -116,20 +116,20 @@ dependencies: | |
| 116 116 | 
             
                requirements:
         | 
| 117 117 | 
             
                - - "~>"
         | 
| 118 118 | 
             
                  - !ruby/object:Gem::Version
         | 
| 119 | 
            -
                    version:  | 
| 119 | 
            +
                    version: 1.18.0
         | 
| 120 120 | 
             
                - - ">="
         | 
| 121 121 | 
             
                  - !ruby/object:Gem::Version
         | 
| 122 | 
            -
                    version:  | 
| 122 | 
            +
                    version: 1.18.0
         | 
| 123 123 | 
             
              type: :development
         | 
| 124 124 | 
             
              prerelease: false
         | 
| 125 125 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 126 126 | 
             
                requirements:
         | 
| 127 127 | 
             
                - - "~>"
         | 
| 128 128 | 
             
                  - !ruby/object:Gem::Version
         | 
| 129 | 
            -
                    version:  | 
| 129 | 
            +
                    version: 1.18.0
         | 
| 130 130 | 
             
                - - ">="
         | 
| 131 131 | 
             
                  - !ruby/object:Gem::Version
         | 
| 132 | 
            -
                    version:  | 
| 132 | 
            +
                    version: 1.18.0
         | 
| 133 133 | 
             
            description: Does Web request come from a real search engine or from an impersonating
         | 
| 134 134 | 
             
              agent?
         | 
| 135 135 | 
             
            email: self@alaz.me
         | 
| @@ -160,6 +160,7 @@ files: | |
| 160 160 | 
             
            - lib/legitbot/google.rb
         | 
| 161 161 | 
             
            - lib/legitbot/legitbot.rb
         | 
| 162 162 | 
             
            - lib/legitbot/oracle.rb
         | 
| 163 | 
            +
            - lib/legitbot/petalbot.rb
         | 
| 163 164 | 
             
            - lib/legitbot/pinterest.rb
         | 
| 164 165 | 
             
            - lib/legitbot/twitter.rb
         | 
| 165 166 | 
             
            - lib/legitbot/validators/domains.rb
         | 
| @@ -176,6 +177,7 @@ files: | |
| 176 177 | 
             
            - test/legitbot/validators/ip_ranges_test.rb
         | 
| 177 178 | 
             
            - test/legitbot_test.rb
         | 
| 178 179 | 
             
            - test/oracle_test.rb
         | 
| 180 | 
            +
            - test/petalbot_test.rb
         | 
| 179 181 | 
             
            - test/pinterest_test.rb
         | 
| 180 182 | 
             
            - test/twitter_test.rb
         | 
| 181 183 | 
             
            homepage: https://github.com/alaz/legitbot
         | 
| @@ -191,14 +193,14 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 191 193 | 
             
              requirements:
         | 
| 192 194 | 
             
              - - ">="
         | 
| 193 195 | 
             
                - !ruby/object:Gem::Version
         | 
| 194 | 
            -
                  version: 2. | 
| 196 | 
            +
                  version: 2.5.0
         | 
| 195 197 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 196 198 | 
             
              requirements:
         | 
| 197 199 | 
             
              - - ">="
         | 
| 198 200 | 
             
                - !ruby/object:Gem::Version
         | 
| 199 201 | 
             
                  version: '0'
         | 
| 200 202 | 
             
            requirements: []
         | 
| 201 | 
            -
            rubygems_version: 3.1. | 
| 203 | 
            +
            rubygems_version: 3.1.6
         | 
| 202 204 | 
             
            signing_key:
         | 
| 203 205 | 
             
            specification_version: 4
         | 
| 204 206 | 
             
            summary: 'Validate requests from Web crawlers: impersonating or not?'
         | 
| @@ -212,6 +214,7 @@ test_files: | |
| 212 214 | 
             
            - test/apple_test.rb
         | 
| 213 215 | 
             
            - test/oracle_test.rb
         | 
| 214 216 | 
             
            - test/google_test.rb
         | 
| 217 | 
            +
            - test/petalbot_test.rb
         | 
| 215 218 | 
             
            - test/botmatch_test.rb
         | 
| 216 219 | 
             
            - test/facebook_test.rb
         | 
| 217 220 | 
             
            - test/twitter_test.rb
         |