robot_rules 0.9.1 → 0.9.2
This diff shows the changes between two publicly released versions of a package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
- data/Rakefile +1 -0
- data/VERSION.yml +2 -1
- data/lib/robot_rules.rb +7 -7
- data/robot_rules.gemspec +5 -2
- data/test/robot_rules_test.rb +22 -2
- metadata +18 -5
    
        data/Rakefile
    CHANGED
    
    | @@ -9,6 +9,7 @@ begin | |
| 9 9 | 
             
                gem.email = "edgargonzalez@gmail.com"
         | 
| 10 10 | 
             
                gem.homepage = "http://github.com/hasmanydevelopers/robot_rules"
         | 
| 11 11 | 
             
                gem.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
         | 
| 12 | 
            +
                gem.add_dependency('addressable', '>= 2.1.2')
         | 
| 12 13 | 
             
              end
         | 
| 13 14 | 
             
              Jeweler::GemcutterTasks.new
         | 
| 14 15 | 
             
            rescue LoadError
         | 
    
        data/VERSION.yml
    CHANGED
    
    
    
        data/lib/robot_rules.rb
    CHANGED
    
    | @@ -5,18 +5,17 @@ | |
| 5 5 | 
             
            #  Created by James Edward Gray II on 2006-01-31.
         | 
| 6 6 | 
             
            #  Copyright 2006 Gray Productions. All rights reserved.
         | 
| 7 7 |  | 
| 8 | 
            -
            require  | 
| 8 | 
            +
            require 'addressable/uri'
         | 
| 9 9 |  | 
| 10 10 | 
             
            # Based on Perl's WWW::RobotRules module, by Gisle Aas.
         | 
| 11 11 | 
             
            class RobotRules
         | 
| 12 12 | 
             
               def initialize( user_agent )
         | 
| 13 | 
            -
                 @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*}, | 
| 14 | 
            -
            "").downcase
         | 
| 13 | 
            +
                 @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},"").downcase
         | 
| 15 14 | 
             
                 @rules      = Hash.new { |rules, rule| rules[rule] = Array.new }
         | 
| 16 15 | 
             
               end
         | 
| 17 16 |  | 
| 18 | 
            -
               def parse(  | 
| 19 | 
            -
                 uri      = URI | 
| 17 | 
            +
               def parse( site, robots_data )
         | 
| 18 | 
            +
                 uri      = site.kind_of?(Addressable::URI) ? site : Addressable::URI::parse(site)
         | 
| 20 19 | 
             
                 location = "#{uri.host}:#{uri.port}"
         | 
| 21 20 | 
             
                 @rules.delete(location)
         | 
| 22 21 |  | 
| @@ -64,8 +63,8 @@ class RobotRules | |
| 64 63 | 
             
                 end
         | 
| 65 64 | 
             
               end
         | 
| 66 65 |  | 
| 67 | 
            -
               def allowed?(  | 
| 68 | 
            -
                 uri      = URI | 
| 66 | 
            +
               def allowed?( u )
         | 
| 67 | 
            +
                 uri      = u.kind_of?(Addressable::URI) ? u : Addressable::URI::parse(u)
         | 
| 69 68 | 
             
                 location = "#{uri.host}:#{uri.port}"
         | 
| 70 69 | 
             
                 path     = uri.path
         | 
| 71 70 |  | 
| @@ -74,3 +73,4 @@ class RobotRules | |
| 74 73 | 
             
                 not @rules[location].any? { |rule| path.index(rule) == 0 }
         | 
| 75 74 | 
             
               end
         | 
| 76 75 | 
             
            end
         | 
| 76 | 
            +
             | 
    
        data/robot_rules.gemspec
    CHANGED
    
    | @@ -5,11 +5,11 @@ | |
| 5 5 |  | 
| 6 6 | 
             
            Gem::Specification.new do |s|
         | 
| 7 7 | 
             
              s.name = %q{robot_rules}
         | 
| 8 | 
            -
              s.version = "0.9. | 
| 8 | 
            +
              s.version = "0.9.2"
         | 
| 9 9 |  | 
| 10 10 | 
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         | 
| 11 11 | 
             
              s.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
         | 
| 12 | 
            -
              s.date = %q{2010-07- | 
| 12 | 
            +
              s.date = %q{2010-07-21}
         | 
| 13 13 | 
             
              s.email = %q{edgargonzalez@gmail.com}
         | 
| 14 14 | 
             
              s.extra_rdoc_files = [
         | 
| 15 15 | 
             
                "LICENSE",
         | 
| @@ -42,9 +42,12 @@ Gem::Specification.new do |s| | |
| 42 42 | 
             
                s.specification_version = 3
         | 
| 43 43 |  | 
| 44 44 | 
             
                if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
         | 
| 45 | 
            +
                  s.add_runtime_dependency(%q<addressable>, [">= 2.1.2"])
         | 
| 45 46 | 
             
                else
         | 
| 47 | 
            +
                  s.add_dependency(%q<addressable>, [">= 2.1.2"])
         | 
| 46 48 | 
             
                end
         | 
| 47 49 | 
             
              else
         | 
| 50 | 
            +
                s.add_dependency(%q<addressable>, [">= 2.1.2"])
         | 
| 48 51 | 
             
              end
         | 
| 49 52 | 
             
            end
         | 
| 50 53 |  | 
    
        data/test/robot_rules_test.rb
    CHANGED
    
    | @@ -23,7 +23,7 @@ class RobotRulesTest < Test::Unit::TestCase | |
| 23 23 | 
             
                @robot_rule = RobotRules.new('Microsoft')
         | 
| 24 24 | 
             
                robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow:  google\nUser-agent:  *\nDisallow:    images)
         | 
| 25 25 | 
             
                @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
         | 
| 26 | 
            -
             | 
| 26 | 
            +
             | 
| 27 27 | 
             
                assert_equal false, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
         | 
| 28 28 | 
             
              end
         | 
| 29 29 |  | 
| @@ -31,7 +31,27 @@ class RobotRulesTest < Test::Unit::TestCase | |
| 31 31 | 
             
                @robot_rule = RobotRules.new('Google')
         | 
| 32 32 | 
             
                robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow:  google\nUser-agent:  *\nDisallow:    images)
         | 
| 33 33 | 
             
                @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
         | 
| 34 | 
            -
             | 
| 34 | 
            +
             | 
| 35 35 | 
             
                assert_equal true, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
         | 
| 36 36 | 
             
              end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
              def test_should_allow_user_agent_when_disallowed_all_for_specific_user_agent
         | 
| 39 | 
            +
                @robot_rule = RobotRules.new('MyBot')
         | 
| 40 | 
            +
                robots_txt = "User-agent: mxbot\nDisallow: /"
         | 
| 41 | 
            +
                @robot_rule.parse("#{SITE_URL}/robots.txt", robots_txt)
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                assert_equal true, @robot_rule.allowed?("#{SITE_URL}/hellow_world")
         | 
| 44 | 
            +
              end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
              def test_using_uri_as_parameters_should_allow_user_agent_to_specified_path
         | 
| 47 | 
            +
                robots_url = Addressable::URI::parse("#{SITE_URL}/robots.txt")
         | 
| 48 | 
            +
                url = Addressable::URI::parse("#{SITE_URL}/google/hellow_world.txt")
         | 
| 49 | 
            +
                @robot_rule = RobotRules.new('Google')
         | 
| 50 | 
            +
                robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow:  google\nUser-agent:  *\nDisallow:    images)
         | 
| 51 | 
            +
                @robot_rule.parse(robots_url, robots_txt)
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                assert_equal true, @robot_rule.allowed?(url)
         | 
| 54 | 
            +
              end
         | 
| 55 | 
            +
             | 
| 37 56 | 
             
            end
         | 
| 57 | 
            +
             | 
    
        metadata
    CHANGED
    
    | @@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version | |
| 5 5 | 
             
              segments: 
         | 
| 6 6 | 
             
              - 0
         | 
| 7 7 | 
             
              - 9
         | 
| 8 | 
            -
              -  | 
| 9 | 
            -
              version: 0.9. | 
| 8 | 
            +
              - 2
         | 
| 9 | 
            +
              version: 0.9.2
         | 
| 10 10 | 
             
            platform: ruby
         | 
| 11 11 | 
             
            authors: 
         | 
| 12 12 | 
             
            - James Edward Gray II
         | 
| @@ -16,10 +16,23 @@ autorequire: | |
| 16 16 | 
             
            bindir: bin
         | 
| 17 17 | 
             
            cert_chain: []
         | 
| 18 18 |  | 
| 19 | 
            -
            date: 2010-07- | 
| 19 | 
            +
            date: 2010-07-21 00:00:00 -04:30
         | 
| 20 20 | 
             
            default_executable: 
         | 
| 21 | 
            -
            dependencies:  | 
| 22 | 
            -
             | 
| 21 | 
            +
            dependencies: 
         | 
| 22 | 
            +
            - !ruby/object:Gem::Dependency 
         | 
| 23 | 
            +
              name: addressable
         | 
| 24 | 
            +
              prerelease: false
         | 
| 25 | 
            +
              requirement: &id001 !ruby/object:Gem::Requirement 
         | 
| 26 | 
            +
                requirements: 
         | 
| 27 | 
            +
                - - ">="
         | 
| 28 | 
            +
                  - !ruby/object:Gem::Version 
         | 
| 29 | 
            +
                    segments: 
         | 
| 30 | 
            +
                    - 2
         | 
| 31 | 
            +
                    - 1
         | 
| 32 | 
            +
                    - 2
         | 
| 33 | 
            +
                    version: 2.1.2
         | 
| 34 | 
            +
              type: :runtime
         | 
| 35 | 
            +
              version_requirements: *id001
         | 
| 23 36 | 
             
            description: 
         | 
| 24 37 | 
             
            email: edgargonzalez@gmail.com
         | 
| 25 38 | 
             
            executables: []
         |