robot_rules 0.9.1 → 0.9.2

This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
data/Rakefile CHANGED
@@ -9,6 +9,7 @@ begin
  gem.email = "edgargonzalez@gmail.com"
  gem.homepage = "http://github.com/hasmanydevelopers/robot_rules"
  gem.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
+ gem.add_dependency('addressable', '>= 2.1.2')
  end
  Jeweler::GemcutterTasks.new
  rescue LoadError
data/VERSION.yml CHANGED
@@ -1,4 +1,5 @@
  ---
  :major: 0
  :minor: 9
- :patch: 1
+ :build:
+ :patch: 2
data/lib/robot_rules.rb CHANGED
@@ -5,18 +5,17 @@
  # Created by James Edward Gray II on 2006-01-31.
  # Copyright 2006 Gray Productions. All rights reserved.

- require "uri"
+ require 'addressable/uri'

  # Based on Perl's WWW::RobotRules module, by Gisle Aas.
  class RobotRules
  def initialize( user_agent )
- @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
- "").downcase
+ @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},"").downcase
  @rules = Hash.new { |rules, rule| rules[rule] = Array.new }
  end

- def parse( text_uri, robots_data )
- uri = URI.parse(text_uri)
+ def parse( site, robots_data )
+ uri = site.kind_of?(Addressable::URI) ? site : Addressable::URI::parse(site)
  location = "#{uri.host}:#{uri.port}"
  @rules.delete(location)

@@ -64,8 +63,8 @@ class RobotRules
  end
  end

- def allowed?( text_uri )
- uri = URI.parse(text_uri)
+ def allowed?( u )
+ uri = u.kind_of?(Addressable::URI) ? u : Addressable::URI::parse(u)
  location = "#{uri.host}:#{uri.port}"
  path = uri.path

@@ -74,3 +73,4 @@ class RobotRules
  not @rules[location].any? { |rule| path.index(rule) == 0 }
  end
  end
+
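In practice, the library change means parse and allowed? now accept either a URL string (as in 0.9.1) or an Addressable::URI object. A minimal usage sketch, assuming a hypothetical host, bot name, and robots.txt body that are not taken from this diff:

    require 'robot_rules'   # as of 0.9.2 this also loads addressable/uri

    rules = RobotRules.new('MyBot/1.0')               # user agent is reduced to "mybot"
    robots_txt = "User-agent: *\nDisallow: /private"  # hypothetical robots.txt body

    # String arguments keep working exactly as in 0.9.1.
    rules.parse('http://example.com/robots.txt', robots_txt)
    rules.allowed?('http://example.com/private/page.html')   # expected: false (blocked by /private)

    # New in 0.9.2: an Addressable::URI can be passed directly, skipping the re-parse.
    uri = Addressable::URI.parse('http://example.com/public/page.html')
    rules.allowed?(uri)                                       # expected: true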
data/robot_rules.gemspec CHANGED
@@ -5,11 +5,11 @@

  Gem::Specification.new do |s|
  s.name = %q{robot_rules}
- s.version = "0.9.1"
+ s.version = "0.9.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
- s.date = %q{2010-07-09}
+ s.date = %q{2010-07-21}
  s.email = %q{edgargonzalez@gmail.com}
  s.extra_rdoc_files = [
  "LICENSE",
@@ -42,9 +42,12 @@ Gem::Specification.new do |s|
  s.specification_version = 3

  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+ s.add_runtime_dependency(%q<addressable>, [">= 2.1.2"])
  else
+ s.add_dependency(%q<addressable>, [">= 2.1.2"])
  end
  else
+ s.add_dependency(%q<addressable>, [">= 2.1.2"])
  end
  end

@@ -23,7 +23,7 @@ class RobotRulesTest < Test::Unit::TestCase
  @robot_rule = RobotRules.new('Microsoft')
  robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
  @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
-
+
  assert_equal false, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
  end

@@ -31,7 +31,27 @@ class RobotRulesTest < Test::Unit::TestCase
  @robot_rule = RobotRules.new('Google')
  robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
  @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
-
+
  assert_equal true, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
  end
+
+ def test_should_allow_user_agent_when_disallowed_all_for_specific_user_agent
+ @robot_rule = RobotRules.new('MyBot')
+ robots_txt = "User-agent: mxbot\nDisallow: /"
+ @robot_rule.parse("#{SITE_URL}/robots.txt", robots_txt)
+
+ assert_equal true, @robot_rule.allowed?("#{SITE_URL}/hellow_world")
+ end
+
+ def test_using_uri_as_parameters_should_allow_user_agent_to_specified_path
+ robots_url = Addressable::URI::parse("#{SITE_URL}/robots.txt")
+ url = Addressable::URI::parse("#{SITE_URL}/google/hellow_world.txt")
+ @robot_rule = RobotRules.new('Google')
+ robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
+ @robot_rule.parse(robots_url, robots_txt)
+
+ assert_equal true, @robot_rule.allowed?(url)
+ end
+
  end
+
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
  segments:
  - 0
  - 9
- - 1
- version: 0.9.1
+ - 2
+ version: 0.9.2
  platform: ruby
  authors:
  - James Edward Gray II
@@ -16,10 +16,23 @@ autorequire:
  bindir: bin
  cert_chain: []

- date: 2010-07-09 00:00:00 -04:30
+ date: 2010-07-21 00:00:00 -04:30
  default_executable:
- dependencies: []
-
+ dependencies:
+ - !ruby/object:Gem::Dependency
+ name: addressable
+ prerelease: false
+ requirement: &id001 !ruby/object:Gem::Requirement
+ requirements:
+ - - ">="
+ - !ruby/object:Gem::Version
+ segments:
+ - 2
+ - 1
+ - 2
+ version: 2.1.2
+ type: :runtime
+ version_requirements: *id001
  description:
  email: edgargonzalez@gmail.com
  executables: []