robot_rules 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -9,6 +9,7 @@ begin
9
9
  gem.email = "edgargonzalez@gmail.com"
10
10
  gem.homepage = "http://github.com/hasmanydevelopers/robot_rules"
11
11
  gem.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
12
+ gem.add_dependency('addressable', '>= 2.1.2')
12
13
  end
13
14
  Jeweler::GemcutterTasks.new
14
15
  rescue LoadError
data/VERSION.yml CHANGED
@@ -1,4 +1,5 @@
1
1
  ---
2
2
  :major: 0
3
3
  :minor: 9
4
- :patch: 1
4
+ :build:
5
+ :patch: 2
data/lib/robot_rules.rb CHANGED
@@ -5,18 +5,17 @@
5
5
  # Created by James Edward Gray II on 2006-01-31.
6
6
  # Copyright 2006 Gray Productions. All rights reserved.
7
7
 
8
- require "uri"
8
+ require 'addressable/uri'
9
9
 
10
10
  # Based on Perl's WWW::RobotRules module, by Gisle Aas.
11
11
  class RobotRules
12
12
  def initialize( user_agent )
13
- @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
14
- "").downcase
13
+ @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},"").downcase
15
14
  @rules = Hash.new { |rules, rule| rules[rule] = Array.new }
16
15
  end
17
16
 
18
- def parse( text_uri, robots_data )
19
- uri = URI.parse(text_uri)
17
+ def parse( site, robots_data )
18
+ uri = site.kind_of?(Addressable::URI) ? site : Addressable::URI::parse(site)
20
19
  location = "#{uri.host}:#{uri.port}"
21
20
  @rules.delete(location)
22
21
 
@@ -64,8 +63,8 @@ class RobotRules
64
63
  end
65
64
  end
66
65
 
67
- def allowed?( text_uri )
68
- uri = URI.parse(text_uri)
66
+ def allowed?( u )
67
+ uri = u.kind_of?(Addressable::URI) ? u : Addressable::URI::parse(u)
69
68
  location = "#{uri.host}:#{uri.port}"
70
69
  path = uri.path
71
70
 
@@ -74,3 +73,4 @@ class RobotRules
74
73
  not @rules[location].any? { |rule| path.index(rule) == 0 }
75
74
  end
76
75
  end
76
+
data/robot_rules.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{robot_rules}
8
- s.version = "0.9.1"
8
+ s.version = "0.9.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
12
- s.date = %q{2010-07-09}
12
+ s.date = %q{2010-07-21}
13
13
  s.email = %q{edgargonzalez@gmail.com}
14
14
  s.extra_rdoc_files = [
15
15
  "LICENSE",
@@ -42,9 +42,12 @@ Gem::Specification.new do |s|
42
42
  s.specification_version = 3
43
43
 
44
44
  if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
45
+ s.add_runtime_dependency(%q<addressable>, [">= 2.1.2"])
45
46
  else
47
+ s.add_dependency(%q<addressable>, [">= 2.1.2"])
46
48
  end
47
49
  else
50
+ s.add_dependency(%q<addressable>, [">= 2.1.2"])
48
51
  end
49
52
  end
50
53
 
@@ -23,7 +23,7 @@ class RobotRulesTest < Test::Unit::TestCase
23
23
  @robot_rule = RobotRules.new('Microsoft')
24
24
  robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
25
25
  @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
26
-
26
+
27
27
  assert_equal false, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
28
28
  end
29
29
 
@@ -31,7 +31,27 @@ class RobotRulesTest < Test::Unit::TestCase
31
31
  @robot_rule = RobotRules.new('Google')
32
32
  robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
33
33
  @robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
34
-
34
+
35
35
  assert_equal true, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
36
36
  end
37
+
38
+ def test_should_allow_user_agent_when_disallowed_all_for_specific_user_agent
39
+ @robot_rule = RobotRules.new('MyBot')
40
+ robots_txt = "User-agent: mxbot\nDisallow: /"
41
+ @robot_rule.parse("#{SITE_URL}/robots.txt", robots_txt)
42
+
43
+ assert_equal true, @robot_rule.allowed?("#{SITE_URL}/hellow_world")
44
+ end
45
+
46
+ def test_using_uri_as_parameters_should_allow_user_agent_to_specified_path
47
+ robots_url = Addressable::URI::parse("#{SITE_URL}/robots.txt")
48
+ url = Addressable::URI::parse("#{SITE_URL}/google/hellow_world.txt")
49
+ @robot_rule = RobotRules.new('Google')
50
+ robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
51
+ @robot_rule.parse(robots_url, robots_txt)
52
+
53
+ assert_equal true, @robot_rule.allowed?(url)
54
+ end
55
+
37
56
  end
57
+
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 9
8
- - 1
9
- version: 0.9.1
8
+ - 2
9
+ version: 0.9.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - James Edward Gray II
@@ -16,10 +16,23 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2010-07-09 00:00:00 -04:30
19
+ date: 2010-07-21 00:00:00 -04:30
20
20
  default_executable:
21
- dependencies: []
22
-
21
+ dependencies:
22
+ - !ruby/object:Gem::Dependency
23
+ name: addressable
24
+ prerelease: false
25
+ requirement: &id001 !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 2
31
+ - 1
32
+ - 2
33
+ version: 2.1.2
34
+ type: :runtime
35
+ version_requirements: *id001
23
36
  description:
24
37
  email: edgargonzalez@gmail.com
25
38
  executables: []