robot_rules 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION.yml +2 -1
- data/lib/robot_rules.rb +7 -7
- data/robot_rules.gemspec +5 -2
- data/test/robot_rules_test.rb +22 -2
- metadata +18 -5
data/Rakefile
CHANGED
@@ -9,6 +9,7 @@ begin
|
|
9
9
|
gem.email = "edgargonzalez@gmail.com"
|
10
10
|
gem.homepage = "http://github.com/hasmanydevelopers/robot_rules"
|
11
11
|
gem.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
|
12
|
+
gem.add_dependency('addressable', '>= 2.1.2')
|
12
13
|
end
|
13
14
|
Jeweler::GemcutterTasks.new
|
14
15
|
rescue LoadError
|
data/VERSION.yml
CHANGED
data/lib/robot_rules.rb
CHANGED
@@ -5,18 +5,17 @@
|
|
5
5
|
# Created by James Edward Gray II on 2006-01-31.
|
6
6
|
# Copyright 2006 Gray Productions. All rights reserved.
|
7
7
|
|
8
|
-
require
|
8
|
+
require 'addressable/uri'
|
9
9
|
|
10
10
|
# Based on Perl's WWW::RobotRules module, by Gisle Aas.
|
11
11
|
class RobotRules
|
12
12
|
def initialize( user_agent )
|
13
|
-
@user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
|
14
|
-
"").downcase
|
13
|
+
@user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},"").downcase
|
15
14
|
@rules = Hash.new { |rules, rule| rules[rule] = Array.new }
|
16
15
|
end
|
17
16
|
|
18
|
-
def parse(
|
19
|
-
uri = URI
|
17
|
+
def parse( site, robots_data )
|
18
|
+
uri = site.kind_of?(Addressable::URI) ? site : Addressable::URI::parse(site)
|
20
19
|
location = "#{uri.host}:#{uri.port}"
|
21
20
|
@rules.delete(location)
|
22
21
|
|
@@ -64,8 +63,8 @@ class RobotRules
|
|
64
63
|
end
|
65
64
|
end
|
66
65
|
|
67
|
-
def allowed?(
|
68
|
-
uri = URI
|
66
|
+
def allowed?( u )
|
67
|
+
uri = u.kind_of?(Addressable::URI) ? u : Addressable::URI::parse(u)
|
69
68
|
location = "#{uri.host}:#{uri.port}"
|
70
69
|
path = uri.path
|
71
70
|
|
@@ -74,3 +73,4 @@ class RobotRules
|
|
74
73
|
not @rules[location].any? { |rule| path.index(rule) == 0 }
|
75
74
|
end
|
76
75
|
end
|
76
|
+
|
data/robot_rules.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{robot_rules}
|
8
|
-
s.version = "0.9.1"
|
8
|
+
s.version = "0.9.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
|
12
|
-
s.date = %q{2010-07-
|
12
|
+
s.date = %q{2010-07-21}
|
13
13
|
s.email = %q{edgargonzalez@gmail.com}
|
14
14
|
s.extra_rdoc_files = [
|
15
15
|
"LICENSE",
|
@@ -42,9 +42,12 @@ Gem::Specification.new do |s|
|
|
42
42
|
s.specification_version = 3
|
43
43
|
|
44
44
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_runtime_dependency(%q<addressable>, [">= 2.1.2"])
|
45
46
|
else
|
47
|
+
s.add_dependency(%q<addressable>, [">= 2.1.2"])
|
46
48
|
end
|
47
49
|
else
|
50
|
+
s.add_dependency(%q<addressable>, [">= 2.1.2"])
|
48
51
|
end
|
49
52
|
end
|
50
53
|
|
data/test/robot_rules_test.rb
CHANGED
@@ -23,7 +23,7 @@ class RobotRulesTest < Test::Unit::TestCase
|
|
23
23
|
@robot_rule = RobotRules.new('Microsoft')
|
24
24
|
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
25
25
|
@robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
|
26
|
-
|
26
|
+
|
27
27
|
assert_equal false, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
|
28
28
|
end
|
29
29
|
|
@@ -31,7 +31,27 @@ class RobotRulesTest < Test::Unit::TestCase
|
|
31
31
|
@robot_rule = RobotRules.new('Google')
|
32
32
|
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
33
33
|
@robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
|
34
|
-
|
34
|
+
|
35
35
|
assert_equal true, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
|
36
36
|
end
|
37
|
+
|
38
|
+
def test_should_allow_user_agent_when_disallowed_all_for_specific_user_agent
|
39
|
+
@robot_rule = RobotRules.new('MyBot')
|
40
|
+
robots_txt = "User-agent: mxbot\nDisallow: /"
|
41
|
+
@robot_rule.parse("#{SITE_URL}/robots.txt", robots_txt)
|
42
|
+
|
43
|
+
assert_equal true, @robot_rule.allowed?("#{SITE_URL}/hellow_world")
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_using_uri_as_parameters_should_allow_user_agent_to_specified_path
|
47
|
+
robots_url = Addressable::URI::parse("#{SITE_URL}/robots.txt")
|
48
|
+
url = Addressable::URI::parse("#{SITE_URL}/google/hellow_world.txt")
|
49
|
+
@robot_rule = RobotRules.new('Google')
|
50
|
+
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
51
|
+
@robot_rule.parse(robots_url, robots_txt)
|
52
|
+
|
53
|
+
assert_equal true, @robot_rule.allowed?(url)
|
54
|
+
end
|
55
|
+
|
37
56
|
end
|
57
|
+
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 9
|
8
|
-
- 1
|
9
|
-
version: 0.9.1
|
8
|
+
- 2
|
9
|
+
version: 0.9.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- James Edward Gray II
|
@@ -16,10 +16,23 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-07-
|
19
|
+
date: 2010-07-21 00:00:00 -04:30
|
20
20
|
default_executable:
|
21
|
-
dependencies:
|
22
|
-
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: addressable
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
version: 2.1.2
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
23
36
|
description:
|
24
37
|
email: edgargonzalez@gmail.com
|
25
38
|
executables: []
|