robot_rules 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/VERSION.yml +2 -1
- data/lib/robot_rules.rb +7 -7
- data/robot_rules.gemspec +5 -2
- data/test/robot_rules_test.rb +22 -2
- metadata +18 -5
data/Rakefile
CHANGED
@@ -9,6 +9,7 @@ begin
|
|
9
9
|
gem.email = "edgargonzalez@gmail.com"
|
10
10
|
gem.homepage = "http://github.com/hasmanydevelopers/robot_rules"
|
11
11
|
gem.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
|
12
|
+
gem.add_dependency('addressable', '>= 2.1.2')
|
12
13
|
end
|
13
14
|
Jeweler::GemcutterTasks.new
|
14
15
|
rescue LoadError
|
data/VERSION.yml
CHANGED
data/lib/robot_rules.rb
CHANGED
@@ -5,18 +5,17 @@
|
|
5
5
|
# Created by James Edward Gray II on 2006-01-31.
|
6
6
|
# Copyright 2006 Gray Productions. All rights reserved.
|
7
7
|
|
8
|
-
require
|
8
|
+
require 'addressable/uri'
|
9
9
|
|
10
10
|
# Based on Perl's WWW::RobotRules module, by Gisle Aas.
|
11
11
|
class RobotRules
|
12
12
|
def initialize( user_agent )
|
13
|
-
@user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
|
14
|
-
"").downcase
|
13
|
+
@user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},"").downcase
|
15
14
|
@rules = Hash.new { |rules, rule| rules[rule] = Array.new }
|
16
15
|
end
|
17
16
|
|
18
|
-
def parse(
|
19
|
-
uri = URI
|
17
|
+
def parse( site, robots_data )
|
18
|
+
uri = site.kind_of?(Addressable::URI) ? site : Addressable::URI::parse(site)
|
20
19
|
location = "#{uri.host}:#{uri.port}"
|
21
20
|
@rules.delete(location)
|
22
21
|
|
@@ -64,8 +63,8 @@ class RobotRules
|
|
64
63
|
end
|
65
64
|
end
|
66
65
|
|
67
|
-
def allowed?(
|
68
|
-
uri = URI
|
66
|
+
def allowed?( u )
|
67
|
+
uri = u.kind_of?(Addressable::URI) ? u : Addressable::URI::parse(u)
|
69
68
|
location = "#{uri.host}:#{uri.port}"
|
70
69
|
path = uri.path
|
71
70
|
|
@@ -74,3 +73,4 @@ class RobotRules
|
|
74
73
|
not @rules[location].any? { |rule| path.index(rule) == 0 }
|
75
74
|
end
|
76
75
|
end
|
76
|
+
|
data/robot_rules.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{robot_rules}
|
8
|
-
s.version = "0.9.1"
|
8
|
+
s.version = "0.9.2"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["James Edward Gray II", "Jeremy Friesen", "Edgar Gonzalez"]
|
12
|
-
s.date = %q{2010-07-
|
12
|
+
s.date = %q{2010-07-21}
|
13
13
|
s.email = %q{edgargonzalez@gmail.com}
|
14
14
|
s.extra_rdoc_files = [
|
15
15
|
"LICENSE",
|
@@ -42,9 +42,12 @@ Gem::Specification.new do |s|
|
|
42
42
|
s.specification_version = 3
|
43
43
|
|
44
44
|
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
45
|
+
s.add_runtime_dependency(%q<addressable>, [">= 2.1.2"])
|
45
46
|
else
|
47
|
+
s.add_dependency(%q<addressable>, [">= 2.1.2"])
|
46
48
|
end
|
47
49
|
else
|
50
|
+
s.add_dependency(%q<addressable>, [">= 2.1.2"])
|
48
51
|
end
|
49
52
|
end
|
50
53
|
|
data/test/robot_rules_test.rb
CHANGED
@@ -23,7 +23,7 @@ class RobotRulesTest < Test::Unit::TestCase
|
|
23
23
|
@robot_rule = RobotRules.new('Microsoft')
|
24
24
|
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
25
25
|
@robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
|
26
|
-
|
26
|
+
|
27
27
|
assert_equal false, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
|
28
28
|
end
|
29
29
|
|
@@ -31,7 +31,27 @@ class RobotRulesTest < Test::Unit::TestCase
|
|
31
31
|
@robot_rule = RobotRules.new('Google')
|
32
32
|
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
33
33
|
@robot_rule.parse(File.join(SITE_URL,'robots.txt'), robots_txt)
|
34
|
-
|
34
|
+
|
35
35
|
assert_equal true, @robot_rule.allowed?(File.join(SITE_URL, 'google/hellow_world.txt'))
|
36
36
|
end
|
37
|
+
|
38
|
+
def test_should_allow_user_agent_when_disallowed_all_for_specific_user_agent
|
39
|
+
@robot_rule = RobotRules.new('MyBot')
|
40
|
+
robots_txt = "User-agent: mxbot\nDisallow: /"
|
41
|
+
@robot_rule.parse("#{SITE_URL}/robots.txt", robots_txt)
|
42
|
+
|
43
|
+
assert_equal true, @robot_rule.allowed?("#{SITE_URL}/hellow_world")
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_using_uri_as_parameters_should_allow_user_agent_to_specified_path
|
47
|
+
robots_url = Addressable::URI::parse("#{SITE_URL}/robots.txt")
|
48
|
+
url = Addressable::URI::parse("#{SITE_URL}/google/hellow_world.txt")
|
49
|
+
@robot_rule = RobotRules.new('Google')
|
50
|
+
robots_txt = %(/robots.txt:\nUser-agent: Microsoft\nDisallow: google\nUser-agent: *\nDisallow: images)
|
51
|
+
@robot_rule.parse(robots_url, robots_txt)
|
52
|
+
|
53
|
+
assert_equal true, @robot_rule.allowed?(url)
|
54
|
+
end
|
55
|
+
|
37
56
|
end
|
57
|
+
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 9
|
8
|
-
- 1
|
9
|
-
version: 0.9.1
|
8
|
+
- 2
|
9
|
+
version: 0.9.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- James Edward Gray II
|
@@ -16,10 +16,23 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2010-07-
|
19
|
+
date: 2010-07-21 00:00:00 -04:30
|
20
20
|
default_executable:
|
21
|
-
dependencies: []
|
22
|
-
|
21
|
+
dependencies:
|
22
|
+
- !ruby/object:Gem::Dependency
|
23
|
+
name: addressable
|
24
|
+
prerelease: false
|
25
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
segments:
|
30
|
+
- 2
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
version: 2.1.2
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
23
36
|
description:
|
24
37
|
email: edgargonzalez@gmail.com
|
25
38
|
executables: []
|