webrobots 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
  gem "racc", ">= 0"
6
+ gem "nokogiri", ">= 1.4.4"
6
7
 
7
8
  # Add dependencies to develop your gem here.
8
9
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -6,6 +6,7 @@ GEM
6
6
  bundler (~> 1.0.0)
7
7
  git (>= 1.2.5)
8
8
  rake
9
+ nokogiri (1.4.4)
9
10
  racc (1.4.6)
10
11
  rake (0.8.7)
11
12
  rcov (0.9.9)
@@ -17,6 +18,7 @@ PLATFORMS
17
18
  DEPENDENCIES
18
19
  bundler (~> 1.0.0)
19
20
  jeweler (~> 1.5.1)
21
+ nokogiri (>= 1.4.4)
20
22
  racc
21
23
  rcov
22
24
  shoulda
data/README.rdoc CHANGED
@@ -2,6 +2,22 @@
2
2
 
3
3
  This is a library to help write robots.txt compliant web robots.
4
4
 
5
+ == Usage
6
+
7
+ require 'webrobots'
8
+ require 'uri'
9
+ require 'net/http'
10
+
11
+ robots = WebRobots.new('MyBot/1.0')
12
+
13
+ uri = URI('http://digg.com/news/24hr')
14
+ if robots.disallowed?(uri)
15
+ STDERR.puts "Access disallowed: #{uri}"
16
+ exit 1
17
+ end
18
+ body = Net::HTTP.get(uri)
19
+ # ...
20
+
5
21
  == Contributing to webrobots
6
22
 
7
23
  * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
data/Rakefile CHANGED
@@ -24,7 +24,7 @@ This library helps write robots.txt compliant web robots in Ruby.
24
24
  # Include your dependencies below. Runtime dependencies are required when using your gem,
25
25
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
26
26
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
27
- # gem.add_development_dependency 'rspec', '> 1.2.3'
27
+ gem.add_development_dependency 'racc'
28
28
  end
29
29
  Jeweler::RubygemsDotOrgTasks.new
30
30
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
data/lib/webrobots.rb CHANGED
@@ -1,6 +1,11 @@
1
1
  require 'webrobots/robotstxt'
2
2
  require 'uri'
3
3
  require 'net/https'
4
+ if defined?(Nokogiri)
5
+ require 'webrobots/nokogiri'
6
+ else
7
+ autoload :Nokogiri, 'webrobots/nokogiri'
8
+ end
4
9
 
5
10
  class WebRobots
6
11
  # Creates a WebRobots object for a robot named +user_agent+, with
@@ -0,0 +1,32 @@
1
+ require 'nokogiri'
2
+
3
+ class Nokogiri::HTML::Document
4
+ # Returns an array of lower-cased <meta name="ROBOTS"> tokens. If
5
+ # no tag is found, returns an empty array. An optional
6
+ # +custom_name+ specifies the name of a meta tag to look for ahead
7
+ # of "ROBOTS". Names are compared in a case-insensitive manner.
8
+ def meta_robots(custom_name = nil)
9
+ (@meta_robots ||= {})[custom_name] =
10
+ (custom_name && parse_meta_robots(custom_name)) || parse_meta_robots('robots')
11
+ end
12
+
13
+ # Equivalent to meta_robots(custom_name).include?('noindex').
14
+ def noindex?(custom_name = nil)
15
+ meta_robots(custom_name).include?('noindex')
16
+ end
17
+
18
+ # Equivalent to meta_robots(custom_name).include?('nofollow').
19
+ def nofollow?(custom_name = nil)
20
+ meta_robots(custom_name).include?('nofollow')
21
+ end
22
+
23
+ private
24
+
25
+ def parse_meta_robots(custom_name)
26
+ pattern = /\A#{Regexp.quote(custom_name)}\z/i
27
+ meta = css('meta[@name]').find { |meta|
28
+ meta['name'].match(pattern)
29
+ } and content = meta['content'] or return []
30
+ content.downcase.split(/[,\s]+/)
31
+ end
32
+ end
@@ -283,9 +283,38 @@ Disallow: /
283
283
  assert !@testbot.allowed?("http://store.apple.com/vieworder")
284
284
  assert @msnbot.allowed?("http://store.apple.com/vieworder")
285
285
  }
286
- # assert_nothing_raised {
286
+ assert_nothing_raised {
287
287
  assert !@testbot.allowed?("http://github.com/login")
288
- # }
288
+ }
289
+ end
290
+ end
291
+
292
+ context "meta robots tag" do
293
+ setup do
294
+ @doc = Nokogiri::HTML(<<-HTML)
295
+ <html>
296
+ <head>
297
+ <meta name="ROBOTS" content="NOFOLLOW">
298
+ <meta name="Slurp" content="noindex,nofollow">
299
+ <meta name="googlebot" content="noarchive, noindex">
300
+ </head>
301
+ <body>
302
+ test
303
+ </body>
304
+ </html>
305
+ HTML
306
+ end
307
+
308
+ should "be properly parsed when given in HTML string" do
309
+ assert !@doc.noindex?
310
+ assert @doc.nofollow?
311
+
312
+ assert @doc.noindex?('slurp')
313
+ assert @doc.nofollow?('slurp')
314
+
315
+ assert @doc.noindex?('googlebot')
316
+ assert !@doc.nofollow?('googlebot')
317
+ assert @doc.meta_robots('googlebot').include?('noarchive')
289
318
  end
290
319
  end
291
320
  end
data/webrobots.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{webrobots}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Akinori MUSHA"]
12
- s.date = %q{2011-01-03}
12
+ s.date = %q{2011-01-05}
13
13
  s.description = %q{This library helps write robots.txt compliant web robots in Ruby.
14
14
  }
15
15
  s.email = %q{knu@idaemons.org}
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
26
26
  "Rakefile",
27
27
  "VERSION",
28
28
  "lib/webrobots.rb",
29
+ "lib/webrobots/nokogiri.rb",
29
30
  "lib/webrobots/robotstxt.rb",
30
31
  "lib/webrobots/robotstxt.ry",
31
32
  "test/helper.rb",
@@ -46,23 +47,29 @@ Gem::Specification.new do |s|
46
47
 
47
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
49
  s.add_runtime_dependency(%q<racc>, [">= 0"])
50
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
49
51
  s.add_development_dependency(%q<shoulda>, [">= 0"])
50
52
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
51
53
  s.add_development_dependency(%q<jeweler>, ["~> 1.5.1"])
52
54
  s.add_development_dependency(%q<rcov>, [">= 0"])
55
+ s.add_development_dependency(%q<racc>, [">= 0"])
53
56
  else
54
57
  s.add_dependency(%q<racc>, [">= 0"])
58
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
55
59
  s.add_dependency(%q<shoulda>, [">= 0"])
56
60
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
61
  s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
58
62
  s.add_dependency(%q<rcov>, [">= 0"])
63
+ s.add_dependency(%q<racc>, [">= 0"])
59
64
  end
60
65
  else
61
66
  s.add_dependency(%q<racc>, [">= 0"])
67
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
62
68
  s.add_dependency(%q<shoulda>, [">= 0"])
63
69
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
70
  s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
65
71
  s.add_dependency(%q<rcov>, [">= 0"])
72
+ s.add_dependency(%q<racc>, [">= 0"])
66
73
  end
67
74
  end
68
75
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Akinori MUSHA
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-03 00:00:00 +09:00
18
+ date: 2011-01-05 00:00:00 +09:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -33,8 +33,24 @@ dependencies:
33
33
  prerelease: false
34
34
  name: racc
35
35
  - !ruby/object:Gem::Dependency
36
- type: :development
36
+ type: :runtime
37
37
  version_requirements: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 1
45
+ - 4
46
+ - 4
47
+ version: 1.4.4
48
+ requirement: *id002
49
+ prerelease: false
50
+ name: nokogiri
51
+ - !ruby/object:Gem::Dependency
52
+ type: :development
53
+ version_requirements: &id003 !ruby/object:Gem::Requirement
38
54
  none: false
39
55
  requirements:
40
56
  - - ">="
@@ -43,12 +59,12 @@ dependencies:
43
59
  segments:
44
60
  - 0
45
61
  version: "0"
46
- requirement: *id002
62
+ requirement: *id003
47
63
  prerelease: false
48
64
  name: shoulda
49
65
  - !ruby/object:Gem::Dependency
50
66
  type: :development
51
- version_requirements: &id003 !ruby/object:Gem::Requirement
67
+ version_requirements: &id004 !ruby/object:Gem::Requirement
52
68
  none: false
53
69
  requirements:
54
70
  - - ~>
@@ -59,12 +75,12 @@ dependencies:
59
75
  - 0
60
76
  - 0
61
77
  version: 1.0.0
62
- requirement: *id003
78
+ requirement: *id004
63
79
  prerelease: false
64
80
  name: bundler
65
81
  - !ruby/object:Gem::Dependency
66
82
  type: :development
67
- version_requirements: &id004 !ruby/object:Gem::Requirement
83
+ version_requirements: &id005 !ruby/object:Gem::Requirement
68
84
  none: false
69
85
  requirements:
70
86
  - - ~>
@@ -75,12 +91,12 @@ dependencies:
75
91
  - 5
76
92
  - 1
77
93
  version: 1.5.1
78
- requirement: *id004
94
+ requirement: *id005
79
95
  prerelease: false
80
96
  name: jeweler
81
97
  - !ruby/object:Gem::Dependency
82
98
  type: :development
83
- version_requirements: &id005 !ruby/object:Gem::Requirement
99
+ version_requirements: &id006 !ruby/object:Gem::Requirement
84
100
  none: false
85
101
  requirements:
86
102
  - - ">="
@@ -89,9 +105,23 @@ dependencies:
89
105
  segments:
90
106
  - 0
91
107
  version: "0"
92
- requirement: *id005
108
+ requirement: *id006
93
109
  prerelease: false
94
110
  name: rcov
111
+ - !ruby/object:Gem::Dependency
112
+ type: :development
113
+ version_requirements: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ requirement: *id007
123
+ prerelease: false
124
+ name: racc
95
125
  description: |
96
126
  This library helps write robots.txt compliant web robots in Ruby.
97
127
 
@@ -112,6 +142,7 @@ files:
112
142
  - Rakefile
113
143
  - VERSION
114
144
  - lib/webrobots.rb
145
+ - lib/webrobots/nokogiri.rb
115
146
  - lib/webrobots/robotstxt.rb
116
147
  - lib/webrobots/robotstxt.ry
117
148
  - test/helper.rb