webrobots 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -3,6 +3,7 @@ source "http://rubygems.org"
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
5
  gem "racc", ">= 0"
6
+ gem "nokogiri", ">= 1.4.4"
6
7
 
7
8
  # Add dependencies to develop your gem here.
8
9
  # Include everything needed to run rake, tests, features, etc.
data/Gemfile.lock CHANGED
@@ -6,6 +6,7 @@ GEM
6
6
  bundler (~> 1.0.0)
7
7
  git (>= 1.2.5)
8
8
  rake
9
+ nokogiri (1.4.4)
9
10
  racc (1.4.6)
10
11
  rake (0.8.7)
11
12
  rcov (0.9.9)
@@ -17,6 +18,7 @@ PLATFORMS
17
18
  DEPENDENCIES
18
19
  bundler (~> 1.0.0)
19
20
  jeweler (~> 1.5.1)
21
+ nokogiri (>= 1.4.4)
20
22
  racc
21
23
  rcov
22
24
  shoulda
data/README.rdoc CHANGED
@@ -2,6 +2,22 @@
2
2
 
3
3
  This is a library to help write robots.txt compliant web robots.
4
4
 
5
+ == Usage
6
+
7
+ require 'webrobots'
8
+ require 'uri'
9
+ require 'net/http'
10
+
11
+ robots = WebRobots.new('MyBot/1.0')
12
+
13
+ uri = URI('http://digg.com/news/24hr')
14
+ if robots.disallowed?(uri)
15
+ STDERR.puts "Access disallowed: #{uri}"
16
+ exit 1
17
+ end
18
+ body = Net::HTTP.get(uri)
19
+ # ...
20
+
5
21
  == Contributing to webrobots
6
22
 
7
23
  * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
data/Rakefile CHANGED
@@ -24,7 +24,7 @@ This library helps write robots.txt compliant web robots in Ruby.
24
24
  # Include your dependencies below. Runtime dependencies are required when using your gem,
25
25
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
26
26
  # gem.add_runtime_dependency 'jabber4r', '> 0.1'
27
- # gem.add_development_dependency 'rspec', '> 1.2.3'
27
+ gem.add_development_dependency 'racc'
28
28
  end
29
29
  Jeweler::RubygemsDotOrgTasks.new
30
30
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.2
1
+ 0.0.3
data/lib/webrobots.rb CHANGED
@@ -1,6 +1,11 @@
1
1
  require 'webrobots/robotstxt'
2
2
  require 'uri'
3
3
  require 'net/https'
4
+ if defined?(Nokogiri)
5
+ require 'webrobots/nokogiri'
6
+ else
7
+ autoload :Nokogiri, 'webrobots/nokogiri'
8
+ end
4
9
 
5
10
  class WebRobots
6
11
  # Creates a WebRobots object for a robot named +user_agent+, with
@@ -0,0 +1,32 @@
1
+ require 'nokogiri'
2
+
3
+ class Nokogiri::HTML::Document
4
+ # Returns an array of lower-cased <meta name="ROBOTS"> tokens. If
5
+ # no tag is found, returns an empty array. An optional
6
+ # +custom_name+ specifies the name of a meta tag to look for ahead
7
+ # of "ROBOTS". Names are compared in a case-insensitive manner.
8
+ def meta_robots(custom_name = nil)
9
+ (@meta_robots ||= {})[custom_name] =
10
+ (custom_name && parse_meta_robots(custom_name)) || parse_meta_robots('robots')
11
+ end
12
+
13
+ # Equivalent to meta_robots(custom_name).include?('noindex').
14
+ def noindex?(custom_name = nil)
15
+ meta_robots(custom_name).include?('noindex')
16
+ end
17
+
18
+ # Equivalent to meta_robots(custom_name).include?('nofollow').
19
+ def nofollow?(custom_name = nil)
20
+ meta_robots(custom_name).include?('nofollow')
21
+ end
22
+
23
+ private
24
+
25
+ def parse_meta_robots(custom_name)
26
+ pattern = /\A#{Regexp.quote(custom_name)}\z/i
27
+ meta = css('meta[@name]').find { |meta|
28
+ meta['name'].match(pattern)
29
+ } and content = meta['content'] or return []
30
+ content.downcase.split(/[,\s]+/)
31
+ end
32
+ end
@@ -283,9 +283,38 @@ Disallow: /
283
283
  assert !@testbot.allowed?("http://store.apple.com/vieworder")
284
284
  assert @msnbot.allowed?("http://store.apple.com/vieworder")
285
285
  }
286
- # assert_nothing_raised {
286
+ assert_nothing_raised {
287
287
  assert !@testbot.allowed?("http://github.com/login")
288
- # }
288
+ }
289
+ end
290
+ end
291
+
292
+ context "meta robots tag" do
293
+ setup do
294
+ @doc = Nokogiri::HTML(<<-HTML)
295
+ <html>
296
+ <head>
297
+ <meta name="ROBOTS" content="NOFOLLOW">
298
+ <meta name="Slurp" content="noindex,nofollow">
299
+ <meta name="googlebot" content="noarchive, noindex">
300
+ </head>
301
+ <body>
302
+ test
303
+ </body>
304
+ </html>
305
+ HTML
306
+ end
307
+
308
+ should "be properly parsed when given in HTML string" do
309
+ assert !@doc.noindex?
310
+ assert @doc.nofollow?
311
+
312
+ assert @doc.noindex?('slurp')
313
+ assert @doc.nofollow?('slurp')
314
+
315
+ assert @doc.noindex?('googlebot')
316
+ assert !@doc.nofollow?('googlebot')
317
+ assert @doc.meta_robots('googlebot').include?('noarchive')
289
318
  end
290
319
  end
291
320
  end
data/webrobots.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{webrobots}
8
- s.version = "0.0.2"
8
+ s.version = "0.0.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Akinori MUSHA"]
12
- s.date = %q{2011-01-03}
12
+ s.date = %q{2011-01-05}
13
13
  s.description = %q{This library helps write robots.txt compliant web robots in Ruby.
14
14
  }
15
15
  s.email = %q{knu@idaemons.org}
@@ -26,6 +26,7 @@ Gem::Specification.new do |s|
26
26
  "Rakefile",
27
27
  "VERSION",
28
28
  "lib/webrobots.rb",
29
+ "lib/webrobots/nokogiri.rb",
29
30
  "lib/webrobots/robotstxt.rb",
30
31
  "lib/webrobots/robotstxt.ry",
31
32
  "test/helper.rb",
@@ -46,23 +47,29 @@ Gem::Specification.new do |s|
46
47
 
47
48
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
48
49
  s.add_runtime_dependency(%q<racc>, [">= 0"])
50
+ s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
49
51
  s.add_development_dependency(%q<shoulda>, [">= 0"])
50
52
  s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
51
53
  s.add_development_dependency(%q<jeweler>, ["~> 1.5.1"])
52
54
  s.add_development_dependency(%q<rcov>, [">= 0"])
55
+ s.add_development_dependency(%q<racc>, [">= 0"])
53
56
  else
54
57
  s.add_dependency(%q<racc>, [">= 0"])
58
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
55
59
  s.add_dependency(%q<shoulda>, [">= 0"])
56
60
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
57
61
  s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
58
62
  s.add_dependency(%q<rcov>, [">= 0"])
63
+ s.add_dependency(%q<racc>, [">= 0"])
59
64
  end
60
65
  else
61
66
  s.add_dependency(%q<racc>, [">= 0"])
67
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
62
68
  s.add_dependency(%q<shoulda>, [">= 0"])
63
69
  s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
70
  s.add_dependency(%q<jeweler>, ["~> 1.5.1"])
65
71
  s.add_dependency(%q<rcov>, [">= 0"])
72
+ s.add_dependency(%q<racc>, [">= 0"])
66
73
  end
67
74
  end
68
75
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Akinori MUSHA
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-01-03 00:00:00 +09:00
18
+ date: 2011-01-05 00:00:00 +09:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -33,8 +33,24 @@ dependencies:
33
33
  prerelease: false
34
34
  name: racc
35
35
  - !ruby/object:Gem::Dependency
36
- type: :development
36
+ type: :runtime
37
37
  version_requirements: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 15
43
+ segments:
44
+ - 1
45
+ - 4
46
+ - 4
47
+ version: 1.4.4
48
+ requirement: *id002
49
+ prerelease: false
50
+ name: nokogiri
51
+ - !ruby/object:Gem::Dependency
52
+ type: :development
53
+ version_requirements: &id003 !ruby/object:Gem::Requirement
38
54
  none: false
39
55
  requirements:
40
56
  - - ">="
@@ -43,12 +59,12 @@ dependencies:
43
59
  segments:
44
60
  - 0
45
61
  version: "0"
46
- requirement: *id002
62
+ requirement: *id003
47
63
  prerelease: false
48
64
  name: shoulda
49
65
  - !ruby/object:Gem::Dependency
50
66
  type: :development
51
- version_requirements: &id003 !ruby/object:Gem::Requirement
67
+ version_requirements: &id004 !ruby/object:Gem::Requirement
52
68
  none: false
53
69
  requirements:
54
70
  - - ~>
@@ -59,12 +75,12 @@ dependencies:
59
75
  - 0
60
76
  - 0
61
77
  version: 1.0.0
62
- requirement: *id003
78
+ requirement: *id004
63
79
  prerelease: false
64
80
  name: bundler
65
81
  - !ruby/object:Gem::Dependency
66
82
  type: :development
67
- version_requirements: &id004 !ruby/object:Gem::Requirement
83
+ version_requirements: &id005 !ruby/object:Gem::Requirement
68
84
  none: false
69
85
  requirements:
70
86
  - - ~>
@@ -75,12 +91,12 @@ dependencies:
75
91
  - 5
76
92
  - 1
77
93
  version: 1.5.1
78
- requirement: *id004
94
+ requirement: *id005
79
95
  prerelease: false
80
96
  name: jeweler
81
97
  - !ruby/object:Gem::Dependency
82
98
  type: :development
83
- version_requirements: &id005 !ruby/object:Gem::Requirement
99
+ version_requirements: &id006 !ruby/object:Gem::Requirement
84
100
  none: false
85
101
  requirements:
86
102
  - - ">="
@@ -89,9 +105,23 @@ dependencies:
89
105
  segments:
90
106
  - 0
91
107
  version: "0"
92
- requirement: *id005
108
+ requirement: *id006
93
109
  prerelease: false
94
110
  name: rcov
111
+ - !ruby/object:Gem::Dependency
112
+ type: :development
113
+ version_requirements: &id007 !ruby/object:Gem::Requirement
114
+ none: false
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ hash: 3
119
+ segments:
120
+ - 0
121
+ version: "0"
122
+ requirement: *id007
123
+ prerelease: false
124
+ name: racc
95
125
  description: |
96
126
  This library helps write robots.txt compliant web robots in Ruby.
97
127
 
@@ -112,6 +142,7 @@ files:
112
142
  - Rakefile
113
143
  - VERSION
114
144
  - lib/webrobots.rb
145
+ - lib/webrobots/nokogiri.rb
115
146
  - lib/webrobots/robotstxt.rb
116
147
  - lib/webrobots/robotstxt.ry
117
148
  - test/helper.rb