webrobots 0.0.12 → 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -2,14 +2,16 @@ source "http://rubygems.org"
2
2
  # Add dependencies required to use your gem here.
3
3
  # Example:
4
4
  # gem "activesupport", ">= 2.3.5"
5
- gem "nokogiri", ">= 1.4.4"
6
5
 
7
6
  # Add dependencies to develop your gem here.
8
7
  # Include everything needed to run rake, tests, features, etc.
9
8
  group :development do
10
9
  gem "racc", ">= 0"
11
10
  gem "shoulda", ">= 0"
12
- gem "bundler", "~> 1.0.0"
11
+ gem "bundler", ">= 1.0.0"
13
12
  gem "jeweler", "~> 1.6.4"
14
- gem "rcov", ">= 0"
13
+ gem "rcov", "~> 0.9.11"
14
+
15
+ # To test the webrobots/nokogiri module.
16
+ gem "nokogiri", ">= 1.4.4"
15
17
  end
@@ -7,18 +7,18 @@ GEM
7
7
  git (>= 1.2.5)
8
8
  rake
9
9
  nokogiri (1.5.0)
10
- racc (1.4.6)
11
- rake (0.9.2)
12
- rcov (0.9.10)
10
+ racc (1.4.7)
11
+ rake (0.9.2.2)
12
+ rcov (0.9.11)
13
13
  shoulda (2.11.3)
14
14
 
15
15
  PLATFORMS
16
16
  ruby
17
17
 
18
18
  DEPENDENCIES
19
- bundler (~> 1.0.0)
19
+ bundler (>= 1.0.0)
20
20
  jeweler (~> 1.6.4)
21
21
  nokogiri (>= 1.4.4)
22
22
  racc
23
- rcov
23
+ rcov (~> 0.9.11)
24
24
  shoulda
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2011 Akinori MUSHA
1
+ Copyright (c) 2010, 2011, 2012 Akinori MUSHA
2
2
 
3
3
  All rights reserved.
4
4
 
@@ -21,7 +21,6 @@ This is a library to help write robots.txt compliant web robots.
21
21
  == Requirements
22
22
 
23
23
  - Ruby 1.8.7 or 1.9.2+
24
- - Nokogiri 1.4.4+
25
24
 
26
25
  == Contributing to webrobots
27
26
 
@@ -35,6 +34,5 @@ This is a library to help write robots.txt compliant web robots.
35
34
 
36
35
  == Copyright
37
36
 
38
- Copyright (c) 2010, 2011 Akinori MUSHA. See LICENSE.txt for
37
+ Copyright (c) 2010, 2011, 2012 Akinori MUSHA. See LICENSE.txt for
39
38
  further details.
40
-
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.12
1
+ 0.0.13
@@ -145,8 +145,12 @@ class WebRobots
145
145
  referer = nil
146
146
  10.times {
147
147
  http = Net::HTTP.new(uri.host, uri.port)
148
- http.use_ssl = uri.is_a?(URI::HTTPS)
149
- http.verify_mode = OpenSSL::SSL::VERIFY_NONE
148
+ if http.use_ssl = uri.is_a?(URI::HTTPS)
149
+ http.verify_mode = OpenSSL::SSL::VERIFY_PEER
150
+ http.cert_store = OpenSSL::X509::Store.new.tap { |store|
151
+ store.set_default_paths
152
+ }
153
+ end
150
154
  header = { 'User-Agent' => @user_agent }
151
155
  header['Referer'] = referer if referer
152
156
  # header is destroyed by this in ruby 1.9.2!
@@ -1,6 +1,6 @@
1
1
  #
2
2
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.6
3
+ # This file is automatically generated by Racc 1.4.7
4
4
  # from Racc grammer file "".
5
5
  #
6
6
 
@@ -14,12 +14,23 @@ class WebRobots
14
14
  end
15
15
 
16
16
  class ParseError < Error
17
+ # The site's root URI
18
+ attr_reader :site
19
+
20
+ def initialize(message, site)
21
+ @message = message
22
+ @site = site
23
+ end
24
+
25
+ def to_s
26
+ @message
27
+ end
17
28
  end
18
29
 
19
30
  class RobotsTxt
20
31
  class Parser < Racc::Parser
21
32
 
22
- module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
33
+ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 169)
23
34
 
24
35
  def initialize(target = nil)
25
36
  super()
@@ -90,7 +101,7 @@ module_eval(<<'...end robotstxt.ry/module_eval...', 'robotstxt.ry', 158)
90
101
 
91
102
  do_parse
92
103
  rescue Racc::ParseError => e
93
- raise ParseError, e.message
104
+ raise ParseError.new(e.message, @site)
94
105
  ensure
95
106
  @q.clear
96
107
  end
@@ -606,7 +617,7 @@ Disallow: /
606
617
  else
607
618
  @options[ruleline.token.downcase] = ruleline.value
608
619
  end
609
- }
620
+ } if rulelines
610
621
  @acls.replace @acls.sort_by { |x|
611
622
  [-x.value.length, x.is_a?(AllowLine) ? -1 : 0]
612
623
  }
@@ -151,6 +151,17 @@ class WebRobots
151
151
  end
152
152
 
153
153
  class ParseError < Error
154
+ # The site's root URI
155
+ attr_reader :site
156
+
157
+ def initialize(message, site)
158
+ @message = message
159
+ @site = site
160
+ end
161
+
162
+ def to_s
163
+ @message
164
+ end
154
165
  end
155
166
 
156
167
  class RobotsTxt
@@ -225,7 +236,7 @@ class WebRobots
225
236
 
226
237
  do_parse
227
238
  rescue Racc::ParseError => e
228
- raise ParseError, e.message
239
+ raise ParseError.new(e.message, @site)
229
240
  ensure
230
241
  @q.clear
231
242
  end
@@ -341,7 +352,7 @@ Disallow: /
341
352
  else
342
353
  @options[ruleline.token.downcase] = ruleline.value
343
354
  end
344
- }
355
+ } if rulelines
345
356
  @acls.replace @acls.sort_by { |x|
346
357
  [-x.value.length, x.is_a?(AllowLine) ? -1 : 0]
347
358
  }
@@ -327,7 +327,9 @@ Allow: /2heavy/*.html
327
327
  robots.reset(url)
328
328
 
329
329
  assert robots.allowed?(url)
330
- assert_instance_of WebRobots::ParseError, robots.error(url)
330
+ error = robots.error(url)
331
+ assert_instance_of WebRobots::ParseError, error
332
+ assert_equal URI('http://www.example.org/'), error.site
331
333
  assert_raise(WebRobots::ParseError) {
332
334
  robots.error!(url)
333
335
  }
@@ -472,6 +474,7 @@ Disallow: /
472
474
  should "be parsed for major sites" do
473
475
  assert_nothing_raised {
474
476
  assert !@testbot.allowed?("http://www.google.com/search")
477
+ assert !@testbot.allowed?("https://www.google.com/search")
475
478
  assert !@testbot.allowed?("http://www.google.com/news/section?pz=1&cf=all&ned=jp&topic=y&ict=ln")
476
479
  assert @testbot.allowed?("http://www.google.com/news/directory?pz=1&cf=all&ned=us&hl=en&sort=users&category=6")
477
480
  }
@@ -655,4 +658,25 @@ TXT
655
658
  assert !@fetched
656
659
  end
657
660
  end
661
+
662
+ context "robots.txt with just user-agent & sitemap and no blank line between them" do
663
+ setup do
664
+ @robots = WebRobots.new('RandomBot', :http_get => lambda { |uri|
665
+ res = case uri.to_s
666
+ when 'http://site1.example.com/robots.txt'
667
+ <<-'TXT'
668
+ User-agent: *
669
+ Sitemap: http://site1.example.com/text/sitemap.xml
670
+ TXT
671
+ else
672
+ raise "#{uri} is not supposed to be fetched"
673
+ end
674
+ })
675
+ end
676
+
677
+ should "be properly parsed" do
678
+ assert @robots.allowed?("http://site1.example.com/foo")
679
+ assert_equal(["http://site1.example.com/text/sitemap.xml"], @robots.sitemaps("http://site1.example.com/"))
680
+ end
681
+ end
658
682
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "webrobots"
8
- s.version = "0.0.12"
8
+ s.version = "0.0.13"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Akinori MUSHA"]
12
- s.date = "2011-09-26"
12
+ s.date = "2012-01-24"
13
13
  s.description = "This library helps write robots.txt compliant web robots in Ruby.\n"
14
14
  s.email = "knu@idaemons.org"
15
15
  s.extra_rdoc_files = [
@@ -35,34 +35,34 @@ Gem::Specification.new do |s|
35
35
  s.homepage = "https://github.com/knu/webrobots"
36
36
  s.licenses = ["2-clause BSDL"]
37
37
  s.require_paths = ["lib"]
38
- s.rubygems_version = "1.8.10"
38
+ s.rubygems_version = "1.8.15"
39
39
  s.summary = "A Ruby library to help write robots.txt compliant web robots"
40
40
 
41
41
  if s.respond_to? :specification_version then
42
42
  s.specification_version = 3
43
43
 
44
44
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
45
- s.add_runtime_dependency(%q<nokogiri>, [">= 1.4.4"])
46
45
  s.add_development_dependency(%q<racc>, [">= 0"])
47
46
  s.add_development_dependency(%q<shoulda>, [">= 0"])
48
- s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
47
+ s.add_development_dependency(%q<bundler>, [">= 1.0.0"])
49
48
  s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
50
- s.add_development_dependency(%q<rcov>, [">= 0"])
49
+ s.add_development_dependency(%q<rcov>, ["~> 0.9.11"])
50
+ s.add_development_dependency(%q<nokogiri>, [">= 1.4.4"])
51
51
  else
52
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
53
52
  s.add_dependency(%q<racc>, [">= 0"])
54
53
  s.add_dependency(%q<shoulda>, [">= 0"])
55
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
54
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
56
55
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
57
- s.add_dependency(%q<rcov>, [">= 0"])
56
+ s.add_dependency(%q<rcov>, ["~> 0.9.11"])
57
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
58
58
  end
59
59
  else
60
- s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
61
60
  s.add_dependency(%q<racc>, [">= 0"])
62
61
  s.add_dependency(%q<shoulda>, [">= 0"])
63
- s.add_dependency(%q<bundler>, ["~> 1.0.0"])
62
+ s.add_dependency(%q<bundler>, [">= 1.0.0"])
64
63
  s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
65
- s.add_dependency(%q<rcov>, [">= 0"])
64
+ s.add_dependency(%q<rcov>, ["~> 0.9.11"])
65
+ s.add_dependency(%q<nokogiri>, [">= 1.4.4"])
66
66
  end
67
67
  end
68
68
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: webrobots
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 5
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 12
10
- version: 0.0.12
9
+ - 13
10
+ version: 0.0.13
11
11
  platform: ruby
12
12
  authors:
13
13
  - Akinori MUSHA
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-09-26 00:00:00 Z
18
+ date: 2012-01-24 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  requirement: &id001 !ruby/object:Gem::Requirement
@@ -23,16 +23,14 @@ dependencies:
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- hash: 15
26
+ hash: 3
27
27
  segments:
28
- - 1
29
- - 4
30
- - 4
31
- version: 1.4.4
28
+ - 0
29
+ version: "0"
32
30
  version_requirements: *id001
33
- name: nokogiri
31
+ name: racc
34
32
  prerelease: false
35
- type: :runtime
33
+ type: :development
36
34
  - !ruby/object:Gem::Dependency
37
35
  requirement: &id002 !ruby/object:Gem::Requirement
38
36
  none: false
@@ -44,7 +42,7 @@ dependencies:
44
42
  - 0
45
43
  version: "0"
46
44
  version_requirements: *id002
47
- name: racc
45
+ name: shoulda
48
46
  prerelease: false
49
47
  type: :development
50
48
  - !ruby/object:Gem::Dependency
@@ -53,12 +51,14 @@ dependencies:
53
51
  requirements:
54
52
  - - ">="
55
53
  - !ruby/object:Gem::Version
56
- hash: 3
54
+ hash: 23
57
55
  segments:
56
+ - 1
58
57
  - 0
59
- version: "0"
58
+ - 0
59
+ version: 1.0.0
60
60
  version_requirements: *id003
61
- name: shoulda
61
+ name: bundler
62
62
  prerelease: false
63
63
  type: :development
64
64
  - !ruby/object:Gem::Dependency
@@ -67,14 +67,14 @@ dependencies:
67
67
  requirements:
68
68
  - - ~>
69
69
  - !ruby/object:Gem::Version
70
- hash: 23
70
+ hash: 7
71
71
  segments:
72
72
  - 1
73
- - 0
74
- - 0
75
- version: 1.0.0
73
+ - 6
74
+ - 4
75
+ version: 1.6.4
76
76
  version_requirements: *id004
77
- name: bundler
77
+ name: jeweler
78
78
  prerelease: false
79
79
  type: :development
80
80
  - !ruby/object:Gem::Dependency
@@ -83,14 +83,14 @@ dependencies:
83
83
  requirements:
84
84
  - - ~>
85
85
  - !ruby/object:Gem::Version
86
- hash: 7
86
+ hash: 45
87
87
  segments:
88
- - 1
89
- - 6
90
- - 4
91
- version: 1.6.4
88
+ - 0
89
+ - 9
90
+ - 11
91
+ version: 0.9.11
92
92
  version_requirements: *id005
93
- name: jeweler
93
+ name: rcov
94
94
  prerelease: false
95
95
  type: :development
96
96
  - !ruby/object:Gem::Dependency
@@ -99,12 +99,14 @@ dependencies:
99
99
  requirements:
100
100
  - - ">="
101
101
  - !ruby/object:Gem::Version
102
- hash: 3
102
+ hash: 15
103
103
  segments:
104
- - 0
105
- version: "0"
104
+ - 1
105
+ - 4
106
+ - 4
107
+ version: 1.4.4
106
108
  version_requirements: *id006
107
- name: rcov
109
+ name: nokogiri
108
110
  prerelease: false
109
111
  type: :development
110
112
  description: |
@@ -162,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
162
164
  requirements: []
163
165
 
164
166
  rubyforge_project:
165
- rubygems_version: 1.8.10
167
+ rubygems_version: 1.8.15
166
168
  signing_key:
167
169
  specification_version: 3
168
170
  summary: A Ruby library to help write robots.txt compliant web robots