robotstxt 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/robotstxt.rb
CHANGED
@@ -14,7 +14,7 @@
|
|
14
14
|
#++
|
15
15
|
|
16
16
|
|
17
|
-
require 'robotstxt/robotstxtistance'
|
17
|
+
require 'robotstxt/parser'
|
18
18
|
require 'uri'
|
19
19
|
|
20
20
|
|
@@ -24,7 +24,7 @@ module Robotstxt
|
|
24
24
|
NAME = 'Robotstxt'
|
25
25
|
GEM = 'robotstxt'
|
26
26
|
AUTHORS = ['Simone Rinzivillo <srinzivillo@gmail.com>']
|
27
|
-
VERSION = '0.5.0'
|
27
|
+
VERSION = '0.5.1'
|
28
28
|
|
29
29
|
|
30
30
|
# Check if the <tt>URL</tt> is allowed to be crawled from the current <tt>Robot_id</tt>.
|
@@ -33,9 +33,9 @@ module Robotstxt
|
|
33
33
|
# <tt>Robotstxt.allowed?('http:// www.simonerinzivillo.it/', 'rubytest')</tt>
|
34
34
|
#
|
35
35
|
def self.allowed?(url, robot_id)
|
36
|
-
|
36
|
+
|
37
37
|
u = URI.parse(url)
|
38
|
-
r = Robotstxt::Robotstxtistance.new(robot_id)
|
38
|
+
r = Robotstxt::Parser.new(robot_id)
|
39
39
|
return r.allowed?(url) if r.get(u.scheme + '://' + u.host)
|
40
40
|
|
41
41
|
end
|
@@ -45,9 +45,9 @@ module Robotstxt
|
|
45
45
|
# <tt>Robotstxt.sitemaps('http:// www.simonerinzivillo.it/', 'rubytest')</tt>
|
46
46
|
#
|
47
47
|
def self.sitemaps(url, robot_id)
|
48
|
-
|
48
|
+
|
49
49
|
u = URI.parse(url)
|
50
|
-
r = Robotstxt::Robotstxtistance.new(robot_id)
|
50
|
+
r = Robotstxt::Parser.new(robot_id)
|
51
51
|
return r.sitemaps if r.get(u.scheme + '://' + u.host)
|
52
52
|
|
53
53
|
end
|
@@ -13,13 +13,12 @@
|
|
13
13
|
#
|
14
14
|
#++
|
15
15
|
|
16
|
-
require 'rubygems'
|
17
16
|
require 'net/http'
|
18
17
|
require 'uri'
|
19
18
|
|
20
19
|
|
21
20
|
module Robotstxt
|
22
|
-
class Robotstxtistance
|
21
|
+
class Parser
|
23
22
|
attr_accessor :robot_id
|
24
23
|
attr_reader :found, :body, :sitemaps, :rules
|
25
24
|
|
@@ -98,6 +97,7 @@ module Robotstxt
|
|
98
97
|
url = URI.parse(var)
|
99
98
|
querystring = (!url.query.nil?) ? '?' + url.query : ''
|
100
99
|
url_path = url.path + querystring
|
100
|
+
|
101
101
|
@rules.each {|ua|
|
102
102
|
|
103
103
|
if @robot_id == ua[0] || ua[0] == '*'
|
@@ -129,14 +129,14 @@ module Robotstxt
|
|
129
129
|
#
|
130
130
|
# <tt>end</tt>
|
131
131
|
#
|
132
|
-
def sitemaps
|
133
|
-
|
132
|
+
def sitemaps
|
133
|
+
@sitemaps
|
134
134
|
end
|
135
135
|
|
136
136
|
# This method returns <tt>true</tt> if the Robots.txt parsing is gone.
|
137
137
|
#
|
138
|
-
def found?
|
139
|
-
|
138
|
+
def found?
|
139
|
+
!!@found
|
140
140
|
end
|
141
141
|
|
142
142
|
|
@@ -1,18 +1,18 @@
|
|
1
1
|
$:.unshift(File.dirname(__FILE__) + '/../lib')
|
2
2
|
|
3
3
|
require 'test/unit'
|
4
|
-
require '
|
4
|
+
require 'robotstxt'
|
5
5
|
|
6
|
-
class
|
6
|
+
class TestParser < Test::Unit::TestCase
|
7
7
|
|
8
8
|
def setup
|
9
|
-
@client = Robotstxt::Robotstxtistance.new('rubytest')
|
9
|
+
@client = Robotstxt::Parser.new('rubytest')
|
10
10
|
@client.get('http://www.simonerinzivillo.it')
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_initialize
|
14
|
-
client = Robotstxt::Robotstxtistance.new('*')
|
15
|
-
assert_instance_of Robotstxt::Robotstxtistance, client
|
14
|
+
client = Robotstxt::Parser.new('*')
|
15
|
+
assert_instance_of Robotstxt::Parser, client
|
16
16
|
end
|
17
17
|
|
18
18
|
def test_get_file_robotstxt
|
data/test/robotstxt_test.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: robotstxt
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Rinzivillo
|
@@ -22,7 +22,7 @@ extensions: []
|
|
22
22
|
extra_rdoc_files:
|
23
23
|
- README.rdoc
|
24
24
|
files:
|
25
|
-
- lib/robotstxt/robotstxtistance.rb
|
25
|
+
- lib/robotstxt/parser.rb
|
26
26
|
- lib/robotstxt.rb
|
27
27
|
- README.rdoc
|
28
28
|
has_rdoc: true
|
@@ -54,5 +54,5 @@ signing_key:
|
|
54
54
|
specification_version: 3
|
55
55
|
summary: Robotstxt is an Ruby robots.txt file parser
|
56
56
|
test_files:
|
57
|
+
- test/parser_test.rb
|
57
58
|
- test/robotstxt_test.rb
|
58
|
-
- test/robotstxtistance_test.rb
|