fizx-robots 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/robots.rb +12 -17
- metadata +11 -3
data/lib/robots.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
require "uri"
|
3
|
+
require "rubygems"
|
4
|
+
require "loggable"
|
3
5
|
class Robots
|
6
|
+
include Loggable
|
7
|
+
|
4
8
|
class ParsedRobots
|
9
|
+
include Loggable
|
10
|
+
|
5
11
|
def initialize(uri)
|
6
12
|
io = open(URI.join(uri.to_s, "/robots.txt")) rescue nil
|
7
13
|
if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
|
@@ -38,14 +44,14 @@ class Robots
|
|
38
44
|
return true unless @parsed
|
39
45
|
allowed = true
|
40
46
|
path = uri.request_uri
|
41
|
-
|
47
|
+
debug "path: #{path}"
|
42
48
|
|
43
49
|
@disallows.each do |key, value|
|
44
50
|
if user_agent =~ key
|
45
|
-
|
51
|
+
debug "matched #{key.inspect}"
|
46
52
|
value.each do |rule|
|
47
53
|
if path =~ rule
|
48
|
-
|
54
|
+
debug "matched Disallow: #{rule.inspect}"
|
49
55
|
allowed = false
|
50
56
|
end
|
51
57
|
end
|
@@ -56,10 +62,10 @@ class Robots
|
|
56
62
|
|
57
63
|
@allows.each do |key, value|
|
58
64
|
if user_agent =~ key
|
59
|
-
|
65
|
+
debug "matched #{key.inspect}"
|
60
66
|
value.each do |rule|
|
61
67
|
if path =~ rule
|
62
|
-
|
68
|
+
debug "matched Allow: #{rule.inspect}"
|
63
69
|
return true
|
64
70
|
end
|
65
71
|
end
|
@@ -100,15 +106,4 @@ class Robots
|
|
100
106
|
@parsed[host] ||= ParsedRobots.new(uri)
|
101
107
|
@parsed[host].other_values
|
102
108
|
end
|
103
|
-
end
|
104
|
-
|
105
|
-
if __FILE__ == $0
|
106
|
-
require "test/unit"
|
107
|
-
class RobotsTest < Test::Unit::TestCase
|
108
|
-
def test_robots
|
109
|
-
robots = Robots.new "Ruby-Robot.txt Parser Test Script"
|
110
|
-
assert robots.allowed?("http://www.yelp.com/foo")
|
111
|
-
assert !robots.allowed?("http://www.yelp.com/mail?foo=bar")
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
109
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fizx-robots
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kyle Maxwell
|
@@ -11,8 +11,16 @@ cert_chain: []
|
|
11
11
|
|
12
12
|
date: 2008-08-10 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: fizx-loggable
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.0.0
|
23
|
+
version:
|
16
24
|
description: It parses robots.txt files
|
17
25
|
email: kyle@kylemaxwell.com
|
18
26
|
executables: []
|