fizx-robots 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/robots.rb +12 -17
- metadata +11 -3
data/lib/robots.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
require "open-uri"
|
2
2
|
require "uri"
|
3
|
+
require "rubygems"
|
4
|
+
require "loggable"
|
3
5
|
class Robots
|
6
|
+
include Loggable
|
7
|
+
|
4
8
|
class ParsedRobots
|
9
|
+
include Loggable
|
10
|
+
|
5
11
|
def initialize(uri)
|
6
12
|
io = open(URI.join(uri.to_s, "/robots.txt")) rescue nil
|
7
13
|
if !io || io.content_type != "text/plain" || io.status != ["200", "OK"]
|
@@ -38,14 +44,14 @@ class Robots
|
|
38
44
|
return true unless @parsed
|
39
45
|
allowed = true
|
40
46
|
path = uri.request_uri
|
41
|
-
|
47
|
+
debug "path: #{path}"
|
42
48
|
|
43
49
|
@disallows.each do |key, value|
|
44
50
|
if user_agent =~ key
|
45
|
-
|
51
|
+
debug "matched #{key.inspect}"
|
46
52
|
value.each do |rule|
|
47
53
|
if path =~ rule
|
48
|
-
|
54
|
+
debug "matched Disallow: #{rule.inspect}"
|
49
55
|
allowed = false
|
50
56
|
end
|
51
57
|
end
|
@@ -56,10 +62,10 @@ class Robots
|
|
56
62
|
|
57
63
|
@allows.each do |key, value|
|
58
64
|
if user_agent =~ key
|
59
|
-
|
65
|
+
debug "matched #{key.inspect}"
|
60
66
|
value.each do |rule|
|
61
67
|
if path =~ rule
|
62
|
-
|
68
|
+
debug "matched Allow: #{rule.inspect}"
|
63
69
|
return true
|
64
70
|
end
|
65
71
|
end
|
@@ -100,15 +106,4 @@ class Robots
|
|
100
106
|
@parsed[host] ||= ParsedRobots.new(uri)
|
101
107
|
@parsed[host].other_values
|
102
108
|
end
|
103
|
-
end
|
104
|
-
|
105
|
-
if __FILE__ == $0
|
106
|
-
require "test/unit"
|
107
|
-
class RobotsTest < Test::Unit::TestCase
|
108
|
-
def test_robots
|
109
|
-
robots = Robots.new "Ruby-Robot.txt Parser Test Script"
|
110
|
-
assert robots.allowed?("http://www.yelp.com/foo")
|
111
|
-
assert !robots.allowed?("http://www.yelp.com/mail?foo=bar")
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
109
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fizx-robots
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kyle Maxwell
|
@@ -11,8 +11,16 @@ cert_chain: []
|
|
11
11
|
|
12
12
|
date: 2008-08-10 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: fizx-loggable
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.0.0
|
23
|
+
version:
|
16
24
|
description: It parses robots.txt files
|
17
25
|
email: kyle@kylemaxwell.com
|
18
26
|
executables: []
|