robotx 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/robotx.rb +25 -8
- data/robotx.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38247d7fb4545eed6e9a78ba05a9499fec08238c
|
4
|
+
data.tar.gz: 8042484caaa6979bd9a4eb6e7c704d493aac06d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0e97890f42ccaa6e24aa194f34088252df7861290cca8bd2d8acc4266e57f12fe419c660f3491895d02530aee36b6f57d42177f0befab6761b960f6bac2dd71
|
7
|
+
data.tar.gz: 9c73d05ad22f458c05bed28062aa65914c45b0c9dc90400e66963e621c53de22d135c1393a483f7bb9152bc157187ffba7307414907a4cb2d5b67f8f89a9d5ae
|
data/Gemfile.lock
CHANGED
data/lib/robotx.rb
CHANGED
@@ -77,24 +77,24 @@ private
|
|
77
77
|
hash[agent] ||= {}
|
78
78
|
when 'allow'
|
79
79
|
hash[agent]['allow'] ||= []
|
80
|
-
hash[agent]['allow'] << value
|
80
|
+
hash[agent]['allow'] << strip_slashes(value)
|
81
81
|
when 'disallow'
|
82
|
-
# Disallow: ''
|
82
|
+
# Disallow: '' equals Allow: '/'
|
83
83
|
if value.empty?
|
84
84
|
hash[agent]['allow'] ||= []
|
85
85
|
hash[agent]['allow'] << '/'
|
86
86
|
else
|
87
87
|
hash[agent]['disallow'] ||= []
|
88
|
-
hash[agent]['disallow'] << value
|
88
|
+
hash[agent]['disallow'] << strip_slashes(value)
|
89
89
|
end
|
90
90
|
when 'crawl-delay'
|
91
91
|
hash[agent]['crawl-delay'] = value.to_i
|
92
92
|
when 'sitemap'
|
93
93
|
hash['sitemap'] ||= []
|
94
|
-
hash['sitemap'] << value.sub(
|
94
|
+
hash['sitemap'] << strip_slashes(value).sub(/\/*$/, '')
|
95
95
|
else
|
96
96
|
hash[key] ||= []
|
97
|
-
hash[key] << value
|
97
|
+
hash[key] << strip_slashes(value)
|
98
98
|
end
|
99
99
|
end
|
100
100
|
end
|
@@ -102,13 +102,30 @@ private
|
|
102
102
|
{}
|
103
103
|
end
|
104
104
|
|
105
|
+
def strip_slashes(value='')
|
106
|
+
return value.sub(/\/*$/, '/')
|
107
|
+
end
|
108
|
+
|
109
|
+
def regex_value(value='')
|
110
|
+
return strip_slashes(value).gsub(/\*/,'.*').gsub(/\?/,'\?')
|
111
|
+
end
|
112
|
+
|
113
|
+
def disallow_regex
|
114
|
+
disallow_data = @robots_data.fetch(@user_agent, {}).fetch('disallow', [])
|
115
|
+
@disallow_regex ||= disallow_data.empty? ? nil : Regexp.compile(disallow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
|
116
|
+
end
|
117
|
+
|
118
|
+
def allow_regex
|
119
|
+
allow_data = @robots_data.fetch(@user_agent, {}).fetch('allow', [])
|
120
|
+
@allow_regex ||= allow_data.empty? ? nil : Regexp.compile(allow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
|
121
|
+
end
|
122
|
+
|
105
123
|
def check_permission(uri)
|
106
124
|
uri = URI.parse(URI.encode(uri))
|
107
125
|
return true unless (@robots_data or @robots_data.any?) or (uri.scheme and uri.host)
|
108
126
|
|
109
|
-
uri_path = uri.path
|
110
|
-
|
111
|
-
return (@robots_data.fetch(@user_agent, {}).fetch('disallow', []).grep(pattern).empty? or @robots_data.fetch(@user_agent, {}).fetch('allow', []).grep(pattern).any?)
|
127
|
+
uri_path = strip_slashes(uri.path)
|
128
|
+
return (!!!(uri_path =~ disallow_regex) or !!(uri_path =~ allow_regex))
|
112
129
|
end
|
113
130
|
|
114
131
|
end
|
data/robotx.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "robotx"
|
7
|
-
spec.version = "0.1.0"
|
7
|
+
spec.version = "0.1.1"
|
8
8
|
spec.authors = ["Matthias Kalb"]
|
9
9
|
spec.email = ["matthias.kalb@railsmechanic.de"]
|
10
10
|
spec.summary = %q{A parser for the robots.txt file}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: robotx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthias Kalb
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|