robotx 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/robotx.rb +25 -8
- data/robotx.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38247d7fb4545eed6e9a78ba05a9499fec08238c
|
4
|
+
data.tar.gz: 8042484caaa6979bd9a4eb6e7c704d493aac06d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0e97890f42ccaa6e24aa194f34088252df7861290cca8bd2d8acc4266e57f12fe419c660f3491895d02530aee36b6f57d42177f0befab6761b960f6bac2dd71
|
7
|
+
data.tar.gz: 9c73d05ad22f458c05bed28062aa65914c45b0c9dc90400e66963e621c53de22d135c1393a483f7bb9152bc157187ffba7307414907a4cb2d5b67f8f89a9d5ae
|
data/Gemfile.lock
CHANGED
data/lib/robotx.rb
CHANGED
@@ -77,24 +77,24 @@ private
|
|
77
77
|
hash[agent] ||= {}
|
78
78
|
when 'allow'
|
79
79
|
hash[agent]['allow'] ||= []
|
80
|
-
hash[agent]['allow'] << value
|
80
|
+
hash[agent]['allow'] << strip_slashes(value)
|
81
81
|
when 'disallow'
|
82
|
-
# Disallow: ''
|
82
|
+
# Disallow: '' equals Allow: '/'
|
83
83
|
if value.empty?
|
84
84
|
hash[agent]['allow'] ||= []
|
85
85
|
hash[agent]['allow'] << '/'
|
86
86
|
else
|
87
87
|
hash[agent]['disallow'] ||= []
|
88
|
-
hash[agent]['disallow'] << value
|
88
|
+
hash[agent]['disallow'] << strip_slashes(value)
|
89
89
|
end
|
90
90
|
when 'crawl-delay'
|
91
91
|
hash[agent]['crawl-delay'] = value.to_i
|
92
92
|
when 'sitemap'
|
93
93
|
hash['sitemap'] ||= []
|
94
|
-
hash['sitemap'] << value.sub(
|
94
|
+
hash['sitemap'] << strip_slashes(value).sub(/\/*$/, '')
|
95
95
|
else
|
96
96
|
hash[key] ||= []
|
97
|
-
hash[key] << value
|
97
|
+
hash[key] << strip_slashes(value)
|
98
98
|
end
|
99
99
|
end
|
100
100
|
end
|
@@ -102,13 +102,30 @@ private
|
|
102
102
|
{}
|
103
103
|
end
|
104
104
|
|
105
|
+
def strip_slashes(value='')
|
106
|
+
return value.sub(/\/*$/, '/')
|
107
|
+
end
|
108
|
+
|
109
|
+
def regex_value(value='')
|
110
|
+
return strip_slashes(value).gsub(/\*/,'.*').gsub(/\?/,'\?')
|
111
|
+
end
|
112
|
+
|
113
|
+
def disallow_regex
|
114
|
+
disallow_data = @robots_data.fetch(@user_agent, {}).fetch('disallow', [])
|
115
|
+
@disallow_regex ||= disallow_data.empty? ? nil : Regexp.compile(disallow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
|
116
|
+
end
|
117
|
+
|
118
|
+
def allow_regex
|
119
|
+
allow_data = @robots_data.fetch(@user_agent, {}).fetch('allow', [])
|
120
|
+
@allow_regex ||= allow_data.empty? ? nil : Regexp.compile(allow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
|
121
|
+
end
|
122
|
+
|
105
123
|
def check_permission(uri)
|
106
124
|
uri = URI.parse(URI.encode(uri))
|
107
125
|
return true unless (@robots_data or @robots_data.any?) or (uri.scheme and uri.host)
|
108
126
|
|
109
|
-
uri_path = uri.path
|
110
|
-
|
111
|
-
return (@robots_data.fetch(@user_agent, {}).fetch('disallow', []).grep(pattern).empty? or @robots_data.fetch(@user_agent, {}).fetch('allow', []).grep(pattern).any?)
|
127
|
+
uri_path = strip_slashes(uri.path)
|
128
|
+
return (!!!(uri_path =~ disallow_regex) or !!(uri_path =~ allow_regex))
|
112
129
|
end
|
113
130
|
|
114
131
|
end
|
data/robotx.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "robotx"
|
7
|
-
spec.version = "0.1.0"
|
7
|
+
spec.version = "0.1.1"
|
8
8
|
spec.authors = ["Matthias Kalb"]
|
9
9
|
spec.email = ["matthias.kalb@railsmechanic.de"]
|
10
10
|
spec.summary = %q{A parser for the robots.txt file}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: robotx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Matthias Kalb
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|