robotx 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/lib/robotx.rb +25 -8
  4. data/robotx.gemspec +1 -1
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 161a4310d0e1b28e499ce5dd6226125c6e345dd6
4
- data.tar.gz: 2c33050af6edcdc516611e7eb8e1efc5a497ecf5
3
+ metadata.gz: 38247d7fb4545eed6e9a78ba05a9499fec08238c
4
+ data.tar.gz: 8042484caaa6979bd9a4eb6e7c704d493aac06d1
5
5
  SHA512:
6
- metadata.gz: 6dc47d5c31e4629bb462ed353e31ec5e2b5b98fbf2a56363d87c8e9c9a8ed5a611341d88268f29b66b5b268acf4dce8e7766b7be0d6f189f1696544602d86d89
7
- data.tar.gz: b939d2cf78e12054a92f8693ad35e5cc55efe75098df8b2e0c76c4347e3ad3763a15812a51968eba40c72b849c51b5373abddc9f0dfd92d49c3c2b1b85d59e84
6
+ metadata.gz: f0e97890f42ccaa6e24aa194f34088252df7861290cca8bd2d8acc4266e57f12fe419c660f3491895d02530aee36b6f57d42177f0befab6761b960f6bac2dd71
7
+ data.tar.gz: 9c73d05ad22f458c05bed28062aa65914c45b0c9dc90400e66963e621c53de22d135c1393a483f7bb9152bc157187ffba7307414907a4cb2d5b67f8f89a9d5ae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- robotx (0.1.0)
4
+ robotx (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/lib/robotx.rb CHANGED
@@ -77,24 +77,24 @@ private
77
77
  hash[agent] ||= {}
78
78
  when 'allow'
79
79
  hash[agent]['allow'] ||= []
80
- hash[agent]['allow'] << value.sub(/(\/){2,}$/, '')
80
+ hash[agent]['allow'] << strip_slashes(value)
81
81
  when 'disallow'
82
- # Disallow: '' means Allow: '/'
82
+ # Disallow: '' equals Allow: '/'
83
83
  if value.empty?
84
84
  hash[agent]['allow'] ||= []
85
85
  hash[agent]['allow'] << '/'
86
86
  else
87
87
  hash[agent]['disallow'] ||= []
88
- hash[agent]['disallow'] << value.sub(/(\/){2,}$/, '')
88
+ hash[agent]['disallow'] << strip_slashes(value)
89
89
  end
90
90
  when 'crawl-delay'
91
91
  hash[agent]['crawl-delay'] = value.to_i
92
92
  when 'sitemap'
93
93
  hash['sitemap'] ||= []
94
- hash['sitemap'] << value.sub(/(\/){2,}$/, '')
94
+ hash['sitemap'] << strip_slashes(value).sub(/\/*$/, '')
95
95
  else
96
96
  hash[key] ||= []
97
- hash[key] << value.sub(/(\/){2,}$/, '')
97
+ hash[key] << strip_slashes(value)
98
98
  end
99
99
  end
100
100
  end
@@ -102,13 +102,30 @@ private
102
102
  {}
103
103
  end
104
104
 
105
+ def strip_slashes(value='')
106
+ return value.sub(/\/*$/, '/')
107
+ end
108
+
109
+ def regex_value(value='')
110
+ return strip_slashes(value).gsub(/\*/,'.*').gsub(/\?/,'\?')
111
+ end
112
+
113
+ def disallow_regex
114
+ disallow_data = @robots_data.fetch(@user_agent, {}).fetch('disallow', [])
115
+ @disallow_regex ||= disallow_data.empty? ? nil : Regexp.compile(disallow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
116
+ end
117
+
118
+ def allow_regex
119
+ allow_data = @robots_data.fetch(@user_agent, {}).fetch('allow', [])
120
+ @allow_regex ||= allow_data.empty? ? nil : Regexp.compile(allow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
121
+ end
122
+
105
123
  def check_permission(uri)
106
124
  uri = URI.parse(URI.encode(uri))
107
125
  return true unless (@robots_data or @robots_data.any?) or (uri.scheme and uri.host)
108
126
 
109
- uri_path = uri.path.sub(/(\/){2,}$/, '')
110
- pattern = Regexp.compile("(^#{Regexp.escape(uri_path)}[\/]*$)|(^/$)")
111
- return (@robots_data.fetch(@user_agent, {}).fetch('disallow', []).grep(pattern).empty? or @robots_data.fetch(@user_agent, {}).fetch('allow', []).grep(pattern).any?)
127
+ uri_path = strip_slashes(uri.path)
128
+ return (!!!(uri_path =~ disallow_regex) or !!(uri_path =~ allow_regex))
112
129
  end
113
130
 
114
131
  end
data/robotx.gemspec CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "robotx"
7
- spec.version = "0.1.0"
7
+ spec.version = "0.1.1"
8
8
  spec.authors = ["Matthias Kalb"]
9
9
  spec.email = ["matthias.kalb@railsmechanic.de"]
10
10
  spec.summary = %q{A parser for the robots.txt file}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: robotx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthias Kalb
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-04 00:00:00.000000000 Z
11
+ date: 2014-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler