robotx 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5)
  1. checksums.yaml +4 -4
  2. data/Gemfile.lock +1 -1
  3. data/lib/robotx.rb +25 -8
  4. data/robotx.gemspec +1 -1
  5. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 161a4310d0e1b28e499ce5dd6226125c6e345dd6
4
- data.tar.gz: 2c33050af6edcdc516611e7eb8e1efc5a497ecf5
3
+ metadata.gz: 38247d7fb4545eed6e9a78ba05a9499fec08238c
4
+ data.tar.gz: 8042484caaa6979bd9a4eb6e7c704d493aac06d1
5
5
  SHA512:
6
- metadata.gz: 6dc47d5c31e4629bb462ed353e31ec5e2b5b98fbf2a56363d87c8e9c9a8ed5a611341d88268f29b66b5b268acf4dce8e7766b7be0d6f189f1696544602d86d89
7
- data.tar.gz: b939d2cf78e12054a92f8693ad35e5cc55efe75098df8b2e0c76c4347e3ad3763a15812a51968eba40c72b849c51b5373abddc9f0dfd92d49c3c2b1b85d59e84
6
+ metadata.gz: f0e97890f42ccaa6e24aa194f34088252df7861290cca8bd2d8acc4266e57f12fe419c660f3491895d02530aee36b6f57d42177f0befab6761b960f6bac2dd71
7
+ data.tar.gz: 9c73d05ad22f458c05bed28062aa65914c45b0c9dc90400e66963e621c53de22d135c1393a483f7bb9152bc157187ffba7307414907a4cb2d5b67f8f89a9d5ae
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- robotx (0.1.0)
4
+ robotx (0.1.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/lib/robotx.rb CHANGED
@@ -77,24 +77,24 @@ private
77
77
  hash[agent] ||= {}
78
78
  when 'allow'
79
79
  hash[agent]['allow'] ||= []
80
- hash[agent]['allow'] << value.sub(/(\/){2,}$/, '')
80
+ hash[agent]['allow'] << strip_slashes(value)
81
81
  when 'disallow'
82
- # Disallow: '' means Allow: '/'
82
+ # Disallow: '' equals Allow: '/'
83
83
  if value.empty?
84
84
  hash[agent]['allow'] ||= []
85
85
  hash[agent]['allow'] << '/'
86
86
  else
87
87
  hash[agent]['disallow'] ||= []
88
- hash[agent]['disallow'] << value.sub(/(\/){2,}$/, '')
88
+ hash[agent]['disallow'] << strip_slashes(value)
89
89
  end
90
90
  when 'crawl-delay'
91
91
  hash[agent]['crawl-delay'] = value.to_i
92
92
  when 'sitemap'
93
93
  hash['sitemap'] ||= []
94
- hash['sitemap'] << value.sub(/(\/){2,}$/, '')
94
+ hash['sitemap'] << strip_slashes(value).sub(/\/*$/, '')
95
95
  else
96
96
  hash[key] ||= []
97
- hash[key] << value.sub(/(\/){2,}$/, '')
97
+ hash[key] << strip_slashes(value)
98
98
  end
99
99
  end
100
100
  end
@@ -102,13 +102,30 @@ private
102
102
  {}
103
103
  end
104
104
 
105
+ def strip_slashes(value='')
106
+ return value.sub(/\/*$/, '/')
107
+ end
108
+
109
+ def regex_value(value='')
110
+ return strip_slashes(value).gsub(/\*/,'.*').gsub(/\?/,'\?')
111
+ end
112
+
113
+ def disallow_regex
114
+ disallow_data = @robots_data.fetch(@user_agent, {}).fetch('disallow', [])
115
+ @disallow_regex ||= disallow_data.empty? ? nil : Regexp.compile(disallow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
116
+ end
117
+
118
+ def allow_regex
119
+ allow_data = @robots_data.fetch(@user_agent, {}).fetch('allow', [])
120
+ @allow_regex ||= allow_data.empty? ? nil : Regexp.compile(allow_data.map { |uri| "^#{regex_value(uri)}" }.join("|"))
121
+ end
122
+
105
123
  def check_permission(uri)
106
124
  uri = URI.parse(URI.encode(uri))
107
125
  return true unless (@robots_data or @robots_data.any?) or (uri.scheme and uri.host)
108
126
 
109
- uri_path = uri.path.sub(/(\/){2,}$/, '')
110
- pattern = Regexp.compile("(^#{Regexp.escape(uri_path)}[\/]*$)|(^/$)")
111
- return (@robots_data.fetch(@user_agent, {}).fetch('disallow', []).grep(pattern).empty? or @robots_data.fetch(@user_agent, {}).fetch('allow', []).grep(pattern).any?)
127
+ uri_path = strip_slashes(uri.path)
128
+ return (!!!(uri_path =~ disallow_regex) or !!(uri_path =~ allow_regex))
112
129
  end
113
130
 
114
131
  end
data/robotx.gemspec CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "robotx"
7
- spec.version = "0.1.0"
7
+ spec.version = "0.1.1"
8
8
  spec.authors = ["Matthias Kalb"]
9
9
  spec.email = ["matthias.kalb@railsmechanic.de"]
10
10
  spec.summary = %q{A parser for the robots.txt file}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: robotx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Matthias Kalb
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-04 00:00:00.000000000 Z
11
+ date: 2014-07-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler