rcrawl 0.2.6 → 0.3.0
- data/README +9 -7
- data/Rakefile +5 -5
- data/lib/rcrawl/crawler.rb +4 -0
- data/lib/rcrawl/process/html.rb +8 -0
- data/lib/rcrawl/robot_rules.rb +82 -0
- data/lib/rcrawl.rb +147 -144
- metadata +10 -6
- data/lib/robot_rules.rb +0 -79
data/README
CHANGED
@@ -9,21 +9,23 @@ The structure of the crawling process was inspired by the specs of the Mercator
 == The Rcrawl process
 1. Remove an absolute URL from the URL Server.
 2. Download corresponding document from the internet, grabbing and processing robots.txt first, if available.
-3. Feed the document into a
-4.
+3. Feed the document into a rewind input stream(ris) to be read/re-read as needed. Based on MIME type, invoke the process method of the
+4. processing module associated with that MIME type. For example, a link extractor or tag counter module for text/html MIME types, or a gif stats module for image/gif. By default, all text/html MIME types will pass through the link extractor. Each link will be converted to an absolute URL and tested against a (ideally user-supplied) URL filter to determine if it should be downloaded.
 5. If the URL passes the filter (currently hard coded as Same Domain?), then call the URL-seen? test.
-6. Has the URL been seen before? Namely, is it in the URL Server
+6. Has the URL been seen before? Namely, is it in the URL Server or has it been downloaded already? If the URL is new, it is added to the URL Server.
 7. Back to step 1, repeat until the URL Server is empty.
 
 == Examples
+
 # Instantiate a new Rcrawl object
-crawler = Rcrawl.new(url)
+crawler = Rcrawl::Crawler.new(url)
 
 
 # Begin the crawl process
 crawler.crawl
 
 == After the crawler is done crawling
+
 # Returns an array of visited links
 crawler.visited_links
 
@@ -42,8 +44,8 @@ The structure of the crawling process was inspired by the specs of the Mercator
 crawler.external_links
 
 == License
-Copyright © 2006
+Copyright © 2006 Digital Duckies, LLC, under MIT License
 
-Developed for http://
+Developed for http://digitalduckies.net
 
-News, code, and documentation at http://
+News, code, and documentation at http://digitalduckies.net
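
(Not part of the diff: a minimal end-to-end sketch of the 0.3.0 API described in the README above. The target URL is a placeholder; a real run needs network access to a reachable site. All methods used here appear in the README and in lib/rcrawl.rb further down.)

  require 'rubygems'
  require 'rcrawl'

  # Placeholder site; any crawlable URL works.
  crawler = Rcrawl::Crawler.new("http://digitalduckies.net")

  # Crawl until the URL Server is empty.
  crawler.crawl

  # Inspect the results.
  puts "Visited #{crawler.visited_links.size} pages"
  puts "Skipped #{crawler.external_links.size} external links"
  crawler.errors.each { |url, error| puts "#{url}: #{error}" }
  pages = crawler.dump   # { URL => raw HTML }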
data/Rakefile
CHANGED
@@ -9,7 +9,7 @@ require 'rake/gempackagetask'
 desc "Generate documentation"
 Rake::RDocTask.new(:rdoc) do |rdoc|
   rdoc.rdoc_dir = "rdoc"
-  rdoc.title = "
+  rdoc.title = "Rcrawl"
   rdoc.options << "--line-numbers"
   rdoc.options << "--inline-source"
   rdoc.rdoc_files.include("README")
@@ -18,10 +18,10 @@ end
 
 spec = Gem::Specification.new do |s|
   s.name = "rcrawl"
-  s.version = "0.
-  s.author = "
-  s.email = "
-  s.homepage = "http://
+  s.version = "0.3.0"
+  s.author = "Digital Duckies"
+  s.email = "rcrawl@digitalduckies.net"
+  s.homepage = "http://digitalduckies.net"
   s.platform = Gem::Platform::RUBY
   s.summary = "A web crawler written in ruby"
   s.files = FileList["{test,lib}/**/*", "README", "MIT-LICENSE", "Rakefile", "TODO"].to_a
data/lib/rcrawl/robot_rules.rb
ADDED
@@ -0,0 +1,82 @@
+#!/usr/bin/env ruby
+
+# robot_rules.rb
+#
+# Created by James Edward Gray II on 2006-01-31.
+# Copyright 2006 Gray Productions. All rights reserved.
+# Included with rcrawl by permission from James Edward Gray II
+
+require "uri"
+
+module Rcrawl
+
+  # Based on Perl's WWW::RobotRules module, by Gisle Aas.
+  class RobotRules
+    def initialize( user_agent )
+      @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
+                                                     "").downcase
+      @rules = Hash.new { |rules, rule| rules[rule] = Array.new }
+    end
+
+    def parse( text_uri, robots_data )
+      uri = URI.parse(text_uri)
+      location = "#{uri.host}:#{uri.port}"
+      @rules.delete(location)
+
+      rules = robots_data.split(/[\015\012]+/).
+                          map { |rule| rule.sub(/\s*#.*$/, "") }
+      anon_rules = Array.new
+      my_rules = Array.new
+      current = anon_rules
+      rules.each do |rule|
+        case rule
+        when /^\s*User-Agent\s*:\s*(.+?)\s*$/i
+          break unless my_rules.empty?
+
+          current = if $1 == "*"
+                      anon_rules
+                    elsif $1.downcase.index(@user_agent)
+                      my_rules
+                    else
+                      nil
+                    end
+        when /^\s*Disallow\s*:\s*(.*?)\s*$/i
+          next if current.nil?
+
+          if $1.empty?
+            current << nil
+          else
+            disallow = URI.parse($1)
+
+            next unless disallow.scheme.nil? or disallow.scheme == uri.scheme
+            next unless disallow.port.nil? or disallow.port == uri.port
+            next unless disallow.host.nil? or
+                        disallow.host.downcase == uri.host.downcase
+
+            disallow = disallow.path
+            disallow = "/" if disallow.empty?
+            disallow = "/#{disallow}" unless disallow[0] == ?/
+
+            current << disallow
+          end
+        end
+      end
+
+      @rules[location] = if my_rules.empty?
+                           anon_rules.compact
+                         else
+                           my_rules.compact
+                         end
+    end
+
+    def allowed?( text_uri )
+      uri = URI.parse(text_uri)
+      location = "#{uri.host}:#{uri.port}"
+      path = uri.path
+
+      return true unless %w{http https}.include?(uri.scheme)
+
+      not @rules[location].any? { |rule| path.index(rule) == 0 }
+    end
+  end
+end
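
(Not part of the diff: a small sketch of how the new Rcrawl::RobotRules class is driven, using an inline robots.txt string instead of one fetched over HTTP. The URLs and rules are hypothetical.)

  require 'rcrawl/robot_rules'

  robots_txt = <<-ROBOTS
  User-Agent: *
  Disallow: /private   # trailing comments are stripped by parse
  Disallow: /tmp/
  ROBOTS

  rules = Rcrawl::RobotRules.new("Rcrawl/0.3.0")
  rules.parse("http://example.com/robots.txt", robots_txt)

  rules.allowed?("http://example.com/index.html")  # => true
  rules.allowed?("http://example.com/private/a")   # => false
  rules.allowed?("ftp://example.com/private/a")    # => true (only http/https are checked)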
data/lib/rcrawl.rb
CHANGED
@@ -1,178 +1,181 @@
 #!/usr/bin/env ruby
-# rcrawl/0.2.0
 
 require 'rubygems'
 require 'open-uri'
 require 'scrapi'
-require 'robot_rules'
-
-
-
-
-
-#
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  def crawl
-    until @links_to_visit.empty? do
-      begin
-        # Get link
-        url_server
-        next unless robot_safe? @url
-        # Parse robots.txt, then download document if robot_safe
-        fetch_http(@url)
-        # Store raw HTML in variable to read/reread as needed
-        # Then call any processing modules you need for the current document
-        ris(@document)
-      rescue
-        puts ""
-        puts "I died on #{@url}"
-        $stderr.puts $!
-        @errors[@url] = $!
-        next
-      ensure
-        # Stuff you want to make sure gets printed out
-        puts " done!"
-      end
+require 'rcrawl/robot_rules'
+require 'rcrawl/process/html'
+
+module Rcrawl
+
+  # Crawler will retrieve an entire website, one page at a time,
+  # parsing the page using whatever modules you pass it to.
+  class Crawler
+
+    # Initializes various variables when a new Rcrawl object is instantiated
+    def initialize(site)
+      @links_to_visit = Array.new
+      @visited_links = Array.new
+      @external_links = Array.new
+      @raw_html = Hash.new
+      @rules = RobotRules.new("Rcrawl")
+      @sites = Hash.new
+      @site = URI.parse(site)
+      @links_to_visit << site
+      @errors = Hash.new
+      puts "Site is #{site}"
     end
 
-
-
+    # Coordinates the whole crawling process
+    def crawl
+      until @links_to_visit.empty? do
+        begin
+          # Get link
+          url_server
+          next unless robot_safe? @url
+          # Parse robots.txt, then download document if robot_safe
+          fetch_http(@url)
+          # Store raw HTML in variable to read/reread as needed
+          # Then call any processing modules you need for the current document
+          ris(@document)
+        rescue
+          puts ""
+          puts "I died on #{@url}"
+          $stderr.puts $!
+          @errors[@url] = $!
+          next
+        ensure
+          # Stuff you want to make sure gets printed out
+          puts " done!"
+        end
+      end
 
-
-  def url_server
-    unless @links_to_visit.empty?
-      @url = @links_to_visit.pop
+      puts "Visited #{@visited_links.size} links."
     end
-  end
 
-
-
-
-
-
-    print "Visiting: #{url}"
-    @document = uri.read
-    @visited_links << url
-  end
-
-  # Rewind Input Stream, for storing and reading of raw HTML
-  def ris(document)
-    print "."
-    # Store raw HTML into local variable
-    # Based on MIME type, invoke the proper processing modules
-    case document.content_type
-    when "text/html"
-      link_extractor(document)
-      process_html(document)
-    else
-      print "... not HTML, skipping..."
-    end
-  end
-
-  # HTML processing module for extracting links
-  def link_extractor(document)
-    print "."
-
-    # Parse all links from HTML into an array
-    # Set up the scrAPI (http://labnotes.org)
-    links = Scraper.define do
-      array :urls
-      process "a[href]", :urls => "@href"
-      result :urls
+    # Authoritative list of URLs to be processed by Rcrawl
+    def url_server
+      unless @links_to_visit.empty?
+        @url = @links_to_visit.pop
+      end
     end
-
-    urls = links.scrape(document)
 
-
+    # Download the document
+    def fetch_http(url)
+      # Make sure robots.txt has been parsed for this site first,
+      # if not, parse robots.txt then grab document.
       uri = URI.parse(url)
+      print "Visiting: #{url}"
+      @document = uri.read
+      @visited_links << url
+    end
 
-
-
-
-
+    # Rewind Input Stream, for storing and reading of raw HTML
+    def ris(document)
+      print "."
+      # Store raw HTML into local variable
+      # Based on MIME type, invoke the proper processing modules
+      case document.content_type
+      when "text/html"
+        link_extractor(document)
+        process_html(document)
+      else
+        print "... not HTML, skipping..."
       end
+    end
 
-
-
-
-      @external_links.uniq!
-      next
-    end
+    # HTML processing module for extracting links
+    def link_extractor(document)
+      print "."
 
-    #
-
-
+      # Parse all links from HTML into an array
+      # Set up the scrAPI (http://labnotes.org)
+      links = Scraper.define do
+        array :urls
+        process "a[href]", :urls => "@href"
+        result :urls
      end
+
+      urls = links.scrape(document)
 
-
-
+      urls.each { |url|
+        uri = URI.parse(url)
 
-
+        # Derelativeize links if necessary
+        if uri.relative?
+          url = @site.merge(url).to_s
+          uri = URI.parse(url)
+        end
+
+        # Check domain, if in same domain, keep link, else trash it
+        if uri.host != @site.host
+          @external_links << url
+          @external_links.uniq!
+          next
+        end
+
+        # Find out if we've seen this link already
+        if (@visited_links.include? url) || (@links_to_visit.include? url)
+          next
+        end
+
+        @links_to_visit << url
+      }
 
-  # HTML processing module for raw HTML storage
-  def process_html(document)
-    # Add link and raw HTML to a hash as key/value
-    # for later storage in database
-    unless @raw_html.has_value?(document)
-      print "."
-      @raw_html[document.base_uri] = document
     end
-  end
 
-
-
-
-
+    # HTML processing module for raw HTML storage
+    def process_html(document)
+      # Add link and raw HTML to a hash as key/value
+      # for later storage in database
+      unless @raw_html.has_value?(document)
+        print "."
+        @raw_html[document.base_uri] = document
+      end
+    end
 
-
+    # robots.txt parsing
+    def robot_safe?(url)
+      uri = URI.parse(url)
+      location = "#{uri.host}:#{uri.port}"
 
-
-    @sites[location] = true
+      return true unless %w{http https}.include?(uri.scheme)
 
-
-
-
-
-
+      unless @sites.include? location
+        @sites[location] = true
+
+        robot_url = "http://#{location}/robots.txt"
+        begin
+          robot_file = open(robot_url) { |page| page.read }
+        rescue
+          return true
+        end
+        @rules.parse(robot_url, robot_file)
      end
-
+
+      @rules.allowed? url
    end
 
-
-
+    # Returns array of links visited during crawl
+    def visited_links
+      return @visited_links
+    end
 
-
-
-
-
+    # Returns array of external links
+    def external_links
+      return @external_links
+    end
+
+    # Returns a hash where {key => URL, value => HTML} from all pages crawled
+    def dump
+      return @raw_html
+    end
 
-
-
-
-
-
-  # Returns a hash where {key => URL, value => HTML} from all pages crawled
-  def dump
-    return @raw_html
-  end
+    # Returns a hash where {key => URL, value => "Error message"} from any
+    # errors encountered during the crawl
+    def errors
+      return @errors
+    end
 
-  # Returns a hash where {key => URL, value => "Error message"} from any
-  # errors encountered during the crawl
-  def errors
-    return @errors
   end
-
 end
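
(Not part of the diff: the link handling inside link_extractor above relies on URI#relative? and URI#merge. This standalone sketch, with made-up URLs, shows the derelativization and the same-domain filter the crawler applies.)

  require 'uri'

  site = URI.parse("http://digitalduckies.net/news/")

  ["/about", "rcrawl.html", "http://rubyforge.org/projects/rcrawl"].each do |link|
    uri = URI.parse(link)
    if uri.relative?
      link = site.merge(link).to_s   # derelativize against the crawl site
      uri  = URI.parse(link)
    end
    status = (uri.host == site.host) ? "queue" : "external"
    puts "#{link} -> #{status}"
  end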
metadata
CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.9.0
 specification_version: 1
 name: rcrawl
 version: !ruby/object:Gem::Version
-  version: 0.
-date: 2006-09-
+  version: 0.3.0
+date: 2006-09-22 00:00:00 -05:00
 summary: A web crawler written in ruby
 require_paths:
 - lib
-email:
-homepage: http://
+email: rcrawl@digitalduckies.net
+homepage: http://digitalduckies.net
 rubyforge_project: rcrawl
 description:
 autorequire: rcrawl.rb
@@ -27,10 +27,14 @@ signing_key:
 cert_chain:
 post_install_message:
 authors:
--
+- Digital Duckies
 files:
-- lib/robot_rules.rb
 - lib/rcrawl.rb
+- lib/rcrawl
+- lib/rcrawl/crawler.rb
+- lib/rcrawl/robot_rules.rb
+- lib/rcrawl/process
+- lib/rcrawl/process/html.rb
 - README
 - MIT-LICENSE
 - Rakefile
data/lib/robot_rules.rb
DELETED
@@ -1,79 +0,0 @@
-#!/usr/bin/env ruby
-
-# robot_rules.rb
-#
-# Created by James Edward Gray II on 2006-01-31.
-# Copyright 2006 Gray Productions. All rights reserved.
-# Included with rcrawl by permission from James Edward Gray II
-
-require "uri"
-
-# Based on Perl's WWW::RobotRules module, by Gisle Aas.
-class RobotRules
-  def initialize( user_agent )
-    @user_agent = user_agent.scan(/\S+/).first.sub(%r{/.*},
-                                                   "").downcase
-    @rules = Hash.new { |rules, rule| rules[rule] = Array.new }
-  end
-
-  def parse( text_uri, robots_data )
-    uri = URI.parse(text_uri)
-    location = "#{uri.host}:#{uri.port}"
-    @rules.delete(location)
-
-    rules = robots_data.split(/[\015\012]+/).
-                        map { |rule| rule.sub(/\s*#.*$/, "") }
-    anon_rules = Array.new
-    my_rules = Array.new
-    current = anon_rules
-    rules.each do |rule|
-      case rule
-      when /^\s*User-Agent\s*:\s*(.+?)\s*$/i
-        break unless my_rules.empty?
-
-        current = if $1 == "*"
-                    anon_rules
-                  elsif $1.downcase.index(@user_agent)
-                    my_rules
-                  else
-                    nil
-                  end
-      when /^\s*Disallow\s*:\s*(.*?)\s*$/i
-        next if current.nil?
-
-        if $1.empty?
-          current << nil
-        else
-          disallow = URI.parse($1)
-
-          next unless disallow.scheme.nil? or disallow.scheme == uri.scheme
-          next unless disallow.port.nil? or disallow.port == uri.port
-          next unless disallow.host.nil? or
-                      disallow.host.downcase == uri.host.downcase
-
-          disallow = disallow.path
-          disallow = "/" if disallow.empty?
-          disallow = "/#{disallow}" unless disallow[0] == ?/
-
-          current << disallow
-        end
-      end
-    end
-
-    @rules[location] = if my_rules.empty?
-                         anon_rules.compact
-                       else
-                         my_rules.compact
-                       end
-  end
-
-  def allowed?( text_uri )
-    uri = URI.parse(text_uri)
-    location = "#{uri.host}:#{uri.port}"
-    path = uri.path
-
-    return true unless %w{http https}.include?(uri.scheme)
-
-    not @rules[location].any? { |rule| path.index(rule) == 0 }
-  end
-end