krawler 1.0.12 → 1.0.13
- data/bin/krawl +2 -2
- data/lib/krawler/version.rb +1 -1
- data/lib/krawler.rb +1 -1
- metadata +1 -1
data/bin/krawl
CHANGED
@@ -22,11 +22,11 @@ optparse = OptionParser.new do |opts|
   end
 
   opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do |d|
-    options[:
+    options[:d] = true
   end
 
   opts.on('-c', '--concurrent count', 'Crawl with count number of concurrent connections', 'Default: 4') do |c|
-    options[:
+    options[:c] = c.to_i
   end
 
   opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
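Both removed lines are the truncated assignment options[:, which is not valid Ruby, so the 1.0.12 executable appears to have shipped with a syntax error that 1.0.13 completes. A minimal standalone sketch of the corrected option handling, with defaults inferred from the help strings and a hypothetical body for the -r handler (only lines 22-32 appear in the hunk):

require 'optparse'

# Sketch only: defaults inferred from the help strings above; the full
# option block in bin/krawl is longer than this hunk shows.
options = { d: false, c: 4, r: true }

optparse = OptionParser.new do |opts|
  opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do |d|
    options[:d] = true      # completed in 1.0.13 (was the truncated "options[:")
  end

  opts.on('-c', '--concurrent count', 'Crawl with count number of concurrent connections', 'Default: 4') do |c|
    options[:c] = c.to_i    # likewise completed in 1.0.13
  end

  opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
    options[:r] = r         # hypothetical body; not shown in the diff
  end
end

optparse.parse!(['-c', '8', '-d'])
p options # prints {:d=>true, :c=>8, :r=>true}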
data/lib/krawler/version.rb
CHANGED
data/lib/krawler.rb
CHANGED
@@ -127,7 +127,7 @@ module Krawler
       rescue ArgumentError # junk link
         next
       end
-
+
       if @domain || (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
 
         next if @crawled_links.include?(new_link) || @links_to_crawl.include?(new_link) # don't crawl what we've alread crawled
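The only change in this hunk is line 130, where both the removed and added lines render as blank, so it is most likely a whitespace-only edit such as stripping trailing spaces. The surrounding context is the crawler's external-domain filter; a small standalone sketch of that check, using hypothetical values in place of @host and @domain:

# Standalone sketch of the filter in the context lines above;
# host and the sample links are hypothetical example values.
host   = 'http://example.com'
domain = false # true when -d / --ignore-domain is passed

['http://example.com/about', '/contact', 'http://other.org/page'].each do |new_link|
  if domain || (new_link =~ /^#{Regexp.escape(host)}/) || (new_link =~ /^\//)
    puts "crawl: #{new_link}" # same host, or a relative path
  else
    puts "skip:  #{new_link}" # external domain
  end
end
# crawl: http://example.com/about
# crawl: /contact
# skip:  http://other.org/page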