krawler 1.0.12 → 1.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/krawl +2 -2
- data/lib/krawler/version.rb +1 -1
- data/lib/krawler.rb +1 -1
- metadata +1 -1
data/bin/krawl
CHANGED
@@ -22,11 +22,11 @@ optparse = OptionParser.new do |opts|
|
|
22
22
|
end
|
23
23
|
|
24
24
|
opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do |d|
|
25
|
-
options[:
|
25
|
+
options[:d] = true
|
26
26
|
end
|
27
27
|
|
28
28
|
opts.on('-c', '--concurrent count', 'Crawl with count number of concurrent connections', 'Default: 4') do |c|
|
29
|
-
options[:
|
29
|
+
options[:c] = c.to_i
|
30
30
|
end
|
31
31
|
|
32
32
|
opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
|
data/lib/krawler/version.rb
CHANGED
data/lib/krawler.rb
CHANGED
@@ -127,7 +127,7 @@ module Krawler
|
|
127
127
|
rescue ArgumentError # junk link
|
128
128
|
next
|
129
129
|
end
|
130
|
-
|
130
|
+
|
131
131
|
if @domain || (new_link =~ /^#{Regexp.escape(@host)}/) || (new_link =~ /^\//) # don't crawl external domains
|
132
132
|
|
133
133
|
next if @crawled_links.include?(new_link) || @links_to_crawl.include?(new_link) # don't crawl what we've alread crawled
|