rcrawl 0.4.5 → 0.4.6
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/rcrawl/crawler.rb +4 -0
- data/lib/rcrawl/version.rb +1 -1
- metadata +1 -1
data/Rakefile
CHANGED
data/lib/rcrawl/crawler.rb
CHANGED
@@ -27,6 +27,10 @@ module Rcrawl
|
|
27
27
|
# Get link
|
28
28
|
url_server
|
29
29
|
next unless robot_safe? @url
|
30
|
+
if @url.include? '#'
|
31
|
+
print "... Anchor link found, skipping..."
|
32
|
+
next
|
33
|
+
end
|
30
34
|
# Parse robots.txt, then download document if robot_safe
|
31
35
|
fetch_http(@url)
|
32
36
|
# Store raw HTML in variable to read/reread as needed
|
data/lib/rcrawl/version.rb
CHANGED