pantopoda 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pantopoda/version.rb +1 -1
- data/lib/pantopoda.rb +5 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0b37fa28e3130b06c5458f8e5e37695d340b5c7
|
4
|
+
data.tar.gz: c251c2f9d6235c29c913917f7bbf4366bfce7e5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8a2675abf7fafe9c6e4f73bf0935d86d6f9469463d3a07b48afe34e25ea8a017a7dc239683917dae44a6f8c0c0cfa65b6b60fcec7409864096ae15450842efc
|
7
|
+
data.tar.gz: 20c6c6da2a3b323c94bf84e44d0584756efcc56efe8836fa7ace1939a7ee0ea26afad7eabf6c4a834e01730d5eea6b4c47fb41e441cd36c6af8427bc235cf569
|
data/lib/pantopoda/version.rb
CHANGED
data/lib/pantopoda.rb
CHANGED
@@ -14,6 +14,7 @@ module Pantopoda
|
|
14
14
|
@split_url_at_hash = options[:split_url_at_hash] ? options[:split_url_at_hash] : false
|
15
15
|
@exclude_urls_with_hash = options[:exclude_urls_with_hash] ? options[:exclude_urls_with_hash] : false
|
16
16
|
@exclude_urls_with_extensions = options[:exclude_urls_with_extensions] ? options[:exclude_urls_with_extensions] : false
|
17
|
+
@debug = options[:debug] ? options[:debug] : false
|
17
18
|
end
|
18
19
|
|
19
20
|
def crawl(options = {})
|
@@ -35,7 +36,7 @@ module Pantopoda
|
|
35
36
|
begin
|
36
37
|
ip,port,user,pass = nil
|
37
38
|
|
38
|
-
request = Typhoeus::Request.new(q, :timeout =>
|
39
|
+
request = Typhoeus::Request.new(q, :timeout => 100, :follow_location => true) if ip == nil
|
39
40
|
request.on_complete do |response|
|
40
41
|
yield response
|
41
42
|
links = Nokogiri::HTML.parse(response.body).xpath('.//a/@href')
|
@@ -67,7 +68,7 @@ module Pantopoda
|
|
67
68
|
end
|
68
69
|
|
69
70
|
def parse_domain(url)
|
70
|
-
puts "Parsing URL: #{url}"
|
71
|
+
puts "Parsing URL: #{url}" if @debug
|
71
72
|
|
72
73
|
begin
|
73
74
|
parsed_domain = Domainatrix.parse(url)
|
@@ -78,7 +79,7 @@ module Pantopoda
|
|
78
79
|
end
|
79
80
|
|
80
81
|
rescue NoMethodError, Addressable::URI::InvalidURIError => e
|
81
|
-
puts "URL Parsing Exception (#{url}) : #{e}"
|
82
|
+
puts "URL Parsing Exception (#{url}) : #{e}" if @debug
|
82
83
|
return nil
|
83
84
|
end
|
84
85
|
end
|
@@ -117,7 +118,7 @@ module Pantopoda
|
|
117
118
|
@exclude_urls_with_extensions.each do |e|
|
118
119
|
if(url.to_s.length > e.size && url.to_s[-e.size .. -1].downcase == e.to_s.downcase)
|
119
120
|
not_found = false
|
120
|
-
puts "#{e} Found At URL: #{url}"
|
121
|
+
puts "#{e} Found At URL: #{url}" if @debug
|
121
122
|
end
|
122
123
|
end
|
123
124
|
|