pantopoda 0.0.7 → 0.0.8
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pantopoda/version.rb +1 -1
- data/lib/pantopoda.rb +5 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a0b37fa28e3130b06c5458f8e5e37695d340b5c7
|
4
|
+
data.tar.gz: c251c2f9d6235c29c913917f7bbf4366bfce7e5f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8a2675abf7fafe9c6e4f73bf0935d86d6f9469463d3a07b48afe34e25ea8a017a7dc239683917dae44a6f8c0c0cfa65b6b60fcec7409864096ae15450842efc
|
7
|
+
data.tar.gz: 20c6c6da2a3b323c94bf84e44d0584756efcc56efe8836fa7ace1939a7ee0ea26afad7eabf6c4a834e01730d5eea6b4c47fb41e441cd36c6af8427bc235cf569
|
data/lib/pantopoda/version.rb
CHANGED
data/lib/pantopoda.rb
CHANGED
@@ -14,6 +14,7 @@ module Pantopoda
|
|
14
14
|
@split_url_at_hash = options[:split_url_at_hash] ? options[:split_url_at_hash] : false
|
15
15
|
@exclude_urls_with_hash = options[:exclude_urls_with_hash] ? options[:exclude_urls_with_hash] : false
|
16
16
|
@exclude_urls_with_extensions = options[:exclude_urls_with_extensions] ? options[:exclude_urls_with_extensions] : false
|
17
|
+
@debug = options[:debug] ? options[:debug] : false
|
17
18
|
end
|
18
19
|
|
19
20
|
def crawl(options = {})
|
@@ -35,7 +36,7 @@ module Pantopoda
|
|
35
36
|
begin
|
36
37
|
ip,port,user,pass = nil
|
37
38
|
|
38
|
-
request = Typhoeus::Request.new(q, :timeout =>
|
39
|
+
request = Typhoeus::Request.new(q, :timeout => 100, :follow_location => true) if ip == nil
|
39
40
|
request.on_complete do |response|
|
40
41
|
yield response
|
41
42
|
links = Nokogiri::HTML.parse(response.body).xpath('.//a/@href')
|
@@ -67,7 +68,7 @@ module Pantopoda
|
|
67
68
|
end
|
68
69
|
|
69
70
|
def parse_domain(url)
|
70
|
-
puts "Parsing URL: #{url}"
|
71
|
+
puts "Parsing URL: #{url}" if @debug
|
71
72
|
|
72
73
|
begin
|
73
74
|
parsed_domain = Domainatrix.parse(url)
|
@@ -78,7 +79,7 @@ module Pantopoda
|
|
78
79
|
end
|
79
80
|
|
80
81
|
rescue NoMethodError, Addressable::URI::InvalidURIError => e
|
81
|
-
puts "URL Parsing Exception (#{url}) : #{e}"
|
82
|
+
puts "URL Parsing Exception (#{url}) : #{e}" if @debug
|
82
83
|
return nil
|
83
84
|
end
|
84
85
|
end
|
@@ -117,7 +118,7 @@ module Pantopoda
|
|
117
118
|
@exclude_urls_with_extensions.each do |e|
|
118
119
|
if(url.to_s.length > e.size && url.to_s[-e.size .. -1].downcase == e.to_s.downcase)
|
119
120
|
not_found = false
|
120
|
-
puts "#{e} Found At URL: #{url}"
|
121
|
+
puts "#{e} Found At URL: #{url}" if @debug
|
121
122
|
end
|
122
123
|
end
|
123
124
|
|