gimme_poc 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ca0656c586244edfaaac44d81b092956e1ed801
4
- data.tar.gz: b2bbc85c51a79ba5a10a1a5e77d8084ac28adeee
3
+ metadata.gz: 35a045a491109a5ae34152577508585667371af0
4
+ data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
5
5
  SHA512:
6
- metadata.gz: f2e131c8e68fb8a55169f62b1d9a5662901f6fcb45907878c30d8d700d784b66aa8cae0305c060a611da7d60b810cc2d56bb56959db1da378a3088fa6568d707
7
- data.tar.gz: 6f32874d9fc287ce588baa0e91fa854ed7f75f045f88d798ebf02a0e781918f50d6922ffd06578965d7c033cacdb6a927d03efc7b7967c6a225d7579a8631853
6
+ metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
7
+ data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
data/lib/gimme_poc.rb CHANGED
@@ -14,10 +14,10 @@ module Gimme
14
14
  attr_accessor :page, :contact, :contact_links, :url
15
15
 
16
16
  # Simple regex that looks for ###.#### or ###-####
17
- PHONE_REGEX = %r{/\d{3}[-]\d{4}|\d{3}[.]\d{4}/}
17
+ PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
18
18
 
19
19
  # Captures http:// and https://
20
- HTTP_REGEX = %r{/\A\bhttps:\/\/|\bhttp:\/\//}
20
+ HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
21
21
 
22
22
  ##
23
23
  # The main method!
@@ -28,19 +28,26 @@ module Gimme
28
28
  arr.each do |url|
29
29
  puts '-' * 50
30
30
  puts "starting: #{url}"
31
+ unless LazyDomain.valid?(url)
32
+ puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
33
+ next
34
+ end
31
35
  case
32
36
  when subdomain?(url)
33
- get(orig_domain(url)) if get(url).nil?
37
+ puts '(This url is a subdomain. Will try both sub and root domain.)'
38
+ next if get(url).nil? && get(orig_domain(url)).nil?
34
39
  else
35
40
  next if get(url).nil?
36
41
  end
37
- puts 'now looking for contact pages'
38
42
  start_contact_links
39
43
  mechpage = go_to_contact_page(url)
40
- next if mechpage.nil?
41
- save_available_contacts(mechpage.uri.to_s)
44
+ if mechpage.nil?
45
+ puts '(empty page, exiting.)'
46
+ else
47
+ save_available_contacts(mechpage.uri.to_s)
48
+ end
42
49
  end
43
- Search.all_sites
50
+ Search.all_sites # Return results from all sites.
44
51
  end
45
52
 
46
53
  # Convenience method.
@@ -15,6 +15,7 @@ module Gimme
15
15
  # If no contact link is available, it will blind test '../contact'.
16
16
  # Returns nil if nothing can be found.
17
17
  def contact_page(url)
18
+ puts 'now looking for contact pages'
18
19
  contact_link = link_with_href(/contact|Contact/)
19
20
  contact_test_page = merged_link('../contact')
20
21
 
@@ -37,17 +38,17 @@ module Gimme
37
38
  # Returns nil if nothing can be found.
38
39
  def english_contact_page(url)
39
40
  puts "\nLooking for english page..."
40
- english_link = page.link_with(href: /english|English/)
41
+ english_link = page.link_with(href: %r{en\/|english|English})
41
42
  test_en_page = merged_link('../en')
42
43
  test_english_page = merged_link('../english')
43
44
 
44
45
  case
45
46
  when !english_link.nil?
46
47
  puts "#{'Success:'.green} found english link!"
47
- get(merged(english_link)) # already merged link
48
+ get(merged_link(english_link.uri))
48
49
  else
49
50
  blind_test(test_en_page) || blind_test(test_english_page)
50
- puts 'ready to start again'
51
+ puts "\n(restarting)\n"
51
52
  contact_page(url)
52
53
  end
53
54
  end
@@ -20,6 +20,11 @@ module Gimme
20
20
  }
21
21
  end
22
22
 
23
+ # Starts/Restarts @contact_links hash
24
+ def start_contact_links
25
+ @contact_links = {}
26
+ end
27
+
23
28
  # Used in save_available_contacts to save each valid link.
24
29
  def save_link(key, url)
25
30
  return if key.nil? || url.nil?
@@ -36,18 +41,22 @@ module Gimme
36
41
 
37
42
  # Saves any available contact info to @contact_links.
38
43
  def save_available_contacts(url, hsh = scan_for_contacts)
39
- return unless something_to_save?(hsh)
40
- puts "\nsaving available contact information from #{url}"
41
- if hsh.is_a?(Hash)
42
- hsh.each do |k, v|
43
- save_link(k, v) # saves to @contact_links
44
+ if something_to_save?(hsh)
45
+ puts "\nsaving available contact information from #{url}"
46
+ if hsh.is_a?(Hash)
47
+ hsh.each do |k, v|
48
+ save_link(k, v) # saves to @contact_links
49
+ end
50
+ delete_failures(@contact_links)
51
+ puts "#{@contact_links}".cyan # same as @contact_links
52
+ else
53
+ fail ArgumentError, "expected hash but got #{hsh.class}"
44
54
  end
45
- delete_failures(@contact_links)
46
- puts "#{@contact_links}".cyan # same as @contact_links
55
+ Search::POC.new(url, @contact_links)
47
56
  else
48
- fail ArgumentError, "expected hash but got #{hsh.class}"
57
+ puts '(nothing to save)'
58
+ return
49
59
  end
50
- Search::POC.new(url, @contact_links)
51
60
  end
52
61
  end
53
62
  end
@@ -1,3 +1,3 @@
1
1
  module Gimme
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
data/lib/gimme_poc/web.rb CHANGED
@@ -10,19 +10,24 @@ module Gimme
10
10
  url = format_url(str)
11
11
  puts "sending GET request to: #{url}"
12
12
  sleep(0.1)
13
- @page = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }.get(url)
13
+ @page = Mechanize.new do |a|
14
+ a.user_agent_alias = 'Mac Safari'
15
+ a.open_timeout = 7
16
+ a.read_timeout = 7
17
+ a.idle_timeout = 7
18
+ a.redirect_ok = true
19
+ end.get(url)
20
+
14
21
  rescue Mechanize::ResponseCodeError => e
15
22
  puts "#{'Response Error:'.red} #{e}"
16
23
  rescue SocketError => e
17
24
  puts "#{'Socket Error:'.red} #{e}"
25
+ rescue Net::OpenTimeout => e
26
+ puts "#{'Connection Timeout:'.red} #{e}"
18
27
  rescue Errno::ETIMEDOUT => e
19
28
  puts "#{'Connection Timeout:'.red} #{e}"
20
- end
21
-
22
- # Starts/Restarts @contacts_links hash
23
- def start_contact_links
24
- puts 'setting contact links hash to {}'
25
- @contact_links = {}
29
+ rescue Net::HTTP::Persistent::Error
30
+ puts "#{'Connection Timeout:'.red} read timeout, too many resets."
26
31
  end
27
32
 
28
33
  ##
@@ -32,6 +37,11 @@ module Gimme
32
37
  LazyDomain.autohttp(str)
33
38
  end
34
39
 
40
+ # Used for subdomain check. Not a permanent change to url variable.
41
+ def unformat_url(str)
42
+ str.gsub(HTTP_REGEX, '')
43
+ end
44
+
35
45
  ##
36
46
  # Outputs domain of a url. Useful if subdomains are given to GimmePOC
37
47
  # and they don't work.
@@ -40,6 +50,8 @@ module Gimme
40
50
  # Given http://maps.google.com, returns 'google.com'.
41
51
  def orig_domain(str)
42
52
  LazyDomain.parse(str).domain
53
+ rescue PublicSuffix::DomainInvalid => e
54
+ puts "#{'Invalid Domain:'.red} #{e}"
43
55
  end
44
56
 
45
57
  ##
@@ -63,7 +75,7 @@ module Gimme
63
75
 
64
76
  # Boolean, returns true if url is not identical to original domain.
65
77
  def subdomain?(str)
66
- (str != orig_domain(str))
78
+ (unformat_url(str) != orig_domain(str))
67
79
  end
68
80
 
69
81
  # TODO: Sometimes DNS will do a redirect and not give a 404.
@@ -72,7 +84,7 @@ module Gimme
72
84
  # Blindly tests to see if a url goes through. If there is a 404 error,
73
85
  # this will return nil.
74
86
  def blind_test(url)
75
- puts "\nblind testing: #{url}"
87
+ puts "\n(blind testing: #{url})"
76
88
  get(url)
77
89
  end
78
90
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gimme_poc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-10 00:00:00.000000000 Z
11
+ date: 2015-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize