gimme_poc 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ca0656c586244edfaaac44d81b092956e1ed801
4
- data.tar.gz: b2bbc85c51a79ba5a10a1a5e77d8084ac28adeee
3
+ metadata.gz: 35a045a491109a5ae34152577508585667371af0
4
+ data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
5
5
  SHA512:
6
- metadata.gz: f2e131c8e68fb8a55169f62b1d9a5662901f6fcb45907878c30d8d700d784b66aa8cae0305c060a611da7d60b810cc2d56bb56959db1da378a3088fa6568d707
7
- data.tar.gz: 6f32874d9fc287ce588baa0e91fa854ed7f75f045f88d798ebf02a0e781918f50d6922ffd06578965d7c033cacdb6a927d03efc7b7967c6a225d7579a8631853
6
+ metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
7
+ data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
data/lib/gimme_poc.rb CHANGED
@@ -14,10 +14,10 @@ module Gimme
14
14
  attr_accessor :page, :contact, :contact_links, :url
15
15
 
16
16
  # Simple regex that looks for ###.#### or ###-####
17
- PHONE_REGEX = %r{/\d{3}[-]\d{4}|\d{3}[.]\d{4}/}
17
+ PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
18
18
 
19
19
  # Captures http:// and https://
20
- HTTP_REGEX = %r{/\A\bhttps:\/\/|\bhttp:\/\//}
20
+ HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
21
21
 
22
22
  ##
23
23
  # The main method!
@@ -28,19 +28,26 @@ module Gimme
28
28
  arr.each do |url|
29
29
  puts '-' * 50
30
30
  puts "starting: #{url}"
31
+ unless LazyDomain.valid?(url)
32
+ puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
33
+ next
34
+ end
31
35
  case
32
36
  when subdomain?(url)
33
- get(orig_domain(url)) if get(url).nil?
37
+ puts '(This url is a subdomain. Will try both sub and root domain.)'
38
+ next if get(url).nil? && get(orig_domain(url)).nil?
34
39
  else
35
40
  next if get(url).nil?
36
41
  end
37
- puts 'now looking for contact pages'
38
42
  start_contact_links
39
43
  mechpage = go_to_contact_page(url)
40
- next if mechpage.nil?
41
- save_available_contacts(mechpage.uri.to_s)
44
+ if mechpage.nil?
45
+ puts '(empty page, exiting.)'
46
+ else
47
+ save_available_contacts(mechpage.uri.to_s)
48
+ end
42
49
  end
43
- Search.all_sites
50
+ Search.all_sites # Return results from all sites.
44
51
  end
45
52
 
46
53
  # Convenience method.
@@ -15,6 +15,7 @@ module Gimme
15
15
  # If no contact link is available, it will blind test '../contact'.
16
16
  # Returns nil if nothing can be found.
17
17
  def contact_page(url)
18
+ puts 'now looking for contact pages'
18
19
  contact_link = link_with_href(/contact|Contact/)
19
20
  contact_test_page = merged_link('../contact')
20
21
 
@@ -37,17 +38,17 @@ module Gimme
37
38
  # Returns nil if nothing can be found.
38
39
  def english_contact_page(url)
39
40
  puts "\nLooking for english page..."
40
- english_link = page.link_with(href: /english|English/)
41
+ english_link = page.link_with(href: %r{en\/|english|English})
41
42
  test_en_page = merged_link('../en')
42
43
  test_english_page = merged_link('../english')
43
44
 
44
45
  case
45
46
  when !english_link.nil?
46
47
  puts "#{'Success:'.green} found english link!"
47
- get(merged(english_link)) # already merged link
48
+ get(merged_link(english_link.uri))
48
49
  else
49
50
  blind_test(test_en_page) || blind_test(test_english_page)
50
- puts 'ready to start again'
51
+ puts "\n(restarting)\n"
51
52
  contact_page(url)
52
53
  end
53
54
  end
@@ -20,6 +20,11 @@ module Gimme
20
20
  }
21
21
  end
22
22
 
23
+ # Starts/Restarts @contact_links hash
24
+ def start_contact_links
25
+ @contact_links = {}
26
+ end
27
+
23
28
  # Used in save_available_contacts to save each valid link.
24
29
  def save_link(key, url)
25
30
  return if key.nil? || url.nil?
@@ -36,18 +41,22 @@ module Gimme
36
41
 
37
42
  # Saves any available contact info to @contact_links.
38
43
  def save_available_contacts(url, hsh = scan_for_contacts)
39
- return unless something_to_save?(hsh)
40
- puts "\nsaving available contact information from #{url}"
41
- if hsh.is_a?(Hash)
42
- hsh.each do |k, v|
43
- save_link(k, v) # saves to @contact_links
44
+ if something_to_save?(hsh)
45
+ puts "\nsaving available contact information from #{url}"
46
+ if hsh.is_a?(Hash)
47
+ hsh.each do |k, v|
48
+ save_link(k, v) # saves to @contact_links
49
+ end
50
+ delete_failures(@contact_links)
51
+ puts "#{@contact_links}".cyan # same as @contact_links
52
+ else
53
+ fail ArgumentError, "expected hash but got #{hsh.class}"
44
54
  end
45
- delete_failures(@contact_links)
46
- puts "#{@contact_links}".cyan # same as @contact_links
55
+ Search::POC.new(url, @contact_links)
47
56
  else
48
- fail ArgumentError, "expected hash but got #{hsh.class}"
57
+ puts '(nothing to save)'
58
+ return
49
59
  end
50
- Search::POC.new(url, @contact_links)
51
60
  end
52
61
  end
53
62
  end
@@ -1,3 +1,3 @@
1
1
  module Gimme
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
data/lib/gimme_poc/web.rb CHANGED
@@ -10,19 +10,24 @@ module Gimme
10
10
  url = format_url(str)
11
11
  puts "sending GET request to: #{url}"
12
12
  sleep(0.1)
13
- @page = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }.get(url)
13
+ @page = Mechanize.new do |a|
14
+ a.user_agent_alias = 'Mac Safari'
15
+ a.open_timeout = 7
16
+ a.read_timeout = 7
17
+ a.idle_timeout = 7
18
+ a.redirect_ok = true
19
+ end.get(url)
20
+
14
21
  rescue Mechanize::ResponseCodeError => e
15
22
  puts "#{'Response Error:'.red} #{e}"
16
23
  rescue SocketError => e
17
24
  puts "#{'Socket Error:'.red} #{e}"
25
+ rescue Net::OpenTimeout => e
26
+ puts "#{'Connection Timeout:'.red} #{e}"
18
27
  rescue Errno::ETIMEDOUT => e
19
28
  puts "#{'Connection Timeout:'.red} #{e}"
20
- end
21
-
22
- # Starts/Restarts @contacts_links hash
23
- def start_contact_links
24
- puts 'setting contact links hash to {}'
25
- @contact_links = {}
29
+ rescue Net::HTTP::Persistent::Error
30
+ puts "#{'Connection Timeout:'.red} read timeout, too many resets."
26
31
  end
27
32
 
28
33
  ##
@@ -32,6 +37,11 @@ module Gimme
32
37
  LazyDomain.autohttp(str)
33
38
  end
34
39
 
40
+ # Used for subdomain check. Not a permanent change to url variable.
41
+ def unformat_url(str)
42
+ str.gsub(HTTP_REGEX, '')
43
+ end
44
+
35
45
  ##
36
46
  # Outputs domain of a url. Useful if subdomains are given to GimmePOC
37
47
  # and they don't work.
@@ -40,6 +50,8 @@ module Gimme
40
50
  # Given http://maps.google.com, returns 'google.com'.
41
51
  def orig_domain(str)
42
52
  LazyDomain.parse(str).domain
53
+ rescue PublicSuffix::DomainInvalid => e
54
+ puts "#{'Invalid Domain:'.red} #{e}"
43
55
  end
44
56
 
45
57
  ##
@@ -63,7 +75,7 @@ module Gimme
63
75
 
64
76
  # Boolean, returns true if url is not identical to original domain.
65
77
  def subdomain?(str)
66
- (str != orig_domain(str))
78
+ (unformat_url(str) != orig_domain(str))
67
79
  end
68
80
 
69
81
  # TODO: Sometimes DNS will do a redirect and not give a 404.
@@ -72,7 +84,7 @@ module Gimme
72
84
  # Blindly tests to see if a url goes through. If there is a 404 error,
73
85
  # this will return nil.
74
86
  def blind_test(url)
75
- puts "\nblind testing: #{url}"
87
+ puts "\n(blind testing: #{url})"
76
88
  get(url)
77
89
  end
78
90
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gimme_poc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-10 00:00:00.000000000 Z
11
+ date: 2015-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize