gimme_poc 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/gimme_poc.rb +14 -7
- data/lib/gimme_poc/contactpage.rb +4 -3
- data/lib/gimme_poc/save.rb +18 -9
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +21 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35a045a491109a5ae34152577508585667371af0
|
4
|
+
data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
|
7
|
+
data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
|
data/lib/gimme_poc.rb
CHANGED
@@ -14,10 +14,10 @@ module Gimme
|
|
14
14
|
attr_accessor :page, :contact, :contact_links, :url
|
15
15
|
|
16
16
|
# Simple regex that looks for ###.#### or ###-####
|
17
|
-
PHONE_REGEX =
|
17
|
+
PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
|
18
18
|
|
19
19
|
# Captures http:// and https://
|
20
|
-
HTTP_REGEX = %r{
|
20
|
+
HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
|
21
21
|
|
22
22
|
##
|
23
23
|
# The main method!
|
@@ -28,19 +28,26 @@ module Gimme
|
|
28
28
|
arr.each do |url|
|
29
29
|
puts '-' * 50
|
30
30
|
puts "starting: #{url}"
|
31
|
+
unless LazyDomain.valid?(url)
|
32
|
+
puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
|
33
|
+
next
|
34
|
+
end
|
31
35
|
case
|
32
36
|
when subdomain?(url)
|
33
|
-
|
37
|
+
puts '(This url is a subdomain. Will try both sub and root domain.)'
|
38
|
+
next if get(url).nil? && get(orig_domain(url)).nil?
|
34
39
|
else
|
35
40
|
next if get(url).nil?
|
36
41
|
end
|
37
|
-
puts 'now looking for contact pages'
|
38
42
|
start_contact_links
|
39
43
|
mechpage = go_to_contact_page(url)
|
40
|
-
|
41
|
-
|
44
|
+
if mechpage.nil?
|
45
|
+
puts '(empty page, exiting.)'
|
46
|
+
else
|
47
|
+
save_available_contacts(mechpage.uri.to_s)
|
48
|
+
end
|
42
49
|
end
|
43
|
-
Search.all_sites
|
50
|
+
Search.all_sites # Return results from all sites.
|
44
51
|
end
|
45
52
|
|
46
53
|
# Convenience method.
|
data/lib/gimme_poc/contactpage.rb
CHANGED
@@ -15,6 +15,7 @@ module Gimme
|
|
15
15
|
# If no contact link is available, it will blind test '../contact'.
|
16
16
|
# Returns nil if nothing can be found.
|
17
17
|
def contact_page(url)
|
18
|
+
puts 'now looking for contact pages'
|
18
19
|
contact_link = link_with_href(/contact|Contact/)
|
19
20
|
contact_test_page = merged_link('../contact')
|
20
21
|
|
@@ -37,17 +38,17 @@ module Gimme
|
|
37
38
|
# Returns nil if nothing can be found.
|
38
39
|
def english_contact_page(url)
|
39
40
|
puts "\nLooking for english page..."
|
40
|
-
english_link = page.link_with(href:
|
41
|
+
english_link = page.link_with(href: %r{en\/|english|English})
|
41
42
|
test_en_page = merged_link('../en')
|
42
43
|
test_english_page = merged_link('../english')
|
43
44
|
|
44
45
|
case
|
45
46
|
when !english_link.nil?
|
46
47
|
puts "#{'Success:'.green} found english link!"
|
47
|
-
get(
|
48
|
+
get(merged_link(english_link.uri))
|
48
49
|
else
|
49
50
|
blind_test(test_en_page) || blind_test(test_english_page)
|
50
|
-
puts
|
51
|
+
puts "\n(restarting)\n"
|
51
52
|
contact_page(url)
|
52
53
|
end
|
53
54
|
end
|
data/lib/gimme_poc/save.rb
CHANGED
@@ -20,6 +20,11 @@ module Gimme
|
|
20
20
|
}
|
21
21
|
end
|
22
22
|
|
23
|
+
# Starts/Restarts @contact_links hash
|
24
|
+
def start_contact_links
|
25
|
+
@contact_links = {}
|
26
|
+
end
|
27
|
+
|
23
28
|
# Used in save_available_contacts to save each valid link.
|
24
29
|
def save_link(key, url)
|
25
30
|
return if key.nil? || url.nil?
|
@@ -36,18 +41,22 @@ module Gimme
|
|
36
41
|
|
37
42
|
# Saves any available contact info to @contact_links.
|
38
43
|
def save_available_contacts(url, hsh = scan_for_contacts)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
if something_to_save?(hsh)
|
45
|
+
puts "\nsaving available contact information from #{url}"
|
46
|
+
if hsh.is_a?(Hash)
|
47
|
+
hsh.each do |k, v|
|
48
|
+
save_link(k, v) # saves to @contact_links
|
49
|
+
end
|
50
|
+
delete_failures(@contact_links)
|
51
|
+
puts "#{@contact_links}".cyan # same as @contact_links
|
52
|
+
else
|
53
|
+
fail ArgumentError, "expected hash but got #{hsh.class}"
|
44
54
|
end
|
45
|
-
|
46
|
-
puts "#{@contact_links}".cyan # same as @contact_links
|
55
|
+
Search::POC.new(url, @contact_links)
|
47
56
|
else
|
48
|
-
|
57
|
+
puts '(nothing to save)'
|
58
|
+
return
|
49
59
|
end
|
50
|
-
Search::POC.new(url, @contact_links)
|
51
60
|
end
|
52
61
|
end
|
53
62
|
end
|
data/lib/gimme_poc/version.rb
CHANGED
data/lib/gimme_poc/web.rb
CHANGED
@@ -10,19 +10,24 @@ module Gimme
|
|
10
10
|
url = format_url(str)
|
11
11
|
puts "sending GET request to: #{url}"
|
12
12
|
sleep(0.1)
|
13
|
-
@page = Mechanize.new
|
13
|
+
@page = Mechanize.new do |a|
|
14
|
+
a.user_agent_alias = 'Mac Safari'
|
15
|
+
a.open_timeout = 7
|
16
|
+
a.read_timeout = 7
|
17
|
+
a.idle_timeout = 7
|
18
|
+
a.redirect_ok = true
|
19
|
+
end.get(url)
|
20
|
+
|
14
21
|
rescue Mechanize::ResponseCodeError => e
|
15
22
|
puts "#{'Response Error:'.red} #{e}"
|
16
23
|
rescue SocketError => e
|
17
24
|
puts "#{'Socket Error:'.red} #{e}"
|
25
|
+
rescue Net::OpenTimeout => e
|
26
|
+
puts "#{'Connection Timeout:'.red} #{e}"
|
18
27
|
rescue Errno::ETIMEDOUT => e
|
19
28
|
puts "#{'Connection Timeout:'.red} #{e}"
|
20
|
-
|
21
|
-
|
22
|
-
# Starts/Restarts @contacts_links hash
|
23
|
-
def start_contact_links
|
24
|
-
puts 'setting contact links hash to {}'
|
25
|
-
@contact_links = {}
|
29
|
+
rescue Net::HTTP::Persistent::Error
|
30
|
+
puts "#{'Connection Timeout:'.red} read timeout, too many resets."
|
26
31
|
end
|
27
32
|
|
28
33
|
##
|
@@ -32,6 +37,11 @@ module Gimme
|
|
32
37
|
LazyDomain.autohttp(str)
|
33
38
|
end
|
34
39
|
|
40
|
+
# Used for subdomain check. Not a permanent change to url variable.
|
41
|
+
def unformat_url(str)
|
42
|
+
str.gsub(HTTP_REGEX, '')
|
43
|
+
end
|
44
|
+
|
35
45
|
##
|
36
46
|
# Outputs domain of a url. Useful if subdomains are given to GimmePOC
|
37
47
|
# and they don't work.
|
@@ -40,6 +50,8 @@ module Gimme
|
|
40
50
|
# Given http://maps.google.com, returns 'google.com'.
|
41
51
|
def orig_domain(str)
|
42
52
|
LazyDomain.parse(str).domain
|
53
|
+
rescue PublicSuffix::DomainInvalid => e
|
54
|
+
puts "#{'Invalid Domain:'.red} #{e}"
|
43
55
|
end
|
44
56
|
|
45
57
|
##
|
@@ -63,7 +75,7 @@ module Gimme
|
|
63
75
|
|
64
76
|
# Boolean, returns true if url is not identical to original domain.
|
65
77
|
def subdomain?(str)
|
66
|
-
(str != orig_domain(str))
|
78
|
+
(unformat_url(str) != orig_domain(str))
|
67
79
|
end
|
68
80
|
|
69
81
|
# TODO: Sometimes DNS will do a redirect and not give a 404.
|
@@ -72,7 +84,7 @@ module Gimme
|
|
72
84
|
# Blindly tests to see if a url goes through. If there is a 404 error,
|
73
85
|
# this will return nil.
|
74
86
|
def blind_test(url)
|
75
|
-
puts "\
|
87
|
+
puts "\n(blind testing: #{url})"
|
76
88
|
get(url)
|
77
89
|
end
|
78
90
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimme_poc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|