gimme_poc 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gimme_poc.rb +14 -7
- data/lib/gimme_poc/contactpage.rb +4 -3
- data/lib/gimme_poc/save.rb +18 -9
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +21 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35a045a491109a5ae34152577508585667371af0
|
4
|
+
data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
|
7
|
+
data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
|
data/lib/gimme_poc.rb
CHANGED
@@ -14,10 +14,10 @@ module Gimme
|
|
14
14
|
attr_accessor :page, :contact, :contact_links, :url
|
15
15
|
|
16
16
|
# Simple regex that looks for ###.#### or ###-####
|
17
|
-
PHONE_REGEX =
|
17
|
+
PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
|
18
18
|
|
19
19
|
# Captures http:// and https://
|
20
|
-
HTTP_REGEX = %r{
|
20
|
+
HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
|
21
21
|
|
22
22
|
##
|
23
23
|
# The main method!
|
@@ -28,19 +28,26 @@ module Gimme
|
|
28
28
|
arr.each do |url|
|
29
29
|
puts '-' * 50
|
30
30
|
puts "starting: #{url}"
|
31
|
+
unless LazyDomain.valid?(url)
|
32
|
+
puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
|
33
|
+
next
|
34
|
+
end
|
31
35
|
case
|
32
36
|
when subdomain?(url)
|
33
|
-
|
37
|
+
puts '(This url is a subdomain. Will try both sub and root domain.)'
|
38
|
+
next if get(url).nil? && get(orig_domain(url)).nil?
|
34
39
|
else
|
35
40
|
next if get(url).nil?
|
36
41
|
end
|
37
|
-
puts 'now looking for contact pages'
|
38
42
|
start_contact_links
|
39
43
|
mechpage = go_to_contact_page(url)
|
40
|
-
|
41
|
-
|
44
|
+
if mechpage.nil?
|
45
|
+
puts '(empty page, exiting.)'
|
46
|
+
else
|
47
|
+
save_available_contacts(mechpage.uri.to_s)
|
48
|
+
end
|
42
49
|
end
|
43
|
-
Search.all_sites
|
50
|
+
Search.all_sites # Return results from all sites.
|
44
51
|
end
|
45
52
|
|
46
53
|
# Convenience method.
|
@@ -15,6 +15,7 @@ module Gimme
|
|
15
15
|
# If no contact link is available, it will blind test '../contact'.
|
16
16
|
# Returns nil if nothing can be found.
|
17
17
|
def contact_page(url)
|
18
|
+
puts 'now looking for contact pages'
|
18
19
|
contact_link = link_with_href(/contact|Contact/)
|
19
20
|
contact_test_page = merged_link('../contact')
|
20
21
|
|
@@ -37,17 +38,17 @@ module Gimme
|
|
37
38
|
# Returns nil if nothing can be found.
|
38
39
|
def english_contact_page(url)
|
39
40
|
puts "\nLooking for english page..."
|
40
|
-
english_link = page.link_with(href:
|
41
|
+
english_link = page.link_with(href: %r{en\/|english|English})
|
41
42
|
test_en_page = merged_link('../en')
|
42
43
|
test_english_page = merged_link('../english')
|
43
44
|
|
44
45
|
case
|
45
46
|
when !english_link.nil?
|
46
47
|
puts "#{'Success:'.green} found english link!"
|
47
|
-
get(
|
48
|
+
get(merged_link(english_link.uri))
|
48
49
|
else
|
49
50
|
blind_test(test_en_page) || blind_test(test_english_page)
|
50
|
-
puts
|
51
|
+
puts "\n(restarting)\n"
|
51
52
|
contact_page(url)
|
52
53
|
end
|
53
54
|
end
|
data/lib/gimme_poc/save.rb
CHANGED
@@ -20,6 +20,11 @@ module Gimme
|
|
20
20
|
}
|
21
21
|
end
|
22
22
|
|
23
|
+
# Starts/Restarts the @contact_links hash
|
24
|
+
def start_contact_links
|
25
|
+
@contact_links = {}
|
26
|
+
end
|
27
|
+
|
23
28
|
# Used in save_available_contacts to save each valid link.
|
24
29
|
def save_link(key, url)
|
25
30
|
return if key.nil? || url.nil?
|
@@ -36,18 +41,22 @@ module Gimme
|
|
36
41
|
|
37
42
|
# Saves any available contact info to @contact_links.
|
38
43
|
def save_available_contacts(url, hsh = scan_for_contacts)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
if something_to_save?(hsh)
|
45
|
+
puts "\nsaving available contact information from #{url}"
|
46
|
+
if hsh.is_a?(Hash)
|
47
|
+
hsh.each do |k, v|
|
48
|
+
save_link(k, v) # saves to @contact_links
|
49
|
+
end
|
50
|
+
delete_failures(@contact_links)
|
51
|
+
puts "#{@contact_links}".cyan # same as @contact_links
|
52
|
+
else
|
53
|
+
fail ArgumentError, "expected hash but got #{hsh.class}"
|
44
54
|
end
|
45
|
-
|
46
|
-
puts "#{@contact_links}".cyan # same as @contact_links
|
55
|
+
Search::POC.new(url, @contact_links)
|
47
56
|
else
|
48
|
-
|
57
|
+
puts '(nothing to save)'
|
58
|
+
return
|
49
59
|
end
|
50
|
-
Search::POC.new(url, @contact_links)
|
51
60
|
end
|
52
61
|
end
|
53
62
|
end
|
data/lib/gimme_poc/version.rb
CHANGED
data/lib/gimme_poc/web.rb
CHANGED
@@ -10,19 +10,24 @@ module Gimme
|
|
10
10
|
url = format_url(str)
|
11
11
|
puts "sending GET request to: #{url}"
|
12
12
|
sleep(0.1)
|
13
|
-
@page = Mechanize.new
|
13
|
+
@page = Mechanize.new do |a|
|
14
|
+
a.user_agent_alias = 'Mac Safari'
|
15
|
+
a.open_timeout = 7
|
16
|
+
a.read_timeout = 7
|
17
|
+
a.idle_timeout = 7
|
18
|
+
a.redirect_ok = true
|
19
|
+
end.get(url)
|
20
|
+
|
14
21
|
rescue Mechanize::ResponseCodeError => e
|
15
22
|
puts "#{'Response Error:'.red} #{e}"
|
16
23
|
rescue SocketError => e
|
17
24
|
puts "#{'Socket Error:'.red} #{e}"
|
25
|
+
rescue Net::OpenTimeout => e
|
26
|
+
puts "#{'Connection Timeout:'.red} #{e}"
|
18
27
|
rescue Errno::ETIMEDOUT => e
|
19
28
|
puts "#{'Connection Timeout:'.red} #{e}"
|
20
|
-
|
21
|
-
|
22
|
-
# Starts/Restarts @contacts_links hash
|
23
|
-
def start_contact_links
|
24
|
-
puts 'setting contact links hash to {}'
|
25
|
-
@contact_links = {}
|
29
|
+
rescue Net::HTTP::Persistent::Error
|
30
|
+
puts "#{'Connection Timeout:'.red} read timeout, too many resets."
|
26
31
|
end
|
27
32
|
|
28
33
|
##
|
@@ -32,6 +37,11 @@ module Gimme
|
|
32
37
|
LazyDomain.autohttp(str)
|
33
38
|
end
|
34
39
|
|
40
|
+
# Used for the subdomain check. Returns a new string with the http/https prefix removed; the url passed in is not modified.
|
41
|
+
def unformat_url(str)
|
42
|
+
str.gsub(HTTP_REGEX, '')
|
43
|
+
end
|
44
|
+
|
35
45
|
##
|
36
46
|
# Outputs domain of a url. Useful if subdomains are given to GimmePOC
|
37
47
|
# and they don't work.
|
@@ -40,6 +50,8 @@ module Gimme
|
|
40
50
|
# Given http://maps.google.com, returns 'google.com'.
|
41
51
|
def orig_domain(str)
|
42
52
|
LazyDomain.parse(str).domain
|
53
|
+
rescue PublicSuffix::DomainInvalid => e
|
54
|
+
puts "#{'Invalid Domain:'.red} #{e}"
|
43
55
|
end
|
44
56
|
|
45
57
|
##
|
@@ -63,7 +75,7 @@ module Gimme
|
|
63
75
|
|
64
76
|
# Boolean, returns true if url is not identical to original domain.
|
65
77
|
def subdomain?(str)
|
66
|
-
(str != orig_domain(str))
|
78
|
+
(unformat_url(str) != orig_domain(str))
|
67
79
|
end
|
68
80
|
|
69
81
|
# TODO: Sometimes DNS will do a redirect and not give a 404.
|
@@ -72,7 +84,7 @@ module Gimme
|
|
72
84
|
# Blindly tests to see if a url goes through. If there is a 404 error,
|
73
85
|
# this will return nil.
|
74
86
|
def blind_test(url)
|
75
|
-
puts "\
|
87
|
+
puts "\n(blind testing: #{url})"
|
76
88
|
get(url)
|
77
89
|
end
|
78
90
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimme_poc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|