gimme_poc 0.0.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -31
- data/Rakefile +15 -0
- data/lib/gimme_poc.rb +32 -25
- data/lib/gimme_poc/contactpage.rb +47 -48
- data/lib/gimme_poc/logger.rb +16 -0
- data/lib/gimme_poc/logger/messages.rb +77 -0
- data/lib/gimme_poc/poc.rb +6 -5
- data/lib/gimme_poc/questions.rb +23 -29
- data/lib/gimme_poc/save.rb +60 -52
- data/lib/gimme_poc/test_case.rb +329 -0
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +85 -79
- metadata +39 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
|
4
|
+
data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
|
7
|
+
data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458
|
data/README.md
CHANGED
@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
|
|
10
10
|
## Installation
|
11
11
|
|
12
12
|
```
|
13
|
-
gem install gimme_poc
|
13
|
+
$ gem install gimme_poc
|
14
14
|
|
15
15
|
```
|
16
16
|
|
17
17
|
## Set Up
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
require 'gimme_poc'
|
20
|
+
require 'gimme_poc'
|
21
21
|
|
22
22
|
```
|
23
23
|
|
24
24
|
## How it works
|
25
25
|
|
26
|
-
Gimme POC is easy to use! Simply run this command.
|
27
|
-
|
28
26
|
```ruby
|
29
27
|
|
30
28
|
Gimme.poc 'http://example.com'
|
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
|
|
56
54
|
|
57
55
|
```
|
58
56
|
|
59
|
-
## Referencing the search results
|
60
|
-
|
61
|
-
To use your search results, simply run:
|
62
|
-
|
63
|
-
```ruby
|
64
|
-
|
65
|
-
Gimme.memory
|
66
|
-
|
67
|
-
```
|
68
|
-
|
69
|
-
## Clearing the search results
|
70
|
-
|
71
|
-
To clear search results and start afresh, run:
|
72
|
-
|
73
|
-
```ruby
|
74
|
-
|
75
|
-
Gimme.reset!
|
76
|
-
|
77
|
-
```
|
78
|
-
|
79
|
-
## To do:
|
80
|
-
|
81
|
-
- Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
|
82
|
-
- Work on false positives of bad urls. (Bad urls should be skipped + DNS redirects don't give 404 errors)
|
83
|
-
|
84
|
-
|
85
|
-
More to follow...
|
data/Rakefile
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new(:test) do |test|
|
6
|
+
test.libs << 'lib' << 'test'
|
7
|
+
test.pattern = 'test/**/test*.rb'
|
8
|
+
test.verbose = true
|
9
|
+
end
|
3
10
|
|
4
11
|
desc 'Open console with gimme_poc loaded'
|
5
12
|
task :console do
|
6
13
|
exec 'pry -r ./lib/gimme_poc.rb'
|
7
14
|
end
|
15
|
+
|
16
|
+
desc 'make a release'
|
17
|
+
task :release do
|
18
|
+
exec './script/release'
|
19
|
+
end
|
20
|
+
|
21
|
+
task c: :console # alias 'c' for console
|
22
|
+
task default: :test
|
data/lib/gimme_poc.rb
CHANGED
@@ -3,6 +3,7 @@ require 'lazy_domain'
|
|
3
3
|
require 'mechanize'
|
4
4
|
require_relative './gimme_poc/contactpage'
|
5
5
|
require_relative './gimme_poc/poc'
|
6
|
+
require_relative './gimme_poc/logger'
|
6
7
|
require_relative './gimme_poc/questions'
|
7
8
|
require_relative './gimme_poc/save'
|
8
9
|
require_relative './gimme_poc/version'
|
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
|
|
11
12
|
# Find the contact
|
12
13
|
module Gimme
|
13
14
|
class << self
|
15
|
+
include Web
|
16
|
+
include Questions
|
17
|
+
include Save
|
18
|
+
include ContactPage
|
19
|
+
|
14
20
|
attr_accessor :page, :contact, :contact_links, :url
|
21
|
+
attr_reader :status_code
|
15
22
|
|
16
|
-
|
17
|
-
|
23
|
+
def start_url_process(url)
|
24
|
+
LogMessages.start_url(url)
|
25
|
+
case
|
26
|
+
when LazyDomain.valid?(url) == false
|
27
|
+
LogMessages.invalid_domain(url)
|
28
|
+
@status_code = 0
|
29
|
+
when subdomain?(url)
|
30
|
+
LogMessages.subdomain
|
31
|
+
@status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
|
32
|
+
else
|
33
|
+
@status_code = 0 if get(url).nil?
|
34
|
+
end
|
35
|
+
end
|
18
36
|
|
19
|
-
|
20
|
-
|
37
|
+
def start_contact_process(url)
|
38
|
+
start_contact_links
|
39
|
+
attempt = save_available_contacts(url)
|
40
|
+
info = attempt.info if attempt && attempt.respond_to?(:info)
|
41
|
+
return attempt unless info.nil? || info.empty?
|
42
|
+
go_to_contact_page(url)
|
43
|
+
end
|
21
44
|
|
22
45
|
##
|
23
46
|
# The main method!
|
@@ -25,29 +48,13 @@ module Gimme
|
|
25
48
|
# If url is bad, it's converted to nil in 'get' method and skipped over.
|
26
49
|
def poc(arr)
|
27
50
|
arr = arr.split unless arr.is_a?(Array)
|
51
|
+
results = []
|
28
52
|
arr.each do |url|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
|
33
|
-
next
|
34
|
-
end
|
35
|
-
case
|
36
|
-
when subdomain?(url)
|
37
|
-
puts '(This url is a subdomain. Will try both sub and root domain.)'
|
38
|
-
next if get(url).nil? && get(orig_domain(url)).nil?
|
39
|
-
else
|
40
|
-
next if get(url).nil?
|
41
|
-
end
|
42
|
-
start_contact_links
|
43
|
-
mechpage = go_to_contact_page(url)
|
44
|
-
if mechpage.nil?
|
45
|
-
puts '(empty page, exiting.)'
|
46
|
-
else
|
47
|
-
save_available_contacts(mechpage.uri.to_s)
|
48
|
-
end
|
53
|
+
start_url_process(url)
|
54
|
+
next if @status_code == 0
|
55
|
+
results << start_contact_process(url)
|
49
56
|
end
|
50
|
-
|
57
|
+
results.length == 1 ? results.first : results
|
51
58
|
end
|
52
59
|
|
53
60
|
# Convenience method.
|
@@ -1,56 +1,55 @@
|
|
1
1
|
# Find the contact
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
##
|
14
|
-
# Looks for contact page. Gets page if available.
|
15
|
-
# If no contact link is available, it will blind test '../contact'.
|
16
|
-
# Returns nil if nothing can be found.
|
17
|
-
def contact_page(url)
|
18
|
-
puts 'now looking for contact pages'
|
19
|
-
contact_link = link_with_href(/contact|Contact/)
|
20
|
-
contact_test_page = merged_link('../contact')
|
2
|
+
module ContactPage
|
3
|
+
attr_accessor :contact_link
|
4
|
+
|
5
|
+
##
|
6
|
+
# Scans for contact page. If it doesn't work on the first try,
|
7
|
+
# it will look for English versions and try again. Processes left to right.
|
8
|
+
#
|
9
|
+
# Returns nil if no contact page can be found.
|
10
|
+
def go_to_contact_page(url)
|
11
|
+
contact_page(url) || english_contact_page(url)
|
12
|
+
end
|
21
13
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
14
|
+
##
|
15
|
+
# Looks for contact page. Gets page if available.
|
16
|
+
# If no contact link is available, it will blind test '../contact'.
|
17
|
+
# Returns nil if nothing can be found.
|
18
|
+
def contact_page(url)
|
19
|
+
LogMessages.looking_for_contact_page
|
20
|
+
@contact_link = link_with_href(/contact|Contact/)
|
21
|
+
contact_test_page = merged_link('../contact')
|
22
|
+
case
|
23
|
+
when !contact_link.nil?
|
24
|
+
LogMessages.found_contact_link
|
25
|
+
get(merged_link(@contact_link))
|
26
|
+
else
|
27
|
+
LogMessages.no_contact_link
|
28
|
+
get(orig_domain(url)) if blind_test(contact_test_page).nil?
|
30
29
|
end
|
30
|
+
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
32
|
+
##
|
33
|
+
# Looks for english page. Gets page if available then looks for
|
34
|
+
# english contact page.
|
35
|
+
#
|
36
|
+
# If no english link is available,
|
37
|
+
# it will blind test '../en' and '../english'.
|
38
|
+
# Returns nil if nothing can be found.
|
39
|
+
def english_contact_page(url)
|
40
|
+
LogMessages.looking_for_english_page
|
41
|
+
english_link = @page.link_with(href: %r{en\/|english|English})
|
42
|
+
test_en_page = merged_link('../en')
|
43
|
+
test_english_page = merged_link('../english')
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
45
|
+
case
|
46
|
+
when !english_link.nil?
|
47
|
+
LogMessages.found_english_link
|
48
|
+
get(merged_link(english_link.uri))
|
49
|
+
else
|
50
|
+
blind_test(test_en_page) || blind_test(test_english_page)
|
51
|
+
LogMessages.restarting
|
52
|
+
contact_page(url)
|
54
53
|
end
|
55
54
|
end
|
56
55
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require_relative './logger/messages'
|
3
|
+
|
4
|
+
# Output info messages during gimme poc crawl.
|
5
|
+
module Gimme
|
6
|
+
class << self
|
7
|
+
include LogMessages
|
8
|
+
attr_accessor :logger
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
Gimme.logger = Logger.new(STDOUT)
|
13
|
+
Gimme.logger.level = Logger::INFO
|
14
|
+
Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
|
15
|
+
"#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
|
16
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
|
2
|
+
module LogMessages
|
3
|
+
class << self
|
4
|
+
def loginfo(str)
|
5
|
+
Gimme.logger.info(str)
|
6
|
+
end
|
7
|
+
|
8
|
+
def logwarn(str)
|
9
|
+
Gimme.logger.info(str)
|
10
|
+
end
|
11
|
+
|
12
|
+
# Info
|
13
|
+
# -----------------------------------------------------------------
|
14
|
+
def start_url(url)
|
15
|
+
puts '-' * 50
|
16
|
+
loginfo "starting: #{url}"
|
17
|
+
end
|
18
|
+
|
19
|
+
def sending_get_request(url)
|
20
|
+
loginfo("sending GET request to: #{url}")
|
21
|
+
end
|
22
|
+
|
23
|
+
def blind_testing(url)
|
24
|
+
loginfo("blind testing: #{url}")
|
25
|
+
end
|
26
|
+
|
27
|
+
def invalid_domain(url)
|
28
|
+
loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
|
29
|
+
end
|
30
|
+
|
31
|
+
def subdomain
|
32
|
+
loginfo '(This url is a subdomain. Will try both sub and root domain.)'
|
33
|
+
end
|
34
|
+
|
35
|
+
def empty_page
|
36
|
+
loginfo '(empty page, exiting.)'
|
37
|
+
end
|
38
|
+
|
39
|
+
def looking_for_contact_page
|
40
|
+
loginfo('now looking for contact pages')
|
41
|
+
end
|
42
|
+
|
43
|
+
def found_contact_link
|
44
|
+
loginfo("#{'Success:'.green} Found contact link!")
|
45
|
+
end
|
46
|
+
|
47
|
+
def looking_for_english_page
|
48
|
+
loginfo('Looking for english page...')
|
49
|
+
end
|
50
|
+
|
51
|
+
def found_english_link
|
52
|
+
loginfo("#{'Success:'.green} found english link!")
|
53
|
+
end
|
54
|
+
|
55
|
+
def saving_contact_info(url)
|
56
|
+
loginfo("saving available contact information from #{url}")
|
57
|
+
end
|
58
|
+
|
59
|
+
# Warnings
|
60
|
+
# -----------------------------------------------------------------
|
61
|
+
def no_contact_link
|
62
|
+
logwarn("#{'Warning:'.yellow} couldn't find contact link")
|
63
|
+
end
|
64
|
+
|
65
|
+
def restarting
|
66
|
+
logwarn('restarting'.yellow)
|
67
|
+
end
|
68
|
+
|
69
|
+
def nothing_to_save
|
70
|
+
logwarn '(nothing to save)'
|
71
|
+
end
|
72
|
+
|
73
|
+
def warn_err(error)
|
74
|
+
logwarn("#{'Error:'.red} #{error}")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/gimme_poc/poc.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
|
1
3
|
module Gimme
|
2
4
|
# Collection of sites searched.
|
3
5
|
class Search
|
4
|
-
|
6
|
+
attr_accessor :all_sites
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
+
def initialize
|
9
|
+
@all_sites = []
|
8
10
|
end
|
9
11
|
|
10
12
|
# Each site is saved to this class
|
@@ -13,8 +15,7 @@ module Gimme
|
|
13
15
|
|
14
16
|
def initialize(url, contact_info_hsh)
|
15
17
|
@host = url
|
16
|
-
@info = contact_info_hsh
|
17
|
-
Search.all_sites << self
|
18
|
+
@info = OpenStruct.new(contact_info_hsh)
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
data/lib/gimme_poc/questions.rb
CHANGED
@@ -1,33 +1,27 @@
|
|
1
|
-
#
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
# Boolean, returns true if email is present.
|
12
|
-
def email_available?
|
13
|
-
!link_with_href('mailto').nil?
|
14
|
-
end
|
1
|
+
# Reflective questions for situational awareness.
|
2
|
+
module Questions
|
3
|
+
# Simple regex that looks for ###.#### or ###-####
|
4
|
+
PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
|
5
|
+
|
6
|
+
# Boolean, returns true if email is present.
|
7
|
+
def email_available?
|
8
|
+
!link_with_href('mailto').nil?
|
9
|
+
end
|
15
10
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# Boolean, returns true if phone number is present.
|
12
|
+
def phone_available?
|
13
|
+
!(@page.body =~ PHONE_REGEX).nil?
|
14
|
+
end
|
20
15
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
16
|
+
##
|
17
|
+
# TODO: build better conditional to prevent false positives.
|
18
|
+
# There could be other forms like newsletter signup, etc.
|
19
|
+
#
|
20
|
+
# If there is a form with more than one field, this returns true.
|
21
|
+
# Forms with one field are typically search boxes.
|
22
|
+
#
|
23
|
+
# Boolean, returns true if form is present on page.
|
24
|
+
def contactform_available?
|
25
|
+
!(@page.forms.select { |x| x.fields.length > 1 }.empty?)
|
32
26
|
end
|
33
27
|
end
|
data/lib/gimme_poc/save.rb
CHANGED
@@ -1,62 +1,70 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
def scan_for_contacts
|
10
|
-
{
|
11
|
-
contactpage: link_with_href('contact'),
|
12
|
-
email_present: "#{email_available?}",
|
13
|
-
phone_present: "#{phone_available?}",
|
14
|
-
contact_form: "#{contactform_available?}",
|
15
|
-
facebook: link_with_href('facebook\.'),
|
16
|
-
twitter: link_with_href('twitter\.'),
|
17
|
-
youtube: link_with_href('youtube\.'),
|
18
|
-
googleplus: link_with_href('plus\.google\.'),
|
19
|
-
linkedin: link_with_href('linkedin\.')
|
20
|
-
}
|
21
|
-
end
|
1
|
+
module Save
|
2
|
+
##
|
3
|
+
# Boolean, returns true if anything is present
|
4
|
+
# after running scan_for_contacts and deleting failures.
|
5
|
+
# Remember that false is a string in the hash
|
6
|
+
def something_to_save?(hsh)
|
7
|
+
hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
|
8
|
+
end
|
22
9
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
10
|
+
##
|
11
|
+
# Returns anything that is possible to save, otherwise returns nil.
|
12
|
+
# Flags for phone, email, or contact form are stored as the strings 'true' or 'false'.
|
13
|
+
#
|
14
|
+
# Add periods to link hrefs to prevent false positives. Must escape periods
|
15
|
+
# with a backslash or else it will be a regex wild card.
|
16
|
+
def scan_for_contacts
|
17
|
+
{
|
18
|
+
contactpage: link_with_href('contact'),
|
19
|
+
email_present: "#{email_available?}",
|
20
|
+
phone_present: "#{phone_available?}",
|
21
|
+
contact_form: "#{contactform_available?}",
|
22
|
+
facebook: link_with_href('facebook\.'),
|
23
|
+
twitter: link_with_href('twitter\.'),
|
24
|
+
youtube: link_with_href('youtube\.'),
|
25
|
+
googleplus: link_with_href('plus\.google\.'),
|
26
|
+
linkedin: link_with_href('linkedin\.')
|
27
|
+
}
|
28
|
+
rescue => e
|
29
|
+
puts "Error: #{e}"
|
30
|
+
end
|
27
31
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
32
|
+
# Starts/Restarts the @contact_links hash
|
33
|
+
def start_contact_links
|
34
|
+
@contact_links = {}
|
35
|
+
end
|
33
36
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
# Used in save_available_contacts to save each valid link.
|
38
|
+
def save_link(key, url)
|
39
|
+
return if key.nil? || url.nil?
|
40
|
+
@contact_links[key] = url
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Remove negatives from the contacts hash.
|
45
|
+
# Deletes a key value pair with a value of either nil or false.
|
46
|
+
# Remember that false is stored in the hash as a string.
|
47
|
+
def delete_failures(hsh)
|
48
|
+
hsh.delete_if { |_k, v| v.nil? || v == 'false' }
|
49
|
+
end
|
41
50
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
delete_failures(@contact_links)
|
51
|
-
puts "#{@contact_links}".cyan # same as @contact_links
|
52
|
-
else
|
53
|
-
fail ArgumentError, "expected hash but got #{hsh.class}"
|
51
|
+
# Saves any available contact info to @contact_links.
|
52
|
+
def save_available_contacts(url, hsh = scan_for_contacts)
|
53
|
+
if something_to_save?(hsh)
|
54
|
+
LogMessages.saving_contact_info(url)
|
55
|
+
if hsh.is_a?(Hash)
|
56
|
+
hsh.each do |k, v|
|
57
|
+
save_link(k, v) # saves to @contact_links
|
54
58
|
end
|
55
|
-
|
59
|
+
delete_failures(@contact_links)
|
60
|
+
puts "#{@contact_links}".cyan # same as @contact_links
|
56
61
|
else
|
57
|
-
|
58
|
-
return
|
62
|
+
fail ArgumentError, "expected hash but got #{hsh.class}"
|
59
63
|
end
|
64
|
+
Gimme::Search::POC.new(url, @contact_links)
|
65
|
+
else
|
66
|
+
LogMessages.nothing_to_save
|
67
|
+
return
|
60
68
|
end
|
61
69
|
end
|
62
70
|
end
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'logger'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'tmpdir'
|
5
|
+
require 'webrick'
|
6
|
+
require 'zlib'
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
|
10
|
+
begin
|
11
|
+
gem 'minitest'
|
12
|
+
rescue Gem::LoadError
|
13
|
+
end
|
14
|
+
|
15
|
+
##
|
16
|
+
# Source:
|
17
|
+
#
|
18
|
+
# http://bit.ly/1Pt2KAd
|
19
|
+
# --------------------------------------------------------------
|
20
|
+
|
21
|
+
##
|
22
|
+
# A generic test case for testing mechanize. Using a subclass of
|
23
|
+
# Mechanize::TestCase for your tests will create an isolated mechanize
|
24
|
+
# instance that won't pollute your filesystem or other tests.
|
25
|
+
#
|
26
|
+
# Once Mechanize::TestCase is loaded no HTTP requests will be made outside
|
27
|
+
# mechanize itself. All requests are handled via WEBrick servlets.
|
28
|
+
#
|
29
|
+
# Mechanize uses WEBrick servlets to test some functionality. You can run
|
30
|
+
# other HTTP clients against the servlets using:
|
31
|
+
#
|
32
|
+
# ruby -rmechanize/test_case/server -e0
|
33
|
+
#
|
34
|
+
# Which will launch a test server at http://localhost:8000
|
35
|
+
|
36
|
+
class Mechanize::TestCase < Minitest::Test
|
37
|
+
|
38
|
+
TEST_DIR = File.expand_path '../../../test', __FILE__
|
39
|
+
REQUESTS = []
|
40
|
+
|
41
|
+
##
|
42
|
+
# Creates a clean mechanize instance +@mech+ for use in tests.
|
43
|
+
|
44
|
+
def setup
|
45
|
+
super
|
46
|
+
|
47
|
+
REQUESTS.clear
|
48
|
+
@mech = Mechanize.new
|
49
|
+
@ssl_private_key = nil
|
50
|
+
@ssl_certificate = nil
|
51
|
+
end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Creates a fake page with URI http://fake.example and an empty, submittable
|
55
|
+
# form.
|
56
|
+
|
57
|
+
def fake_page agent = @mech
|
58
|
+
uri = URI 'http://fake.example/'
|
59
|
+
html = <<-END
|
60
|
+
<html>
|
61
|
+
<body>
|
62
|
+
<form><input type="submit" value="submit" /></form>
|
63
|
+
</body>
|
64
|
+
</html>
|
65
|
+
END
|
66
|
+
|
67
|
+
Mechanize::Page.new uri, nil, html, 200, agent
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# Is the Encoding constant defined?
|
72
|
+
|
73
|
+
def have_encoding?
|
74
|
+
Object.const_defined? :Encoding
|
75
|
+
end
|
76
|
+
|
77
|
+
##
|
78
|
+
# Creates a Mechanize::Page with the given +body+
|
79
|
+
|
80
|
+
def html_page body
|
81
|
+
uri = URI 'http://example/'
|
82
|
+
Mechanize::Page.new uri, nil, body, 200, @mech
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# Creates a Mechanize::CookieJar by parsing the given +str+
|
87
|
+
|
88
|
+
def cookie_jar str, uri = URI('http://example')
|
89
|
+
jar = Mechanize::CookieJar.new
|
90
|
+
|
91
|
+
jar.parse str, uri
|
92
|
+
|
93
|
+
jar
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# Runs the block inside a temporary directory
|
98
|
+
|
99
|
+
def in_tmpdir
|
100
|
+
Dir.mktmpdir do |dir|
|
101
|
+
Dir.chdir dir do
|
102
|
+
yield
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
##
|
108
|
+
# Creates a Nokogiri Node +element+ with the given +attributes+
|
109
|
+
|
110
|
+
def node element, attributes = {}
|
111
|
+
doc = Nokogiri::HTML::Document.new
|
112
|
+
|
113
|
+
node = Nokogiri::XML::Node.new element, doc
|
114
|
+
|
115
|
+
attributes.each do |name, value|
|
116
|
+
node[name] = value
|
117
|
+
end
|
118
|
+
|
119
|
+
node
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Creates a Mechanize::Page for the given +uri+ with the given
|
124
|
+
# +content_type+, response +body+ and HTTP status +code+
|
125
|
+
|
126
|
+
def page uri, content_type = 'text/html', body = '', code = 200
|
127
|
+
uri = URI uri unless URI::Generic === uri
|
128
|
+
|
129
|
+
Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
|
130
|
+
@mech)
|
131
|
+
end
|
132
|
+
|
133
|
+
##
|
134
|
+
# Requests made during this test
|
135
|
+
|
136
|
+
def requests
|
137
|
+
REQUESTS
|
138
|
+
end
|
139
|
+
|
140
|
+
##
|
141
|
+
# An SSL private key. This key is the same across all test runs
|
142
|
+
|
143
|
+
def ssl_private_key
|
144
|
+
@ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
|
145
|
+
-----BEGIN RSA PRIVATE KEY-----
|
146
|
+
MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
|
147
|
+
bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
|
148
|
+
p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
|
149
|
+
6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
|
150
|
+
-----END RSA PRIVATE KEY-----
|
151
|
+
KEY
|
152
|
+
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# An X509 certificate. This certificate is the same across all test runs
|
156
|
+
|
157
|
+
def ssl_certificate
|
158
|
+
@ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
|
159
|
+
-----BEGIN CERTIFICATE-----
|
160
|
+
MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
|
161
|
+
eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
|
162
|
+
OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
|
163
|
+
B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
|
164
|
+
4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
|
165
|
+
UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
166
|
+
//8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
|
167
|
+
-----END CERTIFICATE-----
|
168
|
+
CERT
|
169
|
+
end
|
170
|
+
|
171
|
+
##
|
172
|
+
# Creates a Tempfile with +content+ that is immediately unlinked
|
173
|
+
|
174
|
+
def tempfile content
|
175
|
+
body_io = Tempfile.new @NAME
|
176
|
+
body_io.unlink
|
177
|
+
body_io.write content
|
178
|
+
body_io.flush
|
179
|
+
body_io.rewind
|
180
|
+
|
181
|
+
body_io
|
182
|
+
end
|
183
|
+
|
184
|
+
end
|
185
|
+
|
186
|
+
require 'mechanize/test_case/servlets'
|
187
|
+
|
188
|
+
module Net # :nodoc:
|
189
|
+
end
|
190
|
+
|
191
|
+
class Net::HTTP # :nodoc:
|
192
|
+
alias :old_do_start :do_start
|
193
|
+
|
194
|
+
def do_start
|
195
|
+
@started = true
|
196
|
+
end
|
197
|
+
|
198
|
+
PAGE_CACHE = {}
|
199
|
+
|
200
|
+
alias :old_request :request
|
201
|
+
|
202
|
+
def request(req, *data, &block)
|
203
|
+
url = URI.parse(req.path)
|
204
|
+
path = WEBrick::HTTPUtils.unescape(url.path)
|
205
|
+
|
206
|
+
path = '/index.html' if path == '/'
|
207
|
+
|
208
|
+
res = ::Response.new
|
209
|
+
res.query_params = url.query
|
210
|
+
|
211
|
+
req.query = if 'POST' != req.method && url.query then
|
212
|
+
WEBrick::HTTPUtils.parse_query url.query
|
213
|
+
elsif req['content-type'] =~ /www-form-urlencoded/ then
|
214
|
+
WEBrick::HTTPUtils.parse_query req.body
|
215
|
+
elsif req['content-type'] =~ /boundary=(.+)/ then
|
216
|
+
boundary = WEBrick::HTTPUtils.dequote $1
|
217
|
+
WEBrick::HTTPUtils.parse_form_data req.body, boundary
|
218
|
+
else
|
219
|
+
{}
|
220
|
+
end
|
221
|
+
|
222
|
+
req.cookies = WEBrick::Cookie.parse(req['Cookie'])
|
223
|
+
|
224
|
+
Mechanize::TestCase::REQUESTS << req
|
225
|
+
|
226
|
+
if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
|
227
|
+
servlet = servlet_klass.new({})
|
228
|
+
servlet.send "do_#{req.method}", req, res
|
229
|
+
else
|
230
|
+
filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
|
231
|
+
unless PAGE_CACHE[filename]
|
232
|
+
open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
|
233
|
+
PAGE_CACHE[filename] = io.read
|
234
|
+
}
|
235
|
+
end
|
236
|
+
|
237
|
+
res.body = PAGE_CACHE[filename]
|
238
|
+
case filename
|
239
|
+
when /\.txt$/
|
240
|
+
res['Content-Type'] = 'text/plain'
|
241
|
+
when /\.jpg$/
|
242
|
+
res['Content-Type'] = 'image/jpeg'
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
res['Content-Type'] ||= 'text/html'
|
247
|
+
res.code ||= "200"
|
248
|
+
|
249
|
+
response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
|
250
|
+
response = response_klass.new res.http_version, res.code, res.message
|
251
|
+
|
252
|
+
res.header.each do |k,v|
|
253
|
+
v = v.first if v.length == 1
|
254
|
+
response[k] = v
|
255
|
+
end
|
256
|
+
|
257
|
+
res.cookies.each do |cookie|
|
258
|
+
response.add_field 'Set-Cookie', cookie.to_s
|
259
|
+
end
|
260
|
+
|
261
|
+
response['Content-Type'] ||= 'text/html'
|
262
|
+
response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
|
263
|
+
|
264
|
+
io = StringIO.new(res.body)
|
265
|
+
response.instance_variable_set :@socket, io
|
266
|
+
def io.read clen, dest = nil, _ = nil
|
267
|
+
if dest then
|
268
|
+
dest << super(clen)
|
269
|
+
else
|
270
|
+
super clen
|
271
|
+
end
|
272
|
+
end
|
273
|
+
|
274
|
+
body_exist = req.response_body_permitted? &&
|
275
|
+
response_klass.body_permitted?
|
276
|
+
|
277
|
+
response.instance_variable_set :@body_exist, body_exist
|
278
|
+
|
279
|
+
yield response if block_given?
|
280
|
+
|
281
|
+
response
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
class Net::HTTPRequest # :nodoc:
|
286
|
+
attr_accessor :query, :body, :cookies, :user
|
287
|
+
|
288
|
+
def host
|
289
|
+
'example'
|
290
|
+
end
|
291
|
+
|
292
|
+
def port
|
293
|
+
80
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
class Response # :nodoc:
|
298
|
+
include Net::HTTPHeader
|
299
|
+
|
300
|
+
attr_reader :code
|
301
|
+
attr_accessor :body, :query, :cookies
|
302
|
+
attr_accessor :query_params, :http_version
|
303
|
+
attr_accessor :header
|
304
|
+
|
305
|
+
def code=(c)
|
306
|
+
@code = c.to_s
|
307
|
+
end
|
308
|
+
|
309
|
+
alias :status :code
|
310
|
+
alias :status= :code=
|
311
|
+
|
312
|
+
def initialize
|
313
|
+
@header = {}
|
314
|
+
@body = ''
|
315
|
+
@code = nil
|
316
|
+
@query = nil
|
317
|
+
@cookies = []
|
318
|
+
@http_version = '1.1'
|
319
|
+
end
|
320
|
+
|
321
|
+
def read_body
|
322
|
+
yield body
|
323
|
+
end
|
324
|
+
|
325
|
+
def message
|
326
|
+
''
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
data/lib/gimme_poc/version.rb
CHANGED
data/lib/gimme_poc/web.rb
CHANGED
@@ -1,91 +1,97 @@
|
|
1
1
|
# Find the contact
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
2
|
+
module Web
|
3
|
+
attr_accessor :page, :agent, :url
|
4
|
+
|
5
|
+
# Captures http:// and https://
|
6
|
+
HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
|
7
|
+
|
8
|
+
##
|
9
|
+
# Go to a page using Mechanize.
|
10
|
+
# Sleep for a split second to not overload any servers.
|
11
|
+
#
|
12
|
+
# Returns nil if bad url is given.
|
13
|
+
def get(str)
|
14
|
+
prepare_get_request(str)
|
15
|
+
@page = @agent.get(@url)
|
16
|
+
rescue Exception => e
|
17
|
+
LogMessages.warn_err(e)
|
18
|
+
end
|
19
|
+
|
20
|
+
def prepare_get_request(str)
|
21
|
+
mech_setup
|
22
|
+
@url = format_url(str)
|
23
|
+
LogMessages.sending_get_request(url)
|
24
|
+
sleep(0.1)
|
25
|
+
end
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
puts "#{'Connection Timeout:'.red} #{e}"
|
29
|
-
rescue Net::HTTP::Persistent::Error
|
30
|
-
puts "#{'Connection Timeout:'.red} read timeout, too many resets."
|
27
|
+
def mech_setup
|
28
|
+
@agent = Mechanize.new do |a|
|
29
|
+
a.user_agent_alias = 'Mac Safari'
|
30
|
+
a.open_timeout = 7
|
31
|
+
a.read_timeout = 7
|
32
|
+
a.idle_timeout = 7
|
33
|
+
a.redirect_ok = true
|
31
34
|
end
|
35
|
+
end
|
32
36
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
##
|
38
|
+
# Mechanize needs absolute urls to work.
|
39
|
+
# If http:// or https:// isn't present, prepend http://.
|
40
|
+
def format_url(str)
|
41
|
+
LazyDomain.autohttp(str)
|
42
|
+
end
|
39
43
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
# Used for subdomain check. Not a permanent change to url variable.
|
45
|
+
def unformat_url(str)
|
46
|
+
str.gsub(HTTP_REGEX, '')
|
47
|
+
end
|
44
48
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
49
|
+
##
|
50
|
+
# Outputs domain of a url. Useful if subdomains are given to GimmePOC
|
51
|
+
# and they don't work.
|
52
|
+
#
|
53
|
+
# For example:
|
54
|
+
# Given http://maps.google.com, returns 'google.com'.
|
55
|
+
def orig_domain(str)
|
56
|
+
LazyDomain.parse(str).domain
|
57
|
+
rescue PublicSuffix::DomainInvalid => err
|
58
|
+
LogMessages.invalid_domain(err)
|
59
|
+
end
|
56
60
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
61
|
+
##
|
62
|
+
# Used in case of relative paths. Merging guarantees correct url.
|
63
|
+
# This needs a url string as argument to work.
|
64
|
+
# Produces a merged uri string.
|
65
|
+
def merged_link(url_str)
|
66
|
+
@page.uri.merge(url_str).to_s
|
67
|
+
end
|
64
68
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
##
|
70
|
+
# Expects relative paths and merges everything.
|
71
|
+
# Returns a string. If there's nothing, return nil.
|
72
|
+
#
|
73
|
+
# Add \b word boundary so the match must start at the beginning of a word.
|
74
|
+
def link_with_href(str)
|
75
|
+
merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
|
76
|
+
rescue
|
77
|
+
nil
|
78
|
+
end
|
75
79
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
+
# Boolean, returns true if url is not identical to original domain.
|
81
|
+
#
|
82
|
+
# In the event that the url has a path, this splits everything on forward
|
83
|
+
# slash and selects far left item.
|
84
|
+
def subdomain?(str)
|
85
|
+
(unformat_url(str).split('/')[0] != orig_domain(str))
|
86
|
+
end
|
80
87
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
end
|
88
|
+
# TODO: Sometimes DNS will do a redirect and not give a 404.
|
89
|
+
# Need to prevent redirects.
|
90
|
+
#
|
91
|
+
# Blindly tests to see if a url goes through. If there is a 404 error,
|
92
|
+
# this will return nil.
|
93
|
+
def blind_test(url)
|
94
|
+
LogMessages.blind_testing(url)
|
95
|
+
get(url)
|
90
96
|
end
|
91
97
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimme_poc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -44,28 +44,56 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.0.
|
47
|
+
version: 0.0.2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.0.
|
54
|
+
version: 0.0.2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: shoulda
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.5'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.5'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: shoulda-context
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.2'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest-reporters
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.1'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.1'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: pry
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,9 +134,12 @@ files:
|
|
106
134
|
- Rakefile
|
107
135
|
- lib/gimme_poc.rb
|
108
136
|
- lib/gimme_poc/contactpage.rb
|
137
|
+
- lib/gimme_poc/logger.rb
|
138
|
+
- lib/gimme_poc/logger/messages.rb
|
109
139
|
- lib/gimme_poc/poc.rb
|
110
140
|
- lib/gimme_poc/questions.rb
|
111
141
|
- lib/gimme_poc/save.rb
|
142
|
+
- lib/gimme_poc/test_case.rb
|
112
143
|
- lib/gimme_poc/version.rb
|
113
144
|
- lib/gimme_poc/web.rb
|
114
145
|
homepage: http://github.com/m8ss/gimme_poc
|
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
162
|
version: '0'
|
132
163
|
requirements: []
|
133
164
|
rubyforge_project:
|
134
|
-
rubygems_version: 2.
|
165
|
+
rubygems_version: 2.5.1
|
135
166
|
signing_key:
|
136
167
|
specification_version: 4
|
137
168
|
summary: Get a point of contact. Given a url or array of urls, extracts social media
|