gimme_poc 0.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -31
- data/Rakefile +15 -0
- data/lib/gimme_poc.rb +32 -25
- data/lib/gimme_poc/contactpage.rb +47 -48
- data/lib/gimme_poc/logger.rb +16 -0
- data/lib/gimme_poc/logger/messages.rb +77 -0
- data/lib/gimme_poc/poc.rb +6 -5
- data/lib/gimme_poc/questions.rb +23 -29
- data/lib/gimme_poc/save.rb +60 -52
- data/lib/gimme_poc/test_case.rb +329 -0
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +85 -79
- metadata +39 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
|
4
|
+
data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
|
7
|
+
data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458
|
data/README.md
CHANGED
@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
|
|
10
10
|
## Installation
|
11
11
|
|
12
12
|
```
|
13
|
-
gem install gimme_poc
|
13
|
+
$ gem install gimme_poc
|
14
14
|
|
15
15
|
```
|
16
16
|
|
17
17
|
## Set Up
|
18
18
|
|
19
19
|
```ruby
|
20
|
-
require 'gimme_poc'
|
20
|
+
require 'gimme_poc'
|
21
21
|
|
22
22
|
```
|
23
23
|
|
24
24
|
## How it works
|
25
25
|
|
26
|
-
Gimme POC is easy to use! Simply run this command.
|
27
|
-
|
28
26
|
```ruby
|
29
27
|
|
30
28
|
Gimme.poc 'http://example.com'
|
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
|
|
56
54
|
|
57
55
|
```
|
58
56
|
|
59
|
-
## Referencing the search results
|
60
|
-
|
61
|
-
To use your search results, simply run:
|
62
|
-
|
63
|
-
```ruby
|
64
|
-
|
65
|
-
Gimme.memory
|
66
|
-
|
67
|
-
```
|
68
|
-
|
69
|
-
## Clearing the search results
|
70
|
-
|
71
|
-
To clear search results and start afresh, run:
|
72
|
-
|
73
|
-
```ruby
|
74
|
-
|
75
|
-
Gimme.reset!
|
76
|
-
|
77
|
-
```
|
78
|
-
|
79
|
-
## To do:
|
80
|
-
|
81
|
-
- Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
|
82
|
-
- Work on false positives of bad urls. (Bad urls should be skipped + DNS redirects don't give 404 errors)
|
83
|
-
|
84
|
-
|
85
|
-
More to follow...
|
data/Rakefile
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'rake'
|
3
|
+
require 'rake/testtask'
|
4
|
+
|
5
|
+
Rake::TestTask.new(:test) do |test|
|
6
|
+
test.libs << 'lib' << 'test'
|
7
|
+
test.pattern = 'test/**/test*.rb'
|
8
|
+
test.verbose = true
|
9
|
+
end
|
3
10
|
|
4
11
|
desc 'Open console with gimme_poc loaded'
|
5
12
|
task :console do
|
6
13
|
exec 'pry -r ./lib/gimme_poc.rb'
|
7
14
|
end
|
15
|
+
|
16
|
+
desc 'make a release'
|
17
|
+
task :release do
|
18
|
+
exec './script/release'
|
19
|
+
end
|
20
|
+
|
21
|
+
task c: :console # alias 'c' for console
|
22
|
+
task default: :test
|
data/lib/gimme_poc.rb
CHANGED
@@ -3,6 +3,7 @@ require 'lazy_domain'
|
|
3
3
|
require 'mechanize'
|
4
4
|
require_relative './gimme_poc/contactpage'
|
5
5
|
require_relative './gimme_poc/poc'
|
6
|
+
require_relative './gimme_poc/logger'
|
6
7
|
require_relative './gimme_poc/questions'
|
7
8
|
require_relative './gimme_poc/save'
|
8
9
|
require_relative './gimme_poc/version'
|
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
|
|
11
12
|
# Find the contact
|
12
13
|
module Gimme
|
13
14
|
class << self
|
15
|
+
include Web
|
16
|
+
include Questions
|
17
|
+
include Save
|
18
|
+
include ContactPage
|
19
|
+
|
14
20
|
attr_accessor :page, :contact, :contact_links, :url
|
21
|
+
attr_reader :status_code
|
15
22
|
|
16
|
-
|
17
|
-
|
23
|
+
def start_url_process(url)
|
24
|
+
LogMessages.start_url(url)
|
25
|
+
case
|
26
|
+
when LazyDomain.valid?(url) == false
|
27
|
+
LogMessages.invalid_domain(url)
|
28
|
+
@status_code = 0
|
29
|
+
when subdomain?(url)
|
30
|
+
LogMessages.subdomain
|
31
|
+
@status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
|
32
|
+
else
|
33
|
+
@status_code = 0 if get(url).nil?
|
34
|
+
end
|
35
|
+
end
|
18
36
|
|
19
|
-
|
20
|
-
|
37
|
+
def start_contact_process(url)
|
38
|
+
start_contact_links
|
39
|
+
attempt = save_available_contacts(url)
|
40
|
+
info = attempt.info if attempt && attempt.respond_to?(:info)
|
41
|
+
return attempt unless info.nil? || info.empty?
|
42
|
+
go_to_contact_page(url)
|
43
|
+
end
|
21
44
|
|
22
45
|
##
|
23
46
|
# The main method!
|
@@ -25,29 +48,13 @@ module Gimme
|
|
25
48
|
# If url is bad, it's converted to nil in 'get' method and skipped over.
|
26
49
|
def poc(arr)
|
27
50
|
arr = arr.split unless arr.is_a?(Array)
|
51
|
+
results = []
|
28
52
|
arr.each do |url|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
|
33
|
-
next
|
34
|
-
end
|
35
|
-
case
|
36
|
-
when subdomain?(url)
|
37
|
-
puts '(This url is a subdomain. Will try both sub and root domain.)'
|
38
|
-
next if get(url).nil? && get(orig_domain(url)).nil?
|
39
|
-
else
|
40
|
-
next if get(url).nil?
|
41
|
-
end
|
42
|
-
start_contact_links
|
43
|
-
mechpage = go_to_contact_page(url)
|
44
|
-
if mechpage.nil?
|
45
|
-
puts '(empty page, exiting.)'
|
46
|
-
else
|
47
|
-
save_available_contacts(mechpage.uri.to_s)
|
48
|
-
end
|
53
|
+
start_url_process(url)
|
54
|
+
next if @status_code == 0
|
55
|
+
results << start_contact_process(url)
|
49
56
|
end
|
50
|
-
|
57
|
+
results.length == 1 ? results.first : results
|
51
58
|
end
|
52
59
|
|
53
60
|
# Convenience method.
|
@@ -1,56 +1,55 @@
|
|
1
1
|
# Find the contact
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
##
|
14
|
-
# Looks for contact page. Gets page if available.
|
15
|
-
# If no contact link is available, it will blind test '../contact'.
|
16
|
-
# Returns nil if nothing can be found.
|
17
|
-
def contact_page(url)
|
18
|
-
puts 'now looking for contact pages'
|
19
|
-
contact_link = link_with_href(/contact|Contact/)
|
20
|
-
contact_test_page = merged_link('../contact')
|
2
|
+
module ContactPage
|
3
|
+
attr_accessor :contact_link
|
4
|
+
|
5
|
+
##
|
6
|
+
# Scans for contact page. If it doesn't work on the first try,
|
7
|
+
# it will look for English versions and try again. Processes left to right.
|
8
|
+
#
|
9
|
+
# Returns nil if no contact page can be found.
|
10
|
+
def go_to_contact_page(url)
|
11
|
+
contact_page(url) || english_contact_page(url)
|
12
|
+
end
|
21
13
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
14
|
+
##
|
15
|
+
# Looks for contact page. Gets page if available.
|
16
|
+
# If no contact link is available, it will blind test '../contact'.
|
17
|
+
# Returns nil if nothing can be found.
|
18
|
+
def contact_page(url)
|
19
|
+
LogMessages.looking_for_contact_page
|
20
|
+
@contact_link = link_with_href(/contact|Contact/)
|
21
|
+
contact_test_page = merged_link('../contact')
|
22
|
+
case
|
23
|
+
when !contact_link.nil?
|
24
|
+
LogMessages.found_contact_link
|
25
|
+
get(merged_link(@contact_link))
|
26
|
+
else
|
27
|
+
LogMessages.no_contact_link
|
28
|
+
get(orig_domain(url)) if blind_test(contact_test_page).nil?
|
30
29
|
end
|
30
|
+
end
|
31
31
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
32
|
+
##
|
33
|
+
# Looks for english page. Gets page if available then looks for
|
34
|
+
# english contact page.
|
35
|
+
#
|
36
|
+
# If no english link is available,
|
37
|
+
# it will blind test '../en' and '../english'.
|
38
|
+
# Returns nil if nothing can be found.
|
39
|
+
def english_contact_page(url)
|
40
|
+
LogMessages.looking_for_english_page
|
41
|
+
english_link = @page.link_with(href: %r{en\/|english|English})
|
42
|
+
test_en_page = merged_link('../en')
|
43
|
+
test_english_page = merged_link('../english')
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
end
|
45
|
+
case
|
46
|
+
when !english_link.nil?
|
47
|
+
LogMessages.found_english_link
|
48
|
+
get(merged_link(english_link.uri))
|
49
|
+
else
|
50
|
+
blind_test(test_en_page) || blind_test(test_english_page)
|
51
|
+
LogMessages.restarting
|
52
|
+
contact_page(url)
|
54
53
|
end
|
55
54
|
end
|
56
55
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require_relative './logger/messages'
|
3
|
+
|
4
|
+
# Output info messages during gimme poc crawl.
|
5
|
+
module Gimme
|
6
|
+
class << self
|
7
|
+
include LogMessages
|
8
|
+
attr_accessor :logger
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
Gimme.logger = Logger.new(STDOUT)
|
13
|
+
Gimme.logger.level = Logger::INFO
|
14
|
+
Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
|
15
|
+
"#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
|
16
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
|
2
|
+
module LogMessages
|
3
|
+
class << self
|
4
|
+
def loginfo(str)
|
5
|
+
Gimme.logger.info(str)
|
6
|
+
end
|
7
|
+
|
8
|
+
def logwarn(str)
|
9
|
+
Gimme.logger.info(str)
|
10
|
+
end
|
11
|
+
|
12
|
+
# Info
|
13
|
+
# -----------------------------------------------------------------
|
14
|
+
def start_url(url)
|
15
|
+
puts '-' * 50
|
16
|
+
loginfo "starting: #{url}"
|
17
|
+
end
|
18
|
+
|
19
|
+
def sending_get_request(url)
|
20
|
+
loginfo("sending GET request to: #{url}")
|
21
|
+
end
|
22
|
+
|
23
|
+
def blind_testing(url)
|
24
|
+
loginfo("blind testing: #{url}")
|
25
|
+
end
|
26
|
+
|
27
|
+
def invalid_domain(url)
|
28
|
+
loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
|
29
|
+
end
|
30
|
+
|
31
|
+
def subdomain
|
32
|
+
loginfo '(This url is a subdomain. Will try both sub and root domain.)'
|
33
|
+
end
|
34
|
+
|
35
|
+
def empty_page
|
36
|
+
loginfo '(empty page, exiting.)'
|
37
|
+
end
|
38
|
+
|
39
|
+
def looking_for_contact_page
|
40
|
+
loginfo('now looking for contact pages')
|
41
|
+
end
|
42
|
+
|
43
|
+
def found_contact_link
|
44
|
+
loginfo("#{'Success:'.green} Found contact link!")
|
45
|
+
end
|
46
|
+
|
47
|
+
def looking_for_english_page
|
48
|
+
loginfo('Looking for english page...')
|
49
|
+
end
|
50
|
+
|
51
|
+
def found_english_link
|
52
|
+
loginfo("#{'Success:'.green} found english link!")
|
53
|
+
end
|
54
|
+
|
55
|
+
def saving_contact_info(url)
|
56
|
+
loginfo("saving available contact information from #{url}")
|
57
|
+
end
|
58
|
+
|
59
|
+
# Warnings
|
60
|
+
# -----------------------------------------------------------------
|
61
|
+
def no_contact_link
|
62
|
+
logwarn("#{'Warning:'.yellow} couldn't find contact link")
|
63
|
+
end
|
64
|
+
|
65
|
+
def restarting
|
66
|
+
logwarn('restarting'.yellow)
|
67
|
+
end
|
68
|
+
|
69
|
+
def nothing_to_save
|
70
|
+
logwarn '(nothing to save)'
|
71
|
+
end
|
72
|
+
|
73
|
+
def warn_err(error)
|
74
|
+
logwarn("#{'Error:'.red} #{error}")
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/gimme_poc/poc.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
+
require "ostruct"
|
2
|
+
|
1
3
|
module Gimme
|
2
4
|
# Collection of sites searched.
|
3
5
|
class Search
|
4
|
-
|
6
|
+
attr_accessor :all_sites
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
+
def initialize
|
9
|
+
@all_sites = []
|
8
10
|
end
|
9
11
|
|
10
12
|
# Each site is saved to this class
|
@@ -13,8 +15,7 @@ module Gimme
|
|
13
15
|
|
14
16
|
def initialize(url, contact_info_hsh)
|
15
17
|
@host = url
|
16
|
-
@info = contact_info_hsh
|
17
|
-
Search.all_sites << self
|
18
|
+
@info = OpenStruct.new(contact_info_hsh)
|
18
19
|
end
|
19
20
|
end
|
20
21
|
end
|
data/lib/gimme_poc/questions.rb
CHANGED
@@ -1,33 +1,27 @@
|
|
1
|
-
#
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
# Boolean, returns true if email is present.
|
12
|
-
def email_available?
|
13
|
-
!link_with_href('mailto').nil?
|
14
|
-
end
|
1
|
+
# Reflective questions for situational awareness.
|
2
|
+
module Questions
|
3
|
+
# Simple regex that looks for ###.#### or ###-####
|
4
|
+
PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
|
5
|
+
|
6
|
+
# Boolean, returns true if email is present.
|
7
|
+
def email_available?
|
8
|
+
!link_with_href('mailto').nil?
|
9
|
+
end
|
15
10
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
# Boolean, returns true if phone number is present.
|
12
|
+
def phone_available?
|
13
|
+
!(@page.body =~ PHONE_REGEX).nil?
|
14
|
+
end
|
20
15
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
16
|
+
##
|
17
|
+
# TODO: build better conditional to prevent false positives.
|
18
|
+
# There could be other forms like newsletter signup, etc.
|
19
|
+
#
|
20
|
+
# If there is a form with more than one field, this returns true.
|
21
|
+
# Forms with one field are typically search boxes.
|
22
|
+
#
|
23
|
+
# Boolean, returns true if form is present on page.
|
24
|
+
def contactform_available?
|
25
|
+
!(@page.forms.select { |x| x.fields.length > 1 }.empty?)
|
32
26
|
end
|
33
27
|
end
|
data/lib/gimme_poc/save.rb
CHANGED
@@ -1,62 +1,70 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
def scan_for_contacts
|
10
|
-
{
|
11
|
-
contactpage: link_with_href('contact'),
|
12
|
-
email_present: "#{email_available?}",
|
13
|
-
phone_present: "#{phone_available?}",
|
14
|
-
contact_form: "#{contactform_available?}",
|
15
|
-
facebook: link_with_href('facebook\.'),
|
16
|
-
twitter: link_with_href('twitter\.'),
|
17
|
-
youtube: link_with_href('youtube\.'),
|
18
|
-
googleplus: link_with_href('plus\.google\.'),
|
19
|
-
linkedin: link_with_href('linkedin\.')
|
20
|
-
}
|
21
|
-
end
|
1
|
+
module Save
|
2
|
+
##
|
3
|
+
# Boolean, returns true if anything is present
|
4
|
+
# after running scan_for_contacts and deleting failures.
|
5
|
+
# Remember that false is a string in the hash
|
6
|
+
def something_to_save?(hsh)
|
7
|
+
hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
|
8
|
+
end
|
22
9
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
10
|
+
##
|
11
|
+
# Returns anything that is possible to save, otherwise returns nil.
|
12
|
+
# Booleans for phone, email, or contact form will display True or False.
|
13
|
+
#
|
14
|
+
# Add periods to link hrefs to prevent false positives. Must escape periods
|
15
|
+
# with a backslash or else it will be a regex wild card.
|
16
|
+
def scan_for_contacts
|
17
|
+
{
|
18
|
+
contactpage: link_with_href('contact'),
|
19
|
+
email_present: "#{email_available?}",
|
20
|
+
phone_present: "#{phone_available?}",
|
21
|
+
contact_form: "#{contactform_available?}",
|
22
|
+
facebook: link_with_href('facebook\.'),
|
23
|
+
twitter: link_with_href('twitter\.'),
|
24
|
+
youtube: link_with_href('youtube\.'),
|
25
|
+
googleplus: link_with_href('plus\.google\.'),
|
26
|
+
linkedin: link_with_href('linkedin\.')
|
27
|
+
}
|
28
|
+
rescue => e
|
29
|
+
puts "Error: #{e}"
|
30
|
+
end
|
27
31
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
end
|
32
|
+
# Starts/Restarts the @contact_links hash
|
33
|
+
def start_contact_links
|
34
|
+
@contact_links = {}
|
35
|
+
end
|
33
36
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
37
|
+
# Used in save_available_contacts to save each valid link.
|
38
|
+
def save_link(key, url)
|
39
|
+
return if key.nil? || url.nil?
|
40
|
+
@contact_links[key] = url
|
41
|
+
end
|
42
|
+
|
43
|
+
##
|
44
|
+
# Remove negatives from the contacts hash.
|
45
|
+
# Deletes a key value pair with a value of either nil or false.
|
46
|
+
# Remember that false is stored in the hash as a string.
|
47
|
+
def delete_failures(hsh)
|
48
|
+
hsh.delete_if { |_k, v| v.nil? || v == 'false' }
|
49
|
+
end
|
41
50
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
delete_failures(@contact_links)
|
51
|
-
puts "#{@contact_links}".cyan # same as @contact_links
|
52
|
-
else
|
53
|
-
fail ArgumentError, "expected hash but got #{hsh.class}"
|
51
|
+
# Saves any available contact info to @contact_links.
|
52
|
+
def save_available_contacts(url, hsh = scan_for_contacts)
|
53
|
+
if something_to_save?(hsh)
|
54
|
+
LogMessages.saving_contact_info(url)
|
55
|
+
if hsh.is_a?(Hash)
|
56
|
+
hsh.each do |k, v|
|
57
|
+
save_link(k, v) # saves to @contact_links
|
54
58
|
end
|
55
|
-
|
59
|
+
delete_failures(@contact_links)
|
60
|
+
puts "#{@contact_links}".cyan # same as @contact_links
|
56
61
|
else
|
57
|
-
|
58
|
-
return
|
62
|
+
fail ArgumentError, "expected hash but got #{hsh.class}"
|
59
63
|
end
|
64
|
+
Gimme::Search::POC.new(url, @contact_links)
|
65
|
+
else
|
66
|
+
LogMessages.nothing_to_save
|
67
|
+
return
|
60
68
|
end
|
61
69
|
end
|
62
70
|
end
|
@@ -0,0 +1,329 @@
|
|
1
|
+
require 'mechanize'
|
2
|
+
require 'logger'
|
3
|
+
require 'tempfile'
|
4
|
+
require 'tmpdir'
|
5
|
+
require 'webrick'
|
6
|
+
require 'zlib'
|
7
|
+
|
8
|
+
require 'rubygems'
|
9
|
+
|
10
|
+
begin
|
11
|
+
gem 'minitest'
|
12
|
+
rescue Gem::LoadError
|
13
|
+
end
|
14
|
+
|
15
|
+
##
|
16
|
+
# Source:
|
17
|
+
#
|
18
|
+
# http://bit.ly/1Pt2KAd
|
19
|
+
# --------------------------------------------------------------
|
20
|
+
|
21
|
+
##
|
22
|
+
# A generic test case for testing mechanize. Using a subclass of
|
23
|
+
# Mechanize::TestCase for your tests will create an isolated mechanize
|
24
|
+
# instance that won't pollute your filesystem or other tests.
|
25
|
+
#
|
26
|
+
# Once Mechanize::TestCase is loaded no HTTP requests will be made outside
|
27
|
+
# mechanize itself. All requests are handled via WEBrick servlets.
|
28
|
+
#
|
29
|
+
# Mechanize uses WEBrick servlets to test some functionality. You can run
|
30
|
+
# other HTTP clients against the servlets using:
|
31
|
+
#
|
32
|
+
# ruby -rmechanize/test_case/server -e0
|
33
|
+
#
|
34
|
+
# Which will launch a test server at http://localhost:8000
|
35
|
+
|
36
|
+
class Mechanize::TestCase < Minitest::Test
|
37
|
+
|
38
|
+
TEST_DIR = File.expand_path '../../../test', __FILE__
|
39
|
+
REQUESTS = []
|
40
|
+
|
41
|
+
##
|
42
|
+
# Creates a clean mechanize instance +@mech+ for use in tests.
|
43
|
+
|
44
|
+
def setup
|
45
|
+
super
|
46
|
+
|
47
|
+
REQUESTS.clear
|
48
|
+
@mech = Mechanize.new
|
49
|
+
@ssl_private_key = nil
|
50
|
+
@ssl_certificate = nil
|
51
|
+
end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Creates a fake page with URI http://fake.example and an empty, submittable
|
55
|
+
# form.
|
56
|
+
|
57
|
+
def fake_page agent = @mech
|
58
|
+
uri = URI 'http://fake.example/'
|
59
|
+
html = <<-END
|
60
|
+
<html>
|
61
|
+
<body>
|
62
|
+
<form><input type="submit" value="submit" /></form>
|
63
|
+
</body>
|
64
|
+
</html>
|
65
|
+
END
|
66
|
+
|
67
|
+
Mechanize::Page.new uri, nil, html, 200, agent
|
68
|
+
end
|
69
|
+
|
70
|
+
##
|
71
|
+
# Is the Encoding constant defined?
|
72
|
+
|
73
|
+
def have_encoding?
|
74
|
+
Object.const_defined? :Encoding
|
75
|
+
end
|
76
|
+
|
77
|
+
##
|
78
|
+
# Creates a Mechanize::Page with the given +body+
|
79
|
+
|
80
|
+
def html_page body
|
81
|
+
uri = URI 'http://example/'
|
82
|
+
Mechanize::Page.new uri, nil, body, 200, @mech
|
83
|
+
end
|
84
|
+
|
85
|
+
##
|
86
|
+
# Creates a Mechanize::CookieJar by parsing the given +str+
|
87
|
+
|
88
|
+
def cookie_jar str, uri = URI('http://example')
|
89
|
+
jar = Mechanize::CookieJar.new
|
90
|
+
|
91
|
+
jar.parse str, uri
|
92
|
+
|
93
|
+
jar
|
94
|
+
end
|
95
|
+
|
96
|
+
##
|
97
|
+
# Runs the block inside a temporary directory
|
98
|
+
|
99
|
+
def in_tmpdir
|
100
|
+
Dir.mktmpdir do |dir|
|
101
|
+
Dir.chdir dir do
|
102
|
+
yield
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
##
|
108
|
+
# Creates a Nokogiri Node +element+ with the given +attributes+
|
109
|
+
|
110
|
+
def node element, attributes = {}
|
111
|
+
doc = Nokogiri::HTML::Document.new
|
112
|
+
|
113
|
+
node = Nokogiri::XML::Node.new element, doc
|
114
|
+
|
115
|
+
attributes.each do |name, value|
|
116
|
+
node[name] = value
|
117
|
+
end
|
118
|
+
|
119
|
+
node
|
120
|
+
end
|
121
|
+
|
122
|
+
##
|
123
|
+
# Creates a Mechanize::Page for the given +uri+ with the given
|
124
|
+
# +content_type+, response +body+ and HTTP status +code+
|
125
|
+
|
126
|
+
def page uri, content_type = 'text/html', body = '', code = 200
|
127
|
+
uri = URI uri unless URI::Generic === uri
|
128
|
+
|
129
|
+
Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
|
130
|
+
@mech)
|
131
|
+
end
|
132
|
+
|
133
|
+
##
|
134
|
+
# Requests made during this test
|
135
|
+
|
136
|
+
def requests
|
137
|
+
REQUESTS
|
138
|
+
end
|
139
|
+
|
140
|
+
##
|
141
|
+
# An SSL private key. This key is the same across all test runs
|
142
|
+
|
143
|
+
def ssl_private_key
|
144
|
+
@ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
|
145
|
+
-----BEGIN RSA PRIVATE KEY-----
|
146
|
+
MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
|
147
|
+
bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
|
148
|
+
p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
|
149
|
+
6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
|
150
|
+
-----END RSA PRIVATE KEY-----
|
151
|
+
KEY
|
152
|
+
end
|
153
|
+
|
154
|
+
##
|
155
|
+
# An X509 certificate. This certificate is the same across all test runs
|
156
|
+
|
157
|
+
def ssl_certificate
|
158
|
+
@ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
|
159
|
+
-----BEGIN CERTIFICATE-----
|
160
|
+
MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
|
161
|
+
eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
|
162
|
+
OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
|
163
|
+
B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
|
164
|
+
4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
|
165
|
+
UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
166
|
+
//8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
|
167
|
+
-----END CERTIFICATE-----
|
168
|
+
CERT
|
169
|
+
end
|
170
|
+
|
171
|
+
##
|
172
|
+
# Creates a Tempfile with +content+ that is immediately unlinked
|
173
|
+
|
174
|
+
def tempfile content
|
175
|
+
body_io = Tempfile.new @NAME
|
176
|
+
body_io.unlink
|
177
|
+
body_io.write content
|
178
|
+
body_io.flush
|
179
|
+
body_io.rewind
|
180
|
+
|
181
|
+
body_io
|
182
|
+
end
|
183
|
+
|
184
|
+
end
|
185
|
+
|
186
|
+
require 'mechanize/test_case/servlets'
|
187
|
+
|
188
|
+
module Net # :nodoc:
|
189
|
+
end
|
190
|
+
|
191
|
+
class Net::HTTP # :nodoc:
|
192
|
+
alias :old_do_start :do_start
|
193
|
+
|
194
|
+
def do_start
|
195
|
+
@started = true
|
196
|
+
end
|
197
|
+
|
198
|
+
PAGE_CACHE = {}
|
199
|
+
|
200
|
+
alias :old_request :request
|
201
|
+
|
202
|
+
def request(req, *data, &block)
|
203
|
+
url = URI.parse(req.path)
|
204
|
+
path = WEBrick::HTTPUtils.unescape(url.path)
|
205
|
+
|
206
|
+
path = '/index.html' if path == '/'
|
207
|
+
|
208
|
+
res = ::Response.new
|
209
|
+
res.query_params = url.query
|
210
|
+
|
211
|
+
req.query = if 'POST' != req.method && url.query then
|
212
|
+
WEBrick::HTTPUtils.parse_query url.query
|
213
|
+
elsif req['content-type'] =~ /www-form-urlencoded/ then
|
214
|
+
WEBrick::HTTPUtils.parse_query req.body
|
215
|
+
elsif req['content-type'] =~ /boundary=(.+)/ then
|
216
|
+
boundary = WEBrick::HTTPUtils.dequote $1
|
217
|
+
WEBrick::HTTPUtils.parse_form_data req.body, boundary
|
218
|
+
else
|
219
|
+
{}
|
220
|
+
end
|
221
|
+
|
222
|
+
req.cookies = WEBrick::Cookie.parse(req['Cookie'])
|
223
|
+
|
224
|
+
Mechanize::TestCase::REQUESTS << req
|
225
|
+
|
226
|
+
if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
|
227
|
+
servlet = servlet_klass.new({})
|
228
|
+
servlet.send "do_#{req.method}", req, res
|
229
|
+
else
|
230
|
+
filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
|
231
|
+
unless PAGE_CACHE[filename]
|
232
|
+
open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
|
233
|
+
PAGE_CACHE[filename] = io.read
|
234
|
+
}
|
235
|
+
end
|
236
|
+
|
237
|
+
res.body = PAGE_CACHE[filename]
|
238
|
+
case filename
|
239
|
+
when /\.txt$/
|
240
|
+
res['Content-Type'] = 'text/plain'
|
241
|
+
when /\.jpg$/
|
242
|
+
res['Content-Type'] = 'image/jpeg'
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
res['Content-Type'] ||= 'text/html'
|
247
|
+
res.code ||= "200"
|
248
|
+
|
249
|
+
response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
|
250
|
+
response = response_klass.new res.http_version, res.code, res.message
|
251
|
+
|
252
|
+
res.header.each do |k,v|
|
253
|
+
v = v.first if v.length == 1
|
254
|
+
response[k] = v
|
255
|
+
end
|
256
|
+
|
257
|
+
res.cookies.each do |cookie|
|
258
|
+
response.add_field 'Set-Cookie', cookie.to_s
|
259
|
+
end
|
260
|
+
|
261
|
+
response['Content-Type'] ||= 'text/html'
|
262
|
+
response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
|
263
|
+
|
264
|
+
io = StringIO.new(res.body)
|
265
|
+
response.instance_variable_set :@socket, io
|
266
|
+
def io.read clen, dest = nil, _ = nil
|
267
|
+
if dest then
|
268
|
+
dest << super(clen)
|
269
|
+
else
|
270
|
+
super clen
|
271
|
+
end
|
272
|
+
end
|
273
|
+
|
274
|
+
body_exist = req.response_body_permitted? &&
|
275
|
+
response_klass.body_permitted?
|
276
|
+
|
277
|
+
response.instance_variable_set :@body_exist, body_exist
|
278
|
+
|
279
|
+
yield response if block_given?
|
280
|
+
|
281
|
+
response
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
class Net::HTTPRequest # :nodoc:
|
286
|
+
attr_accessor :query, :body, :cookies, :user
|
287
|
+
|
288
|
+
def host
|
289
|
+
'example'
|
290
|
+
end
|
291
|
+
|
292
|
+
def port
|
293
|
+
80
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
class Response # :nodoc:
|
298
|
+
include Net::HTTPHeader
|
299
|
+
|
300
|
+
attr_reader :code
|
301
|
+
attr_accessor :body, :query, :cookies
|
302
|
+
attr_accessor :query_params, :http_version
|
303
|
+
attr_accessor :header
|
304
|
+
|
305
|
+
def code=(c)
|
306
|
+
@code = c.to_s
|
307
|
+
end
|
308
|
+
|
309
|
+
alias :status :code
|
310
|
+
alias :status= :code=
|
311
|
+
|
312
|
+
def initialize
|
313
|
+
@header = {}
|
314
|
+
@body = ''
|
315
|
+
@code = nil
|
316
|
+
@query = nil
|
317
|
+
@cookies = []
|
318
|
+
@http_version = '1.1'
|
319
|
+
end
|
320
|
+
|
321
|
+
def read_body
|
322
|
+
yield body
|
323
|
+
end
|
324
|
+
|
325
|
+
def message
|
326
|
+
''
|
327
|
+
end
|
328
|
+
end
|
329
|
+
|
data/lib/gimme_poc/version.rb
CHANGED
data/lib/gimme_poc/web.rb
CHANGED
@@ -1,91 +1,97 @@
|
|
1
1
|
# Find the contact
|
2
|
-
module
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
2
|
+
module Web
|
3
|
+
attr_accessor :page, :agent, :url
|
4
|
+
|
5
|
+
# Captures http:// and https://
|
6
|
+
HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
|
7
|
+
|
8
|
+
##
|
9
|
+
# Go to a page using Mechanize.
|
10
|
+
# Sleep for a split second to not overload any servers.
|
11
|
+
#
|
12
|
+
# Returns nil if bad url is given.
|
13
|
+
def get(str)
|
14
|
+
prepare_get_request(str)
|
15
|
+
@page = @agent.get(@url)
|
16
|
+
rescue Exception => e
|
17
|
+
LogMessages.warn_err(e)
|
18
|
+
end
|
19
|
+
|
20
|
+
def prepare_get_request(str)
|
21
|
+
mech_setup
|
22
|
+
@url = format_url(str)
|
23
|
+
LogMessages.sending_get_request(url)
|
24
|
+
sleep(0.1)
|
25
|
+
end
|
20
26
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
puts "#{'Connection Timeout:'.red} #{e}"
|
29
|
-
rescue Net::HTTP::Persistent::Error
|
30
|
-
puts "#{'Connection Timeout:'.red} read timeout, too many resets."
|
27
|
+
def mech_setup
|
28
|
+
@agent = Mechanize.new do |a|
|
29
|
+
a.user_agent_alias = 'Mac Safari'
|
30
|
+
a.open_timeout = 7
|
31
|
+
a.read_timeout = 7
|
32
|
+
a.idle_timeout = 7
|
33
|
+
a.redirect_ok = true
|
31
34
|
end
|
35
|
+
end
|
32
36
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
37
|
+
##
|
38
|
+
# Mechanize needs absolute urls to work.
|
39
|
+
# If http:// or https:// isn't present, prepend http://.
|
40
|
+
def format_url(str)
|
41
|
+
LazyDomain.autohttp(str)
|
42
|
+
end
|
39
43
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
+
# Used for subdomain check. Not a permanent change to url variable.
|
45
|
+
def unformat_url(str)
|
46
|
+
str.gsub(HTTP_REGEX, '')
|
47
|
+
end
|
44
48
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
49
|
+
##
|
50
|
+
# Outputs domain of a url. Useful if subdomains are given to GimmePOC
|
51
|
+
# and they don't work.
|
52
|
+
#
|
53
|
+
# For example:
|
54
|
+
# Given http://maps.google.com, returns 'google.com'.
|
55
|
+
def orig_domain(str)
|
56
|
+
LazyDomain.parse(str).domain
|
57
|
+
rescue PublicSuffix::DomainInvalid => err
|
58
|
+
LogMessages.invalid_domain(err)
|
59
|
+
end
|
56
60
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
61
|
+
##
|
62
|
+
# Used in case of relative paths. Merging guarantees correct url.
|
63
|
+
# This needs a url string as argument to work.
|
64
|
+
# Produces a merged uri string.
|
65
|
+
def merged_link(url_str)
|
66
|
+
@page.uri.merge(url_str).to_s
|
67
|
+
end
|
64
68
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
69
|
+
##
|
70
|
+
# Expects relative paths and merges everything.
|
71
|
+
# Returns a string. If there's nothing, return nil.
|
72
|
+
#
|
73
|
+
# Adds a \b word boundary so the match must begin at the start of a word.
|
74
|
+
def link_with_href(str)
|
75
|
+
merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
|
76
|
+
rescue
|
77
|
+
nil
|
78
|
+
end
|
75
79
|
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
+
# Boolean, returns true if url is not identical to original domain.
|
81
|
+
#
|
82
|
+
# In the event that the url has a path, this splits everything on forward
|
83
|
+
# slash and selects far left item.
|
84
|
+
def subdomain?(str)
|
85
|
+
(unformat_url(str).split('/')[0] != orig_domain(str))
|
86
|
+
end
|
80
87
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
end
|
88
|
+
# TODO: Sometimes DNS will do a redirect and not give a 404.
|
89
|
+
# Need to prevent redirects.
|
90
|
+
#
|
91
|
+
# Blindly tests to see if a url goes through. If there is a 404 error,
|
92
|
+
# this will return nil.
|
93
|
+
def blind_test(url)
|
94
|
+
LogMessages.blind_testing(url)
|
95
|
+
get(url)
|
90
96
|
end
|
91
97
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gimme_poc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Mason
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -44,28 +44,56 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.0.
|
47
|
+
version: 0.0.2
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.0.
|
54
|
+
version: 0.0.2
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: shoulda
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '3.
|
61
|
+
version: '3.5'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '3.
|
68
|
+
version: '3.5'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: shoulda-context
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '1.2'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '1.2'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: minitest-reporters
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '1.1'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '1.1'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: pry
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,9 +134,12 @@ files:
|
|
106
134
|
- Rakefile
|
107
135
|
- lib/gimme_poc.rb
|
108
136
|
- lib/gimme_poc/contactpage.rb
|
137
|
+
- lib/gimme_poc/logger.rb
|
138
|
+
- lib/gimme_poc/logger/messages.rb
|
109
139
|
- lib/gimme_poc/poc.rb
|
110
140
|
- lib/gimme_poc/questions.rb
|
111
141
|
- lib/gimme_poc/save.rb
|
142
|
+
- lib/gimme_poc/test_case.rb
|
112
143
|
- lib/gimme_poc/version.rb
|
113
144
|
- lib/gimme_poc/web.rb
|
114
145
|
homepage: http://github.com/m8ss/gimme_poc
|
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
162
|
version: '0'
|
132
163
|
requirements: []
|
133
164
|
rubyforge_project:
|
134
|
-
rubygems_version: 2.
|
165
|
+
rubygems_version: 2.5.1
|
135
166
|
signing_key:
|
136
167
|
specification_version: 4
|
137
168
|
summary: Get a point of contact. Given a url or array of urls, extracts social media
|