gimme_poc 0.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35a045a491109a5ae34152577508585667371af0
4
- data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
3
+ metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
4
+ data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
5
5
  SHA512:
6
- metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
7
- data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
6
+ metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
7
+ data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458
data/README.md CHANGED
@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
10
10
  ## Installation
11
11
 
12
12
  ```
13
- gem install gimme_poc
13
+ $ gem install gimme_poc
14
14
 
15
15
  ```
16
16
 
17
17
  ## Set Up
18
18
 
19
19
  ```ruby
20
- require 'gimme_poc' # => that's it!
20
+ require 'gimme_poc'
21
21
 
22
22
  ```
23
23
 
24
24
  ## How it works
25
25
 
26
- Gimme POC is easy to use! Simply run this command.
27
-
28
26
  ```ruby
29
27
 
30
28
  Gimme.poc 'http://example.com'
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
56
54
 
57
55
  ```
58
56
 
59
- ## Referencing the search results
60
-
61
- To use your search results, simply run:
62
-
63
- ```ruby
64
-
65
- Gimme.memory
66
-
67
- ```
68
-
69
- ## Clearing the search results
70
-
71
- To clear search results and start afresh, run:
72
-
73
- ```ruby
74
-
75
- Gimme.reset!
76
-
77
- ```
78
-
79
- ## To do:
80
-
81
- - Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
82
- - Work on false positives of bad urls. (Bad urls should be skipped + DNS redirects don't give 404 errors)
83
-
84
-
85
- More to follow...
data/Rakefile CHANGED
@@ -1,7 +1,22 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test*.rb'
8
+ test.verbose = true
9
+ end
3
10
 
4
11
  desc 'Open console with gimme_poc loaded'
5
12
  task :console do
6
13
  exec 'pry -r ./lib/gimme_poc.rb'
7
14
  end
15
+
16
+ desc 'make a release'
17
+ task :release do
18
+ exec './script/release'
19
+ end
20
+
21
+ task c: :console # alias 'c' for console
22
+ task default: :test
@@ -3,6 +3,7 @@ require 'lazy_domain'
3
3
  require 'mechanize'
4
4
  require_relative './gimme_poc/contactpage'
5
5
  require_relative './gimme_poc/poc'
6
+ require_relative './gimme_poc/logger'
6
7
  require_relative './gimme_poc/questions'
7
8
  require_relative './gimme_poc/save'
8
9
  require_relative './gimme_poc/version'
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
11
12
  # Find the contact
12
13
  module Gimme
13
14
  class << self
15
+ include Web
16
+ include Questions
17
+ include Save
18
+ include ContactPage
19
+
14
20
  attr_accessor :page, :contact, :contact_links, :url
21
+ attr_reader :status_code
15
22
 
16
- # Simple regex that looks for ###.#### or ###-####
17
- PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
23
+ def start_url_process(url)
24
+ LogMessages.start_url(url)
25
+ case
26
+ when LazyDomain.valid?(url) == false
27
+ LogMessages.invalid_domain(url)
28
+ @status_code = 0
29
+ when subdomain?(url)
30
+ LogMessages.subdomain
31
+ @status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
32
+ else
33
+ @status_code = 0 if get(url).nil?
34
+ end
35
+ end
18
36
 
19
- # Captures http:// and https://
20
- HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
37
+ def start_contact_process(url)
38
+ start_contact_links
39
+ attempt = save_available_contacts(url)
40
+ info = attempt.info if attempt && attempt.respond_to?(:info)
41
+ return attempt unless info.nil? || info.empty?
42
+ go_to_contact_page(url)
43
+ end
21
44
 
22
45
  ##
23
46
  # The main method!
@@ -25,29 +48,13 @@ module Gimme
25
48
  # If url is bad, it's converted to nil in 'get' method and skipped over.
26
49
  def poc(arr)
27
50
  arr = arr.split unless arr.is_a?(Array)
51
+ results = []
28
52
  arr.each do |url|
29
- puts '-' * 50
30
- puts "starting: #{url}"
31
- unless LazyDomain.valid?(url)
32
- puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
33
- next
34
- end
35
- case
36
- when subdomain?(url)
37
- puts '(This url is a subdomain. Will try both sub and root domain.)'
38
- next if get(url).nil? && get(orig_domain(url)).nil?
39
- else
40
- next if get(url).nil?
41
- end
42
- start_contact_links
43
- mechpage = go_to_contact_page(url)
44
- if mechpage.nil?
45
- puts '(empty page, exiting.)'
46
- else
47
- save_available_contacts(mechpage.uri.to_s)
48
- end
53
+ start_url_process(url)
54
+ next if @status_code == 0
55
+ results << start_contact_process(url)
49
56
  end
50
- Search.all_sites # Return results from all sites.
57
+ results.length == 1 ? results.first : results
51
58
  end
52
59
 
53
60
  # Convenience method.
@@ -1,56 +1,55 @@
1
1
  # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Scans for contact page. If it doesn't work on the first try,
6
- # It will look for english versions and try again. Processes left to right.
7
- #
8
- # Returns nil if no contact page can be found.
9
- def go_to_contact_page(url)
10
- contact_page(url) || english_contact_page(url)
11
- end
12
-
13
- ##
14
- # Looks for contact page. Gets page if available.
15
- # If no contact link is available, it will blind test '../contact'.
16
- # Returns nil if nothing can be found.
17
- def contact_page(url)
18
- puts 'now looking for contact pages'
19
- contact_link = link_with_href(/contact|Contact/)
20
- contact_test_page = merged_link('../contact')
2
+ module ContactPage
3
+ attr_accessor :contact_link
4
+
5
+ ##
6
+ # Scans for contact page. If it doesn't work on the first try,
7
+ # it will look for English versions and try again. Processes left to right.
8
+ #
9
+ # Returns nil if no contact page can be found.
10
+ def go_to_contact_page(url)
11
+ contact_page(url) || english_contact_page(url)
12
+ end
21
13
 
22
- case
23
- when !contact_link.nil?
24
- puts "#{'Success:'.green} Found contact link!\n"
25
- get(merged_link(contact_link))
26
- else
27
- puts "#{'Warning:'.yellow} couldn't find contact link"
28
- blind_test(contact_test_page) || get(orig_domain(url))
29
- end
14
+ ##
15
+ # Looks for contact page. Gets page if available.
16
+ # If no contact link is available, it will blind test '../contact'.
17
+ # Returns nil if nothing can be found.
18
+ def contact_page(url)
19
+ LogMessages.looking_for_contact_page
20
+ @contact_link = link_with_href(/contact|Contact/)
21
+ contact_test_page = merged_link('../contact')
22
+ case
23
+ when !contact_link.nil?
24
+ LogMessages.found_contact_link
25
+ get(merged_link(@contact_link))
26
+ else
27
+ LogMessages.no_contact_link
28
+ get(orig_domain(url)) if blind_test(contact_test_page).nil?
30
29
  end
30
+ end
31
31
 
32
- ##
33
- # Looks for english page. Gets page if available then looks for
34
- # english contact page.
35
- #
36
- # If no english link is available,
37
- # it will blind test '../en' and '../english'.
38
- # Returns nil if nothing can be found.
39
- def english_contact_page(url)
40
- puts "\nLooking for english page..."
41
- english_link = page.link_with(href: %r{en\/|english|English})
42
- test_en_page = merged_link('../en')
43
- test_english_page = merged_link('../english')
32
+ ##
33
+ # Looks for english page. Gets page if available then looks for
34
+ # english contact page.
35
+ #
36
+ # If no english link is available,
37
+ # it will blind test '../en' and '../english'.
38
+ # Returns nil if nothing can be found.
39
+ def english_contact_page(url)
40
+ LogMessages.looking_for_english_page
41
+ english_link = @page.link_with(href: %r{en\/|english|English})
42
+ test_en_page = merged_link('../en')
43
+ test_english_page = merged_link('../english')
44
44
 
45
- case
46
- when !english_link.nil?
47
- puts "#{'Success:'.green} found english link!"
48
- get(merged_link(english_link.uri))
49
- else
50
- blind_test(test_en_page) || blind_test(test_english_page)
51
- puts "\n(restarting)\n"
52
- contact_page(url)
53
- end
45
+ case
46
+ when !english_link.nil?
47
+ LogMessages.found_english_link
48
+ get(merged_link(english_link.uri))
49
+ else
50
+ blind_test(test_en_page) || blind_test(test_english_page)
51
+ LogMessages.restarting
52
+ contact_page(url)
54
53
  end
55
54
  end
56
55
  end
@@ -0,0 +1,16 @@
1
+ require 'logger'
2
+ require_relative './logger/messages'
3
+
4
+ # Output info messages during gimme poc crawl.
5
+ module Gimme
6
+ class << self
7
+ include LogMessages
8
+ attr_accessor :logger
9
+ end
10
+ end
11
+
12
+ Gimme.logger = Logger.new(STDOUT)
13
+ Gimme.logger.level = Logger::INFO
14
+ Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
15
+ "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
16
+ end
@@ -0,0 +1,77 @@
1
+
2
+ module LogMessages
3
+ class << self
4
+ def loginfo(str)
5
+ Gimme.logger.info(str)
6
+ end
7
+
8
+ def logwarn(str)
9
+ Gimme.logger.info(str)
10
+ end
11
+
12
+ # Info
13
+ # -----------------------------------------------------------------
14
+ def start_url(url)
15
+ puts '-' * 50
16
+ loginfo "starting: #{url}"
17
+ end
18
+
19
+ def sending_get_request(url)
20
+ loginfo("sending GET request to: #{url}")
21
+ end
22
+
23
+ def blind_testing(url)
24
+ loginfo("blind testing: #{url}")
25
+ end
26
+
27
+ def invalid_domain(url)
28
+ loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
29
+ end
30
+
31
+ def subdomain
32
+ loginfo '(This url is a subdomain. Will try both sub and root domain.)'
33
+ end
34
+
35
+ def empty_page
36
+ loginfo '(empty page, exiting.)'
37
+ end
38
+
39
+ def looking_for_contact_page
40
+ loginfo('now looking for contact pages')
41
+ end
42
+
43
+ def found_contact_link
44
+ loginfo("#{'Success:'.green} Found contact link!")
45
+ end
46
+
47
+ def looking_for_english_page
48
+ loginfo('Looking for english page...')
49
+ end
50
+
51
+ def found_english_link
52
+ loginfo("#{'Success:'.green} found english link!")
53
+ end
54
+
55
+ def saving_contact_info(url)
56
+ loginfo("saving available contact information from #{url}")
57
+ end
58
+
59
+ # Warnings
60
+ # -----------------------------------------------------------------
61
+ def no_contact_link
62
+ logwarn("#{'Warning:'.yellow} couldn't find contact link")
63
+ end
64
+
65
+ def restarting
66
+ logwarn('restarting'.yellow)
67
+ end
68
+
69
+ def nothing_to_save
70
+ logwarn '(nothing to save)'
71
+ end
72
+
73
+ def warn_err(error)
74
+ logwarn("#{'Error:'.red} #{error}")
75
+ end
76
+ end
77
+ end
@@ -1,10 +1,12 @@
1
+ require "ostruct"
2
+
1
3
  module Gimme
2
4
  # Collection of sites searched.
3
5
  class Search
4
- @all_sites = []
6
+ attr_accessor :all_sites
5
7
 
6
- class << self
7
- attr_accessor :all_sites
8
+ def initialize
9
+ @all_sites = []
8
10
  end
9
11
 
10
12
  # Each site is saved to this class
@@ -13,8 +15,7 @@ module Gimme
13
15
 
14
16
  def initialize(url, contact_info_hsh)
15
17
  @host = url
16
- @info = contact_info_hsh
17
- Search.all_sites << self
18
+ @info = OpenStruct.new(contact_info_hsh)
18
19
  end
19
20
  end
20
21
  end
@@ -1,33 +1,27 @@
1
- # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Boolean, returns true if anything is present
6
- # after running scan_for_contacts and deleting failures.
7
- def something_to_save?(hsh)
8
- delete_failures(hsh).any?
9
- end
10
-
11
- # Boolean, returns true if email is present.
12
- def email_available?
13
- !link_with_href('mailto').nil?
14
- end
1
+ # Reflective questions for situational awareness.
2
+ module Questions
3
+ # Simple regex that looks for ###.#### or ###-####
4
+ PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
5
+
6
+ # Boolean, returns true if email is present.
7
+ def email_available?
8
+ !link_with_href('mailto').nil?
9
+ end
15
10
 
16
- # Boolean, returns true if phone number is present.
17
- def phone_available?
18
- !(page.body =~ PHONE_REGEX).nil?
19
- end
11
+ # Boolean, returns true if phone number is present.
12
+ def phone_available?
13
+ !(@page.body =~ PHONE_REGEX).nil?
14
+ end
20
15
 
21
- ##
22
- # TODO: build better conditional to prevent false positives.
23
- # There could be other forms like newsletter signup, etc.
24
- #
25
- # If there is a form with more than one field, this returns true.
26
- # Forms with one field are typically search boxes.
27
- #
28
- # Boolean, returns true if form is present on page.
29
- def contactform_available?
30
- !(page.forms.select { |x| x.fields.length > 1 }.empty?)
31
- end
16
+ ##
17
+ # TODO: build better conditional to prevent false positives.
18
+ # There could be other forms like newsletter signup, etc.
19
+ #
20
+ # If there is a form with more than one field, this returns true.
21
+ # Forms with one field are typically search boxes.
22
+ #
23
+ # Boolean, returns true if form is present on page.
24
+ def contactform_available?
25
+ !(@page.forms.select { |x| x.fields.length > 1 }.empty?)
32
26
  end
33
27
  end
@@ -1,62 +1,70 @@
1
- module Gimme
2
- class << self
3
- ##
4
- # Returns anything that is possible to save, otherwise returns nil.
5
- # Booleans for phone, email, or contact form will display True or False.
6
- #
7
- # Add periods to link hrefs to prevent false positives. Must escape periods
8
- # with a backslash or else it will be a regex wild card.
9
- def scan_for_contacts
10
- {
11
- contactpage: link_with_href('contact'),
12
- email_present: "#{email_available?}",
13
- phone_present: "#{phone_available?}",
14
- contact_form: "#{contactform_available?}",
15
- facebook: link_with_href('facebook\.'),
16
- twitter: link_with_href('twitter\.'),
17
- youtube: link_with_href('youtube\.'),
18
- googleplus: link_with_href('plus\.google\.'),
19
- linkedin: link_with_href('linkedin\.')
20
- }
21
- end
1
+ module Save
2
+ ##
3
+ # Boolean, returns true if anything is present
4
+ # after running scan_for_contacts and deleting failures.
5
+ # Remember that false is a string in the hash
6
+ def something_to_save?(hsh)
7
+ hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
8
+ end
22
9
 
23
- # Starts/Restarts @contacts_links hash
24
- def start_contact_links
25
- @contact_links = {}
26
- end
10
+ ##
11
+ # Returns anything that is possible to save, otherwise returns nil.
12
+ # Booleans for phone, email, or contact form will display True or False.
13
+ #
14
+ # Add periods to link hrefs to prevent false positives. Must escape periods
15
+ # with a backslash or else it will be a regex wild card.
16
+ def scan_for_contacts
17
+ {
18
+ contactpage: link_with_href('contact'),
19
+ email_present: "#{email_available?}",
20
+ phone_present: "#{phone_available?}",
21
+ contact_form: "#{contactform_available?}",
22
+ facebook: link_with_href('facebook\.'),
23
+ twitter: link_with_href('twitter\.'),
24
+ youtube: link_with_href('youtube\.'),
25
+ googleplus: link_with_href('plus\.google\.'),
26
+ linkedin: link_with_href('linkedin\.')
27
+ }
28
+ rescue => e
29
+ puts "Error: #{e}"
30
+ end
27
31
 
28
- # Used in save_available_contacts to save each valid link.
29
- def save_link(key, url)
30
- return if key.nil? || url.nil?
31
- @contact_links[key] = url
32
- end
32
+ # Starts/Restarts the @contact_links hash
33
+ def start_contact_links
34
+ @contact_links = {}
35
+ end
33
36
 
34
- ##
35
- # Remove negatives from the contacts hash.
36
- # Deletes a key value pair with a value of either nil or false.
37
- # Remember that false is a string.
38
- def delete_failures(hsh)
39
- hsh.delete_if { |_k, v| v.nil? || v == 'false' }
40
- end
37
+ # Used in save_available_contacts to save each valid link.
38
+ def save_link(key, url)
39
+ return if key.nil? || url.nil?
40
+ @contact_links[key] = url
41
+ end
42
+
43
+ ##
44
+ # Remove negatives from the contacts hash.
45
+ # Deletes a key value pair with a value of either nil or false.
46
+ # Remember that false is stored in the hash as a string.
47
+ def delete_failures(hsh)
48
+ hsh.delete_if { |_k, v| v.nil? || v == 'false' }
49
+ end
41
50
 
42
- # Saves any available contact info to @contact_links.
43
- def save_available_contacts(url, hsh = scan_for_contacts)
44
- if something_to_save?(hsh)
45
- puts "\nsaving available contact information from #{url}"
46
- if hsh.is_a?(Hash)
47
- hsh.each do |k, v|
48
- save_link(k, v) # saves to @contact_links
49
- end
50
- delete_failures(@contact_links)
51
- puts "#{@contact_links}".cyan # same as @contact_links
52
- else
53
- fail ArgumentError, "expected hash but got #{hsh.class}"
51
+ # Saves any available contact info to @contact_links.
52
+ def save_available_contacts(url, hsh = scan_for_contacts)
53
+ if something_to_save?(hsh)
54
+ LogMessages.saving_contact_info(url)
55
+ if hsh.is_a?(Hash)
56
+ hsh.each do |k, v|
57
+ save_link(k, v) # saves to @contact_links
54
58
  end
55
- Search::POC.new(url, @contact_links)
59
+ delete_failures(@contact_links)
60
+ puts "#{@contact_links}".cyan # same as @contact_links
56
61
  else
57
- puts '(nothing to save)'
58
- return
62
+ fail ArgumentError, "expected hash but got #{hsh.class}"
59
63
  end
64
+ Gimme::Search::POC.new(url, @contact_links)
65
+ else
66
+ LogMessages.nothing_to_save
67
+ return
60
68
  end
61
69
  end
62
70
  end
@@ -0,0 +1,329 @@
1
+ require 'mechanize'
2
+ require 'logger'
3
+ require 'tempfile'
4
+ require 'tmpdir'
5
+ require 'webrick'
6
+ require 'zlib'
7
+
8
+ require 'rubygems'
9
+
10
+ begin
11
+ gem 'minitest'
12
+ rescue Gem::LoadError
13
+ end
14
+
15
+ ##
16
+ # Source:
17
+ #
18
+ # http://bit.ly/1Pt2KAd
19
+ # --------------------------------------------------------------
20
+
21
+ ##
22
+ # A generic test case for testing mechanize. Using a subclass of
23
+ # Mechanize::TestCase for your tests will create an isolated mechanize
24
+ # instance that won't pollute your filesystem or other tests.
25
+ #
26
+ # Once Mechanize::TestCase is loaded no HTTP requests will be made outside
27
+ # mechanize itself. All requests are handled via WEBrick servlets.
28
+ #
29
+ # Mechanize uses WEBrick servlets to test some functionality. You can run
30
+ # other HTTP clients against the servlets using:
31
+ #
32
+ # ruby -rmechanize/test_case/server -e0
33
+ #
34
+ # Which will launch a test server at http://localhost:8000
35
+
36
+ class Mechanize::TestCase < Minitest::Test
37
+
38
+ TEST_DIR = File.expand_path '../../../test', __FILE__
39
+ REQUESTS = []
40
+
41
+ ##
42
+ # Creates a clean mechanize instance +@mech+ for use in tests.
43
+
44
+ def setup
45
+ super
46
+
47
+ REQUESTS.clear
48
+ @mech = Mechanize.new
49
+ @ssl_private_key = nil
50
+ @ssl_certificate = nil
51
+ end
52
+
53
+ ##
54
+ # Creates a fake page with URI http://fake.example and an empty, submittable
55
+ # form.
56
+
57
+ def fake_page agent = @mech
58
+ uri = URI 'http://fake.example/'
59
+ html = <<-END
60
+ <html>
61
+ <body>
62
+ <form><input type="submit" value="submit" /></form>
63
+ </body>
64
+ </html>
65
+ END
66
+
67
+ Mechanize::Page.new uri, nil, html, 200, agent
68
+ end
69
+
70
+ ##
71
+ # Is the Encoding constant defined?
72
+
73
+ def have_encoding?
74
+ Object.const_defined? :Encoding
75
+ end
76
+
77
+ ##
78
+ # Creates a Mechanize::Page with the given +body+
79
+
80
+ def html_page body
81
+ uri = URI 'http://example/'
82
+ Mechanize::Page.new uri, nil, body, 200, @mech
83
+ end
84
+
85
+ ##
86
+ # Creates a Mechanize::CookieJar by parsing the given +str+
87
+
88
+ def cookie_jar str, uri = URI('http://example')
89
+ jar = Mechanize::CookieJar.new
90
+
91
+ jar.parse str, uri
92
+
93
+ jar
94
+ end
95
+
96
+ ##
97
+ # Runs the block inside a temporary directory
98
+
99
+ def in_tmpdir
100
+ Dir.mktmpdir do |dir|
101
+ Dir.chdir dir do
102
+ yield
103
+ end
104
+ end
105
+ end
106
+
107
+ ##
108
+ # Creates a Nokogiri Node +element+ with the given +attributes+
109
+
110
+ def node element, attributes = {}
111
+ doc = Nokogiri::HTML::Document.new
112
+
113
+ node = Nokogiri::XML::Node.new element, doc
114
+
115
+ attributes.each do |name, value|
116
+ node[name] = value
117
+ end
118
+
119
+ node
120
+ end
121
+
122
+ ##
123
+ # Creates a Mechanize::Page for the given +uri+ with the given
124
+ # +content_type+, response +body+ and HTTP status +code+
125
+
126
+ def page uri, content_type = 'text/html', body = '', code = 200
127
+ uri = URI uri unless URI::Generic === uri
128
+
129
+ Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
130
+ @mech)
131
+ end
132
+
133
+ ##
134
+ # Requests made during this test
135
+
136
+ def requests
137
+ REQUESTS
138
+ end
139
+
140
+ ##
141
+ # An SSL private key. This key is the same across all test runs
142
+
143
+ def ssl_private_key
144
+ @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
145
+ -----BEGIN RSA PRIVATE KEY-----
146
+ MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
147
+ bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
148
+ p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
149
+ 6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
150
+ -----END RSA PRIVATE KEY-----
151
+ KEY
152
+ end
153
+
154
+ ##
155
+ # An X509 certificate. This certificate is the same across all test runs
156
+
157
+ def ssl_certificate
158
+ @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
159
+ -----BEGIN CERTIFICATE-----
160
+ MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
161
+ eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
162
+ OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
163
+ B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
164
+ 4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
165
+ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
166
+ //8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
167
+ -----END CERTIFICATE-----
168
+ CERT
169
+ end
170
+
171
+ ##
172
+ # Creates a Tempfile with +content+ that is immediately unlinked
173
+
174
+ def tempfile content
175
+ body_io = Tempfile.new @NAME
176
+ body_io.unlink
177
+ body_io.write content
178
+ body_io.flush
179
+ body_io.rewind
180
+
181
+ body_io
182
+ end
183
+
184
+ end
185
+
186
+ require 'mechanize/test_case/servlets'
187
+
188
+ module Net # :nodoc:
189
+ end
190
+
191
+ class Net::HTTP # :nodoc:
192
+ alias :old_do_start :do_start
193
+
194
+ def do_start
195
+ @started = true
196
+ end
197
+
198
+ PAGE_CACHE = {}
199
+
200
+ alias :old_request :request
201
+
202
+ def request(req, *data, &block)
203
+ url = URI.parse(req.path)
204
+ path = WEBrick::HTTPUtils.unescape(url.path)
205
+
206
+ path = '/index.html' if path == '/'
207
+
208
+ res = ::Response.new
209
+ res.query_params = url.query
210
+
211
+ req.query = if 'POST' != req.method && url.query then
212
+ WEBrick::HTTPUtils.parse_query url.query
213
+ elsif req['content-type'] =~ /www-form-urlencoded/ then
214
+ WEBrick::HTTPUtils.parse_query req.body
215
+ elsif req['content-type'] =~ /boundary=(.+)/ then
216
+ boundary = WEBrick::HTTPUtils.dequote $1
217
+ WEBrick::HTTPUtils.parse_form_data req.body, boundary
218
+ else
219
+ {}
220
+ end
221
+
222
+ req.cookies = WEBrick::Cookie.parse(req['Cookie'])
223
+
224
+ Mechanize::TestCase::REQUESTS << req
225
+
226
+ if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
227
+ servlet = servlet_klass.new({})
228
+ servlet.send "do_#{req.method}", req, res
229
+ else
230
+ filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
231
+ unless PAGE_CACHE[filename]
232
+ open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
233
+ PAGE_CACHE[filename] = io.read
234
+ }
235
+ end
236
+
237
+ res.body = PAGE_CACHE[filename]
238
+ case filename
239
+ when /\.txt$/
240
+ res['Content-Type'] = 'text/plain'
241
+ when /\.jpg$/
242
+ res['Content-Type'] = 'image/jpeg'
243
+ end
244
+ end
245
+
246
+ res['Content-Type'] ||= 'text/html'
247
+ res.code ||= "200"
248
+
249
+ response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
250
+ response = response_klass.new res.http_version, res.code, res.message
251
+
252
+ res.header.each do |k,v|
253
+ v = v.first if v.length == 1
254
+ response[k] = v
255
+ end
256
+
257
+ res.cookies.each do |cookie|
258
+ response.add_field 'Set-Cookie', cookie.to_s
259
+ end
260
+
261
+ response['Content-Type'] ||= 'text/html'
262
+ response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
263
+
264
+ io = StringIO.new(res.body)
265
+ response.instance_variable_set :@socket, io
266
+ def io.read clen, dest = nil, _ = nil
267
+ if dest then
268
+ dest << super(clen)
269
+ else
270
+ super clen
271
+ end
272
+ end
273
+
274
+ body_exist = req.response_body_permitted? &&
275
+ response_klass.body_permitted?
276
+
277
+ response.instance_variable_set :@body_exist, body_exist
278
+
279
+ yield response if block_given?
280
+
281
+ response
282
+ end
283
+ end
284
+
285
+ class Net::HTTPRequest # :nodoc:
286
+ attr_accessor :query, :body, :cookies, :user
287
+
288
+ def host
289
+ 'example'
290
+ end
291
+
292
+ def port
293
+ 80
294
+ end
295
+ end
296
+
297
+ class Response # :nodoc:
298
+ include Net::HTTPHeader
299
+
300
+ attr_reader :code
301
+ attr_accessor :body, :query, :cookies
302
+ attr_accessor :query_params, :http_version
303
+ attr_accessor :header
304
+
305
+ def code=(c)
306
+ @code = c.to_s
307
+ end
308
+
309
+ alias :status :code
310
+ alias :status= :code=
311
+
312
+ def initialize
313
+ @header = {}
314
+ @body = ''
315
+ @code = nil
316
+ @query = nil
317
+ @cookies = []
318
+ @http_version = '1.1'
319
+ end
320
+
321
+ def read_body
322
+ yield body
323
+ end
324
+
325
+ def message
326
+ ''
327
+ end
328
+ end
329
+
@@ -1,3 +1,3 @@
1
1
  module Gimme
2
- VERSION = '0.0.5'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -1,91 +1,97 @@
1
1
  # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Go to a page using Mechanize.
6
- # Sleep for a split second to not overload any servers.
7
- #
8
- # Returns nil if bad url is given.
9
- def get(str)
10
- url = format_url(str)
11
- puts "sending GET request to: #{url}"
12
- sleep(0.1)
13
- @page = Mechanize.new do |a|
14
- a.user_agent_alias = 'Mac Safari'
15
- a.open_timeout = 7
16
- a.read_timeout = 7
17
- a.idle_timeout = 7
18
- a.redirect_ok = true
19
- end.get(url)
2
+ module Web
3
+ attr_accessor :page, :agent, :url
4
+
5
+ # Captures http:// and https://
6
+ HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
7
+
8
+ ##
9
+ # Go to a page using Mechanize.
10
+ # Sleep for a split second to not overload any servers.
11
+ #
12
+ # Returns nil if bad url is given.
13
+ def get(str)
14
+ prepare_get_request(str)
15
+ @page = @agent.get(@url)
16
+ rescue Exception => e
17
+ LogMessages.warn_err(e)
18
+ end
19
+
20
+ def prepare_get_request(str)
21
+ mech_setup
22
+ @url = format_url(str)
23
+ LogMessages.sending_get_request(url)
24
+ sleep(0.1)
25
+ end
20
26
 
21
- rescue Mechanize::ResponseCodeError => e
22
- puts "#{'Response Error:'.red} #{e}"
23
- rescue SocketError => e
24
- puts "#{'Socket Error:'.red} #{e}"
25
- rescue Net::OpenTimeout => e
26
- puts "#{'Connection Timeout:'.red} #{e}"
27
- rescue Errno::ETIMEDOUT => e
28
- puts "#{'Connection Timeout:'.red} #{e}"
29
- rescue Net::HTTP::Persistent::Error
30
- puts "#{'Connection Timeout:'.red} read timeout, too many resets."
27
+ def mech_setup
28
+ @agent = Mechanize.new do |a|
29
+ a.user_agent_alias = 'Mac Safari'
30
+ a.open_timeout = 7
31
+ a.read_timeout = 7
32
+ a.idle_timeout = 7
33
+ a.redirect_ok = true
31
34
  end
35
+ end
32
36
 
33
- ##
34
- # Mechanize needs absolute urls to work.
35
- # If http:// or https:// isn't present, append http://.
36
- def format_url(str)
37
- LazyDomain.autohttp(str)
38
- end
37
+ ##
38
+ # Mechanize needs absolute urls to work.
39
+ # If http:// or https:// isn't present, prepend http://.
40
+ def format_url(str)
41
+ LazyDomain.autohttp(str)
42
+ end
39
43
 
40
- # Used for subdomain check. Not a permanent change to url variable.
41
- def unformat_url(str)
42
- str.gsub(HTTP_REGEX, '')
43
- end
44
+ # Used for subdomain check. Not a permanent change to url variable.
45
+ def unformat_url(str)
46
+ str.gsub(HTTP_REGEX, '')
47
+ end
44
48
 
45
- ##
46
- # Outputs domain of a url. Useful if subdomains are given to GimmePOC
47
- # and they don't work.
48
- #
49
- # For example:
50
- # Given http://maps.google.com, returns 'google.com'.
51
- def orig_domain(str)
52
- LazyDomain.parse(str).domain
53
- rescue PublicSuffix::DomainInvalid => e
54
- puts "#{'Invalid Domain:'.red} #{e}"
55
- end
49
+ ##
50
+ # Outputs domain of a url. Useful if subdomains are given to GimmePOC
51
+ # and they don't work.
52
+ #
53
+ # For example:
54
+ # Given http://maps.google.com, returns 'google.com'.
55
+ def orig_domain(str)
56
+ LazyDomain.parse(str).domain
57
+ rescue PublicSuffix::DomainInvalid => err
58
+ LogMessages.invalid_domain(err)
59
+ end
56
60
 
57
- ##
58
- # Used in case of relative paths. Merging guarantees correct url.
59
- # This needs a url string as argument to work.
60
- # Produces a merged uri string.
61
- def merged_link(url_str)
62
- page.uri.merge(url_str).to_s
63
- end
61
+ ##
62
+ # Used in case of relative paths. Merging guarantees correct url.
63
+ # This needs a url string as argument to work.
64
+ # Produces a merged uri string.
65
+ def merged_link(url_str)
66
+ @page.uri.merge(url_str).to_s
67
+ end
64
68
 
65
- ##
66
- # Expects relative paths and merges everything.
67
- # Returns a string. If there's nothing, return nil.
68
- #
69
- # Add \b word block to ensure whole word is searched.
70
- def link_with_href(str)
71
- merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
72
- rescue
73
- nil
74
- end
69
+ ##
70
+ # Expects relative paths and merges everything.
71
+ # Returns a string. If there's nothing, return nil.
72
+ #
73
+ # A leading \b word boundary ensures the match starts at the beginning of a word.
74
+ def link_with_href(str)
75
+ merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
76
+ rescue
77
+ nil
78
+ end
75
79
 
76
- # Boolean, returns true if url is not identical to original domain.
77
- def subdomain?(str)
78
- (unformat_url(str) != orig_domain(str))
79
- end
80
+ # Boolean, returns true if url is not identical to original domain.
81
+ #
82
+ # In the event that the url has a path, this splits everything on forward
83
+ # slash and selects far left item.
84
+ def subdomain?(str)
85
+ (unformat_url(str).split('/')[0] != orig_domain(str))
86
+ end
80
87
 
81
- # TODO: Sometimes DNS will do a redirect and not give a 404.
82
- # Need to prevent redirects.
83
- #
84
- # Blindly tests to see if a url goes through. If there is a 404 error,
85
- # this will return nil.
86
- def blind_test(url)
87
- puts "\n(blind testing: #{url})"
88
- get(url)
89
- end
88
+ # TODO: Sometimes DNS will do a redirect and not give a 404.
89
+ # Need to prevent redirects.
90
+ #
91
+ # Blindly tests to see if a url goes through. If there is a 404 error,
92
+ # this will return nil.
93
+ def blind_test(url)
94
+ LogMessages.blind_testing(url)
95
+ get(url)
90
96
  end
91
97
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gimme_poc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-11 00:00:00.000000000 Z
11
+ date: 2017-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -44,28 +44,56 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.0.1
47
+ version: 0.0.2
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.0.1
54
+ version: 0.0.2
55
55
  - !ruby/object:Gem::Dependency
56
- name: rspec
56
+ name: shoulda
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '3.3'
61
+ version: '3.5'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '3.3'
68
+ version: '3.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: shoulda-context
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest-reporters
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.1'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: pry
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -106,9 +134,12 @@ files:
106
134
  - Rakefile
107
135
  - lib/gimme_poc.rb
108
136
  - lib/gimme_poc/contactpage.rb
137
+ - lib/gimme_poc/logger.rb
138
+ - lib/gimme_poc/logger/messages.rb
109
139
  - lib/gimme_poc/poc.rb
110
140
  - lib/gimme_poc/questions.rb
111
141
  - lib/gimme_poc/save.rb
142
+ - lib/gimme_poc/test_case.rb
112
143
  - lib/gimme_poc/version.rb
113
144
  - lib/gimme_poc/web.rb
114
145
  homepage: http://github.com/m8ss/gimme_poc
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
162
  version: '0'
132
163
  requirements: []
133
164
  rubyforge_project:
134
- rubygems_version: 2.4.5
165
+ rubygems_version: 2.5.1
135
166
  signing_key:
136
167
  specification_version: 4
137
168
  summary: Get a point of contact. Given a url or array of urls, extracts social media