RubyGems - gimme_poc - Versions diffs - 0.0.5 → 1.1.0 - Mend

gimme_poc 0.0.5 → 1.1.0

Files changed (14) hide show

checksums.yaml +4 -4
data/README.md +2 -31
data/Rakefile +15 -0
data/lib/gimme_poc.rb +32 -25
data/lib/gimme_poc/contactpage.rb +47 -48
data/lib/gimme_poc/logger.rb +16 -0
data/lib/gimme_poc/logger/messages.rb +77 -0
data/lib/gimme_poc/poc.rb +6 -5
data/lib/gimme_poc/questions.rb +23 -29
data/lib/gimme_poc/save.rb +60 -52
data/lib/gimme_poc/test_case.rb +329 -0
data/lib/gimme_poc/version.rb +1 -1
data/lib/gimme_poc/web.rb +85 -79
metadata +39 -8

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 35a045a491109a5ae34152577508585667371af0
-  data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
+  metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
+  data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
 SHA512:
-  metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
-  data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
+  metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
+  data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458

data/README.md CHANGED

@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
 ## Installation
 ```
-gem install gimme_poc
+$ gem install gimme_poc
 ```
 ## Set Up
 ```ruby
-require 'gimme_poc' # => that's it!
+require 'gimme_poc'
 ```
 ## How it works
-Gimme POC is easy to use! Simply run this command.
 ```ruby
 Gimme.poc 'http://example.com'
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
 ```
-## Referencing the search results
- To use your search results, simply run:
-```ruby
-Gimme.memory
-```
-## Clearing the search results
- To clear search results and start afresh, run:
-```ruby
-Gimme.reset!
-```
-## To do:
- - Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
- - Work on false positives of bad urls.  (Bad urls should be skipped + DNS redirects don't give 404 errors)
-More to follow...

data/Rakefile CHANGED

@@ -1,7 +1,22 @@
 require 'rubygems'
 require 'rake'
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test*.rb'
+  test.verbose = true
+end
 desc 'Open console with gimme_poc loaded'
 task :console do
   exec 'pry -r ./lib/gimme_poc.rb'
 end
+desc 'make a release'
+task :release do
+  exec './script/release'
+end
+task c: :console # alias 'c' for console
+task default: :test

data/lib/gimme_poc.rb CHANGED

@@ -3,6 +3,7 @@ require 'lazy_domain'
 require 'mechanize'
 require_relative './gimme_poc/contactpage'
 require_relative './gimme_poc/poc'
+require_relative './gimme_poc/logger'
 require_relative './gimme_poc/questions'
 require_relative './gimme_poc/save'
 require_relative './gimme_poc/version'
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
 # Find the contact
 module Gimme
   class << self
+    include Web
+    include Questions
+    include Save
+    include ContactPage
     attr_accessor :page, :contact, :contact_links, :url
+    attr_reader :status_code
-    # Simple regex that looks for ###.#### or ###-####
-    PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
+    def start_url_process(url)
+      LogMessages.start_url(url)
+      case
+      when LazyDomain.valid?(url) == false
+        LogMessages.invalid_domain(url)
+        @status_code = 0
+      when subdomain?(url)
+        LogMessages.subdomain
+        @status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
+      else
+        @status_code = 0 if get(url).nil?
+      end
+    end
-    # Captures http:// and https://
-    HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
+    def start_contact_process(url)
+      start_contact_links
+      attempt = save_available_contacts(url)
+      info = attempt.info if attempt && attempt.respond_to?(:info)
+      return attempt unless info.nil? || info.empty?
+      go_to_contact_page(url)
+    end
     ##
     # The main method!
@@ -25,29 +48,13 @@ module Gimme
     # If url is bad, it's converted to nil in 'get' method and skipped over.
     def poc(arr)
       arr = arr.split unless arr.is_a?(Array)
+      results = []
       arr.each do |url|
-        puts '-' * 50
-        puts "starting: #{url}"
-        unless LazyDomain.valid?(url)
-          puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
-          next
-        end
-        case
-        when subdomain?(url)
-          puts '(This url is a subdomain.  Will try both sub and root domain.)'
-          next if get(url).nil? && get(orig_domain(url)).nil?
-        else
-          next if get(url).nil?
-        end
-        start_contact_links
-        mechpage = go_to_contact_page(url)
-        if mechpage.nil?
-          puts '(empty page, exiting.)'
-        else
-          save_available_contacts(mechpage.uri.to_s)
-        end
+        start_url_process(url)
+        next if @status_code == 0
+        results << start_contact_process(url)
       end
-      Search.all_sites # Return results from all sites.
+      results.length == 1 ? results.first : results
     end
     # Convenience method.

data/lib/gimme_poc/contactpage.rb CHANGED

@@ -1,56 +1,55 @@
 # Find the contact
-module Gimme
-  class << self
-    ##
-    # Scans for contact page.  If it doesn't work on the first try,
-    # It will look for english versions and try again. Processes left to right.
-    #
-    # Returns nil if no contact page can be found.
-    def go_to_contact_page(url)
-      contact_page(url) || english_contact_page(url)
-    end
-    ##
-    # Looks for contact page.  Gets page if available.
-    # If no contact link is available, it will blind test '../contact'.
-    # Returns nil if nothing can be found.
-    def contact_page(url)
-      puts 'now looking for contact pages'
-      contact_link = link_with_href(/contact|Contact/)
-      contact_test_page = merged_link('../contact')
+module ContactPage
+  attr_accessor :contact_link
+  ##
+  # Scans for contact page.  If it doesn't work on the first try,
+  # It will look for english versions and try again. Processes left to right.
+  #
+  # Returns nil if no contact page can be found.
+  def go_to_contact_page(url)
+    contact_page(url) || english_contact_page(url)
+  end
-      case
-      when !contact_link.nil?
-        puts "#{'Success:'.green} Found contact link!\n"
-        get(merged_link(contact_link))
-      else
-        puts "#{'Warning:'.yellow} couldn't find contact link"
-        blind_test(contact_test_page) || get(orig_domain(url))
-      end
+  ##
+  # Looks for contact page.  Gets page if available.
+  # If no contact link is available, it will blind test '../contact'.
+  # Returns nil if nothing can be found.
+  def contact_page(url)
+    LogMessages.looking_for_contact_page
+    @contact_link = link_with_href(/contact|Contact/)
+    contact_test_page = merged_link('../contact')
+    case
+    when !contact_link.nil?
+      LogMessages.found_contact_link
+      get(merged_link(@contact_link))
+    else
+      LogMessages.no_contact_link
+      get(orig_domain(url)) if blind_test(contact_test_page).nil?
     end
+  end
-    ##
-    # Looks for english page.  Gets page if available then looks for
-    # english contact page.
-    #
-    # If no english link is available,
-    # it will blind test '../en' and '../english'.
-    # Returns nil if nothing can be found.
-    def english_contact_page(url)
-      puts "\nLooking for english page..."
-      english_link = page.link_with(href: %r{en\/|english|English})
-      test_en_page = merged_link('../en')
-      test_english_page = merged_link('../english')
+  ##
+  # Looks for english page.  Gets page if available then looks for
+  # english contact page.
+  #
+  # If no english link is available,
+  # it will blind test '../en' and '../english'.
+  # Returns nil if nothing can be found.
+  def english_contact_page(url)
+    LogMessages.looking_for_english_page
+    english_link = @page.link_with(href: %r{en\/|english|English})
+    test_en_page = merged_link('../en')
+    test_english_page = merged_link('../english')
-      case
-      when !english_link.nil?
-        puts "#{'Success:'.green} found english link!"
-        get(merged_link(english_link.uri))
-      else
-        blind_test(test_en_page) || blind_test(test_english_page)
-        puts "\n(restarting)\n"
-        contact_page(url)
-      end
+    case
+    when !english_link.nil?
+      LogMessages.found_english_link
+      get(merged_link(english_link.uri))
+    else
+      blind_test(test_en_page) || blind_test(test_english_page)
+      LogMessages.restarting
+      contact_page(url)
     end
   end
 end

data/lib/gimme_poc/logger.rb ADDED

@@ -0,0 +1,16 @@
+require 'logger'
+require_relative './logger/messages'
+# Output info messages during gimme poc crawl.
+module Gimme
+  class << self
+    include LogMessages
+    attr_accessor :logger
+  end
+end
+Gimme.logger = Logger.new(STDOUT)
+Gimme.logger.level = Logger::INFO
+Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
+  "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
+end

data/lib/gimme_poc/logger/messages.rb ADDED

@@ -0,0 +1,77 @@
+module LogMessages
+  class << self
+    def loginfo(str)
+      Gimme.logger.info(str)
+    end
+    def logwarn(str)
+      Gimme.logger.info(str)
+    end
+    # Info
+    # -----------------------------------------------------------------
+    def start_url(url)
+      puts '-' * 50
+      loginfo "starting: #{url}"
+    end
+    def sending_get_request(url)
+      loginfo("sending GET request to: #{url}")
+    end
+    def blind_testing(url)
+      loginfo("blind testing: #{url}")
+    end
+    def invalid_domain(url)
+      loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
+    end
+    def subdomain
+      loginfo '(This url is a subdomain.  Will try both sub and root domain.)'
+    end
+    def empty_page
+      loginfo '(empty page, exiting.)'
+    end
+    def looking_for_contact_page
+      loginfo('now looking for contact pages')
+    end
+    def found_contact_link
+      loginfo("#{'Success:'.green} Found contact link!")
+    end
+    def looking_for_english_page
+      loginfo('Looking for english page...')
+    end
+    def found_english_link
+      loginfo("#{'Success:'.green} found english link!")
+    end
+    def saving_contact_info(url)
+      loginfo("saving available contact information from #{url}")
+    end
+    # Warnings
+    # -----------------------------------------------------------------
+    def no_contact_link
+      logwarn("#{'Warning:'.yellow} couldn't find contact link")
+    end
+    def restarting
+      logwarn('restarting'.yellow)
+    end
+    def nothing_to_save
+      logwarn '(nothing to save)'
+    end
+    def warn_err(error)
+      logwarn("#{'Error:'.red} #{error}")
+    end
+  end
+end

data/lib/gimme_poc/poc.rb CHANGED

@@ -1,10 +1,12 @@
+require "ostruct"
 module Gimme
   # Collection of sites searched.
   class Search
-    @all_sites = []
+    attr_accessor :all_sites
-    class << self
-      attr_accessor :all_sites
+    def initialize
+      @all_sites = []
     end
     # Each site is saved to this class
@@ -13,8 +15,7 @@ module Gimme
       def initialize(url, contact_info_hsh)
         @host = url
-        @info = contact_info_hsh
-        Search.all_sites << self
+        @info = OpenStruct.new(contact_info_hsh)
       end
     end
   end

data/lib/gimme_poc/questions.rb CHANGED

@@ -1,33 +1,27 @@
-# Find the contact
-module Gimme
-  class << self
-    ##
-    # Boolean, returns true if anything is present
-    # after running scan_for_contacts and deleting failures.
-    def something_to_save?(hsh)
-      delete_failures(hsh).any?
-    end
-    # Boolean, returns true if email is present.
-    def email_available?
-      !link_with_href('mailto').nil?
-    end
+# Reflective questions for situational awareness.
+module Questions
+  # Simple regex that looks for ###.#### or ###-####
+  PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
+  # Boolean, returns true if email is present.
+  def email_available?
+    !link_with_href('mailto').nil?
+  end
-    # Boolean, returns true if phone number is present.
-    def phone_available?
-      !(page.body =~ PHONE_REGEX).nil?
-    end
+  # Boolean, returns true if phone number is present.
+  def phone_available?
+    !(@page.body =~ PHONE_REGEX).nil?
+  end
-    ##
-    # TODO: build better conditional to prevent false positives.
-    #   There could be other forms like newsletter signup, etc.
-    #
-    # If there is a form with more than one field, this returns true.
-    # Forms with one field are typically search boxes.
-    #
-    # Boolean, returns true if form is present on page.
-    def contactform_available?
-      !(page.forms.select { |x| x.fields.length > 1 }.empty?)
-    end
+  ##
+  # TODO: build better conditional to prevent false positives.
+  #   There could be other forms like newsletter signup, etc.
+  #
+  # If there is a form with more than one field, this returns true.
+  # Forms with one field are typically search boxes.
+  #
+  # Boolean, returns true if form is present on page.
+  def contactform_available?
+    !(@page.forms.select { |x| x.fields.length > 1 }.empty?)
   end
 end

data/lib/gimme_poc/save.rb CHANGED

@@ -1,62 +1,70 @@
-module Gimme
-  class << self
-    ##
-    # Returns anything that is possible to save, otherwise returns nil.
-    # Booleans for phone, email, or contact form will display True or False.
-    #
-    # Add periods to link hrefs to prevent false positives. Must escape periods
-    # with a backslash or else it will be a regex wild card.
-    def scan_for_contacts
-      {
-        contactpage: link_with_href('contact'),
-        email_present: "#{email_available?}",
-        phone_present: "#{phone_available?}",
-        contact_form: "#{contactform_available?}",
-        facebook: link_with_href('facebook\.'),
-        twitter: link_with_href('twitter\.'),
-        youtube: link_with_href('youtube\.'),
-        googleplus: link_with_href('plus\.google\.'),
-        linkedin: link_with_href('linkedin\.')
-      }
-    end
+module Save
+  ##
+  # Boolean, returns true if anything is present
+  # after running scan_for_contacts and deleting failures.
+  # Remember that false is a string in the hash
+  def something_to_save?(hsh)
+      hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
+  end
-    # Starts/Restarts @contacts_links hash
-    def start_contact_links
-      @contact_links = {}
-    end
+  ##
+  # Returns anything that is possible to save, otherwise returns nil.
+  # Results for phone, email, or contact form are stored as the strings
+  # 'true' or 'false'.
+  #
+  # Add periods to link hrefs to prevent false positives. Must escape periods
+  # with a backslash or else it will be a regex wild card.
+  def scan_for_contacts
+    {
+      contactpage: link_with_href('contact'),
+      email_present: "#{email_available?}",
+      phone_present: "#{phone_available?}",
+      contact_form: "#{contactform_available?}",
+      facebook: link_with_href('facebook\.'),
+      twitter: link_with_href('twitter\.'),
+      youtube: link_with_href('youtube\.'),
+      googleplus: link_with_href('plus\.google\.'),
+      linkedin: link_with_href('linkedin\.')
+    }
+  rescue => e
+    puts "Error: #{e}"
+  end
-    # Used in save_available_contacts to save each valid link.
-    def save_link(key, url)
-      return if key.nil? || url.nil?
-      @contact_links[key] = url
-    end
+  # Starts/Restarts @contact_links hash
+  def start_contact_links
+    @contact_links = {}
+  end
-    ##
-    # Remove negatives from the contacts hash.
-    # Deletes a key value pair with a value of either nil or false.
-    # Remember that false is a string.
-    def delete_failures(hsh)
-      hsh.delete_if { |_k, v| v.nil? || v == 'false' }
-    end
+  # Used in save_available_contacts to save each valid link.
+  def save_link(key, url)
+    return if key.nil? || url.nil?
+    @contact_links[key] = url
+  end
+  ##
+  # Remove negatives from the contacts hash.
+  # Deletes a key value pair with a value of either nil or false.
+  # Remember that false is stored in the hash as a string.
+  def delete_failures(hsh)
+    hsh.delete_if { |_k, v| v.nil? || v == 'false' }
+  end
-    # Saves any available contact info to @contact_links.
-    def save_available_contacts(url, hsh = scan_for_contacts)
-      if something_to_save?(hsh)
-        puts "\nsaving available contact information from #{url}"
-        if hsh.is_a?(Hash)
-          hsh.each do |k, v|
-            save_link(k, v) # saves to @contact_links
-          end
-          delete_failures(@contact_links)
-          puts "#{@contact_links}".cyan # same as @contact_links
-        else
-          fail ArgumentError, "expected hash but got #{hsh.class}"
+  # Saves any available contact info to @contact_links.
+  def save_available_contacts(url, hsh = scan_for_contacts)
+    if something_to_save?(hsh)
+      LogMessages.saving_contact_info(url)
+      if hsh.is_a?(Hash)
+        hsh.each do |k, v|
+          save_link(k, v) # saves to @contact_links
         end
-        Search::POC.new(url, @contact_links)
+        delete_failures(@contact_links)
+        puts "#{@contact_links}".cyan # same as @contact_links
       else
-        puts '(nothing to save)'
-        return
+        fail ArgumentError, "expected hash but got #{hsh.class}"
       end
+      Gimme::Search::POC.new(url, @contact_links)
+    else
+      LogMessages.nothing_to_save
+      return
     end
   end
 end

data/lib/gimme_poc/test_case.rb ADDED

@@ -0,0 +1,329 @@
+require 'mechanize'
+require 'logger'
+require 'tempfile'
+require 'tmpdir'
+require 'webrick'
+require 'zlib'
+require 'rubygems'
+begin
+  gem 'minitest'
+rescue Gem::LoadError
+end
+##
+# Source:
+#
+# http://bit.ly/1Pt2KAd
+# --------------------------------------------------------------
+##
+# A generic test case for testing mechanize.  Using a subclass of
+# Mechanize::TestCase for your tests will create an isolated mechanize
+# instance that won't pollute your filesystem or other tests.
+#
+# Once Mechanize::TestCase is loaded no HTTP requests will be made outside
+# mechanize itself.  All requests are handled via WEBrick servlets.
+#
+# Mechanize uses WEBrick servlets to test some functionality.  You can run
+# other HTTP clients against the servlets using:
+#
+#   ruby -rmechanize/test_case/server -e0
+#
+# Which will launch a test server at http://localhost:8000
+class Mechanize::TestCase < Minitest::Test
+  TEST_DIR = File.expand_path '../../../test', __FILE__
+  REQUESTS = []
+  ##
+  # Creates a clean mechanize instance +@mech+ for use in tests.
+  def setup
+    super
+    REQUESTS.clear
+    @mech = Mechanize.new
+    @ssl_private_key = nil
+    @ssl_certificate = nil
+  end
+  ##
+  # Creates a fake page with URI http://fake.example and an empty, submittable
+  # form.
+  def fake_page agent = @mech
+    uri = URI 'http://fake.example/'
+    html = <<-END
+<html>
+<body>
+<form><input type="submit" value="submit" /></form>
+</body>
+</html>
+    END
+    Mechanize::Page.new uri, nil, html, 200, agent
+  end
+  ##
+  # Is the Encoding constant defined?
+  def have_encoding?
+    Object.const_defined? :Encoding
+  end
+  ##
+  # Creates a Mechanize::Page with the given +body+
+  def html_page body
+    uri = URI 'http://example/'
+    Mechanize::Page.new uri, nil, body, 200, @mech
+  end
+  ##
+  # Creates a Mechanize::CookieJar by parsing the given +str+
+  def cookie_jar str, uri = URI('http://example')
+    jar = Mechanize::CookieJar.new
+    jar.parse str, uri
+    jar
+  end
+  ##
+  # Runs the block inside a temporary directory
+  def in_tmpdir
+    Dir.mktmpdir do |dir|
+      Dir.chdir dir do
+        yield
+      end
+    end
+  end
+  ##
+  # Creates a Nokogiri Node +element+ with the given +attributes+
+  def node element, attributes = {}
+    doc = Nokogiri::HTML::Document.new
+    node = Nokogiri::XML::Node.new element, doc
+    attributes.each do |name, value|
+      node[name] = value
+    end
+    node
+  end
+  ##
+  # Creates a Mechanize::Page for the given +uri+ with the given
+  # +content_type+, response +body+ and HTTP status +code+
+  def page uri, content_type = 'text/html', body = '', code = 200
+    uri = URI uri unless URI::Generic === uri
+    Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
+                        @mech)
+  end
+  ##
+  # Requests made during this test
+  def requests
+    REQUESTS
+  end
+  ##
+  # An SSL private key.  This key is the same across all test runs
+  def ssl_private_key
+    @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
+-----BEGIN RSA PRIVATE KEY-----
+MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
+bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
+p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
+6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
+-----END RSA PRIVATE KEY-----
+    KEY
+  end
+  ##
+  # An X509 certificate.  This certificate is the same across all test runs
+  def ssl_certificate
+    @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
+-----BEGIN CERTIFICATE-----
+MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
+eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
+OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
+B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
+4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
+UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
+//8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
+-----END CERTIFICATE-----
+    CERT
+  end
+  ##
+  # Creates a Tempfile with +content+ that is immediately unlinked
+  def tempfile content
+    body_io = Tempfile.new @NAME
+    body_io.unlink
+    body_io.write content
+    body_io.flush
+    body_io.rewind
+    body_io
+  end
+end
+require 'mechanize/test_case/servlets'
+module Net # :nodoc:
+end
+class Net::HTTP # :nodoc:
+  alias :old_do_start :do_start
+  def do_start
+    @started = true
+  end
+  PAGE_CACHE = {}
+  alias :old_request :request
+  def request(req, *data, &block)
+    url = URI.parse(req.path)
+    path = WEBrick::HTTPUtils.unescape(url.path)
+    path = '/index.html' if path == '/'
+    res = ::Response.new
+    res.query_params = url.query
+    req.query = if 'POST' != req.method && url.query then
+                  WEBrick::HTTPUtils.parse_query url.query
+                elsif req['content-type'] =~ /www-form-urlencoded/ then
+                  WEBrick::HTTPUtils.parse_query req.body
+                elsif req['content-type'] =~ /boundary=(.+)/ then
+                  boundary = WEBrick::HTTPUtils.dequote $1
+                  WEBrick::HTTPUtils.parse_form_data req.body, boundary
+                else
+                  {}
+                end
+    req.cookies = WEBrick::Cookie.parse(req['Cookie'])
+    Mechanize::TestCase::REQUESTS << req
+    if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
+      servlet = servlet_klass.new({})
+      servlet.send "do_#{req.method}", req, res
+    else
+      filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
+      unless PAGE_CACHE[filename]
+        open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
+          PAGE_CACHE[filename] = io.read
+        }
+      end
+      res.body = PAGE_CACHE[filename]
+      case filename
+      when /\.txt$/
+        res['Content-Type'] = 'text/plain'
+      when /\.jpg$/
+        res['Content-Type'] = 'image/jpeg'
+      end
+    end
+    res['Content-Type'] ||= 'text/html'
+    res.code ||= "200"
+    response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
+    response = response_klass.new res.http_version, res.code, res.message
+    res.header.each do |k,v|
+      v = v.first if v.length == 1
+      response[k] = v
+    end
+    res.cookies.each do |cookie|
+      response.add_field 'Set-Cookie', cookie.to_s
+    end
+    response['Content-Type'] ||= 'text/html'
+    response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
+    io = StringIO.new(res.body)
+    response.instance_variable_set :@socket, io
+    def io.read clen, dest = nil, _ = nil
+      if dest then
+        dest << super(clen)
+      else
+        super clen
+      end
+    end
+    body_exist = req.response_body_permitted? &&
+      response_klass.body_permitted?
+    response.instance_variable_set :@body_exist, body_exist
+    yield response if block_given?
+    response
+  end
+end
+class Net::HTTPRequest # :nodoc:
+  attr_accessor :query, :body, :cookies, :user
+  def host
+    'example'
+  end
+  def port
+    80
+  end
+end
+class Response # :nodoc:
+  include Net::HTTPHeader
+  attr_reader :code
+  attr_accessor :body, :query, :cookies
+  attr_accessor :query_params, :http_version
+  attr_accessor :header
+  def code=(c)
+    @code = c.to_s
+  end
+  alias :status :code
+  alias :status= :code=
+    def initialize
+      @header = {}
+      @body = ''
+      @code = nil
+      @query = nil
+      @cookies = []
+      @http_version = '1.1'
+    end
+  def read_body
+    yield body
+  end
+  def message
+    ''
+  end
+end

data/lib/gimme_poc/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Gimme
-  VERSION = '0.0.5'
+  VERSION = '1.1.0'
 end

data/lib/gimme_poc/web.rb CHANGED

@@ -1,91 +1,97 @@
 # Find the contact
-module Gimme
-  class << self
-    ##
-    # Go to a page using Mechanize.
-    # Sleep for a split second to not overload any servers.
-    #
-    # Returns nil if bad url is given.
-    def get(str)
-      url = format_url(str)
-      puts "sending GET request to: #{url}"
-      sleep(0.1)
-      @page = Mechanize.new do |a|
-        a.user_agent_alias = 'Mac Safari'
-        a.open_timeout = 7
-        a.read_timeout = 7
-        a.idle_timeout = 7
-        a.redirect_ok = true
-      end.get(url)
+module Web
+  attr_accessor :page, :agent, :url
+  # Captures http:// and https://
+  HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
+  ##
+  # Go to a page using Mechanize.
+  # Sleep for a split second to not overload any servers.
+  #
+  # Returns nil if bad url is given.
+  def get(str)
+    prepare_get_request(str)
+    @page = @agent.get(@url)
+  rescue Exception => e
+    LogMessages.warn_err(e)
+  end
+  def prepare_get_request(str)
+    mech_setup
+    @url = format_url(str)
+    LogMessages.sending_get_request(url)
+    sleep(0.1)
+  end
-    rescue Mechanize::ResponseCodeError => e
-      puts "#{'Response Error:'.red} #{e}"
-    rescue SocketError => e
-      puts "#{'Socket Error:'.red} #{e}"
-    rescue Net::OpenTimeout => e
-      puts "#{'Connection Timeout:'.red} #{e}"
-    rescue Errno::ETIMEDOUT => e
-      puts "#{'Connection Timeout:'.red} #{e}"
-    rescue Net::HTTP::Persistent::Error
-      puts "#{'Connection Timeout:'.red} read timeout, too many resets."
+  def mech_setup
+    @agent = Mechanize.new do |a|
+      a.user_agent_alias = 'Mac Safari'
+      a.open_timeout = 7
+      a.read_timeout = 7
+      a.idle_timeout = 7
+      a.redirect_ok = true
     end
+  end
-    ##
-    # Mechanize needs absolute urls to work.
-    # If http:// or https:// isn't present, append http://.
-    def format_url(str)
-      LazyDomain.autohttp(str)
-    end
+  ##
+  # Mechanize needs absolute urls to work.
+  # If http:// or https:// isn't present, prepend http://.
+  def format_url(str)
+    LazyDomain.autohttp(str)
+  end
-    # Used for subdomain check.  Not a permanent change to url variable.
-    def unformat_url(str)
-      str.gsub(HTTP_REGEX, '')
-    end
+  # Used for subdomain check.  Not a permanent change to url variable.
+  def unformat_url(str)
+    str.gsub(HTTP_REGEX, '')
+  end
-    ##
-    # Outputs domain of a url. Useful if subdomains are given to GimmePOC
-    # and they don't work.
-    #
-    # For example:
-    # Given http://maps.google.com, returns 'google.com'.
-    def orig_domain(str)
-      LazyDomain.parse(str).domain
-    rescue PublicSuffix::DomainInvalid => e
-      puts "#{'Invalid Domain:'.red} #{e}"
-    end
+  ##
+  # Outputs domain of a url. Useful if subdomains are given to GimmePOC
+  # and they don't work.
+  #
+  # For example:
+  # Given http://maps.google.com, returns 'google.com'.
+  def orig_domain(str)
+    LazyDomain.parse(str).domain
+  rescue PublicSuffix::DomainInvalid => err
+    LogMessages.invalid_domain(err)
+  end
-    ##
-    # Used in case of relative paths. Merging guarantees correct url.
-    # This needs a url string as argument to work.
-    # Produces a merged uri string.
-    def merged_link(url_str)
-      page.uri.merge(url_str).to_s
-    end
+  ##
+  # Used in case of relative paths. Merging guarantees correct url.
+  # This needs a url string as argument to work.
+  # Produces a merged uri string.
+  def merged_link(url_str)
+    @page.uri.merge(url_str).to_s
+  end
-    ##
-    # Expects relative paths and merges everything.
-    # Returns a string.  If there's nothing, return nil.
-    #
-    # Add \b word block to ensure whole word is searched.
-    def link_with_href(str)
-      merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
-    rescue
-      nil
-    end
+  ##
+  # Expects relative paths and merges everything.
+  # Returns a string.  If there's nothing, return nil.
+  #
+  # Add \b word boundary so the match starts at the beginning of a word.
+  def link_with_href(str)
+    merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
+  rescue
+    nil
+  end
-    # Boolean, returns true if url is not identical to original domain.
-    def subdomain?(str)
-      (unformat_url(str) != orig_domain(str))
-    end
+  # Boolean, returns true if url is not identical to original domain.
+  #
+  # In the event that the url has a path, this splits everything on forward
+  # slash and selects far left item.
+  def subdomain?(str)
+    (unformat_url(str).split('/')[0] != orig_domain(str))
+  end
-    # TODO: Sometimes DNS will do a redirect and not give a 404.
-    #   Need to prevent redirects.
-    #
-    # Blindly tests to see if a url goes through.  If there is a 404 error,
-    # this will return nil.
-    def blind_test(url)
-      puts "\n(blind testing: #{url})"
-      get(url)
-    end
+  # TODO: Sometimes DNS will do a redirect and not give a 404.
+  #   Need to prevent redirects.
+  #
+  # Blindly tests to see if a url goes through.  If there is a 404 error,
+  # this will return nil.
+  def blind_test(url)
+    LogMessages.blind_testing(url)
+    get(url)
   end
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gimme_poc
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 1.1.0
 platform: ruby
 authors:
 - John Mason
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-10-11 00:00:00.000000000 Z
+date: 2017-04-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -44,28 +44,56 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.0.1
+        version: 0.0.2
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.0.1
+        version: 0.0.2
 - !ruby/object:Gem::Dependency
-  name: rspec
+  name: shoulda
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '3.5'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '3.5'
+- !ruby/object:Gem::Dependency
+  name: shoulda-context
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+- !ruby/object:Gem::Dependency
+  name: minitest-reporters
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
 - !ruby/object:Gem::Dependency
   name: pry
   requirement: !ruby/object:Gem::Requirement
@@ -106,9 +134,12 @@ files:
 - Rakefile
 - lib/gimme_poc.rb
 - lib/gimme_poc/contactpage.rb
+- lib/gimme_poc/logger.rb
+- lib/gimme_poc/logger/messages.rb
 - lib/gimme_poc/poc.rb
 - lib/gimme_poc/questions.rb
 - lib/gimme_poc/save.rb
+- lib/gimme_poc/test_case.rb
 - lib/gimme_poc/version.rb
 - lib/gimme_poc/web.rb
 homepage: http://github.com/m8ss/gimme_poc
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: Get a point of contact.  Given a url or array of urls, extracts social media