RubyGems - gimme_poc - Versions diffs - 0.0.5 → 1.1.0 - Mend

gimme_poc 0.0.5 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/README.md +2 -31
data/Rakefile +15 -0
data/lib/gimme_poc.rb +32 -25
data/lib/gimme_poc/contactpage.rb +47 -48
data/lib/gimme_poc/logger.rb +16 -0
data/lib/gimme_poc/logger/messages.rb +77 -0
data/lib/gimme_poc/poc.rb +6 -5
data/lib/gimme_poc/questions.rb +23 -29
data/lib/gimme_poc/save.rb +60 -52
data/lib/gimme_poc/test_case.rb +329 -0
data/lib/gimme_poc/version.rb +1 -1
data/lib/gimme_poc/web.rb +85 -79
metadata +39 -8

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 35a045a491109a5ae34152577508585667371af0
-  data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
+  metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
+  data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
 SHA512:
-  metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
-  data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
+  metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
+  data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458

data/README.md CHANGED

@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
 ## Installation
 ```
-gem install gimme_poc
+$ gem install gimme_poc
 ```
 ## Set Up
 ```ruby
-require 'gimme_poc' # => that's it!
+require 'gimme_poc'
 ```
 ## How it works
-Gimme POC is easy to use! Simply run this command.
 ```ruby
 Gimme.poc 'http://example.com'
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
 ```
-## Referencing the search results
- To use your search results, simply run:
-```ruby
-Gimme.memory
-```
-## Clearing the search results
- To clear search results and start afresh, run:
-```ruby
-Gimme.reset!
-```
-## To do:
- - Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
- - Work on false positives of bad urls.  (Bad urls should be skipped + DNS redirects don't give 404 errors)
-More to follow...

data/Rakefile CHANGED

@@ -1,7 +1,22 @@
 require 'rubygems'
 require 'rake'
+require 'rake/testtask'
+Rake::TestTask.new(:test) do |test|
+  test.libs << 'lib' << 'test'
+  test.pattern = 'test/**/test*.rb'
+  test.verbose = true
+end
 desc 'Open console with gimme_poc loaded'
 task :console do
   exec 'pry -r ./lib/gimme_poc.rb'
 end
+desc 'make a release'
+task :release do
+  exec './script/release'
+end
+task c: :console # alias 'c' for console
+task default: :test

data/lib/gimme_poc.rb CHANGED

@@ -3,6 +3,7 @@ require 'lazy_domain'
 require 'mechanize'
 require_relative './gimme_poc/contactpage'
 require_relative './gimme_poc/poc'
+require_relative './gimme_poc/logger'
 require_relative './gimme_poc/questions'
 require_relative './gimme_poc/save'
 require_relative './gimme_poc/version'
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
 # Find the contact
 module Gimme
   class << self
+    include Web
+    include Questions
+    include Save
+    include ContactPage
     attr_accessor :page, :contact, :contact_links, :url
+    attr_reader :status_code
-    # Simple regex that looks for ###.#### or ###-####
-    PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
+    def start_url_process(url)
+      LogMessages.start_url(url)
+      case
+      when LazyDomain.valid?(url) == false
+        LogMessages.invalid_domain(url)
+        @status_code = 0
+      when subdomain?(url)
+        LogMessages.subdomain
+        @status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
+      else
+        @status_code = 0 if get(url).nil?
+      end
+    end
-    # Captures http:// and https://
-    HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
+    def start_contact_process(url)
+      start_contact_links
+      attempt = save_available_contacts(url)
+      info = attempt.info if attempt && attempt.respond_to?(:info)
+      return attempt unless info.nil? || info.empty?
+      go_to_contact_page(url)
+    end
     ##
     # The main method!
@@ -25,29 +48,13 @@ module Gimme
     # If url is bad, it's converted to nil in 'get' method and skipped over.
     def poc(arr)
       arr = arr.split unless arr.is_a?(Array)
+      results = []
       arr.each do |url|
-        puts '-' * 50
-        puts "starting: #{url}"
-        unless LazyDomain.valid?(url)
-          puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
-          next
-        end
-        case
-        when subdomain?(url)
-          puts '(This url is a subdomain.  Will try both sub and root domain.)'
-          next if get(url).nil? && get(orig_domain(url)).nil?
-        else
-          next if get(url).nil?
-        end
-        start_contact_links
-        mechpage = go_to_contact_page(url)
-        if mechpage.nil?
-          puts '(empty page, exiting.)'
-        else
-          save_available_contacts(mechpage.uri.to_s)
-        end
+        start_url_process(url)
+        next if @status_code == 0
+        results << start_contact_process(url)
       end
-      Search.all_sites # Return results from all sites.
+      results.length == 1 ? results.first : results
     end
     # Convenience method.

data/lib/gimme_poc/contactpage.rb CHANGED

@@ -1,56 +1,55 @@
 # Find the contact
-module Gimme
-  class << self
-    ##
-    # Scans for contact page.  If it doesn't work on the first try,
-    # It will look for english versions and try again. Processes left to right.
-    #
-    # Returns nil if no contact page can be found.
-    def go_to_contact_page(url)
-      contact_page(url) || english_contact_page(url)
-    end
-    ##
-    # Looks for contact page.  Gets page if available.
-    # If no contact link is available, it will blind test '../contact'.
-    # Returns nil if nothing can be found.
-    def contact_page(url)
-      puts 'now looking for contact pages'
-      contact_link = link_with_href(/contact|Contact/)
-      contact_test_page = merged_link('../contact')
+module ContactPage
+  attr_accessor :contact_link
+  ##
+  # Scans for contact page.  If it doesn't work on the first try,
+  # It will look for english versions and try again. Processes left to right.
+  #
+  # Returns nil if no contact page can be found.
+  def go_to_contact_page(url)
+    contact_page(url) || english_contact_page(url)
+  end
-      case
-      when !contact_link.nil?
-        puts "#{'Success:'.green} Found contact link!\n"
-        get(merged_link(contact_link))
-      else
-        puts "#{'Warning:'.yellow} couldn't find contact link"
-        blind_test(contact_test_page) || get(orig_domain(url))
-      end
+  ##
+  # Looks for contact page.  Gets page if available.
+  # If no contact link is available, it will blind test '../contact'.
+  # Returns nil if nothing can be found.
+  def contact_page(url)
+    LogMessages.looking_for_contact_page
+    @contact_link = link_with_href(/contact|Contact/)
+    contact_test_page = merged_link('../contact')
+    case
+    when !contact_link.nil?
+      LogMessages.found_contact_link
+      get(merged_link(@contact_link))
+    else
+      LogMessages.no_contact_link
+      get(orig_domain(url)) if blind_test(contact_test_page).nil?
     end
+  end
-    ##
-    # Looks for english page.  Gets page if available then looks for
-    # english contact page.
-    #
-    # If no english link is available,
-    # it will blind test '../en' and '../english'.
-    # Returns nil if nothing can be found.
-    def english_contact_page(url)
-      puts "\nLooking for english page..."
-      english_link = page.link_with(href: %r{en\/|english|English})
-      test_en_page = merged_link('../en')
-      test_english_page = merged_link('../english')
+  ##
+  # Looks for english page.  Gets page if available then looks for
+  # english contact page.
+  #
+  # If no english link is available,
+  # it will blind test '../en' and '../english'.
+  # Returns nil if nothing can be found.
+  def english_contact_page(url)
+    LogMessages.looking_for_english_page
+    english_link = @page.link_with(href: %r{en\/|english|English})
+    test_en_page = merged_link('../en')
+    test_english_page = merged_link('../english')
-      case
-      when !english_link.nil?
-        puts "#{'Success:'.green} found english link!"
-        get(merged_link(english_link.uri))
-      else
-        blind_test(test_en_page) || blind_test(test_english_page)
-        puts "\n(restarting)\n"
-        contact_page(url)
-      end
+    case
+    when !english_link.nil?
+      LogMessages.found_english_link
+      get(merged_link(english_link.uri))
+    else
+      blind_test(test_en_page) || blind_test(test_english_page)
+      LogMessages.restarting
+      contact_page(url)
     end
   end
 end

data/lib/gimme_poc/logger.rb ADDED

@@ -0,0 +1,16 @@
+require 'logger'
+require_relative './logger/messages'
+# Output info messages during gimme poc crawl.
+module Gimme
+  class << self
+    include LogMessages
+    attr_accessor :logger
+  end
+end
+Gimme.logger = Logger.new(STDOUT)
+Gimme.logger.level = Logger::INFO
+Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
+  "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
+end

data/lib/gimme_poc/logger/messages.rb ADDED

@@ -0,0 +1,77 @@
+module LogMessages
+  class << self
+    def loginfo(str)
+      Gimme.logger.info(str)
+    end
+    def logwarn(str)
+      Gimme.logger.info(str)
+    end
+    # Info
+    # -----------------------------------------------------------------
+    def start_url(url)
+      puts '-' * 50
+      loginfo "starting: #{url}"
+    end
+    def sending_get_request(url)
+      loginfo("sending GET request to: #{url}")
+    end
+    def blind_testing(url)
+      loginfo("blind testing: #{url}")
+    end
+    def invalid_domain(url)
+      loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
+    end
+    def subdomain
+      loginfo '(This url is a subdomain.  Will try both sub and root domain.)'
+    end
+    def empty_page
+      loginfo '(empty page, exiting.)'
+    end
+    def looking_for_contact_page
+      loginfo('now looking for contact pages')
+    end
+    def found_contact_link
+      loginfo("#{'Success:'.green} Found contact link!")
+    end
+    def looking_for_english_page
+      loginfo('Looking for english page...')
+    end
+    def found_english_link
+      loginfo("#{'Success:'.green} found english link!")
+    end
+    def saving_contact_info(url)
+      loginfo("saving available contact information from #{url}")
+    end
+    # Warnings
+    # -----------------------------------------------------------------
+    def no_contact_link
+      logwarn("#{'Warning:'.yellow} couldn't find contact link")
+    end
+    def restarting
+      logwarn('restarting'.yellow)
+    end
+    def nothing_to_save
+      logwarn '(nothing to save)'
+    end
+    def warn_err(error)
+      logwarn("#{'Error:'.red} #{error}")
+    end
+  end
+end

data/lib/gimme_poc/poc.rb CHANGED

@@ -1,10 +1,12 @@
+require "ostruct"
 module Gimme
   # Collection of sites searched.
   class Search
-    @all_sites = []
+    attr_accessor :all_sites
-    class << self
-      attr_accessor :all_sites
+    def initialize
+      @all_sites = []
     end
     # Each site is saved to this class
@@ -13,8 +15,7 @@ module Gimme
       def initialize(url, contact_info_hsh)
         @host = url
-        @info = contact_info_hsh
-        Search.all_sites << self
+        @info = OpenStruct.new(contact_info_hsh)
       end
     end
   end

data/lib/gimme_poc/questions.rb CHANGED

@@ -1,33 +1,27 @@
-# Find the contact
-module Gimme
-  class << self
-    ##
-    # Boolean, returns true if anything is present
-    # after running scan_for_contacts and deleting failures.
-    def something_to_save?(hsh)
-      delete_failures(hsh).any?
-    end
-    # Boolean, returns true if email is present.
-    def email_available?
-      !link_with_href('mailto').nil?
-    end
+# Reflective questions for situational awareness.
+module Questions
+  # Simple regex that looks for ###.#### or ###-####
+  PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
+  # Boolean, returns true if email is present.
+  def email_available?
+    !link_with_href('mailto').nil?
+  end
-    # Boolean, returns true if phone number is present.
-    def phone_available?
-      !(page.body =~ PHONE_REGEX).nil?
-    end
+  # Boolean, returns true if phone number is present.
+  def phone_available?
+    !(@page.body =~ PHONE_REGEX).nil?
+  end
-    ##
-    # TODO: build better conditional to prevent false positives.
-    #   There could be other forms like newsletter signup, etc.
-    #
-    # If there is a form with more than one field, this returns true.
-    # Forms with one field are typically search boxes.
-    #
-    # Boolean, returns true if form is present on page.
-    def contactform_available?
-      !(page.forms.select { |x| x.fields.length > 1 }.empty?)
-    end
+  ##
+  # TODO: build better conditional to prevent false positives.
+  #   There could be other forms like newsletter signup, etc.
+  #
+  # If there is a form with more than one field, this returns true.
+  # Forms with one field are typically search boxes.
+  #
+  # Boolean, returns true if form is present on page.
+  def contactform_available?
+    !(@page.forms.select { |x| x.fields.length > 1 }.empty?)
   end
 end

data/lib/gimme_poc/save.rb CHANGED

@@ -1,62 +1,70 @@
-module Gimme
-  class << self
-    ##
-    # Returns anything that is possible to save, otherwise returns nil.
-    # Booleans for phone, email, or contact form will display True or False.
-    #
-    # Add periods to link hrefs to prevent false positives. Must escape periods
-    # with a backslash or else it will be a regex wild card.
-    def scan_for_contacts
-      {
-        contactpage: link_with_href('contact'),
-        email_present: "#{email_available?}",
-        phone_present: "#{phone_available?}",
-        contact_form: "#{contactform_available?}",
-        facebook: link_with_href('facebook\.'),
-        twitter: link_with_href('twitter\.'),
-        youtube: link_with_href('youtube\.'),
-        googleplus: link_with_href('plus\.google\.'),
-        linkedin: link_with_href('linkedin\.')
-      }
-    end
+module Save
+  ##
+  # Boolean, returns true if anything is present
+  # after running scan_for_contacts and deleting failures.
+  # Remember that false is a string in the hash
+  def something_to_save?(hsh)
+      hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
+  end
-    # Starts/Restarts @contacts_links hash
-    def start_contact_links
-      @contact_links = {}
-    end
+  ##
+  # Returns anything that is possible to save, otherwise returns nil.
+  # Booleans for phone, email, or contact form will display True or False.
+  #
+  # Add periods to link hrefs to prevent false positives. Must escape periods
+  # with a backslash or else it will be a regex wild card.
+  def scan_for_contacts
+    {
+      contactpage: link_with_href('contact'),
+      email_present: "#{email_available?}",
+      phone_present: "#{phone_available?}",
+      contact_form: "#{contactform_available?}",
+      facebook: link_with_href('facebook\.'),
+      twitter: link_with_href('twitter\.'),
+      youtube: link_with_href('youtube\.'),
+      googleplus: link_with_href('plus\.google\.'),
+      linkedin: link_with_href('linkedin\.')
+    }
+  rescue => e
+    puts "Error: #{e}"
+  end
-    # Used in save_available_contacts to save each valid link.
-    def save_link(key, url)
-      return if key.nil? || url.nil?
-      @contact_links[key] = url
-    end
+  # Starts/Restarts @contacts_links hash
+  def start_contact_links
+    @contact_links = {}
+  end
-    ##
-    # Remove negatives from the contacts hash.
-    # Deletes a key value pair with a value of either nil or false.
-    # Remember that false is a string.
-    def delete_failures(hsh)
-      hsh.delete_if { |_k, v| v.nil? || v == 'false' }
-    end
+  # Used in save_available_contacts to save each valid link.
+  def save_link(key, url)
+    return if key.nil? || url.nil?
+    @contact_links[key] = url
+  end
+  ##
+  # Remove negatives from the contacts hash.
+  # Deletes a key value pair with a value of either nil or false.
+  # Remember that false is a stored in hash as a string.
+  def delete_failures(hsh)
+    hsh.delete_if { |_k, v| v.nil? || v == 'false' }
+  end
-    # Saves any available contact info to @contact_links.
-    def save_available_contacts(url, hsh = scan_for_contacts)
-      if something_to_save?(hsh)
-        puts "\nsaving available contact information from #{url}"
-        if hsh.is_a?(Hash)
-          hsh.each do |k, v|
-            save_link(k, v) # saves to @contact_links
-          end
-          delete_failures(@contact_links)
-          puts "#{@contact_links}".cyan # same as @contact_links
-        else
-          fail ArgumentError, "expected hash but got #{hsh.class}"
+  # Saves any available contact info to @contact_links.
+  def save_available_contacts(url, hsh = scan_for_contacts)
+    if something_to_save?(hsh)
+      LogMessages.saving_contact_info(url)
+      if hsh.is_a?(Hash)
+        hsh.each do |k, v|
+          save_link(k, v) # saves to @contact_links
         end
-        Search::POC.new(url, @contact_links)
+        delete_failures(@contact_links)
+        puts "#{@contact_links}".cyan # same as @contact_links
       else
-        puts '(nothing to save)'
-        return
+        fail ArgumentError, "expected hash but got #{hsh.class}"
       end
+      Gimme::Search::POC.new(url, @contact_links)
+    else
+      LogMessages.nothing_to_save
+      return
     end
   end
 end

data/lib/gimme_poc/test_case.rb ADDED

@@ -0,0 +1,329 @@
+require 'mechanize'
+require 'logger'
+require 'tempfile'
+require 'tmpdir'
+require 'webrick'
+require 'zlib'
+require 'rubygems'
+begin
+  gem 'minitest'
+rescue Gem::LoadError
+end
+##
+# Source:
+#
+# http://bit.ly/1Pt2KAd
+# --------------------------------------------------------------
+##
+# A generic test case for testing mechanize.  Using a subclass of
+# Mechanize::TestCase for your tests will create an isolated mechanize
+# instance that won't pollute your filesystem or other tests.
+#
+# Once Mechanize::TestCase is loaded no HTTP requests will be made outside
+# mechanize itself.  All requests are handled via WEBrick servlets.
+#
+# Mechanize uses WEBrick servlets to test some functionality.  You can run
+# other HTTP clients against the servlets using:
+#
+#   ruby -rmechanize/test_case/server -e0
+#
+# Which will launch a test server at http://localhost:8000
+class Mechanize::TestCase < Minitest::Test
+  TEST_DIR = File.expand_path '../../../test', __FILE__
+  REQUESTS = []
+  ##
+  # Creates a clean mechanize instance +@mech+ for use in tests.
+  def setup
+    super
+    REQUESTS.clear
+    @mech = Mechanize.new
+    @ssl_private_key = nil
+    @ssl_certificate = nil
+  end
+  ##
+  # Creates a fake page with URI http://fake.example and an empty, submittable
+  # form.
+  def fake_page agent = @mech
+    uri = URI 'http://fake.example/'
+    html = <<-END
+<html>
+<body>
+<form><input type="submit" value="submit" /></form>
+</body>
+</html>
+    END
+    Mechanize::Page.new uri, nil, html, 200, agent
+  end
+  ##
+  # Is the Encoding constant defined?
+  def have_encoding?
+    Object.const_defined? :Encoding
+  end
+  ##
+  # Creates a Mechanize::Page with the given +body+
+  def html_page body
+    uri = URI 'http://example/'
+    Mechanize::Page.new uri, nil, body, 200, @mech
+  end
+  ##
+  # Creates a Mechanize::CookieJar by parsing the given +str+
+  def cookie_jar str, uri = URI('http://example')
+    jar = Mechanize::CookieJar.new
+    jar.parse str, uri
+    jar
+  end
+  ##
+  # Runs the block inside a temporary directory
+  def in_tmpdir
+    Dir.mktmpdir do |dir|
+      Dir.chdir dir do
+        yield
+      end
+    end
+  end
+  ##
+  # Creates a Nokogiri Node +element+ with the given +attributes+
+  def node element, attributes = {}
+    doc = Nokogiri::HTML::Document.new
+    node = Nokogiri::XML::Node.new element, doc
+    attributes.each do |name, value|
+      node[name] = value
+    end
+    node
+  end
+  ##
+  # Creates a Mechanize::Page for the given +uri+ with the given
+  # +content_type+, response +body+ and HTTP status +code+
+  def page uri, content_type = 'text/html', body = '', code = 200
+    uri = URI uri unless URI::Generic === uri
+    Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
+                        @mech)
+  end
+  ##
+  # Requests made during this tests
+  def requests
+    REQUESTS
+  end
+  ##
+  # An SSL private key.  This key is the same across all test runs
+  def ssl_private_key
+    @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
+-----BEGIN RSA PRIVATE KEY-----
+MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
+bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
+p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
+6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
+-----END RSA PRIVATE KEY-----
+    KEY
+  end
+  ##
+  # An X509 certificate.  This certificate is the same across all test runs
+  def ssl_certificate
+    @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
+-----BEGIN CERTIFICATE-----
+MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
+eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
+OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
+B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
+4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
+UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
+//8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
+-----END CERTIFICATE-----
+    CERT
+  end
+  ##
+  # Creates a Tempfile with +content+ that is immediately unlinked
+  def tempfile content
+    body_io = Tempfile.new @NAME
+    body_io.unlink
+    body_io.write content
+    body_io.flush
+    body_io.rewind
+    body_io
+  end
+end
+require 'mechanize/test_case/servlets'
+module Net # :nodoc:
+end
+class Net::HTTP # :nodoc:
+  alias :old_do_start :do_start
+  def do_start
+    @started = true
+  end
+  PAGE_CACHE = {}
+  alias :old_request :request
+  def request(req, *data, &block)
+    url = URI.parse(req.path)
+    path = WEBrick::HTTPUtils.unescape(url.path)
+    path = '/index.html' if path == '/'
+    res = ::Response.new
+    res.query_params = url.query
+    req.query = if 'POST' != req.method && url.query then
+                  WEBrick::HTTPUtils.parse_query url.query
+                elsif req['content-type'] =~ /www-form-urlencoded/ then
+                  WEBrick::HTTPUtils.parse_query req.body
+                elsif req['content-type'] =~ /boundary=(.+)/ then
+                  boundary = WEBrick::HTTPUtils.dequote $1
+                  WEBrick::HTTPUtils.parse_form_data req.body, boundary
+                else
+                  {}
+                end
+    req.cookies = WEBrick::Cookie.parse(req['Cookie'])
+    Mechanize::TestCase::REQUESTS << req
+    if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
+      servlet = servlet_klass.new({})
+      servlet.send "do_#{req.method}", req, res
+    else
+      filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
+      unless PAGE_CACHE[filename]
+        open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
+          PAGE_CACHE[filename] = io.read
+        }
+      end
+      res.body = PAGE_CACHE[filename]
+      case filename
+      when /\.txt$/
+        res['Content-Type'] = 'text/plain'
+      when /\.jpg$/
+        res['Content-Type'] = 'image/jpeg'
+      end
+    end
+    res['Content-Type'] ||= 'text/html'
+    res.code ||= "200"
+    response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
+    response = response_klass.new res.http_version, res.code, res.message
+    res.header.each do |k,v|
+      v = v.first if v.length == 1
+      response[k] = v
+    end
+    res.cookies.each do |cookie|
+      response.add_field 'Set-Cookie', cookie.to_s
+    end
+    response['Content-Type'] ||= 'text/html'
+    response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
+    io = StringIO.new(res.body)
+    response.instance_variable_set :@socket, io
+    def io.read clen, dest = nil, _ = nil
+      if dest then
+        dest << super(clen)
+      else
+        super clen
+      end
+    end
+    body_exist = req.response_body_permitted? &&
+      response_klass.body_permitted?
+    response.instance_variable_set :@body_exist, body_exist
+    yield response if block_given?
+    response
+  end
+end
+class Net::HTTPRequest # :nodoc:
+  attr_accessor :query, :body, :cookies, :user
+  def host
+    'example'
+  end
+  def port
+    80
+  end
+end
+class Response # :nodoc:
+  include Net::HTTPHeader
+  attr_reader :code
+  attr_accessor :body, :query, :cookies
+  attr_accessor :query_params, :http_version
+  attr_accessor :header
+  def code=(c)
+    @code = c.to_s
+  end
+  alias :status :code
+  alias :status= :code=
+    def initialize
+      @header = {}
+      @body = ''
+      @code = nil
+      @query = nil
+      @cookies = []
+      @http_version = '1.1'
+    end
+  def read_body
+    yield body
+  end
+  def message
+    ''
+  end
+end

data/lib/gimme_poc/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Gimme
-  VERSION = '0.0.5'
+  VERSION = '1.1.0'
 end

data/lib/gimme_poc/web.rb CHANGED

@@ -1,91 +1,97 @@
 # Find the contact
-module Gimme
-  class << self
-    ##
-    # Go to a page using Mechanize.
-    # Sleep for a split second to not overload any servers.
-    #
-    # Returns nil if bad url is given.
-    def get(str)
-      url = format_url(str)
-      puts "sending GET request to: #{url}"
-      sleep(0.1)
-      @page = Mechanize.new do |a|
-        a.user_agent_alias = 'Mac Safari'
-        a.open_timeout = 7
-        a.read_timeout = 7
-        a.idle_timeout = 7
-        a.redirect_ok = true
-      end.get(url)
+module Web
+  attr_accessor :page, :agent, :url
+  # Captures http:// and https://
+  HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
+  ##
+  # Go to a page using Mechanize.
+  # Sleep for a split second to not overload any servers.
+  #
+  # Returns nil if bad url is given.
+  def get(str)
+    prepare_get_request(str)
+    @page = @agent.get(@url)
+  rescue Exception => e
+    LogMessages.warn_err(e)
+  end
+  def prepare_get_request(str)
+    mech_setup
+    @url = format_url(str)
+    LogMessages.sending_get_request(url)
+    sleep(0.1)
+  end
-    rescue Mechanize::ResponseCodeError => e
-      puts "#{'Response Error:'.red} #{e}"
-    rescue SocketError => e
-      puts "#{'Socket Error:'.red} #{e}"
-    rescue Net::OpenTimeout => e
-      puts "#{'Connection Timeout:'.red} #{e}"
-    rescue Errno::ETIMEDOUT => e
-      puts "#{'Connection Timeout:'.red} #{e}"
-    rescue Net::HTTP::Persistent::Error
-      puts "#{'Connection Timeout:'.red} read timeout, too many resets."
+  def mech_setup
+    @agent = Mechanize.new do |a|
+      a.user_agent_alias = 'Mac Safari'
+      a.open_timeout = 7
+      a.read_timeout = 7
+      a.idle_timeout = 7
+      a.redirect_ok = true
     end
+  end
-    ##
-    # Mechanize needs absolute urls to work.
-    # If http:// or https:// isn't present, append http://.
-    def format_url(str)
-      LazyDomain.autohttp(str)
-    end
+  ##
+  # Mechanize needs absolute urls to work.
+  # If http:// or https:// isn't present, append http://.
+  def format_url(str)
+    LazyDomain.autohttp(str)
+  end
-    # Used for subdomain check.  Not a permanent change to url variable.
-    def unformat_url(str)
-      str.gsub(HTTP_REGEX, '')
-    end
+  # Used for subdomain check.  Not a permanent change to url variable.
+  def unformat_url(str)
+    str.gsub(HTTP_REGEX, '')
+  end
-    ##
-    # Outputs domain of a url. Useful if subdomains are given to GimmePOC
-    # and they don't work.
-    #
-    # For example:
-    # Given http://maps.google.com, returns 'google.com'.
-    def orig_domain(str)
-      LazyDomain.parse(str).domain
-    rescue PublicSuffix::DomainInvalid => e
-      puts "#{'Invalid Domain:'.red} #{e}"
-    end
+  ##
+  # Outputs domain of a url. Useful if subdomains are given to GimmePOC
+  # and they don't work.
+  #
+  # For example:
+  # Given http://maps.google.com, returns 'google.com'.
+  def orig_domain(str)
+    LazyDomain.parse(str).domain
+  rescue PublicSuffix::DomainInvalid => err
+    LogMessages.invalid_domain(err)
+  end
-    ##
-    # Used in case of relative paths. Merging guarantees correct url.
-    # This needs a url string as argument to work.
-    # Produces a merged uri string.
-    def merged_link(url_str)
-      page.uri.merge(url_str).to_s
-    end
+  ##
+  # Used in case of relative paths. Merging guarantees correct url.
+  # This needs a url string as argument to work.
+  # Produces a merged uri string.
+  def merged_link(url_str)
+    @page.uri.merge(url_str).to_s
+  end
-    ##
-    # Expects relative paths and merges everything.
-    # Returns a string.  If there's nothing, return nil.
-    #
-    # Add \b word block to ensure whole word is searched.
-    def link_with_href(str)
-      merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
-    rescue
-      nil
-    end
+  ##
+  # Expects relative paths and merges everything.
+  # Returns a string.  If there's nothing, return nil.
+  #
+  # Add \b word block to ensure whole word is searched.
+  def link_with_href(str)
+    merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
+  rescue
+    nil
+  end
-    # Boolean, returns true if url is not identical to original domain.
-    def subdomain?(str)
-      (unformat_url(str) != orig_domain(str))
-    end
+  # Boolean, returns true if url is not identical to original domain.
+  #
+  # In the event that the url has a path, this splits everything on forward
+  # slash and selects far left item.
+  def subdomain?(str)
+    (unformat_url(str).split('/')[0] != orig_domain(str))
+  end
-    # TODO: Sometimes DNS will do a redirect and not give a 404.
-    #   Need to prevent redirects.
-    #
-    # Blindly tests to see if a url goes through.  If there is a 404 error,
-    # this will return nil.
-    def blind_test(url)
-      puts "\n(blind testing: #{url})"
-      get(url)
-    end
+  # TODO: Sometimes DNS will do a redirect and not give a 404.
+  #   Need to prevent redirects.
+  #
+  # Blindly tests to see if a url goes through.  If there is a 404 error,
+  # this will return nil.
+  def blind_test(url)
+    LogMessages.blind_testing(url)
+    get(url)
   end
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: gimme_poc
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 1.1.0
 platform: ruby
 authors:
 - John Mason
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-10-11 00:00:00.000000000 Z
+date: 2017-04-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -44,28 +44,56 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.0.1
+        version: 0.0.2
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.0.1
+        version: 0.0.2
 - !ruby/object:Gem::Dependency
-  name: rspec
+  name: shoulda
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '3.5'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '3.3'
+        version: '3.5'
+- !ruby/object:Gem::Dependency
+  name: shoulda-context
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.2'
+- !ruby/object:Gem::Dependency
+  name: minitest-reporters
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.1'
 - !ruby/object:Gem::Dependency
   name: pry
   requirement: !ruby/object:Gem::Requirement
@@ -106,9 +134,12 @@ files:
 - Rakefile
 - lib/gimme_poc.rb
 - lib/gimme_poc/contactpage.rb
+- lib/gimme_poc/logger.rb
+- lib/gimme_poc/logger/messages.rb
 - lib/gimme_poc/poc.rb
 - lib/gimme_poc/questions.rb
 - lib/gimme_poc/save.rb
+- lib/gimme_poc/test_case.rb
 - lib/gimme_poc/version.rb
 - lib/gimme_poc/web.rb
 homepage: http://github.com/m8ss/gimme_poc
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: Get a point of contact.  Given a url or array of urls, extracts social media