gimme_poc 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/gimme_poc/contactpage.rb +55 -0
- data/lib/gimme_poc/poc.rb +21 -0
- data/lib/gimme_poc/questions.rb +33 -0
- data/lib/gimme_poc/save.rb +53 -0
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +79 -0
- data/lib/gimme_poc.rb +16 -223
- metadata +21 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 6ca0656c586244edfaaac44d81b092956e1ed801
         | 
| 4 | 
            +
              data.tar.gz: b2bbc85c51a79ba5a10a1a5e77d8084ac28adeee
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: f2e131c8e68fb8a55169f62b1d9a5662901f6fcb45907878c30d8d700d784b66aa8cae0305c060a611da7d60b810cc2d56bb56959db1da378a3088fa6568d707
         | 
| 7 | 
            +
              data.tar.gz: 6f32874d9fc287ce588baa0e91fa854ed7f75f045f88d798ebf02a0e781918f50d6922ffd06578965d7c033cacdb6a927d03efc7b7967c6a225d7579a8631853
         | 
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            # Find the contact
         | 
| 2 | 
            +
            module Gimme
         | 
| 3 | 
            +
              class << self
         | 
| 4 | 
            +
                ##
         | 
| 5 | 
            +
                # Scans for contact page.  If it doesn't work on the first try,
         | 
| 6 | 
            +
                # It will look for english versions and try again. Processes left to right.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                # Returns nil if no contact page can be found.
         | 
| 9 | 
            +
                def go_to_contact_page(url)
         | 
| 10 | 
            +
                  contact_page(url) || english_contact_page(url)
         | 
| 11 | 
            +
                end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                ##
         | 
| 14 | 
            +
                # Looks for contact page.  Gets page if available.
         | 
| 15 | 
            +
                # If no contact link is available, it will blind test '../contact'.
         | 
| 16 | 
            +
                # Returns nil if nothing can be found.
         | 
| 17 | 
            +
                def contact_page(url)
         | 
| 18 | 
            +
                  contact_link = link_with_href(/contact|Contact/)
         | 
| 19 | 
            +
                  contact_test_page = merged_link('../contact')
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                  case
         | 
| 22 | 
            +
                  when !contact_link.nil?
         | 
| 23 | 
            +
                    puts "#{'Success:'.green} Found contact link!\n"
         | 
| 24 | 
            +
                    get(merged_link(contact_link))
         | 
| 25 | 
            +
                  else
         | 
| 26 | 
            +
                    puts "#{'Warning:'.yellow} couldn't find contact link"
         | 
| 27 | 
            +
                    blind_test(contact_test_page) || get(orig_domain(url))
         | 
| 28 | 
            +
                  end
         | 
| 29 | 
            +
                end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                ##
         | 
| 32 | 
            +
                # Looks for english page.  Gets page if available then looks for
         | 
| 33 | 
            +
                # english contact page.
         | 
| 34 | 
            +
                #
         | 
| 35 | 
            +
                # If no english link is available,
         | 
| 36 | 
            +
                # it will blind test '../en' and '../english'.
         | 
| 37 | 
            +
                # Returns nil if nothing can be found.
         | 
| 38 | 
            +
                def english_contact_page(url)
         | 
| 39 | 
            +
                  puts "\nLooking for english page..."
         | 
| 40 | 
            +
                  english_link = page.link_with(href: /english|English/)
         | 
| 41 | 
            +
                  test_en_page = merged_link('../en')
         | 
| 42 | 
            +
                  test_english_page = merged_link('../english')
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  case
         | 
| 45 | 
            +
                  when !english_link.nil?
         | 
| 46 | 
            +
                    puts "#{'Success:'.green} found english link!"
         | 
| 47 | 
            +
                    get(merged_link(english_link.uri.to_s)) # resolve the link's href against the current page
         | 
| 48 | 
            +
                  else
         | 
| 49 | 
            +
                    blind_test(test_en_page) || blind_test(test_english_page)
         | 
| 50 | 
            +
                    puts 'ready to start again'
         | 
| 51 | 
            +
                    contact_page(url)
         | 
| 52 | 
            +
                  end
         | 
| 53 | 
            +
                end
         | 
| 54 | 
            +
              end
         | 
| 55 | 
            +
            end
         | 
| @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            module Gimme
         | 
| 2 | 
            +
              # Collection of sites searched.
         | 
| 3 | 
            +
              class Search
         | 
| 4 | 
            +
                @all_sites = []
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                class << self
         | 
| 7 | 
            +
                  attr_accessor :all_sites
         | 
| 8 | 
            +
                end
         | 
| 9 | 
            +
             | 
| 10 | 
            +
                # Each site is saved to this class
         | 
| 11 | 
            +
                class POC
         | 
| 12 | 
            +
                  attr_accessor :host, :info
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                  def initialize(url, contact_info_hsh)
         | 
| 15 | 
            +
                    @host = url
         | 
| 16 | 
            +
                    @info = contact_info_hsh
         | 
| 17 | 
            +
                    Search.all_sites << self
         | 
| 18 | 
            +
                  end
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
            end
         | 
| @@ -0,0 +1,33 @@ | |
| 1 | 
            +
            # Find the contact
         | 
| 2 | 
            +
            module Gimme
         | 
| 3 | 
            +
              class << self
         | 
| 4 | 
            +
                ##
         | 
| 5 | 
            +
                # Boolean, returns true if anything is present
         | 
| 6 | 
            +
                # after running scan_for_contacts and deleting failures.
         | 
| 7 | 
            +
                def something_to_save?(hsh)
         | 
| 8 | 
            +
                  delete_failures(hsh).any?
         | 
| 9 | 
            +
                end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                # Boolean, returns true if email is present.
         | 
| 12 | 
            +
                def email_available?
         | 
| 13 | 
            +
                  !link_with_href('mailto').nil?
         | 
| 14 | 
            +
                end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                # Boolean, returns true if phone number is present.
         | 
| 17 | 
            +
                def phone_available?
         | 
| 18 | 
            +
                  !(page.body =~ PHONE_REGEX).nil?
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                ##
         | 
| 22 | 
            +
                # TODO: build better conditional to prevent false positives.
         | 
| 23 | 
            +
                #   There could be other forms like newsletter signup, etc.
         | 
| 24 | 
            +
                #
         | 
| 25 | 
            +
                # If there is a form with more than one field, this returns true.
         | 
| 26 | 
            +
                # Forms with one field are typically search boxes.
         | 
| 27 | 
            +
                #
         | 
| 28 | 
            +
                # Boolean, returns true if form is present on page.
         | 
| 29 | 
            +
                def contactform_available?
         | 
| 30 | 
            +
                  !(page.forms.select { |x| x.fields.length > 1 }.empty?)
         | 
| 31 | 
            +
                end
         | 
| 32 | 
            +
              end
         | 
| 33 | 
            +
            end
         | 
| @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            module Gimme
         | 
| 2 | 
            +
              class << self
         | 
| 3 | 
            +
                ##
         | 
| 4 | 
            +
                # Returns anything that is possible to save, otherwise returns nil.
         | 
| 5 | 
            +
                # Booleans for phone, email, or contact form will display True or False.
         | 
| 6 | 
            +
                #
         | 
| 7 | 
            +
                # Add periods to link hrefs to prevent false positives. Must escape periods
         | 
| 8 | 
            +
                # with a backslash or else it will be a regex wild card.
         | 
| 9 | 
            +
                def scan_for_contacts
         | 
| 10 | 
            +
                  {
         | 
| 11 | 
            +
                    contactpage: link_with_href('contact'),
         | 
| 12 | 
            +
                    email_present: "#{email_available?}",
         | 
| 13 | 
            +
                    phone_present: "#{phone_available?}",
         | 
| 14 | 
            +
                    contact_form: "#{contactform_available?}",
         | 
| 15 | 
            +
                    facebook: link_with_href('facebook\.'),
         | 
| 16 | 
            +
                    twitter: link_with_href('twitter\.'),
         | 
| 17 | 
            +
                    youtube: link_with_href('youtube\.'),
         | 
| 18 | 
            +
                    googleplus: link_with_href('plus\.google\.'),
         | 
| 19 | 
            +
                    linkedin: link_with_href('linkedin\.')
         | 
| 20 | 
            +
                  }
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                # Used in save_available_contacts to save each valid link.
         | 
| 24 | 
            +
                def save_link(key, url)
         | 
| 25 | 
            +
                  return if key.nil? || url.nil?
         | 
| 26 | 
            +
                  @contact_links[key] = url
         | 
| 27 | 
            +
                end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                ##
         | 
| 30 | 
            +
                # Remove negatives from the contacts hash.
         | 
| 31 | 
            +
                # Deletes a key value pair with a value of either nil or false.
         | 
| 32 | 
            +
                # Remember that false is a string.
         | 
| 33 | 
            +
                def delete_failures(hsh)
         | 
| 34 | 
            +
                  hsh.delete_if { |_k, v| v.nil? || v == 'false' }
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                # Saves any available contact info to @contact_links.
         | 
| 38 | 
            +
                def save_available_contacts(url, hsh = scan_for_contacts)
         | 
| 39 | 
            +
                  return unless something_to_save?(hsh)
         | 
| 40 | 
            +
                  puts "\nsaving available contact information from #{url}"
         | 
| 41 | 
            +
                  if hsh.is_a?(Hash)
         | 
| 42 | 
            +
                    hsh.each do |k, v|
         | 
| 43 | 
            +
                      save_link(k, v) # saves to @contact_links
         | 
| 44 | 
            +
                    end
         | 
| 45 | 
            +
                    delete_failures(@contact_links)
         | 
| 46 | 
            +
                    puts "#{@contact_links}".cyan # same as @contact_links
         | 
| 47 | 
            +
                  else
         | 
| 48 | 
            +
                    fail ArgumentError, "expected hash but got #{hsh.class}"
         | 
| 49 | 
            +
                  end
         | 
| 50 | 
            +
                  Search::POC.new(url, @contact_links)
         | 
| 51 | 
            +
                end
         | 
| 52 | 
            +
              end
         | 
| 53 | 
            +
            end
         | 
    
        data/lib/gimme_poc/version.rb
    CHANGED
    
    
| @@ -0,0 +1,79 @@ | |
| 1 | 
            +
            # Find the contact
         | 
| 2 | 
            +
            module Gimme
         | 
| 3 | 
            +
              class << self
         | 
| 4 | 
            +
                ##
         | 
| 5 | 
            +
                # Go to a page using Mechanize.
         | 
| 6 | 
            +
                # Sleep for a split second to not overload any servers.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                # Returns nil if bad url is given.
         | 
| 9 | 
            +
                def get(str)
         | 
| 10 | 
            +
                  url = format_url(str)
         | 
| 11 | 
            +
                  puts "sending GET request to: #{url}"
         | 
| 12 | 
            +
                  sleep(0.1)
         | 
| 13 | 
            +
                  @page = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }.get(url)
         | 
| 14 | 
            +
                rescue Mechanize::ResponseCodeError => e
         | 
| 15 | 
            +
                  puts "#{'Response Error:'.red} #{e}"
         | 
| 16 | 
            +
                rescue SocketError => e
         | 
| 17 | 
            +
                  puts "#{'Socket Error:'.red} #{e}"
         | 
| 18 | 
            +
                rescue Errno::ETIMEDOUT => e
         | 
| 19 | 
            +
                  puts "#{'Connection Timeout:'.red} #{e}"
         | 
| 20 | 
            +
                end
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                # Starts/Restarts the @contact_links hash
         | 
| 23 | 
            +
                def start_contact_links
         | 
| 24 | 
            +
                  puts 'setting contact links hash to {}'
         | 
| 25 | 
            +
                  @contact_links = {}
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                ##
         | 
| 29 | 
            +
                # Mechanize needs absolute urls to work.
         | 
| 30 | 
            +
                # If http:// or https:// isn't present, prepend http://.
         | 
| 31 | 
            +
                def format_url(str)
         | 
| 32 | 
            +
                  LazyDomain.autohttp(str)
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                ##
         | 
| 36 | 
            +
                # Outputs domain of a url. Useful if subdomains are given to GimmePOC
         | 
| 37 | 
            +
                # and they don't work.
         | 
| 38 | 
            +
                #
         | 
| 39 | 
            +
                # For example:
         | 
| 40 | 
            +
                # Given http://maps.google.com, returns 'google.com'.
         | 
| 41 | 
            +
                def orig_domain(str)
         | 
| 42 | 
            +
                  LazyDomain.parse(str).domain
         | 
| 43 | 
            +
                end
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                ##
         | 
| 46 | 
            +
                # Used in case of relative paths. Merging guarantees correct url.
         | 
| 47 | 
            +
                # This needs a url string as argument to work.
         | 
| 48 | 
            +
                # Produces a merged uri string.
         | 
| 49 | 
            +
                def merged_link(url_str)
         | 
| 50 | 
            +
                  page.uri.merge(url_str).to_s
         | 
| 51 | 
            +
                end
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                ##
         | 
| 54 | 
            +
                # Expects relative paths and merges everything.
         | 
| 55 | 
            +
                # Returns a string.  If there's nothing, return nil.
         | 
| 56 | 
            +
                #
         | 
| 57 | 
            +
                # Add \b word boundary to ensure whole word is searched.
         | 
| 58 | 
            +
                def link_with_href(str)
         | 
| 59 | 
            +
                  merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
         | 
| 60 | 
            +
                rescue
         | 
| 61 | 
            +
                  nil
         | 
| 62 | 
            +
                end
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                # Boolean, returns true if url is not identical to original domain.
         | 
| 65 | 
            +
                def subdomain?(str)
         | 
| 66 | 
            +
                  (str != orig_domain(str))
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                # TODO: Sometimes DNS will do a redirect and not give a 404.
         | 
| 70 | 
            +
                #   Need to prevent redirects.
         | 
| 71 | 
            +
                #
         | 
| 72 | 
            +
                # Blindly tests to see if a url goes through.  If there is a 404 error,
         | 
| 73 | 
            +
                # this will return nil.
         | 
| 74 | 
            +
                def blind_test(url)
         | 
| 75 | 
            +
                  puts "\nblind testing: #{url}"
         | 
| 76 | 
            +
                  get(url)
         | 
| 77 | 
            +
                end
         | 
| 78 | 
            +
              end
         | 
| 79 | 
            +
            end
         | 
    
        data/lib/gimme_poc.rb
    CHANGED
    
    | @@ -1,6 +1,12 @@ | |
| 1 | 
            -
            require 'mechanize'
         | 
| 2 1 | 
             
            require 'colored'
         | 
| 2 | 
            +
            require 'lazy_domain'
         | 
| 3 | 
            +
            require 'mechanize'
         | 
| 4 | 
            +
            require_relative './gimme_poc/contactpage'
         | 
| 5 | 
            +
            require_relative './gimme_poc/poc'
         | 
| 6 | 
            +
            require_relative './gimme_poc/questions'
         | 
| 7 | 
            +
            require_relative './gimme_poc/save'
         | 
| 3 8 | 
             
            require_relative './gimme_poc/version'
         | 
| 9 | 
            +
            require_relative './gimme_poc/web'
         | 
| 4 10 |  | 
| 5 11 | 
             
            # Find the contact
         | 
| 6 12 | 
             
            module Gimme
         | 
| @@ -8,51 +14,10 @@ module Gimme | |
| 8 14 | 
             
                attr_accessor :page, :contact, :contact_links, :url
         | 
| 9 15 |  | 
| 10 16 | 
             
                # Simple regex that looks for ###.#### or ###-####
         | 
| 11 | 
            -
                PHONE_REGEX = /\d{3}[-]\d{4}|\d{3}[.]\d{4}/
         | 
| 17 | 
            +
                PHONE_REGEX = /\d{3}[-.]\d{4}/ # no stray delimiter slashes inside the pattern
         | 
| 12 18 |  | 
| 13 19 | 
             
                # Captures http:// and https://
         | 
| 14 | 
            -
                HTTP_REGEX = /\A\bhttps:\/\/|\bhttp:\/\//
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                ## ----------------------------------------------------------------
         | 
| 17 | 
            -
                # Questions
         | 
| 18 | 
            -
                #
         | 
| 19 | 
            -
                #
         | 
| 20 | 
            -
                #
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                ##
         | 
| 23 | 
            -
                # Boolean, returns true if anything is present
         | 
| 24 | 
            -
                # after running scan_for_contacts.
         | 
| 25 | 
            -
                def something_to_save?
         | 
| 26 | 
            -
                  scan_for_contacts.any?
         | 
| 27 | 
            -
                end
         | 
| 28 | 
            -
             | 
| 29 | 
            -
                # Boolean, returns true if email is present.
         | 
| 30 | 
            -
                def email_available?
         | 
| 31 | 
            -
                  !link_with_href('mailto').nil?
         | 
| 32 | 
            -
                end
         | 
| 33 | 
            -
             | 
| 34 | 
            -
                # Boolean, returns true if phone number is present.
         | 
| 35 | 
            -
                def phone_available?
         | 
| 36 | 
            -
                  !(page.body =~ PHONE_REGEX).nil?
         | 
| 37 | 
            -
                end
         | 
| 38 | 
            -
             | 
| 39 | 
            -
                ##
         | 
| 40 | 
            -
                # TODO: build better conditional to prevent false positives.
         | 
| 41 | 
            -
                #   There could be other forms like newsletter signup, etc.
         | 
| 42 | 
            -
                #
         | 
| 43 | 
            -
                # If there is a form with more than one field, this returns true.
         | 
| 44 | 
            -
                # Forms with one field are typically search boxes.
         | 
| 45 | 
            -
                #
         | 
| 46 | 
            -
                # Boolean, returns true if form is present on page.
         | 
| 47 | 
            -
                def contactform_available?
         | 
| 48 | 
            -
                  !(page.forms.select { |x| x.fields.length > 1 }.empty?)
         | 
| 49 | 
            -
                end
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                ## ----------------------------------------------------------------
         | 
| 52 | 
            -
                # Actions
         | 
| 53 | 
            -
                #
         | 
| 54 | 
            -
                #
         | 
| 55 | 
            -
                #
         | 
| 20 | 
            +
                HTTP_REGEX = %r{\A\bhttps://|\bhttp://} # %r{} needs neither outer slashes nor escaped inner ones
         | 
| 56 21 |  | 
| 57 22 | 
             
                ##
         | 
| 58 23 | 
             
                # The main method!
         | 
| @@ -63,175 +28,21 @@ module Gimme | |
| 63 28 | 
             
                  arr.each do |url|
         | 
| 64 29 | 
             
                    puts '-' * 50
         | 
| 65 30 | 
             
                    puts "starting: #{url}"
         | 
| 66 | 
            -
                     | 
| 31 | 
            +
                    case
         | 
| 32 | 
            +
                    when subdomain?(url)
         | 
| 33 | 
            +
                      get(orig_domain(url)) if get(url).nil?
         | 
| 34 | 
            +
                    else
         | 
| 35 | 
            +
                      next if get(url).nil?
         | 
| 36 | 
            +
                    end
         | 
| 67 37 | 
             
                    puts 'now looking for contact pages'
         | 
| 68 38 | 
             
                    start_contact_links
         | 
| 69 | 
            -
                    mechpage = go_to_contact_page
         | 
| 39 | 
            +
                    mechpage = go_to_contact_page(url)
         | 
| 70 40 | 
             
                    next if mechpage.nil?
         | 
| 71 41 | 
             
                    save_available_contacts(mechpage.uri.to_s)
         | 
| 72 42 | 
             
                  end
         | 
| 73 43 | 
             
                  Search.all_sites
         | 
| 74 44 | 
             
                end
         | 
| 75 45 |  | 
| 76 | 
            -
                # Mechanize needs absolute urls to work.
         | 
| 77 | 
            -
                # If http:// or https:// isn't present, append http://.
         | 
| 78 | 
            -
                def format_url(str)
         | 
| 79 | 
            -
                  str.prepend('http://') if (str =~ HTTP_REGEX).nil?
         | 
| 80 | 
            -
                  str
         | 
| 81 | 
            -
                end
         | 
| 82 | 
            -
             | 
| 83 | 
            -
                ##
         | 
| 84 | 
            -
                # Go to a page using Mechanize.
         | 
| 85 | 
            -
                # Sleep for a split second to not overload any servers.
         | 
| 86 | 
            -
                #
         | 
| 87 | 
            -
                # Returns nil if bad url is given.
         | 
| 88 | 
            -
                def get(str)
         | 
| 89 | 
            -
                  url = format_url(str)
         | 
| 90 | 
            -
                  puts "sending GET request to: #{url}"
         | 
| 91 | 
            -
                  sleep(0.1)
         | 
| 92 | 
            -
                  @page = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }.get(url)
         | 
| 93 | 
            -
                rescue SocketError => e
         | 
| 94 | 
            -
                  puts "#{'skipping:'.red} -- #{e}"
         | 
| 95 | 
            -
                end
         | 
| 96 | 
            -
             | 
| 97 | 
            -
                # Starts/Restarts @contacts_links hash
         | 
| 98 | 
            -
                def start_contact_links
         | 
| 99 | 
            -
                  puts 'setting contact links hash to {}'
         | 
| 100 | 
            -
                  @contact_links = {}
         | 
| 101 | 
            -
                end
         | 
| 102 | 
            -
             | 
| 103 | 
            -
                ##
         | 
| 104 | 
            -
                # Scans for contact page.  If it doesn't work on the first try,
         | 
| 105 | 
            -
                # It will look for english versions and try again. Processes left to right.
         | 
| 106 | 
            -
                #
         | 
| 107 | 
            -
                # Returns nil if no contact page can be found.
         | 
| 108 | 
            -
                def go_to_contact_page
         | 
| 109 | 
            -
                  contact_page || english_contact_page
         | 
| 110 | 
            -
                end
         | 
| 111 | 
            -
             | 
| 112 | 
            -
                ##
         | 
| 113 | 
            -
                # Looks for contact page.  Gets page if available.
         | 
| 114 | 
            -
                # If no contact link is available, it will blind test '../contact'.
         | 
| 115 | 
            -
                # Returns nil if nothing can be found.
         | 
| 116 | 
            -
                def contact_page
         | 
| 117 | 
            -
                  contact_link = link_with_href(/contact|Contact/)
         | 
| 118 | 
            -
                  contact_test_page = merged_link('../contact')
         | 
| 119 | 
            -
             | 
| 120 | 
            -
                  case
         | 
| 121 | 
            -
                  when !contact_link.nil?
         | 
| 122 | 
            -
                    puts "#{'success:'.green} Found contact link!\n"
         | 
| 123 | 
            -
                    get(merged_link(contact_link))
         | 
| 124 | 
            -
                  else
         | 
| 125 | 
            -
                    puts "#{'warning:'.yellow}couldn't find contact link"
         | 
| 126 | 
            -
                    blind_test(contact_test_page)
         | 
| 127 | 
            -
                  end
         | 
| 128 | 
            -
                end
         | 
| 129 | 
            -
             | 
| 130 | 
            -
                ##
         | 
| 131 | 
            -
                # Looks for english page.  Gets page if available then looks for
         | 
| 132 | 
            -
                # english contact page.
         | 
| 133 | 
            -
                #
         | 
| 134 | 
            -
                # If no english link is available,
         | 
| 135 | 
            -
                # it will blind test '../en' and '../english'.
         | 
| 136 | 
            -
                # Returns nil if nothing can be found.
         | 
| 137 | 
            -
                def english_contact_page
         | 
| 138 | 
            -
                  puts "\nLooking for english page..."
         | 
| 139 | 
            -
                  english_link = page.link_with(href: /english|English/)
         | 
| 140 | 
            -
                  test_en_page = merged_link('../en')
         | 
| 141 | 
            -
                  test_english_page = merged_link('../english')
         | 
| 142 | 
            -
             | 
| 143 | 
            -
                  case
         | 
| 144 | 
            -
                  when !english_link.nil?
         | 
| 145 | 
            -
                    puts "#{'success:'.green} found english link!"
         | 
| 146 | 
            -
                    get(merged(english_link)) # already merged link
         | 
| 147 | 
            -
                  else
         | 
| 148 | 
            -
                    blind_test(test_en_page) || blind_test(test_english_page)
         | 
| 149 | 
            -
                    puts 'ready to start again'
         | 
| 150 | 
            -
                    contact_page
         | 
| 151 | 
            -
                  end
         | 
| 152 | 
            -
                end
         | 
| 153 | 
            -
             | 
| 154 | 
            -
                # TODO: Sometimes DNS will do a redirect and not give a 404.
         | 
| 155 | 
            -
                #   Need to prevent redirects.
         | 
| 156 | 
            -
                #
         | 
| 157 | 
            -
                # Blindly tests to see if a url goes through.  If there is a 404 error,
         | 
| 158 | 
            -
                # this will return nil.
         | 
| 159 | 
            -
                def blind_test(url)
         | 
| 160 | 
            -
                  puts "\nblind testing: #{url}"
         | 
| 161 | 
            -
                  get(url)
         | 
| 162 | 
            -
                rescue Mechanize::ResponseCodeError
         | 
| 163 | 
            -
                  puts "#{'404 Error:'.red} #{url}"
         | 
| 164 | 
            -
                end
         | 
| 165 | 
            -
             | 
| 166 | 
            -
                ##
         | 
| 167 | 
            -
                # Used in case of relative paths. Merging guarantees correct url.
         | 
| 168 | 
            -
                # This needs a url string as argument to work.
         | 
| 169 | 
            -
                # Produces a merged uri string.
         | 
| 170 | 
            -
                def merged_link(url_str)
         | 
| 171 | 
            -
                  page.uri.merge(url_str).to_s
         | 
| 172 | 
            -
                end
         | 
| 173 | 
            -
             | 
| 174 | 
            -
                ##
         | 
| 175 | 
            -
                # Expects relative paths and merges everything.
         | 
| 176 | 
            -
                # Returns a string.  If there's nothing, return nil.
         | 
| 177 | 
            -
                #
         | 
| 178 | 
            -
                # Add \b word block to ensure whole word is searched.
         | 
| 179 | 
            -
                def link_with_href(str)
         | 
| 180 | 
            -
                  merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
         | 
| 181 | 
            -
                rescue
         | 
| 182 | 
            -
                  nil
         | 
| 183 | 
            -
                end
         | 
| 184 | 
            -
             | 
| 185 | 
            -
                ##
         | 
| 186 | 
            -
                # Returns anything that is possible to save, otherwise returns nil.
         | 
| 187 | 
            -
                # Booleans for phone, email, or contact form will display True or False.
         | 
| 188 | 
            -
                #
         | 
| 189 | 
            -
                # Add periods to link hrefs to prevent false positives. Must escape periods
         | 
| 190 | 
            -
                # with a backslash or else it will be a regex wild card.
         | 
| 191 | 
            -
                def scan_for_contacts
         | 
| 192 | 
            -
                  {
         | 
| 193 | 
            -
                    contactpage: link_with_href('contact'),
         | 
| 194 | 
            -
                    email_present: "#{email_available?}",
         | 
| 195 | 
            -
                    phone_present: "#{phone_available?}",
         | 
| 196 | 
            -
                    contact_form: "#{contactform_available?}",
         | 
| 197 | 
            -
                    facebook: link_with_href('facebook\.'),
         | 
| 198 | 
            -
                    twitter: link_with_href('twitter\.'),
         | 
| 199 | 
            -
                    youtube: link_with_href('youtube\.'),
         | 
| 200 | 
            -
                    googleplus: link_with_href('plus\.google\.'),
         | 
| 201 | 
            -
                    linkedin: link_with_href('linkedin\.')
         | 
| 202 | 
            -
                  }
         | 
| 203 | 
            -
                end
         | 
| 204 | 
            -
             | 
| 205 | 
            -
                # Used in save_available_contacts to save each valid link.
         | 
| 206 | 
            -
                def save_link(key, url)
         | 
| 207 | 
            -
                  return if key.nil? || url.nil?
         | 
| 208 | 
            -
                  @contact_links[key] = url
         | 
| 209 | 
            -
                end
         | 
| 210 | 
            -
             | 
| 211 | 
            -
                ##
         | 
| 212 | 
            -
                # Remove negatives from the contacts hash.
         | 
| 213 | 
            -
                # Deletes a key value pair with a value of either nil or false.
         | 
| 214 | 
            -
                # Remember that false is a string.
         | 
| 215 | 
            -
                def delete_failures(hsh)
         | 
| 216 | 
            -
                  hsh.delete_if { |_k, v| v.nil? || v == 'false' }
         | 
| 217 | 
            -
                end
         | 
| 218 | 
            -
             | 
| 219 | 
            -
                # Saves any available contact info to @contact_links.
         | 
| 220 | 
            -
                def save_available_contacts(url, hsh = scan_for_contacts)
         | 
| 221 | 
            -
                  puts "\nsaving available contact information from #{url}"
         | 
| 222 | 
            -
                  return unless something_to_save?
         | 
| 223 | 
            -
                  if hsh.is_a?(Hash)
         | 
| 224 | 
            -
                    hsh.each do |k, v|
         | 
| 225 | 
            -
                      save_link(k, v) # saves to @contact_links
         | 
| 226 | 
            -
                    end
         | 
| 227 | 
            -
                    delete_failures(@contact_links)
         | 
| 228 | 
            -
                    puts "#{@contact_links}".cyan # same as @contact_links
         | 
| 229 | 
            -
                  else
         | 
| 230 | 
            -
                    fail ArgumentError, "expected hash but got #{hsh.class}"
         | 
| 231 | 
            -
                  end
         | 
| 232 | 
            -
                  Search::POC.new(url, @contact_links)
         | 
| 233 | 
            -
                end
         | 
| 234 | 
            -
             | 
| 235 46 | 
             
                # Convenience method.
         | 
| 236 47 | 
             
                def memory
         | 
| 237 48 | 
             
                  Search.all_sites
         | 
| @@ -243,21 +54,3 @@ module Gimme | |
| 243 54 | 
             
                end
         | 
| 244 55 | 
             
              end
         | 
| 245 56 | 
             
            end
         | 
| 246 | 
            -
             | 
| 247 | 
            -
            # Collection of sites searched.
         | 
| 248 | 
            -
            class Search
         | 
| 249 | 
            -
              @all_sites = []
         | 
| 250 | 
            -
             | 
| 251 | 
            -
              class << self
         | 
| 252 | 
            -
                attr_accessor :all_sites
         | 
| 253 | 
            -
              end
         | 
| 254 | 
            -
             | 
| 255 | 
            -
              # Each site is saved to this class
         | 
| 256 | 
            -
              class POC
         | 
| 257 | 
            -
                def initialize(url, contact_info_hsh)
         | 
| 258 | 
            -
                  @host = url
         | 
| 259 | 
            -
                  @info = contact_info_hsh
         | 
| 260 | 
            -
                  Search.all_sites << self
         | 
| 261 | 
            -
                end
         | 
| 262 | 
            -
              end
         | 
| 263 | 
            -
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: gimme_poc
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.3
         | 
| 4 | 
            +
              version: 0.0.4
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - John Mason
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-10- | 
| 11 | 
            +
            date: 2015-10-10 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: mechanize
         | 
| @@ -38,6 +38,20 @@ dependencies: | |
| 38 38 | 
             
                - - "~>"
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 40 | 
             
                    version: '1.2'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: lazy_domain
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - "~>"
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: 0.0.1
         | 
| 48 | 
            +
              type: :runtime
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - "~>"
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: 0.0.1
         | 
| 41 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 56 | 
             
              name: rspec
         | 
| 43 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -91,7 +105,12 @@ files: | |
| 91 105 | 
             
            - README.md
         | 
| 92 106 | 
             
            - Rakefile
         | 
| 93 107 | 
             
            - lib/gimme_poc.rb
         | 
| 108 | 
            +
            - lib/gimme_poc/contactpage.rb
         | 
| 109 | 
            +
            - lib/gimme_poc/poc.rb
         | 
| 110 | 
            +
            - lib/gimme_poc/questions.rb
         | 
| 111 | 
            +
            - lib/gimme_poc/save.rb
         | 
| 94 112 | 
             
            - lib/gimme_poc/version.rb
         | 
| 113 | 
            +
            - lib/gimme_poc/web.rb
         | 
| 95 114 | 
             
            homepage: http://github.com/m8ss/gimme_poc
         | 
| 96 115 | 
             
            licenses:
         | 
| 97 116 | 
             
            - MIT
         |