gimme_poc 0.0.5 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -31
- data/Rakefile +15 -0
- data/lib/gimme_poc.rb +32 -25
- data/lib/gimme_poc/contactpage.rb +47 -48
- data/lib/gimme_poc/logger.rb +16 -0
- data/lib/gimme_poc/logger/messages.rb +77 -0
- data/lib/gimme_poc/poc.rb +6 -5
- data/lib/gimme_poc/questions.rb +23 -29
- data/lib/gimme_poc/save.rb +60 -52
- data/lib/gimme_poc/test_case.rb +329 -0
- data/lib/gimme_poc/version.rb +1 -1
- data/lib/gimme_poc/web.rb +85 -79
- metadata +39 -8
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
         | 
| 4 | 
            +
              data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
         | 
| 7 | 
            +
              data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458
         | 
    
        data/README.md
    CHANGED
    
    | @@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info | |
| 10 10 | 
             
            ## Installation
         | 
| 11 11 |  | 
| 12 12 | 
             
            ```
         | 
| 13 | 
            -
            gem install gimme_poc
         | 
| 13 | 
            +
            $ gem install gimme_poc
         | 
| 14 14 |  | 
| 15 15 | 
             
            ```
         | 
| 16 16 |  | 
| 17 17 | 
             
            ## Set Up
         | 
| 18 18 |  | 
| 19 19 | 
             
            ```ruby
         | 
| 20 | 
            -
            require 'gimme_poc'  | 
| 20 | 
            +
            require 'gimme_poc' 
         | 
| 21 21 |  | 
| 22 22 | 
             
            ```
         | 
| 23 23 |  | 
| 24 24 | 
             
            ## How it works
         | 
| 25 25 |  | 
| 26 | 
            -
            Gimme POC is easy to use! Simply run this command.
         | 
| 27 | 
            -
             | 
| 28 26 | 
             
            ```ruby
         | 
| 29 27 |  | 
| 30 28 | 
             
            Gimme.poc 'http://example.com'
         | 
| @@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com']) | |
| 56 54 |  | 
| 57 55 | 
             
            ```
         | 
| 58 56 |  | 
| 59 | 
            -
            ## Referencing the search results
         | 
| 60 | 
            -
             | 
| 61 | 
            -
             To use your search results, simply run:
         | 
| 62 | 
            -
             | 
| 63 | 
            -
            ```ruby
         | 
| 64 | 
            -
             | 
| 65 | 
            -
            Gimme.memory
         | 
| 66 | 
            -
             | 
| 67 | 
            -
            ```
         | 
| 68 | 
            -
             | 
| 69 | 
            -
            ## Clearing the search results
         | 
| 70 | 
            -
             | 
| 71 | 
            -
             To clear search results and start afresh, run:
         | 
| 72 | 
            -
             | 
| 73 | 
            -
            ```ruby
         | 
| 74 | 
            -
             | 
| 75 | 
            -
            Gimme.reset!
         | 
| 76 | 
            -
             | 
| 77 | 
            -
            ```
         | 
| 78 | 
            -
             | 
| 79 | 
            -
            ## To do:
         | 
| 80 | 
            -
             | 
| 81 | 
            -
             - Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
         | 
| 82 | 
            -
             - Work on false positives of bad urls.  (Bad urls should be skipped + DNS redirects don't give 404 errors)
         | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
            More to follow...
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,7 +1,22 @@ | |
| 1 1 | 
             
            require 'rubygems'
         | 
| 2 2 | 
             
            require 'rake'
         | 
| 3 | 
            +
            require 'rake/testtask'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Rake::TestTask.new(:test) do |test|
         | 
| 6 | 
            +
              test.libs << 'lib' << 'test'
         | 
| 7 | 
            +
              test.pattern = 'test/**/test*.rb'
         | 
| 8 | 
            +
              test.verbose = true
         | 
| 9 | 
            +
            end
         | 
| 3 10 |  | 
| 4 11 | 
             
            desc 'Open console with gimme_poc loaded'
         | 
| 5 12 | 
             
            task :console do
         | 
| 6 13 | 
             
              exec 'pry -r ./lib/gimme_poc.rb'
         | 
| 7 14 | 
             
            end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            desc 'make a release'
         | 
| 17 | 
            +
            task :release do
         | 
| 18 | 
            +
              exec './script/release'
         | 
| 19 | 
            +
            end
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            task c: :console # alias 'c' for console
         | 
| 22 | 
            +
            task default: :test
         | 
    
        data/lib/gimme_poc.rb
    CHANGED
    
    | @@ -3,6 +3,7 @@ require 'lazy_domain' | |
| 3 3 | 
             
            require 'mechanize'
         | 
| 4 4 | 
             
            require_relative './gimme_poc/contactpage'
         | 
| 5 5 | 
             
            require_relative './gimme_poc/poc'
         | 
| 6 | 
            +
            require_relative './gimme_poc/logger'
         | 
| 6 7 | 
             
            require_relative './gimme_poc/questions'
         | 
| 7 8 | 
             
            require_relative './gimme_poc/save'
         | 
| 8 9 | 
             
            require_relative './gimme_poc/version'
         | 
| @@ -11,13 +12,35 @@ require_relative './gimme_poc/web' | |
| 11 12 | 
             
            # Find the contact
         | 
| 12 13 | 
             
            module Gimme
         | 
| 13 14 | 
             
              class << self
         | 
| 15 | 
            +
                include Web
         | 
| 16 | 
            +
                include Questions
         | 
| 17 | 
            +
                include Save
         | 
| 18 | 
            +
                include ContactPage
         | 
| 19 | 
            +
             | 
| 14 20 | 
             
                attr_accessor :page, :contact, :contact_links, :url
         | 
| 21 | 
            +
                attr_reader :status_code
         | 
| 15 22 |  | 
| 16 | 
            -
                 | 
| 17 | 
            -
             | 
| 23 | 
            +
                def start_url_process(url)
         | 
| 24 | 
            +
                  LogMessages.start_url(url)
         | 
| 25 | 
            +
                  case
         | 
| 26 | 
            +
                  when LazyDomain.valid?(url) == false
         | 
| 27 | 
            +
                    LogMessages.invalid_domain(url)
         | 
| 28 | 
            +
                    @status_code = 0
         | 
| 29 | 
            +
                  when subdomain?(url)
         | 
| 30 | 
            +
                    LogMessages.subdomain
         | 
| 31 | 
            +
                    @status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
         | 
| 32 | 
            +
                  else
         | 
| 33 | 
            +
                    @status_code = 0 if get(url).nil?
         | 
| 34 | 
            +
                  end
         | 
| 35 | 
            +
                end
         | 
| 18 36 |  | 
| 19 | 
            -
                 | 
| 20 | 
            -
             | 
| 37 | 
            +
                def start_contact_process(url)
         | 
| 38 | 
            +
                  start_contact_links
         | 
| 39 | 
            +
                  attempt = save_available_contacts(url)
         | 
| 40 | 
            +
                  info = attempt.info if attempt && attempt.respond_to?(:info)
         | 
| 41 | 
            +
                  return attempt unless info.nil? || info.empty?
         | 
| 42 | 
            +
                  go_to_contact_page(url)
         | 
| 43 | 
            +
                end
         | 
| 21 44 |  | 
| 22 45 | 
             
                ##
         | 
| 23 46 | 
             
                # The main method!
         | 
| @@ -25,29 +48,13 @@ module Gimme | |
| 25 48 | 
             
                # If url is bad, it's converted to nil in 'get' method and skipped over.
         | 
| 26 49 | 
             
                def poc(arr)
         | 
| 27 50 | 
             
                  arr = arr.split unless arr.is_a?(Array)
         | 
| 51 | 
            +
                  results = []
         | 
| 28 52 | 
             
                  arr.each do |url|
         | 
| 29 | 
            -
                     | 
| 30 | 
            -
                     | 
| 31 | 
            -
                     | 
| 32 | 
            -
                      puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
         | 
| 33 | 
            -
                      next
         | 
| 34 | 
            -
                    end
         | 
| 35 | 
            -
                    case
         | 
| 36 | 
            -
                    when subdomain?(url)
         | 
| 37 | 
            -
                      puts '(This url is a subdomain.  Will try both sub and root domain.)'
         | 
| 38 | 
            -
                      next if get(url).nil? && get(orig_domain(url)).nil?
         | 
| 39 | 
            -
                    else
         | 
| 40 | 
            -
                      next if get(url).nil?
         | 
| 41 | 
            -
                    end
         | 
| 42 | 
            -
                    start_contact_links
         | 
| 43 | 
            -
                    mechpage = go_to_contact_page(url)
         | 
| 44 | 
            -
                    if mechpage.nil?
         | 
| 45 | 
            -
                      puts '(empty page, exiting.)'
         | 
| 46 | 
            -
                    else
         | 
| 47 | 
            -
                      save_available_contacts(mechpage.uri.to_s)
         | 
| 48 | 
            -
                    end
         | 
| 53 | 
            +
                    start_url_process(url)
         | 
| 54 | 
            +
                    next if @status_code == 0
         | 
| 55 | 
            +
                    results << start_contact_process(url)
         | 
| 49 56 | 
             
                  end
         | 
| 50 | 
            -
                   | 
| 57 | 
            +
                  results.length == 1 ? results.first : results
         | 
| 51 58 | 
             
                end
         | 
| 52 59 |  | 
| 53 60 | 
             
                # Convenience method.
         | 
| @@ -1,56 +1,55 @@ | |
| 1 1 | 
             
            # Find the contact
         | 
| 2 | 
            -
            module  | 
| 3 | 
            -
               | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
                 | 
| 12 | 
            -
             | 
| 13 | 
            -
                ##
         | 
| 14 | 
            -
                # Looks for contact page.  Gets page if available.
         | 
| 15 | 
            -
                # If no contact link is available, it will blind test '../contact'.
         | 
| 16 | 
            -
                # Returns nil if nothing can be found.
         | 
| 17 | 
            -
                def contact_page(url)
         | 
| 18 | 
            -
                  puts 'now looking for contact pages'
         | 
| 19 | 
            -
                  contact_link = link_with_href(/contact|Contact/)
         | 
| 20 | 
            -
                  contact_test_page = merged_link('../contact')
         | 
| 2 | 
            +
            module ContactPage
         | 
| 3 | 
            +
              attr_accessor :contact_link
         | 
| 4 | 
            +
              
         | 
| 5 | 
            +
              ##
         | 
| 6 | 
            +
              # Scans for contact page.  If it doesn't work on the first try,
         | 
| 7 | 
            +
              # It will look for english versions and try again. Processes left to right.
         | 
| 8 | 
            +
              #
         | 
| 9 | 
            +
              # Returns nil if no contact page can be found.
         | 
| 10 | 
            +
              def go_to_contact_page(url)
         | 
| 11 | 
            +
                contact_page(url) || english_contact_page(url)
         | 
| 12 | 
            +
              end
         | 
| 21 13 |  | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 14 | 
            +
              ##
         | 
| 15 | 
            +
              # Looks for contact page.  Gets page if available.
         | 
| 16 | 
            +
              # If no contact link is available, it will blind test '../contact'.
         | 
| 17 | 
            +
              # Returns nil if nothing can be found.
         | 
| 18 | 
            +
              def contact_page(url)
         | 
| 19 | 
            +
                LogMessages.looking_for_contact_page
         | 
| 20 | 
            +
                @contact_link = link_with_href(/contact|Contact/)
         | 
| 21 | 
            +
                contact_test_page = merged_link('../contact')
         | 
| 22 | 
            +
                case
         | 
| 23 | 
            +
                when !contact_link.nil?
         | 
| 24 | 
            +
                  LogMessages.found_contact_link
         | 
| 25 | 
            +
                  get(merged_link(@contact_link))
         | 
| 26 | 
            +
                else
         | 
| 27 | 
            +
                  LogMessages.no_contact_link
         | 
| 28 | 
            +
                  get(orig_domain(url)) if blind_test(contact_test_page).nil?
         | 
| 30 29 | 
             
                end
         | 
| 30 | 
            +
              end
         | 
| 31 31 |  | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 32 | 
            +
              ##
         | 
| 33 | 
            +
              # Looks for english page.  Gets page if available then looks for
         | 
| 34 | 
            +
              # english contact page.
         | 
| 35 | 
            +
              #
         | 
| 36 | 
            +
              # If no english link is available,
         | 
| 37 | 
            +
              # it will blind test '../en' and '../english'.
         | 
| 38 | 
            +
              # Returns nil if nothing can be found.
         | 
| 39 | 
            +
              def english_contact_page(url)
         | 
| 40 | 
            +
                LogMessages.looking_for_english_page
         | 
| 41 | 
            +
                english_link = @page.link_with(href: %r{en\/|english|English})
         | 
| 42 | 
            +
                test_en_page = merged_link('../en')
         | 
| 43 | 
            +
                test_english_page = merged_link('../english')
         | 
| 44 44 |  | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
                  end
         | 
| 45 | 
            +
                case
         | 
| 46 | 
            +
                when !english_link.nil?
         | 
| 47 | 
            +
                  LogMessages.found_english_link
         | 
| 48 | 
            +
                  get(merged_link(english_link.uri))
         | 
| 49 | 
            +
                else
         | 
| 50 | 
            +
                  blind_test(test_en_page) || blind_test(test_english_page)
         | 
| 51 | 
            +
                  LogMessages.restarting
         | 
| 52 | 
            +
                  contact_page(url)
         | 
| 54 53 | 
             
                end
         | 
| 55 54 | 
             
              end
         | 
| 56 55 | 
             
            end
         | 
| @@ -0,0 +1,16 @@ | |
| 1 | 
            +
            require 'logger'
         | 
| 2 | 
            +
            require_relative './logger/messages'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Output info messages during gimme poc crawl.
         | 
| 5 | 
            +
            module Gimme
         | 
| 6 | 
            +
              class << self
         | 
| 7 | 
            +
                include LogMessages
         | 
| 8 | 
            +
                attr_accessor :logger
         | 
| 9 | 
            +
              end
         | 
| 10 | 
            +
            end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            Gimme.logger = Logger.new(STDOUT)
         | 
| 13 | 
            +
            Gimme.logger.level = Logger::INFO
         | 
| 14 | 
            +
            Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
         | 
| 15 | 
            +
              "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
         | 
| 16 | 
            +
            end
         | 
| @@ -0,0 +1,77 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            module LogMessages
         | 
| 3 | 
            +
              class << self
         | 
| 4 | 
            +
                def loginfo(str)
         | 
| 5 | 
            +
                  Gimme.logger.info(str)
         | 
| 6 | 
            +
                end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                def logwarn(str)
         | 
| 9 | 
            +
                  Gimme.logger.info(str)
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                # Info
         | 
| 13 | 
            +
                # -----------------------------------------------------------------
         | 
| 14 | 
            +
                def start_url(url)
         | 
| 15 | 
            +
                  puts '-' * 50
         | 
| 16 | 
            +
                  loginfo "starting: #{url}"
         | 
| 17 | 
            +
                end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                def sending_get_request(url)
         | 
| 20 | 
            +
                  loginfo("sending GET request to: #{url}")
         | 
| 21 | 
            +
                end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                def blind_testing(url)
         | 
| 24 | 
            +
                  loginfo("blind testing: #{url}")
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                def invalid_domain(url)
         | 
| 28 | 
            +
                  loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
         | 
| 29 | 
            +
                end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                def subdomain
         | 
| 32 | 
            +
                  loginfo '(This url is a subdomain.  Will try both sub and root domain.)'
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                def empty_page
         | 
| 36 | 
            +
                  loginfo '(empty page, exiting.)'
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                def looking_for_contact_page
         | 
| 40 | 
            +
                  loginfo('now looking for contact pages')
         | 
| 41 | 
            +
                end
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                def found_contact_link
         | 
| 44 | 
            +
                  loginfo("#{'Success:'.green} Found contact link!")
         | 
| 45 | 
            +
                end
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                def looking_for_english_page
         | 
| 48 | 
            +
                  loginfo('Looking for english page...')
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                def found_english_link
         | 
| 52 | 
            +
                  loginfo("#{'Success:'.green} found english link!")
         | 
| 53 | 
            +
                end
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                def saving_contact_info(url)
         | 
| 56 | 
            +
                  loginfo("saving available contact information from #{url}")
         | 
| 57 | 
            +
                end
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                # Warnings
         | 
| 60 | 
            +
                # -----------------------------------------------------------------
         | 
| 61 | 
            +
                def no_contact_link
         | 
| 62 | 
            +
                  logwarn("#{'Warning:'.yellow} couldn't find contact link")
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                def restarting
         | 
| 66 | 
            +
                  logwarn('restarting'.yellow)
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                def nothing_to_save
         | 
| 70 | 
            +
                  logwarn '(nothing to save)'
         | 
| 71 | 
            +
                end
         | 
| 72 | 
            +
                
         | 
| 73 | 
            +
                def warn_err(error)
         | 
| 74 | 
            +
                  logwarn("#{'Error:'.red} #{error}")
         | 
| 75 | 
            +
                end
         | 
| 76 | 
            +
              end
         | 
| 77 | 
            +
            end
         | 
    
        data/lib/gimme_poc/poc.rb
    CHANGED
    
    | @@ -1,10 +1,12 @@ | |
| 1 | 
            +
            require "ostruct"
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module Gimme
         | 
| 2 4 | 
             
              # Collection of sites searched.
         | 
| 3 5 | 
             
              class Search
         | 
| 4 | 
            -
                 | 
| 6 | 
            +
                attr_accessor :all_sites
         | 
| 5 7 |  | 
| 6 | 
            -
                 | 
| 7 | 
            -
                   | 
| 8 | 
            +
                def initialize
         | 
| 9 | 
            +
                  @all_sites = []
         | 
| 8 10 | 
             
                end
         | 
| 9 11 |  | 
| 10 12 | 
             
                # Each site is saved to this class
         | 
| @@ -13,8 +15,7 @@ module Gimme | |
| 13 15 |  | 
| 14 16 | 
             
                  def initialize(url, contact_info_hsh)
         | 
| 15 17 | 
             
                    @host = url
         | 
| 16 | 
            -
                    @info = contact_info_hsh
         | 
| 17 | 
            -
                    Search.all_sites << self
         | 
| 18 | 
            +
                    @info = OpenStruct.new(contact_info_hsh)
         | 
| 18 19 | 
             
                  end
         | 
| 19 20 | 
             
                end
         | 
| 20 21 | 
             
              end
         | 
    
        data/lib/gimme_poc/questions.rb
    CHANGED
    
    | @@ -1,33 +1,27 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
            module  | 
| 3 | 
            -
               | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
                # Boolean, returns true if email is present.
         | 
| 12 | 
            -
                def email_available?
         | 
| 13 | 
            -
                  !link_with_href('mailto').nil?
         | 
| 14 | 
            -
                end
         | 
| 1 | 
            +
            # Reflective questions for situational awareness.
         | 
| 2 | 
            +
            module Questions
         | 
| 3 | 
            +
              # Simple regex that looks for ###.#### or ###-####
         | 
| 4 | 
            +
              PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/  
         | 
| 5 | 
            +
              
         | 
| 6 | 
            +
              # Boolean, returns true if email is present.
         | 
| 7 | 
            +
              def email_available?
         | 
| 8 | 
            +
                !link_with_href('mailto').nil?
         | 
| 9 | 
            +
              end
         | 
| 15 10 |  | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 11 | 
            +
              # Boolean, returns true if phone number is present.
         | 
| 12 | 
            +
              def phone_available?
         | 
| 13 | 
            +
                !(@page.body =~ PHONE_REGEX).nil?
         | 
| 14 | 
            +
              end
         | 
| 20 15 |  | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
                end
         | 
| 16 | 
            +
              ##
         | 
| 17 | 
            +
              # TODO: build better conditional to prevent false positives.
         | 
| 18 | 
            +
              #   There could be other forms like newsletter signup, etc.
         | 
| 19 | 
            +
              #
         | 
| 20 | 
            +
              # If there is a form with more than one field, this returns true.
         | 
| 21 | 
            +
              # Forms with one field are typically search boxes.
         | 
| 22 | 
            +
              #
         | 
| 23 | 
            +
              # Boolean, returns true if form is present on page.
         | 
| 24 | 
            +
              def contactform_available?
         | 
| 25 | 
            +
                !(@page.forms.select { |x| x.fields.length > 1 }.empty?)
         | 
| 32 26 | 
             
              end
         | 
| 33 27 | 
             
            end
         | 
    
        data/lib/gimme_poc/save.rb
    CHANGED
    
    | @@ -1,62 +1,70 @@ | |
| 1 | 
            -
            module  | 
| 2 | 
            -
               | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
                def scan_for_contacts
         | 
| 10 | 
            -
                  {
         | 
| 11 | 
            -
                    contactpage: link_with_href('contact'),
         | 
| 12 | 
            -
                    email_present: "#{email_available?}",
         | 
| 13 | 
            -
                    phone_present: "#{phone_available?}",
         | 
| 14 | 
            -
                    contact_form: "#{contactform_available?}",
         | 
| 15 | 
            -
                    facebook: link_with_href('facebook\.'),
         | 
| 16 | 
            -
                    twitter: link_with_href('twitter\.'),
         | 
| 17 | 
            -
                    youtube: link_with_href('youtube\.'),
         | 
| 18 | 
            -
                    googleplus: link_with_href('plus\.google\.'),
         | 
| 19 | 
            -
                    linkedin: link_with_href('linkedin\.')
         | 
| 20 | 
            -
                  }
         | 
| 21 | 
            -
                end
         | 
| 1 | 
            +
            module Save
         | 
| 2 | 
            +
              ##
         | 
| 3 | 
            +
              # Boolean, returns true if anything is present
         | 
| 4 | 
            +
              # after running scan_for_contacts and deleting failures.
         | 
| 5 | 
            +
              # Remember that false is a string in the hash
         | 
| 6 | 
            +
              def something_to_save?(hsh)
         | 
| 7 | 
            +
                  hsh.reject! { |k, v| v.nil? || v == 'false' }.any?
         | 
| 8 | 
            +
              end
         | 
| 22 9 |  | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 10 | 
            +
              ##
              # Gathers every contact detail that can be looked up into one Hash.
              #
              # Link entries hold whatever link_with_href returns for the pattern.
              # The email, phone and contact-form checks are interpolated into
              # strings, so their results are stored as text.
              #
              # The link patterns end in a backslash-escaped period so the period is
              # not treated as a regex wild card, which prevents false positives.
              #
              # Returns the Hash, or nil after printing the error if a lookup raises.
              def scan_for_contacts
                found = {}
                found[:contactpage]   = link_with_href('contact')
                found[:email_present] = "#{email_available?}"
                found[:phone_present] = "#{phone_available?}"
                found[:contact_form]  = "#{contactform_available?}"
                found[:facebook]      = link_with_href('facebook\.')
                found[:twitter]       = link_with_href('twitter\.')
                found[:youtube]       = link_with_href('youtube\.')
                found[:googleplus]    = link_with_href('plus\.google\.')
                found[:linkedin]      = link_with_href('linkedin\.')
                found
              rescue => err
                puts "Error: #{err}"
              end
         | 
| 27 31 |  | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 32 | 
            -
                end
         | 
| 32 | 
            +
              # Starts (or restarts) @contact_links by pointing it at a brand new,
              # empty Hash. Returns that Hash.
              def start_contact_links
                @contact_links = Hash.new
              end
         | 
| 33 36 |  | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
                 | 
| 37 | 
            -
                 | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 37 | 
            +
              # Records +url+ under +key+ in @contact_links. Called by
              # save_available_contacts once per scanned entry.
              # Does nothing and returns nil when either argument is nil.
              def save_link(key, url)
                @contact_links[key] = url unless key.nil? || url.nil?
              end
         | 
| 42 | 
            +
             | 
| 43 | 
            +
              ##
              # Strips the negatives out of a contacts hash, in place.
              # A key/value pair is dropped when its value is nil or the string
              # 'false' (false is stored in the hash as a string).
              # Returns the same hash object.
              def delete_failures(hsh)
                hsh.keep_if { |_key, value| !value.nil? && value != 'false' }
              end
         | 
| 41 50 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
                      end
         | 
| 50 | 
            -
                      delete_failures(@contact_links)
         | 
| 51 | 
            -
                      puts "#{@contact_links}".cyan # same as @contact_links
         | 
| 52 | 
            -
                    else
         | 
| 53 | 
            -
                      fail ArgumentError, "expected hash but got #{hsh.class}"
         | 
| 51 | 
            +
              ##
              # Saves any available contact info to @contact_links.
              #
              # url - passed to the log message and to Gimme::Search::POC.
              # hsh - Hash of scan results (defaults to scan_for_contacts), or nil.
              #
              # Returns the Gimme::Search::POC built from +url+ and @contact_links,
              # or nil when there is nothing to save.
              # Raises ArgumentError if +hsh+ is neither a Hash nor nil.
              def save_available_contacts(url, hsh = scan_for_contacts)
                # Check the type before anything treats the argument as a hash,
                # so a wrong type surfaces as the intended ArgumentError.
                unless hsh.nil? || hsh.is_a?(Hash)
                  fail ArgumentError, "expected hash but got #{hsh.class}"
                end

                # scan_for_contacts returns nil when a lookup raised; treat that
                # the same as an empty result.
                if hsh.nil? || !something_to_save?(hsh)
                  LogMessages.nothing_to_save
                  return
                end

                LogMessages.saving_contact_info(url)
                hsh.each { |key, link| save_link(key, link) } # saves to @contact_links
                delete_failures(@contact_links)
                puts @contact_links.to_s.cyan
                Gimme::Search::POC.new(url, @contact_links)
              end
         | 
| 62 70 | 
             
            end
         | 
| @@ -0,0 +1,329 @@ | |
| 1 | 
            +
            require 'mechanize'
         | 
| 2 | 
            +
            require 'logger'
         | 
| 3 | 
            +
            require 'tempfile'
         | 
| 4 | 
            +
            require 'tmpdir'
         | 
| 5 | 
            +
            require 'webrick'
         | 
| 6 | 
            +
            require 'zlib'
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            require 'rubygems'
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            begin
         | 
| 11 | 
            +
              gem 'minitest'
         | 
| 12 | 
            +
            rescue Gem::LoadError
         | 
| 13 | 
            +
            end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            ##
         | 
| 16 | 
            +
            # Source:
         | 
| 17 | 
            +
            #
         | 
| 18 | 
            +
            # http://bit.ly/1Pt2KAd
         | 
| 19 | 
            +
            # --------------------------------------------------------------
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            ##
         | 
| 22 | 
            +
            # A generic test case for testing mechanize.  Using a subclass of
         | 
| 23 | 
            +
            # Mechanize::TestCase for your tests will create an isolated mechanize
         | 
| 24 | 
            +
            # instance that won't pollute your filesystem or other tests.
         | 
| 25 | 
            +
            #
         | 
| 26 | 
            +
            # Once Mechanize::TestCase is loaded no HTTP requests will be made outside
         | 
| 27 | 
            +
            # mechanize itself.  All requests are handled via WEBrick servlets.
         | 
| 28 | 
            +
            #
         | 
| 29 | 
            +
            # Mechanize uses WEBrick servlets to test some functionality.  You can run
         | 
| 30 | 
            +
            # other HTTP clients against the servlets using:
         | 
| 31 | 
            +
            #
         | 
| 32 | 
            +
            #   ruby -rmechanize/test_case/server -e0
         | 
| 33 | 
            +
            #
         | 
| 34 | 
            +
            # Which will launch a test server at http://localhost:8000
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            class Mechanize::TestCase < Minitest::Test

              # Directory the canned htdocs files are read from.
              TEST_DIR = File.expand_path('../../../test', __FILE__)

              # Every request seen by the patched Net::HTTP#request is appended here.
              REQUESTS = []

              ##
              # Gives each test a fresh +@mech+ instance, an empty request log and
              # unset SSL fixtures.

              def setup
                super

                REQUESTS.clear
                @mech = Mechanize.new
                @ssl_private_key = @ssl_certificate = nil
              end

              ##
              # Builds a fake page at http://fake.example/ whose body contains one
              # empty form with a submit button.

              def fake_page(agent = @mech)
                markup = <<-END
            <html>
            <body>
            <form><input type="submit" value="submit" /></form>
            </body>
            </html>
                END

                Mechanize::Page.new(URI('http://fake.example/'), nil, markup, 200, agent)
              end

              ##
              # True when the Encoding constant is defined.

              def have_encoding?
                Object.const_defined?(:Encoding)
              end

              ##
              # Builds a Mechanize::Page at http://example/ with the given +body+.

              def html_page(body)
                Mechanize::Page.new(URI('http://example/'), nil, body, 200, @mech)
              end

              ##
              # Builds a Mechanize::CookieJar and has it parse +str+ for +uri+.

              def cookie_jar(str, uri = URI('http://example'))
                Mechanize::CookieJar.new.tap { |jar| jar.parse(str, uri) }
              end

              ##
              # Yields with the working directory switched to a temporary directory.

              def in_tmpdir
                Dir.mktmpdir { |dir| Dir.chdir(dir) { yield } }
              end

              ##
              # Builds a Nokogiri node named +element+ and sets each of the given
              # +attributes+ on it.

              def node(element, attributes = {})
                document = Nokogiri::HTML::Document.new
                built = Nokogiri::XML::Node.new(element, document)
                attributes.each { |name, value| built[name] = value }
                built
              end

              ##
              # Builds a Mechanize::Page for +uri+ with the given +content_type+,
              # response +body+ and HTTP status +code+. A non-URI +uri+ is converted
              # with Kernel#URI first.

              def page(uri, content_type = 'text/html', body = '', code = 200)
                uri = URI(uri) unless uri.is_a?(URI::Generic)
                headers = { 'content-type' => content_type }

                Mechanize::Page.new(uri, headers, body, code, @mech)
              end

              ##
              # The requests recorded during this test.

              def requests
                REQUESTS
              end

              ##
              # An SSL private key, built once per test from the PEM text embedded
              # below, so it is the same on every run.

              def ssl_private_key
                @ssl_private_key ||= OpenSSL::PKey::RSA.new(<<-KEY)
            -----BEGIN RSA PRIVATE KEY-----
            MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
            bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
            p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
            6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
            -----END RSA PRIVATE KEY-----
                KEY
              end

              ##
              # An X509 certificate, built once per test from the PEM text embedded
              # below, so it is the same on every run.

              def ssl_certificate
                @ssl_certificate ||= OpenSSL::X509::Certificate.new(<<-CERT)
            -----BEGIN CERTIFICATE-----
            MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
            eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
            OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
            B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
            4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
            UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
            //8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
            -----END CERTIFICATE-----
                CERT
              end

              ##
              # Returns a Tempfile holding +content+, rewound to the start. The file
              # is unlinked right after creation, before the content is written.

              def tempfile(content)
                Tempfile.new(@NAME).tap do |body_io|
                  body_io.unlink
                  body_io.write(content)
                  body_io.flush
                  body_io.rewind
                end
              end

            end
         | 
| 185 | 
            +
             | 
| 186 | 
            +
            require 'mechanize/test_case/servlets'
         | 
| 187 | 
            +
             | 
| 188 | 
            +
            module Net # :nodoc:
            end

            class Net::HTTP # :nodoc:
              # The original do_start stays reachable as old_do_start; the
              # replacement only marks the session as started.
              alias :old_do_start :do_start

              def do_start
                @started = true
              end

              # Contents of htdocs files that have already been read, keyed by
              # their relative file name.
              PAGE_CACHE = {}

              alias :old_request :request

              # Replacement for Net::HTTP#request that builds the response in
              # memory. The request is recorded in Mechanize::TestCase::REQUESTS and
              # then answered either by the servlet registered for its path in
              # MECHANIZE_TEST_CASE_SERVLETS or, when there is none, from the
              # matching file under TEST_DIR/htdocs.
              #
              # Yields the response when a block is given and returns it.
              def request(req, *data, &block)
                parsed = URI.parse(req.path)
                path = WEBrick::HTTPUtils.unescape(parsed.path)
                path = '/index.html' if path == '/'

                res = ::Response.new
                res.query_params = parsed.query

                # Work out the request parameters from the query string or from
                # the body, depending on the method and content type.
                req.query =
                  if req.method != 'POST' && parsed.query
                    WEBrick::HTTPUtils.parse_query(parsed.query)
                  elsif req['content-type'] =~ /www-form-urlencoded/
                    WEBrick::HTTPUtils.parse_query(req.body)
                  elsif req['content-type'] =~ /boundary=(.+)/
                    boundary = WEBrick::HTTPUtils.dequote(Regexp.last_match(1))
                    WEBrick::HTTPUtils.parse_form_data(req.body, boundary)
                  else
                    {}
                  end

                req.cookies = WEBrick::Cookie.parse(req['Cookie'])

                Mechanize::TestCase::REQUESTS << req

                servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
                if servlet_klass
                  servlet_klass.new({}).send("do_#{req.method}", req, res)
                else
                  # Any character outside the allowed set becomes an underscore
                  # before the path is used as a file name.
                  filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
                  PAGE_CACHE[filename] ||=
                    File.open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') do |file|
                      file.read
                    end

                  res.body = PAGE_CACHE[filename]
                  if filename =~ /\.txt$/
                    res['Content-Type'] = 'text/plain'
                  elsif filename =~ /\.jpg$/
                    res['Content-Type'] = 'image/jpeg'
                  end
                end

                res['Content-Type'] ||= 'text/html'
                res.code ||= "200"

                response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
                response = response_klass.new(res.http_version, res.code, res.message)

                # Copy headers across; a one-element value list is unwrapped.
                res.header.each do |field, values|
                  response[field] = values.length == 1 ? values.first : values
                end

                res.cookies.each do |cookie|
                  response.add_field('Set-Cookie', cookie.to_s)
                end

                response['Content-Type'] ||= 'text/html'
                response['Content-Length'] = res['Content-Length'] || res.body.length.to_s

                # The body is served from a StringIO standing in for the socket.
                # Its read is redefined to accept the extra arguments and to
                # append into +dest+ when one is supplied.
                io = StringIO.new(res.body)
                response.instance_variable_set(:@socket, io)
                def io.read(clen, dest = nil, _ = nil)
                  if dest
                    dest << super(clen)
                  else
                    super(clen)
                  end
                end

                body_exist = req.response_body_permitted? && response_klass.body_permitted?
                response.instance_variable_set(:@body_exist, body_exist)

                yield response if block_given?

                response
              end
            end
         | 
| 284 | 
            +
             | 
| 285 | 
            +
            class Net::HTTPRequest # :nodoc:
              # Extra state attached to each request by the patched Net::HTTP#request.
              attr_accessor :query
              attr_accessor :body
              attr_accessor :cookies
              attr_accessor :user

              # Every request reports the same fixed host.
              def host
                'example'
              end

              # Every request reports the same fixed port.
              def port
                80
              end
            end
         | 
| 296 | 
            +
             | 
| 297 | 
            +
            class Response # :nodoc:
              include Net::HTTPHeader

              attr_reader :code
              attr_accessor :body, :query, :cookies, :query_params, :http_version, :header

              # Starts out with no headers, an empty body, no status code, no
              # query, no cookies and HTTP version '1.1'.
              def initialize
                @header = {}
                @body = ''
                @code = nil
                @query = nil
                @cookies = []
                @http_version = '1.1'
              end

              # Stores the status code as a String, whatever type it is given as.
              def code=(value)
                @code = value.to_s
              end

              alias_method :status, :code
              alias_method :status=, :code=

              # Yields the whole body once.
              def read_body
                yield body
              end

              # The reason phrase is always the empty string.
              def message
                ''
              end
            end
         | 
| 329 | 
            +
             | 
    
        data/lib/gimme_poc/version.rb
    CHANGED
    
    
    
        data/lib/gimme_poc/web.rb
    CHANGED
    
    | @@ -1,91 +1,97 @@ | |
| 1 1 | 
             
            # Find the contact
         | 
| 2 | 
            -
            module  | 
| 3 | 
            -
               | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 2 | 
            +
            module Web
         | 
| 3 | 
            +
              attr_accessor :page, :agent, :url
         | 
| 4 | 
            +
              
         | 
| 5 | 
            +
              # Captures a leading http:// or https:// (group 1).
              # Both schemes are anchored to the start of the string; previously
              # only the https alternative was, so an http:// appearing later in
              # the string (for example inside a query parameter) also matched.
              HTTP_REGEX = %r{\A(https?://)}
         | 
| 7 | 
            +
               
         | 
| 8 | 
            +
              ##
              # Go to a page using Mechanize.
              #
              # str - the address to visit. prepare_get_request sets up @agent and
              #       turns +str+ into @url before the request is sent.
              #
              # Returns the fetched page (also stored in @page), or nil when the
              # request fails. The error is passed to LogMessages.warn_err.
              def get(str)
                prepare_get_request(str)
                @page = @agent.get(@url)
              rescue StandardError => e
                # StandardError, not Exception: Interrupt, SystemExit and similar
                # must keep propagating instead of being logged and discarded.
                LogMessages.warn_err(e)
                nil
              end
         | 
| 19 | 
            +
              
         | 
| 20 | 
            +
              # Readies everything get needs: builds the agent, stores the
              # formatted form of +str+ in @url, logs the outgoing request and
              # then sleeps for a tenth of a second.
              def prepare_get_request(str)
                mech_setup
                @url = format_url(str)
                LogMessages.sending_get_request(url)
                sleep 0.1
              end
         | 
| 20 26 |  | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
                   | 
| 25 | 
            -
             | 
| 26 | 
            -
                   | 
| 27 | 
            -
             | 
| 28 | 
            -
                  puts "#{'Connection Timeout:'.red} #{e}"
         | 
| 29 | 
            -
                rescue Net::HTTP::Persistent::Error
         | 
| 30 | 
            -
                  puts "#{'Connection Timeout:'.red} read timeout, too many resets."
         | 
| 27 | 
            +
              # Builds a new Mechanize agent in @agent with the 'Mac Safari' user
              # agent alias, the open, read and idle timeouts all set to 7, and
              # redirects allowed. Returns the agent.
              def mech_setup
                timeout = 7
                @agent = Mechanize.new do |mech|
                  mech.user_agent_alias = 'Mac Safari'
                  %w[open_timeout read_timeout idle_timeout].each do |setting|
                    mech.public_send("#{setting}=", timeout)
                  end
                  mech.redirect_ok = true
                end
              end
         | 
| 32 36 |  | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 37 | 
            +
              ##
         | 
| 38 | 
            +
              # Mechanize needs absolute urls to work.
         | 
| 39 | 
            +
              # If http:// or https:// isn't present, prepend http://.
         | 
| 40 | 
            +
              def format_url(str)
         | 
| 41 | 
            +
                LazyDomain.autohttp(str)
         | 
| 42 | 
            +
              end
         | 
| 39 43 |  | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            +
              # Used for subdomain check.  Not a permanent change to url variable.
         | 
| 45 | 
            +
              def unformat_url(str)
         | 
| 46 | 
            +
                str.gsub(HTTP_REGEX, '')
         | 
| 47 | 
            +
              end
         | 
| 44 48 |  | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 49 | 
            +
              ##
         | 
| 50 | 
            +
              # Outputs domain of a url. Useful if subdomains are given to GimmePOC
         | 
| 51 | 
            +
              # and they don't work.
         | 
| 52 | 
            +
              #
         | 
| 53 | 
            +
              # For example:
         | 
| 54 | 
            +
              # Given http://maps.google.com, returns 'google.com'.
         | 
| 55 | 
            +
              def orig_domain(str)
         | 
| 56 | 
            +
                LazyDomain.parse(str).domain
         | 
| 57 | 
            +
              rescue PublicSuffix::DomainInvalid => err
         | 
| 58 | 
            +
                LogMessages.invalid_domain(err)
         | 
| 59 | 
            +
              end
         | 
| 56 60 |  | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 61 | 
            +
              ##
         | 
| 62 | 
            +
              # Used in case of relative paths. Merging guarantees correct url.
         | 
| 63 | 
            +
              # This needs a url string as argument to work.
         | 
| 64 | 
            +
              # Produces a merged uri string.
         | 
| 65 | 
            +
              def merged_link(url_str)
         | 
| 66 | 
            +
                @page.uri.merge(url_str).to_s
         | 
| 67 | 
            +
              end
         | 
| 64 68 |  | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 69 | 
            +
              ##
         | 
| 70 | 
            +
              # Expects relative paths and merges everything.
         | 
| 71 | 
            +
              # Returns a string.  If there's nothing, return nil.
         | 
| 72 | 
            +
              #
         | 
| 73 | 
            +
              # The leading \b word boundary ensures the match starts at the beginning of a word.
         | 
| 74 | 
            +
              def link_with_href(str)
         | 
| 75 | 
            +
                merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
         | 
| 76 | 
            +
              rescue
         | 
| 77 | 
            +
                nil
         | 
| 78 | 
            +
              end
         | 
| 75 79 |  | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            +
              # Boolean, returns true if url is not identical to original domain.
         | 
| 81 | 
            +
              #
         | 
| 82 | 
            +
              # In the event that the url has a path, this splits everything on forward
         | 
| 83 | 
            +
              # slash and selects the leftmost item.
         | 
| 84 | 
            +
              def subdomain?(str)
         | 
| 85 | 
            +
                (unformat_url(str).split('/')[0] != orig_domain(str))
         | 
| 86 | 
            +
              end
         | 
| 80 87 |  | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
                end
         | 
| 88 | 
            +
              # TODO: Sometimes DNS will do a redirect and not give a 404.
         | 
| 89 | 
            +
              #   Need to prevent redirects.
         | 
| 90 | 
            +
              #
         | 
| 91 | 
            +
              # Blindly tests to see if a url goes through.  If there is a 404 error,
         | 
| 92 | 
            +
              # this will return nil.
         | 
| 93 | 
            +
              def blind_test(url)
         | 
| 94 | 
            +
                LogMessages.blind_testing(url)
         | 
| 95 | 
            +
                get(url)
         | 
| 90 96 | 
             
              end
         | 
| 91 97 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: gimme_poc
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version:  | 
| 4 | 
            +
              version: 1.1.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - John Mason
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2017-04-13 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: mechanize
         | 
| @@ -44,28 +44,56 @@ dependencies: | |
| 44 44 | 
             
                requirements:
         | 
| 45 45 | 
             
                - - "~>"
         | 
| 46 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            -
                    version: 0.0. | 
| 47 | 
            +
                    version: 0.0.2
         | 
| 48 48 | 
             
              type: :runtime
         | 
| 49 49 | 
             
              prerelease: false
         | 
| 50 50 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 52 | 
             
                - - "~>"
         | 
| 53 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            -
                    version: 0.0. | 
| 54 | 
            +
                    version: 0.0.2
         | 
| 55 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            -
              name:  | 
| 56 | 
            +
              name: shoulda
         | 
| 57 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 58 | 
             
                requirements:
         | 
| 59 59 | 
             
                - - "~>"
         | 
| 60 60 | 
             
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            -
                    version: '3. | 
| 61 | 
            +
                    version: '3.5'
         | 
| 62 62 | 
             
              type: :development
         | 
| 63 63 | 
             
              prerelease: false
         | 
| 64 64 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 65 | 
             
                requirements:
         | 
| 66 66 | 
             
                - - "~>"
         | 
| 67 67 | 
             
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            -
                    version: '3. | 
| 68 | 
            +
                    version: '3.5'
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            +
              name: shoulda-context
         | 
| 71 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 | 
            +
                requirements:
         | 
| 73 | 
            +
                - - "~>"
         | 
| 74 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 75 | 
            +
                    version: '1.2'
         | 
| 76 | 
            +
              type: :development
         | 
| 77 | 
            +
              prerelease: false
         | 
| 78 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 | 
            +
                requirements:
         | 
| 80 | 
            +
                - - "~>"
         | 
| 81 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 82 | 
            +
                    version: '1.2'
         | 
| 83 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 84 | 
            +
              name: minitest-reporters
         | 
| 85 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 | 
            +
                requirements:
         | 
| 87 | 
            +
                - - "~>"
         | 
| 88 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 89 | 
            +
                    version: '1.1'
         | 
| 90 | 
            +
              type: :development
         | 
| 91 | 
            +
              prerelease: false
         | 
| 92 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 | 
            +
                requirements:
         | 
| 94 | 
            +
                - - "~>"
         | 
| 95 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 96 | 
            +
                    version: '1.1'
         | 
| 69 97 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 70 98 | 
             
              name: pry
         | 
| 71 99 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -106,9 +134,12 @@ files: | |
| 106 134 | 
             
            - Rakefile
         | 
| 107 135 | 
             
            - lib/gimme_poc.rb
         | 
| 108 136 | 
             
            - lib/gimme_poc/contactpage.rb
         | 
| 137 | 
            +
            - lib/gimme_poc/logger.rb
         | 
| 138 | 
            +
            - lib/gimme_poc/logger/messages.rb
         | 
| 109 139 | 
             
            - lib/gimme_poc/poc.rb
         | 
| 110 140 | 
             
            - lib/gimme_poc/questions.rb
         | 
| 111 141 | 
             
            - lib/gimme_poc/save.rb
         | 
| 142 | 
            +
            - lib/gimme_poc/test_case.rb
         | 
| 112 143 | 
             
            - lib/gimme_poc/version.rb
         | 
| 113 144 | 
             
            - lib/gimme_poc/web.rb
         | 
| 114 145 | 
             
            homepage: http://github.com/m8ss/gimme_poc
         | 
| @@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 131 162 | 
             
                  version: '0'
         | 
| 132 163 | 
             
            requirements: []
         | 
| 133 164 | 
             
            rubyforge_project: 
         | 
| 134 | 
            -
            rubygems_version: 2. | 
| 165 | 
            +
            rubygems_version: 2.5.1
         | 
| 135 166 | 
             
            signing_key: 
         | 
| 136 167 | 
             
            specification_version: 4
         | 
| 137 168 | 
             
            summary: Get a point of contact.  Given a url or array of urls, extracts social media
         |