gimme_poc 0.0.5 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35a045a491109a5ae34152577508585667371af0
4
- data.tar.gz: cae3580199f6cea7f2d5ceac4e689c931f743ff3
3
+ metadata.gz: 84e5aa1c8960ade9b3f438008c7308856e7cbc30
4
+ data.tar.gz: e7d75a282e2d644ea9c0a24c466cd8f002013477
5
5
  SHA512:
6
- metadata.gz: 200a0ba0bedded51c4b6aa50ceb46dc41d01a211fc52dbfdd7596989c9429e05db802f18b5f0c625d8353dbe7800da6447ed2a4fe43735438f16a9cf24b12728
7
- data.tar.gz: e8c6def1d4e085c3c3dc05c92cdd894c43b2be287ce98ed79bc91111377b5c8e27db1898501de8726f86e3888577d0d207dbb3a23bba69991134d19cf1b03b70
6
+ metadata.gz: ed1704cd7a334ea8ba478cb01b487b8e54b2c16e1ad3e99db1c381797e413c43f1058d2a2c7a6b47212c7342ced310f2f1f4338a7153f4a84c06e5d7d64f90f4
7
+ data.tar.gz: 84191e49f72d95da75a46453b001aa29a74b1f128a1d6bc24687e3a9afda24df67055648b93841092d0a1fdfea887249f83f80d979d8c4a77586f4340b734458
data/README.md CHANGED
@@ -10,21 +10,19 @@ Gimme POC simply looks for a contact page and extracts social media contact info
10
10
  ## Installation
11
11
 
12
12
  ```
13
- gem install gimme_poc
13
+ $ gem install gimme_poc
14
14
 
15
15
  ```
16
16
 
17
17
  ## Set Up
18
18
 
19
19
  ```ruby
20
- require 'gimme_poc' # => that's it!
20
+ require 'gimme_poc'
21
21
 
22
22
  ```
23
23
 
24
24
  ## How it works
25
25
 
26
- Gimme POC is easy to use! Simply run this command.
27
-
28
26
  ```ruby
29
27
 
30
28
  Gimme.poc 'http://example.com'
@@ -56,30 +54,3 @@ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
56
54
 
57
55
  ```
58
56
 
59
- ## Referencing the search results
60
-
61
- To use your search results, simply run:
62
-
63
- ```ruby
64
-
65
- Gimme.memory
66
-
67
- ```
68
-
69
- ## Clearing the search results
70
-
71
- To clear search results and start afresh, run:
72
-
73
- ```ruby
74
-
75
- Gimme.reset!
76
-
77
- ```
78
-
79
- ## To do:
80
-
81
- - Convenience methods for returning specific information from all sites, (ie. just facebook or just twitter)
82
- - Work on false positives of bad urls. (Bad urls should be skipped + DNS redirects don't give 404 errors)
83
-
84
-
85
- More to follow...
data/Rakefile CHANGED
@@ -1,7 +1,22 @@
1
1
  require 'rubygems'
2
2
  require 'rake'
3
+ require 'rake/testtask'
4
+
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test*.rb'
8
+ test.verbose = true
9
+ end
3
10
 
4
11
  desc 'Open console with gimme_poc loaded'
5
12
  task :console do
6
13
  exec 'pry -r ./lib/gimme_poc.rb'
7
14
  end
15
+
16
+ desc 'make a release'
17
+ task :release do
18
+ exec './script/release'
19
+ end
20
+
21
+ task c: :console # alias 'c' for console
22
+ task default: :test
@@ -3,6 +3,7 @@ require 'lazy_domain'
3
3
  require 'mechanize'
4
4
  require_relative './gimme_poc/contactpage'
5
5
  require_relative './gimme_poc/poc'
6
+ require_relative './gimme_poc/logger'
6
7
  require_relative './gimme_poc/questions'
7
8
  require_relative './gimme_poc/save'
8
9
  require_relative './gimme_poc/version'
@@ -11,13 +12,35 @@ require_relative './gimme_poc/web'
11
12
  # Find the contact
12
13
  module Gimme
13
14
  class << self
15
+ include Web
16
+ include Questions
17
+ include Save
18
+ include ContactPage
19
+
14
20
  attr_accessor :page, :contact, :contact_links, :url
21
+ attr_reader :status_code
15
22
 
16
- # Simple regex that looks for ###.#### or ###-####
17
- PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
23
# Validates +url+ and tries to fetch its landing page.
#
# Sets @status_code to 0 when the url has to be skipped: either the domain
# is invalid or no page could be fetched (for a subdomain, both the
# subdomain and its root domain are tried). Otherwise @status_code is nil.
#
# The flag is cleared at the start of every call; without that, a single
# bad url would leave @status_code at 0 and every later url in the same
# `poc` run would be skipped as well.
def start_url_process(url)
  @status_code = nil
  LogMessages.start_url(url)

  if !LazyDomain.valid?(url)
    LogMessages.invalid_domain(url)
    @status_code = 0
  elsif subdomain?(url)
    LogMessages.subdomain
    # Only give up when neither the subdomain nor the root domain responds.
    @status_code = 0 if get(url).nil? && get(orig_domain(url)).nil?
  else
    @status_code = 0 if get(url).nil?
  end
end
18
36
 
19
- # Captures http:// and https://
20
- HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
37
+ def start_contact_process(url)
38
+ start_contact_links
39
+ attempt = save_available_contacts(url)
40
+ info = attempt.info if attempt && attempt.respond_to?(:info)
41
+ return attempt unless info.nil? || info.empty?
42
+ go_to_contact_page(url)
43
+ end
21
44
 
22
45
  ##
23
46
  # The main method!
@@ -25,29 +48,13 @@ module Gimme
25
48
  # If url is bad, it's converted to nil in 'get' method and skipped over.
26
49
  def poc(arr)
27
50
  arr = arr.split unless arr.is_a?(Array)
51
+ results = []
28
52
  arr.each do |url|
29
- puts '-' * 50
30
- puts "starting: #{url}"
31
- unless LazyDomain.valid?(url)
32
- puts "#{'Invalid Domain:'.red} `#{url}' is not a valid domain"
33
- next
34
- end
35
- case
36
- when subdomain?(url)
37
- puts '(This url is a subdomain. Will try both sub and root domain.)'
38
- next if get(url).nil? && get(orig_domain(url)).nil?
39
- else
40
- next if get(url).nil?
41
- end
42
- start_contact_links
43
- mechpage = go_to_contact_page(url)
44
- if mechpage.nil?
45
- puts '(empty page, exiting.)'
46
- else
47
- save_available_contacts(mechpage.uri.to_s)
48
- end
53
+ start_url_process(url)
54
+ next if @status_code == 0
55
+ results << start_contact_process(url)
49
56
  end
50
- Search.all_sites # Return results from all sites.
57
+ results.length == 1 ? results.first : results
51
58
  end
52
59
 
53
60
  # Convenience method.
@@ -1,56 +1,55 @@
1
1
  # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Scans for contact page. If it doesn't work on the first try,
6
- # It will look for english versions and try again. Processes left to right.
7
- #
8
- # Returns nil if no contact page can be found.
9
- def go_to_contact_page(url)
10
- contact_page(url) || english_contact_page(url)
11
- end
12
-
13
- ##
14
- # Looks for contact page. Gets page if available.
15
- # If no contact link is available, it will blind test '../contact'.
16
- # Returns nil if nothing can be found.
17
- def contact_page(url)
18
- puts 'now looking for contact pages'
19
- contact_link = link_with_href(/contact|Contact/)
20
- contact_test_page = merged_link('../contact')
2
+ module ContactPage
3
+ attr_accessor :contact_link
4
+
5
+ ##
6
+ # Scans for contact page. If it doesn't work on the first try,
7
+ # It will look for english versions and try again. Processes left to right.
8
+ #
9
+ # Returns nil if no contact page can be found.
10
+ def go_to_contact_page(url)
11
+ contact_page(url) || english_contact_page(url)
12
+ end
21
13
 
22
- case
23
- when !contact_link.nil?
24
- puts "#{'Success:'.green} Found contact link!\n"
25
- get(merged_link(contact_link))
26
- else
27
- puts "#{'Warning:'.yellow} couldn't find contact link"
28
- blind_test(contact_test_page) || get(orig_domain(url))
29
- end
14
##
# Looks for a contact page and fetches it when one is available.
#
# If the current page has a link whose href matches "contact", that link is
# fetched. Otherwise '../contact' is blind tested, and if that fails too the
# root domain of +url+ is fetched as a last resort.
#
# Returns the result of whichever fetch succeeded (the blind-tested page
# wins over the root-domain fallback), or that of the last attempt.
def contact_page(url)
  LogMessages.looking_for_contact_page
  @contact_link = link_with_href(/contact|Contact/)
  contact_test_page = merged_link('../contact')

  if @contact_link.nil?
    LogMessages.no_contact_link
    # `||` keeps a successful blind test as the return value. The previous
    # `get(...) if blind_test(...).nil?` form returned nil in that case and
    # threw the found page away.
    blind_test(contact_test_page) || get(orig_domain(url))
  else
    LogMessages.found_contact_link
    get(merged_link(@contact_link))
  end
end
31
31
 
32
- ##
33
- # Looks for english page. Gets page if available then looks for
34
- # english contact page.
35
- #
36
- # If no english link is available,
37
- # it will blind test '../en' and '../english'.
38
- # Returns nil if nothing can be found.
39
- def english_contact_page(url)
40
- puts "\nLooking for english page..."
41
- english_link = page.link_with(href: %r{en\/|english|English})
42
- test_en_page = merged_link('../en')
43
- test_english_page = merged_link('../english')
32
+ ##
33
+ # Looks for english page. Gets page if available then looks for
34
+ # english contact page.
35
+ #
36
+ # If no english link is available,
37
+ # it will blind test '../en' and '../english'.
38
+ # Returns nil if nothing can be found.
39
+ def english_contact_page(url)
40
+ LogMessages.looking_for_english_page
41
+ english_link = @page.link_with(href: %r{en\/|english|English})
42
+ test_en_page = merged_link('../en')
43
+ test_english_page = merged_link('../english')
44
44
 
45
- case
46
- when !english_link.nil?
47
- puts "#{'Success:'.green} found english link!"
48
- get(merged_link(english_link.uri))
49
- else
50
- blind_test(test_en_page) || blind_test(test_english_page)
51
- puts "\n(restarting)\n"
52
- contact_page(url)
53
- end
45
+ case
46
+ when !english_link.nil?
47
+ LogMessages.found_english_link
48
+ get(merged_link(english_link.uri))
49
+ else
50
+ blind_test(test_en_page) || blind_test(test_english_page)
51
+ LogMessages.restarting
52
+ contact_page(url)
54
53
  end
55
54
  end
56
55
  end
@@ -0,0 +1,16 @@
1
+ require 'logger'
2
+ require_relative './logger/messages'
3
+
4
+ # Output info messages during gimme poc crawl.
5
+ module Gimme
6
+ class << self
7
+ include LogMessages
8
+ attr_accessor :logger
9
+ end
10
+ end
11
+
12
+ Gimme.logger = Logger.new(STDOUT)
13
+ Gimme.logger.level = Logger::INFO
14
+ Gimme.logger.formatter = proc do |_severity, _datetime, _progname, msg|
15
+ "#{Time.now.strftime('%Y-%m-%d %H:%M:%S')}: #{msg}\n"
16
+ end
@@ -0,0 +1,77 @@
1
+
2
# Canned log messages emitted while Gimme POC crawls a site.
#
# Every message goes through Gimme.logger: informational messages via
# +loginfo+ (INFO severity) and warnings/errors via +logwarn+ (WARN severity).
# The colour helpers (+red+, +green+, +yellow+) are String extensions that
# must be loaded by the surrounding project.
module LogMessages
  class << self
    # Logs +str+ at INFO severity.
    def loginfo(str)
      Gimme.logger.info(str)
    end

    # Logs +str+ at WARN severity.
    # (This used to call +info+, so warnings were indistinguishable from
    # ordinary progress messages by severity.)
    def logwarn(str)
      Gimme.logger.warn(str)
    end

    # Info
    # -----------------------------------------------------------------

    # Prints a separator line to stdout, then logs the url being started.
    def start_url(url)
      puts '-' * 50
      loginfo "starting: #{url}"
    end

    def sending_get_request(url)
      loginfo("sending GET request to: #{url}")
    end

    def blind_testing(url)
      loginfo("blind testing: #{url}")
    end

    def invalid_domain(url)
      loginfo("#{'Invalid Domain:'.red} `#{url}' is not a valid domain")
    end

    def subdomain
      loginfo '(This url is a subdomain. Will try both sub and root domain.)'
    end

    def empty_page
      loginfo '(empty page, exiting.)'
    end

    def looking_for_contact_page
      loginfo('now looking for contact pages')
    end

    def found_contact_link
      loginfo("#{'Success:'.green} Found contact link!")
    end

    def looking_for_english_page
      loginfo('Looking for english page...')
    end

    def found_english_link
      loginfo("#{'Success:'.green} found english link!")
    end

    def saving_contact_info(url)
      loginfo("saving available contact information from #{url}")
    end

    # Warnings
    # -----------------------------------------------------------------

    def no_contact_link
      logwarn("#{'Warning:'.yellow} couldn't find contact link")
    end

    def restarting
      logwarn('restarting'.yellow)
    end

    def nothing_to_save
      logwarn '(nothing to save)'
    end

    # Logs +error+ (anything that interpolates into a string) as a warning.
    def warn_err(error)
      logwarn("#{'Error:'.red} #{error}")
    end
  end
end
@@ -1,10 +1,12 @@
1
+ require "ostruct"
2
+
1
3
  module Gimme
2
4
  # Collection of sites searched.
3
5
  class Search
4
- @all_sites = []
6
+ attr_accessor :all_sites
5
7
 
6
- class << self
7
- attr_accessor :all_sites
8
+ def initialize
9
+ @all_sites = []
8
10
  end
9
11
 
10
12
  # Each site is saved to this class
@@ -13,8 +15,7 @@ module Gimme
13
15
 
14
16
  def initialize(url, contact_info_hsh)
15
17
  @host = url
16
- @info = contact_info_hsh
17
- Search.all_sites << self
18
+ @info = OpenStruct.new(contact_info_hsh)
18
19
  end
19
20
  end
20
21
  end
@@ -1,33 +1,27 @@
1
- # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Boolean, returns true if anything is present
6
- # after running scan_for_contacts and deleting failures.
7
- def something_to_save?(hsh)
8
- delete_failures(hsh).any?
9
- end
10
-
11
- # Boolean, returns true if email is present.
12
- def email_available?
13
- !link_with_href('mailto').nil?
14
- end
1
+ # Reflective questions for situational awareness.
2
+ module Questions
3
+ # Simple regex that looks for ###.#### or ###-####
4
+ PHONE_REGEX = /(\d{3}[-]\d{4}|\d{3}[.]\d{4})/
5
+
6
+ # Boolean, returns true if email is present.
7
+ def email_available?
8
+ !link_with_href('mailto').nil?
9
+ end
15
10
 
16
- # Boolean, returns true if phone number is present.
17
- def phone_available?
18
- !(page.body =~ PHONE_REGEX).nil?
19
- end
11
+ # Boolean, returns true if phone number is present.
12
+ def phone_available?
13
+ !(@page.body =~ PHONE_REGEX).nil?
14
+ end
20
15
 
21
- ##
22
- # TODO: build better conditional to prevent false positives.
23
- # There could be other forms like newsletter signup, etc.
24
- #
25
- # If there is a form with more than one field, this returns true.
26
- # Forms with one field are typically search boxes.
27
- #
28
- # Boolean, returns true if form is present on page.
29
- def contactform_available?
30
- !(page.forms.select { |x| x.fields.length > 1 }.empty?)
31
- end
16
+ ##
17
+ # TODO: build better conditional to prevent false positives.
18
+ # There could be other forms like newsletter signup, etc.
19
+ #
20
+ # If there is a form with more than one field, this returns true.
21
+ # Forms with one field are typically search boxes.
22
+ #
23
+ # Boolean, returns true if form is present on page.
24
+ def contactform_available?
25
+ !(@page.forms.select { |x| x.fields.length > 1 }.empty?)
32
26
  end
33
27
  end
@@ -1,62 +1,70 @@
1
- module Gimme
2
- class << self
3
- ##
4
- # Returns anything that is possible to save, otherwise returns nil.
5
- # Booleans for phone, email, or contact form will display True or False.
6
- #
7
- # Add periods to link hrefs to prevent false positives. Must escape periods
8
- # with a backslash or else it will be a regex wild card.
9
- def scan_for_contacts
10
- {
11
- contactpage: link_with_href('contact'),
12
- email_present: "#{email_available?}",
13
- phone_present: "#{phone_available?}",
14
- contact_form: "#{contactform_available?}",
15
- facebook: link_with_href('facebook\.'),
16
- twitter: link_with_href('twitter\.'),
17
- youtube: link_with_href('youtube\.'),
18
- googleplus: link_with_href('plus\.google\.'),
19
- linkedin: link_with_href('linkedin\.')
20
- }
21
- end
1
+ module Save
2
##
# Boolean, returns true if anything is left in +hsh+ after deleting
# failures, i.e. entries whose value is nil or the string 'false'.
# Remember that false is stored in the hash as a string.
#
# +hsh+ is pruned in place. A nil +hsh+ counts as nothing to save.
def something_to_save?(hsh)
  return false if hsh.nil?

  # delete_if always returns the receiver. reject! returns nil when no entry
  # was removed, which made `.any?` raise on a hash holding only valid values.
  hsh.delete_if { |_key, value| value.nil? || value == 'false' }.any?
end
22
9
 
23
- # Starts/Restarts @contacts_links hash
24
- def start_contact_links
25
- @contact_links = {}
26
- end
10
+ ##
11
+ # Returns anything that is possible to save, otherwise returns nil.
12
+ # Booleans for phone, email, or contact form will display True or False.
13
+ #
14
+ # Add periods to link hrefs to prevent false positives. Must escape periods
15
+ # with a backslash or else it will be a regex wild card.
16
+ def scan_for_contacts
17
+ {
18
+ contactpage: link_with_href('contact'),
19
+ email_present: "#{email_available?}",
20
+ phone_present: "#{phone_available?}",
21
+ contact_form: "#{contactform_available?}",
22
+ facebook: link_with_href('facebook\.'),
23
+ twitter: link_with_href('twitter\.'),
24
+ youtube: link_with_href('youtube\.'),
25
+ googleplus: link_with_href('plus\.google\.'),
26
+ linkedin: link_with_href('linkedin\.')
27
+ }
28
+ rescue => e
29
+ puts "Error: #{e}"
30
+ end
27
31
 
28
- # Used in save_available_contacts to save each valid link.
29
- def save_link(key, url)
30
- return if key.nil? || url.nil?
31
- @contact_links[key] = url
32
- end
32
+ # Starts/Restarts the @contact_links hash
33
+ def start_contact_links
34
+ @contact_links = {}
35
+ end
33
36
 
34
- ##
35
- # Remove negatives from the contacts hash.
36
- # Deletes a key value pair with a value of either nil or false.
37
- # Remember that false is a string.
38
- def delete_failures(hsh)
39
- hsh.delete_if { |_k, v| v.nil? || v == 'false' }
40
- end
37
+ # Used in save_available_contacts to save each valid link.
38
+ def save_link(key, url)
39
+ return if key.nil? || url.nil?
40
+ @contact_links[key] = url
41
+ end
42
+
43
+ ##
44
+ # Remove negatives from the contacts hash.
45
+ # Deletes a key value pair with a value of either nil or false.
46
+ # Remember that false is stored in the hash as a string.
47
+ def delete_failures(hsh)
48
+ hsh.delete_if { |_k, v| v.nil? || v == 'false' }
49
+ end
41
50
 
42
- # Saves any available contact info to @contact_links.
43
- def save_available_contacts(url, hsh = scan_for_contacts)
44
- if something_to_save?(hsh)
45
- puts "\nsaving available contact information from #{url}"
46
- if hsh.is_a?(Hash)
47
- hsh.each do |k, v|
48
- save_link(k, v) # saves to @contact_links
49
- end
50
- delete_failures(@contact_links)
51
- puts "#{@contact_links}".cyan # same as @contact_links
52
- else
53
- fail ArgumentError, "expected hash but got #{hsh.class}"
51
+ # Saves any available contact info to @contact_links.
52
+ def save_available_contacts(url, hsh = scan_for_contacts)
53
+ if something_to_save?(hsh)
54
+ LogMessages.saving_contact_info(url)
55
+ if hsh.is_a?(Hash)
56
+ hsh.each do |k, v|
57
+ save_link(k, v) # saves to @contact_links
54
58
  end
55
- Search::POC.new(url, @contact_links)
59
+ delete_failures(@contact_links)
60
+ puts "#{@contact_links}".cyan # same as @contact_links
56
61
  else
57
- puts '(nothing to save)'
58
- return
62
+ fail ArgumentError, "expected hash but got #{hsh.class}"
59
63
  end
64
+ Gimme::Search::POC.new(url, @contact_links)
65
+ else
66
+ LogMessages.nothing_to_save
67
+ return
60
68
  end
61
69
  end
62
70
  end
@@ -0,0 +1,329 @@
1
+ require 'mechanize'
2
+ require 'logger'
3
+ require 'tempfile'
4
+ require 'tmpdir'
5
+ require 'webrick'
6
+ require 'zlib'
7
+
8
+ require 'rubygems'
9
+
10
+ begin
11
+ gem 'minitest'
12
+ rescue Gem::LoadError
13
+ end
14
+
15
+ ##
16
+ # Source:
17
+ #
18
+ # http://bit.ly/1Pt2KAd
19
+ # --------------------------------------------------------------
20
+
21
+ ##
22
+ # A generic test case for testing mechanize. Using a subclass of
23
+ # Mechanize::TestCase for your tests will create an isolated mechanize
24
+ # instance that won't pollute your filesystem or other tests.
25
+ #
26
+ # Once Mechanize::TestCase is loaded no HTTP requests will be made outside
27
+ # mechanize itself. All requests are handled via WEBrick servlets.
28
+ #
29
+ # Mechanize uses WEBrick servlets to test some functionality. You can run
30
+ # other HTTP clients against the servlets using:
31
+ #
32
+ # ruby -rmechanize/test_case/server -e0
33
+ #
34
+ # Which will launch a test server at http://localhost:8000
35
+
36
+ class Mechanize::TestCase < Minitest::Test
37
+
38
+ TEST_DIR = File.expand_path '../../../test', __FILE__
39
+ REQUESTS = []
40
+
41
+ ##
42
+ # Creates a clean mechanize instance +@mech+ for use in tests.
43
+
44
+ def setup
45
+ super
46
+
47
+ REQUESTS.clear
48
+ @mech = Mechanize.new
49
+ @ssl_private_key = nil
50
+ @ssl_certificate = nil
51
+ end
52
+
53
+ ##
54
+ # Creates a fake page with URI http://fake.example and an empty, submittable
55
+ # form.
56
+
57
+ def fake_page agent = @mech
58
+ uri = URI 'http://fake.example/'
59
+ html = <<-END
60
+ <html>
61
+ <body>
62
+ <form><input type="submit" value="submit" /></form>
63
+ </body>
64
+ </html>
65
+ END
66
+
67
+ Mechanize::Page.new uri, nil, html, 200, agent
68
+ end
69
+
70
+ ##
71
+ # Is the Encoding constant defined?
72
+
73
+ def have_encoding?
74
+ Object.const_defined? :Encoding
75
+ end
76
+
77
+ ##
78
+ # Creates a Mechanize::Page with the given +body+
79
+
80
+ def html_page body
81
+ uri = URI 'http://example/'
82
+ Mechanize::Page.new uri, nil, body, 200, @mech
83
+ end
84
+
85
+ ##
86
+ # Creates a Mechanize::CookieJar by parsing the given +str+
87
+
88
+ def cookie_jar str, uri = URI('http://example')
89
+ jar = Mechanize::CookieJar.new
90
+
91
+ jar.parse str, uri
92
+
93
+ jar
94
+ end
95
+
96
+ ##
97
+ # Runs the block inside a temporary directory
98
+
99
+ def in_tmpdir
100
+ Dir.mktmpdir do |dir|
101
+ Dir.chdir dir do
102
+ yield
103
+ end
104
+ end
105
+ end
106
+
107
+ ##
108
+ # Creates a Nokogiri Node +element+ with the given +attributes+
109
+
110
+ def node element, attributes = {}
111
+ doc = Nokogiri::HTML::Document.new
112
+
113
+ node = Nokogiri::XML::Node.new element, doc
114
+
115
+ attributes.each do |name, value|
116
+ node[name] = value
117
+ end
118
+
119
+ node
120
+ end
121
+
122
+ ##
123
+ # Creates a Mechanize::Page for the given +uri+ with the given
124
+ # +content_type+, response +body+ and HTTP status +code+
125
+
126
+ def page uri, content_type = 'text/html', body = '', code = 200
127
+ uri = URI uri unless URI::Generic === uri
128
+
129
+ Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
130
+ @mech)
131
+ end
132
+
133
+ ##
134
+ # Requests made during this test
135
+
136
+ def requests
137
+ REQUESTS
138
+ end
139
+
140
+ ##
141
+ # An SSL private key. This key is the same across all test runs
142
+
143
+ def ssl_private_key
144
+ @ssl_private_key ||= OpenSSL::PKey::RSA.new <<-KEY
145
+ -----BEGIN RSA PRIVATE KEY-----
146
+ MIG7AgEAAkEA8pmEfmP0Ibir91x6pbts4JmmsVZd3xvD5p347EFvBCbhBW1nv1Gs
147
+ bCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTXUQIBAQIBAQIhAPumXslvf6YasXa1hni3
148
+ p80joKOug2UUgqOLD2GUSO//AiEA9ssY6AFxjHWuwo/+/rkLmkfO2s1Lz3OeUEWq
149
+ 6DiHOK8CAQECAQECIQDt8bc4vS6wh9VXApNSKIpVygtxSFe/IwLeX26n77j6Qg==
150
+ -----END RSA PRIVATE KEY-----
151
+ KEY
152
+ end
153
+
154
+ ##
155
+ # An X509 certificate. This certificate is the same across all test runs
156
+
157
+ def ssl_certificate
158
+ @ssl_certificate ||= OpenSSL::X509::Certificate.new <<-CERT
159
+ -----BEGIN CERTIFICATE-----
160
+ MIIBQjCB7aADAgECAgEAMA0GCSqGSIb3DQEBBQUAMCoxDzANBgNVBAMMBm5vYm9k
161
+ eTEXMBUGCgmSJomT8ixkARkWB2V4YW1wbGUwIBcNMTExMTAzMjEwODU5WhgPOTk5
162
+ OTEyMzExMjU5NTlaMCoxDzANBgNVBAMMBm5vYm9keTEXMBUGCgmSJomT8ixkARkW
163
+ B2V4YW1wbGUwWjANBgkqhkiG9w0BAQEFAANJADBGAkEA8pmEfmP0Ibir91x6pbts
164
+ 4JmmsVZd3xvD5p347EFvBCbhBW1nv1GsbCBEFlSiT1q2qvxGb5IlbrfdhdgyqdTX
165
+ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
166
+ //8AMCEwCQYFKw4DAhoFAAQUePiv+QrJxyjtEJNnH5pB9OTWIqA=
167
+ -----END CERTIFICATE-----
168
+ CERT
169
+ end
170
+
171
+ ##
172
+ # Creates a Tempfile with +content+ that is immediately unlinked
173
+
174
+ def tempfile content
175
+ body_io = Tempfile.new @NAME
176
+ body_io.unlink
177
+ body_io.write content
178
+ body_io.flush
179
+ body_io.rewind
180
+
181
+ body_io
182
+ end
183
+
184
+ end
185
+
186
+ require 'mechanize/test_case/servlets'
187
+
188
+ module Net # :nodoc:
189
+ end
190
+
191
+ class Net::HTTP # :nodoc:
192
+ alias :old_do_start :do_start
193
+
194
+ def do_start
195
+ @started = true
196
+ end
197
+
198
+ PAGE_CACHE = {}
199
+
200
+ alias :old_request :request
201
+
202
+ def request(req, *data, &block)
203
+ url = URI.parse(req.path)
204
+ path = WEBrick::HTTPUtils.unescape(url.path)
205
+
206
+ path = '/index.html' if path == '/'
207
+
208
+ res = ::Response.new
209
+ res.query_params = url.query
210
+
211
+ req.query = if 'POST' != req.method && url.query then
212
+ WEBrick::HTTPUtils.parse_query url.query
213
+ elsif req['content-type'] =~ /www-form-urlencoded/ then
214
+ WEBrick::HTTPUtils.parse_query req.body
215
+ elsif req['content-type'] =~ /boundary=(.+)/ then
216
+ boundary = WEBrick::HTTPUtils.dequote $1
217
+ WEBrick::HTTPUtils.parse_form_data req.body, boundary
218
+ else
219
+ {}
220
+ end
221
+
222
+ req.cookies = WEBrick::Cookie.parse(req['Cookie'])
223
+
224
+ Mechanize::TestCase::REQUESTS << req
225
+
226
+ if servlet_klass = MECHANIZE_TEST_CASE_SERVLETS[path]
227
+ servlet = servlet_klass.new({})
228
+ servlet.send "do_#{req.method}", req, res
229
+ else
230
+ filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
231
+ unless PAGE_CACHE[filename]
232
+ open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') { |io|
233
+ PAGE_CACHE[filename] = io.read
234
+ }
235
+ end
236
+
237
+ res.body = PAGE_CACHE[filename]
238
+ case filename
239
+ when /\.txt$/
240
+ res['Content-Type'] = 'text/plain'
241
+ when /\.jpg$/
242
+ res['Content-Type'] = 'image/jpeg'
243
+ end
244
+ end
245
+
246
+ res['Content-Type'] ||= 'text/html'
247
+ res.code ||= "200"
248
+
249
+ response_klass = Net::HTTPResponse::CODE_TO_OBJ[res.code.to_s]
250
+ response = response_klass.new res.http_version, res.code, res.message
251
+
252
+ res.header.each do |k,v|
253
+ v = v.first if v.length == 1
254
+ response[k] = v
255
+ end
256
+
257
+ res.cookies.each do |cookie|
258
+ response.add_field 'Set-Cookie', cookie.to_s
259
+ end
260
+
261
+ response['Content-Type'] ||= 'text/html'
262
+ response['Content-Length'] = res['Content-Length'] || res.body.length.to_s
263
+
264
+ io = StringIO.new(res.body)
265
+ response.instance_variable_set :@socket, io
266
+ def io.read clen, dest = nil, _ = nil
267
+ if dest then
268
+ dest << super(clen)
269
+ else
270
+ super clen
271
+ end
272
+ end
273
+
274
+ body_exist = req.response_body_permitted? &&
275
+ response_klass.body_permitted?
276
+
277
+ response.instance_variable_set :@body_exist, body_exist
278
+
279
+ yield response if block_given?
280
+
281
+ response
282
+ end
283
+ end
284
+
285
+ class Net::HTTPRequest # :nodoc:
286
+ attr_accessor :query, :body, :cookies, :user
287
+
288
+ def host
289
+ 'example'
290
+ end
291
+
292
+ def port
293
+ 80
294
+ end
295
+ end
296
+
297
+ class Response # :nodoc:
298
+ include Net::HTTPHeader
299
+
300
+ attr_reader :code
301
+ attr_accessor :body, :query, :cookies
302
+ attr_accessor :query_params, :http_version
303
+ attr_accessor :header
304
+
305
+ def code=(c)
306
+ @code = c.to_s
307
+ end
308
+
309
+ alias :status :code
310
+ alias :status= :code=
311
+
312
+ def initialize
313
+ @header = {}
314
+ @body = ''
315
+ @code = nil
316
+ @query = nil
317
+ @cookies = []
318
+ @http_version = '1.1'
319
+ end
320
+
321
+ def read_body
322
+ yield body
323
+ end
324
+
325
+ def message
326
+ ''
327
+ end
328
+ end
329
+
@@ -1,3 +1,3 @@
1
1
  module Gimme
2
- VERSION = '0.0.5'
2
+ VERSION = '1.1.0'
3
3
  end
@@ -1,91 +1,97 @@
1
1
  # Find the contact
2
- module Gimme
3
- class << self
4
- ##
5
- # Go to a page using Mechanize.
6
- # Sleep for a split second to not overload any servers.
7
- #
8
- # Returns nil if bad url is given.
9
- def get(str)
10
- url = format_url(str)
11
- puts "sending GET request to: #{url}"
12
- sleep(0.1)
13
- @page = Mechanize.new do |a|
14
- a.user_agent_alias = 'Mac Safari'
15
- a.open_timeout = 7
16
- a.read_timeout = 7
17
- a.idle_timeout = 7
18
- a.redirect_ok = true
19
- end.get(url)
2
+ module Web
3
+ attr_accessor :page, :agent, :url
4
+
5
+ # Captures http:// and https://
6
+ HTTP_REGEX = %r{(\A\bhttps:\/\/|\bhttp:\/\/)}
7
+
8
+ ##
9
+ # Go to a page using Mechanize.
10
+ # Sleep for a split second to not overload any servers.
11
+ #
12
+ # Returns nil if bad url is given.
13
+ def get(str)
14
+ prepare_get_request(str)
15
+ @page = @agent.get(@url)
16
+ rescue Exception => e
17
+ LogMessages.warn_err(e)
18
+ end
19
+
20
+ def prepare_get_request(str)
21
+ mech_setup
22
+ @url = format_url(str)
23
+ LogMessages.sending_get_request(url)
24
+ sleep(0.1)
25
+ end
20
26
 
21
- rescue Mechanize::ResponseCodeError => e
22
- puts "#{'Response Error:'.red} #{e}"
23
- rescue SocketError => e
24
- puts "#{'Socket Error:'.red} #{e}"
25
- rescue Net::OpenTimeout => e
26
- puts "#{'Connection Timeout:'.red} #{e}"
27
- rescue Errno::ETIMEDOUT => e
28
- puts "#{'Connection Timeout:'.red} #{e}"
29
- rescue Net::HTTP::Persistent::Error
30
- puts "#{'Connection Timeout:'.red} read timeout, too many resets."
27
+ def mech_setup
28
+ @agent = Mechanize.new do |a|
29
+ a.user_agent_alias = 'Mac Safari'
30
+ a.open_timeout = 7
31
+ a.read_timeout = 7
32
+ a.idle_timeout = 7
33
+ a.redirect_ok = true
31
34
  end
35
+ end
32
36
 
33
- ##
34
- # Mechanize needs absolute urls to work.
35
- # If http:// or https:// isn't present, append http://.
36
- def format_url(str)
37
- LazyDomain.autohttp(str)
38
- end
37
+ ##
38
+ # Mechanize needs absolute urls to work.
39
+ # If http:// or https:// isn't present, prepend http://.
40
+ def format_url(str)
41
+ LazyDomain.autohttp(str)
42
+ end
39
43
 
40
- # Used for subdomain check. Not a permanent change to url variable.
41
- def unformat_url(str)
42
- str.gsub(HTTP_REGEX, '')
43
- end
44
+ # Used for subdomain check. Not a permanent change to url variable.
45
+ def unformat_url(str)
46
+ str.gsub(HTTP_REGEX, '')
47
+ end
44
48
 
45
- ##
46
- # Outputs domain of a url. Useful if subdomains are given to GimmePOC
47
- # and they don't work.
48
- #
49
- # For example:
50
- # Given http://maps.google.com, returns 'google.com'.
51
- def orig_domain(str)
52
- LazyDomain.parse(str).domain
53
- rescue PublicSuffix::DomainInvalid => e
54
- puts "#{'Invalid Domain:'.red} #{e}"
55
- end
49
+ ##
50
+ # Outputs domain of a url. Useful if subdomains are given to GimmePOC
51
+ # and they don't work.
52
+ #
53
+ # For example:
54
+ # Given http://maps.google.com, returns 'google.com'.
55
+ def orig_domain(str)
56
+ LazyDomain.parse(str).domain
57
+ rescue PublicSuffix::DomainInvalid => err
58
+ LogMessages.invalid_domain(err)
59
+ end
56
60
 
57
- ##
58
- # Used in case of relative paths. Merging guarantees correct url.
59
- # This needs a url string as argument to work.
60
- # Produces a merged uri string.
61
- def merged_link(url_str)
62
- page.uri.merge(url_str).to_s
63
- end
61
+ ##
62
+ # Used in case of relative paths. Merging guarantees correct url.
63
+ # This needs a url string as argument to work.
64
+ # Produces a merged uri string.
65
+ def merged_link(url_str)
66
+ @page.uri.merge(url_str).to_s
67
+ end
64
68
 
65
- ##
66
- # Expects relative paths and merges everything.
67
- # Returns a string. If there's nothing, return nil.
68
- #
69
- # Add \b word block to ensure whole word is searched.
70
- def link_with_href(str)
71
- merged_link(page.link_with(href: /\b#{str}/).uri.to_s)
72
- rescue
73
- nil
74
- end
69
+ ##
70
+ # Expects relative paths and merges everything.
71
+ # Returns a string. If there's nothing, return nil.
72
+ #
73
+ # A \b word boundary is added so the match starts at the beginning of a word.
74
+ def link_with_href(str)
75
+ merged_link(@page.link_with(href: /\b#{str}/).uri.to_s)
76
+ rescue
77
+ nil
78
+ end
75
79
 
76
- # Boolean, returns true if url is not identical to original domain.
77
- def subdomain?(str)
78
- (unformat_url(str) != orig_domain(str))
79
- end
80
+ # Boolean, returns true if url is not identical to original domain.
81
+ #
82
+ # In the event that the url has a path, this splits everything on forward
83
+ # slash and selects far left item.
84
+ def subdomain?(str)
85
+ (unformat_url(str).split('/')[0] != orig_domain(str))
86
+ end
80
87
 
81
- # TODO: Sometimes DNS will do a redirect and not give a 404.
82
- # Need to prevent redirects.
83
- #
84
- # Blindly tests to see if a url goes through. If there is a 404 error,
85
- # this will return nil.
86
- def blind_test(url)
87
- puts "\n(blind testing: #{url})"
88
- get(url)
89
- end
88
+ # TODO: Sometimes DNS will do a redirect and not give a 404.
89
+ # Need to prevent redirects.
90
+ #
91
+ # Blindly tests to see if a url goes through. If there is a 404 error,
92
+ # this will return nil.
93
+ def blind_test(url)
94
+ LogMessages.blind_testing(url)
95
+ get(url)
90
96
  end
91
97
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gimme_poc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-11 00:00:00.000000000 Z
11
+ date: 2017-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -44,28 +44,56 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.0.1
47
+ version: 0.0.2
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.0.1
54
+ version: 0.0.2
55
55
  - !ruby/object:Gem::Dependency
56
- name: rspec
56
+ name: shoulda
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: '3.3'
61
+ version: '3.5'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: '3.3'
68
+ version: '3.5'
69
+ - !ruby/object:Gem::Dependency
70
+ name: shoulda-context
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.2'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.2'
83
+ - !ruby/object:Gem::Dependency
84
+ name: minitest-reporters
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.1'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.1'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: pry
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -106,9 +134,12 @@ files:
106
134
  - Rakefile
107
135
  - lib/gimme_poc.rb
108
136
  - lib/gimme_poc/contactpage.rb
137
+ - lib/gimme_poc/logger.rb
138
+ - lib/gimme_poc/logger/messages.rb
109
139
  - lib/gimme_poc/poc.rb
110
140
  - lib/gimme_poc/questions.rb
111
141
  - lib/gimme_poc/save.rb
142
+ - lib/gimme_poc/test_case.rb
112
143
  - lib/gimme_poc/version.rb
113
144
  - lib/gimme_poc/web.rb
114
145
  homepage: http://github.com/m8ss/gimme_poc
@@ -131,7 +162,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
131
162
  version: '0'
132
163
  requirements: []
133
164
  rubyforge_project:
134
- rubygems_version: 2.4.5
165
+ rubygems_version: 2.5.1
135
166
  signing_key:
136
167
  specification_version: 4
137
168
  summary: Get a point of contact. Given a url or array of urls, extracts social media