gimme_poc 0.0.0.beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5863cfeab7d822a42e8a69f6bc304a7f93feb475
4
+ data.tar.gz: be4262c816f306de6fc53fd088317d552dd3ce70
5
+ SHA512:
6
+ metadata.gz: ca6993932877a4b6c1a8f02c6dfa11cee7848cd232d0083ba4b9ef1c5e39439fd785eb3bfc17fee52c641021ab2d144c210134fc4ec46ab6be2cf3b0147d2a9d
7
+ data.tar.gz: 702e680dc3f8f44a67bec6d08258b8ebe9a3c6b9dcfed58872a8ac56d53a8298ce8b40b0f1230970cc2f85e093c5957013c13ce67bc2d07f89fa65533632bbd8
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ The MIT License (MIT)
2
+ Copyright (c) 2015 John Mason
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ ## gimme_poc
2
+
3
+ MTF
data/Rakefile ADDED
@@ -0,0 +1,23 @@
1
# Rake tasks for the gimme_poc gem: activate the bundle, then define the
# `test` and `console` tasks.
require 'rubygems'
require 'bundler'

# Activate the :default and :development gem groups. If Bundler cannot set
# them up, report the problem on stderr and exit with Bundler's own status
# code instead of continuing with a partial load path.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts 'Run `bundle install` to install missing gems'
  exit bundler_error.status_code
end

require 'rake'
require 'rake/testtask'

# `rake test`: run every test file matching the pattern below, with both
# lib/ and test/ added to the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_gimme_poc*.rb'
  t.verbose = true
end

# `rake console`: replace the rake process with a pry session that has the
# library required.
desc 'Open console with gimme_poc loaded'
task(:console) { exec 'pry -r ./lib/gimme_poc.rb' }
data/lib/gimme_poc/version.rb ADDED
@@ -0,0 +1,3 @@
1
module Gimme
  # Release version of the gimme_poc gem.
  # Frozen so the shared constant string cannot be mutated in place.
  VERSION = '0.0.0.beta'.freeze
end
data/lib/gimme_poc.rb ADDED
@@ -0,0 +1,202 @@
1
+ require 'mechanize'
2
+ require 'colored'
3
+ require_relative './gimme_poc/version'
4
+
5
+ # Find the contact
6
module Gimme
  class << self
    # page          - the page object most recently fetched by `get`.
    # contact_links - contact info saved for the current url (key => string).
    # url           - candidate url set by the latest *_link? / *_page? check.
    # contact       - not used by any method in this module.
    attr_accessor :page, :contact, :contact_links, :url

    # Simple regex that looks for ###.#### or ###-####
    PHONE_REGEX = /\d{3}[-.]\d{4}/

    # Upper bound on how many "english version" pages go_to_contact_page
    # follows before giving up, so a page that keeps linking to an english
    # version of itself cannot loop forever.
    MAX_ENGLISH_HOPS = 3

    ## ----------------------------------------------------------------
    # Questions
    #
    #
    #

    # Boolean, returns true if contact link is present.
    # Side effect: @url becomes the absolute link url, or nil if none.
    def contact_link?
      @url = link_with_href('contact')
      !@url.nil?
    end

    # Check if contact page '../contact' gets a 404 error.
    # Side effect: @url becomes the probed url.
    def contact_page?
      reachable?('../contact')
    end

    # Boolean, returns true if link to English version is present.
    # Side effect: @url becomes the absolute link url, but only when found.
    def english_link?
      found = link_with_href('english')
      return false if found.nil?
      @url = found
      true
    end

    # Check if english page '../en' gets a 404 error.
    # Side effect: @url becomes the probed url.
    def en_page?
      reachable?('../en')
    end

    # Check if english page '../english' gets a 404 error.
    # Side effect: @url becomes the probed url.
    def english_page?
      reachable?('../english')
    end

    # Boolean, returns true if scan_for_contacts found at least one
    # positive result (a link, or a 'true' flag). The raw scan hash always
    # has every key, so negatives are stripped before checking.
    def something_to_save?
      delete_failures(scan_for_contacts).any?
    end

    # Boolean, returns true if email is present.
    def email_available?
      !link_with_href('mailto').nil?
    end

    # Boolean, returns true if phone number is present.
    def phone_available?
      !(page.body =~ PHONE_REGEX).nil?
    end

    # TODO: build better conditional to prevent false positives.
    # There could be other forms like newsletter signup, etc.
    #
    # If there is a form with more than one field, this returns true.
    # Forms with one field are typically search boxes.
    #
    # Boolean, returns true if form is present on page.
    def contactform_available?
      page.forms.any? { |form| form.fields.length > 1 }
    end

    ## ----------------------------------------------------------------
    # Actions
    #
    #
    #

    # The main method!
    # Takes array of urls (or one whitespace-separated string of urls)
    # and gets contact info for each if possible.
    #
    # Prints and returns @contact_links, which holds the results for the
    # LAST url only because the hash is restarted for every url.
    def poc(arr)
      arr = arr.split unless arr.is_a?(Array)
      arr.each do |url|
        get(url)
        start_contact_links
        go_to_contact_page
        save_available_contacts
      end
      puts "\nexited each url iteration and about to return final result"
      puts 'press RETURN'
      # Read from $stdin explicitly: bare Kernel#gets would try to read
      # from files named in ARGV when any arguments are present.
      $stdin.gets
      p @contact_links # need to add to an overall array at some point.
    end

    # Go to a page using Mechanize and remember it as the current page.
    # Returns the fetched page. Does not touch @contact_links.
    def get(url)
      puts "getting: #{url}"
      @page = Mechanize.new.get(url)
    end

    # Starts/Restarts @contact_links hash
    def start_contact_links
      puts "starting contact links hash."
      @contact_links = {}
    end

    # Scans for contact page. If it doesn't work on the first try,
    # it will look for english versions and try again, at most
    # MAX_ENGLISH_HOPS times.
    #
    # If contact page is found, go directly there and don't try again.
    # Returns the contact page when one was reached, otherwise nil.
    def go_to_contact_page
      hops = 0
      loop do
        if contact_link?
          puts "found contact link!".green
          return get(@url)
        elsif contact_page?
          puts "found contact page!".green
          return get(@url)
        end
        return nil unless hops < MAX_ENGLISH_HOPS && follow_english_version
        hops += 1
      end
    end

    # Finds the first link on the current page whose href matches str and
    # resolves it against the page's own uri.
    #
    # str is interpolated into a regex as-is, so regex metacharacters in
    # it must be escaped by the caller.
    #
    # Returns a string. If there's no such link, or the href cannot be
    # parsed/merged as a uri, return nil.
    def link_with_href(str)
      link = page.link_with(href: /#{str}/)
      return nil if link.nil?
      page.uri.merge(link.uri.to_s).to_s
    rescue URI::Error
      nil
    end

    # Returns a hash with one entry per kind of contact info.
    # Link entries hold the absolute url or nil. Booleans for phone,
    # email, or contact form are the STRINGS 'true' or 'false'.
    def scan_for_contacts
      {
        contactpage: link_with_href('contact'),
        email_present?: email_available?.to_s,
        phone_present?: phone_available?.to_s,
        contact_form?: contactform_available?.to_s,
        facebook: link_with_href('facebook'),
        twitter: link_with_href('twitter'),
        youtube: link_with_href('youtube'),
        # Dot escaped so only a literal "plus.google" matches.
        googleplus: link_with_href('plus\.google'),
        linkedin: link_with_href('linkedin')
      }
    end

    # Used in save_available_contacts to save each valid link.
    # Does nothing when either argument is nil.
    def save_link(key, url)
      return if key.nil? || url.nil?
      # Start the hash on demand so this works before start_contact_links.
      @contact_links ||= {}
      @contact_links[key] = url
    end

    # Remove negatives from the contacts hash.
    # Deletes a key value pair with a value of either nil or false.
    # Remember that false is a string.
    # Mutates and returns hsh.
    def delete_failures(hsh)
      hsh.delete_if { |_key, value| value.nil? || value == 'false' }
    end

    # Saves any available contact info to @contact_links.
    #
    # Negatives are stripped from hsh (in place) BEFORE saving, so nil and
    # 'false' entries never reach @contact_links. Nothing is saved when no
    # positive entry remains.
    #
    # Raises ArgumentError if hsh is not a Hash.
    def save_available_contacts(hsh = scan_for_contacts)
      puts 'starting save of available contact information...'
      raise ArgumentError, "expected hash but got #{hsh.class}" unless hsh.is_a?(Hash)

      delete_failures(hsh)
      return if hsh.empty?
      hsh.each { |key, link| save_link(key, link) }
      puts "this is hsh after nil strip:"
      puts hsh.to_s.cyan
    end

    private

    # Sets @url to relative_path resolved against the current page and
    # returns true if fetching it succeeds, false if Mechanize reports an
    # error response code.
    def reachable?(relative_path)
      @url = page.uri.merge(relative_path).to_s
      Mechanize.new.get(@url)
      true
    rescue Mechanize::ResponseCodeError
      false
    end

    # Tries, in order, an english link, '../en' and '../english'. Moves to
    # the first one that exists and returns true; returns false if none do.
    def follow_english_version
      if english_link? # look for link first
        puts "found english link!"
      elsif en_page?
        puts "found en page!"
      elsif english_page?
        puts "found english page!"
      else
        return false
      end
      get(@url)
      true
    end
  end
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gimme_poc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.beta
5
+ platform: ruby
6
+ authors:
7
+ - John Mason
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colored
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.3'
69
+ description: Gimme POC (Point of Contact) simplifies the process of extracting the
70
+ common 'contact us' information from a website.
71
+ email: mace2345@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - LICENSE.txt
77
+ - README.md
78
+ - Rakefile
79
+ - lib/gimme_poc.rb
80
+ - lib/gimme_poc/version.rb
81
+ homepage: http://github.com/m8ss/gimme_poc
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">"
97
+ - !ruby/object:Gem::Version
98
+ version: 1.3.1
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.4.3
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Get a point of contact. Given a url or array of urls, extracts social media
105
+ contact information.
106
+ test_files: []