gimme_poc 0.0.0.beta

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5863cfeab7d822a42e8a69f6bc304a7f93feb475
4
+ data.tar.gz: be4262c816f306de6fc53fd088317d552dd3ce70
5
+ SHA512:
6
+ metadata.gz: ca6993932877a4b6c1a8f02c6dfa11cee7848cd232d0083ba4b9ef1c5e39439fd785eb3bfc17fee52c641021ab2d144c210134fc4ec46ab6be2cf3b0147d2a9d
7
+ data.tar.gz: 702e680dc3f8f44a67bec6d08258b8ebe9a3c6b9dcfed58872a8ac56d53a8298ce8b40b0f1230970cc2f85e093c5957013c13ce67bc2d07f89fa65533632bbd8
data/LICENSE.txt ADDED
@@ -0,0 +1,19 @@
1
+ The MIT License (MIT)
2
+ Copyright (c) 2015 John Mason
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ of this software and associated documentation files (the "Software"), to deal
6
+ in the Software without restriction, including without limitation the rights
7
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the Software is
9
+ furnished to do so, subject to the following conditions:
10
+ The above copyright notice and this permission notice shall be included in all
11
+ copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ ## gimme_poc
2
+
3
+ MTF
data/Rakefile ADDED
@@ -0,0 +1,23 @@
# Rake tasks for the gimme_poc gem: `rake test` and `rake console`.
require 'rubygems'
require 'bundler'

# Activate the :default and :development gem groups before rake is loaded.
# If Bundler cannot do so, print the reason plus a hint and exit with the
# status code Bundler reports.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts 'Run `bundle install` to install missing gems'
  exit bundler_error.status_code
end

require 'rake'
require 'rake/testtask'

# `rake test`: runs the files matching test/**/test_gimme_poc*.rb with
# 'lib' and 'test' appended to the task's libs list.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_gimme_poc*.rb'
  t.verbose = true
end

# `rake console`: replaces the rake process with a pry session that has
# lib/gimme_poc.rb required.
desc 'Open console with gimme_poc loaded'
task(:console) { exec 'pry -r ./lib/gimme_poc.rb' }
@@ -0,0 +1,3 @@
module Gimme
  # Version string of the gimme_poc gem. Frozen so the shared constant
  # cannot be mutated in place by callers.
  VERSION = "0.0.0.beta".freeze
end
data/lib/gimme_poc.rb ADDED
@@ -0,0 +1,202 @@
1
+ require 'mechanize'
2
+ require 'colored'
3
+ require_relative './gimme_poc/version'
4
+
# Find the contact.
#
# Scans web pages (fetched with Mechanize) for point-of-contact information:
# a contact page, an e-mail link, a phone number, a contact form and links to
# common social networks. Every method is defined on the module itself and
# state is kept in instance variables of the module.
module Gimme
  class << self
    # page          - the page most recently fetched by #get.
    # contact_links - hash of contact info saved for the current page.
    # url           - the last candidate URL computed by one of the predicates.
    # contact       - not referenced by any method in this module.
    attr_accessor :page, :contact, :contact_links, :url

    # Simple regex that looks for ###.#### or ###-####
    PHONE_REGEX = /\d{3}[-.]\d{4}/

    # Upper bound on how many times go_to_contact_page may switch to an
    # English version of the site before giving up. Without a bound, a site
    # whose English page links to "english" again would loop forever.
    MAX_ENGLISH_HOPS = 3

    ## ----------------------------------------------------------------
    # Questions
    #
    #
    #

    # Boolean, returns true if a link whose href contains 'contact' is
    # present. Side effect: stores the absolute link target (or nil) in @url.
    def contact_link?
      @url = link_with_href('contact')
      !@url.nil?
    end

    # Boolean, returns true if requesting '../contact' (relative to the
    # current page) does not raise Mechanize::ResponseCodeError.
    # Side effect: stores the probed address in @url.
    def contact_page?
      page_exists_at?('../contact')
    end

    # Boolean, returns true if a link whose href contains 'english' is
    # present. Side effect: stores the absolute link target in @url when
    # found; @url is left untouched otherwise.
    def english_link?
      english_url = link_with_href(/english/)
      return false if english_url.nil?
      @url = english_url
      true
    end

    # Boolean, returns true if requesting '../en' does not raise
    # Mechanize::ResponseCodeError. Side effect: stores the address in @url.
    def en_page?
      page_exists_at?('../en')
    end

    # Boolean, returns true if requesting '../english' does not raise
    # Mechanize::ResponseCodeError. Side effect: stores the address in @url.
    def english_page?
      page_exists_at?('../english')
    end

    # Boolean, returns true if scan_for_contacts found at least one positive
    # result. Negative entries (nil / 'false') are removed before checking,
    # because a non-empty hash always answers true to #any?.
    def something_to_save?
      delete_failures(scan_for_contacts).any?
    end

    # Boolean, returns true if a link whose href contains 'mailto' is present.
    def email_available?
      !link_with_href('mailto').nil?
    end

    # Boolean, returns true if the page body matches PHONE_REGEX.
    def phone_available?
      !(page.body =~ PHONE_REGEX).nil?
    end

    # TODO: build better conditional to prevent false positives.
    # There could be other forms like newsletter signup, etc.
    #
    # If there is a form with more than one field, this returns true.
    # Forms with one field are typically search boxes.
    #
    # Boolean, returns true if such a form is present on the page.
    def contactform_available?
      page.forms.any? { |form| form.fields.length > 1 }
    end

    ## ----------------------------------------------------------------
    # Actions
    #
    #
    #

    # The main method!
    # Takes an array of urls (or a whitespace-separated string of urls) and
    # gets contact info for each if possible.
    #
    # Prints and returns a hash mapping each url to the contact hash that
    # was collected for it. Errors raised while fetching a url are not
    # rescued here.
    def poc(arr)
      arr = arr.split unless arr.is_a?(Array)
      results = {}
      arr.each do |url|
        get(url)
        start_contact_links
        go_to_contact_page
        save_available_contacts
        results[url] = @contact_links
      end
      puts "\nexited each url iteration and about to return final result"
      p results
    end

    # Go to a page using a fresh Mechanize agent and remember it in @page.
    # Returns the fetched page.
    def get(url)
      puts "getting: #{url}"
      @page = Mechanize.new.get(url)
    end

    # Starts/Restarts the @contact_links hash.
    def start_contact_links
      puts "starting contact links hash."
      @contact_links = {}
    end

    # Scans for a contact page. If none is found on the current page, it
    # switches to an English version of the site and tries again, at most
    # MAX_ENGLISH_HOPS times.
    #
    # If a contact page is found, go directly there and don't try again.
    # Returns the contact page, or nil when none was found.
    def go_to_contact_page
      hops = 0
      loop do
        if contact_link?
          puts "found contact link!".green
          return get(@url)
        elsif contact_page?
          puts "found contact page!".green
          return get(@url)
        end
        return nil if hops >= MAX_ENGLISH_HOPS || !go_to_english_version
        hops += 1
      end
    end

    # Finds the first link on the current page whose href matches +str+ and
    # returns its target merged with the page's own address, as a string.
    # +str+ may be a String (matched literally as a substring) or a Regexp.
    # Returns nil if there is no page, no matching link, or the URI cannot
    # be parsed/merged.
    def link_with_href(str)
      return nil if page.nil?
      # Escape plain strings so characters such as '.' match literally.
      pattern = str.is_a?(Regexp) ? str : /#{Regexp.escape(str.to_s)}/
      link = page.link_with(href: pattern)
      return nil if link.nil?
      page.uri.merge(link.uri.to_s).to_s
    rescue URI::Error
      nil
    end

    # Returns a hash describing everything that could be saved.
    # Link entries are absolute url strings or nil. The phone, email and
    # contact form entries are the STRINGS 'true' or 'false'.
    def scan_for_contacts
      {
        contactpage: link_with_href('contact'),
        email_present?: email_available?.to_s,
        phone_present?: phone_available?.to_s,
        contact_form?: contactform_available?.to_s,
        facebook: link_with_href('facebook'),
        twitter: link_with_href('twitter'),
        youtube: link_with_href('youtube'),
        googleplus: link_with_href('plus.google'),
        linkedin: link_with_href('linkedin')
      }
    end

    # Used in save_available_contacts to save each valid link.
    # Does nothing when either the key or the url is nil.
    def save_link(key, url)
      return if key.nil? || url.nil?
      @contact_links ||= {}
      @contact_links[key] = url
    end

    # Remove negatives from the contacts hash.
    # Deletes, in place, every pair whose value is nil, false or the string
    # 'false' (scan_for_contacts reports booleans as strings).
    # Returns the same hash.
    def delete_failures(hsh)
      hsh.delete_if { |_key, value| value.nil? || value == false || value == 'false' }
    end

    # Saves any available contact info to @contact_links.
    #
    # hsh - hash in the shape produced by scan_for_contacts (the default).
    #       Negative entries are removed from it in place before saving, so
    #       only positive results end up in @contact_links.
    #
    # Returns @contact_links. Raises ArgumentError if hsh is not a Hash.
    def save_available_contacts(hsh = scan_for_contacts)
      puts 'starting save of available contact information...'
      raise ArgumentError, "expected hash but got #{hsh.class}" unless hsh.is_a?(Hash)
      delete_failures(hsh)
      return @contact_links if hsh.empty?
      hsh.each { |key, value| save_link(key, value) }
      puts "this is hsh after nil strip:"
      puts "#{hsh}".cyan
      @contact_links
    end

    private

    # Stores the address of +relative_path+ (resolved against the current
    # page) in @url and requests it with a fresh Mechanize agent.
    # Returns false if the request raises Mechanize::ResponseCodeError,
    # true otherwise. Other errors are not rescued.
    def page_exists_at?(relative_path)
      @url = page.uri.merge(relative_path).to_s
      Mechanize.new.get(@url)
      true
    rescue Mechanize::ResponseCodeError
      false
    end

    # Moves to an English version of the site if one can be found, trying an
    # 'english' link first, then '../en', then '../english'.
    # Returns true if a new page was fetched, false otherwise.
    def go_to_english_version
      if english_link? # look for link first
        puts "found english link!"
      elsif en_page?
        puts "found en page!"
      elsif english_page?
        puts "found english page!"
      else
        return false
      end
      get(@url)
      true
    end
  end
end
metadata ADDED
@@ -0,0 +1,106 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gimme_poc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0.beta
5
+ platform: ruby
6
+ authors:
7
+ - John Mason
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-10-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: colored
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '1.2'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '1.2'
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.9'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.9'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.3'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.3'
69
+ description: Gimme POC (Point of Contact) simplifies the process of extracting the
70
+ common 'contact us' information from a website.
71
+ email: mace2345@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - LICENSE.txt
77
+ - README.md
78
+ - Rakefile
79
+ - lib/gimme_poc.rb
80
+ - lib/gimme_poc/version.rb
81
+ homepage: http://github.com/m8ss/gimme_poc
82
+ licenses:
83
+ - MIT
84
+ metadata: {}
85
+ post_install_message:
86
+ rdoc_options: []
87
+ require_paths:
88
+ - lib
89
+ required_ruby_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ required_rubygems_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">"
97
+ - !ruby/object:Gem::Version
98
+ version: 1.3.1
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 2.4.3
102
+ signing_key:
103
+ specification_version: 4
104
+ summary: Get a point of contact. Given a url or array of urls, extracts social media
105
+ contact information.
106
+ test_files: []