gimme_poc 0.0.0.beta
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +19 -0
- data/README.md +3 -0
- data/Rakefile +23 -0
- data/lib/gimme_poc/version.rb +3 -0
- data/lib/gimme_poc.rb +202 -0
- metadata +106 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5863cfeab7d822a42e8a69f6bc304a7f93feb475
|
4
|
+
data.tar.gz: be4262c816f306de6fc53fd088317d552dd3ce70
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ca6993932877a4b6c1a8f02c6dfa11cee7848cd232d0083ba4b9ef1c5e39439fd785eb3bfc17fee52c641021ab2d144c210134fc4ec46ab6be2cf3b0147d2a9d
|
7
|
+
data.tar.gz: 702e680dc3f8f44a67bec6d08258b8ebe9a3c6b9dcfed58872a8ac56d53a8298ce8b40b0f1230970cc2f85e093c5957013c13ce67bc2d07f89fa65533632bbd8
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
Copyright (c) 2015 John Mason
|
3
|
+
|
4
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
5
|
+
of this software and associated documentation files (the "Software"), to deal
|
6
|
+
in the Software without restriction, including without limitation the rights
|
7
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the Software is
|
9
|
+
furnished to do so, subject to the following conditions:
|
10
|
+
The above copyright notice and this permission notice shall be included in all
|
11
|
+
copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
SOFTWARE.
|
data/README.md
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Rake tasks for the gimme_poc gem: a `test` task and a `console` task.
require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before anything else is
# loaded. If Bundler cannot do so, report why and exit with Bundler's own
# status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts 'Run `bundle install` to install missing gems'
  exit bundler_error.status_code
end

require 'rake'
require 'rake/testtask'

# `rake test` runs every file matching test/**/test_gimme_poc*.rb with both
# lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_gimme_poc*.rb'
  t.verbose = true
end

# `rake console` replaces the rake process with a pry session that has the
# library required.
desc 'Open console with gimme_poc loaded'
task(:console) { exec 'pry -r ./lib/gimme_poc.rb' }
|
data/lib/gimme_poc.rb
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
require 'uri'
require 'mechanize'
require 'colored'
require_relative './gimme_poc/version'
|
4
|
+
|
5
|
+
# Find the contact.
#
# Gimme fetches a web page, tries to navigate to its contact page (falling
# back to an English version of the site when no contact page is found) and
# records which kinds of contact information the page exposes.
#
# All state lives on the module itself:
#   page          - the page most recently fetched by +get+
#   url           - the last URL computed by one of the question methods
#   contact_links - hash of contact information saved for the current site
module Gimme
  class << self
    attr_accessor :page, :contact, :contact_links, :url

    # Simple regex that looks for ###.#### or ###-####
    PHONE_REGEX = /\d{3}[-.]\d{4}/

    # Maximum number of English-version pages go_to_contact_page will follow
    # before giving up. Keeps the search from looping forever.
    MAX_ENGLISH_HOPS = 3

    ## ----------------------------------------------------------------
    # Questions
    #
    #
    #

    # Boolean, returns true if contact link is present.
    # Side effect: stores the link's absolute URL (or nil) in @url.
    def contact_link?
      @url = link_with_href('contact')
      !@url.nil?
    end

    # Check if contact page '../contact' gets a 404 error.
    # Returns true when the request succeeds, false on an HTTP error code.
    # Side effect: stores the probed URL in @url.
    def contact_page?
      page_reachable?('../contact')
    end

    # Boolean, returns true if link to English version is present.
    # Side effect: stores the link's absolute URL in @url when found;
    # @url is left untouched otherwise.
    def english_link?
      link = page.link_with(href: /english/)
      return false if link.nil?

      @url = page.uri.merge(link.uri.to_s).to_s
      true
    end

    # Check if english page '../en' gets a 404 error.
    # Returns true when the request succeeds, false on an HTTP error code.
    # Side effect: stores the probed URL in @url.
    def en_page?
      page_reachable?('../en')
    end

    # Check if english page '../english' gets a 404 error.
    # Returns true when the request succeeds, false on an HTTP error code.
    # Side effect: stores the probed URL in @url.
    def english_page?
      page_reachable?('../english')
    end

    # Boolean, returns true if anything is left after the negative results
    # (nil links, 'false' flags) are removed from scan_for_contacts.
    def something_to_save?
      # The raw scan always has every key, so it must be filtered first;
      # otherwise this would be true for every page.
      delete_failures(scan_for_contacts).any?
    end

    # Boolean, returns true if email is present.
    def email_available?
      !link_with_href('mailto').nil?
    end

    # Boolean, returns true if phone number is present.
    def phone_available?
      !(page.body =~ PHONE_REGEX).nil?
    end

    # TODO: build better conditional to prevent false positives.
    # There could be other forms like newsletter signup, etc.
    #
    # If there is a form with more than one field, this returns true.
    # Forms with one field are typically search boxes.
    #
    # Boolean, returns true if form is present on page.
    def contactform_available?
      page.forms.any? { |form| form.fields.length > 1 }
    end

    ## ----------------------------------------------------------------
    # Actions
    #
    #
    #

    # The main method!
    # Takes array of urls and gets contact info for each if possible.
    # A single whitespace-separated string of urls is accepted as well.
    #
    # Prints and returns the contact hash of the last url processed.
    def poc(arr)
      urls = arr.is_a?(Array) ? arr : arr.to_s.split
      urls.each do |url|
        get(url)
        start_contact_links
        go_to_contact_page
        save_available_contacts
      end
      puts "\nexited each url iteration and about to return final result"
      p @contact_links # need to add to an overall array at some point.
    end

    # Go to a page using Mechanize and remember it as the current page.
    # Returns the fetched page.
    def get(url)
      puts "getting: #{url}"
      @page = Mechanize.new.get(url)
    end

    # Starts/Restarts @contact_links hash
    def start_contact_links
      puts "starting contact links hash."
      @contact_links = {}
    end

    # Scans for contact page. If it doesn't work on the first try,
    # it will look for english versions and try again.
    #
    # If contact page is found, go directly there and don't try again.
    #
    # The search stops when no English version is found, when the English
    # version points at a URL that was already visited, or after
    # MAX_ENGLISH_HOPS English pages have been followed.
    #
    # Returns the contact page when one was reached, nil otherwise.
    def go_to_contact_page
      visited = [page.uri.to_s]
      loop do
        if contact_link?
          puts "found contact link!".green
          return get(@url)
        elsif contact_page?
          puts "found contact page!".green
          return get(@url)
        end

        break unless english_version?
        # Following a URL we have already seen cannot make progress.
        break if visited.include?(@url) || visited.length > MAX_ENGLISH_HOPS

        visited << @url
        get(@url)
      end
      nil
    end

    # Expects relative paths and merges everything.
    # Returns a string. If there's nothing, return nil.
    #
    # str is matched literally against link hrefs (so the dot in
    # 'plus.google' only matches a dot); a Regexp is used as given.
    def link_with_href(str)
      return nil unless page.respond_to?(:link_with)

      pattern = str.is_a?(Regexp) ? str : /#{Regexp.escape(str.to_s)}/
      link = page.link_with(href: pattern)
      return nil if link.nil?

      page.uri.merge(link.uri.to_s).to_s
    rescue URI::Error
      # An href that cannot be parsed or merged counts as "no link".
      nil
    end

    # Returns a hash describing everything found on the current page.
    # Link entries hold an absolute URL or nil.
    # Booleans for phone, email, or contact form are the strings
    # 'true' or 'false'.
    def scan_for_contacts
      {
        contactpage: link_with_href('contact'),
        email_present?: email_available?.to_s,
        phone_present?: phone_available?.to_s,
        contact_form?: contactform_available?.to_s,
        facebook: link_with_href('facebook'),
        twitter: link_with_href('twitter'),
        youtube: link_with_href('youtube'),
        googleplus: link_with_href('plus.google'),
        linkedin: link_with_href('linkedin')
      }
    end

    # Used in save_available_contacts to save each valid link.
    # Does nothing when either the key or the url is nil.
    def save_link(key, url)
      return if key.nil? || url.nil?

      # Tolerate being called before start_contact_links.
      @contact_links ||= {}
      @contact_links[key] = url
    end

    # Remove negatives from the contacts hash.
    # Deletes a key value pair with a value of either nil or false.
    # Remember that false is a string.
    # Mutates and returns hsh.
    def delete_failures(hsh)
      hsh.delete_if { |_key, value| value.nil? || value == 'false' }
    end

    # Saves any available contact info to @contact_links.
    #
    # hsh defaults to a fresh scan of the current page. Negative results are
    # stripped from hsh (in place) before anything is saved, so
    # @contact_links only ever receives positive findings.
    #
    # Raises ArgumentError when hsh is not a Hash.
    def save_available_contacts(hsh = scan_for_contacts)
      puts 'starting save of available contact information...'
      raise ArgumentError, "expected hash but got #{hsh.class}" unless hsh.is_a?(Hash)

      delete_failures(hsh)
      return if hsh.empty?

      hsh.each { |key, value| save_link(key, value) }
      puts "this is hsh after nil strip:"
      puts "#{hsh}".cyan
    end

    private

    # Requests relative_path (resolved against the current page) and reports
    # whether it answered without an HTTP error code.
    # Side effect: stores the probed URL in @url.
    def page_reachable?(relative_path)
      @url = page.uri.merge(relative_path).to_s
      Mechanize.new.get(@url)
      true
    rescue Mechanize::ResponseCodeError
      false
    end

    # Looks for an English version of the site: first a link, then the
    # conventional '../en' and '../english' paths. Announces what it found.
    # Returns true when one was found; its URL is then in @url.
    def english_version?
      if english_link? # look for link first
        puts "found english link!"
      elsif en_page?
        puts "found en page!"
      elsif english_page?
        puts "found english page!"
      else
        return false
      end
      true
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gimme_poc
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0.beta
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John Mason
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-08 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mechanize
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.7'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '2.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: colored
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '1.2'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '1.2'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.9'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.9'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '10.3'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '10.3'
|
69
|
+
description: Gimme POC (Point of Contact) simplifies the process of extracting the
|
70
|
+
common 'contact us' information from a website.
|
71
|
+
email: mace2345@gmail.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- LICENSE.txt
|
77
|
+
- README.md
|
78
|
+
- Rakefile
|
79
|
+
- lib/gimme_poc.rb
|
80
|
+
- lib/gimme_poc/version.rb
|
81
|
+
homepage: http://github.com/m8ss/gimme_poc
|
82
|
+
licenses:
|
83
|
+
- MIT
|
84
|
+
metadata: {}
|
85
|
+
post_install_message:
|
86
|
+
rdoc_options: []
|
87
|
+
require_paths:
|
88
|
+
- lib
|
89
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ">"
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: 1.3.1
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 2.4.3
|
102
|
+
signing_key:
|
103
|
+
specification_version: 4
|
104
|
+
summary: Get a point of contact. Given a url or array of urls, extracts social media
|
105
|
+
contact information.
|
106
|
+
test_files: []
|