gimme_poc 0.0.0.beta → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5863cfeab7d822a42e8a69f6bc304a7f93feb475
4
- data.tar.gz: be4262c816f306de6fc53fd088317d552dd3ce70
3
+ metadata.gz: 0e6506c640bcafe56d6906bfaae60f8a17e72865
4
+ data.tar.gz: 21e4fc7176c2d5aa28a0fe31ab993889a7fbf74d
5
5
  SHA512:
6
- metadata.gz: ca6993932877a4b6c1a8f02c6dfa11cee7848cd232d0083ba4b9ef1c5e39439fd785eb3bfc17fee52c641021ab2d144c210134fc4ec46ab6be2cf3b0147d2a9d
7
- data.tar.gz: 702e680dc3f8f44a67bec6d08258b8ebe9a3c6b9dcfed58872a8ac56d53a8298ce8b40b0f1230970cc2f85e093c5957013c13ce67bc2d07f89fa65533632bbd8
6
+ metadata.gz: 39c35961983b31246c7414067d607bbc6c7a6dbddc5af9dfa97122ff094b881248db7a4a9c6415c33e393b3f59e5c632f0b06c3fe04e70dd9035892c5c2b11cf
7
+ data.tar.gz: b1a194c4f0d51d7b3ea77f81786cc5b2d981b7c27f22dbeb6db98f8c1cca688437d04a1af5c9193d25e92072f88c664468d59ff89a2a853695e59809792ee457
data/README.md CHANGED
@@ -1,3 +1,77 @@
1
- ## gimme_poc
1
+ # Gimme POC
2
2
 
3
- MTF
3
+ Gimme POC (Point of Contact) simplifies the process of extracting the common 'contact us' information from a website.
4
+
5
+ ## Usage
6
+
7
+ Gimme POC simply looks for a contact page and extracts social media contact information, if present. Due to the CAN-SPAM Act of 2003, email addresses are not harvested. Instead, Gimme POC reports (true or false) whether an email link or a contact form is available.
8
+
9
+ ## Installation
10
+
11
+ ```
12
+ gem install gimme_poc
13
+
14
+ ```
15
+
16
+ ## How it works
17
+
18
+ Gimme POC is easy to use! Simply run this command.
19
+
20
+ ```ruby
21
+
22
+ Gimme.poc 'http://example.com'
23
+
24
+ # => returns this:
25
+ #
26
+ # {
27
+ # :contactpage=>"http://example.com/contact/",
28
+ # :phone_present=>"true",
29
+ # :contact_form=>"true",
30
+ # :facebook=>"http://www.facebook.com/example",
31
+ # :twitter=>"http://twitter.com/@example",
32
+ # :googleplus=>"http://plus.google.com/+example",
33
+ # :linkedin=>"http://www.linkedin.com/in/example"
34
+ # }
35
+ #
36
+ #
37
+
38
+
39
+ ```
40
+
41
+ ## Searching more than one site
42
+
43
+ You can also pass multiple URLs in the form of an array. For example, you could run the command below and get contact information from every site at once.
44
+
45
+ ```ruby
46
+
47
+ Gimme.poc(['http://example.com', 'http://foo.com', 'http://bar.com'])
48
+
49
+ ```
50
+
51
+ ## Referencing the search results
52
+
53
+ To use your search results, simply run:
54
+
55
+ ```ruby
56
+
57
+ Gimme.memory
58
+
59
+ ```
60
+
61
+ ## Clearing the search results
62
+
63
+ To clear search results and start afresh, simply run:
64
+
65
+ ```ruby
66
+
67
+ Gimme.reset!
68
+
69
+ ```
70
+
71
+ ## To do:
72
+
73
+ - Convenience methods for returning specific information from all sites (i.e. just Facebook or just Twitter)
74
+ - Work on false positives of bad urls. (DNS redirects don't give 404 errors)
75
+
76
+
77
+ More to follow...
@@ -1,3 +1,3 @@
1
1
  module Gimme
2
- VERSION = "0.0.0.beta"
3
- end
2
+ VERSION = '0.0.1'
3
+ end
data/lib/gimme_poc.rb CHANGED
@@ -6,23 +6,24 @@ require_relative './gimme_poc/version'
6
6
  module Gimme
7
7
  class << self
8
8
  attr_accessor :page, :contact, :contact_links, :url
9
-
9
+
10
10
  # Simple regex that looks for ###.#### or ###-####
11
11
  PHONE_REGEX = /\d{3}[-]\d{4}|\d{3}[.]\d{4}/
12
-
12
+
13
13
  ## ----------------------------------------------------------------
14
- # Questions
14
+ # Questions
15
15
  #
16
16
  #
17
17
  #
18
-
18
+
19
19
  # Boolean, returns true if contact link is present.
20
20
  def contact_link?
21
21
  @url = link_with_href('contact')
22
- !!(@url != nil)
22
+
23
+ !@url.nil?
23
24
  end
24
-
25
- # Check if contact page '../contact' gets a 404 error.
25
+
26
+ # True if contact page '../contact' does NOT get a 404 error.
26
27
  def contact_page?
27
28
  @url = page.uri.merge('../contact').to_s
28
29
  begin
@@ -31,16 +32,16 @@ module Gimme
31
32
  false
32
33
  end
33
34
  end
34
-
35
35
 
36
36
  # Boolean, returns true if link to English version is present.
37
37
  def english_link?
38
38
  return false if page.link_with(href: /english/).nil?
39
39
  @url = page.uri.merge(page.link_with(href: /english/).uri.to_s).to_s
40
- !!(@url != nil)
40
+
41
+ !@url.nil?
41
42
  end
42
-
43
- # Check if english page '../en' gets a 404 error.
43
+
44
+ # True if english page '../en' does NOT get a 404 error.
44
45
  def en_page?
45
46
  @url = page.uri.merge('../en').to_s
46
47
  begin
@@ -50,8 +51,7 @@ module Gimme
50
51
  end
51
52
  end
52
53
 
53
-
54
- # Check if english page '../english' gets a 404 error.
54
+ # True if english page '../english' does NOT get a 404 error.
55
55
  def english_page?
56
56
  @url = page.uri.merge('../english').to_s
57
57
  begin
@@ -66,18 +66,17 @@ module Gimme
66
66
  def something_to_save?
67
67
  scan_for_contacts.any?
68
68
  end
69
-
69
+
70
70
  # Boolean, returns true if email is present.
71
71
  def email_available?
72
- !!(link_with_href('mailto') != nil)
72
+ !link_with_href('mailto').nil?
73
73
  end
74
-
75
-
74
+
76
75
  # Boolean, returns true if phone number is present.
77
76
  def phone_available?
78
- !!(page.body =~ PHONE_REGEX)
77
+ !(page.body =~ PHONE_REGEX).nil?
79
78
  end
80
-
79
+
81
80
  # TODO: build better conditional to prevent false positives.
82
81
  # There could be other forms like newsletter signup, etc.
83
82
  #
@@ -86,11 +85,11 @@ module Gimme
86
85
  #
87
86
  # Boolean, returns true if form is present on page.
88
87
  def contactform_available?
89
- !!(page.forms.select {|x| x.fields.length > 1}.empty? != true)
88
+ !(page.forms.select { |x| x.fields.length > 1 }.empty?)
90
89
  end
91
-
90
+
92
91
  ## ----------------------------------------------------------------
93
- # Actions
92
+ # Actions
94
93
  #
95
94
  #
96
95
  #
@@ -100,68 +99,72 @@ module Gimme
100
99
  def poc(arr)
101
100
  arr = arr.split unless arr.is_a?(Array)
102
101
  arr.each do |url|
102
+ puts '-' * 50
103
+ puts "starting: #{url}"
103
104
  get(url)
104
105
  start_contact_links
105
106
  go_to_contact_page
106
107
  save_available_contacts
107
108
  end
108
- puts "\nexited each url iteration and about to return final result"
109
- puts 'press RETURN'
110
- gets
111
- p @contacts_links # need to add to an overall array at some point.
109
+ Search.all_sites
112
110
  end
113
-
114
- # Go to a page using Mechanize. Doing this reinitializes the hash.
111
+
112
+ # Go to a page using Mechanize.
115
113
  def get(url)
116
- puts "getting: #{url}"
117
- @page = Mechanize.new.get(url)
114
+ puts "sending GET request: #{url}..."
115
+ @page = Mechanize.new { |a| a.user_agent_alias = 'Mac Safari' }.get(url)
118
116
  end
119
-
117
+
120
118
  # Starts/Restarts @contacts_links hash
121
119
  def start_contact_links
122
- puts "starting contact links hash."
120
+ puts 'setting contact links hash to {}'
123
121
  @contact_links = {}
124
122
  end
125
-
126
- # Scans for contact page. If it doesn't work on the first try,
123
+
124
+ # Scans for contact page. If it doesn't work on the first try,
127
125
  # It will look for english versions and try again.
128
126
  #
129
127
  # If contact page is found, go directly there and don't try again.
130
128
  def go_to_contact_page
131
129
  1.times do
132
- if contact_link?
133
- puts "found contact link!".green
130
+ case
131
+ when contact_link?
132
+ puts 'found contact link!'.green
134
133
  get(@url)
135
- elsif contact_page?
136
- puts "found contact page!".green
134
+ when contact_page?
135
+ puts 'found contact page!'.green
137
136
  get(@url)
138
- elsif english_link? # look for link first
139
- puts "found english link!"
140
- get(@url); redo
141
- elsif en_page?
142
- puts "found en page!"
143
- get(@url); redo
144
- elsif english_page?
145
- puts "found english page!"
146
- get(@url); redo
137
+ else
138
+ if english_link? # look for link first, not check the page.
139
+ puts 'found english link!'
140
+ get(@url)
141
+ redo
142
+ elsif en_page?
143
+ puts 'found en page!'
144
+ get(@url)
145
+ redo
146
+ elsif english_page?
147
+ puts 'found english page!'
148
+ redo
149
+ end
147
150
  end
148
151
  end
149
152
  end
150
153
 
151
- # Expects relative paths and merges everything.
154
+ # Expects relative paths and merges everything.
152
155
  # Returns a string. If there's nothing, return nil.
153
156
  def link_with_href(str)
154
157
  page.uri.merge(page.link_with(href: /#{str}/).uri.to_s).to_s rescue nil
155
- end
158
+ end
156
159
 
157
160
  # Returns anything that is possible to save, otherwise returns nil.
158
161
  # Booleans for phone, email, or contact form will display True or False.
159
162
  def scan_for_contacts
160
163
  {
161
164
  contactpage: link_with_href('contact'),
162
- email_present?: "#{email_available?}",
163
- phone_present?: "#{phone_available?}",
164
- contact_form?: "#{contactform_available?}",
165
+ email_present: "#{email_available?}",
166
+ phone_present: "#{phone_available?}",
167
+ contact_form: "#{contactform_available?}",
165
168
  facebook: link_with_href('facebook'),
166
169
  twitter: link_with_href('twitter'),
167
170
  youtube: link_with_href('youtube'),
@@ -175,28 +178,55 @@ module Gimme
175
178
  return if key.nil? || url.nil?
176
179
  @contact_links[key] = url
177
180
  end
178
-
181
+
179
182
  # Remove negatives from the contacts hash.
180
183
  # Deletes a key value pair with a value of either nil or false.
181
184
  # Remember that false is a string.
182
185
  def delete_failures(hsh)
183
- hsh.delete_if {|k, v| v == nil || v == 'false'}
186
+ hsh.delete_if { |_k, v| v.nil? || v == 'false' }
184
187
  end
185
188
 
186
189
  # Saves any available contact info to @contact_links.
187
190
  def save_available_contacts(hsh = scan_for_contacts)
188
- puts 'starting save of available contact information...'
191
+ puts 'saving available contact information...'
189
192
  return unless something_to_save?
190
193
  if hsh.is_a?(Hash)
191
194
  hsh.each do |k, v|
192
- save_link(k, v)
195
+ save_link(k, v) # saves to @contact_links
193
196
  end
194
- delete_failures(hsh)
195
- puts "this is hsh after nil strip:"
196
- puts "#{hsh}".cyan
197
- else
198
- raise ArgumentError, "expected hash but got #{hsh.class}"
197
+ delete_failures(@contact_links)
198
+ puts "#{@contact_links}".cyan # same as @contact_links
199
+ else
200
+ fail ArgumentError, "expected hash but got #{hsh.class}"
199
201
  end
202
+ Search::POC.new(@contact_links)
203
+ end
204
+
205
+ # Convenience method
206
+ def memory
207
+ Search.all_sites
208
+ end
209
+
210
+ # Clears entire collection.
211
+ def reset!
212
+ Search.all_sites = []
213
+ end
214
+ end
215
+ end
216
+
217
+ # Collection of sites searched
218
+ class Search
219
+ @all_sites = []
220
+
221
+ class << self
222
+ attr_accessor :all_sites
223
+ end
224
+
225
+ # Each site is saved to this class
226
+ class POC
227
+ def initialize(contact_info_hsh)
228
+ @contact_info = contact_info_hsh
229
+ Search.all_sites << self
200
230
  end
201
231
  end
202
232
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gimme_poc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.beta
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Mason
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-08 00:00:00.000000000 Z
11
+ date: 2015-10-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -93,12 +93,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  required_rubygems_version: !ruby/object:Gem::Requirement
95
95
  requirements:
96
- - - ">"
96
+ - - ">="
97
97
  - !ruby/object:Gem::Version
98
- version: 1.3.1
98
+ version: '0'
99
99
  requirements: []
100
100
  rubyforge_project:
101
- rubygems_version: 2.4.3
101
+ rubygems_version: 2.4.5
102
102
  signing_key:
103
103
  specification_version: 4
104
104
  summary: Get a point of contact. Given a url or array of urls, extracts social media