appending 0.2 → 1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/appending.rb +455 -0
- metadata +44 -4
- data/appending.rb +0 -141
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
|
4
|
+
data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
|
7
|
+
data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
|
data/lib/appending.rb
ADDED
@@ -0,0 +1,455 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'csv-indexer'
|
4
|
+
require 'simple_cloud_logging'
|
5
|
+
|
6
|
+
module BlackStack
|
7
|
+
module Appending
|
8
|
+
@@logger = nil
|
9
|
+
@@report = nil
|
10
|
+
@@indexes = []
|
11
|
+
@@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
|
12
|
+
@@verifier_api_key = nil
|
13
|
+
@@email_fields = []
|
14
|
+
@@phone_fields = []
|
15
|
+
@@company_domain_fields = []
|
16
|
+
|
17
|
+
## @@logger
|
18
|
+
def self.set_logger(logger)
|
19
|
+
@@logger = logger
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.logger
|
23
|
+
@@logger
|
24
|
+
end
|
25
|
+
|
26
|
+
## @@indexes
|
27
|
+
def self.add_index(index)
|
28
|
+
expected = [:company_name, :first_name, :last_name]
|
29
|
+
|
30
|
+
# validation: keys must be `[:company_name, :first_name, :last_name]`
|
31
|
+
if !index.keys.eql?(expected)
|
32
|
+
raise "Invalid index: #{index.keys}. Expected: #{expected}."
|
33
|
+
end
|
34
|
+
# add the index
|
35
|
+
@@indexes << index
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.set_indexes(indexes)
|
39
|
+
@@indexes = indexes
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.indexes
|
43
|
+
@@indexes
|
44
|
+
end
|
45
|
+
|
46
|
+
# @@report
|
47
|
+
def self.report
|
48
|
+
@@report
|
49
|
+
end
|
50
|
+
|
51
|
+
# @@verifier_url
|
52
|
+
def self.set_verifier_url(url)
|
53
|
+
@@verifier_url = url
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.verifier_url
|
57
|
+
@@verifier_url
|
58
|
+
end
|
59
|
+
|
60
|
+
# @@verifier_api_key
|
61
|
+
def self.set_verifier_api_key(key)
|
62
|
+
@@verifier_api_key = key
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.verifier_api_key
|
66
|
+
@@verifier_api_key
|
67
|
+
end
|
68
|
+
|
69
|
+
# @@email_fields
|
70
|
+
def self.set_email_fields(fields)
|
71
|
+
@@email_fields = fields
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.email_fields
|
75
|
+
@@email_fields
|
76
|
+
end
|
77
|
+
|
78
|
+
# @@phone_fields
|
79
|
+
def self.set_phone_fields(fields)
|
80
|
+
@@phone_fields = fields
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.phone_fields
|
84
|
+
@@phone_fields
|
85
|
+
end
|
86
|
+
|
87
|
+
# @@company_domain_fields
|
88
|
+
# Set the names of the index fields that hold company domains.
# The value is stored in @@company_domain_fields and read back through
# `company_domain_fields`.
def self.set_company_fields(fields)
  @@company_domain_fields = fields
end

# Alias of `set_company_fields`, named consistently with the
# `company_domain_fields` getter and the :company_domain_fields key of `set`.
def self.set_company_domain_fields(fields)
  set_company_fields(fields)
end
|
91
|
+
|
92
|
+
def self.company_domain_fields
|
93
|
+
@@company_domain_fields
|
94
|
+
end
|
95
|
+
|
96
|
+
# set configuration
|
97
|
+
# Apply several configuration values at once.
#
# `h` is a hash; every key is optional:
#   :indexes               - Array of BlackStack::CSVIndexer::Index
#   :verifier_url          - String
#   :verifier_api_key      - String
#   :email_fields          - Array of String
#   :phone_fields          - Array of String
#   :company_domain_fields - Array of String
#
# All supplied values are validated first. If any of them is invalid, a
# RuntimeError listing every problem is raised and nothing is assigned.
# Keys that are absent (or nil/false) leave the current setting untouched.
def self.set(h)
  errors = []

  # :indexes must be an array of BlackStack::CSVIndexer::Index objects
  if h[:indexes]
    if h[:indexes].is_a?(Array)
      h[:indexes].each { |index|
        unless index.is_a?(BlackStack::CSVIndexer::Index)
          errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
        end
      }
    else
      errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
    end
  end

  # :verifier_url and :verifier_api_key must be strings
  errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
  errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)

  # the three field lists must be arrays of strings
  [:email_fields, :phone_fields, :company_domain_fields].each { |key|
    next unless h[key]
    if h[key].is_a?(Array)
      h[key].each { |field|
        errors << "Invalid :#{key}: #{field.class}. Expected: String." unless field.is_a?(String)
      }
    else
      errors << "Invalid :#{key}: #{h[key].class}. Expected: Array."
    end
  }

  # refuse the whole configuration if any value is invalid
  raise errors.join(' ') unless errors.empty?

  # mapping
  @@indexes = h[:indexes] if h[:indexes]
  @@verifier_url = h[:verifier_url] if h[:verifier_url]
  @@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
  @@email_fields = h[:email_fields] if h[:email_fields]
  @@phone_fields = h[:phone_fields] if h[:phone_fields]
  @@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
end
|
166
|
+
|
167
|
+
# return true if the domain accepts any random address as valid (a catch-all domain)
|
168
|
+
#
|
169
|
+
# This is a support method for the `append` methods.
|
170
|
+
# The end-user should not call this method directly.
|
171
|
+
#
|
172
|
+
def self.catch_all?(domain)
|
173
|
+
BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
|
174
|
+
end
|
175
|
+
|
176
|
+
# verify an email address using the AWS IP address of our website, which is more reliable
|
177
|
+
#
|
178
|
+
# This is a support method for the `append` methods.
|
179
|
+
# The end-user should not call this method directly.
|
180
|
+
#
|
181
|
+
def self.verify(email)
|
182
|
+
url = @@verifier_url
|
183
|
+
params = {
|
184
|
+
:email => email,
|
185
|
+
}
|
186
|
+
res = BlackStack::Netting::call_get(url, params)
|
187
|
+
parsed = JSON.parse(res.body)
|
188
|
+
parsed['status'] == 'success'
|
189
|
+
end
|
190
|
+
|
191
|
+
# build the common address patterns for a person at a domain, and return the ones that verify
|
192
|
+
# Guess the email addresses of a person at a given domain.
#
# Builds the usual local-part patterns from the first and last name
# (first, last, first.last, last.first, firstlast, lastfirst, flast, f.last)
# and returns, lower-cased, the candidates for which `verify` returns true.
#
# Returns an empty array when `catch_all?(domain)` is true, because on such
# a domain every candidate would verify.
#
# Candidates with an empty local part or with a leading/trailing dot (which
# appear when fname or lname is blank or nil) are dropped, and duplicated
# candidates are verified only once.
def self.append(fname, lname, domain)
  ret = []
  if !catch_all?(domain)
    EmailVerifier.config do |config|
      config.verifier_email = "leandro.sardi@expandedventure.com"
    end

    # tolerate nil names
    fname = fname.to_s
    lname = lname.to_s

    local_parts = [
      "#{fname}",
      "#{lname}",

      "#{fname}.#{lname}",
      "#{lname}.#{fname}",

      "#{fname}#{lname}",
      "#{lname}#{fname}",

      "#{fname[0]}#{lname}",
      "#{fname[0]}.#{lname}",
    ]

    # drop malformed local parts, then verify each distinct address once
    candidates = local_parts.reject { |local|
      local.empty? || local.start_with?('.') || local.end_with?('.')
    }.map { |local|
      "#{local}@#{domain}".downcase
    }.uniq

    candidates.each { |email|
      ret << email if verify(email)
    }
  end
  ret
end
|
216
|
+
|
217
|
+
# This is a support method for the `append` methods.
|
218
|
+
# The end-user should not call this method directly.
|
219
|
+
# Extract the first name from a full name: the first run of ASCII letters.
#
# Leading whitespace or punctuation is skipped, so " John Smith" gives "John".
# Returns '' when `name` is nil or contains no ASCII letters.
def self.cleanup_fname(name)
  return '' if name.nil?
  # scan never yields empty tokens, unlike split on the separators
  name.scan(/[a-zA-Z]+/).first || ''
end
|
224
|
+
|
225
|
+
# This is a support method for the `append` methods.
|
226
|
+
# The end-user should not call this method directly.
|
227
|
+
# Extract the last name from a full name: the second run of ASCII letters.
#
# Repeated or leading separators are ignored, so "John  Smith" and
# " John Smith" both give "Smith".
# Returns '' when `name` is nil or has fewer than two runs of letters.
def self.cleanup_lname(name)
  return '' if name.nil?
  # scan never yields empty tokens, unlike split on the separators
  name.scan(/[a-zA-Z]+/)[1] || ''
end
|
232
|
+
|
233
|
+
# This is a support method for the `append` methods.
|
234
|
+
# The end-user should not call this method directly.
|
235
|
+
# Normalize a company name before it is used as a search key.
#
# Steps:
#   1. keep only the text after the last " at " ("CEO at Acme" -> "Acme");
#   2. remove the stand-alone words "LLC" and "Inc" (whole words only, so
#      "Incredible" is left intact);
#   3. remove LinkedIn suffixes such as "(2019 - Present)";
#   4. remove every character that is not a letter, digit, comma, period,
#      hyphen or whitespace;
#   5. remove trailing periods, commas and whitespace;
#   6. keep at most the first three words.
#
# Returns '' when `company` is nil or empty. The argument is not modified.
def self.cleanup_company(company)
  return '' if company.nil?

  # `split` returns [] for an empty string, hence the `to_s` after `last`
  cleaned = company.to_s.split(/ at /).last.to_s
  cleaned = cleaned.gsub(/\b(?:LLC|Inc)\b/, '')
  # must run before step 4, which would otherwise strip the parentheses
  cleaned = cleaned.gsub(/\(\d\d\d\d - Present\)/, '')
  cleaned = cleaned.gsub(/[^a-zA-Z0-9,\.\-\s]/, '')
  cleaned = cleaned.gsub(/[\s.,]+\z/, '').strip

  cleaned.split(' ').first(3).join(' ')
end
|
262
|
+
|
263
|
+
# Find a person in the indexes by its full name and company name.
|
264
|
+
# Append all the information in the index row.
|
265
|
+
def self.find_persons_with_full_name(name, cname)
|
266
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
267
|
+
|
268
|
+
l.logs "Guessing fname from #{name}... "
|
269
|
+
fname = BlackStack::Appending::cleanup_fname(name)
|
270
|
+
l.logf fname
|
271
|
+
|
272
|
+
l.logs "Guessing lname from #{name}... "
|
273
|
+
lname = BlackStack::Appending::cleanup_lname(name)
|
274
|
+
l.logf lname
|
275
|
+
|
276
|
+
BlackStack::Appending.find_persons(fname, lname, cname)
|
277
|
+
end
|
278
|
+
|
279
|
+
# Find a person in the indexes by its first name, last name and company name.
|
280
|
+
# Append all the information in the index row.
|
281
|
+
# Search every configured index for a person by company name, first name
# and last name.
#
# The company name is normalized with `cleanup_company` before searching.
# Each index is queried with `find([cname, fname, lname], false, nil)`.
# NOTE(review): the meaning of the second and third arguments is defined by
# csv-indexer and is not visible here - confirm against that gem.
#
# The totals (:matches, :enlapsed_seconds, :files_processed) accumulated over
# all indexes are stored in @@report, readable through `report`.
#
# Returns an array of BlackStack::Appending::Result, one per matching row.
def self.find_persons(fname, lname, cname)
  l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
  h = {
    :matches => [],
    :enlapsed_seconds => 0,
    :files_processed => 0,
  }
  # cleaning up company name
  l.logs "Cleaning up company name #{cname}... "
  cname = BlackStack::Appending::cleanup_company(cname)
  l.logf cname
  # looking for records that match company name, first name and last name
  BlackStack::Appending.indexes.each { |i|
    l.logs "Searching into #{i.name}... "
    ret = i.find([cname, fname, lname], false, nil)
    # put the name of the index in the first position of each match,
    # so Result#value can locate the index that produced the row
    ret[:matches].each { |m| m.unshift(i.name.to_s) }
    # add matches to the list
    h[:matches] += ret[:matches]
    # sum the total files and the total enlapsed seconds
    h[:enlapsed_seconds] += ret[:enlapsed_seconds]
    h[:files_processed] += ret[:files_processed]
    l.done
  }
  # update report
  @@report = h
  # return results
  h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
end
|
313
|
+
|
314
|
+
# Find persons in the indexes by company name only.
|
315
|
+
# Append all the information in the index row.
|
316
|
+
# Search every configured index for rows that belong to a company.
#
# Each index is queried with `find([cname], true, nil)`; unlike
# `find_persons`, the company name is used as given (no `cleanup_company`).
# NOTE(review): the meaning of the second and third arguments is defined by
# csv-indexer and is not visible here - confirm against that gem.
#
# The totals (:matches, :enlapsed_seconds, :files_processed) accumulated over
# all indexes are stored in @@report, readable through `report`.
#
# Returns an array of BlackStack::Appending::Result, one per matching row.
def self.find_persons_by_company(cname)
  l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
  h = {
    :matches => [],
    :enlapsed_seconds => 0,
    :files_processed => 0,
  }
  # looking for records that match the company name
  BlackStack::Appending.indexes.each { |i|
    l.logs "Searching into #{i.name}... "
    ret = i.find([cname], true, nil)
    # put the name of the index in the first position of each match,
    # so Result#value can locate the index that produced the row
    ret[:matches].each { |m| m.unshift(i.name.to_s) }
    # add matches to the list
    h[:matches] += ret[:matches]
    # sum the total files and the total enlapsed seconds
    h[:enlapsed_seconds] += ret[:enlapsed_seconds]
    h[:files_processed] += ret[:files_processed]
    l.done
  }
  # update report
  @@report = h
  # return results
  h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
end
|
344
|
+
|
345
|
+
# Find verified email addresses of a person.
#
# Two sources are combined:
#   1. emails stored in the indexes for this person (`find_persons`), kept
#      only if `verify` accepts them and their domain is not a catch-all;
#   2. addresses guessed with `append` for every company domain found in the
#      indexes for this company (`find_persons_by_company`).
#
# Domains are normalized before guessing: surrounding whitespace removed,
# lower-cased, and a leading "www." dropped.
#
# Returns an array of unique email addresses.
def self.find_verified_emails(fname, lname, cname)
  l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
  verified_emails = []
  # get lead emails from the indexes
  l.logs("Searching index emails... ")
  emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
    res.emails
  }.flatten.uniq.reject { |email|
    email.to_s.empty?
  }
  l.done
  # get company domains from the indexes
  l.logs("Searching index domains... ")
  domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
    res.company_domains
  }.flatten.map { |domain|
    # normalize domain: lower-case first, then drop the "www." prefix only
    domain.to_s.strip.downcase.sub(/\Awww\./, '')
  }.reject { |domain|
    domain.empty?
  }.uniq
  l.done
  # verify all the emails found in the indexes
  l.logs("Verifying index emails... ")
  emails.each { |email|
    l.logs "Verifying #{email}... "
    domain = email.split('@').last
    verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
    l.done
  }
  l.done
  # appending with domains found in the indexes
  l.logs("Appending with domains... ")
  domains.each { |domain|
    l.logs "Appending with #{domain}... "
    verified_emails += BlackStack::Appending.append(fname, lname, domain)
    l.done
  }
  l.done
  # return
  verified_emails.uniq
end
|
389
|
+
|
390
|
+
#
|
391
|
+
# One row found in an index.
class Result
  # Array of values.
  # The first 3 values are the index name, the key and the row number;
  # the field values of the row follow, in the order of the index mapping.
  attr_accessor :match

  def initialize(a)
    self.match = a
  end

  # Get the value of a field of this match by the field name.
  #
  # The index that produced the match is looked up by name (match[0]) among
  # BlackStack::CSVIndexer.indexes. Names are compared as strings, because
  # the name stored in the match was converted with `to_s`.
  #
  # Returns the value as a String, or nil if either the index or the field
  # is not found.
  def value(field)
    # get the index_name
    index_name = match[0].to_s
    # get the index descriptor
    index = BlackStack::CSVIndexer.indexes.find { |i| i.name.to_s == index_name }
    # return nil if the index is not registered
    return nil if index.nil?
    # get position of the field into the mapping descriptor
    k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
    # return nil if the field is not found
    return nil if k.nil?
    # field values start after index name, key and row number
    match[k+3].to_s
  end

  # Call value() method.
  def val(field)
    self.value(field)
  end

  # Values of the fields listed in BlackStack::Appending.email_fields.
  def emails()
    values_of(BlackStack::Appending.email_fields)
  end

  # Values of the fields listed in BlackStack::Appending.phone_fields.
  def phones()
    values_of(BlackStack::Appending.phone_fields)
  end

  # Values of the fields listed in BlackStack::Appending.company_domain_fields.
  def company_domains()
    values_of(BlackStack::Appending.company_domain_fields)
  end

  private

  # Values of the given fields, skipping fields the index does not map.
  def values_of(fields)
    fields.map { |f| value(f) }.compact
  end
end # class Result
|
453
|
+
|
454
|
+
end # Appending
|
455
|
+
end # BlackStack
|
metadata
CHANGED
@@ -1,15 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appending
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '1.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leandro Daniel Sardi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csv
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.2.2
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 3.2.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 3.2.2
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 3.2.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: email_verifier
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.1.0
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.1.0
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.1.0
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.1.0
|
13
53
|
- !ruby/object:Gem::Dependency
|
14
54
|
name: blackstack-core
|
15
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,8 +176,8 @@ executables: []
|
|
136
176
|
extensions: []
|
137
177
|
extra_rdoc_files: []
|
138
178
|
files:
|
139
|
-
- appending.rb
|
140
|
-
homepage: https://rubygems.org/gems/
|
179
|
+
- lib/appending.rb
|
180
|
+
homepage: https://rubygems.org/gems/appending
|
141
181
|
licenses:
|
142
182
|
- MIT
|
143
183
|
metadata: {}
|
data/appending.rb
DELETED
@@ -1,141 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
require 'email_verifier'
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
puts "Appending is in development. Please check back later."
|
6
|
-
|
7
|
-
=begin
|
8
|
-
module BlackStack
|
9
|
-
module Appending
|
10
|
-
# This class is used to parse the HTML files downloaded from Sales Navigator and other sources.
|
11
|
-
module Parser
|
12
|
-
# parse search results pages from sales navigator, and save the company name and full name into a CSV file
|
13
|
-
def self.parse_sales_navigator_result_pages(search_name, l=nil)
|
14
|
-
# create logger if not passed
|
15
|
-
l = BlackStack::DummyLogger.new(nil) if l.nil?
|
16
|
-
# define output filename
|
17
|
-
output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
|
18
|
-
raise 'Output file already exists.' if File.exists?(output_file)
|
19
|
-
output = File.open(output_file, 'w')
|
20
|
-
# parse
|
21
|
-
i = 0
|
22
|
-
source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
|
23
|
-
Dir.glob(source).each do |file|
|
24
|
-
doc = Nokogiri::HTML(open(file))
|
25
|
-
lis = doc.xpath('//li[contains(@class, "artdeco-list__item")]')
|
26
|
-
lis.each { |li|
|
27
|
-
i += 1
|
28
|
-
doc2 = Nokogiri::HTML(li.inner_html)
|
29
|
-
# this is where to find the full name of the lead
|
30
|
-
n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
|
31
|
-
# this is where to find the name of the company, when it has a link to a linkedin company page
|
32
|
-
n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
|
33
|
-
# this is where to find the name of the company, when it has not a link to a linkedin company page
|
34
|
-
company_name = nil
|
35
|
-
if n2
|
36
|
-
company_name = n2.text
|
37
|
-
else
|
38
|
-
n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
|
39
|
-
if n2
|
40
|
-
company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last.strip
|
41
|
-
end
|
42
|
-
end
|
43
|
-
# add the information to the output file
|
44
|
-
line = []
|
45
|
-
line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
|
46
|
-
line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
|
47
|
-
l.logs "#{i.to_s}, #{line.join(',')}... "
|
48
|
-
output.puts line.join(',')
|
49
|
-
output.flush
|
50
|
-
l.done
|
51
|
-
}
|
52
|
-
end
|
53
|
-
# close output file
|
54
|
-
output.close
|
55
|
-
end # def self.parse_sales_navigator_result_pages(search_name)
|
56
|
-
end # module Parser
|
57
|
-
|
58
|
-
# return true if the domain get any random address as valid
|
59
|
-
def self.catch_all?(domain)
|
60
|
-
BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
|
61
|
-
end
|
62
|
-
|
63
|
-
# verify an email address using the AWS IP address of our website, which is more reliable
|
64
|
-
def self.verify(email)
|
65
|
-
url = "https://connectionsphere.com/api1.0/emails/verify.json"
|
66
|
-
params = {
|
67
|
-
:email => email,
|
68
|
-
}
|
69
|
-
res = BlackStack::Netting::call_get(url, params)
|
70
|
-
parsed = JSON.parse(res.body)
|
71
|
-
parsed['status'] == 'success'
|
72
|
-
end
|
73
|
-
|
74
|
-
# build the common address patterns for a person at a domain, and return the ones that verify
|
75
|
-
def self.append(fname, lname, domain)
|
76
|
-
ret = []
|
77
|
-
if !catch_all?(domain)
|
78
|
-
EmailVerifier.config do |config|
|
79
|
-
config.verifier_email = "leandro.sardi@expandedventure.com"
|
80
|
-
end
|
81
|
-
[
|
82
|
-
"#{fname}@#{domain}",
|
83
|
-
"#{lname}@#{domain}",
|
84
|
-
|
85
|
-
"#{fname}.#{lname}@#{domain}",
|
86
|
-
"#{lname}.#{fname}@#{domain}",
|
87
|
-
|
88
|
-
"#{fname}#{lname}@#{domain}",
|
89
|
-
"#{lname}#{fname}@#{domain}",
|
90
|
-
|
91
|
-
"#{fname[0]}#{lname}@#{domain}",
|
92
|
-
"#{fname[0]}.#{lname}@#{domain}",
|
93
|
-
].each { |email|
|
94
|
-
ret << email.downcase if verify(email)
|
95
|
-
}
|
96
|
-
end
|
97
|
-
ret
|
98
|
-
end
|
99
|
-
|
100
|
-
def self.cleanup_fname(name)
|
101
|
-
return '' if name.nil?
|
102
|
-
a = name.split(/[^a-zA-Z]/)
|
103
|
-
a.size > 0 ? a[0] : ''
|
104
|
-
end
|
105
|
-
|
106
|
-
def self.cleanup_lname(name)
|
107
|
-
return '' if name.nil?
|
108
|
-
a = name.split(/[^a-zA-Z]/)
|
109
|
-
a.size > 1 ? a[1] : ''
|
110
|
-
end
|
111
|
-
|
112
|
-
def self.cleanup_company(company)
|
113
|
-
return '' if company.nil?
|
114
|
-
ret = ''
|
115
|
-
# stage 1: remove company-type suffixes
|
116
|
-
company = company.split(/ at /).last
|
117
|
-
company.gsub!(/LLC/, '')
|
118
|
-
company.gsub!(/Inc/, '')
|
119
|
-
company.strip! # remove leading and trailing spaces
|
120
|
-
# stage 2: remove LinkedIn suffixes
|
121
|
-
company.gsub!(/\(\d\d\d\d - Present\)/, '')
|
122
|
-
company.strip! # remove leading and trailing spaces
|
123
|
-
# stage 3: remove a trailing period or comma
|
124
|
-
company.gsub!(/\.$/, '')
|
125
|
-
company.gsub!(/\,$/, '')
|
126
|
-
# stage 4: remove characters other than letters, digits, commas, periods, hyphens and whitespace
|
127
|
-
company.gsub!(/[^a-zA-Z0-9,\.\-\s]/, '') # remove extra spaces
|
128
|
-
company.strip! # remove leading and trailing spaces
|
129
|
-
# stage 5: choose the first part of the company name
|
130
|
-
company.split(' ').each { |word|
|
131
|
-
ret += word + ' '
|
132
|
-
#break if word.size >= 5 || ret.split(' ').size > 2
|
133
|
-
break if ret.split(' ').size > 2
|
134
|
-
}
|
135
|
-
ret.strip!
|
136
|
-
# return
|
137
|
-
ret
|
138
|
-
end
|
139
|
-
end # Appending
|
140
|
-
end # BlackStack
|
141
|
-
=end
|