appending 0.2 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/appending.rb +455 -0
- metadata +44 -4
- data/appending.rb +0 -141
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
|
4
|
+
data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
|
7
|
+
data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
|
data/lib/appending.rb
ADDED
@@ -0,0 +1,455 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'csv-indexer'
|
4
|
+
require 'simple_cloud_logging'
|
5
|
+
|
6
|
+
module BlackStack
  # Appends contact data to leads: searches the registered CSV indexes for a
  # person/company and guesses + verifies email addresses for a domain.
  #
  # All state is module-level configuration (logger, indexes, verifier URL,
  # field names) set through `set` or the individual `set_*` methods.
  module Appending
    @@logger = nil
    @@report = nil
    @@indexes = []
    @@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
    @@verifier_api_key = nil
    @@email_fields = []
    @@phone_fields = []
    @@company_domain_fields = []

    # Keys (in this exact order) that an index must declare to be accepted by
    # `add_index`; `find_persons` queries indexes with values in this order.
    INDEX_KEYS = [:company_name, :first_name, :last_name].freeze

    ## @@logger

    # Set the logger used by the `find_*` methods.
    def self.set_logger(logger)
      @@logger = logger
    end

    # Logger previously set with `set_logger`, or nil.
    def self.logger
      @@logger
    end

    ## @@indexes

    # Register one index.
    #
    # Raises RuntimeError unless `index.keys` is exactly
    # `[:company_name, :first_name, :last_name]`.
    def self.add_index(index)
      unless index.keys.eql?(INDEX_KEYS)
        raise "Invalid index: #{index.keys}. Expected: #{INDEX_KEYS}."
      end
      @@indexes << index
    end

    # Replace the whole list of indexes (no validation is performed here).
    def self.set_indexes(indexes)
      @@indexes = indexes
    end

    # Registered indexes.
    def self.indexes
      @@indexes
    end

    # @@report

    # Totals of the last index search: a hash with :matches,
    # :enlapsed_seconds and :files_processed (nil before the first search).
    def self.report
      @@report
    end

    # @@verifier_url

    # Set the URL of the email verification endpoint.
    def self.set_verifier_url(url)
      @@verifier_url = url
    end

    # URL of the email verification endpoint.
    def self.verifier_url
      @@verifier_url
    end

    # @@verifier_api_key

    # Set the API key of the verification service.
    def self.set_verifier_api_key(key)
      @@verifier_api_key = key
    end

    # API key of the verification service.
    def self.verifier_api_key
      @@verifier_api_key
    end

    # @@email_fields

    # Set the names of the index fields that hold email addresses.
    def self.set_email_fields(fields)
      @@email_fields = fields
    end

    # Names of the index fields that hold email addresses.
    def self.email_fields
      @@email_fields
    end

    # @@phone_fields

    # Set the names of the index fields that hold phone numbers.
    def self.set_phone_fields(fields)
      @@phone_fields = fields
    end

    # Names of the index fields that hold phone numbers.
    def self.phone_fields
      @@phone_fields
    end

    # @@company_domain_fields

    # Set the names of the index fields that hold company domains.
    def self.set_company_fields(fields)
      @@company_domain_fields = fields
    end

    # Same as `set_company_fields`; provided so the setter name matches the
    # `company_domain_fields` getter.
    def self.set_company_domain_fields(fields)
      set_company_fields(fields)
    end

    # Names of the index fields that hold company domains.
    def self.company_domain_fields
      @@company_domain_fields
    end

    # Set several configuration values at once.
    #
    # Recognized keys of `h` (all optional): :indexes (Array of
    # BlackStack::CSVIndexer::Index), :verifier_url (String),
    # :verifier_api_key (String), :email_fields, :phone_fields and
    # :company_domain_fields (Arrays of String).
    #
    # Raises RuntimeError listing every validation error; in that case no
    # value is changed.
    def self.set(h)
      errors = []

      # the index class is only resolved when :indexes is actually given
      errors.concat(array_errors(h, :indexes, BlackStack::CSVIndexer::Index)) if h[:indexes]

      errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
      errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)

      [:email_fields, :phone_fields, :company_domain_fields].each { |key|
        errors.concat(array_errors(h, key, String))
      }

      # validation errors used to be collected and then ignored
      raise "Invalid configuration: #{errors.join(' ')}" unless errors.empty?

      # mapping
      @@indexes = h[:indexes] if h[:indexes]
      @@verifier_url = h[:verifier_url] if h[:verifier_url]
      @@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
      @@email_fields = h[:email_fields] if h[:email_fields]
      @@phone_fields = h[:phone_fields] if h[:phone_fields]
      @@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
    end

    # Return true if the domain accepts a random (non-existent) address as
    # valid, i.e. verification results for that domain are meaningless.
    #
    # This is a support method for the `append` methods.
    # The end-user should not call this method directly.
    def self.catch_all?(domain)
      BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
    end

    # Verify an email address by calling the HTTP endpoint at `verifier_url`
    # with the address as the `email` parameter. Returns true when the JSON
    # response has 'status' == 'success'.
    #
    # NOTE(review): `verifier_api_key` is stored but not sent by this call;
    # confirm whether the endpoint expects it.
    #
    # This is a support method for the `append` methods.
    # The end-user should not call this method directly.
    def self.verify(email)
      params = {
        :email => email,
      }
      res = BlackStack::Netting::call_get(@@verifier_url, params)
      JSON.parse(res.body)['status'] == 'success'
    end

    # Guess the email addresses of a person at a domain.
    #
    # Builds the usual first/last name combinations, verifies each one and
    # returns the verified addresses downcased. Returns [] when the domain is
    # catch-all. nil names are treated as empty; candidates whose local part
    # is empty or starts/ends with a dot, and repeated candidates, are skipped
    # so they do not cost a verification call.
    def self.append(fname, lname, domain)
      return [] if catch_all?(domain)

      fname = fname.to_s
      lname = lname.to_s
      # NOTE(review): hard-coded address; nothing else in this module uses
      # EmailVerifier, the call is kept for its global configuration effect.
      EmailVerifier.config do |config|
        config.verifier_email = "leandro.sardi@expandedventure.com"
      end

      candidates = [
        "#{fname}@#{domain}",
        "#{lname}@#{domain}",

        "#{fname}.#{lname}@#{domain}",
        "#{lname}.#{fname}@#{domain}",

        "#{fname}#{lname}@#{domain}",
        "#{lname}#{fname}@#{domain}",

        "#{fname[0]}#{lname}@#{domain}",
        "#{fname[0]}.#{lname}@#{domain}",
      ]

      candidates.uniq
                .select { |email| well_formed_local_part?(email) }
                .select { |email| verify(email) }
                .map(&:downcase)
    end

    # First run of ASCII letters in `name` ('' if there is none or name is nil).
    #
    # This is a support method for the `append` methods.
    # The end-user should not call this method directly.
    def self.cleanup_fname(name)
      # scan (instead of split) so leading/repeated separators never yield ''
      name.to_s.scan(/[a-zA-Z]+/).first.to_s
    end

    # Second run of ASCII letters in `name` ('' if there is none or name is nil).
    #
    # This is a support method for the `append` methods.
    # The end-user should not call this method directly.
    def self.cleanup_lname(name)
      name.to_s.scan(/[a-zA-Z]+/)[1].to_s
    end

    # Normalize a company name (possibly a "Title at Company" headline).
    # Returns at most the first three words; '' for nil or empty input.
    #
    # This is a support method for the `append` methods.
    # The end-user should not call this method directly.
    def self.cleanup_company(company)
      return '' if company.nil?
      # stage 1: keep what follows the last " at " and drop company-type
      # suffixes; word boundaries keep names like "Incredible" intact
      text = company.to_s.split(/ at /).last.to_s
      text = text.gsub(/\b(?:LLC|Inc)\b/, '')
      # stage 2: remove LinkedIn suffixes
      text = text.gsub(/\(\d\d\d\d - Present\)/, '')
      # stage 3: remove characters other than letters, digits, `,`, `.`, `-` and spaces
      text = text.gsub(/[^a-zA-Z0-9,\.\-\s]/, '')
      # stage 4: remove trailing dots, commas and spaces
      text = text.sub(/[\s.,]+\z/, '').strip
      # stage 5: choose the first part of the company name
      text.split(' ').first(3).join(' ')
    end

    # Find a person in the indexes by its full name and company name.
    # The first two words of `name` are used as first and last name.
    # Returns an array of Result, one per index row found.
    def self.find_persons_with_full_name(name, cname)
      l = BlackStack::Appending.logger || BlackStack::DummyLogger.new

      l.logs "Guessing fname from #{name}... "
      fname = BlackStack::Appending::cleanup_fname(name)
      l.logf fname

      l.logs "Guessing lname from #{name}... "
      lname = BlackStack::Appending::cleanup_lname(name)
      l.logf lname

      BlackStack::Appending.find_persons(fname, lname, cname)
    end

    # Find a person in the indexes by its first name, last name and company
    # name (the company name is normalized with `cleanup_company` first).
    # Returns an array of Result and updates `report`.
    def self.find_persons(fname, lname, cname)
      l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
      # cleaning up company name
      l.logs "Cleaning up company name #{cname}... "
      cname = BlackStack::Appending::cleanup_company(cname)
      l.logf cname
      # non-exact search, key order as declared in INDEX_KEYS
      search_indexes([cname, fname, lname], false)
    end

    # Find all the persons of a company in the indexes, searching by company
    # name only (exact match, the name is used as given).
    # Returns an array of Result and updates `report`.
    def self.find_persons_by_company(cname)
      search_indexes([cname], true)
    end

    # Collect verified email addresses of a person.
    #
    # Emails found in the indexes are kept when they verify and their domain
    # is not catch-all; then addresses are guessed (see `append`) for every
    # company domain found in the indexes. Returns a duplicate-free array.
    def self.find_verified_emails(fname, lname, cname)
      l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
      verified_emails = []
      # get lead emails from the indexes
      l.logs "Searching index emails... "
      emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
        res.emails
      }.flatten.uniq.reject { |email|
        email.to_s.empty?
      }
      l.done
      # get company domains from the indexes
      l.logs "Searching index domains... "
      domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
        res.company_domains
      }.flatten.reject { |domain|
        domain.to_s.empty?
      }.map { |domain|
        # normalize domain; downcase first so "WWW." is removed too
        domain.to_s.downcase.gsub('www.', '')
      }.uniq
      l.done
      # verify all the emails found in the indexes
      l.logs "Verifying index emails... "
      catch_all = {} # domain => result, so each domain is probed only once
      emails.each { |email|
        l.logs "Verifying #{email}... "
        domain = email.split('@').last
        if BlackStack::Appending.verify(email)
          catch_all[domain] = BlackStack::Appending.catch_all?(domain) unless catch_all.key?(domain)
          verified_emails << email unless catch_all[domain]
        end
        l.done
      }
      l.done
      # appending with domains found in the indexes
      l.logs "Appending with domains... "
      domains.each { |domain|
        l.logs "Appending with #{domain}... "
        verified_emails += BlackStack::Appending.append(fname, lname, domain)
        l.done
      }
      l.done
      # return
      verified_emails.uniq
    end

    # Validation messages for an optional array-valued configuration key:
    # [] when the key is absent or valid.
    def self.array_errors(h, key, klass)
      value = h[key]
      return [] unless value
      return ["Invalid :#{key}: #{value.class}. Expected: Array."] unless value.is_a?(Array)
      value.reject { |item| item.is_a?(klass) }
           .map { |item| "Invalid :#{key}: #{item.class}. Expected: #{klass}." }
    end

    # True when the part before the '@' is non-empty and does not start or
    # end with a dot.
    def self.well_formed_local_part?(email)
      local = email.split('@').first.to_s
      !local.empty? && !local.start_with?('.') && !local.end_with?('.')
    end

    # Run `find(keys, exact_match, nil)` on every registered index, prepend
    # the index name to each match, store the totals in `report` and wrap
    # every match into a Result.
    def self.search_indexes(keys, exact_match)
      l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
      h = {
        :matches => [],
        :enlapsed_seconds => 0,
        :files_processed => 0,
      }
      BlackStack::Appending.indexes.each { |i|
        l.logs "Searching into #{i.name}... "
        ret = i.find(keys.dup, exact_match, nil)
        # add the name of the index in the first position of the match
        ret[:matches].each { |m| m.unshift(i.name.to_s) }
        # add matches to the list
        h[:matches] += ret[:matches]
        # sum the total files and the total enlapsed seconds
        h[:enlapsed_seconds] += ret[:enlapsed_seconds]
        h[:files_processed] += ret[:files_processed]
        l.done
      }
      # update report
      @@report = h
      # return results
      h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
    end

    private_class_method :array_errors, :well_formed_local_part?, :search_indexes

    # One row found in an index.
    class Result
      # array of values.
      # first 3 values are index name, key and row-number.
      attr_accessor :match

      def initialize(a)
        self.match = a
      end

      # Get the value of a field by its name, as a String.
      #
      # The field position is looked up in the `mapping` of the index
      # registered in BlackStack::CSVIndexer under the name stored in
      # `match[0]`. Returns nil if that index or the field is not found.
      def value(field)
        # get the index_name
        index_name = match[0].to_s
        # get the index descriptor; names are compared as strings because
        # `match[0]` is always a String while the index name may be a Symbol
        index = BlackStack::CSVIndexer.indexes.find { |i| i.name.to_s == index_name }
        return nil if index.nil?
        # get position of the field into the hash descriptor
        k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
        # return nil if the field is not found
        return nil if k.nil?
        # field values start after index name, key and row-number
        match[k+3].to_s
      end

      # Call value() method.
      def val(field)
        self.value(field)
      end

      # Values of the fields configured in Appending.email_fields.
      def emails()
        values_for(BlackStack::Appending.email_fields)
      end

      # Values of the fields configured in Appending.phone_fields.
      def phones()
        values_for(BlackStack::Appending.phone_fields)
      end

      # Values of the fields configured in Appending.company_domain_fields.
      def company_domains()
        values_for(BlackStack::Appending.company_domain_fields)
      end

      private

      # Values of the given fields, skipping the fields not found.
      def values_for(fields)
        fields.map { |k| value(k) }.compact
      end
    end # class Result

  end # Appending
end # BlackStack
|
metadata
CHANGED
@@ -1,15 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appending
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0
|
4
|
+
version: '1.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leandro Daniel Sardi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csv
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 3.2.2
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 3.2.2
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 3.2.2
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 3.2.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: email_verifier
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.1.0
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.1.0
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.1.0
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.1.0
|
13
53
|
- !ruby/object:Gem::Dependency
|
14
54
|
name: blackstack-core
|
15
55
|
requirement: !ruby/object:Gem::Requirement
|
@@ -136,8 +176,8 @@ executables: []
|
|
136
176
|
extensions: []
|
137
177
|
extra_rdoc_files: []
|
138
178
|
files:
|
139
|
-
- appending.rb
|
140
|
-
homepage: https://rubygems.org/gems/
|
179
|
+
- lib/appending.rb
|
180
|
+
homepage: https://rubygems.org/gems/appending
|
141
181
|
licenses:
|
142
182
|
- MIT
|
143
183
|
metadata: {}
|
data/appending.rb
DELETED
@@ -1,141 +0,0 @@
|
|
1
|
-
require 'csv'
|
2
|
-
require 'email_verifier'
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
puts "Appending is in development. Please check back later."
|
6
|
-
|
7
|
-
=begin
|
8
|
-
module BlackStack
|
9
|
-
module Appending
|
10
|
-
# This class is used to parse the HTML files downloaded from Sales Navigator and other sources.
|
11
|
-
module Parser
|
12
|
-
# parse search results pages from sales navigator, and save the company name and full name into a CSV file
|
13
|
-
def self.parse_sales_navigator_result_pages(search_name, l=nil)
|
14
|
-
# create logger if not passed
|
15
|
-
l = BlackStack::DummyLogger.new(nil) if l.nil?
|
16
|
-
# define output filename
|
17
|
-
output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
|
18
|
-
raise 'Output file already exists.' if File.exists?(output_file)
|
19
|
-
output = File.open(output_file, 'w')
|
20
|
-
# parse
|
21
|
-
i = 0
|
22
|
-
source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
|
23
|
-
Dir.glob(source).each do |file|
|
24
|
-
doc = Nokogiri::HTML(open(file))
|
25
|
-
lis = doc.xpath('//li[contains(@class, "artdeco-list__item")]')
|
26
|
-
lis.each { |li|
|
27
|
-
i += 1
|
28
|
-
doc2 = Nokogiri::HTML(li.inner_html)
|
29
|
-
# this is where to find the full name of the lead
|
30
|
-
n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
|
31
|
-
# this is where to find the name of the company, when it has a link to a linkedin company page
|
32
|
-
n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
|
33
|
-
# this is where to find the name of the company, when it has not a link to a linkedin company page
|
34
|
-
company_name = nil
|
35
|
-
if n2
|
36
|
-
company_name = n2.text
|
37
|
-
else
|
38
|
-
n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
|
39
|
-
if n2
|
40
|
-
company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last.strip
|
41
|
-
end
|
42
|
-
end
|
43
|
-
# add the information to the output file
|
44
|
-
line = []
|
45
|
-
line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
|
46
|
-
line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
|
47
|
-
l.logs "#{i.to_s}, #{line.join(',')}... "
|
48
|
-
output.puts line.join(',')
|
49
|
-
output.flush
|
50
|
-
l.done
|
51
|
-
}
|
52
|
-
end
|
53
|
-
# close output file
|
54
|
-
output.close
|
55
|
-
end # def self.parse_sales_navigator_result_pages(search_name)
|
56
|
-
end # module Parser
|
57
|
-
|
58
|
-
# return true if the domain get any random address as valid
|
59
|
-
def self.catch_all?(domain)
|
60
|
-
BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
|
61
|
-
end
|
62
|
-
|
63
|
-
# verify an email address using the AWS IP address of our website, wich is more reliable
|
64
|
-
def self.verify(email)
|
65
|
-
url = "https://connectionsphere.com/api1.0/emails/verify.json"
|
66
|
-
params = {
|
67
|
-
:email => email,
|
68
|
-
}
|
69
|
-
res = BlackStack::Netting::call_get(url, params)
|
70
|
-
parsed = JSON.parse(res.body)
|
71
|
-
parsed['status'] == 'success'
|
72
|
-
end
|
73
|
-
|
74
|
-
# verify an email address
|
75
|
-
def self.append(fname, lname, domain)
|
76
|
-
ret = []
|
77
|
-
if !catch_all?(domain)
|
78
|
-
EmailVerifier.config do |config|
|
79
|
-
config.verifier_email = "leandro.sardi@expandedventure.com"
|
80
|
-
end
|
81
|
-
[
|
82
|
-
"#{fname}@#{domain}",
|
83
|
-
"#{lname}@#{domain}",
|
84
|
-
|
85
|
-
"#{fname}.#{lname}@#{domain}",
|
86
|
-
"#{lname}.#{fname}@#{domain}",
|
87
|
-
|
88
|
-
"#{fname}#{lname}@#{domain}",
|
89
|
-
"#{lname}#{fname}@#{domain}",
|
90
|
-
|
91
|
-
"#{fname[0]}#{lname}@#{domain}",
|
92
|
-
"#{fname[0]}.#{lname}@#{domain}",
|
93
|
-
].each { |email|
|
94
|
-
ret << email.downcase if verify(email)
|
95
|
-
}
|
96
|
-
end
|
97
|
-
ret
|
98
|
-
end
|
99
|
-
|
100
|
-
def self.cleanup_fname(name)
|
101
|
-
return '' if name.nil?
|
102
|
-
a = name.split(/[^a-zA-Z]/)
|
103
|
-
a.size > 0 ? a[0] : ''
|
104
|
-
end
|
105
|
-
|
106
|
-
def self.cleanup_lname(name)
|
107
|
-
return '' if name.nil?
|
108
|
-
a = name.split(/[^a-zA-Z]/)
|
109
|
-
a.size > 1 ? a[1] : ''
|
110
|
-
end
|
111
|
-
|
112
|
-
def self.cleanup_company(company)
|
113
|
-
return '' if company.nil?
|
114
|
-
ret = ''
|
115
|
-
# stage 1: remove company-type suffixes
|
116
|
-
company = company.split(/ at /).last
|
117
|
-
company.gsub!(/LLC/, '')
|
118
|
-
company.gsub!(/Inc/, '')
|
119
|
-
company.strip! # remove leading and trailing spaces
|
120
|
-
# stage 2: remove LinkedIn suffixes
|
121
|
-
company.gsub!(/\(\d\d\d\d - Present\)/, '')
|
122
|
-
company.strip! # remove leading and trailing spaces
|
123
|
-
# stege 3: remove non-alphanumeric characters
|
124
|
-
company.gsub!(/\.$/, '')
|
125
|
-
company.gsub!(/\,$/, '')
|
126
|
-
# stege 4: remove extra spaces
|
127
|
-
company.gsub!(/[^a-zA-Z0-9,\.\-\s]/, '') # remove extra spaces
|
128
|
-
company.strip! # remove leading and trailing spaces
|
129
|
-
# stage 5: choose the first part of the company name
|
130
|
-
company.split(' ').each { |word|
|
131
|
-
ret += word + ' '
|
132
|
-
#break if word.size >= 5 || ret.split(' ').size > 2
|
133
|
-
break if ret.split(' ').size > 2
|
134
|
-
}
|
135
|
-
ret.strip!
|
136
|
-
# return
|
137
|
-
ret
|
138
|
-
end
|
139
|
-
end # Appending
|
140
|
-
end # BlackStack
|
141
|
-
=end
|