appending 0.3 → 1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/appending.rb +367 -47
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: af430bc2a0068c4071ded22bc951e173e97fe4a6379b97bcdd0dbb2806eca428
|
4
|
+
data.tar.gz: b70f6eddb447ebafc4d52a9abbec52074a6c3b265bb7ad0dfc6e097a4eba4077
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5434061f4fae7f2fa1279e931b884cf73189a28e7b6c401d3adfb6af39ed27559649dd92cb0bcb7854eae3a074524dc9a0d55fb375868b91dc2ec31d2fc97356
|
7
|
+
data.tar.gz: 7b854b912585e614b44734af2969de6d97050e1265173eca39fab1395208db1bbbe0ecd2bdf5512c2b89f6ff155fc4184c21390f44c83898d50c89b54f94b686
|
data/lib/appending.rb
CHANGED
@@ -1,63 +1,185 @@
|
|
1
1
|
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'csv-indexer'
|
4
|
+
require 'simple_cloud_logging'
|
2
5
|
|
3
6
|
module BlackStack
|
4
7
|
module Appending
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
8
|
+
@@logger = nil
|
9
|
+
@@report = nil
|
10
|
+
@@indexes = []
|
11
|
+
@@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
|
12
|
+
@@verifier_api_key = nil
|
13
|
+
@@email_fields = []
|
14
|
+
@@phone_fields = []
|
15
|
+
@@company_domain_fields = []
|
16
|
+
|
17
|
+
## @@logger
|
18
|
+
def self.set_logger(logger)
|
19
|
+
@@logger = logger
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.logger
|
23
|
+
@@logger
|
24
|
+
end
|
25
|
+
|
26
|
+
## @@indexes
|
27
|
+
def self.add_index(index)
|
28
|
+
expected = [:company_name, :first_name, :last_name]
|
29
|
+
|
30
|
+
# validation: keys must be `[:company_name, :first_name, :last_name]`
|
31
|
+
if !index.keys.eql?(expected)
|
32
|
+
raise "Invalid index: #{index.keys}. Expected: #{expected}."
|
33
|
+
end
|
34
|
+
# add the index
|
35
|
+
@@indexes << index
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.set_indexes(indexes)
|
39
|
+
@@indexes = indexes
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.indexes
|
43
|
+
@@indexes
|
44
|
+
end
|
45
|
+
|
46
|
+
# @@report
|
47
|
+
def self.report
|
48
|
+
@@report
|
49
|
+
end
|
50
|
+
|
51
|
+
# @@verifier_url
|
52
|
+
def self.set_verifier_url(url)
|
53
|
+
@@verifier_url = url
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.verifier_url
|
57
|
+
@@verifier_url
|
58
|
+
end
|
59
|
+
|
60
|
+
# @@verifier_api_key
|
61
|
+
def self.set_verifier_api_key(key)
|
62
|
+
@@verifier_api_key = key
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.verifier_api_key
|
66
|
+
@@verifier_api_key
|
67
|
+
end
|
68
|
+
|
69
|
+
# @@email_fields
|
70
|
+
def self.set_email_fields(fields)
|
71
|
+
@@email_fields = fields
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.email_fields
|
75
|
+
@@email_fields
|
76
|
+
end
|
77
|
+
|
78
|
+
# @@phone_fields
|
79
|
+
def self.set_phone_fields(fields)
|
80
|
+
@@phone_fields = fields
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.phone_fields
|
84
|
+
@@phone_fields
|
85
|
+
end
|
86
|
+
|
87
|
+
# @@company_domain_fields
|
88
|
+
def self.set_company_fields(fields)
|
89
|
+
@@company_domain_fields = fields
|
90
|
+
end
|
91
|
+
|
92
|
+
def self.company_domain_fields
|
93
|
+
@@company_domain_fields
|
94
|
+
end
|
95
|
+
|
96
|
+
# set configuration
|
97
|
+
def self.set(h)
|
98
|
+
errors = []
|
99
|
+
|
100
|
+
# validation: if :indexes is present, it must be an array of objects BlackStack::CSVIndexer::Index
|
101
|
+
if h[:indexes]
|
102
|
+
if !h[:indexes].is_a?(Array)
|
103
|
+
errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
|
104
|
+
else
|
105
|
+
h[:indexes].each { |index|
|
106
|
+
if !index.is_a?(BlackStack::CSVIndexer::Index)
|
107
|
+
errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
|
37
108
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
109
|
+
}
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# validation: if :verifier_url is present, it must be a string
|
114
|
+
errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
|
115
|
+
|
116
|
+
# validation: if :verifier_api_key is present, it must be a string
|
117
|
+
errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)
|
118
|
+
|
119
|
+
# validation: if :email_fields is present, it must be an array of strings
|
120
|
+
if h[:email_fields]
|
121
|
+
if !h[:email_fields].is_a?(Array)
|
122
|
+
errors << "Invalid :email_fields: #{h[:email_fields].class}. Expected: Array."
|
123
|
+
else
|
124
|
+
h[:email_fields].each { |field|
|
125
|
+
if !field.is_a?(String)
|
126
|
+
errors << "Invalid :email_fields: #{field.class}. Expected: String."
|
127
|
+
end
|
128
|
+
}
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# validation: if :phone_fields is present, it must be an array of strings
|
133
|
+
if h[:phone_fields]
|
134
|
+
if !h[:phone_fields].is_a?(Array)
|
135
|
+
errors << "Invalid :phone_fields: #{h[:phone_fields].class}. Expected: Array."
|
136
|
+
else
|
137
|
+
h[:phone_fields].each { |field|
|
138
|
+
if !field.is_a?(String)
|
139
|
+
errors << "Invalid :phone_fields: #{field.class}. Expected: String."
|
140
|
+
end
|
141
|
+
}
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# validation: if :company_domain_fields is present, it must be an array of strings
|
146
|
+
if h[:company_domain_fields]
|
147
|
+
if !h[:company_domain_fields].is_a?(Array)
|
148
|
+
errors << "Invalid :company_domain_fields: #{h[:company_domain_fields].class}. Expected: Array."
|
149
|
+
else
|
150
|
+
h[:company_domain_fields].each { |field|
|
151
|
+
if !field.is_a?(String)
|
152
|
+
errors << "Invalid :company_domain_fields: #{field.class}. Expected: String."
|
153
|
+
end
|
154
|
+
}
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
# mapping
|
159
|
+
@@indexes = h[:indexes] if h[:indexes]
|
160
|
+
@@verifier_url = h[:verifier_url] if h[:verifier_url]
|
161
|
+
@@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
|
162
|
+
@@email_fields = h[:email_fields] if h[:email_fields]
|
163
|
+
@@phone_fields = h[:phone_fields] if h[:phone_fields]
|
164
|
+
@@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
|
165
|
+
end
|
52
166
|
|
53
167
|
# return true if the domain get any random address as valid
|
168
|
+
#
|
169
|
+
# This is a support method for the `append` methods.
|
170
|
+
# The end-user should not call this method directly.
|
171
|
+
#
|
54
172
|
def self.catch_all?(domain)
|
55
173
|
BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
|
56
174
|
end
|
57
175
|
|
58
176
|
# verify an email address using the AWS IP address of our website, wich is more reliable
|
177
|
+
#
|
178
|
+
# This is a support method for the `append` methods.
|
179
|
+
# The end-user should not call this method directly.
|
180
|
+
#
|
59
181
|
def self.verify(email)
|
60
|
-
url =
|
182
|
+
url = @@verifier_url
|
61
183
|
params = {
|
62
184
|
:email => email,
|
63
185
|
}
|
@@ -92,18 +214,24 @@ module BlackStack
|
|
92
214
|
ret
|
93
215
|
end
|
94
216
|
|
217
|
+
# This is a support method for the `append` methods.
|
218
|
+
# The end-user should not call this method directly.
|
95
219
|
def self.cleanup_fname(name)
|
96
220
|
return '' if name.nil?
|
97
221
|
a = name.split(/[^a-zA-Z]/)
|
98
222
|
a.size > 0 ? a[0] : ''
|
99
223
|
end
|
100
224
|
|
225
|
+
# This is a support method for the `append` methods.
|
226
|
+
# The end-user should not call this method directly.
|
101
227
|
def self.cleanup_lname(name)
|
102
228
|
return '' if name.nil?
|
103
229
|
a = name.split(/[^a-zA-Z]/)
|
104
230
|
a.size > 1 ? a[1] : ''
|
105
231
|
end
|
106
232
|
|
233
|
+
# This is a support method for the `append` methods.
|
234
|
+
# The end-user should not call this method directly.
|
107
235
|
def self.cleanup_company(company)
|
108
236
|
return '' if company.nil?
|
109
237
|
ret = ''
|
@@ -131,5 +259,197 @@ module BlackStack
|
|
131
259
|
# return
|
132
260
|
ret
|
133
261
|
end
|
262
|
+
|
263
|
+
# Find a person in the indexes by its full name and company name.
|
264
|
+
# Append all the information in the index row.
|
265
|
+
def self.find_persons_with_full_name(name, cname)
|
266
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
267
|
+
|
268
|
+
l.logs "Guessing fname from #{name}... "
|
269
|
+
fname = BlackStack::Appending::cleanup_fname(name)
|
270
|
+
l.logf fname
|
271
|
+
|
272
|
+
l.logs "Guessing lname from #{name}... "
|
273
|
+
lname = BlackStack::Appending::cleanup_lname(name)
|
274
|
+
l.logf lname
|
275
|
+
|
276
|
+
BlackStack::Appending.find_persons(fname, lname, cname)
|
277
|
+
end
|
278
|
+
|
279
|
+
# Find a person in the indexes by its first name, last name and company name.
|
280
|
+
# Append all the information in the index row.
|
281
|
+
def self.find_persons(fname, lname, cname)
|
282
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
283
|
+
h = {
|
284
|
+
:matches => [],
|
285
|
+
:enlapsed_seconds => 0,
|
286
|
+
:files_processed => 0,
|
287
|
+
}
|
288
|
+
# cleaning up company name
|
289
|
+
l.logs "Cleaning up company name #{cname}... "
|
290
|
+
cname = BlackStack::Appending::cleanup_company(cname)
|
291
|
+
l.logf cname
|
292
|
+
# looking for a record that matches with first name, last name and company name
|
293
|
+
appends = []
|
294
|
+
enlapsed_seconds = 0
|
295
|
+
files_processed = 0
|
296
|
+
BlackStack::Appending.indexes.each { |i|
|
297
|
+
l.logs "Searching into #{i.name}... "
|
298
|
+
ret = i.find([cname, fname, lname], false, nil)
|
299
|
+
# add the name of the index in the last position of the match
|
300
|
+
ret[:matches].each { |m| m.unshift(i.name.to_s) }
|
301
|
+
# add matches to the list
|
302
|
+
h[:matches] += ret[:matches]
|
303
|
+
# sum the total files and the total enlapsed seconds
|
304
|
+
h[:enlapsed_seconds] += ret[:enlapsed_seconds]
|
305
|
+
h[:files_processed] += ret[:files_processed]
|
306
|
+
l.done
|
307
|
+
}
|
308
|
+
# update report
|
309
|
+
@@report = h
|
310
|
+
# return results
|
311
|
+
h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
|
312
|
+
end
|
313
|
+
|
314
|
+
# Find a company in the indexes by its first name, last name and company name.
|
315
|
+
# Append all the information in the index row.
|
316
|
+
def self.find_persons_by_company(cname)
|
317
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
318
|
+
h = {
|
319
|
+
:matches => [],
|
320
|
+
:enlapsed_seconds => 0,
|
321
|
+
:files_processed => 0,
|
322
|
+
}
|
323
|
+
# looking for a record that matches with first name, last name and company name
|
324
|
+
appends = []
|
325
|
+
enlapsed_seconds = 0
|
326
|
+
files_processed = 0
|
327
|
+
BlackStack::Appending.indexes.each { |i|
|
328
|
+
l.logs "Searching into #{i.name}... "
|
329
|
+
ret = i.find([cname], true, nil)
|
330
|
+
# add the name of the index in the last position of the match
|
331
|
+
ret[:matches].each { |m| m.unshift(i.name.to_s) }
|
332
|
+
# add matches to the list
|
333
|
+
h[:matches] += ret[:matches]
|
334
|
+
# sum the total files and the total enlapsed seconds
|
335
|
+
h[:enlapsed_seconds] += ret[:enlapsed_seconds]
|
336
|
+
h[:files_processed] += ret[:files_processed]
|
337
|
+
l.done
|
338
|
+
}
|
339
|
+
# update report
|
340
|
+
@@report = h
|
341
|
+
# return results
|
342
|
+
h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
|
343
|
+
end
|
344
|
+
|
345
|
+
def self.find_verified_emails(fname, lname, cname)
|
346
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
347
|
+
emails = []
|
348
|
+
domains = []
|
349
|
+
verified_emails = []
|
350
|
+
# get lead emails from in the indexes
|
351
|
+
l.logs ("Searching index emails... ")
|
352
|
+
emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
|
353
|
+
res.emails
|
354
|
+
}.flatten.uniq.reject { |email|
|
355
|
+
email.to_s.empty?
|
356
|
+
}
|
357
|
+
l.done
|
358
|
+
# get company domains from the indexes
|
359
|
+
l.logs ("Searching index domains... ")
|
360
|
+
domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
|
361
|
+
res.company_domains
|
362
|
+
}.flatten.reject { |email|
|
363
|
+
email.to_s.empty?
|
364
|
+
}.map { |domain|
|
365
|
+
# normalize domain
|
366
|
+
domain.to_s.gsub('www.', '').downcase
|
367
|
+
}.uniq
|
368
|
+
l.done
|
369
|
+
# verify all the emails found in the indexes
|
370
|
+
l.logs ("Verifying index emails... ")
|
371
|
+
emails.each { |email|
|
372
|
+
l.logs "Verifying #{email}... "
|
373
|
+
domain = email.split('@').last
|
374
|
+
verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
|
375
|
+
l.done
|
376
|
+
}
|
377
|
+
l.done
|
378
|
+
# appending with domains found in the indexes
|
379
|
+
l.logs ("Appending with domains... ")
|
380
|
+
domains.each { |domain|
|
381
|
+
l.logs "Appending with #{domain}... "
|
382
|
+
verified_emails += BlackStack::Appending.append(fname, lname, domain)
|
383
|
+
l.done
|
384
|
+
}
|
385
|
+
l.done
|
386
|
+
# return
|
387
|
+
verified_emails.uniq
|
388
|
+
end
|
389
|
+
|
390
|
+
#
|
391
|
+
class Result
|
392
|
+
# array of values.
|
393
|
+
# first 3 values are index name, key and row-number.
|
394
|
+
attr_accessor :match
|
395
|
+
|
396
|
+
def initialize(a)
|
397
|
+
self.match = a
|
398
|
+
end
|
399
|
+
|
400
|
+
# From a given match (with the name of its index in the first position), get the value of a field by its name.
|
401
|
+
def value(field)
|
402
|
+
# get the index_name
|
403
|
+
index_name = match[0]
|
404
|
+
# get the index descriptor
|
405
|
+
index = BlackStack::CSVIndexer.indexes.select { |i| i.name == index_name }.first
|
406
|
+
# get position of the field into the hash descriptior
|
407
|
+
k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
|
408
|
+
# return nil if the field is not found
|
409
|
+
return nil if k.nil?
|
410
|
+
# get the field value
|
411
|
+
match[k+3].to_s
|
412
|
+
end
|
413
|
+
|
414
|
+
# Call value() method.
|
415
|
+
def val(field)
|
416
|
+
self.value(field)
|
417
|
+
end
|
418
|
+
|
419
|
+
# From a given match (with the name of its index in the first position), get the email addresses.
|
420
|
+
def emails()
|
421
|
+
keys = BlackStack::Appending.email_fields
|
422
|
+
ret = []
|
423
|
+
keys.each { |k|
|
424
|
+
v = self.value(k)
|
425
|
+
ret << v if v
|
426
|
+
}
|
427
|
+
ret
|
428
|
+
end
|
429
|
+
|
430
|
+
# From a given match (with the name of its index in the first position), get the phone numbers.
|
431
|
+
def phones()
|
432
|
+
keys = BlackStack::Appending.phone_fields
|
433
|
+
ret = []
|
434
|
+
keys.each { |k|
|
435
|
+
v = self.value(k)
|
436
|
+
ret << v if v
|
437
|
+
}
|
438
|
+
ret
|
439
|
+
end
|
440
|
+
|
441
|
+
# From a given match (with the name of its index in the first position), get the company domains.
|
442
|
+
def company_domains()
|
443
|
+
keys = BlackStack::Appending.company_domain_fields
|
444
|
+
ret = []
|
445
|
+
keys.each { |k|
|
446
|
+
v = self.value(k)
|
447
|
+
ret << v if v
|
448
|
+
}
|
449
|
+
ret
|
450
|
+
end
|
451
|
+
|
452
|
+
end # class Result
|
453
|
+
|
134
454
|
end # Appending
|
135
455
|
end # BlackStack
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appending
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '
|
4
|
+
version: '1.1'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leandro Daniel Sardi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -30,6 +30,26 @@ dependencies:
|
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 3.2.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: email_verifier
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.1.0
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.1.0
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.1.0
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.1.0
|
33
53
|
- !ruby/object:Gem::Dependency
|
34
54
|
name: blackstack-core
|
35
55
|
requirement: !ruby/object:Gem::Requirement
|