appending 0.3 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/appending.rb +367 -47
- metadata +22 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
|
4
|
+
data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
|
7
|
+
data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
|
data/lib/appending.rb
CHANGED
@@ -1,63 +1,185 @@
|
|
1
1
|
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'csv-indexer'
|
4
|
+
require 'simple_cloud_logging'
|
2
5
|
|
3
6
|
module BlackStack
|
4
7
|
module Appending
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
8
|
+
@@logger = nil
|
9
|
+
@@report = nil
|
10
|
+
@@indexes = []
|
11
|
+
@@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
|
12
|
+
@@verifier_api_key = nil
|
13
|
+
@@email_fields = []
|
14
|
+
@@phone_fields = []
|
15
|
+
@@company_domain_fields = []
|
16
|
+
|
17
|
+
## @@logger
|
18
|
+
def self.set_logger(logger)
|
19
|
+
@@logger = logger
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.logger
|
23
|
+
@@logger
|
24
|
+
end
|
25
|
+
|
26
|
+
## @@indexes
|
27
|
+
def self.add_index(index)
|
28
|
+
expected = [:company_name, :first_name, :last_name]
|
29
|
+
|
30
|
+
# validation: keys must be `[:company_name, :first_name, :last_name]`
|
31
|
+
if !index.keys.eql?(expected)
|
32
|
+
raise "Invalid index: #{index.keys}. Expected: #{expected}."
|
33
|
+
end
|
34
|
+
# add the index
|
35
|
+
@@indexes << index
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.set_indexes(indexes)
|
39
|
+
@@indexes = indexes
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.indexes
|
43
|
+
@@indexes
|
44
|
+
end
|
45
|
+
|
46
|
+
# @@report
|
47
|
+
def self.report
|
48
|
+
@@report
|
49
|
+
end
|
50
|
+
|
51
|
+
# @@verifier_url
|
52
|
+
def self.set_verifier_url(url)
|
53
|
+
@@verifier_url = url
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.verifier_url
|
57
|
+
@@verifier_url
|
58
|
+
end
|
59
|
+
|
60
|
+
# @@verifier_api_key
|
61
|
+
def self.set_verifier_api_key(key)
|
62
|
+
@@verifier_api_key = key
|
63
|
+
end
|
64
|
+
|
65
|
+
def self.verifier_api_key
|
66
|
+
@@verifier_api_key
|
67
|
+
end
|
68
|
+
|
69
|
+
# @@email_fields
|
70
|
+
def self.set_email_fields(fields)
|
71
|
+
@@email_fields = fields
|
72
|
+
end
|
73
|
+
|
74
|
+
def self.email_fields
|
75
|
+
@@email_fields
|
76
|
+
end
|
77
|
+
|
78
|
+
# @@phone_fields
|
79
|
+
def self.set_phone_fields(fields)
|
80
|
+
@@phone_fields = fields
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.phone_fields
|
84
|
+
@@phone_fields
|
85
|
+
end
|
86
|
+
|
87
|
+
# @@company_domain_fields
|
88
|
+
def self.set_company_fields(fields)
|
89
|
+
@@company_domain_fields = fields
|
90
|
+
end
|
91
|
+
|
92
|
+
def self.company_domain_fields
|
93
|
+
@@company_domain_fields
|
94
|
+
end
|
95
|
+
|
96
|
+
# set configuration
|
97
|
+
def self.set(h)
|
98
|
+
errors = []
|
99
|
+
|
100
|
+
# validation: if :indexes is present, it must be an array of objects BlackStack::CSVIndexer::Index
|
101
|
+
if h[:indexes]
|
102
|
+
if !h[:indexes].is_a?(Array)
|
103
|
+
errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
|
104
|
+
else
|
105
|
+
h[:indexes].each { |index|
|
106
|
+
if !index.is_a?(BlackStack::CSVIndexer::Index)
|
107
|
+
errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
|
37
108
|
end
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
109
|
+
}
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
# validation: if :verifier_url is present, it must be a string
|
114
|
+
errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
|
115
|
+
|
116
|
+
# validation: if :verifier_api_key is present, it must be a string
|
117
|
+
errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)
|
118
|
+
|
119
|
+
# validation: if :email_fields is present, it must be an array of strings
|
120
|
+
if h[:email_fields]
|
121
|
+
if !h[:email_fields].is_a?(Array)
|
122
|
+
errors << "Invalid :email_fields: #{h[:email_fields].class}. Expected: Array."
|
123
|
+
else
|
124
|
+
h[:email_fields].each { |field|
|
125
|
+
if !field.is_a?(String)
|
126
|
+
errors << "Invalid :email_fields: #{field.class}. Expected: String."
|
127
|
+
end
|
128
|
+
}
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
# validation: if :phone_fields is present, it must be an array of strings
|
133
|
+
if h[:phone_fields]
|
134
|
+
if !h[:phone_fields].is_a?(Array)
|
135
|
+
errors << "Invalid :phone_fields: #{h[:phone_fields].class}. Expected: Array."
|
136
|
+
else
|
137
|
+
h[:phone_fields].each { |field|
|
138
|
+
if !field.is_a?(String)
|
139
|
+
errors << "Invalid :phone_fields: #{field.class}. Expected: String."
|
140
|
+
end
|
141
|
+
}
|
142
|
+
end
|
143
|
+
end
|
144
|
+
|
145
|
+
# validation: if :company_domain_fields is present, it must be an array of strings
|
146
|
+
if h[:company_domain_fields]
|
147
|
+
if !h[:company_domain_fields].is_a?(Array)
|
148
|
+
errors << "Invalid :company_domain_fields: #{h[:company_domain_fields].class}. Expected: Array."
|
149
|
+
else
|
150
|
+
h[:company_domain_fields].each { |field|
|
151
|
+
if !field.is_a?(String)
|
152
|
+
errors << "Invalid :company_domain_fields: #{field.class}. Expected: String."
|
153
|
+
end
|
154
|
+
}
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
# mapping
|
159
|
+
@@indexes = h[:indexes] if h[:indexes]
|
160
|
+
@@verifier_url = h[:verifier_url] if h[:verifier_url]
|
161
|
+
@@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
|
162
|
+
@@email_fields = h[:email_fields] if h[:email_fields]
|
163
|
+
@@phone_fields = h[:phone_fields] if h[:phone_fields]
|
164
|
+
@@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
|
165
|
+
end
|
52
166
|
|
53
167
|
# return true if the domain accepts any random address as valid (i.e. it is a catch-all domain)
|
168
|
+
#
|
169
|
+
# This is a support method for the `append` methods.
|
170
|
+
# The end-user should not call this method directly.
|
171
|
+
#
|
54
172
|
def self.catch_all?(domain)
|
55
173
|
BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
|
56
174
|
end
|
57
175
|
|
58
176
|
# verify an email address using the AWS IP address of our website, which is more reliable
|
177
|
+
#
|
178
|
+
# This is a support method for the `append` methods.
|
179
|
+
# The end-user should not call this method directly.
|
180
|
+
#
|
59
181
|
def self.verify(email)
|
60
|
-
url =
|
182
|
+
url = @@verifier_url
|
61
183
|
params = {
|
62
184
|
:email => email,
|
63
185
|
}
|
@@ -92,18 +214,24 @@ module BlackStack
|
|
92
214
|
ret
|
93
215
|
end
|
94
216
|
|
217
|
+
# This is a support method for the `append` methods.
|
218
|
+
# The end-user should not call this method directly.
|
95
219
|
def self.cleanup_fname(name)
|
96
220
|
return '' if name.nil?
|
97
221
|
a = name.split(/[^a-zA-Z]/)
|
98
222
|
a.size > 0 ? a[0] : ''
|
99
223
|
end
|
100
224
|
|
225
|
+
# This is a support method for the `append` methods.
|
226
|
+
# The end-user should not call this method directly.
|
101
227
|
def self.cleanup_lname(name)
|
102
228
|
return '' if name.nil?
|
103
229
|
a = name.split(/[^a-zA-Z]/)
|
104
230
|
a.size > 1 ? a[1] : ''
|
105
231
|
end
|
106
232
|
|
233
|
+
# This is a support method for the `append` methods.
|
234
|
+
# The end-user should not call this method directly.
|
107
235
|
def self.cleanup_company(company)
|
108
236
|
return '' if company.nil?
|
109
237
|
ret = ''
|
@@ -131,5 +259,197 @@ module BlackStack
|
|
131
259
|
# return
|
132
260
|
ret
|
133
261
|
end
|
262
|
+
|
263
|
+
# Find a person in the indexes by its full name and company name.
|
264
|
+
# Append all the information in the index row.
|
265
|
+
def self.find_persons_with_full_name(name, cname)
|
266
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
267
|
+
|
268
|
+
l.logs "Guessing fname from #{name}... "
|
269
|
+
fname = BlackStack::Appending::cleanup_fname(name)
|
270
|
+
l.logf fname
|
271
|
+
|
272
|
+
l.logs "Guessing lname from #{name}... "
|
273
|
+
lname = BlackStack::Appending::cleanup_lname(name)
|
274
|
+
l.logf lname
|
275
|
+
|
276
|
+
BlackStack::Appending.find_persons(fname, lname, cname)
|
277
|
+
end
|
278
|
+
|
279
|
+
# Find a person in the indexes by its first name, last name and company name.
|
280
|
+
# Append all the information in the index row.
|
281
|
+
def self.find_persons(fname, lname, cname)
|
282
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
283
|
+
h = {
|
284
|
+
:matches => [],
|
285
|
+
:enlapsed_seconds => 0,
|
286
|
+
:files_processed => 0,
|
287
|
+
}
|
288
|
+
# cleaning up company name
|
289
|
+
l.logs "Cleaning up company name #{cname}... "
|
290
|
+
cname = BlackStack::Appending::cleanup_company(cname)
|
291
|
+
l.logf cname
|
292
|
+
# looking for a record that matches with first name, last name and company name
|
293
|
+
appends = []
|
294
|
+
enlapsed_seconds = 0
|
295
|
+
files_processed = 0
|
296
|
+
BlackStack::Appending.indexes.each { |i|
|
297
|
+
l.logs "Searching into #{i.name}... "
|
298
|
+
ret = i.find([cname, fname, lname], false, nil)
|
299
|
+
# add the name of the index in the first position of the match
|
300
|
+
ret[:matches].each { |m| m.unshift(i.name.to_s) }
|
301
|
+
# add matches to the list
|
302
|
+
h[:matches] += ret[:matches]
|
303
|
+
# sum the total files and the total elapsed seconds
|
304
|
+
h[:enlapsed_seconds] += ret[:enlapsed_seconds]
|
305
|
+
h[:files_processed] += ret[:files_processed]
|
306
|
+
l.done
|
307
|
+
}
|
308
|
+
# update report
|
309
|
+
@@report = h
|
310
|
+
# return results
|
311
|
+
h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
|
312
|
+
end
|
313
|
+
|
314
|
+
# Find persons in the indexes by company name only.
|
315
|
+
# Append all the information in the index row.
|
316
|
+
def self.find_persons_by_company(cname)
|
317
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
318
|
+
h = {
|
319
|
+
:matches => [],
|
320
|
+
:enlapsed_seconds => 0,
|
321
|
+
:files_processed => 0,
|
322
|
+
}
|
323
|
+
# looking for records that match the company name
|
324
|
+
appends = []
|
325
|
+
enlapsed_seconds = 0
|
326
|
+
files_processed = 0
|
327
|
+
BlackStack::Appending.indexes.each { |i|
|
328
|
+
l.logs "Searching into #{i.name}... "
|
329
|
+
ret = i.find([cname], true, nil)
|
330
|
+
# add the name of the index in the first position of the match
|
331
|
+
ret[:matches].each { |m| m.unshift(i.name.to_s) }
|
332
|
+
# add matches to the list
|
333
|
+
h[:matches] += ret[:matches]
|
334
|
+
# sum the total files and the total elapsed seconds
|
335
|
+
h[:enlapsed_seconds] += ret[:enlapsed_seconds]
|
336
|
+
h[:files_processed] += ret[:files_processed]
|
337
|
+
l.done
|
338
|
+
}
|
339
|
+
# update report
|
340
|
+
@@report = h
|
341
|
+
# return results
|
342
|
+
h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
|
343
|
+
end
|
344
|
+
|
345
|
+
def self.find_verified_emails(fname, lname, cname)
|
346
|
+
l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
|
347
|
+
emails = []
|
348
|
+
domains = []
|
349
|
+
verified_emails = []
|
350
|
+
# get lead emails from the indexes
|
351
|
+
l.logs ("Searching index emails... ")
|
352
|
+
emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
|
353
|
+
res.emails
|
354
|
+
}.flatten.uniq.reject { |email|
|
355
|
+
email.to_s.empty?
|
356
|
+
}
|
357
|
+
l.done
|
358
|
+
# get company domains from the indexes
|
359
|
+
l.logs ("Searching index domains... ")
|
360
|
+
domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
|
361
|
+
res.company_domains
|
362
|
+
}.flatten.reject { |email|
|
363
|
+
email.to_s.empty?
|
364
|
+
}.map { |domain|
|
365
|
+
# normalize domain
|
366
|
+
domain.to_s.gsub('www.', '').downcase
|
367
|
+
}.uniq
|
368
|
+
l.done
|
369
|
+
# verify all the emails found in the indexes
|
370
|
+
l.logs ("Verifying index emails... ")
|
371
|
+
emails.each { |email|
|
372
|
+
l.logs "Verifying #{email}... "
|
373
|
+
domain = email.split('@').last
|
374
|
+
verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
|
375
|
+
l.done
|
376
|
+
}
|
377
|
+
l.done
|
378
|
+
# appending with domains found in the indexes
|
379
|
+
l.logs ("Appending with domains... ")
|
380
|
+
domains.each { |domain|
|
381
|
+
l.logs "Appending with #{domain}... "
|
382
|
+
verified_emails += BlackStack::Appending.append(fname, lname, domain)
|
383
|
+
l.done
|
384
|
+
}
|
385
|
+
l.done
|
386
|
+
# return
|
387
|
+
verified_emails.uniq
|
388
|
+
end
|
389
|
+
|
390
|
+
#
|
391
|
+
class Result
|
392
|
+
# array of values.
|
393
|
+
# first 3 values are index name, key and row-number.
|
394
|
+
attr_accessor :match
|
395
|
+
|
396
|
+
def initialize(a)
|
397
|
+
self.match = a
|
398
|
+
end
|
399
|
+
|
400
|
+
# From a given match (with the name of its index in the first position), get the value of a field by its name.
|
401
|
+
def value(field)
|
402
|
+
# get the index_name
|
403
|
+
index_name = match[0]
|
404
|
+
# get the index descriptor
|
405
|
+
index = BlackStack::CSVIndexer.indexes.select { |i| i.name == index_name }.first
|
406
|
+
# get position of the field in the hash descriptor
|
407
|
+
k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
|
408
|
+
# return nil if the field is not found
|
409
|
+
return nil if k.nil?
|
410
|
+
# get the field value
|
411
|
+
match[k+3].to_s
|
412
|
+
end
|
413
|
+
|
414
|
+
# Call value() method.
|
415
|
+
def val(field)
|
416
|
+
self.value(field)
|
417
|
+
end
|
418
|
+
|
419
|
+
# From a given match (with the name of its index in the first position), get the email addresses.
|
420
|
+
def emails()
|
421
|
+
keys = BlackStack::Appending.email_fields
|
422
|
+
ret = []
|
423
|
+
keys.each { |k|
|
424
|
+
v = self.value(k)
|
425
|
+
ret << v if v
|
426
|
+
}
|
427
|
+
ret
|
428
|
+
end
|
429
|
+
|
430
|
+
# From a given match (with the name of its index in the first position), get the phone numbers.
|
431
|
+
def phones()
|
432
|
+
keys = BlackStack::Appending.phone_fields
|
433
|
+
ret = []
|
434
|
+
keys.each { |k|
|
435
|
+
v = self.value(k)
|
436
|
+
ret << v if v
|
437
|
+
}
|
438
|
+
ret
|
439
|
+
end
|
440
|
+
|
441
|
+
# From a given match (with the name of its index in the first position), get the company domains.
|
442
|
+
def company_domains()
|
443
|
+
keys = BlackStack::Appending.company_domain_fields
|
444
|
+
ret = []
|
445
|
+
keys.each { |k|
|
446
|
+
v = self.value(k)
|
447
|
+
ret << v if v
|
448
|
+
}
|
449
|
+
ret
|
450
|
+
end
|
451
|
+
|
452
|
+
end # class Result
|
453
|
+
|
134
454
|
end # Appending
|
135
455
|
end # BlackStack
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: appending
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.3'
|
4
|
+
version: '1.0'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Leandro Daniel Sardi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-12-
|
11
|
+
date: 2022-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csv
|
@@ -30,6 +30,26 @@ dependencies:
|
|
30
30
|
- - ">="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 3.2.2
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: email_verifier
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 0.1.0
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 0.1.0
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 0.1.0
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 0.1.0
|
33
53
|
- !ruby/object:Gem::Dependency
|
34
54
|
name: blackstack-core
|
35
55
|
requirement: !ruby/object:Gem::Requirement
|