appending 0.3 → 1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/appending.rb +367 -47
  3. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 947bf74ff34b74bf14eff43808faf649b534f1d7c4939d2c5cc1215f211d922b
4
- data.tar.gz: 663ba17cc651af3e83abd91b9dda7a256831d19a458ac04b46bddd721e6d6c55
3
+ metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
4
+ data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
5
5
  SHA512:
6
- metadata.gz: d182a0b718d74367247d25e1189c39c07438b56b5fd27dba8b8c47dc96fa8db44f87e2543740856a674631217b82006bcbe4689fea1c8a2248a9b6038304a62d
7
- data.tar.gz: 06acd4a0cbbea440c777b64593d89116f1d9aa370a6ebc34976b6aca868cfbbaba924494f3bb74d7d9b6668aba8b246522b8fb1c57f02a0cae3a3d29051c1b6f
6
+ metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
7
+ data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
data/lib/appending.rb CHANGED
@@ -1,63 +1,185 @@
1
1
  require 'csv'
2
+ require 'email_verifier'
3
+ require 'csv-indexer'
4
+ require 'simple_cloud_logging'
2
5
 
3
6
  module BlackStack
4
7
  module Appending
5
- # This class is used to parse the HTML files downloaded from Sales Navigator and other sources.
6
- module Parser
7
- # parse search results pages from sales navigator, and save the company name and full name into a CSV file
8
- def self.parse_sales_navigator_result_pages(search_name, l=nil)
9
- # create logger if not passed
10
- l = BlackStack::DummyLogger.new(nil) if l.nil?
11
- # define output filename
12
- output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
13
- raise 'Output file already exists.' if File.exists?(output_file)
14
- output = File.open(output_file, 'w')
15
- # parse
16
- i = 0
17
- source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
18
- Dir.glob(source).each do |file|
19
- doc = Nokogiri::HTML(open(file))
20
- lis = doc.xpath('//li[contains(@class, "artdeco-list__item")]')
21
- lis.each { |li|
22
- i += 1
23
- doc2 = Nokogiri::HTML(li.inner_html)
24
- # this is where to find the full name of the lead
25
- n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
26
- # this is where to find the name of the company, when it has a link to a linkedin company page
27
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
28
- # this is where to find the name of the company, when it has not a link to a linkedin company page
29
- company_name = nil
30
- if n2
31
- company_name = n2.text
32
- else
33
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
34
- if n2
35
- company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last.strip
36
- end
8
+ @@logger = nil
9
+ @@report = nil
10
+ @@indexes = []
11
+ @@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
12
+ @@verifier_api_key = nil
13
+ @@email_fields = []
14
+ @@phone_fields = []
15
+ @@company_domain_fields = []
16
+
17
+ ## @@logger
18
+ def self.set_logger(logger)
19
+ @@logger = logger
20
+ end
21
+
22
+ def self.logger
23
+ @@logger
24
+ end
25
+
26
+ ## @@indexes
27
+ def self.add_index(index)
28
+ expected = [:company_name, :first_name, :last_name]
29
+
30
+ # validation: keys must be `[:company_name, :first_name, :last_name]`
31
+ if !index.keys.eql?(expected)
32
+ raise "Invalid index: #{index.keys}. Expected: #{expected}."
33
+ end
34
+ # add the index
35
+ @@indexes << index
36
+ end
37
+
38
+ def self.set_indexes(indexes)
39
+ @@indexes = indexes
40
+ end
41
+
42
+ def self.indexes
43
+ @@indexes
44
+ end
45
+
46
+ # @@report
47
+ def self.report
48
+ @@report
49
+ end
50
+
51
+ # @@verifier_url
52
+ def self.set_verifier_url(url)
53
+ @@verifier_url = url
54
+ end
55
+
56
+ def self.verifier_url
57
+ @@verifier_url
58
+ end
59
+
60
+ # @@verifier_api_key
61
+ def self.set_verifier_api_key(key)
62
+ @@verifier_api_key = key
63
+ end
64
+
65
+ def self.verifier_api_key
66
+ @@verifier_api_key
67
+ end
68
+
69
+ # @@email_fields
70
+ def self.set_email_fields(fields)
71
+ @@email_fields = fields
72
+ end
73
+
74
+ def self.email_fields
75
+ @@email_fields
76
+ end
77
+
78
+ # @@phone_fields
79
+ def self.set_phone_fields(fields)
80
+ @@phone_fields = fields
81
+ end
82
+
83
+ def self.phone_fields
84
+ @@phone_fields
85
+ end
86
+
87
+ # @@company_domain_fields
88
+ def self.set_company_fields(fields)
89
+ @@company_domain_fields = fields
90
+ end
91
+
92
+ def self.company_domain_fields
93
+ @@company_domain_fields
94
+ end
95
+
96
+ # set configuration
97
+ def self.set(h)
98
+ errors = []
99
+
100
+ # validation: if :indexes is present, it must be an array of objects BlackStack::CSVIndexer::Index
101
+ if h[:indexes]
102
+ if !h[:indexes].is_a?(Array)
103
+ errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
104
+ else
105
+ h[:indexes].each { |index|
106
+ if !index.is_a?(BlackStack::CSVIndexer::Index)
107
+ errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
37
108
  end
38
- # add the information to the output file
39
- line = []
40
- line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
41
- line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
42
- l.logs "#{i.to_s}, #{line.join(',')}... "
43
- output.puts line.join(',')
44
- output.flush
45
- l.done
46
- }
47
- end
48
- # close output file
49
- output.close
50
- end # def self.parse_sales_navigator_result_pages(search_name)
51
- end # module Parser
109
+ }
110
+ end
111
+ end
112
+
113
+ # validation: if :verifier_url is present, it must be a string
114
+ errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
115
+
116
+ # validation: if :verifier_api_key is present, it must be a string
117
+ errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)
118
+
119
+ # validation: if :email_fields is present, it must be an array of strings
120
+ if h[:email_fields]
121
+ if !h[:email_fields].is_a?(Array)
122
+ errors << "Invalid :email_fields: #{h[:email_fields].class}. Expected: Array."
123
+ else
124
+ h[:email_fields].each { |field|
125
+ if !field.is_a?(String)
126
+ errors << "Invalid :email_fields: #{field.class}. Expected: String."
127
+ end
128
+ }
129
+ end
130
+ end
131
+
132
+ # validation: if :phone_fields is present, it must be an array of strings
133
+ if h[:phone_fields]
134
+ if !h[:phone_fields].is_a?(Array)
135
+ errors << "Invalid :phone_fields: #{h[:phone_fields].class}. Expected: Array."
136
+ else
137
+ h[:phone_fields].each { |field|
138
+ if !field.is_a?(String)
139
+ errors << "Invalid :phone_fields: #{field.class}. Expected: String."
140
+ end
141
+ }
142
+ end
143
+ end
144
+
145
+ # validation: if :company_domain_fields is present, it must be an array of strings
146
+ if h[:company_domain_fields]
147
+ if !h[:company_domain_fields].is_a?(Array)
148
+ errors << "Invalid :company_domain_fields: #{h[:company_domain_fields].class}. Expected: Array."
149
+ else
150
+ h[:company_domain_fields].each { |field|
151
+ if !field.is_a?(String)
152
+ errors << "Invalid :company_domain_fields: #{field.class}. Expected: String."
153
+ end
154
+ }
155
+ end
156
+ end
157
+
158
+ # mapping
159
+ @@indexes = h[:indexes] if h[:indexes]
160
+ @@verifier_url = h[:verifier_url] if h[:verifier_url]
161
+ @@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
162
+ @@email_fields = h[:email_fields] if h[:email_fields]
163
+ @@phone_fields = h[:phone_fields] if h[:phone_fields]
164
+ @@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
165
+ end
52
166
 
53
167
  # return true if the domain accepts any random address as valid (i.e. it is a catch-all domain)
168
+ #
169
+ # This is a support method for the `append` methods.
170
+ # The end-user should not call this method directly.
171
+ #
54
172
  def self.catch_all?(domain)
55
173
  BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
56
174
  end
57
175
 
58
176
  # verify an email address using the AWS IP address of our website, which is more reliable
177
+ #
178
+ # This is a support method for the `append` methods.
179
+ # The end-user should not call this method directly.
180
+ #
59
181
  def self.verify(email)
60
- url = "https://connectionsphere.com/api1.0/emails/verify.json"
182
+ url = @@verifier_url
61
183
  params = {
62
184
  :email => email,
63
185
  }
@@ -92,18 +214,24 @@ module BlackStack
92
214
  ret
93
215
  end
94
216
 
217
+ # This is a support method for the `append` methods.
218
+ # The end-user should not call this method directly.
95
219
  def self.cleanup_fname(name)
96
220
  return '' if name.nil?
97
221
  a = name.split(/[^a-zA-Z]/)
98
222
  a.size > 0 ? a[0] : ''
99
223
  end
100
224
 
225
+ # This is a support method for the `append` methods.
226
+ # The end-user should not call this method directly.
101
227
  def self.cleanup_lname(name)
102
228
  return '' if name.nil?
103
229
  a = name.split(/[^a-zA-Z]/)
104
230
  a.size > 1 ? a[1] : ''
105
231
  end
106
232
 
233
+ # This is a support method for the `append` methods.
234
+ # The end-user should not call this method directly.
107
235
  def self.cleanup_company(company)
108
236
  return '' if company.nil?
109
237
  ret = ''
@@ -131,5 +259,197 @@ module BlackStack
131
259
  # return
132
260
  ret
133
261
  end
262
+
263
+ # Find a person in the indexes by its full name and company name.
264
+ # Append all the information in the index row.
265
+ def self.find_persons_with_full_name(name, cname)
266
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
267
+
268
+ l.logs "Guessing fname from #{name}... "
269
+ fname = BlackStack::Appending::cleanup_fname(name)
270
+ l.logf fname
271
+
272
+ l.logs "Guessing lname from #{name}... "
273
+ lname = BlackStack::Appending::cleanup_lname(name)
274
+ l.logf lname
275
+
276
+ BlackStack::Appending.find_persons(fname, lname, cname)
277
+ end
278
+
279
+ # Find a person in the indexes by its first name, last name and company name.
280
+ # Append all the information in the index row.
281
+ def self.find_persons(fname, lname, cname)
282
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
283
+ h = {
284
+ :matches => [],
285
+ :enlapsed_seconds => 0,
286
+ :files_processed => 0,
287
+ }
288
+ # cleaning up company name
289
+ l.logs "Cleaning up company name #{cname}... "
290
+ cname = BlackStack::Appending::cleanup_company(cname)
291
+ l.logf cname
292
+ # looking for a record that matches with first name, last name and company name
293
+ appends = []
294
+ enlapsed_seconds = 0
295
+ files_processed = 0
296
+ BlackStack::Appending.indexes.each { |i|
297
+ l.logs "Searching into #{i.name}... "
298
+ ret = i.find([cname, fname, lname], false, nil)
299
+ # add the name of the index in the first position of the match
300
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
301
+ # add matches to the list
302
+ h[:matches] += ret[:matches]
303
+ # sum the total files and the total elapsed seconds
304
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
305
+ h[:files_processed] += ret[:files_processed]
306
+ l.done
307
+ }
308
+ # update report
309
+ @@report = h
310
+ # return results
311
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
312
+ end
313
+
314
+ # Find persons in the indexes by company name only.
315
+ # Append all the information in the index row.
316
+ def self.find_persons_by_company(cname)
317
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
318
+ h = {
319
+ :matches => [],
320
+ :enlapsed_seconds => 0,
321
+ :files_processed => 0,
322
+ }
323
+ # looking for records that match the company name
324
+ appends = []
325
+ enlapsed_seconds = 0
326
+ files_processed = 0
327
+ BlackStack::Appending.indexes.each { |i|
328
+ l.logs "Searching into #{i.name}... "
329
+ ret = i.find([cname], true, nil)
330
+ # add the name of the index in the first position of the match
331
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
332
+ # add matches to the list
333
+ h[:matches] += ret[:matches]
334
+ # sum the total files and the total elapsed seconds
335
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
336
+ h[:files_processed] += ret[:files_processed]
337
+ l.done
338
+ }
339
+ # update report
340
+ @@report = h
341
+ # return results
342
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
343
+ end
344
+
345
+ def self.find_verified_emails(fname, lname, cname)
346
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
347
+ emails = []
348
+ domains = []
349
+ verified_emails = []
350
+ # get lead emails from the indexes
351
+ l.logs ("Searching index emails... ")
352
+ emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
353
+ res.emails
354
+ }.flatten.uniq.reject { |email|
355
+ email.to_s.empty?
356
+ }
357
+ l.done
358
+ # get company domains from the indexes
359
+ l.logs ("Searching index domains... ")
360
+ domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
361
+ res.company_domains
362
+ }.flatten.reject { |email|
363
+ email.to_s.empty?
364
+ }.map { |domain|
365
+ # normalize domain
366
+ domain.to_s.gsub('www.', '').downcase
367
+ }.uniq
368
+ l.done
369
+ # verify all the emails found in the indexes
370
+ l.logs ("Verifying index emails... ")
371
+ emails.each { |email|
372
+ l.logs "Verifying #{email}... "
373
+ domain = email.split('@').last
374
+ verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
375
+ l.done
376
+ }
377
+ l.done
378
+ # appending with domains found in the indexes
379
+ l.logs ("Appending with domains... ")
380
+ domains.each { |domain|
381
+ l.logs "Appending with #{domain}... "
382
+ verified_emails += BlackStack::Appending.append(fname, lname, domain)
383
+ l.done
384
+ }
385
+ l.done
386
+ # return
387
+ verified_emails.uniq
388
+ end
389
+
390
+ #
391
+ class Result
392
+ # array of values.
393
+ # first 3 values are index name, key and row-number.
394
+ attr_accessor :match
395
+
396
+ def initialize(a)
397
+ self.match = a
398
+ end
399
+
400
+ # From a given match (with the name of its index in the first position), get the value of a field by its name.
401
+ def value(field)
402
+ # get the index_name
403
+ index_name = match[0]
404
+ # get the index descriptor
405
+ index = BlackStack::CSVIndexer.indexes.select { |i| i.name == index_name }.first
406
+ # get position of the field in the hash descriptor
407
+ k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
408
+ # return nil if the field is not found
409
+ return nil if k.nil?
410
+ # get the field value
411
+ match[k+3].to_s
412
+ end
413
+
414
+ # Call value() method.
415
+ def val(field)
416
+ self.value(field)
417
+ end
418
+
419
+ # From a given match (with the name of its index in the first position), get the email addresses.
420
+ def emails()
421
+ keys = BlackStack::Appending.email_fields
422
+ ret = []
423
+ keys.each { |k|
424
+ v = self.value(k)
425
+ ret << v if v
426
+ }
427
+ ret
428
+ end
429
+
430
+ # From a given match (with the name of its index in the first position), get the phone numbers.
431
+ def phones()
432
+ keys = BlackStack::Appending.phone_fields
433
+ ret = []
434
+ keys.each { |k|
435
+ v = self.value(k)
436
+ ret << v if v
437
+ }
438
+ ret
439
+ end
440
+
441
+ # From a given match (with the name of its index in the first position), get the company domains.
442
+ def company_domains()
443
+ keys = BlackStack::Appending.company_domain_fields
444
+ ret = []
445
+ keys.each { |k|
446
+ v = self.value(k)
447
+ ret << v if v
448
+ }
449
+ ret
450
+ end
451
+
452
+ end # class Result
453
+
134
454
  end # Appending
135
455
  end # BlackStack
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: appending
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '1.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leandro Daniel Sardi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-19 00:00:00.000000000 Z
11
+ date: 2022-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -30,6 +30,26 @@ dependencies:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 3.2.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: email_verifier
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.1.0
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 0.1.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 0.1.0
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 0.1.0
33
53
  - !ruby/object:Gem::Dependency
34
54
  name: blackstack-core
35
55
  requirement: !ruby/object:Gem::Requirement