appending 0.3 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/appending.rb +367 -47
  3. metadata +22 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 947bf74ff34b74bf14eff43808faf649b534f1d7c4939d2c5cc1215f211d922b
4
- data.tar.gz: 663ba17cc651af3e83abd91b9dda7a256831d19a458ac04b46bddd721e6d6c55
3
+ metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
4
+ data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
5
5
  SHA512:
6
- metadata.gz: d182a0b718d74367247d25e1189c39c07438b56b5fd27dba8b8c47dc96fa8db44f87e2543740856a674631217b82006bcbe4689fea1c8a2248a9b6038304a62d
7
- data.tar.gz: 06acd4a0cbbea440c777b64593d89116f1d9aa370a6ebc34976b6aca868cfbbaba924494f3bb74d7d9b6668aba8b246522b8fb1c57f02a0cae3a3d29051c1b6f
6
+ metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
7
+ data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
data/lib/appending.rb CHANGED
@@ -1,63 +1,185 @@
1
1
  require 'csv'
2
+ require 'email_verifier'
3
+ require 'csv-indexer'
4
+ require 'simple_cloud_logging'
2
5
 
3
6
  module BlackStack
4
7
  module Appending
5
- # This class is used to parse the HTML files downloaded from Sales Navigator and other sources.
6
- module Parser
7
- # parse search results pages from sales navigator, and save the company name and full name into a CSV file
8
- def self.parse_sales_navigator_result_pages(search_name, l=nil)
9
- # create logger if not passed
10
- l = BlackStack::DummyLogger.new(nil) if l.nil?
11
- # define output filename
12
- output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
13
- raise 'Output file already exists.' if File.exists?(output_file)
14
- output = File.open(output_file, 'w')
15
- # parse
16
- i = 0
17
- source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
18
- Dir.glob(source).each do |file|
19
- doc = Nokogiri::HTML(open(file))
20
- lis = doc.xpath('//li[contains(@class, "artdeco-list__item")]')
21
- lis.each { |li|
22
- i += 1
23
- doc2 = Nokogiri::HTML(li.inner_html)
24
- # this is where to find the full name of the lead
25
- n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
26
- # this is where to find the name of the company, when it has a link to a linkedin company page
27
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
28
- # this is where to find the name of the company, when it has not a link to a linkedin company page
29
- company_name = nil
30
- if n2
31
- company_name = n2.text
32
- else
33
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
34
- if n2
35
- company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last.strip
36
- end
8
+ @@logger = nil
9
+ @@report = nil
10
+ @@indexes = []
11
+ @@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
12
+ @@verifier_api_key = nil
13
+ @@email_fields = []
14
+ @@phone_fields = []
15
+ @@company_domain_fields = []
16
+
17
+ ## @@logger
18
+ def self.set_logger(logger)
19
+ @@logger = logger
20
+ end
21
+
22
+ def self.logger
23
+ @@logger
24
+ end
25
+
26
+ ## @@indexes
27
+ def self.add_index(index)
28
+ expected = [:company_name, :first_name, :last_name]
29
+
30
+ # validation: keys must be `[:company_name, :first_name, :last_name]`
31
+ if !index.keys.eql?(expected)
32
+ raise "Invalid index: #{index.keys}. Expected: #{expected}."
33
+ end
34
+ # add the index
35
+ @@indexes << index
36
+ end
37
+
38
+ def self.set_indexes(indexes)
39
+ @@indexes = indexes
40
+ end
41
+
42
+ def self.indexes
43
+ @@indexes
44
+ end
45
+
46
+ # @@report
47
+ def self.report
48
+ @@report
49
+ end
50
+
51
+ # @@verifier_url
52
+ def self.set_verifier_url(url)
53
+ @@verifier_url = url
54
+ end
55
+
56
+ def self.verifier_url
57
+ @@verifier_url
58
+ end
59
+
60
+ # @@verifier_api_key
61
+ def self.set_verifier_api_key(key)
62
+ @@verifier_api_key = key
63
+ end
64
+
65
+ def self.verifier_api_key
66
+ @@verifier_api_key
67
+ end
68
+
69
+ # @@email_fields
70
+ def self.set_email_fields(fields)
71
+ @@email_fields = fields
72
+ end
73
+
74
+ def self.email_fields
75
+ @@email_fields
76
+ end
77
+
78
+ # @@phone_fields
79
+ def self.set_phone_fields(fields)
80
+ @@phone_fields = fields
81
+ end
82
+
83
+ def self.phone_fields
84
+ @@phone_fields
85
+ end
86
+
87
+ # @@company_domain_fields
88
+ def self.set_company_fields(fields)
89
+ @@company_domain_fields = fields
90
+ end
91
+
92
+ def self.company_domain_fields
93
+ @@company_domain_fields
94
+ end
95
+
96
+ # set configuration
97
+ def self.set(h)
98
+ errors = []
99
+
100
+ # validation: if :indexes is present, it must be an array of objects BlackStack::CSVIndexer::Index
101
+ if h[:indexes]
102
+ if !h[:indexes].is_a?(Array)
103
+ errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
104
+ else
105
+ h[:indexes].each { |index|
106
+ if !index.is_a?(BlackStack::CSVIndexer::Index)
107
+ errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
37
108
  end
38
- # add the information to the output file
39
- line = []
40
- line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
41
- line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
42
- l.logs "#{i.to_s}, #{line.join(',')}... "
43
- output.puts line.join(',')
44
- output.flush
45
- l.done
46
- }
47
- end
48
- # close output file
49
- output.close
50
- end # def self.parse_sales_navigator_result_pages(search_name)
51
- end # module Parser
109
+ }
110
+ end
111
+ end
112
+
113
+ # validation: if :verifier_url is present, it must be a string
114
+ errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
115
+
116
+ # validation: if :verifier_api_key is present, it must be a string
117
+ errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)
118
+
119
+ # validation: if :email_fields is present, it must be an array of strings
120
+ if h[:email_fields]
121
+ if !h[:email_fields].is_a?(Array)
122
+ errors << "Invalid :email_fields: #{h[:email_fields].class}. Expected: Array."
123
+ else
124
+ h[:email_fields].each { |field|
125
+ if !field.is_a?(String)
126
+ errors << "Invalid :email_fields: #{field.class}. Expected: String."
127
+ end
128
+ }
129
+ end
130
+ end
131
+
132
+ # validation: if :phone_fields is present, it must be an array of strings
133
+ if h[:phone_fields]
134
+ if !h[:phone_fields].is_a?(Array)
135
+ errors << "Invalid :phone_fields: #{h[:phone_fields].class}. Expected: Array."
136
+ else
137
+ h[:phone_fields].each { |field|
138
+ if !field.is_a?(String)
139
+ errors << "Invalid :phone_fields: #{field.class}. Expected: String."
140
+ end
141
+ }
142
+ end
143
+ end
144
+
145
+ # validation: if :company_domain_fields is present, it must be an array of strings
146
+ if h[:company_domain_fields]
147
+ if !h[:company_domain_fields].is_a?(Array)
148
+ errors << "Invalid :company_domain_fields: #{h[:company_domain_fields].class}. Expected: Array."
149
+ else
150
+ h[:company_domain_fields].each { |field|
151
+ if !field.is_a?(String)
152
+ errors << "Invalid :company_domain_fields: #{field.class}. Expected: String."
153
+ end
154
+ }
155
+ end
156
+ end
157
+
158
+ # mapping
159
+ @@indexes = h[:indexes] if h[:indexes]
160
+ @@verifier_url = h[:verifier_url] if h[:verifier_url]
161
+ @@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
162
+ @@email_fields = h[:email_fields] if h[:email_fields]
163
+ @@phone_fields = h[:phone_fields] if h[:phone_fields]
164
+ @@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
165
+ end
52
166
 
53
167
  # return true if the domain accepts any random address as valid
168
+ #
169
+ # This is a support method for the `append` methods.
170
+ # The end-user should not call this method directly.
171
+ #
54
172
  def self.catch_all?(domain)
55
173
  BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
56
174
  end
57
175
 
58
176
  # verify an email address using the AWS IP address of our website, which is more reliable
177
+ #
178
+ # This is a support method for the `append` methods.
179
+ # The end-user should not call this method directly.
180
+ #
59
181
  def self.verify(email)
60
- url = "https://connectionsphere.com/api1.0/emails/verify.json"
182
+ url = @@verifier_url
61
183
  params = {
62
184
  :email => email,
63
185
  }
@@ -92,18 +214,24 @@ module BlackStack
92
214
  ret
93
215
  end
94
216
 
217
+ # This is a support method for the `append` methods.
218
+ # The end-user should not call this method directly.
95
219
  def self.cleanup_fname(name)
96
220
  return '' if name.nil?
97
221
  a = name.split(/[^a-zA-Z]/)
98
222
  a.size > 0 ? a[0] : ''
99
223
  end
100
224
 
225
+ # This is a support method for the `append` methods.
226
+ # The end-user should not call this method directly.
101
227
  def self.cleanup_lname(name)
102
228
  return '' if name.nil?
103
229
  a = name.split(/[^a-zA-Z]/)
104
230
  a.size > 1 ? a[1] : ''
105
231
  end
106
232
 
233
+ # This is a support method for the `append` methods.
234
+ # The end-user should not call this method directly.
107
235
  def self.cleanup_company(company)
108
236
  return '' if company.nil?
109
237
  ret = ''
@@ -131,5 +259,197 @@ module BlackStack
131
259
  # return
132
260
  ret
133
261
  end
262
+
263
+ # Find a person in the indexes by its full name and company name.
264
+ # Append all the information in the index row.
265
+ def self.find_persons_with_full_name(name, cname)
266
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
267
+
268
+ l.logs "Guessing fname from #{name}... "
269
+ fname = BlackStack::Appending::cleanup_fname(name)
270
+ l.logf fname
271
+
272
+ l.logs "Guessing lname from #{name}... "
273
+ lname = BlackStack::Appending::cleanup_lname(name)
274
+ l.logf lname
275
+
276
+ BlackStack::Appending.find_persons(fname, lname, cname)
277
+ end
278
+
279
+ # Find a person in the indexes by its first name, last name and company name.
280
+ # Append all the information in the index row.
281
+ def self.find_persons(fname, lname, cname)
282
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
283
+ h = {
284
+ :matches => [],
285
+ :enlapsed_seconds => 0,
286
+ :files_processed => 0,
287
+ }
288
+ # cleaning up company name
289
+ l.logs "Cleaning up company name #{cname}... "
290
+ cname = BlackStack::Appending::cleanup_company(cname)
291
+ l.logf cname
292
+ # looking for a record that matches with first name, last name and company name
293
+ appends = []
294
+ enlapsed_seconds = 0
295
+ files_processed = 0
296
+ BlackStack::Appending.indexes.each { |i|
297
+ l.logs "Searching into #{i.name}... "
298
+ ret = i.find([cname, fname, lname], false, nil)
299
+ # add the name of the index in the first position of the match
300
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
301
+ # add matches to the list
302
+ h[:matches] += ret[:matches]
303
+ # sum the total files and the total elapsed seconds
304
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
305
+ h[:files_processed] += ret[:files_processed]
306
+ l.done
307
+ }
308
+ # update report
309
+ @@report = h
310
+ # return results
311
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
312
+ end
313
+
314
+ # Find persons in the indexes by company name only.
315
+ # Append all the information in the index row.
316
+ def self.find_persons_by_company(cname)
317
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
318
+ h = {
319
+ :matches => [],
320
+ :enlapsed_seconds => 0,
321
+ :files_processed => 0,
322
+ }
323
+ # looking for records that match the company name
324
+ appends = []
325
+ enlapsed_seconds = 0
326
+ files_processed = 0
327
+ BlackStack::Appending.indexes.each { |i|
328
+ l.logs "Searching into #{i.name}... "
329
+ ret = i.find([cname], true, nil)
330
+ # add the name of the index in the first position of the match
331
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
332
+ # add matches to the list
333
+ h[:matches] += ret[:matches]
334
+ # sum the total files and the total elapsed seconds
335
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
336
+ h[:files_processed] += ret[:files_processed]
337
+ l.done
338
+ }
339
+ # update report
340
+ @@report = h
341
+ # return results
342
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
343
+ end
344
+
345
+ def self.find_verified_emails(fname, lname, cname)
346
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
347
+ emails = []
348
+ domains = []
349
+ verified_emails = []
350
+ # get lead emails from the indexes
351
+ l.logs ("Searching index emails... ")
352
+ emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
353
+ res.emails
354
+ }.flatten.uniq.reject { |email|
355
+ email.to_s.empty?
356
+ }
357
+ l.done
358
+ # get company domains from the indexes
359
+ l.logs ("Searching index domains... ")
360
+ domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
361
+ res.company_domains
362
+ }.flatten.reject { |email|
363
+ email.to_s.empty?
364
+ }.map { |domain|
365
+ # normalize domain
366
+ domain.to_s.gsub('www.', '').downcase
367
+ }.uniq
368
+ l.done
369
+ # verify all the emails found in the indexes
370
+ l.logs ("Verifying index emails... ")
371
+ emails.each { |email|
372
+ l.logs "Verifying #{email}... "
373
+ domain = email.split('@').last
374
+ verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
375
+ l.done
376
+ }
377
+ l.done
378
+ # appending with domains found in the indexes
379
+ l.logs ("Appending with domains... ")
380
+ domains.each { |domain|
381
+ l.logs "Appending with #{domain}... "
382
+ verified_emails += BlackStack::Appending.append(fname, lname, domain)
383
+ l.done
384
+ }
385
+ l.done
386
+ # return
387
+ verified_emails.uniq
388
+ end
389
+
390
+ #
391
+ class Result
392
+ # array of values.
393
+ # first 3 values are index name, key and row-number.
394
+ attr_accessor :match
395
+
396
+ def initialize(a)
397
+ self.match = a
398
+ end
399
+
400
+ # From a given match (with the name of its index in the first position), get the value of a field by its name.
401
+ def value(field)
402
+ # get the index_name
403
+ index_name = match[0]
404
+ # get the index descriptor
405
+ index = BlackStack::CSVIndexer.indexes.select { |i| i.name == index_name }.first
406
+ # get position of the field in the hash descriptor
407
+ k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
408
+ # return nil if the field is not found
409
+ return nil if k.nil?
410
+ # get the field value
411
+ match[k+3].to_s
412
+ end
413
+
414
+ # Call value() method.
415
+ def val(field)
416
+ self.value(field)
417
+ end
418
+
419
+ # From a given match (with the name of its index in the first position), get the email addresses.
420
+ def emails()
421
+ keys = BlackStack::Appending.email_fields
422
+ ret = []
423
+ keys.each { |k|
424
+ v = self.value(k)
425
+ ret << v if v
426
+ }
427
+ ret
428
+ end
429
+
430
+ # From a given match (with the name of its index in the first position), get the phone numbers.
431
+ def phones()
432
+ keys = BlackStack::Appending.phone_fields
433
+ ret = []
434
+ keys.each { |k|
435
+ v = self.value(k)
436
+ ret << v if v
437
+ }
438
+ ret
439
+ end
440
+
441
+ # From a given match (with the name of its index in the first position), get the company domains.
442
+ def company_domains()
443
+ keys = BlackStack::Appending.company_domain_fields
444
+ ret = []
445
+ keys.each { |k|
446
+ v = self.value(k)
447
+ ret << v if v
448
+ }
449
+ ret
450
+ end
451
+
452
+ end # class Result
453
+
134
454
  end # Appending
135
455
  end # BlackStack
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: appending
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.3'
4
+ version: '1.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leandro Daniel Sardi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-19 00:00:00.000000000 Z
11
+ date: 2022-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: csv
@@ -30,6 +30,26 @@ dependencies:
30
30
  - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 3.2.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: email_verifier
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.1.0
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 0.1.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 0.1.0
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 0.1.0
33
53
  - !ruby/object:Gem::Dependency
34
54
  name: blackstack-core
35
55
  requirement: !ruby/object:Gem::Requirement