appending 0.2 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/appending.rb +455 -0
  3. metadata +44 -4
  4. data/appending.rb +0 -141
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 511aa4fb2b48eab5db9cb290c7e58449d4d04e5ec88057d025ed48551a2c2d4c
4
- data.tar.gz: f95e050a801ab8727f96500743ebd03c67cb745d93d6d93fda71939971655589
3
+ metadata.gz: 67bdb77eef8558d6e35c4b4459f6cb9fec576cd1b0fb139b521b42c5314d8471
4
+ data.tar.gz: ba9f19e7c87466eabf297bc3e1f685bf13ada1ad18cfd2d346a2b22e7edfb181
5
5
  SHA512:
6
- metadata.gz: 62418ca562f10dd0dd2e69546c33639625f166d8f627e31e9b4104de76d32d12a16743824aa54ee01807c6bfb0d9b8e27f94162ee6cc2e0e7733c04c5871eff4
7
- data.tar.gz: 507d4bcd5d681e0021db947dcd7e7011a2e3c36e25d0dc0d1266a1a76f2bc4a9de09aa91ac7daf15b7d7abb8539a6ae36c44271a6e14541a3199ab96ac0c7cbe
6
+ metadata.gz: 92635a6cb6d3117e32c58b5cc372dba8f5d161655cb7d60af3f3ed3200b2bd5f948e6586238a211f62ba5dae7d26e7f06c6a9182d73c0b5a0f000c710f554933
7
+ data.tar.gz: 2cf1e90ca32603cdb498107016b887b251f5118522641cb6f9ef1c8fec811a0033650acd0a4511164d4672cd80d03fce317005663ed20a9f1c6f701c059a94a3
data/lib/appending.rb ADDED
@@ -0,0 +1,455 @@
1
+ require 'csv'
2
+ require 'email_verifier'
3
+ require 'csv-indexer'
4
+ require 'simple_cloud_logging'
5
+
6
+ module BlackStack
7
+ module Appending
8
+ @@logger = nil
9
+ @@report = nil
10
+ @@indexes = []
11
+ @@verifier_url = 'https://connectionsphere.com/api1.0/emails/verify.json'
12
+ @@verifier_api_key = nil
13
+ @@email_fields = []
14
+ @@phone_fields = []
15
+ @@company_domain_fields = []
16
+
17
+ ## @@logger
18
+ def self.set_logger(logger)
19
+ @@logger = logger
20
+ end
21
+
22
+ def self.logger
23
+ @@logger
24
+ end
25
+
26
+ ## @@indexes
27
+ def self.add_index(index)
28
+ expected = [:company_name, :first_name, :last_name]
29
+
30
+ # validation: keys must be `[:company_name, :first_name, :last_name]`
31
+ if !index.keys.eql?(expected)
32
+ raise "Invalid index: #{index.keys}. Expected: #{expected}."
33
+ end
34
+ # add the index
35
+ @@indexes << index
36
+ end
37
+
38
+ def self.set_indexes(indexes)
39
+ @@indexes = indexes
40
+ end
41
+
42
+ def self.indexes
43
+ @@indexes
44
+ end
45
+
46
+ # @@report
47
+ def self.report
48
+ @@report
49
+ end
50
+
51
+ # @@verifier_url
52
+ def self.set_verifier_url(url)
53
+ @@verifier_url = url
54
+ end
55
+
56
+ def self.verifier_url
57
+ @@verifier_url
58
+ end
59
+
60
+ # @@verifier_api_key
61
+ def self.set_verifier_api_key(key)
62
+ @@verifier_api_key = key
63
+ end
64
+
65
+ def self.verifier_api_key
66
+ @@verifier_api_key
67
+ end
68
+
69
+ # @@email_fields
70
+ def self.set_email_fields(fields)
71
+ @@email_fields = fields
72
+ end
73
+
74
+ def self.email_fields
75
+ @@email_fields
76
+ end
77
+
78
+ # @@phone_fields
79
+ def self.set_phone_fields(fields)
80
+ @@phone_fields = fields
81
+ end
82
+
83
+ def self.phone_fields
84
+ @@phone_fields
85
+ end
86
+
87
+ # @@company_domain_fields
88
+ def self.set_company_fields(fields)
89
+ @@company_domain_fields = fields
90
+ end
91
+
92
+ def self.company_domain_fields
93
+ @@company_domain_fields
94
+ end
95
+
96
+ # set configuration
97
+ def self.set(h)
98
+ errors = []
99
+
100
+ # validation: if :indexes is present, it must be an array of objects BlackStack::CSVIndexer::Index
101
+ if h[:indexes]
102
+ if !h[:indexes].is_a?(Array)
103
+ errors << "Invalid :indexes: #{h[:indexes].class}. Expected: Array."
104
+ else
105
+ h[:indexes].each { |index|
106
+ if !index.is_a?(BlackStack::CSVIndexer::Index)
107
+ errors << "Invalid :indexes: #{index.class}. Expected: BlackStack::CSVIndexer::Index."
108
+ end
109
+ }
110
+ end
111
+ end
112
+
113
+ # validation: if :verifier_url is present, it must be a string
114
+ errors << ":verifier_url must be a string." if h[:verifier_url] && !h[:verifier_url].is_a?(String)
115
+
116
+ # validation: if :verifier_api_key is present, it must be a string
117
+ errors << ":verifier_api_key must be a string." if h[:verifier_api_key] && !h[:verifier_api_key].is_a?(String)
118
+
119
+ # validation: if :email_fields is present, it must be an array of strings
120
+ if h[:email_fields]
121
+ if !h[:email_fields].is_a?(Array)
122
+ errors << "Invalid :email_fields: #{h[:email_fields].class}. Expected: Array."
123
+ else
124
+ h[:email_fields].each { |field|
125
+ if !field.is_a?(String)
126
+ errors << "Invalid :email_fields: #{field.class}. Expected: String."
127
+ end
128
+ }
129
+ end
130
+ end
131
+
132
+ # validation: if :phone_fields is present, it must be an array of strings
133
+ if h[:phone_fields]
134
+ if !h[:phone_fields].is_a?(Array)
135
+ errors << "Invalid :phone_fields: #{h[:phone_fields].class}. Expected: Array."
136
+ else
137
+ h[:phone_fields].each { |field|
138
+ if !field.is_a?(String)
139
+ errors << "Invalid :phone_fields: #{field.class}. Expected: String."
140
+ end
141
+ }
142
+ end
143
+ end
144
+
145
+ # validation: if :company_domain_fields is present, it must be an array of strings
146
+ if h[:company_domain_fields]
147
+ if !h[:company_domain_fields].is_a?(Array)
148
+ errors << "Invalid :company_domain_fields: #{h[:company_domain_fields].class}. Expected: Array."
149
+ else
150
+ h[:company_domain_fields].each { |field|
151
+ if !field.is_a?(String)
152
+ errors << "Invalid :company_domain_fields: #{field.class}. Expected: String."
153
+ end
154
+ }
155
+ end
156
+ end
157
+
158
+ # mapping
159
+ @@indexes = h[:indexes] if h[:indexes]
160
+ @@verifier_url = h[:verifier_url] if h[:verifier_url]
161
+ @@verifier_api_key = h[:verifier_api_key] if h[:verifier_api_key]
162
+ @@email_fields = h[:email_fields] if h[:email_fields]
163
+ @@phone_fields = h[:phone_fields] if h[:phone_fields]
164
+ @@company_domain_fields = h[:company_domain_fields] if h[:company_domain_fields]
165
+ end
166
+
167
+ # return true if the domain accepts any random address as valid (catch-all domain)
168
+ #
169
+ # This is a support method for the `append` methods.
170
+ # The end-user should not call this method directly.
171
+ #
172
+ def self.catch_all?(domain)
173
+ BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
174
+ end
175
+
176
+ # verify an email address using the AWS IP address of our website, which is more reliable
177
+ #
178
+ # This is a support method for the `append` methods.
179
+ # The end-user should not call this method directly.
180
+ #
181
+ def self.verify(email)
182
+ url = @@verifier_url
183
+ params = {
184
+ :email => email,
185
+ }
186
+ res = BlackStack::Netting::call_get(url, params)
187
+ parsed = JSON.parse(res.body)
188
+ parsed['status'] == 'success'
189
+ end
190
+
191
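+ # build candidate email addresses from a first name, last name and domain, and return the ones that verify (returns an empty array for catch-all domains)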
192
+ def self.append(fname, lname, domain)
193
+ ret = []
194
+ if !catch_all?(domain)
195
+ EmailVerifier.config do |config|
196
+ config.verifier_email = "leandro.sardi@expandedventure.com"
197
+ end
198
+ [
199
+ "#{fname}@#{domain}",
200
+ "#{lname}@#{domain}",
201
+
202
+ "#{fname}.#{lname}@#{domain}",
203
+ "#{lname}.#{fname}@#{domain}",
204
+
205
+ "#{fname}#{lname}@#{domain}",
206
+ "#{lname}#{fname}@#{domain}",
207
+
208
+ "#{fname[0]}#{lname}@#{domain}",
209
+ "#{fname[0]}.#{lname}@#{domain}",
210
+ ].each { |email|
211
+ ret << email.downcase if verify(email)
212
+ }
213
+ end
214
+ ret
215
+ end
216
+
217
+ # This is a support method for the `append` methods.
218
+ # The end-user should not call this method directly.
219
+ def self.cleanup_fname(name)
220
+ return '' if name.nil?
221
+ a = name.split(/[^a-zA-Z]/)
222
+ a.size > 0 ? a[0] : ''
223
+ end
224
+
225
+ # This is a support method for the `append` methods.
226
+ # The end-user should not call this method directly.
227
+ def self.cleanup_lname(name)
228
+ return '' if name.nil?
229
+ a = name.split(/[^a-zA-Z]/)
230
+ a.size > 1 ? a[1] : ''
231
+ end
232
+
233
+ # This is a support method for the `append` methods.
234
+ # The end-user should not call this method directly.
235
+ def self.cleanup_company(company)
236
+ return '' if company.nil?
237
+ ret = ''
238
+ # stage 1: remove company-type suffixes
239
+ company = company.split(/ at /).last
240
+ company.gsub!(/LLC/, '')
241
+ company.gsub!(/Inc/, '')
242
+ company.strip! # remove leading and trailing spaces
243
+ # stage 2: remove LinkedIn suffixes
244
+ company.gsub!(/\(\d\d\d\d - Present\)/, '')
245
+ company.strip! # remove leading and trailing spaces
246
+ # stage 3: remove a trailing period or comma
247
+ company.gsub!(/\.$/, '')
248
+ company.gsub!(/\,$/, '')
249
+ # stage 4: remove any character that is not a letter, digit, comma, period, hyphen or whitespace
250
+ company.gsub!(/[^a-zA-Z0-9,\.\-\s]/, '') # remove extra spaces
251
+ company.strip! # remove leading and trailing spaces
252
+ # stage 5: choose the first part of the company name
253
+ company.split(' ').each { |word|
254
+ ret += word + ' '
255
+ #break if word.size >= 5 || ret.split(' ').size > 2
256
+ break if ret.split(' ').size > 2
257
+ }
258
+ ret.strip!
259
+ # return
260
+ ret
261
+ end
262
+
263
+ # Find a person in the indexes by its full name and company name.
264
+ # Append all the information in the index row.
265
+ def self.find_persons_with_full_name(name, cname)
266
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
267
+
268
+ l.logs "Guessing fname from #{name}... "
269
+ fname = BlackStack::Appending::cleanup_fname(name)
270
+ l.logf fname
271
+
272
+ l.logs "Guessing lname from #{name}... "
273
+ lname = BlackStack::Appending::cleanup_lname(name)
274
+ l.logf lname
275
+
276
+ BlackStack::Appending.find_persons(fname, lname, cname)
277
+ end
278
+
279
+ # Find a person in the indexes by its first name, last name and company name.
280
+ # Append all the information in the index row.
281
+ def self.find_persons(fname, lname, cname)
282
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
283
+ h = {
284
+ :matches => [],
285
+ :enlapsed_seconds => 0,
286
+ :files_processed => 0,
287
+ }
288
+ # cleaning up company name
289
+ l.logs "Cleaning up company name #{cname}... "
290
+ cname = BlackStack::Appending::cleanup_company(cname)
291
+ l.logf cname
292
+ # looking for a record that matches with first name, last name and company name
293
+ appends = []
294
+ enlapsed_seconds = 0
295
+ files_processed = 0
296
+ BlackStack::Appending.indexes.each { |i|
297
+ l.logs "Searching into #{i.name}... "
298
+ ret = i.find([cname, fname, lname], false, nil)
299
+ # add the name of the index in the first position of the match
300
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
301
+ # add matches to the list
302
+ h[:matches] += ret[:matches]
303
+ # sum the total files and the total enlapsed seconds
304
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
305
+ h[:files_processed] += ret[:files_processed]
306
+ l.done
307
+ }
308
+ # update report
309
+ @@report = h
310
+ # return results
311
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
312
+ end
313
+
314
+ # Find persons in the indexes by company name only.
315
+ # Append all the information in the index row.
316
+ def self.find_persons_by_company(cname)
317
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
318
+ h = {
319
+ :matches => [],
320
+ :enlapsed_seconds => 0,
321
+ :files_processed => 0,
322
+ }
323
+ # looking for records that match the company name
324
+ appends = []
325
+ enlapsed_seconds = 0
326
+ files_processed = 0
327
+ BlackStack::Appending.indexes.each { |i|
328
+ l.logs "Searching into #{i.name}... "
329
+ ret = i.find([cname], true, nil)
330
+ # add the name of the index in the first position of the match
331
+ ret[:matches].each { |m| m.unshift(i.name.to_s) }
332
+ # add matches to the list
333
+ h[:matches] += ret[:matches]
334
+ # sum the total files and the total enlapsed seconds
335
+ h[:enlapsed_seconds] += ret[:enlapsed_seconds]
336
+ h[:files_processed] += ret[:files_processed]
337
+ l.done
338
+ }
339
+ # update report
340
+ @@report = h
341
+ # return results
342
+ h[:matches].map { |m| BlackStack::Appending::Result.new(m) }
343
+ end
344
+
345
+ def self.find_verified_emails(fname, lname, cname)
346
+ l = BlackStack::Appending.logger || BlackStack::DummyLogger.new
347
+ emails = []
348
+ domains = []
349
+ verified_emails = []
350
+ # get lead emails from the indexes
351
+ l.logs ("Searching index emails... ")
352
+ emails = BlackStack::Appending.find_persons(fname, lname, cname).map { |res|
353
+ res.emails
354
+ }.flatten.uniq.reject { |email|
355
+ email.to_s.empty?
356
+ }
357
+ l.done
358
+ # get company domains from the indexes
359
+ l.logs ("Searching index domains... ")
360
+ domains = BlackStack::Appending.find_persons_by_company(cname).map { |res|
361
+ res.company_domains
362
+ }.flatten.reject { |email|
363
+ email.to_s.empty?
364
+ }.map { |domain|
365
+ # normalize domain
366
+ domain.to_s.gsub('www.', '').downcase
367
+ }.uniq
368
+ l.done
369
+ # verify all the emails found in the indexes
370
+ l.logs ("Verifying index emails... ")
371
+ emails.each { |email|
372
+ l.logs "Verifying #{email}... "
373
+ domain = email.split('@').last
374
+ verified_emails << email if BlackStack::Appending.verify(email) && !BlackStack::Appending.catch_all?(domain)
375
+ l.done
376
+ }
377
+ l.done
378
+ # appending with domains found in the indexes
379
+ l.logs ("Appending with domains... ")
380
+ domains.each { |domain|
381
+ l.logs "Appending with #{domain}... "
382
+ verified_emails += BlackStack::Appending.append(fname, lname, domain)
383
+ l.done
384
+ }
385
+ l.done
386
+ # return
387
+ verified_emails.uniq
388
+ end
389
+
390
+ #
391
+ class Result
392
+ # array of values.
393
+ # first 3 values are index name, key and row-number.
394
+ attr_accessor :match
395
+
396
+ def initialize(a)
397
+ self.match = a
398
+ end
399
+
400
+ # From a given match (with the name of its index in the first position), get the value of a field by its name.
401
+ def value(field)
402
+ # get the index_name
403
+ index_name = match[0]
404
+ # get the index descriptor
405
+ index = BlackStack::CSVIndexer.indexes.select { |i| i.name == index_name }.first
406
+ # get position of the field in the hash descriptor
407
+ k = index.mapping.to_a.map { |m| m[0].to_s }.index(field.to_s)
408
+ # return nil if the field is not found
409
+ return nil if k.nil?
410
+ # get the field value
411
+ match[k+3].to_s
412
+ end
413
+
414
+ # Call value() method.
415
+ def val(field)
416
+ self.value(field)
417
+ end
418
+
419
+ # From a given match (with the name of its index in the first position), get the email addresses.
420
+ def emails()
421
+ keys = BlackStack::Appending.email_fields
422
+ ret = []
423
+ keys.each { |k|
424
+ v = self.value(k)
425
+ ret << v if v
426
+ }
427
+ ret
428
+ end
429
+
430
+ # From a given match (with the name of its index in the first position), get the phone numbers.
431
+ def phones()
432
+ keys = BlackStack::Appending.phone_fields
433
+ ret = []
434
+ keys.each { |k|
435
+ v = self.value(k)
436
+ ret << v if v
437
+ }
438
+ ret
439
+ end
440
+
441
+ # From a given match (with the name of its index in the first position), get the company domains.
442
+ def company_domains()
443
+ keys = BlackStack::Appending.company_domain_fields
444
+ ret = []
445
+ keys.each { |k|
446
+ v = self.value(k)
447
+ ret << v if v
448
+ }
449
+ ret
450
+ end
451
+
452
+ end # class Result
453
+
454
+ end # Appending
455
+ end # BlackStack
metadata CHANGED
@@ -1,15 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: appending
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '1.0'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Leandro Daniel Sardi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-19 00:00:00.000000000 Z
11
+ date: 2022-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 3.2.2
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 3.2.2
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 3.2.2
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 3.2.2
33
+ - !ruby/object:Gem::Dependency
34
+ name: email_verifier
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 0.1.0
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 0.1.0
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 0.1.0
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 0.1.0
13
53
  - !ruby/object:Gem::Dependency
14
54
  name: blackstack-core
15
55
  requirement: !ruby/object:Gem::Requirement
@@ -136,8 +176,8 @@ executables: []
136
176
  extensions: []
137
177
  extra_rdoc_files: []
138
178
  files:
139
- - appending.rb
140
- homepage: https://rubygems.org/gems/pampa
179
+ - lib/appending.rb
180
+ homepage: https://rubygems.org/gems/appending
141
181
  licenses:
142
182
  - MIT
143
183
  metadata: {}
data/appending.rb DELETED
@@ -1,141 +0,0 @@
1
- require 'csv'
2
- require 'email_verifier'
3
- require 'nokogiri'
4
-
5
- puts "Appending is in development. Please check back later."
6
-
7
- =begin
8
- module BlackStack
9
- module Appending
10
- # This class is used to parse the HTML files downloaded from Sales Navigator and other sources.
11
- module Parser
12
- # parse search results pages from sales navigator, and save the company name and full name into a CSV file
13
- def self.parse_sales_navigator_result_pages(search_name, l=nil)
14
- # create logger if not passed
15
- l = BlackStack::DummyLogger.new(nil) if l.nil?
16
- # define output filename
17
- output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
18
- raise 'Output file already exists.' if File.exists?(output_file)
19
- output = File.open(output_file, 'w')
20
- # parse
21
- i = 0
22
- source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
23
- Dir.glob(source).each do |file|
24
- doc = Nokogiri::HTML(open(file))
25
- lis = doc.xpath('//li[contains(@class, "artdeco-list__item")]')
26
- lis.each { |li|
27
- i += 1
28
- doc2 = Nokogiri::HTML(li.inner_html)
29
- # this is where to find the full name of the lead
30
- n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
31
- # this is where to find the name of the company, when it has a link to a linkedin company page
32
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
33
- # this is where to find the name of the company, when it has not a link to a linkedin company page
34
- company_name = nil
35
- if n2
36
- company_name = n2.text
37
- else
38
- n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
39
- if n2
40
- company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last.strip
41
- end
42
- end
43
- # add the information to the output file
44
- line = []
45
- line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
46
- line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
47
- l.logs "#{i.to_s}, #{line.join(',')}... "
48
- output.puts line.join(',')
49
- output.flush
50
- l.done
51
- }
52
- end
53
- # close output file
54
- output.close
55
- end # def self.parse_sales_navigator_result_pages(search_name)
56
- end # module Parser
57
-
58
- # return true if the domain get any random address as valid
59
- def self.catch_all?(domain)
60
- BlackStack::Appending.verify("008e77980535470e848a4ca859a83db0@#{domain}")
61
- end
62
-
63
- # verify an email address using the AWS IP address of our website, wich is more reliable
64
- def self.verify(email)
65
- url = "https://connectionsphere.com/api1.0/emails/verify.json"
66
- params = {
67
- :email => email,
68
- }
69
- res = BlackStack::Netting::call_get(url, params)
70
- parsed = JSON.parse(res.body)
71
- parsed['status'] == 'success'
72
- end
73
-
74
- # verify an email address
75
- def self.append(fname, lname, domain)
76
- ret = []
77
- if !catch_all?(domain)
78
- EmailVerifier.config do |config|
79
- config.verifier_email = "leandro.sardi@expandedventure.com"
80
- end
81
- [
82
- "#{fname}@#{domain}",
83
- "#{lname}@#{domain}",
84
-
85
- "#{fname}.#{lname}@#{domain}",
86
- "#{lname}.#{fname}@#{domain}",
87
-
88
- "#{fname}#{lname}@#{domain}",
89
- "#{lname}#{fname}@#{domain}",
90
-
91
- "#{fname[0]}#{lname}@#{domain}",
92
- "#{fname[0]}.#{lname}@#{domain}",
93
- ].each { |email|
94
- ret << email.downcase if verify(email)
95
- }
96
- end
97
- ret
98
- end
99
-
100
- def self.cleanup_fname(name)
101
- return '' if name.nil?
102
- a = name.split(/[^a-zA-Z]/)
103
- a.size > 0 ? a[0] : ''
104
- end
105
-
106
- def self.cleanup_lname(name)
107
- return '' if name.nil?
108
- a = name.split(/[^a-zA-Z]/)
109
- a.size > 1 ? a[1] : ''
110
- end
111
-
112
- def self.cleanup_company(company)
113
- return '' if company.nil?
114
- ret = ''
115
- # stage 1: remove company-type suffixes
116
- company = company.split(/ at /).last
117
- company.gsub!(/LLC/, '')
118
- company.gsub!(/Inc/, '')
119
- company.strip! # remove leading and trailing spaces
120
- # stage 2: remove LinkedIn suffixes
121
- company.gsub!(/\(\d\d\d\d - Present\)/, '')
122
- company.strip! # remove leading and trailing spaces
123
- # stege 3: remove non-alphanumeric characters
124
- company.gsub!(/\.$/, '')
125
- company.gsub!(/\,$/, '')
126
- # stege 4: remove extra spaces
127
- company.gsub!(/[^a-zA-Z0-9,\.\-\s]/, '') # remove extra spaces
128
- company.strip! # remove leading and trailing spaces
129
- # stage 5: choose the first part of the company name
130
- company.split(' ').each { |word|
131
- ret += word + ' '
132
- #break if word.size >= 5 || ret.split(' ').size > 2
133
- break if ret.split(' ').size > 2
134
- }
135
- ret.strip!
136
- # return
137
- ret
138
- end
139
- end # Appending
140
- end # BlackStack
141
- =end