appending 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/appending.rb +145 -0
  3. metadata +163 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 39f9443f81936b25c1564601f25d53f2444b9d17fdf864b68fd918b01ce4c3c5
4
+ data.tar.gz: 965989073d0be533dd504eca8902775f9599fb094b18f173f8600e38a092b1e6
5
+ SHA512:
6
+ metadata.gz: 8040fcf14dcb3797b4241c676fd06b856a8fb4126fb7b495ad2eaf0de2409a89bb644bf5b7aa7ca2fd3dead6cbe34e9cd15fbc386d8038c7b2b4add517b3e5ae
7
+ data.tar.gz: ceba3742c7e0128a04f7026b1ef446bc39726dc4253d42511ca6ead0e350850f95f32b27e7f0dd78589dc172a1ab9480f162b23cc4f24c75dc7b6f1fc7bab218
data/appending.rb ADDED
@@ -0,0 +1,145 @@
1
+ require 'csv'
2
+ require 'email_verifier'
3
+ require 'nokogiri'
4
+
5
+ module BlackStack
6
+ module Appending
7
+ # This module parses the HTML files downloaded from Sales Navigator and other sources.
8
+ module Parser
9
# Parse the search-result pages saved from Sales Navigator and write one CSV
# row per lead (full name, company name) to "#{DATA_PATH}/searches/<search_name>.csv".
#
# The HTML pages are read from "#{DATA_PATH}/searches/<search_name>/*.html".
# DATA_PATH is expected to be defined elsewhere in the application.
#
# search_name - name of the search; used for both the input folder and the output file.
# l           - optional logger responding to `logs` and `done`; a
#               BlackStack::DummyLogger is created when nil.
#
# Raises RuntimeError ('Output file already exists.') when the CSV is already there.
# Returns nil.
def self.parse_sales_navigator_result_pages(search_name, l=nil)
  # create logger if not passed
  l = BlackStack::DummyLogger.new(nil) if l.nil?

  # define output filename; never overwrite a previous export
  output_file = "#{DATA_PATH}/searches/#{search_name}.csv"
  raise 'Output file already exists.' if File.exist?(output_file) # File.exists? was removed in Ruby 3.2

  source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
  i = 0

  # block form guarantees the output file is closed even if parsing raises
  File.open(output_file, 'w') do |output|
    # sort so the row order does not depend on the filesystem / Ruby version
    Dir.glob(source).sort.each do |file|
      # File.open with a block closes each page after parsing (Kernel#open leaked the handle)
      doc = File.open(file) { |html| Nokogiri::HTML(html) }

      doc.xpath('//li[contains(@class, "artdeco-list__item")]').each do |li|
        i += 1
        doc2 = Nokogiri::HTML(li.inner_html)

        # this is where to find the full name of the lead
        n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
        # this is where to find the name of the company, when it has a link to a linkedin company page
        n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first

        company_name = nil
        if n2
          company_name = n2.text
        else
          # company without a link to a linkedin company page: take the last non-blank line of the subtitle
          n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
          # `&.` covers a subtitle made only of blank lines, where `last` is nil
          company_name = n2.text.split("\n").reject { |s| s.strip.empty? }.last&.strip if n2
        end

        # add the information to the output file (double quotes are dropped, not escaped)
        line = []
        line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
        line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name

        l.logs "#{i.to_s}, #{line.join(',')}... "
        output.puts line.join(',')
        output.flush
        l.done
      end
    end
  end

  nil
end # def self.parse_sales_navigator_result_pages(search_name)
53
+ end # module Parser
54
+
55
# Probe +domain+ with a fixed, random-looking mailbox name and return whatever
# BlackStack::Appending.verify answers for it. A truthy answer suggests the
# domain accepts any address (catch-all).
def self.catch_all?(domain)
  probe_address = "008e77980535470e848a4ca859a83db0@#{domain}"
  BlackStack::Appending.verify(probe_address)
end
59
+
60
# Verify an email address through the ConnectionSphere HTTP API
# (the original author notes the request is served from the website's AWS IP,
# which is more reliable than verifying from the local machine).
#
# email - the address to verify.
#
# Returns true when the JSON response carries "status" == "success", false otherwise.
# Errors raised by BlackStack::Netting.call_get or JSON.parse are not rescued here.
def self.verify(email)
  url = "https://connectionsphere.com/api1.0/emails/verify.json"
  res = BlackStack::Netting.call_get(url, { :email => email })
  JSON.parse(res.body)['status'] == 'success'
end
77
+
78
# Guess the email addresses of a person at a company domain.
#
# Builds the usual first/last name permutations (first, last, first.last,
# last.first, firstlast, lastfirst, flast, f.last), verifies each candidate
# with `verify`, and returns the ones that pass.
#
# fname  - first name (nil is treated as an empty string).
# lname  - last name (nil is treated as an empty string).
# domain - the company's email domain.
#
# Returns an Array of lower-cased addresses without duplicates; it is empty
# when the domain is a catch-all, since every guess would verify there.
def self.append(fname, lname, domain)
  ret = []
  return ret if catch_all?(domain)

  # NOTE(review): `verify` in this file goes through the HTTP API, so this
  # EmailVerifier setup looks unused here; kept in case other code relies on it.
  EmailVerifier.config do |config|
    config.verifier_email = "leandro.sardi@expandedventure.com"
  end

  given = fname.to_s
  family = lname.to_s
  initial = given[0].to_s

  local_parts = [
    given,
    family,

    "#{given}.#{family}",
    "#{family}.#{given}",

    "#{given}#{family}",
    "#{family}#{given}",

    "#{initial}#{family}",
    "#{initial}.#{family}",
  ]

  # A missing first or last name yields parts such as "", "john." or ".doe",
  # and collapses several patterns into the same address: skip those so each
  # distinct, well-formed candidate is verified exactly once.
  local_parts
    .reject { |part| part.empty? || part.start_with?('.') || part.end_with?('.') }
    .map { |part| "#{part}@#{domain}" }
    .uniq(&:downcase)
    .each { |email| ret << email.downcase if verify(email) }

  ret
end
104
+
105
# Extract the first name from a full name.
#
# The name is read as runs of ASCII letters; the first run is the first name.
# Scanning for letter runs (instead of splitting on non-letters) means leading
# spaces or punctuation no longer produce an empty first token.
#
# name - full name String, or nil.
#
# Returns the first run of ASCII letters, or '' when there is none.
def self.cleanup_fname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/).first || ''
end
110
+
111
# Extract the last name from a full name.
#
# The name is read as runs of ASCII letters; the second run is taken as the
# last name. Scanning for letter runs (instead of splitting on non-letters)
# means repeated or leading separators no longer yield an empty or shifted token.
#
# name - full name String, or nil.
#
# Returns the second run of ASCII letters, or '' when there are fewer than two.
def self.cleanup_lname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/)[1] || ''
end
116
+
117
# Normalize a company name scraped from a LinkedIn headline or subtitle.
#
# Steps:
#   1. keep only what follows the last " at " (e.g. "CEO at Acme" => "Acme");
#   2. drop the company-type suffixes "LLC" and "Inc" (whole words only, so
#      names such as "Incredible" are left intact);
#   3. drop LinkedIn's "(YYYY - Present)" suffix;
#   4. drop every character that is not an ASCII letter, digit, comma, dot,
#      hyphen or whitespace;
#   5. trim surrounding whitespace and trailing dots/commas;
#   6. keep at most the first three words.
#
# company - raw company String, or nil. The argument is not modified.
#
# Returns the cleaned name, or '' for nil/blank input.
def self.cleanup_company(company)
  return '' if company.nil?

  # split returns [] for an empty string, so `last` may be nil
  name = company.split(/ at /).last.to_s

  name = name.gsub(/\b(?:LLC|Inc)\b/, '')
  name = name.gsub(/\(\d\d\d\d - Present\)/, '')
  name = name.gsub(/[^a-zA-Z0-9,\.\-\s]/, '')

  # trim after all removals so leftovers like "Acme, ." collapse to "Acme"
  name = name.strip.sub(/[.,\s]+\z/, '')

  # choose the first part of the company name
  name.split(' ').first(3).join(' ').sub(/[.,]+\z/, '')
end
144
+ end # Appending
145
+ end # BlackStack
metadata ADDED
@@ -0,0 +1,163 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: appending
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.1'
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-12-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: blackstack-core
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.2.3
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.2.3
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.2.3
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.2.3
33
+ - !ruby/object:Gem::Dependency
34
+ name: blackstack-nodes
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.11
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.11
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.11
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.11
53
+ - !ruby/object:Gem::Dependency
54
+ name: blackstack-deployer
55
+ requirement: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - "~>"
58
+ - !ruby/object:Gem::Version
59
+ version: 1.2.24
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: 1.2.24
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 1.2.24
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: 1.2.24
73
+ - !ruby/object:Gem::Dependency
74
+ name: simple_command_line_parser
75
+ requirement: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - "~>"
78
+ - !ruby/object:Gem::Version
79
+ version: 1.1.2
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: 1.1.2
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 1.1.2
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: 1.1.2
93
+ - !ruby/object:Gem::Dependency
94
+ name: simple_cloud_logging
95
+ requirement: !ruby/object:Gem::Requirement
96
+ requirements:
97
+ - - "~>"
98
+ - !ruby/object:Gem::Version
99
+ version: 1.2.2
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: 1.2.2
103
+ type: :runtime
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: 1.2.2
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: 1.2.2
113
+ - !ruby/object:Gem::Dependency
114
+ name: csv-indexer
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - "~>"
118
+ - !ruby/object:Gem::Version
119
+ version: 1.0.2
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: 1.0.2
123
+ type: :runtime
124
+ prerelease: false
125
+ version_requirements: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - "~>"
128
+ - !ruby/object:Gem::Version
129
+ version: 1.0.2
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 1.0.2
133
+ description: Appending is a Ruby gem for data enrichment of people and companies.
134
+ email: leandro.sardi@expandedventure.com
135
+ executables: []
136
+ extensions: []
137
+ extra_rdoc_files: []
138
+ files:
139
+ - appending.rb
140
+ homepage: https://rubygems.org/gems/pampa
141
+ licenses:
142
+ - MIT
143
+ metadata: {}
144
+ post_install_message:
145
+ rdoc_options: []
146
+ require_paths:
147
+ - lib
148
+ required_ruby_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ required_rubygems_version: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ">="
156
+ - !ruby/object:Gem::Version
157
+ version: '0'
158
+ requirements: []
159
+ rubygems_version: 3.3.7
160
+ signing_key:
161
+ specification_version: 4
162
+ summary: Appending is a Ruby gem for data enrichment of people and companies.
163
+ test_files: []