appending 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/appending.rb +145 -0
- metadata +163 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 39f9443f81936b25c1564601f25d53f2444b9d17fdf864b68fd918b01ce4c3c5
|
4
|
+
data.tar.gz: 965989073d0be533dd504eca8902775f9599fb094b18f173f8600e38a092b1e6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8040fcf14dcb3797b4241c676fd06b856a8fb4126fb7b495ad2eaf0de2409a89bb644bf5b7aa7ca2fd3dead6cbe34e9cd15fbc386d8038c7b2b4add517b3e5ae
|
7
|
+
data.tar.gz: ceba3742c7e0128a04f7026b1ef446bc39726dc4253d42511ca6ead0e350850f95f32b27e7f0dd78589dc172a1ab9480f162b23cc4f24c75dc7b6f1fc7bab218
|
data/appending.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module BlackStack
|
6
|
+
module Appending
|
7
|
+
# Parsers for HTML files downloaded from Sales Navigator and other sources.
module Parser
  # Parse the saved Sales Navigator search-result pages of one search and
  # write the full name and the company name of each result into a CSV file.
  #
  # Pages are read from "#{DATA_PATH}/searches/#{search_name}/*.html" and the
  # result is written to "#{DATA_PATH}/searches/#{search_name}.csv", one line
  # per <li> whose class contains "artdeco-list__item". Each present value is
  # written as a double-quoted field with any inner double quotes removed.
  #
  # @param search_name [String] name of the search (sub-folder and CSV base name).
  # @param l [#logs, #done, nil] logger; a BlackStack::DummyLogger is created when nil.
  # @return [nil]
  # @raise [RuntimeError] if the output file already exists.
  def self.parse_sales_navigator_result_pages(search_name, l=nil)
    # create logger if not passed
    l = BlackStack::DummyLogger.new(nil) if l.nil?
    # define output filename
    output_file = "#{DATA_PATH}/searches/#{search_name}.csv" # the output file
    # File.exists? was removed in Ruby 3.2; File.exist? is available on every version.
    raise 'Output file already exists.' if File.exist?(output_file)
    source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
    i = 0
    # The block form closes the output file even if parsing raises.
    File.open(output_file, 'w') do |output|
      Dir.glob(source).each do |file|
        # Read through a block so the page's file handle is released right away.
        doc = File.open(file) { |f| Nokogiri::HTML(f) }
        doc.xpath('//li[contains(@class, "artdeco-list__item")]').each do |li|
          i += 1
          # Re-parse the item so the absolute XPaths below only see this item.
          doc2 = Nokogiri::HTML(li.inner_html)
          # this is where to find the full name of the lead
          n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first
          # this is where to find the name of the company, when it has a link to a linkedin company page
          n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first
          company_name = nil
          if n2
            company_name = n2.text
          else
            # the company has no link to a linkedin company page: take the
            # last non-blank line of the subtitle text
            n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
            if n2
              last_line = n2.text.split("\n").reject { |s| s.strip.empty? }.last
              # last_line is nil when the subtitle holds only whitespace
              company_name = last_line.strip if last_line
            end
          end
          # add the information to the output file
          line = []
          line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
          line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
          l.logs "#{i}, #{line.join(',')}... "
          output.puts line.join(',')
          output.flush
          l.done
        end
      end
    end
    nil
  end # def self.parse_sales_navigator_result_pages(search_name)
end # module Parser
|
54
|
+
|
55
|
+
# Tell whether +domain+ looks like a catch-all domain: probe it with a fixed,
# random-looking mailbox name and return whatever
# BlackStack::Appending.verify reports for that address.
#
# @param domain [String] domain to probe (interpolated after the "@").
# @return the result of BlackStack::Appending.verify for the probe address.
def self.catch_all?(domain)
  probe_address = "008e77980535470e848a4ca859a83db0@#{domain}"
  BlackStack::Appending.verify(probe_address)
end
|
59
|
+
|
60
|
+
# Verify an email address through the remote verification endpoint of
# connectionsphere.com rather than from the local machine (the original
# author notes that the website's AWS IP address is more reliable).
#
# Sends a GET request with the address as the :email parameter, parses the
# response body as JSON and checks its 'status' field.
#
# @param email [String] the address to verify.
# @return [Boolean] true only when the response's 'status' equals 'success'.
def self.verify(email)
  endpoint = "https://connectionsphere.com/api1.0/emails/verify.json"
  query = { email: email }
  response = BlackStack::Netting.call_get(endpoint, query)
  JSON.parse(response.body)['status'] == 'success'
end
|
77
|
+
|
78
|
+
# Guess the email addresses of a person at a domain.
#
# Builds the usual address patterns from the first and last name
# (first, last, first.last, last.first, firstlast, lastfirst, flast, f.last),
# checks each one with verify and returns the ones that pass, lower-cased.
# Nothing is probed when the domain is catch-all (every pattern would pass)
# or when both names are blank.
#
# @param fname [String, nil] first name.
# @param lname [String, nil] last name.
# @param domain [String] email domain.
# @return [Array<String>] verified addresses, lower-cased, in pattern order.
def self.append(fname, lname, domain)
  # Normalize so nil or blank names cannot crash (nil[0]) nor produce
  # malformed addresses such as "@domain" or ".smith@domain".
  first = fname.to_s.strip
  last = lname.to_s.strip
  return [] if first.empty? && last.empty?
  return [] if catch_all?(domain)

  # Kept from the original implementation; verify in this file goes through
  # the remote API and does not read this setting.
  EmailVerifier.config do |config|
    config.verifier_email = "leandro.sardi@expandedventure.com"
  end

  local_parts = []
  local_parts << first unless first.empty?
  local_parts << last unless last.empty?
  unless first.empty? || last.empty?
    local_parts.push(
      "#{first}.#{last}",
      "#{last}.#{first}",
      "#{first}#{last}",
      "#{last}#{first}",
      "#{first[0]}#{last}",
      "#{first[0]}.#{last}"
    )
  end

  # uniq avoids probing the same address twice (e.g. first == last).
  local_parts.uniq
             .map { |local| "#{local}@#{domain}" }
             .select { |email| verify(email) }
             .map(&:downcase)
end
|
104
|
+
|
105
|
+
# Extract the first name from a full name: the first run of ASCII letters.
#
# scan is used instead of split so that leading or repeated separators
# (spaces, dots, dashes) do not produce empty tokens.
#
# @param name [String, nil] full name.
# @return [String] the first alphabetic word, or '' when there is none.
def self.cleanup_fname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/).first || ''
end
|
110
|
+
|
111
|
+
# Extract the last name from a full name: the second run of ASCII letters.
#
# scan is used instead of split so that leading or repeated separators
# (spaces, dots, dashes) do not produce empty tokens that shift the result.
#
# @param name [String, nil] full name.
# @return [String] the second alphabetic word, or '' when there is none.
def self.cleanup_lname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/)[1] || ''
end
|
116
|
+
|
117
|
+
# Normalize a company name scraped from a LinkedIn-style headline.
#
# Keeps the text after the last " at ", removes the company-type suffixes
# "LLC" and "Inc", removes a "(YYYY - Present)" tenure marker, drops every
# character other than ASCII letters, digits, comma, dot, dash and
# whitespace, trims dangling trailing punctuation and returns at most the
# first three words. The argument is never modified.
#
# @param company [String, nil] raw company text.
# @return [String] cleaned company name; '' for nil or blank input.
def self.cleanup_company(company)
  return '' if company.nil?
  # stage 1: keep what follows the last " at " ("CEO at Acme" -> "Acme").
  # split returns [] for '' and for ' at ', so the tail may be nil.
  name = company.split(/ at /).last
  return '' if name.nil?
  # stage 2: remove company-type suffixes, as whole words only, so names
  # that merely start with "Inc" (e.g. "Incline") stay intact
  name = name.gsub(/\b(?:LLC|Inc)\b/, '').strip
  # stage 3: remove LinkedIn suffixes
  name = name.gsub(/\(\d\d\d\d - Present\)/, '').strip
  # stage 4: remove characters outside the allowed set
  name = name.gsub(/[^a-zA-Z0-9,\.\-\s]/, '').strip
  # stage 5: remove dots, commas and spaces left dangling at the end
  # (e.g. "Acme, Inc." -> "Acme, ." -> "Acme")
  name = name.sub(/[\s.,]+\z/, '')
  # stage 6: keep the first three words of the company name
  name.split(' ').first(3).join(' ')
end
|
144
|
+
end # Appending
|
145
|
+
end # BlackStack
|
metadata
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: appending
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leandro Daniel Sardi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: blackstack-core
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.3
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.2.3
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.2.3
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.2.3
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: blackstack-nodes
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 1.2.11
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.2.11
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.11
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.2.11
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: blackstack-deployer
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.2.24
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.2.24
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.2.24
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 1.2.24
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: simple_command_line_parser
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.1.2
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.1.2
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.1.2
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.1.2
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: simple_cloud_logging
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.2.2
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 1.2.2
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.2.2
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 1.2.2
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: csv-indexer
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.0.2
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.0.2
|
123
|
+
type: :runtime
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: 1.0.2
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 1.0.2
|
133
|
+
description: Appending is a Ruby gem for data enrichment of people and companies.
|
134
|
+
email: leandro.sardi@expandedventure.com
|
135
|
+
executables: []
|
136
|
+
extensions: []
|
137
|
+
extra_rdoc_files: []
|
138
|
+
files:
|
139
|
+
- appending.rb
|
140
|
+
homepage: https://rubygems.org/gems/pampa
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - ">="
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubygems_version: 3.3.7
|
160
|
+
signing_key:
|
161
|
+
specification_version: 4
|
162
|
+
summary: Appending is a Ruby gem for data enrichment of people and companies.
|
163
|
+
test_files: []
|