appending 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/appending.rb +145 -0
- metadata +163 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 39f9443f81936b25c1564601f25d53f2444b9d17fdf864b68fd918b01ce4c3c5
|
4
|
+
data.tar.gz: 965989073d0be533dd504eca8902775f9599fb094b18f173f8600e38a092b1e6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8040fcf14dcb3797b4241c676fd06b856a8fb4126fb7b495ad2eaf0de2409a89bb644bf5b7aa7ca2fd3dead6cbe34e9cd15fbc386d8038c7b2b4add517b3e5ae
|
7
|
+
data.tar.gz: ceba3742c7e0128a04f7026b1ef446bc39726dc4253d42511ca6ead0e350850f95f32b27e7f0dd78589dc172a1ab9480f162b23cc4f24c75dc7b6f1fc7bab218
|
data/appending.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'email_verifier'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module BlackStack
|
6
|
+
module Appending
|
7
|
+
# This module is used to parse the HTML files downloaded from Sales Navigator and other sources.
|
8
|
+
module Parser
|
9
|
+
# parse search results pages from sales navigator, and save the company name and full name into a CSV file
|
10
|
+
# Parse the HTML pages stored under "#{DATA_PATH}/searches/<search_name>/*.html"
# and write one row per result item to "#{DATA_PATH}/searches/<search_name>.csv".
#
# Each row holds the lead's full name (when found) followed by the company
# name (when found); every value is wrapped in double quotes and any double
# quote inside the value is removed.
#
# NOTE(review): when the full name is missing but the company is present, the
# company ends up in the first column. The row format is left untouched here
# because consumers of the file are not visible from this method.
#
# @param search_name [String] name of the search; selects both the input folder and the output file.
# @param l [Object, nil] logger responding to +logs+ and +done+; a BlackStack::DummyLogger is created when nil.
# @return [nil]
# @raise [RuntimeError] if the output file already exists.
def self.parse_sales_navigator_result_pages(search_name, l=nil)
  # create logger if not passed
  l = BlackStack::DummyLogger.new(nil) if l.nil?

  # define output filename; never overwrite the result of a previous run.
  # File.exist? is used because File.exists? was removed in Ruby 3.2.
  output_file = "#{DATA_PATH}/searches/#{search_name}.csv"
  raise 'Output file already exists.' if File.exist?(output_file)

  source = "#{DATA_PATH}/searches/#{search_name}/*.html" # the files to be imported
  i = 0

  # the block form closes the output file even if parsing raises
  File.open(output_file, 'w') do |output|
    Dir.glob(source).each do |file|
      # File.open (rather than Kernel#open) and the block form close each page after parsing
      doc = File.open(file) { |f| Nokogiri::HTML(f) }
      doc.xpath('//li[contains(@class, "artdeco-list__item")]').each do |li|
        i += 1
        doc2 = Nokogiri::HTML(li.inner_html)

        # this is where to find the full name of the lead
        n1 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__title")]/a/span').first

        # this is where to find the name of the company, when it has a link to a linkedin company page
        n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]/a').first

        company_name = nil
        if n2
          company_name = n2.text
        else
          # company without a link to a linkedin company page: take the last non-blank line of the subtitle
          n2 = doc2.xpath('//div[contains(@class,"artdeco-entity-lockup__subtitle")]').first
          if n2
            last_line = n2.text.split("\n").reject { |s| s.strip.empty? }.last
            # a subtitle made only of blank lines has no company name
            company_name = last_line.strip if last_line
          end
        end

        # add the information to the output file
        line = []
        line << "\"#{n1.text.strip.gsub('"', '')}\"" if n1
        line << "\"#{company_name.strip.gsub('"', '')}\"" if company_name
        l.logs "#{i.to_s}, #{line.join(',')}... "
        output.puts line.join(',')
        output.flush
        l.done
      end
    end
  end

  nil
end # def self.parse_sales_navigator_result_pages(search_name)
|
53
|
+
end # module Parser
|
54
|
+
|
55
|
+
# return true if the domain accepts any random address as valid (i.e. it is a catch-all domain)
|
56
|
+
# Probe +domain+ with a fixed, random-looking mailbox name.
#
# @param domain [String] domain to probe.
# @return [Boolean] the result of BlackStack::Appending.verify for the probe address.
def self.catch_all?(domain)
  probe_address = "008e77980535470e848a4ca859a83db0@#{domain}"
  BlackStack::Appending.verify(probe_address)
end
|
59
|
+
|
60
|
+
# verify an email address using the AWS IP address of our website, which is more reliable
|
61
|
+
# Ask the remote verification endpoint about +email+.
#
# Issues a GET request through BlackStack::Netting.call_get and parses the
# response body as JSON.
#
# @param email [String] address to verify.
# @return [Boolean] true when the 'status' field of the response equals 'success'.
def self.verify(email)
  endpoint = "https://connectionsphere.com/api1.0/emails/verify.json"
  response = BlackStack::Netting::call_get(endpoint, { :email => email })
  JSON.parse(response.body)['status'] == 'success'
  # Disabled alternative (local check through the email_verifier gem), kept for reference:
  #   EmailVerifier.config do |config|
  #     config.verifier_email = "leandro.sardi@expandedventure.com"
  #   end
  #   res = EmailVerifier.check(email)
  #   res
end
|
77
|
+
|
78
|
+
# build common email address patterns for a person at a domain, and return the ones that pass verification
|
79
|
+
# Guess the email addresses of a person at +domain+.
#
# Builds the usual address patterns from the first and last name
# (first, last, first.last, last.first, firstlast, lastfirst, flast, f.last),
# and keeps the ones for which +verify+ returns true. Nothing is attempted
# when +catch_all?+ reports the domain as catch-all.
#
# Patterns that need a missing name part are skipped, so no malformed address
# such as "@domain" or "john.@domain" is sent for verification. Candidates are
# downcased and de-duplicated before verification, so the address that is
# verified is exactly the one that is returned.
#
# @param fname [String, nil] first name.
# @param lname [String, nil] last name.
# @param domain [String] email domain.
# @return [Array<String>] verified addresses, lowercase, in pattern order.
def self.append(fname, lname, domain)
  ret = []
  return ret if catch_all?(domain)

  # NOTE(review): +verify+ as visible in this file does not use EmailVerifier;
  # this configuration is kept so the side effect stays the same as before.
  EmailVerifier.config do |config|
    config.verifier_email = "leandro.sardi@expandedventure.com"
  end

  first_name = fname.to_s.strip
  last_name = lname.to_s.strip

  # local parts, in the same order as the original pattern list
  local_parts = []
  local_parts << first_name unless first_name.empty?
  local_parts << last_name unless last_name.empty?
  unless first_name.empty? || last_name.empty?
    initial = first_name[0]
    local_parts.concat([
      "#{first_name}.#{last_name}",
      "#{last_name}.#{first_name}",
      "#{first_name}#{last_name}",
      "#{last_name}#{first_name}",
      "#{initial}#{last_name}",
      "#{initial}.#{last_name}",
    ])
  end

  candidates = local_parts.map { |local| "#{local}@#{domain}".downcase }.uniq
  candidates.each { |email| ret << email if verify(email) }
  ret
end
|
104
|
+
|
105
|
+
# Extract the first name from a full name.
#
# The name is read as runs of ASCII letters; the first run is the first name.
# Scanning for letter runs (instead of splitting on non-letters) avoids the
# empty tokens that a leading separator produces, so " John Smith" yields
# "John" rather than "".
#
# @param name [String, nil] full name.
# @return [String] the first run of ASCII letters, or '' when there is none or +name+ is nil.
def self.cleanup_fname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/).first || ''
end
|
110
|
+
|
111
|
+
# Extract the last name from a full name.
#
# The name is read as runs of ASCII letters; the second run is taken as the
# last name. Scanning for letter runs (instead of splitting on non-letters)
# avoids the empty tokens produced by consecutive or leading separators, so
# "John  Smith" and "John, Smith" yield "Smith" rather than "".
#
# @param name [String, nil] full name.
# @return [String] the second run of ASCII letters, or '' when there is none or +name+ is nil.
def self.cleanup_lname(name)
  return '' if name.nil?
  name.scan(/[a-zA-Z]+/)[1] || ''
end
|
116
|
+
|
117
|
+
# Normalize a company name scraped from a headline.
#
# Keeps what follows the last " at ", removes the "LLC" / "Inc" suffixes and
# the "(YYYY - Present)" tenure marker, drops characters other than ASCII
# letters, digits, comma, dot, dash and whitespace, trims trailing
# punctuation, and returns at most the first three words.
#
# Differences from the previous implementation:
# - "LLC" and "Inc" are removed only as whole words ("Incubator" is kept intact);
# - an empty string returns '' instead of raising;
# - trailing dots/commas are removed even when followed by spaces ("Acme, Inc." => "Acme").
#
# The argument is never modified.
#
# @param company [String, nil] raw company text.
# @return [String] cleaned company name, '' when +company+ is nil or nothing is left.
def self.cleanup_company(company)
  return '' if company.nil?

  # stage 1: keep the text after the last " at ", and remove company-type suffixes.
  # +to_s+ covers the case where split returns an empty array.
  name = company.split(/ at /).last.to_s
  name = name.gsub(/\bLLC\b/, '').gsub(/\bInc\b/, '')

  # stage 2: remove LinkedIn tenure suffixes (must run before parentheses are dropped below)
  name = name.gsub(/\(\d\d\d\d - Present\)/, '')

  # stage 3: remove every character that is not a letter, digit, comma, dot, dash or whitespace
  name = name.gsub(/[^a-zA-Z0-9,\.\-\s]/, '')

  # stage 4: remove trailing dots, commas and spaces, then leading spaces
  name = name.sub(/[\s.,]+\z/, '').strip

  # stage 5: choose the first part of the company name (up to three words)
  name.split(' ').first(3).join(' ')
end
|
144
|
+
end # Appending
|
145
|
+
end # BlackStack
|
metadata
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: appending
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: '0.1'
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leandro Daniel Sardi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: blackstack-core
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.2.3
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.2.3
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.2.3
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.2.3
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: blackstack-nodes
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 1.2.11
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.2.11
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.11
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.2.11
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: blackstack-deployer
|
55
|
+
requirement: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - "~>"
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.2.24
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.2.24
|
63
|
+
type: :runtime
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - "~>"
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: 1.2.24
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: 1.2.24
|
73
|
+
- !ruby/object:Gem::Dependency
|
74
|
+
name: simple_command_line_parser
|
75
|
+
requirement: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - "~>"
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 1.1.2
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 1.1.2
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 1.1.2
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: 1.1.2
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: simple_cloud_logging
|
95
|
+
requirement: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - "~>"
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 1.2.2
|
100
|
+
- - ">="
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: 1.2.2
|
103
|
+
type: :runtime
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.2.2
|
110
|
+
- - ">="
|
111
|
+
- !ruby/object:Gem::Version
|
112
|
+
version: 1.2.2
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: csv-indexer
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - "~>"
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: 1.0.2
|
120
|
+
- - ">="
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.0.2
|
123
|
+
type: :runtime
|
124
|
+
prerelease: false
|
125
|
+
version_requirements: !ruby/object:Gem::Requirement
|
126
|
+
requirements:
|
127
|
+
- - "~>"
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: 1.0.2
|
130
|
+
- - ">="
|
131
|
+
- !ruby/object:Gem::Version
|
132
|
+
version: 1.0.2
|
133
|
+
description: Appending is a Ruby gem for data enrichment of people and companies.
|
134
|
+
email: leandro.sardi@expandedventure.com
|
135
|
+
executables: []
|
136
|
+
extensions: []
|
137
|
+
extra_rdoc_files: []
|
138
|
+
files:
|
139
|
+
- appending.rb
|
140
|
+
homepage: https://rubygems.org/gems/pampa
|
141
|
+
licenses:
|
142
|
+
- MIT
|
143
|
+
metadata: {}
|
144
|
+
post_install_message:
|
145
|
+
rdoc_options: []
|
146
|
+
require_paths:
|
147
|
+
- lib
|
148
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - ">="
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '0'
|
153
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
154
|
+
requirements:
|
155
|
+
- - ">="
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: '0'
|
158
|
+
requirements: []
|
159
|
+
rubygems_version: 3.3.7
|
160
|
+
signing_key:
|
161
|
+
specification_version: 4
|
162
|
+
summary: Appending is a Ruby gem for data enrichment of people and companies.
|
163
|
+
test_files: []
|