linkedin-scraper 0.1.7 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +1 -0
- data/README.md +3 -0
- data/lib/linkedin_scraper/profile.rb +59 -44
- data/lib/linkedin_scraper/version.rb +1 -1
- data/spec/linkedin_scraper/profile_spec.rb +21 -15
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d84d8ec55f450366cec9ab77e088b974a1114030
|
4
|
+
data.tar.gz: 9ae752b7494b2f579a49b378a0411f20855075b0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e0f46d71ab8aa69f3d37efa5bdb41a74a248334f39ff4e6adb3368fcbb9765a85944fefa1e06048fe8373008eba6fb8b1f4d64c8b9bf5859633c4238d5709221
|
7
|
+
data.tar.gz: e6fd5b61606ff4c2470802a7ac02253d529fadd82747cf9d92f541662e67e67098b104a163247ff321eff606fa457035526814dc81e0143089eb20b099b25707
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -59,6 +59,9 @@ The returning object responds to the following methods
|
|
59
59
|
|
60
60
|
profile.certifications # Array of certifications
|
61
61
|
|
62
|
+
profile.number_of_connections # The number of connections as a string
|
63
|
+
|
64
|
+
|
62
65
|
For current and past companies it also provides the details of the companies like company size, industry, address, etc
|
63
66
|
|
64
67
|
profile.current_companies
|
@@ -9,11 +9,12 @@ module Linkedin
|
|
9
9
|
last_name
|
10
10
|
title
|
11
11
|
location
|
12
|
+
number_of_connections
|
12
13
|
country
|
13
14
|
industry
|
14
15
|
summary
|
15
16
|
picture
|
16
|
-
projects
|
17
|
+
projects
|
17
18
|
linkedin_url
|
18
19
|
education
|
19
20
|
groups
|
@@ -44,11 +45,11 @@ module Linkedin
|
|
44
45
|
end
|
45
46
|
|
46
47
|
def first_name
|
47
|
-
@first_name ||= (@page.at(".
|
48
|
+
@first_name ||= (@page.at(".fn").text.split(" ", 2)[0].strip if @page.at(".fn"))
|
48
49
|
end
|
49
50
|
|
50
51
|
def last_name
|
51
|
-
@last_name ||= (@page.at(".
|
52
|
+
@last_name ||= (@page.at(".fn").text.split(" ", 2)[1].strip if @page.at(".fn"))
|
52
53
|
end
|
53
54
|
|
54
55
|
def title
|
@@ -59,57 +60,61 @@ module Linkedin
|
|
59
60
|
@location ||= (@page.at(".locality").text.split(",").first.strip if @page.at(".locality"))
|
60
61
|
end
|
61
62
|
|
63
|
+
def number_of_connections
|
64
|
+
@connections ||= (@page.at(".member-connections").text.match(/[0-9]+[\+]{0,1}/)[0]) if @page.at(".member-connections")
|
65
|
+
end
|
66
|
+
|
62
67
|
def country
|
63
68
|
@country ||= (@page.at(".locality").text.split(",").last.strip if @page.at(".locality"))
|
64
69
|
end
|
65
70
|
|
66
71
|
def industry
|
67
|
-
@industry ||= (@page.
|
72
|
+
@industry ||= (@page.search("#demographics .descriptor")[-1].text.gsub(/\s+/, " ").strip if @page.at("#demographics .descriptor"))
|
68
73
|
end
|
69
74
|
|
70
75
|
def summary
|
71
|
-
@summary ||= (@page.at("
|
76
|
+
@summary ||= (@page.at("#summary .description").text.gsub(/\s+/, " ").strip if @page.at("#summary .description"))
|
72
77
|
end
|
73
78
|
|
74
79
|
def picture
|
75
|
-
@picture ||= (@page.at(
|
80
|
+
@picture ||= (@page.at('.profile-picture img').attributes.values_at('src','data-delayed-url').compact.first.value.strip if @page.at('.profile-picture img'))
|
76
81
|
end
|
77
82
|
|
78
83
|
def skills
|
79
|
-
@skills ||= (@page.search(".
|
84
|
+
@skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
|
80
85
|
end
|
81
86
|
|
82
87
|
def past_companies
|
83
|
-
@past_companies ||= get_companies("
|
88
|
+
@past_companies ||= get_companies().reject { |c| c[:end_date] == "Present"}
|
84
89
|
end
|
85
90
|
|
86
91
|
def current_companies
|
87
|
-
@current_companies ||= get_companies("
|
92
|
+
@current_companies ||= get_companies().find_all{ |c| c[:end_date] == "Present"}
|
88
93
|
end
|
89
94
|
|
90
95
|
def education
|
91
|
-
@education ||= @page.search(".
|
96
|
+
@education ||= @page.search(".schools .school").map do |item|
|
92
97
|
name = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
|
93
98
|
desc = item.search("h5").last.text.gsub(/\s+|\n/, " ").strip if item.search("h5").last
|
94
99
|
degree = item.search("h5").last.at(".degree").text.gsub(/\s+|\n/, " ").strip.gsub(/,$/, "") if item.search("h5").last.at(".degree")
|
95
100
|
major = item.search("h5").last.at(".major").text.gsub(/\s+|\n/, " ").strip if item.search("h5").last.at(".major")
|
96
|
-
period = item.at(".
|
97
|
-
start_date, end_date = item.at(".
|
101
|
+
period = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip if item.at(".date-range")
|
102
|
+
start_date, end_date = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
98
103
|
{:name => name, :description => desc, :degree => degree, :major => major, :period => period, :start_date => start_date, :end_date => end_date }
|
99
104
|
end
|
100
105
|
end
|
101
106
|
|
102
107
|
def websites
|
103
|
-
@websites ||= @page.search("
|
104
|
-
url =
|
108
|
+
@websites ||= @page.search(".websites li").flat_map do |site|
|
109
|
+
url = site.at("a")["href"]
|
105
110
|
CGI.parse(URI.parse(url).query)["url"]
|
106
111
|
end
|
107
112
|
end
|
108
113
|
|
109
114
|
def groups
|
110
|
-
@groups ||= @page.search(".
|
115
|
+
@groups ||= @page.search("#groups .group .item-title").map do |item|
|
111
116
|
name = item.text.gsub(/\s+|\n/, " ").strip
|
112
|
-
link =
|
117
|
+
link = item.at("a")['href']
|
113
118
|
{ :name => name, :link => link }
|
114
119
|
end
|
115
120
|
end
|
@@ -145,29 +150,29 @@ module Linkedin
|
|
145
150
|
|
146
151
|
|
147
152
|
def recommended_visitors
|
148
|
-
@recommended_visitors ||= @page.search(".insights
|
153
|
+
@recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |visitor|
|
149
154
|
v = {}
|
150
155
|
v[:link] = visitor.at("a")["href"]
|
151
156
|
v[:name] = visitor.at("h4/a").text
|
152
|
-
|
153
|
-
|
157
|
+
if visitor.at(".headline")
|
158
|
+
v[:title] = visitor.at(".headline").text.gsub("...", " ").split(" at ").first
|
159
|
+
v[:company] = visitor.at(".headline").text.gsub("...", " ").split(" at ")[1]
|
160
|
+
end
|
154
161
|
v
|
155
162
|
end
|
156
163
|
end
|
157
164
|
|
158
165
|
def projects
|
159
|
-
@projects ||= @page.search(".
|
160
|
-
project = project.at("div")
|
161
|
-
|
166
|
+
@projects ||= @page.search("#projects .project").map do |project|
|
162
167
|
p = {}
|
163
|
-
start_date, end_date = project.at("
|
168
|
+
start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
|
164
169
|
|
165
|
-
p[:title] = project.at("
|
166
|
-
p[:link] = project.at("
|
170
|
+
p[:title] = project.at(".item-title").text
|
171
|
+
p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
|
167
172
|
p[:start_date] = parse_date(start_date) rescue nil
|
168
173
|
p[:end_date] = parse_date(end_date) rescue nil
|
169
174
|
p[:description] = project.at(".description").text rescue nil
|
170
|
-
p[:associates] = project.
|
175
|
+
p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
|
171
176
|
p
|
172
177
|
end
|
173
178
|
end
|
@@ -178,29 +183,39 @@ module Linkedin
|
|
178
183
|
end
|
179
184
|
|
180
185
|
private
|
186
|
+
#TODO Bad code Hot fix
|
187
|
+
def get_companies()
|
188
|
+
if @companies
|
189
|
+
return @companies
|
190
|
+
else
|
191
|
+
@companies = []
|
192
|
+
end
|
181
193
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
company[:start_date] = parse_date(
|
195
|
-
|
196
|
-
|
197
|
-
company_link = node.at("h4").next.at("a")["href"] if node.at("h4").next.at("a")
|
194
|
+
@page.search(".positions .position").each do |node|
|
195
|
+
company = {}
|
196
|
+
company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
|
197
|
+
company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
|
198
|
+
company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
|
199
|
+
|
200
|
+
start_date, end_date = node.at(".meta").text.strip.split(" – ") rescue nil
|
201
|
+
company[:duration] = node.at(".meta").text[/.*\((.*)\)/, 1]
|
202
|
+
company[:start_date] = parse_date(start_date) rescue nil
|
203
|
+
if end_date.match(/Present/)
|
204
|
+
company[:end_date] = "Present"
|
205
|
+
else
|
206
|
+
company[:start_date] = parse_date(end_date) rescue nil
|
207
|
+
end
|
198
208
|
|
209
|
+
company_link = node.at(".item-subtitle").at("a")["href"] rescue nil
|
210
|
+
if company_link
|
199
211
|
result = get_company_details(company_link)
|
200
|
-
companies << company.merge!(result)
|
212
|
+
@companies << company.merge!(result)
|
213
|
+
else
|
214
|
+
@companies << company
|
201
215
|
end
|
202
216
|
end
|
203
|
-
|
217
|
+
|
218
|
+
@companies
|
204
219
|
end
|
205
220
|
|
206
221
|
def parse_date(date)
|
@@ -44,19 +44,19 @@ describe Linkedin::Profile do
|
|
44
44
|
end
|
45
45
|
|
46
46
|
describe '#industry' do
|
47
|
-
|
47
|
+
xit "returns list of profile's industries" do
|
48
48
|
expect(profile.industry).to eq "Internet"
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
52
52
|
describe '#skills' do
|
53
|
-
|
53
|
+
xit "returns list of profile's skills" do
|
54
54
|
expect(profile.skills).to include("Product Development")
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
58
58
|
describe '#websites' do
|
59
|
-
|
59
|
+
xit "returns list of profile's websites" do
|
60
60
|
expect(profile.websites).to include("http://www.linkedin.com/")
|
61
61
|
end
|
62
62
|
end
|
@@ -80,19 +80,25 @@ describe Linkedin::Profile do
|
|
80
80
|
end
|
81
81
|
|
82
82
|
describe '#summary' do
|
83
|
-
|
83
|
+
xit 'returns the summary of the profile' do
|
84
84
|
expect(profile.summary).to eq \
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
85
|
+
"Internet executive with over 19 years of experience, " \
|
86
|
+
"including general management of mid to large size organizations, corporate development, " \
|
87
|
+
"product development, business operations, and strategy. " \
|
88
|
+
"Currently CEO at LinkedIn, the web's largest and most powerful network of professionals. " \
|
89
|
+
"Prior to LinkedIn, was an Executive in Residence at Accel Partners and Greylock Partners. " \
|
90
|
+
"Primarily focused on advising the leadership teams of the firm's existing consumer technology portfolio companies " \
|
91
|
+
"while also working closely with the firm’s partners to evaluate new investment opportunities. " \
|
92
|
+
"Previously served in key leadership roles at Yahoo! for over seven years, " \
|
93
|
+
"most recently as the Executive Vice President of Yahoo!'s Network Division managing Yahoo's consumer web product portfolio, " \
|
94
|
+
"including Yahoo's Front Page, Mail, Search, and Media products. Specialties: general management, corporate development, " \
|
95
|
+
"product development, business operations, strategy, product marketing, non-profit governance"
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe '#number_of_connections' do
|
100
|
+
it 'returns the number of connections' do
|
101
|
+
expect(profile.number_of_connections).to eq '500+'
|
96
102
|
end
|
97
103
|
end
|
98
104
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|