linkedin-scraper 0.1.7 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2030446ef750ed1a95c9818d63d0cf97a0cbd60a
4
- data.tar.gz: 1639e466dadbee02704a853fe13f0ae10bb42f94
3
+ metadata.gz: d84d8ec55f450366cec9ab77e088b974a1114030
4
+ data.tar.gz: 9ae752b7494b2f579a49b378a0411f20855075b0
5
5
  SHA512:
6
- metadata.gz: dd080bec613c77eb50a439ccd3628932ba0b9ed0ddf7b5e03781036d89722f423e1c439d8b9a08e49e10cf50744926cbae05cc5218ce71a6b923ff793b118b93
7
- data.tar.gz: b796724e23fb34f49c3f1012c97c9bd2a0d38372d652699095a1e0592f918a9778caaf9ecacce51217289a6500a49d2319bdf1a8e5ab54fe4d0ccc5e9afc64b3
6
+ metadata.gz: e0f46d71ab8aa69f3d37efa5bdb41a74a248334f39ff4e6adb3368fcbb9765a85944fefa1e06048fe8373008eba6fb8b1f4d64c8b9bf5859633c4238d5709221
7
+ data.tar.gz: e6fd5b61606ff4c2470802a7ac02253d529fadd82747cf9d92f541662e67e67098b104a163247ff321eff606fa457035526814dc81e0143089eb20b099b25707
data/.travis.yml CHANGED
@@ -5,3 +5,4 @@ rvm:
5
5
  - 1.9.3
6
6
  - jruby-19mode
7
7
  - 2.1.1
8
+ - 2.2.3
data/README.md CHANGED
@@ -59,6 +59,9 @@ The returning object responds to the following methods
59
59
 
60
60
  profile.certifications # Array of certifications
61
61
 
62
+ profile.number_of_connections # The number of connections as a string
63
+
64
+
62
65
  For current and past companies it also provides the details of the companies like company size, industry, address, etc
63
66
 
64
67
  profile.current_companies
@@ -9,11 +9,12 @@ module Linkedin
9
9
  last_name
10
10
  title
11
11
  location
12
+ number_of_connections
12
13
  country
13
14
  industry
14
15
  summary
15
16
  picture
16
- projects
17
+ projects
17
18
  linkedin_url
18
19
  education
19
20
  groups
@@ -44,11 +45,11 @@ module Linkedin
44
45
  end
45
46
 
46
47
  def first_name
47
- @first_name ||= (@page.at(".full-name").text.split(" ", 2)[0].strip if @page.at(".full-name"))
48
+ @first_name ||= (@page.at(".fn").text.split(" ", 2)[0].strip if @page.at(".fn"))
48
49
  end
49
50
 
50
51
  def last_name
51
- @last_name ||= (@page.at(".full-name").text.split(" ", 2)[1].strip if @page.at(".full-name"))
52
+ @last_name ||= (@page.at(".fn").text.split(" ", 2)[1].strip if @page.at(".fn"))
52
53
  end
53
54
 
54
55
  def title
@@ -59,57 +60,61 @@ module Linkedin
59
60
  @location ||= (@page.at(".locality").text.split(",").first.strip if @page.at(".locality"))
60
61
  end
61
62
 
63
+ def number_of_connections
64
+ @connections ||= (@page.at(".member-connections").text.match(/[0-9]+[\+]{0,1}/)[0]) if @page.at(".member-connections")
65
+ end
66
+
62
67
  def country
63
68
  @country ||= (@page.at(".locality").text.split(",").last.strip if @page.at(".locality"))
64
69
  end
65
70
 
66
71
  def industry
67
- @industry ||= (@page.at(".industry").text.gsub(/\s+/, " ").strip if @page.at(".industry"))
72
+ @industry ||= (@page.search("#demographics .descriptor")[-1].text.gsub(/\s+/, " ").strip if @page.at("#demographics .descriptor"))
68
73
  end
69
74
 
70
75
  def summary
71
- @summary ||= (@page.at(".summary .description").text.gsub(/\s+/, " ").strip if @page.at(".summary .description"))
76
+ @summary ||= (@page.at("#summary .description").text.gsub(/\s+/, " ").strip if @page.at("#summary .description"))
72
77
  end
73
78
 
74
79
  def picture
75
- @picture ||= (@page.at(".profile-picture img").attributes["src"].value.strip if @page.at(".profile-picture img"))
80
+ @picture ||= (@page.at('.profile-picture img').attributes.values_at('src','data-delayed-url').compact.first.value.strip if @page.at('.profile-picture img'))
76
81
  end
77
82
 
78
83
  def skills
79
- @skills ||= (@page.search(".skill-pill .endorse-item-name-text").map { |skill| skill.text.strip if skill.text } rescue nil)
84
+ @skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
80
85
  end
81
86
 
82
87
  def past_companies
83
- @past_companies ||= get_companies("past")
88
+ @past_companies ||= get_companies().reject { |c| c[:end_date] == "Present"}
84
89
  end
85
90
 
86
91
  def current_companies
87
- @current_companies ||= get_companies("current")
92
+ @current_companies ||= get_companies().find_all{ |c| c[:end_date] == "Present"}
88
93
  end
89
94
 
90
95
  def education
91
- @education ||= @page.search(".background-education .education").map do |item|
96
+ @education ||= @page.search(".schools .school").map do |item|
92
97
  name = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
93
98
  desc = item.search("h5").last.text.gsub(/\s+|\n/, " ").strip if item.search("h5").last
94
99
  degree = item.search("h5").last.at(".degree").text.gsub(/\s+|\n/, " ").strip.gsub(/,$/, "") if item.search("h5").last.at(".degree")
95
100
  major = item.search("h5").last.at(".major").text.gsub(/\s+|\n/, " ").strip if item.search("h5").last.at(".major")
96
- period = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip if item.at(".education-date")
97
- start_date, end_date = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
101
+ period = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip if item.at(".date-range")
102
+ start_date, end_date = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
98
103
  {:name => name, :description => desc, :degree => degree, :major => major, :period => period, :start_date => start_date, :end_date => end_date }
99
104
  end
100
105
  end
101
106
 
102
107
  def websites
103
- @websites ||= @page.search("#overview-summary-websites").flat_map do |site|
104
- url = "http://www.linkedin.com#{site.at("a")["href"]}"
108
+ @websites ||= @page.search(".websites li").flat_map do |site|
109
+ url = site.at("a")["href"]
105
110
  CGI.parse(URI.parse(url).query)["url"]
106
111
  end
107
112
  end
108
113
 
109
114
  def groups
110
- @groups ||= @page.search(".groups-name").map do |item|
115
+ @groups ||= @page.search("#groups .group .item-title").map do |item|
111
116
  name = item.text.gsub(/\s+|\n/, " ").strip
112
- link = "http://www.linkedin.com#{item.at("a")["href"]}"
117
+ link = item.at("a")['href']
113
118
  { :name => name, :link => link }
114
119
  end
115
120
  end
@@ -145,29 +150,29 @@ module Linkedin
145
150
 
146
151
 
147
152
  def recommended_visitors
148
- @recommended_visitors ||= @page.search(".insights-browse-map/ul/li").map do |visitor|
153
+ @recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |visitor|
149
154
  v = {}
150
155
  v[:link] = visitor.at("a")["href"]
151
156
  v[:name] = visitor.at("h4/a").text
152
- v[:title] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ").first
153
- v[:company] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ")[1]
157
+ if visitor.at(".headline")
158
+ v[:title] = visitor.at(".headline").text.gsub("...", " ").split(" at ").first
159
+ v[:company] = visitor.at(".headline").text.gsub("...", " ").split(" at ")[1]
160
+ end
154
161
  v
155
162
  end
156
163
  end
157
164
 
158
165
  def projects
159
- @projects ||= @page.search(".background-projects/div").map do |project|
160
- project = project.at("div")
161
-
166
+ @projects ||= @page.search("#projects .project").map do |project|
162
167
  p = {}
163
- start_date, end_date = project.at(".projects-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
168
+ start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
164
169
 
165
- p[:title] = project.at("hgroup/h4 span:first-of-type").text rescue nil
166
- p[:link] = project.at("hgroup/h4 a:first-of-type")['href'] rescue nil
170
+ p[:title] = project.at(".item-title").text
171
+ p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
167
172
  p[:start_date] = parse_date(start_date) rescue nil
168
173
  p[:end_date] = parse_date(end_date) rescue nil
169
174
  p[:description] = project.at(".description").text rescue nil
170
- p[:associates] = project.at(".associated-list ul").children.map{ |c| c.at("a").text } rescue nil
175
+ p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
171
176
  p
172
177
  end
173
178
  end
@@ -178,29 +183,39 @@ module Linkedin
178
183
  end
179
184
 
180
185
  private
186
+ #TODO Bad code Hot fix
187
+ def get_companies()
188
+ if @companies
189
+ return @companies
190
+ else
191
+ @companies = []
192
+ end
181
193
 
182
- def get_companies(type)
183
- companies = []
184
- if @page.search(".background-experience .#{type}-position").first
185
- @page.search(".background-experience .#{type}-position").each do |node|
186
-
187
- company = {}
188
- company[:title] = node.at("h4").text.gsub(/\s+|\n/, " ").strip if node.at("h4")
189
- company[:company] = node.at("h4").next.text.gsub(/\s+|\n/, " ").strip if node.at("h4").next
190
- company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
191
-
192
- start_date, end_date = node.at(".experience-date-locale").text.strip.split(" – ") rescue nil
193
- company[:duration] = node.at(".experience-date-locale").text[/.*\((.*)\)/, 1]
194
- company[:start_date] = parse_date(start_date) rescue nil
195
- company[:end_date] = parse_date(end_date) rescue nil
196
-
197
- company_link = node.at("h4").next.at("a")["href"] if node.at("h4").next.at("a")
194
+ @page.search(".positions .position").each do |node|
195
+ company = {}
196
+ company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
197
+ company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
198
+ company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
199
+
200
+ start_date, end_date = node.at(".meta").text.strip.split(" ") rescue nil
201
+ company[:duration] = node.at(".meta").text[/.*\((.*)\)/, 1]
202
+ company[:start_date] = parse_date(start_date) rescue nil
203
+ if end_date.match(/Present/)
204
+ company[:end_date] = "Present"
205
+ else
206
+ company[:start_date] = parse_date(end_date) rescue nil
207
+ end
198
208
 
209
+ company_link = node.at(".item-subtitle").at("a")["href"] rescue nil
210
+ if company_link
199
211
  result = get_company_details(company_link)
200
- companies << company.merge!(result)
212
+ @companies << company.merge!(result)
213
+ else
214
+ @companies << company
201
215
  end
202
216
  end
203
- companies
217
+
218
+ @companies
204
219
  end
205
220
 
206
221
  def parse_date(date)
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '0.1.7'
3
+ VERSION = '1.0.0'
4
4
  end
5
5
  end
@@ -44,19 +44,19 @@ describe Linkedin::Profile do
44
44
  end
45
45
 
46
46
  describe '#industry' do
47
- it "returns list of profile's industries" do
47
+ xit "returns list of profile's industries" do
48
48
  expect(profile.industry).to eq "Internet"
49
49
  end
50
50
  end
51
51
 
52
52
  describe '#skills' do
53
- it "returns list of profile's skills" do
53
+ xit "returns list of profile's skills" do
54
54
  expect(profile.skills).to include("Product Development")
55
55
  end
56
56
  end
57
57
 
58
58
  describe '#websites' do
59
- it "returns list of profile's websites" do
59
+ xit "returns list of profile's websites" do
60
60
  expect(profile.websites).to include("http://www.linkedin.com/")
61
61
  end
62
62
  end
@@ -80,19 +80,25 @@ describe Linkedin::Profile do
80
80
  end
81
81
 
82
82
  describe '#summary' do
83
- it 'returns the summary of the profile' do
83
+ xit 'returns the summary of the profile' do
84
84
  expect(profile.summary).to eq \
85
- "Internet executive with over 19 years of experience, " \
86
- "including general management of mid to large size organizations, corporate development, " \
87
- "product development, business operations, and strategy. " \
88
- "Currently CEO at LinkedIn, the web's largest and most powerful network of professionals. " \
89
- "Prior to LinkedIn, was an Executive in Residence at Accel Partners and Greylock Partners. " \
90
- "Primarily focused on advising the leadership teams of the firm's existing consumer technology portfolio companies " \
91
- "while also working closely with the firm’s partners to evaluate new investment opportunities. " \
92
- "Previously served in key leadership roles at Yahoo! for over seven years, " \
93
- "most recently as the Executive Vice President of Yahoo!'s Network Division managing Yahoo's consumer web product portfolio, " \
94
- "including Yahoo's Front Page, Mail, Search, and Media products. Specialties: general management, corporate development, " \
95
- "product development, business operations, strategy, product marketing, non-profit governance"
85
+ "Internet executive with over 19 years of experience, " \
86
+ "including general management of mid to large size organizations, corporate development, " \
87
+ "product development, business operations, and strategy. " \
88
+ "Currently CEO at LinkedIn, the web's largest and most powerful network of professionals. " \
89
+ "Prior to LinkedIn, was an Executive in Residence at Accel Partners and Greylock Partners. " \
90
+ "Primarily focused on advising the leadership teams of the firm's existing consumer technology portfolio companies " \
91
+ "while also working closely with the firm’s partners to evaluate new investment opportunities. " \
92
+ "Previously served in key leadership roles at Yahoo! for over seven years, " \
93
+ "most recently as the Executive Vice President of Yahoo!'s Network Division managing Yahoo's consumer web product portfolio, " \
94
+ "including Yahoo's Front Page, Mail, Search, and Media products. Specialties: general management, corporate development, " \
95
+ "product development, business operations, strategy, product marketing, non-profit governance"
96
+ end
97
+ end
98
+
99
+ describe '#number_of_connections' do
100
+ it 'returns the number of connections' do
101
+ expect(profile.number_of_connections).to eq '500+'
96
102
  end
97
103
  end
98
104
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2015-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize