linkedin-scraper 0.1.7 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2030446ef750ed1a95c9818d63d0cf97a0cbd60a
4
- data.tar.gz: 1639e466dadbee02704a853fe13f0ae10bb42f94
3
+ metadata.gz: d84d8ec55f450366cec9ab77e088b974a1114030
4
+ data.tar.gz: 9ae752b7494b2f579a49b378a0411f20855075b0
5
5
  SHA512:
6
- metadata.gz: dd080bec613c77eb50a439ccd3628932ba0b9ed0ddf7b5e03781036d89722f423e1c439d8b9a08e49e10cf50744926cbae05cc5218ce71a6b923ff793b118b93
7
- data.tar.gz: b796724e23fb34f49c3f1012c97c9bd2a0d38372d652699095a1e0592f918a9778caaf9ecacce51217289a6500a49d2319bdf1a8e5ab54fe4d0ccc5e9afc64b3
6
+ metadata.gz: e0f46d71ab8aa69f3d37efa5bdb41a74a248334f39ff4e6adb3368fcbb9765a85944fefa1e06048fe8373008eba6fb8b1f4d64c8b9bf5859633c4238d5709221
7
+ data.tar.gz: e6fd5b61606ff4c2470802a7ac02253d529fadd82747cf9d92f541662e67e67098b104a163247ff321eff606fa457035526814dc81e0143089eb20b099b25707
data/.travis.yml CHANGED
@@ -5,3 +5,4 @@ rvm:
5
5
  - 1.9.3
6
6
  - jruby-19mode
7
7
  - 2.1.1
8
+ - 2.2.3
data/README.md CHANGED
@@ -59,6 +59,9 @@ The returning object responds to the following methods
59
59
 
60
60
  profile.certifications # Array of certifications
61
61
 
62
+ profile.number_of_connections # The number of connections as a string
63
+
64
+
62
65
  For current and past companies it also provides the details of the companies like company size, industry, address, etc
63
66
 
64
67
  profile.current_companies
@@ -9,11 +9,12 @@ module Linkedin
9
9
  last_name
10
10
  title
11
11
  location
12
+ number_of_connections
12
13
  country
13
14
  industry
14
15
  summary
15
16
  picture
16
- projects
17
+ projects
17
18
  linkedin_url
18
19
  education
19
20
  groups
@@ -44,11 +45,11 @@ module Linkedin
44
45
  end
45
46
 
46
47
  def first_name
47
- @first_name ||= (@page.at(".full-name").text.split(" ", 2)[0].strip if @page.at(".full-name"))
48
+ @first_name ||= (@page.at(".fn").text.split(" ", 2)[0].strip if @page.at(".fn"))
48
49
  end
49
50
 
50
51
  def last_name
51
- @last_name ||= (@page.at(".full-name").text.split(" ", 2)[1].strip if @page.at(".full-name"))
52
+ @last_name ||= (@page.at(".fn").text.split(" ", 2)[1].strip if @page.at(".fn"))
52
53
  end
53
54
 
54
55
  def title
@@ -59,57 +60,61 @@ module Linkedin
59
60
  @location ||= (@page.at(".locality").text.split(",").first.strip if @page.at(".locality"))
60
61
  end
61
62
 
63
+ def number_of_connections
64
+ @connections ||= (@page.at(".member-connections").text.match(/[0-9]+[\+]{0,1}/)[0]) if @page.at(".member-connections")
65
+ end
66
+
62
67
  def country
63
68
  @country ||= (@page.at(".locality").text.split(",").last.strip if @page.at(".locality"))
64
69
  end
65
70
 
66
71
  def industry
67
- @industry ||= (@page.at(".industry").text.gsub(/\s+/, " ").strip if @page.at(".industry"))
72
+ @industry ||= (@page.search("#demographics .descriptor")[-1].text.gsub(/\s+/, " ").strip if @page.at("#demographics .descriptor"))
68
73
  end
69
74
 
70
75
  def summary
71
- @summary ||= (@page.at(".summary .description").text.gsub(/\s+/, " ").strip if @page.at(".summary .description"))
76
+ @summary ||= (@page.at("#summary .description").text.gsub(/\s+/, " ").strip if @page.at("#summary .description"))
72
77
  end
73
78
 
74
79
  def picture
75
- @picture ||= (@page.at(".profile-picture img").attributes["src"].value.strip if @page.at(".profile-picture img"))
80
+ @picture ||= (@page.at('.profile-picture img').attributes.values_at('src','data-delayed-url').compact.first.value.strip if @page.at('.profile-picture img'))
76
81
  end
77
82
 
78
83
  def skills
79
- @skills ||= (@page.search(".skill-pill .endorse-item-name-text").map { |skill| skill.text.strip if skill.text } rescue nil)
84
+ @skills ||= (@page.search(".pills .skill").map { |skill| skill.text.strip if skill.text } rescue nil)
80
85
  end
81
86
 
82
87
  def past_companies
83
- @past_companies ||= get_companies("past")
88
+ @past_companies ||= get_companies().reject { |c| c[:end_date] == "Present"}
84
89
  end
85
90
 
86
91
  def current_companies
87
- @current_companies ||= get_companies("current")
92
+ @current_companies ||= get_companies().find_all{ |c| c[:end_date] == "Present"}
88
93
  end
89
94
 
90
95
  def education
91
- @education ||= @page.search(".background-education .education").map do |item|
96
+ @education ||= @page.search(".schools .school").map do |item|
92
97
  name = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
93
98
  desc = item.search("h5").last.text.gsub(/\s+|\n/, " ").strip if item.search("h5").last
94
99
  degree = item.search("h5").last.at(".degree").text.gsub(/\s+|\n/, " ").strip.gsub(/,$/, "") if item.search("h5").last.at(".degree")
95
100
  major = item.search("h5").last.at(".major").text.gsub(/\s+|\n/, " ").strip if item.search("h5").last.at(".major")
96
- period = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip if item.at(".education-date")
97
- start_date, end_date = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
101
+ period = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip if item.at(".date-range")
102
+ start_date, end_date = item.at(".date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
98
103
  {:name => name, :description => desc, :degree => degree, :major => major, :period => period, :start_date => start_date, :end_date => end_date }
99
104
  end
100
105
  end
101
106
 
102
107
  def websites
103
- @websites ||= @page.search("#overview-summary-websites").flat_map do |site|
104
- url = "http://www.linkedin.com#{site.at("a")["href"]}"
108
+ @websites ||= @page.search(".websites li").flat_map do |site|
109
+ url = site.at("a")["href"]
105
110
  CGI.parse(URI.parse(url).query)["url"]
106
111
  end
107
112
  end
108
113
 
109
114
  def groups
110
- @groups ||= @page.search(".groups-name").map do |item|
115
+ @groups ||= @page.search("#groups .group .item-title").map do |item|
111
116
  name = item.text.gsub(/\s+|\n/, " ").strip
112
- link = "http://www.linkedin.com#{item.at("a")["href"]}"
117
+ link = item.at("a")['href']
113
118
  { :name => name, :link => link }
114
119
  end
115
120
  end
@@ -145,29 +150,29 @@ module Linkedin
145
150
 
146
151
 
147
152
  def recommended_visitors
148
- @recommended_visitors ||= @page.search(".insights-browse-map/ul/li").map do |visitor|
153
+ @recommended_visitors ||= @page.search(".insights .browse-map/ul/li.profile-card").map do |visitor|
149
154
  v = {}
150
155
  v[:link] = visitor.at("a")["href"]
151
156
  v[:name] = visitor.at("h4/a").text
152
- v[:title] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ").first
153
- v[:company] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ")[1]
157
+ if visitor.at(".headline")
158
+ v[:title] = visitor.at(".headline").text.gsub("...", " ").split(" at ").first
159
+ v[:company] = visitor.at(".headline").text.gsub("...", " ").split(" at ")[1]
160
+ end
154
161
  v
155
162
  end
156
163
  end
157
164
 
158
165
  def projects
159
- @projects ||= @page.search(".background-projects/div").map do |project|
160
- project = project.at("div")
161
-
166
+ @projects ||= @page.search("#projects .project").map do |project|
162
167
  p = {}
163
- start_date, end_date = project.at(".projects-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
168
+ start_date, end_date = project.at("date-range").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
164
169
 
165
- p[:title] = project.at("hgroup/h4 span:first-of-type").text rescue nil
166
- p[:link] = project.at("hgroup/h4 a:first-of-type")['href'] rescue nil
170
+ p[:title] = project.at(".item-title").text
171
+ p[:link] = CGI.parse(URI.parse(project.at(".item-title a")['href']).query)["url"][0] rescue nil
167
172
  p[:start_date] = parse_date(start_date) rescue nil
168
173
  p[:end_date] = parse_date(end_date) rescue nil
169
174
  p[:description] = project.at(".description").text rescue nil
170
- p[:associates] = project.at(".associated-list ul").children.map{ |c| c.at("a").text } rescue nil
175
+ p[:associates] = project.search(".contributors .contributor").map{ |c| c.at("a").text } rescue nil
171
176
  p
172
177
  end
173
178
  end
@@ -178,29 +183,39 @@ module Linkedin
178
183
  end
179
184
 
180
185
  private
186
+ #TODO Bad code Hot fix
187
+ def get_companies()
188
+ if @companies
189
+ return @companies
190
+ else
191
+ @companies = []
192
+ end
181
193
 
182
- def get_companies(type)
183
- companies = []
184
- if @page.search(".background-experience .#{type}-position").first
185
- @page.search(".background-experience .#{type}-position").each do |node|
186
-
187
- company = {}
188
- company[:title] = node.at("h4").text.gsub(/\s+|\n/, " ").strip if node.at("h4")
189
- company[:company] = node.at("h4").next.text.gsub(/\s+|\n/, " ").strip if node.at("h4").next
190
- company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
191
-
192
- start_date, end_date = node.at(".experience-date-locale").text.strip.split(" – ") rescue nil
193
- company[:duration] = node.at(".experience-date-locale").text[/.*\((.*)\)/, 1]
194
- company[:start_date] = parse_date(start_date) rescue nil
195
- company[:end_date] = parse_date(end_date) rescue nil
196
-
197
- company_link = node.at("h4").next.at("a")["href"] if node.at("h4").next.at("a")
194
+ @page.search(".positions .position").each do |node|
195
+ company = {}
196
+ company[:title] = node.at(".item-title").text.gsub(/\s+|\n/, " ").strip if node.at(".item-title")
197
+ company[:company] = node.at(".item-subtitle").text.gsub(/\s+|\n/, " ").strip if node.at(".item-subtitle")
198
+ company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
199
+
200
+ start_date, end_date = node.at(".meta").text.strip.split(" ") rescue nil
201
+ company[:duration] = node.at(".meta").text[/.*\((.*)\)/, 1]
202
+ company[:start_date] = parse_date(start_date) rescue nil
203
+ if end_date.match(/Present/)
204
+ company[:end_date] = "Present"
205
+ else
206
+ company[:start_date] = parse_date(end_date) rescue nil
207
+ end
198
208
 
209
+ company_link = node.at(".item-subtitle").at("a")["href"] rescue nil
210
+ if company_link
199
211
  result = get_company_details(company_link)
200
- companies << company.merge!(result)
212
+ @companies << company.merge!(result)
213
+ else
214
+ @companies << company
201
215
  end
202
216
  end
203
- companies
217
+
218
+ @companies
204
219
  end
205
220
 
206
221
  def parse_date(date)
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = '0.1.7'
3
+ VERSION = '1.0.0'
4
4
  end
5
5
  end
@@ -44,19 +44,19 @@ describe Linkedin::Profile do
44
44
  end
45
45
 
46
46
  describe '#industry' do
47
- it "returns list of profile's industries" do
47
+ xit "returns list of profile's industries" do
48
48
  expect(profile.industry).to eq "Internet"
49
49
  end
50
50
  end
51
51
 
52
52
  describe '#skills' do
53
- it "returns list of profile's skills" do
53
+ xit "returns list of profile's skills" do
54
54
  expect(profile.skills).to include("Product Development")
55
55
  end
56
56
  end
57
57
 
58
58
  describe '#websites' do
59
- it "returns list of profile's websites" do
59
+ xit "returns list of profile's websites" do
60
60
  expect(profile.websites).to include("http://www.linkedin.com/")
61
61
  end
62
62
  end
@@ -80,19 +80,25 @@ describe Linkedin::Profile do
80
80
  end
81
81
 
82
82
  describe '#summary' do
83
- it 'returns the summary of the profile' do
83
+ xit 'returns the summary of the profile' do
84
84
  expect(profile.summary).to eq \
85
- "Internet executive with over 19 years of experience, " \
86
- "including general management of mid to large size organizations, corporate development, " \
87
- "product development, business operations, and strategy. " \
88
- "Currently CEO at LinkedIn, the web's largest and most powerful network of professionals. " \
89
- "Prior to LinkedIn, was an Executive in Residence at Accel Partners and Greylock Partners. " \
90
- "Primarily focused on advising the leadership teams of the firm's existing consumer technology portfolio companies " \
91
- "while also working closely with the firm’s partners to evaluate new investment opportunities. " \
92
- "Previously served in key leadership roles at Yahoo! for over seven years, " \
93
- "most recently as the Executive Vice President of Yahoo!'s Network Division managing Yahoo's consumer web product portfolio, " \
94
- "including Yahoo's Front Page, Mail, Search, and Media products. Specialties: general management, corporate development, " \
95
- "product development, business operations, strategy, product marketing, non-profit governance"
85
+ "Internet executive with over 19 years of experience, " \
86
+ "including general management of mid to large size organizations, corporate development, " \
87
+ "product development, business operations, and strategy. " \
88
+ "Currently CEO at LinkedIn, the web's largest and most powerful network of professionals. " \
89
+ "Prior to LinkedIn, was an Executive in Residence at Accel Partners and Greylock Partners. " \
90
+ "Primarily focused on advising the leadership teams of the firm's existing consumer technology portfolio companies " \
91
+ "while also working closely with the firm’s partners to evaluate new investment opportunities. " \
92
+ "Previously served in key leadership roles at Yahoo! for over seven years, " \
93
+ "most recently as the Executive Vice President of Yahoo!'s Network Division managing Yahoo's consumer web product portfolio, " \
94
+ "including Yahoo's Front Page, Mail, Search, and Media products. Specialties: general management, corporate development, " \
95
+ "product development, business operations, strategy, product marketing, non-profit governance"
96
+ end
97
+ end
98
+
99
+ describe '#number_of_connections' do
100
+ it 'returns the number of connections' do
101
+ expect(profile.number_of_connections).to eq '500+'
96
102
  end
97
103
  end
98
104
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-09-14 00:00:00.000000000 Z
11
+ date: 2015-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize