linkedin-scraper 0.0.9 → 0.0.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linkedin-scraper/profile.rb +59 -44
- data/lib/linkedin-scraper/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
|
4
|
+
data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
|
7
|
+
data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
|
@@ -47,13 +47,13 @@ module Linkedin
|
|
47
47
|
@agent.max_history = 0
|
48
48
|
page = @agent.get(url)
|
49
49
|
return Linkedin::Profile.new(page, url)
|
50
|
-
rescue=>e
|
50
|
+
rescue => e
|
51
51
|
puts e
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
55
55
|
def get_skills(page)
|
56
|
-
page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
|
56
|
+
page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
|
57
57
|
end
|
58
58
|
|
59
59
|
def get_company_url(node)
|
@@ -188,25 +188,28 @@ module Linkedin
|
|
188
188
|
organizations = []
|
189
189
|
# if the profile contains org data
|
190
190
|
if page.search('ul.organizations li.organization').first
|
191
|
-
|
192
191
|
# loop over each element with org data
|
193
192
|
page.search('ul.organizations li.organization').each do |item|
|
194
|
-
# find the h3 element within the above section and get the text with excess white space stripped
|
195
|
-
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
196
|
-
position = nil # add this later
|
197
|
-
occupation = nil # add this latetr too, this relates to the experience/work
|
198
|
-
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
199
|
-
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
200
|
-
end_date = nil
|
201
|
-
else
|
202
|
-
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
203
|
-
end
|
204
193
|
|
205
|
-
|
206
|
-
|
194
|
+
begin
|
195
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
196
|
+
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
197
|
+
position = nil # add this later
|
198
|
+
occupation = nil # add this latetr too, this relates to the experience/work
|
199
|
+
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
200
|
+
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
201
|
+
end_date = nil
|
202
|
+
else
|
203
|
+
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
204
|
+
end
|
205
|
+
|
206
|
+
organizations << { name: name, start_date: start_date, end_date: end_date }
|
207
|
+
rescue => e
|
207
208
|
|
209
|
+
end
|
210
|
+
end
|
208
211
|
return organizations
|
209
|
-
end
|
212
|
+
end
|
210
213
|
end
|
211
214
|
|
212
215
|
def get_languages(page)
|
@@ -216,10 +219,13 @@ module Linkedin
|
|
216
219
|
|
217
220
|
# loop over each element with org data
|
218
221
|
page.search('ul.languages li.language').each do |item|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
222
|
+
begin
|
223
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
224
|
+
language = item.at('h3').text
|
225
|
+
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
|
226
|
+
languages << { language:language, proficiency:proficiency }
|
227
|
+
rescue => e
|
228
|
+
end
|
223
229
|
end
|
224
230
|
|
225
231
|
return languages
|
@@ -228,6 +234,7 @@ module Linkedin
|
|
228
234
|
|
229
235
|
def get_certifications(page)
|
230
236
|
certifications = []
|
237
|
+
|
231
238
|
# search string to use with Nokogiri
|
232
239
|
query = 'ul.certifications li.certification'
|
233
240
|
months = 'January|February|March|April|May|June|July|August|September|November|December'
|
@@ -238,19 +245,23 @@ module Linkedin
|
|
238
245
|
|
239
246
|
# loop over each element with cert data
|
240
247
|
page.search(query).each do |item|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
248
|
+
begin
|
249
|
+
item_text = item.text.gsub(/\s+|\n/, " ").strip
|
250
|
+
name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
|
251
|
+
authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
252
|
+
license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
253
|
+
start_date = Date.parse(item_text.scan(regex)[0].join(' '))
|
254
|
+
|
255
|
+
includes_end_date = item_text.scan(regex).count > 1
|
256
|
+
end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
257
|
+
|
258
|
+
certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
|
259
|
+
rescue => e
|
260
|
+
end
|
251
261
|
end
|
252
262
|
return certifications
|
253
263
|
end
|
264
|
+
|
254
265
|
end
|
255
266
|
|
256
267
|
|
@@ -261,26 +272,29 @@ module Linkedin
|
|
261
272
|
|
262
273
|
# loop over each element with org data
|
263
274
|
page.search('ul.organizations li.organization').each do |item|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
275
|
+
begin
|
276
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
277
|
+
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
278
|
+
position = nil # add this later
|
279
|
+
occupation = nil # add this latetr too, this relates to the experience/work
|
280
|
+
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
281
|
+
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
282
|
+
end_date = nil
|
283
|
+
else
|
284
|
+
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
285
|
+
end
|
286
|
+
|
287
|
+
organizations << { name: name, start_date: start_date, end_date: end_date }
|
288
|
+
rescue => e
|
273
289
|
end
|
274
|
-
|
275
|
-
organizations << { name: name, start_date: start_date, end_date: end_date }
|
276
290
|
end
|
277
|
-
|
278
|
-
|
279
|
-
end # page.search('ul.organizations li.organization').first
|
291
|
+
end
|
292
|
+
return organizations
|
280
293
|
end
|
281
294
|
|
282
295
|
|
283
296
|
|
297
|
+
|
284
298
|
def get_recommended_visitors(page)
|
285
299
|
recommended_vs=[]
|
286
300
|
if page.search(".browsemap").first
|
@@ -295,5 +309,6 @@ module Linkedin
|
|
295
309
|
return recommended_vs
|
296
310
|
end
|
297
311
|
end
|
312
|
+
|
298
313
|
end
|
299
314
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
63
|
rubyforge_project:
|
64
|
-
rubygems_version: 2.0.
|
64
|
+
rubygems_version: 2.0.3
|
65
65
|
signing_key:
|
66
66
|
specification_version: 4
|
67
67
|
summary: when a url of public linkedin profile page is given it scrapes the entire
|