linkedin-scraper 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/linkedin-scraper/profile.rb +59 -44
- data/lib/linkedin-scraper/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
|
4
|
+
data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
|
7
|
+
data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
|
@@ -47,13 +47,13 @@ module Linkedin
|
|
47
47
|
@agent.max_history = 0
|
48
48
|
page = @agent.get(url)
|
49
49
|
return Linkedin::Profile.new(page, url)
|
50
|
-
rescue=>e
|
50
|
+
rescue => e
|
51
51
|
puts e
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
55
55
|
def get_skills(page)
|
56
|
-
page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
|
56
|
+
page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
|
57
57
|
end
|
58
58
|
|
59
59
|
def get_company_url(node)
|
@@ -188,25 +188,28 @@ module Linkedin
|
|
188
188
|
organizations = []
|
189
189
|
# if the profile contains org data
|
190
190
|
if page.search('ul.organizations li.organization').first
|
191
|
-
|
192
191
|
# loop over each element with org data
|
193
192
|
page.search('ul.organizations li.organization').each do |item|
|
194
|
-
# find the h3 element within the above section and get the text with excess white space stripped
|
195
|
-
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
196
|
-
position = nil # add this later
|
197
|
-
occupation = nil # add this latetr too, this relates to the experience/work
|
198
|
-
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
199
|
-
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
200
|
-
end_date = nil
|
201
|
-
else
|
202
|
-
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
203
|
-
end
|
204
193
|
|
205
|
-
|
206
|
-
|
194
|
+
begin
|
195
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
196
|
+
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
197
|
+
position = nil # add this later
|
198
|
+
occupation = nil # add this latetr too, this relates to the experience/work
|
199
|
+
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
200
|
+
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
201
|
+
end_date = nil
|
202
|
+
else
|
203
|
+
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
204
|
+
end
|
205
|
+
|
206
|
+
organizations << { name: name, start_date: start_date, end_date: end_date }
|
207
|
+
rescue => e
|
207
208
|
|
209
|
+
end
|
210
|
+
end
|
208
211
|
return organizations
|
209
|
-
end
|
212
|
+
end
|
210
213
|
end
|
211
214
|
|
212
215
|
def get_languages(page)
|
@@ -216,10 +219,13 @@ module Linkedin
|
|
216
219
|
|
217
220
|
# loop over each element with org data
|
218
221
|
page.search('ul.languages li.language').each do |item|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
222
|
+
begin
|
223
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
224
|
+
language = item.at('h3').text
|
225
|
+
proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
|
226
|
+
languages << { language:language, proficiency:proficiency }
|
227
|
+
rescue => e
|
228
|
+
end
|
223
229
|
end
|
224
230
|
|
225
231
|
return languages
|
@@ -228,6 +234,7 @@ module Linkedin
|
|
228
234
|
|
229
235
|
def get_certifications(page)
|
230
236
|
certifications = []
|
237
|
+
|
231
238
|
# search string to use with Nokogiri
|
232
239
|
query = 'ul.certifications li.certification'
|
233
240
|
months = 'January|February|March|April|May|June|July|August|September|November|December'
|
@@ -238,19 +245,23 @@ module Linkedin
|
|
238
245
|
|
239
246
|
# loop over each element with cert data
|
240
247
|
page.search(query).each do |item|
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
248
|
+
begin
|
249
|
+
item_text = item.text.gsub(/\s+|\n/, " ").strip
|
250
|
+
name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
|
251
|
+
authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
252
|
+
license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
253
|
+
start_date = Date.parse(item_text.scan(regex)[0].join(' '))
|
254
|
+
|
255
|
+
includes_end_date = item_text.scan(regex).count > 1
|
256
|
+
end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
|
257
|
+
|
258
|
+
certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
|
259
|
+
rescue => e
|
260
|
+
end
|
251
261
|
end
|
252
262
|
return certifications
|
253
263
|
end
|
264
|
+
|
254
265
|
end
|
255
266
|
|
256
267
|
|
@@ -261,26 +272,29 @@ module Linkedin
|
|
261
272
|
|
262
273
|
# loop over each element with org data
|
263
274
|
page.search('ul.organizations li.organization').each do |item|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
275
|
+
begin
|
276
|
+
# find the h3 element within the above section and get the text with excess white space stripped
|
277
|
+
name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
|
278
|
+
position = nil # add this later
|
279
|
+
occupation = nil # add this latetr too, this relates to the experience/work
|
280
|
+
start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
|
281
|
+
if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
|
282
|
+
end_date = nil
|
283
|
+
else
|
284
|
+
Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
|
285
|
+
end
|
286
|
+
|
287
|
+
organizations << { name: name, start_date: start_date, end_date: end_date }
|
288
|
+
rescue => e
|
273
289
|
end
|
274
|
-
|
275
|
-
organizations << { name: name, start_date: start_date, end_date: end_date }
|
276
290
|
end
|
277
|
-
|
278
|
-
|
279
|
-
end # page.search('ul.organizations li.organization').first
|
291
|
+
end
|
292
|
+
return organizations
|
280
293
|
end
|
281
294
|
|
282
295
|
|
283
296
|
|
297
|
+
|
284
298
|
def get_recommended_visitors(page)
|
285
299
|
recommended_vs=[]
|
286
300
|
if page.search(".browsemap").first
|
@@ -295,5 +309,6 @@ module Linkedin
|
|
295
309
|
return recommended_vs
|
296
310
|
end
|
297
311
|
end
|
312
|
+
|
298
313
|
end
|
299
314
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: linkedin-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.9
|
4
|
+
version: 0.0.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yatish Mehta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-06-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
61
61
|
version: '0'
|
62
62
|
requirements: []
|
63
63
|
rubyforge_project:
|
64
|
-
rubygems_version: 2.0.
|
64
|
+
rubygems_version: 2.0.3
|
65
65
|
signing_key:
|
66
66
|
specification_version: 4
|
67
67
|
summary: when a url of public linkedin profile page is given it scrapes the entire
|