linkedin-scraper 0.0.9 → 0.0.10

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 866d6d1d021d57faea4d513f88b12de0766d397c
- data.tar.gz: 0fb3a6819f14e19a8a2f41d6059f2cbf9c693a8d
+ metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
+ data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
  SHA512:
- metadata.gz: 90f39ed05ce81c3abb2c3c157ea5e4d32fbbb3d2c6716c4c83ea1f8b7462eed24f8e569594ae0a7594c52fdbbcb469e4b37d3aee47e14d71a574e26f90ebc4a9
- data.tar.gz: a1f6c4f0bd5f6d9e759c7b1f20d946d5d73b926211beee9e93caea05347480211a20840130097971ace64f1a98117fd4b3f56285aef70e589c8bf906098ef76c
+ metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
+ data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
@@ -47,13 +47,13 @@ module Linkedin
  @agent.max_history = 0
  page = @agent.get(url)
  return Linkedin::Profile.new(page, url)
- rescue=>e
+ rescue => e
  puts e
  end
  end
 
  def get_skills(page)
- page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
+ page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
  end
 
  def get_company_url(node)
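Note on the get_skills change above: the trailing `rescue nil` is Ruby's statement-modifier rescue, so if the selector lookup or mapping raises, the whole expression evaluates to nil instead of propagating the error. A minimal standalone sketch of that pattern (the sample markup is illustrative, not taken from LinkedIn):

```ruby
require 'nokogiri'

# Hypothetical page fragment with no skills section at all.
page = Nokogiri::HTML('<div class="profile"></div>')

# If anything in this expression raises, the modifier rescue swallows the
# error and the statement as a whole evaluates to nil.
skills = page.search('.competency.show-bean').map { |s| s.text.strip if s.text } rescue nil
```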
@@ -188,25 +188,28 @@ module Linkedin
  organizations = []
  # if the profile contains org data
  if page.search('ul.organizations li.organization').first
-
  # loop over each element with org data
  page.search('ul.organizations li.organization').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
- position = nil # add this later
- occupation = nil # add this latetr too, this relates to the experience/work
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
- end_date = nil
- else
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
- end
 
- organizations << { name: name, start_date: start_date, end_date: end_date }
- end
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+ position = nil # add this later
+ occupation = nil # add this latetr too, this relates to the experience/work
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+ end_date = nil
+ else
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ end
+
+ organizations << { name: name, start_date: start_date, end_date: end_date }
+ rescue => e
 
+ end
+ end
  return organizations
- end # page.search('ul.organizations li.organization').first
+ end
  end
 
  def get_languages(page)
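The same defensive idea recurs in the hunks that follow: each item in the organizations, languages, and certifications loops is wrapped in begin/rescue, so one malformed entry is skipped rather than aborting the whole collection (the rescued exception is currently discarded). A rough standalone sketch of the pattern, with made-up input strings standing in for the scraped text:

```ruby
require 'date'

# Illustrative date ranges in the "start to end" shape the scraper splits on.
entries = ['1 January 2010 to Present', 'no dates here', '1 March 2011 to 1 June 2012']

organizations = entries.map do |raw|
  begin
    start_text, end_text = raw.split(' to ')
    { start_date: Date.parse(start_text),
      end_date: end_text == 'Present' ? nil : Date.parse(end_text) }
  rescue => e
    nil # a malformed entry becomes nil instead of raising out of the loop
  end
end.compact
# => two hashes; the unparseable middle entry is dropped
```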
@@ -216,10 +219,13 @@ module Linkedin
 
  # loop over each element with org data
  page.search('ul.languages li.language').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- language = item.at('h3').text
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
- languages << { language:language, proficiency:proficiency }
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ language = item.at('h3').text
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
+ languages << { language:language, proficiency:proficiency }
+ rescue => e
+ end
  end
 
  return languages
@@ -228,6 +234,7 @@ module Linkedin
 
  def get_certifications(page)
  certifications = []
+
  # search string to use with Nokogiri
  query = 'ul.certifications li.certification'
  months = 'January|February|March|April|May|June|July|August|September|November|December'
@@ -238,19 +245,23 @@ module Linkedin
 
  # loop over each element with cert data
  page.search(query).each do |item|
- item_text = item.text.gsub(/\s+|\n/, " ").strip
- name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
- authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
- license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
- start_date = Date.parse(item_text.scan(regex)[0].join(' '))
-
- includes_end_date = item_text.scan(regex).count > 1
- end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-
- certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+ begin
+ item_text = item.text.gsub(/\s+|\n/, " ").strip
+ name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
+ authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+ license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+ start_date = Date.parse(item_text.scan(regex)[0].join(' '))
+
+ includes_end_date = item_text.scan(regex).count > 1
+ end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+
+ certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+ rescue => e
+ end
  end
  return certifications
  end
+
  end
 
 
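For context on the certification hunk above: `regex` is built from the `months` alternation and scanned out of the flattened item text, and the first match is handed to Date.parse. Its exact definition sits outside this diff, so the shape below is an assumption made for illustration only:

```ruby
require 'date'

months = 'January|February|March|April|May|June|July|August|September|November|December'
regex  = /(#{months}) (\d{4})/ # assumed shape; the gem's own definition is not shown in this diff

item_text  = 'Certified Widget Engineer January 2012'
start_date = Date.parse(item_text.scan(regex)[0].join(' ')) # parses the first "Month Year" match
```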
@@ -261,26 +272,29 @@ module Linkedin
 
  # loop over each element with org data
  page.search('ul.organizations li.organization').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
- position = nil # add this later
- occupation = nil # add this latetr too, this relates to the experience/work
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
- end_date = nil
- else
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+ position = nil # add this later
+ occupation = nil # add this latetr too, this relates to the experience/work
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+ end_date = nil
+ else
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ end
+
+ organizations << { name: name, start_date: start_date, end_date: end_date }
+ rescue => e
  end
-
- organizations << { name: name, start_date: start_date, end_date: end_date }
  end
-
- return organizations
- end # page.search('ul.organizations li.organization').first
+ end
+ return organizations
  end
 
 
 
+
  def get_recommended_visitors(page)
  recommended_vs=[]
  if page.search(".browsemap").first
@@ -295,5 +309,6 @@ module Linkedin
  return recommended_vs
  end
  end
+
  end
  end
@@ -1,5 +1,5 @@
  module Linkedin
  module Scraper
- VERSION = "0.0.9"
+ VERSION = "0.0.10"
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: linkedin-scraper
  version: !ruby/object:Gem::Version
- version: 0.0.9
+ version: 0.0.10
  platform: ruby
  authors:
  - Yatish Mehta
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-28 00:00:00.000000000 Z
+ date: 2013-06-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: mechanize
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.0
+ rubygems_version: 2.0.3
  signing_key:
  specification_version: 4
  summary: when a url of public linkedin profile page is given it scrapes the entire