linkedin-scraper 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 866d6d1d021d57faea4d513f88b12de0766d397c
4
- data.tar.gz: 0fb3a6819f14e19a8a2f41d6059f2cbf9c693a8d
3
+ metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
4
+ data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
5
5
  SHA512:
6
- metadata.gz: 90f39ed05ce81c3abb2c3c157ea5e4d32fbbb3d2c6716c4c83ea1f8b7462eed24f8e569594ae0a7594c52fdbbcb469e4b37d3aee47e14d71a574e26f90ebc4a9
7
- data.tar.gz: a1f6c4f0bd5f6d9e759c7b1f20d946d5d73b926211beee9e93caea05347480211a20840130097971ace64f1a98117fd4b3f56285aef70e589c8bf906098ef76c
6
+ metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
7
+ data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
@@ -47,13 +47,13 @@ module Linkedin
47
47
  @agent.max_history = 0
48
48
  page = @agent.get(url)
49
49
  return Linkedin::Profile.new(page, url)
50
- rescue=>e
50
+ rescue => e
51
51
  puts e
52
52
  end
53
53
  end
54
54
 
55
55
  def get_skills(page)
56
- page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
56
+ page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
57
57
  end
58
58
 
59
59
  def get_company_url(node)
@@ -188,25 +188,28 @@ module Linkedin
188
188
  organizations = []
189
189
  # if the profile contains org data
190
190
  if page.search('ul.organizations li.organization').first
191
-
192
191
  # loop over each element with org data
193
192
  page.search('ul.organizations li.organization').each do |item|
194
- # find the h3 element within the above section and get the text with excess white space stripped
195
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
196
- position = nil # add this later
197
- occupation = nil # add this latetr too, this relates to the experience/work
198
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
199
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
200
- end_date = nil
201
- else
202
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
203
- end
204
193
 
205
- organizations << { name: name, start_date: start_date, end_date: end_date }
206
- end
194
+ begin
195
+ # find the h3 element within the above section and get the text with excess white space stripped
196
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
197
+ position = nil # add this later
198
+ occupation = nil # add this latetr too, this relates to the experience/work
199
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
200
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
201
+ end_date = nil
202
+ else
203
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
204
+ end
205
+
206
+ organizations << { name: name, start_date: start_date, end_date: end_date }
207
+ rescue => e
207
208
 
209
+ end
210
+ end
208
211
  return organizations
209
- end # page.search('ul.organizations li.organization').first
212
+ end
210
213
  end
211
214
 
212
215
  def get_languages(page)
@@ -216,10 +219,13 @@ module Linkedin
216
219
 
217
220
  # loop over each element with org data
218
221
  page.search('ul.languages li.language').each do |item|
219
- # find the h3 element within the above section and get the text with excess white space stripped
220
- language = item.at('h3').text
221
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
222
- languages << { language:language, proficiency:proficiency }
222
+ begin
223
+ # find the h3 element within the above section and get the text with excess white space stripped
224
+ language = item.at('h3').text
225
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
226
+ languages << { language:language, proficiency:proficiency }
227
+ rescue => e
228
+ end
223
229
  end
224
230
 
225
231
  return languages
@@ -228,6 +234,7 @@ module Linkedin
228
234
 
229
235
  def get_certifications(page)
230
236
  certifications = []
237
+
231
238
  # search string to use with Nokogiri
232
239
  query = 'ul.certifications li.certification'
233
240
  months = 'January|February|March|April|May|June|July|August|September|November|December'
@@ -238,19 +245,23 @@ module Linkedin
238
245
 
239
246
  # loop over each element with cert data
240
247
  page.search(query).each do |item|
241
- item_text = item.text.gsub(/\s+|\n/, " ").strip
242
- name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
243
- authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
244
- license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
245
- start_date = Date.parse(item_text.scan(regex)[0].join(' '))
246
-
247
- includes_end_date = item_text.scan(regex).count > 1
248
- end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
249
-
250
- certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
248
+ begin
249
+ item_text = item.text.gsub(/\s+|\n/, " ").strip
250
+ name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
251
+ authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
252
+ license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
253
+ start_date = Date.parse(item_text.scan(regex)[0].join(' '))
254
+
255
+ includes_end_date = item_text.scan(regex).count > 1
256
+ end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
257
+
258
+ certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
259
+ rescue => e
260
+ end
251
261
  end
252
262
  return certifications
253
263
  end
264
+
254
265
  end
255
266
 
256
267
 
@@ -261,26 +272,29 @@ module Linkedin
261
272
 
262
273
  # loop over each element with org data
263
274
  page.search('ul.organizations li.organization').each do |item|
264
- # find the h3 element within the above section and get the text with excess white space stripped
265
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
266
- position = nil # add this later
267
- occupation = nil # add this latetr too, this relates to the experience/work
268
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
269
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
270
- end_date = nil
271
- else
272
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
275
+ begin
276
+ # find the h3 element within the above section and get the text with excess white space stripped
277
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
278
+ position = nil # add this later
279
+ occupation = nil # add this latetr too, this relates to the experience/work
280
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
281
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
282
+ end_date = nil
283
+ else
284
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
285
+ end
286
+
287
+ organizations << { name: name, start_date: start_date, end_date: end_date }
288
+ rescue => e
273
289
  end
274
-
275
- organizations << { name: name, start_date: start_date, end_date: end_date }
276
290
  end
277
-
278
- return organizations
279
- end # page.search('ul.organizations li.organization').first
291
+ end
292
+ return organizations
280
293
  end
281
294
 
282
295
 
283
296
 
297
+
284
298
  def get_recommended_visitors(page)
285
299
  recommended_vs=[]
286
300
  if page.search(".browsemap").first
@@ -295,5 +309,6 @@ module Linkedin
295
309
  return recommended_vs
296
310
  end
297
311
  end
312
+
298
313
  end
299
314
  end
@@ -1,5 +1,5 @@
1
1
  module Linkedin
2
2
  module Scraper
3
- VERSION = "0.0.9"
3
+ VERSION = "0.0.10"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: linkedin-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yatish Mehta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-05-28 00:00:00.000000000 Z
11
+ date: 2013-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
61
61
  version: '0'
62
62
  requirements: []
63
63
  rubyforge_project:
64
- rubygems_version: 2.0.0
64
+ rubygems_version: 2.0.3
65
65
  signing_key:
66
66
  specification_version: 4
67
67
  summary: when a url of public linkedin profile page is given it scrapes the entire