linkedin-scraper 0.0.9 → 0.0.10

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 866d6d1d021d57faea4d513f88b12de0766d397c
- data.tar.gz: 0fb3a6819f14e19a8a2f41d6059f2cbf9c693a8d
+ metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
+ data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
  SHA512:
- metadata.gz: 90f39ed05ce81c3abb2c3c157ea5e4d32fbbb3d2c6716c4c83ea1f8b7462eed24f8e569594ae0a7594c52fdbbcb469e4b37d3aee47e14d71a574e26f90ebc4a9
- data.tar.gz: a1f6c4f0bd5f6d9e759c7b1f20d946d5d73b926211beee9e93caea05347480211a20840130097971ace64f1a98117fd4b3f56285aef70e589c8bf906098ef76c
+ metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
+ data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
@@ -47,13 +47,13 @@ module Linkedin
  @agent.max_history = 0
  page = @agent.get(url)
  return Linkedin::Profile.new(page, url)
- rescue=>e
+ rescue => e
  puts e
  end
  end
 
  def get_skills(page)
- page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
+ page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
  end
 
  def get_company_url(node)
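Note on the get_skills change above: the trailing `rescue nil` is Ruby's statement-modifier rescue, so if the selector lookup or mapping raises, the whole expression evaluates to nil instead of propagating the error. A minimal standalone sketch of that pattern (the sample markup is illustrative, not taken from LinkedIn):

```ruby
require 'nokogiri'

# Hypothetical page fragment with no skills section at all.
page = Nokogiri::HTML('<div class="profile"></div>')

# If anything in this expression raises, the modifier rescue swallows the
# error and the statement as a whole evaluates to nil.
skills = page.search('.competency.show-bean').map { |s| s.text.strip if s.text } rescue nil
```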
@@ -188,25 +188,28 @@ module Linkedin
  organizations = []
  # if the profile contains org data
  if page.search('ul.organizations li.organization').first
-
  # loop over each element with org data
  page.search('ul.organizations li.organization').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
- position = nil # add this later
- occupation = nil # add this latetr too, this relates to the experience/work
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
- end_date = nil
- else
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
- end
 
- organizations << { name: name, start_date: start_date, end_date: end_date }
- end
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+ position = nil # add this later
+ occupation = nil # add this latetr too, this relates to the experience/work
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+ end_date = nil
+ else
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ end
+
+ organizations << { name: name, start_date: start_date, end_date: end_date }
+ rescue => e
 
+ end
+ end
  return organizations
- end # page.search('ul.organizations li.organization').first
+ end
  end
 
  def get_languages(page)
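The same defensive idea recurs in the hunks that follow: each item in the organizations, languages, and certifications loops is wrapped in begin/rescue, so one malformed entry is skipped rather than aborting the whole collection (the rescued exception is currently discarded). A rough standalone sketch of the pattern, with made-up input strings standing in for the scraped text:

```ruby
require 'date'

# Illustrative date ranges in the "start to end" shape the scraper splits on.
entries = ['1 January 2010 to Present', 'no dates here', '1 March 2011 to 1 June 2012']

organizations = entries.map do |raw|
  begin
    start_text, end_text = raw.split(' to ')
    { start_date: Date.parse(start_text),
      end_date: end_text == 'Present' ? nil : Date.parse(end_text) }
  rescue => e
    nil # a malformed entry becomes nil instead of raising out of the loop
  end
end.compact
# => two hashes; the unparseable middle entry is dropped
```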
@@ -216,10 +219,13 @@ module Linkedin
 
  # loop over each element with org data
  page.search('ul.languages li.language').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- language = item.at('h3').text
- proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
- languages << { language:language, proficiency:proficiency }
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ language = item.at('h3').text
+ proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
+ languages << { language:language, proficiency:proficiency }
+ rescue => e
+ end
  end
 
  return languages
@@ -228,6 +234,7 @@ module Linkedin
 
  def get_certifications(page)
  certifications = []
+
  # search string to use with Nokogiri
  query = 'ul.certifications li.certification'
  months = 'January|February|March|April|May|June|July|August|September|November|December'
@@ -238,19 +245,23 @@ module Linkedin
 
  # loop over each element with cert data
  page.search(query).each do |item|
- item_text = item.text.gsub(/\s+|\n/, " ").strip
- name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
- authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
- license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
- start_date = Date.parse(item_text.scan(regex)[0].join(' '))
-
- includes_end_date = item_text.scan(regex).count > 1
- end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-
- certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+ begin
+ item_text = item.text.gsub(/\s+|\n/, " ").strip
+ name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
+ authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+ license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+ start_date = Date.parse(item_text.scan(regex)[0].join(' '))
+
+ includes_end_date = item_text.scan(regex).count > 1
+ end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+
+ certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+ rescue => e
+ end
  end
  return certifications
  end
+
  end
 
 
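For context on the certification hunk above: `regex` is built from the `months` alternation and scanned out of the flattened item text, and the first match is handed to Date.parse. Its exact definition sits outside this diff, so the shape below is an assumption made for illustration only:

```ruby
require 'date'

months = 'January|February|March|April|May|June|July|August|September|November|December'
regex  = /(#{months}) (\d{4})/ # assumed shape; the gem's own definition is not shown in this diff

item_text  = 'Certified Widget Engineer January 2012'
start_date = Date.parse(item_text.scan(regex)[0].join(' ')) # parses the first "Month Year" match
```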
@@ -261,26 +272,29 @@ module Linkedin
 
  # loop over each element with org data
  page.search('ul.organizations li.organization').each do |item|
- # find the h3 element within the above section and get the text with excess white space stripped
- name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
- position = nil # add this later
- occupation = nil # add this latetr too, this relates to the experience/work
- start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
- if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
- end_date = nil
- else
- Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ begin
+ # find the h3 element within the above section and get the text with excess white space stripped
+ name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+ position = nil # add this later
+ occupation = nil # add this latetr too, this relates to the experience/work
+ start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+ if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+ end_date = nil
+ else
+ Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+ end
+
+ organizations << { name: name, start_date: start_date, end_date: end_date }
+ rescue => e
  end
-
- organizations << { name: name, start_date: start_date, end_date: end_date }
  end
-
- return organizations
- end # page.search('ul.organizations li.organization').first
+ end
+ return organizations
  end
 
 
 
+
  def get_recommended_visitors(page)
  recommended_vs=[]
  if page.search(".browsemap").first
@@ -295,5 +309,6 @@ module Linkedin
  return recommended_vs
  end
  end
+
  end
  end
@@ -1,5 +1,5 @@
  module Linkedin
  module Scraper
- VERSION = "0.0.9"
+ VERSION = "0.0.10"
  end
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: linkedin-scraper
  version: !ruby/object:Gem::Version
- version: 0.0.9
+ version: 0.0.10
  platform: ruby
  authors:
  - Yatish Mehta
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2013-05-28 00:00:00.000000000 Z
+ date: 2013-06-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: mechanize
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  version: '0'
  requirements: []
  rubyforge_project:
- rubygems_version: 2.0.0
+ rubygems_version: 2.0.3
  signing_key:
  specification_version: 4
  summary: when a url of public linkedin profile page is given it scrapes the entire