RubyGems - linkedin-scraper - Versions diffs - 0.0.9 → 0.0.10 - Mend

linkedin-scraper 0.0.9 → 0.0.10

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml +4 -4
data/lib/linkedin-scraper/profile.rb +59 -44
data/lib/linkedin-scraper/version.rb +1 -1
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 866d6d1d021d57faea4d513f88b12de0766d397c
-  data.tar.gz: 0fb3a6819f14e19a8a2f41d6059f2cbf9c693a8d
+  metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
+  data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
 SHA512:
-  metadata.gz: 90f39ed05ce81c3abb2c3c157ea5e4d32fbbb3d2c6716c4c83ea1f8b7462eed24f8e569594ae0a7594c52fdbbcb469e4b37d3aee47e14d71a574e26f90ebc4a9
-  data.tar.gz: a1f6c4f0bd5f6d9e759c7b1f20d946d5d73b926211beee9e93caea05347480211a20840130097971ace64f1a98117fd4b3f56285aef70e589c8bf906098ef76c
+  metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
+  data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f

data/lib/linkedin-scraper/profile.rb CHANGED Viewed

@@ -47,13 +47,13 @@ module Linkedin
         @agent.max_history = 0
         page = @agent.get(url)
         return Linkedin::Profile.new(page, url)
-      rescue=>e
+      rescue => e
         puts e
       end
     end
     def get_skills(page)
-      page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
+      page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
     end
     def get_company_url(node)
@@ -188,25 +188,28 @@ module Linkedin
       organizations = []
       # if the profile contains org data
       if page.search('ul.organizations li.organization').first
         # loop over each element with org data
         page.search('ul.organizations li.organization').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
-          position = nil # add this later
-          occupation = nil # add this latetr too, this relates to the experience/work
-          start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
-          if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
-            end_date = nil
-          else
-            Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
-          end
-          organizations << { name: name, start_date: start_date, end_date: end_date }
-        end
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+            position = nil # add this later
+            occupation = nil # add this latetr too, this relates to the experience/work
+            start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+            if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+              end_date = nil
+            else
+              Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+            end
+            organizations << { name: name, start_date: start_date, end_date: end_date }
+          rescue => e
+          end
+        end
         return organizations
-      end # page.search('ul.organizations li.organization').first
+      end
     end
     def get_languages(page)
@@ -216,10 +219,13 @@ module Linkedin
         # loop over each element with org data
         page.search('ul.languages li.language').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          language = item.at('h3').text
-          proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
-          languages << { language:language, proficiency:proficiency }
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            language = item.at('h3').text
+            proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
+            languages << { language:language, proficiency:proficiency }
+          rescue => e
+          end
         end
         return languages
@@ -228,6 +234,7 @@ module Linkedin
     def get_certifications(page)
       certifications = []
       # search string to use with Nokogiri
       query = 'ul.certifications li.certification'
       months = 'January|February|March|April|May|June|July|August|September|November|December'
@@ -238,19 +245,23 @@ module Linkedin
         # loop over each element with cert data
         page.search(query).each do |item|
-          item_text = item.text.gsub(/\s+|\n/, " ").strip
-          name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
-          authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-          license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-          start_date = Date.parse(item_text.scan(regex)[0].join(' '))
-          includes_end_date = item_text.scan(regex).count > 1
-          end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
-          certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+          begin
+            item_text = item.text.gsub(/\s+|\n/, " ").strip
+            name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
+            authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+            license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+            start_date = Date.parse(item_text.scan(regex)[0].join(' '))
+            includes_end_date = item_text.scan(regex).count > 1
+            end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
+            certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
+          rescue => e
+          end
         end
         return certifications
       end
     end
@@ -261,26 +272,29 @@ module Linkedin
         # loop over each element with org data
         page.search('ul.organizations li.organization').each do |item|
-          # find the h3 element within the above section and get the text with excess white space stripped
-          name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
-          position = nil # add this later
-          occupation = nil # add this latetr too, this relates to the experience/work
-          start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
-          if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
-            end_date = nil
-          else
-            Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+          begin
+            # find the h3 element within the above section and get the text with excess white space stripped
+            name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
+            position = nil # add this later
+            occupation = nil # add this latetr too, this relates to the experience/work
+            start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
+            if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
+              end_date = nil
+            else
+              Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
+            end
+            organizations << { name: name, start_date: start_date, end_date: end_date }
+          rescue => e
           end
-          organizations << { name: name, start_date: start_date, end_date: end_date }
         end
-        return organizations
-      end # page.search('ul.organizations li.organization').first
+      end
+      return organizations
     end
     def get_recommended_visitors(page)
       recommended_vs=[]
       if page.search(".browsemap").first
@@ -295,5 +309,6 @@ module Linkedin
         return recommended_vs
       end
     end
   end
 end

data/lib/linkedin-scraper/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Linkedin
   module Scraper
-    VERSION = "0.0.9"
+    VERSION = "0.0.10"
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: linkedin-scraper
 version: !ruby/object:Gem::Version
-  version: 0.0.9
+  version: 0.0.10
 platform: ruby
 authors:
 - Yatish Mehta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-05-28 00:00:00.000000000 Z
+date: 2013-06-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.0.0
+rubygems_version: 2.0.3
 signing_key:
 specification_version: 4
 summary: when a url of  public linkedin profile page is given it scrapes the entire