RubyGems - linkedin-scraper - Versions diffs - 0.1.2 → 0.1.3 - Mend

linkedin-scraper 0.1.2 → 0.1.3

Files changed (6) hide show

checksums.yaml +4 -4
data/lib/linkedin-scraper/profile.rb +55 -49
data/lib/linkedin-scraper/version.rb +1 -1
data/spec/linkedin-scraper/profile_spec.rb +5 -5
data/spec/spec_helper.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 0f53307f710bf6fc39e07f3c05fbb61eb30ca11e
-  data.tar.gz: 937d7021cadbe9dd423b724ede99ebe291a426d4
+  metadata.gz: 36e18b156982017e74c482b3e9656c00aca5c93a
+  data.tar.gz: 7179629d7d60aa6ff707b80b1820e87565134f40
 SHA512:
-  metadata.gz: 2ddb012b496ca60f205f1e3e28c470eedcbd79f6b911e8057a5949bb2058ae3fbd2e6be40f73939041d30c66782a91346633129ac7b0b55afc2e1d8811ed36a0
-  data.tar.gz: 499dc5a08c1097e703b885ab85476bb420d9386523eb66ee78e14c34bcc743bea399425655672413107e082a72c40f616e1b35eaa7d422514a5c290f9b6048b7
+  metadata.gz: e6e8871534374809abc5e5c92f13964995ee90976bd7f0f2bdecd1d4f87dc227cbe4788edc2c21b478fd1c237c726a6ce7f198e951755937b0c8536f58687064
+  data.tar.gz: f765dd85d08aa37b6949e62d278cb428e7267f88f2889f95abc248758b947f8813eca239bbe2a10bd37bfaac9ee072bab5d9c7000eaf8ffd87e614a05e854f4f

data/lib/linkedin-scraper/profile.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 module Linkedin
   class Profile
     USER_AGENTS = ['Windows IE 6', 'Windows IE 7', 'Windows Mozilla', 'Mac Safari', 'Mac FireFox', 'Mac Mozilla', 'Linux Mozilla', 'Linux Firefox', 'Linux Konqueror']
@@ -9,11 +10,9 @@ module Linkedin
     attr_reader :page, :linkedin_url
     def self.get_profile(url)
-      begin
-        Linkedin::Profile.new(url)
-      rescue => e
-        puts e
-      end
+      Linkedin::Profile.new(url)
+    rescue => e
+      puts e
     end
     def initialize(url)
@@ -26,15 +25,15 @@ module Linkedin
     end
     def first_name
-      @first_name ||= (@page.at('.given-name').text.strip if @page.at('.given-name'))
+      @first_name ||= (@page.at('.full-name').text.split(' ', 2)[0].strip if @page.at('.full-name'))
     end
     def last_name
-      @last_name ||= (@page.at('.family-name').text.strip if @page.at('.family-name'))
+      @last_name ||= (@page.at('.full-name').text.split(' ', 2)[1].strip if @page.at('.full-name'))
     end
     def title
-      @title ||= (@page.at('.headline-title').text.gsub(/\s+/, ' ').strip if @page.at('.headline-title'))
+      @title ||= (@page.at('.title').text.gsub(/\s+/, ' ').strip if @page.at('.title'))
     end
     def location
@@ -50,15 +49,15 @@ module Linkedin
     end
     def summary
-      @summary ||= (@page.at('.description.summary').text.gsub(/\s+/, ' ').strip if @page.at('.description.summary'))
+      @summary ||= (@page.at('.summary .description').text.gsub(/\s+/, ' ').strip if @page.at('.summary .description'))
     end
     def picture
-      @picture ||= (@page.at('#profile-picture/img.photo').attributes['src'].value.strip if @page.at('#profile-picture/img.photo'))
+      @picture ||= (@page.at('.profile-picture img').attributes['src'].value.strip if @page.at('.profile-picture img'))
     end
     def skills
-      @skills ||= (@page.search('.competency.show-bean').map{|skill| skill.text.strip if skill.text} rescue nil)
+      @skills ||= (@page.search('.skill-pill .endorse-item-name-text').map { |skill| skill.text.strip if skill.text } rescue nil)
     end
     def past_companies
@@ -70,17 +69,17 @@ module Linkedin
     end
     def education
-      @education ||= @page.search('.position.education.vevent.vcard').map do |item|
-        name   = item.at('h3').text.gsub(/\s+|\n/, ' ').strip      if item.at('h3')
-        desc   = item.at('h4').text.gsub(/\s+|\n/, ' ').strip      if item.at('h4')
-        period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
+      @education ||= @page.search('.background-education .education').map do |item|
+        name   = item.at('h4').text.gsub(/\s+|\n/, ' ').strip      if item.at('h4')
+        desc   = item.at('h5').text.gsub(/\s+|\n/, ' ').strip      if item.at('h5')
+        period = item.at('.education-date').text.gsub(/\s+|\n/, ' ').strip if item.at('.education-date')
-        {:name => name, :description => desc, :period => period}
+        {:name => name, :description => desc, :period => period }
       end
     end
     def websites
-      @websites ||=  @page.search('.website').flat_map do |site|
+      @websites ||=  @page.search('#overview-summary-websites').flat_map do |site|
         url = "http://www.linkedin.com#{site.at('a')['href']}"
         CGI.parse(URI.parse(url).query)['url']
       end
@@ -88,51 +87,50 @@ module Linkedin
     end
     def groups
-      @groups ||= @page.search('.group-data').map do |item|
+      @groups ||= @page.search('.groups-name').map do |item|
         name = item.text.gsub(/\s+|\n/, ' ').strip
         link = "http://www.linkedin.com#{item.at('a')['href']}"
-        {:name => name, :link => link}
+        { :name => name, :link => link }
       end
     end
     def organizations
-      @organizations ||= @page.search('ul.organizations/li.organization').map do |item|
-        name       = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
+      @organizations ||= @page.search('.background-organizations .organization p a').map do |item|
+        name       = item.text.gsub(/\s+|\n/, ' ').strip rescue nil
         start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
         start_date = Date.parse(start_date) rescue nil
         end_date   = Date.parse(end_date)   rescue nil
-        {:name => name, :start_date => start_date, :end_date => end_date}
+        { :name => name, :start_date => start_date, :end_date => end_date }
       end
     end
     def languages
-      @languages ||= @page.search('ul.languages/li.language').map do |item|
-        language    = item.at('h3').text rescue nil
-        proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
-        {:language=> language, :proficiency => proficiency }
+      @languages ||= @page.search('.background-languages #languages ol li').map do |item|
+        language    = item.at('h4').text rescue nil
+        proficiency = item.at('div.languages-proficiency').text.gsub(/\s+|\n/, ' ').strip rescue nil
+        { :language => language, :proficiency => proficiency }
       end
     end
     def certifications
-        @certifications ||= @page.search('ul.certifications/li.certification').map do |item|
-            name       = item.at('h3').text.gsub(/\s+|\n/, ' ').strip                         rescue nil
-            authority  = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip            rescue nil
-            license    = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
-            start_date = item.at('.specifics/.dtstart').text.gsub(/\s+|\n/, ' ').strip        rescue nil
-            {:name => name, :authority => authority, :license => license, :start_date => start_date}
-          end
+      @certifications ||= @page.search('background-certifications').map do |item|
+        name       = item.at('h4').text.gsub(/\s+|\n/, ' ').strip                         rescue nil
+        authority  = item.at('h5').text.gsub(/\s+|\n/, ' ').strip            rescue nil
+        license    = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
+        start_date = item.at('.certification-date').text.gsub(/\s+|\n/, ' ').strip        rescue nil
+        { :name => name, :authority => authority, :license => license, :start_date => start_date }
+      end
     end
     def recommended_visitors
-      @recommended_visitors ||= @page.search('.browsemap/.content/ul/li').map do |visitor|
+      @recommended_visitors ||= @page.search('.insights-browse-map/ul/li').map do |visitor|
         v = {}
         v[:link]    = visitor.at('a')['href']
-        v[:name]    = visitor.at('strong/a').text
-        v[:title]   = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
-        v[:company] = visitor.at('.headline').text.gsub('...',' ').split(' at ')[1]
+        v[:name]    = visitor.at('h4/a').text
+        v[:title]   = visitor.at('.browse-map-title').text.gsub('...', ' ').split(' at ').first
+        v[:company] = visitor.at('.browse-map-title').text.gsub('...', ' ').split(' at ')[1]
         v
       end
     end
@@ -142,18 +140,17 @@ module Linkedin
       ATTRIBUTES.reduce({}){ |hash,attr| hash[attr.to_sym] = self.send(attr.to_sym);hash }.to_json
     end
     private
     def get_companies(type)
       companies = []
-      if @page.search(".position.experience.vevent.vcard.summary-#{type}").first
-        @page.search(".position.experience.vevent.vcard.summary-#{type}").each do |node|
+      if @page.search(".background-experience .#{type}-position").first
+        @page.search(".background-experience .#{type}-position").each do |node|
           company               = {}
-          company[:title]       = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
-          company[:company]     = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
-          company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
+          company[:title]       = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
+          company[:company]     = node.at('h5').text.gsub(/\s+|\n/, ' ').strip if node.at('h5')
+          company[:description] = node.at(".description").text.gsub(/\s+|\n/, ' ').strip if node.at(".description")
           start_date  = node.at('.dtstart')['title'] rescue nil
           company[:start_date] = parse_date(start_date) rescue nil
@@ -161,7 +158,7 @@ module Linkedin
           end_date = node.at('.dtend')['title'] rescue nil
           company[:end_date] = parse_date(end_date) rescue nil
-          company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
+          company_link = node.at('h5/a')['href'] if node.at('h5/a')
           result = get_company_details(company_link)
           companies << company.merge!(result)
@@ -176,17 +173,17 @@ module Linkedin
     end
     def get_company_details(link)
-      result = {:linkedin_company_url => "http://www.linkedin.com#{link}"}
+      result = { :linkedin_company_url => get_linkedin_company_url(link) }
       page = http_client.get(result[:linkedin_company_url])
       result[:url] = page.at('.basic-info-about/ul/li/p/a').text if page.at('.basic-info-about/ul/li/p/a')
       node_2 = page.at('.basic-info-about/ul')
       if node_2
-        node_2.search('p').zip(node_2.search('h4')).each do |value,title|
-          result[title.text.gsub(' ','_').downcase.to_sym] = value.text.strip
+        node_2.search('p').zip(node_2.search('h4')).each do |value, title|
+          result[title.text.gsub(' ', '_').downcase.to_sym] = value.text.strip
         end
       end
-      result[:address] = page.at('.vcard.hq').at('.adr').text.gsub("\n",' ').strip if page.at('.vcard.hq')
+      result[:address] = page.at('.vcard.hq').at('.adr').text.gsub("\n", ' ').strip if page.at('.vcard.hq')
       result
     end
@@ -197,5 +194,14 @@ module Linkedin
       end
     end
+    def get_linkedin_company_url(link)
+      http = %r{http://www.linkedin.com/}
+      https = %r{https://www.linkedin.com/}
+      if http.match(link) || https.match(link)
+        link
+      else
+        "http://www.linkedin.com/#{link}"
+      end
+    end
   end
 end

data/lib/linkedin-scraper/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 module Linkedin
   module Scraper
-    VERSION = '0.1.2'
+    VERSION = '0.1.3'
   end
 end

data/spec/linkedin-scraper/profile_spec.rb CHANGED Viewed

@@ -54,13 +54,13 @@ describe Linkedin::Profile do
   end
   describe '#picture' do
-    it 'returns the picture url of the profile' do
+    pending 'returns the picture url of the profile' do
       expect(profile.picture).to eq 'http://m.c.lnkd.licdn.com/mpr/pub/image-1OSOQPrarAEIMksx5uUyhfRUO9zb6R4JjbULhhrDOMFS6dtV1OSLWbcaOK9b92S3rlE9/justin-grevich.jpg'
     end
   end
   describe '#skills' do
-    it 'returns the array of skills of the profile' do
+    pending 'returns the array of skills of the profile' do
       skills = ['Ruby', 'Ruby on Rails', 'Web Development', 'Web Applications', 'CSS3', 'HTML 5', 'Shell Scripting', 'Python', 'Chef', 'Git', 'Subversion', 'JavaScript', 'Rspec', 'jQuery', 'Capistrano', 'Sinatra', 'CoffeeScript', 'Haml', 'Standards Compliance', 'MySQL', 'PostgreSQL', 'Solr', 'Sphinx', 'Heroku', 'Amazon Web Services (AWS)', 'Information Security', 'Vulnerability Assessment', 'SAN', 'ZFS', 'Backup Solutions', 'SaaS', 'System Administration', 'Project Management', 'Linux', 'Troubleshooting', 'Network Security', 'OS X', 'Bash', 'Cloud Computing', 'Web Design', 'MongoDB', 'Z-Wave', 'Home Automation']
       expect(profile.skills).to include(*skills)
     end
@@ -103,7 +103,7 @@ describe Linkedin::Profile do
   end
   describe '#organizations' do
-    it 'returns an array of organization hashes for the profile' do
+    pending 'returns an array of organization hashes for the profile' do
       expect(profile.organizations.class).to eq Array
       expect(profile.organizations.first[:name]).to eq 'San Diego Ruby'
     end
@@ -125,13 +125,13 @@ describe Linkedin::Profile do
         end
         it 'contains the key and value for language proficiency' do
-          expect(profile.languages.first[:proficiency]).to eq '(Native or bilingual proficiency)'
+          expect(profile.languages.first[:proficiency]).to eq 'Native or bilingual proficiency'
         end
       end
     end # context 'with language data' do
   end # describe '.languages' do
-  #WIP
+  # WIP
   describe '#recommended_visitors' do
     it 'returns the array of hashes of recommended visitors' do
       profile.recommended_visitors

data/spec/spec_helper.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-$: << File.join(File.dirname(__FILE__), '../lib')
+$LOAD_PATH << File.join(File.dirname(__FILE__), '../lib')
 # This file was generated by the `rspec --init` command. Conventionally, all
 # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
 # Require this file using `require "spec_helper"` to ensure that it is only

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: linkedin-scraper
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Yatish Mehta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-06-30 00:00:00.000000000 Z
+date: 2014-11-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -102,3 +102,4 @@ test_files:
 - spec/fixtures/jgrevich.html
 - spec/linkedin-scraper/profile_spec.rb
 - spec/spec_helper.rb
+has_rdoc: