RubyGems - linkedin-scraper - Versions diffs - 0.0.11 → 0.0.12 - Mend

linkedin-scraper 0.0.11 → 0.0.12

Files changed (10)

checksums.yaml +4 -4
data/.rubocop.yml +11 -0
data/README.md +3 -2
data/bin/linkedin-scraper +0 -1
data/lib/linkedin-scraper.rb +5 -8
data/lib/linkedin-scraper/profile.rb +27 -27
data/lib/linkedin-scraper/version.rb +1 -1
data/linkedin-scraper.gemspec +7 -8
data/spec/linkedin-scraper/profile_spec.rb +21 -16
metadata +3 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c7cfeee1f051d529594d6d827d6ad373b6aca496
-  data.tar.gz: 681cfad543c0d7daa2863e6c6c2525560cf640df
+  metadata.gz: 98225f23a99fa755b3e29e92cfcff488c11ede5a
+  data.tar.gz: aaaae8060e81d59cc8f17848606acd382a158cea
 SHA512:
-  metadata.gz: 9c52c63f97a7855b088bb467bab0b72ac4a1616424348318b1072a0768dac512d4279fc04d1c1e8c213d3e57dc3eb368c43d757a8b937f979217d529f2a29510
-  data.tar.gz: 7d6e965dd00cb7ffc23f244eaa70d342e63d58b4a8ce760726f7907661dfbdae76c6a0bb740b3cdbdc20bd32b2f18230a40c4be25b1da1e289373e5da098397c
+  metadata.gz: f6ae1cd6a3eb3b9b66d7b32cc340cfba163627c0c958d2162f5eec43782f00631e5dc8e0802e4df7781350ba9c08afc397c2495ff07624540d002d871bcf62f2
+  data.tar.gz: 0aed555859a8ef26ce93a724da89ad34011d43986e6a0e6b80d43d7424472b5a1ba4fefac8f17c0789b5326e3cd4464ab7ce2198d106b7c497c62ed6a2e091bc

data/.rubocop.yml ADDED

@@ -0,0 +1,11 @@
+Documentation:
+  Enabled: false
+DotPosition:
+  Enabled: false
+LineLength:
+  Enabled: false
+MethodLength:
+  Enabled: false

data/README.md CHANGED

@@ -1,4 +1,5 @@
-[![Build Status](https://secure.travis-ci.org/yatishmehta27/linkedin-scraper.png)](http://travis-ci.org/yatishmehta27/linkedin-scraper)
+[![Build Status](https://secure.travis-ci.org/yatish27/linkedin-scraper.png)](http://travis-ci.org/yatish27/linkedin-scraper)
+[![Gem Version](https://badge.fury.io/rb/linkedin-scraper.png)](http://badge.fury.io/rb/linkedin-scraper)
 Linkedin Scraper
 ================
@@ -251,7 +252,7 @@ For current and past comapnies it also provides the details of the companies lik
     ]
-The gem also comes with a binary and can be used from teh command line to get a json response of the scraped data. It takes the url as the first argument.
+The gem also comes with a binary and can be used from the command line to get a json response of the scraped data. It takes the url as the first argument.
     linkedin-scraper http://www.linkedin.com/in/jeffweiner08

data/bin/linkedin-scraper CHANGED

@@ -1,4 +1,3 @@
-#!/usr/bin/env ruby
 require './lib/linkedin-scraper'
 profile = Linkedin::Profile.new(ARGV[0])

data/lib/linkedin-scraper.rb CHANGED

@@ -1,8 +1,5 @@
-require "rubygems"
-require "mechanize"
-require "cgi"
-require "net/http"
-Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each {|file| require file }
+require 'rubygems'
+require 'mechanize'
+require 'cgi'
+require 'net/http'
+Dir["#{File.expand_path(File.dirname(__FILE__))}/linkedin-scraper/*.rb"].each { |file| require file }

data/lib/linkedin-scraper/profile.rb CHANGED

@@ -5,9 +5,9 @@ module Linkedin
     USER_AGENTS = ['Windows IE 6', 'Windows IE 7', 'Windows Mozilla', 'Mac Safari', 'Mac FireFox', 'Mac Mozilla', 'Linux Mozilla', 'Linux Firefox', 'Linux Konqueror']
     ATTRIBUTES = %w(name first_name last_name title location country industry summary picture linkedin_url education groups websites languages skills certifications organizations past_companies current_companies recommended_visitors)
     attr_reader :page, :linkedin_url
     def self.get_profile(url)
       begin
         Linkedin::Profile.new(url)
@@ -20,12 +20,12 @@ module Linkedin
       @linkedin_url = url
       @page         = http_client.get(url)
     end
     def name
       "#{first_name} #{last_name}"
     end
-    def first_name
+    def first_name
       @first_name ||= (@page.at('.given-name').text.strip if @page.at('.given-name'))
     end
@@ -33,7 +33,7 @@ module Linkedin
       @last_name ||= (@page.at('.family-name').text.strip if @page.at('.family-name'))
     end
-    def title
+    def title
       @title ||= (@page.at('.headline-title').text.gsub(/\s+/, ' ').strip if @page.at('.headline-title'))
     end
@@ -77,12 +77,12 @@ module Linkedin
             name   = item.at('h3').text.gsub(/\s+|\n/, ' ').strip      if item.at('h3')
             desc   = item.at('h4').text.gsub(/\s+|\n/, ' ').strip      if item.at('h4')
             period = item.at('.period').text.gsub(/\s+|\n/, ' ').strip if item.at('.period')
             {:name => name, :description => desc, :period => period}
           end
         end
       end
-       @education
+      @education
     end
     def websites
@@ -118,7 +118,7 @@ module Linkedin
         @organizations = []
         if @page.search('ul.organizations/li.organization').first
           @organizations = @page.search('ul.organizations/li.organization').map do |item|
             name       = item.search('h3').text.gsub(/\s+|\n/, ' ').strip rescue nil
             start_date, end_date = item.search('ul.specifics li').text.gsub(/\s+|\n/, ' ').strip.split(' to ')
             start_date = Date.parse(start_date) rescue nil
@@ -153,21 +153,21 @@ module Linkedin
             authority  = item.at('.specifics/.org').text.gsub(/\s+|\n/, ' ').strip            rescue nil
             license    = item.at('.specifics/.licence-number').text.gsub(/\s+|\n/, ' ').strip rescue nil
             start_date = item.at('.specifics/.dtstart').text.gsub(/\s+|\n/, ' ').strip        rescue nil
             {:name => name, :authority => authority, :license => license, :start_date => start_date}
           end
         end
       end
       @certifications
     end
     def recommended_visitors
       unless @recommended_visitors
         @recommended_visitors = []
         if @page.at('.browsemap/.content/ul/li')
           @recommended_visitors = @page.search('.browsemap/.content/ul/li').map do |visitor|
-            v = {}
+            v = {}
             v[:link]    = visitor.at('a')['href']
             v[:name]    = visitor.at('strong/a').text
             v[:title]   = visitor.at('.headline').text.gsub('...',' ').split(' at ').first
@@ -181,46 +181,46 @@ module Linkedin
     def to_json
       require 'json'
-      hash = {}
-      ATTRIBUTES.each do |attribute|
-        hash[attribute.to_sym] = self.send(attribute.to_sym)
-      end
-      hash.to_json
+      ATTRIBUTES.reduce({}){ |hash,attr| hash[attr.to_sym] = self.send(attr.to_sym);hash }.to_json
     end
     private
     def get_companies(type)
       companies = []
       if @page.search(".position.experience.vevent.vcard.summary-#{type}").first
         @page.search(".position.experience.vevent.vcard.summary-#{type}").each do |node|
           company               = {}
           company[:title]       = node.at('h3').text.gsub(/\s+|\n/, ' ').strip if node.at('h3')
           company[:company]     = node.at('h4').text.gsub(/\s+|\n/, ' ').strip if node.at('h4')
           company[:description] = node.at(".description.#{type}-position").text.gsub(/\s+|\n/, ' ').strip if node.at(".description.#{type}-position")
           start_date  = node.at('.dtstart').text.gsub(/\s+|\n/, ' ').strip rescue nil
-          company[:start_date] = Date.parse(start_date) rescue nil
+          company[:start_date] = parse_date(start_date) rescue nil
-          end_date  = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
-          company[:end_date] = Date.parse(end_date) rescue nil
+          end_date = node.at('.dtend').text.gsub(/\s+|\n/, ' ').strip rescue nil
+          end_date ||= node.at('.dtstamp').text.gsub(/\s+|\n/, ' ').strip rescue nil
+          company[:end_date] = parse_date(end_date) rescue nil
           company_link = node.at('h4/strong/a')['href'] if node.at('h4/strong/a')
-          result = get_company_details(company_link)
+          result = get_company_details(company_link)
           companies << company.merge!(result)
         end
       end
       companies
     end
+    def parse_date(date)
+      date = "#{date}-01-01" if date =~ /^(19|20)\d{2}$/
+      Date.parse(date)
+    end
     def get_company_details(link)
       result = {:linkedin_company_url => "http://www.linkedin.com#{link}"}
       page = http_client.get(result[:linkedin_company_url])
       result[:url] = page.at('.basic-info/div/dl/dd/a').text if page.at('.basic-info/div/dl/dd/a')
       node_2 = page.at('.basic-info/.content.inner-mod')
       if node_2
@@ -231,7 +231,7 @@ module Linkedin
       result[:address] = page.at('.vcard.hq').at('.adr').text.gsub("\n",' ').strip if page.at('.vcard.hq')
       result
     end
     def http_client
       Mechanize.new do |agent|
         agent.user_agent_alias = USER_AGENTS.sample

data/lib/linkedin-scraper/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Linkedin
   module Scraper
-    VERSION = "0.0.11"
+    VERSION = '0.0.12'
   end
 end

data/linkedin-scraper.gemspec CHANGED

@@ -2,20 +2,19 @@
 require File.expand_path('../lib/linkedin-scraper/version', __FILE__)
 Gem::Specification.new do |gem|
-  gem.authors       = ["Yatish Mehta"]
+  gem.authors       = ['Yatish Mehta']
   gem.description   = %q{Scrapes the linkedin profile when a url is given }
   gem.summary       = %q{when a url of  public linkedin profile page is given it scrapes the entire page and converts into a accessible object}
-  gem.homepage      = "https://github.com/yatishmehta27/linkedin-scraper"
+  gem.homepage      = 'https://github.com/yatishmehta27/linkedin-scraper'
   gem.files         = `git ls-files`.split($\)
-  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
   gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
-  gem.name          = "linkedin-scraper"
-  gem.require_paths = ["lib"]
+  gem.name          = 'linkedin-scraper'
+  gem.require_paths = ['lib']
   gem.version       = Linkedin::Scraper::VERSION
-  gem.add_dependency(%q<mechanize>, [">= 0"])
-  gem.add_development_dependency 'rspec','>=0'
+  gem.add_dependency(%q<mechanize>, ['>= 0'])
+  gem.add_development_dependency 'rspec', '>=0'
   gem.add_development_dependency 'rake'
 end

data/spec/linkedin-scraper/profile_spec.rb CHANGED

@@ -5,25 +5,25 @@ describe Linkedin::Profile do
   before(:all) do
-    @page = Nokogiri::HTML(File.open("spec/fixtures/jgrevich.html", 'r') { |f| f.read })
-    @profile = Linkedin::Profile.new("http://www.linkedin.com/in/jgrevich")
+    @page = Nokogiri::HTML(File.open('spec/fixtures/jgrevich.html', 'r') { |f| f.read })
+    @profile = Linkedin::Profile.new('http://www.linkedin.com/in/jgrevich')
   end
-  describe ".get_profile" do
-    it "Create an instance of Linkedin::Profile class" do
+  describe '.get_profile' do
+    it 'Create an instance of Linkedin::Profile class' do
       expect(@profile).to be_instance_of Linkedin::Profile
     end
   end
-  describe "#first_name" do
+  describe '#first_name' do
     it 'returns the first name of the profile' do
-      expect(@profile.first_name).to eq "Justin"
+      expect(@profile.first_name).to eq 'Justin'
     end
   end
-  describe "#last_name" do
+  describe '#last_name' do
     it 'returns the last name of the profile' do
-      expect(@profile.last_name).to eq "Grevich"
+      expect(@profile.last_name).to eq 'Grevich'
     end
   end
@@ -65,7 +65,7 @@ describe Linkedin::Profile do
   describe '#skills' do
     it 'returns the array of skills of the profile' do
-      skills = ["Ruby", "Ruby on Rails", "Web Development", "Web Applications", "CSS3", "HTML 5", "Shell Scripting", "Python", "Chef", "Git", "Subversion", "JavaScript", "Rspec", "jQuery", "Capistrano", "Sinatra", "CoffeeScript", "Haml", "Standards Compliance", "MySQL", "PostgreSQL", "Solr", "Sphinx", "Heroku", "Amazon Web Services (AWS)", "Information Security", "Vulnerability Assessment", "SAN", "ZFS", "Backup Solutions", "SaaS", "System Administration", "Project Management", "Linux", "Troubleshooting", "Network Security", "OS X", "Bash", "Cloud Computing", "Web Design", "MongoDB", "Z-Wave", "Home Automation"]
+      skills = ['Ruby', 'Ruby on Rails', 'Web Development', 'Web Applications', 'CSS3', 'HTML 5', 'Shell Scripting', 'Python', 'Chef', 'Git', 'Subversion', 'JavaScript', 'Rspec', 'jQuery', 'Capistrano', 'Sinatra', 'CoffeeScript', 'Haml', 'Standards Compliance', 'MySQL', 'PostgreSQL', 'Solr', 'Sphinx', 'Heroku', 'Amazon Web Services (AWS)', 'Information Security', 'Vulnerability Assessment', 'SAN', 'ZFS', 'Backup Solutions', 'SaaS', 'System Administration', 'Project Management', 'Linux', 'Troubleshooting', 'Network Security', 'OS X', 'Bash', 'Cloud Computing', 'Web Design', 'MongoDB', 'Z-Wave', 'Home Automation']
       expect(@profile.skills).to include(*skills)
     end
   end
@@ -100,20 +100,20 @@ describe Linkedin::Profile do
     end
   end
-  describe "#name" do
+  describe '#name' do
     it 'returns the first and last name of the profile' do
-      expect(@profile.name).to eq "Justin Grevich"
+      expect(@profile.name).to eq 'Justin Grevich'
     end
-  end
+  end
-  describe "#organizations" do
+  describe '#organizations' do
     it 'returns an array of organization hashes for the profile' do
       expect(@profile.organizations.class).to eq Array
       expect(@profile.organizations.first[:name]).to eq 'San Diego Ruby'
     end
   end
-  describe "#languages" do
+  describe '#languages' do
     it 'returns an array of languages hashes' do
       expect(@profile.languages.class).to eq Array
     end
@@ -133,8 +133,8 @@ describe Linkedin::Profile do
         end
       end
     end # context 'with language data' do
-  end # describe ".languages" do
+  end # describe '.languages' do
   describe '#recommended_visitors' do
     it 'returns the array of hashes of recommended visitors' do
@@ -148,5 +148,10 @@ describe Linkedin::Profile do
     end
   end
+  describe '#to_json' do
+    it 'returns the json format of the profile' do
+      @profile.to_json
+    end
+  end
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: linkedin-scraper
 version: !ruby/object:Gem::Version
-  version: 0.0.11
+  version: 0.0.12
 platform: ruby
 authors:
 - Yatish Mehta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-09-23 00:00:00.000000000 Z
+date: 2013-11-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: mechanize
@@ -60,6 +60,7 @@ extensions: []
 extra_rdoc_files: []
 files:
 - .gitignore
+- .rubocop.yml
 - .travis.yml
 - Gemfile
 - LICENSE