linkedin-scraper-v2 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +23 -0
 - data/.rubocop.yml +11 -0
 - data/.travis.yml +8 -0
 - data/Gemfile +4 -0
 - data/LICENSE +22 -0
 - data/README.md +272 -0
 - data/Rakefile +3 -0
 - data/bin/linkedin-scraper +5 -0
 - data/lib/linkedin_scraper.rb +5 -0
 - data/lib/linkedin_scraper/profile.rb +252 -0
 - data/lib/linkedin_scraper/version.rb +5 -0
 - data/linkedin-scraper.gemspec +22 -0
 - data/spec/fixtures/jeffweiner08.html +308 -0
 - data/spec/linkedin_scraper/profile_spec.rb +110 -0
 - data/spec/spec_helper.rb +17 -0
 - metadata +107 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: ea72cf17a2f1766bdb4eb7b320d3251392d21824
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: ebccbf3dcd9f3511b452e103986916067a502e6d
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: cd1a28b3081b4d1ab7f86423cc169af184822cdb0ae96b5077a59314b6012a8e3ed837cc03e032c0878df7bc32c672bcfb1c32e8f3cbf56ea989b075c5468d71
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 7d5c7acbde250b8976ddb64d0b0b496131baf3784993e5f6df17756762a9cdd37461771fcc12146e6174b64444ec64c460c2a7e9dd78b0edcd58ddbcd9327d15
         
     | 
    
        data/.gitignore
    ADDED
    
    | 
         @@ -0,0 +1,23 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            *.gem
         
     | 
| 
      
 2 
     | 
    
         
            +
            *.rbc
         
     | 
| 
      
 3 
     | 
    
         
            +
            .bundle
         
     | 
| 
      
 4 
     | 
    
         
            +
            .config
         
     | 
| 
      
 5 
     | 
    
         
            +
            .rspec
         
     | 
| 
      
 6 
     | 
    
         
            +
            .yardoc
         
     | 
| 
      
 7 
     | 
    
         
            +
            Gemfile.lock
         
     | 
| 
      
 8 
     | 
    
         
            +
            InstalledFiles
         
     | 
| 
      
 9 
     | 
    
         
            +
            _yardoc
         
     | 
| 
      
 10 
     | 
    
         
            +
            coverage
         
     | 
| 
      
 11 
     | 
    
         
            +
            doc/
         
     | 
| 
      
 12 
     | 
    
         
            +
            lib/bundler/man
         
     | 
| 
      
 13 
     | 
    
         
            +
            pkg
         
     | 
| 
      
 14 
     | 
    
         
            +
            rdoc
         
     | 
| 
      
 15 
     | 
    
         
            +
            spec/reports
         
     | 
| 
      
 16 
     | 
    
         
            +
            test/tmp
         
     | 
| 
      
 17 
     | 
    
         
            +
            test/version_tmp
         
     | 
| 
      
 18 
     | 
    
         
            +
            tmp
         
     | 
| 
      
 19 
     | 
    
         
            +
            .ruby-version
         
     | 
| 
      
 20 
     | 
    
         
            +
            .ruby-gemset
         
     | 
| 
      
 21 
     | 
    
         
            +
            .projectile
         
     | 
| 
      
 22 
     | 
    
         
            +
            *.DS_Store
         
     | 
| 
      
 23 
     | 
    
         
            +
            .idea/*
         
     | 
    
        data/.rubocop.yml
    ADDED
    
    
    
        data/.travis.yml
    ADDED
    
    
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            Copyright (c) 2012 Yatish Mehta
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            MIT License
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 6 
     | 
    
         
            +
            a copy of this software and associated documentation files (the
         
     | 
| 
      
 7 
     | 
    
         
            +
            "Software"), to deal in the Software without restriction, including
         
     | 
| 
      
 8 
     | 
    
         
            +
            without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 9 
     | 
    
         
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 10 
     | 
    
         
            +
            permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 11 
     | 
    
         
            +
            the following conditions:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be
         
     | 
| 
      
 14 
     | 
    
         
            +
            included in all copies or substantial portions of the Software.
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 17 
     | 
    
         
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 18 
     | 
    
         
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         
     | 
| 
      
 19 
     | 
    
         
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         
     | 
| 
      
 20 
     | 
    
         
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         
     | 
| 
      
 21 
     | 
    
         
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         
     | 
| 
      
 22 
     | 
    
         
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         
     | 
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,272 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            [Build Status](http://travis-ci.org/yatish27/linkedin-scraper)
         
     | 
| 
      
 2 
     | 
    
         
            +
            [Gem Version](http://badge.fury.io/rb/linkedin-scraper)
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            Linkedin Scraper
         
     | 
| 
      
 5 
     | 
    
         
            +
            ================
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            Linkedin-scraper is a gem for scraping public LinkedIn profiles.
         
     | 
| 
      
 8 
     | 
    
         
            +
            Given the URL of the profile, it gets the name, country, title, area, current companies, past companies,
         
     | 
| 
      
 9 
     | 
    
         
            +
            organizations, skills, groups, etc.
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            Install the gem from RubyGems:
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
                gem install linkedin-scraper
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            This gem is tested on Ruby 1.9.2, 1.9.3, 2.0.0, JRuby 1.9, and rbx 1.9.
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            Initialize a scraper instance
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
                profile = Linkedin::Profile.get_profile("http://www.linkedin.com/in/jeffweiner08")
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            The returned object responds to the following methods:
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                profile.first_name          # The first name of the contact
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                profile.last_name           # The last name of the contact
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
                profile.name                # The full name of the profile
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
                profile.title               # The job title
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            	profile.summary             # The summary of the profile
         
     | 
| 
      
 39 
     | 
    
         
            +
             
     | 
| 
      
 40 
     | 
    
         
            +
                profile.location            # The location of the contact
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
                profile.country             # The country of the contact
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                profile.industry            # The industry to which the contact belongs
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                profile.picture             # The link to the profile picture
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                profile.skills              # Array of skills of the profile
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                profile.organizations       # Array of organizations of the profile
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                profile.education           # Array of hashes for education
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
                profile.websites            # Array of websites
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
            	profile.groups              # Array of groups
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
            	profile.languages           # Array of languages
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
            	profile.certifications      # Array of certifications
         
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
      
 62 
     | 
    
         
            +
            	profile.number_of_connections # The number of connections as a string
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            For current and past companies, it also provides details of each company, such as company size, industry, address, etc.
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                profile.current_companies
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
                [
         
     | 
| 
      
 70 
     | 
    
         
            +
                [0] {
         
     | 
| 
      
 71 
     | 
    
         
            +
                         :current_company => "LinkedIn",
         
     | 
| 
      
 72 
     | 
    
         
            +
                           :current_title => "CEO",
         
     | 
| 
      
 73 
     | 
    
         
            +
                     :current_company_url => "http://www.linkedin.com",
         
     | 
| 
      
 74 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 75 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/linkedin?trk=ppro_cprof",
         
     | 
| 
      
 76 
     | 
    
         
            +
                                     :url => "http://www.linkedin.com",
         
     | 
| 
      
 77 
     | 
    
         
            +
                                    :type => "Public Company",
         
     | 
| 
      
 78 
     | 
    
         
            +
                            :company_size => "1001-5000 employees",
         
     | 
| 
      
 79 
     | 
    
         
            +
                                 :website => "http://www.linkedin.com",
         
     | 
| 
      
 80 
     | 
    
         
            +
                                :industry => "Internet",
         
     | 
| 
      
 81 
     | 
    
         
            +
                                 :founded => "2003",
         
     | 
| 
      
 82 
     | 
    
         
            +
                                 :address => "2029 Stierlin Court  Mountain View, CA 94043 United States"
         
     | 
| 
      
 83 
     | 
    
         
            +
                },
         
     | 
| 
      
 84 
     | 
    
         
            +
                [1] {
         
     | 
| 
      
 85 
     | 
    
         
            +
                         :current_company => "Intuit",
         
     | 
| 
      
 86 
     | 
    
         
            +
                           :current_title => "Member, Board of Directors",
         
     | 
| 
      
 87 
     | 
    
         
            +
                     :current_company_url => "http://network.intuit.com/",
         
     | 
| 
      
 88 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 89 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/intuit?trk=ppro_cprof",
         
     | 
| 
      
 90 
     | 
    
         
            +
                                     :url => "http://network.intuit.com/",
         
     | 
| 
      
 91 
     | 
    
         
            +
                                    :type => "Public Company",
         
     | 
| 
      
 92 
     | 
    
         
            +
                            :company_size => "5001-10,000 employees",
         
     | 
| 
      
 93 
     | 
    
         
            +
                                 :website => "http://network.intuit.com/",
         
     | 
| 
      
 94 
     | 
    
         
            +
                                :industry => "Computer Software",
         
     | 
| 
      
 95 
     | 
    
         
            +
                                 :founded => "1983",
         
     | 
| 
      
 96 
     | 
    
         
            +
                                 :address => "2632 Marine Way  Mountain View, CA 94043 United States"
         
     | 
| 
      
 97 
     | 
    
         
            +
                },
         
     | 
| 
      
 98 
     | 
    
         
            +
                [2] {
         
     | 
| 
      
 99 
     | 
    
         
            +
                         :current_company => "DonorsChoose",
         
     | 
| 
      
 100 
     | 
    
         
            +
                           :current_title => "Member, Board of Directors",
         
     | 
| 
      
 101 
     | 
    
         
            +
                     :current_company_url => "http://www.donorschoose.org",
         
     | 
| 
      
 102 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 103 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/donorschoose.org?trk=ppro_cprof",
         
     | 
| 
      
 104 
     | 
    
         
            +
                                     :url => "http://www.donorschoose.org",
         
     | 
| 
      
 105 
     | 
    
         
            +
                                    :type => "Nonprofit",
         
     | 
| 
      
 106 
     | 
    
         
            +
                            :company_size => "51-200 employees",
         
     | 
| 
      
 107 
     | 
    
         
            +
                                 :website => "http://www.donorschoose.org",
         
     | 
| 
      
 108 
     | 
    
         
            +
                                :industry => "Nonprofit Organization Management",
         
     | 
| 
      
 109 
     | 
    
         
            +
                                 :founded => "2000",
         
     | 
| 
      
 110 
     | 
    
         
            +
                                 :address => "213 West 35th Street 2nd Floor East New York, NY 10001 United States"
         
     | 
| 
      
 111 
     | 
    
         
            +
                },
         
     | 
| 
      
 112 
     | 
    
         
            +
                [3] {
         
     | 
| 
      
 113 
     | 
    
         
            +
                        :current_company => "Malaria No More",
         
     | 
| 
      
 114 
     | 
    
         
            +
                          :current_title => "Member, Board of Directors",
         
     | 
| 
      
 115 
     | 
    
         
            +
                    :current_company_url => nil,
         
     | 
| 
      
 116 
     | 
    
         
            +
                            :description => nil
         
     | 
| 
      
 117 
     | 
    
         
            +
                },
         
     | 
| 
      
 118 
     | 
    
         
            +
                [4] {
         
     | 
| 
      
 119 
     | 
    
         
            +
                         :current_company => "Venture For America",
         
     | 
| 
      
 120 
     | 
    
         
            +
                           :current_title => "Member, Advisory Board",
         
     | 
| 
      
 121 
     | 
    
         
            +
                     :current_company_url => "http://ventureforamerica.org/",
         
     | 
| 
      
 122 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 123 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/venture-for-america?trk=ppro_cprof",
         
     | 
| 
      
 124 
     | 
    
         
            +
                                     :url => "http://ventureforamerica.org/",
         
     | 
| 
      
 125 
     | 
    
         
            +
                                    :type => "Nonprofit",
         
     | 
| 
      
 126 
     | 
    
         
            +
                            :company_size => "1-10 employees",
         
     | 
| 
      
 127 
     | 
    
         
            +
                                 :website => "http://ventureforamerica.org/",
         
     | 
| 
      
 128 
     | 
    
         
            +
                                :industry => "Nonprofit Organization Management",
         
     | 
| 
      
 129 
     | 
    
         
            +
                                 :founded => "2011"
         
     | 
| 
      
 130 
     | 
    
         
            +
                }
         
     | 
| 
      
 131 
     | 
    
         
            +
                ]
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
             
     | 
| 
      
 134 
     | 
    
         
            +
                profile.past_companies
         
     | 
| 
      
 135 
     | 
    
         
            +
                [
         
     | 
| 
      
 136 
     | 
    
         
            +
                [0] {
         
     | 
| 
      
 137 
     | 
    
         
            +
                            :past_company => "Accel Partners",
         
     | 
| 
      
 138 
     | 
    
         
            +
                              :past_title => "Executive in Residence",
         
     | 
| 
      
 139 
     | 
    
         
            +
                    :past_company_website => "http://www.facebook.com/accel",
         
     | 
| 
      
 140 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 141 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/accel-partners?trk=ppro_cprof",
         
     | 
| 
      
 142 
     | 
    
         
            +
                                     :url => "http://www.facebook.com/accel",
         
     | 
| 
      
 143 
     | 
    
         
            +
                                    :type => "Partnership",
         
     | 
| 
      
 144 
     | 
    
         
            +
                            :company_size => "51-200 employees",
         
     | 
| 
      
 145 
     | 
    
         
            +
                                 :website => "http://www.facebook.com/accel",
         
     | 
| 
      
 146 
     | 
    
         
            +
                                :industry => "Venture Capital & Private Equity",
         
     | 
| 
      
 147 
     | 
    
         
            +
                                 :address => "428 University Palo Alto, CA 94301 United States"
         
     | 
| 
      
 148 
     | 
    
         
            +
                },
         
     | 
| 
      
 149 
     | 
    
         
            +
                [1] {
         
     | 
| 
      
 150 
     | 
    
         
            +
                            :past_company => "Greylock",
         
     | 
| 
      
 151 
     | 
    
         
            +
                              :past_title => "Executive in Residence",
         
     | 
| 
      
 152 
     | 
    
         
            +
                    :past_company_website => "http://www.greylock.com",
         
     | 
| 
      
 153 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 154 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/greylock-partners?trk=ppro_cprof",
         
     | 
| 
      
 155 
     | 
    
         
            +
                                     :url => "http://www.greylock.com",
         
     | 
| 
      
 156 
     | 
    
         
            +
                                    :type => "Partnership",
         
     | 
| 
      
 157 
     | 
    
         
            +
                            :company_size => "51-200 employees",
         
     | 
| 
      
 158 
     | 
    
         
            +
                                 :website => "http://www.greylock.com",
         
     | 
| 
      
 159 
     | 
    
         
            +
                                :industry => "Venture Capital & Private Equity",
         
     | 
| 
      
 160 
     | 
    
         
            +
                                 :address => "2550 Sand Hill Road  Menlo Park, CA 94025 United States"
         
     | 
| 
      
 161 
     | 
    
         
            +
                },
         
     | 
| 
      
 162 
     | 
    
         
            +
                [2] {
         
     | 
| 
      
 163 
     | 
    
         
            +
                            :past_company => "Yahoo!",
         
     | 
| 
      
 164 
     | 
    
         
            +
                              :past_title => "Executive Vice President Network Division",
         
     | 
| 
      
 165 
     | 
    
         
            +
                    :past_company_website => "http://www.yahoo.com",
         
     | 
| 
      
 166 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 167 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/yahoo?trk=ppro_cprof",
         
     | 
| 
      
 168 
     | 
    
         
            +
                                     :url => "http://www.yahoo.com",
         
     | 
| 
      
 169 
     | 
    
         
            +
                                    :type => "Public Company",
         
     | 
| 
      
 170 
     | 
    
         
            +
                            :company_size => "10,001+ employees",
         
     | 
| 
      
 171 
     | 
    
         
            +
                                 :website => "http://www.yahoo.com",
         
     | 
| 
      
 172 
     | 
    
         
            +
                                :industry => "Internet",
         
     | 
| 
      
 173 
     | 
    
         
            +
                                 :founded => "1994",
         
     | 
| 
      
 174 
     | 
    
         
            +
                                 :address => "701 First Avenue  Sunnyvale, CA 94089 United States"
         
     | 
| 
      
 175 
     | 
    
         
            +
                },
         
     | 
| 
      
 176 
     | 
    
         
            +
                [3] {
         
     | 
| 
      
 177 
     | 
    
         
            +
                            :past_company => "Windsor Media",
         
     | 
| 
      
 178 
     | 
    
         
            +
                              :past_title => "Founding Partner",
         
     | 
| 
      
 179 
     | 
    
         
            +
                    :past_company_website => nil,
         
     | 
| 
      
 180 
     | 
    
         
            +
                             :description => nil
         
     | 
| 
      
 181 
     | 
    
         
            +
                },
         
     | 
| 
      
 182 
     | 
    
         
            +
                [4] {
         
     | 
| 
      
 183 
     | 
    
         
            +
                            :past_company => "Warner Bros.",
         
     | 
| 
      
 184 
     | 
    
         
            +
                              :past_title => "Vice President Online",
         
     | 
| 
      
 185 
     | 
    
         
            +
                    :past_company_website => "http://www.warnerbros.com/",
         
     | 
| 
      
 186 
     | 
    
         
            +
                             :description => nil,
         
     | 
| 
      
 187 
     | 
    
         
            +
                    :linkedin_company_url => "http://www.linkedin.com/company/warner-bros.-entertainment-group-of-companies?trk=ppro_cprof",
         
     | 
| 
      
 188 
     | 
    
         
            +
                                     :url => "http://www.warnerbros.com/",
         
     | 
| 
      
 189 
     | 
    
         
            +
                                    :type => "Public Company",
         
     | 
| 
      
 190 
     | 
    
         
            +
                            :company_size => "10,001+ employees",
         
     | 
| 
      
 191 
     | 
    
         
            +
                                 :website => "http://www.warnerbros.com/",
         
     | 
| 
      
 192 
     | 
    
         
            +
                                :industry => "Entertainment",
         
     | 
| 
      
 193 
     | 
    
         
            +
                                 :address => "4000 Warner Boulevard  Burbank, CA 91522 United States"
         
     | 
| 
      
 194 
     | 
    
         
            +
                }
         
     | 
| 
      
 195 
     | 
    
         
            +
                ]
         
     | 
| 
      
 196 
     | 
    
         
            +
             
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
      
 198 
     | 
    
         
            +
                profile.recommended_visitors
         
     | 
| 
      
 199 
     | 
    
         
            +
                # The list of profiles shown under "Viewers of this profile also viewed..."
         
     | 
| 
      
 200 
     | 
    
         
            +
                [
         
     | 
| 
      
 201 
     | 
    
         
            +
                [0] {
         
     | 
| 
      
 202 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/barackobama?trk=pub-pbmap",
         
     | 
| 
      
 203 
     | 
    
         
            +
                       :name => "Barack Obama",
         
     | 
| 
      
 204 
     | 
    
         
            +
                      :title => "President of the United States of ",
         
     | 
| 
      
 205 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 206 
     | 
    
         
            +
                },
         
     | 
| 
      
 207 
     | 
    
         
            +
                [1] {
         
     | 
| 
      
 208 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/marissamayer?trk=pub-pbmap",
         
     | 
| 
      
 209 
     | 
    
         
            +
                       :name => "Marissa Mayer",
         
     | 
| 
      
 210 
     | 
    
         
            +
                      :title => "Yahoo!, President & CEO",
         
     | 
| 
      
 211 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 212 
     | 
    
         
            +
                },
         
     | 
| 
      
 213 
     | 
    
         
            +
                [2] {
         
     | 
| 
      
 214 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/pub/sean-parker/0/1/826?trk=pub-pbmap",
         
     | 
| 
      
 215 
     | 
    
         
            +
                       :name => "Sean Parker",
         
     | 
| 
      
 216 
     | 
    
         
            +
                      :title => nil,
         
     | 
| 
      
 217 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 218 
     | 
    
         
            +
                },
         
     | 
| 
      
 219 
     | 
    
         
            +
                [3] {
         
     | 
| 
      
 220 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/pub/eduardo-saverin/0/70a/31b?trk=pub-pbmap",
         
     | 
| 
      
 221 
     | 
    
         
            +
                       :name => "Eduardo Saverin",
         
     | 
| 
      
 222 
     | 
    
         
            +
                      :title => nil,
         
     | 
| 
      
 223 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 224 
     | 
    
         
            +
                },
         
     | 
| 
      
 225 
     | 
    
         
            +
                [4] {
         
     | 
| 
      
 226 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/rbranson?trk=pub-pbmap",
         
     | 
| 
      
 227 
     | 
    
         
            +
                       :name => "Richard Branson",
         
     | 
| 
      
 228 
     | 
    
         
            +
                      :title => "Founder",
         
     | 
| 
      
 229 
     | 
    
         
            +
                    :company => "Virgin Group"
         
     | 
| 
      
 230 
     | 
    
         
            +
                },
         
     | 
| 
      
 231 
     | 
    
         
            +
                [5] {
         
     | 
| 
      
 232 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/reidhoffman?trk=pub-pbmap",
         
     | 
| 
      
 233 
     | 
    
         
            +
                       :name => "Reid Hoffman",
         
     | 
| 
      
 234 
     | 
    
         
            +
                      :title => "Entrepreneur. Product Strategist.  ",
         
     | 
| 
      
 235 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 236 
     | 
    
         
            +
                },
         
     | 
| 
      
 237 
     | 
    
         
            +
                [6] {
         
     | 
| 
      
 238 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/mdell?trk=pub-pbmap",
         
     | 
| 
      
 239 
     | 
    
         
            +
                       :name => "Michael Dell",
         
     | 
| 
      
 240 
     | 
    
         
            +
                      :title => "Chairman and CEO",
         
     | 
| 
      
 241 
     | 
    
         
            +
                    :company => "Dell"
         
     | 
| 
      
 242 
     | 
    
         
            +
                },
         
     | 
| 
      
 243 
     | 
    
         
            +
                [7] {
         
     | 
| 
      
 244 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/in/mittromney?trk=pub-pbmap",
         
     | 
| 
      
 245 
     | 
    
         
            +
                       :name => "Mitt Romney",
         
     | 
| 
      
 246 
     | 
    
         
            +
                      :title => "Believe in America",
         
     | 
| 
      
 247 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 248 
     | 
    
         
            +
                },
         
     | 
| 
      
 249 
     | 
    
         
            +
                [8] {
         
     | 
| 
      
 250 
     | 
    
         
            +
                       :link => "http://www.linkedin.com/pub/sheryl-sandberg/2/665/512?trk=pub-pbmap",
         
     | 
| 
      
 251 
     | 
    
         
            +
                       :name => "Sheryl Sandberg",
         
     | 
| 
      
 252 
     | 
    
         
            +
                      :title => nil,
         
     | 
| 
      
 253 
     | 
    
         
            +
                    :company => nil
         
     | 
| 
      
 254 
     | 
    
         
            +
                }
         
     | 
| 
      
 255 
     | 
    
         
            +
                ]
         
     | 
| 
      
 256 
     | 
    
         
            +
             
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
            The gem also ships with an executable that can be run from the command line to print the scraped data as JSON.
         
     | 
| 
      
 259 
     | 
    
         
            +
            It takes the url as the first argument.
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
                linkedin-scraper http://www.linkedin.com/in/jeffweiner08
         
     | 
| 
      
 262 
     | 
    
         
            +
             
     | 
| 
      
 263 
     | 
    
         
            +
            ## Contributing
         
     | 
| 
      
 264 
     | 
    
         
            +
             
     | 
| 
      
 265 
     | 
    
         
            +
            Bug reports and pull requests are welcome on GitHub at https://github.com/yatish27/linkedin-scraper.
         
     | 
| 
      
 266 
     | 
    
         
            +
            This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the
         
     | 
| 
      
 267 
     | 
    
         
            +
            [Contributor Covenant](http://contributor-covenant.org) code of conduct.
         
     | 
| 
      
 268 
     | 
    
         
            +
             
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
            ## License
         
     | 
| 
      
 271 
     | 
    
         
            +
             
     | 
| 
      
 272 
     | 
    
         
            +
            The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
         
     | 
    
        data/Rakefile
    ADDED
    
    
| 
         @@ -0,0 +1,252 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # -*- encoding: utf-8 -*-
         
     | 
| 
      
 2 
     | 
    
         
            +
            module Linkedin
         
     | 
| 
      
 3 
     | 
    
         
            +
              class Profile
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                USER_AGENTS = ["Windows IE 6", "Windows IE 7", "Windows Mozilla", "Mac Safari", "Mac FireFox", "Mac Mozilla", "Linux Mozilla", "Linux Firefox", "Linux Konqueror"]
         
     | 
| 
      
 6 
     | 
    
         
            +
                ATTRIBUTES = %w(
         
     | 
| 
      
 7 
     | 
    
         
            +
                name
         
     | 
| 
      
 8 
     | 
    
         
            +
                first_name
         
     | 
| 
      
 9 
     | 
    
         
            +
                last_name
         
     | 
| 
      
 10 
     | 
    
         
            +
                title
         
     | 
| 
      
 11 
     | 
    
         
            +
                location
         
     | 
| 
      
 12 
     | 
    
         
            +
                number_of_connections
         
     | 
| 
      
 13 
     | 
    
         
            +
                country
         
     | 
| 
      
 14 
     | 
    
         
            +
                industry
         
     | 
| 
      
 15 
     | 
    
         
            +
                summary
         
     | 
| 
      
 16 
     | 
    
         
            +
                picture
         
     | 
| 
      
 17 
     | 
    
         
            +
                projects
         
     | 
| 
      
 18 
     | 
    
         
            +
                linkedin_url
         
     | 
| 
      
 19 
     | 
    
         
            +
                education
         
     | 
| 
      
 20 
     | 
    
         
            +
                groups
         
     | 
| 
      
 21 
     | 
    
         
            +
                websites
         
     | 
| 
      
 22 
     | 
    
         
            +
                languages
         
     | 
| 
      
 23 
     | 
    
         
            +
                skills
         
     | 
| 
      
 24 
     | 
    
         
            +
                certifications
         
     | 
| 
      
 25 
     | 
    
         
            +
                organizations
         
     | 
| 
      
 26 
     | 
    
         
            +
                past_companies
         
     | 
| 
      
 27 
     | 
    
         
            +
                current_companies
         
     | 
| 
      
 28 
     | 
    
         
            +
                recommended_visitors)
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
                attr_reader :page, :linkedin_url
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
                def self.get_profile(url, options = {})
         
     | 
| 
      
 33 
     | 
    
         
            +
                  Linkedin::Profile.new(url, options)
         
     | 
| 
      
 34 
     | 
    
         
            +
                rescue => e
         
     | 
| 
      
 35 
     | 
    
         
            +
                  puts e
         
     | 
| 
      
 36 
     | 
    
         
            +
                end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                def initialize(url, options = {})
         
     | 
| 
      
 39 
     | 
    
         
            +
                  @linkedin_url = url
         
     | 
| 
      
 40 
     | 
    
         
            +
                  @options = options
         
     | 
| 
      
 41 
     | 
    
         
            +
                  @page = http_client.get(url)
         
     | 
| 
      
 42 
     | 
    
         
            +
                end
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
                def name
         
     | 
| 
      
 45 
     | 
    
         
            +
                  "#{first_name} #{last_name}"
         
     | 
| 
      
 46 
     | 
    
         
            +
                end
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                def first_name
         
     | 
| 
      
 49 
     | 
    
         
            +
                  @first_name ||= (@page.at(".full-name").text.split(" ", 2)[0].strip if @page.at(".full-name"))
         
     | 
| 
      
 50 
     | 
    
         
            +
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                def last_name
         
     | 
| 
      
 53 
     | 
    
         
            +
                  @last_name ||= (@page.at(".full-name").text.split(" ", 2)[1].strip if @page.at(".full-name"))
         
     | 
| 
      
 54 
     | 
    
         
            +
                end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                def title
         
     | 
| 
      
 57 
     | 
    
         
            +
                  @title ||= (@page.at(".title").text.gsub(/\s+/, " ").strip if @page.at(".title"))
         
     | 
| 
      
 58 
     | 
    
         
            +
                end
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
                def location
         
     | 
| 
      
 61 
     | 
    
         
            +
                  @location ||= (@page.at(".locality").text.split(",").first.strip if @page.at(".locality"))
         
     | 
| 
      
 62 
     | 
    
         
            +
                end
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
                def number_of_connections
         
     | 
| 
      
 65 
     | 
    
         
            +
                  @connections ||= (@page.at(".member-connections").text.match(/[0-9]+[\+]{0,1}/)[0])
         
     | 
| 
      
 66 
     | 
    
         
            +
                end
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                def country
         
     | 
| 
      
 69 
     | 
    
         
            +
                  @country ||= (@page.at(".locality").text.split(",").last.strip if @page.at(".locality"))
         
     | 
| 
      
 70 
     | 
    
         
            +
                end
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                def industry
         
     | 
| 
      
 73 
     | 
    
         
            +
                  @industry ||= (@page.at(".industry").text.gsub(/\s+/, " ").strip if @page.at(".industry"))
         
     | 
| 
      
 74 
     | 
    
         
            +
                end
         
     | 
| 
      
 75 
     | 
    
         
            +
             
     | 
| 
      
 76 
     | 
    
         
            +
                def summary
         
     | 
| 
      
 77 
     | 
    
         
            +
                  @summary ||= (@page.at(".summary .description").text.gsub(/\s+/, " ").strip if @page.at(".summary .description"))
         
     | 
| 
      
 78 
     | 
    
         
            +
                end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                def picture
         
     | 
| 
      
 81 
     | 
    
         
            +
                  @picture ||= (@page.at(".profile-picture img").attributes["src"].value.strip if @page.at(".profile-picture img"))
         
     | 
| 
      
 82 
     | 
    
         
            +
                end
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                def skills
         
     | 
| 
      
 85 
     | 
    
         
            +
                  @skills ||= (@page.search(".skill-pill .endorse-item-name-text").map { |skill| skill.text.strip if skill.text } rescue nil)
         
     | 
| 
      
 86 
     | 
    
         
            +
                end
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
                def past_companies
         
     | 
| 
      
 89 
     | 
    
         
            +
                  @past_companies ||= get_companies("past")
         
     | 
| 
      
 90 
     | 
    
         
            +
                end
         
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
      
 92 
     | 
    
         
            +
                def current_companies
         
     | 
| 
      
 93 
     | 
    
         
            +
                  @current_companies ||= get_companies("current")
         
     | 
| 
      
 94 
     | 
    
         
            +
                end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                def education
         
     | 
| 
      
 97 
     | 
    
         
            +
                  @education ||= @page.search(".background-education .education").map do |item|
         
     | 
| 
      
 98 
     | 
    
         
            +
                    name = item.at("h4").text.gsub(/\s+|\n/, " ").strip if item.at("h4")
         
     | 
| 
      
 99 
     | 
    
         
            +
                    desc = item.search("h5").last.text.gsub(/\s+|\n/, " ").strip if item.search("h5").last
         
     | 
| 
      
 100 
     | 
    
         
            +
                    degree = item.search("h5").last.at(".degree").text.gsub(/\s+|\n/, " ").strip.gsub(/,$/, "") if item.search("h5").last.at(".degree")
         
     | 
| 
      
 101 
     | 
    
         
            +
                    major = item.search("h5").last.at(".major").text.gsub(/\s+|\n/, " ").strip      if item.search("h5").last.at(".major")
         
     | 
| 
      
 102 
     | 
    
         
            +
                    period = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip if item.at(".education-date")
         
     | 
| 
      
 103 
     | 
    
         
            +
                    start_date, end_date = item.at(".education-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
         
     | 
| 
      
 104 
     | 
    
         
            +
                    {:name => name, :description => desc, :degree => degree, :major => major, :period => period, :start_date => start_date, :end_date => end_date }
         
     | 
| 
      
 105 
     | 
    
         
            +
                  end
         
     | 
| 
      
 106 
     | 
    
         
            +
                end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
                def websites
         
     | 
| 
      
 109 
     | 
    
         
            +
                  @websites ||= @page.search("#overview-summary-websites").flat_map do |site|
         
     | 
| 
      
 110 
     | 
    
         
            +
                    url = "http://www.linkedin.com#{site.at("a")["href"]}"
         
     | 
| 
      
 111 
     | 
    
         
            +
                    CGI.parse(URI.parse(url).query)["url"]
         
     | 
| 
      
 112 
     | 
    
         
            +
                  end
         
     | 
| 
      
 113 
     | 
    
         
            +
                end
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                def groups
         
     | 
| 
      
 116 
     | 
    
         
            +
                  @groups ||= @page.search(".groups-name").map do |item|
         
     | 
| 
      
 117 
     | 
    
         
            +
                    name = item.text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
      
 118 
     | 
    
         
            +
                    link = "http://www.linkedin.com#{item.at("a")["href"]}"
         
     | 
| 
      
 119 
     | 
    
         
            +
                    { :name => name, :link => link }
         
     | 
| 
      
 120 
     | 
    
         
            +
                  end
         
     | 
| 
      
 121 
     | 
    
         
            +
                end
         
     | 
| 
      
 122 
     | 
    
         
            +
             
     | 
| 
      
 123 
     | 
    
         
            +
                def organizations
         
     | 
| 
      
 124 
     | 
    
         
            +
                  @organizations ||= @page.search("#background-organizations .section-item").map do |item|
         
     | 
| 
      
 125 
     | 
    
         
            +
                    name = item.at(".summary").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 126 
     | 
    
         
            +
                    start_date, end_date = item.at(".organizations-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
         
     | 
| 
      
 127 
     | 
    
         
            +
                    start_date = Date.parse(start_date) rescue nil
         
     | 
| 
      
 128 
     | 
    
         
            +
                    end_date = Date.parse(end_date)   rescue nil
         
     | 
| 
      
 129 
     | 
    
         
            +
                    { :name => name, :start_date => start_date, :end_date => end_date }
         
     | 
| 
      
 130 
     | 
    
         
            +
                  end
         
     | 
| 
      
 131 
     | 
    
         
            +
                end
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                def languages
         
     | 
| 
      
 134 
     | 
    
         
            +
                  @languages ||= @page.search(".background-languages #languages ol li").map do |item|
         
     | 
| 
      
 135 
     | 
    
         
            +
                    language = item.at("h4").text rescue nil
         
     | 
| 
      
 136 
     | 
    
         
            +
                    proficiency = item.at("div.languages-proficiency").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 137 
     | 
    
         
            +
                    { :language => language, :proficiency => proficiency }
         
     | 
| 
      
 138 
     | 
    
         
            +
                  end
         
     | 
| 
      
 139 
     | 
    
         
            +
                end
         
     | 
| 
      
 140 
     | 
    
         
            +
             
     | 
| 
      
 141 
     | 
    
         
            +
                def certifications
         
     | 
| 
      
 142 
     | 
    
         
            +
                  @certifications ||= @page.search("background-certifications").map do |item|
         
     | 
| 
      
 143 
     | 
    
         
            +
                    name       = item.at("h4").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 144 
     | 
    
         
            +
                    authority  = item.at("h5").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 145 
     | 
    
         
            +
                    license    = item.at(".specifics/.licence-number").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 146 
     | 
    
         
            +
                    start_date = item.at(".certification-date").text.gsub(/\s+|\n/, " ").strip rescue nil
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                    { :name => name, :authority => authority, :license => license, :start_date => start_date }
         
     | 
| 
      
 149 
     | 
    
         
            +
                  end
         
     | 
| 
      
 150 
     | 
    
         
            +
                end
         
     | 
| 
      
 151 
     | 
    
         
            +
             
     | 
| 
      
 152 
     | 
    
         
            +
             
     | 
| 
      
 153 
     | 
    
         
            +
                def recommended_visitors
         
     | 
| 
      
 154 
     | 
    
         
            +
                  @recommended_visitors ||= @page.search(".insights-browse-map/ul/li").map do |visitor|
         
     | 
| 
      
 155 
     | 
    
         
            +
                    v = {}
         
     | 
| 
      
 156 
     | 
    
         
            +
                    v[:link] = visitor.at("a")["href"]
         
     | 
| 
      
 157 
     | 
    
         
            +
                    v[:name] = visitor.at("h4/a").text
         
     | 
| 
      
 158 
     | 
    
         
            +
                    v[:title] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ").first
         
     | 
| 
      
 159 
     | 
    
         
            +
                    v[:company] = visitor.at(".browse-map-title").text.gsub("...", " ").split(" at ")[1]
         
     | 
| 
      
 160 
     | 
    
         
            +
                    v
         
     | 
| 
      
 161 
     | 
    
         
            +
                  end
         
     | 
| 
      
 162 
     | 
    
         
            +
                end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
                def projects
         
     | 
| 
      
 165 
     | 
    
         
            +
                  @projects ||= @page.search(".background-projects/div").map do |project|
         
     | 
| 
      
 166 
     | 
    
         
            +
                    project = project.at("div")
         
     | 
| 
      
 167 
     | 
    
         
            +
             
     | 
| 
      
 168 
     | 
    
         
            +
                    p = {}
         
     | 
| 
      
 169 
     | 
    
         
            +
                    start_date, end_date = project.at(".projects-date").text.gsub(/\s+|\n/, " ").strip.split(" – ") rescue nil
         
     | 
| 
      
 170 
     | 
    
         
            +
             
     | 
| 
      
 171 
     | 
    
         
            +
                    p[:title] = project.at("hgroup/h4 span:first-of-type").text rescue nil
         
     | 
| 
      
 172 
     | 
    
         
            +
                    p[:link] =  project.at("hgroup/h4 a:first-of-type")['href'] rescue nil
         
     | 
| 
      
 173 
     | 
    
         
            +
                    p[:start_date] = parse_date(start_date) rescue nil
         
     | 
| 
      
 174 
     | 
    
         
            +
                    p[:end_date] = parse_date(end_date)  rescue nil
         
     | 
| 
      
 175 
     | 
    
         
            +
                    p[:description] = project.at(".description").text rescue nil
         
     | 
| 
      
 176 
     | 
    
         
            +
                    p[:associates] = project.at(".associated-list ul").children.map{ |c| c.at("a").text } rescue nil
         
     | 
| 
      
 177 
     | 
    
         
            +
                    p
         
     | 
| 
      
 178 
     | 
    
         
            +
                  end
         
     | 
| 
      
 179 
     | 
    
         
            +
                end
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
                def to_json
         
     | 
| 
      
 182 
     | 
    
         
            +
                  require "json"
         
     | 
| 
      
 183 
     | 
    
         
            +
                  ATTRIBUTES.reduce({}){ |hash,attr| hash[attr.to_sym] = self.send(attr.to_sym);hash }.to_json
         
     | 
| 
      
 184 
     | 
    
         
            +
                end
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
                private
         
     | 
| 
      
 187 
     | 
    
         
            +
             
     | 
| 
      
 188 
     | 
    
         
            +
                def get_companies(type)
         
     | 
| 
      
 189 
     | 
    
         
            +
                  companies = []
         
     | 
| 
      
 190 
     | 
    
         
            +
                  if @page.search(".background-experience .#{type}-position").first
         
     | 
| 
      
 191 
     | 
    
         
            +
                    @page.search(".background-experience .#{type}-position").each do |node|
         
     | 
| 
      
 192 
     | 
    
         
            +
             
     | 
| 
      
 193 
     | 
    
         
            +
                      company = {}
         
     | 
| 
      
 194 
     | 
    
         
            +
                      company[:title] = node.at("h4").text.gsub(/\s+|\n/, " ").strip if node.at("h4")
         
     | 
| 
      
 195 
     | 
    
         
            +
                      company[:company] = node.at("h4").next.text.gsub(/\s+|\n/, " ").strip if node.at("h4").next
         
     | 
| 
      
 196 
     | 
    
         
            +
                      company[:description] = node.at(".description").text.gsub(/\s+|\n/, " ").strip if node.at(".description")
         
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
      
 198 
     | 
    
         
            +
                      start_date, end_date = node.at(".experience-date-locale").text.strip.split(" – ") rescue nil
         
     | 
| 
      
 199 
     | 
    
         
            +
                      company[:duration] = node.at(".experience-date-locale").text[/.*\((.*)\)/, 1]
         
     | 
| 
      
 200 
     | 
    
         
            +
                      company[:start_date] = parse_date(start_date) rescue nil
         
     | 
| 
      
 201 
     | 
    
         
            +
                      company[:end_date] = parse_date(end_date) rescue nil
         
     | 
| 
      
 202 
     | 
    
         
            +
             
     | 
| 
      
 203 
     | 
    
         
            +
                      company_link = node.at("h4").next.at("a")["href"] if node.at("h4").next.at("a")
         
     | 
| 
      
 204 
     | 
    
         
            +
             
     | 
| 
      
 205 
     | 
    
         
            +
                      result = get_company_details(company_link)
         
     | 
| 
      
 206 
     | 
    
         
            +
                      companies << company.merge!(result)
         
     | 
| 
      
 207 
     | 
    
         
            +
                    end
         
     | 
| 
      
 208 
     | 
    
         
            +
                  end
         
     | 
| 
      
 209 
     | 
    
         
            +
                  companies
         
     | 
| 
      
 210 
     | 
    
         
            +
                end
         
     | 
| 
      
 211 
     | 
    
         
            +
             
     | 
| 
      
 212 
     | 
    
         
            +
                # Parses a scraped date string into a Date.
                #
                # A bare four-digit year starting with 19 or 20 (e.g. "2010") is
                # expanded to January 1st of that year before parsing; every other
                # value is handed to Date.parse unchanged.
                #
                # date - String such as "2010" or "2012-05-17".
                #
                # Returns a Date.
                # Raises whatever Date.parse raises for input it cannot handle,
                # including nil.
                def parse_date(date)
                  # \A and \z anchor the whole string (^ and $ only anchor a line).
                  # Surrounding whitespace is tolerated and stripped so that " 2010 "
                  # is still treated as a bare year.
                  date = "#{date.strip}-01-01" if date =~ /\A\s*(19|20)\d{2}\s*\z/
                  Date.parse(date)
                end
         
     | 
| 
      
 216 
     | 
    
         
            +
             
     | 
| 
      
 217 
     | 
    
         
            +
                # Fetches a company page and collects the details found on it.
                #
                # link - company href, passed through get_linkedin_company_url to
                #        obtain the URL that is actually requested.
                #
                # Returns a Hash that always contains :linkedin_company_url and, when
                # the matching nodes exist on the page:
                #   :url     - text of the first link inside ".basic-info-about"
                #   <symbol> - one entry per <h4>/<p> pair in that list, keyed by the
                #              downcased heading with spaces replaced by underscores
                #   :address - text of ".vcard.hq .adr" with newlines flattened
                def get_company_details(link)
                  company_url = get_linkedin_company_url(link)
                  result = { :linkedin_company_url => company_url }
                  page = http_client.get(company_url)

                  website = page.at(".basic-info-about/ul/li/p/a")
                  result[:url] = website.text if website

                  info_list = page.at(".basic-info-about/ul")
                  if info_list
                    info_list.search("p").zip(info_list.search("h4")).each do |value, title|
                      # zip pads with nil when there are more <p> than <h4> nodes;
                      # a value without a heading has no key to be stored under.
                      next unless title

                      result[title.text.gsub(" ", "_").downcase.to_sym] = value.text.strip
                    end
                  end

                  # The headquarters card may be present without an address node.
                  headquarters = page.at(".vcard.hq")
                  address = headquarters.at(".adr") if headquarters
                  result[:address] = address.text.gsub("\n", " ").strip if address
                  result
                end
         
     | 
| 
      
 231 
     | 
    
         
            +
             
     | 
| 
      
 232 
     | 
    
         
            +
                # Builds a new Mechanize agent on every call.
                #
                # The agent gets a user agent alias sampled from USER_AGENTS, the
                # proxy taken from @options[:proxy_ip] and @options[:proxy_port]
                # whenever @options is non-empty, and a max_history of 0.
                #
                # Returns the configured Mechanize instance.
                def http_client
                  Mechanize.new do |browser|
                    browser.user_agent_alias = USER_AGENTS.sample

                    proxy_requested = !@options.empty?
                    browser.set_proxy(@options[:proxy_ip], @options[:proxy_port]) if proxy_requested

                    browser.max_history = 0
                  end
                end
         
     | 
| 
      
 241 
     | 
    
         
            +
             
     | 
| 
      
 242 
     | 
    
         
            +
                # Normalises a company link into an absolute www.linkedin.com URL.
                #
                # link - String href; either already absolute
                #        ("http(s)://www.linkedin.com/...") or relative to the site
                #        root, with or without a leading slash. nil is treated as
                #        an empty relative link.
                #
                # Returns the link unchanged when it already starts with the LinkedIn
                # origin, otherwise the link appended to "http://www.linkedin.com/".
                def get_linkedin_company_url(link)
                  # Anchored at the start with escaped dots, so a link that merely
                  # contains the origin (e.g. in a query string) is not mistaken for
                  # an absolute URL.
                  return link if link =~ %r{\Ahttps?://www\.linkedin\.com/}

                  # Drop leading slashes so "/company/x" does not become
                  # "http://www.linkedin.com//company/x".
                  "http://www.linkedin.com/#{link.to_s.sub(%r{\A/+}, "")}"
                end
         
     | 
| 
      
 251 
     | 
    
         
            +
              end
         
     | 
| 
      
 252 
     | 
    
         
            +
            end
         
     |