linkedin-scraper 0.0.9 → 0.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/linkedin-scraper/profile.rb +59 -44
 - data/lib/linkedin-scraper/version.rb +1 -1
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: dbde57b3c40b5f330ed4ab346f42cad639de8d3e
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 464882b2139ff63b164568c104ea47c76ff8b10f
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: fd72bef448e5f91167de5902d91f99874e3153e3e3d0750a0708c6bf4b2fd26995ed3f69c406b5161b3391542d2b0fe71515b70c27bad5dd6edec9933213b92c
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 09292e4bf18775fb423fd50666931c7bab4ca6033cca8147b308c0be7ac97c62351c0d52f60ef337efcbe088d255359e42100db01f47f670240bfff86eea971f
         
     | 
| 
         @@ -47,13 +47,13 @@ module Linkedin 
     | 
|
| 
       47 
47 
     | 
    
         
             
                    @agent.max_history = 0
         
     | 
| 
       48 
48 
     | 
    
         
             
                    page = @agent.get(url)
         
     | 
| 
       49 
49 
     | 
    
         
             
                    return Linkedin::Profile.new(page, url)
         
     | 
| 
       50 
     | 
    
         
            -
                  rescue=>e
         
     | 
| 
      
 50 
     | 
    
         
            +
                  rescue => e
         
     | 
| 
       51 
51 
     | 
    
         
             
                    puts e
         
     | 
| 
       52 
52 
     | 
    
         
             
                  end
         
     | 
| 
       53 
53 
     | 
    
         
             
                end
         
     | 
| 
       54 
54 
     | 
    
         | 
| 
       55 
55 
     | 
    
         
             
                def get_skills(page)
         
     | 
| 
       56 
     | 
    
         
            -
                  page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text}
         
     | 
| 
      
 56 
     | 
    
         
            +
                  page.search('.competency.show-bean').map{|skill|skill.text.strip if skill.text} rescue nil
         
     | 
| 
       57 
57 
     | 
    
         
             
                end
         
     | 
| 
       58 
58 
     | 
    
         | 
| 
       59 
59 
     | 
    
         
             
                def get_company_url(node)
         
     | 
| 
         @@ -188,25 +188,28 @@ module Linkedin 
     | 
|
| 
       188 
188 
     | 
    
         
             
                  organizations = []
         
     | 
| 
       189 
189 
     | 
    
         
             
                  # if the profile contains org data
         
     | 
| 
       190 
190 
     | 
    
         
             
                  if page.search('ul.organizations li.organization').first
         
     | 
| 
       191 
     | 
    
         
            -
             
     | 
| 
       192 
191 
     | 
    
         
             
                    # loop over each element with org data
         
     | 
| 
       193 
192 
     | 
    
         
             
                    page.search('ul.organizations li.organization').each do |item|
         
     | 
| 
       194 
     | 
    
         
            -
                      # find the h3 element within the above section and get the text with excess white space stripped
         
     | 
| 
       195 
     | 
    
         
            -
                      name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
       196 
     | 
    
         
            -
                      position = nil # add this later
         
     | 
| 
       197 
     | 
    
         
            -
                      occupation = nil # add this latetr too, this relates to the experience/work
         
     | 
| 
       198 
     | 
    
         
            -
                      start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
         
     | 
| 
       199 
     | 
    
         
            -
                      if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
         
     | 
| 
       200 
     | 
    
         
            -
                        end_date = nil
         
     | 
| 
       201 
     | 
    
         
            -
                      else
         
     | 
| 
       202 
     | 
    
         
            -
                        Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
         
     | 
| 
       203 
     | 
    
         
            -
                      end
         
     | 
| 
       204 
193 
     | 
    
         | 
| 
       205 
     | 
    
         
            -
                       
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
      
 194 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 195 
     | 
    
         
            +
                        # find the h3 element within the above section and get the text with excess white space stripped
         
     | 
| 
      
 196 
     | 
    
         
            +
                        name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
      
 197 
     | 
    
         
            +
                        position = nil # add this later
         
     | 
| 
      
 198 
     | 
    
         
            +
                        occupation = nil # add this latetr too, this relates to the experience/work
         
     | 
| 
      
 199 
     | 
    
         
            +
                        start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
         
     | 
| 
      
 200 
     | 
    
         
            +
                        if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
         
     | 
| 
      
 201 
     | 
    
         
            +
                          end_date = nil
         
     | 
| 
      
 202 
     | 
    
         
            +
                        else
         
     | 
| 
      
 203 
     | 
    
         
            +
                          Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
         
     | 
| 
      
 204 
     | 
    
         
            +
                        end
         
     | 
| 
      
 205 
     | 
    
         
            +
             
     | 
| 
      
 206 
     | 
    
         
            +
                        organizations << { name: name, start_date: start_date, end_date: end_date }
         
     | 
| 
      
 207 
     | 
    
         
            +
                      rescue => e
         
     | 
| 
       207 
208 
     | 
    
         | 
| 
      
 209 
     | 
    
         
            +
                      end
         
     | 
| 
      
 210 
     | 
    
         
            +
                    end
         
     | 
| 
       208 
211 
     | 
    
         
             
                    return organizations
         
     | 
| 
       209 
     | 
    
         
            -
                  end 
     | 
| 
      
 212 
     | 
    
         
            +
                  end
         
     | 
| 
       210 
213 
     | 
    
         
             
                end
         
     | 
| 
       211 
214 
     | 
    
         | 
| 
       212 
215 
     | 
    
         
             
                def get_languages(page)
         
     | 
| 
         @@ -216,10 +219,13 @@ module Linkedin 
     | 
|
| 
       216 
219 
     | 
    
         | 
| 
       217 
220 
     | 
    
         
             
                    # loop over each element with org data
         
     | 
| 
       218 
221 
     | 
    
         
             
                    page.search('ul.languages li.language').each do |item|
         
     | 
| 
       219 
     | 
    
         
            -
                       
     | 
| 
       220 
     | 
    
         
            -
             
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
       222 
     | 
    
         
            -
             
     | 
| 
      
 222 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 223 
     | 
    
         
            +
                        # find the h3 element within the above section and get the text with excess white space stripped
         
     | 
| 
      
 224 
     | 
    
         
            +
                        language = item.at('h3').text
         
     | 
| 
      
 225 
     | 
    
         
            +
                        proficiency = item.at('span.proficiency').text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
      
 226 
     | 
    
         
            +
                        languages << { language:language, proficiency:proficiency }
         
     | 
| 
      
 227 
     | 
    
         
            +
                      rescue => e
         
     | 
| 
      
 228 
     | 
    
         
            +
                      end
         
     | 
| 
       223 
229 
     | 
    
         
             
                    end
         
     | 
| 
       224 
230 
     | 
    
         | 
| 
       225 
231 
     | 
    
         
             
                    return languages
         
     | 
| 
         @@ -228,6 +234,7 @@ module Linkedin 
     | 
|
| 
       228 
234 
     | 
    
         | 
| 
       229 
235 
     | 
    
         
             
                def get_certifications(page)
         
     | 
| 
       230 
236 
     | 
    
         
             
                  certifications = []
         
     | 
| 
      
 237 
     | 
    
         
            +
             
     | 
| 
       231 
238 
     | 
    
         
             
                  # search string to use with Nokogiri
         
     | 
| 
       232 
239 
     | 
    
         
             
                  query = 'ul.certifications li.certification'
         
     | 
| 
       233 
240 
     | 
    
         
             
                  months = 'January|February|March|April|May|June|July|August|September|November|December'
         
     | 
| 
         @@ -238,19 +245,23 @@ module Linkedin 
     | 
|
| 
       238 
245 
     | 
    
         | 
| 
       239 
246 
     | 
    
         
             
                    # loop over each element with cert data
         
     | 
| 
       240 
247 
     | 
    
         
             
                    page.search(query).each do |item|
         
     | 
| 
       241 
     | 
    
         
            -
                       
     | 
| 
       242 
     | 
    
         
            -
             
     | 
| 
       243 
     | 
    
         
            -
             
     | 
| 
       244 
     | 
    
         
            -
             
     | 
| 
       245 
     | 
    
         
            -
             
     | 
| 
       246 
     | 
    
         
            -
             
     | 
| 
       247 
     | 
    
         
            -
             
     | 
| 
       248 
     | 
    
         
            -
             
     | 
| 
       249 
     | 
    
         
            -
             
     | 
| 
       250 
     | 
    
         
            -
             
     | 
| 
      
 248 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 249 
     | 
    
         
            +
                        item_text = item.text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
      
 250 
     | 
    
         
            +
                        name = item_text.split(" #{item_text.scan(/#{months} \d{4}/)[0]}")[0]
         
     | 
| 
      
 251 
     | 
    
         
            +
                        authority = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
         
     | 
| 
      
 252 
     | 
    
         
            +
                        license = nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
         
     | 
| 
      
 253 
     | 
    
         
            +
                        start_date = Date.parse(item_text.scan(regex)[0].join(' '))
         
     | 
| 
      
 254 
     | 
    
         
            +
             
     | 
| 
      
 255 
     | 
    
         
            +
                        includes_end_date = item_text.scan(regex).count > 1
         
     | 
| 
      
 256 
     | 
    
         
            +
                        end_date = includes_end_date ? Date.parse(item_text.scan(regex)[0].join(' ')) : nil # we need a profile with an example of this and probably will need to use the API to accuratetly get this data
         
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
                        certifications << { name:name, authority:authority, license:license, start_date:start_date, end_date:end_date }
         
     | 
| 
      
 259 
     | 
    
         
            +
                      rescue => e
         
     | 
| 
      
 260 
     | 
    
         
            +
                      end
         
     | 
| 
       251 
261 
     | 
    
         
             
                    end
         
     | 
| 
       252 
262 
     | 
    
         
             
                    return certifications
         
     | 
| 
       253 
263 
     | 
    
         
             
                  end
         
     | 
| 
      
 264 
     | 
    
         
            +
             
     | 
| 
       254 
265 
     | 
    
         
             
                end
         
     | 
| 
       255 
266 
     | 
    
         | 
| 
       256 
267 
     | 
    
         | 
| 
         @@ -261,26 +272,29 @@ module Linkedin 
     | 
|
| 
       261 
272 
     | 
    
         | 
| 
       262 
273 
     | 
    
         
             
                    # loop over each element with org data
         
     | 
| 
       263 
274 
     | 
    
         
             
                    page.search('ul.organizations li.organization').each do |item|
         
     | 
| 
       264 
     | 
    
         
            -
                       
     | 
| 
       265 
     | 
    
         
            -
             
     | 
| 
       266 
     | 
    
         
            -
             
     | 
| 
       267 
     | 
    
         
            -
             
     | 
| 
       268 
     | 
    
         
            -
             
     | 
| 
       269 
     | 
    
         
            -
             
     | 
| 
       270 
     | 
    
         
            -
                         
     | 
| 
       271 
     | 
    
         
            -
             
     | 
| 
       272 
     | 
    
         
            -
                         
     | 
| 
      
 275 
     | 
    
         
            +
                      begin
         
     | 
| 
      
 276 
     | 
    
         
            +
                        # find the h3 element within the above section and get the text with excess white space stripped
         
     | 
| 
      
 277 
     | 
    
         
            +
                        name = item.search('h3').text.gsub(/\s+|\n/, " ").strip
         
     | 
| 
      
 278 
     | 
    
         
            +
                        position = nil # add this later
         
     | 
| 
      
 279 
     | 
    
         
            +
                        occupation = nil # add this latetr too, this relates to the experience/work
         
     | 
| 
      
 280 
     | 
    
         
            +
                        start_date = Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').first)
         
     | 
| 
      
 281 
     | 
    
         
            +
                        if item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last == 'Present'
         
     | 
| 
      
 282 
     | 
    
         
            +
                          end_date = nil
         
     | 
| 
      
 283 
     | 
    
         
            +
                        else
         
     | 
| 
      
 284 
     | 
    
         
            +
                          Date.parse(item.search('ul.specifics li').text.gsub(/\s+|\n/, " ").strip.split(' to ').last)
         
     | 
| 
      
 285 
     | 
    
         
            +
                        end
         
     | 
| 
      
 286 
     | 
    
         
            +
             
     | 
| 
      
 287 
     | 
    
         
            +
                        organizations << { name: name, start_date: start_date, end_date: end_date }
         
     | 
| 
      
 288 
     | 
    
         
            +
                      rescue => e
         
     | 
| 
       273 
289 
     | 
    
         
             
                      end
         
     | 
| 
       274 
     | 
    
         
            -
             
     | 
| 
       275 
     | 
    
         
            -
                      organizations << { name: name, start_date: start_date, end_date: end_date }
         
     | 
| 
       276 
290 
     | 
    
         
             
                    end
         
     | 
| 
       277 
     | 
    
         
            -
             
     | 
| 
       278 
     | 
    
         
            -
             
     | 
| 
       279 
     | 
    
         
            -
                  end # page.search('ul.organizations li.organization').first
         
     | 
| 
      
 291 
     | 
    
         
            +
                  end
         
     | 
| 
      
 292 
     | 
    
         
            +
                  return organizations
         
     | 
| 
       280 
293 
     | 
    
         
             
                end
         
     | 
| 
       281 
294 
     | 
    
         | 
| 
       282 
295 
     | 
    
         | 
| 
       283 
296 
     | 
    
         | 
| 
      
 297 
     | 
    
         
            +
             
     | 
| 
       284 
298 
     | 
    
         
             
                def get_recommended_visitors(page)
         
     | 
| 
       285 
299 
     | 
    
         
             
                  recommended_vs=[]
         
     | 
| 
       286 
300 
     | 
    
         
             
                  if page.search(".browsemap").first
         
     | 
| 
         @@ -295,5 +309,6 @@ module Linkedin 
     | 
|
| 
       295 
309 
     | 
    
         
             
                    return recommended_vs
         
     | 
| 
       296 
310 
     | 
    
         
             
                  end
         
     | 
| 
       297 
311 
     | 
    
         
             
                end
         
     | 
| 
      
 312 
     | 
    
         
            +
             
     | 
| 
       298 
313 
     | 
    
         
             
              end
         
     | 
| 
       299 
314 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: linkedin-scraper
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0.9
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.10
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Yatish Mehta
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2013- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2013-06-18 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: mechanize
         
     | 
| 
         @@ -61,7 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       61 
61 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       62 
62 
     | 
    
         
             
            requirements: []
         
     | 
| 
       63 
63 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       64 
     | 
    
         
            -
            rubygems_version: 2.0. 
     | 
| 
      
 64 
     | 
    
         
            +
            rubygems_version: 2.0.3
         
     | 
| 
       65 
65 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       66 
66 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       67 
67 
     | 
    
         
             
            summary: when a url of  public linkedin profile page is given it scrapes the entire
         
     |