RubyGems - linkedinparser - Versions diffs - 0.0.7 → 0.0.8 - Mend

linkedinparser 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3bfb352fd7f2c469a2d3858295f0a563e5ba3593
-  data.tar.gz: da6090b7f7f54e641fcdd484f15bacb4da30d106
+  metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
+  data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
 SHA512:
-  metadata.gz: 5441c537eb7e899a7bf3c514336bb027d0b5f3afafe35cefc912dc86710fe628b125dfe776620df17887192663183a7b0c40d3005d35ca39452e1b806f640706
-  data.tar.gz: 5bdff258b614db3b4d9b4e0a80ccde0accb537666deef3589179e0f3896aa9d4480011db81de87c2dc5f44f8df757855c8d753c1f29c2cd09ca6e45e76cca8e4
+  metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
+  data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966

data/lib/causes.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# coding: utf-8
+load 'utilities.rb'
+class Causes
+  include Utilities
+  # Keep the profile document and parse its volunteering section right away.
+  # html only needs to respond to #css (presumably a Nokogiri document;
+  # confirm against the caller).
+  def initialize(html)
+    @html = html
+    parse_causes
+  end
+  # Hash built by parse_causes, or nil when "#volunteering" had no text
+  def get_causes
+    @cause_hash
+  end
+  # Fill @cause_hash from the "#volunteering" section. When that section is
+  # empty the instance variable is never assigned.
+  def parse_causes
+    volunteering = @html.css("#volunteering")
+    return if is_empty?(volunteering)
+    @cause_hash = {}
+    @cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteering)
+    @cause_hash[:supported_causes] = supported_causes(volunteering)
+    @cause_hash[:supported_organizations] = supported_organizations(volunteering)
+  end
+  # Text of each <li> under ".opportunities", or nil when those items have no text
+  def volunteer_opportunities(volunteering)
+    items = volunteering.css(".opportunities").css("li")
+    make_list(items) unless is_empty?(items)
+  end
+  # Text of each <li> in the ".extra-section" whose <h4> mentions "Causes"
+  def supported_causes(volunteering)
+    match = get_right_section("Causes", volunteering.css(".extra-section"))
+    make_list(match.css("li")) unless is_empty?(match)
+  end
+  # Text of each <li> in the ".extra-section" whose <h4> mentions "Organizations"
+  def supported_organizations(volunteering)
+    match = get_right_section("Organizations", volunteering.css(".extra-section"))
+    make_list(match.css("li")) unless is_empty?(match)
+  end
+  # First section whose <h4> text includes look_for, or nil if none does
+  def get_right_section(look_for, sections)
+    sections.each { |candidate| return candidate if candidate.css("h4").text.include?(look_for) }
+    nil
+  end
+end

data/lib/certifications.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# coding: utf-8
+load 'utilities.rb'
+class Certifications
+  include Utilities
+  # Keep the profile document and build the certification list immediately.
+  def initialize(html)
+    @html = html
+    parse_certifications
+  end
+  # Array of hashes, one per <li> found under ".certifications"
+  def get_certifications
+    @certificate_list
+  end
+  # Rebuild @certificate_list with one hash per certification list item.
+  def parse_certifications
+    entries = @html.css(".certifications").css("li")
+    @certificate_list = []
+    entries.each do |entry|
+      record = {
+        certificate_name: certificate_name(entry),
+        certificate_authority: certificate_authority(entry),
+        license_num: license_num(entry),
+        certificate_start: certificate_start(entry),
+        certificate_end: certificate_end(entry)
+      }
+      @certificate_list << record
+    end
+  end
+  # <h4> text of the item, or nil when it has none
+  def certificate_name(certificate)
+    heading = certificate.css("h4")
+    heading.text unless is_empty?(heading)
+  end
+  # Part of the <h5> text before the first ", " (the issuing authority)
+  def certificate_authority(certificate)
+    subheading = certificate.css("h5")
+    subheading.text.split(", ")[0] unless is_empty?(subheading)
+  end
+  # Second ", "-separated part of the <h5> text (the license number), if any
+  def license_num(certificate)
+    subheading = certificate.css("h5")
+    subheading.text.split(", ")[1] unless is_empty?(subheading)
+  end
+  # Text of the first <time> inside ".date-range", or nil when absent
+  def certificate_start(certificate)
+    opening = certificate.css(".date-range").css("time")[0]
+    opening.text unless is_empty?(opening)
+  end
+  # Text of the second <time> inside ".date-range", or nil when absent
+  def certificate_end(certificate)
+    closing = certificate.css(".date-range").css("time")[1]
+    closing.text unless is_empty?(closing)
+  end
+end

data/lib/education.rb ADDED Viewed

@@ -0,0 +1,58 @@
+# coding: utf-8
+load 'utilities.rb'
+class Education
+  include Utilities
+  # Keep the profile document and build the degree list immediately.
+  def initialize(html)
+    @html = html
+    parse_education
+  end
+  # Array of hashes, one per ".school" entry found under ".schools"
+  def get_education
+    @degree_list
+  end
+  # Rebuild @degree_list with one hash per school entry.
+  def parse_education
+    entries = @html.css(".schools").css(".school")
+    @degree_list = []
+    entries.each do |entry|
+      record = {
+        school_name: school_name(entry),
+        education_desc: education_desc(entry),
+        education_degree: education_degree(entry),
+        degree_start_date: degree_start_date(entry),
+        degree_end_date: degree_end_date(entry)
+      }
+      @degree_list << record
+    end
+  end
+  # <h4> text of the entry (the school name)
+  def school_name(school)
+    school.css("h4").text
+  end
+  # Text of the entry's ".description" element
+  def education_desc(school)
+    school.css(".description").text
+  end
+  # <h5> text of the entry (the degree info)
+  def education_degree(school)
+    school.css("h5").text
+  end
+  # Text of the first <time> inside ".date-range", or nil when absent
+  def degree_start_date(school)
+    opening = school.css(".date-range").css("time")[0]
+    opening.text unless is_empty?(opening)
+  end
+  # Text of the second <time> inside ".date-range", or nil when absent
+  def degree_end_date(school)
+    closing = school.css(".date-range").css("time")[1]
+    closing.text unless is_empty?(closing)
+  end
+end

data/lib/groups.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# coding: utf-8
+load 'utilities.rb'
+class Groups
+  include Utilities
+  # Keep the profile document and build the group list immediately.
+  def initialize(html)
+    @html = html
+    parse_groups
+  end
+  # Array of hashes ({group_name:, group_link:}), one per group title found
+  def get_groups
+    @group_list
+  end
+  # Rebuild @group_list from every ".item-title" under "#groups .group".
+  def parse_groups
+    titles = @html.css('#groups').css('.group').css('.item-title')
+    @group_list = []
+    titles.each do |title|
+      @group_list << {
+        group_name: group_name(title),
+        group_link: group_link(title)
+      }
+    end
+  end
+  # Text of the group title element
+  def group_name(group)
+    group.text
+  end
+  # href of the first <a> inside the title, or nil when the title has no
+  # link. Without the guard a single link-less title raised NoMethodError
+  # and aborted the whole parse.
+  def group_link(group)
+    anchor = group.css("a").first
+    anchor["href"] if anchor
+  end
+end

data/lib/languages.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# coding: utf-8
+load 'utilities.rb'
+class Languages
+  include Utilities
+  # Keep the profile document and build the language list immediately.
+  def initialize(html)
+    @html = html
+    parse_languages
+  end
+  # Array of hashes ({language:, proficiency:}), one per <li> under "#languages"
+  def get_languages
+    @language_list
+  end
+  # Rebuild @language_list with one hash per language list item.
+  def parse_languages
+    entries = @html.css("#languages").css("li")
+    @language_list = []
+    entries.each do |entry|
+      record = {
+        language: language(entry),
+        proficiency: proficiency(entry)
+      }
+      @language_list << record
+    end
+  end
+  # <h4> text of the item (the language name)
+  def language(language_name)
+    heading = language_name.css("h4")
+    heading.text
+  end
+  # Text of the item's ".proficiency" element
+  def proficiency(language_name)
+    level = language_name.css(".proficiency")
+    level.text
+  end
+end

data/lib/linkedinparser.rb CHANGED Viewed

@@ -14,12 +14,25 @@ class LinkedinParser
   def parse
     # Get details about the person
-    p = PersonalInfo.new(@profile, @profile_url)
-    @personal_info = p.get_personal_info
+    begin
+      p = PersonalInfo.new(@profile, @profile_url)
+      @personal_info = p.get_personal_info
+      @personal_info.merge!({parsing_failed: false})
+    rescue # Handle failed parsing
+      @personal_info = {
+        profile_url: @profile_url,
+        full_html: @profile,
+        parsing_failed: true
+      }
+    end
     # Get job info
-    j = Jobs.new(@profile)
-    @job_info = j.get_jobs
+    begin
+      j = Jobs.new(@profile)
+      @job_info = j.get_jobs
+    rescue # Handle failed job parsing
+      @job_info = {job_parsing_failed: true}
+    end
   end
   # Return results with new item for each job
@@ -38,27 +51,5 @@ class LinkedinParser
     output[:jobs] = @job_info
     output.merge!(@crawler_fields)
     JSON.pretty_generate(output)
-  end
-  # TODO: Fields to add to parser-
-  # Organizations
-  # Education
-  # Projects
-  # Related people
-  # Languages
-  # Certifications
-  # Groups
+  end
 end
-# Test:
-#profile = Selenium::WebDriver::Firefox::Profile.new
-#profile['intl.accept_languages'] = 'en'
-#profile["javascript.enabled"] = false
-#driver = Selenium::WebDriver.for :firefox, profile: profile
-#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
-#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
-#url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
-#driver.navigate.to url
-#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
-#puts l.results_by_job

data/lib/personal_info.rb CHANGED Viewed

@@ -1,5 +1,11 @@
 load 'picture.rb'
 load 'utilities.rb'
+load 'education.rb'
+load 'groups.rb'
+load 'languages.rb'
+load 'related_people.rb'
+load 'certifications.rb'
+load 'causes.rb'
 class PersonalInfo
   include Utilities
@@ -23,6 +29,12 @@ class PersonalInfo
       summary: summary,
       current_title: title,
       interests: interests,
+      education: education,
+      groups: groups,
+      causes: causes,
+      certifications: certifications,
+      languages: languages,
+      related_people: related_people,
       number_of_connections: number_of_connections,
       picture: p.picture,
       pic_path: p.pic_path,
@@ -36,7 +48,9 @@ class PersonalInfo
   # Get the full name of the person
   def full_name
-    @html.css(".profile-overview").css('h1').text
+    name = @html.css(".profile-overview").css('h1')
+    name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
+    return name.text
   end
   # Get first part of name
@@ -49,6 +63,42 @@ class PersonalInfo
     full_name.split(" ", 2).last.strip
   end
+  # Get education info
+  def education
+    # Education parses in its constructor; just hand back its list.
+    Education.new(@html).get_education
+  end
+  # Get a list of groups they are in
+  def groups
+    # Groups parses in its constructor; just hand back its list.
+    Groups.new(@html).get_groups
+  end
+  # Get causes they care about
+  def causes
+    # Causes parses in its constructor; the result is nil when the profile
+    # has no volunteering section.
+    Causes.new(@html).get_causes
+  end
+  # Get the person's certifications
+  def certifications
+    # Certifications parses in its constructor; just hand back its list.
+    Certifications.new(@html).get_certifications
+  end
+  # Get a list of languages they speak
+  def languages
+    # Languages parses in its constructor; just hand back its list.
+    Languages.new(@html).get_languages
+  end
+  # Get the people also viewed list from the side
+  def related_people
+    # RelatedPeople parses in its constructor; just hand back its list.
+    RelatedPeople.new(@html).get_related
+  end
   # Get list of skills
   def skills
     skill_list = Array.new
@@ -71,12 +121,12 @@ class PersonalInfo
   # Get town
   def location
-    full_location.split(",").first.strip
+    full_location.split(",").first.strip if !full_location.empty?
   end
   # Get country/state
   def area
-    full_location.split(",").last.strip
+    full_location.split(",").last.strip if !full_location.empty?
   end
   # Get the industry the person works in (2 different formats)
@@ -90,7 +140,7 @@ class PersonalInfo
   def summary
     summary = @html.css('#summary').css('.description')
     summary = @html.css('.summary').first if is_empty?(summary)
-    return summary.text
+    return summary.text if summary
   end
   # Get the overall/current title

data/lib/picture.rb CHANGED Viewed

@@ -5,7 +5,8 @@ class Picture
   # Get path to the picture url
   def picture
-    @html.css('.profile-picture').css('img').first['src']
+    pic = @html.css('.profile-picture').css('img').first
+    return pic['src'] if pic
   end
   # Download picture

data/lib/related_people.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# coding: utf-8
+load 'utilities.rb'
+class RelatedPeople
+  include Utilities
+  # Keep the profile document and build the related-people list immediately.
+  def initialize(html)
+    @html = html
+    parse_related
+  end
+  # Array of hashes, one per ".profile-card" found under ".insights .browse-map"
+  def get_related
+    @related_people_list
+  end
+  # Rebuild @related_people_list with one hash per profile card.
+  def parse_related
+    cards = @html.css(".insights").css(".browse-map").css(".profile-card")
+    @related_people_list = []
+    cards.each do |card|
+      @related_people_list << {
+        related_name: related_name(card),
+        related_link: related_link(card),
+        related_person_company: related_person_company(card),
+        related_person_title: related_person_title(card)
+      }
+    end
+  end
+  # <h4> text of the card (the person's name)
+  def related_name(person)
+    person.css("h4").text
+  end
+  # href of the first <a> inside the card's <h4>, or nil when there is no
+  # link. Without the guard a single link-less card raised NoMethodError
+  # and aborted the whole parse.
+  def related_link(person)
+    anchor = person.css("h4").css("a").first
+    anchor["href"] if anchor
+  end
+  # Part of the ".headline" text after the first " at " (nil when there is none)
+  def related_person_company(person)
+    person.css(".headline").text.split(" at ")[1]
+  end
+  # Part of the ".headline" text before the first " at "
+  def related_person_title(person)
+    person.css(".headline").text.split(" at ")[0]
+  end
+end

data/lib/utilities.rb CHANGED Viewed

@@ -3,4 +3,13 @@ module Utilities
   def is_empty?(item)
     item == nil || item.text.empty?
   end
+  # Make an array list of items
+  def make_list(elements)
+    # Collect the #text of every element, in iteration order, into a new Array.
+    texts = []
+    elements.each { |element| texts << element.text }
+    texts
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: linkedinparser
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - M. C. McGrath
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-03 00:00:00.000000000 Z
+date: 2015-11-06 00:00:00.000000000 Z
 dependencies: []
 description: Parses public LinkedIn profiles
 email: shidash@shidash.com
@@ -16,10 +16,16 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- lib/causes.rb
+- lib/certifications.rb
+- lib/education.rb
+- lib/groups.rb
 - lib/jobs.rb
+- lib/languages.rb
 - lib/linkedinparser.rb
 - lib/personal_info.rb
 - lib/picture.rb
+- lib/related_people.rb
 - lib/utilities.rb
 homepage: https://github.com/TransparencyToolkit/linkedinparser
 licenses: