RubyGems - linkedinparser - Versions diffs - 0.0.7 → 0.0.8 - Mend

linkedinparser 0.0.7 → 0.0.8

Files changed (12) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3bfb352fd7f2c469a2d3858295f0a563e5ba3593
-  data.tar.gz: da6090b7f7f54e641fcdd484f15bacb4da30d106
+  metadata.gz: 1bc27e7c376c9641f15b6dd2454318d2082fed25
+  data.tar.gz: 77872a929f8844da3afc4c8bea62a1a8ddec729f
 SHA512:
-  metadata.gz: 5441c537eb7e899a7bf3c514336bb027d0b5f3afafe35cefc912dc86710fe628b125dfe776620df17887192663183a7b0c40d3005d35ca39452e1b806f640706
-  data.tar.gz: 5bdff258b614db3b4d9b4e0a80ccde0accb537666deef3589179e0f3896aa9d4480011db81de87c2dc5f44f8df757855c8d753c1f29c2cd09ca6e45e76cca8e4
+  metadata.gz: 7c64448f45ef78f600251bdf43f56e4cda01d35528f7d0877d46b78cf1061a17407742d3d99d0732456d43f50505efcd8d9473873ac290cec04a7c8e5127a978
+  data.tar.gz: cb5649581361d23fa8357d04c336f45801eb8259b067c65ffc927d45ad8d6b2386dabb65751093178fff9fc650611690b497fd07de22d527fe3c9b6a24c1f966

data/lib/causes.rb ADDED Viewed

@@ -0,0 +1,50 @@
+# coding: utf-8
+load 'utilities.rb'
+class Causes
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_causes
+  end
+  # Get list of causes
+  def get_causes
+    return @cause_hash
+  end
+  def parse_causes
+    volunteering = @html.css("#volunteering")
+    if !is_empty?(volunteering)
+      @cause_hash = Hash.new
+      @cause_hash[:volunteer_opportunities] = volunteer_opportunities(volunteering)
+      @cause_hash[:supported_causes] = supported_causes(volunteering)
+      @cause_hash[:supported_organizations] = supported_organizations(volunteering)
+    end
+  end
+  # Get opportunities they are looking for
+  def volunteer_opportunities(volunteering)
+    section = volunteering.css(".opportunities").css("li")
+    return make_list(section) if !is_empty?(section)
+  end
+  # Get causes they support
+  def supported_causes(volunteering)
+    section = get_right_section("Causes", volunteering.css(".extra-section"))
+    return make_list(section.css("li")) if !is_empty?(section)
+  end
+  # Get organizations they support
+  def supported_organizations(volunteering)
+    section = get_right_section("Organizations", volunteering.css(".extra-section"))
+    return make_list(section.css("li")) if !is_empty?(section)
+  end
+  def get_right_section(look_for, sections)
+    sections.each do |section|
+      return section if section.css("h4").text.include?(look_for)
+    end
+    return nil
+  end
+end

data/lib/certifications.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# coding: utf-8
+load 'utilities.rb'
+class Certifications
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_certifications
+  end
+  # Get list of certifications
+  def get_certifications
+    return @certificate_list
+  end
+  def parse_certifications
+    certifications = @html.css(".certifications").css("li")
+    @certificate_list = Array.new
+    certifications.each do |certificate|
+      @certificate_list.push({
+                               certificate_name: certificate_name(certificate),
+                               certificate_authority: certificate_authority(certificate),
+                               license_num: license_num(certificate),
+                               certificate_start: certificate_start(certificate),
+                               certificate_end: certificate_end(certificate)
+                             })
+    end
+  end
+  # Name of certification
+  def certificate_name(certificate)
+    cert_name = certificate.css("h4")
+    return cert_name.text if !is_empty?(cert_name)
+  end
+  # Issuing authority
+  def certificate_authority(certificate)
+    cert_auth = certificate.css("h5")
+    return cert_auth.text.split(", ")[0] if !is_empty?(cert_auth)
+  end
+  # License Number
+  def license_num(certificate)
+    cert_num = certificate.css("h5")
+    return cert_num.text.split(", ")[1] if !is_empty?(cert_num)
+  end
+  # Start date for certificate
+  def certificate_start(certificate)
+    cert_start = certificate.css(".date-range").css("time")
+    return cert_start[0].text if !is_empty?(cert_start[0])
+  end
+  # Expiry date for certificate
+  def certificate_end(certificate)
+    cert_end = certificate.css(".date-range").css("time")
+    return cert_end[1].text if !is_empty?(cert_end[1])
+  end
+end

data/lib/education.rb ADDED Viewed

@@ -0,0 +1,58 @@
+# coding: utf-8
+load 'utilities.rb'
+class Education
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_education
+  end
+  # Get list of degrees
+  def get_education
+    return @degree_list
+  end
+  def parse_education
+    schools = @html.css(".schools").css(".school")
+    @degree_list = Array.new
+    schools.each do |school|
+      @degree_list.push({
+                          school_name: school_name(school),
+                          education_desc: education_desc(school),
+                          education_degree: education_degree(school),
+                          degree_start_date: degree_start_date(school),
+                          degree_end_date: degree_end_date(school)
+                        })
+    end
+  end
+  # Get the name of the school
+  def school_name(school)
+    return school.css("h4").text
+  end
+  # Get the description
+  def education_desc(school)
+    return school.css(".description").text
+  end
+  # Get the degree info
+  def education_degree(school)
+    return school.css("h5").text
+  end
+  # Get the start date for the degree
+  def degree_start_date(school)
+    start_date = school.css(".date-range").css("time")
+    return start_date[0].text if !is_empty?(start_date[0])
+  end
+  # Get the end date for the degree
+  def degree_end_date(school)
+    end_date = school.css(".date-range").css("time")
+    return end_date[1].text if !is_empty?(end_date[1])
+  end
+end

data/lib/groups.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# coding: utf-8
+load 'utilities.rb'
+class Groups
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_groups
+  end
+  # Get list of groups
+  def get_groups
+    return @group_list
+  end
+  def parse_groups
+    groups = @html.css('#groups').css('.group').css('.item-title')
+    @group_list = Array.new
+    groups.each do |group|
+      @group_list.push({
+                         group_name: group_name(group),
+                         group_link: group_link(group)
+                       })
+    end
+  end
+  # Get group name
+  def group_name(group)
+    return group.text
+  end
+  # Get group link
+  def group_link(group)
+    return group.css("a")[0]["href"]
+  end
+end

data/lib/languages.rb ADDED Viewed

@@ -0,0 +1,37 @@
+# coding: utf-8
+load 'utilities.rb'
+class Languages
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_languages
+  end
+  # Get list of languages
+  def get_languages
+    return @language_list
+  end
+  def parse_languages
+    languages = @html.css("#languages").css("li")
+    @language_list = Array.new
+    languages.each do |l|
+      @language_list.push({
+                            language: language(l),
+                            proficiency: proficiency(l)
+                          })
+    end
+  end
+  # Language name
+  def language(language_name)
+    language_name.css("h4").text
+  end
+  # Get proficiency
+  def proficiency(language_name)
+    language_name.css(".proficiency").text
+  end
+end

data/lib/linkedinparser.rb CHANGED Viewed

@@ -14,12 +14,25 @@ class LinkedinParser
   def parse
     # Get details about the person
-    p = PersonalInfo.new(@profile, @profile_url)
-    @personal_info = p.get_personal_info
+    begin
+      p = PersonalInfo.new(@profile, @profile_url)
+      @personal_info = p.get_personal_info
+      @personal_info.merge!({parsing_failed: false})
+    rescue # Handle failed parsing
+      @personal_info = {
+        profile_url: @profile_url,
+        full_html: @profile,
+        parsing_failed: true
+      }
+    end
     # Get job info
-    j = Jobs.new(@profile)
-    @job_info = j.get_jobs
+    begin
+      j = Jobs.new(@profile)
+      @job_info = j.get_jobs
+    rescue # Handle failed job parsing
+      @job_info = {job_parsing_failed: true}
+    end
   end
   # Return results with new item for each job
@@ -38,27 +51,5 @@ class LinkedinParser
     output[:jobs] = @job_info
     output.merge!(@crawler_fields)
     JSON.pretty_generate(output)
-  end
-  # TODO: Fields to add to parser-
-  # Organizations
-  # Education
-  # Projects
-  # Related people
-  # Languages
-  # Certifications
-  # Groups
+  end
 end
-# Test:
-#profile = Selenium::WebDriver::Firefox::Profile.new
-#profile['intl.accept_languages'] = 'en'
-#profile["javascript.enabled"] = false
-#driver = Selenium::WebDriver.for :firefox, profile: profile
-#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
-#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
-#url = "https://www.linkedin.com/pub/kenneth-chamberlin/32/8bb/b22"
-#driver.navigate.to url
-#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
-#puts l.results_by_job

data/lib/personal_info.rb CHANGED Viewed

@@ -1,5 +1,11 @@
 load 'picture.rb'
 load 'utilities.rb'
+load 'education.rb'
+load 'groups.rb'
+load 'languages.rb'
+load 'related_people.rb'
+load 'certifications.rb'
+load 'causes.rb'
 class PersonalInfo
   include Utilities
@@ -23,6 +29,12 @@ class PersonalInfo
       summary: summary,
       current_title: title,
       interests: interests,
+      education: education,
+      groups: groups,
+      causes: causes,
+      certifications: certifications,
+      languages: languages,
+      related_people: related_people,
       number_of_connections: number_of_connections,
       picture: p.picture,
       pic_path: p.pic_path,
@@ -36,7 +48,9 @@ class PersonalInfo
   # Get the full name of the person
   def full_name
-    @html.css(".profile-overview").css('h1').text
+    name = @html.css(".profile-overview").css('h1')
+    name = @html.css(".profile-overview-content").css('h1') if is_empty?(name)
+    return name.text
   end
   # Get first part of name
@@ -49,6 +63,42 @@ class PersonalInfo
     full_name.split(" ", 2).last.strip
   end
+  # Get education info
+  def education
+    e = Education.new(@html)
+    return e.get_education
+  end
+  # Get a list of groups they are in
+  def groups
+    g = Groups.new(@html)
+    return g.get_groups
+  end
+  # Get causes they care about
+  def causes
+    c = Causes.new(@html)
+    return c.get_causes
+  end
+  # Get the person's certifications
+  def certifications
+    c = Certifications.new(@html)
+    return c.get_certifications
+  end
+  # Get a list of languages they speak
+  def languages
+    l = Languages.new(@html)
+    return l.get_languages
+  end
+  # Get the people also viewed list from the side
+  def related_people
+    r = RelatedPeople.new(@html)
+    return r.get_related
+  end
   # Get list of skills
   def skills
     skill_list = Array.new
@@ -71,12 +121,12 @@ class PersonalInfo
   # Get town
   def location
-    full_location.split(",").first.strip
+    full_location.split(",").first.strip if !full_location.empty?
   end
   # Get country/state
   def area
-    full_location.split(",").last.strip
+    full_location.split(",").last.strip if !full_location.empty?
   end
   # Get the industry the person works in (2 different formats)
@@ -90,7 +140,7 @@ class PersonalInfo
   def summary
     summary = @html.css('#summary').css('.description')
     summary = @html.css('.summary').first if is_empty?(summary)
-    return summary.text
+    return summary.text if summary
   end
   # Get the overall/current title

data/lib/picture.rb CHANGED Viewed

@@ -5,7 +5,8 @@ class Picture
   # Get path to the picture url
   def picture
-    @html.css('.profile-picture').css('img').first['src']
+    pic = @html.css('.profile-picture').css('img').first
+    return pic['src'] if pic
   end
   # Download picture

data/lib/related_people.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# coding: utf-8
+load 'utilities.rb'
+class RelatedPeople
+  include Utilities
+  def initialize(html)
+    @html = html
+    parse_related
+  end
+  # Get list of related people
+  def get_related
+    return @related_people_list
+  end
+  def parse_related
+    related_people = @html.css(".insights").css(".browse-map").css(".profile-card")
+    @related_people_list = Array.new
+    related_people.each do |person|
+      @related_people_list.push({
+                                  related_name: related_name(person),
+                                  related_link: related_link(person),
+                                  related_person_company: related_person_company(person),
+                                  related_person_title: related_person_title(person)
+                                })
+    end
+  end
+  # Get name of related person
+  def related_name(person)
+    return person.css("h4").text
+  end
+  # Get link to related person's profile
+  def related_link(person)
+    return person.css("h4").css("a")[0]["href"]
+  end
+  # Get related person's company
+  def related_person_company(person)
+    return person.css(".headline").text.split(" at ")[1]
+  end
+  # Get title of related person
+  def related_person_title(person)
+    return person.css(".headline").text.split(" at ")[0]
+  end
+end

data/lib/utilities.rb CHANGED Viewed

@@ -3,4 +3,13 @@ module Utilities
   def is_empty?(item)
     item == nil || item.text.empty?
   end
+  # Make an array list of items
+  def make_list(elements)
+    listarr = Array.new
+    elements.each do |item|
+      listarr.push(item.text)
+    end
+    return listarr
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: linkedinparser
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - M. C. McGrath
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-03 00:00:00.000000000 Z
+date: 2015-11-06 00:00:00.000000000 Z
 dependencies: []
 description: Parses public LinkedIn profiles
 email: shidash@shidash.com
@@ -16,10 +16,16 @@ executables: []
 extensions: []
 extra_rdoc_files: []
 files:
+- lib/causes.rb
+- lib/certifications.rb
+- lib/education.rb
+- lib/groups.rb
 - lib/jobs.rb
+- lib/languages.rb
 - lib/linkedinparser.rb
 - lib/personal_info.rb
 - lib/picture.rb
+- lib/related_people.rb
 - lib/utilities.rb
 homepage: https://github.com/TransparencyToolkit/linkedinparser
 licenses: