RubyGems - linkedinparser - Versions diffs - 0.0.3 → 0.0.4 - Mend

linkedinparser 0.0.3 → 0.0.4

Files changed (7) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 33dd5468e0b1c0ed881f443d224a247eda5b37d4
-  data.tar.gz: 6eb3e10f6954f0ce5d82a60ecd9fc3041d70f1db
+  metadata.gz: 55b734b221bd743748b734b022e649c57461fdc5
+  data.tar.gz: b22cab96393fd2b48b1e4ff7c623fe93f5a09c88
 SHA512:
-  metadata.gz: 609148d3105b8215854a96efd502b0dd1ca6662a54a8f8160b32eeb3037ca319abc80e4fdb8243119f0b15198ffa1eb99c795aed6aef95ebd86a7806a4d734d6
-  data.tar.gz: 4404b31657ce3b78ddaec10a43c005bf86fc1321e0c2cc68865dcd2b2ea1603e53459bc6f29fb6a0ba2b128a7bb01d751cdf913b6250c6b53e01aecbfe7c95fe
+  metadata.gz: 7715be94c914d84bd0226e6bf1a9df77bc0b3275134e648089e7f8e1a692995b7a56f96eb9d424d97e21b43c27af96c243155ba349ce6b99e41f90dca667acdb
+  data.tar.gz: 1e2aa43c38a5e83298689bd2d6be9e880c4371493d21a424aa0400f6b60e35a238d118f93d57a41be2f91c4f6f5deba92f2b497230f67ecf69b22ab81d97664c

data/lib/jobs.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# coding: utf-8
+load 'utilities.rb'
+class Jobs
+  include Utilities
+  def initialize(profile)
+    @html = Nokogiri::HTML(profile)
+    parse_jobs
+  end
+  # Get list of jobs
+  def get_jobs
+    return @positions_list
+  end
+  def parse_jobs
+    # Multiple html options
+    positions = @html.css('#experience').css('.position')
+    positions = @html.css('#background-experience').css('.current-position') +
+                @html.css('#background-experience').css('.past-position') if is_empty?(positions)
+    # Get lists of positions
+    @positions_list = Array.new
+    positions.each do |position|
+      @positions_list.push({
+                             title: title(position),
+                             company: company(position),
+                             description: description(position),
+                             start_date: start_date(position),
+                             end_date: end_date(position),
+                             work_location: work_location(position),
+                             current: current(position)})
+    end
+  end
+  # Check if it is a current position or not
+  def current(position)
+    if end_date(position) == "Present"
+      return "Yes"
+    else return "No"
+    end
+  end
+  # Get the job title
+  def title(position)
+    position.css('h4').text
+  end
+  # Get the company for the position
+  def company(position)
+    position.css('h5').text
+  end
+  # Get job description
+  def description(position)
+    position.css('.description').text
+  end
+  # Get dates
+  def get_dates(position)
+    dates = position.css('.meta').css('.date-range')
+    dates = position.css('.experience-date-locale') if is_empty?(dates)
+    return dates
+  end
+  # Get start date
+  def start_date(position)
+    start_date = get_dates(position).text.split(' – ')[0]
+    return date_parse(start_date)
+  end
+  # Get end date
+  def end_date(position)
+    end_date = get_dates(position).text.split(' – ').last.split("(").first.strip
+    if end_date == "Present"
+      return end_date
+    elsif end_date && !end_date.empty?
+      return Date.parse(end_date)
+    end
+  end
+  # Parse date
+  def date_parse(date)
+    date = date+"-01-01" if date =~ /^(19|20)\d{2}$/
+    Date.parse(date)
+  end
+  # Get location for work
+  def work_location(position)
+    position.css('.experience-date-locale').css('.locality').text
+  end
+end

data/lib/linkedinparser.rb ADDED Viewed

@@ -0,0 +1,63 @@
+require 'date'
+require 'json'
+require 'selenium-webdriver'
+require 'pry'
+require 'nokogiri'
+load 'personal_info.rb'
+load 'jobs.rb'
+class LinkedinParser
+  def initialize(profile, profile_url, crawler_fields)
+    @profile = profile
+    @profile_url = profile_url
+    @crawler_fields = crawler_fields
+    parse
+  end
+  def parse
+    # Get details about the person
+    p = PersonalInfo.new(@profile, @profile_url)
+    @personal_info = p.get_personal_info
+    # Get job info
+    j = Jobs.new(@profile)
+    @job_info = j.get_jobs
+  end
+  # Return results with new item for each job
+  def results_by_job
+    output = Array.new
+    @job_info.each do |job|
+      output.push(job.merge!(@personal_info).merge!(@crawler_fields))
+    end
+    JSON.pretty_generate(output)
+  end
+  # Return results in nested JSON
+  def results_by_person
+    output = @personal_info
+    output[:jobs] = @job_info
+    output.merge!(@crawler_fields)
+    JSON.pretty_generate(output)
+  end
+  # TODO: Fields to add to parser-
+  # Organizations
+  # Education
+  # Projects
+  # Related people
+  # Languages
+  # Certifications
+  # Groups
+end
+# Test:
+#profile = Selenium::WebDriver::Firefox::Profile.new
+#profile['intl.accept_languages'] = 'en'
+#profile["javascript.enabled"] = false
+#driver = Selenium::WebDriver.for :firefox, profile: profile
+#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
+#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
+#driver.navigate.to url
+#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
+#puts l.results_by_job

data/lib/personal_info.rb ADDED Viewed

@@ -0,0 +1,126 @@
+load 'picture.rb'
+load 'utilities.rb'
+class PersonalInfo
+  include Utilities
+  def initialize(profile, profile_url)
+    @profile = profile
+    @html = Nokogiri::HTML(profile)
+    @profile_url = profile_url
+    # Parse attributes
+    p = Picture.new(@html)
+    @personal_info = {
+      profile_url: @profile_url,
+      full_name: full_name,
+      first_name: first_name,
+      last_name: last_name,
+      skills: skills,
+      full_location: full_location,
+      location: location,
+      area: area,
+      industry: industry,
+      summary: summary,
+      current_title: title,
+      interests: interests,
+      number_of_connections: number_of_connections,
+      picture: p.picture,
+      pic_path: p.pic_path,
+      full_html: full_html}
+  end
+  # Return person hash
+  def get_personal_info
+    return @personal_info
+  end
+  # Get the full name of the person
+  def full_name
+    @html.css(".profile-overview").css('h1').text
+  end
+  # Get first part of name
+  def first_name
+    full_name.split(" ", 2).first.strip
+  end
+  # Get last part of name
+  def last_name
+    full_name.split(" ", 2).last.strip
+  end
+  # Get list of skills
+  def skills
+    skill_list = Array.new
+    # Two formatting options for skills
+    skills = @html.css('#skills').css('.skill')
+    skills = @html.css('.skill-pill .endorse-item-name-text') if is_empty?(skills)
+    # Make list of skills
+    skills.each do |skill|
+      skill_list.push(skill.text)
+    end
+    return skill_list
+  end
+  # Get full location
+  def full_location
+    @html.css('.profile-overview').css('.locality').text
+  end
+  # Get town
+  def location
+    full_location.split(",").first.strip
+  end
+  # Get country/state
+  def area
+    full_location.split(",").last.strip
+  end
+  # Get the industry the person works in (2 different formats)
+  def industry
+    industry = @html.css('.profile-overview').css('.descriptor')[1]
+    industry = @html.css('.profile-overview').css('.industry') if is_empty?(industry)
+    return industry.text
+  end
+  # Get the summary field (2 different formats)
+  def summary
+    summary = @html.css('#summary').css('.description')
+    summary = @html.css('.summary').first if is_empty?(summary)
+    return summary.text
+  end
+  # Get the overall/current title
+  def title
+    title = @html.css('.title').css('.headline')
+    title = @html.css('#headline').css('.title') if is_empty?(title)
+    title = @html.css('.title') if is_empty?(title)
+    return title.text
+  end
+  # Get the number of connections
+  def number_of_connections
+    @html.css('.member-connections')[0].text.gsub("connections", "").strip
+  end
+  # Get list of interests
+  def interests
+    interest_list = Array.new
+    interests = @html.css('#interests').css('.interest')
+    interests = @html.css('#background-interests').css('.interest-item') if is_empty?(interests)
+    interests.each do |interest|
+      interest_list.push(interest.text)
+    end
+    return interest_list
+  end
+  # Save the full html of the page
+  def full_html
+    @profile
+  end
+end

data/lib/picture.rb ADDED Viewed

@@ -0,0 +1,32 @@
+class Picture
+  def initialize(html)
+    @html = html
+  end
+  # Get path to the picture url
+  def picture
+    @html.css('.profile-picture').css('img').first['src']
+  end
+  # Download picture
+  def pic_path
+    if picture
+      # Get path
+      dir = "pictures/"
+      full_path = dir+picture.split("/").last.chomp.strip
+      # Get file
+      `wget -P #{dir} #{picture}` if !File.file?(full_path)
+      delete_duplicate_pics
+      return full_path
+    end
+  end
+  # Deletes duplicate pictures
+  def delete_duplicate_pics
+    pics = Dir["public/uploads/pictures/*.jpg.*"]
+    pics.each do |p|
+      File.delete(p)
+    end
+  end
+end

data/lib/utilities.rb ADDED Viewed

@@ -0,0 +1,6 @@
+module Utilities
+  # Check if item is nil or empty
+  def is_empty?(item)
+    item == nil || item.text.empty?
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: linkedinparser
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - M. C. McGrath
@@ -15,7 +15,12 @@ email: shidash@shidash.com
 executables: []
 extensions: []
 extra_rdoc_files: []
-files: []
+files:
+- lib/jobs.rb
+- lib/linkedinparser.rb
+- lib/personal_info.rb
+- lib/picture.rb
+- lib/utilities.rb
 homepage: https://github.com/TransparencyToolkit/linkedinparser
 licenses:
 - GPL