RubyGems - linkedinparser - Versions diffs - 0.0.3 → 0.0.4 - Mend

linkedinparser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 33dd5468e0b1c0ed881f443d224a247eda5b37d4
-  data.tar.gz: 6eb3e10f6954f0ce5d82a60ecd9fc3041d70f1db
+  metadata.gz: 55b734b221bd743748b734b022e649c57461fdc5
+  data.tar.gz: b22cab96393fd2b48b1e4ff7c623fe93f5a09c88
 SHA512:
-  metadata.gz: 609148d3105b8215854a96efd502b0dd1ca6662a54a8f8160b32eeb3037ca319abc80e4fdb8243119f0b15198ffa1eb99c795aed6aef95ebd86a7806a4d734d6
-  data.tar.gz: 4404b31657ce3b78ddaec10a43c005bf86fc1321e0c2cc68865dcd2b2ea1603e53459bc6f29fb6a0ba2b128a7bb01d751cdf913b6250c6b53e01aecbfe7c95fe
+  metadata.gz: 7715be94c914d84bd0226e6bf1a9df77bc0b3275134e648089e7f8e1a692995b7a56f96eb9d424d97e21b43c27af96c243155ba349ce6b99e41f90dca667acdb
+  data.tar.gz: 1e2aa43c38a5e83298689bd2d6be9e880c4371493d21a424aa0400f6b60e35a238d118f93d57a41be2f91c4f6f5deba92f2b497230f67ecf69b22ab81d97664c

data/lib/jobs.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# coding: utf-8
+load 'utilities.rb'
+class Jobs
+  include Utilities
+  def initialize(profile)
+    @html = Nokogiri::HTML(profile)
+    parse_jobs
+  end
+  # Get list of jobs
+  def get_jobs
+    return @positions_list
+  end
+  def parse_jobs
+    # Multiple html options
+    positions = @html.css('#experience').css('.position')
+    positions = @html.css('#background-experience').css('.current-position') +
+                @html.css('#background-experience').css('.past-position') if is_empty?(positions)
+    # Get lists of positions
+    @positions_list = Array.new
+    positions.each do |position|
+      @positions_list.push({
+                             title: title(position),
+                             company: company(position),
+                             description: description(position),
+                             start_date: start_date(position),
+                             end_date: end_date(position),
+                             work_location: work_location(position),
+                             current: current(position)})
+    end
+  end
+  # Check if it is a current position or not
+  def current(position)
+    if end_date(position) == "Present"
+      return "Yes"
+    else return "No"
+    end
+  end
+  # Get the job title
+  def title(position)
+    position.css('h4').text
+  end
+  # Get the company for the position
+  def company(position)
+    position.css('h5').text
+  end
+  # Get job description
+  def description(position)
+    position.css('.description').text
+  end
+  # Get dates
+  def get_dates(position)
+    dates = position.css('.meta').css('.date-range')
+    dates = position.css('.experience-date-locale') if is_empty?(dates)
+    return dates
+  end
+  # Get start date
+  def start_date(position)
+    start_date = get_dates(position).text.split(' – ')[0]
+    return date_parse(start_date)
+  end
+  # Get end date
+  def end_date(position)
+    end_date = get_dates(position).text.split(' – ').last.split("(").first.strip
+    if end_date == "Present"
+      return end_date
+    elsif end_date && !end_date.empty?
+      return Date.parse(end_date)
+    end
+  end
+  # Parse date
+  def date_parse(date)
+    date = date+"-01-01" if date =~ /^(19|20)\d{2}$/
+    Date.parse(date)
+  end
+  # Get location for work
+  def work_location(position)
+    position.css('.experience-date-locale').css('.locality').text
+  end
+end

data/lib/linkedinparser.rb ADDED Viewed

@@ -0,0 +1,63 @@
+require 'selenium-webdriver'
+require 'pry'
+require 'nokogiri'
+load 'personal_info.rb'
+load 'jobs.rb'
+class LinkedinParser
+  def initialize(profile, profile_url, crawler_fields)
+    @profile = profile
+    @profile_url = profile_url
+    @crawler_fields = crawler_fields
+    parse
+  end
+  def parse
+    # Get details about the person
+    p = PersonalInfo.new(@profile, @profile_url)
+    @personal_info = p.get_personal_info
+    # Get job info
+    j = Jobs.new(@profile)
+    @job_info = j.get_jobs
+  end
+  # Return results with new item for each job
+  def results_by_job
+    output = Array.new
+    @job_info.each do |job|
+      output.push(job.merge!(@personal_info).merge!(@crawler_fields))
+    end
+    JSON.pretty_generate(output)
+  end
+  # Return results in nested JSON
+  def results_by_person
+    output = @personal_info
+    output[:jobs] = @job_info
+    output.merge!(@crawler_fields)
+    JSON.pretty_generate(output)
+  end
+  # TODO: Fields to add to parser-
+  # Organizations
+  # Education
+  # Projects
+  # Related people
+  # Languages
+  # Certifications
+  # Groups
+end
+# Test:
+#profile = Selenium::WebDriver::Firefox::Profile.new
+#profile['intl.accept_languages'] = 'en'
+#profile["javascript.enabled"] = false
+#driver = Selenium::WebDriver.for :firefox, profile: profile
+#url = "https://www.linkedin.com/pub/christopher-mcclellan/5b/a09/ba9"
+#url = "https://www.linkedin.com/pub/maryann-holmes/2b/770/3b2"
+#driver.navigate.to url
+#l = LinkedinParser.new(driver.page_source, url, {timestamp: Time.now})
+#puts l.results_by_job

data/lib/personal_info.rb ADDED Viewed

@@ -0,0 +1,126 @@
+load 'picture.rb'
+load 'utilities.rb'
+class PersonalInfo
+  include Utilities
+  def initialize(profile, profile_url)
+    @profile = profile
+    @html = Nokogiri::HTML(profile)
+    @profile_url = profile_url
+    # Parse attributes
+    p = Picture.new(@html)
+    @personal_info = {
+      profile_url: @profile_url,
+      full_name: full_name,
+      first_name: first_name,
+      last_name: last_name,
+      skills: skills,
+      full_location: full_location,
+      location: location,
+      area: area,
+      industry: industry,
+      summary: summary,
+      current_title: title,
+      interests: interests,
+      number_of_connections: number_of_connections,
+      picture: p.picture,
+      pic_path: p.pic_path,
+      full_html: full_html}
+  end
+  # Return person hash
+  def get_personal_info
+    return @personal_info
+  end
+  # Get the full name of the person
+  def full_name
+    @html.css(".profile-overview").css('h1').text
+  end
+  # Get first part of name
+  def first_name
+    full_name.split(" ", 2).first.strip
+  end
+  # Get last part of name
+  def last_name
+    full_name.split(" ", 2).last.strip
+  end
+  # Get list of skills
+  def skills
+    skill_list = Array.new
+    # Two formatting options for skills
+    skills = @html.css('#skills').css('.skill')
+    skills = @html.css('.skill-pill .endorse-item-name-text') if is_empty?(skills)
+    # Make list of skills
+    skills.each do |skill|
+      skill_list.push(skill.text)
+    end
+    return skill_list
+  end
+  # Get full location
+  def full_location
+    @html.css('.profile-overview').css('.locality').text
+  end
+  # Get town
+  def location
+    full_location.split(",").first.strip
+  end
+  # Get country/state
+  def area
+    full_location.split(",").last.strip
+  end
+  # Get the industry the person works in (2 different formats)
+  def industry
+    industry = @html.css('.profile-overview').css('.descriptor')[1]
+    industry = @html.css('.profile-overview').css('.industry') if is_empty?(industry)
+    return industry.text
+  end
+  # Get the summary field (2 different formats)
+  def summary
+    summary = @html.css('#summary').css('.description')
+    summary = @html.css('.summary').first if is_empty?(summary)
+    return summary.text
+  end
+  # Get the overall/current title
+  def title
+    title = @html.css('.title').css('.headline')
+    title = @html.css('#headline').css('.title') if is_empty?(title)
+    title = @html.css('.title') if is_empty?(title)
+    return title.text
+  end
+  # Get the number of connections
+  def number_of_connections
+    @html.css('.member-connections')[0].text.gsub("connections", "").strip
+  end
+  # Get list of interests
+  def interests
+    interest_list = Array.new
+    interests = @html.css('#interests').css('.interest')
+    interests = @html.css('#background-interests').css('.interest-item') if is_empty?(interests)
+    interests.each do |interest|
+      interest_list.push(interest.text)
+    end
+    return interest_list
+  end
+  # Save the full html of the page
+  def full_html
+    @profile
+  end
+end

data/lib/picture.rb ADDED Viewed

@@ -0,0 +1,32 @@
+class Picture
+  def initialize(html)
+    @html = html
+  end
+  # Get path to the picture url
+  def picture
+    @html.css('.profile-picture').css('img').first['src']
+  end
+  # Download picture
+  def pic_path
+    if picture
+      # Get path
+      dir = "pictures/"
+      full_path = dir+picture.split("/").last.chomp.strip
+      # Get file
+      `wget -P #{dir} #{picture}` if !File.file?(full_path)
+      delete_duplicate_pics
+      return full_path
+    end
+  end
+  # Deletes duplicate pictures
+  def delete_duplicate_pics
+    pics = Dir["public/uploads/pictures/*.jpg.*"]
+    pics.each do |p|
+      File.delete(p)
+    end
+  end
+end

data/lib/utilities.rb ADDED Viewed

@@ -0,0 +1,6 @@
+module Utilities
+  # Check if item is nil or empty
+  def is_empty?(item)
+    item == nil || item.text.empty?
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: linkedinparser
 version: !ruby/object:Gem::Version
-  version: 0.0.3
+  version: 0.0.4
 platform: ruby
 authors:
 - M. C. McGrath
@@ -15,7 +15,12 @@ email: shidash@shidash.com
 executables: []
 extensions: []
 extra_rdoc_files: []
-files: []
+files:
+- lib/jobs.rb
+- lib/linkedinparser.rb
+- lib/personal_info.rb
+- lib/picture.rb
+- lib/utilities.rb
 homepage: https://github.com/TransparencyToolkit/linkedinparser
 licenses:
 - GPL